arthurbresnu committed on
Commit
cf41c3b
·
1 Parent(s): 1a3b1c5

Adapt to cosine and update readme

Browse files
3_SparseAutoEncoder/clearn_nfcorpus.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:95fadff86747200fd73530b43fcb90fd9584b0c5862c7a3726522ef5b61bc27a
3
- size 268650896
 
 
 
 
README.md CHANGED
@@ -25,39 +25,39 @@ model-index:
25
  - eng-Latn
26
  metrics:
27
  - type: ndcg@1
28
- value: 0.55108
29
  - type: ndcg@3
30
- value: 0.50489
31
  - type: ndcg@5
32
- value: 0.48476
33
  - type: ndcg@10
34
- value: 0.45184
35
  - type: ndcg@20
36
- value: 0.42492
37
  - type: ndcg@100
38
- value: 0.42262
39
  - type: ndcg@1000
40
- value: 0.51072
41
  - type: map@10
42
- value: 0.17877
43
  - type: map@100
44
- value: 0.23468
45
  - type: map@1000
46
- value: 0.2548
47
  - type: recall@10
48
- value: 0.22986
49
  - type: recall@100
50
- value: 0.43691
51
  - type: recall@1000
52
- value: 0.76026
53
  - type: precision@1
54
- value: 0.56347
55
  - type: precision@10
56
- value: 0.33437
57
  - type: mrr@10
58
- value: 0.657083
59
  - type: main_score
60
- value: 0.45184
61
  task:
62
  type: Retrieval
63
  ---
@@ -75,25 +75,33 @@ We recommend using ``Transformers 4.47.0.``
75
  You can evaluate this model loaded by Sentence Transformers with the following code snippet:
76
  ```python
77
  import mteb
78
- from sentence_transformers import SentenceTransformer
79
- model = SentenceTransformer(
80
- "Y-Research-Group/CSR-NV_Embed_v2-Retrieval-NFcorpus ",
81
- trust_remote_code=True
82
- )
83
  model.prompts = {
84
- "NFCorpus-query": "Instruct: Given a question, retrieve relevant documents that answer the question\nQuery:"
85
  }
 
86
  task = mteb.get_tasks(tasks=["NFCorpus"])
87
  evaluation = mteb.MTEB(tasks=task)
88
- evaluation.run(model, eval_splits=["test"], output_folder="./results/NFCorpus",
89
- batch_size=32, show_progress_bar=True)
 
 
 
 
 
90
  ```
91
 
92
  ## Citation
93
  ```bibtex
94
- @inproceedings{wenbeyond,
95
- title={Beyond Matryoshka: Revisiting Sparse Coding for Adaptive Representation},
96
- author={Wen, Tiansheng and Wang, Yifei and Zeng, Zequn and Peng, Zhong and Su, Yudi and Liu, Xinyang and Chen, Bo and Liu, Hongwei and Jegelka, Stefanie and You, Chenyu},
97
- booktitle={Forty-second International Conference on Machine Learning}
 
 
 
 
98
  }
99
  ```
 
25
  - eng-Latn
26
  metrics:
27
  - type: ndcg@1
28
+ value: 0.43189
29
  - type: ndcg@3
30
+ value: 0.41132
31
  - type: ndcg@5
32
+ value: 0.40406
33
  - type: ndcg@10
34
+ value: 0.39624
35
  - type: ndcg@20
36
+ value: 0.38517
37
  - type: ndcg@100
38
+ value: 0.40068
39
  - type: ndcg@1000
40
+ value: 0.49126
41
  - type: map@10
42
+ value: 0.14342
43
  - type: map@100
44
+ value: 0.21866
45
  - type: map@1000
46
+ value: 0.2427
47
  - type: recall@10
48
+ value: 0.1968
49
  - type: recall@100
50
+ value: 0.45592
51
  - type: recall@1000
52
+ value: 0.78216
53
  - type: precision@1
54
+ value: 0.45511
55
  - type: precision@10
56
+ value: 0.32353
57
  - type: mrr@10
58
+ value: 0.537792
59
  - type: main_score
60
+ value: 0.39624
61
  task:
62
  type: Retrieval
63
  ---
 
75
  You can evaluate this model loaded by Sentence Transformers with the following code snippet:
76
  ```python
77
  import mteb
78
+ from sentence_transformers import SparseEncoder
79
+
80
+ model = SparseEncoder("Y-Research-Group/CSR-NV_Embed_v2-Retrieval-NFcorpus", trust_remote_code=True)
 
 
81
  model.prompts = {
82
+ "NFCorpus-query": "Instruct: Given a question, retrieve relevant documents that answer the question\nQuery:"
83
  }
84
+
85
  task = mteb.get_tasks(tasks=["NFCorpus"])
86
  evaluation = mteb.MTEB(tasks=task)
87
+ evaluation.run(
88
+ model,
89
+ eval_splits=["test"],
90
+ output_folder="./results/NFCorpus",
91
+ show_progress_bar=True,
92
+ encode_kwargs={"convert_to_sparse_tensor": False, "batch_size": 8},
93
+ ) # MTEB doesn't support sparse tensors yet, so we need to convert to dense tensors
94
  ```
95
 
96
  ## Citation
97
  ```bibtex
98
+ @misc{wen2025matryoshkarevisitingsparsecoding,
99
+ title={Beyond Matryoshka: Revisiting Sparse Coding for Adaptive Representation},
100
+ author={Tiansheng Wen and Yifei Wang and Zequn Zeng and Zhong Peng and Yudi Su and Xinyang Liu and Bo Chen and Hongwei Liu and Stefanie Jegelka and Chenyu You},
101
+ year={2025},
102
+ eprint={2503.01776},
103
+ archivePrefix={arXiv},
104
+ primaryClass={cs.LG},
105
+ url={https://arxiv.org/abs/2503.01776},
106
  }
107
  ```
config_sentence_transformers.json CHANGED
@@ -1,27 +1,15 @@
1
  {
2
  "__version__": {
3
- "sentence_transformers": "4.2.0",
4
  "transformers": "4.47.0",
5
  "pytorch": "2.5.1+cu12"
6
  },
7
  "prompts": {
8
- "Banking77Classification": "Instruct: Given a question, please describe the intent of this question. \n Question: ",
9
- "MTOPIntentClassification": "Instruct: Given a question, please describe the intent of this question. \n Question: ",
10
- "TweetSentimentClassification": "Classify the sentiment of a given tweet as either positive, negative, or neutral.",
11
- "BiorxivClusteringP2P.v2": "Identify the main category of Biorxiv papers based on the titles and abstracts",
12
- "BiorxivClusteringS2S.v2": "Identify the main category of Biorxiv papers based on the titles",
13
- "TwentyNewsgroupsClustering": "Identify the topic or theme of the given news articles",
14
- "FiQA2018": {
15
- "query": "Given a financial question, retrieve relevant passages that answer the query"
16
- },
17
- "SciFact": {
18
- "query": "Given a scientific claim, retrieve documents that support or refute the claim"
19
- },
20
  "NFCorpus": {
21
- "query": "Given a question, retrieve relevant documents that answer the question"
22
  }
23
  },
24
  "default_prompt_name": null,
25
  "model_type": "SparseEncoder",
26
- "similarity_fn_name": "dot"
27
  }
 
1
  {
2
  "__version__": {
3
+ "sentence_transformers": "5.0.0",
4
  "transformers": "4.47.0",
5
  "pytorch": "2.5.1+cu12"
6
  },
7
  "prompts": {
 
 
 
 
 
 
 
 
 
 
 
 
8
  "NFCorpus": {
9
+ "query": "Instruct: Given a question, retrieve relevant documents that answer the question. \n Question: "
10
  }
11
  },
12
  "default_prompt_name": null,
13
  "model_type": "SparseEncoder",
14
+ "similarity_fn_name": "cosine"
15
  }