Commit
·
cf41c3b
1
Parent(s):
1a3b1c5
Adapt to cosine and update readme
Browse files- 3_SparseAutoEncoder/clearn_nfcorpus.pth +0 -3
- README.md +37 -29
- config_sentence_transformers.json +3 -15
3_SparseAutoEncoder/clearn_nfcorpus.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:95fadff86747200fd73530b43fcb90fd9584b0c5862c7a3726522ef5b61bc27a
|
| 3 |
-
size 268650896
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -25,39 +25,39 @@ model-index:
|
|
| 25 |
- eng-Latn
|
| 26 |
metrics:
|
| 27 |
- type: ndcg@1
|
| 28 |
-
value: 0.
|
| 29 |
- type: ndcg@3
|
| 30 |
-
value: 0.
|
| 31 |
- type: ndcg@5
|
| 32 |
-
value: 0.
|
| 33 |
- type: ndcg@10
|
| 34 |
-
value: 0.
|
| 35 |
- type: ndcg@20
|
| 36 |
-
value: 0.
|
| 37 |
- type: ndcg@100
|
| 38 |
-
value: 0.
|
| 39 |
- type: ndcg@1000
|
| 40 |
-
value: 0.
|
| 41 |
- type: map@10
|
| 42 |
-
value: 0.
|
| 43 |
- type: map@100
|
| 44 |
-
value: 0.
|
| 45 |
- type: map@1000
|
| 46 |
-
value: 0.
|
| 47 |
- type: recall@10
|
| 48 |
-
value: 0.
|
| 49 |
- type: recall@100
|
| 50 |
-
value: 0.
|
| 51 |
- type: recall@1000
|
| 52 |
-
value: 0.
|
| 53 |
- type: precision@1
|
| 54 |
-
value: 0.
|
| 55 |
- type: precision@10
|
| 56 |
-
value: 0.
|
| 57 |
- type: mrr@10
|
| 58 |
-
value: 0.
|
| 59 |
- type: main_score
|
| 60 |
-
value: 0.
|
| 61 |
task:
|
| 62 |
type: Retrieval
|
| 63 |
---
|
|
@@ -75,25 +75,33 @@ We recommend using ``Transformers 4.47.0.``
|
|
| 75 |
You can evaluate this model loaded by Sentence Transformers with the following code snippet:
|
| 76 |
```python
|
| 77 |
import mteb
|
| 78 |
-
from sentence_transformers import
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
trust_remote_code=True
|
| 82 |
-
)
|
| 83 |
model.prompts = {
|
| 84 |
-
|
| 85 |
}
|
|
|
|
| 86 |
task = mteb.get_tasks(tasks=["NFCorpus"])
|
| 87 |
evaluation = mteb.MTEB(tasks=task)
|
| 88 |
-
evaluation.run(
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
```
|
| 91 |
|
| 92 |
## Citation
|
| 93 |
```bibtex
|
| 94 |
-
@
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
}
|
| 99 |
```
|
|
|
|
| 25 |
- eng-Latn
|
| 26 |
metrics:
|
| 27 |
- type: ndcg@1
|
| 28 |
+
value: 0.43189
|
| 29 |
- type: ndcg@3
|
| 30 |
+
value: 0.41132
|
| 31 |
- type: ndcg@5
|
| 32 |
+
value: 0.40406
|
| 33 |
- type: ndcg@10
|
| 34 |
+
value: 0.39624
|
| 35 |
- type: ndcg@20
|
| 36 |
+
value: 0.38517
|
| 37 |
- type: ndcg@100
|
| 38 |
+
value: 0.40068
|
| 39 |
- type: ndcg@1000
|
| 40 |
+
value: 0.49126
|
| 41 |
- type: map@10
|
| 42 |
+
value: 0.14342
|
| 43 |
- type: map@100
|
| 44 |
+
value: 0.21866
|
| 45 |
- type: map@1000
|
| 46 |
+
value: 0.2427
|
| 47 |
- type: recall@10
|
| 48 |
+
value: 0.1968
|
| 49 |
- type: recall@100
|
| 50 |
+
value: 0.45592
|
| 51 |
- type: recall@1000
|
| 52 |
+
value: 0.78216
|
| 53 |
- type: precision@1
|
| 54 |
+
value: 0.45511
|
| 55 |
- type: precision@10
|
| 56 |
+
value: 0.32353
|
| 57 |
- type: mrr@10
|
| 58 |
+
value: 0.537792
|
| 59 |
- type: main_score
|
| 60 |
+
value: 0.39624
|
| 61 |
task:
|
| 62 |
type: Retrieval
|
| 63 |
---
|
|
|
|
| 75 |
You can evaluate this model loaded by Sentence Transformers with the following code snippet:
|
| 76 |
```python
|
| 77 |
import mteb
|
| 78 |
+
from sentence_transformers import SparseEncoder
|
| 79 |
+
|
| 80 |
+
model = SparseEncoder("Y-Research-Group/CSR-NV_Embed_v2-Retrieval-NFcorpus", trust_remote_code=True)
|
|
|
|
|
|
|
| 81 |
model.prompts = {
|
| 82 |
+
"NFCorpus-query": "Instruct: Given a question, retrieve relevant documents that answer the question\nQuery:"
|
| 83 |
}
|
| 84 |
+
|
| 85 |
task = mteb.get_tasks(tasks=["NFCorpus"])
|
| 86 |
evaluation = mteb.MTEB(tasks=task)
|
| 87 |
+
evaluation.run(
|
| 88 |
+
model,
|
| 89 |
+
eval_splits=["test"],
|
| 90 |
+
output_folder="./results/NFCorpus",
|
| 91 |
+
show_progress_bar=True,
|
| 92 |
+
encode_kwargs={"convert_to_sparse_tensor": False, "batch_size": 8},
|
| 93 |
+
) # MTEB doesn't support sparse tensors yet, so we need to convert to dense tensors
|
| 94 |
```
|
| 95 |
|
| 96 |
## Citation
|
| 97 |
```bibtex
|
| 98 |
+
@misc{wen2025matryoshkarevisitingsparsecoding,
|
| 99 |
+
title={Beyond Matryoshka: Revisiting Sparse Coding for Adaptive Representation},
|
| 100 |
+
author={Tiansheng Wen and Yifei Wang and Zequn Zeng and Zhong Peng and Yudi Su and Xinyang Liu and Bo Chen and Hongwei Liu and Stefanie Jegelka and Chenyu You},
|
| 101 |
+
year={2025},
|
| 102 |
+
eprint={2503.01776},
|
| 103 |
+
archivePrefix={arXiv},
|
| 104 |
+
primaryClass={cs.LG},
|
| 105 |
+
url={https://arxiv.org/abs/2503.01776},
|
| 106 |
}
|
| 107 |
```
|
config_sentence_transformers.json
CHANGED
|
@@ -1,27 +1,15 @@
|
|
| 1 |
{
|
| 2 |
"__version__": {
|
| 3 |
-
"sentence_transformers": "
|
| 4 |
"transformers": "4.47.0",
|
| 5 |
"pytorch": "2.5.1+cu12"
|
| 6 |
},
|
| 7 |
"prompts": {
|
| 8 |
-
"Banking77Classification": "Instruct: Given a question, please describe the intent of this question. \n Question: ",
|
| 9 |
-
"MTOPIntentClassification": "Instruct: Given a question, please describe the intent of this question. \n Question: ",
|
| 10 |
-
"TweetSentimentClassification": "Classify the sentiment of a given tweet as either positive, negative, or neutral.",
|
| 11 |
-
"BiorxivClusteringP2P.v2": "Identify the main category of Biorxiv papers based on the titles and abstracts",
|
| 12 |
-
"BiorxivClusteringS2S.v2": "Identify the main category of Biorxiv papers based on the titles",
|
| 13 |
-
"TwentyNewsgroupsClustering": "Identify the topic or theme of the given news articles",
|
| 14 |
-
"FiQA2018": {
|
| 15 |
-
"query": "Given a financial question, retrieve relevant passages that answer the query"
|
| 16 |
-
},
|
| 17 |
-
"SciFact": {
|
| 18 |
-
"query": "Given a scientific claim, retrieve documents that support or refute the claim"
|
| 19 |
-
},
|
| 20 |
"NFCorpus": {
|
| 21 |
-
"query": "Given a question, retrieve relevant documents that answer the question"
|
| 22 |
}
|
| 23 |
},
|
| 24 |
"default_prompt_name": null,
|
| 25 |
"model_type": "SparseEncoder",
|
| 26 |
-
"similarity_fn_name": "
|
| 27 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"__version__": {
|
| 3 |
+
"sentence_transformers": "5.0.0",
|
| 4 |
"transformers": "4.47.0",
|
| 5 |
"pytorch": "2.5.1+cu12"
|
| 6 |
},
|
| 7 |
"prompts": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"NFCorpus": {
|
| 9 |
+
"query": "Instruct: Given a question, retrieve relevant documents that answer the question. \n Question: "
|
| 10 |
}
|
| 11 |
},
|
| 12 |
"default_prompt_name": null,
|
| 13 |
"model_type": "SparseEncoder",
|
| 14 |
+
"similarity_fn_name": "cosine"
|
| 15 |
}
|