|
--- |
|
tags: |
|
- mteb |
|
model-index: |
|
- name: Zhihui_LLM_Embedding |
|
results: |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/CmedqaRetrieval |
|
name: MTEB CmedqaRetrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 29.012 |
|
- type: map_at_10 |
|
value: 41.998000000000005 |
|
- type: map_at_100 |
|
value: 43.821 |
|
- type: map_at_1000 |
|
value: 43.924 |
|
- type: map_at_3 |
|
value: 37.804 |
|
- type: map_at_5 |
|
value: 40.025 |
|
- type: mrr_at_1 |
|
value: 43.536 |
|
- type: mrr_at_10 |
|
value: 51.413 |
|
- type: mrr_at_100 |
|
value: 52.329 |
|
- type: mrr_at_1000 |
|
value: 52.366 |
|
- type: mrr_at_3 |
|
value: 49.058 |
|
- type: mrr_at_5 |
|
value: 50.291 |
|
- type: ndcg_at_1 |
|
value: 43.536 |
|
- type: ndcg_at_10 |
|
value: 48.693 |
|
- type: ndcg_at_100 |
|
value: 55.644000000000005 |
|
- type: ndcg_at_1000 |
|
value: 57.354000000000006 |
|
- type: ndcg_at_3 |
|
value: 43.627 |
|
- type: ndcg_at_5 |
|
value: 45.462 |
|
- type: precision_at_1 |
|
value: 43.536 |
|
- type: precision_at_10 |
|
value: 10.552999999999999 |
|
- type: precision_at_100 |
|
value: 1.624 |
|
- type: precision_at_1000 |
|
value: 0.184 |
|
- type: precision_at_3 |
|
value: 24.314 |
|
- type: precision_at_5 |
|
value: 17.299 |
|
- type: recall_at_1 |
|
value: 29.012 |
|
- type: recall_at_10 |
|
value: 59.123000000000005 |
|
- type: recall_at_100 |
|
value: 87.783 |
|
- type: recall_at_1000 |
|
value: 99.078 |
|
- type: recall_at_3 |
|
value: 43.474000000000004 |
|
- type: recall_at_5 |
|
value: 49.557 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/CovidRetrieval |
|
name: MTEB CovidRetrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 73.235 |
|
- type: map_at_10 |
|
value: 80.87100000000001 |
|
- type: map_at_100 |
|
value: 81.10300000000001 |
|
- type: map_at_1000 |
|
value: 81.105 |
|
- type: map_at_3 |
|
value: 79.171 |
|
- type: map_at_5 |
|
value: 80.163 |
|
- type: mrr_at_1 |
|
value: 73.235 |
|
- type: mrr_at_10 |
|
value: 80.80000000000001 |
|
- type: mrr_at_100 |
|
value: 81.024 |
|
- type: mrr_at_1000 |
|
value: 81.026 |
|
- type: mrr_at_3 |
|
value: 79.15299999999999 |
|
- type: mrr_at_5 |
|
value: 80.133 |
|
- type: ndcg_at_1 |
|
value: 73.34 |
|
- type: ndcg_at_10 |
|
value: 84.387 |
|
- type: ndcg_at_100 |
|
value: 85.348 |
|
- type: ndcg_at_1000 |
|
value: 85.411 |
|
- type: ndcg_at_3 |
|
value: 80.97 |
|
- type: ndcg_at_5 |
|
value: 82.757 |
|
- type: precision_at_1 |
|
value: 73.34 |
|
- type: precision_at_10 |
|
value: 9.631 |
|
- type: precision_at_100 |
|
value: 1.005 |
|
- type: precision_at_1000 |
|
value: 0.101 |
|
- type: precision_at_3 |
|
value: 28.837000000000003 |
|
- type: precision_at_5 |
|
value: 18.209 |
|
- type: recall_at_1 |
|
value: 73.235 |
|
- type: recall_at_10 |
|
value: 95.311 |
|
- type: recall_at_100 |
|
value: 99.473 |
|
- type: recall_at_1000 |
|
value: 100 |
|
- type: recall_at_3 |
|
value: 86.091 |
|
- type: recall_at_5 |
|
value: 90.411 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/DuRetrieval |
|
name: MTEB DuRetrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 28.059 |
|
- type: map_at_10 |
|
value: 85.309 |
|
- type: map_at_100 |
|
value: 87.926 |
|
- type: map_at_1000 |
|
value: 87.945 |
|
- type: map_at_3 |
|
value: 59.862 |
|
- type: map_at_5 |
|
value: 75.345 |
|
- type: mrr_at_1 |
|
value: 93.30000000000001 |
|
- type: mrr_at_10 |
|
value: 95.624 |
|
- type: mrr_at_100 |
|
value: 95.647 |
|
- type: mrr_at_1000 |
|
value: 95.649 |
|
- type: mrr_at_3 |
|
value: 95.42500000000001 |
|
- type: mrr_at_5 |
|
value: 95.572 |
|
- type: ndcg_at_1 |
|
value: 93.30000000000001 |
|
- type: ndcg_at_10 |
|
value: 91.338 |
|
- type: ndcg_at_100 |
|
value: 93.38 |
|
- type: ndcg_at_1000 |
|
value: 93.57 |
|
- type: ndcg_at_3 |
|
value: 90.512 |
|
- type: ndcg_at_5 |
|
value: 89.617 |
|
- type: precision_at_1 |
|
value: 93.30000000000001 |
|
- type: precision_at_10 |
|
value: 43.169999999999995 |
|
- type: precision_at_100 |
|
value: 4.868 |
|
- type: precision_at_1000 |
|
value: 0.49100000000000005 |
|
- type: precision_at_3 |
|
value: 80.7 |
|
- type: precision_at_5 |
|
value: 68.12 |
|
- type: recall_at_1 |
|
value: 28.059 |
|
- type: recall_at_10 |
|
value: 91.949 |
|
- type: recall_at_100 |
|
value: 98.777 |
|
- type: recall_at_1000 |
|
value: 99.816 |
|
- type: recall_at_3 |
|
value: 61.699000000000005 |
|
- type: recall_at_5 |
|
value: 79.134 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/EcomRetrieval |
|
name: MTEB EcomRetrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 57.599999999999994 |
|
- type: map_at_10 |
|
value: 67.169 |
|
- type: map_at_100 |
|
value: 67.654 |
|
- type: map_at_1000 |
|
value: 67.663 |
|
- type: map_at_3 |
|
value: 64.833 |
|
- type: map_at_5 |
|
value: 66.298 |
|
- type: mrr_at_1 |
|
value: 57.599999999999994 |
|
- type: mrr_at_10 |
|
value: 67.169 |
|
- type: mrr_at_100 |
|
value: 67.654 |
|
- type: mrr_at_1000 |
|
value: 67.663 |
|
- type: mrr_at_3 |
|
value: 64.833 |
|
- type: mrr_at_5 |
|
value: 66.298 |
|
- type: ndcg_at_1 |
|
value: 57.599999999999994 |
|
- type: ndcg_at_10 |
|
value: 71.95899999999999 |
|
- type: ndcg_at_100 |
|
value: 74.092 |
|
- type: ndcg_at_1000 |
|
value: 74.323 |
|
- type: ndcg_at_3 |
|
value: 67.212 |
|
- type: ndcg_at_5 |
|
value: 69.892 |
|
- type: precision_at_1 |
|
value: 57.599999999999994 |
|
- type: precision_at_10 |
|
value: 8.7 |
|
- type: precision_at_100 |
|
value: 0.9650000000000001 |
|
- type: precision_at_1000 |
|
value: 0.098 |
|
- type: precision_at_3 |
|
value: 24.7 |
|
- type: precision_at_5 |
|
value: 16.14 |
|
- type: recall_at_1 |
|
value: 57.599999999999994 |
|
- type: recall_at_10 |
|
value: 87 |
|
- type: recall_at_100 |
|
value: 96.5 |
|
- type: recall_at_1000 |
|
value: 98.3 |
|
- type: recall_at_3 |
|
value: 74.1 |
|
- type: recall_at_5 |
|
value: 80.7 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/MMarcoRetrieval |
|
name: MTEB MMarcoRetrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 72.858 |
|
- type: map_at_10 |
|
value: 81.559 |
|
- type: map_at_100 |
|
value: 81.808 |
|
- type: map_at_1000 |
|
value: 81.813 |
|
- type: map_at_3 |
|
value: 80.018 |
|
- type: map_at_5 |
|
value: 81.04299999999999 |
|
- type: mrr_at_1 |
|
value: 75.27199999999999 |
|
- type: mrr_at_10 |
|
value: 81.989 |
|
- type: mrr_at_100 |
|
value: 82.202 |
|
- type: mrr_at_1000 |
|
value: 82.206 |
|
- type: mrr_at_3 |
|
value: 80.647 |
|
- type: mrr_at_5 |
|
value: 81.53399999999999 |
|
- type: ndcg_at_1 |
|
value: 75.27199999999999 |
|
- type: ndcg_at_10 |
|
value: 84.772 |
|
- type: ndcg_at_100 |
|
value: 85.79599999999999 |
|
- type: ndcg_at_1000 |
|
value: 85.925 |
|
- type: ndcg_at_3 |
|
value: 81.884 |
|
- type: ndcg_at_5 |
|
value: 83.60300000000001 |
|
- type: precision_at_1 |
|
value: 75.27199999999999 |
|
- type: precision_at_10 |
|
value: 10.017 |
|
- type: precision_at_100 |
|
value: 1.051 |
|
- type: precision_at_1000 |
|
value: 0.106 |
|
- type: precision_at_3 |
|
value: 30.578 |
|
- type: precision_at_5 |
|
value: 19.261 |
|
- type: recall_at_1 |
|
value: 72.858 |
|
- type: recall_at_10 |
|
value: 94.197 |
|
- type: recall_at_100 |
|
value: 98.634 |
|
- type: recall_at_1000 |
|
value: 99.63499999999999 |
|
- type: recall_at_3 |
|
value: 86.6 |
|
- type: recall_at_5 |
|
value: 90.692 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/MedicalRetrieval |
|
name: MTEB MedicalRetrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 56.2 |
|
- type: map_at_10 |
|
value: 62.198 |
|
- type: map_at_100 |
|
value: 62.794000000000004 |
|
- type: map_at_1000 |
|
value: 62.829 |
|
- type: map_at_3 |
|
value: 60.699999999999996 |
|
- type: map_at_5 |
|
value: 61.660000000000004 |
|
- type: mrr_at_1 |
|
value: 56.49999999999999 |
|
- type: mrr_at_10 |
|
value: 62.348000000000006 |
|
- type: mrr_at_100 |
|
value: 62.944 |
|
- type: mrr_at_1000 |
|
value: 62.979 |
|
- type: mrr_at_3 |
|
value: 60.85 |
|
- type: mrr_at_5 |
|
value: 61.809999999999995 |
|
- type: ndcg_at_1 |
|
value: 56.2 |
|
- type: ndcg_at_10 |
|
value: 65.19200000000001 |
|
- type: ndcg_at_100 |
|
value: 68.341 |
|
- type: ndcg_at_1000 |
|
value: 69.392 |
|
- type: ndcg_at_3 |
|
value: 62.163999999999994 |
|
- type: ndcg_at_5 |
|
value: 63.894 |
|
- type: precision_at_1 |
|
value: 56.2 |
|
- type: precision_at_10 |
|
value: 7.46 |
|
- type: precision_at_100 |
|
value: 0.899 |
|
- type: precision_at_1000 |
|
value: 0.098 |
|
- type: precision_at_3 |
|
value: 22.133 |
|
- type: precision_at_5 |
|
value: 14.12 |
|
- type: recall_at_1 |
|
value: 56.2 |
|
- type: recall_at_10 |
|
value: 74.6 |
|
- type: recall_at_100 |
|
value: 89.9 |
|
- type: recall_at_1000 |
|
value: 98.4 |
|
- type: recall_at_3 |
|
value: 66.4 |
|
- type: recall_at_5 |
|
value: 70.6 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/T2Retrieval |
|
name: MTEB T2Retrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 28.79 |
|
- type: map_at_10 |
|
value: 81.687 |
|
- type: map_at_100 |
|
value: 85.19200000000001 |
|
- type: map_at_1000 |
|
value: 85.232 |
|
- type: map_at_3 |
|
value: 57.145999999999994 |
|
- type: map_at_5 |
|
value: 70.491 |
|
- type: mrr_at_1 |
|
value: 92.21000000000001 |
|
- type: mrr_at_10 |
|
value: 94.303 |
|
- type: mrr_at_100 |
|
value: 94.368 |
|
- type: mrr_at_1000 |
|
value: 94.37 |
|
- type: mrr_at_3 |
|
value: 93.94500000000001 |
|
- type: mrr_at_5 |
|
value: 94.175 |
|
- type: ndcg_at_1 |
|
value: 92.21000000000001 |
|
- type: ndcg_at_10 |
|
value: 88.29599999999999 |
|
- type: ndcg_at_100 |
|
value: 91.268 |
|
- type: ndcg_at_1000 |
|
value: 91.645 |
|
- type: ndcg_at_3 |
|
value: 89.031 |
|
- type: ndcg_at_5 |
|
value: 88.075 |
|
- type: precision_at_1 |
|
value: 92.21000000000001 |
|
- type: precision_at_10 |
|
value: 43.775 |
|
- type: precision_at_100 |
|
value: 5.097 |
|
- type: precision_at_1000 |
|
value: 0.518 |
|
- type: precision_at_3 |
|
value: 77.708 |
|
- type: precision_at_5 |
|
value: 65.473 |
|
- type: recall_at_1 |
|
value: 28.79 |
|
- type: recall_at_10 |
|
value: 87.457 |
|
- type: recall_at_100 |
|
value: 97.21499999999999 |
|
- type: recall_at_1000 |
|
value: 99.14 |
|
- type: recall_at_3 |
|
value: 58.606 |
|
- type: recall_at_5 |
|
value: 73.52300000000001 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/VideoRetrieval |
|
name: MTEB VideoRetrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 67 |
|
- type: map_at_10 |
|
value: 75.44999999999999 |
|
- type: map_at_100 |
|
value: 75.771 |
|
- type: map_at_1000 |
|
value: 75.776 |
|
- type: map_at_3 |
|
value: 73.867 |
|
- type: map_at_5 |
|
value: 74.837 |
|
- type: mrr_at_1 |
|
value: 67 |
|
- type: mrr_at_10 |
|
value: 75.44999999999999 |
|
- type: mrr_at_100 |
|
value: 75.771 |
|
- type: mrr_at_1000 |
|
value: 75.776 |
|
- type: mrr_at_3 |
|
value: 73.867 |
|
- type: mrr_at_5 |
|
value: 74.837 |
|
- type: ndcg_at_1 |
|
value: 67 |
|
- type: ndcg_at_10 |
|
value: 79.313 |
|
- type: ndcg_at_100 |
|
value: 80.894 |
|
- type: ndcg_at_1000 |
|
value: 80.989 |
|
- type: ndcg_at_3 |
|
value: 76.08500000000001 |
|
- type: ndcg_at_5 |
|
value: 77.845 |
|
- type: precision_at_1 |
|
value: 67 |
|
- type: precision_at_10 |
|
value: 9.13 |
|
- type: precision_at_100 |
|
value: 0.987 |
|
- type: precision_at_1000 |
|
value: 0.099 |
|
- type: precision_at_3 |
|
value: 27.500000000000004 |
|
- type: precision_at_5 |
|
value: 17.36 |
|
- type: recall_at_1 |
|
value: 67 |
|
- type: recall_at_10 |
|
value: 91.3 |
|
- type: recall_at_100 |
|
value: 98.7 |
|
- type: recall_at_1000 |
|
value: 99.4 |
|
- type: recall_at_3 |
|
value: 82.5 |
|
- type: recall_at_5 |
|
value: 86.8 |
|
license: cc-by-nc-4.0 |
|
language: |
|
- zh |
|
library_name: transformers |
|
--- |
|
|
|
|
|
|
|
## Zhihui_LLM_Embedding |
|
|
|
### Model Introduction |
|
|
|
**Zhihui_LLM_Embedding** is an embedding model specifically designed to enhance Chinese text retrieval capabilities. It is built on a 7B LLM with an enhanced bidirectional attention mechanism for improved contextual understanding. The model is trained on an extensive corpus from various fields with an extremely large batch size. **Zhihui_LLM_Embedding** excels in retrieval tasks, ranking **1st** on the C-MTEB leaderboard with a leading performance score of **76.74** as of June 25, 2024. |
|
|
|
### Optimization points |
|
* Data source enhancement: Leverages the knowledge of LLMs through three types of distillation methods (GPT-3.5 & GPT-4). |
|
* Data Refinement: LLM scores candidate positive passages to select the most relevant examples. |
|
* Query Rewriting: LLM generates queries that can be answered by positive documents but are unrelated to negatives, thus enhancing the query's quality and diversity. |
|
* Query Expansion: Queries are expanded based on multiple topics for long documents. |
|
* Negative example mining: Use multiple methods and different ranges of negative selection to mine hard negative examples. |
|
* Improved Contrastive Loss: Design a novel InfoNCE loss that assigns higher weights to the harder negative examples to improve the fine-grained feature representation of the model. |
|
* Bidirectional-attention: Remove the causal attention of LLMs during contrastive training of decoder-only LLM to produce rich contextualized representations. |
|
* Training efficiency: Using Gradient Cache to scale contrastive learning batches beyond GPU memory constraints allows the model to learn from more challenging negative examples. |
|
* Others: Dataset-homogeneous batching, cross-batch negative sampling |
|
|
|
### Model Details |
|
* Base Decoder-only LLM: [gte-Qwen2-7B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct) |
|
* Pooling Methods: Last token |
|
* Embedding Dimension: 3584 |
|
|
|
### Usage |
|
##### Requirements |
|
``` |
|
transformers>=4.40.2 |
|
flash_attn>=2.5.8 |
|
sentence-transformers>=2.7.0 |
|
``` |
|
##### How to use |
|
Here is an example of how to encode queries and passages using Hugging Face Transformers and Sentence-Transformers. |
|
##### Usage (HuggingFace Transformers) |
|
```python |
|
import torch |
|
import torch.nn.functional as F |
|
|
|
from torch import Tensor |
|
from transformers import AutoTokenizer, AutoModel |
|
|
|
|
|
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pick, for every sequence in the batch, the hidden state of its last real token.

    Args:
        last_hidden_states: Hidden states indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; its per-row sum
            is used as the number of non-padding tokens.

    Returns:
        One hidden-state vector per batch row.
    """
    # If every row has a real token in the final position, the batch is
    # left-padded and the last position is the last token for all rows.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]

    # Otherwise (right padding) the last real token of each row sits at
    # index ``(number of attended tokens) - 1``.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_size = last_hidden_states.shape[0]
    batch_indices = torch.arange(batch_size, device=last_hidden_states.device)
    return last_hidden_states[batch_indices, sequence_lengths]
|
|
|
|
|
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instruction-prefixed query text.

    Args:
        task_description: One-sentence description of the retrieval task.
        query: The raw query text.

    Returns:
        ``"Instruct: <task_description>"`` and ``"Query: <query>"`` joined by
        a newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'
|
|
|
|
|
|
|
# Each query is prefixed with a one-sentence task instruction;
# the documents are encoded as-is, without an instruction.
task = 'Given a web search query, retrieve relevant passages that answer the query'
queries = [
    get_detailed_instruct(task, "国家法定节假日共多少天"),
    get_detailed_instruct(task, "如何查看好友申请"),
]

documents = [
    "一年国家法定节假日为11天。根据公布的国家法定节假日调整方案,调整的主要内容包括:元旦放假1天不变;春节放假3天,放假时间为农历正月初一、初二、初三;“五一”国际劳动节1天不变;“十一”国庆节放假3天;清明节、端午节、中秋节增设为国家法定节假日,各放假1天(农历节日如遇闰月,以第一个月为休假日)。3、允许周末上移下错,与法定节假日形成连休。",
    "这个直接去我的QQ中心不就好了么那里可以查到 我的好友单向好友好友恢复、 以及好友申请 啊可以是你加别人的 或 别人加你的都可以查得到QQ空间里 这个没注意 要有的话也会在你进空间的时候会提示你的QQ 空间里 上面消息 就可以看见了!望采纳!谢谢这个直接去我的QQ中心不就好了么那里可以查到 我的好友单向好友好友恢复、 以及好友申请 啊可以是你加别人的 或 别人加你的都可以查得到",
]
# Queries come first, so rows [:2] of the embedding matrix are queries
# and rows [2:] are documents.
input_texts = queries + documents

tokenizer = AutoTokenizer.from_pretrained('Lenovo-Zhihui/Zhihui_LLM_Embedding', trust_remote_code=True)
model = AutoModel.from_pretrained('Lenovo-Zhihui/Zhihui_LLM_Embedding', trust_remote_code=True)

max_length = 512

# Tokenize the input texts
batch_dict = tokenizer(input_texts, max_length=max_length, padding=True, truncation=True, return_tensors='pt')

# Inference only: skip autograd bookkeeping to save memory.
with torch.no_grad():
    outputs = model(**batch_dict)
embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])

# normalize embeddings so the dot product below is a cosine similarity
embeddings = F.normalize(embeddings, p=2, dim=1)
scores = (embeddings[:2] @ embeddings[2:].T)
print(scores.tolist())
|
|
|
``` |
|
##### Usage (Sentence-Transformers) |
|
```python |
|
from sentence_transformers import SentenceTransformer |
|
model = SentenceTransformer("Lenovo-Zhihui/Zhihui_LLM_Embedding", trust_remote_code=True)
model.max_seq_length = 512

# Data source: DuRetrieval https://huggingface.co/datasets/C-MTEB/DuRetrieval
queries = [
    "国家法定节假日共多少天",
    "如何查看好友申请",
]
documents = [
    "一年国家法定节假日为11天。根据公布的国家法定节假日调整方案,调整的主要内容包括:元旦放假1天不变;春节放假3天,放假时间为农历正月初一、初二、初三;“五一”国际劳动节1天不变;“十一”国庆节放假3天;清明节、端午节、中秋节增设为国家法定节假日,各放假1天(农历节日如遇闰月,以第一个月为休假日)。3、允许周末上移下错,与法定节假日形成连休。",
    "这个直接去我的QQ中心不就好了么那里可以查到 我的好友单向好友好友恢复、 以及好友申请 啊可以是你加别人的 或 别人加你的都可以查得到QQ空间里 这个没注意 要有的话也会在你进空间的时候会提示你的QQ 空间里 上面消息 就可以看见了!望采纳!谢谢这个直接去我的QQ中心不就好了么那里可以查到 我的好友单向好友好友恢复、 以及好友申请 啊可以是你加别人的 或 别人加你的都可以查得到",
]

# Only the queries are encoded with the "query" prompt; documents use none.
query_embeddings = model.encode(queries, prompt_name="query", normalize_embeddings=True)
document_embeddings = model.encode(documents, normalize_embeddings=True)

# Embeddings are normalized, so the dot product is a cosine similarity.
scores = (query_embeddings @ document_embeddings.T)
print(scores.tolist())
|
``` |
|
### Reproduce our results (C-MTEB) |
|
Check out scripts/eval_mteb.py to reproduce the evaluation results on the C-MTEB benchmark. |
|
|
|
| Model | T2Retrieval | MMarcoRetrieval | DuRetrieval | CovidRetrieval | CmedqaRetrieval | EcomRetrieval | MedicalRetrieval | VideoRetrieval | Avg | |
|
|:-------------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:| |
|
|**Zhihui_LLM_Embedding** | 88.30 | 84.77 | 91.34 | 84.39 | 48.69 | 71.96 | 65.19 | 79.31 | **76.74** | |
|
|zpoint_large_embedding_zh | 83.81 | 82.38 | 89.23 | 89.14 | 47.16 | 70.74 | 68.14 | 80.26 | 76.36 | |
|
|gte-Qwen2-7B-instruct | 87.73 | 85.16 | 87.44 | 83.65 | 48.69 | 71.15 | 65.59 | 78.84 | 76.03 | |
|
|360Zhinao-search | 87.12 | 83.32 | 87.57 | 85.02 | 46.73 | 68.9 | 63.69 | 78.09 | 75.06 | |
|
|AGE_Hybrid | 86.88 | 80.65 | 89.28 | 83.66 | 47.26 | 69.28 | 65.94 | 76.79 | 74.97 | |