|
--- |
|
language: |
|
- af |
|
- ar |
|
- az |
|
- be |
|
- bg |
|
- bn |
|
- ca |
|
- ceb |
|
- cs |
|
- cy |
|
- da |
|
- de |
|
- el |
|
- en |
|
- es |
|
- et |
|
- eu |
|
- fa |
|
- fi |
|
- fr |
|
- gl |
|
- gu |
|
- he |
|
- hi |
|
- hr |
|
- ht |
|
- hu |
|
- hy |
|
- id |
|
- is |
|
- it |
|
- ja |
|
- jv |
|
- ka |
|
- kk |
|
- km |
|
- kn |
|
- ko |
|
- ky |
|
- lo |
|
- lt |
|
- lv |
|
- mk |
|
- ml |
|
- mn |
|
- mr |
|
- ms |
|
- my |
|
- ne |
|
- nl |
|
- 'no' |
|
- pa |
|
- pl |
|
- pt |
|
- qu |
|
- ro |
|
- ru |
|
- si |
|
- sk |
|
- sl |
|
- so |
|
- sq |
|
- sr |
|
- sv |
|
- sw |
|
- ta |
|
- te |
|
- th |
|
- tl |
|
- tr |
|
- uk |
|
- ur |
|
- vi |
|
- yo |
|
- zh |
|
license: apache-2.0 |
|
model-index: |
|
- name: gte-multilingual-base (dense) |
|
results: |
|
- dataset: |
|
config: default |
|
name: MTEB 8TagsClustering |
|
revision: None |
|
split: test |
|
type: PL-MTEB/8tags-clustering |
|
metrics: |
|
- type: v_measure |
|
value: 33.66681726329994 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB AFQMC |
|
revision: b44c3b011063adb25877c13823db83bb193913c4 |
|
split: validation |
|
type: C-MTEB/AFQMC |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 43.54760696384009 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB ATEC |
|
revision: 0f319b1142f28d00e055a6770f3f726ae9b7d865 |
|
split: test |
|
type: C-MTEB/ATEC |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 48.91186363417501 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB AllegroReviews |
|
revision: None |
|
split: test |
|
type: PL-MTEB/allegro-reviews |
|
metrics: |
|
- type: accuracy |
|
value: 41.689860834990064 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB AlloProfClusteringP2P |
|
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b |
|
split: test |
|
type: lyon-nlp/alloprof |
|
metrics: |
|
- type: v_measure |
|
value: 54.20241337977897 |
|
- type: v_measure |
|
value: 44.34083695608643 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB AlloprofReranking |
|
revision: 666fdacebe0291776e86f29345663dfaf80a0db9 |
|
split: test |
|
type: lyon-nlp/mteb-fr-reranking-alloprof-s2p |
|
metrics: |
|
- type: map |
|
value: 64.91495250072002 |
|
task: |
|
type: Reranking |
|
- dataset: |
|
config: default |
|
name: MTEB AlloprofRetrieval |
|
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b |
|
split: test |
|
type: lyon-nlp/alloprof |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 53.638 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: en |
|
name: MTEB AmazonCounterfactualClassification (en) |
|
revision: e8379541af4e31359cca9fbcf4b00f2671dba205 |
|
split: test |
|
type: mteb/amazon_counterfactual |
|
metrics: |
|
- type: accuracy |
|
value: 75.95522388059702 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB AmazonPolarityClassification |
|
revision: e2d317d38cd51312af73b3d32a06d1a08b442046 |
|
split: test |
|
type: mteb/amazon_polarity |
|
metrics: |
|
- type: accuracy |
|
value: 80.717625 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: en |
|
name: MTEB AmazonReviewsClassification (en) |
|
revision: 1399c76144fd37290681b995c656ef9b2e06e26d |
|
split: test |
|
type: mteb/amazon_reviews_multi |
|
metrics: |
|
- type: accuracy |
|
value: 43.64199999999999 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: de |
|
name: MTEB AmazonReviewsClassification (de) |
|
revision: 1399c76144fd37290681b995c656ef9b2e06e26d |
|
split: test |
|
type: mteb/amazon_reviews_multi |
|
metrics: |
|
- type: accuracy |
|
value: 40.108 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: es |
|
name: MTEB AmazonReviewsClassification (es) |
|
revision: 1399c76144fd37290681b995c656ef9b2e06e26d |
|
split: test |
|
type: mteb/amazon_reviews_multi |
|
metrics: |
|
- type: accuracy |
|
value: 40.169999999999995 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: fr |
|
name: MTEB AmazonReviewsClassification (fr) |
|
revision: 1399c76144fd37290681b995c656ef9b2e06e26d |
|
split: test |
|
type: mteb/amazon_reviews_multi |
|
metrics: |
|
- type: accuracy |
|
value: 39.56799999999999 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ja |
|
name: MTEB AmazonReviewsClassification (ja) |
|
revision: 1399c76144fd37290681b995c656ef9b2e06e26d |
|
split: test |
|
type: mteb/amazon_reviews_multi |
|
metrics: |
|
- type: accuracy |
|
value: 35.75000000000001 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: zh |
|
name: MTEB AmazonReviewsClassification (zh) |
|
revision: 1399c76144fd37290681b995c656ef9b2e06e26d |
|
split: test |
|
type: mteb/amazon_reviews_multi |
|
metrics: |
|
- type: accuracy |
|
value: 33.342000000000006 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB ArguAna |
|
revision: c22ab2a51041ffd869aaddef7af8d8215647e41a |
|
split: test |
|
type: mteb/arguana |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 58.231 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB ArguAna-PL |
|
revision: 63fc86750af76253e8c760fc9e534bbf24d260a2 |
|
split: test |
|
type: clarin-knext/arguana-pl |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 53.166000000000004 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB ArxivClusteringP2P |
|
revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d |
|
split: test |
|
type: mteb/arxiv-clustering-p2p |
|
metrics: |
|
- type: v_measure |
|
value: 46.01900557959478 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB ArxivClusteringS2S |
|
revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53 |
|
split: test |
|
type: mteb/arxiv-clustering-s2s |
|
metrics: |
|
- type: v_measure |
|
value: 41.06626465345723 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB AskUbuntuDupQuestions |
|
revision: 2000358ca161889fa9c082cb41daa8dcfb161a54 |
|
split: test |
|
type: mteb/askubuntudupquestions-reranking |
|
metrics: |
|
- type: map |
|
value: 61.87514497610431 |
|
task: |
|
type: Reranking |
|
- dataset: |
|
config: default |
|
name: MTEB BIOSSES |
|
revision: d3fb88f8f02e40887cd149695127462bbcf29b4a |
|
split: test |
|
type: mteb/biosses-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 81.21450112991194 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB BQ |
|
revision: e3dda5e115e487b39ec7e618c0c6a29137052a55 |
|
split: test |
|
type: C-MTEB/BQ |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 51.71589543397271 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB BSARDRetrieval |
|
revision: 5effa1b9b5fa3b0f9e12523e6e43e5f86a6e6d59 |
|
split: test |
|
type: maastrichtlawtech/bsard |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 26.115 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: de-en |
|
name: MTEB BUCC (de-en) |
|
revision: d51519689f32196a32af33b075a01d0e7c51e252 |
|
split: test |
|
type: mteb/bucc-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 98.6169102296451 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: fr-en |
|
name: MTEB BUCC (fr-en) |
|
revision: d51519689f32196a32af33b075a01d0e7c51e252 |
|
split: test |
|
type: mteb/bucc-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 97.89603052314916 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ru-en |
|
name: MTEB BUCC (ru-en) |
|
revision: d51519689f32196a32af33b075a01d0e7c51e252 |
|
split: test |
|
type: mteb/bucc-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 97.12388869645537 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: zh-en |
|
name: MTEB BUCC (zh-en) |
|
revision: d51519689f32196a32af33b075a01d0e7c51e252 |
|
split: test |
|
type: mteb/bucc-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 98.15692469720906 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: default |
|
name: MTEB Banking77Classification |
|
revision: 0fd18e25b25c072e09e0d92ab615fda904d66300 |
|
split: test |
|
type: mteb/banking77 |
|
metrics: |
|
- type: accuracy |
|
value: 85.36038961038962 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB BiorxivClusteringP2P |
|
revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40 |
|
split: test |
|
type: mteb/biorxiv-clustering-p2p |
|
metrics: |
|
- type: v_measure |
|
value: 37.5903826674123 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB BiorxivClusteringS2S |
|
revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908 |
|
split: test |
|
type: mteb/biorxiv-clustering-s2s |
|
metrics: |
|
- type: v_measure |
|
value: 34.21474277151329 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB CBD |
|
revision: None |
|
split: test |
|
type: PL-MTEB/cbd |
|
metrics: |
|
- type: accuracy |
|
value: 62.519999999999996 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB CDSC-E |
|
revision: None |
|
split: test |
|
type: PL-MTEB/cdsce-pairclassification |
|
metrics: |
|
- type: cos_sim_ap |
|
value: 74.90132799162956 |
|
task: |
|
type: PairClassification |
|
- dataset: |
|
config: default |
|
name: MTEB CDSC-R |
|
revision: None |
|
split: test |
|
type: PL-MTEB/cdscr-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 90.30727955142524 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB CLSClusteringP2P |
|
revision: 4b6227591c6c1a73bc76b1055f3b7f3588e72476 |
|
split: test |
|
type: C-MTEB/CLSClusteringP2P |
|
metrics: |
|
- type: v_measure |
|
value: 37.94850105022274 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB CLSClusteringS2S |
|
revision: e458b3f5414b62b7f9f83499ac1f5497ae2e869f |
|
split: test |
|
type: C-MTEB/CLSClusteringS2S |
|
metrics: |
|
- type: v_measure |
|
value: 38.11958675421534 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB CMedQAv1 |
|
revision: 8d7f1e942507dac42dc58017c1a001c3717da7df |
|
split: test |
|
type: C-MTEB/CMedQAv1-reranking |
|
metrics: |
|
- type: map |
|
value: 86.10950950485399 |
|
task: |
|
type: Reranking |
|
- dataset: |
|
config: default |
|
name: MTEB CMedQAv2 |
|
revision: 23d186750531a14a0357ca22cd92d712fd512ea0 |
|
split: test |
|
type: C-MTEB/CMedQAv2-reranking |
|
metrics: |
|
- type: map |
|
value: 87.28038294231966 |
|
task: |
|
type: Reranking |
|
- dataset: |
|
config: default |
|
name: MTEB CQADupstackAndroidRetrieval |
|
revision: f46a197baaae43b4f621051089b82a364682dfeb |
|
split: test |
|
type: mteb/cqadupstack-android |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 47.099000000000004 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB CQADupstackEnglishRetrieval |
|
revision: ad9991cb51e31e31e430383c75ffb2885547b5f0 |
|
split: test |
|
type: mteb/cqadupstack-english |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 45.973000000000006 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB CQADupstackGamingRetrieval |
|
revision: 4885aa143210c98657558c04aaf3dc47cfb54340 |
|
split: test |
|
type: mteb/cqadupstack-gaming |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 55.606 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB CQADupstackGisRetrieval |
|
revision: 5003b3064772da1887988e05400cf3806fe491f2 |
|
split: test |
|
type: mteb/cqadupstack-gis |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 36.638 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB CQADupstackMathematicaRetrieval |
|
revision: 90fceea13679c63fe563ded68f3b6f06e50061de |
|
split: test |
|
type: mteb/cqadupstack-mathematica |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 30.711 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB CQADupstackPhysicsRetrieval |
|
revision: 79531abbd1fb92d06c6d6315a0cbbbf5bb247ea4 |
|
split: test |
|
type: mteb/cqadupstack-physics |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 44.523 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB CQADupstackProgrammersRetrieval |
|
revision: 6184bc1440d2dbc7612be22b50686b8826d22b32 |
|
split: test |
|
type: mteb/cqadupstack-programmers |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 37.940000000000005 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB CQADupstackRetrieval |
|
revision: 4ffe81d471b1924886b33c7567bfb200e9eec5c4 |
|
split: test |
|
type: mteb/cqadupstack |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 38.12183333333333 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB CQADupstackStatsRetrieval |
|
revision: 65ac3a16b8e91f9cee4c9828cc7c335575432a2a |
|
split: test |
|
type: mteb/cqadupstack-stats |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 32.684000000000005 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB CQADupstackTexRetrieval |
|
revision: 46989137a86843e03a6195de44b09deda022eec7 |
|
split: test |
|
type: mteb/cqadupstack-tex |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 26.735 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB CQADupstackUnixRetrieval |
|
revision: 6c6430d3a6d36f8d2a829195bc5dc94d7e063e53 |
|
split: test |
|
type: mteb/cqadupstack-unix |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 36.933 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB CQADupstackWebmastersRetrieval |
|
revision: 160c094312a0e1facb97e55eeddb698c0abe3571 |
|
split: test |
|
type: mteb/cqadupstack-webmasters |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 33.747 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB CQADupstackWordpressRetrieval |
|
revision: 4ffe81d471b1924886b33c7567bfb200e9eec5c4 |
|
split: test |
|
type: mteb/cqadupstack-wordpress |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 28.872999999999998 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB ClimateFEVER |
|
revision: 47f2ac6acb640fc46020b02a5b59fdda04d39380 |
|
split: test |
|
type: mteb/climate-fever |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 34.833 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB CmedqaRetrieval |
|
revision: cd540c506dae1cf9e9a59c3e06f42030d54e7301 |
|
split: dev |
|
type: C-MTEB/CmedqaRetrieval |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 43.78 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB Cmnli |
|
revision: 41bc36f332156f7adc9e38f53777c959b2ae9766 |
|
split: validation |
|
type: C-MTEB/CMNLI |
|
metrics: |
|
- type: cos_sim_ap |
|
value: 84.00640599186677 |
|
task: |
|
type: PairClassification |
|
- dataset: |
|
config: default |
|
name: MTEB CovidRetrieval |
|
revision: 1271c7809071a13532e05f25fb53511ffce77117 |
|
split: dev |
|
type: C-MTEB/CovidRetrieval |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 80.60000000000001 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB DBPedia |
|
revision: c0f706b76e590d620bd6618b3ca8efdd34e2d659 |
|
split: test |
|
type: mteb/dbpedia |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 40.116 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB DBPedia-PL |
|
revision: 76afe41d9af165cc40999fcaa92312b8b012064a |
|
split: test |
|
type: clarin-knext/dbpedia-pl |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 32.498 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB DuRetrieval |
|
revision: a1a333e290fe30b10f3f56498e3a0d911a693ced |
|
split: dev |
|
type: C-MTEB/DuRetrieval |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 87.547 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB EcomRetrieval |
|
revision: 687de13dc7294d6fd9be10c6945f9e8fec8166b9 |
|
split: dev |
|
type: C-MTEB/EcomRetrieval |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 64.85 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB EmotionClassification |
|
revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37 |
|
split: test |
|
type: mteb/emotion |
|
metrics: |
|
- type: accuracy |
|
value: 47.949999999999996 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB FEVER |
|
revision: bea83ef9e8fb933d90a2f1d5515737465d613e12 |
|
split: test |
|
type: mteb/fever |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 92.111 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB FiQA-PL |
|
revision: 2e535829717f8bf9dc829b7f911cc5bbd4e6608e |
|
split: test |
|
type: clarin-knext/fiqa-pl |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 28.962 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB FiQA2018 |
|
revision: 27a168819829fe9bcd655c2df245fb19452e8e06 |
|
split: test |
|
type: mteb/fiqa |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 45.005 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB HALClusteringS2S |
|
revision: e06ebbbb123f8144bef1a5d18796f3dec9ae2915 |
|
split: test |
|
type: lyon-nlp/clustering-hal-s2s |
|
metrics: |
|
- type: v_measure |
|
value: 25.133776435657595 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB HotpotQA |
|
revision: ab518f4d6fcca38d87c25209f94beba119d02014 |
|
split: test |
|
type: mteb/hotpotqa |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 63.036 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB HotpotQA-PL |
|
revision: a0bd479ac97b4ccb5bd6ce320c415d0bb4beb907 |
|
split: test |
|
type: clarin-knext/hotpotqa-pl |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 56.904999999999994 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB IFlyTek |
|
revision: 421605374b29664c5fc098418fe20ada9bd55f8a |
|
split: validation |
|
type: C-MTEB/IFlyTek-classification |
|
metrics: |
|
- type: accuracy |
|
value: 44.59407464409388 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB ImdbClassification |
|
revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7 |
|
split: test |
|
type: mteb/imdb |
|
metrics: |
|
- type: accuracy |
|
value: 74.912 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB JDReview |
|
revision: b7c64bd89eb87f8ded463478346f76731f07bf8b |
|
split: test |
|
type: C-MTEB/JDReview-classification |
|
metrics: |
|
- type: accuracy |
|
value: 79.26829268292683 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB LCQMC |
|
revision: 17f9b096f80380fce5ed12a9be8be7784b337daf |
|
split: test |
|
type: C-MTEB/LCQMC |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 74.8601229809791 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB MLSUMClusteringP2P |
|
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7 |
|
split: test |
|
type: mlsum |
|
metrics: |
|
- type: v_measure |
|
value: 42.331902754246556 |
|
- type: v_measure |
|
value: 40.92029335502153 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB MMarcoReranking |
|
revision: 8e0c766dbe9e16e1d221116a3f36795fbade07f6 |
|
split: dev |
|
type: C-MTEB/Mmarco-reranking |
|
metrics: |
|
- type: map |
|
value: 32.19266316591337 |
|
task: |
|
type: Reranking |
|
- dataset: |
|
config: default |
|
name: MTEB MMarcoRetrieval |
|
revision: 539bbde593d947e2a124ba72651aafc09eb33fc2 |
|
split: dev |
|
type: C-MTEB/MMarcoRetrieval |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 79.346 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB MSMARCO |
|
revision: c5a29a104738b98a9e76336939199e264163d4a0 |
|
split: dev |
|
type: mteb/msmarco |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 39.922999999999995 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB MSMARCO-PL |
|
revision: 8634c07806d5cce3a6138e260e59b81760a0a640 |
|
split: test |
|
type: clarin-knext/msmarco-pl |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 55.620999999999995 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: en |
|
name: MTEB MTOPDomainClassification (en) |
|
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf |
|
split: test |
|
type: mteb/mtop_domain |
|
metrics: |
|
- type: accuracy |
|
value: 92.53989968080255 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: de |
|
name: MTEB MTOPDomainClassification (de) |
|
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf |
|
split: test |
|
type: mteb/mtop_domain |
|
metrics: |
|
- type: accuracy |
|
value: 88.26993519301212 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: es |
|
name: MTEB MTOPDomainClassification (es) |
|
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf |
|
split: test |
|
type: mteb/mtop_domain |
|
metrics: |
|
- type: accuracy |
|
value: 90.87725150100067 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: fr |
|
name: MTEB MTOPDomainClassification (fr) |
|
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf |
|
split: test |
|
type: mteb/mtop_domain |
|
metrics: |
|
- type: accuracy |
|
value: 87.48512370811149 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: hi |
|
name: MTEB MTOPDomainClassification (hi) |
|
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf |
|
split: test |
|
type: mteb/mtop_domain |
|
metrics: |
|
- type: accuracy |
|
value: 89.45141627823591 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: th |
|
name: MTEB MTOPDomainClassification (th) |
|
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf |
|
split: test |
|
type: mteb/mtop_domain |
|
metrics: |
|
- type: accuracy |
|
value: 83.45750452079565 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: en |
|
name: MTEB MTOPIntentClassification (en) |
|
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba |
|
split: test |
|
type: mteb/mtop_intent |
|
metrics: |
|
- type: accuracy |
|
value: 72.57637938896488 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: de |
|
name: MTEB MTOPIntentClassification (de) |
|
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba |
|
split: test |
|
type: mteb/mtop_intent |
|
metrics: |
|
- type: accuracy |
|
value: 63.50803043110736 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: es |
|
name: MTEB MTOPIntentClassification (es) |
|
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba |
|
split: test |
|
type: mteb/mtop_intent |
|
metrics: |
|
- type: accuracy |
|
value: 71.6577718478986 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: fr |
|
name: MTEB MTOPIntentClassification (fr) |
|
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba |
|
split: test |
|
type: mteb/mtop_intent |
|
metrics: |
|
- type: accuracy |
|
value: 64.05887879736925 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: hi |
|
name: MTEB MTOPIntentClassification (hi) |
|
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba |
|
split: test |
|
type: mteb/mtop_intent |
|
metrics: |
|
- type: accuracy |
|
value: 65.27070634636071 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: th |
|
name: MTEB MTOPIntentClassification (th) |
|
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba |
|
split: test |
|
type: mteb/mtop_intent |
|
metrics: |
|
- type: accuracy |
|
value: 63.04520795660037 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: fra |
|
name: MTEB MasakhaNEWSClassification (fra) |
|
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60 |
|
split: test |
|
type: masakhane/masakhanews |
|
metrics: |
|
- type: accuracy |
|
value: 80.66350710900474 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: fra |
|
name: MTEB MasakhaNEWSClusteringP2P (fra) |
|
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60 |
|
split: test |
|
type: masakhane/masakhanews |
|
metrics: |
|
- type: v_measure |
|
value: 44.016506455899425 |
|
- type: v_measure |
|
value: 40.67730129573544 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: af |
|
name: MTEB MassiveIntentClassification (af) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 57.94552790854068 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: am |
|
name: MTEB MassiveIntentClassification (am) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 49.273705447209146 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ar |
|
name: MTEB MassiveIntentClassification (ar) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 55.490921318090116 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: az |
|
name: MTEB MassiveIntentClassification (az) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 60.97511768661733 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: bn |
|
name: MTEB MassiveIntentClassification (bn) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 57.5689307330195 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: cy |
|
name: MTEB MassiveIntentClassification (cy) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 48.34902488231337 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: da |
|
name: MTEB MassiveIntentClassification (da) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 63.6684599865501 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: de |
|
name: MTEB MassiveIntentClassification (de) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 62.54539340954942 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: el |
|
name: MTEB MassiveIntentClassification (el) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 63.08675184936112 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: en |
|
name: MTEB MassiveIntentClassification (en) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 72.12508406186953 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: es |
|
name: MTEB MassiveIntentClassification (es) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 67.41425689307331 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: fa |
|
name: MTEB MassiveIntentClassification (fa) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 65.59515803631474 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: fi |
|
name: MTEB MassiveIntentClassification (fi) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 62.90517821116342 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: fr |
|
name: MTEB MassiveIntentClassification (fr) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 67.91526563550774 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: he |
|
name: MTEB MassiveIntentClassification (he) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 55.198386012104905 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: hi |
|
name: MTEB MassiveIntentClassification (hi) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 65.04371217215869 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: hu |
|
name: MTEB MassiveIntentClassification (hu) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 63.31203765971756 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: hy |
|
name: MTEB MassiveIntentClassification (hy) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 55.521183591123055 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: id |
|
name: MTEB MassiveIntentClassification (id) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 66.06254203093476 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: is |
|
name: MTEB MassiveIntentClassification (is) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 56.01546738399461 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: it |
|
name: MTEB MassiveIntentClassification (it) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 67.27975790181574 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ja |
|
name: MTEB MassiveIntentClassification (ja) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 66.79556153328849 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: jv |
|
name: MTEB MassiveIntentClassification (jv) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 50.18493611297915 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ka |
|
name: MTEB MassiveIntentClassification (ka) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 47.888365837256224 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: km |
|
name: MTEB MassiveIntentClassification (km) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 50.79690652320108 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: kn |
|
name: MTEB MassiveIntentClassification (kn) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 57.225958305312716 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ko |
|
name: MTEB MassiveIntentClassification (ko) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 64.58641560188299 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: lv |
|
name: MTEB MassiveIntentClassification (lv) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 59.08204438466711 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ml |
|
name: MTEB MassiveIntentClassification (ml) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 59.54606590450572 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: mn |
|
name: MTEB MassiveIntentClassification (mn) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 53.443174176193665 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ms |
|
name: MTEB MassiveIntentClassification (ms) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 61.65097511768661 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: my |
|
name: MTEB MassiveIntentClassification (my) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 53.45662407531944 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: nb |
|
name: MTEB MassiveIntentClassification (nb) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 63.739071956960316 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: nl |
|
name: MTEB MassiveIntentClassification (nl) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 66.36180228648286 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: pl |
|
name: MTEB MassiveIntentClassification (pl) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 66.3920645595158 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: pt |
|
name: MTEB MassiveIntentClassification (pt) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 68.06993947545395 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ro |
|
name: MTEB MassiveIntentClassification (ro) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 63.123739071956955 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ru |
|
name: MTEB MassiveIntentClassification (ru) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 67.46133154001346 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: sl |
|
name: MTEB MassiveIntentClassification (sl) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 60.54472091459314 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: sq |
|
name: MTEB MassiveIntentClassification (sq) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 58.204438466711494 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: sv |
|
name: MTEB MassiveIntentClassification (sv) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 65.69603227975792 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: sw |
|
name: MTEB MassiveIntentClassification (sw) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 51.684599865501 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ta |
|
name: MTEB MassiveIntentClassification (ta) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 58.523873570948226 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: te |
|
name: MTEB MassiveIntentClassification (te) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 58.53396099529253 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: th |
|
name: MTEB MassiveIntentClassification (th) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 61.88298587760591 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: tl |
|
name: MTEB MassiveIntentClassification (tl) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 56.65097511768662 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: tr |
|
name: MTEB MassiveIntentClassification (tr) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 64.8453261600538 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ur |
|
name: MTEB MassiveIntentClassification (ur) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 58.6247478143914 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: vi |
|
name: MTEB MassiveIntentClassification (vi) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 64.16274377942166 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: zh-CN |
|
name: MTEB MassiveIntentClassification (zh-CN) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 69.61667787491594 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: zh-TW |
|
name: MTEB MassiveIntentClassification (zh-TW) |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 64.17283120376598 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: af |
|
name: MTEB MassiveScenarioClassification (af) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 64.89912575655683 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: am |
|
name: MTEB MassiveScenarioClassification (am) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 57.27975790181573 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ar |
|
name: MTEB MassiveScenarioClassification (ar) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 62.269670477471415 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: az |
|
name: MTEB MassiveScenarioClassification (az) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 65.10423671822461 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: bn |
|
name: MTEB MassiveScenarioClassification (bn) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 62.40753194351043 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: cy |
|
name: MTEB MassiveScenarioClassification (cy) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 55.369872225958304 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: da |
|
name: MTEB MassiveScenarioClassification (da) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 71.60726294552792 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: de |
|
name: MTEB MassiveScenarioClassification (de) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 70.30262273032952 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: el |
|
name: MTEB MassiveScenarioClassification (el) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 69.52925353059851 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: en |
|
name: MTEB MassiveScenarioClassification (en) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 76.28446536650976 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: es |
|
name: MTEB MassiveScenarioClassification (es) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 72.45460659045058 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: fa |
|
name: MTEB MassiveScenarioClassification (fa) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 70.26563550773368 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: fi |
|
name: MTEB MassiveScenarioClassification (fi) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 67.20578345662408 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: fr |
|
name: MTEB MassiveScenarioClassification (fr) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 72.64963012777405 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: he |
|
name: MTEB MassiveScenarioClassification (he) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 61.698049764626774 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: hi |
|
name: MTEB MassiveScenarioClassification (hi) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 70.14458641560188 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: hu |
|
name: MTEB MassiveScenarioClassification (hu) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 70.51445864156018 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: hy |
|
name: MTEB MassiveScenarioClassification (hy) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 60.13786146603901 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: id |
|
name: MTEB MassiveScenarioClassification (id) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 70.61533288500337 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: is |
|
name: MTEB MassiveScenarioClassification (is) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 61.526563550773375 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: it |
|
name: MTEB MassiveScenarioClassification (it) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 71.99731002017484 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ja |
|
name: MTEB MassiveScenarioClassification (ja) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 71.59381304640216 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: jv |
|
name: MTEB MassiveScenarioClassification (jv) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 57.010759919300604 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ka |
|
name: MTEB MassiveScenarioClassification (ka) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 53.26160053799597 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: km |
|
name: MTEB MassiveScenarioClassification (km) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 57.800941492938804 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: kn |
|
name: MTEB MassiveScenarioClassification (kn) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 62.387357094821795 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ko |
|
name: MTEB MassiveScenarioClassification (ko) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 69.5359784801614 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: lv |
|
name: MTEB MassiveScenarioClassification (lv) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 63.36919973100203 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ml |
|
name: MTEB MassiveScenarioClassification (ml) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 64.81506388702084 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: mn |
|
name: MTEB MassiveScenarioClassification (mn) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 59.35104236718225 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ms |
|
name: MTEB MassiveScenarioClassification (ms) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 66.67787491593813 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: my |
|
name: MTEB MassiveScenarioClassification (my) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 59.4250168123739 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: nb |
|
name: MTEB MassiveScenarioClassification (nb) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 71.49630127774043 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: nl |
|
name: MTEB MassiveScenarioClassification (nl) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 71.95696032279758 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: pl |
|
name: MTEB MassiveScenarioClassification (pl) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 70.11768661735036 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: pt |
|
name: MTEB MassiveScenarioClassification (pt) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 71.86953597848016 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ro |
|
name: MTEB MassiveScenarioClassification (ro) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 68.51042367182247 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ru |
|
name: MTEB MassiveScenarioClassification (ru) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 71.65097511768661 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: sl |
|
name: MTEB MassiveScenarioClassification (sl) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 66.81573638197713 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: sq |
|
name: MTEB MassiveScenarioClassification (sq) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 65.26227303295225 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: sv |
|
name: MTEB MassiveScenarioClassification (sv) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 72.51513113651646 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: sw |
|
name: MTEB MassiveScenarioClassification (sw) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 58.29858776059179 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ta |
|
name: MTEB MassiveScenarioClassification (ta) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 62.72696704774714 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: te |
|
name: MTEB MassiveScenarioClassification (te) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 66.57700067249496 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: th |
|
name: MTEB MassiveScenarioClassification (th) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 68.22797579018157 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: tl |
|
name: MTEB MassiveScenarioClassification (tl) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 61.97041022192333 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: tr |
|
name: MTEB MassiveScenarioClassification (tr) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 70.72629455279085 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ur |
|
name: MTEB MassiveScenarioClassification (ur) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 63.16072629455278 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: vi |
|
name: MTEB MassiveScenarioClassification (vi) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 67.92199058507062 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: zh-CN |
|
name: MTEB MassiveScenarioClassification (zh-CN) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 74.40484196368527 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: zh-TW |
|
name: MTEB MassiveScenarioClassification (zh-TW) |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 71.61398789509079 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB MedicalRetrieval |
|
revision: 2039188fb5800a9803ba5048df7b76e6fb151fc6 |
|
split: dev |
|
type: C-MTEB/MedicalRetrieval |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 61.934999999999995 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB MedrxivClusteringP2P |
|
revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73 |
|
split: test |
|
type: mteb/medrxiv-clustering-p2p |
|
metrics: |
|
- type: v_measure |
|
value: 33.052031054565205 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB MedrxivClusteringS2S |
|
revision: 35191c8c0dca72d8ff3efcd72aa802307d469663 |
|
split: test |
|
type: mteb/medrxiv-clustering-s2s |
|
metrics: |
|
- type: v_measure |
|
value: 31.969909524076794 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB MindSmallReranking |
|
revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69 |
|
split: test |
|
type: mteb/mind_small |
|
metrics: |
|
- type: map |
|
value: 31.7530992892652 |
|
task: |
|
type: Reranking |
|
- dataset: |
|
config: fr |
|
name: MTEB MintakaRetrieval (fr) |
|
revision: efa78cc2f74bbcd21eff2261f9e13aebe40b814e |
|
split: test |
|
type: jinaai/mintakaqa |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 34.705999999999996 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: ar |
|
name: MTEB MultiLongDocRetrieval (ar) |
|
revision: None |
|
split: test |
|
type: Shitao/MLDR |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 55.166000000000004 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: de |
|
name: MTEB MultiLongDocRetrieval (de) |
|
revision: None |
|
split: test |
|
type: Shitao/MLDR |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 55.155 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: en |
|
name: MTEB MultiLongDocRetrieval (en) |
|
revision: None |
|
split: test |
|
type: Shitao/MLDR |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 50.993 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: es |
|
name: MTEB MultiLongDocRetrieval (es) |
|
revision: None |
|
split: test |
|
type: Shitao/MLDR |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 81.228 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: fr |
|
name: MTEB MultiLongDocRetrieval (fr) |
|
revision: None |
|
split: test |
|
type: Shitao/MLDR |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 76.19 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: hi |
|
name: MTEB MultiLongDocRetrieval (hi) |
|
revision: None |
|
split: test |
|
type: Shitao/MLDR |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 45.206 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: it |
|
name: MTEB MultiLongDocRetrieval (it) |
|
revision: None |
|
split: test |
|
type: Shitao/MLDR |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 66.741 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: ja |
|
name: MTEB MultiLongDocRetrieval (ja) |
|
revision: None |
|
split: test |
|
type: Shitao/MLDR |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 52.111 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: ko |
|
name: MTEB MultiLongDocRetrieval (ko) |
|
revision: None |
|
split: test |
|
type: Shitao/MLDR |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 46.733000000000004 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: pt |
|
name: MTEB MultiLongDocRetrieval (pt) |
|
revision: None |
|
split: test |
|
type: Shitao/MLDR |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 79.105 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: ru |
|
name: MTEB MultiLongDocRetrieval (ru) |
|
revision: None |
|
split: test |
|
type: Shitao/MLDR |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 64.21 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: th |
|
name: MTEB MultiLongDocRetrieval (th) |
|
revision: None |
|
split: test |
|
type: Shitao/MLDR |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 35.467 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: zh |
|
name: MTEB MultiLongDocRetrieval (zh) |
|
revision: None |
|
split: test |
|
type: Shitao/MLDR |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 27.419 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB MultilingualSentiment |
|
revision: 46958b007a63fdbf239b7672c25d0bea67b5ea1a |
|
split: validation |
|
type: C-MTEB/MultilingualSentiment-classification |
|
metrics: |
|
- type: accuracy |
|
value: 61.02000000000001 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB NFCorpus |
|
revision: ec0fa4fe99da2ff19ca1214b7966684033a58814 |
|
split: test |
|
type: mteb/nfcorpus |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 36.65 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB NFCorpus-PL |
|
revision: 9a6f9567fda928260afed2de480d79c98bf0bec0 |
|
split: test |
|
type: clarin-knext/nfcorpus-pl |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 26.831 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB NQ |
|
revision: b774495ed302d8c44a3a7ea25c90dbce03968f31 |
|
split: test |
|
type: mteb/nq |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 58.111000000000004 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB NQ-PL |
|
revision: f171245712cf85dd4700b06bef18001578d0ca8d |
|
split: test |
|
type: clarin-knext/nq-pl |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 43.126999999999995 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB Ocnli |
|
revision: 66e76a618a34d6d565d5538088562851e6daa7ec |
|
split: validation |
|
type: C-MTEB/OCNLI |
|
metrics: |
|
- type: cos_sim_ap |
|
value: 72.67630697316041 |
|
task: |
|
type: PairClassification |
|
- dataset: |
|
config: default |
|
name: MTEB OnlineShopping |
|
revision: e610f2ebd179a8fda30ae534c3878750a96db120 |
|
split: test |
|
type: C-MTEB/OnlineShopping-classification |
|
metrics: |
|
- type: accuracy |
|
value: 84.85000000000001 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: fr |
|
name: MTEB OpusparcusPC (fr) |
|
revision: 9e9b1f8ef51616073f47f306f7f47dd91663f86a |
|
split: test |
|
type: GEM/opusparcus |
|
metrics: |
|
- type: cos_sim_ap |
|
value: 100 |
|
task: |
|
type: PairClassification |
|
- dataset: |
|
config: default |
|
name: MTEB PAC |
|
revision: None |
|
split: test |
|
type: laugustyniak/abusive-clauses-pl |
|
metrics: |
|
- type: accuracy |
|
value: 65.99189110918043 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB PAWSX |
|
revision: 9c6a90e430ac22b5779fb019a23e820b11a8b5e1 |
|
split: test |
|
type: C-MTEB/PAWSX |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 16.124364530596228 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB PPC |
|
revision: None |
|
split: test |
|
type: PL-MTEB/ppc-pairclassification |
|
metrics: |
|
- type: cos_sim_ap |
|
value: 92.43431057460192 |
|
task: |
|
type: PairClassification |
|
- dataset: |
|
config: default |
|
name: MTEB PSC |
|
revision: None |
|
split: test |
|
type: PL-MTEB/psc-pairclassification |
|
metrics: |
|
- type: cos_sim_ap |
|
value: 99.06090138049724 |
|
task: |
|
type: PairClassification |
|
- dataset: |
|
config: fr |
|
name: MTEB PawsX (fr) |
|
revision: 8a04d940a42cd40658986fdd8e3da561533a3646 |
|
split: test |
|
type: paws-x |
|
metrics: |
|
- type: cos_sim_ap |
|
value: 58.9314954874314 |
|
task: |
|
type: PairClassification |
|
- dataset: |
|
config: default |
|
name: MTEB PolEmo2.0-IN |
|
revision: None |
|
split: test |
|
type: PL-MTEB/polemo2_in |
|
metrics: |
|
- type: accuracy |
|
value: 69.59833795013851 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB PolEmo2.0-OUT |
|
revision: None |
|
split: test |
|
type: PL-MTEB/polemo2_out |
|
metrics: |
|
- type: accuracy |
|
value: 44.73684210526315 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB QBQTC |
|
revision: 790b0510dc52b1553e8c49f3d2afb48c0e5c48b7 |
|
split: test |
|
type: C-MTEB/QBQTC |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 39.36450754137984 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB Quora-PL |
|
revision: 0be27e93455051e531182b85e85e425aba12e9d4 |
|
split: test |
|
type: clarin-knext/quora-pl |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 80.76299999999999 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB QuoraRetrieval |
|
revision: None |
|
split: test |
|
type: mteb/quora |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 88.022 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB RedditClustering |
|
revision: 24640382cdbf8abc73003fb0fa6d111a705499eb |
|
split: test |
|
type: mteb/reddit-clustering |
|
metrics: |
|
- type: v_measure |
|
value: 55.719165988934385 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB RedditClusteringP2P |
|
revision: 282350215ef01743dc01b456c7f5241fa8937f16 |
|
split: test |
|
type: mteb/reddit-clustering-p2p |
|
metrics: |
|
- type: v_measure |
|
value: 62.25390069273025 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB SCIDOCS |
|
revision: None |
|
split: test |
|
type: mteb/scidocs |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 18.243000000000002 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB SCIDOCS-PL |
|
revision: 45452b03f05560207ef19149545f168e596c9337 |
|
split: test |
|
type: clarin-knext/scidocs-pl |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 14.219000000000001 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB SICK-E-PL |
|
revision: None |
|
split: test |
|
type: PL-MTEB/sicke-pl-pairclassification |
|
metrics: |
|
- type: cos_sim_ap |
|
value: 75.4022630307816 |
|
task: |
|
type: PairClassification |
|
- dataset: |
|
config: default |
|
name: MTEB SICK-R |
|
revision: a6ea5a8cab320b040a23452cc28066d9beae2cee |
|
split: test |
|
type: mteb/sickr-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 79.34269390198548 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB SICK-R-PL |
|
revision: None |
|
split: test |
|
type: PL-MTEB/sickr-pl-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 74.0651660446132 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB SICKFr |
|
revision: e077ab4cf4774a1e36d86d593b150422fafd8e8a |
|
split: test |
|
type: Lajavaness/SICK-fr |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 78.62693119733123 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB STS12 |
|
revision: a0d554a64d88156834ff5ae9920b964011b16384 |
|
split: test |
|
type: mteb/sts12-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 77.50660544631359 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB STS13 |
|
revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca |
|
split: test |
|
type: mteb/sts13-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 85.55415077723738 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB STS14 |
|
revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375 |
|
split: test |
|
type: mteb/sts14-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 81.67550814479077 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB STS15 |
|
revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3 |
|
split: test |
|
type: mteb/sts15-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 88.94601412322764 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB STS16 |
|
revision: 4d8694f8f0e0100860b497b999b3dbed754a0513 |
|
split: test |
|
type: mteb/sts16-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 84.33844259337481 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: ko-ko |
|
name: MTEB STS17 (ko-ko) |
|
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d |
|
split: test |
|
type: mteb/sts17-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 81.58650681159105 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: ar-ar |
|
name: MTEB STS17 (ar-ar) |
|
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d |
|
split: test |
|
type: mteb/sts17-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 78.82472265884256 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: en-ar |
|
name: MTEB STS17 (en-ar) |
|
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d |
|
split: test |
|
type: mteb/sts17-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 76.43637938260397 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: en-de |
|
name: MTEB STS17 (en-de) |
|
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d |
|
split: test |
|
type: mteb/sts17-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 84.71008299464059 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: en-en |
|
name: MTEB STS17 (en-en) |
|
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d |
|
split: test |
|
type: mteb/sts17-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 88.88074713413747 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: en-tr |
|
name: MTEB STS17 (en-tr) |
|
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d |
|
split: test |
|
type: mteb/sts17-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 76.36405640457285 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: es-en |
|
name: MTEB STS17 (es-en) |
|
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d |
|
split: test |
|
type: mteb/sts17-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 83.84737910084762 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: es-es |
|
name: MTEB STS17 (es-es) |
|
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d |
|
split: test |
|
type: mteb/sts17-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 87.03931621433031 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: fr-en |
|
name: MTEB STS17 (fr-en) |
|
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d |
|
split: test |
|
type: mteb/sts17-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 84.43335591752246 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: it-en |
|
name: MTEB STS17 (it-en) |
|
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d |
|
split: test |
|
type: mteb/sts17-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 83.85268648747021 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: nl-en |
|
name: MTEB STS17 (nl-en) |
|
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d |
|
split: test |
|
type: mteb/sts17-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 82.45786516224341 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: en |
|
name: MTEB STS22 (en) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 67.20227303970304 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: de |
|
name: MTEB STS22 (de) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 60.892838305537126 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: es |
|
name: MTEB STS22 (es) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 72.01876318464508 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: pl |
|
name: MTEB STS22 (pl) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 42.3879320510127 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: tr |
|
name: MTEB STS22 (tr) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 65.54048784845729 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: ar |
|
name: MTEB STS22 (ar) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 58.55244068334867 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: ru |
|
name: MTEB STS22 (ru) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 66.48710288440624 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: zh |
|
name: MTEB STS22 (zh) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 66.585754901838 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: fr |
|
name: MTEB STS22 (fr) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 81.03001290557805 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: de-en |
|
name: MTEB STS22 (de-en) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 62.28001859884359 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: es-en |
|
name: MTEB STS22 (es-en) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 79.64106342105019 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: it |
|
name: MTEB STS22 (it) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 78.27915339361124 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: pl-en |
|
name: MTEB STS22 (pl-en) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 78.28574268257462 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: zh-en |
|
name: MTEB STS22 (zh-en) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 72.92658860751482 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: es-it |
|
name: MTEB STS22 (es-it) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 74.83418886368217 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: de-fr |
|
name: MTEB STS22 (de-fr) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 56.01064022625769 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: de-pl |
|
name: MTEB STS22 (de-pl) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 53.64332829635126 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: fr-pl |
|
name: MTEB STS22 (fr-pl) |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 73.24670207647144 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB STSB |
|
revision: 0cde68302b3541bb8b3c340dc0644b0b745b3dc0 |
|
split: test |
|
type: C-MTEB/STSB |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 80.7157790971544 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB STSBenchmark |
|
revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831 |
|
split: test |
|
type: mteb/stsbenchmark-sts |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 86.45763616928973 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: fr |
|
name: MTEB STSBenchmarkMultilingualSTS (fr) |
|
revision: 93d57ef91790589e3ce9c365164337a8a78b7632 |
|
split: test |
|
type: stsb_multi_mt |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 84.4335500335282 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB SciDocsRR |
|
revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab |
|
split: test |
|
type: mteb/scidocs-reranking |
|
metrics: |
|
- type: map |
|
value: 84.15276484499303 |
|
task: |
|
type: Reranking |
|
- dataset: |
|
config: default |
|
name: MTEB SciFact |
|
revision: 0228b52cf27578f30900b9e5271d331663a030d7 |
|
split: test |
|
type: mteb/scifact |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 73.433 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB SciFact-PL |
|
revision: 47932a35f045ef8ed01ba82bf9ff67f6e109207e |
|
split: test |
|
type: clarin-knext/scifact-pl |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 58.919999999999995 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB SprintDuplicateQuestions |
|
revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46 |
|
split: test |
|
type: mteb/sprintduplicatequestions-pairclassification |
|
metrics: |
|
- type: cos_sim_ap |
|
value: 95.40564890916419 |
|
task: |
|
type: PairClassification |
|
- dataset: |
|
config: default |
|
name: MTEB StackExchangeClustering |
|
revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259 |
|
split: test |
|
type: mteb/stackexchange-clustering |
|
metrics: |
|
- type: v_measure |
|
value: 63.41856697730145 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB StackExchangeClusteringP2P |
|
revision: 815ca46b2622cec33ccafc3735d572c266efdb44 |
|
split: test |
|
type: mteb/stackexchange-clustering-p2p |
|
metrics: |
|
- type: v_measure |
|
value: 31.709285904909112 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB StackOverflowDupQuestions |
|
revision: e185fbe320c72810689fc5848eb6114e1ef5ec69 |
|
split: test |
|
type: mteb/stackoverflowdupquestions-reranking |
|
metrics: |
|
- type: map |
|
value: 52.09341030060322 |
|
task: |
|
type: Reranking |
|
- dataset: |
|
config: default |
|
name: MTEB SummEval |
|
revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c |
|
split: test |
|
type: mteb/summeval |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 30.58262517835034 |
|
task: |
|
type: Summarization |
|
- dataset: |
|
config: default |
|
name: MTEB SummEvalFr |
|
revision: b385812de6a9577b6f4d0f88c6a6e35395a94054 |
|
split: test |
|
type: lyon-nlp/summarization-summeval-fr-p2p |
|
metrics: |
|
- type: cos_sim_spearman |
|
value: 29.744542072951358 |
|
task: |
|
type: Summarization |
|
- dataset: |
|
config: default |
|
name: MTEB SyntecReranking |
|
revision: b205c5084a0934ce8af14338bf03feb19499c84d |
|
split: test |
|
type: lyon-nlp/mteb-fr-reranking-syntec-s2p |
|
metrics: |
|
- type: map |
|
value: 88.03333333333333 |
|
task: |
|
type: Reranking |
|
- dataset: |
|
config: default |
|
name: MTEB SyntecRetrieval |
|
revision: 77f7e271bf4a92b24fce5119f3486b583ca016ff |
|
split: test |
|
type: lyon-nlp/mteb-fr-retrieval-syntec-s2p |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 83.043 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB T2Reranking |
|
revision: 76631901a18387f85eaa53e5450019b87ad58ef9 |
|
split: dev |
|
type: C-MTEB/T2Reranking |
|
metrics: |
|
- type: map |
|
value: 67.08577894804324 |
|
task: |
|
type: Reranking |
|
- dataset: |
|
config: default |
|
name: MTEB T2Retrieval |
|
revision: 8731a845f1bf500a4f111cf1070785c793d10e64 |
|
split: dev |
|
type: C-MTEB/T2Retrieval |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 84.718 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB TNews |
|
revision: 317f262bf1e6126357bbe89e875451e4b0938fe4 |
|
split: validation |
|
type: C-MTEB/TNews-classification |
|
metrics: |
|
- type: accuracy |
|
value: 48.726 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB TRECCOVID |
|
revision: None |
|
split: test |
|
type: mteb/trec-covid |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 57.56 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB TRECCOVID-PL |
|
revision: 81bcb408f33366c2a20ac54adafad1ae7e877fdd |
|
split: test |
|
type: clarin-knext/trec-covid-pl |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 59.355999999999995 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: sqi-eng |
|
name: MTEB Tatoeba (sqi-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 82.765 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: fry-eng |
|
name: MTEB Tatoeba (fry-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 73.69942196531792 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: kur-eng |
|
name: MTEB Tatoeba (kur-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 32.86585365853657 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: tur-eng |
|
name: MTEB Tatoeba (tur-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 95.81666666666666 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: deu-eng |
|
name: MTEB Tatoeba (deu-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 97.75 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: nld-eng |
|
name: MTEB Tatoeba (nld-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 93.78333333333335 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ron-eng |
|
name: MTEB Tatoeba (ron-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 90.72333333333333 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ang-eng |
|
name: MTEB Tatoeba (ang-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 42.45202558635395 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ido-eng |
|
name: MTEB Tatoeba (ido-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 77.59238095238095 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: jav-eng |
|
name: MTEB Tatoeba (jav-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 35.69686411149825 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: isl-eng |
|
name: MTEB Tatoeba (isl-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 82.59333333333333 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: slv-eng |
|
name: MTEB Tatoeba (slv-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 84.1456922987907 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: cym-eng |
|
name: MTEB Tatoeba (cym-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 52.47462133594857 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: kaz-eng |
|
name: MTEB Tatoeba (kaz-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 67.62965440356746 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: est-eng |
|
name: MTEB Tatoeba (est-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 79.48412698412699 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: heb-eng |
|
name: MTEB Tatoeba (heb-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 75.85 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: gla-eng |
|
name: MTEB Tatoeba (gla-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 27.32600866497127 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: mar-eng |
|
name: MTEB Tatoeba (mar-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 84.38 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: lat-eng |
|
name: MTEB Tatoeba (lat-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 42.98888712165028 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: bel-eng |
|
name: MTEB Tatoeba (bel-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 85.55690476190476 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: pms-eng |
|
name: MTEB Tatoeba (pms-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 46.68466031323174 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: gle-eng |
|
name: MTEB Tatoeba (gle-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 32.73071428571428 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: pes-eng |
|
name: MTEB Tatoeba (pes-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 88.26333333333334 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: nob-eng |
|
name: MTEB Tatoeba (nob-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 96.61666666666666 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: bul-eng |
|
name: MTEB Tatoeba (bul-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 91.30666666666666 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: cbk-eng |
|
name: MTEB Tatoeba (cbk-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 70.03714285714285 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: hun-eng |
|
name: MTEB Tatoeba (hun-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 89.09 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: uig-eng |
|
name: MTEB Tatoeba (uig-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 59.570476190476185 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: rus-eng |
|
name: MTEB Tatoeba (rus-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 92.9 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: spa-eng |
|
name: MTEB Tatoeba (spa-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 97.68333333333334 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: hye-eng |
|
name: MTEB Tatoeba (hye-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 80.40880503144653 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: tel-eng |
|
name: MTEB Tatoeba (tel-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 89.7008547008547 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: afr-eng |
|
name: MTEB Tatoeba (afr-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 81.84833333333333 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: mon-eng |
|
name: MTEB Tatoeba (mon-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 71.69696969696969 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: arz-eng |
|
name: MTEB Tatoeba (arz-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 55.76985790822269 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: hrv-eng |
|
name: MTEB Tatoeba (hrv-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 91.66666666666666 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: nov-eng |
|
name: MTEB Tatoeba (nov-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 68.36668519547896 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: gsw-eng |
|
name: MTEB Tatoeba (gsw-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 36.73992673992674 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: nds-eng |
|
name: MTEB Tatoeba (nds-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 63.420952380952365 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ukr-eng |
|
name: MTEB Tatoeba (ukr-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 91.28999999999999 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: uzb-eng |
|
name: MTEB Tatoeba (uzb-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 40.95392490046146 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: lit-eng |
|
name: MTEB Tatoeba (lit-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 77.58936507936508 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ina-eng |
|
name: MTEB Tatoeba (ina-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 91.28999999999999 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: lfn-eng |
|
name: MTEB Tatoeba (lfn-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 63.563650793650794 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: zsm-eng |
|
name: MTEB Tatoeba (zsm-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 94.35 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ita-eng |
|
name: MTEB Tatoeba (ita-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 91.43 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: cmn-eng |
|
name: MTEB Tatoeba (cmn-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 95.73333333333332 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: lvs-eng |
|
name: MTEB Tatoeba (lvs-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 79.38666666666667 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: glg-eng |
|
name: MTEB Tatoeba (glg-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 89.64 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ceb-eng |
|
name: MTEB Tatoeba (ceb-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 21.257184628237262 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: bre-eng |
|
name: MTEB Tatoeba (bre-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 13.592316017316017 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ben-eng |
|
name: MTEB Tatoeba (ben-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 73.22666666666666 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: swg-eng |
|
name: MTEB Tatoeba (swg-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 51.711309523809526 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: arq-eng |
|
name: MTEB Tatoeba (arq-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 24.98790634904795 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: kab-eng |
|
name: MTEB Tatoeba (kab-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 17.19218192918193 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: fra-eng |
|
name: MTEB Tatoeba (fra-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 93.26666666666667 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: por-eng |
|
name: MTEB Tatoeba (por-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 94.57333333333334 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: tat-eng |
|
name: MTEB Tatoeba (tat-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 42.35127206127206 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: oci-eng |
|
name: MTEB Tatoeba (oci-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 51.12318903318903 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: pol-eng |
|
name: MTEB Tatoeba (pol-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 94.89999999999999 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: war-eng |
|
name: MTEB Tatoeba (war-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 23.856320290390055 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: aze-eng |
|
name: MTEB Tatoeba (aze-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 79.52833333333334 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: vie-eng |
|
name: MTEB Tatoeba (vie-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 95.93333333333334 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: nno-eng |
|
name: MTEB Tatoeba (nno-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 90.75333333333333 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: cha-eng |
|
name: MTEB Tatoeba (cha-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 30.802919708029197 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: mhr-eng |
|
name: MTEB Tatoeba (mhr-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 15.984076294076294 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: dan-eng |
|
name: MTEB Tatoeba (dan-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 91.82666666666667 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ell-eng |
|
name: MTEB Tatoeba (ell-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 91.9 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: amh-eng |
|
name: MTEB Tatoeba (amh-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 76.36054421768706 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: pam-eng |
|
name: MTEB Tatoeba (pam-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 9.232711399711398 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: hsb-eng |
|
name: MTEB Tatoeba (hsb-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 45.640803181175855 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: srp-eng |
|
name: MTEB Tatoeba (srp-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 86.29 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: epo-eng |
|
name: MTEB Tatoeba (epo-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 88.90833333333332 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: kzj-eng |
|
name: MTEB Tatoeba (kzj-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 11.11880248978075 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: awa-eng |
|
name: MTEB Tatoeba (awa-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 48.45839345839346 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: fao-eng |
|
name: MTEB Tatoeba (fao-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 65.68157033805888 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: mal-eng |
|
name: MTEB Tatoeba (mal-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 94.63852498786997 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ile-eng |
|
name: MTEB Tatoeba (ile-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 81.67904761904761 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: bos-eng |
|
name: MTEB Tatoeba (bos-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 89.35969868173258 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: cor-eng |
|
name: MTEB Tatoeba (cor-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 5.957229437229437 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: cat-eng |
|
name: MTEB Tatoeba (cat-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 91.50333333333333 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: eus-eng |
|
name: MTEB Tatoeba (eus-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 63.75498778998778 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: yue-eng |
|
name: MTEB Tatoeba (yue-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 82.99190476190476 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: swe-eng |
|
name: MTEB Tatoeba (swe-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 92.95 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: dtp-eng |
|
name: MTEB Tatoeba (dtp-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 9.054042624042623 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: kat-eng |
|
name: MTEB Tatoeba (kat-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 72.77064981488574 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: jpn-eng |
|
name: MTEB Tatoeba (jpn-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 93.14 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: csb-eng |
|
name: MTEB Tatoeba (csb-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 29.976786498525627 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: xho-eng |
|
name: MTEB Tatoeba (xho-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 67.6525821596244 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: orv-eng |
|
name: MTEB Tatoeba (orv-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 33.12964812964813 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ind-eng |
|
name: MTEB Tatoeba (ind-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 92.30666666666666 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: tuk-eng |
|
name: MTEB Tatoeba (tuk-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 34.36077879427633 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: max-eng |
|
name: MTEB Tatoeba (max-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 52.571845212690285 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: swh-eng |
|
name: MTEB Tatoeba (swh-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 58.13107263107262 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: hin-eng |
|
name: MTEB Tatoeba (hin-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 93.33333333333333 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: dsb-eng |
|
name: MTEB Tatoeba (dsb-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 42.87370133925458 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ber-eng |
|
name: MTEB Tatoeba (ber-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 20.394327616827614 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: tam-eng |
|
name: MTEB Tatoeba (tam-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 84.29967426710098 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: slk-eng |
|
name: MTEB Tatoeba (slk-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 88.80666666666667 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: tgl-eng |
|
name: MTEB Tatoeba (tgl-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 67.23062271062273 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ast-eng |
|
name: MTEB Tatoeba (ast-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 78.08398950131233 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: mkd-eng |
|
name: MTEB Tatoeba (mkd-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 77.85166666666666 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: khm-eng |
|
name: MTEB Tatoeba (khm-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 67.63004001231148 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ces-eng |
|
name: MTEB Tatoeba (ces-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 89.77000000000001 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: tzl-eng |
|
name: MTEB Tatoeba (tzl-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 40.2654503616042 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: urd-eng |
|
name: MTEB Tatoeba (urd-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 83.90333333333334 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: ara-eng |
|
name: MTEB Tatoeba (ara-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 77.80666666666666 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: kor-eng |
|
name: MTEB Tatoeba (kor-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 84.08 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: yid-eng |
|
name: MTEB Tatoeba (yid-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 60.43098607367475 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: fin-eng |
|
name: MTEB Tatoeba (fin-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 88.19333333333333 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: tha-eng |
|
name: MTEB Tatoeba (tha-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 90.55352798053529 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: wuu-eng |
|
name: MTEB Tatoeba (wuu-eng) |
|
revision: 9080400076fbadbb4c4dcb136ff4eddc40b42553 |
|
split: test |
|
type: mteb/tatoeba-bitext-mining |
|
metrics: |
|
- type: f1 |
|
value: 88.44999999999999 |
|
task: |
|
type: BitextMining |
|
- dataset: |
|
config: default |
|
name: MTEB ThuNewsClusteringP2P |
|
revision: 5798586b105c0434e4f0fe5e767abe619442cf93 |
|
split: test |
|
type: C-MTEB/ThuNewsClusteringP2P |
|
metrics: |
|
- type: v_measure |
|
value: 57.25416429643288 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB ThuNewsClusteringS2S |
|
revision: 8a8b2caeda43f39e13c4bc5bea0f8a667896e10d |
|
split: test |
|
type: C-MTEB/ThuNewsClusteringS2S |
|
metrics: |
|
- type: v_measure |
|
value: 56.616646560243524 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB Touche2020 |
|
revision: a34f9a33db75fa0cbb21bb5cfc3dae8dc8bec93f |
|
split: test |
|
type: mteb/touche2020 |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 22.819 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB ToxicConversationsClassification |
|
revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c |
|
split: test |
|
type: mteb/toxic_conversations_50k |
|
metrics: |
|
- type: accuracy |
|
value: 71.02579999999999 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB TweetSentimentExtractionClassification |
|
revision: d604517c81ca91fe16a244d1248fc021f9ecee7a |
|
split: test |
|
type: mteb/tweet_sentiment_extraction |
|
metrics: |
|
- type: accuracy |
|
value: 57.60045274476514 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB TwentyNewsgroupsClustering |
|
revision: 6125ec4e24fa026cec8a478383ee943acfbd5449 |
|
split: test |
|
type: mteb/twentynewsgroups-clustering |
|
metrics: |
|
- type: v_measure |
|
value: 50.346666699466205 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB TwitterSemEval2015 |
|
revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1 |
|
split: test |
|
type: mteb/twittersemeval2015-pairclassification |
|
metrics: |
|
- type: cos_sim_ap |
|
value: 71.88199004440489 |
|
task: |
|
type: PairClassification |
|
- dataset: |
|
config: default |
|
name: MTEB TwitterURLCorpus |
|
revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf |
|
split: test |
|
type: mteb/twitterurlcorpus-pairclassification |
|
metrics: |
|
- type: cos_sim_ap |
|
value: 85.41587779677383 |
|
task: |
|
type: PairClassification |
|
- dataset: |
|
config: default |
|
name: MTEB VideoRetrieval |
|
revision: 58c2597a5943a2ba48f4668c3b90d796283c5639 |
|
split: dev |
|
type: C-MTEB/VideoRetrieval |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 72.792 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB Waimai |
|
revision: 339287def212450dcaa9df8c22bf93e9980c7023 |
|
split: test |
|
type: C-MTEB/waimai-classification |
|
metrics: |
|
- type: accuracy |
|
value: 82.58000000000001 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: fr |
|
name: MTEB XPQARetrieval (fr) |
|
revision: c99d599f0a6ab9b85b065da6f9d94f9cf731679f |
|
split: test |
|
type: jinaai/xpqa |
|
metrics: |
|
- type: ndcg_at_10 |
|
value: 67.327 |
|
task: |
|
type: Retrieval |
|
tags: |
|
- mteb |
|
- multilingual |
|
- sentence-similarity |
|
- onnx |
|
- teradata |
|
--- |
|
|
|
|
|
***See Disclaimer below*** |
|
|
|
---- |
|
|
|
|
|
# A Teradata Vantage compatible Embeddings Model |
|
|
|
# Alibaba-NLP/gte-multilingual-base |
|
|
|
## Overview of this Model |
|
|
|
An embedding model that maps text (sentences/paragraphs) into a vector. The [Alibaba-NLP/gte-multilingual-base](https://huggingface.co/Alibaba-NLP/gte-multilingual-base) model is well known for its effectiveness in capturing semantic meanings in text data. It's a state-of-the-art model trained on a large corpus, capable of generating high-quality text embeddings. |
|
|
|
- 305.37M params (Sizes in ONNX format - "fp32": 1197.36MB, "int8": 324.17MB, "uint8": 324.17MB) |
|
- 8192 maximum input tokens |
|
- 768 dimensions of output vector |
|
- Licence: apache-2.0. The released models can be used for commercial purposes free of charge. |
|
- Reference to Original Model: https://huggingface.co/Alibaba-NLP/gte-multilingual-base |
|
|
|
|
|
## Quickstart: Deploying this Model in Teradata Vantage |
|
|
|
We have pre-converted the model into the ONNX format compatible with BYOM 6.0, eliminating the need for manual conversion. |
|
|
|
**Note:** Ensure you have access to a Teradata Database with BYOM 6.0 installed. |
|
|
|
To get started, clone the pre-converted model directly from the Teradata HuggingFace repository. |
|
|
|
|
|
```python |
|
|
|
import teradataml as tdml |
|
import getpass |
|
from huggingface_hub import hf_hub_download |
|
|
|
model_name = "gte-multilingual-base" |
|
number_dimensions_output = 768 |
|
model_file_name = "model.onnx" |
|
|
|
# Step 1: Download Model from Teradata HuggingFace Page |
|
|
|
hf_hub_download(repo_id=f"Teradata/{model_name}", filename=f"onnx/{model_file_name}", local_dir="./") |
|
hf_hub_download(repo_id=f"Teradata/{model_name}", filename=f"tokenizer.json", local_dir="./") |
|
|
|
# Step 2: Create Connection to Vantage |
|
|
|
tdml.create_context(host = input('enter your hostname'), |
|
username=input('enter your username'), |
|
password = getpass.getpass("enter your password")) |
|
|
|
# Step 3: Load Models into Vantage |
|
# a) Embedding model |
|
tdml.save_byom(model_id = model_name, # must be unique in the models table |
|
model_file = f"onnx/{model_file_name}", |
|
table_name = 'embeddings_models' ) |
|
# b) Tokenizer |
|
tdml.save_byom(model_id = model_name, # must be unique in the models table |
|
model_file = 'tokenizer.json', |
|
table_name = 'embeddings_tokenizers') |
|
|
|
# Step 4: Test ONNXEmbeddings Function |
|
# Note that ONNXEmbeddings expects the 'payload' column to be 'txt'. |
|
# If it has got a different name, just rename it in a subquery/CTE. |
|
input_table = "emails.emails" |
|
embeddings_query = f""" |
|
SELECT |
|
* |
|
from mldb.ONNXEmbeddings( |
|
on {input_table} as InputTable |
|
on (select * from embeddings_models where model_id = '{model_name}') as ModelTable DIMENSION |
|
on (select model as tokenizer from embeddings_tokenizers where model_id = '{model_name}') as TokenizerTable DIMENSION |
|
using |
|
Accumulate('id', 'txt') |
|
ModelOutputTensor('sentence_embedding') |
|
EnableMemoryCheck('false') |
|
OutputFormat('FLOAT32({number_dimensions_output})') |
|
OverwriteCachedModel('true') |
|
) a |
|
""" |
|
DF_embeddings = tdml.DataFrame.from_query(embeddings_query) |
|
DF_embeddings |
|
``` |
|
|
|
|
|
|
|
## What Can I Do with the Embeddings? |
|
|
|
Teradata Vantage includes pre-built in-database functions to process embeddings further. Explore the following examples: |
|
|
|
- **Semantic Clustering with TD_KMeans:** [Semantic Clustering Python Notebook](https://github.com/Teradata/jupyter-demos/blob/main/UseCases/Language_Models_InVantage/Semantic_Clustering_Python.ipynb) |
|
- **Semantic Distance with TD_VectorDistance:** [Semantic Similarity Python Notebook](https://github.com/Teradata/jupyter-demos/blob/main/UseCases/Language_Models_InVantage/Semantic_Similarity_Python.ipynb) |
|
- **RAG-Based Application with TD_VectorDistance:** [RAG and Bedrock Query PDF Notebook](https://github.com/Teradata/jupyter-demos/blob/main/UseCases/Language_Models_InVantage/RAG_and_Bedrock_QueryPDF.ipynb) |
|
|
|
|
|
## Deep Dive into Model Conversion to ONNX |
|
|
|
**The steps below outline how we converted the open-source Hugging Face model into an ONNX file compatible with the in-database ONNXEmbeddings function.** |
|
|
|
You do not need to perform these steps—they are provided solely for documentation and transparency. However, they may be helpful if you wish to convert another model to the required format. |
|
|
|
|
|
### Part 1. Importing and Converting Model using optimum |
|
|
|
We start by importing the pre-trained [Alibaba-NLP/gte-multilingual-base](https://huggingface.co/Alibaba-NLP/gte-multilingual-base) model from Hugging Face. |
|
|
|
To enhance performance and ensure compatibility with various execution environments, we'll use the [Optimum](https://github.com/huggingface/optimum) utility to convert the model into the ONNX (Open Neural Network Exchange) format. |
|
|
|
After conversion to ONNX, we fix the opset in the ONNX file for compatibility with the ONNX Runtime version used in Teradata Vantage. |
|
|
|
We generate ONNX files for several precisions: fp32, int8, and uint8. |
|
|
|
You can find the detailed conversion steps in the file [convert.py](./convert.py) |
|
|
|
### Part 2. Running the model in Python with onnxruntime & compare results |
|
|
|
Once the fixes are applied, we proceed to test the correctness of the ONNX model by calculating cosine similarity between two texts using native SentenceTransformers and ONNX runtime, comparing the results. |
|
|
|
If the results are identical, it confirms that the ONNX model gives the same result as the native models, validating its correctness and suitability for further use in the database. |
|
|
|
|
|
```python |
|
import onnxruntime as rt |
|
|
|
from sentence_transformers.util import cos_sim |
|
from sentence_transformers import SentenceTransformer |
|
|
|
import transformers |
|
|
|
|
|
sentences_1 = 'How is the weather today?' |
|
sentences_2 = 'What is the current weather like today?' |
|
|
|
# Calculate ONNX result |
|
tokenizer = transformers.AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base") |
|
predef_sess = rt.InferenceSession("onnx/model.onnx") |
|
|
|
enc1 = tokenizer(sentences_1) |
|
embeddings_1_onnx = predef_sess.run(None, {"input_ids": [enc1.input_ids], |
|
"attention_mask": [enc1.attention_mask]}) |
|
|
|
enc2 = tokenizer(sentences_2) |
|
embeddings_2_onnx = predef_sess.run(None, {"input_ids": [enc2.input_ids], |
|
"attention_mask": [enc2.attention_mask]}) |
|
|
|
|
|
# Calculate embeddings with SentenceTransformer |
|
model = SentenceTransformer("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True) |
|
embeddings_1_sentence_transformer = model.encode(sentences_1, normalize_embeddings=True, trust_remote_code=True) |
|
embeddings_2_sentence_transformer = model.encode(sentences_2, normalize_embeddings=True, trust_remote_code=True) |
|
|
|
# Compare results |
|
print("Cosine similarity for embeddings calculated with ONNX:" + str(cos_sim(embeddings_1_onnx[1][0], embeddings_2_onnx[1][0]))) |
|
print("Cosine similarity for embeddings calculated with SentenceTransformer:" + str(cos_sim(embeddings_1_sentence_transformer, embeddings_2_sentence_transformer))) |
|
``` |
|
|
|
You can find the detailed ONNX vs. SentenceTransformer result comparison steps in the file [test_local.py](./test_local.py) |
|
|
|
|
|
|
|
----- |
|
|
|
DISCLAIMER: The content herein (“Content”) is provided “AS IS” and is not covered by any Teradata Operations, Inc. and its affiliates (“Teradata”) agreements. Its listing here does not constitute certification or endorsement by Teradata. |
|
|
|
To the extent any of the Content contains or is related to any artificial intelligence (“AI”) or other language learning models (“Models”) that interoperate with the products and services of Teradata, by accessing, bringing, deploying or using such Models, you acknowledge and agree that you are solely responsible for ensuring compliance with all applicable laws, regulations, and restrictions governing the use, deployment, and distribution of AI technologies. This includes, but is not limited to, AI Diffusion Rules, European Union AI Act, AI-related laws and regulations, privacy laws, export controls, and financial or sector-specific regulations. |
|
|
|
While Teradata may provide support, guidance, or assistance in the deployment or implementation of Models to interoperate with Teradata’s products and/or services, you remain fully responsible for ensuring that your Models, data, and applications comply with all relevant legal and regulatory obligations. Our assistance does not constitute legal or regulatory approval, and Teradata disclaims any liability arising from non-compliance with applicable laws. |
|
|
|
You must determine the suitability of the Models for any purpose. Given the probabilistic nature of machine learning and modeling, the use of the Models may in some situations result in incorrect output that does not accurately reflect the action generated. You should evaluate the accuracy of any output as appropriate for your use case, including by using human review of the output. |
|
|