metadata
library_name: sentence-transformers
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- feature-extraction
- sentence-similarity
- transformers
- sentence-embedding
- mteb
model-index:
- name: bilingual-embedding-large
results:
- task:
type: Clustering
dataset:
type: lyon-nlp/alloprof
name: MTEB AlloProfClusteringP2P
config: default
split: test
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b
metrics:
- type: v_measure
value: 56.77190187231352
- type: v_measures
value:
- 0.5591529760439443
- 0.5974955147482336
- 0.5823433887463434
- 0.5582030533486634
- 0.5608784889863708
- task:
type: Clustering
dataset:
type: lyon-nlp/alloprof
name: MTEB AlloProfClusteringS2S
config: default
split: test
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b
metrics:
- type: v_measure
value: 43.8035296814567
- type: v_measures
value:
- 0.48563274496248593
- 0.39879051227524587
- 0.4846924607458884
- 0.48918774336682036
- 0.42626900366019754
- task:
type: Reranking
dataset:
type: lyon-nlp/mteb-fr-reranking-alloprof-s2p
name: MTEB AlloprofReranking
config: default
split: test
revision: 65393d0d7a08a10b4e348135e824f385d420b0fd
metrics:
- type: map
value: 71.51753654257872
- type: mrr
value: 72.59564245963209
- type: nAUC_map_diff1
value: 54.75803232873865
- type: nAUC_map_max
value: 22.763024994564173
- type: nAUC_mrr_diff1
value: 54.34234068011684
- type: nAUC_mrr_max
value: 23.607169979872587
- task:
type: Retrieval
dataset:
type: lyon-nlp/alloprof
name: MTEB AlloprofRetrieval
config: default
split: test
revision: fcf295ea64c750f41fadbaa37b9b861558e1bfbd
metrics:
- type: map_at_1
value: 26.598
- type: map_at_10
value: 37.808
- type: map_at_100
value: 38.726
- type: map_at_1000
value: 38.778
- type: map_at_20
value: 38.34
- type: map_at_3
value: 34.599999999999994
- type: map_at_5
value: 36.385
- type: mrr_at_1
value: 26.59758203799655
- type: mrr_at_10
value: 37.80750541437081
- type: mrr_at_100
value: 38.72559086585011
- type: mrr_at_1000
value: 38.77782362787768
- type: mrr_at_20
value: 38.33973921272315
- type: mrr_at_3
value: 34.59988485895229
- type: mrr_at_5
value: 36.385290731145794
- type: nauc_map_at_1000_diff1
value: 35.14662281558396
- type: nauc_map_at_1000_max
value: 33.29444147034975
- type: nauc_map_at_100_diff1
value: 35.14901176407076
- type: nauc_map_at_100_max
value: 33.32860103491456
- type: nauc_map_at_10_diff1
value: 34.96490139273049
- type: nauc_map_at_10_max
value: 33.252929811567526
- type: nauc_map_at_1_diff1
value: 40.652132824871664
- type: nauc_map_at_1_max
value: 28.53606743237387
- type: nauc_map_at_20_diff1
value: 35.11177701050558
- type: nauc_map_at_20_max
value: 33.38822815064973
- type: nauc_map_at_3_diff1
value: 35.08068505968589
- type: nauc_map_at_3_max
value: 32.10125944853496
- type: nauc_map_at_5_diff1
value: 34.78702330258393
- type: nauc_map_at_5_max
value: 32.89738895858572
- type: nauc_mrr_at_1000_diff1
value: 35.14662281558396
- type: nauc_mrr_at_1000_max
value: 33.29444147034975
- type: nauc_mrr_at_100_diff1
value: 35.14901176407076
- type: nauc_mrr_at_100_max
value: 33.32860103491456
- type: nauc_mrr_at_10_diff1
value: 34.96490139273049
- type: nauc_mrr_at_10_max
value: 33.252929811567526
- type: nauc_mrr_at_1_diff1
value: 40.652132824871664
- type: nauc_mrr_at_1_max
value: 28.53606743237387
- type: nauc_mrr_at_20_diff1
value: 35.11177701050558
- type: nauc_mrr_at_20_max
value: 33.38822815064973
- type: nauc_mrr_at_3_diff1
value: 35.08068505968589
- type: nauc_mrr_at_3_max
value: 32.10125944853496
- type: nauc_mrr_at_5_diff1
value: 34.78702330258393
- type: nauc_mrr_at_5_max
value: 32.89738895858572
- type: nauc_ndcg_at_1000_diff1
value: 34.24563790011671
- type: nauc_ndcg_at_1000_max
value: 35.1750183970367
- type: nauc_ndcg_at_100_diff1
value: 34.26540576121903
- type: nauc_ndcg_at_100_max
value: 36.211723696019526
- type: nauc_ndcg_at_10_diff1
value: 33.363793757214985
- type: nauc_ndcg_at_10_max
value: 35.724249117130285
- type: nauc_ndcg_at_1_diff1
value: 40.652132824871664
- type: nauc_ndcg_at_1_max
value: 28.53606743237387
- type: nauc_ndcg_at_20_diff1
value: 33.82271561239704
- type: nauc_ndcg_at_20_max
value: 36.31393467921569
- type: nauc_ndcg_at_3_diff1
value: 33.47602274641705
- type: nauc_ndcg_at_3_max
value: 33.25855418368982
- type: nauc_ndcg_at_5_diff1
value: 32.982858998932784
- type: nauc_ndcg_at_5_max
value: 34.683197330270694
- type: nauc_precision_at_1000_diff1
value: 20.695105766988465
- type: nauc_precision_at_1000_max
value: 79.24780459990231
- type: nauc_precision_at_100_diff1
value: 32.1302666444128
- type: nauc_precision_at_100_max
value: 59.829884850113594
- type: nauc_precision_at_10_diff1
value: 28.35768310864352
- type: nauc_precision_at_10_max
value: 44.56445585989902
- type: nauc_precision_at_1_diff1
value: 40.652132824871664
- type: nauc_precision_at_1_max
value: 28.53606743237387
- type: nauc_precision_at_20_diff1
value: 29.66063385017264
- type: nauc_precision_at_20_max
value: 48.87458861994212
- type: nauc_precision_at_3_diff1
value: 29.054817586181176
- type: nauc_precision_at_3_max
value: 36.488441946705876
- type: nauc_precision_at_5_diff1
value: 27.79863250059474
- type: nauc_precision_at_5_max
value: 40.02591480379938
- type: nauc_recall_at_1000_diff1
value: 20.695105766987187
- type: nauc_recall_at_1000_max
value: 79.24780459990147
- type: nauc_recall_at_100_diff1
value: 32.13026664441275
- type: nauc_recall_at_100_max
value: 59.829884850113736
- type: nauc_recall_at_10_diff1
value: 28.357683108643496
- type: nauc_recall_at_10_max
value: 44.56445585989908
- type: nauc_recall_at_1_diff1
value: 40.652132824871664
- type: nauc_recall_at_1_max
value: 28.53606743237387
- type: nauc_recall_at_20_diff1
value: 29.660633850172687
- type: nauc_recall_at_20_max
value: 48.87458861994213
- type: nauc_recall_at_3_diff1
value: 29.05481758618121
- type: nauc_recall_at_3_max
value: 36.48844194670593
- type: nauc_recall_at_5_diff1
value: 27.798632500594728
- type: nauc_recall_at_5_max
value: 40.025914803799395
- type: ndcg_at_1
value: 26.598
- type: ndcg_at_10
value: 43.902
- type: ndcg_at_100
value: 48.647
- type: ndcg_at_1000
value: 50.135
- type: ndcg_at_20
value: 45.794000000000004
- type: ndcg_at_3
value: 37.233
- type: ndcg_at_5
value: 40.47
- type: precision_at_1
value: 26.598
- type: precision_at_10
value: 6.334
- type: precision_at_100
value: 0.8619999999999999
- type: precision_at_1000
value: 0.098
- type: precision_at_20
value: 3.5360000000000005
- type: precision_at_3
value: 14.954
- type: precision_at_5
value: 10.552999999999999
- type: recall_at_1
value: 26.598
- type: recall_at_10
value: 63.342
- type: recall_at_100
value: 86.226
- type: recall_at_1000
value: 98.143
- type: recall_at_20
value: 70.72500000000001
- type: recall_at_3
value: 44.862
- type: recall_at_5
value: 52.763000000000005
- task:
type: Classification
dataset:
type: mteb/amazon_reviews_multi
name: MTEB AmazonReviewsClassification (fr)
config: fr
split: test
revision: 1399c76144fd37290681b995c656ef9b2e06e26d
metrics:
- type: accuracy
value: 45.504
- type: f1
value: 43.653559521530944
- type: f1_weighted
value: 43.65355952153093
- task:
type: Retrieval
dataset:
type: maastrichtlawtech/bsard
name: MTEB BSARDRetrieval
config: default
split: test
revision: 5effa1b9b5fa3b0f9e12523e6e43e5f86a6e6d59
metrics:
- type: map_at_1
value: 6.757000000000001
- type: map_at_10
value: 10.569
- type: map_at_100
value: 11.641
- type: map_at_1000
value: 11.758000000000001
- type: map_at_20
value: 11.152
- type: map_at_3
value: 9.009
- type: map_at_5
value: 9.82
- type: mrr_at_1
value: 6.756756756756757
- type: mrr_at_10
value: 10.568604318604317
- type: mrr_at_100
value: 11.640572409499667
- type: mrr_at_1000
value: 11.757590970218725
- type: mrr_at_20
value: 11.152253605813977
- type: mrr_at_3
value: 9.00900900900901
- type: mrr_at_5
value: 9.819819819819822
- type: nauc_map_at_1000_diff1
value: 20.497861726027473
- type: nauc_map_at_1000_max
value: 8.515289767591149
- type: nauc_map_at_100_diff1
value: 20.457264311409304
- type: nauc_map_at_100_max
value: 8.478315371126714
- type: nauc_map_at_10_diff1
value: 22.122755317386826
- type: nauc_map_at_10_max
value: 8.684832816651243
- type: nauc_map_at_1_diff1
value: 32.7324170733489
- type: nauc_map_at_1_max
value: 17.632507133954086
- type: nauc_map_at_20_diff1
value: 21.309144510706552
- type: nauc_map_at_20_max
value: 8.357569194331324
- type: nauc_map_at_3_diff1
value: 24.640888091380244
- type: nauc_map_at_3_max
value: 8.512417316260153
- type: nauc_map_at_5_diff1
value: 23.696706451320555
- type: nauc_map_at_5_max
value: 10.065581499162409
- type: nauc_mrr_at_1000_diff1
value: 20.497861726027473
- type: nauc_mrr_at_1000_max
value: 8.515289767591149
- type: nauc_mrr_at_100_diff1
value: 20.457264311409304
- type: nauc_mrr_at_100_max
value: 8.478315371126714
- type: nauc_mrr_at_10_diff1
value: 22.122755317386826
- type: nauc_mrr_at_10_max
value: 8.684832816651243
- type: nauc_mrr_at_1_diff1
value: 32.7324170733489
- type: nauc_mrr_at_1_max
value: 17.632507133954086
- type: nauc_mrr_at_20_diff1
value: 21.309144510706552
- type: nauc_mrr_at_20_max
value: 8.357569194331324
- type: nauc_mrr_at_3_diff1
value: 24.640888091380244
- type: nauc_mrr_at_3_max
value: 8.512417316260153
- type: nauc_mrr_at_5_diff1
value: 23.696706451320555
- type: nauc_mrr_at_5_max
value: 10.065581499162409
- type: nauc_ndcg_at_1000_diff1
value: 13.492135744038377
- type: nauc_ndcg_at_1000_max
value: 9.019754831261519
- type: nauc_ndcg_at_100_diff1
value: 12.386959698428296
- type: nauc_ndcg_at_100_max
value: 8.140082932773288
- type: nauc_ndcg_at_10_diff1
value: 18.08185602779908
- type: nauc_ndcg_at_10_max
value: 6.451070792965509
- type: nauc_ndcg_at_1_diff1
value: 32.7324170733489
- type: nauc_ndcg_at_1_max
value: 17.632507133954086
- type: nauc_ndcg_at_20_diff1
value: 16.104176022358285
- type: nauc_ndcg_at_20_max
value: 5.670070730016123
- type: nauc_ndcg_at_3_diff1
value: 22.51956353681352
- type: nauc_ndcg_at_3_max
value: 6.154988622749747
- type: nauc_ndcg_at_5_diff1
value: 20.90624606199523
- type: nauc_ndcg_at_5_max
value: 9.018795972091642
- type: nauc_precision_at_1000_diff1
value: -7.5156227166904666
- type: nauc_precision_at_1000_max
value: 21.389191162087265
- type: nauc_precision_at_100_diff1
value: -3.485942444137334
- type: nauc_precision_at_100_max
value: 11.604808001151612
- type: nauc_precision_at_10_diff1
value: 10.569691962167767
- type: nauc_precision_at_10_max
value: 2.5249979762023176
- type: nauc_precision_at_1_diff1
value: 32.7324170733489
- type: nauc_precision_at_1_max
value: 17.632507133954086
- type: nauc_precision_at_20_diff1
value: 6.993263678069275
- type: nauc_precision_at_20_max
value: 1.212342757686577
- type: nauc_precision_at_3_diff1
value: 17.747702545254146
- type: nauc_precision_at_3_max
value: 0.8595742156164364
- type: nauc_precision_at_5_diff1
value: 15.109328976502349
- type: nauc_precision_at_5_max
value: 7.278141846526832
- type: nauc_recall_at_1000_diff1
value: -7.515622716690405
- type: nauc_recall_at_1000_max
value: 21.389191162087183
- type: nauc_recall_at_100_diff1
value: -3.4859424441373585
- type: nauc_recall_at_100_max
value: 11.604808001151582
- type: nauc_recall_at_10_diff1
value: 10.569691962167727
- type: nauc_recall_at_10_max
value: 2.524997976202258
- type: nauc_recall_at_1_diff1
value: 32.7324170733489
- type: nauc_recall_at_1_max
value: 17.632507133954086
- type: nauc_recall_at_20_diff1
value: 6.9932636780692325
- type: nauc_recall_at_20_max
value: 1.2123427576865247
- type: nauc_recall_at_3_diff1
value: 17.74770254525415
- type: nauc_recall_at_3_max
value: 0.8595742156164401
- type: nauc_recall_at_5_diff1
value: 15.109328976502375
- type: nauc_recall_at_5_max
value: 7.278141846526856
- type: ndcg_at_1
value: 6.757000000000001
- type: ndcg_at_10
value: 13.147
- type: ndcg_at_100
value: 18.932
- type: ndcg_at_1000
value: 22.663
- type: ndcg_at_20
value: 15.222
- type: ndcg_at_3
value: 9.812999999999999
- type: ndcg_at_5
value: 11.286
- type: precision_at_1
value: 6.757000000000001
- type: precision_at_10
value: 2.162
- type: precision_at_100
value: 0.5
- type: precision_at_1000
value: 0.08099999999999999
- type: precision_at_20
value: 1.486
- type: precision_at_3
value: 4.054
- type: precision_at_5
value: 3.1530000000000005
- type: recall_at_1
value: 6.757000000000001
- type: recall_at_10
value: 21.622
- type: recall_at_100
value: 50
- type: recall_at_1000
value: 80.631
- type: recall_at_20
value: 29.73
- type: recall_at_3
value: 12.162
- type: recall_at_5
value: 15.766
- task:
type: Clustering
dataset:
type: lyon-nlp/clustering-hal-s2s
name: MTEB HALClusteringS2S
config: default
split: test
revision: e06ebbbb123f8144bef1a5d18796f3dec9ae2915
metrics:
- type: v_measure
value: 24.858750161104126
- type: v_measures
value:
- 0.2984717048033491
- 0.2554963718476627
- 0.27306743677619566
- 0.2655317293951285
- 0.22365142310852648
- task:
type: Clustering
dataset:
type: mlsum
name: MTEB MLSUMClusteringP2P
config: fr
split: test
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7
metrics:
- type: v_measure
value: 39.196489283513955
- type: v_measures
value:
- 0.397079645426394
- 0.41814959265244056
- 0.4075805000522318
- 0.3777207448521023
- 0.3499037023664506
- task:
type: Clustering
dataset:
type: mlsum
name: MTEB MLSUMClusteringS2S
config: fr
split: test
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7
metrics:
- type: v_measure
value: 38.90242301200363
- type: v_measures
value:
- 0.3879152638224075
- 0.4155376524170288
- 0.41644489647223826
- 0.3720493791140543
- 0.33885028730003314
- task:
type: Classification
dataset:
type: mteb/mtop_domain
name: MTEB MTOPDomainClassification (fr)
config: fr
split: test
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
metrics:
- type: accuracy
value: 86.88380833072345
- type: f1
value: 86.96787213648228
- type: f1_weighted
value: 86.83432508604707
- task:
type: Classification
dataset:
type: mteb/mtop_intent
name: MTEB MTOPIntentClassification (fr)
config: fr
split: test
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
metrics:
- type: accuracy
value: 63.780144065142494
- type: f1
value: 46.070400220183394
- type: f1_weighted
value: 66.2871618019472
- task:
type: Classification
dataset:
type: mteb/masakhanews
name: MTEB MasakhaNEWSClassification (fra)
config: fra
split: test
revision: 18193f187b92da67168c655c9973a165ed9593dd
metrics:
- type: accuracy
value: 73.64928909952606
- type: f1
value: 70.38554255346646
- type: f1_weighted
value: 73.78534895009892
- task:
type: Clustering
dataset:
type: masakhane/masakhanews
name: MTEB MasakhaNEWSClusteringP2P (fra)
config: fra
split: test
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60
metrics:
- type: v_measure
value: 53.94667838037061
- type: v_measures
value:
- 1
- 0.02235188516574002
- 0.3929431892497775
- 0.5064698281428253
- 0.7755690164601873
- task:
type: Clustering
dataset:
type: masakhane/masakhanews
name: MTEB MasakhaNEWSClusteringS2S (fra)
config: fra
split: test
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60
metrics:
- type: v_measure
value: 34.0804579102654
- type: v_measures
value:
- 1
- 0.03741154081338069
- 0.3515971997960832
- 0.027612964551341854
- 0.2874011903524643
- task:
type: Classification
dataset:
type: mteb/amazon_massive_intent
name: MTEB MassiveIntentClassification (fr)
config: fr
split: test
revision: 4672e20407010da34463acc759c162ca9734bca6
metrics:
- type: accuracy
value: 66.42905178211164
- type: f1
value: 64.35827544332014
- type: f1_weighted
value: 65.57615486214955
- task:
type: Classification
dataset:
type: mteb/amazon_massive_scenario
name: MTEB MassiveScenarioClassification (fr)
config: fr
split: test
revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
metrics:
- type: accuracy
value: 70.72293207800942
- type: f1
value: 70.02549388005589
- type: f1_weighted
value: 70.47189927452128
- task:
type: Retrieval
dataset:
type: jinaai/mintakaqa
name: MTEB MintakaRetrieval (fr)
config: fr
split: test
revision: efa78cc2f74bbcd21eff2261f9e13aebe40b814e
metrics:
- type: map_at_1
value: 19.41
- type: map_at_10
value: 28.254
- type: map_at_100
value: 29.355999999999998
- type: map_at_1000
value: 29.444
- type: map_at_20
value: 28.918
- type: map_at_3
value: 25.833000000000002
- type: map_at_5
value: 27.141
- type: mrr_at_1
value: 19.41031941031941
- type: mrr_at_10
value: 28.254309504309553
- type: mrr_at_100
value: 29.35551984998816
- type: mrr_at_1000
value: 29.44409433115412
- type: mrr_at_20
value: 28.91784141291608
- type: mrr_at_3
value: 25.832650832650856
- type: mrr_at_5
value: 27.141004641004695
- type: nauc_map_at_1000_diff1
value: 21.162510480664746
- type: nauc_map_at_1000_max
value: 30.110199608609662
- type: nauc_map_at_100_diff1
value: 21.14297046777571
- type: nauc_map_at_100_max
value: 30.122604363568612
- type: nauc_map_at_10_diff1
value: 21.199056431758258
- type: nauc_map_at_10_max
value: 30.270246884757647
- type: nauc_map_at_1_diff1
value: 26.23217963475422
- type: nauc_map_at_1_max
value: 25.259209130612533
- type: nauc_map_at_20_diff1
value: 21.13743075881012
- type: nauc_map_at_20_max
value: 30.22788200140141
- type: nauc_map_at_3_diff1
value: 21.972972142629406
- type: nauc_map_at_3_max
value: 29.75552354821982
- type: nauc_map_at_5_diff1
value: 21.79084317530715
- type: nauc_map_at_5_max
value: 30.387209515342473
- type: nauc_mrr_at_1000_diff1
value: 21.162510480664746
- type: nauc_mrr_at_1000_max
value: 30.110199608609662
- type: nauc_mrr_at_100_diff1
value: 21.14297046777571
- type: nauc_mrr_at_100_max
value: 30.122604363568612
- type: nauc_mrr_at_10_diff1
value: 21.199056431758258
- type: nauc_mrr_at_10_max
value: 30.270246884757647
- type: nauc_mrr_at_1_diff1
value: 26.23217963475422
- type: nauc_mrr_at_1_max
value: 25.259209130612533
- type: nauc_mrr_at_20_diff1
value: 21.13743075881012
- type: nauc_mrr_at_20_max
value: 30.22788200140141
- type: nauc_mrr_at_3_diff1
value: 21.972972142629406
- type: nauc_mrr_at_3_max
value: 29.75552354821982
- type: nauc_mrr_at_5_diff1
value: 21.79084317530715
- type: nauc_mrr_at_5_max
value: 30.387209515342473
- type: nauc_ndcg_at_1000_diff1
value: 19.513865750647934
- type: nauc_ndcg_at_1000_max
value: 30.88832578481811
- type: nauc_ndcg_at_100_diff1
value: 18.80377362018204
- type: nauc_ndcg_at_100_max
value: 30.952837388928288
- type: nauc_ndcg_at_10_diff1
value: 19.020409577228836
- type: nauc_ndcg_at_10_max
value: 31.70346401198393
- type: nauc_ndcg_at_1_diff1
value: 26.23217963475422
- type: nauc_ndcg_at_1_max
value: 25.259209130612533
- type: nauc_ndcg_at_20_diff1
value: 18.76950183960116
- type: nauc_ndcg_at_20_max
value: 31.598953492190745
- type: nauc_ndcg_at_3_diff1
value: 20.824179655562357
- type: nauc_ndcg_at_3_max
value: 31.0541305570042
- type: nauc_ndcg_at_5_diff1
value: 20.483978673834002
- type: nauc_ndcg_at_5_max
value: 32.10721692420019
- type: nauc_precision_at_1000_diff1
value: 2.2196094973599374
- type: nauc_precision_at_1000_max
value: 40.25789000420308
- type: nauc_precision_at_100_diff1
value: 8.63682048375218
- type: nauc_precision_at_100_max
value: 32.32052516290328
- type: nauc_precision_at_10_diff1
value: 12.717058324435426
- type: nauc_precision_at_10_max
value: 35.265251512978985
- type: nauc_precision_at_1_diff1
value: 26.23217963475422
- type: nauc_precision_at_1_max
value: 25.259209130612533
- type: nauc_precision_at_20_diff1
value: 11.220279583941753
- type: nauc_precision_at_20_max
value: 35.20392547315769
- type: nauc_precision_at_3_diff1
value: 17.863403819374867
- type: nauc_precision_at_3_max
value: 34.42645261456197
- type: nauc_precision_at_5_diff1
value: 17.114444816553625
- type: nauc_precision_at_5_max
value: 36.59616112935629
- type: nauc_recall_at_1000_diff1
value: 2.219609497359936
- type: nauc_recall_at_1000_max
value: 40.257890004202366
- type: nauc_recall_at_100_diff1
value: 8.63682048375225
- type: nauc_recall_at_100_max
value: 32.320525162903365
- type: nauc_recall_at_10_diff1
value: 12.71705832443547
- type: nauc_recall_at_10_max
value: 35.26525151297903
- type: nauc_recall_at_1_diff1
value: 26.23217963475422
- type: nauc_recall_at_1_max
value: 25.259209130612533
- type: nauc_recall_at_20_diff1
value: 11.220279583941677
- type: nauc_recall_at_20_max
value: 35.203925473157646
- type: nauc_recall_at_3_diff1
value: 17.86340381937489
- type: nauc_recall_at_3_max
value: 34.42645261456199
- type: nauc_recall_at_5_diff1
value: 17.11444481655362
- type: nauc_recall_at_5_max
value: 36.59616112935631
- type: ndcg_at_1
value: 19.41
- type: ndcg_at_10
value: 32.956
- type: ndcg_at_100
value: 38.504
- type: ndcg_at_1000
value: 41.217
- type: ndcg_at_20
value: 35.35
- type: ndcg_at_3
value: 27.898
- type: ndcg_at_5
value: 30.249
- type: precision_at_1
value: 19.41
- type: precision_at_10
value: 4.795
- type: precision_at_100
value: 0.744
- type: precision_at_1000
value: 0.096
- type: precision_at_20
value: 2.869
- type: precision_at_3
value: 11.289
- type: precision_at_5
value: 7.912
- type: recall_at_1
value: 19.41
- type: recall_at_10
value: 47.952
- type: recall_at_100
value: 74.36500000000001
- type: recall_at_1000
value: 96.478
- type: recall_at_20
value: 57.371
- type: recall_at_3
value: 33.866
- type: recall_at_5
value: 39.558
- task:
type: PairClassification
dataset:
type: GEM/opusparcus
name: MTEB OpusparcusPC (fr)
config: fr
split: test
revision: 9e9b1f8ef51616073f47f306f7f47dd91663f86a
metrics:
- type: cos_sim_accuracy
value: 84.6049046321526
- type: cos_sim_ap
value: 94.73385323002613
- type: cos_sim_f1
value: 89.28571428571428
- type: cos_sim_precision
value: 82.8377230246389
- type: cos_sim_recall
value: 96.8222442899702
- type: dot_accuracy
value: 84.6049046321526
- type: dot_ap
value: 94.73385323002613
- type: dot_f1
value: 89.28571428571428
- type: dot_precision
value: 82.8377230246389
- type: dot_recall
value: 96.8222442899702
- type: euclidean_accuracy
value: 84.6049046321526
- type: euclidean_ap
value: 94.73385323002613
- type: euclidean_f1
value: 89.28571428571428
- type: euclidean_precision
value: 82.8377230246389
- type: euclidean_recall
value: 96.8222442899702
- type: manhattan_accuracy
value: 84.46866485013624
- type: manhattan_ap
value: 94.6952667850496
- type: manhattan_f1
value: 89.20454545454545
- type: manhattan_precision
value: 85.24886877828054
- type: manhattan_recall
value: 93.545183714002
- type: max_accuracy
value: 84.6049046321526
- type: max_ap
value: 94.73385323002613
- type: max_f1
value: 89.28571428571428
- task:
type: PairClassification
dataset:
type: paws-x
name: MTEB PawsX (fr)
config: fr
split: test
revision: 8a04d940a42cd40658986fdd8e3da561533a3646
metrics:
- type: cos_sim_accuracy
value: 64.25
- type: cos_sim_ap
value: 64.3383111759356
- type: cos_sim_f1
value: 63.03703703703704
- type: cos_sim_precision
value: 47.35670562047857
- type: cos_sim_recall
value: 94.24141749723145
- type: dot_accuracy
value: 64.25
- type: dot_ap
value: 64.33647413039195
- type: dot_f1
value: 63.03703703703704
- type: dot_precision
value: 47.35670562047857
- type: dot_recall
value: 94.24141749723145
- type: euclidean_accuracy
value: 64.25
- type: euclidean_ap
value: 64.33837256418407
- type: euclidean_f1
value: 63.03703703703704
- type: euclidean_precision
value: 47.35670562047857
- type: euclidean_recall
value: 94.24141749723145
- type: manhattan_accuracy
value: 64.3
- type: manhattan_ap
value: 64.32795078191543
- type: manhattan_f1
value: 62.935507783543365
- type: manhattan_precision
value: 47.298050139275766
- type: manhattan_recall
value: 94.01993355481729
- type: max_accuracy
value: 64.3
- type: max_ap
value: 64.33837256418407
- type: max_f1
value: 63.03703703703704
- task:
type: STS
dataset:
type: Lajavaness/SICK-fr
name: MTEB SICKFr
config: default
split: test
revision: e077ab4cf4774a1e36d86d593b150422fafd8e8a
metrics:
- type: cos_sim_pearson
value: 84.72950355896789
- type: cos_sim_spearman
value: 79.4608290812696
- type: euclidean_pearson
value: 81.32539142627735
- type: euclidean_spearman
value: 79.46019403205177
- type: manhattan_pearson
value: 81.35998006674681
- type: manhattan_spearman
value: 79.41438354042496
- task:
type: STS
dataset:
type: mteb/sts22-crosslingual-sts
name: MTEB STS22 (fr)
config: fr
split: test
revision: eea2b4fe26a775864c896887d910b76a8098ad3f
metrics:
- type: cos_sim_pearson
value: 81.94721498063055
- type: cos_sim_spearman
value: 83.27561639335909
- type: euclidean_pearson
value: 80.74250472409508
- type: euclidean_spearman
value: 83.27561639335909
- type: manhattan_pearson
value: 80.55336080634422
- type: manhattan_spearman
value: 83.46556509775091
- task:
type: STS
dataset:
type: PhilipMay/stsb_multi_mt
name: MTEB STSBenchmarkMultilingualSTS (fr)
config: fr
split: test
revision: 93d57ef91790589e3ce9c365164337a8a78b7632
metrics:
- type: cos_sim_pearson
value: 86.42208373352562
- type: cos_sim_spearman
value: 86.99991276887566
- type: euclidean_pearson
value: 85.50325028600815
- type: euclidean_spearman
value: 87.00166758198344
- type: manhattan_pearson
value: 85.51048739822163
- type: manhattan_spearman
value: 86.98373812309134
- task:
type: Summarization
dataset:
type: lyon-nlp/summarization-summeval-fr-p2p
name: MTEB SummEvalFr
config: default
split: test
revision: b385812de6a9577b6f4d0f88c6a6e35395a94054
metrics:
- type: cos_sim_pearson
value: 31.911797754639164
- type: cos_sim_spearman
value: 32.17186521965941
- type: dot_pearson
value: 31.911796813216963
- type: dot_spearman
value: 32.17186521965941
- task:
type: Reranking
dataset:
type: lyon-nlp/mteb-fr-reranking-syntec-s2p
name: MTEB SyntecReranking
config: default
split: test
revision: daf0863838cd9e3ba50544cdce3ac2b338a1b0ad
metrics:
- type: map
value: 86.04285714285714
- type: mrr
value: 86.04285714285714
- type: nAUC_map_diff1
value: 67.26948440486838
- type: nAUC_map_max
value: 1.8106095852919237
- type: nAUC_mrr_diff1
value: 67.26948440486838
- type: nAUC_mrr_max
value: 1.8106095852919237
- task:
type: Retrieval
dataset:
type: lyon-nlp/mteb-fr-retrieval-syntec-s2p
name: MTEB SyntecRetrieval
config: default
split: test
revision: 19661ccdca4dfc2d15122d776b61685f48c68ca9
metrics:
- type: map_at_1
value: 72
- type: map_at_10
value: 81.294
- type: map_at_100
value: 81.428
- type: map_at_1000
value: 81.428
- type: map_at_20
value: 81.38499999999999
- type: map_at_3
value: 79.833
- type: map_at_5
value: 80.88300000000001
- type: mrr_at_1
value: 72
- type: mrr_at_10
value: 81.2940476190476
- type: mrr_at_100
value: 81.42843497082626
- type: mrr_at_1000
value: 81.42843497082626
- type: mrr_at_20
value: 81.3849567099567
- type: mrr_at_3
value: 79.83333333333334
- type: mrr_at_5
value: 80.88333333333333
- type: nauc_map_at_1000_diff1
value: 49.54228699135176
- type: nauc_map_at_1000_max
value: 0.06067639126124744
- type: nauc_map_at_100_diff1
value: 49.54228699135176
- type: nauc_map_at_100_max
value: 0.06067639126124744
- type: nauc_map_at_10_diff1
value: 49.53852081026105
- type: nauc_map_at_10_max
value: 0.3053366025276881
- type: nauc_map_at_1_diff1
value: 46.31028693528697
- type: nauc_map_at_1_max
value: -1.3144841269841376
- type: nauc_map_at_20_diff1
value: 49.506524393346865
- type: nauc_map_at_20_max
value: -0.08620516816116486
- type: nauc_map_at_3_diff1
value: 51.19253694434259
- type: nauc_map_at_3_max
value: 0.668963341320456
- type: nauc_map_at_5_diff1
value: 49.37737335974092
- type: nauc_map_at_5_max
value: -0.33590080679527184
- type: nauc_mrr_at_1000_diff1
value: 49.54228699135176
- type: nauc_mrr_at_1000_max
value: 0.06067639126124744
- type: nauc_mrr_at_100_diff1
value: 49.54228699135176
- type: nauc_mrr_at_100_max
value: 0.06067639126124744
- type: nauc_mrr_at_10_diff1
value: 49.53852081026105
- type: nauc_mrr_at_10_max
value: 0.3053366025276881
- type: nauc_mrr_at_1_diff1
value: 46.31028693528697
- type: nauc_mrr_at_1_max
value: -1.3144841269841376
- type: nauc_mrr_at_20_diff1
value: 49.506524393346865
- type: nauc_mrr_at_20_max
value: -0.08620516816116486
- type: nauc_mrr_at_3_diff1
value: 51.19253694434259
- type: nauc_mrr_at_3_max
value: 0.668963341320456
- type: nauc_mrr_at_5_diff1
value: 49.37737335974092
- type: nauc_mrr_at_5_max
value: -0.33590080679527184
- type: nauc_ndcg_at_1000_diff1
value: 49.6848382380357
- type: nauc_ndcg_at_1000_max
value: 0.04870501937096382
- type: nauc_ndcg_at_100_diff1
value: 49.6848382380357
- type: nauc_ndcg_at_100_max
value: 0.04870501937096382
- type: nauc_ndcg_at_10_diff1
value: 49.57645777272915
- type: nauc_ndcg_at_10_max
value: 0.6430420679440534
- type: nauc_ndcg_at_1_diff1
value: 46.31028693528697
- type: nauc_ndcg_at_1_max
value: -1.3144841269841376
- type: nauc_ndcg_at_20_diff1
value: 49.45017977018584
- type: nauc_ndcg_at_20_max
value: -0.9049646537819854
- type: nauc_ndcg_at_3_diff1
value: 52.898658060430904
- type: nauc_ndcg_at_3_max
value: 1.3070987858400047
- type: nauc_ndcg_at_5_diff1
value: 49.054456158711595
- type: nauc_ndcg_at_5_max
value: -1.177736876794348
- type: nauc_precision_at_1000_diff1
value: nan
- type: nauc_precision_at_1000_max
value: nan
- type: nauc_precision_at_100_diff1
value: nan
- type: nauc_precision_at_100_max
value: nan
- type: nauc_precision_at_10_diff1
value: 45.611577964519334
- type: nauc_precision_at_10_max
value: 7.936507936508234
- type: nauc_precision_at_1_diff1
value: 46.31028693528697
- type: nauc_precision_at_1_max
value: -1.3144841269841376
- type: nauc_precision_at_20_diff1
value: 35.80765639589114
- type: nauc_precision_at_20_max
value: -56.34920634920767
- type: nauc_precision_at_3_diff1
value: 61.56395891690006
- type: nauc_precision_at_3_max
value: 4.509803921568394
- type: nauc_precision_at_5_diff1
value: 43.15592903828254
- type: nauc_precision_at_5_max
value: -11.783380018673482
- type: nauc_recall_at_1000_diff1
value: nan
- type: nauc_recall_at_1000_max
value: nan
- type: nauc_recall_at_100_diff1
value: nan
- type: nauc_recall_at_100_max
value: nan
- type: nauc_recall_at_10_diff1
value: 45.61157796451899
- type: nauc_recall_at_10_max
value: 7.9365079365084235
- type: nauc_recall_at_1_diff1
value: 46.31028693528697
- type: nauc_recall_at_1_max
value: -1.3144841269841376
- type: nauc_recall_at_20_diff1
value: 35.80765639589109
- type: nauc_recall_at_20_max
value: -56.34920634920657
- type: nauc_recall_at_3_diff1
value: 61.5639589169002
- type: nauc_recall_at_3_max
value: 4.509803921568655
- type: nauc_recall_at_5_diff1
value: 43.15592903828185
- type: nauc_recall_at_5_max
value: -11.783380018674132
- type: ndcg_at_1
value: 72
- type: ndcg_at_10
value: 85.39999999999999
- type: ndcg_at_100
value: 85.897
- type: ndcg_at_1000
value: 85.897
- type: ndcg_at_20
value: 85.679
- type: ndcg_at_3
value: 82.44
- type: ndcg_at_5
value: 84.418
- type: precision_at_1
value: 72
- type: precision_at_10
value: 9.8
- type: precision_at_100
value: 1
- type: precision_at_1000
value: 0.1
- type: precision_at_20
value: 4.95
- type: precision_at_3
value: 30
- type: precision_at_5
value: 19
- type: recall_at_1
value: 72
- type: recall_at_10
value: 98
- type: recall_at_100
value: 100
- type: recall_at_1000
value: 100
- type: recall_at_20
value: 99
- type: recall_at_3
value: 90
- type: recall_at_5
value: 95
- task:
type: Retrieval
dataset:
type: jinaai/xpqa
name: MTEB XPQARetrieval (fr)
config: fr
split: test
revision: c99d599f0a6ab9b85b065da6f9d94f9cf731679f
metrics:
- type: map_at_1
value: 39.007999999999996
- type: map_at_10
value: 60.319
- type: map_at_100
value: 61.644
- type: map_at_1000
value: 61.712
- type: map_at_20
value: 61.053000000000004
- type: map_at_3
value: 53.942
- type: map_at_5
value: 58.132
- type: mrr_at_1
value: 60.747663551401864
- type: mrr_at_10
value: 68.10636404094345
- type: mrr_at_100
value: 68.57873500135119
- type: mrr_at_1000
value: 68.60183171580495
- type: mrr_at_20
value: 68.36478690417064
- type: mrr_at_3
value: 66.08811748998662
- type: mrr_at_5
value: 67.12950600801062
- type: nauc_map_at_1000_diff1
value: 46.633293504114434
- type: nauc_map_at_1000_max
value: 49.64101214126472
- type: nauc_map_at_100_diff1
value: 46.591074735810764
- type: nauc_map_at_100_max
value: 49.60435205919251
- type: nauc_map_at_10_diff1
value: 46.43486044009182
- type: nauc_map_at_10_max
value: 49.222376322201065
- type: nauc_map_at_1_diff1
value: 54.05448738315762
- type: nauc_map_at_1_max
value: 24.89423418246206
- type: nauc_map_at_20_diff1
value: 46.5131815367993
- type: nauc_map_at_20_max
value: 49.59934686413147
- type: nauc_map_at_3_diff1
value: 48.9688624037045
- type: nauc_map_at_3_max
value: 42.49957358403678
- type: nauc_map_at_5_diff1
value: 46.99183039261338
- type: nauc_map_at_5_max
value: 47.53639129265315
- type: nauc_mrr_at_1000_diff1
value: 55.19161872624107
- type: nauc_mrr_at_1000_max
value: 57.00358990989949
- type: nauc_mrr_at_100_diff1
value: 55.18922613003231
- type: nauc_mrr_at_100_max
value: 56.99910514727928
- type: nauc_mrr_at_10_diff1
value: 55.030904237371224
- type: nauc_mrr_at_10_max
value: 56.99207425207498
- type: nauc_mrr_at_1_diff1
value: 57.34582863547949
- type: nauc_mrr_at_1_max
value: 56.98349812853321
- type: nauc_mrr_at_20_diff1
value: 55.0844531407365
- type: nauc_mrr_at_20_max
value: 57.00264208604279
- type: nauc_mrr_at_3_diff1
value: 55.64735827895618
- type: nauc_mrr_at_3_max
value: 57.29703659670222
- type: nauc_mrr_at_5_diff1
value: 55.38932070005733
- type: nauc_mrr_at_5_max
value: 57.25407452051235
- type: nauc_ndcg_at_1000_diff1
value: 48.35929794131023
- type: nauc_ndcg_at_1000_max
value: 52.522778834890325
- type: nauc_ndcg_at_100_diff1
value: 47.56861995950162
- type: nauc_ndcg_at_100_max
value: 51.88529740719871
- type: nauc_ndcg_at_10_diff1
value: 46.87985909159253
- type: nauc_ndcg_at_10_max
value: 51.38385177949589
- type: nauc_ndcg_at_1_diff1
value: 57.34582863547949
- type: nauc_ndcg_at_1_max
value: 56.98349812853321
- type: nauc_ndcg_at_20_diff1
value: 47.02262106267654
- type: nauc_ndcg_at_20_max
value: 51.98897867452814
- type: nauc_ndcg_at_3_diff1
value: 49.03406876232345
- type: nauc_ndcg_at_3_max
value: 51.51024175391598
- type: nauc_ndcg_at_5_diff1
value: 47.91197516404686
- type: nauc_ndcg_at_5_max
value: 49.81634328349405
- type: nauc_precision_at_1000_diff1
value: -17.71701381388244
- type: nauc_precision_at_1000_max
value: 21.388531798131364
- type: nauc_precision_at_100_diff1
value: -15.539336210367946
- type: nauc_precision_at_100_max
value: 24.698569356368356
- type: nauc_precision_at_10_diff1
value: -4.847564718245167
- type: nauc_precision_at_10_max
value: 36.89232193362945
- type: nauc_precision_at_1_diff1
value: 57.34582863547949
- type: nauc_precision_at_1_max
value: 56.98349812853321
- type: nauc_precision_at_20_diff1
value: -8.665616069723095
- type: nauc_precision_at_20_max
value: 33.10610080847679
- type: nauc_precision_at_3_diff1
value: 11.05940284130611
- type: nauc_precision_at_3_max
value: 46.70869419036014
- type: nauc_precision_at_5_diff1
value: 1.5834393191629197
- type: nauc_precision_at_5_max
value: 41.862527913783865
- type: nauc_recall_at_1000_diff1
value: -10.569638499832076
- type: nauc_recall_at_1000_max
value: 8.749844537815326
- type: nauc_recall_at_100_diff1
value: 27.487071443233262
- type: nauc_recall_at_100_max
value: 35.4122293490153
- type: nauc_recall_at_10_diff1
value: 35.91432645476102
- type: nauc_recall_at_10_max
value: 44.19204765665408
- type: nauc_recall_at_1_diff1
value: 54.05448738315762
- type: nauc_recall_at_1_max
value: 24.89423418246206
- type: nauc_recall_at_20_diff1
value: 34.4589376419412
- type: nauc_recall_at_20_max
value: 44.798804712797136
- type: nauc_recall_at_3_diff1
value: 45.60515874084114
- type: nauc_recall_at_3_max
value: 39.109212749550764
- type: nauc_recall_at_5_diff1
value: 41.35313462606428
- type: nauc_recall_at_5_max
value: 43.39841866434438
- type: ndcg_at_1
value: 60.748000000000005
- type: ndcg_at_10
value: 66.232
- type: ndcg_at_100
value: 70.745
- type: ndcg_at_1000
value: 71.87
- type: ndcg_at_20
value: 67.977
- type: ndcg_at_3
value: 61.06399999999999
- type: ndcg_at_5
value: 62.744
- type: precision_at_1
value: 60.748000000000005
- type: precision_at_10
value: 15.434000000000001
- type: precision_at_100
value: 1.924
- type: precision_at_1000
value: 0.207
- type: precision_at_20
value: 8.378
- type: precision_at_3
value: 37.161
- type: precision_at_5
value: 26.916
- type: recall_at_1
value: 39.007999999999996
- type: recall_at_10
value: 74.929
- type: recall_at_100
value: 92.508
- type: recall_at_1000
value: 99.8
- type: recall_at_20
value: 80.343
- type: recall_at_3
value: 58.367000000000004
- type: recall_at_5
value: 66.446
license: apache-2.0
language:
- fr
- en
bilingual-embedding-large
Bilingual-embedding is an embedding model for two languages: French and English. It is a specialized sentence-embedding model trained specifically for this language pair, leveraging the robust capabilities of XLM-RoBERTa, a pre-trained multilingual language model based on the RoBERTa architecture. The model uses XLM-RoBERTa to encode English and French sentences into a 1024-dimensional vector space, facilitating a wide range of applications from semantic search to text clustering. The embeddings capture the nuanced meanings of English and French sentences, reflecting both the lexical and contextual layers of the language.
Full Model Architecture
SentenceTransformer(
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BilingualModel
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
(2): Normalize()
)
Training and Fine-tuning process
Stage 1: NLI Training
- Dataset: [(SNLI+XNLI) for english+french]
- Method: Training using Multiple Negatives Ranking Loss. This stage focused on improving the model's ability to discern and rank nuanced differences in sentence semantics.
Stage 3: Continued Fine-tuning for Semantic Textual Similarity on STS Benchmark
- Dataset: [STSB-fr and en]
- Method: Fine-tuning specifically for the semantic textual similarity benchmark using Siamese BERT-Networks configured with the 'sentence-transformers' library.
Stage 4: Advanced Augmentation Fine-tuning
- Dataset: STSB, with silver samples generated from the gold samples
- Method: Employed an advanced strategy using Augmented SBERT with Pair Sampling Strategies, integrating both Cross-Encoder and Bi-Encoder models. This stage further refined the embeddings by enriching the training data dynamically, enhancing the model's robustness and accuracy.
Usage:
Using this model becomes easy when you have sentence-transformers installed:
pip install -U sentence-transformers
Then you can use the model like this:
from sentence_transformers import SentenceTransformer
# One French and one English sentence to embed.
sentences = ["Paris est une capitale de la France", "Paris is a capital of France"]
# Load the model from the Hugging Face Hub by its repository id.
model = SentenceTransformer('Lajavaness/bilingual-embedding-large', trust_remote_code=True)
# Encode the sentences into embedding vectors; without this call `embeddings`
# is undefined and the print below raises NameError.
embeddings = model.encode(sentences)
print(embeddings)
Evaluation
TODO
Citation
@article{conneau2019unsupervised,
title={Unsupervised cross-lingual representation learning at scale},
author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin},
journal={arXiv preprint arXiv:1911.02116},
year={2019}
}
@article{reimers2019sentence,
title={Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks},
author={Reimers, Nils and Gurevych, Iryna},
journal={arXiv preprint arXiv:1908.10084},
year={2019}
}
@article{thakur2020augmented,
title={Augmented SBERT: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks},
author={Thakur, Nandan and Reimers, Nils and Daxenberger, Johannes and Gurevych, Iryna},
journal={arXiv e-prints},
pages={arXiv--2010},
year={2020}