|
--- |
|
model-index: |
|
- name: FRIDA |
|
results: |
|
- dataset: |
|
config: default |
|
name: MTEB CEDRClassification (default) |
|
revision: c0ba03d058e3e1b2f3fd20518875a4563dd12db4 |
|
split: test |
|
type: ai-forever/cedr-classification |
|
metrics: |
|
- type: accuracy |
|
value: 64.60148777895856 |
|
- type: f1 |
|
value: 70.36630348039266 |
|
- type: lrap |
|
value: 92.47290116896953 |
|
- type: main_score |
|
value: 64.60148777895856 |
|
task: |
|
type: MultilabelClassification |
|
- dataset: |
|
config: default |
|
name: MTEB GeoreviewClassification (default) |
|
revision: 3765c0d1de6b7d264bc459433c45e5a75513839c |
|
split: test |
|
type: ai-forever/georeview-classification |
|
metrics: |
|
- type: accuracy |
|
value: 57.70996093750001 |
|
- type: f1 |
|
value: 53.18542982057098 |
|
- type: f1_weighted |
|
value: 53.17663229582108 |
|
- type: main_score |
|
value: 57.70996093750001 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB GeoreviewClusteringP2P (default) |
|
revision: 97a313c8fc85b47f13f33e7e9a95c1ad888c7fec |
|
split: test |
|
type: ai-forever/georeview-clustering-p2p |
|
metrics: |
|
- type: main_score |
|
value: 78.25468393043356 |
|
- type: v_measure |
|
value: 78.25468393043356 |
|
- type: v_measure_std |
|
value: 0.5094366871364238 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB HeadlineClassification (default) |
|
revision: 2fe05ee6b5832cda29f2ef7aaad7b7fe6a3609eb |
|
split: test |
|
type: ai-forever/headline-classification |
|
metrics: |
|
- type: accuracy |
|
value: 89.0185546875 |
|
- type: f1 |
|
value: 88.993933120612 |
|
- type: f1_weighted |
|
value: 88.99276764225768 |
|
- type: main_score |
|
value: 89.0185546875 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB InappropriatenessClassification (default) |
|
revision: 601651fdc45ef243751676e62dd7a19f491c0285 |
|
split: test |
|
type: ai-forever/inappropriateness-classification |
|
metrics: |
|
- type: accuracy |
|
value: 78.330078125 |
|
- type: ap |
|
value: 73.17856750532495 |
|
- type: ap_weighted |
|
value: 73.17856750532495 |
|
- type: f1 |
|
value: 78.20169867599041 |
|
- type: f1_weighted |
|
value: 78.20169867599041 |
|
- type: main_score |
|
value: 78.330078125 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB KinopoiskClassification (default) |
|
revision: 5911f26666ac11af46cb9c6849d0dc80a378af24 |
|
split: test |
|
type: ai-forever/kinopoisk-sentiment-classification |
|
metrics: |
|
- type: accuracy |
|
value: 70.46666666666665 |
|
- type: f1 |
|
value: 65.83951766538878 |
|
- type: f1_weighted |
|
value: 65.83951766538878 |
|
- type: main_score |
|
value: 70.46666666666665 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ru |
|
name: MTEB MIRACLReranking (ru) |
|
revision: 6d1962c527217f8927fca80f890f14f36b2802af |
|
split: dev |
|
type: miracl/mmteb-miracl-reranking |
|
metrics: |
|
- type: MAP@1(MIRACL) |
|
value: 39.023 |
|
- type: MAP@10(MIRACL) |
|
value: 60.208 |
|
- type: MAP@100(MIRACL) |
|
value: 61.672000000000004 |
|
- type: MAP@1000(MIRACL) |
|
value: 61.672000000000004 |
|
- type: MAP@20(MIRACL) |
|
value: 61.30799999999999 |
|
- type: MAP@3(MIRACL) |
|
value: 53.33 |
|
- type: MAP@5(MIRACL) |
|
value: 57.289 |
|
- type: NDCG@1(MIRACL) |
|
value: 63.352 |
|
- type: NDCG@10(MIRACL) |
|
value: 66.042 |
|
- type: NDCG@100(MIRACL) |
|
value: 68.702 |
|
- type: NDCG@1000(MIRACL) |
|
value: 68.702 |
|
- type: NDCG@20(MIRACL) |
|
value: 67.768 |
|
- type: NDCG@3(MIRACL) |
|
value: 61.925 |
|
- type: NDCG@5(MIRACL) |
|
value: 63.327 |
|
- type: P@1(MIRACL) |
|
value: 63.352 |
|
- type: P@10(MIRACL) |
|
value: 16.512 |
|
- type: P@100(MIRACL) |
|
value: 1.9529999999999998 |
|
- type: P@1000(MIRACL) |
|
value: 0.19499999999999998 |
|
- type: P@20(MIRACL) |
|
value: 9.13 |
|
- type: P@3(MIRACL) |
|
value: 37.878 |
|
- type: P@5(MIRACL) |
|
value: 27.586 |
|
- type: Recall@1(MIRACL) |
|
value: 39.023 |
|
- type: Recall@10(MIRACL) |
|
value: 72.35000000000001 |
|
- type: Recall@100(MIRACL) |
|
value: 79.952 |
|
- type: Recall@1000(MIRACL) |
|
value: 79.952 |
|
- type: Recall@20(MIRACL) |
|
value: 76.828 |
|
- type: Recall@3(MIRACL) |
|
value: 57.769999999999996 |
|
- type: Recall@5(MIRACL) |
|
value: 64.91900000000001 |
|
- type: main_score |
|
value: 66.042 |
|
- type: nAUC_MAP@1000_diff1(MIRACL) |
|
value: 27.150388833033052 |
|
- type: nAUC_MAP@1000_max(MIRACL) |
|
value: 55.15672274267081 |
|
- type: nAUC_MAP@1000_std(MIRACL) |
|
value: 30.088939934575553 |
|
- type: nAUC_MAP@100_diff1(MIRACL) |
|
value: 27.150388833033052 |
|
- type: nAUC_MAP@100_max(MIRACL) |
|
value: 55.15672274267081 |
|
- type: nAUC_MAP@100_std(MIRACL) |
|
value: 30.088939934575553 |
|
- type: nAUC_MAP@10_diff1(MIRACL) |
|
value: 27.853691773641742 |
|
- type: nAUC_MAP@10_max(MIRACL) |
|
value: 52.89390350055654 |
|
- type: nAUC_MAP@10_std(MIRACL) |
|
value: 28.08732516551691 |
|
- type: nAUC_MAP@1_diff1(MIRACL) |
|
value: 43.23179150244192 |
|
- type: nAUC_MAP@1_max(MIRACL) |
|
value: 29.923943954188864 |
|
- type: nAUC_MAP@1_std(MIRACL) |
|
value: 7.447084370195121 |
|
- type: nAUC_MAP@20_diff1(MIRACL) |
|
value: 27.328384072311675 |
|
- type: nAUC_MAP@20_max(MIRACL) |
|
value: 54.60286379835721 |
|
- type: nAUC_MAP@20_std(MIRACL) |
|
value: 29.8084128980043 |
|
- type: nAUC_MAP@3_diff1(MIRACL) |
|
value: 31.244971536944554 |
|
- type: nAUC_MAP@3_max(MIRACL) |
|
value: 43.63984692803854 |
|
- type: nAUC_MAP@3_std(MIRACL) |
|
value: 18.609234683765887 |
|
- type: nAUC_MAP@5_diff1(MIRACL) |
|
value: 29.088760492638286 |
|
- type: nAUC_MAP@5_max(MIRACL) |
|
value: 48.30474364461509 |
|
- type: nAUC_MAP@5_std(MIRACL) |
|
value: 23.817514353844224 |
|
- type: nAUC_NDCG@1000_diff1(MIRACL) |
|
value: 23.12754356408408 |
|
- type: nAUC_NDCG@1000_max(MIRACL) |
|
value: 64.24894553363303 |
|
- type: nAUC_NDCG@1000_std(MIRACL) |
|
value: 38.19318050598967 |
|
- type: nAUC_NDCG@100_diff1(MIRACL) |
|
value: 23.12754356408408 |
|
- type: nAUC_NDCG@100_max(MIRACL) |
|
value: 64.24894553363303 |
|
- type: nAUC_NDCG@100_std(MIRACL) |
|
value: 38.19318050598967 |
|
- type: nAUC_NDCG@10_diff1(MIRACL) |
|
value: 24.779856373697275 |
|
- type: nAUC_NDCG@10_max(MIRACL) |
|
value: 60.4054459738118 |
|
- type: nAUC_NDCG@10_std(MIRACL) |
|
value: 35.148950441182784 |
|
- type: nAUC_NDCG@1_diff1(MIRACL) |
|
value: 35.605865569438556 |
|
- type: nAUC_NDCG@1_max(MIRACL) |
|
value: 65.77787399715454 |
|
- type: nAUC_NDCG@1_std(MIRACL) |
|
value: 34.34726892885082 |
|
- type: nAUC_NDCG@20_diff1(MIRACL) |
|
value: 23.71231783125691 |
|
- type: nAUC_NDCG@20_max(MIRACL) |
|
value: 62.89676599488004 |
|
- type: nAUC_NDCG@20_std(MIRACL) |
|
value: 37.697052941884316 |
|
- type: nAUC_NDCG@3_diff1(MIRACL) |
|
value: 26.109027741640865 |
|
- type: nAUC_NDCG@3_max(MIRACL) |
|
value: 56.22356793638693 |
|
- type: nAUC_NDCG@3_std(MIRACL) |
|
value: 29.9437568508688 |
|
- type: nAUC_NDCG@5_diff1(MIRACL) |
|
value: 25.98644715327336 |
|
- type: nAUC_NDCG@5_max(MIRACL) |
|
value: 56.25032008404774 |
|
- type: nAUC_NDCG@5_std(MIRACL) |
|
value: 31.581899860862578 |
|
- type: nAUC_P@1000_diff1(MIRACL) |
|
value: -18.29912787064644 |
|
- type: nAUC_P@1000_max(MIRACL) |
|
value: 31.811344878776087 |
|
- type: nAUC_P@1000_std(MIRACL) |
|
value: 30.163820183304914 |
|
- type: nAUC_P@100_diff1(MIRACL) |
|
value: -18.299127870646405 |
|
- type: nAUC_P@100_max(MIRACL) |
|
value: 31.811344878776133 |
|
- type: nAUC_P@100_std(MIRACL) |
|
value: 30.163820183304956 |
|
- type: nAUC_P@10_diff1(MIRACL) |
|
value: -15.96416268531149 |
|
- type: nAUC_P@10_max(MIRACL) |
|
value: 36.989578896466526 |
|
- type: nAUC_P@10_std(MIRACL) |
|
value: 34.54507111688143 |
|
- type: nAUC_P@1_diff1(MIRACL) |
|
value: 35.605865569438556 |
|
- type: nAUC_P@1_max(MIRACL) |
|
value: 65.77787399715454 |
|
- type: nAUC_P@1_std(MIRACL) |
|
value: 34.34726892885082 |
|
- type: nAUC_P@20_diff1(MIRACL) |
|
value: -17.443963421383287 |
|
- type: nAUC_P@20_max(MIRACL) |
|
value: 34.309618168778385 |
|
- type: nAUC_P@20_std(MIRACL) |
|
value: 33.38820956485373 |
|
- type: nAUC_P@3_diff1(MIRACL) |
|
value: -8.533621861815652 |
|
- type: nAUC_P@3_max(MIRACL) |
|
value: 45.90408386776497 |
|
- type: nAUC_P@3_std(MIRACL) |
|
value: 34.50459351305535 |
|
- type: nAUC_P@5_diff1(MIRACL) |
|
value: -13.207968899314865 |
|
- type: nAUC_P@5_max(MIRACL) |
|
value: 40.37718282248973 |
|
- type: nAUC_P@5_std(MIRACL) |
|
value: 35.601417332196206 |
|
- type: nAUC_Recall@1000_diff1(MIRACL) |
|
value: 7.907304198177226 |
|
- type: nAUC_Recall@1000_max(MIRACL) |
|
value: 77.82197832361145 |
|
- type: nAUC_Recall@1000_std(MIRACL) |
|
value: 52.66957487246724 |
|
- type: nAUC_Recall@100_diff1(MIRACL) |
|
value: 7.907304198177226 |
|
- type: nAUC_Recall@100_max(MIRACL) |
|
value: 77.82197832361145 |
|
- type: nAUC_Recall@100_std(MIRACL) |
|
value: 52.66957487246724 |
|
- type: nAUC_Recall@10_diff1(MIRACL) |
|
value: 15.498121023488693 |
|
- type: nAUC_Recall@10_max(MIRACL) |
|
value: 62.24320529338724 |
|
- type: nAUC_Recall@10_std(MIRACL) |
|
value: 40.60221460946224 |
|
- type: nAUC_Recall@1_diff1(MIRACL) |
|
value: 43.23179150244192 |
|
- type: nAUC_Recall@1_max(MIRACL) |
|
value: 29.923943954188864 |
|
- type: nAUC_Recall@1_std(MIRACL) |
|
value: 7.447084370195121 |
|
- type: nAUC_Recall@20_diff1(MIRACL) |
|
value: 11.457044176116248 |
|
- type: nAUC_Recall@20_max(MIRACL) |
|
value: 70.3493054342368 |
|
- type: nAUC_Recall@20_std(MIRACL) |
|
value: 49.27124296325928 |
|
- type: nAUC_Recall@3_diff1(MIRACL) |
|
value: 25.12077828977941 |
|
- type: nAUC_Recall@3_max(MIRACL) |
|
value: 42.903379317937166 |
|
- type: nAUC_Recall@3_std(MIRACL) |
|
value: 20.324501722161497 |
|
- type: nAUC_Recall@5_diff1(MIRACL) |
|
value: 20.925701235197977 |
|
- type: nAUC_Recall@5_max(MIRACL) |
|
value: 49.85323960390812 |
|
- type: nAUC_Recall@5_std(MIRACL) |
|
value: 29.04484539530469 |
|
task: |
|
type: Reranking |
|
- dataset: |
|
config: ru |
|
name: MTEB MIRACLRetrieval (ru) |
|
revision: main |
|
split: dev |
|
type: miracl/mmteb-miracl |
|
metrics: |
|
- type: main_score |
|
value: 71.882 |
|
- type: map_at_1 |
|
value: 37.913000000000004 |
|
- type: map_at_10 |
|
value: 62.604000000000006 |
|
- type: map_at_100 |
|
value: 64.925 |
|
- type: map_at_1000 |
|
value: 64.992 |
|
- type: map_at_20 |
|
value: 64.081 |
|
- type: map_at_3 |
|
value: 55.212 |
|
- type: map_at_5 |
|
value: 59.445 |
|
- type: mrr_at_1 |
|
value: 73.24281150159744 |
|
- type: mrr_at_10 |
|
value: 81.65043866321825 |
|
- type: mrr_at_100 |
|
value: 81.85391378818977 |
|
- type: mrr_at_1000 |
|
value: 81.85753390802569 |
|
- type: mrr_at_20 |
|
value: 81.81045606130179 |
|
- type: mrr_at_3 |
|
value: 80.56443024494146 |
|
- type: mrr_at_5 |
|
value: 81.30724174653893 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 26.962150235593356 |
|
- type: nauc_map_at_1000_max |
|
value: 29.234958037854568 |
|
- type: nauc_map_at_1000_std |
|
value: -2.4294465103633884 |
|
- type: nauc_map_at_100_diff1 |
|
value: 26.92990252114163 |
|
- type: nauc_map_at_100_max |
|
value: 29.206328533120118 |
|
- type: nauc_map_at_100_std |
|
value: -2.437371090941197 |
|
- type: nauc_map_at_10_diff1 |
|
value: 25.758265691179226 |
|
- type: nauc_map_at_10_max |
|
value: 26.949978490795317 |
|
- type: nauc_map_at_10_std |
|
value: -5.484961002106038 |
|
- type: nauc_map_at_1_diff1 |
|
value: 34.70849461278043 |
|
- type: nauc_map_at_1_max |
|
value: 12.778570893623042 |
|
- type: nauc_map_at_1_std |
|
value: -13.018292652743938 |
|
- type: nauc_map_at_20_diff1 |
|
value: 26.659923008218268 |
|
- type: nauc_map_at_20_max |
|
value: 28.341440871568185 |
|
- type: nauc_map_at_20_std |
|
value: -3.614549844913084 |
|
- type: nauc_map_at_3_diff1 |
|
value: 27.197629021438203 |
|
- type: nauc_map_at_3_max |
|
value: 20.701094874050856 |
|
- type: nauc_map_at_3_std |
|
value: -12.062992301112041 |
|
- type: nauc_map_at_5_diff1 |
|
value: 25.51793537203295 |
|
- type: nauc_map_at_5_max |
|
value: 23.80396771243794 |
|
- type: nauc_map_at_5_std |
|
value: -8.920465695323575 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 45.14819989592967 |
|
- type: nauc_mrr_at_1000_max |
|
value: 53.29202156141053 |
|
- type: nauc_mrr_at_1000_std |
|
value: 18.037336462510524 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 45.15287600228451 |
|
- type: nauc_mrr_at_100_max |
|
value: 53.29979751928615 |
|
- type: nauc_mrr_at_100_std |
|
value: 18.04996604778386 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 44.96865105944474 |
|
- type: nauc_mrr_at_10_max |
|
value: 53.53323465323092 |
|
- type: nauc_mrr_at_10_std |
|
value: 18.25001344917689 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 46.16604946873163 |
|
- type: nauc_mrr_at_1_max |
|
value: 48.573651103547874 |
|
- type: nauc_mrr_at_1_std |
|
value: 13.764871626330915 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 45.11925458479102 |
|
- type: nauc_mrr_at_20_max |
|
value: 53.35685123898342 |
|
- type: nauc_mrr_at_20_std |
|
value: 18.127344968819905 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 45.377195452730234 |
|
- type: nauc_mrr_at_3_max |
|
value: 53.35146309217089 |
|
- type: nauc_mrr_at_3_std |
|
value: 17.47105877186237 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 45.00525578771549 |
|
- type: nauc_mrr_at_5_max |
|
value: 53.76227254707128 |
|
- type: nauc_mrr_at_5_std |
|
value: 18.437290060746957 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 31.19215594457491 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 38.09555406458668 |
|
- type: nauc_ndcg_at_1000_std |
|
value: 7.225628621238009 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 30.726331247999934 |
|
- type: nauc_ndcg_at_100_max |
|
value: 37.81369589418277 |
|
- type: nauc_ndcg_at_100_std |
|
value: 7.242855238555071 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 27.514048333744835 |
|
- type: nauc_ndcg_at_10_max |
|
value: 33.10990399385253 |
|
- type: nauc_ndcg_at_10_std |
|
value: 0.3051899572112002 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 47.06089085235751 |
|
- type: nauc_ndcg_at_1_max |
|
value: 47.7300872370495 |
|
- type: nauc_ndcg_at_1_std |
|
value: 12.468605493613916 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 29.404215438764496 |
|
- type: nauc_ndcg_at_20_max |
|
value: 35.26967886796471 |
|
- type: nauc_ndcg_at_20_std |
|
value: 3.7214697890813353 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 29.448848639643067 |
|
- type: nauc_ndcg_at_3_max |
|
value: 33.85912412370657 |
|
- type: nauc_ndcg_at_3_std |
|
value: 0.895453646819452 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 26.916649012613526 |
|
- type: nauc_ndcg_at_5_max |
|
value: 30.899005979291644 |
|
- type: nauc_ndcg_at_5_std |
|
value: -1.0001575639156615 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: -8.492004667432635 |
|
- type: nauc_precision_at_1000_max |
|
value: 14.970190384017679 |
|
- type: nauc_precision_at_1000_std |
|
value: 32.871386621137816 |
|
- type: nauc_precision_at_100_diff1 |
|
value: -8.287314133999967 |
|
- type: nauc_precision_at_100_max |
|
value: 17.794821961284736 |
|
- type: nauc_precision_at_100_std |
|
value: 35.092483550562 |
|
- type: nauc_precision_at_10_diff1 |
|
value: -7.594128993028063 |
|
- type: nauc_precision_at_10_max |
|
value: 24.691446370325732 |
|
- type: nauc_precision_at_10_std |
|
value: 30.126552282608493 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 47.06089085235751 |
|
- type: nauc_precision_at_1_max |
|
value: 47.7300872370495 |
|
- type: nauc_precision_at_1_std |
|
value: 12.468605493613916 |
|
- type: nauc_precision_at_20_diff1 |
|
value: -6.503872195775146 |
|
- type: nauc_precision_at_20_max |
|
value: 21.789730053141312 |
|
- type: nauc_precision_at_20_std |
|
value: 32.61349377558794 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 0.67417079971061 |
|
- type: nauc_precision_at_3_max |
|
value: 30.793871354370662 |
|
- type: nauc_precision_at_3_std |
|
value: 18.35266479252011 |
|
- type: nauc_precision_at_5_diff1 |
|
value: -7.088881730215777 |
|
- type: nauc_precision_at_5_max |
|
value: 26.539771712769006 |
|
- type: nauc_precision_at_5_std |
|
value: 24.116262291865834 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 34.53263588412461 |
|
- type: nauc_recall_at_1000_max |
|
value: 63.54157869100173 |
|
- type: nauc_recall_at_1000_std |
|
value: 64.19854844792808 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 22.86564728642275 |
|
- type: nauc_recall_at_100_max |
|
value: 40.350507162549825 |
|
- type: nauc_recall_at_100_std |
|
value: 29.24492545863015 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 15.384818367225009 |
|
- type: nauc_recall_at_10_max |
|
value: 24.41108571453699 |
|
- type: nauc_recall_at_10_std |
|
value: -3.9216160585776323 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 34.70849461278043 |
|
- type: nauc_recall_at_1_max |
|
value: 12.778570893623042 |
|
- type: nauc_recall_at_1_std |
|
value: -13.018292652743938 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 18.122499000084208 |
|
- type: nauc_recall_at_20_max |
|
value: 26.63104220179424 |
|
- type: nauc_recall_at_20_std |
|
value: 3.969217732521512 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 21.413050725250116 |
|
- type: nauc_recall_at_3_max |
|
value: 16.18894988386887 |
|
- type: nauc_recall_at_3_std |
|
value: -15.24884339282375 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 16.35673072212927 |
|
- type: nauc_recall_at_5_max |
|
value: 18.607003829267846 |
|
- type: nauc_recall_at_5_std |
|
value: -10.463525876945454 |
|
- type: ndcg_at_1 |
|
value: 72.923 |
|
- type: ndcg_at_10 |
|
value: 71.882 |
|
- type: ndcg_at_100 |
|
value: 77.09899999999999 |
|
- type: ndcg_at_1000 |
|
value: 77.835 |
|
- type: ndcg_at_20 |
|
value: 74.497 |
|
- type: ndcg_at_3 |
|
value: 68.504 |
|
- type: ndcg_at_5 |
|
value: 69.068 |
|
- type: precision_at_1 |
|
value: 72.923 |
|
- type: precision_at_10 |
|
value: 19.936 |
|
- type: precision_at_100 |
|
value: 2.6310000000000002 |
|
- type: precision_at_1000 |
|
value: 0.27799999999999997 |
|
- type: precision_at_20 |
|
value: 11.33 |
|
- type: precision_at_3 |
|
value: 45.927 |
|
- type: precision_at_5 |
|
value: 33.131 |
|
- type: recall_at_1 |
|
value: 37.913000000000004 |
|
- type: recall_at_10 |
|
value: 78.365 |
|
- type: recall_at_100 |
|
value: 94.348 |
|
- type: recall_at_1000 |
|
value: 98.187 |
|
- type: recall_at_20 |
|
value: 85.229 |
|
- type: recall_at_3 |
|
value: 61.42999999999999 |
|
- type: recall_at_5 |
|
value: 69.56700000000001 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: ru |
|
name: MTEB MassiveIntentClassification (ru) |
|
revision: 4672e20407010da34463acc759c162ca9734bca6 |
|
split: test |
|
type: mteb/amazon_massive_intent |
|
metrics: |
|
- type: accuracy |
|
value: 79.11903160726294 |
|
- type: f1 |
|
value: 76.22609082694545 |
|
- type: f1_weighted |
|
value: 77.81461248063566 |
|
- type: main_score |
|
value: 79.11903160726294 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: ru |
|
name: MTEB MassiveScenarioClassification (ru) |
|
revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8 |
|
split: test |
|
type: mteb/amazon_massive_scenario |
|
metrics: |
|
- type: accuracy |
|
value: 88.80632145258912 |
|
- type: f1 |
|
value: 87.53157475314829 |
|
- type: f1_weighted |
|
value: 88.22733432521495 |
|
- type: main_score |
|
value: 88.80632145258912 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB RUParaPhraserSTS (default) |
|
revision: 43265056790b8f7c59e0139acb4be0a8dad2c8f4 |
|
split: test |
|
type: merionum/ru_paraphraser |
|
metrics: |
|
- type: cosine_pearson |
|
value: 72.70307124858925 |
|
- type: cosine_spearman |
|
value: 78.09439086920204 |
|
- type: euclidean_pearson |
|
value: 76.2033672014715 |
|
- type: euclidean_spearman |
|
value: 78.09439086920204 |
|
- type: main_score |
|
value: 78.09439086920204 |
|
- type: manhattan_pearson |
|
value: 76.11750470223116 |
|
- type: manhattan_spearman |
|
value: 78.01081063503413 |
|
- type: pearson |
|
value: 72.70307124858925 |
|
- type: spearman |
|
value: 78.09439086920204 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB RiaNewsRetrieval (default) |
|
revision: 82374b0bbacda6114f39ff9c5b925fa1512ca5d7 |
|
split: test |
|
type: ai-forever/ria-news-retrieval |
|
metrics: |
|
- type: main_score |
|
value: 86.819 |
|
- type: map_at_1 |
|
value: 78.79 |
|
- type: map_at_10 |
|
value: 84.516 |
|
- type: map_at_100 |
|
value: 84.68 |
|
- type: map_at_1000 |
|
value: 84.685 |
|
- type: map_at_20 |
|
value: 84.624 |
|
- type: map_at_3 |
|
value: 83.722 |
|
- type: map_at_5 |
|
value: 84.246 |
|
- type: mrr_at_1 |
|
value: 78.78 |
|
- type: mrr_at_10 |
|
value: 84.51815476190441 |
|
- type: mrr_at_100 |
|
value: 84.68390840473289 |
|
- type: mrr_at_1000 |
|
value: 84.68947095200002 |
|
- type: mrr_at_20 |
|
value: 84.62958130822527 |
|
- type: mrr_at_3 |
|
value: 83.74499999999964 |
|
- type: mrr_at_5 |
|
value: 84.23849999999955 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 82.09914867708899 |
|
- type: nauc_map_at_1000_max |
|
value: 43.02024854784386 |
|
- type: nauc_map_at_1000_std |
|
value: -22.919695880762777 |
|
- type: nauc_map_at_100_diff1 |
|
value: 82.09705922783733 |
|
- type: nauc_map_at_100_max |
|
value: 43.02697379581718 |
|
- type: nauc_map_at_100_std |
|
value: -22.90719212899522 |
|
- type: nauc_map_at_10_diff1 |
|
value: 82.04404594672894 |
|
- type: nauc_map_at_10_max |
|
value: 43.06752103182731 |
|
- type: nauc_map_at_10_std |
|
value: -23.007870153273576 |
|
- type: nauc_map_at_1_diff1 |
|
value: 83.89134152210333 |
|
- type: nauc_map_at_1_max |
|
value: 38.083626428503415 |
|
- type: nauc_map_at_1_std |
|
value: -25.817960401194252 |
|
- type: nauc_map_at_20_diff1 |
|
value: 82.08534662247806 |
|
- type: nauc_map_at_20_max |
|
value: 43.074305042312346 |
|
- type: nauc_map_at_20_std |
|
value: -22.91785703613217 |
|
- type: nauc_map_at_3_diff1 |
|
value: 81.7967508697558 |
|
- type: nauc_map_at_3_max |
|
value: 42.90927479098251 |
|
- type: nauc_map_at_3_std |
|
value: -24.01312203859392 |
|
- type: nauc_map_at_5_diff1 |
|
value: 81.90704517505098 |
|
- type: nauc_map_at_5_max |
|
value: 43.05204677044616 |
|
- type: nauc_map_at_5_std |
|
value: -23.267331507554896 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 82.11902348082472 |
|
- type: nauc_mrr_at_1000_max |
|
value: 43.04118936353063 |
|
- type: nauc_mrr_at_1000_std |
|
value: -22.858804296830773 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 82.11685562002263 |
|
- type: nauc_mrr_at_100_max |
|
value: 43.0482537895494 |
|
- type: nauc_mrr_at_100_std |
|
value: -22.84431127787993 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 82.06909958688058 |
|
- type: nauc_mrr_at_10_max |
|
value: 43.07921689466605 |
|
- type: nauc_mrr_at_10_std |
|
value: -22.957623576663234 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 83.91147637794326 |
|
- type: nauc_mrr_at_1_max |
|
value: 37.91917159543152 |
|
- type: nauc_mrr_at_1_std |
|
value: -26.141868289283266 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 82.10314004731809 |
|
- type: nauc_mrr_at_20_max |
|
value: 43.09295406509764 |
|
- type: nauc_mrr_at_20_std |
|
value: -22.862091782178787 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 81.82117067269036 |
|
- type: nauc_mrr_at_3_max |
|
value: 42.94628953323521 |
|
- type: nauc_mrr_at_3_std |
|
value: -23.852510312400714 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 81.92857441701598 |
|
- type: nauc_mrr_at_5_max |
|
value: 43.129719354492934 |
|
- type: nauc_mrr_at_5_std |
|
value: -23.145342272624085 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 81.75015729717991 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 44.7266586308995 |
|
- type: nauc_ndcg_at_1000_std |
|
value: -20.60663899715267 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 81.6897808298767 |
|
- type: nauc_ndcg_at_100_max |
|
value: 44.99492791287099 |
|
- type: nauc_ndcg_at_100_std |
|
value: -20.09637266506936 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 81.46290312197337 |
|
- type: nauc_ndcg_at_10_max |
|
value: 45.30218378452244 |
|
- type: nauc_ndcg_at_10_std |
|
value: -20.70393523891777 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 83.89134152210333 |
|
- type: nauc_ndcg_at_1_max |
|
value: 38.083626428503415 |
|
- type: nauc_ndcg_at_1_std |
|
value: -25.817960401194252 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 81.61080772657213 |
|
- type: nauc_ndcg_at_20_max |
|
value: 45.36571800492172 |
|
- type: nauc_ndcg_at_20_std |
|
value: -20.278763852504042 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 80.95965359410461 |
|
- type: nauc_ndcg_at_3_max |
|
value: 44.756971949205834 |
|
- type: nauc_ndcg_at_3_std |
|
value: -23.07797617717319 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 81.12417712163976 |
|
- type: nauc_ndcg_at_5_max |
|
value: 45.15727381406512 |
|
- type: nauc_ndcg_at_5_std |
|
value: -21.52861766165519 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: 76.80566850396093 |
|
- type: nauc_precision_at_1000_max |
|
value: 82.45685370922442 |
|
- type: nauc_precision_at_1000_std |
|
value: 46.93570976777808 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 77.21645520953484 |
|
- type: nauc_precision_at_100_max |
|
value: 73.43604108309935 |
|
- type: nauc_precision_at_100_std |
|
value: 31.978176891671367 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 77.88251664302092 |
|
- type: nauc_precision_at_10_max |
|
value: 60.58112638995018 |
|
- type: nauc_precision_at_10_std |
|
value: -3.674424315180332 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 83.89134152210333 |
|
- type: nauc_precision_at_1_max |
|
value: 38.083626428503415 |
|
- type: nauc_precision_at_1_std |
|
value: -25.817960401194252 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 78.16426786697438 |
|
- type: nauc_precision_at_20_max |
|
value: 66.0723612699222 |
|
- type: nauc_precision_at_20_std |
|
value: 6.121527084555938 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 77.43122492166451 |
|
- type: nauc_precision_at_3_max |
|
value: 52.50727288548085 |
|
- type: nauc_precision_at_3_std |
|
value: -19.036076920799427 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 77.1127254320532 |
|
- type: nauc_precision_at_5_max |
|
value: 56.100901899221135 |
|
- type: nauc_precision_at_5_std |
|
value: -12.009191140844198 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 76.80566850396035 |
|
- type: nauc_recall_at_1000_max |
|
value: 82.45685370922577 |
|
- type: nauc_recall_at_1000_std |
|
value: 46.93570976777776 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 77.21645520953459 |
|
- type: nauc_recall_at_100_max |
|
value: 73.43604108310011 |
|
- type: nauc_recall_at_100_std |
|
value: 31.978176891671993 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 77.88251664302089 |
|
- type: nauc_recall_at_10_max |
|
value: 60.58112638994999 |
|
- type: nauc_recall_at_10_std |
|
value: -3.6744243151805427 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 83.89134152210333 |
|
- type: nauc_recall_at_1_max |
|
value: 38.083626428503415 |
|
- type: nauc_recall_at_1_std |
|
value: -25.817960401194252 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 78.16426786697409 |
|
- type: nauc_recall_at_20_max |
|
value: 66.07236126992217 |
|
- type: nauc_recall_at_20_std |
|
value: 6.121527084555941 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 77.43122492166454 |
|
- type: nauc_recall_at_3_max |
|
value: 52.507272885480816 |
|
- type: nauc_recall_at_3_std |
|
value: -19.036076920799776 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 77.11272543205318 |
|
- type: nauc_recall_at_5_max |
|
value: 56.10090189922128 |
|
- type: nauc_recall_at_5_std |
|
value: -12.009191140843809 |
|
- type: ndcg_at_1 |
|
value: 78.79 |
|
- type: ndcg_at_10 |
|
value: 86.819 |
|
- type: ndcg_at_100 |
|
value: 87.599 |
|
- type: ndcg_at_1000 |
|
value: 87.761 |
|
- type: ndcg_at_20 |
|
value: 87.208 |
|
- type: ndcg_at_3 |
|
value: 85.222 |
|
- type: ndcg_at_5 |
|
value: 86.164 |
|
- type: precision_at_1 |
|
value: 78.79 |
|
- type: precision_at_10 |
|
value: 9.384 |
|
- type: precision_at_100 |
|
value: 0.975 |
|
- type: precision_at_1000 |
|
value: 0.099 |
|
- type: precision_at_20 |
|
value: 4.769 |
|
- type: precision_at_3 |
|
value: 29.842999999999996 |
|
- type: precision_at_5 |
|
value: 18.362000000000002 |
|
- type: recall_at_1 |
|
value: 78.79 |
|
- type: recall_at_10 |
|
value: 93.84 |
|
- type: recall_at_100 |
|
value: 97.45 |
|
- type: recall_at_1000 |
|
value: 98.76 |
|
- type: recall_at_20 |
|
value: 95.37 |
|
- type: recall_at_3 |
|
value: 89.53 |
|
- type: recall_at_5 |
|
value: 91.81 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB RuBQReranking (default) |
|
revision: 2e96b8f098fa4b0950fc58eacadeb31c0d0c7fa2 |
|
split: test |
|
type: ai-forever/rubq-reranking |
|
metrics: |
|
- type: main_score |
|
value: 77.07394404835635 |
|
- type: map |
|
value: 77.07394404835635 |
|
- type: mrr |
|
value: 82.53144412718882 |
|
- type: nAUC_map_diff1 |
|
value: 45.29805217456628 |
|
- type: nAUC_map_max |
|
value: 34.39894042439188 |
|
- type: nAUC_map_std |
|
value: 21.11309674418275 |
|
- type: nAUC_mrr_diff1 |
|
value: 54.783994737367046 |
|
- type: nAUC_mrr_max |
|
value: 45.68526733900048 |
|
- type: nAUC_mrr_std |
|
value: 28.22466385500339 |
|
task: |
|
type: Reranking |
|
- dataset: |
|
config: default |
|
name: MTEB RuBQRetrieval (default) |
|
revision: e19b6ffa60b3bc248e0b41f4cc37c26a55c2a67b |
|
split: test |
|
type: ai-forever/rubq-retrieval |
|
metrics: |
|
- type: main_score |
|
value: 72.392 |
|
- type: map_at_1 |
|
value: 47.370000000000005 |
|
- type: map_at_10 |
|
value: 65.503 |
|
- type: map_at_100 |
|
value: 66.38 |
|
- type: map_at_1000 |
|
value: 66.42099999999999 |
|
- type: map_at_20 |
|
value: 66.071 |
|
- type: map_at_3 |
|
value: 61.439 |
|
- type: map_at_5 |
|
value: 63.922999999999995 |
|
- type: mrr_at_1 |
|
value: 67.37588652482269 |
|
- type: mrr_at_10 |
|
value: 76.0066747345116 |
|
- type: mrr_at_100 |
|
value: 76.25754138969413 |
|
- type: mrr_at_1000 |
|
value: 76.26968825657428 |
|
- type: mrr_at_20 |
|
value: 76.17548265904622 |
|
- type: mrr_at_3 |
|
value: 74.61583924349881 |
|
- type: mrr_at_5 |
|
value: 75.46690307328608 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 42.52570720187294 |
|
- type: nauc_map_at_1000_max |
|
value: 37.40318318724238 |
|
- type: nauc_map_at_1000_std |
|
value: 0.6037788201535506 |
|
- type: nauc_map_at_100_diff1 |
|
value: 42.493410029691226 |
|
- type: nauc_map_at_100_max |
|
value: 37.39802489244377 |
|
- type: nauc_map_at_100_std |
|
value: 0.6071359951887154 |
|
- type: nauc_map_at_10_diff1 |
|
value: 42.09833519659916 |
|
- type: nauc_map_at_10_max |
|
value: 37.1184138958874 |
|
- type: nauc_map_at_10_std |
|
value: 0.4063543094010351 |
|
- type: nauc_map_at_1_diff1 |
|
value: 49.56605205141156 |
|
- type: nauc_map_at_1_max |
|
value: 26.251096698710384 |
|
- type: nauc_map_at_1_std |
|
value: -4.580748485387834 |
|
- type: nauc_map_at_20_diff1 |
|
value: 42.33372393482018 |
|
- type: nauc_map_at_20_max |
|
value: 37.416955604649985 |
|
- type: nauc_map_at_20_std |
|
value: 0.6050577802787294 |
|
- type: nauc_map_at_3_diff1 |
|
value: 42.362234475441845 |
|
- type: nauc_map_at_3_max |
|
value: 34.56001379838821 |
|
- type: nauc_map_at_3_std |
|
value: -1.507636598929042 |
|
- type: nauc_map_at_5_diff1 |
|
value: 42.0202264882535 |
|
- type: nauc_map_at_5_max |
|
value: 36.64306050200848 |
|
- type: nauc_map_at_5_std |
|
value: -0.09509025708798424 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 58.99601742026931 |
|
- type: nauc_mrr_at_1000_max |
|
value: 49.61561872452777 |
|
- type: nauc_mrr_at_1000_std |
|
value: 2.3956102974352356 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 58.9865943101085 |
|
- type: nauc_mrr_at_100_max |
|
value: 49.6248111507265 |
|
- type: nauc_mrr_at_100_std |
|
value: 2.411155095066369 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 58.81758131092919 |
|
- type: nauc_mrr_at_10_max |
|
value: 49.780365572616695 |
|
- type: nauc_mrr_at_10_std |
|
value: 2.7068696565195944 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 61.67036882487055 |
|
- type: nauc_mrr_at_1_max |
|
value: 45.455271042821714 |
|
- type: nauc_mrr_at_1_std |
|
value: -0.9370526815458349 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 58.93674818203478 |
|
- type: nauc_mrr_at_20_max |
|
value: 49.703218108625215 |
|
- type: nauc_mrr_at_20_std |
|
value: 2.4473106598190415 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 59.046856598788445 |
|
- type: nauc_mrr_at_3_max |
|
value: 49.37161726123392 |
|
- type: nauc_mrr_at_3_std |
|
value: 1.5110936686701506 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 58.92289378915668 |
|
- type: nauc_mrr_at_5_max |
|
value: 49.847638994134144 |
|
- type: nauc_mrr_at_5_std |
|
value: 2.420421880131702 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 45.56062215161734 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 41.507152286702 |
|
- type: nauc_ndcg_at_1000_std |
|
value: 2.79388283208751 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 44.84064192570408 |
|
- type: nauc_ndcg_at_100_max |
|
value: 41.50353573562353 |
|
- type: nauc_ndcg_at_100_std |
|
value: 3.1804999773629357 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 43.341482144213614 |
|
- type: nauc_ndcg_at_10_max |
|
value: 41.159590898395074 |
|
- type: nauc_ndcg_at_10_std |
|
value: 2.945242338240843 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 62.23623985611396 |
|
- type: nauc_ndcg_at_1_max |
|
value: 45.04945770947091 |
|
- type: nauc_ndcg_at_1_std |
|
value: -0.8804967656575725 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 43.905372612093664 |
|
- type: nauc_ndcg_at_20_max |
|
value: 41.797709837872446 |
|
- type: nauc_ndcg_at_20_std |
|
value: 3.1853356915569653 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 44.18163998834299 |
|
- type: nauc_ndcg_at_3_max |
|
value: 38.352891017864636 |
|
- type: nauc_ndcg_at_3_std |
|
value: -0.8235767021150929 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 43.41374688421302 |
|
- type: nauc_ndcg_at_5_max |
|
value: 40.390365601593956 |
|
- type: nauc_ndcg_at_5_std |
|
value: 1.6743650108127537 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: -9.711058370691381 |
|
- type: nauc_precision_at_1000_max |
|
value: 6.97321343449286 |
|
- type: nauc_precision_at_1000_std |
|
value: 7.933531916622121 |
|
- type: nauc_precision_at_100_diff1 |
|
value: -8.247029644152319 |
|
- type: nauc_precision_at_100_max |
|
value: 10.86740140944616 |
|
- type: nauc_precision_at_100_std |
|
value: 9.581885544675918 |
|
- type: nauc_precision_at_10_diff1 |
|
value: -2.409043695429943 |
|
- type: nauc_precision_at_10_max |
|
value: 21.04733206074314 |
|
- type: nauc_precision_at_10_std |
|
value: 10.03334651647101 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 62.23623985611396 |
|
- type: nauc_precision_at_1_max |
|
value: 45.04945770947091 |
|
- type: nauc_precision_at_1_std |
|
value: -0.8804967656575725 |
|
- type: nauc_precision_at_20_diff1 |
|
value: -5.230303656931621 |
|
- type: nauc_precision_at_20_max |
|
value: 17.77799716919181 |
|
- type: nauc_precision_at_20_std |
|
value: 10.739127998618654 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 10.40376424999862 |
|
- type: nauc_precision_at_3_max |
|
value: 30.933333400254035 |
|
- type: nauc_precision_at_3_std |
|
value: 6.126209127968004 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 3.147398101830739 |
|
- type: nauc_precision_at_5_max |
|
value: 27.1746309955971 |
|
- type: nauc_precision_at_5_std |
|
value: 8.874723615388788 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 5.055940692380908 |
|
- type: nauc_recall_at_1000_max |
|
value: 22.42031123370267 |
|
- type: nauc_recall_at_1000_std |
|
value: 27.75476692527869 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 17.86391178198642 |
|
- type: nauc_recall_at_100_max |
|
value: 34.776134863678955 |
|
- type: nauc_recall_at_100_std |
|
value: 18.96377158778504 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 24.863097695413597 |
|
- type: nauc_recall_at_10_max |
|
value: 37.697411651507444 |
|
- type: nauc_recall_at_10_std |
|
value: 9.519849994253967 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 49.56605205141156 |
|
- type: nauc_recall_at_1_max |
|
value: 26.251096698710384 |
|
- type: nauc_recall_at_1_std |
|
value: -4.580748485387834 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 22.440602811005636 |
|
- type: nauc_recall_at_20_max |
|
value: 39.538861316515 |
|
- type: nauc_recall_at_20_std |
|
value: 11.363269553121468 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 32.80302839873736 |
|
- type: nauc_recall_at_3_max |
|
value: 32.53105685012729 |
|
- type: nauc_recall_at_3_std |
|
value: -0.7140166410605693 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 29.375386639154865 |
|
- type: nauc_recall_at_5_max |
|
value: 36.91045781164083 |
|
- type: nauc_recall_at_5_std |
|
value: 4.725419050262578 |
|
- type: ndcg_at_1 |
|
value: 67.13900000000001 |
|
- type: ndcg_at_10 |
|
value: 72.392 |
|
- type: ndcg_at_100 |
|
value: 75.25800000000001 |
|
- type: ndcg_at_1000 |
|
value: 75.982 |
|
- type: ndcg_at_20 |
|
value: 73.783 |
|
- type: ndcg_at_3 |
|
value: 67.269 |
|
- type: ndcg_at_5 |
|
value: 69.807 |
|
- type: precision_at_1 |
|
value: 67.13900000000001 |
|
- type: precision_at_10 |
|
value: 13.327 |
|
- type: precision_at_100 |
|
value: 1.5559999999999998 |
|
- type: precision_at_1000 |
|
value: 0.164 |
|
- type: precision_at_20 |
|
value: 7.119000000000001 |
|
- type: precision_at_3 |
|
value: 35.599 |
|
- type: precision_at_5 |
|
value: 23.936 |
|
- type: recall_at_1 |
|
value: 47.370000000000005 |
|
- type: recall_at_10 |
|
value: 82.16 |
|
- type: recall_at_100 |
|
value: 93.34 |
|
- type: recall_at_1000 |
|
value: 98.202 |
|
- type: recall_at_20 |
|
value: 86.687 |
|
- type: recall_at_3 |
|
value: 69.319 |
|
- type: recall_at_5 |
|
value: 75.637 |
|
task: |
|
type: Retrieval |
|
- dataset: |
|
config: default |
|
name: MTEB RuReviewsClassification (default) |
|
revision: f6d2c31f4dc6b88f468552750bfec05b4b41b05a |
|
split: test |
|
type: ai-forever/ru-reviews-classification |
|
metrics: |
|
- type: accuracy |
|
value: 75.0537109375 |
|
- type: f1 |
|
value: 74.00523205209554 |
|
- type: f1_weighted |
|
value: 74.00436782840376 |
|
- type: main_score |
|
value: 75.0537109375 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB RuSTSBenchmarkSTS (default) |
|
revision: 7cf24f325c6da6195df55bef3d86b5e0616f3018 |
|
split: test |
|
type: ai-forever/ru-stsbenchmark-sts |
|
metrics: |
|
- type: cosine_pearson |
|
value: 81.10255413476487 |
|
- type: cosine_spearman |
|
value: 81.40020843157141 |
|
- type: euclidean_pearson |
|
value: 81.25155479902466 |
|
- type: euclidean_spearman |
|
value: 81.40020831064922 |
|
- type: main_score |
|
value: 81.40020843157141 |
|
- type: manhattan_pearson |
|
value: 81.1493715249014 |
|
- type: manhattan_spearman |
|
value: 81.30973667941649 |
|
- type: pearson |
|
value: 81.10255413476487 |
|
- type: spearman |
|
value: 81.40020843157141 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB RuSciBenchGRNTIClassification (default) |
|
revision: 673a610d6d3dd91a547a0d57ae1b56f37ebbf6a1 |
|
split: test |
|
type: ai-forever/ru-scibench-grnti-classification |
|
metrics: |
|
- type: accuracy |
|
value: 69.8974609375 |
|
- type: f1 |
|
value: 68.57837564785511 |
|
- type: f1_weighted |
|
value: 68.59030489460784 |
|
- type: main_score |
|
value: 69.8974609375 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB RuSciBenchGRNTIClusteringP2P (default) |
|
revision: 673a610d6d3dd91a547a0d57ae1b56f37ebbf6a1 |
|
split: test |
|
type: ai-forever/ru-scibench-grnti-classification |
|
metrics: |
|
- type: main_score |
|
value: 67.03880348548029 |
|
- type: v_measure |
|
value: 67.03880348548029 |
|
- type: v_measure_std |
|
value: 0.6126278133139618 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: default |
|
name: MTEB RuSciBenchOECDClassification (default) |
|
revision: 26c88e99dcaba32bb45d0e1bfc21902337f6d471 |
|
split: test |
|
type: ai-forever/ru-scibench-oecd-classification |
|
metrics: |
|
- type: accuracy |
|
value: 54.63378906250001 |
|
- type: f1 |
|
value: 51.34306420274629 |
|
- type: f1_weighted |
|
value: 51.33495867493914 |
|
- type: main_score |
|
value: 54.63378906250001 |
|
task: |
|
type: Classification |
|
- dataset: |
|
config: default |
|
name: MTEB RuSciBenchOECDClusteringP2P (default) |
|
revision: 26c88e99dcaba32bb45d0e1bfc21902337f6d471 |
|
split: test |
|
type: ai-forever/ru-scibench-oecd-classification |
|
metrics: |
|
- type: main_score |
|
value: 56.55947121159027 |
|
- type: v_measure |
|
value: 56.55947121159027 |
|
- type: v_measure_std |
|
value: 0.5498882006880662 |
|
task: |
|
type: Clustering |
|
- dataset: |
|
config: ru |
|
name: MTEB STS22 (ru) |
|
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3 |
|
split: test |
|
type: mteb/sts22-crosslingual-sts |
|
metrics: |
|
- type: cosine_pearson |
|
value: 61.833294921667914 |
|
- type: cosine_spearman |
|
value: 63.53967536726357 |
|
- type: euclidean_pearson |
|
value: 60.382865218855805 |
|
- type: euclidean_spearman |
|
value: 63.53967536726357 |
|
- type: main_score |
|
value: 63.53967536726357 |
|
- type: manhattan_pearson |
|
value: 60.24879015304578 |
|
- type: manhattan_spearman |
|
value: 63.42305760430092 |
|
- type: pearson |
|
value: 61.833294921667914 |
|
- type: spearman |
|
value: 63.53967536726357 |
|
task: |
|
type: STS |
|
- dataset: |
|
config: default |
|
name: MTEB SensitiveTopicsClassification (default) |
|
revision: 416b34a802308eac30e4192afc0ff99bb8dcc7f2 |
|
split: test |
|
type: ai-forever/sensitive-topics-classification |
|
metrics: |
|
- type: accuracy |
|
value: 39.8193359375 |
|
- type: f1 |
|
value: 55.46591740935434 |
|
- type: lrap |
|
value: 66.50980631510454 |
|
- type: main_score |
|
value: 39.8193359375 |
|
task: |
|
type: MultilabelClassification |
|
- dataset: |
|
config: default |
|
name: MTEB TERRa (default) |
|
revision: 7b58f24536063837d644aab9a023c62199b2a612 |
|
split: dev |
|
type: ai-forever/terra-pairclassification |
|
metrics: |
|
- type: cosine_accuracy |
|
value: 66.77524429967427 |
|
- type: cosine_accuracy_threshold |
|
value: 55.58975338935852 |
|
- type: cosine_ap |
|
value: 66.4567219323658 |
|
- type: cosine_f1 |
|
value: 70.64676616915423 |
|
- type: cosine_f1_threshold |
|
value: 45.55969536304474 |
|
- type: cosine_precision |
|
value: 57.028112449799195 |
|
- type: cosine_recall |
|
value: 92.81045751633987 |
|
- type: dot_accuracy |
|
value: 66.77524429967427 |
|
- type: dot_accuracy_threshold |
|
value: 55.589759349823 |
|
- type: dot_ap |
|
value: 66.4567219323658 |
|
- type: dot_f1 |
|
value: 70.64676616915423 |
|
- type: dot_f1_threshold |
|
value: 45.55969536304474 |
|
- type: dot_precision |
|
value: 57.028112449799195 |
|
- type: dot_recall |
|
value: 92.81045751633987 |
|
- type: euclidean_accuracy |
|
value: 66.77524429967427 |
|
- type: euclidean_accuracy_threshold |
|
value: 94.24455165863037 |
|
- type: euclidean_ap |
|
value: 66.4567219323658 |
|
- type: euclidean_f1 |
|
value: 70.64676616915423 |
|
- type: euclidean_f1_threshold |
|
value: 104.34587001800537 |
|
- type: euclidean_precision |
|
value: 57.028112449799195 |
|
- type: euclidean_recall |
|
value: 92.81045751633987 |
|
- type: main_score |
|
value: 66.4567219323658 |
|
- type: manhattan_accuracy |
|
value: 66.77524429967427 |
|
- type: manhattan_accuracy_threshold |
|
value: 2865.5345916748047 |
|
- type: manhattan_ap |
|
value: 66.26659863769075 |
|
- type: manhattan_f1 |
|
value: 70.8542713567839 |
|
- type: manhattan_f1_threshold |
|
value: 3212.3912811279297 |
|
- type: manhattan_precision |
|
value: 57.55102040816327 |
|
- type: manhattan_recall |
|
value: 92.15686274509804 |
|
- type: max_accuracy |
|
value: 66.77524429967427 |
|
- type: max_ap |
|
value: 66.4567219323658 |
|
- type: max_f1 |
|
value: 70.8542713567839 |
|
- type: max_precision |
|
value: 57.55102040816327 |
|
- type: max_recall |
|
value: 92.81045751633987 |
|
- type: similarity_accuracy |
|
value: 66.77524429967427 |
|
- type: similarity_accuracy_threshold |
|
value: 55.58975338935852 |
|
- type: similarity_ap |
|
value: 66.4567219323658 |
|
- type: similarity_f1 |
|
value: 70.64676616915423 |
|
- type: similarity_f1_threshold |
|
value: 45.55969536304474 |
|
- type: similarity_precision |
|
value: 57.028112449799195 |
|
- type: similarity_recall |
|
value: 92.81045751633987 |
|
task: |
|
type: PairClassification |
|
license: mit |
|
language: |
|
- ru |
|
- en |
|
tags: |
|
- mteb |
|
- transformers |
|
- sentence-transformers |
|
base_model: ai-forever/FRED-T5-1.7B |
|
pipeline_tag: feature-extraction |
|
--- |
|
|
|
# Model Card for FRIDA |
|
|
|
<figure> |
|
<img src="img.jpg"> |
|
</figure> |
|
|
|
FRIDA is a full-scale fine-tuned general text embedding model inspired by a T5-based denoising architecture. The model is based on the encoder part of the [FRED-T5](https://arxiv.org/abs/2309.10931) model and continues the research on text embedding models ([ruMTEB](https://arxiv.org/abs/2408.12503), [ru-en-RoSBERTa](https://huggingface.co/ai-forever/ru-en-RoSBERTa)). It has been pre-trained on a Russian-English dataset and fine-tuned for improved performance on the target task. |
|
|
|
For more model details please refer to our technical report [TODO]. |
|
|
|
## Usage |
|
|
|
The model can be used as is with prefixes. It is recommended to use CLS pooling. The choice of prefix and pooling depends on the task. |
|
|
|
We use the following basic rules to choose a prefix: |
|
- `"search_query: "` and `"search_document: "` prefixes are for answer or relevant paragraph retrieval |
|
- `"paraphrase: "` prefix is for symmetric paraphrasing related tasks (STS, paraphrase mining, deduplication) |
|
- `"categorize: "` prefix is for asymmetric matching of document title and body (e.g. news, scientific papers, social posts) |
|
- `"categorize_sentiment: "` prefix is for any tasks that rely on sentiment features (e.g. hate, toxicity, emotion) |
|
- `"categorize_topic: "` prefix is intended for tasks where you need to group texts by topic |
|
- `"categorize_entailment: "` prefix is for textual entailment task (NLI) |
|
|
|
To better tailor the model to your needs, you can fine-tune it with relevant high-quality Russian and English datasets. |
|
|
|
Below are examples of encoding texts with the Transformers and SentenceTransformers libraries. |
|
|
|
### Transformers |
|
|
|
```python |
|
import torch |
|
import torch.nn.functional as F |
|
from transformers import AutoTokenizer, T5EncoderModel |
|
|
|
|
|
def pool(hidden_state, mask, pooling_method="cls"): |
|
if pooling_method == "mean": |
|
s = torch.sum(hidden_state * mask.unsqueeze(-1).float(), dim=1) |
|
d = mask.sum(axis=1, keepdim=True).float() |
|
return s / d |
|
elif pooling_method == "cls": |
|
return hidden_state[:, 0] |
|
|
|
inputs = [ |
|
# |
|
"paraphrase: В Ярославской области разрешили работу бань, но без посетителей", |
|
"categorize_entailment: Женщину доставили в больницу, за ее жизнь сейчас борются врачи.", |
|
"search_query: Сколько программистов нужно, чтобы вкрутить лампочку?", |
|
# |
|
"paraphrase: Ярославским баням разрешили работать без посетителей", |
|
"categorize_entailment: Женщину спасают врачи.", |
|
"search_document: Чтобы вкрутить лампочку, требуется три программиста: один напишет программу извлечения лампочки, другой — вкручивания лампочки, а третий проведет тестирование." |
|
] |
|
|
|
tokenizer = AutoTokenizer.from_pretrained("ai-forever/FRIDA") |
|
model = T5EncoderModel.from_pretrained("ai-forever/FRIDA") |
|
|
|
tokenized_inputs = tokenizer(inputs, max_length=512, padding=True, truncation=True, return_tensors="pt") |
|
|
|
with torch.no_grad(): |
|
outputs = model(**tokenized_inputs) |
|
|
|
embeddings = pool( |
|
outputs.last_hidden_state, |
|
tokenized_inputs["attention_mask"], |
|
pooling_method="cls" # or try "mean" |
|
) |
|
|
|
embeddings = F.normalize(embeddings, p=2, dim=1) |
|
sim_scores = embeddings[:3] @ embeddings[3:].T |
|
print(sim_scores.diag().tolist()) |
|
# [0.9360030293464661, 0.8591322302818298, 0.728583037853241] |
|
``` |
|
|
|
### SentenceTransformers |
|
|
|
```python |
|
from sentence_transformers import SentenceTransformer |
|
|
|
inputs = [ |
|
# |
|
"paraphrase: В Ярославской области разрешили работу бань, но без посетителей", |
|
"categorize_entailment: Женщину доставили в больницу, за ее жизнь сейчас борются врачи.", |
|
"search_query: Сколько программистов нужно, чтобы вкрутить лампочку?", |
|
# |
|
"paraphrase: Ярославским баням разрешили работать без посетителей", |
|
"categorize_entailment: Женщину спасают врачи.", |
|
"search_document: Чтобы вкрутить лампочку, требуется три программиста: один напишет программу извлечения лампочки, другой — вкручивания лампочки, а третий проведет тестирование." |
|
] |
|
|
|
# loads model with CLS pooling |
|
model = SentenceTransformer("ai-forever/FRIDA") |
|
|
|
# embeddings are normalized by default |
|
embeddings = model.encode(inputs, convert_to_tensor=True) |
|
|
|
sim_scores = embeddings[:3] @ embeddings[3:].T |
|
print(sim_scores.diag().tolist()) |
|
# [0.9360026717185974, 0.8591331243515015, 0.7285830974578857] |
|
``` |
|
|
|
or using prompts (sentence-transformers>=2.4.0): |
|
|
|
```python |
|
from sentence_transformers import SentenceTransformer |
|
|
|
# loads model with CLS pooling |
|
model = SentenceTransformer("ai-forever/FRIDA") |
|
|
|
paraphrase = model.encode(["В Ярославской области разрешили работу бань, но без посетителей", "Ярославским баням разрешили работать без посетителей"], prompt_name="paraphrase") |
|
print(paraphrase[0] @ paraphrase[1].T) # 0.9360032 |
|
|
|
categorize_entailment = model.encode(["Женщину доставили в больницу, за ее жизнь сейчас борются врачи.", "Женщину спасают врачи."], prompt_name="categorize_entailment") |
|
print(categorize_entailment[0] @ categorize_entailment[1].T) # 0.8591322 |
|
|
|
query_embedding = model.encode("Сколько программистов нужно, чтобы вкрутить лампочку?", prompt_name="search_query") |
|
document_embedding = model.encode("Чтобы вкрутить лампочку, требуется три программиста: один напишет программу извлечения лампочки, другой — вкручивания лампочки, а третий проведет тестирование.", prompt_name="search_document") |
|
print(query_embedding @ document_embedding.T) # 0.7285831 |
|
``` |
|
|
|
## Authors |
|
+ [SaluteDevices](https://sberdevices.ru/) AI for B2C RnD Team. |
|
+ Artem Snegirev: [HF profile](https://huggingface.co/artemsnegirev), [Github](https://github.com/artemsnegirev); |
|
+ Anna Maksimova: [HF profile](https://huggingface.co/anpalmak); |
|
+ Aleksandr Abramov: [HF profile](https://huggingface.co/Andrilko), [Github](https://github.com/Ab1992ao), [Kaggle Competitions Master](https://www.kaggle.com/andrilko) |
|
|
|
|
|
## Citation |
|
|
|
``` |
|
@misc{TODO |
|
} |
|
``` |
|
|
|
## Limitations |
|
|
|
The model is designed to process texts in Russian; the quality in English is unknown. The maximum input text length is limited to 512 tokens. |