tags:
- mteb
base_model: mixedbread-ai/mxbai-embed-mini-v1
library_name: sentence-transformers
model-index:
- name: mxbai-embed-xsmall-v1
results:
- task:
type: Retrieval
dataset:
type: arguana
name: MTEB ArguAna
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 25.18
- type: ndcg_at_3
value: 39.22
- type: ndcg_at_5
value: 43.93
- type: ndcg_at_10
value: 49.58
- type: ndcg_at_30
value: 53.41
- type: ndcg_at_100
value: 54.11
- type: map_at_1
value: 25.18
- type: map_at_3
value: 35.66
- type: map_at_5
value: 38.25
- type: map_at_10
value: 40.58
- type: map_at_30
value: 41.6
- type: map_at_100
value: 41.69
- type: recall_at_1
value: 25.18
- type: recall_at_3
value: 49.57
- type: recall_at_5
value: 61.09
- type: recall_at_10
value: 78.59
- type: recall_at_30
value: 94.03
- type: recall_at_100
value: 97.94
- type: precision_at_1
value: 25.18
- type: precision_at_3
value: 16.52
- type: precision_at_5
value: 12.22
- type: precision_at_10
value: 7.86
- type: precision_at_30
value: 3.13
- type: precision_at_100
value: 0.98
- type: accuracy_at_3
value: 49.57
- type: accuracy_at_5
value: 61.09
- type: accuracy_at_10
value: 78.59
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackAndroidRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 44.35
- type: ndcg_at_3
value: 49.64
- type: ndcg_at_5
value: 51.73
- type: ndcg_at_10
value: 54.82
- type: ndcg_at_30
value: 57.64
- type: ndcg_at_100
value: 59.77
- type: map_at_1
value: 36.26
- type: map_at_3
value: 44.35
- type: map_at_5
value: 46.26
- type: map_at_10
value: 48.24
- type: map_at_30
value: 49.34
- type: map_at_100
value: 49.75
- type: recall_at_1
value: 36.26
- type: recall_at_3
value: 51.46
- type: recall_at_5
value: 57.78
- type: recall_at_10
value: 66.5
- type: recall_at_30
value: 77.19
- type: recall_at_100
value: 87.53
- type: precision_at_1
value: 44.35
- type: precision_at_3
value: 23.65
- type: precision_at_5
value: 16.88
- type: precision_at_10
value: 10.7
- type: precision_at_30
value: 4.53
- type: precision_at_100
value: 1.65
- type: accuracy_at_3
value: 60.51
- type: accuracy_at_5
value: 67.67
- type: accuracy_at_10
value: 74.68
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackEnglishRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 39.43
- type: ndcg_at_3
value: 44.13
- type: ndcg_at_5
value: 46.06
- type: ndcg_at_10
value: 48.31
- type: ndcg_at_30
value: 51.06
- type: ndcg_at_100
value: 53.07
- type: map_at_1
value: 31.27
- type: map_at_3
value: 39.07
- type: map_at_5
value: 40.83
- type: map_at_10
value: 42.23
- type: map_at_30
value: 43.27
- type: map_at_100
value: 43.66
- type: recall_at_1
value: 31.27
- type: recall_at_3
value: 45.89
- type: recall_at_5
value: 51.44
- type: recall_at_10
value: 58.65
- type: recall_at_30
value: 69.12
- type: recall_at_100
value: 78.72
- type: precision_at_1
value: 39.43
- type: precision_at_3
value: 21.61
- type: precision_at_5
value: 15.34
- type: precision_at_10
value: 9.27
- type: precision_at_30
value: 4.01
- type: precision_at_100
value: 1.52
- type: accuracy_at_3
value: 55.48
- type: accuracy_at_5
value: 60.76
- type: accuracy_at_10
value: 67.45
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackGamingRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 45.58
- type: ndcg_at_3
value: 52.68
- type: ndcg_at_5
value: 55.28
- type: ndcg_at_10
value: 57.88
- type: ndcg_at_30
value: 60.6
- type: ndcg_at_100
value: 62.03
- type: map_at_1
value: 39.97
- type: map_at_3
value: 49.06
- type: map_at_5
value: 50.87
- type: map_at_10
value: 52.2
- type: map_at_30
value: 53.06
- type: map_at_100
value: 53.28
- type: recall_at_1
value: 39.97
- type: recall_at_3
value: 57.4
- type: recall_at_5
value: 63.83
- type: recall_at_10
value: 71.33
- type: recall_at_30
value: 81.81
- type: recall_at_100
value: 89
- type: precision_at_1
value: 45.58
- type: precision_at_3
value: 23.55
- type: precision_at_5
value: 16.01
- type: precision_at_10
value: 9.25
- type: precision_at_30
value: 3.67
- type: precision_at_100
value: 1.23
- type: accuracy_at_3
value: 62.76
- type: accuracy_at_5
value: 68.84
- type: accuracy_at_10
value: 75.8
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackGisRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 27.35
- type: ndcg_at_3
value: 34.23
- type: ndcg_at_5
value: 37.1
- type: ndcg_at_10
value: 40.26
- type: ndcg_at_30
value: 43.54
- type: ndcg_at_100
value: 45.9
- type: map_at_1
value: 25.28
- type: map_at_3
value: 31.68
- type: map_at_5
value: 33.38
- type: map_at_10
value: 34.79
- type: map_at_30
value: 35.67
- type: map_at_100
value: 35.96
- type: recall_at_1
value: 25.28
- type: recall_at_3
value: 38.95
- type: recall_at_5
value: 45.82
- type: recall_at_10
value: 55.11
- type: recall_at_30
value: 68.13
- type: recall_at_100
value: 80.88
- type: precision_at_1
value: 27.35
- type: precision_at_3
value: 14.65
- type: precision_at_5
value: 10.44
- type: precision_at_10
value: 6.37
- type: precision_at_30
value: 2.65
- type: precision_at_100
value: 0.97
- type: accuracy_at_3
value: 42.15
- type: accuracy_at_5
value: 49.15
- type: accuracy_at_10
value: 58.53
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackMathematicaRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 18.91
- type: ndcg_at_3
value: 24.37
- type: ndcg_at_5
value: 26.11
- type: ndcg_at_10
value: 29.37
- type: ndcg_at_30
value: 33.22
- type: ndcg_at_100
value: 35.73
- type: map_at_1
value: 15.23
- type: map_at_3
value: 21.25
- type: map_at_5
value: 22.38
- type: map_at_10
value: 23.86
- type: map_at_30
value: 24.91
- type: map_at_100
value: 25.24
- type: recall_at_1
value: 15.23
- type: recall_at_3
value: 28.28
- type: recall_at_5
value: 32.67
- type: recall_at_10
value: 42.23
- type: recall_at_30
value: 56.87
- type: recall_at_100
value: 69.44
- type: precision_at_1
value: 18.91
- type: precision_at_3
value: 11.9
- type: precision_at_5
value: 8.48
- type: precision_at_10
value: 5.63
- type: precision_at_30
value: 2.64
- type: precision_at_100
value: 1.02
- type: accuracy_at_3
value: 33.95
- type: accuracy_at_5
value: 38.81
- type: accuracy_at_10
value: 49.13
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackPhysicsRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 36.96
- type: ndcg_at_3
value: 42.48
- type: ndcg_at_5
value: 44.57
- type: ndcg_at_10
value: 47.13
- type: ndcg_at_30
value: 50.65
- type: ndcg_at_100
value: 53.14
- type: map_at_1
value: 30.1
- type: map_at_3
value: 37.97
- type: map_at_5
value: 39.62
- type: map_at_10
value: 41.06
- type: map_at_30
value: 42.13
- type: map_at_100
value: 42.53
- type: recall_at_1
value: 30.1
- type: recall_at_3
value: 45.98
- type: recall_at_5
value: 51.58
- type: recall_at_10
value: 59.24
- type: recall_at_30
value: 72.47
- type: recall_at_100
value: 84.53
- type: precision_at_1
value: 36.96
- type: precision_at_3
value: 20.5
- type: precision_at_5
value: 14.4
- type: precision_at_10
value: 8.62
- type: precision_at_30
value: 3.67
- type: precision_at_100
value: 1.38
- type: accuracy_at_3
value: 54.09
- type: accuracy_at_5
value: 60.25
- type: accuracy_at_10
value: 67.37
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackProgrammersRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 28.65
- type: ndcg_at_3
value: 34.3
- type: ndcg_at_5
value: 36.8
- type: ndcg_at_10
value: 39.92
- type: ndcg_at_30
value: 42.97
- type: ndcg_at_100
value: 45.45
- type: map_at_1
value: 23.35
- type: map_at_3
value: 30.36
- type: map_at_5
value: 32.15
- type: map_at_10
value: 33.74
- type: map_at_30
value: 34.69
- type: map_at_100
value: 35.02
- type: recall_at_1
value: 23.35
- type: recall_at_3
value: 37.71
- type: recall_at_5
value: 44.23
- type: recall_at_10
value: 53.6
- type: recall_at_30
value: 64.69
- type: recall_at_100
value: 77.41
- type: precision_at_1
value: 28.65
- type: precision_at_3
value: 16.74
- type: precision_at_5
value: 12.21
- type: precision_at_10
value: 7.61
- type: precision_at_30
value: 3.29
- type: precision_at_100
value: 1.22
- type: accuracy_at_3
value: 44.86
- type: accuracy_at_5
value: 52.4
- type: accuracy_at_10
value: 61.07
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackStatsRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 26.07
- type: ndcg_at_3
value: 31.62
- type: ndcg_at_5
value: 33.23
- type: ndcg_at_10
value: 35.62
- type: ndcg_at_30
value: 38.41
- type: ndcg_at_100
value: 40.81
- type: map_at_1
value: 22.96
- type: map_at_3
value: 28.85
- type: map_at_5
value: 29.97
- type: map_at_10
value: 31.11
- type: map_at_30
value: 31.86
- type: map_at_100
value: 32.15
- type: recall_at_1
value: 22.96
- type: recall_at_3
value: 35.14
- type: recall_at_5
value: 39.22
- type: recall_at_10
value: 46.52
- type: recall_at_30
value: 57.58
- type: recall_at_100
value: 70.57
- type: precision_at_1
value: 26.07
- type: precision_at_3
value: 14.11
- type: precision_at_5
value: 9.69
- type: precision_at_10
value: 5.81
- type: precision_at_30
value: 2.45
- type: precision_at_100
value: 0.92
- type: accuracy_at_3
value: 39.42
- type: accuracy_at_5
value: 43.41
- type: accuracy_at_10
value: 50.92
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackTexRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 21.78
- type: ndcg_at_3
value: 25.74
- type: ndcg_at_5
value: 27.86
- type: ndcg_at_10
value: 30.3
- type: ndcg_at_30
value: 33.51
- type: ndcg_at_100
value: 36.12
- type: map_at_1
value: 17.63
- type: map_at_3
value: 22.7
- type: map_at_5
value: 24.14
- type: map_at_10
value: 25.31
- type: map_at_30
value: 26.22
- type: map_at_100
value: 26.56
- type: recall_at_1
value: 17.63
- type: recall_at_3
value: 28.37
- type: recall_at_5
value: 33.99
- type: recall_at_10
value: 41.23
- type: recall_at_30
value: 53.69
- type: recall_at_100
value: 67.27
- type: precision_at_1
value: 21.78
- type: precision_at_3
value: 12.41
- type: precision_at_5
value: 9.07
- type: precision_at_10
value: 5.69
- type: precision_at_30
value: 2.61
- type: precision_at_100
value: 1.03
- type: accuracy_at_3
value: 33.62
- type: accuracy_at_5
value: 39.81
- type: accuracy_at_10
value: 47.32
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackUnixRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 30.97
- type: ndcg_at_3
value: 36.13
- type: ndcg_at_5
value: 39
- type: ndcg_at_10
value: 41.78
- type: ndcg_at_30
value: 44.96
- type: ndcg_at_100
value: 47.52
- type: map_at_1
value: 26.05
- type: map_at_3
value: 32.77
- type: map_at_5
value: 34.6
- type: map_at_10
value: 35.93
- type: map_at_30
value: 36.88
- type: map_at_100
value: 37.22
- type: recall_at_1
value: 26.05
- type: recall_at_3
value: 40
- type: recall_at_5
value: 47.34
- type: recall_at_10
value: 55.34
- type: recall_at_30
value: 67.08
- type: recall_at_100
value: 80.2
- type: precision_at_1
value: 30.97
- type: precision_at_3
value: 16.6
- type: precision_at_5
value: 12.03
- type: precision_at_10
value: 7.3
- type: precision_at_30
value: 3.08
- type: precision_at_100
value: 1.15
- type: accuracy_at_3
value: 45.62
- type: accuracy_at_5
value: 53.64
- type: accuracy_at_10
value: 61.66
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackWebmastersRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 29.64
- type: ndcg_at_3
value: 35.49
- type: ndcg_at_5
value: 37.77
- type: ndcg_at_10
value: 40.78
- type: ndcg_at_30
value: 44.59
- type: ndcg_at_100
value: 46.97
- type: map_at_1
value: 24.77
- type: map_at_3
value: 31.33
- type: map_at_5
value: 32.95
- type: map_at_10
value: 34.47
- type: map_at_30
value: 35.7
- type: map_at_100
value: 36.17
- type: recall_at_1
value: 24.77
- type: recall_at_3
value: 38.16
- type: recall_at_5
value: 44.1
- type: recall_at_10
value: 53.31
- type: recall_at_30
value: 68.43
- type: recall_at_100
value: 80.24
- type: precision_at_1
value: 29.64
- type: precision_at_3
value: 16.8
- type: precision_at_5
value: 12.21
- type: precision_at_10
value: 7.83
- type: precision_at_30
value: 3.89
- type: precision_at_100
value: 1.63
- type: accuracy_at_3
value: 45.45
- type: accuracy_at_5
value: 51.58
- type: accuracy_at_10
value: 61.07
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackWordpressRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 23.47
- type: ndcg_at_3
value: 27.98
- type: ndcg_at_5
value: 30.16
- type: ndcg_at_10
value: 32.97
- type: ndcg_at_30
value: 36.3
- type: ndcg_at_100
value: 38.47
- type: map_at_1
value: 21.63
- type: map_at_3
value: 26.02
- type: map_at_5
value: 27.32
- type: map_at_10
value: 28.51
- type: map_at_30
value: 29.39
- type: map_at_100
value: 29.66
- type: recall_at_1
value: 21.63
- type: recall_at_3
value: 31.47
- type: recall_at_5
value: 36.69
- type: recall_at_10
value: 44.95
- type: recall_at_30
value: 58.2
- type: recall_at_100
value: 69.83
- type: precision_at_1
value: 23.47
- type: precision_at_3
value: 11.71
- type: precision_at_5
value: 8.32
- type: precision_at_10
value: 5.23
- type: precision_at_30
value: 2.29
- type: precision_at_100
value: 0.86
- type: accuracy_at_3
value: 34.01
- type: accuracy_at_5
value: 39.37
- type: accuracy_at_10
value: 48.24
- task:
type: Retrieval
dataset:
type: climate-fever
name: MTEB ClimateFEVER
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 19.8
- type: ndcg_at_3
value: 17.93
- type: ndcg_at_5
value: 19.39
- type: ndcg_at_10
value: 22.42
- type: ndcg_at_30
value: 26.79
- type: ndcg_at_100
value: 29.84
- type: map_at_1
value: 9.09
- type: map_at_3
value: 12.91
- type: map_at_5
value: 14.12
- type: map_at_10
value: 15.45
- type: map_at_30
value: 16.73
- type: map_at_100
value: 17.21
- type: recall_at_1
value: 9.09
- type: recall_at_3
value: 16.81
- type: recall_at_5
value: 20.9
- type: recall_at_10
value: 27.65
- type: recall_at_30
value: 41.23
- type: recall_at_100
value: 53.57
- type: precision_at_1
value: 19.8
- type: precision_at_3
value: 13.36
- type: precision_at_5
value: 10.33
- type: precision_at_10
value: 7.15
- type: precision_at_30
value: 3.66
- type: precision_at_100
value: 1.49
- type: accuracy_at_3
value: 36.22
- type: accuracy_at_5
value: 44.1
- type: accuracy_at_10
value: 55.11
- task:
type: Retrieval
dataset:
type: dbpedia-entity
name: MTEB DBPedia
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 42.75
- type: ndcg_at_3
value: 35.67
- type: ndcg_at_5
value: 33.58
- type: ndcg_at_10
value: 32.19
- type: ndcg_at_30
value: 31.82
- type: ndcg_at_100
value: 35.87
- type: map_at_1
value: 7.05
- type: map_at_3
value: 10.5
- type: map_at_5
value: 12.06
- type: map_at_10
value: 14.29
- type: map_at_30
value: 17.38
- type: map_at_100
value: 19.58
- type: recall_at_1
value: 7.05
- type: recall_at_3
value: 11.89
- type: recall_at_5
value: 14.7
- type: recall_at_10
value: 19.78
- type: recall_at_30
value: 29.88
- type: recall_at_100
value: 42.4
- type: precision_at_1
value: 54.25
- type: precision_at_3
value: 39.42
- type: precision_at_5
value: 33.15
- type: precision_at_10
value: 25.95
- type: precision_at_30
value: 15.51
- type: precision_at_100
value: 7.9
- type: accuracy_at_3
value: 72
- type: accuracy_at_5
value: 77.75
- type: accuracy_at_10
value: 83.5
- task:
type: Retrieval
dataset:
type: fever
name: MTEB FEVER
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 40.19
- type: ndcg_at_3
value: 50.51
- type: ndcg_at_5
value: 53.51
- type: ndcg_at_10
value: 56.45
- type: ndcg_at_30
value: 58.74
- type: ndcg_at_100
value: 59.72
- type: map_at_1
value: 37.56
- type: map_at_3
value: 46.74
- type: map_at_5
value: 48.46
- type: map_at_10
value: 49.7
- type: map_at_30
value: 50.31
- type: map_at_100
value: 50.43
- type: recall_at_1
value: 37.56
- type: recall_at_3
value: 58.28
- type: recall_at_5
value: 65.45
- type: recall_at_10
value: 74.28
- type: recall_at_30
value: 83.42
- type: recall_at_100
value: 88.76
- type: precision_at_1
value: 40.19
- type: precision_at_3
value: 20.99
- type: precision_at_5
value: 14.24
- type: precision_at_10
value: 8.12
- type: precision_at_30
value: 3.06
- type: precision_at_100
value: 0.98
- type: accuracy_at_3
value: 62.3
- type: accuracy_at_5
value: 69.94
- type: accuracy_at_10
value: 79.13
- task:
type: Retrieval
dataset:
type: fiqa
name: MTEB FiQA2018
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 34.41
- type: ndcg_at_3
value: 33.2
- type: ndcg_at_5
value: 34.71
- type: ndcg_at_10
value: 37.1
- type: ndcg_at_30
value: 40.88
- type: ndcg_at_100
value: 44.12
- type: map_at_1
value: 17.27
- type: map_at_3
value: 25.36
- type: map_at_5
value: 27.76
- type: map_at_10
value: 29.46
- type: map_at_30
value: 30.74
- type: map_at_100
value: 31.29
- type: recall_at_1
value: 17.27
- type: recall_at_3
value: 30.46
- type: recall_at_5
value: 36.91
- type: recall_at_10
value: 44.47
- type: recall_at_30
value: 56.71
- type: recall_at_100
value: 70.72
- type: precision_at_1
value: 34.41
- type: precision_at_3
value: 22.32
- type: precision_at_5
value: 16.91
- type: precision_at_10
value: 10.53
- type: precision_at_30
value: 4.62
- type: precision_at_100
value: 1.79
- type: accuracy_at_3
value: 50.77
- type: accuracy_at_5
value: 57.56
- type: accuracy_at_10
value: 65.12
- task:
type: Retrieval
dataset:
type: hotpotqa
name: MTEB HotpotQA
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 57.93
- type: ndcg_at_3
value: 44.21
- type: ndcg_at_5
value: 46.4
- type: ndcg_at_10
value: 48.37
- type: ndcg_at_30
value: 50.44
- type: ndcg_at_100
value: 51.86
- type: map_at_1
value: 28.97
- type: map_at_3
value: 36.79
- type: map_at_5
value: 38.31
- type: map_at_10
value: 39.32
- type: map_at_30
value: 39.99
- type: map_at_100
value: 40.2
- type: recall_at_1
value: 28.97
- type: recall_at_3
value: 41.01
- type: recall_at_5
value: 45.36
- type: recall_at_10
value: 50.32
- type: recall_at_30
value: 57.38
- type: recall_at_100
value: 64.06
- type: precision_at_1
value: 57.93
- type: precision_at_3
value: 27.34
- type: precision_at_5
value: 18.14
- type: precision_at_10
value: 10.06
- type: precision_at_30
value: 3.82
- type: precision_at_100
value: 1.28
- type: accuracy_at_3
value: 71.03
- type: accuracy_at_5
value: 75.14
- type: accuracy_at_10
value: 79.84
- task:
type: Retrieval
dataset:
type: msmarco
name: MTEB MSMARCO
config: default
split: dev
revision: None
metrics:
- type: ndcg_at_1
value: 19.74
- type: ndcg_at_3
value: 29.47
- type: ndcg_at_5
value: 32.99
- type: ndcg_at_10
value: 36.76
- type: ndcg_at_30
value: 40.52
- type: ndcg_at_100
value: 42.78
- type: map_at_1
value: 19.2
- type: map_at_3
value: 26.81
- type: map_at_5
value: 28.78
- type: map_at_10
value: 30.35
- type: map_at_30
value: 31.3
- type: map_at_100
value: 31.57
- type: recall_at_1
value: 19.2
- type: recall_at_3
value: 36.59
- type: recall_at_5
value: 45.08
- type: recall_at_10
value: 56.54
- type: recall_at_30
value: 72.05
- type: recall_at_100
value: 84.73
- type: precision_at_1
value: 19.74
- type: precision_at_3
value: 12.61
- type: precision_at_5
value: 9.37
- type: precision_at_10
value: 5.89
- type: precision_at_30
value: 2.52
- type: precision_at_100
value: 0.89
- type: accuracy_at_3
value: 37.38
- type: accuracy_at_5
value: 46.06
- type: accuracy_at_10
value: 57.62
- task:
type: Retrieval
dataset:
type: nq
name: MTEB NQ
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 25.9
- type: ndcg_at_3
value: 35.97
- type: ndcg_at_5
value: 40.27
- type: ndcg_at_10
value: 44.44
- type: ndcg_at_30
value: 48.31
- type: ndcg_at_100
value: 50.14
- type: map_at_1
value: 23.03
- type: map_at_3
value: 32.45
- type: map_at_5
value: 34.99
- type: map_at_10
value: 36.84
- type: map_at_30
value: 37.92
- type: map_at_100
value: 38.16
- type: recall_at_1
value: 23.03
- type: recall_at_3
value: 43.49
- type: recall_at_5
value: 53.41
- type: recall_at_10
value: 65.65
- type: recall_at_30
value: 80.79
- type: recall_at_100
value: 90.59
- type: precision_at_1
value: 25.9
- type: precision_at_3
value: 16.76
- type: precision_at_5
value: 12.54
- type: precision_at_10
value: 7.78
- type: precision_at_30
value: 3.23
- type: precision_at_100
value: 1.1
- type: accuracy_at_3
value: 47.31
- type: accuracy_at_5
value: 57.16
- type: accuracy_at_10
value: 69.09
- task:
type: Retrieval
dataset:
type: nfcorpus
name: MTEB NFCorpus
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 40.87
- type: ndcg_at_3
value: 36.79
- type: ndcg_at_5
value: 34.47
- type: ndcg_at_10
value: 32.05
- type: ndcg_at_30
value: 29.23
- type: ndcg_at_100
value: 29.84
- type: map_at_1
value: 5.05
- type: map_at_3
value: 8.5
- type: map_at_5
value: 9.87
- type: map_at_10
value: 11.71
- type: map_at_30
value: 13.48
- type: map_at_100
value: 14.86
- type: recall_at_1
value: 5.05
- type: recall_at_3
value: 9.55
- type: recall_at_5
value: 11.91
- type: recall_at_10
value: 16.07
- type: recall_at_30
value: 22.13
- type: recall_at_100
value: 30.7
- type: precision_at_1
value: 42.72
- type: precision_at_3
value: 34.78
- type: precision_at_5
value: 30.03
- type: precision_at_10
value: 23.93
- type: precision_at_30
value: 14.61
- type: precision_at_100
value: 7.85
- type: accuracy_at_3
value: 58.2
- type: accuracy_at_5
value: 64.09
- type: accuracy_at_10
value: 69.35
- task:
type: Retrieval
dataset:
type: quora
name: MTEB QuoraRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 80.62
- type: ndcg_at_3
value: 84.62
- type: ndcg_at_5
value: 86.25
- type: ndcg_at_10
value: 87.7
- type: ndcg_at_30
value: 88.63
- type: ndcg_at_100
value: 88.95
- type: map_at_1
value: 69.91
- type: map_at_3
value: 80.7
- type: map_at_5
value: 82.57
- type: map_at_10
value: 83.78
- type: map_at_30
value: 84.33
- type: map_at_100
value: 84.44
- type: recall_at_1
value: 69.91
- type: recall_at_3
value: 86.36
- type: recall_at_5
value: 90.99
- type: recall_at_10
value: 95.19
- type: recall_at_30
value: 98.25
- type: recall_at_100
value: 99.47
- type: precision_at_1
value: 80.62
- type: precision_at_3
value: 37.03
- type: precision_at_5
value: 24.36
- type: precision_at_10
value: 13.4
- type: precision_at_30
value: 4.87
- type: precision_at_100
value: 1.53
- type: accuracy_at_3
value: 92.25
- type: accuracy_at_5
value: 95.29
- type: accuracy_at_10
value: 97.74
- task:
type: Retrieval
dataset:
type: scidocs
name: MTEB SCIDOCS
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 24.1
- type: ndcg_at_3
value: 20.18
- type: ndcg_at_5
value: 17.72
- type: ndcg_at_10
value: 21.5
- type: ndcg_at_30
value: 26.66
- type: ndcg_at_100
value: 30.95
- type: map_at_1
value: 4.88
- type: map_at_3
value: 9.09
- type: map_at_5
value: 10.99
- type: map_at_10
value: 12.93
- type: map_at_30
value: 14.71
- type: map_at_100
value: 15.49
- type: recall_at_1
value: 4.88
- type: recall_at_3
value: 11.55
- type: recall_at_5
value: 15.91
- type: recall_at_10
value: 22.82
- type: recall_at_30
value: 35.7
- type: recall_at_100
value: 50.41
- type: precision_at_1
value: 24.1
- type: precision_at_3
value: 19
- type: precision_at_5
value: 15.72
- type: precision_at_10
value: 11.27
- type: precision_at_30
value: 5.87
- type: precision_at_100
value: 2.49
- type: accuracy_at_3
value: 43
- type: accuracy_at_5
value: 51.6
- type: accuracy_at_10
value: 62.7
- task:
type: Retrieval
dataset:
type: scifact
name: MTEB SciFact
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 52.33
- type: ndcg_at_3
value: 61.47
- type: ndcg_at_5
value: 63.82
- type: ndcg_at_10
value: 65.81
- type: ndcg_at_30
value: 67.75
- type: ndcg_at_100
value: 68.96
- type: map_at_1
value: 50.46
- type: map_at_3
value: 58.51
- type: map_at_5
value: 60.12
- type: map_at_10
value: 61.07
- type: map_at_30
value: 61.64
- type: map_at_100
value: 61.8
- type: recall_at_1
value: 50.46
- type: recall_at_3
value: 67.81
- type: recall_at_5
value: 73.6
- type: recall_at_10
value: 79.31
- type: recall_at_30
value: 86.8
- type: recall_at_100
value: 93.5
- type: precision_at_1
value: 52.33
- type: precision_at_3
value: 24.56
- type: precision_at_5
value: 16.27
- type: precision_at_10
value: 8.9
- type: precision_at_30
value: 3.28
- type: precision_at_100
value: 1.06
- type: accuracy_at_3
value: 69.67
- type: accuracy_at_5
value: 75
- type: accuracy_at_10
value: 80.67
- task:
type: Retrieval
dataset:
type: trec-covid
name: MTEB TRECCOVID
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 57
- type: ndcg_at_3
value: 53.78
- type: ndcg_at_5
value: 52.62
- type: ndcg_at_10
value: 48.9
- type: ndcg_at_30
value: 44.2
- type: ndcg_at_100
value: 36.53
- type: map_at_1
value: 0.16
- type: map_at_3
value: 0.41
- type: map_at_5
value: 0.62
- type: map_at_10
value: 1.07
- type: map_at_30
value: 2.46
- type: map_at_100
value: 5.52
- type: recall_at_1
value: 0.16
- type: recall_at_3
value: 0.45
- type: recall_at_5
value: 0.72
- type: recall_at_10
value: 1.33
- type: recall_at_30
value: 3.46
- type: recall_at_100
value: 8.73
- type: precision_at_1
value: 62
- type: precision_at_3
value: 57.33
- type: precision_at_5
value: 56
- type: precision_at_10
value: 52
- type: precision_at_30
value: 46.2
- type: precision_at_100
value: 37.22
- type: accuracy_at_3
value: 82
- type: accuracy_at_5
value: 90
- type: accuracy_at_10
value: 92
- task:
type: Retrieval
dataset:
type: webis-touche2020
name: MTEB Touche2020
config: default
split: test
revision: None
metrics:
- type: ndcg_at_1
value: 20.41
- type: ndcg_at_3
value: 17.62
- type: ndcg_at_5
value: 17.16
- type: ndcg_at_10
value: 17.09
- type: ndcg_at_30
value: 20.1
- type: ndcg_at_100
value: 26.33
- type: map_at_1
value: 2.15
- type: map_at_3
value: 3.59
- type: map_at_5
value: 5.07
- type: map_at_10
value: 6.95
- type: map_at_30
value: 9.01
- type: map_at_100
value: 10.54
- type: recall_at_1
value: 2.15
- type: recall_at_3
value: 4.5
- type: recall_at_5
value: 7.54
- type: recall_at_10
value: 12.46
- type: recall_at_30
value: 21.9
- type: recall_at_100
value: 36.58
- type: precision_at_1
value: 22.45
- type: precision_at_3
value: 19.05
- type: precision_at_5
value: 17.55
- type: precision_at_10
value: 15.51
- type: precision_at_30
value: 10.07
- type: precision_at_100
value: 5.57
- type: accuracy_at_3
value: 42.86
- type: accuracy_at_5
value: 53.06
- type: accuracy_at_10
value: 69.39
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackRetrieval
config: default
split: test
revision: None
metrics:
- type: ndcg_at_10
value: 41.59
license: apache-2.0
language:
- en
pipeline_tag: feature-extraction
The crispy sentence embedding family from Mixedbread.
mixedbread-ai/mxbai-embed-xsmall-v1
This model is an open-source English embedding model developed by Mixedbread. It's built upon sentence-transformers/all-MiniLM-L6-v2 and trained with the AnglE loss and Espresso. Read more details in our blog post.
In a bread loaf:
- State-of-the-art performance
- Supports both binary quantization and Matryoshka Representation Learning (MRL).
- Optimized for retrieval tasks
Performance
Binary Quantization and Matryoshka
Our model supports both binary quantization and Matryoshka Representation Learning (MRL), allowing for significant efficiency gains:
- Binary quantization: Retains 93.9% of performance while increasing efficiency by a factor of 32
- MRL: A 33% reduction in vector size still leaves 96.2% of model performance
These optimizations can lead to substantial reductions in infrastructure costs for cloud computing and vector databases. Read more here.
Quickstart
Here are several ways to produce English sentence embeddings using our model.
angle-emb
pip install -U angle-emb
from angle_emb import AnglE
from angle_emb.utils import cosine_similarity
# Step 1: choose the embedding size that is passed to encode() below.
dimensions = 384

# Step 2: load the checkpoint with average pooling, then move it to the GPU.
model = AnglE.from_pretrained(
    "mixedbread-ai/mxbai-embed-xsmall-v1",
    pooling_strategy='avg',
)
model = model.cuda()

# The query is placed first in `docs`, so its embedding ends up at index 0.
query = 'A man is eating a piece of bread'
docs = [
    query,
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# Step 3: encode every text in one call, then print the cosine similarity
# between the query embedding and each of the remaining documents.
embeddings = model.encode(docs, embedding_size=dimensions)
query_embedding = embeddings[0]
for doc, emb in zip(docs[1:], embeddings[1:]):
    score = cosine_similarity(query_embedding, emb)
    print(f'{query} ||| {doc}', score)
Sentence Transformers
python -m pip install -U sentence-transformers
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim
# Step 1: choose the value handed to the model as truncate_dim.
dimensions = 384

# Step 2: load the model with that truncate_dim.
model = SentenceTransformer(
    "mixedbread-ai/mxbai-embed-xsmall-v1",
    truncate_dim=dimensions,
)

# The query is placed first in `docs`, so its embedding ends up at index 0.
query = 'A man is eating a piece of bread'
docs = [
    query,
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# Step 3: encode all texts in one call and compare the query embedding
# (row 0) against the embeddings of the remaining documents.
embeddings = model.encode(docs)
query_embedding = embeddings[0]
doc_embeddings = embeddings[1:]
similarities = cos_sim(query_embedding, doc_embeddings)
print('similarities:', similarities)
transformers
pip install -U transformers
from typing import Dict
import torch
import numpy as np
from transformers import AutoModel, AutoTokenizer
from sentence_transformers.util import cos_sim
def pooling(outputs: torch.Tensor, inputs: Dict) -> np.ndarray:
    """Mean-pool token embeddings over the unmasked positions of each sequence.

    Args:
        outputs: Token-level hidden states. They are multiplied by the
            attention mask expanded with a trailing axis and summed over
            dim 1, so dim 1 is the token axis.
        inputs: Mapping that provides ``inputs["attention_mask"]``, a 2-D
            tensor with non-zero entries at the positions to keep.

    Returns:
        A NumPy array (detached, on CPU) holding one pooled vector per row of
        the attention mask.
    """
    mask = inputs["attention_mask"][:, :, None].to(outputs.dtype)
    summed = torch.sum(outputs * mask, dim=1)
    # Normalise each row by its OWN number of unmasked tokens. Dividing by the
    # mask total over the whole batch would not be a per-sequence mean and
    # would make the scale of every vector depend on the rest of the batch.
    # clamp() guards against division by zero for a fully masked row.
    token_counts = mask.sum(dim=1).clamp(min=1)
    return (summed / token_counts).detach().cpu().numpy()
# Step 1: load the tokenizer and the model, and move the model to the GPU.
model_id = 'mixedbread-ai/mxbai-embed-xsmall-v1'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id).cuda()

# The query is placed first in `docs`, so its embedding ends up at index 0.
query = 'A man is eating a piece of bread'
docs = [
    query,
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# Step 2: tokenize with padding, move every input tensor to the GPU, run the
# model, and pool its last hidden state with pooling() defined above.
inputs = tokenizer(docs, padding=True, return_tensors='pt')
for name in list(inputs.keys()):
    inputs[name] = inputs[name].cuda()
outputs = model(**inputs).last_hidden_state
embeddings = pooling(outputs, inputs)

# Step 3: compare the query embedding (row 0) against the remaining rows.
query_embedding = embeddings[0]
doc_embeddings = embeddings[1:]
similarities = cos_sim(query_embedding, doc_embeddings)
print('similarities:', similarities)
Batched API
python -m pip install batched
import uvicorn
import batched
from fastapi import FastAPI
from fastapi.responses import ORJSONResponse
from sentence_transformers import SentenceTransformer
from pydantic import BaseModel
app = FastAPI()

# Load the model once at import time and replace its encode method with the
# wrapper returned by batched.aio.dynamically; the route below awaits it.
model = SentenceTransformer('mixedbread-ai/mxbai-embed-xsmall-v1')
model.encode = batched.aio.dynamically(model.encode)


# Request body for POST /embeddings: `input` is one string or a list of strings.
class EmbeddingsRequest(BaseModel):
    input: str | list[str]


# Encode the request's `input` and return the result under the "embeddings" key.
@app.post("/embeddings")
async def embeddings(request: EmbeddingsRequest):
    encoded = await model.encode(request.input)
    return ORJSONResponse({"embeddings": encoded})


# When run as a script, serve the app on all interfaces at port 8000.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
Community
Join our discord community to share your feedback and thoughts. We're here to help and always happy to discuss the exciting field of machine learning!
License
Apache 2.0
Citation
@online{xsmall2024mxbai,
title={Every Byte Matters: Introducing mxbai-embed-xsmall-v1},
author={Sean Lee and Julius Lipp and Rui Huang and Darius Koenig},
year={2024},
url={https://www.mixedbread.ai/blog/mxbai-embed-xsmall-v1},
}