aamirshakir's picture
Initial commit
b00f33d
metadata
tags:
  - mteb
base_model: mixedbread-ai/mxbai-embed-mini-v1
library_name: sentence-transformers
model-index:
  - name: mxbai-embed-xsmall-v1
    results:
      - task:
          type: Retrieval
        dataset:
          type: arguana
          name: MTEB ArguAna
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 25.18
          - type: ndcg_at_3
            value: 39.22
          - type: ndcg_at_5
            value: 43.93
          - type: ndcg_at_10
            value: 49.58
          - type: ndcg_at_30
            value: 53.41
          - type: ndcg_at_100
            value: 54.11
          - type: map_at_1
            value: 25.18
          - type: map_at_3
            value: 35.66
          - type: map_at_5
            value: 38.25
          - type: map_at_10
            value: 40.58
          - type: map_at_30
            value: 41.6
          - type: map_at_100
            value: 41.69
          - type: recall_at_1
            value: 25.18
          - type: recall_at_3
            value: 49.57
          - type: recall_at_5
            value: 61.09
          - type: recall_at_10
            value: 78.59
          - type: recall_at_30
            value: 94.03
          - type: recall_at_100
            value: 97.94
          - type: precision_at_1
            value: 25.18
          - type: precision_at_3
            value: 16.52
          - type: precision_at_5
            value: 12.22
          - type: precision_at_10
            value: 7.86
          - type: precision_at_30
            value: 3.13
          - type: precision_at_100
            value: 0.98
          - type: accuracy_at_3
            value: 49.57
          - type: accuracy_at_5
            value: 61.09
          - type: accuracy_at_10
            value: 78.59
      - task:
          type: Retrieval
        dataset:
          type: BeIR/cqadupstack
          name: MTEB CQADupstackAndroidRetrieval
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 44.35
          - type: ndcg_at_3
            value: 49.64
          - type: ndcg_at_5
            value: 51.73
          - type: ndcg_at_10
            value: 54.82
          - type: ndcg_at_30
            value: 57.64
          - type: ndcg_at_100
            value: 59.77
          - type: map_at_1
            value: 36.26
          - type: map_at_3
            value: 44.35
          - type: map_at_5
            value: 46.26
          - type: map_at_10
            value: 48.24
          - type: map_at_30
            value: 49.34
          - type: map_at_100
            value: 49.75
          - type: recall_at_1
            value: 36.26
          - type: recall_at_3
            value: 51.46
          - type: recall_at_5
            value: 57.78
          - type: recall_at_10
            value: 66.5
          - type: recall_at_30
            value: 77.19
          - type: recall_at_100
            value: 87.53
          - type: precision_at_1
            value: 44.35
          - type: precision_at_3
            value: 23.65
          - type: precision_at_5
            value: 16.88
          - type: precision_at_10
            value: 10.7
          - type: precision_at_30
            value: 4.53
          - type: precision_at_100
            value: 1.65
          - type: accuracy_at_3
            value: 60.51
          - type: accuracy_at_5
            value: 67.67
          - type: accuracy_at_10
            value: 74.68
      - task:
          type: Retrieval
        dataset:
          type: BeIR/cqadupstack
          name: MTEB CQADupstackEnglishRetrieval
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 39.43
          - type: ndcg_at_3
            value: 44.13
          - type: ndcg_at_5
            value: 46.06
          - type: ndcg_at_10
            value: 48.31
          - type: ndcg_at_30
            value: 51.06
          - type: ndcg_at_100
            value: 53.07
          - type: map_at_1
            value: 31.27
          - type: map_at_3
            value: 39.07
          - type: map_at_5
            value: 40.83
          - type: map_at_10
            value: 42.23
          - type: map_at_30
            value: 43.27
          - type: map_at_100
            value: 43.66
          - type: recall_at_1
            value: 31.27
          - type: recall_at_3
            value: 45.89
          - type: recall_at_5
            value: 51.44
          - type: recall_at_10
            value: 58.65
          - type: recall_at_30
            value: 69.12
          - type: recall_at_100
            value: 78.72
          - type: precision_at_1
            value: 39.43
          - type: precision_at_3
            value: 21.61
          - type: precision_at_5
            value: 15.34
          - type: precision_at_10
            value: 9.27
          - type: precision_at_30
            value: 4.01
          - type: precision_at_100
            value: 1.52
          - type: accuracy_at_3
            value: 55.48
          - type: accuracy_at_5
            value: 60.76
          - type: accuracy_at_10
            value: 67.45
      - task:
          type: Retrieval
        dataset:
          type: BeIR/cqadupstack
          name: MTEB CQADupstackGamingRetrieval
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 45.58
          - type: ndcg_at_3
            value: 52.68
          - type: ndcg_at_5
            value: 55.28
          - type: ndcg_at_10
            value: 57.88
          - type: ndcg_at_30
            value: 60.6
          - type: ndcg_at_100
            value: 62.03
          - type: map_at_1
            value: 39.97
          - type: map_at_3
            value: 49.06
          - type: map_at_5
            value: 50.87
          - type: map_at_10
            value: 52.2
          - type: map_at_30
            value: 53.06
          - type: map_at_100
            value: 53.28
          - type: recall_at_1
            value: 39.97
          - type: recall_at_3
            value: 57.4
          - type: recall_at_5
            value: 63.83
          - type: recall_at_10
            value: 71.33
          - type: recall_at_30
            value: 81.81
          - type: recall_at_100
            value: 89
          - type: precision_at_1
            value: 45.58
          - type: precision_at_3
            value: 23.55
          - type: precision_at_5
            value: 16.01
          - type: precision_at_10
            value: 9.25
          - type: precision_at_30
            value: 3.67
          - type: precision_at_100
            value: 1.23
          - type: accuracy_at_3
            value: 62.76
          - type: accuracy_at_5
            value: 68.84
          - type: accuracy_at_10
            value: 75.8
      - task:
          type: Retrieval
        dataset:
          type: BeIR/cqadupstack
          name: MTEB CQADupstackGisRetrieval
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 27.35
          - type: ndcg_at_3
            value: 34.23
          - type: ndcg_at_5
            value: 37.1
          - type: ndcg_at_10
            value: 40.26
          - type: ndcg_at_30
            value: 43.54
          - type: ndcg_at_100
            value: 45.9
          - type: map_at_1
            value: 25.28
          - type: map_at_3
            value: 31.68
          - type: map_at_5
            value: 33.38
          - type: map_at_10
            value: 34.79
          - type: map_at_30
            value: 35.67
          - type: map_at_100
            value: 35.96
          - type: recall_at_1
            value: 25.28
          - type: recall_at_3
            value: 38.95
          - type: recall_at_5
            value: 45.82
          - type: recall_at_10
            value: 55.11
          - type: recall_at_30
            value: 68.13
          - type: recall_at_100
            value: 80.88
          - type: precision_at_1
            value: 27.35
          - type: precision_at_3
            value: 14.65
          - type: precision_at_5
            value: 10.44
          - type: precision_at_10
            value: 6.37
          - type: precision_at_30
            value: 2.65
          - type: precision_at_100
            value: 0.97
          - type: accuracy_at_3
            value: 42.15
          - type: accuracy_at_5
            value: 49.15
          - type: accuracy_at_10
            value: 58.53
      - task:
          type: Retrieval
        dataset:
          type: BeIR/cqadupstack
          name: MTEB CQADupstackMathematicaRetrieval
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 18.91
          - type: ndcg_at_3
            value: 24.37
          - type: ndcg_at_5
            value: 26.11
          - type: ndcg_at_10
            value: 29.37
          - type: ndcg_at_30
            value: 33.22
          - type: ndcg_at_100
            value: 35.73
          - type: map_at_1
            value: 15.23
          - type: map_at_3
            value: 21.25
          - type: map_at_5
            value: 22.38
          - type: map_at_10
            value: 23.86
          - type: map_at_30
            value: 24.91
          - type: map_at_100
            value: 25.24
          - type: recall_at_1
            value: 15.23
          - type: recall_at_3
            value: 28.28
          - type: recall_at_5
            value: 32.67
          - type: recall_at_10
            value: 42.23
          - type: recall_at_30
            value: 56.87
          - type: recall_at_100
            value: 69.44
          - type: precision_at_1
            value: 18.91
          - type: precision_at_3
            value: 11.9
          - type: precision_at_5
            value: 8.48
          - type: precision_at_10
            value: 5.63
          - type: precision_at_30
            value: 2.64
          - type: precision_at_100
            value: 1.02
          - type: accuracy_at_3
            value: 33.95
          - type: accuracy_at_5
            value: 38.81
          - type: accuracy_at_10
            value: 49.13
      - task:
          type: Retrieval
        dataset:
          type: BeIR/cqadupstack
          name: MTEB CQADupstackPhysicsRetrieval
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 36.96
          - type: ndcg_at_3
            value: 42.48
          - type: ndcg_at_5
            value: 44.57
          - type: ndcg_at_10
            value: 47.13
          - type: ndcg_at_30
            value: 50.65
          - type: ndcg_at_100
            value: 53.14
          - type: map_at_1
            value: 30.1
          - type: map_at_3
            value: 37.97
          - type: map_at_5
            value: 39.62
          - type: map_at_10
            value: 41.06
          - type: map_at_30
            value: 42.13
          - type: map_at_100
            value: 42.53
          - type: recall_at_1
            value: 30.1
          - type: recall_at_3
            value: 45.98
          - type: recall_at_5
            value: 51.58
          - type: recall_at_10
            value: 59.24
          - type: recall_at_30
            value: 72.47
          - type: recall_at_100
            value: 84.53
          - type: precision_at_1
            value: 36.96
          - type: precision_at_3
            value: 20.5
          - type: precision_at_5
            value: 14.4
          - type: precision_at_10
            value: 8.62
          - type: precision_at_30
            value: 3.67
          - type: precision_at_100
            value: 1.38
          - type: accuracy_at_3
            value: 54.09
          - type: accuracy_at_5
            value: 60.25
          - type: accuracy_at_10
            value: 67.37
      - task:
          type: Retrieval
        dataset:
          type: BeIR/cqadupstack
          name: MTEB CQADupstackProgrammersRetrieval
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 28.65
          - type: ndcg_at_3
            value: 34.3
          - type: ndcg_at_5
            value: 36.8
          - type: ndcg_at_10
            value: 39.92
          - type: ndcg_at_30
            value: 42.97
          - type: ndcg_at_100
            value: 45.45
          - type: map_at_1
            value: 23.35
          - type: map_at_3
            value: 30.36
          - type: map_at_5
            value: 32.15
          - type: map_at_10
            value: 33.74
          - type: map_at_30
            value: 34.69
          - type: map_at_100
            value: 35.02
          - type: recall_at_1
            value: 23.35
          - type: recall_at_3
            value: 37.71
          - type: recall_at_5
            value: 44.23
          - type: recall_at_10
            value: 53.6
          - type: recall_at_30
            value: 64.69
          - type: recall_at_100
            value: 77.41
          - type: precision_at_1
            value: 28.65
          - type: precision_at_3
            value: 16.74
          - type: precision_at_5
            value: 12.21
          - type: precision_at_10
            value: 7.61
          - type: precision_at_30
            value: 3.29
          - type: precision_at_100
            value: 1.22
          - type: accuracy_at_3
            value: 44.86
          - type: accuracy_at_5
            value: 52.4
          - type: accuracy_at_10
            value: 61.07
      - task:
          type: Retrieval
        dataset:
          type: BeIR/cqadupstack
          name: MTEB CQADupstackStatsRetrieval
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 26.07
          - type: ndcg_at_3
            value: 31.62
          - type: ndcg_at_5
            value: 33.23
          - type: ndcg_at_10
            value: 35.62
          - type: ndcg_at_30
            value: 38.41
          - type: ndcg_at_100
            value: 40.81
          - type: map_at_1
            value: 22.96
          - type: map_at_3
            value: 28.85
          - type: map_at_5
            value: 29.97
          - type: map_at_10
            value: 31.11
          - type: map_at_30
            value: 31.86
          - type: map_at_100
            value: 32.15
          - type: recall_at_1
            value: 22.96
          - type: recall_at_3
            value: 35.14
          - type: recall_at_5
            value: 39.22
          - type: recall_at_10
            value: 46.52
          - type: recall_at_30
            value: 57.58
          - type: recall_at_100
            value: 70.57
          - type: precision_at_1
            value: 26.07
          - type: precision_at_3
            value: 14.11
          - type: precision_at_5
            value: 9.69
          - type: precision_at_10
            value: 5.81
          - type: precision_at_30
            value: 2.45
          - type: precision_at_100
            value: 0.92
          - type: accuracy_at_3
            value: 39.42
          - type: accuracy_at_5
            value: 43.41
          - type: accuracy_at_10
            value: 50.92
      - task:
          type: Retrieval
        dataset:
          type: BeIR/cqadupstack
          name: MTEB CQADupstackTexRetrieval
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 21.78
          - type: ndcg_at_3
            value: 25.74
          - type: ndcg_at_5
            value: 27.86
          - type: ndcg_at_10
            value: 30.3
          - type: ndcg_at_30
            value: 33.51
          - type: ndcg_at_100
            value: 36.12
          - type: map_at_1
            value: 17.63
          - type: map_at_3
            value: 22.7
          - type: map_at_5
            value: 24.14
          - type: map_at_10
            value: 25.31
          - type: map_at_30
            value: 26.22
          - type: map_at_100
            value: 26.56
          - type: recall_at_1
            value: 17.63
          - type: recall_at_3
            value: 28.37
          - type: recall_at_5
            value: 33.99
          - type: recall_at_10
            value: 41.23
          - type: recall_at_30
            value: 53.69
          - type: recall_at_100
            value: 67.27
          - type: precision_at_1
            value: 21.78
          - type: precision_at_3
            value: 12.41
          - type: precision_at_5
            value: 9.07
          - type: precision_at_10
            value: 5.69
          - type: precision_at_30
            value: 2.61
          - type: precision_at_100
            value: 1.03
          - type: accuracy_at_3
            value: 33.62
          - type: accuracy_at_5
            value: 39.81
          - type: accuracy_at_10
            value: 47.32
      - task:
          type: Retrieval
        dataset:
          type: BeIR/cqadupstack
          name: MTEB CQADupstackUnixRetrieval
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 30.97
          - type: ndcg_at_3
            value: 36.13
          - type: ndcg_at_5
            value: 39
          - type: ndcg_at_10
            value: 41.78
          - type: ndcg_at_30
            value: 44.96
          - type: ndcg_at_100
            value: 47.52
          - type: map_at_1
            value: 26.05
          - type: map_at_3
            value: 32.77
          - type: map_at_5
            value: 34.6
          - type: map_at_10
            value: 35.93
          - type: map_at_30
            value: 36.88
          - type: map_at_100
            value: 37.22
          - type: recall_at_1
            value: 26.05
          - type: recall_at_3
            value: 40
          - type: recall_at_5
            value: 47.34
          - type: recall_at_10
            value: 55.34
          - type: recall_at_30
            value: 67.08
          - type: recall_at_100
            value: 80.2
          - type: precision_at_1
            value: 30.97
          - type: precision_at_3
            value: 16.6
          - type: precision_at_5
            value: 12.03
          - type: precision_at_10
            value: 7.3
          - type: precision_at_30
            value: 3.08
          - type: precision_at_100
            value: 1.15
          - type: accuracy_at_3
            value: 45.62
          - type: accuracy_at_5
            value: 53.64
          - type: accuracy_at_10
            value: 61.66
      - task:
          type: Retrieval
        dataset:
          type: BeIR/cqadupstack
          name: MTEB CQADupstackWebmastersRetrieval
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 29.64
          - type: ndcg_at_3
            value: 35.49
          - type: ndcg_at_5
            value: 37.77
          - type: ndcg_at_10
            value: 40.78
          - type: ndcg_at_30
            value: 44.59
          - type: ndcg_at_100
            value: 46.97
          - type: map_at_1
            value: 24.77
          - type: map_at_3
            value: 31.33
          - type: map_at_5
            value: 32.95
          - type: map_at_10
            value: 34.47
          - type: map_at_30
            value: 35.7
          - type: map_at_100
            value: 36.17
          - type: recall_at_1
            value: 24.77
          - type: recall_at_3
            value: 38.16
          - type: recall_at_5
            value: 44.1
          - type: recall_at_10
            value: 53.31
          - type: recall_at_30
            value: 68.43
          - type: recall_at_100
            value: 80.24
          - type: precision_at_1
            value: 29.64
          - type: precision_at_3
            value: 16.8
          - type: precision_at_5
            value: 12.21
          - type: precision_at_10
            value: 7.83
          - type: precision_at_30
            value: 3.89
          - type: precision_at_100
            value: 1.63
          - type: accuracy_at_3
            value: 45.45
          - type: accuracy_at_5
            value: 51.58
          - type: accuracy_at_10
            value: 61.07
      - task:
          type: Retrieval
        dataset:
          type: BeIR/cqadupstack
          name: MTEB CQADupstackWordpressRetrieval
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 23.47
          - type: ndcg_at_3
            value: 27.98
          - type: ndcg_at_5
            value: 30.16
          - type: ndcg_at_10
            value: 32.97
          - type: ndcg_at_30
            value: 36.3
          - type: ndcg_at_100
            value: 38.47
          - type: map_at_1
            value: 21.63
          - type: map_at_3
            value: 26.02
          - type: map_at_5
            value: 27.32
          - type: map_at_10
            value: 28.51
          - type: map_at_30
            value: 29.39
          - type: map_at_100
            value: 29.66
          - type: recall_at_1
            value: 21.63
          - type: recall_at_3
            value: 31.47
          - type: recall_at_5
            value: 36.69
          - type: recall_at_10
            value: 44.95
          - type: recall_at_30
            value: 58.2
          - type: recall_at_100
            value: 69.83
          - type: precision_at_1
            value: 23.47
          - type: precision_at_3
            value: 11.71
          - type: precision_at_5
            value: 8.32
          - type: precision_at_10
            value: 5.23
          - type: precision_at_30
            value: 2.29
          - type: precision_at_100
            value: 0.86
          - type: accuracy_at_3
            value: 34.01
          - type: accuracy_at_5
            value: 39.37
          - type: accuracy_at_10
            value: 48.24
      - task:
          type: Retrieval
        dataset:
          type: climate-fever
          name: MTEB ClimateFEVER
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 19.8
          - type: ndcg_at_3
            value: 17.93
          - type: ndcg_at_5
            value: 19.39
          - type: ndcg_at_10
            value: 22.42
          - type: ndcg_at_30
            value: 26.79
          - type: ndcg_at_100
            value: 29.84
          - type: map_at_1
            value: 9.09
          - type: map_at_3
            value: 12.91
          - type: map_at_5
            value: 14.12
          - type: map_at_10
            value: 15.45
          - type: map_at_30
            value: 16.73
          - type: map_at_100
            value: 17.21
          - type: recall_at_1
            value: 9.09
          - type: recall_at_3
            value: 16.81
          - type: recall_at_5
            value: 20.9
          - type: recall_at_10
            value: 27.65
          - type: recall_at_30
            value: 41.23
          - type: recall_at_100
            value: 53.57
          - type: precision_at_1
            value: 19.8
          - type: precision_at_3
            value: 13.36
          - type: precision_at_5
            value: 10.33
          - type: precision_at_10
            value: 7.15
          - type: precision_at_30
            value: 3.66
          - type: precision_at_100
            value: 1.49
          - type: accuracy_at_3
            value: 36.22
          - type: accuracy_at_5
            value: 44.1
          - type: accuracy_at_10
            value: 55.11
      - task:
          type: Retrieval
        dataset:
          type: dbpedia-entity
          name: MTEB DBPedia
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 42.75
          - type: ndcg_at_3
            value: 35.67
          - type: ndcg_at_5
            value: 33.58
          - type: ndcg_at_10
            value: 32.19
          - type: ndcg_at_30
            value: 31.82
          - type: ndcg_at_100
            value: 35.87
          - type: map_at_1
            value: 7.05
          - type: map_at_3
            value: 10.5
          - type: map_at_5
            value: 12.06
          - type: map_at_10
            value: 14.29
          - type: map_at_30
            value: 17.38
          - type: map_at_100
            value: 19.58
          - type: recall_at_1
            value: 7.05
          - type: recall_at_3
            value: 11.89
          - type: recall_at_5
            value: 14.7
          - type: recall_at_10
            value: 19.78
          - type: recall_at_30
            value: 29.88
          - type: recall_at_100
            value: 42.4
          - type: precision_at_1
            value: 54.25
          - type: precision_at_3
            value: 39.42
          - type: precision_at_5
            value: 33.15
          - type: precision_at_10
            value: 25.95
          - type: precision_at_30
            value: 15.51
          - type: precision_at_100
            value: 7.9
          - type: accuracy_at_3
            value: 72
          - type: accuracy_at_5
            value: 77.75
          - type: accuracy_at_10
            value: 83.5
      - task:
          type: Retrieval
        dataset:
          type: fever
          name: MTEB FEVER
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 40.19
          - type: ndcg_at_3
            value: 50.51
          - type: ndcg_at_5
            value: 53.51
          - type: ndcg_at_10
            value: 56.45
          - type: ndcg_at_30
            value: 58.74
          - type: ndcg_at_100
            value: 59.72
          - type: map_at_1
            value: 37.56
          - type: map_at_3
            value: 46.74
          - type: map_at_5
            value: 48.46
          - type: map_at_10
            value: 49.7
          - type: map_at_30
            value: 50.31
          - type: map_at_100
            value: 50.43
          - type: recall_at_1
            value: 37.56
          - type: recall_at_3
            value: 58.28
          - type: recall_at_5
            value: 65.45
          - type: recall_at_10
            value: 74.28
          - type: recall_at_30
            value: 83.42
          - type: recall_at_100
            value: 88.76
          - type: precision_at_1
            value: 40.19
          - type: precision_at_3
            value: 20.99
          - type: precision_at_5
            value: 14.24
          - type: precision_at_10
            value: 8.12
          - type: precision_at_30
            value: 3.06
          - type: precision_at_100
            value: 0.98
          - type: accuracy_at_3
            value: 62.3
          - type: accuracy_at_5
            value: 69.94
          - type: accuracy_at_10
            value: 79.13
      - task:
          type: Retrieval
        dataset:
          type: fiqa
          name: MTEB FiQA2018
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 34.41
          - type: ndcg_at_3
            value: 33.2
          - type: ndcg_at_5
            value: 34.71
          - type: ndcg_at_10
            value: 37.1
          - type: ndcg_at_30
            value: 40.88
          - type: ndcg_at_100
            value: 44.12
          - type: map_at_1
            value: 17.27
          - type: map_at_3
            value: 25.36
          - type: map_at_5
            value: 27.76
          - type: map_at_10
            value: 29.46
          - type: map_at_30
            value: 30.74
          - type: map_at_100
            value: 31.29
          - type: recall_at_1
            value: 17.27
          - type: recall_at_3
            value: 30.46
          - type: recall_at_5
            value: 36.91
          - type: recall_at_10
            value: 44.47
          - type: recall_at_30
            value: 56.71
          - type: recall_at_100
            value: 70.72
          - type: precision_at_1
            value: 34.41
          - type: precision_at_3
            value: 22.32
          - type: precision_at_5
            value: 16.91
          - type: precision_at_10
            value: 10.53
          - type: precision_at_30
            value: 4.62
          - type: precision_at_100
            value: 1.79
          - type: accuracy_at_3
            value: 50.77
          - type: accuracy_at_5
            value: 57.56
          - type: accuracy_at_10
            value: 65.12
      - task:
          type: Retrieval
        dataset:
          type: hotpotqa
          name: MTEB HotpotQA
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 57.93
          - type: ndcg_at_3
            value: 44.21
          - type: ndcg_at_5
            value: 46.4
          - type: ndcg_at_10
            value: 48.37
          - type: ndcg_at_30
            value: 50.44
          - type: ndcg_at_100
            value: 51.86
          - type: map_at_1
            value: 28.97
          - type: map_at_3
            value: 36.79
          - type: map_at_5
            value: 38.31
          - type: map_at_10
            value: 39.32
          - type: map_at_30
            value: 39.99
          - type: map_at_100
            value: 40.2
          - type: recall_at_1
            value: 28.97
          - type: recall_at_3
            value: 41.01
          - type: recall_at_5
            value: 45.36
          - type: recall_at_10
            value: 50.32
          - type: recall_at_30
            value: 57.38
          - type: recall_at_100
            value: 64.06
          - type: precision_at_1
            value: 57.93
          - type: precision_at_3
            value: 27.34
          - type: precision_at_5
            value: 18.14
          - type: precision_at_10
            value: 10.06
          - type: precision_at_30
            value: 3.82
          - type: precision_at_100
            value: 1.28
          - type: accuracy_at_3
            value: 71.03
          - type: accuracy_at_5
            value: 75.14
          - type: accuracy_at_10
            value: 79.84
      - task:
          type: Retrieval
        dataset:
          type: msmarco
          name: MTEB MSMARCO
          config: default
          split: dev
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 19.74
          - type: ndcg_at_3
            value: 29.47
          - type: ndcg_at_5
            value: 32.99
          - type: ndcg_at_10
            value: 36.76
          - type: ndcg_at_30
            value: 40.52
          - type: ndcg_at_100
            value: 42.78
          - type: map_at_1
            value: 19.2
          - type: map_at_3
            value: 26.81
          - type: map_at_5
            value: 28.78
          - type: map_at_10
            value: 30.35
          - type: map_at_30
            value: 31.3
          - type: map_at_100
            value: 31.57
          - type: recall_at_1
            value: 19.2
          - type: recall_at_3
            value: 36.59
          - type: recall_at_5
            value: 45.08
          - type: recall_at_10
            value: 56.54
          - type: recall_at_30
            value: 72.05
          - type: recall_at_100
            value: 84.73
          - type: precision_at_1
            value: 19.74
          - type: precision_at_3
            value: 12.61
          - type: precision_at_5
            value: 9.37
          - type: precision_at_10
            value: 5.89
          - type: precision_at_30
            value: 2.52
          - type: precision_at_100
            value: 0.89
          - type: accuracy_at_3
            value: 37.38
          - type: accuracy_at_5
            value: 46.06
          - type: accuracy_at_10
            value: 57.62
      - task:
          type: Retrieval
        dataset:
          type: nq
          name: MTEB NQ
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 25.9
          - type: ndcg_at_3
            value: 35.97
          - type: ndcg_at_5
            value: 40.27
          - type: ndcg_at_10
            value: 44.44
          - type: ndcg_at_30
            value: 48.31
          - type: ndcg_at_100
            value: 50.14
          - type: map_at_1
            value: 23.03
          - type: map_at_3
            value: 32.45
          - type: map_at_5
            value: 34.99
          - type: map_at_10
            value: 36.84
          - type: map_at_30
            value: 37.92
          - type: map_at_100
            value: 38.16
          - type: recall_at_1
            value: 23.03
          - type: recall_at_3
            value: 43.49
          - type: recall_at_5
            value: 53.41
          - type: recall_at_10
            value: 65.65
          - type: recall_at_30
            value: 80.79
          - type: recall_at_100
            value: 90.59
          - type: precision_at_1
            value: 25.9
          - type: precision_at_3
            value: 16.76
          - type: precision_at_5
            value: 12.54
          - type: precision_at_10
            value: 7.78
          - type: precision_at_30
            value: 3.23
          - type: precision_at_100
            value: 1.1
          - type: accuracy_at_3
            value: 47.31
          - type: accuracy_at_5
            value: 57.16
          - type: accuracy_at_10
            value: 69.09
      - task:
          type: Retrieval
        dataset:
          type: nfcorpus
          name: MTEB NFCorpus
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 40.87
          - type: ndcg_at_3
            value: 36.79
          - type: ndcg_at_5
            value: 34.47
          - type: ndcg_at_10
            value: 32.05
          - type: ndcg_at_30
            value: 29.23
          - type: ndcg_at_100
            value: 29.84
          - type: map_at_1
            value: 5.05
          - type: map_at_3
            value: 8.5
          - type: map_at_5
            value: 9.87
          - type: map_at_10
            value: 11.71
          - type: map_at_30
            value: 13.48
          - type: map_at_100
            value: 14.86
          - type: recall_at_1
            value: 5.05
          - type: recall_at_3
            value: 9.55
          - type: recall_at_5
            value: 11.91
          - type: recall_at_10
            value: 16.07
          - type: recall_at_30
            value: 22.13
          - type: recall_at_100
            value: 30.7
          - type: precision_at_1
            value: 42.72
          - type: precision_at_3
            value: 34.78
          - type: precision_at_5
            value: 30.03
          - type: precision_at_10
            value: 23.93
          - type: precision_at_30
            value: 14.61
          - type: precision_at_100
            value: 7.85
          - type: accuracy_at_3
            value: 58.2
          - type: accuracy_at_5
            value: 64.09
          - type: accuracy_at_10
            value: 69.35
      - task:
          type: Retrieval
        dataset:
          type: quora
          name: MTEB QuoraRetrieval
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 80.62
          - type: ndcg_at_3
            value: 84.62
          - type: ndcg_at_5
            value: 86.25
          - type: ndcg_at_10
            value: 87.7
          - type: ndcg_at_30
            value: 88.63
          - type: ndcg_at_100
            value: 88.95
          - type: map_at_1
            value: 69.91
          - type: map_at_3
            value: 80.7
          - type: map_at_5
            value: 82.57
          - type: map_at_10
            value: 83.78
          - type: map_at_30
            value: 84.33
          - type: map_at_100
            value: 84.44
          - type: recall_at_1
            value: 69.91
          - type: recall_at_3
            value: 86.36
          - type: recall_at_5
            value: 90.99
          - type: recall_at_10
            value: 95.19
          - type: recall_at_30
            value: 98.25
          - type: recall_at_100
            value: 99.47
          - type: precision_at_1
            value: 80.62
          - type: precision_at_3
            value: 37.03
          - type: precision_at_5
            value: 24.36
          - type: precision_at_10
            value: 13.4
          - type: precision_at_30
            value: 4.87
          - type: precision_at_100
            value: 1.53
          - type: accuracy_at_3
            value: 92.25
          - type: accuracy_at_5
            value: 95.29
          - type: accuracy_at_10
            value: 97.74
      - task:
          type: Retrieval
        dataset:
          type: scidocs
          name: MTEB SCIDOCS
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 24.1
          - type: ndcg_at_3
            value: 20.18
          - type: ndcg_at_5
            value: 17.72
          - type: ndcg_at_10
            value: 21.5
          - type: ndcg_at_30
            value: 26.66
          - type: ndcg_at_100
            value: 30.95
          - type: map_at_1
            value: 4.88
          - type: map_at_3
            value: 9.09
          - type: map_at_5
            value: 10.99
          - type: map_at_10
            value: 12.93
          - type: map_at_30
            value: 14.71
          - type: map_at_100
            value: 15.49
          - type: recall_at_1
            value: 4.88
          - type: recall_at_3
            value: 11.55
          - type: recall_at_5
            value: 15.91
          - type: recall_at_10
            value: 22.82
          - type: recall_at_30
            value: 35.7
          - type: recall_at_100
            value: 50.41
          - type: precision_at_1
            value: 24.1
          - type: precision_at_3
            value: 19
          - type: precision_at_5
            value: 15.72
          - type: precision_at_10
            value: 11.27
          - type: precision_at_30
            value: 5.87
          - type: precision_at_100
            value: 2.49
          - type: accuracy_at_3
            value: 43
          - type: accuracy_at_5
            value: 51.6
          - type: accuracy_at_10
            value: 62.7
      - task:
          type: Retrieval
        dataset:
          type: scifact
          name: MTEB SciFact
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 52.33
          - type: ndcg_at_3
            value: 61.47
          - type: ndcg_at_5
            value: 63.82
          - type: ndcg_at_10
            value: 65.81
          - type: ndcg_at_30
            value: 67.75
          - type: ndcg_at_100
            value: 68.96
          - type: map_at_1
            value: 50.46
          - type: map_at_3
            value: 58.51
          - type: map_at_5
            value: 60.12
          - type: map_at_10
            value: 61.07
          - type: map_at_30
            value: 61.64
          - type: map_at_100
            value: 61.8
          - type: recall_at_1
            value: 50.46
          - type: recall_at_3
            value: 67.81
          - type: recall_at_5
            value: 73.6
          - type: recall_at_10
            value: 79.31
          - type: recall_at_30
            value: 86.8
          - type: recall_at_100
            value: 93.5
          - type: precision_at_1
            value: 52.33
          - type: precision_at_3
            value: 24.56
          - type: precision_at_5
            value: 16.27
          - type: precision_at_10
            value: 8.9
          - type: precision_at_30
            value: 3.28
          - type: precision_at_100
            value: 1.06
          - type: accuracy_at_3
            value: 69.67
          - type: accuracy_at_5
            value: 75
          - type: accuracy_at_10
            value: 80.67
      - task:
          type: Retrieval
        dataset:
          type: trec-covid
          name: MTEB TRECCOVID
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 57
          - type: ndcg_at_3
            value: 53.78
          - type: ndcg_at_5
            value: 52.62
          - type: ndcg_at_10
            value: 48.9
          - type: ndcg_at_30
            value: 44.2
          - type: ndcg_at_100
            value: 36.53
          - type: map_at_1
            value: 0.16
          - type: map_at_3
            value: 0.41
          - type: map_at_5
            value: 0.62
          - type: map_at_10
            value: 1.07
          - type: map_at_30
            value: 2.46
          - type: map_at_100
            value: 5.52
          - type: recall_at_1
            value: 0.16
          - type: recall_at_3
            value: 0.45
          - type: recall_at_5
            value: 0.72
          - type: recall_at_10
            value: 1.33
          - type: recall_at_30
            value: 3.46
          - type: recall_at_100
            value: 8.73
          - type: precision_at_1
            value: 62
          - type: precision_at_3
            value: 57.33
          - type: precision_at_5
            value: 56
          - type: precision_at_10
            value: 52
          - type: precision_at_30
            value: 46.2
          - type: precision_at_100
            value: 37.22
          - type: accuracy_at_3
            value: 82
          - type: accuracy_at_5
            value: 90
          - type: accuracy_at_10
            value: 92
      - task:
          type: Retrieval
        dataset:
          type: webis-touche2020
          name: MTEB Touche2020
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_1
            value: 20.41
          - type: ndcg_at_3
            value: 17.62
          - type: ndcg_at_5
            value: 17.16
          - type: ndcg_at_10
            value: 17.09
          - type: ndcg_at_30
            value: 20.1
          - type: ndcg_at_100
            value: 26.33
          - type: map_at_1
            value: 2.15
          - type: map_at_3
            value: 3.59
          - type: map_at_5
            value: 5.07
          - type: map_at_10
            value: 6.95
          - type: map_at_30
            value: 9.01
          - type: map_at_100
            value: 10.54
          - type: recall_at_1
            value: 2.15
          - type: recall_at_3
            value: 4.5
          - type: recall_at_5
            value: 7.54
          - type: recall_at_10
            value: 12.46
          - type: recall_at_30
            value: 21.9
          - type: recall_at_100
            value: 36.58
          - type: precision_at_1
            value: 22.45
          - type: precision_at_3
            value: 19.05
          - type: precision_at_5
            value: 17.55
          - type: precision_at_10
            value: 15.51
          - type: precision_at_30
            value: 10.07
          - type: precision_at_100
            value: 5.57
          - type: accuracy_at_3
            value: 42.86
          - type: accuracy_at_5
            value: 53.06
          - type: accuracy_at_10
            value: 69.39
      - task:
          type: Retrieval
        dataset:
          type: BeIR/cqadupstack
          name: MTEB CQADupstackRetrieval
          config: default
          split: test
          revision: None
        metrics:
          - type: ndcg_at_10
            value: 41.59
license: apache-2.0
language:
  - en
pipeline_tag: feature-extraction

The crispy sentence embedding family from Mixedbread.

mixedbread-ai/mxbai-embed-xsmall-v1

This model is an open-source English embedding model developed by Mixedbread. It's built upon sentence-transformers/all-MiniLM-L6-v2 and trained with the AnglE loss and Espresso. Read more details in our blog post.

In a bread loaf:

Performance

Binary Quantization and Matryoshka

Our model supports both binary quantization and Matryoshka Representation Learning (MRL), allowing for significant efficiency gains:

  • Binary quantization: Retains 93.9% of performance while increasing efficiency by a factor of 32
  • MRL: A 33% reduction in vector size still leaves 96.2% of model performance

These optimizations can lead to substantial reductions in infrastructure costs for cloud computing and vector databases. Read more here.

Quickstart

Here are several ways to produce English sentence embeddings using our model.

angle-emb
pip install -U angle-emb
from angle_emb import AnglE
from angle_emb.utils import cosine_similarity

# Step 1: pick the embedding width that will be requested from the encoder.
dimensions = 384

# Step 2: fetch the checkpoint with average pooling, then move it onto the GPU.
model = AnglE.from_pretrained(
    "mixedbread-ai/mxbai-embed-xsmall-v1",
    pooling_strategy='avg',
)
model = model.cuda()

query = 'A man is eating a piece of bread'

# The query is placed first so that its embedding ends up at index 0.
docs = [query] + [
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# Step 3: embed every text in one call, then score each document against the query.
embeddings = model.encode(docs, embedding_size=dimensions)
query_embedding = embeddings[0]

for idx in range(1, len(docs)):
    print(f'{query} ||| {docs[idx]}', cosine_similarity(query_embedding, embeddings[idx]))
Sentence Transformers
python -m pip install -U sentence-transformers
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

# Step 1: embedding width passed to the model as `truncate_dim`.
dimensions = 384

# Step 2: load the checkpoint.
model = SentenceTransformer(
    "mixedbread-ai/mxbai-embed-xsmall-v1",
    truncate_dim=dimensions,
)

query = 'A man is eating a piece of bread'

# The query is placed first so that its embedding ends up at index 0.
docs = [query] + [
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# Step 3: embed all texts at once and compare the query (row 0) with the rest.
embeddings = model.encode(docs)
query_embedding, doc_embeddings = embeddings[0], embeddings[1:]

similarities = cos_sim(query_embedding, doc_embeddings)
print('similarities:', similarities)
transformers
pip install -U transformers
from typing import Dict

import torch
import numpy as np
from transformers import AutoModel, AutoTokenizer
from sentence_transformers.util import cos_sim

def pooling(outputs: torch.Tensor, inputs: Dict) -> np.ndarray:
    """Mean-pool token embeddings into one vector per sequence, skipping masked tokens.

    Args:
        outputs: Token-level hidden states, indexed as (batch, seq_len, hidden).
        inputs: Tokenizer output; only ``inputs["attention_mask"]`` is read. It is
            indexed as (batch, seq_len), and positions with value 0 contribute
            nothing to the average.

    Returns:
        A NumPy array of shape (batch, hidden) holding, for each sequence, the
        average of its unmasked token embeddings.
    """
    # Add a trailing axis so the mask broadcasts over the hidden dimension.
    mask = inputs["attention_mask"][:, :, None].to(outputs.dtype)
    summed = torch.sum(outputs * mask, dim=1)
    # Normalise every row by its OWN token count. Summing the mask over the whole
    # batch would divide each row by the total number of tokens in the batch,
    # making the result depend on what else happens to be in the batch.
    # The clamp keeps a fully masked row from dividing by zero.
    token_counts = torch.clamp(torch.sum(mask, dim=1), min=1)
    return (summed / token_counts).detach().cpu().numpy()

# 1. Load model
model_id = 'mixedbread-ai/mxbai-embed-xsmall-v1'
# Run on the GPU when one is available and fall back to the CPU otherwise, so the
# example does not crash on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id).to(device)

query = 'A man is eating a piece of bread'

docs = [
    query,
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# 2. Encode
# Pad to the longest text and move every tensor of the batch to the model's device.
inputs = tokenizer(docs, padding=True, return_tensors='pt').to(device)
# Inference only: skip building the autograd graph for the forward pass.
with torch.no_grad():
    outputs = model(**inputs).last_hidden_state
embeddings = pooling(outputs, inputs)

# 3. Compute similarity scores
# Row 0 is the query; the remaining rows are the candidate documents.
similarities = cos_sim(embeddings[0], embeddings[1:])
print('similarities:', similarities)
Batched API
python -m pip install batched
import uvicorn
import batched
from fastapi import FastAPI
from fastapi.responses import ORJSONResponse
from sentence_transformers import SentenceTransformer
from pydantic import BaseModel
 
# ASGI application that serves the embedding endpoint.
app = FastAPI()

# Load the encoder once at import time, then swap `encode` for the wrapper returned
# by `batched.aio.dynamically` (presumably so that concurrent requests are grouped
# into batches; see the `batched` documentation to confirm).
model = SentenceTransformer('mixedbread-ai/mxbai-embed-xsmall-v1')
model.encode = batched.aio.dynamically(model.encode)


class EmbeddingsRequest(BaseModel):
    # Text to embed: either one string or a list of strings.
    input: str | list[str]


@app.post("/embeddings")
async def embeddings(request: EmbeddingsRequest):
    # `model.encode` was replaced above by the wrapped version, which is awaited here.
    vectors = await model.encode(request.input)
    payload = {"embeddings": vectors}
    return ORJSONResponse(payload)


if __name__ == "__main__":
    # Listen on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)

Community

Join our Discord community to share your feedback and thoughts. We're here to help and always happy to discuss the exciting field of machine learning!

License

Apache 2.0

Citation

@online{xsmall2024mxbai,
  title={Every Byte Matters: Introducing mxbai-embed-xsmall-v1},
  author={Sean Lee and Julius Lipp and Rui Huang and Darius Koenig},
  year={2024},
  url={https://www.mixedbread.ai/blog/mxbai-embed-xsmall-v1},
}