Tom Aarsen committed on
Commit
5c66f04
·
1 Parent(s): 752e76f

Remove reference_compile; complete Sentence Transformers support

Browse files
Files changed (4) hide show
  1. README.md +21 -9
  2. config.json +0 -1
  3. config_sentence_transformers.json +10 -0
  4. modules.json +14 -0
README.md CHANGED
@@ -6,6 +6,8 @@ base_model:
6
  - answerdotai/ModernBERT-base
7
  pipeline_tag: sentence-similarity
8
  library_name: transformers
 
 
9
  ---
10
 
11
  # gte-modernbert-base
@@ -36,7 +38,7 @@ The `gte-modernbert` models demonstrate competitive performance in several text
36
  Use with `Transformers`
37
 
38
  ```python
39
- # Requires transformers>=4.36.0
40
 
41
  import torch.nn.functional as F
42
  from transformers import AutoModel, AutoTokenizer
@@ -48,9 +50,9 @@ input_texts = [
48
  "sorting algorithms"
49
  ]
50
 
51
- model_path = 'Alibaba-NLP/gte-modernbert-base'
52
  tokenizer = AutoTokenizer.from_pretrained(model_path)
53
- model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
54
 
55
  # Tokenize the input texts
56
  batch_dict = tokenizer(input_texts, max_length=8192, padding=True, truncation=True, return_tensors='pt')
@@ -62,21 +64,31 @@ embeddings = outputs.last_hidden_state[:, 0]
62
  embeddings = F.normalize(embeddings, p=2, dim=1)
63
  scores = (embeddings[:1] @ embeddings[1:].T) * 100
64
  print(scores.tolist())
 
65
  ```
66
 
67
  Use with `sentence-transformers`:
68
 
69
  ```python
70
- # Requires sentence_transformers>=2.7.0
71
-
72
  from sentence_transformers import SentenceTransformer
73
  from sentence_transformers.util import cos_sim
74
 
75
- sentences = ['That is a happy person', 'That is a very happy person']
 
 
 
 
 
 
 
 
 
 
76
 
77
- model = SentenceTransformer('Alibaba-NLP/gte-modernbert-base', trust_remote_code=True)
78
- embeddings = model.encode(sentences)
79
- print(cos_sim(embeddings[0], embeddings[1]))
80
  ```
81
 
82
  Use with `transformers.js`:
 
6
  - answerdotai/ModernBERT-base
7
  pipeline_tag: sentence-similarity
8
  library_name: transformers
9
+ tags:
10
+ - sentence-transformers
11
  ---
12
 
13
  # gte-modernbert-base
 
38
  Use with `Transformers`
39
 
40
  ```python
41
+ # Requires transformers>=4.48.0
42
 
43
  import torch.nn.functional as F
44
  from transformers import AutoModel, AutoTokenizer
 
50
  "sorting algorithms"
51
  ]
52
 
53
+ model_path = "Alibaba-NLP/gte-modernbert-base"
54
  tokenizer = AutoTokenizer.from_pretrained(model_path)
55
+ model = AutoModel.from_pretrained(model_path)
56
 
57
  # Tokenize the input texts
58
  batch_dict = tokenizer(input_texts, max_length=8192, padding=True, truncation=True, return_tensors='pt')
 
64
  embeddings = F.normalize(embeddings, p=2, dim=1)
65
  scores = (embeddings[:1] @ embeddings[1:].T) * 100
66
  print(scores.tolist())
67
+ # [[42.89073944091797, 71.30911254882812, 33.664554595947266]]
68
  ```
69
 
70
  Use with `sentence-transformers`:
71
 
72
  ```python
73
+ # Requires transformers>=4.48.0
 
74
  from sentence_transformers import SentenceTransformer
75
  from sentence_transformers.util import cos_sim
76
 
77
+ input_texts = [
78
+ "what is the capital of China?",
79
+ "how to implement quick sort in python?",
80
+ "Beijing",
81
+ "sorting algorithms"
82
+ ]
83
+
84
+ model = SentenceTransformer("Alibaba-NLP/gte-modernbert-base")
85
+ embeddings = model.encode(input_texts)
86
+ print(embeddings.shape)
87
+ # (4, 768)
88
 
89
+ similarities = cos_sim(embeddings[0], embeddings[1:])
90
+ print(similarities)
91
+ # tensor([[0.4289, 0.7131, 0.3366]])
92
  ```
93
 
94
  Use with `transformers.js`:
config.json CHANGED
@@ -35,7 +35,6 @@
35
  "num_hidden_layers": 22,
36
  "pad_token_id": 50283,
37
  "position_embedding_type": "absolute",
38
- "reference_compile": true,
39
  "sep_token_id": 50282,
40
  "sparse_pred_ignore_index": -100,
41
  "sparse_prediction": false,
 
35
  "num_hidden_layers": 22,
36
  "pad_token_id": 50283,
37
  "position_embedding_type": "absolute",
 
38
  "sep_token_id": 50282,
39
  "sparse_pred_ignore_index": -100,
40
  "sparse_prediction": false,
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.7.0",
4
+ "transformers": "4.48.0",
5
+ "pytorch": "2.5.0+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]