lw2134 committed on
Commit
52bd27e
1 Parent(s): 73332b0

Add new SentenceTransformer model.

Browse files
This view is limited to 50 files because it contains too many changes. See the raw diff.
Files changed (50) hide show
  1. 1_Pooling/config.json +10 -0
  2. README.md +617 -0
  3. checkpoint-3/1_Pooling/config.json +10 -0
  4. checkpoint-3/README.md +614 -0
  5. checkpoint-3/config.json +44 -0
  6. checkpoint-3/config_sentence_transformers.json +10 -0
  7. checkpoint-3/model.safetensors +3 -0
  8. checkpoint-3/modules.json +14 -0
  9. checkpoint-3/optimizer.pt +3 -0
  10. checkpoint-3/rng_state.pth +3 -0
  11. checkpoint-3/scheduler.pt +3 -0
  12. checkpoint-3/sentence_bert_config.json +4 -0
  13. checkpoint-3/special_tokens_map.json +37 -0
  14. checkpoint-3/tokenizer.json +0 -0
  15. checkpoint-3/tokenizer_config.json +62 -0
  16. checkpoint-3/trainer_state.json +144 -0
  17. checkpoint-3/training_args.bin +3 -0
  18. checkpoint-3/vocab.txt +0 -0
  19. checkpoint-4/1_Pooling/config.json +10 -0
  20. checkpoint-4/README.md +615 -0
  21. checkpoint-4/config.json +44 -0
  22. checkpoint-4/config_sentence_transformers.json +10 -0
  23. checkpoint-4/model.safetensors +3 -0
  24. checkpoint-4/modules.json +14 -0
  25. checkpoint-4/optimizer.pt +3 -0
  26. checkpoint-4/rng_state.pth +3 -0
  27. checkpoint-4/scheduler.pt +3 -0
  28. checkpoint-4/sentence_bert_config.json +4 -0
  29. checkpoint-4/special_tokens_map.json +37 -0
  30. checkpoint-4/tokenizer.json +0 -0
  31. checkpoint-4/tokenizer_config.json +62 -0
  32. checkpoint-4/trainer_state.json +181 -0
  33. checkpoint-4/training_args.bin +3 -0
  34. checkpoint-4/vocab.txt +0 -0
  35. checkpoint-5/1_Pooling/config.json +10 -0
  36. checkpoint-5/README.md +616 -0
  37. checkpoint-5/config.json +44 -0
  38. checkpoint-5/config_sentence_transformers.json +10 -0
  39. checkpoint-5/model.safetensors +3 -0
  40. checkpoint-5/modules.json +14 -0
  41. checkpoint-5/optimizer.pt +3 -0
  42. checkpoint-5/rng_state.pth +3 -0
  43. checkpoint-5/scheduler.pt +3 -0
  44. checkpoint-5/sentence_bert_config.json +4 -0
  45. checkpoint-5/special_tokens_map.json +37 -0
  46. checkpoint-5/tokenizer.json +0 -0
  47. checkpoint-5/tokenizer_config.json +62 -0
  48. checkpoint-5/trainer_state.json +218 -0
  49. checkpoint-5/training_args.bin +3 -0
  50. checkpoint-5/vocab.txt +0 -0
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,617 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Alibaba-NLP/gte-large-en-v1.5
3
+ library_name: sentence-transformers
4
+ metrics:
5
+ - cosine_accuracy@1
6
+ - cosine_accuracy@3
7
+ - cosine_accuracy@5
8
+ - cosine_accuracy@10
9
+ - cosine_precision@1
10
+ - cosine_precision@3
11
+ - cosine_precision@5
12
+ - cosine_precision@10
13
+ - cosine_recall@1
14
+ - cosine_recall@3
15
+ - cosine_recall@5
16
+ - cosine_recall@10
17
+ - cosine_ndcg@10
18
+ - cosine_mrr@10
19
+ - cosine_map@100
20
+ - dot_accuracy@1
21
+ - dot_accuracy@3
22
+ - dot_accuracy@5
23
+ - dot_accuracy@10
24
+ - dot_precision@1
25
+ - dot_precision@3
26
+ - dot_precision@5
27
+ - dot_precision@10
28
+ - dot_recall@1
29
+ - dot_recall@3
30
+ - dot_recall@5
31
+ - dot_recall@10
32
+ - dot_ndcg@10
33
+ - dot_mrr@10
34
+ - dot_map@100
35
+ pipeline_tag: sentence-similarity
36
+ tags:
37
+ - sentence-transformers
38
+ - sentence-similarity
39
+ - feature-extraction
40
+ - generated_from_trainer
41
+ - dataset_size:500
42
+ - loss:MatryoshkaLoss
43
+ - loss:MultipleNegativesRankingLoss
44
+ widget:
45
+ - source_sentence: "narrow identified goals, to avoid \"mission creep.\" Anticipated\
46
+ \ data collection should be determined to be \nstrictly necessary to the identified\
47
+ \ goals and should be minimized as much as possible. Data collected based on \n\
48
+ these identified goals and for a specific context should not be used in a different\
49
+ \ context without assessing for \nnew privacy risks and implementing appropriate\
50
+ \ mitigation measures, which may include express consent."
51
+ sentences:
52
+ - What measures should be taken if data collected for specific identified goals
53
+ is to be used in a different context?
54
+ - What measures should be taken to ensure the privacy of sensitive data and limit
55
+ access to it?
56
+ - What special requirements are mentioned in the white paper regarding national
57
+ security and defense activities in relation to trustworthy artificial intelligence?
58
+ - source_sentence: '•
59
+
60
+ Karen Levy, Assistant Professor, Department of Information Science, Cornell University
61
+
62
+
63
+
64
+ Natasha Duarte, Project Director, Upturn
65
+
66
+
67
+
68
+ Elana Zeide, Assistant Professor, University of Nebraska College of Law
69
+
70
+
71
+
72
+ Fabian Rogers, Constituent Advocate, Office of NY State Senator Jabari Brisport
73
+ and Community
74
+
75
+ Advocate and Floor Captain, Atlantic Plaza Towers Tenants Association
76
+
77
+ The individual panelists described the ways in which AI systems and other technologies
78
+ are increasingly being'
79
+ sentences:
80
+ - What are some of the challenges posed to democracy by the use of technology and
81
+ automated systems, as mentioned in the foreword?
82
+ - What principles has the U.S. Intelligence Community developed to guide personnel
83
+ in the ethical use of AI?
84
+ - What roles do the panelists hold in relation to the discussion on AI systems and
85
+ technology?
86
+ - source_sentence: "impacts disfavoring people based on their race, color, ethnicity,\
87
+ \ \nsex \n(including \npregnancy, \nchildbirth, \nand \nrelated \nmedical \nconditions,\
88
+ \ \ngender \nidentity, \nintersex \nstatus, \nand \nsexual \norientation), religion,\
89
+ \ age, national origin, disability, veteran status,"
90
+ sentences:
91
+ - What does the term "HUMAN ALTERNATIVES" refer to in the context provided?
92
+ - What types of discrimination are mentioned in the context?
93
+ - What are the expectations for automated systems in relation to public protection
94
+ from surveillance?
95
+ - source_sentence: "establish and maintain the capabilities that will allow individuals\
96
+ \ to use their own automated systems to help \nthem make consent, access, and\
97
+ \ control decisions in a complex data ecosystem. Capabilities include machine\
98
+ \ \nreadable data, standardized data formats, metadata or tags for expressing\
99
+ \ data processing permissions and \npreferences and data provenance and lineage,\
100
+ \ context of use and access-specific tags, and training models for \nassessing\
101
+ \ privacy risk."
102
+ sentences:
103
+ - What measures should be taken to ensure that independent evaluations of algorithmic
104
+ discrimination are conducted while balancing individual privacy and data access
105
+ needs?
106
+ - What capabilities are necessary for individuals to effectively manage consent
107
+ and control decisions in a complex data ecosystem?
108
+ - What are some examples of classifications that are protected by law against discrimination?
109
+ - source_sentence: "SAFE AND EFFECTIVE \nSYSTEMS \nWHAT SHOULD BE EXPECTED OF AUTOMATED\
110
+ \ SYSTEMS\nThe expectations for automated systems are meant to serve as a blueprint\
111
+ \ for the development of additional \ntechnical standards and practices that are\
112
+ \ tailored for particular sectors and contexts. \nDerived data sources tracked\
113
+ \ and reviewed carefully. Data that is derived from other data through"
114
+ sentences:
115
+ - What is the purpose of the expectations set for automated systems in relation
116
+ to technical standards and practices?
117
+ - What factors influence the appropriate application of the principles outlined
118
+ in the white paper regarding automated systems?
119
+ - What actions can a court take if a federal agency fails to comply with the Privacy
120
+ Act regarding an individual's records?
121
+ model-index:
122
+ - name: SentenceTransformer based on Alibaba-NLP/gte-large-en-v1.5
123
+ results:
124
+ - task:
125
+ type: information-retrieval
126
+ name: Information Retrieval
127
+ dataset:
128
+ name: Unknown
129
+ type: unknown
130
+ metrics:
131
+ - type: cosine_accuracy@1
132
+ value: 0.88
133
+ name: Cosine Accuracy@1
134
+ - type: cosine_accuracy@3
135
+ value: 0.9866666666666667
136
+ name: Cosine Accuracy@3
137
+ - type: cosine_accuracy@5
138
+ value: 0.9866666666666667
139
+ name: Cosine Accuracy@5
140
+ - type: cosine_accuracy@10
141
+ value: 1.0
142
+ name: Cosine Accuracy@10
143
+ - type: cosine_precision@1
144
+ value: 0.88
145
+ name: Cosine Precision@1
146
+ - type: cosine_precision@3
147
+ value: 0.3288888888888888
148
+ name: Cosine Precision@3
149
+ - type: cosine_precision@5
150
+ value: 0.1973333333333333
151
+ name: Cosine Precision@5
152
+ - type: cosine_precision@10
153
+ value: 0.09999999999999998
154
+ name: Cosine Precision@10
155
+ - type: cosine_recall@1
156
+ value: 0.88
157
+ name: Cosine Recall@1
158
+ - type: cosine_recall@3
159
+ value: 0.9866666666666667
160
+ name: Cosine Recall@3
161
+ - type: cosine_recall@5
162
+ value: 0.9866666666666667
163
+ name: Cosine Recall@5
164
+ - type: cosine_recall@10
165
+ value: 1.0
166
+ name: Cosine Recall@10
167
+ - type: cosine_ndcg@10
168
+ value: 0.9499978881111136
169
+ name: Cosine Ndcg@10
170
+ - type: cosine_mrr@10
171
+ value: 0.9330158730158731
172
+ name: Cosine Mrr@10
173
+ - type: cosine_map@100
174
+ value: 0.9330158730158731
175
+ name: Cosine Map@100
176
+ - type: dot_accuracy@1
177
+ value: 0.88
178
+ name: Dot Accuracy@1
179
+ - type: dot_accuracy@3
180
+ value: 0.9866666666666667
181
+ name: Dot Accuracy@3
182
+ - type: dot_accuracy@5
183
+ value: 0.9866666666666667
184
+ name: Dot Accuracy@5
185
+ - type: dot_accuracy@10
186
+ value: 1.0
187
+ name: Dot Accuracy@10
188
+ - type: dot_precision@1
189
+ value: 0.88
190
+ name: Dot Precision@1
191
+ - type: dot_precision@3
192
+ value: 0.3288888888888888
193
+ name: Dot Precision@3
194
+ - type: dot_precision@5
195
+ value: 0.1973333333333333
196
+ name: Dot Precision@5
197
+ - type: dot_precision@10
198
+ value: 0.09999999999999998
199
+ name: Dot Precision@10
200
+ - type: dot_recall@1
201
+ value: 0.88
202
+ name: Dot Recall@1
203
+ - type: dot_recall@3
204
+ value: 0.9866666666666667
205
+ name: Dot Recall@3
206
+ - type: dot_recall@5
207
+ value: 0.9866666666666667
208
+ name: Dot Recall@5
209
+ - type: dot_recall@10
210
+ value: 1.0
211
+ name: Dot Recall@10
212
+ - type: dot_ndcg@10
213
+ value: 0.9499978881111136
214
+ name: Dot Ndcg@10
215
+ - type: dot_mrr@10
216
+ value: 0.9330158730158731
217
+ name: Dot Mrr@10
218
+ - type: dot_map@100
219
+ value: 0.9330158730158731
220
+ name: Dot Map@100
221
+ ---
222
+
223
+ # SentenceTransformer based on Alibaba-NLP/gte-large-en-v1.5
224
+
225
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) on the json dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
226
+
227
+ ## Model Details
228
+
229
+ ### Model Description
230
+ - **Model Type:** Sentence Transformer
231
+ - **Base model:** [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) <!-- at revision 104333d6af6f97649377c2afbde10a7704870c7b -->
232
+ - **Maximum Sequence Length:** 8192 tokens
233
+ - **Output Dimensionality:** 1024 dimensions
234
+ - **Similarity Function:** Cosine Similarity
235
+ - **Training Dataset:**
236
+ - json
237
+ <!-- - **Language:** Unknown -->
238
+ <!-- - **License:** Unknown -->
239
+
240
+ ### Model Sources
241
+
242
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
243
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
244
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
245
+
246
+ ### Full Model Architecture
247
+
248
+ ```
249
+ SentenceTransformer(
250
+ (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NewModel
251
+ (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
252
+ )
253
+ ```
254
+
255
+ ## Usage
256
+
257
+ ### Direct Usage (Sentence Transformers)
258
+
259
+ First install the Sentence Transformers library:
260
+
261
+ ```bash
262
+ pip install -U sentence-transformers
263
+ ```
264
+
265
+ Then you can load this model and run inference.
266
+ ```python
267
+ from sentence_transformers import SentenceTransformer
268
+
269
+ # Download from the 🤗 Hub
270
+ model = SentenceTransformer("sentence_transformers_model_id")
271
+ # Run inference
272
+ sentences = [
273
+ 'SAFE AND EFFECTIVE \nSYSTEMS \nWHAT SHOULD BE EXPECTED OF AUTOMATED SYSTEMS\nThe expectations for automated systems are meant to serve as a blueprint for the development of additional \ntechnical standards and practices that are tailored for particular sectors and contexts. \nDerived data sources tracked and reviewed carefully. Data that is derived from other data through',
274
+ 'What is the purpose of the expectations set for automated systems in relation to technical standards and practices?',
275
+ 'What factors influence the appropriate application of the principles outlined in the white paper regarding automated systems?',
276
+ ]
277
+ embeddings = model.encode(sentences)
278
+ print(embeddings.shape)
279
+ # [3, 1024]
280
+
281
+ # Get the similarity scores for the embeddings
282
+ similarities = model.similarity(embeddings, embeddings)
283
+ print(similarities.shape)
284
+ # [3, 3]
285
+ ```
286
+
287
+ <!--
288
+ ### Direct Usage (Transformers)
289
+
290
+ <details><summary>Click to see the direct usage in Transformers</summary>
291
+
292
+ </details>
293
+ -->
294
+
295
+ <!--
296
+ ### Downstream Usage (Sentence Transformers)
297
+
298
+ You can finetune this model on your own dataset.
299
+
300
+ <details><summary>Click to expand</summary>
301
+
302
+ </details>
303
+ -->
304
+
305
+ <!--
306
+ ### Out-of-Scope Use
307
+
308
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
309
+ -->
310
+
311
+ ## Evaluation
312
+
313
+ ### Metrics
314
+
315
+ #### Information Retrieval
316
+
317
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
318
+
319
+ | Metric | Value |
320
+ |:--------------------|:----------|
321
+ | cosine_accuracy@1 | 0.88 |
322
+ | cosine_accuracy@3 | 0.9867 |
323
+ | cosine_accuracy@5 | 0.9867 |
324
+ | cosine_accuracy@10 | 1.0 |
325
+ | cosine_precision@1 | 0.88 |
326
+ | cosine_precision@3 | 0.3289 |
327
+ | cosine_precision@5 | 0.1973 |
328
+ | cosine_precision@10 | 0.1 |
329
+ | cosine_recall@1 | 0.88 |
330
+ | cosine_recall@3 | 0.9867 |
331
+ | cosine_recall@5 | 0.9867 |
332
+ | cosine_recall@10 | 1.0 |
333
+ | cosine_ndcg@10 | 0.95 |
334
+ | cosine_mrr@10 | 0.933 |
335
+ | **cosine_map@100** | **0.933** |
336
+ | dot_accuracy@1 | 0.88 |
337
+ | dot_accuracy@3 | 0.9867 |
338
+ | dot_accuracy@5 | 0.9867 |
339
+ | dot_accuracy@10 | 1.0 |
340
+ | dot_precision@1 | 0.88 |
341
+ | dot_precision@3 | 0.3289 |
342
+ | dot_precision@5 | 0.1973 |
343
+ | dot_precision@10 | 0.1 |
344
+ | dot_recall@1 | 0.88 |
345
+ | dot_recall@3 | 0.9867 |
346
+ | dot_recall@5 | 0.9867 |
347
+ | dot_recall@10 | 1.0 |
348
+ | dot_ndcg@10 | 0.95 |
349
+ | dot_mrr@10 | 0.933 |
350
+ | dot_map@100 | 0.933 |
351
+
352
+ <!--
353
+ ## Bias, Risks and Limitations
354
+
355
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
356
+ -->
357
+
358
+ <!--
359
+ ### Recommendations
360
+
361
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
362
+ -->
363
+
364
+ ## Training Details
365
+
366
+ ### Training Dataset
367
+
368
+ #### json
369
+
370
+ * Dataset: json
371
+ * Size: 500 training samples
372
+ * Columns: <code>anchor</code> and <code>positive</code>
373
+ * Approximate statistics based on the first 500 samples:
374
+ | | anchor | positive |
375
+ |:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
376
+ | type | string | string |
377
+ | details | <ul><li>min: 12 tokens</li><li>mean: 21.76 tokens</li><li>max: 37 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 78.92 tokens</li><li>max: 104 tokens</li></ul> |
378
+ * Samples:
379
+ | anchor | positive |
380
+ |:--------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
381
+ | <code>What is the primary purpose of the AI Bill of Rights outlined in the October 2022 blueprint?</code> | <code>BLUEPRINT FOR AN <br>AI BILL OF <br>RIGHTS <br>MAKING AUTOMATED <br>SYSTEMS WORK FOR <br>THE AMERICAN PEOPLE <br>OCTOBER 2022</code> |
382
+ | <code>What was the purpose of the Blueprint for an AI Bill of Rights published by the White House Office of Science and Technology Policy?</code> | <code>About this Document <br>The Blueprint for an AI Bill of Rights: Making Automated Systems Work for the American People was <br>published by the White House Office of Science and Technology Policy in October 2022. This framework was <br>released one year after OSTP announced the launch of a process to develop “a bill of rights for an AI-powered</code> |
383
+ | <code>What initiative did the OSTP announce a year prior to the release of the framework for a bill of rights for an AI-powered world?</code> | <code>released one year after OSTP announced the launch of a process to develop “a bill of rights for an AI-powered <br>world.” Its release follows a year of public engagement to inform this initiative. The framework is available <br>online at: https://www.whitehouse.gov/ostp/ai-bill-of-rights <br>About the Office of Science and Technology Policy <br>The Office of Science and Technology Policy (OSTP) was established by the National Science and Technology</code> |
384
+ * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
385
+ ```json
386
+ {
387
+ "loss": "MultipleNegativesRankingLoss",
388
+ "matryoshka_dims": [
389
+ 1024,
390
+ 512,
391
+ 256,
392
+ 128,
393
+ 64
394
+ ],
395
+ "matryoshka_weights": [
396
+ 1,
397
+ 1,
398
+ 1,
399
+ 1,
400
+ 1
401
+ ],
402
+ "n_dims_per_step": -1
403
+ }
404
+ ```
405
+
406
+ ### Training Hyperparameters
407
+ #### Non-Default Hyperparameters
408
+
409
+ - `eval_strategy`: epoch
410
+ - `per_device_train_batch_size`: 32
411
+ - `per_device_eval_batch_size`: 16
412
+ - `gradient_accumulation_steps`: 16
413
+ - `learning_rate`: 2e-05
414
+ - `num_train_epochs`: 5
415
+ - `lr_scheduler_type`: cosine
416
+ - `warmup_ratio`: 0.1
417
+ - `bf16`: True
418
+ - `tf32`: True
419
+ - `load_best_model_at_end`: True
420
+ - `optim`: adamw_torch_fused
421
+ - `batch_sampler`: no_duplicates
422
+
423
+ #### All Hyperparameters
424
+ <details><summary>Click to expand</summary>
425
+
426
+ - `overwrite_output_dir`: False
427
+ - `do_predict`: False
428
+ - `eval_strategy`: epoch
429
+ - `prediction_loss_only`: True
430
+ - `per_device_train_batch_size`: 32
431
+ - `per_device_eval_batch_size`: 16
432
+ - `per_gpu_train_batch_size`: None
433
+ - `per_gpu_eval_batch_size`: None
434
+ - `gradient_accumulation_steps`: 16
435
+ - `eval_accumulation_steps`: None
436
+ - `torch_empty_cache_steps`: None
437
+ - `learning_rate`: 2e-05
438
+ - `weight_decay`: 0.0
439
+ - `adam_beta1`: 0.9
440
+ - `adam_beta2`: 0.999
441
+ - `adam_epsilon`: 1e-08
442
+ - `max_grad_norm`: 1.0
443
+ - `num_train_epochs`: 5
444
+ - `max_steps`: -1
445
+ - `lr_scheduler_type`: cosine
446
+ - `lr_scheduler_kwargs`: {}
447
+ - `warmup_ratio`: 0.1
448
+ - `warmup_steps`: 0
449
+ - `log_level`: passive
450
+ - `log_level_replica`: warning
451
+ - `log_on_each_node`: True
452
+ - `logging_nan_inf_filter`: True
453
+ - `save_safetensors`: True
454
+ - `save_on_each_node`: False
455
+ - `save_only_model`: False
456
+ - `restore_callback_states_from_checkpoint`: False
457
+ - `no_cuda`: False
458
+ - `use_cpu`: False
459
+ - `use_mps_device`: False
460
+ - `seed`: 42
461
+ - `data_seed`: None
462
+ - `jit_mode_eval`: False
463
+ - `use_ipex`: False
464
+ - `bf16`: True
465
+ - `fp16`: False
466
+ - `fp16_opt_level`: O1
467
+ - `half_precision_backend`: auto
468
+ - `bf16_full_eval`: False
469
+ - `fp16_full_eval`: False
470
+ - `tf32`: True
471
+ - `local_rank`: 0
472
+ - `ddp_backend`: None
473
+ - `tpu_num_cores`: None
474
+ - `tpu_metrics_debug`: False
475
+ - `debug`: []
476
+ - `dataloader_drop_last`: False
477
+ - `dataloader_num_workers`: 0
478
+ - `dataloader_prefetch_factor`: None
479
+ - `past_index`: -1
480
+ - `disable_tqdm`: False
481
+ - `remove_unused_columns`: True
482
+ - `label_names`: None
483
+ - `load_best_model_at_end`: True
484
+ - `ignore_data_skip`: False
485
+ - `fsdp`: []
486
+ - `fsdp_min_num_params`: 0
487
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
488
+ - `fsdp_transformer_layer_cls_to_wrap`: None
489
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
490
+ - `deepspeed`: None
491
+ - `label_smoothing_factor`: 0.0
492
+ - `optim`: adamw_torch_fused
493
+ - `optim_args`: None
494
+ - `adafactor`: False
495
+ - `group_by_length`: False
496
+ - `length_column_name`: length
497
+ - `ddp_find_unused_parameters`: None
498
+ - `ddp_bucket_cap_mb`: None
499
+ - `ddp_broadcast_buffers`: False
500
+ - `dataloader_pin_memory`: True
501
+ - `dataloader_persistent_workers`: False
502
+ - `skip_memory_metrics`: True
503
+ - `use_legacy_prediction_loop`: False
504
+ - `push_to_hub`: False
505
+ - `resume_from_checkpoint`: None
506
+ - `hub_model_id`: None
507
+ - `hub_strategy`: every_save
508
+ - `hub_private_repo`: False
509
+ - `hub_always_push`: False
510
+ - `gradient_checkpointing`: False
511
+ - `gradient_checkpointing_kwargs`: None
512
+ - `include_inputs_for_metrics`: False
513
+ - `eval_do_concat_batches`: True
514
+ - `fp16_backend`: auto
515
+ - `push_to_hub_model_id`: None
516
+ - `push_to_hub_organization`: None
517
+ - `mp_parameters`:
518
+ - `auto_find_batch_size`: False
519
+ - `full_determinism`: False
520
+ - `torchdynamo`: None
521
+ - `ray_scope`: last
522
+ - `ddp_timeout`: 1800
523
+ - `torch_compile`: False
524
+ - `torch_compile_backend`: None
525
+ - `torch_compile_mode`: None
526
+ - `dispatch_batches`: None
527
+ - `split_batches`: None
528
+ - `include_tokens_per_second`: False
529
+ - `include_num_input_tokens_seen`: False
530
+ - `neftune_noise_alpha`: None
531
+ - `optim_target_modules`: None
532
+ - `batch_eval_metrics`: False
533
+ - `eval_on_start`: False
534
+ - `eval_use_gather_object`: False
535
+ - `batch_sampler`: no_duplicates
536
+ - `multi_dataset_batch_sampler`: proportional
537
+
538
+ </details>
539
+
540
+ ### Training Logs
541
+ | Epoch | Step | cosine_map@100 |
542
+ |:-------:|:-----:|:--------------:|
543
+ | 1.0 | 1 | 0.9022 |
544
+ | 2.0 | 2 | 0.9311 |
545
+ | **3.0** | **3** | **0.9397** |
546
+ | 4.0 | 4 | 0.9330 |
547
+ | 5.0 | 5 | 0.9330 |
548
+
549
+ * The bold row denotes the saved checkpoint.
550
+
551
+ ### Framework Versions
552
+ - Python: 3.10.12
553
+ - Sentence Transformers: 3.1.1
554
+ - Transformers: 4.44.2
555
+ - PyTorch: 2.4.1+cu121
556
+ - Accelerate: 0.34.2
557
+ - Datasets: 3.0.1
558
+ - Tokenizers: 0.19.1
559
+
560
+ ## Citation
561
+
562
+ ### BibTeX
563
+
564
+ #### Sentence Transformers
565
+ ```bibtex
566
+ @inproceedings{reimers-2019-sentence-bert,
567
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
568
+ author = "Reimers, Nils and Gurevych, Iryna",
569
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
570
+ month = "11",
571
+ year = "2019",
572
+ publisher = "Association for Computational Linguistics",
573
+ url = "https://arxiv.org/abs/1908.10084",
574
+ }
575
+ ```
576
+
577
+ #### MatryoshkaLoss
578
+ ```bibtex
579
+ @misc{kusupati2024matryoshka,
580
+ title={Matryoshka Representation Learning},
581
+ author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
582
+ year={2024},
583
+ eprint={2205.13147},
584
+ archivePrefix={arXiv},
585
+ primaryClass={cs.LG}
586
+ }
587
+ ```
588
+
589
+ #### MultipleNegativesRankingLoss
590
+ ```bibtex
591
+ @misc{henderson2017efficient,
592
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
593
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
594
+ year={2017},
595
+ eprint={1705.00652},
596
+ archivePrefix={arXiv},
597
+ primaryClass={cs.CL}
598
+ }
599
+ ```
600
+
601
+ <!--
602
+ ## Glossary
603
+
604
+ *Clearly define terms in order to be accessible across audiences.*
605
+ -->
606
+
607
+ <!--
608
+ ## Model Card Authors
609
+
610
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
611
+ -->
612
+
613
+ <!--
614
+ ## Model Card Contact
615
+
616
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
617
+ -->
checkpoint-3/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-3/README.md ADDED
@@ -0,0 +1,614 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Alibaba-NLP/gte-large-en-v1.5
3
+ library_name: sentence-transformers
4
+ metrics:
5
+ - cosine_accuracy@1
6
+ - cosine_accuracy@3
7
+ - cosine_accuracy@5
8
+ - cosine_accuracy@10
9
+ - cosine_precision@1
10
+ - cosine_precision@3
11
+ - cosine_precision@5
12
+ - cosine_precision@10
13
+ - cosine_recall@1
14
+ - cosine_recall@3
15
+ - cosine_recall@5
16
+ - cosine_recall@10
17
+ - cosine_ndcg@10
18
+ - cosine_mrr@10
19
+ - cosine_map@100
20
+ - dot_accuracy@1
21
+ - dot_accuracy@3
22
+ - dot_accuracy@5
23
+ - dot_accuracy@10
24
+ - dot_precision@1
25
+ - dot_precision@3
26
+ - dot_precision@5
27
+ - dot_precision@10
28
+ - dot_recall@1
29
+ - dot_recall@3
30
+ - dot_recall@5
31
+ - dot_recall@10
32
+ - dot_ndcg@10
33
+ - dot_mrr@10
34
+ - dot_map@100
35
+ pipeline_tag: sentence-similarity
36
+ tags:
37
+ - sentence-transformers
38
+ - sentence-similarity
39
+ - feature-extraction
40
+ - generated_from_trainer
41
+ - dataset_size:500
42
+ - loss:MatryoshkaLoss
43
+ - loss:MultipleNegativesRankingLoss
44
+ widget:
45
+ - source_sentence: "narrow identified goals, to avoid \"mission creep.\" Anticipated\
46
+ \ data collection should be determined to be \nstrictly necessary to the identified\
47
+ \ goals and should be minimized as much as possible. Data collected based on \n\
48
+ these identified goals and for a specific context should not be used in a different\
49
+ \ context without assessing for \nnew privacy risks and implementing appropriate\
50
+ \ mitigation measures, which may include express consent."
51
+ sentences:
52
+ - What measures should be taken if data collected for specific identified goals
53
+ is to be used in a different context?
54
+ - What measures should be taken to ensure the privacy of sensitive data and limit
55
+ access to it?
56
+ - What special requirements are mentioned in the white paper regarding national
57
+ security and defense activities in relation to trustworthy artificial intelligence?
58
+ - source_sentence: '•
59
+
60
+ Karen Levy, Assistant Professor, Department of Information Science, Cornell University
61
+
62
+
63
+
64
+ Natasha Duarte, Project Director, Upturn
65
+
66
+
67
+
68
+ Elana Zeide, Assistant Professor, University of Nebraska College of Law
69
+
70
+
71
+
72
+ Fabian Rogers, Constituent Advocate, Office of NY State Senator Jabari Brisport
73
+ and Community
74
+
75
+ Advocate and Floor Captain, Atlantic Plaza Towers Tenants Association
76
+
77
+ The individual panelists described the ways in which AI systems and other technologies
78
+ are increasingly being'
79
+ sentences:
80
+ - What are some of the challenges posed to democracy by the use of technology and
81
+ automated systems, as mentioned in the foreword?
82
+ - What principles has the U.S. Intelligence Community developed to guide personnel
83
+ in the ethical use of AI?
84
+ - What roles do the panelists hold in relation to the discussion on AI systems and
85
+ technology?
86
+ - source_sentence: "impacts disfavoring people based on their race, color, ethnicity,\
87
+ \ \nsex \n(including \npregnancy, \nchildbirth, \nand \nrelated \nmedical \nconditions,\
88
+ \ \ngender \nidentity, \nintersex \nstatus, \nand \nsexual \norientation), religion,\
89
+ \ age, national origin, disability, veteran status,"
90
+ sentences:
91
+ - What does the term "HUMAN ALTERNATIVES" refer to in the context provided?
92
+ - What types of discrimination are mentioned in the context?
93
+ - What are the expectations for automated systems in relation to public protection
94
+ from surveillance?
95
+ - source_sentence: "establish and maintain the capabilities that will allow individuals\
96
+ \ to use their own automated systems to help \nthem make consent, access, and\
97
+ \ control decisions in a complex data ecosystem. Capabilities include machine\
98
+ \ \nreadable data, standardized data formats, metadata or tags for expressing\
99
+ \ data processing permissions and \npreferences and data provenance and lineage,\
100
+ \ context of use and access-specific tags, and training models for \nassessing\
101
+ \ privacy risk."
102
+ sentences:
103
+ - What measures should be taken to ensure that independent evaluations of algorithmic
104
+ discrimination are conducted while balancing individual privacy and data access
105
+ needs?
106
+ - What capabilities are necessary for individuals to effectively manage consent
107
+ and control decisions in a complex data ecosystem?
108
+ - What are some examples of classifications that are protected by law against discrimination?
109
+ - source_sentence: "SAFE AND EFFECTIVE \nSYSTEMS \nWHAT SHOULD BE EXPECTED OF AUTOMATED\
110
+ \ SYSTEMS\nThe expectations for automated systems are meant to serve as a blueprint\
111
+ \ for the development of additional \ntechnical standards and practices that are\
112
+ \ tailored for particular sectors and contexts. \nDerived data sources tracked\
113
+ \ and reviewed carefully. Data that is derived from other data through"
114
+ sentences:
115
+ - What is the purpose of the expectations set for automated systems in relation
116
+ to technical standards and practices?
117
+ - What factors influence the appropriate application of the principles outlined
118
+ in the white paper regarding automated systems?
119
+ - What actions can a court take if a federal agency fails to comply with the Privacy
120
+ Act regarding an individual's records?
121
+ model-index:
122
+ - name: SentenceTransformer based on Alibaba-NLP/gte-large-en-v1.5
123
+ results:
124
+ - task:
125
+ type: information-retrieval
126
+ name: Information Retrieval
127
+ dataset:
128
+ name: Unknown
129
+ type: unknown
130
+ metrics:
131
+ - type: cosine_accuracy@1
132
+ value: 0.8933333333333333
133
+ name: Cosine Accuracy@1
134
+ - type: cosine_accuracy@3
135
+ value: 0.9866666666666667
136
+ name: Cosine Accuracy@3
137
+ - type: cosine_accuracy@5
138
+ value: 0.9866666666666667
139
+ name: Cosine Accuracy@5
140
+ - type: cosine_accuracy@10
141
+ value: 1.0
142
+ name: Cosine Accuracy@10
143
+ - type: cosine_precision@1
144
+ value: 0.8933333333333333
145
+ name: Cosine Precision@1
146
+ - type: cosine_precision@3
147
+ value: 0.3288888888888888
148
+ name: Cosine Precision@3
149
+ - type: cosine_precision@5
150
+ value: 0.1973333333333333
151
+ name: Cosine Precision@5
152
+ - type: cosine_precision@10
153
+ value: 0.09999999999999998
154
+ name: Cosine Precision@10
155
+ - type: cosine_recall@1
156
+ value: 0.8933333333333333
157
+ name: Cosine Recall@1
158
+ - type: cosine_recall@3
159
+ value: 0.9866666666666667
160
+ name: Cosine Recall@3
161
+ - type: cosine_recall@5
162
+ value: 0.9866666666666667
163
+ name: Cosine Recall@5
164
+ - type: cosine_recall@10
165
+ value: 1.0
166
+ name: Cosine Recall@10
167
+ - type: cosine_ndcg@10
168
+ value: 0.954918824730161
169
+ name: Cosine Ndcg@10
170
+ - type: cosine_mrr@10
171
+ value: 0.9396825396825398
172
+ name: Cosine Mrr@10
173
+ - type: cosine_map@100
174
+ value: 0.9396825396825398
175
+ name: Cosine Map@100
176
+ - type: dot_accuracy@1
177
+ value: 0.8933333333333333
178
+ name: Dot Accuracy@1
179
+ - type: dot_accuracy@3
180
+ value: 0.9866666666666667
181
+ name: Dot Accuracy@3
182
+ - type: dot_accuracy@5
183
+ value: 0.9866666666666667
184
+ name: Dot Accuracy@5
185
+ - type: dot_accuracy@10
186
+ value: 1.0
187
+ name: Dot Accuracy@10
188
+ - type: dot_precision@1
189
+ value: 0.8933333333333333
190
+ name: Dot Precision@1
191
+ - type: dot_precision@3
192
+ value: 0.3288888888888888
193
+ name: Dot Precision@3
194
+ - type: dot_precision@5
195
+ value: 0.1973333333333333
196
+ name: Dot Precision@5
197
+ - type: dot_precision@10
198
+ value: 0.09999999999999998
199
+ name: Dot Precision@10
200
+ - type: dot_recall@1
201
+ value: 0.8933333333333333
202
+ name: Dot Recall@1
203
+ - type: dot_recall@3
204
+ value: 0.9866666666666667
205
+ name: Dot Recall@3
206
+ - type: dot_recall@5
207
+ value: 0.9866666666666667
208
+ name: Dot Recall@5
209
+ - type: dot_recall@10
210
+ value: 1.0
211
+ name: Dot Recall@10
212
+ - type: dot_ndcg@10
213
+ value: 0.954918824730161
214
+ name: Dot Ndcg@10
215
+ - type: dot_mrr@10
216
+ value: 0.9396825396825398
217
+ name: Dot Mrr@10
218
+ - type: dot_map@100
219
+ value: 0.9396825396825398
220
+ name: Dot Map@100
221
+ ---
222
+
223
+ # SentenceTransformer based on Alibaba-NLP/gte-large-en-v1.5
224
+
225
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) on the json dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
226
+
227
+ ## Model Details
228
+
229
+ ### Model Description
230
+ - **Model Type:** Sentence Transformer
231
+ - **Base model:** [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) <!-- at revision 104333d6af6f97649377c2afbde10a7704870c7b -->
232
+ - **Maximum Sequence Length:** 8192 tokens
233
+ - **Output Dimensionality:** 1024 dimensions
234
+ - **Similarity Function:** Cosine Similarity
235
+ - **Training Dataset:**
236
+ - json
237
+ <!-- - **Language:** Unknown -->
238
+ <!-- - **License:** Unknown -->
239
+
240
+ ### Model Sources
241
+
242
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
243
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
244
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
245
+
246
+ ### Full Model Architecture
247
+
248
+ ```
249
+ SentenceTransformer(
250
+ (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NewModel
251
+ (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
252
+ )
253
+ ```
254
+
255
+ ## Usage
256
+
257
+ ### Direct Usage (Sentence Transformers)
258
+
259
+ First install the Sentence Transformers library:
260
+
261
+ ```bash
262
+ pip install -U sentence-transformers
263
+ ```
264
+
265
+ Then you can load this model and run inference.
266
+ ```python
267
+ from sentence_transformers import SentenceTransformer
268
+
269
+ # Download from the 🤗 Hub
270
+ model = SentenceTransformer("sentence_transformers_model_id")
271
+ # Run inference
272
+ sentences = [
273
+ 'SAFE AND EFFECTIVE \nSYSTEMS \nWHAT SHOULD BE EXPECTED OF AUTOMATED SYSTEMS\nThe expectations for automated systems are meant to serve as a blueprint for the development of additional \ntechnical standards and practices that are tailored for particular sectors and contexts. \nDerived data sources tracked and reviewed carefully. Data that is derived from other data through',
274
+ 'What is the purpose of the expectations set for automated systems in relation to technical standards and practices?',
275
+ 'What factors influence the appropriate application of the principles outlined in the white paper regarding automated systems?',
276
+ ]
277
+ embeddings = model.encode(sentences)
278
+ print(embeddings.shape)
279
+ # [3, 1024]
280
+
281
+ # Get the similarity scores for the embeddings
282
+ similarities = model.similarity(embeddings, embeddings)
283
+ print(similarities.shape)
284
+ # [3, 3]
285
+ ```
286
+
287
+ <!--
288
+ ### Direct Usage (Transformers)
289
+
290
+ <details><summary>Click to see the direct usage in Transformers</summary>
291
+
292
+ </details>
293
+ -->
294
+
295
+ <!--
296
+ ### Downstream Usage (Sentence Transformers)
297
+
298
+ You can finetune this model on your own dataset.
299
+
300
+ <details><summary>Click to expand</summary>
301
+
302
+ </details>
303
+ -->
304
+
305
+ <!--
306
+ ### Out-of-Scope Use
307
+
308
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
309
+ -->
310
+
311
+ ## Evaluation
312
+
313
+ ### Metrics
314
+
315
+ #### Information Retrieval
316
+
317
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
318
+
319
+ | Metric | Value |
320
+ |:--------------------|:-----------|
321
+ | cosine_accuracy@1 | 0.8933 |
322
+ | cosine_accuracy@3 | 0.9867 |
323
+ | cosine_accuracy@5 | 0.9867 |
324
+ | cosine_accuracy@10 | 1.0 |
325
+ | cosine_precision@1 | 0.8933 |
326
+ | cosine_precision@3 | 0.3289 |
327
+ | cosine_precision@5 | 0.1973 |
328
+ | cosine_precision@10 | 0.1 |
329
+ | cosine_recall@1 | 0.8933 |
330
+ | cosine_recall@3 | 0.9867 |
331
+ | cosine_recall@5 | 0.9867 |
332
+ | cosine_recall@10 | 1.0 |
333
+ | cosine_ndcg@10 | 0.9549 |
334
+ | cosine_mrr@10 | 0.9397 |
335
+ | **cosine_map@100** | **0.9397** |
336
+ | dot_accuracy@1 | 0.8933 |
337
+ | dot_accuracy@3 | 0.9867 |
338
+ | dot_accuracy@5 | 0.9867 |
339
+ | dot_accuracy@10 | 1.0 |
340
+ | dot_precision@1 | 0.8933 |
341
+ | dot_precision@3 | 0.3289 |
342
+ | dot_precision@5 | 0.1973 |
343
+ | dot_precision@10 | 0.1 |
344
+ | dot_recall@1 | 0.8933 |
345
+ | dot_recall@3 | 0.9867 |
346
+ | dot_recall@5 | 0.9867 |
347
+ | dot_recall@10 | 1.0 |
348
+ | dot_ndcg@10 | 0.9549 |
349
+ | dot_mrr@10 | 0.9397 |
350
+ | dot_map@100 | 0.9397 |
351
+
352
+ <!--
353
+ ## Bias, Risks and Limitations
354
+
355
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
356
+ -->
357
+
358
+ <!--
359
+ ### Recommendations
360
+
361
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
362
+ -->
363
+
364
+ ## Training Details
365
+
366
+ ### Training Dataset
367
+
368
+ #### json
369
+
370
+ * Dataset: json
371
+ * Size: 500 training samples
372
+ * Columns: <code>anchor</code> and <code>positive</code>
373
+ * Approximate statistics based on the first 500 samples:
374
+ | | anchor | positive |
375
+ |:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
376
+ | type | string | string |
377
+ | details | <ul><li>min: 12 tokens</li><li>mean: 21.76 tokens</li><li>max: 37 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 78.92 tokens</li><li>max: 104 tokens</li></ul> |
378
+ * Samples:
379
+ | anchor | positive |
380
+ |:--------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
381
+ | <code>What is the primary purpose of the AI Bill of Rights outlined in the October 2022 blueprint?</code> | <code>BLUEPRINT FOR AN <br>AI BILL OF <br>RIGHTS <br>MAKING AUTOMATED <br>SYSTEMS WORK FOR <br>THE AMERICAN PEOPLE <br>OCTOBER 2022</code> |
382
+ | <code>What was the purpose of the Blueprint for an AI Bill of Rights published by the White House Office of Science and Technology Policy?</code> | <code>About this Document <br>The Blueprint for an AI Bill of Rights: Making Automated Systems Work for the American People was <br>published by the White House Office of Science and Technology Policy in October 2022. This framework was <br>released one year after OSTP announced the launch of a process to develop “a bill of rights for an AI-powered</code> |
383
+ | <code>What initiative did the OSTP announce a year prior to the release of the framework for a bill of rights for an AI-powered world?</code> | <code>released one year after OSTP announced the launch of a process to develop “a bill of rights for an AI-powered <br>world.” Its release follows a year of public engagement to inform this initiative. The framework is available <br>online at: https://www.whitehouse.gov/ostp/ai-bill-of-rights <br>About the Office of Science and Technology Policy <br>The Office of Science and Technology Policy (OSTP) was established by the National Science and Technology</code> |
384
+ * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
385
+ ```json
386
+ {
387
+ "loss": "MultipleNegativesRankingLoss",
388
+ "matryoshka_dims": [
389
+ 1024,
390
+ 512,
391
+ 256,
392
+ 128,
393
+ 64
394
+ ],
395
+ "matryoshka_weights": [
396
+ 1,
397
+ 1,
398
+ 1,
399
+ 1,
400
+ 1
401
+ ],
402
+ "n_dims_per_step": -1
403
+ }
404
+ ```
405
+
406
+ ### Training Hyperparameters
407
+ #### Non-Default Hyperparameters
408
+
409
+ - `eval_strategy`: epoch
410
+ - `per_device_train_batch_size`: 32
411
+ - `per_device_eval_batch_size`: 16
412
+ - `gradient_accumulation_steps`: 16
413
+ - `learning_rate`: 2e-05
414
+ - `num_train_epochs`: 5
415
+ - `lr_scheduler_type`: cosine
416
+ - `warmup_ratio`: 0.1
417
+ - `bf16`: True
418
+ - `tf32`: True
419
+ - `load_best_model_at_end`: True
420
+ - `optim`: adamw_torch_fused
421
+ - `batch_sampler`: no_duplicates
422
+
423
+ #### All Hyperparameters
424
+ <details><summary>Click to expand</summary>
425
+
426
+ - `overwrite_output_dir`: False
427
+ - `do_predict`: False
428
+ - `eval_strategy`: epoch
429
+ - `prediction_loss_only`: True
430
+ - `per_device_train_batch_size`: 32
431
+ - `per_device_eval_batch_size`: 16
432
+ - `per_gpu_train_batch_size`: None
433
+ - `per_gpu_eval_batch_size`: None
434
+ - `gradient_accumulation_steps`: 16
435
+ - `eval_accumulation_steps`: None
436
+ - `torch_empty_cache_steps`: None
437
+ - `learning_rate`: 2e-05
438
+ - `weight_decay`: 0.0
439
+ - `adam_beta1`: 0.9
440
+ - `adam_beta2`: 0.999
441
+ - `adam_epsilon`: 1e-08
442
+ - `max_grad_norm`: 1.0
443
+ - `num_train_epochs`: 5
444
+ - `max_steps`: -1
445
+ - `lr_scheduler_type`: cosine
446
+ - `lr_scheduler_kwargs`: {}
447
+ - `warmup_ratio`: 0.1
448
+ - `warmup_steps`: 0
449
+ - `log_level`: passive
450
+ - `log_level_replica`: warning
451
+ - `log_on_each_node`: True
452
+ - `logging_nan_inf_filter`: True
453
+ - `save_safetensors`: True
454
+ - `save_on_each_node`: False
455
+ - `save_only_model`: False
456
+ - `restore_callback_states_from_checkpoint`: False
457
+ - `no_cuda`: False
458
+ - `use_cpu`: False
459
+ - `use_mps_device`: False
460
+ - `seed`: 42
461
+ - `data_seed`: None
462
+ - `jit_mode_eval`: False
463
+ - `use_ipex`: False
464
+ - `bf16`: True
465
+ - `fp16`: False
466
+ - `fp16_opt_level`: O1
467
+ - `half_precision_backend`: auto
468
+ - `bf16_full_eval`: False
469
+ - `fp16_full_eval`: False
470
+ - `tf32`: True
471
+ - `local_rank`: 0
472
+ - `ddp_backend`: None
473
+ - `tpu_num_cores`: None
474
+ - `tpu_metrics_debug`: False
475
+ - `debug`: []
476
+ - `dataloader_drop_last`: False
477
+ - `dataloader_num_workers`: 0
478
+ - `dataloader_prefetch_factor`: None
479
+ - `past_index`: -1
480
+ - `disable_tqdm`: False
481
+ - `remove_unused_columns`: True
482
+ - `label_names`: None
483
+ - `load_best_model_at_end`: True
484
+ - `ignore_data_skip`: False
485
+ - `fsdp`: []
486
+ - `fsdp_min_num_params`: 0
487
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
488
+ - `fsdp_transformer_layer_cls_to_wrap`: None
489
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
490
+ - `deepspeed`: None
491
+ - `label_smoothing_factor`: 0.0
492
+ - `optim`: adamw_torch_fused
493
+ - `optim_args`: None
494
+ - `adafactor`: False
495
+ - `group_by_length`: False
496
+ - `length_column_name`: length
497
+ - `ddp_find_unused_parameters`: None
498
+ - `ddp_bucket_cap_mb`: None
499
+ - `ddp_broadcast_buffers`: False
500
+ - `dataloader_pin_memory`: True
501
+ - `dataloader_persistent_workers`: False
502
+ - `skip_memory_metrics`: True
503
+ - `use_legacy_prediction_loop`: False
504
+ - `push_to_hub`: False
505
+ - `resume_from_checkpoint`: None
506
+ - `hub_model_id`: None
507
+ - `hub_strategy`: every_save
508
+ - `hub_private_repo`: False
509
+ - `hub_always_push`: False
510
+ - `gradient_checkpointing`: False
511
+ - `gradient_checkpointing_kwargs`: None
512
+ - `include_inputs_for_metrics`: False
513
+ - `eval_do_concat_batches`: True
514
+ - `fp16_backend`: auto
515
+ - `push_to_hub_model_id`: None
516
+ - `push_to_hub_organization`: None
517
+ - `mp_parameters`:
518
+ - `auto_find_batch_size`: False
519
+ - `full_determinism`: False
520
+ - `torchdynamo`: None
521
+ - `ray_scope`: last
522
+ - `ddp_timeout`: 1800
523
+ - `torch_compile`: False
524
+ - `torch_compile_backend`: None
525
+ - `torch_compile_mode`: None
526
+ - `dispatch_batches`: None
527
+ - `split_batches`: None
528
+ - `include_tokens_per_second`: False
529
+ - `include_num_input_tokens_seen`: False
530
+ - `neftune_noise_alpha`: None
531
+ - `optim_target_modules`: None
532
+ - `batch_eval_metrics`: False
533
+ - `eval_on_start`: False
534
+ - `eval_use_gather_object`: False
535
+ - `batch_sampler`: no_duplicates
536
+ - `multi_dataset_batch_sampler`: proportional
537
+
538
+ </details>
539
+
540
+ ### Training Logs
541
+ | Epoch | Step | cosine_map@100 |
542
+ |:-----:|:----:|:--------------:|
543
+ | 1.0 | 1 | 0.9022 |
544
+ | 2.0 | 2 | 0.9311 |
545
+ | 3.0 | 3 | 0.9397 |
546
+
547
+
548
+ ### Framework Versions
549
+ - Python: 3.10.12
550
+ - Sentence Transformers: 3.1.1
551
+ - Transformers: 4.44.2
552
+ - PyTorch: 2.4.1+cu121
553
+ - Accelerate: 0.34.2
554
+ - Datasets: 3.0.1
555
+ - Tokenizers: 0.19.1
556
+
557
+ ## Citation
558
+
559
+ ### BibTeX
560
+
561
+ #### Sentence Transformers
562
+ ```bibtex
563
+ @inproceedings{reimers-2019-sentence-bert,
564
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
565
+ author = "Reimers, Nils and Gurevych, Iryna",
566
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
567
+ month = "11",
568
+ year = "2019",
569
+ publisher = "Association for Computational Linguistics",
570
+ url = "https://arxiv.org/abs/1908.10084",
571
+ }
572
+ ```
573
+
574
+ #### MatryoshkaLoss
575
+ ```bibtex
576
+ @misc{kusupati2024matryoshka,
577
+ title={Matryoshka Representation Learning},
578
+ author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
579
+ year={2024},
580
+ eprint={2205.13147},
581
+ archivePrefix={arXiv},
582
+ primaryClass={cs.LG}
583
+ }
584
+ ```
585
+
586
+ #### MultipleNegativesRankingLoss
587
+ ```bibtex
588
+ @misc{henderson2017efficient,
589
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
590
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
591
+ year={2017},
592
+ eprint={1705.00652},
593
+ archivePrefix={arXiv},
594
+ primaryClass={cs.CL}
595
+ }
596
+ ```
597
+
598
+ <!--
599
+ ## Glossary
600
+
601
+ *Clearly define terms in order to be accessible across audiences.*
602
+ -->
603
+
604
+ <!--
605
+ ## Model Card Authors
606
+
607
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
608
+ -->
609
+
610
+ <!--
611
+ ## Model Card Contact
612
+
613
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
614
+ -->
checkpoint-3/config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
3
+ "architectures": [
4
+ "NewModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
9
+ "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
10
+ "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
11
+ "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
12
+ "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
13
+ "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
14
+ "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
15
+ },
16
+ "classifier_dropout": null,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.1,
19
+ "hidden_size": 1024,
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 4096,
22
+ "layer_norm_eps": 1e-12,
23
+ "layer_norm_type": "layer_norm",
24
+ "logn_attention_clip1": false,
25
+ "logn_attention_scale": false,
26
+ "max_position_embeddings": 8192,
27
+ "model_type": "new",
28
+ "num_attention_heads": 16,
29
+ "num_hidden_layers": 24,
30
+ "pack_qkv": true,
31
+ "pad_token_id": 0,
32
+ "position_embedding_type": "rope",
33
+ "rope_scaling": {
34
+ "factor": 2.0,
35
+ "type": "ntk"
36
+ },
37
+ "rope_theta": 160000,
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.44.2",
40
+ "type_vocab_size": 2,
41
+ "unpad_inputs": false,
42
+ "use_memory_efficient_attention": false,
43
+ "vocab_size": 30528
44
+ }
checkpoint-3/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.1.1",
4
+ "transformers": "4.44.2",
5
+ "pytorch": "2.4.1+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
checkpoint-3/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e25d7f23a2e7540f9ef39002dcb8e13a3d41246d3a97bffabd5b7c35b2d6875
3
+ size 1736585680
checkpoint-3/modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
checkpoint-3/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60e03d1824f0aab203fd97f7175e899d315ec1d0008b65fd400f767116f72d38
3
+ size 3473340858
checkpoint-3/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:762b7bef36bb3af400c8e148b60209427ecbb410cfd44ddc4034c2ca4a28ad0f
3
+ size 14244
checkpoint-3/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccd8222543155d3562cbc0f00ce970f9119e4c80a6209d29eb44089577d97989
3
+ size 1064
checkpoint-3/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 8192,
3
+ "do_lower_case": false
4
+ }
checkpoint-3/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-3/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-3/tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "max_length": 8000,
49
+ "model_max_length": 8192,
50
+ "pad_to_multiple_of": null,
51
+ "pad_token": "[PAD]",
52
+ "pad_token_type_id": 0,
53
+ "padding_side": "right",
54
+ "sep_token": "[SEP]",
55
+ "stride": 0,
56
+ "strip_accents": null,
57
+ "tokenize_chinese_chars": true,
58
+ "tokenizer_class": "BertTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
+ "unk_token": "[UNK]"
62
+ }
checkpoint-3/trainer_state.json ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8933333333333333,
3
+ "best_model_checkpoint": "policy_gte_large_5/checkpoint-3",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 3,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_cosine_accuracy@1": 0.8266666666666667,
14
+ "eval_cosine_accuracy@10": 1.0,
15
+ "eval_cosine_accuracy@3": 0.9866666666666667,
16
+ "eval_cosine_accuracy@5": 0.9866666666666667,
17
+ "eval_cosine_map@100": 0.9022222222222223,
18
+ "eval_cosine_mrr@10": 0.9022222222222223,
19
+ "eval_cosine_ndcg@10": 0.9271276662566819,
20
+ "eval_cosine_precision@1": 0.8266666666666667,
21
+ "eval_cosine_precision@10": 0.09999999999999998,
22
+ "eval_cosine_precision@3": 0.3288888888888888,
23
+ "eval_cosine_precision@5": 0.1973333333333333,
24
+ "eval_cosine_recall@1": 0.8266666666666667,
25
+ "eval_cosine_recall@10": 1.0,
26
+ "eval_cosine_recall@3": 0.9866666666666667,
27
+ "eval_cosine_recall@5": 0.9866666666666667,
28
+ "eval_dot_accuracy@1": 0.8266666666666667,
29
+ "eval_dot_accuracy@10": 1.0,
30
+ "eval_dot_accuracy@3": 0.9866666666666667,
31
+ "eval_dot_accuracy@5": 0.9866666666666667,
32
+ "eval_dot_map@100": 0.9019047619047619,
33
+ "eval_dot_mrr@10": 0.901904761904762,
34
+ "eval_dot_ndcg@10": 0.926822681539686,
35
+ "eval_dot_precision@1": 0.8266666666666667,
36
+ "eval_dot_precision@10": 0.09999999999999998,
37
+ "eval_dot_precision@3": 0.3288888888888888,
38
+ "eval_dot_precision@5": 0.1973333333333333,
39
+ "eval_dot_recall@1": 0.8266666666666667,
40
+ "eval_dot_recall@10": 1.0,
41
+ "eval_dot_recall@3": 0.9866666666666667,
42
+ "eval_dot_recall@5": 0.9866666666666667,
43
+ "eval_runtime": 0.2111,
44
+ "eval_samples_per_second": 0.0,
45
+ "eval_steps_per_second": 0.0,
46
+ "step": 1
47
+ },
48
+ {
49
+ "epoch": 2.0,
50
+ "eval_cosine_accuracy@1": 0.88,
51
+ "eval_cosine_accuracy@10": 1.0,
52
+ "eval_cosine_accuracy@3": 0.9866666666666667,
53
+ "eval_cosine_accuracy@5": 0.9866666666666667,
54
+ "eval_cosine_map@100": 0.931111111111111,
55
+ "eval_cosine_mrr@10": 0.9311111111111112,
56
+ "eval_cosine_ndcg@10": 0.9485571427804901,
57
+ "eval_cosine_precision@1": 0.88,
58
+ "eval_cosine_precision@10": 0.09999999999999998,
59
+ "eval_cosine_precision@3": 0.3288888888888888,
60
+ "eval_cosine_precision@5": 0.1973333333333333,
61
+ "eval_cosine_recall@1": 0.88,
62
+ "eval_cosine_recall@10": 1.0,
63
+ "eval_cosine_recall@3": 0.9866666666666667,
64
+ "eval_cosine_recall@5": 0.9866666666666667,
65
+ "eval_dot_accuracy@1": 0.88,
66
+ "eval_dot_accuracy@10": 1.0,
67
+ "eval_dot_accuracy@3": 0.9866666666666667,
68
+ "eval_dot_accuracy@5": 0.9866666666666667,
69
+ "eval_dot_map@100": 0.931111111111111,
70
+ "eval_dot_mrr@10": 0.9311111111111112,
71
+ "eval_dot_ndcg@10": 0.9485571427804901,
72
+ "eval_dot_precision@1": 0.88,
73
+ "eval_dot_precision@10": 0.09999999999999998,
74
+ "eval_dot_precision@3": 0.3288888888888888,
75
+ "eval_dot_precision@5": 0.1973333333333333,
76
+ "eval_dot_recall@1": 0.88,
77
+ "eval_dot_recall@10": 1.0,
78
+ "eval_dot_recall@3": 0.9866666666666667,
79
+ "eval_dot_recall@5": 0.9866666666666667,
80
+ "eval_runtime": 0.2179,
81
+ "eval_samples_per_second": 0.0,
82
+ "eval_steps_per_second": 0.0,
83
+ "step": 2
84
+ },
85
+ {
86
+ "epoch": 3.0,
87
+ "eval_cosine_accuracy@1": 0.8933333333333333,
88
+ "eval_cosine_accuracy@10": 1.0,
89
+ "eval_cosine_accuracy@3": 0.9866666666666667,
90
+ "eval_cosine_accuracy@5": 0.9866666666666667,
91
+ "eval_cosine_map@100": 0.9396825396825398,
92
+ "eval_cosine_mrr@10": 0.9396825396825398,
93
+ "eval_cosine_ndcg@10": 0.954918824730161,
94
+ "eval_cosine_precision@1": 0.8933333333333333,
95
+ "eval_cosine_precision@10": 0.09999999999999998,
96
+ "eval_cosine_precision@3": 0.3288888888888888,
97
+ "eval_cosine_precision@5": 0.1973333333333333,
98
+ "eval_cosine_recall@1": 0.8933333333333333,
99
+ "eval_cosine_recall@10": 1.0,
100
+ "eval_cosine_recall@3": 0.9866666666666667,
101
+ "eval_cosine_recall@5": 0.9866666666666667,
102
+ "eval_dot_accuracy@1": 0.8933333333333333,
103
+ "eval_dot_accuracy@10": 1.0,
104
+ "eval_dot_accuracy@3": 0.9866666666666667,
105
+ "eval_dot_accuracy@5": 0.9866666666666667,
106
+ "eval_dot_map@100": 0.9396825396825398,
107
+ "eval_dot_mrr@10": 0.9396825396825398,
108
+ "eval_dot_ndcg@10": 0.954918824730161,
109
+ "eval_dot_precision@1": 0.8933333333333333,
110
+ "eval_dot_precision@10": 0.09999999999999998,
111
+ "eval_dot_precision@3": 0.3288888888888888,
112
+ "eval_dot_precision@5": 0.1973333333333333,
113
+ "eval_dot_recall@1": 0.8933333333333333,
114
+ "eval_dot_recall@10": 1.0,
115
+ "eval_dot_recall@3": 0.9866666666666667,
116
+ "eval_dot_recall@5": 0.9866666666666667,
117
+ "eval_runtime": 0.2139,
118
+ "eval_samples_per_second": 0.0,
119
+ "eval_steps_per_second": 0.0,
120
+ "step": 3
121
+ }
122
+ ],
123
+ "logging_steps": 10,
124
+ "max_steps": 5,
125
+ "num_input_tokens_seen": 0,
126
+ "num_train_epochs": 5,
127
+ "save_steps": 500,
128
+ "stateful_callbacks": {
129
+ "TrainerControl": {
130
+ "args": {
131
+ "should_epoch_stop": false,
132
+ "should_evaluate": false,
133
+ "should_log": false,
134
+ "should_save": true,
135
+ "should_training_stop": false
136
+ },
137
+ "attributes": {}
138
+ }
139
+ },
140
+ "total_flos": 0.0,
141
+ "train_batch_size": 32,
142
+ "trial_name": null,
143
+ "trial_params": null
144
+ }
checkpoint-3/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:251926714f5864c84dcca92a9a3cdc8bce3b6ade325cb435c1034de51951bf55
3
+ size 5496
checkpoint-3/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-4/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-4/README.md ADDED
@@ -0,0 +1,615 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Alibaba-NLP/gte-large-en-v1.5
3
+ library_name: sentence-transformers
4
+ metrics:
5
+ - cosine_accuracy@1
6
+ - cosine_accuracy@3
7
+ - cosine_accuracy@5
8
+ - cosine_accuracy@10
9
+ - cosine_precision@1
10
+ - cosine_precision@3
11
+ - cosine_precision@5
12
+ - cosine_precision@10
13
+ - cosine_recall@1
14
+ - cosine_recall@3
15
+ - cosine_recall@5
16
+ - cosine_recall@10
17
+ - cosine_ndcg@10
18
+ - cosine_mrr@10
19
+ - cosine_map@100
20
+ - dot_accuracy@1
21
+ - dot_accuracy@3
22
+ - dot_accuracy@5
23
+ - dot_accuracy@10
24
+ - dot_precision@1
25
+ - dot_precision@3
26
+ - dot_precision@5
27
+ - dot_precision@10
28
+ - dot_recall@1
29
+ - dot_recall@3
30
+ - dot_recall@5
31
+ - dot_recall@10
32
+ - dot_ndcg@10
33
+ - dot_mrr@10
34
+ - dot_map@100
35
+ pipeline_tag: sentence-similarity
36
+ tags:
37
+ - sentence-transformers
38
+ - sentence-similarity
39
+ - feature-extraction
40
+ - generated_from_trainer
41
+ - dataset_size:500
42
+ - loss:MatryoshkaLoss
43
+ - loss:MultipleNegativesRankingLoss
44
+ widget:
45
+ - source_sentence: "narrow identified goals, to avoid \"mission creep.\" Anticipated\
46
+ \ data collection should be determined to be \nstrictly necessary to the identified\
47
+ \ goals and should be minimized as much as possible. Data collected based on \n\
48
+ these identified goals and for a specific context should not be used in a different\
49
+ \ context without assessing for \nnew privacy risks and implementing appropriate\
50
+ \ mitigation measures, which may include express consent."
51
+ sentences:
52
+ - What measures should be taken if data collected for specific identified goals
53
+ is to be used in a different context?
54
+ - What measures should be taken to ensure the privacy of sensitive data and limit
55
+ access to it?
56
+ - What special requirements are mentioned in the white paper regarding national
57
+ security and defense activities in relation to trustworthy artificial intelligence?
58
+ - source_sentence: '•
59
+
60
+ Karen Levy, Assistant Professor, Department of Information Science, Cornell University
61
+
62
+
63
+
64
+ Natasha Duarte, Project Director, Upturn
65
+
66
+
67
+
68
+ Elana Zeide, Assistant Professor, University of Nebraska College of Law
69
+
70
+
71
+
72
+ Fabian Rogers, Constituent Advocate, Office of NY State Senator Jabari Brisport
73
+ and Community
74
+
75
+ Advocate and Floor Captain, Atlantic Plaza Towers Tenants Association
76
+
77
+ The individual panelists described the ways in which AI systems and other technologies
78
+ are increasingly being'
79
+ sentences:
80
+ - What are some of the challenges posed to democracy by the use of technology and
81
+ automated systems, as mentioned in the foreword?
82
+ - What principles has the U.S. Intelligence Community developed to guide personnel
83
+ in the ethical use of AI?
84
+ - What roles do the panelists hold in relation to the discussion on AI systems and
85
+ technology?
86
+ - source_sentence: "impacts disfavoring people based on their race, color, ethnicity,\
87
+ \ \nsex \n(including \npregnancy, \nchildbirth, \nand \nrelated \nmedical \nconditions,\
88
+ \ \ngender \nidentity, \nintersex \nstatus, \nand \nsexual \norientation), religion,\
89
+ \ age, national origin, disability, veteran status,"
90
+ sentences:
91
+ - What does the term "HUMAN ALTERNATIVES" refer to in the context provided?
92
+ - What types of discrimination are mentioned in the context?
93
+ - What are the expectations for automated systems in relation to public protection
94
+ from surveillance?
95
+ - source_sentence: "establish and maintain the capabilities that will allow individuals\
96
+ \ to use their own automated systems to help \nthem make consent, access, and\
97
+ \ control decisions in a complex data ecosystem. Capabilities include machine\
98
+ \ \nreadable data, standardized data formats, metadata or tags for expressing\
99
+ \ data processing permissions and \npreferences and data provenance and lineage,\
100
+ \ context of use and access-specific tags, and training models for \nassessing\
101
+ \ privacy risk."
102
+ sentences:
103
+ - What measures should be taken to ensure that independent evaluations of algorithmic
104
+ discrimination are conducted while balancing individual privacy and data access
105
+ needs?
106
+ - What capabilities are necessary for individuals to effectively manage consent
107
+ and control decisions in a complex data ecosystem?
108
+ - What are some examples of classifications that are protected by law against discrimination?
109
+ - source_sentence: "SAFE AND EFFECTIVE \nSYSTEMS \nWHAT SHOULD BE EXPECTED OF AUTOMATED\
110
+ \ SYSTEMS\nThe expectations for automated systems are meant to serve as a blueprint\
111
+ \ for the development of additional \ntechnical standards and practices that are\
112
+ \ tailored for particular sectors and contexts. \nDerived data sources tracked\
113
+ \ and reviewed carefully. Data that is derived from other data through"
114
+ sentences:
115
+ - What is the purpose of the expectations set for automated systems in relation
116
+ to technical standards and practices?
117
+ - What factors influence the appropriate application of the principles outlined
118
+ in the white paper regarding automated systems?
119
+ - What actions can a court take if a federal agency fails to comply with the Privacy
120
+ Act regarding an individual's records?
121
+ model-index:
122
+ - name: SentenceTransformer based on Alibaba-NLP/gte-large-en-v1.5
123
+ results:
124
+ - task:
125
+ type: information-retrieval
126
+ name: Information Retrieval
127
+ dataset:
128
+ name: Unknown
129
+ type: unknown
130
+ metrics:
131
+ - type: cosine_accuracy@1
132
+ value: 0.88
133
+ name: Cosine Accuracy@1
134
+ - type: cosine_accuracy@3
135
+ value: 0.9866666666666667
136
+ name: Cosine Accuracy@3
137
+ - type: cosine_accuracy@5
138
+ value: 0.9866666666666667
139
+ name: Cosine Accuracy@5
140
+ - type: cosine_accuracy@10
141
+ value: 1.0
142
+ name: Cosine Accuracy@10
143
+ - type: cosine_precision@1
144
+ value: 0.88
145
+ name: Cosine Precision@1
146
+ - type: cosine_precision@3
147
+ value: 0.3288888888888888
148
+ name: Cosine Precision@3
149
+ - type: cosine_precision@5
150
+ value: 0.1973333333333333
151
+ name: Cosine Precision@5
152
+ - type: cosine_precision@10
153
+ value: 0.09999999999999998
154
+ name: Cosine Precision@10
155
+ - type: cosine_recall@1
156
+ value: 0.88
157
+ name: Cosine Recall@1
158
+ - type: cosine_recall@3
159
+ value: 0.9866666666666667
160
+ name: Cosine Recall@3
161
+ - type: cosine_recall@5
162
+ value: 0.9866666666666667
163
+ name: Cosine Recall@5
164
+ - type: cosine_recall@10
165
+ value: 1.0
166
+ name: Cosine Recall@10
167
+ - type: cosine_ndcg@10
168
+ value: 0.9499978881111136
169
+ name: Cosine Ndcg@10
170
+ - type: cosine_mrr@10
171
+ value: 0.9330158730158731
172
+ name: Cosine Mrr@10
173
+ - type: cosine_map@100
174
+ value: 0.9330158730158731
175
+ name: Cosine Map@100
176
+ - type: dot_accuracy@1
177
+ value: 0.8933333333333333
178
+ name: Dot Accuracy@1
179
+ - type: dot_accuracy@3
180
+ value: 0.9866666666666667
181
+ name: Dot Accuracy@3
182
+ - type: dot_accuracy@5
183
+ value: 0.9866666666666667
184
+ name: Dot Accuracy@5
185
+ - type: dot_accuracy@10
186
+ value: 1.0
187
+ name: Dot Accuracy@10
188
+ - type: dot_precision@1
189
+ value: 0.8933333333333333
190
+ name: Dot Precision@1
191
+ - type: dot_precision@3
192
+ value: 0.3288888888888888
193
+ name: Dot Precision@3
194
+ - type: dot_precision@5
195
+ value: 0.1973333333333333
196
+ name: Dot Precision@5
197
+ - type: dot_precision@10
198
+ value: 0.09999999999999998
199
+ name: Dot Precision@10
200
+ - type: dot_recall@1
201
+ value: 0.8933333333333333
202
+ name: Dot Recall@1
203
+ - type: dot_recall@3
204
+ value: 0.9866666666666667
205
+ name: Dot Recall@3
206
+ - type: dot_recall@5
207
+ value: 0.9866666666666667
208
+ name: Dot Recall@5
209
+ - type: dot_recall@10
210
+ value: 1.0
211
+ name: Dot Recall@10
212
+ - type: dot_ndcg@10
213
+ value: 0.9546805786428596
214
+ name: Dot Ndcg@10
215
+ - type: dot_mrr@10
216
+ value: 0.9394444444444445
217
+ name: Dot Mrr@10
218
+ - type: dot_map@100
219
+ value: 0.9394444444444444
220
+ name: Dot Map@100
221
+ ---
222
+
223
+ # SentenceTransformer based on Alibaba-NLP/gte-large-en-v1.5
224
+
225
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) on the json dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
226
+
227
+ ## Model Details
228
+
229
+ ### Model Description
230
+ - **Model Type:** Sentence Transformer
231
+ - **Base model:** [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) <!-- at revision 104333d6af6f97649377c2afbde10a7704870c7b -->
232
+ - **Maximum Sequence Length:** 8192 tokens
233
+ - **Output Dimensionality:** 1024 dimensions
234
+ - **Similarity Function:** Cosine Similarity
235
+ - **Training Dataset:**
236
+ - json
237
+ <!-- - **Language:** Unknown -->
238
+ <!-- - **License:** Unknown -->
239
+
240
+ ### Model Sources
241
+
242
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
243
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
244
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
245
+
246
+ ### Full Model Architecture
247
+
248
+ ```
249
+ SentenceTransformer(
250
+ (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NewModel
251
+ (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
252
+ )
253
+ ```
254
+
255
+ ## Usage
256
+
257
+ ### Direct Usage (Sentence Transformers)
258
+
259
+ First install the Sentence Transformers library:
260
+
261
+ ```bash
262
+ pip install -U sentence-transformers
263
+ ```
264
+
265
+ Then you can load this model and run inference.
266
+ ```python
267
+ from sentence_transformers import SentenceTransformer
268
+
269
+ # Download from the 🤗 Hub
270
+ model = SentenceTransformer("sentence_transformers_model_id")
271
+ # Run inference
272
+ sentences = [
273
+ 'SAFE AND EFFECTIVE \nSYSTEMS \nWHAT SHOULD BE EXPECTED OF AUTOMATED SYSTEMS\nThe expectations for automated systems are meant to serve as a blueprint for the development of additional \ntechnical standards and practices that are tailored for particular sectors and contexts. \nDerived data sources tracked and reviewed carefully. Data that is derived from other data through',
274
+ 'What is the purpose of the expectations set for automated systems in relation to technical standards and practices?',
275
+ 'What factors influence the appropriate application of the principles outlined in the white paper regarding automated systems?',
276
+ ]
277
+ embeddings = model.encode(sentences)
278
+ print(embeddings.shape)
279
+ # [3, 1024]
280
+
281
+ # Get the similarity scores for the embeddings
282
+ similarities = model.similarity(embeddings, embeddings)
283
+ print(similarities.shape)
284
+ # [3, 3]
285
+ ```
286
+
287
+ <!--
288
+ ### Direct Usage (Transformers)
289
+
290
+ <details><summary>Click to see the direct usage in Transformers</summary>
291
+
292
+ </details>
293
+ -->
294
+
295
+ <!--
296
+ ### Downstream Usage (Sentence Transformers)
297
+
298
+ You can finetune this model on your own dataset.
299
+
300
+ <details><summary>Click to expand</summary>
301
+
302
+ </details>
303
+ -->
304
+
305
+ <!--
306
+ ### Out-of-Scope Use
307
+
308
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
309
+ -->
310
+
311
+ ## Evaluation
312
+
313
+ ### Metrics
314
+
315
+ #### Information Retrieval
316
+
317
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
318
+
319
+ | Metric | Value |
320
+ |:--------------------|:----------|
321
+ | cosine_accuracy@1 | 0.88 |
322
+ | cosine_accuracy@3 | 0.9867 |
323
+ | cosine_accuracy@5 | 0.9867 |
324
+ | cosine_accuracy@10 | 1.0 |
325
+ | cosine_precision@1 | 0.88 |
326
+ | cosine_precision@3 | 0.3289 |
327
+ | cosine_precision@5 | 0.1973 |
328
+ | cosine_precision@10 | 0.1 |
329
+ | cosine_recall@1 | 0.88 |
330
+ | cosine_recall@3 | 0.9867 |
331
+ | cosine_recall@5 | 0.9867 |
332
+ | cosine_recall@10 | 1.0 |
333
+ | cosine_ndcg@10 | 0.95 |
334
+ | cosine_mrr@10 | 0.933 |
335
+ | **cosine_map@100** | **0.933** |
336
+ | dot_accuracy@1 | 0.8933 |
337
+ | dot_accuracy@3 | 0.9867 |
338
+ | dot_accuracy@5 | 0.9867 |
339
+ | dot_accuracy@10 | 1.0 |
340
+ | dot_precision@1 | 0.8933 |
341
+ | dot_precision@3 | 0.3289 |
342
+ | dot_precision@5 | 0.1973 |
343
+ | dot_precision@10 | 0.1 |
344
+ | dot_recall@1 | 0.8933 |
345
+ | dot_recall@3 | 0.9867 |
346
+ | dot_recall@5 | 0.9867 |
347
+ | dot_recall@10 | 1.0 |
348
+ | dot_ndcg@10 | 0.9547 |
349
+ | dot_mrr@10 | 0.9394 |
350
+ | dot_map@100 | 0.9394 |
351
+
352
+ <!--
353
+ ## Bias, Risks and Limitations
354
+
355
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
356
+ -->
357
+
358
+ <!--
359
+ ### Recommendations
360
+
361
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
362
+ -->
363
+
364
+ ## Training Details
365
+
366
+ ### Training Dataset
367
+
368
+ #### json
369
+
370
+ * Dataset: json
371
+ * Size: 500 training samples
372
+ * Columns: <code>anchor</code> and <code>positive</code>
373
+ * Approximate statistics based on the first 500 samples:
374
+ | | anchor | positive |
375
+ |:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
376
+ | type | string | string |
377
+ | details | <ul><li>min: 12 tokens</li><li>mean: 21.76 tokens</li><li>max: 37 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 78.92 tokens</li><li>max: 104 tokens</li></ul> |
378
+ * Samples:
379
+ | anchor | positive |
380
+ |:--------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
381
+ | <code>What is the primary purpose of the AI Bill of Rights outlined in the October 2022 blueprint?</code> | <code>BLUEPRINT FOR AN <br>AI BILL OF <br>RIGHTS <br>MAKING AUTOMATED <br>SYSTEMS WORK FOR <br>THE AMERICAN PEOPLE <br>OCTOBER 2022</code> |
382
+ | <code>What was the purpose of the Blueprint for an AI Bill of Rights published by the White House Office of Science and Technology Policy?</code> | <code>About this Document <br>The Blueprint for an AI Bill of Rights: Making Automated Systems Work for the American People was <br>published by the White House Office of Science and Technology Policy in October 2022. This framework was <br>released one year after OSTP announced the launch of a process to develop “a bill of rights for an AI-powered</code> |
383
+ | <code>What initiative did the OSTP announce a year prior to the release of the framework for a bill of rights for an AI-powered world?</code> | <code>released one year after OSTP announced the launch of a process to develop “a bill of rights for an AI-powered <br>world.” Its release follows a year of public engagement to inform this initiative. The framework is available <br>online at: https://www.whitehouse.gov/ostp/ai-bill-of-rights <br>About the Office of Science and Technology Policy <br>The Office of Science and Technology Policy (OSTP) was established by the National Science and Technology</code> |
384
+ * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
385
+ ```json
386
+ {
387
+ "loss": "MultipleNegativesRankingLoss",
388
+ "matryoshka_dims": [
389
+ 1024,
390
+ 512,
391
+ 256,
392
+ 128,
393
+ 64
394
+ ],
395
+ "matryoshka_weights": [
396
+ 1,
397
+ 1,
398
+ 1,
399
+ 1,
400
+ 1
401
+ ],
402
+ "n_dims_per_step": -1
403
+ }
404
+ ```
405
+
406
+ ### Training Hyperparameters
407
+ #### Non-Default Hyperparameters
408
+
409
+ - `eval_strategy`: epoch
410
+ - `per_device_train_batch_size`: 32
411
+ - `per_device_eval_batch_size`: 16
412
+ - `gradient_accumulation_steps`: 16
413
+ - `learning_rate`: 2e-05
414
+ - `num_train_epochs`: 5
415
+ - `lr_scheduler_type`: cosine
416
+ - `warmup_ratio`: 0.1
417
+ - `bf16`: True
418
+ - `tf32`: True
419
+ - `load_best_model_at_end`: True
420
+ - `optim`: adamw_torch_fused
421
+ - `batch_sampler`: no_duplicates
422
+
423
+ #### All Hyperparameters
424
+ <details><summary>Click to expand</summary>
425
+
426
+ - `overwrite_output_dir`: False
427
+ - `do_predict`: False
428
+ - `eval_strategy`: epoch
429
+ - `prediction_loss_only`: True
430
+ - `per_device_train_batch_size`: 32
431
+ - `per_device_eval_batch_size`: 16
432
+ - `per_gpu_train_batch_size`: None
433
+ - `per_gpu_eval_batch_size`: None
434
+ - `gradient_accumulation_steps`: 16
435
+ - `eval_accumulation_steps`: None
436
+ - `torch_empty_cache_steps`: None
437
+ - `learning_rate`: 2e-05
438
+ - `weight_decay`: 0.0
439
+ - `adam_beta1`: 0.9
440
+ - `adam_beta2`: 0.999
441
+ - `adam_epsilon`: 1e-08
442
+ - `max_grad_norm`: 1.0
443
+ - `num_train_epochs`: 5
444
+ - `max_steps`: -1
445
+ - `lr_scheduler_type`: cosine
446
+ - `lr_scheduler_kwargs`: {}
447
+ - `warmup_ratio`: 0.1
448
+ - `warmup_steps`: 0
449
+ - `log_level`: passive
450
+ - `log_level_replica`: warning
451
+ - `log_on_each_node`: True
452
+ - `logging_nan_inf_filter`: True
453
+ - `save_safetensors`: True
454
+ - `save_on_each_node`: False
455
+ - `save_only_model`: False
456
+ - `restore_callback_states_from_checkpoint`: False
457
+ - `no_cuda`: False
458
+ - `use_cpu`: False
459
+ - `use_mps_device`: False
460
+ - `seed`: 42
461
+ - `data_seed`: None
462
+ - `jit_mode_eval`: False
463
+ - `use_ipex`: False
464
+ - `bf16`: True
465
+ - `fp16`: False
466
+ - `fp16_opt_level`: O1
467
+ - `half_precision_backend`: auto
468
+ - `bf16_full_eval`: False
469
+ - `fp16_full_eval`: False
470
+ - `tf32`: True
471
+ - `local_rank`: 0
472
+ - `ddp_backend`: None
473
+ - `tpu_num_cores`: None
474
+ - `tpu_metrics_debug`: False
475
+ - `debug`: []
476
+ - `dataloader_drop_last`: False
477
+ - `dataloader_num_workers`: 0
478
+ - `dataloader_prefetch_factor`: None
479
+ - `past_index`: -1
480
+ - `disable_tqdm`: False
481
+ - `remove_unused_columns`: True
482
+ - `label_names`: None
483
+ - `load_best_model_at_end`: True
484
+ - `ignore_data_skip`: False
485
+ - `fsdp`: []
486
+ - `fsdp_min_num_params`: 0
487
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
488
+ - `fsdp_transformer_layer_cls_to_wrap`: None
489
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
490
+ - `deepspeed`: None
491
+ - `label_smoothing_factor`: 0.0
492
+ - `optim`: adamw_torch_fused
493
+ - `optim_args`: None
494
+ - `adafactor`: False
495
+ - `group_by_length`: False
496
+ - `length_column_name`: length
497
+ - `ddp_find_unused_parameters`: None
498
+ - `ddp_bucket_cap_mb`: None
499
+ - `ddp_broadcast_buffers`: False
500
+ - `dataloader_pin_memory`: True
501
+ - `dataloader_persistent_workers`: False
502
+ - `skip_memory_metrics`: True
503
+ - `use_legacy_prediction_loop`: False
504
+ - `push_to_hub`: False
505
+ - `resume_from_checkpoint`: None
506
+ - `hub_model_id`: None
507
+ - `hub_strategy`: every_save
508
+ - `hub_private_repo`: False
509
+ - `hub_always_push`: False
510
+ - `gradient_checkpointing`: False
511
+ - `gradient_checkpointing_kwargs`: None
512
+ - `include_inputs_for_metrics`: False
513
+ - `eval_do_concat_batches`: True
514
+ - `fp16_backend`: auto
515
+ - `push_to_hub_model_id`: None
516
+ - `push_to_hub_organization`: None
517
+ - `mp_parameters`:
518
+ - `auto_find_batch_size`: False
519
+ - `full_determinism`: False
520
+ - `torchdynamo`: None
521
+ - `ray_scope`: last
522
+ - `ddp_timeout`: 1800
523
+ - `torch_compile`: False
524
+ - `torch_compile_backend`: None
525
+ - `torch_compile_mode`: None
526
+ - `dispatch_batches`: None
527
+ - `split_batches`: None
528
+ - `include_tokens_per_second`: False
529
+ - `include_num_input_tokens_seen`: False
530
+ - `neftune_noise_alpha`: None
531
+ - `optim_target_modules`: None
532
+ - `batch_eval_metrics`: False
533
+ - `eval_on_start`: False
534
+ - `eval_use_gather_object`: False
535
+ - `batch_sampler`: no_duplicates
536
+ - `multi_dataset_batch_sampler`: proportional
537
+
538
+ </details>
539
+
540
+ ### Training Logs
541
+ | Epoch | Step | cosine_map@100 |
542
+ |:-----:|:----:|:--------------:|
543
+ | 1.0 | 1 | 0.9022 |
544
+ | 2.0 | 2 | 0.9311 |
545
+ | 3.0 | 3 | 0.9397 |
546
+ | 4.0 | 4 | 0.9330 |
547
+
548
+
549
+ ### Framework Versions
550
+ - Python: 3.10.12
551
+ - Sentence Transformers: 3.1.1
552
+ - Transformers: 4.44.2
553
+ - PyTorch: 2.4.1+cu121
554
+ - Accelerate: 0.34.2
555
+ - Datasets: 3.0.1
556
+ - Tokenizers: 0.19.1
557
+
558
+ ## Citation
559
+
560
+ ### BibTeX
561
+
562
+ #### Sentence Transformers
563
+ ```bibtex
564
+ @inproceedings{reimers-2019-sentence-bert,
565
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
566
+ author = "Reimers, Nils and Gurevych, Iryna",
567
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
568
+ month = "11",
569
+ year = "2019",
570
+ publisher = "Association for Computational Linguistics",
571
+ url = "https://arxiv.org/abs/1908.10084",
572
+ }
573
+ ```
574
+
575
+ #### MatryoshkaLoss
576
+ ```bibtex
577
+ @misc{kusupati2024matryoshka,
578
+ title={Matryoshka Representation Learning},
579
+ author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
580
+ year={2024},
581
+ eprint={2205.13147},
582
+ archivePrefix={arXiv},
583
+ primaryClass={cs.LG}
584
+ }
585
+ ```
586
+
587
+ #### MultipleNegativesRankingLoss
588
+ ```bibtex
589
+ @misc{henderson2017efficient,
590
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
591
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
592
+ year={2017},
593
+ eprint={1705.00652},
594
+ archivePrefix={arXiv},
595
+ primaryClass={cs.CL}
596
+ }
597
+ ```
598
+
599
+ <!--
600
+ ## Glossary
601
+
602
+ *Clearly define terms in order to be accessible across audiences.*
603
+ -->
604
+
605
+ <!--
606
+ ## Model Card Authors
607
+
608
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
609
+ -->
610
+
611
+ <!--
612
+ ## Model Card Contact
613
+
614
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
615
+ -->
checkpoint-4/config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
3
+ "architectures": [
4
+ "NewModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
9
+ "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
10
+ "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
11
+ "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
12
+ "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
13
+ "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
14
+ "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
15
+ },
16
+ "classifier_dropout": null,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.1,
19
+ "hidden_size": 1024,
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 4096,
22
+ "layer_norm_eps": 1e-12,
23
+ "layer_norm_type": "layer_norm",
24
+ "logn_attention_clip1": false,
25
+ "logn_attention_scale": false,
26
+ "max_position_embeddings": 8192,
27
+ "model_type": "new",
28
+ "num_attention_heads": 16,
29
+ "num_hidden_layers": 24,
30
+ "pack_qkv": true,
31
+ "pad_token_id": 0,
32
+ "position_embedding_type": "rope",
33
+ "rope_scaling": {
34
+ "factor": 2.0,
35
+ "type": "ntk"
36
+ },
37
+ "rope_theta": 160000,
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.44.2",
40
+ "type_vocab_size": 2,
41
+ "unpad_inputs": false,
42
+ "use_memory_efficient_attention": false,
43
+ "vocab_size": 30528
44
+ }
checkpoint-4/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.1.1",
4
+ "transformers": "4.44.2",
5
+ "pytorch": "2.4.1+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
checkpoint-4/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2dd19d29eb077b0b2aa8a7a73a034c05912d5d083363baffe44cafa09821986
3
+ size 1736585680
checkpoint-4/modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
checkpoint-4/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7c690771c71d666185804e2b94e0d4e1e2909922b4e56b4ad7f5c044bfaca05
3
+ size 3473340858
checkpoint-4/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2d302f71bde693022fefcd308e8a5dc0a19a3e4086b474bdb848c73c83230e4
3
+ size 14244
checkpoint-4/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8a7ce1e35a788cd0ac83e915d154e450254f59d4c3ee42cfc7afc0555e76af5
3
+ size 1064
checkpoint-4/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 8192,
3
+ "do_lower_case": false
4
+ }
checkpoint-4/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-4/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-4/tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "max_length": 8000,
49
+ "model_max_length": 8192,
50
+ "pad_to_multiple_of": null,
51
+ "pad_token": "[PAD]",
52
+ "pad_token_type_id": 0,
53
+ "padding_side": "right",
54
+ "sep_token": "[SEP]",
55
+ "stride": 0,
56
+ "strip_accents": null,
57
+ "tokenize_chinese_chars": true,
58
+ "tokenizer_class": "BertTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
+ "unk_token": "[UNK]"
62
+ }
checkpoint-4/trainer_state.json ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8933333333333333,
3
+ "best_model_checkpoint": "policy_gte_large_5/checkpoint-3",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_cosine_accuracy@1": 0.8266666666666667,
14
+ "eval_cosine_accuracy@10": 1.0,
15
+ "eval_cosine_accuracy@3": 0.9866666666666667,
16
+ "eval_cosine_accuracy@5": 0.9866666666666667,
17
+ "eval_cosine_map@100": 0.9022222222222223,
18
+ "eval_cosine_mrr@10": 0.9022222222222223,
19
+ "eval_cosine_ndcg@10": 0.9271276662566819,
20
+ "eval_cosine_precision@1": 0.8266666666666667,
21
+ "eval_cosine_precision@10": 0.09999999999999998,
22
+ "eval_cosine_precision@3": 0.3288888888888888,
23
+ "eval_cosine_precision@5": 0.1973333333333333,
24
+ "eval_cosine_recall@1": 0.8266666666666667,
25
+ "eval_cosine_recall@10": 1.0,
26
+ "eval_cosine_recall@3": 0.9866666666666667,
27
+ "eval_cosine_recall@5": 0.9866666666666667,
28
+ "eval_dot_accuracy@1": 0.8266666666666667,
29
+ "eval_dot_accuracy@10": 1.0,
30
+ "eval_dot_accuracy@3": 0.9866666666666667,
31
+ "eval_dot_accuracy@5": 0.9866666666666667,
32
+ "eval_dot_map@100": 0.9019047619047619,
33
+ "eval_dot_mrr@10": 0.901904761904762,
34
+ "eval_dot_ndcg@10": 0.926822681539686,
35
+ "eval_dot_precision@1": 0.8266666666666667,
36
+ "eval_dot_precision@10": 0.09999999999999998,
37
+ "eval_dot_precision@3": 0.3288888888888888,
38
+ "eval_dot_precision@5": 0.1973333333333333,
39
+ "eval_dot_recall@1": 0.8266666666666667,
40
+ "eval_dot_recall@10": 1.0,
41
+ "eval_dot_recall@3": 0.9866666666666667,
42
+ "eval_dot_recall@5": 0.9866666666666667,
43
+ "eval_runtime": 0.2111,
44
+ "eval_samples_per_second": 0.0,
45
+ "eval_steps_per_second": 0.0,
46
+ "step": 1
47
+ },
48
+ {
49
+ "epoch": 2.0,
50
+ "eval_cosine_accuracy@1": 0.88,
51
+ "eval_cosine_accuracy@10": 1.0,
52
+ "eval_cosine_accuracy@3": 0.9866666666666667,
53
+ "eval_cosine_accuracy@5": 0.9866666666666667,
54
+ "eval_cosine_map@100": 0.931111111111111,
55
+ "eval_cosine_mrr@10": 0.9311111111111112,
56
+ "eval_cosine_ndcg@10": 0.9485571427804901,
57
+ "eval_cosine_precision@1": 0.88,
58
+ "eval_cosine_precision@10": 0.09999999999999998,
59
+ "eval_cosine_precision@3": 0.3288888888888888,
60
+ "eval_cosine_precision@5": 0.1973333333333333,
61
+ "eval_cosine_recall@1": 0.88,
62
+ "eval_cosine_recall@10": 1.0,
63
+ "eval_cosine_recall@3": 0.9866666666666667,
64
+ "eval_cosine_recall@5": 0.9866666666666667,
65
+ "eval_dot_accuracy@1": 0.88,
66
+ "eval_dot_accuracy@10": 1.0,
67
+ "eval_dot_accuracy@3": 0.9866666666666667,
68
+ "eval_dot_accuracy@5": 0.9866666666666667,
69
+ "eval_dot_map@100": 0.931111111111111,
70
+ "eval_dot_mrr@10": 0.9311111111111112,
71
+ "eval_dot_ndcg@10": 0.9485571427804901,
72
+ "eval_dot_precision@1": 0.88,
73
+ "eval_dot_precision@10": 0.09999999999999998,
74
+ "eval_dot_precision@3": 0.3288888888888888,
75
+ "eval_dot_precision@5": 0.1973333333333333,
76
+ "eval_dot_recall@1": 0.88,
77
+ "eval_dot_recall@10": 1.0,
78
+ "eval_dot_recall@3": 0.9866666666666667,
79
+ "eval_dot_recall@5": 0.9866666666666667,
80
+ "eval_runtime": 0.2179,
81
+ "eval_samples_per_second": 0.0,
82
+ "eval_steps_per_second": 0.0,
83
+ "step": 2
84
+ },
85
+ {
86
+ "epoch": 3.0,
87
+ "eval_cosine_accuracy@1": 0.8933333333333333,
88
+ "eval_cosine_accuracy@10": 1.0,
89
+ "eval_cosine_accuracy@3": 0.9866666666666667,
90
+ "eval_cosine_accuracy@5": 0.9866666666666667,
91
+ "eval_cosine_map@100": 0.9396825396825398,
92
+ "eval_cosine_mrr@10": 0.9396825396825398,
93
+ "eval_cosine_ndcg@10": 0.954918824730161,
94
+ "eval_cosine_precision@1": 0.8933333333333333,
95
+ "eval_cosine_precision@10": 0.09999999999999998,
96
+ "eval_cosine_precision@3": 0.3288888888888888,
97
+ "eval_cosine_precision@5": 0.1973333333333333,
98
+ "eval_cosine_recall@1": 0.8933333333333333,
99
+ "eval_cosine_recall@10": 1.0,
100
+ "eval_cosine_recall@3": 0.9866666666666667,
101
+ "eval_cosine_recall@5": 0.9866666666666667,
102
+ "eval_dot_accuracy@1": 0.8933333333333333,
103
+ "eval_dot_accuracy@10": 1.0,
104
+ "eval_dot_accuracy@3": 0.9866666666666667,
105
+ "eval_dot_accuracy@5": 0.9866666666666667,
106
+ "eval_dot_map@100": 0.9396825396825398,
107
+ "eval_dot_mrr@10": 0.9396825396825398,
108
+ "eval_dot_ndcg@10": 0.954918824730161,
109
+ "eval_dot_precision@1": 0.8933333333333333,
110
+ "eval_dot_precision@10": 0.09999999999999998,
111
+ "eval_dot_precision@3": 0.3288888888888888,
112
+ "eval_dot_precision@5": 0.1973333333333333,
113
+ "eval_dot_recall@1": 0.8933333333333333,
114
+ "eval_dot_recall@10": 1.0,
115
+ "eval_dot_recall@3": 0.9866666666666667,
116
+ "eval_dot_recall@5": 0.9866666666666667,
117
+ "eval_runtime": 0.2139,
118
+ "eval_samples_per_second": 0.0,
119
+ "eval_steps_per_second": 0.0,
120
+ "step": 3
121
+ },
122
+ {
123
+ "epoch": 4.0,
124
+ "eval_cosine_accuracy@1": 0.88,
125
+ "eval_cosine_accuracy@10": 1.0,
126
+ "eval_cosine_accuracy@3": 0.9866666666666667,
127
+ "eval_cosine_accuracy@5": 0.9866666666666667,
128
+ "eval_cosine_map@100": 0.9330158730158731,
129
+ "eval_cosine_mrr@10": 0.9330158730158731,
130
+ "eval_cosine_ndcg@10": 0.9499978881111136,
131
+ "eval_cosine_precision@1": 0.88,
132
+ "eval_cosine_precision@10": 0.09999999999999998,
133
+ "eval_cosine_precision@3": 0.3288888888888888,
134
+ "eval_cosine_precision@5": 0.1973333333333333,
135
+ "eval_cosine_recall@1": 0.88,
136
+ "eval_cosine_recall@10": 1.0,
137
+ "eval_cosine_recall@3": 0.9866666666666667,
138
+ "eval_cosine_recall@5": 0.9866666666666667,
139
+ "eval_dot_accuracy@1": 0.8933333333333333,
140
+ "eval_dot_accuracy@10": 1.0,
141
+ "eval_dot_accuracy@3": 0.9866666666666667,
142
+ "eval_dot_accuracy@5": 0.9866666666666667,
143
+ "eval_dot_map@100": 0.9394444444444444,
144
+ "eval_dot_mrr@10": 0.9394444444444445,
145
+ "eval_dot_ndcg@10": 0.9546805786428596,
146
+ "eval_dot_precision@1": 0.8933333333333333,
147
+ "eval_dot_precision@10": 0.09999999999999998,
148
+ "eval_dot_precision@3": 0.3288888888888888,
149
+ "eval_dot_precision@5": 0.1973333333333333,
150
+ "eval_dot_recall@1": 0.8933333333333333,
151
+ "eval_dot_recall@10": 1.0,
152
+ "eval_dot_recall@3": 0.9866666666666667,
153
+ "eval_dot_recall@5": 0.9866666666666667,
154
+ "eval_runtime": 0.2064,
155
+ "eval_samples_per_second": 0.0,
156
+ "eval_steps_per_second": 0.0,
157
+ "step": 4
158
+ }
159
+ ],
160
+ "logging_steps": 10,
161
+ "max_steps": 5,
162
+ "num_input_tokens_seen": 0,
163
+ "num_train_epochs": 5,
164
+ "save_steps": 500,
165
+ "stateful_callbacks": {
166
+ "TrainerControl": {
167
+ "args": {
168
+ "should_epoch_stop": false,
169
+ "should_evaluate": false,
170
+ "should_log": false,
171
+ "should_save": true,
172
+ "should_training_stop": false
173
+ },
174
+ "attributes": {}
175
+ }
176
+ },
177
+ "total_flos": 0.0,
178
+ "train_batch_size": 32,
179
+ "trial_name": null,
180
+ "trial_params": null
181
+ }
checkpoint-4/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:251926714f5864c84dcca92a9a3cdc8bce3b6ade325cb435c1034de51951bf55
3
+ size 5496
checkpoint-4/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-5/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-5/README.md ADDED
@@ -0,0 +1,616 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Alibaba-NLP/gte-large-en-v1.5
3
+ library_name: sentence-transformers
4
+ metrics:
5
+ - cosine_accuracy@1
6
+ - cosine_accuracy@3
7
+ - cosine_accuracy@5
8
+ - cosine_accuracy@10
9
+ - cosine_precision@1
10
+ - cosine_precision@3
11
+ - cosine_precision@5
12
+ - cosine_precision@10
13
+ - cosine_recall@1
14
+ - cosine_recall@3
15
+ - cosine_recall@5
16
+ - cosine_recall@10
17
+ - cosine_ndcg@10
18
+ - cosine_mrr@10
19
+ - cosine_map@100
20
+ - dot_accuracy@1
21
+ - dot_accuracy@3
22
+ - dot_accuracy@5
23
+ - dot_accuracy@10
24
+ - dot_precision@1
25
+ - dot_precision@3
26
+ - dot_precision@5
27
+ - dot_precision@10
28
+ - dot_recall@1
29
+ - dot_recall@3
30
+ - dot_recall@5
31
+ - dot_recall@10
32
+ - dot_ndcg@10
33
+ - dot_mrr@10
34
+ - dot_map@100
35
+ pipeline_tag: sentence-similarity
36
+ tags:
37
+ - sentence-transformers
38
+ - sentence-similarity
39
+ - feature-extraction
40
+ - generated_from_trainer
41
+ - dataset_size:500
42
+ - loss:MatryoshkaLoss
43
+ - loss:MultipleNegativesRankingLoss
44
+ widget:
45
+ - source_sentence: "narrow identified goals, to avoid \"mission creep.\" Anticipated\
46
+ \ data collection should be determined to be \nstrictly necessary to the identified\
47
+ \ goals and should be minimized as much as possible. Data collected based on \n\
48
+ these identified goals and for a specific context should not be used in a different\
49
+ \ context without assessing for \nnew privacy risks and implementing appropriate\
50
+ \ mitigation measures, which may include express consent."
51
+ sentences:
52
+ - What measures should be taken if data collected for specific identified goals
53
+ is to be used in a different context?
54
+ - What measures should be taken to ensure the privacy of sensitive data and limit
55
+ access to it?
56
+ - What special requirements are mentioned in the white paper regarding national
57
+ security and defense activities in relation to trustworthy artificial intelligence?
58
+ - source_sentence: '•
59
+
60
+ Karen Levy, Assistant Professor, Department of Information Science, Cornell University
61
+
62
+
63
+
64
+ Natasha Duarte, Project Director, Upturn
65
+
66
+
67
+
68
+ Elana Zeide, Assistant Professor, University of Nebraska College of Law
69
+
70
+
71
+
72
+ Fabian Rogers, Constituent Advocate, Office of NY State Senator Jabari Brisport
73
+ and Community
74
+
75
+ Advocate and Floor Captain, Atlantic Plaza Towers Tenants Association
76
+
77
+ The individual panelists described the ways in which AI systems and other technologies
78
+ are increasingly being'
79
+ sentences:
80
+ - What are some of the challenges posed to democracy by the use of technology and
81
+ automated systems, as mentioned in the foreword?
82
+ - What principles has the U.S. Intelligence Community developed to guide personnel
83
+ in the ethical use of AI?
84
+ - What roles do the panelists hold in relation to the discussion on AI systems and
85
+ technology?
86
+ - source_sentence: "impacts disfavoring people based on their race, color, ethnicity,\
87
+ \ \nsex \n(including \npregnancy, \nchildbirth, \nand \nrelated \nmedical \nconditions,\
88
+ \ \ngender \nidentity, \nintersex \nstatus, \nand \nsexual \norientation), religion,\
89
+ \ age, national origin, disability, veteran status,"
90
+ sentences:
91
+ - What does the term "HUMAN ALTERNATIVES" refer to in the context provided?
92
+ - What types of discrimination are mentioned in the context?
93
+ - What are the expectations for automated systems in relation to public protection
94
+ from surveillance?
95
+ - source_sentence: "establish and maintain the capabilities that will allow individuals\
96
+ \ to use their own automated systems to help \nthem make consent, access, and\
97
+ \ control decisions in a complex data ecosystem. Capabilities include machine\
98
+ \ \nreadable data, standardized data formats, metadata or tags for expressing\
99
+ \ data processing permissions and \npreferences and data provenance and lineage,\
100
+ \ context of use and access-specific tags, and training models for \nassessing\
101
+ \ privacy risk."
102
+ sentences:
103
+ - What measures should be taken to ensure that independent evaluations of algorithmic
104
+ discrimination are conducted while balancing individual privacy and data access
105
+ needs?
106
+ - What capabilities are necessary for individuals to effectively manage consent
107
+ and control decisions in a complex data ecosystem?
108
+ - What are some examples of classifications that are protected by law against discrimination?
109
+ - source_sentence: "SAFE AND EFFECTIVE \nSYSTEMS \nWHAT SHOULD BE EXPECTED OF AUTOMATED\
110
+ \ SYSTEMS\nThe expectations for automated systems are meant to serve as a blueprint\
111
+ \ for the development of additional \ntechnical standards and practices that are\
112
+ \ tailored for particular sectors and contexts. \nDerived data sources tracked\
113
+ \ and reviewed carefully. Data that is derived from other data through"
114
+ sentences:
115
+ - What is the purpose of the expectations set for automated systems in relation
116
+ to technical standards and practices?
117
+ - What factors influence the appropriate application of the principles outlined
118
+ in the white paper regarding automated systems?
119
+ - What actions can a court take if a federal agency fails to comply with the Privacy
120
+ Act regarding an individual's records?
121
+ model-index:
122
+ - name: SentenceTransformer based on Alibaba-NLP/gte-large-en-v1.5
123
+ results:
124
+ - task:
125
+ type: information-retrieval
126
+ name: Information Retrieval
127
+ dataset:
128
+ name: Unknown
129
+ type: unknown
130
+ metrics:
131
+ - type: cosine_accuracy@1
132
+ value: 0.88
133
+ name: Cosine Accuracy@1
134
+ - type: cosine_accuracy@3
135
+ value: 0.9866666666666667
136
+ name: Cosine Accuracy@3
137
+ - type: cosine_accuracy@5
138
+ value: 0.9866666666666667
139
+ name: Cosine Accuracy@5
140
+ - type: cosine_accuracy@10
141
+ value: 1.0
142
+ name: Cosine Accuracy@10
143
+ - type: cosine_precision@1
144
+ value: 0.88
145
+ name: Cosine Precision@1
146
+ - type: cosine_precision@3
147
+ value: 0.3288888888888888
148
+ name: Cosine Precision@3
149
+ - type: cosine_precision@5
150
+ value: 0.1973333333333333
151
+ name: Cosine Precision@5
152
+ - type: cosine_precision@10
153
+ value: 0.09999999999999998
154
+ name: Cosine Precision@10
155
+ - type: cosine_recall@1
156
+ value: 0.88
157
+ name: Cosine Recall@1
158
+ - type: cosine_recall@3
159
+ value: 0.9866666666666667
160
+ name: Cosine Recall@3
161
+ - type: cosine_recall@5
162
+ value: 0.9866666666666667
163
+ name: Cosine Recall@5
164
+ - type: cosine_recall@10
165
+ value: 1.0
166
+ name: Cosine Recall@10
167
+ - type: cosine_ndcg@10
168
+ value: 0.9499978881111136
169
+ name: Cosine Ndcg@10
170
+ - type: cosine_mrr@10
171
+ value: 0.9330158730158731
172
+ name: Cosine Mrr@10
173
+ - type: cosine_map@100
174
+ value: 0.9330158730158731
175
+ name: Cosine Map@100
176
+ - type: dot_accuracy@1
177
+ value: 0.88
178
+ name: Dot Accuracy@1
179
+ - type: dot_accuracy@3
180
+ value: 0.9866666666666667
181
+ name: Dot Accuracy@3
182
+ - type: dot_accuracy@5
183
+ value: 0.9866666666666667
184
+ name: Dot Accuracy@5
185
+ - type: dot_accuracy@10
186
+ value: 1.0
187
+ name: Dot Accuracy@10
188
+ - type: dot_precision@1
189
+ value: 0.88
190
+ name: Dot Precision@1
191
+ - type: dot_precision@3
192
+ value: 0.3288888888888888
193
+ name: Dot Precision@3
194
+ - type: dot_precision@5
195
+ value: 0.1973333333333333
196
+ name: Dot Precision@5
197
+ - type: dot_precision@10
198
+ value: 0.09999999999999998
199
+ name: Dot Precision@10
200
+ - type: dot_recall@1
201
+ value: 0.88
202
+ name: Dot Recall@1
203
+ - type: dot_recall@3
204
+ value: 0.9866666666666667
205
+ name: Dot Recall@3
206
+ - type: dot_recall@5
207
+ value: 0.9866666666666667
208
+ name: Dot Recall@5
209
+ - type: dot_recall@10
210
+ value: 1.0
211
+ name: Dot Recall@10
212
+ - type: dot_ndcg@10
213
+ value: 0.9499978881111136
214
+ name: Dot Ndcg@10
215
+ - type: dot_mrr@10
216
+ value: 0.9330158730158731
217
+ name: Dot Mrr@10
218
+ - type: dot_map@100
219
+ value: 0.9330158730158731
220
+ name: Dot Map@100
221
+ ---
222
+
223
+ # SentenceTransformer based on Alibaba-NLP/gte-large-en-v1.5
224
+
225
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) on the json dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
226
+
227
+ ## Model Details
228
+
229
+ ### Model Description
230
+ - **Model Type:** Sentence Transformer
231
+ - **Base model:** [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) <!-- at revision 104333d6af6f97649377c2afbde10a7704870c7b -->
232
+ - **Maximum Sequence Length:** 8192 tokens
233
+ - **Output Dimensionality:** 1024 dimensions
234
+ - **Similarity Function:** Cosine Similarity
235
+ - **Training Dataset:**
236
+ - json
237
+ <!-- - **Language:** Unknown -->
238
+ <!-- - **License:** Unknown -->
239
+
240
+ ### Model Sources
241
+
242
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
243
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
244
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
245
+
246
+ ### Full Model Architecture
247
+
248
+ ```
249
+ SentenceTransformer(
250
+ (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NewModel
251
+ (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
252
+ )
253
+ ```
254
+
255
+ ## Usage
256
+
257
+ ### Direct Usage (Sentence Transformers)
258
+
259
+ First install the Sentence Transformers library:
260
+
261
+ ```bash
262
+ pip install -U sentence-transformers
263
+ ```
264
+
265
+ Then you can load this model and run inference.
266
+ ```python
267
+ from sentence_transformers import SentenceTransformer
268
+
269
+ # Download from the 🤗 Hub
270
+ model = SentenceTransformer("sentence_transformers_model_id")
271
+ # Run inference
272
+ sentences = [
273
+ 'SAFE AND EFFECTIVE \nSYSTEMS \nWHAT SHOULD BE EXPECTED OF AUTOMATED SYSTEMS\nThe expectations for automated systems are meant to serve as a blueprint for the development of additional \ntechnical standards and practices that are tailored for particular sectors and contexts. \nDerived data sources tracked and reviewed carefully. Data that is derived from other data through',
274
+ 'What is the purpose of the expectations set for automated systems in relation to technical standards and practices?',
275
+ 'What factors influence the appropriate application of the principles outlined in the white paper regarding automated systems?',
276
+ ]
277
+ embeddings = model.encode(sentences)
278
+ print(embeddings.shape)
279
+ # [3, 1024]
280
+
281
+ # Get the similarity scores for the embeddings
282
+ similarities = model.similarity(embeddings, embeddings)
283
+ print(similarities.shape)
284
+ # [3, 3]
285
+ ```
286
+
287
+ <!--
288
+ ### Direct Usage (Transformers)
289
+
290
+ <details><summary>Click to see the direct usage in Transformers</summary>
291
+
292
+ </details>
293
+ -->
294
+
295
+ <!--
296
+ ### Downstream Usage (Sentence Transformers)
297
+
298
+ You can finetune this model on your own dataset.
299
+
300
+ <details><summary>Click to expand</summary>
301
+
302
+ </details>
303
+ -->
304
+
305
+ <!--
306
+ ### Out-of-Scope Use
307
+
308
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
309
+ -->
310
+
311
+ ## Evaluation
312
+
313
+ ### Metrics
314
+
315
+ #### Information Retrieval
316
+
317
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
318
+
319
+ | Metric | Value |
320
+ |:--------------------|:----------|
321
+ | cosine_accuracy@1 | 0.88 |
322
+ | cosine_accuracy@3 | 0.9867 |
323
+ | cosine_accuracy@5 | 0.9867 |
324
+ | cosine_accuracy@10 | 1.0 |
325
+ | cosine_precision@1 | 0.88 |
326
+ | cosine_precision@3 | 0.3289 |
327
+ | cosine_precision@5 | 0.1973 |
328
+ | cosine_precision@10 | 0.1 |
329
+ | cosine_recall@1 | 0.88 |
330
+ | cosine_recall@3 | 0.9867 |
331
+ | cosine_recall@5 | 0.9867 |
332
+ | cosine_recall@10 | 1.0 |
333
+ | cosine_ndcg@10 | 0.95 |
334
+ | cosine_mrr@10 | 0.933 |
335
+ | **cosine_map@100** | **0.933** |
336
+ | dot_accuracy@1 | 0.88 |
337
+ | dot_accuracy@3 | 0.9867 |
338
+ | dot_accuracy@5 | 0.9867 |
339
+ | dot_accuracy@10 | 1.0 |
340
+ | dot_precision@1 | 0.88 |
341
+ | dot_precision@3 | 0.3289 |
342
+ | dot_precision@5 | 0.1973 |
343
+ | dot_precision@10 | 0.1 |
344
+ | dot_recall@1 | 0.88 |
345
+ | dot_recall@3 | 0.9867 |
346
+ | dot_recall@5 | 0.9867 |
347
+ | dot_recall@10 | 1.0 |
348
+ | dot_ndcg@10 | 0.95 |
349
+ | dot_mrr@10 | 0.933 |
350
+ | dot_map@100 | 0.933 |
351
+
352
+ <!--
353
+ ## Bias, Risks and Limitations
354
+
355
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
356
+ -->
357
+
358
+ <!--
359
+ ### Recommendations
360
+
361
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
362
+ -->
363
+
364
+ ## Training Details
365
+
366
+ ### Training Dataset
367
+
368
+ #### json
369
+
370
+ * Dataset: json
371
+ * Size: 500 training samples
372
+ * Columns: <code>anchor</code> and <code>positive</code>
373
+ * Approximate statistics based on the first 500 samples:
374
+ | | anchor | positive |
375
+ |:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
376
+ | type | string | string |
377
+ | details | <ul><li>min: 12 tokens</li><li>mean: 21.76 tokens</li><li>max: 37 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 78.92 tokens</li><li>max: 104 tokens</li></ul> |
378
+ * Samples:
379
+ | anchor | positive |
380
+ |:--------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
381
+ | <code>What is the primary purpose of the AI Bill of Rights outlined in the October 2022 blueprint?</code> | <code>BLUEPRINT FOR AN <br>AI BILL OF <br>RIGHTS <br>MAKING AUTOMATED <br>SYSTEMS WORK FOR <br>THE AMERICAN PEOPLE <br>OCTOBER 2022</code> |
382
+ | <code>What was the purpose of the Blueprint for an AI Bill of Rights published by the White House Office of Science and Technology Policy?</code> | <code>About this Document <br>The Blueprint for an AI Bill of Rights: Making Automated Systems Work for the American People was <br>published by the White House Office of Science and Technology Policy in October 2022. This framework was <br>released one year after OSTP announced the launch of a process to develop “a bill of rights for an AI-powered</code> |
383
+ | <code>What initiative did the OSTP announce a year prior to the release of the framework for a bill of rights for an AI-powered world?</code> | <code>released one year after OSTP announced the launch of a process to develop “a bill of rights for an AI-powered <br>world.” Its release follows a year of public engagement to inform this initiative. The framework is available <br>online at: https://www.whitehouse.gov/ostp/ai-bill-of-rights <br>About the Office of Science and Technology Policy <br>The Office of Science and Technology Policy (OSTP) was established by the National Science and Technology</code> |
384
+ * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
385
+ ```json
386
+ {
387
+ "loss": "MultipleNegativesRankingLoss",
388
+ "matryoshka_dims": [
389
+ 1024,
390
+ 512,
391
+ 256,
392
+ 128,
393
+ 64
394
+ ],
395
+ "matryoshka_weights": [
396
+ 1,
397
+ 1,
398
+ 1,
399
+ 1,
400
+ 1
401
+ ],
402
+ "n_dims_per_step": -1
403
+ }
404
+ ```
405
+
406
+ ### Training Hyperparameters
407
+ #### Non-Default Hyperparameters
408
+
409
+ - `eval_strategy`: epoch
410
+ - `per_device_train_batch_size`: 32
411
+ - `per_device_eval_batch_size`: 16
412
+ - `gradient_accumulation_steps`: 16
413
+ - `learning_rate`: 2e-05
414
+ - `num_train_epochs`: 5
415
+ - `lr_scheduler_type`: cosine
416
+ - `warmup_ratio`: 0.1
417
+ - `bf16`: True
418
+ - `tf32`: True
419
+ - `load_best_model_at_end`: True
420
+ - `optim`: adamw_torch_fused
421
+ - `batch_sampler`: no_duplicates
422
+
423
+ #### All Hyperparameters
424
+ <details><summary>Click to expand</summary>
425
+
426
+ - `overwrite_output_dir`: False
427
+ - `do_predict`: False
428
+ - `eval_strategy`: epoch
429
+ - `prediction_loss_only`: True
430
+ - `per_device_train_batch_size`: 32
431
+ - `per_device_eval_batch_size`: 16
432
+ - `per_gpu_train_batch_size`: None
433
+ - `per_gpu_eval_batch_size`: None
434
+ - `gradient_accumulation_steps`: 16
435
+ - `eval_accumulation_steps`: None
436
+ - `torch_empty_cache_steps`: None
437
+ - `learning_rate`: 2e-05
438
+ - `weight_decay`: 0.0
439
+ - `adam_beta1`: 0.9
440
+ - `adam_beta2`: 0.999
441
+ - `adam_epsilon`: 1e-08
442
+ - `max_grad_norm`: 1.0
443
+ - `num_train_epochs`: 5
444
+ - `max_steps`: -1
445
+ - `lr_scheduler_type`: cosine
446
+ - `lr_scheduler_kwargs`: {}
447
+ - `warmup_ratio`: 0.1
448
+ - `warmup_steps`: 0
449
+ - `log_level`: passive
450
+ - `log_level_replica`: warning
451
+ - `log_on_each_node`: True
452
+ - `logging_nan_inf_filter`: True
453
+ - `save_safetensors`: True
454
+ - `save_on_each_node`: False
455
+ - `save_only_model`: False
456
+ - `restore_callback_states_from_checkpoint`: False
457
+ - `no_cuda`: False
458
+ - `use_cpu`: False
459
+ - `use_mps_device`: False
460
+ - `seed`: 42
461
+ - `data_seed`: None
462
+ - `jit_mode_eval`: False
463
+ - `use_ipex`: False
464
+ - `bf16`: True
465
+ - `fp16`: False
466
+ - `fp16_opt_level`: O1
467
+ - `half_precision_backend`: auto
468
+ - `bf16_full_eval`: False
469
+ - `fp16_full_eval`: False
470
+ - `tf32`: True
471
+ - `local_rank`: 0
472
+ - `ddp_backend`: None
473
+ - `tpu_num_cores`: None
474
+ - `tpu_metrics_debug`: False
475
+ - `debug`: []
476
+ - `dataloader_drop_last`: False
477
+ - `dataloader_num_workers`: 0
478
+ - `dataloader_prefetch_factor`: None
479
+ - `past_index`: -1
480
+ - `disable_tqdm`: False
481
+ - `remove_unused_columns`: True
482
+ - `label_names`: None
483
+ - `load_best_model_at_end`: True
484
+ - `ignore_data_skip`: False
485
+ - `fsdp`: []
486
+ - `fsdp_min_num_params`: 0
487
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
488
+ - `fsdp_transformer_layer_cls_to_wrap`: None
489
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
490
+ - `deepspeed`: None
491
+ - `label_smoothing_factor`: 0.0
492
+ - `optim`: adamw_torch_fused
493
+ - `optim_args`: None
494
+ - `adafactor`: False
495
+ - `group_by_length`: False
496
+ - `length_column_name`: length
497
+ - `ddp_find_unused_parameters`: None
498
+ - `ddp_bucket_cap_mb`: None
499
+ - `ddp_broadcast_buffers`: False
500
+ - `dataloader_pin_memory`: True
501
+ - `dataloader_persistent_workers`: False
502
+ - `skip_memory_metrics`: True
503
+ - `use_legacy_prediction_loop`: False
504
+ - `push_to_hub`: False
505
+ - `resume_from_checkpoint`: None
506
+ - `hub_model_id`: None
507
+ - `hub_strategy`: every_save
508
+ - `hub_private_repo`: False
509
+ - `hub_always_push`: False
510
+ - `gradient_checkpointing`: False
511
+ - `gradient_checkpointing_kwargs`: None
512
+ - `include_inputs_for_metrics`: False
513
+ - `eval_do_concat_batches`: True
514
+ - `fp16_backend`: auto
515
+ - `push_to_hub_model_id`: None
516
+ - `push_to_hub_organization`: None
517
+ - `mp_parameters`:
518
+ - `auto_find_batch_size`: False
519
+ - `full_determinism`: False
520
+ - `torchdynamo`: None
521
+ - `ray_scope`: last
522
+ - `ddp_timeout`: 1800
523
+ - `torch_compile`: False
524
+ - `torch_compile_backend`: None
525
+ - `torch_compile_mode`: None
526
+ - `dispatch_batches`: None
527
+ - `split_batches`: None
528
+ - `include_tokens_per_second`: False
529
+ - `include_num_input_tokens_seen`: False
530
+ - `neftune_noise_alpha`: None
531
+ - `optim_target_modules`: None
532
+ - `batch_eval_metrics`: False
533
+ - `eval_on_start`: False
534
+ - `eval_use_gather_object`: False
535
+ - `batch_sampler`: no_duplicates
536
+ - `multi_dataset_batch_sampler`: proportional
537
+
538
+ </details>
539
+
540
+ ### Training Logs
541
+ | Epoch | Step | cosine_map@100 |
542
+ |:-----:|:----:|:--------------:|
543
+ | 1.0 | 1 | 0.9022 |
544
+ | 2.0 | 2 | 0.9311 |
545
+ | 3.0 | 3 | 0.9397 |
546
+ | 4.0 | 4 | 0.9330 |
547
+ | 5.0 | 5 | 0.9330 |
548
+
549
+
550
+ ### Framework Versions
551
+ - Python: 3.10.12
552
+ - Sentence Transformers: 3.1.1
553
+ - Transformers: 4.44.2
554
+ - PyTorch: 2.4.1+cu121
555
+ - Accelerate: 0.34.2
556
+ - Datasets: 3.0.1
557
+ - Tokenizers: 0.19.1
558
+
559
+ ## Citation
560
+
561
+ ### BibTeX
562
+
563
+ #### Sentence Transformers
564
+ ```bibtex
565
+ @inproceedings{reimers-2019-sentence-bert,
566
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
567
+ author = "Reimers, Nils and Gurevych, Iryna",
568
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
569
+ month = "11",
570
+ year = "2019",
571
+ publisher = "Association for Computational Linguistics",
572
+ url = "https://arxiv.org/abs/1908.10084",
573
+ }
574
+ ```
575
+
576
+ #### MatryoshkaLoss
577
+ ```bibtex
578
+ @misc{kusupati2024matryoshka,
579
+ title={Matryoshka Representation Learning},
580
+ author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
581
+ year={2024},
582
+ eprint={2205.13147},
583
+ archivePrefix={arXiv},
584
+ primaryClass={cs.LG}
585
+ }
586
+ ```
587
+
588
+ #### MultipleNegativesRankingLoss
589
+ ```bibtex
590
+ @misc{henderson2017efficient,
591
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
592
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
593
+ year={2017},
594
+ eprint={1705.00652},
595
+ archivePrefix={arXiv},
596
+ primaryClass={cs.CL}
597
+ }
598
+ ```
599
+
600
+ <!--
601
+ ## Glossary
602
+
603
+ *Clearly define terms in order to be accessible across audiences.*
604
+ -->
605
+
606
+ <!--
607
+ ## Model Card Authors
608
+
609
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
610
+ -->
611
+
612
+ <!--
613
+ ## Model Card Contact
614
+
615
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
616
+ -->
checkpoint-5/config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
3
+ "architectures": [
4
+ "NewModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
9
+ "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
10
+ "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
11
+ "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
12
+ "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
13
+ "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
14
+ "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
15
+ },
16
+ "classifier_dropout": null,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.1,
19
+ "hidden_size": 1024,
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 4096,
22
+ "layer_norm_eps": 1e-12,
23
+ "layer_norm_type": "layer_norm",
24
+ "logn_attention_clip1": false,
25
+ "logn_attention_scale": false,
26
+ "max_position_embeddings": 8192,
27
+ "model_type": "new",
28
+ "num_attention_heads": 16,
29
+ "num_hidden_layers": 24,
30
+ "pack_qkv": true,
31
+ "pad_token_id": 0,
32
+ "position_embedding_type": "rope",
33
+ "rope_scaling": {
34
+ "factor": 2.0,
35
+ "type": "ntk"
36
+ },
37
+ "rope_theta": 160000,
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.44.2",
40
+ "type_vocab_size": 2,
41
+ "unpad_inputs": false,
42
+ "use_memory_efficient_attention": false,
43
+ "vocab_size": 30528
44
+ }
checkpoint-5/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.1.1",
4
+ "transformers": "4.44.2",
5
+ "pytorch": "2.4.1+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
checkpoint-5/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb02a72dc231a1db56c744e2b987060525d728dcfb752ffc16a4ea124fdac2d5
3
+ size 1736585680
checkpoint-5/modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
checkpoint-5/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:073d927e4832ef320c239d027c7271d72510b7722825935f561543d56ee70588
3
+ size 3473340858
checkpoint-5/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:054cf5787be45bbbe198069e1e23b3bff298fe9e6bc56a1d370572808efb6c24
3
+ size 14244
checkpoint-5/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81304ae4bba0f71e9b1ff0ac488bc44c76139fad1c1ef9ec0ef69ecacbbbcc22
3
+ size 1064
checkpoint-5/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 8192,
3
+ "do_lower_case": false
4
+ }
checkpoint-5/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-5/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-5/tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "max_length": 8000,
49
+ "model_max_length": 8192,
50
+ "pad_to_multiple_of": null,
51
+ "pad_token": "[PAD]",
52
+ "pad_token_type_id": 0,
53
+ "padding_side": "right",
54
+ "sep_token": "[SEP]",
55
+ "stride": 0,
56
+ "strip_accents": null,
57
+ "tokenize_chinese_chars": true,
58
+ "tokenizer_class": "BertTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
+ "unk_token": "[UNK]"
62
+ }
checkpoint-5/trainer_state.json ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8933333333333333,
3
+ "best_model_checkpoint": "policy_gte_large_5/checkpoint-3",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 5,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_cosine_accuracy@1": 0.8266666666666667,
14
+ "eval_cosine_accuracy@10": 1.0,
15
+ "eval_cosine_accuracy@3": 0.9866666666666667,
16
+ "eval_cosine_accuracy@5": 0.9866666666666667,
17
+ "eval_cosine_map@100": 0.9022222222222223,
18
+ "eval_cosine_mrr@10": 0.9022222222222223,
19
+ "eval_cosine_ndcg@10": 0.9271276662566819,
20
+ "eval_cosine_precision@1": 0.8266666666666667,
21
+ "eval_cosine_precision@10": 0.09999999999999998,
22
+ "eval_cosine_precision@3": 0.3288888888888888,
23
+ "eval_cosine_precision@5": 0.1973333333333333,
24
+ "eval_cosine_recall@1": 0.8266666666666667,
25
+ "eval_cosine_recall@10": 1.0,
26
+ "eval_cosine_recall@3": 0.9866666666666667,
27
+ "eval_cosine_recall@5": 0.9866666666666667,
28
+ "eval_dot_accuracy@1": 0.8266666666666667,
29
+ "eval_dot_accuracy@10": 1.0,
30
+ "eval_dot_accuracy@3": 0.9866666666666667,
31
+ "eval_dot_accuracy@5": 0.9866666666666667,
32
+ "eval_dot_map@100": 0.9019047619047619,
33
+ "eval_dot_mrr@10": 0.901904761904762,
34
+ "eval_dot_ndcg@10": 0.926822681539686,
35
+ "eval_dot_precision@1": 0.8266666666666667,
36
+ "eval_dot_precision@10": 0.09999999999999998,
37
+ "eval_dot_precision@3": 0.3288888888888888,
38
+ "eval_dot_precision@5": 0.1973333333333333,
39
+ "eval_dot_recall@1": 0.8266666666666667,
40
+ "eval_dot_recall@10": 1.0,
41
+ "eval_dot_recall@3": 0.9866666666666667,
42
+ "eval_dot_recall@5": 0.9866666666666667,
43
+ "eval_runtime": 0.2111,
44
+ "eval_samples_per_second": 0.0,
45
+ "eval_steps_per_second": 0.0,
46
+ "step": 1
47
+ },
48
+ {
49
+ "epoch": 2.0,
50
+ "eval_cosine_accuracy@1": 0.88,
51
+ "eval_cosine_accuracy@10": 1.0,
52
+ "eval_cosine_accuracy@3": 0.9866666666666667,
53
+ "eval_cosine_accuracy@5": 0.9866666666666667,
54
+ "eval_cosine_map@100": 0.931111111111111,
55
+ "eval_cosine_mrr@10": 0.9311111111111112,
56
+ "eval_cosine_ndcg@10": 0.9485571427804901,
57
+ "eval_cosine_precision@1": 0.88,
58
+ "eval_cosine_precision@10": 0.09999999999999998,
59
+ "eval_cosine_precision@3": 0.3288888888888888,
60
+ "eval_cosine_precision@5": 0.1973333333333333,
61
+ "eval_cosine_recall@1": 0.88,
62
+ "eval_cosine_recall@10": 1.0,
63
+ "eval_cosine_recall@3": 0.9866666666666667,
64
+ "eval_cosine_recall@5": 0.9866666666666667,
65
+ "eval_dot_accuracy@1": 0.88,
66
+ "eval_dot_accuracy@10": 1.0,
67
+ "eval_dot_accuracy@3": 0.9866666666666667,
68
+ "eval_dot_accuracy@5": 0.9866666666666667,
69
+ "eval_dot_map@100": 0.931111111111111,
70
+ "eval_dot_mrr@10": 0.9311111111111112,
71
+ "eval_dot_ndcg@10": 0.9485571427804901,
72
+ "eval_dot_precision@1": 0.88,
73
+ "eval_dot_precision@10": 0.09999999999999998,
74
+ "eval_dot_precision@3": 0.3288888888888888,
75
+ "eval_dot_precision@5": 0.1973333333333333,
76
+ "eval_dot_recall@1": 0.88,
77
+ "eval_dot_recall@10": 1.0,
78
+ "eval_dot_recall@3": 0.9866666666666667,
79
+ "eval_dot_recall@5": 0.9866666666666667,
80
+ "eval_runtime": 0.2179,
81
+ "eval_samples_per_second": 0.0,
82
+ "eval_steps_per_second": 0.0,
83
+ "step": 2
84
+ },
85
+ {
86
+ "epoch": 3.0,
87
+ "eval_cosine_accuracy@1": 0.8933333333333333,
88
+ "eval_cosine_accuracy@10": 1.0,
89
+ "eval_cosine_accuracy@3": 0.9866666666666667,
90
+ "eval_cosine_accuracy@5": 0.9866666666666667,
91
+ "eval_cosine_map@100": 0.9396825396825398,
92
+ "eval_cosine_mrr@10": 0.9396825396825398,
93
+ "eval_cosine_ndcg@10": 0.954918824730161,
94
+ "eval_cosine_precision@1": 0.8933333333333333,
95
+ "eval_cosine_precision@10": 0.09999999999999998,
96
+ "eval_cosine_precision@3": 0.3288888888888888,
97
+ "eval_cosine_precision@5": 0.1973333333333333,
98
+ "eval_cosine_recall@1": 0.8933333333333333,
99
+ "eval_cosine_recall@10": 1.0,
100
+ "eval_cosine_recall@3": 0.9866666666666667,
101
+ "eval_cosine_recall@5": 0.9866666666666667,
102
+ "eval_dot_accuracy@1": 0.8933333333333333,
103
+ "eval_dot_accuracy@10": 1.0,
104
+ "eval_dot_accuracy@3": 0.9866666666666667,
105
+ "eval_dot_accuracy@5": 0.9866666666666667,
106
+ "eval_dot_map@100": 0.9396825396825398,
107
+ "eval_dot_mrr@10": 0.9396825396825398,
108
+ "eval_dot_ndcg@10": 0.954918824730161,
109
+ "eval_dot_precision@1": 0.8933333333333333,
110
+ "eval_dot_precision@10": 0.09999999999999998,
111
+ "eval_dot_precision@3": 0.3288888888888888,
112
+ "eval_dot_precision@5": 0.1973333333333333,
113
+ "eval_dot_recall@1": 0.8933333333333333,
114
+ "eval_dot_recall@10": 1.0,
115
+ "eval_dot_recall@3": 0.9866666666666667,
116
+ "eval_dot_recall@5": 0.9866666666666667,
117
+ "eval_runtime": 0.2139,
118
+ "eval_samples_per_second": 0.0,
119
+ "eval_steps_per_second": 0.0,
120
+ "step": 3
121
+ },
122
+ {
123
+ "epoch": 4.0,
124
+ "eval_cosine_accuracy@1": 0.88,
125
+ "eval_cosine_accuracy@10": 1.0,
126
+ "eval_cosine_accuracy@3": 0.9866666666666667,
127
+ "eval_cosine_accuracy@5": 0.9866666666666667,
128
+ "eval_cosine_map@100": 0.9330158730158731,
129
+ "eval_cosine_mrr@10": 0.9330158730158731,
130
+ "eval_cosine_ndcg@10": 0.9499978881111136,
131
+ "eval_cosine_precision@1": 0.88,
132
+ "eval_cosine_precision@10": 0.09999999999999998,
133
+ "eval_cosine_precision@3": 0.3288888888888888,
134
+ "eval_cosine_precision@5": 0.1973333333333333,
135
+ "eval_cosine_recall@1": 0.88,
136
+ "eval_cosine_recall@10": 1.0,
137
+ "eval_cosine_recall@3": 0.9866666666666667,
138
+ "eval_cosine_recall@5": 0.9866666666666667,
139
+ "eval_dot_accuracy@1": 0.8933333333333333,
140
+ "eval_dot_accuracy@10": 1.0,
141
+ "eval_dot_accuracy@3": 0.9866666666666667,
142
+ "eval_dot_accuracy@5": 0.9866666666666667,
143
+ "eval_dot_map@100": 0.9394444444444444,
144
+ "eval_dot_mrr@10": 0.9394444444444445,
145
+ "eval_dot_ndcg@10": 0.9546805786428596,
146
+ "eval_dot_precision@1": 0.8933333333333333,
147
+ "eval_dot_precision@10": 0.09999999999999998,
148
+ "eval_dot_precision@3": 0.3288888888888888,
149
+ "eval_dot_precision@5": 0.1973333333333333,
150
+ "eval_dot_recall@1": 0.8933333333333333,
151
+ "eval_dot_recall@10": 1.0,
152
+ "eval_dot_recall@3": 0.9866666666666667,
153
+ "eval_dot_recall@5": 0.9866666666666667,
154
+ "eval_runtime": 0.2064,
155
+ "eval_samples_per_second": 0.0,
156
+ "eval_steps_per_second": 0.0,
157
+ "step": 4
158
+ },
159
+ {
160
+ "epoch": 5.0,
161
+ "eval_cosine_accuracy@1": 0.88,
162
+ "eval_cosine_accuracy@10": 1.0,
163
+ "eval_cosine_accuracy@3": 0.9866666666666667,
164
+ "eval_cosine_accuracy@5": 0.9866666666666667,
165
+ "eval_cosine_map@100": 0.9330158730158731,
166
+ "eval_cosine_mrr@10": 0.9330158730158731,
167
+ "eval_cosine_ndcg@10": 0.9499978881111136,
168
+ "eval_cosine_precision@1": 0.88,
169
+ "eval_cosine_precision@10": 0.09999999999999998,
170
+ "eval_cosine_precision@3": 0.3288888888888888,
171
+ "eval_cosine_precision@5": 0.1973333333333333,
172
+ "eval_cosine_recall@1": 0.88,
173
+ "eval_cosine_recall@10": 1.0,
174
+ "eval_cosine_recall@3": 0.9866666666666667,
175
+ "eval_cosine_recall@5": 0.9866666666666667,
176
+ "eval_dot_accuracy@1": 0.88,
177
+ "eval_dot_accuracy@10": 1.0,
178
+ "eval_dot_accuracy@3": 0.9866666666666667,
179
+ "eval_dot_accuracy@5": 0.9866666666666667,
180
+ "eval_dot_map@100": 0.9330158730158731,
181
+ "eval_dot_mrr@10": 0.9330158730158731,
182
+ "eval_dot_ndcg@10": 0.9499978881111136,
183
+ "eval_dot_precision@1": 0.88,
184
+ "eval_dot_precision@10": 0.09999999999999998,
185
+ "eval_dot_precision@3": 0.3288888888888888,
186
+ "eval_dot_precision@5": 0.1973333333333333,
187
+ "eval_dot_recall@1": 0.88,
188
+ "eval_dot_recall@10": 1.0,
189
+ "eval_dot_recall@3": 0.9866666666666667,
190
+ "eval_dot_recall@5": 0.9866666666666667,
191
+ "eval_runtime": 0.2216,
192
+ "eval_samples_per_second": 0.0,
193
+ "eval_steps_per_second": 0.0,
194
+ "step": 5
195
+ }
196
+ ],
197
+ "logging_steps": 10,
198
+ "max_steps": 5,
199
+ "num_input_tokens_seen": 0,
200
+ "num_train_epochs": 5,
201
+ "save_steps": 500,
202
+ "stateful_callbacks": {
203
+ "TrainerControl": {
204
+ "args": {
205
+ "should_epoch_stop": false,
206
+ "should_evaluate": false,
207
+ "should_log": false,
208
+ "should_save": true,
209
+ "should_training_stop": true
210
+ },
211
+ "attributes": {}
212
+ }
213
+ },
214
+ "total_flos": 0.0,
215
+ "train_batch_size": 32,
216
+ "trial_name": null,
217
+ "trial_params": null
218
+ }
checkpoint-5/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:251926714f5864c84dcca92a9a3cdc8bce3b6ade325cb435c1034de51951bf55
3
+ size 5496
checkpoint-5/vocab.txt ADDED
The diff for this file is too large to render. See raw diff