aari1995 committed · Commit 16943d0 · verified · 1 Parent(s): 673edcf

Update README.md

Files changed (1)
  1. README.md +37 -978
README.md CHANGED
@@ -1,24 +1,13 @@
  ---
  language:
  - de
- - en
- - es
- - fr
- - it
- - nl
- - pl
- - pt
- - ru
- - zh
  library_name: sentence-transformers
  tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
- - dataset_size:10K<n<100K
  - loss:MatryoshkaLoss
- - loss:CosineSimilarityLoss
- base_model: aari1995/gbert-large-2-cls-nlisim
  metrics:
  - pearson_cosine
  - spearman_cosine
@@ -57,478 +46,48 @@ widget:
  - Die Frau prüft die Augen des Mannes.
  - Ein Mann ist auf einem Dach
  pipeline_tag: sentence-similarity
- model-index:
- - name: SentenceTransformer based on aari1995/gbert-large-2-cls-nlisim
-   results:
-   - task:
-       type: semantic-similarity
-       name: Semantic Similarity
-     dataset:
-       name: sts dev 1024
-       type: sts-dev-1024
-     metrics:
-     - type: pearson_cosine
-       value: 0.8417806877288009
-       name: Pearson Cosine
-     - type: spearman_cosine
-       value: 0.8452891310343582
-       name: Spearman Cosine
-     - type: pearson_manhattan
-       value: 0.8418749526406495
-       name: Pearson Manhattan
-     - type: spearman_manhattan
-       value: 0.8450348906331776
-       name: Spearman Manhattan
-     - type: pearson_euclidean
-       value: 0.8422615095001257
-       name: Pearson Euclidean
-     - type: spearman_euclidean
-       value: 0.8453390990427703
-       name: Spearman Euclidean
-     - type: pearson_dot
-       value: 0.8416625079549063
-       name: Pearson Dot
-     - type: spearman_dot
-       value: 0.8450616171323844
-       name: Spearman Dot
-     - type: pearson_max
-       value: 0.8422615095001257
-       name: Pearson Max
-     - type: spearman_max
-       value: 0.8453390990427703
-       name: Spearman Max
-   - task:
-       type: semantic-similarity
-       name: Semantic Similarity
-     dataset:
-       name: sts dev 768
-       type: sts-dev-768
-     metrics:
-     - type: pearson_cosine
-       value: 0.8418107096367227
-       name: Pearson Cosine
-     - type: spearman_cosine
-       value: 0.8453863409322975
-       name: Spearman Cosine
-     - type: pearson_manhattan
-       value: 0.8418527770289471
-       name: Pearson Manhattan
-     - type: spearman_manhattan
-       value: 0.8448328869253576
-       name: Spearman Manhattan
-     - type: pearson_euclidean
-       value: 0.8422791953749277
-       name: Pearson Euclidean
-     - type: spearman_euclidean
-       value: 0.8451547857394669
-       name: Spearman Euclidean
-     - type: pearson_dot
-       value: 0.8417682812591724
-       name: Pearson Dot
-     - type: spearman_dot
-       value: 0.8446927200809794
-       name: Spearman Dot
-     - type: pearson_max
-       value: 0.8422791953749277
-       name: Pearson Max
-     - type: spearman_max
-       value: 0.8453863409322975
-       name: Spearman Max
-   - task:
-       type: semantic-similarity
-       name: Semantic Similarity
-     dataset:
-       name: sts dev 512
-       type: sts-dev-512
-     metrics:
-     - type: pearson_cosine
-       value: 0.8394808864309438
-       name: Pearson Cosine
-     - type: spearman_cosine
-       value: 0.8437551103291275
-       name: Spearman Cosine
-     - type: pearson_manhattan
-       value: 0.8420246416513741
-       name: Pearson Manhattan
-     - type: spearman_manhattan
-       value: 0.8447335398769396
-       name: Spearman Manhattan
-     - type: pearson_euclidean
-       value: 0.8422722079216611
-       name: Pearson Euclidean
-     - type: spearman_euclidean
-       value: 0.8448909261141044
-       name: Spearman Euclidean
-     - type: pearson_dot
-       value: 0.8358204287638725
-       name: Pearson Dot
-     - type: spearman_dot
-       value: 0.8380004733308642
-       name: Spearman Dot
-     - type: pearson_max
-       value: 0.8422722079216611
-       name: Pearson Max
-     - type: spearman_max
-       value: 0.8448909261141044
-       name: Spearman Max
-   - task:
-       type: semantic-similarity
-       name: Semantic Similarity
-     dataset:
-       name: sts dev 256
-       type: sts-dev-256
-     metrics:
-     - type: pearson_cosine
-       value: 0.833879413726309
-       name: Pearson Cosine
-     - type: spearman_cosine
-       value: 0.8392439788855341
-       name: Spearman Cosine
-     - type: pearson_manhattan
-       value: 0.8379618268497928
-       name: Pearson Manhattan
-     - type: spearman_manhattan
-       value: 0.839860826315925
-       name: Spearman Manhattan
-     - type: pearson_euclidean
-       value: 0.838931461279174
-       name: Pearson Euclidean
-     - type: spearman_euclidean
-       value: 0.8404811150299943
-       name: Spearman Euclidean
-     - type: pearson_dot
-       value: 0.8230557648139373
-       name: Pearson Dot
-     - type: spearman_dot
-       value: 0.8242532718299653
-       name: Spearman Dot
-     - type: pearson_max
-       value: 0.838931461279174
-       name: Pearson Max
-     - type: spearman_max
-       value: 0.8404811150299943
-       name: Spearman Max
-   - task:
-       type: semantic-similarity
-       name: Semantic Similarity
-     dataset:
-       name: sts dev 128
-       type: sts-dev-128
-     metrics:
-     - type: pearson_cosine
-       value: 0.8253967606033702
-       name: Pearson Cosine
-     - type: spearman_cosine
-       value: 0.8335750690073012
-       name: Spearman Cosine
-     - type: pearson_manhattan
-       value: 0.8341588626988476
-       name: Pearson Manhattan
-     - type: spearman_manhattan
-       value: 0.8343994326050966
-       name: Spearman Manhattan
-     - type: pearson_euclidean
-       value: 0.8355263623880292
-       name: Pearson Euclidean
-     - type: spearman_euclidean
-       value: 0.8358857095028451
-       name: Spearman Euclidean
-     - type: pearson_dot
-       value: 0.8035163216908426
-       name: Pearson Dot
-     - type: spearman_dot
-       value: 0.8050271037746011
-       name: Spearman Dot
-     - type: pearson_max
-       value: 0.8355263623880292
-       name: Pearson Max
-     - type: spearman_max
-       value: 0.8358857095028451
-       name: Spearman Max
-   - task:
-       type: semantic-similarity
-       name: Semantic Similarity
-     dataset:
-       name: sts dev 64
-       type: sts-dev-64
-     metrics:
-     - type: pearson_cosine
-       value: 0.8150661334039712
-       name: Pearson Cosine
-     - type: spearman_cosine
-       value: 0.8265558538619309
-       name: Spearman Cosine
-     - type: pearson_manhattan
-       value: 0.8241988539394505
-       name: Pearson Manhattan
-     - type: spearman_manhattan
-       value: 0.8238763145175863
-       name: Spearman Manhattan
-     - type: pearson_euclidean
-       value: 0.8274925218859535
-       name: Pearson Euclidean
-     - type: spearman_euclidean
-       value: 0.8270778062044848
-       name: Spearman Euclidean
-     - type: pearson_dot
-       value: 0.7773847317840161
-       name: Pearson Dot
-     - type: spearman_dot
-       value: 0.7790338242936304
-       name: Spearman Dot
-     - type: pearson_max
-       value: 0.8274925218859535
-       name: Pearson Max
-     - type: spearman_max
-       value: 0.8270778062044848
-       name: Spearman Max
-   - task:
-       type: semantic-similarity
-       name: Semantic Similarity
-     dataset:
-       name: sts test 1024
-       type: sts-test-1024
-     metrics:
-     - type: pearson_cosine
-       value: 0.8130772714952826
-       name: Pearson Cosine
-     - type: spearman_cosine
-       value: 0.8188901246173036
-       name: Spearman Cosine
-     - type: pearson_manhattan
-       value: 0.8208715312691268
-       name: Pearson Manhattan
-     - type: spearman_manhattan
-       value: 0.8195095089412118
-       name: Spearman Manhattan
-     - type: pearson_euclidean
-       value: 0.820344720619671
-       name: Pearson Euclidean
-     - type: spearman_euclidean
-       value: 0.8189263018901494
-       name: Spearman Euclidean
-     - type: pearson_dot
-       value: 0.8127924456922464
-       name: Pearson Dot
-     - type: spearman_dot
-       value: 0.8185815083131535
-       name: Spearman Dot
-     - type: pearson_max
-       value: 0.8208715312691268
-       name: Pearson Max
-     - type: spearman_max
-       value: 0.8195095089412118
-       name: Spearman Max
-   - task:
-       type: semantic-similarity
-       name: Semantic Similarity
-     dataset:
-       name: sts test 768
-       type: sts-test-768
-     metrics:
-     - type: pearson_cosine
-       value: 0.8121757739236393
-       name: Pearson Cosine
-     - type: spearman_cosine
-       value: 0.8182913347635533
-       name: Spearman Cosine
-     - type: pearson_manhattan
-       value: 0.820604714791802
-       name: Pearson Manhattan
-     - type: spearman_manhattan
-       value: 0.8190481839997107
-       name: Spearman Manhattan
-     - type: pearson_euclidean
-       value: 0.8197462057663948
-       name: Pearson Euclidean
-     - type: spearman_euclidean
-       value: 0.8183157116237637
-       name: Spearman Euclidean
-     - type: pearson_dot
-       value: 0.8106698462984598
-       name: Pearson Dot
-     - type: spearman_dot
-       value: 0.8148932181769889
-       name: Spearman Dot
-     - type: pearson_max
-       value: 0.820604714791802
-       name: Pearson Max
-     - type: spearman_max
-       value: 0.8190481839997107
-       name: Spearman Max
-   - task:
-       type: semantic-similarity
-       name: Semantic Similarity
-     dataset:
-       name: sts test 512
-       type: sts-test-512
-     metrics:
-     - type: pearson_cosine
-       value: 0.8096452235754106
-       name: Pearson Cosine
-     - type: spearman_cosine
-       value: 0.816264314810491
-       name: Spearman Cosine
-     - type: pearson_manhattan
-       value: 0.8180021560255247
-       name: Pearson Manhattan
-     - type: spearman_manhattan
-       value: 0.8165486306356095
-       name: Spearman Manhattan
-     - type: pearson_euclidean
-       value: 0.8173829404008947
-       name: Pearson Euclidean
-     - type: spearman_euclidean
-       value: 0.8158592878546184
-       name: Spearman Euclidean
-     - type: pearson_dot
-       value: 0.8059176831913651
-       name: Pearson Dot
-     - type: spearman_dot
-       value: 0.8088972406630007
-       name: Spearman Dot
-     - type: pearson_max
-       value: 0.8180021560255247
-       name: Pearson Max
-     - type: spearman_max
-       value: 0.8165486306356095
-       name: Spearman Max
-   - task:
-       type: semantic-similarity
-       name: Semantic Similarity
-     dataset:
-       name: sts test 256
-       type: sts-test-256
-     metrics:
-     - type: pearson_cosine
-       value: 0.8070921035712145
-       name: Pearson Cosine
-     - type: spearman_cosine
-       value: 0.8150266310280979
-       name: Spearman Cosine
-     - type: pearson_manhattan
-       value: 0.818409081545237
-       name: Pearson Manhattan
-     - type: spearman_manhattan
-       value: 0.8167245415653657
-       name: Spearman Manhattan
-     - type: pearson_euclidean
-       value: 0.8176811220335696
-       name: Pearson Euclidean
-     - type: spearman_euclidean
-       value: 0.8158894222194816
-       name: Spearman Euclidean
-     - type: pearson_dot
-       value: 0.795483328805793
-       name: Pearson Dot
-     - type: spearman_dot
-       value: 0.7956062163122977
-       name: Spearman Dot
-     - type: pearson_max
-       value: 0.818409081545237
-       name: Pearson Max
-     - type: spearman_max
-       value: 0.8167245415653657
-       name: Spearman Max
-   - task:
-       type: semantic-similarity
-       name: Semantic Similarity
-     dataset:
-       name: sts test 128
-       type: sts-test-128
-     metrics:
-     - type: pearson_cosine
-       value: 0.7974039089035316
-       name: Pearson Cosine
-     - type: spearman_cosine
-       value: 0.8093067652791092
-       name: Spearman Cosine
-     - type: pearson_manhattan
-       value: 0.8125792968401813
-       name: Pearson Manhattan
-     - type: spearman_manhattan
-       value: 0.8121486514324944
-       name: Spearman Manhattan
-     - type: pearson_euclidean
-       value: 0.8119102513178551
-       name: Pearson Euclidean
-     - type: spearman_euclidean
-       value: 0.811152531425261
-       name: Spearman Euclidean
-     - type: pearson_dot
-       value: 0.7739555890021923
-       name: Pearson Dot
-     - type: spearman_dot
-       value: 0.770072655568691
-       name: Spearman Dot
-     - type: pearson_max
-       value: 0.8125792968401813
-       name: Pearson Max
-     - type: spearman_max
-       value: 0.8121486514324944
-       name: Spearman Max
-   - task:
-       type: semantic-similarity
-       name: Semantic Similarity
-     dataset:
-       name: sts test 64
-       type: sts-test-64
-     metrics:
-     - type: pearson_cosine
-       value: 0.7873069617689994
-       name: Pearson Cosine
-     - type: spearman_cosine
-       value: 0.8024994399645912
-       name: Spearman Cosine
-     - type: pearson_manhattan
-       value: 0.8048161563115213
-       name: Pearson Manhattan
-     - type: spearman_manhattan
-       value: 0.8031972835914969
-       name: Spearman Manhattan
-     - type: pearson_euclidean
-       value: 0.8060416893207731
-       name: Pearson Euclidean
-     - type: spearman_euclidean
-       value: 0.8041515980374414
-       name: Spearman Euclidean
-     - type: pearson_dot
-       value: 0.747911221220991
-       name: Pearson Dot
-     - type: spearman_dot
-       value: 0.7386011869481828
-       name: Spearman Dot
-     - type: pearson_max
-       value: 0.8060416893207731
-       name: Pearson Max
-     - type: spearman_max
-       value: 0.8041515980374414
-       name: Spearman Max
  ---

- # SentenceTransformer based on aari1995/gbert-large-2-cls-nlisim

- POTENTIAL GERMAN_SEMANTIC_V3
- This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [aari1995/gbert-large-2-cls-nlisim](https://huggingface.co/aari1995/gbert-large-2-cls-nlisim) on the [PhilipMay/stsb_multi_mt](https://huggingface.co/datasets/PhilipMay/stsb_multi_mt) dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

- ## Model Details

- ### Model Description
- - **Model Type:** Sentence Transformer
- - **Base model:** [aari1995/gbert-large-2-cls-nlisim](https://huggingface.co/aari1995/gbert-large-2-cls-nlisim) <!-- at revision fb515aefe7a575165dcaa62db3f77a09642ebe64 -->
- - **Maximum Sequence Length:** 8192 tokens
- - **Output Dimensionality:** 1024 tokens
- - **Similarity Function:** Cosine Similarity
- - **Training Dataset:**
-     - [PhilipMay/stsb_multi_mt](https://huggingface.co/datasets/PhilipMay/stsb_multi_mt)
- - **Languages:** de, en, es, fr, it, nl, pl, pt, ru, zh
- <!-- - **License:** Unknown -->

- ### Model Sources

- - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

  ### Full Model Architecture

@@ -597,507 +156,7 @@ You can finetune this model on your own dataset.

  ## Evaluation

- ### Metrics
-
- #### Semantic Similarity
- * Dataset: `sts-dev-1024`
- * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
-
- | Metric | Value |
- |:--------------------|:-----------|
- | pearson_cosine | 0.8418 |
- | **spearman_cosine** | **0.8453** |
- | pearson_manhattan | 0.8419 |
- | spearman_manhattan | 0.845 |
- | pearson_euclidean | 0.8423 |
- | spearman_euclidean | 0.8453 |
- | pearson_dot | 0.8417 |
- | spearman_dot | 0.8451 |
- | pearson_max | 0.8423 |
- | spearman_max | 0.8453 |
-
- #### Semantic Similarity
- * Dataset: `sts-dev-768`
- * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
-
- | Metric | Value |
- |:--------------------|:-----------|
- | pearson_cosine | 0.8418 |
- | **spearman_cosine** | **0.8454** |
- | pearson_manhattan | 0.8419 |
- | spearman_manhattan | 0.8448 |
- | pearson_euclidean | 0.8423 |
- | spearman_euclidean | 0.8452 |
- | pearson_dot | 0.8418 |
- | spearman_dot | 0.8447 |
- | pearson_max | 0.8423 |
- | spearman_max | 0.8454 |
-
- #### Semantic Similarity
- * Dataset: `sts-dev-512`
- * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
-
- | Metric | Value |
- |:--------------------|:-----------|
- | pearson_cosine | 0.8395 |
- | **spearman_cosine** | **0.8438** |
- | pearson_manhattan | 0.842 |
- | spearman_manhattan | 0.8447 |
- | pearson_euclidean | 0.8423 |
- | spearman_euclidean | 0.8449 |
- | pearson_dot | 0.8358 |
- | spearman_dot | 0.838 |
- | pearson_max | 0.8423 |
- | spearman_max | 0.8449 |
-
- #### Semantic Similarity
- * Dataset: `sts-dev-256`
- * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
-
- | Metric | Value |
- |:--------------------|:-----------|
- | pearson_cosine | 0.8339 |
- | **spearman_cosine** | **0.8392** |
- | pearson_manhattan | 0.838 |
- | spearman_manhattan | 0.8399 |
- | pearson_euclidean | 0.8389 |
- | spearman_euclidean | 0.8405 |
- | pearson_dot | 0.8231 |
- | spearman_dot | 0.8243 |
- | pearson_max | 0.8389 |
- | spearman_max | 0.8405 |
-
- #### Semantic Similarity
- * Dataset: `sts-dev-128`
- * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
-
- | Metric | Value |
- |:--------------------|:-----------|
- | pearson_cosine | 0.8254 |
- | **spearman_cosine** | **0.8336** |
- | pearson_manhattan | 0.8342 |
- | spearman_manhattan | 0.8344 |
- | pearson_euclidean | 0.8355 |
- | spearman_euclidean | 0.8359 |
- | pearson_dot | 0.8035 |
- | spearman_dot | 0.805 |
- | pearson_max | 0.8355 |
- | spearman_max | 0.8359 |
-
- #### Semantic Similarity
- * Dataset: `sts-dev-64`
- * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
-
- | Metric | Value |
- |:--------------------|:-----------|
- | pearson_cosine | 0.8151 |
- | **spearman_cosine** | **0.8266** |
- | pearson_manhattan | 0.8242 |
- | spearman_manhattan | 0.8239 |
- | pearson_euclidean | 0.8275 |
- | spearman_euclidean | 0.8271 |
- | pearson_dot | 0.7774 |
- | spearman_dot | 0.779 |
- | pearson_max | 0.8275 |
- | spearman_max | 0.8271 |
-
- #### Semantic Similarity
- * Dataset: `sts-test-1024`
- * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
-
- | Metric | Value |
- |:--------------------|:-----------|
- | pearson_cosine | 0.8131 |
- | **spearman_cosine** | **0.8189** |
- | pearson_manhattan | 0.8209 |
- | spearman_manhattan | 0.8195 |
- | pearson_euclidean | 0.8203 |
- | spearman_euclidean | 0.8189 |
- | pearson_dot | 0.8128 |
- | spearman_dot | 0.8186 |
- | pearson_max | 0.8209 |
- | spearman_max | 0.8195 |
-
- #### Semantic Similarity
- * Dataset: `sts-test-768`
- * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
-
- | Metric | Value |
- |:--------------------|:-----------|
- | pearson_cosine | 0.8122 |
- | **spearman_cosine** | **0.8183** |
- | pearson_manhattan | 0.8206 |
- | spearman_manhattan | 0.819 |
- | pearson_euclidean | 0.8197 |
- | spearman_euclidean | 0.8183 |
- | pearson_dot | 0.8107 |
- | spearman_dot | 0.8149 |
- | pearson_max | 0.8206 |
- | spearman_max | 0.819 |
-
- #### Semantic Similarity
- * Dataset: `sts-test-512`
- * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
-
- | Metric | Value |
- |:--------------------|:-----------|
- | pearson_cosine | 0.8096 |
- | **spearman_cosine** | **0.8163** |
- | pearson_manhattan | 0.818 |
- | spearman_manhattan | 0.8165 |
- | pearson_euclidean | 0.8174 |
- | spearman_euclidean | 0.8159 |
- | pearson_dot | 0.8059 |
- | spearman_dot | 0.8089 |
- | pearson_max | 0.818 |
- | spearman_max | 0.8165 |
-
- #### Semantic Similarity
- * Dataset: `sts-test-256`
- * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
-
- | Metric | Value |
- |:--------------------|:----------|
- | pearson_cosine | 0.8071 |
- | **spearman_cosine** | **0.815** |
- | pearson_manhattan | 0.8184 |
- | spearman_manhattan | 0.8167 |
- | pearson_euclidean | 0.8177 |
- | spearman_euclidean | 0.8159 |
- | pearson_dot | 0.7955 |
- | spearman_dot | 0.7956 |
- | pearson_max | 0.8184 |
- | spearman_max | 0.8167 |
-
- #### Semantic Similarity
- * Dataset: `sts-test-128`
- * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
-
- | Metric | Value |
- |:--------------------|:-----------|
- | pearson_cosine | 0.7974 |
- | **spearman_cosine** | **0.8093** |
- | pearson_manhattan | 0.8126 |
- | spearman_manhattan | 0.8121 |
- | pearson_euclidean | 0.8119 |
- | spearman_euclidean | 0.8112 |
- | pearson_dot | 0.774 |
- | spearman_dot | 0.7701 |
- | pearson_max | 0.8126 |
- | spearman_max | 0.8121 |
-
- #### Semantic Similarity
- * Dataset: `sts-test-64`
- * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
-
- | Metric | Value |
- |:--------------------|:-----------|
- | pearson_cosine | 0.7873 |
- | **spearman_cosine** | **0.8025** |
- | pearson_manhattan | 0.8048 |
- | spearman_manhattan | 0.8032 |
- | pearson_euclidean | 0.806 |
- | spearman_euclidean | 0.8042 |
- | pearson_dot | 0.7479 |
- | spearman_dot | 0.7386 |
- | pearson_max | 0.806 |
- | spearman_max | 0.8042 |
-
- <!--
- ## Bias, Risks and Limitations
-
- *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
- -->
-
- <!--
- ### Recommendations
-
- *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
- -->
-
- ## Training Details
-
- ### Training Dataset
-
- #### PhilipMay/stsb_multi_mt
-
- * Dataset: [PhilipMay/stsb_multi_mt](https://huggingface.co/datasets/PhilipMay/stsb_multi_mt) at [3acaa3d](https://huggingface.co/datasets/PhilipMay/stsb_multi_mt/tree/3acaa3dd8c91649e0b8e627ffad891f059e47c8c)
- * Size: 22,996 training samples
- * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>score</code>
- * Approximate statistics based on the first 1000 samples:
-   | | sentence1 | sentence2 | score |
-   |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------|
-   | type | string | string | float |
-   | details | <ul><li>min: 6 tokens</li><li>mean: 18.13 tokens</li><li>max: 65 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 18.25 tokens</li><li>max: 90 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.54</li><li>max: 1.0</li></ul> |
- * Samples:
-   | sentence1 | sentence2 | score |
-   |:-------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:--------------------------------|
-   | <code>schütze wegen mordes an schwarzem us-jugendlichen angeklagt</code> | <code>gedanken zu den rassenbeziehungen unter einem schwarzen präsidenten</code> | <code>0.1599999964237213</code> |
-   | <code>fußballspieler kicken einen fußball in das tor.</code> | <code>Ein Fußballspieler schießt ein Tor.</code> | <code>0.7599999904632568</code> |
-   | <code>obama lockert abschiebungsregeln für junge einwanderer</code> | <code>usa lockert abschiebebestimmungen für jugendliche: napolitano</code> | <code>0.800000011920929</code> |
- * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
-   ```json
-   {
-       "loss": "CosineSimilarityLoss",
-       "matryoshka_dims": [
-           1024,
-           768,
-           512,
-           256,
-           128,
-           64
-       ],
-       "matryoshka_weights": [
-           1,
-           1,
-           1,
-           1,
-           1,
-           1
-       ],
-       "n_dims_per_step": -1
-   }
-   ```
-
- ### Evaluation Dataset
-
- #### PhilipMay/stsb_multi_mt
-
- * Dataset: [PhilipMay/stsb_multi_mt](https://huggingface.co/datasets/PhilipMay/stsb_multi_mt) at [3acaa3d](https://huggingface.co/datasets/PhilipMay/stsb_multi_mt/tree/3acaa3dd8c91649e0b8e627ffad891f059e47c8c)
- * Size: 1,500 evaluation samples
- * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>score</code>
- * Approximate statistics based on the first 1000 samples:
-   | | sentence1 | sentence2 | score |
-   |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------|
-   | type | string | string | float |
-   | details | <ul><li>min: 5 tokens</li><li>mean: 16.54 tokens</li><li>max: 53 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 16.53 tokens</li><li>max: 47 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.47</li><li>max: 1.0</li></ul> |
- * Samples:
-   | sentence1 | sentence2 | score |
-   |:-------------------------------------------------------------|:-----------------------------------------------------------|:-------------------------------|
-   | <code>Ein Mann mit einem Schutzhelm tanzt.</code> | <code>Ein Mann mit einem Schutzhelm tanzt.</code> | <code>1.0</code> |
-   | <code>Ein kleines Kind reitet auf einem Pferd.</code> | <code>Ein Kind reitet auf einem Pferd.</code> | <code>0.949999988079071</code> |
-   | <code>Ein Mann verfüttert eine Maus an eine Schlange.</code> | <code>Der Mann füttert die Schlange mit einer Maus.</code> | <code>1.0</code> |
- * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
-   ```json
-   {
-       "loss": "CosineSimilarityLoss",
-       "matryoshka_dims": [
-           1024,
-           768,
-           512,
-           256,
-           128,
-           64
-       ],
-       "matryoshka_weights": [
-           1,
-           1,
-           1,
-           1,
-           1,
-           1
-       ],
-       "n_dims_per_step": -1
-   }
-   ```
-
- ### Training Hyperparameters
- #### Non-Default Hyperparameters
-
- - `eval_strategy`: steps
- - `per_device_train_batch_size`: 4
- - `per_device_eval_batch_size`: 16
- - `learning_rate`: 5e-06
- - `num_train_epochs`: 1
- - `warmup_ratio`: 0.1
- - `bf16`: True
-
- #### All Hyperparameters
- <details><summary>Click to expand</summary>
-
- - `overwrite_output_dir`: False
- - `do_predict`: False
- - `eval_strategy`: steps
- - `prediction_loss_only`: True
- - `per_device_train_batch_size`: 4
- - `per_device_eval_batch_size`: 16
- - `per_gpu_train_batch_size`: None
- - `per_gpu_eval_batch_size`: None
- - `gradient_accumulation_steps`: 1
- - `eval_accumulation_steps`: None
- - `learning_rate`: 5e-06
- - `weight_decay`: 0.0
- - `adam_beta1`: 0.9
- - `adam_beta2`: 0.999
- - `adam_epsilon`: 1e-08
- - `max_grad_norm`: 1.0
- - `num_train_epochs`: 1
- - `max_steps`: -1
- - `lr_scheduler_type`: linear
- - `lr_scheduler_kwargs`: {}
- - `warmup_ratio`: 0.1
- - `warmup_steps`: 0
- - `log_level`: passive
- - `log_level_replica`: warning
- - `log_on_each_node`: True
- - `logging_nan_inf_filter`: True
- - `save_safetensors`: True
- - `save_on_each_node`: False
- - `save_only_model`: False
- - `restore_callback_states_from_checkpoint`: False
- - `no_cuda`: False
- - `use_cpu`: False
- - `use_mps_device`: False
- - `seed`: 42
- - `data_seed`: None
- - `jit_mode_eval`: False
- - `use_ipex`: False
- - `bf16`: True
- - `fp16`: False
- - `fp16_opt_level`: O1
- - `half_precision_backend`: auto
- - `bf16_full_eval`: False
- - `fp16_full_eval`: False
- - `tf32`: None
- - `local_rank`: 0
- - `ddp_backend`: None
- - `tpu_num_cores`: None
- - `tpu_metrics_debug`: False
- - `debug`: []
- - `dataloader_drop_last`: False
- - `dataloader_num_workers`: 0
- - `dataloader_prefetch_factor`: None
- - `past_index`: -1
- - `disable_tqdm`: False
- - `remove_unused_columns`: True
- - `label_names`: None
- - `load_best_model_at_end`: False
- - `ignore_data_skip`: False
- - `fsdp`: []
- - `fsdp_min_num_params`: 0
- - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- - `fsdp_transformer_layer_cls_to_wrap`: None
- - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- - `deepspeed`: None
- - `label_smoothing_factor`: 0.0
- - `optim`: adamw_torch
- - `optim_args`: None
- - `adafactor`: False
- - `group_by_length`: False
- - `length_column_name`: length
- - `ddp_find_unused_parameters`: None
- - `ddp_bucket_cap_mb`: None
- - `ddp_broadcast_buffers`: False
- - `dataloader_pin_memory`: True
- - `dataloader_persistent_workers`: False
- - `skip_memory_metrics`: True
- - `use_legacy_prediction_loop`: False
- - `push_to_hub`: False
- - `resume_from_checkpoint`: None
- - `hub_model_id`: None
- - `hub_strategy`: every_save
- - `hub_private_repo`: False
- - `hub_always_push`: False
- - `gradient_checkpointing`: False
- - `gradient_checkpointing_kwargs`: None
- - `include_inputs_for_metrics`: False
- - `eval_do_concat_batches`: True
- - `fp16_backend`: auto
- - `push_to_hub_model_id`: None
- - `push_to_hub_organization`: None
- - `mp_parameters`:
- - `auto_find_batch_size`: False
- - `full_determinism`: False
- - `torchdynamo`: None
- - `ray_scope`: last
- - `ddp_timeout`: 1800
- - `torch_compile`: False
- - `torch_compile_backend`: None
- - `torch_compile_mode`: None
- - `dispatch_batches`: None
- - `split_batches`: None
- - `include_tokens_per_second`: False
- - `include_num_input_tokens_seen`: False
- - `neftune_noise_alpha`: None
- - `optim_target_modules`: None
- - `batch_eval_metrics`: False
- - `eval_on_start`: False
- - `batch_sampler`: batch_sampler
- - `multi_dataset_batch_sampler`: proportional
-
- </details>
-
- ### Training Logs
- | Epoch | Step | Training Loss | loss | sts-dev-1024_spearman_cosine | sts-dev-128_spearman_cosine | sts-dev-256_spearman_cosine | sts-dev-512_spearman_cosine | sts-dev-64_spearman_cosine | sts-dev-768_spearman_cosine | sts-test-1024_spearman_cosine | sts-test-128_spearman_cosine | sts-test-256_spearman_cosine | sts-test-512_spearman_cosine | sts-test-64_spearman_cosine | sts-test-768_spearman_cosine |
- |:------:|:----:|:-------------:|:------:|:----------------------------:|:---------------------------:|:---------------------------:|:---------------------------:|:--------------------------:|:---------------------------:|:-----------------------------:|:----------------------------:|:----------------------------:|:----------------------------:|:---------------------------:|:----------------------------:|
- | 0.0174 | 100 | 0.2958 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.0348 | 200 | 0.2914 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.0522 | 300 | 0.2691 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.0696 | 400 | 0.253 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.0870 | 500 | 0.2458 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.1044 | 600 | 0.2594 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.1218 | 700 | 0.2339 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.1392 | 800 | 0.2245 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.1565 | 900 | 0.2122 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.1739 | 1000 | 0.2369 | 0.2394 | 0.8402 | 0.8277 | 0.8352 | 0.8393 | 0.8164 | 0.8404 | - | - | - | - | - | - |
- | 0.1913 | 1100 | 0.2308 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.2087 | 1200 | 0.2292 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.2261 | 1300 | 0.2232 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.2435 | 1400 | 0.2001 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.2609 | 1500 | 0.2139 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.2783 | 1600 | 0.1906 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.2957 | 1700 | 0.1895 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.3131 | 1800 | 0.2011 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.3305 | 1900 | 0.1723 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.3479 | 2000 | 0.1886 | 0.2340 | 0.8448 | 0.8321 | 0.8385 | 0.8435 | 0.8233 | 0.8449 | - | - | - | - | - | - |
- | 0.3653 | 2100 | 0.1719 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.3827 | 2200 | 0.1879 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.4001 | 2300 | 0.187 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.4175 | 2400 | 0.1487 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.4349 | 2500 | 0.1752 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.4523 | 2600 | 0.1475 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.4696 | 2700 | 0.1695 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.4870 | 2800 | 0.1615 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.5044 | 2900 | 0.1558 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.5218 | 3000 | 0.1713 | 0.2357 | 0.8457 | 0.8344 | 0.8406 | 0.8447 | 0.8266 | 0.8461 | - | - | - | - | - | - |
- | 0.5392 | 3100 | 0.1556 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.5566 | 3200 | 0.1743 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.5740 | 3300 | 0.1426 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.5914 | 3400 | 0.1519 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.6088 | 3500 | 0.1763 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.6262 | 3600 | 0.1456 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.6436 | 3700 | 0.1649 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.6610 | 3800 | 0.1427 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.6784 | 3900 | 0.1284 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.6958 | 4000 | 0.1533 | 0.2344 | 0.8417 | 0.8291 | 0.8357 | 0.8402 | 0.8225 | 0.8421 | - | - | - | - | - | - |
- | 0.7132 | 4100 | 0.1397 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.7306 | 4200 | 0.1505 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.7480 | 4300 | 0.1355 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.7654 | 4400 | 0.1275 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.7827 | 4500 | 0.1599 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.8001 | 4600 | 0.1493 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.8175 | 4700 | 0.1497 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.8349 | 4800 | 0.1492 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.8523 | 4900 | 0.1378 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.8697 | 5000 | 0.1391 | 0.2362 | 0.8453 | 0.8336 | 0.8392 | 0.8438 | 0.8266 | 0.8454 | - | - | - | - | - | - |
- | 0.8871 | 5100 | 0.1622 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.9045 | 5200 | 0.1456 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.9219 | 5300 | 0.1367 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.9393 | 5400 | 0.1243 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.9567 | 5500 | 0.1389 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.9741 | 5600 | 0.1338 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 0.9915 | 5700 | 0.1146 | - | - | - | - | - | - | - | - | - | - | - | - | - |
- | 1.0 | 5749 | - | - | - | - | - | - | - | - | 0.8189 | 0.8093 | 0.8150 | 0.8163 | 0.8025 | 0.8183 |
-
-
- ### Framework Versions
- - Python: 3.9.16
- - Sentence Transformers: 3.0.0
- - Transformers: 4.42.0.dev0
- - PyTorch: 2.2.2+cu118
- - Accelerate: 0.31.0
- - Datasets: 2.19.1
- - Tokenizers: 0.19.1

  ## Citation

 
  ---
  language:
  - de
  library_name: sentence-transformers
  tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - loss:MatryoshkaLoss
+ base_model: aari1995/gbert-large-2
  metrics:
  - pearson_cosine
  - spearman_cosine
  - Die Frau prüft die Augen des Mannes.
  - Ein Mann ist auf einem Dach
  pipeline_tag: sentence-similarity
  ---

+ # German Semantic V3

+ Finally, a new version! The successor of German_Semantic_STS_V2 is here and comes with loads of cool new features!

+ ## Major updates and USPs:

+ - **Flexibility:** The model was trained with flexible sequence lengths and embedding truncation; flexibility is a core feature. Smaller dimensions bring only a minor trade-off in quality.
+ - **Sequence length:** 8192 tokens (16 times more than V2 and other models), thanks to the Jina team's ALiBi implementation!
+ - **Matryoshka Embeddings:** The model is trained for embedding sizes from 1024 down to 64, allowing you to store much smaller embeddings with little quality loss (see the sketch after this list).
+ - **German only:** This model is German-only and has rich cultural knowledge about Germany and German topics. Thanks to its tokenizer, it also learns more efficiently, deals better with shorter queries, and is generally more nuanced in many scenarios.
+ - **Updated knowledge and quality data:** The backbone of this model is gbert-large by deepset. Stage-2 pretraining on 1 billion tokens of German fineweb by occiglot ensures up-to-date knowledge.
+ - **Typo and Casing:** This model was trained to be robust against minor typos and casing variations, which costs a little benchmark performance and learning speed during training but makes the embeddings more robust.
+ - **Pooling Function:** Moving away from mean pooling towards using the CLS token, which generally seems to learn better after the Stage-2 pretraining and allows for more flexibility.
+ - **License:** Apache 2.0
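
To illustrate the Matryoshka property, here is a minimal sketch of what embedding truncation does under the hood. It is an editorial illustration, not part of the committed README; the expectation that the two scores land close together follows from the Matryoshka training described above, and in practice you would simply pass `truncate_dim` as shown under Usage:

```python
# Minimal sketch: truncate full Matryoshka embeddings by hand and re-normalize.
# For everyday use, prefer the `truncate_dim` argument shown under Usage.
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("aari1995/German_Semantic_V3", trust_remote_code=True)

sentences = ["Eine Flagge weht.", "Die Flagge bewegte sich in der Luft."]
full = model.encode(sentences)  # numpy array of shape (2, 1024)

# Keep only the first 256 dimensions, then re-normalize for cosine similarity.
small = full[:, :256]
small = small / np.linalg.norm(small, axis=1, keepdims=True)

cos_full = float(model.similarity(full, full)[0][1])
cos_small = float(small[0] @ small[1])
print(f"cosine @1024: {cos_full:.3f}, cosine @256: {cos_small:.3f}")  # should be close
```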

+ ## Usage:

+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ matryoshka_dim = 1024  # how big your embeddings should be; choose from: 64, 128, 256, 512, 768, 1024
+ model = SentenceTransformer("aari1995/German_Semantic_V3", trust_remote_code=True, truncate_dim=matryoshka_dim)
+
+ # model.truncate_dim = 64  # the truncation dimension can also be changed after loading
+ # model.max_seq_length = 512  # optionally, set a lower maximum sequence length if your hardware is limited
+
+ # Run inference
+ sentences = [
+     'Eine Flagge weht.',
+     'Die Flagge bewegte sich in der Luft.',
+     'Zwei Personen beobachten das Wasser.',
+ ]
+ embeddings = model.encode(sentences)
+
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(embeddings, embeddings)
+ ```
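
Since semantic search is one of the use cases named for this model family, here is a hedged sketch (again an editorial illustration; the corpus and query strings are made up) of searching a small German corpus at a reduced embedding size:

```python
# Illustrative semantic search with truncated (256-dim) embeddings.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("aari1995/German_Semantic_V3", trust_remote_code=True, truncate_dim=256)

corpus = [
    "Ein Mann spielt Gitarre.",
    "Eine Frau liest ein Buch im Park.",
    "Zwei Hunde rennen über die Wiese.",
]
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)
query_embedding = model.encode("Jemand macht Musik.", convert_to_tensor=True)

# util.semantic_search ranks corpus entries by cosine similarity to the query.
hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=2)[0]
for hit in hits:
    print(f"{corpus[hit['corpus_id']]} (score: {hit['score']:.3f})")
```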

  ### Full Model Architecture

  ## Evaluation

+ Evaluation to come.

  ## Citation