Update README.md
Browse files
README.md
CHANGED
@@ -1,24 +1,13 @@
|
|
1 |
---
|
2 |
language:
|
3 |
- de
|
4 |
-
- en
|
5 |
-
- es
|
6 |
-
- fr
|
7 |
-
- it
|
8 |
-
- nl
|
9 |
-
- pl
|
10 |
-
- pt
|
11 |
-
- ru
|
12 |
-
- zh
|
13 |
library_name: sentence-transformers
|
14 |
tags:
|
15 |
- sentence-transformers
|
16 |
- sentence-similarity
|
17 |
- feature-extraction
|
18 |
-
- dataset_size:10K<n<100K
|
19 |
- loss:MatryoshkaLoss
|
20 |
-
-
|
21 |
-
base_model: aari1995/gbert-large-2-cls-nlisim
|
22 |
metrics:
|
23 |
- pearson_cosine
|
24 |
- spearman_cosine
|
@@ -57,478 +46,48 @@ widget:
|
|
57 |
- Die Frau prüft die Augen des Mannes.
|
58 |
- Ein Mann ist auf einem Dach
|
59 |
pipeline_tag: sentence-similarity
|
60 |
-
model-index:
|
61 |
-
- name: SentenceTransformer based on aari1995/gbert-large-2-cls-nlisim
|
62 |
-
results:
|
63 |
-
- task:
|
64 |
-
type: semantic-similarity
|
65 |
-
name: Semantic Similarity
|
66 |
-
dataset:
|
67 |
-
name: sts dev 1024
|
68 |
-
type: sts-dev-1024
|
69 |
-
metrics:
|
70 |
-
- type: pearson_cosine
|
71 |
-
value: 0.8417806877288009
|
72 |
-
name: Pearson Cosine
|
73 |
-
- type: spearman_cosine
|
74 |
-
value: 0.8452891310343582
|
75 |
-
name: Spearman Cosine
|
76 |
-
- type: pearson_manhattan
|
77 |
-
value: 0.8418749526406495
|
78 |
-
name: Pearson Manhattan
|
79 |
-
- type: spearman_manhattan
|
80 |
-
value: 0.8450348906331776
|
81 |
-
name: Spearman Manhattan
|
82 |
-
- type: pearson_euclidean
|
83 |
-
value: 0.8422615095001257
|
84 |
-
name: Pearson Euclidean
|
85 |
-
- type: spearman_euclidean
|
86 |
-
value: 0.8453390990427703
|
87 |
-
name: Spearman Euclidean
|
88 |
-
- type: pearson_dot
|
89 |
-
value: 0.8416625079549063
|
90 |
-
name: Pearson Dot
|
91 |
-
- type: spearman_dot
|
92 |
-
value: 0.8450616171323844
|
93 |
-
name: Spearman Dot
|
94 |
-
- type: pearson_max
|
95 |
-
value: 0.8422615095001257
|
96 |
-
name: Pearson Max
|
97 |
-
- type: spearman_max
|
98 |
-
value: 0.8453390990427703
|
99 |
-
name: Spearman Max
|
100 |
-
- task:
|
101 |
-
type: semantic-similarity
|
102 |
-
name: Semantic Similarity
|
103 |
-
dataset:
|
104 |
-
name: sts dev 768
|
105 |
-
type: sts-dev-768
|
106 |
-
metrics:
|
107 |
-
- type: pearson_cosine
|
108 |
-
value: 0.8418107096367227
|
109 |
-
name: Pearson Cosine
|
110 |
-
- type: spearman_cosine
|
111 |
-
value: 0.8453863409322975
|
112 |
-
name: Spearman Cosine
|
113 |
-
- type: pearson_manhattan
|
114 |
-
value: 0.8418527770289471
|
115 |
-
name: Pearson Manhattan
|
116 |
-
- type: spearman_manhattan
|
117 |
-
value: 0.8448328869253576
|
118 |
-
name: Spearman Manhattan
|
119 |
-
- type: pearson_euclidean
|
120 |
-
value: 0.8422791953749277
|
121 |
-
name: Pearson Euclidean
|
122 |
-
- type: spearman_euclidean
|
123 |
-
value: 0.8451547857394669
|
124 |
-
name: Spearman Euclidean
|
125 |
-
- type: pearson_dot
|
126 |
-
value: 0.8417682812591724
|
127 |
-
name: Pearson Dot
|
128 |
-
- type: spearman_dot
|
129 |
-
value: 0.8446927200809794
|
130 |
-
name: Spearman Dot
|
131 |
-
- type: pearson_max
|
132 |
-
value: 0.8422791953749277
|
133 |
-
name: Pearson Max
|
134 |
-
- type: spearman_max
|
135 |
-
value: 0.8453863409322975
|
136 |
-
name: Spearman Max
|
137 |
-
- task:
|
138 |
-
type: semantic-similarity
|
139 |
-
name: Semantic Similarity
|
140 |
-
dataset:
|
141 |
-
name: sts dev 512
|
142 |
-
type: sts-dev-512
|
143 |
-
metrics:
|
144 |
-
- type: pearson_cosine
|
145 |
-
value: 0.8394808864309438
|
146 |
-
name: Pearson Cosine
|
147 |
-
- type: spearman_cosine
|
148 |
-
value: 0.8437551103291275
|
149 |
-
name: Spearman Cosine
|
150 |
-
- type: pearson_manhattan
|
151 |
-
value: 0.8420246416513741
|
152 |
-
name: Pearson Manhattan
|
153 |
-
- type: spearman_manhattan
|
154 |
-
value: 0.8447335398769396
|
155 |
-
name: Spearman Manhattan
|
156 |
-
- type: pearson_euclidean
|
157 |
-
value: 0.8422722079216611
|
158 |
-
name: Pearson Euclidean
|
159 |
-
- type: spearman_euclidean
|
160 |
-
value: 0.8448909261141044
|
161 |
-
name: Spearman Euclidean
|
162 |
-
- type: pearson_dot
|
163 |
-
value: 0.8358204287638725
|
164 |
-
name: Pearson Dot
|
165 |
-
- type: spearman_dot
|
166 |
-
value: 0.8380004733308642
|
167 |
-
name: Spearman Dot
|
168 |
-
- type: pearson_max
|
169 |
-
value: 0.8422722079216611
|
170 |
-
name: Pearson Max
|
171 |
-
- type: spearman_max
|
172 |
-
value: 0.8448909261141044
|
173 |
-
name: Spearman Max
|
174 |
-
- task:
|
175 |
-
type: semantic-similarity
|
176 |
-
name: Semantic Similarity
|
177 |
-
dataset:
|
178 |
-
name: sts dev 256
|
179 |
-
type: sts-dev-256
|
180 |
-
metrics:
|
181 |
-
- type: pearson_cosine
|
182 |
-
value: 0.833879413726309
|
183 |
-
name: Pearson Cosine
|
184 |
-
- type: spearman_cosine
|
185 |
-
value: 0.8392439788855341
|
186 |
-
name: Spearman Cosine
|
187 |
-
- type: pearson_manhattan
|
188 |
-
value: 0.8379618268497928
|
189 |
-
name: Pearson Manhattan
|
190 |
-
- type: spearman_manhattan
|
191 |
-
value: 0.839860826315925
|
192 |
-
name: Spearman Manhattan
|
193 |
-
- type: pearson_euclidean
|
194 |
-
value: 0.838931461279174
|
195 |
-
name: Pearson Euclidean
|
196 |
-
- type: spearman_euclidean
|
197 |
-
value: 0.8404811150299943
|
198 |
-
name: Spearman Euclidean
|
199 |
-
- type: pearson_dot
|
200 |
-
value: 0.8230557648139373
|
201 |
-
name: Pearson Dot
|
202 |
-
- type: spearman_dot
|
203 |
-
value: 0.8242532718299653
|
204 |
-
name: Spearman Dot
|
205 |
-
- type: pearson_max
|
206 |
-
value: 0.838931461279174
|
207 |
-
name: Pearson Max
|
208 |
-
- type: spearman_max
|
209 |
-
value: 0.8404811150299943
|
210 |
-
name: Spearman Max
|
211 |
-
- task:
|
212 |
-
type: semantic-similarity
|
213 |
-
name: Semantic Similarity
|
214 |
-
dataset:
|
215 |
-
name: sts dev 128
|
216 |
-
type: sts-dev-128
|
217 |
-
metrics:
|
218 |
-
- type: pearson_cosine
|
219 |
-
value: 0.8253967606033702
|
220 |
-
name: Pearson Cosine
|
221 |
-
- type: spearman_cosine
|
222 |
-
value: 0.8335750690073012
|
223 |
-
name: Spearman Cosine
|
224 |
-
- type: pearson_manhattan
|
225 |
-
value: 0.8341588626988476
|
226 |
-
name: Pearson Manhattan
|
227 |
-
- type: spearman_manhattan
|
228 |
-
value: 0.8343994326050966
|
229 |
-
name: Spearman Manhattan
|
230 |
-
- type: pearson_euclidean
|
231 |
-
value: 0.8355263623880292
|
232 |
-
name: Pearson Euclidean
|
233 |
-
- type: spearman_euclidean
|
234 |
-
value: 0.8358857095028451
|
235 |
-
name: Spearman Euclidean
|
236 |
-
- type: pearson_dot
|
237 |
-
value: 0.8035163216908426
|
238 |
-
name: Pearson Dot
|
239 |
-
- type: spearman_dot
|
240 |
-
value: 0.8050271037746011
|
241 |
-
name: Spearman Dot
|
242 |
-
- type: pearson_max
|
243 |
-
value: 0.8355263623880292
|
244 |
-
name: Pearson Max
|
245 |
-
- type: spearman_max
|
246 |
-
value: 0.8358857095028451
|
247 |
-
name: Spearman Max
|
248 |
-
- task:
|
249 |
-
type: semantic-similarity
|
250 |
-
name: Semantic Similarity
|
251 |
-
dataset:
|
252 |
-
name: sts dev 64
|
253 |
-
type: sts-dev-64
|
254 |
-
metrics:
|
255 |
-
- type: pearson_cosine
|
256 |
-
value: 0.8150661334039712
|
257 |
-
name: Pearson Cosine
|
258 |
-
- type: spearman_cosine
|
259 |
-
value: 0.8265558538619309
|
260 |
-
name: Spearman Cosine
|
261 |
-
- type: pearson_manhattan
|
262 |
-
value: 0.8241988539394505
|
263 |
-
name: Pearson Manhattan
|
264 |
-
- type: spearman_manhattan
|
265 |
-
value: 0.8238763145175863
|
266 |
-
name: Spearman Manhattan
|
267 |
-
- type: pearson_euclidean
|
268 |
-
value: 0.8274925218859535
|
269 |
-
name: Pearson Euclidean
|
270 |
-
- type: spearman_euclidean
|
271 |
-
value: 0.8270778062044848
|
272 |
-
name: Spearman Euclidean
|
273 |
-
- type: pearson_dot
|
274 |
-
value: 0.7773847317840161
|
275 |
-
name: Pearson Dot
|
276 |
-
- type: spearman_dot
|
277 |
-
value: 0.7790338242936304
|
278 |
-
name: Spearman Dot
|
279 |
-
- type: pearson_max
|
280 |
-
value: 0.8274925218859535
|
281 |
-
name: Pearson Max
|
282 |
-
- type: spearman_max
|
283 |
-
value: 0.8270778062044848
|
284 |
-
name: Spearman Max
|
285 |
-
- task:
|
286 |
-
type: semantic-similarity
|
287 |
-
name: Semantic Similarity
|
288 |
-
dataset:
|
289 |
-
name: sts test 1024
|
290 |
-
type: sts-test-1024
|
291 |
-
metrics:
|
292 |
-
- type: pearson_cosine
|
293 |
-
value: 0.8130772714952826
|
294 |
-
name: Pearson Cosine
|
295 |
-
- type: spearman_cosine
|
296 |
-
value: 0.8188901246173036
|
297 |
-
name: Spearman Cosine
|
298 |
-
- type: pearson_manhattan
|
299 |
-
value: 0.8208715312691268
|
300 |
-
name: Pearson Manhattan
|
301 |
-
- type: spearman_manhattan
|
302 |
-
value: 0.8195095089412118
|
303 |
-
name: Spearman Manhattan
|
304 |
-
- type: pearson_euclidean
|
305 |
-
value: 0.820344720619671
|
306 |
-
name: Pearson Euclidean
|
307 |
-
- type: spearman_euclidean
|
308 |
-
value: 0.8189263018901494
|
309 |
-
name: Spearman Euclidean
|
310 |
-
- type: pearson_dot
|
311 |
-
value: 0.8127924456922464
|
312 |
-
name: Pearson Dot
|
313 |
-
- type: spearman_dot
|
314 |
-
value: 0.8185815083131535
|
315 |
-
name: Spearman Dot
|
316 |
-
- type: pearson_max
|
317 |
-
value: 0.8208715312691268
|
318 |
-
name: Pearson Max
|
319 |
-
- type: spearman_max
|
320 |
-
value: 0.8195095089412118
|
321 |
-
name: Spearman Max
|
322 |
-
- task:
|
323 |
-
type: semantic-similarity
|
324 |
-
name: Semantic Similarity
|
325 |
-
dataset:
|
326 |
-
name: sts test 768
|
327 |
-
type: sts-test-768
|
328 |
-
metrics:
|
329 |
-
- type: pearson_cosine
|
330 |
-
value: 0.8121757739236393
|
331 |
-
name: Pearson Cosine
|
332 |
-
- type: spearman_cosine
|
333 |
-
value: 0.8182913347635533
|
334 |
-
name: Spearman Cosine
|
335 |
-
- type: pearson_manhattan
|
336 |
-
value: 0.820604714791802
|
337 |
-
name: Pearson Manhattan
|
338 |
-
- type: spearman_manhattan
|
339 |
-
value: 0.8190481839997107
|
340 |
-
name: Spearman Manhattan
|
341 |
-
- type: pearson_euclidean
|
342 |
-
value: 0.8197462057663948
|
343 |
-
name: Pearson Euclidean
|
344 |
-
- type: spearman_euclidean
|
345 |
-
value: 0.8183157116237637
|
346 |
-
name: Spearman Euclidean
|
347 |
-
- type: pearson_dot
|
348 |
-
value: 0.8106698462984598
|
349 |
-
name: Pearson Dot
|
350 |
-
- type: spearman_dot
|
351 |
-
value: 0.8148932181769889
|
352 |
-
name: Spearman Dot
|
353 |
-
- type: pearson_max
|
354 |
-
value: 0.820604714791802
|
355 |
-
name: Pearson Max
|
356 |
-
- type: spearman_max
|
357 |
-
value: 0.8190481839997107
|
358 |
-
name: Spearman Max
|
359 |
-
- task:
|
360 |
-
type: semantic-similarity
|
361 |
-
name: Semantic Similarity
|
362 |
-
dataset:
|
363 |
-
name: sts test 512
|
364 |
-
type: sts-test-512
|
365 |
-
metrics:
|
366 |
-
- type: pearson_cosine
|
367 |
-
value: 0.8096452235754106
|
368 |
-
name: Pearson Cosine
|
369 |
-
- type: spearman_cosine
|
370 |
-
value: 0.816264314810491
|
371 |
-
name: Spearman Cosine
|
372 |
-
- type: pearson_manhattan
|
373 |
-
value: 0.8180021560255247
|
374 |
-
name: Pearson Manhattan
|
375 |
-
- type: spearman_manhattan
|
376 |
-
value: 0.8165486306356095
|
377 |
-
name: Spearman Manhattan
|
378 |
-
- type: pearson_euclidean
|
379 |
-
value: 0.8173829404008947
|
380 |
-
name: Pearson Euclidean
|
381 |
-
- type: spearman_euclidean
|
382 |
-
value: 0.8158592878546184
|
383 |
-
name: Spearman Euclidean
|
384 |
-
- type: pearson_dot
|
385 |
-
value: 0.8059176831913651
|
386 |
-
name: Pearson Dot
|
387 |
-
- type: spearman_dot
|
388 |
-
value: 0.8088972406630007
|
389 |
-
name: Spearman Dot
|
390 |
-
- type: pearson_max
|
391 |
-
value: 0.8180021560255247
|
392 |
-
name: Pearson Max
|
393 |
-
- type: spearman_max
|
394 |
-
value: 0.8165486306356095
|
395 |
-
name: Spearman Max
|
396 |
-
- task:
|
397 |
-
type: semantic-similarity
|
398 |
-
name: Semantic Similarity
|
399 |
-
dataset:
|
400 |
-
name: sts test 256
|
401 |
-
type: sts-test-256
|
402 |
-
metrics:
|
403 |
-
- type: pearson_cosine
|
404 |
-
value: 0.8070921035712145
|
405 |
-
name: Pearson Cosine
|
406 |
-
- type: spearman_cosine
|
407 |
-
value: 0.8150266310280979
|
408 |
-
name: Spearman Cosine
|
409 |
-
- type: pearson_manhattan
|
410 |
-
value: 0.818409081545237
|
411 |
-
name: Pearson Manhattan
|
412 |
-
- type: spearman_manhattan
|
413 |
-
value: 0.8167245415653657
|
414 |
-
name: Spearman Manhattan
|
415 |
-
- type: pearson_euclidean
|
416 |
-
value: 0.8176811220335696
|
417 |
-
name: Pearson Euclidean
|
418 |
-
- type: spearman_euclidean
|
419 |
-
value: 0.8158894222194816
|
420 |
-
name: Spearman Euclidean
|
421 |
-
- type: pearson_dot
|
422 |
-
value: 0.795483328805793
|
423 |
-
name: Pearson Dot
|
424 |
-
- type: spearman_dot
|
425 |
-
value: 0.7956062163122977
|
426 |
-
name: Spearman Dot
|
427 |
-
- type: pearson_max
|
428 |
-
value: 0.818409081545237
|
429 |
-
name: Pearson Max
|
430 |
-
- type: spearman_max
|
431 |
-
value: 0.8167245415653657
|
432 |
-
name: Spearman Max
|
433 |
-
- task:
|
434 |
-
type: semantic-similarity
|
435 |
-
name: Semantic Similarity
|
436 |
-
dataset:
|
437 |
-
name: sts test 128
|
438 |
-
type: sts-test-128
|
439 |
-
metrics:
|
440 |
-
- type: pearson_cosine
|
441 |
-
value: 0.7974039089035316
|
442 |
-
name: Pearson Cosine
|
443 |
-
- type: spearman_cosine
|
444 |
-
value: 0.8093067652791092
|
445 |
-
name: Spearman Cosine
|
446 |
-
- type: pearson_manhattan
|
447 |
-
value: 0.8125792968401813
|
448 |
-
name: Pearson Manhattan
|
449 |
-
- type: spearman_manhattan
|
450 |
-
value: 0.8121486514324944
|
451 |
-
name: Spearman Manhattan
|
452 |
-
- type: pearson_euclidean
|
453 |
-
value: 0.8119102513178551
|
454 |
-
name: Pearson Euclidean
|
455 |
-
- type: spearman_euclidean
|
456 |
-
value: 0.811152531425261
|
457 |
-
name: Spearman Euclidean
|
458 |
-
- type: pearson_dot
|
459 |
-
value: 0.7739555890021923
|
460 |
-
name: Pearson Dot
|
461 |
-
- type: spearman_dot
|
462 |
-
value: 0.770072655568691
|
463 |
-
name: Spearman Dot
|
464 |
-
- type: pearson_max
|
465 |
-
value: 0.8125792968401813
|
466 |
-
name: Pearson Max
|
467 |
-
- type: spearman_max
|
468 |
-
value: 0.8121486514324944
|
469 |
-
name: Spearman Max
|
470 |
-
- task:
|
471 |
-
type: semantic-similarity
|
472 |
-
name: Semantic Similarity
|
473 |
-
dataset:
|
474 |
-
name: sts test 64
|
475 |
-
type: sts-test-64
|
476 |
-
metrics:
|
477 |
-
- type: pearson_cosine
|
478 |
-
value: 0.7873069617689994
|
479 |
-
name: Pearson Cosine
|
480 |
-
- type: spearman_cosine
|
481 |
-
value: 0.8024994399645912
|
482 |
-
name: Spearman Cosine
|
483 |
-
- type: pearson_manhattan
|
484 |
-
value: 0.8048161563115213
|
485 |
-
name: Pearson Manhattan
|
486 |
-
- type: spearman_manhattan
|
487 |
-
value: 0.8031972835914969
|
488 |
-
name: Spearman Manhattan
|
489 |
-
- type: pearson_euclidean
|
490 |
-
value: 0.8060416893207731
|
491 |
-
name: Pearson Euclidean
|
492 |
-
- type: spearman_euclidean
|
493 |
-
value: 0.8041515980374414
|
494 |
-
name: Spearman Euclidean
|
495 |
-
- type: pearson_dot
|
496 |
-
value: 0.747911221220991
|
497 |
-
name: Pearson Dot
|
498 |
-
- type: spearman_dot
|
499 |
-
value: 0.7386011869481828
|
500 |
-
name: Spearman Dot
|
501 |
-
- type: pearson_max
|
502 |
-
value: 0.8060416893207731
|
503 |
-
name: Pearson Max
|
504 |
-
- type: spearman_max
|
505 |
-
value: 0.8041515980374414
|
506 |
-
name: Spearman Max
|
507 |
---
|
508 |
|
509 |
-
#
|
510 |
|
511 |
-
|
512 |
-
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [aari1995/gbert-large-2-cls-nlisim](https://huggingface.co/aari1995/gbert-large-2-cls-nlisim) on the [PhilipMay/stsb_multi_mt](https://huggingface.co/datasets/PhilipMay/stsb_multi_mt) dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
513 |
|
514 |
-
##
|
515 |
|
516 |
-
|
517 |
-
- **
|
518 |
-
- **
|
519 |
-
- **
|
520 |
-
- **
|
521 |
-
- **
|
522 |
-
- **
|
523 |
-
|
524 |
-
- **Languages:** de, en, es, fr, it, nl, pl, pt, ru, zh
|
525 |
-
<!-- - **License:** Unknown -->
|
526 |
|
527 |
-
|
528 |
|
529 |
-
|
530 |
-
|
531 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
532 |
|
533 |
### Full Model Architecture
|
534 |
|
@@ -597,507 +156,7 @@ You can finetune this model on your own dataset.
|
|
597 |
|
598 |
## Evaluation
|
599 |
|
600 |
-
|
601 |
-
|
602 |
-
#### Semantic Similarity
|
603 |
-
* Dataset: `sts-dev-1024`
|
604 |
-
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
605 |
-
|
606 |
-
| Metric | Value |
|
607 |
-
|:--------------------|:-----------|
|
608 |
-
| pearson_cosine | 0.8418 |
|
609 |
-
| **spearman_cosine** | **0.8453** |
|
610 |
-
| pearson_manhattan | 0.8419 |
|
611 |
-
| spearman_manhattan | 0.845 |
|
612 |
-
| pearson_euclidean | 0.8423 |
|
613 |
-
| spearman_euclidean | 0.8453 |
|
614 |
-
| pearson_dot | 0.8417 |
|
615 |
-
| spearman_dot | 0.8451 |
|
616 |
-
| pearson_max | 0.8423 |
|
617 |
-
| spearman_max | 0.8453 |
|
618 |
-
|
619 |
-
#### Semantic Similarity
|
620 |
-
* Dataset: `sts-dev-768`
|
621 |
-
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
622 |
-
|
623 |
-
| Metric | Value |
|
624 |
-
|:--------------------|:-----------|
|
625 |
-
| pearson_cosine | 0.8418 |
|
626 |
-
| **spearman_cosine** | **0.8454** |
|
627 |
-
| pearson_manhattan | 0.8419 |
|
628 |
-
| spearman_manhattan | 0.8448 |
|
629 |
-
| pearson_euclidean | 0.8423 |
|
630 |
-
| spearman_euclidean | 0.8452 |
|
631 |
-
| pearson_dot | 0.8418 |
|
632 |
-
| spearman_dot | 0.8447 |
|
633 |
-
| pearson_max | 0.8423 |
|
634 |
-
| spearman_max | 0.8454 |
|
635 |
-
|
636 |
-
#### Semantic Similarity
|
637 |
-
* Dataset: `sts-dev-512`
|
638 |
-
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
639 |
-
|
640 |
-
| Metric | Value |
|
641 |
-
|:--------------------|:-----------|
|
642 |
-
| pearson_cosine | 0.8395 |
|
643 |
-
| **spearman_cosine** | **0.8438** |
|
644 |
-
| pearson_manhattan | 0.842 |
|
645 |
-
| spearman_manhattan | 0.8447 |
|
646 |
-
| pearson_euclidean | 0.8423 |
|
647 |
-
| spearman_euclidean | 0.8449 |
|
648 |
-
| pearson_dot | 0.8358 |
|
649 |
-
| spearman_dot | 0.838 |
|
650 |
-
| pearson_max | 0.8423 |
|
651 |
-
| spearman_max | 0.8449 |
|
652 |
-
|
653 |
-
#### Semantic Similarity
|
654 |
-
* Dataset: `sts-dev-256`
|
655 |
-
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
656 |
-
|
657 |
-
| Metric | Value |
|
658 |
-
|:--------------------|:-----------|
|
659 |
-
| pearson_cosine | 0.8339 |
|
660 |
-
| **spearman_cosine** | **0.8392** |
|
661 |
-
| pearson_manhattan | 0.838 |
|
662 |
-
| spearman_manhattan | 0.8399 |
|
663 |
-
| pearson_euclidean | 0.8389 |
|
664 |
-
| spearman_euclidean | 0.8405 |
|
665 |
-
| pearson_dot | 0.8231 |
|
666 |
-
| spearman_dot | 0.8243 |
|
667 |
-
| pearson_max | 0.8389 |
|
668 |
-
| spearman_max | 0.8405 |
|
669 |
-
|
670 |
-
#### Semantic Similarity
|
671 |
-
* Dataset: `sts-dev-128`
|
672 |
-
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
673 |
-
|
674 |
-
| Metric | Value |
|
675 |
-
|:--------------------|:-----------|
|
676 |
-
| pearson_cosine | 0.8254 |
|
677 |
-
| **spearman_cosine** | **0.8336** |
|
678 |
-
| pearson_manhattan | 0.8342 |
|
679 |
-
| spearman_manhattan | 0.8344 |
|
680 |
-
| pearson_euclidean | 0.8355 |
|
681 |
-
| spearman_euclidean | 0.8359 |
|
682 |
-
| pearson_dot | 0.8035 |
|
683 |
-
| spearman_dot | 0.805 |
|
684 |
-
| pearson_max | 0.8355 |
|
685 |
-
| spearman_max | 0.8359 |
|
686 |
-
|
687 |
-
#### Semantic Similarity
|
688 |
-
* Dataset: `sts-dev-64`
|
689 |
-
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
690 |
-
|
691 |
-
| Metric | Value |
|
692 |
-
|:--------------------|:-----------|
|
693 |
-
| pearson_cosine | 0.8151 |
|
694 |
-
| **spearman_cosine** | **0.8266** |
|
695 |
-
| pearson_manhattan | 0.8242 |
|
696 |
-
| spearman_manhattan | 0.8239 |
|
697 |
-
| pearson_euclidean | 0.8275 |
|
698 |
-
| spearman_euclidean | 0.8271 |
|
699 |
-
| pearson_dot | 0.7774 |
|
700 |
-
| spearman_dot | 0.779 |
|
701 |
-
| pearson_max | 0.8275 |
|
702 |
-
| spearman_max | 0.8271 |
|
703 |
-
|
704 |
-
#### Semantic Similarity
|
705 |
-
* Dataset: `sts-test-1024`
|
706 |
-
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
707 |
-
|
708 |
-
| Metric | Value |
|
709 |
-
|:--------------------|:-----------|
|
710 |
-
| pearson_cosine | 0.8131 |
|
711 |
-
| **spearman_cosine** | **0.8189** |
|
712 |
-
| pearson_manhattan | 0.8209 |
|
713 |
-
| spearman_manhattan | 0.8195 |
|
714 |
-
| pearson_euclidean | 0.8203 |
|
715 |
-
| spearman_euclidean | 0.8189 |
|
716 |
-
| pearson_dot | 0.8128 |
|
717 |
-
| spearman_dot | 0.8186 |
|
718 |
-
| pearson_max | 0.8209 |
|
719 |
-
| spearman_max | 0.8195 |
|
720 |
-
|
721 |
-
#### Semantic Similarity
|
722 |
-
* Dataset: `sts-test-768`
|
723 |
-
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
724 |
-
|
725 |
-
| Metric | Value |
|
726 |
-
|:--------------------|:-----------|
|
727 |
-
| pearson_cosine | 0.8122 |
|
728 |
-
| **spearman_cosine** | **0.8183** |
|
729 |
-
| pearson_manhattan | 0.8206 |
|
730 |
-
| spearman_manhattan | 0.819 |
|
731 |
-
| pearson_euclidean | 0.8197 |
|
732 |
-
| spearman_euclidean | 0.8183 |
|
733 |
-
| pearson_dot | 0.8107 |
|
734 |
-
| spearman_dot | 0.8149 |
|
735 |
-
| pearson_max | 0.8206 |
|
736 |
-
| spearman_max | 0.819 |
|
737 |
-
|
738 |
-
#### Semantic Similarity
|
739 |
-
* Dataset: `sts-test-512`
|
740 |
-
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
741 |
-
|
742 |
-
| Metric | Value |
|
743 |
-
|:--------------------|:-----------|
|
744 |
-
| pearson_cosine | 0.8096 |
|
745 |
-
| **spearman_cosine** | **0.8163** |
|
746 |
-
| pearson_manhattan | 0.818 |
|
747 |
-
| spearman_manhattan | 0.8165 |
|
748 |
-
| pearson_euclidean | 0.8174 |
|
749 |
-
| spearman_euclidean | 0.8159 |
|
750 |
-
| pearson_dot | 0.8059 |
|
751 |
-
| spearman_dot | 0.8089 |
|
752 |
-
| pearson_max | 0.818 |
|
753 |
-
| spearman_max | 0.8165 |
|
754 |
-
|
755 |
-
#### Semantic Similarity
|
756 |
-
* Dataset: `sts-test-256`
|
757 |
-
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
758 |
-
|
759 |
-
| Metric | Value |
|
760 |
-
|:--------------------|:----------|
|
761 |
-
| pearson_cosine | 0.8071 |
|
762 |
-
| **spearman_cosine** | **0.815** |
|
763 |
-
| pearson_manhattan | 0.8184 |
|
764 |
-
| spearman_manhattan | 0.8167 |
|
765 |
-
| pearson_euclidean | 0.8177 |
|
766 |
-
| spearman_euclidean | 0.8159 |
|
767 |
-
| pearson_dot | 0.7955 |
|
768 |
-
| spearman_dot | 0.7956 |
|
769 |
-
| pearson_max | 0.8184 |
|
770 |
-
| spearman_max | 0.8167 |
|
771 |
-
|
772 |
-
#### Semantic Similarity
|
773 |
-
* Dataset: `sts-test-128`
|
774 |
-
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
775 |
-
|
776 |
-
| Metric | Value |
|
777 |
-
|:--------------------|:-----------|
|
778 |
-
| pearson_cosine | 0.7974 |
|
779 |
-
| **spearman_cosine** | **0.8093** |
|
780 |
-
| pearson_manhattan | 0.8126 |
|
781 |
-
| spearman_manhattan | 0.8121 |
|
782 |
-
| pearson_euclidean | 0.8119 |
|
783 |
-
| spearman_euclidean | 0.8112 |
|
784 |
-
| pearson_dot | 0.774 |
|
785 |
-
| spearman_dot | 0.7701 |
|
786 |
-
| pearson_max | 0.8126 |
|
787 |
-
| spearman_max | 0.8121 |
|
788 |
-
|
789 |
-
#### Semantic Similarity
|
790 |
-
* Dataset: `sts-test-64`
|
791 |
-
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
792 |
-
|
793 |
-
| Metric | Value |
|
794 |
-
|:--------------------|:-----------|
|
795 |
-
| pearson_cosine | 0.7873 |
|
796 |
-
| **spearman_cosine** | **0.8025** |
|
797 |
-
| pearson_manhattan | 0.8048 |
|
798 |
-
| spearman_manhattan | 0.8032 |
|
799 |
-
| pearson_euclidean | 0.806 |
|
800 |
-
| spearman_euclidean | 0.8042 |
|
801 |
-
| pearson_dot | 0.7479 |
|
802 |
-
| spearman_dot | 0.7386 |
|
803 |
-
| pearson_max | 0.806 |
|
804 |
-
| spearman_max | 0.8042 |
|
805 |
-
|
806 |
-
<!--
|
807 |
-
## Bias, Risks and Limitations
|
808 |
-
|
809 |
-
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
810 |
-
-->
|
811 |
-
|
812 |
-
<!--
|
813 |
-
### Recommendations
|
814 |
-
|
815 |
-
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
816 |
-
-->
|
817 |
-
|
818 |
-
## Training Details
|
819 |
-
|
820 |
-
### Training Dataset
|
821 |
-
|
822 |
-
#### PhilipMay/stsb_multi_mt
|
823 |
-
|
824 |
-
* Dataset: [PhilipMay/stsb_multi_mt](https://huggingface.co/datasets/PhilipMay/stsb_multi_mt) at [3acaa3d](https://huggingface.co/datasets/PhilipMay/stsb_multi_mt/tree/3acaa3dd8c91649e0b8e627ffad891f059e47c8c)
|
825 |
-
* Size: 22,996 training samples
|
826 |
-
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>score</code>
|
827 |
-
* Approximate statistics based on the first 1000 samples:
|
828 |
-
| | sentence1 | sentence2 | score |
|
829 |
-
|:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------|
|
830 |
-
| type | string | string | float |
|
831 |
-
| details | <ul><li>min: 6 tokens</li><li>mean: 18.13 tokens</li><li>max: 65 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 18.25 tokens</li><li>max: 90 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.54</li><li>max: 1.0</li></ul> |
|
832 |
-
* Samples:
|
833 |
-
| sentence1 | sentence2 | score |
|
834 |
-
|:-------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:--------------------------------|
|
835 |
-
| <code>schütze wegen mordes an schwarzem us-jugendlichen angeklagt</code> | <code>gedanken zu den rassenbeziehungen unter einem schwarzen präsidenten</code> | <code>0.1599999964237213</code> |
|
836 |
-
| <code>fußballspieler kicken einen fußball in das tor.</code> | <code>Ein Fußballspieler schießt ein Tor.</code> | <code>0.7599999904632568</code> |
|
837 |
-
| <code>obama lockert abschiebungsregeln für junge einwanderer</code> | <code>usa lockert abschiebebestimmungen für jugendliche: napolitano</code> | <code>0.800000011920929</code> |
|
838 |
-
* Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
|
839 |
-
```json
|
840 |
-
{
|
841 |
-
"loss": "CosineSimilarityLoss",
|
842 |
-
"matryoshka_dims": [
|
843 |
-
1024,
|
844 |
-
768,
|
845 |
-
512,
|
846 |
-
256,
|
847 |
-
128,
|
848 |
-
64
|
849 |
-
],
|
850 |
-
"matryoshka_weights": [
|
851 |
-
1,
|
852 |
-
1,
|
853 |
-
1,
|
854 |
-
1,
|
855 |
-
1,
|
856 |
-
1
|
857 |
-
],
|
858 |
-
"n_dims_per_step": -1
|
859 |
-
}
|
860 |
-
```
|
861 |
-
|
862 |
-
### Evaluation Dataset
|
863 |
-
|
864 |
-
#### PhilipMay/stsb_multi_mt
|
865 |
-
|
866 |
-
* Dataset: [PhilipMay/stsb_multi_mt](https://huggingface.co/datasets/PhilipMay/stsb_multi_mt) at [3acaa3d](https://huggingface.co/datasets/PhilipMay/stsb_multi_mt/tree/3acaa3dd8c91649e0b8e627ffad891f059e47c8c)
|
867 |
-
* Size: 1,500 evaluation samples
|
868 |
-
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>score</code>
|
869 |
-
* Approximate statistics based on the first 1000 samples:
|
870 |
-
| | sentence1 | sentence2 | score |
|
871 |
-
|:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------|
|
872 |
-
| type | string | string | float |
|
873 |
-
| details | <ul><li>min: 5 tokens</li><li>mean: 16.54 tokens</li><li>max: 53 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 16.53 tokens</li><li>max: 47 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.47</li><li>max: 1.0</li></ul> |
|
874 |
-
* Samples:
|
875 |
-
| sentence1 | sentence2 | score |
|
876 |
-
|:-------------------------------------------------------------|:-----------------------------------------------------------|:-------------------------------|
|
877 |
-
| <code>Ein Mann mit einem Schutzhelm tanzt.</code> | <code>Ein Mann mit einem Schutzhelm tanzt.</code> | <code>1.0</code> |
|
878 |
-
| <code>Ein kleines Kind reitet auf einem Pferd.</code> | <code>Ein Kind reitet auf einem Pferd.</code> | <code>0.949999988079071</code> |
|
879 |
-
| <code>Ein Mann verfüttert eine Maus an eine Schlange.</code> | <code>Der Mann füttert die Schlange mit einer Maus.</code> | <code>1.0</code> |
|
880 |
-
* Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
|
881 |
-
```json
|
882 |
-
{
|
883 |
-
"loss": "CosineSimilarityLoss",
|
884 |
-
"matryoshka_dims": [
|
885 |
-
1024,
|
886 |
-
768,
|
887 |
-
512,
|
888 |
-
256,
|
889 |
-
128,
|
890 |
-
64
|
891 |
-
],
|
892 |
-
"matryoshka_weights": [
|
893 |
-
1,
|
894 |
-
1,
|
895 |
-
1,
|
896 |
-
1,
|
897 |
-
1,
|
898 |
-
1
|
899 |
-
],
|
900 |
-
"n_dims_per_step": -1
|
901 |
-
}
|
902 |
-
```
|
903 |
-
|
904 |
-
### Training Hyperparameters
|
905 |
-
#### Non-Default Hyperparameters
|
906 |
-
|
907 |
-
- `eval_strategy`: steps
|
908 |
-
- `per_device_train_batch_size`: 4
|
909 |
-
- `per_device_eval_batch_size`: 16
|
910 |
-
- `learning_rate`: 5e-06
|
911 |
-
- `num_train_epochs`: 1
|
912 |
-
- `warmup_ratio`: 0.1
|
913 |
-
- `bf16`: True
|
914 |
-
|
915 |
-
#### All Hyperparameters
|
916 |
-
<details><summary>Click to expand</summary>
|
917 |
-
|
918 |
-
- `overwrite_output_dir`: False
|
919 |
-
- `do_predict`: False
|
920 |
-
- `eval_strategy`: steps
|
921 |
-
- `prediction_loss_only`: True
|
922 |
-
- `per_device_train_batch_size`: 4
|
923 |
-
- `per_device_eval_batch_size`: 16
|
924 |
-
- `per_gpu_train_batch_size`: None
|
925 |
-
- `per_gpu_eval_batch_size`: None
|
926 |
-
- `gradient_accumulation_steps`: 1
|
927 |
-
- `eval_accumulation_steps`: None
|
928 |
-
- `learning_rate`: 5e-06
|
929 |
-
- `weight_decay`: 0.0
|
930 |
-
- `adam_beta1`: 0.9
|
931 |
-
- `adam_beta2`: 0.999
|
932 |
-
- `adam_epsilon`: 1e-08
|
933 |
-
- `max_grad_norm`: 1.0
|
934 |
-
- `num_train_epochs`: 1
|
935 |
-
- `max_steps`: -1
|
936 |
-
- `lr_scheduler_type`: linear
|
937 |
-
- `lr_scheduler_kwargs`: {}
|
938 |
-
- `warmup_ratio`: 0.1
|
939 |
-
- `warmup_steps`: 0
|
940 |
-
- `log_level`: passive
|
941 |
-
- `log_level_replica`: warning
|
942 |
-
- `log_on_each_node`: True
|
943 |
-
- `logging_nan_inf_filter`: True
|
944 |
-
- `save_safetensors`: True
|
945 |
-
- `save_on_each_node`: False
|
946 |
-
- `save_only_model`: False
|
947 |
-
- `restore_callback_states_from_checkpoint`: False
|
948 |
-
- `no_cuda`: False
|
949 |
-
- `use_cpu`: False
|
950 |
-
- `use_mps_device`: False
|
951 |
-
- `seed`: 42
|
952 |
-
- `data_seed`: None
|
953 |
-
- `jit_mode_eval`: False
|
954 |
-
- `use_ipex`: False
|
955 |
-
- `bf16`: True
|
956 |
-
- `fp16`: False
|
957 |
-
- `fp16_opt_level`: O1
|
958 |
-
- `half_precision_backend`: auto
|
959 |
-
- `bf16_full_eval`: False
|
960 |
-
- `fp16_full_eval`: False
|
961 |
-
- `tf32`: None
|
962 |
-
- `local_rank`: 0
|
963 |
-
- `ddp_backend`: None
|
964 |
-
- `tpu_num_cores`: None
|
965 |
-
- `tpu_metrics_debug`: False
|
966 |
-
- `debug`: []
|
967 |
-
- `dataloader_drop_last`: False
|
968 |
-
- `dataloader_num_workers`: 0
|
969 |
-
- `dataloader_prefetch_factor`: None
|
970 |
-
- `past_index`: -1
|
971 |
-
- `disable_tqdm`: False
|
972 |
-
- `remove_unused_columns`: True
|
973 |
-
- `label_names`: None
|
974 |
-
- `load_best_model_at_end`: False
|
975 |
-
- `ignore_data_skip`: False
|
976 |
-
- `fsdp`: []
|
977 |
-
- `fsdp_min_num_params`: 0
|
978 |
-
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
979 |
-
- `fsdp_transformer_layer_cls_to_wrap`: None
|
980 |
-
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
981 |
-
- `deepspeed`: None
|
982 |
-
- `label_smoothing_factor`: 0.0
|
983 |
-
- `optim`: adamw_torch
|
984 |
-
- `optim_args`: None
|
985 |
-
- `adafactor`: False
|
986 |
-
- `group_by_length`: False
|
987 |
-
- `length_column_name`: length
|
988 |
-
- `ddp_find_unused_parameters`: None
|
989 |
-
- `ddp_bucket_cap_mb`: None
|
990 |
-
- `ddp_broadcast_buffers`: False
|
991 |
-
- `dataloader_pin_memory`: True
|
992 |
-
- `dataloader_persistent_workers`: False
|
993 |
-
- `skip_memory_metrics`: True
|
994 |
-
- `use_legacy_prediction_loop`: False
|
995 |
-
- `push_to_hub`: False
|
996 |
-
- `resume_from_checkpoint`: None
|
997 |
-
- `hub_model_id`: None
|
998 |
-
- `hub_strategy`: every_save
|
999 |
-
- `hub_private_repo`: False
|
1000 |
-
- `hub_always_push`: False
|
1001 |
-
- `gradient_checkpointing`: False
|
1002 |
-
- `gradient_checkpointing_kwargs`: None
|
1003 |
-
- `include_inputs_for_metrics`: False
|
1004 |
-
- `eval_do_concat_batches`: True
|
1005 |
-
- `fp16_backend`: auto
|
1006 |
-
- `push_to_hub_model_id`: None
|
1007 |
-
- `push_to_hub_organization`: None
|
1008 |
-
- `mp_parameters`:
|
1009 |
-
- `auto_find_batch_size`: False
|
1010 |
-
- `full_determinism`: False
|
1011 |
-
- `torchdynamo`: None
|
1012 |
-
- `ray_scope`: last
|
1013 |
-
- `ddp_timeout`: 1800
|
1014 |
-
- `torch_compile`: False
|
1015 |
-
- `torch_compile_backend`: None
|
1016 |
-
- `torch_compile_mode`: None
|
1017 |
-
- `dispatch_batches`: None
|
1018 |
-
- `split_batches`: None
|
1019 |
-
- `include_tokens_per_second`: False
|
1020 |
-
- `include_num_input_tokens_seen`: False
|
1021 |
-
- `neftune_noise_alpha`: None
|
1022 |
-
- `optim_target_modules`: None
|
1023 |
-
- `batch_eval_metrics`: False
|
1024 |
-
- `eval_on_start`: False
|
1025 |
-
- `batch_sampler`: batch_sampler
|
1026 |
-
- `multi_dataset_batch_sampler`: proportional
|
1027 |
-
|
1028 |
-
</details>
|
1029 |
-
|
1030 |
-
### Training Logs
|
1031 |
-
| Epoch | Step | Training Loss | loss | sts-dev-1024_spearman_cosine | sts-dev-128_spearman_cosine | sts-dev-256_spearman_cosine | sts-dev-512_spearman_cosine | sts-dev-64_spearman_cosine | sts-dev-768_spearman_cosine | sts-test-1024_spearman_cosine | sts-test-128_spearman_cosine | sts-test-256_spearman_cosine | sts-test-512_spearman_cosine | sts-test-64_spearman_cosine | sts-test-768_spearman_cosine |
|
1032 |
-
|:------:|:----:|:-------------:|:------:|:----------------------------:|:---------------------------:|:---------------------------:|:---------------------------:|:--------------------------:|:---------------------------:|:-----------------------------:|:----------------------------:|:----------------------------:|:----------------------------:|:---------------------------:|:----------------------------:|
|
1033 |
-
| 0.0174 | 100 | 0.2958 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1034 |
-
| 0.0348 | 200 | 0.2914 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1035 |
-
| 0.0522 | 300 | 0.2691 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1036 |
-
| 0.0696 | 400 | 0.253 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1037 |
-
| 0.0870 | 500 | 0.2458 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1038 |
-
| 0.1044 | 600 | 0.2594 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1039 |
-
| 0.1218 | 700 | 0.2339 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1040 |
-
| 0.1392 | 800 | 0.2245 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1041 |
-
| 0.1565 | 900 | 0.2122 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1042 |
-
| 0.1739 | 1000 | 0.2369 | 0.2394 | 0.8402 | 0.8277 | 0.8352 | 0.8393 | 0.8164 | 0.8404 | - | - | - | - | - | - |
|
1043 |
-
| 0.1913 | 1100 | 0.2308 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1044 |
-
| 0.2087 | 1200 | 0.2292 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1045 |
-
| 0.2261 | 1300 | 0.2232 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1046 |
-
| 0.2435 | 1400 | 0.2001 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1047 |
-
| 0.2609 | 1500 | 0.2139 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1048 |
-
| 0.2783 | 1600 | 0.1906 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1049 |
-
| 0.2957 | 1700 | 0.1895 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1050 |
-
| 0.3131 | 1800 | 0.2011 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1051 |
-
| 0.3305 | 1900 | 0.1723 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1052 |
-
| 0.3479 | 2000 | 0.1886 | 0.2340 | 0.8448 | 0.8321 | 0.8385 | 0.8435 | 0.8233 | 0.8449 | - | - | - | - | - | - |
|
1053 |
-
| 0.3653 | 2100 | 0.1719 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1054 |
-
| 0.3827 | 2200 | 0.1879 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1055 |
-
| 0.4001 | 2300 | 0.187 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1056 |
-
| 0.4175 | 2400 | 0.1487 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1057 |
-
| 0.4349 | 2500 | 0.1752 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1058 |
-
| 0.4523 | 2600 | 0.1475 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1059 |
-
| 0.4696 | 2700 | 0.1695 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1060 |
-
| 0.4870 | 2800 | 0.1615 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1061 |
-
| 0.5044 | 2900 | 0.1558 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1062 |
-
| 0.5218 | 3000 | 0.1713 | 0.2357 | 0.8457 | 0.8344 | 0.8406 | 0.8447 | 0.8266 | 0.8461 | - | - | - | - | - | - |
|
1063 |
-
| 0.5392 | 3100 | 0.1556 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1064 |
-
| 0.5566 | 3200 | 0.1743 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1065 |
-
| 0.5740 | 3300 | 0.1426 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1066 |
-
| 0.5914 | 3400 | 0.1519 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1067 |
-
| 0.6088 | 3500 | 0.1763 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1068 |
-
| 0.6262 | 3600 | 0.1456 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1069 |
-
| 0.6436 | 3700 | 0.1649 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1070 |
-
| 0.6610 | 3800 | 0.1427 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1071 |
-
| 0.6784 | 3900 | 0.1284 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1072 |
-
| 0.6958 | 4000 | 0.1533 | 0.2344 | 0.8417 | 0.8291 | 0.8357 | 0.8402 | 0.8225 | 0.8421 | - | - | - | - | - | - |
|
1073 |
-
| 0.7132 | 4100 | 0.1397 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1074 |
-
| 0.7306 | 4200 | 0.1505 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1075 |
-
| 0.7480 | 4300 | 0.1355 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1076 |
-
| 0.7654 | 4400 | 0.1275 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1077 |
-
| 0.7827 | 4500 | 0.1599 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1078 |
-
| 0.8001 | 4600 | 0.1493 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1079 |
-
| 0.8175 | 4700 | 0.1497 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1080 |
-
| 0.8349 | 4800 | 0.1492 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1081 |
-
| 0.8523 | 4900 | 0.1378 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1082 |
-
| 0.8697 | 5000 | 0.1391 | 0.2362 | 0.8453 | 0.8336 | 0.8392 | 0.8438 | 0.8266 | 0.8454 | - | - | - | - | - | - |
|
1083 |
-
| 0.8871 | 5100 | 0.1622 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1084 |
-
| 0.9045 | 5200 | 0.1456 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1085 |
-
| 0.9219 | 5300 | 0.1367 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1086 |
-
| 0.9393 | 5400 | 0.1243 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1087 |
-
| 0.9567 | 5500 | 0.1389 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1088 |
-
| 0.9741 | 5600 | 0.1338 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1089 |
-
| 0.9915 | 5700 | 0.1146 | - | - | - | - | - | - | - | - | - | - | - | - | - |
|
1090 |
-
| 1.0 | 5749 | - | - | - | - | - | - | - | - | 0.8189 | 0.8093 | 0.8150 | 0.8163 | 0.8025 | 0.8183 |
|
1091 |
-
|
1092 |
-
|
1093 |
-
### Framework Versions
|
1094 |
-
- Python: 3.9.16
|
1095 |
-
- Sentence Transformers: 3.0.0
|
1096 |
-
- Transformers: 4.42.0.dev0
|
1097 |
-
- PyTorch: 2.2.2+cu118
|
1098 |
-
- Accelerate: 0.31.0
|
1099 |
-
- Datasets: 2.19.1
|
1100 |
-
- Tokenizers: 0.19.1
|
1101 |
|
1102 |
## Citation
|
1103 |
|
|
|
1 |
---
|
2 |
language:
|
3 |
- de
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
library_name: sentence-transformers
|
5 |
tags:
|
6 |
- sentence-transformers
|
7 |
- sentence-similarity
|
8 |
- feature-extraction
|
|
|
9 |
- loss:MatryoshkaLoss
|
10 |
+
base_model: aari1995/gbert-large-2
|
|
|
11 |
metrics:
|
12 |
- pearson_cosine
|
13 |
- spearman_cosine
|
|
|
46 |
- Die Frau prüft die Augen des Mannes.
|
47 |
- Ein Mann ist auf einem Dach
|
48 |
pipeline_tag: sentence-similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
---
|
50 |
|
51 |
+
# German Semantic V3
|
52 |
|
53 |
+
Finally, a new version! The successor of German_Semantic_STS_V2 is here and comes with loads of cool new features!
|
|
|
54 |
|
55 |
+
## Major updates and USPs:
|
56 |
|
57 |
+
- **Flexibility:** The model was trained with flexible sequence lengths and embedding truncation, making flexibility a core feature. Note that smaller embedding dimensions come with a minor trade-off in quality.
|
58 |
+
- **Sequence length:** 8192 tokens (16 times more than V2 and other models), thanks to the ALiBi implementation of the Jina team!
|
59 |
+
- **Matryoshka Embeddings:** The model is trained for embedding sizes from 1024 down to 64, allowing you to store much smaller embeddings with little quality loss.
|
60 |
+
- **German only:** This model is German-only and has rich cultural knowledge about Germany and German topics. Thanks to its tokenizer, it also learns more efficiently, deals better with shorter queries, and is generally more nuanced in many scenarios.
|
61 |
+
- **Updated knowledge and quality data:** The backbone of this model is gbert-large by deepset. Stage-2 pretraining on 1 billion tokens of German fineweb by occiglot ensures up-to-date knowledge.
|
62 |
+
- **Typo and casing:** This model was trained to be robust against minor typos and casing variations. This leads to slightly weaker benchmark performance and slower learning during training, but makes the embeddings more robust.
|
63 |
+
- **Pooling function:** The model moves away from mean pooling towards using the CLS token, which generally seems to learn better after the stage-2 pretraining and allows for more flexibility.
|
64 |
+
- **License:** Apache 2.0
|
|
|
|
|
65 |
|
66 |
+
## Usage:
|
67 |
|
68 |
+
```python
|
69 |
+
from sentence_transformers import SentenceTransformer
|
70 |
+
|
71 |
+
|
72 |
+
matryoshka_dim = 1024 # How big your embeddings should be, choose from: 64, 128, 256, 512, 768, 1024
|
73 |
+
model = SentenceTransformer("aari1995/German_Semantic_V3", trust_remote_code=True, truncate_dim=matryoshka_dim)
|
74 |
+
|
75 |
+
# model.truncate_dim = 64 # truncation dimensions can also be changed after loading
|
76 |
+
# model.max_seq_length = 512  # optionally, set a lower maximum sequence length if your hardware is limited
|
77 |
+
|
78 |
+
# Run inference
|
79 |
+
sentences = [
|
80 |
+
'Eine Flagge weht.',
|
81 |
+
'Die Flagge bewegte sich in der Luft.',
|
82 |
+
'Zwei Personen beobachten das Wasser.',
|
83 |
+
]
|
84 |
+
embeddings = model.encode(sentences)
|
85 |
+
|
86 |
+
# Get the similarity scores for the embeddings
|
87 |
+
similarities = model.similarity(embeddings, embeddings)
|
88 |
+
|
89 |
+
|
90 |
+
```
|
91 |
|
92 |
### Full Model Architecture
|
93 |
|
|
|
156 |
|
157 |
## Evaluation
|
158 |
|
159 |
+
Evaluation to come.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
|
161 |
## Citation
|
162 |
|