Training in progress, step 4390, checkpoint
Browse files
last-checkpoint/2_Dense/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3149984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98502da6c4dbee1502fa8ebc31ff356b5762eb792a899d4e5339d3cd3a7c0ae4
|
3 |
size 3149984
|
last-checkpoint/README.md
CHANGED
@@ -830,6 +830,20 @@ You can finetune this model on your own dataset.
|
|
830 |
| 9.6146 | 4230 | 0.061 | - | - |
|
831 |
| 9.6374 | 4240 | 0.0616 | - | - |
|
832 |
| 9.6601 | 4250 | 0.0613 | 0.0418 | 0.8282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
833 |
|
834 |
</details>
|
835 |
|
|
|
830 |
| 9.6146 | 4230 | 0.061 | - | - |
|
831 |
| 9.6374 | 4240 | 0.0616 | - | - |
|
832 |
| 9.6601 | 4250 | 0.0613 | 0.0418 | 0.8282 |
|
833 |
+
| 9.6829 | 4260 | 0.0623 | - | - |
|
834 |
+
| 9.7056 | 4270 | 0.0605 | - | - |
|
835 |
+
| 9.7284 | 4280 | 0.0637 | - | - |
|
836 |
+
| 9.7512 | 4290 | 0.0604 | - | - |
|
837 |
+
| 9.7739 | 4300 | 0.0606 | - | - |
|
838 |
+
| 9.7967 | 4310 | 0.0622 | - | - |
|
839 |
+
| 9.8195 | 4320 | 0.0598 | - | - |
|
840 |
+
| 9.8422 | 4330 | 0.0611 | - | - |
|
841 |
+
| 9.8650 | 4340 | 0.0604 | - | - |
|
842 |
+
| 9.8878 | 4350 | 0.0598 | - | - |
|
843 |
+
| 9.9105 | 4360 | 0.0626 | - | - |
|
844 |
+
| 9.9333 | 4370 | 0.0624 | - | - |
|
845 |
+
| 9.9560 | 4380 | 0.0617 | - | - |
|
846 |
+
| 9.9788 | 4390 | 0.0603 | - | - |
|
847 |
|
848 |
</details>
|
849 |
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 735216376
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4533a4e396a4cedfa433a333dfd7f93b95b00042cf7dc09f5854fa0650746841
|
3 |
size 735216376
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1476823354
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03f40734fc193c019c97cbc28b1ae04414cdc745d19240313f29ceea320dd5c0
|
3 |
size 1476823354
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1000
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bbcc6d0c1acd705e5ef3f7c1bdfb510617f9ad5f9bd9d641c051fe36f40e8b31
|
3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 9.
|
5 |
"eval_steps": 250,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3288,6 +3288,104 @@
|
|
3288 |
"eval_sts_dev_spearman_manhattan": 0.7996541111809876,
|
3289 |
"eval_sts_dev_spearman_max": 0.8282368218808581,
|
3290 |
"step": 4250
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3291 |
}
|
3292 |
],
|
3293 |
"logging_steps": 10,
|
@@ -3302,7 +3400,7 @@
|
|
3302 |
"should_evaluate": false,
|
3303 |
"should_log": false,
|
3304 |
"should_save": true,
|
3305 |
-
"should_training_stop":
|
3306 |
},
|
3307 |
"attributes": {}
|
3308 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 9.97880210556267,
|
5 |
"eval_steps": 250,
|
6 |
+
"global_step": 4390,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3288 |
"eval_sts_dev_spearman_manhattan": 0.7996541111809876,
|
3289 |
"eval_sts_dev_spearman_max": 0.8282368218808581,
|
3290 |
"step": 4250
|
3291 |
+
},
|
3292 |
+
{
|
3293 |
+
"epoch": 9.682885189927443,
|
3294 |
+
"grad_norm": 0.2565889060497284,
|
3295 |
+
"learning_rate": 3.0299797290088556e-06,
|
3296 |
+
"loss": 0.0623,
|
3297 |
+
"step": 4260
|
3298 |
+
},
|
3299 |
+
{
|
3300 |
+
"epoch": 9.705648029591691,
|
3301 |
+
"grad_norm": 0.2263515293598175,
|
3302 |
+
"learning_rate": 3.037092357480707e-06,
|
3303 |
+
"loss": 0.0605,
|
3304 |
+
"step": 4270
|
3305 |
+
},
|
3306 |
+
{
|
3307 |
+
"epoch": 9.72841086925594,
|
3308 |
+
"grad_norm": 0.21705535054206848,
|
3309 |
+
"learning_rate": 3.044204985952559e-06,
|
3310 |
+
"loss": 0.0637,
|
3311 |
+
"step": 4280
|
3312 |
+
},
|
3313 |
+
{
|
3314 |
+
"epoch": 9.751173708920188,
|
3315 |
+
"grad_norm": 0.21649038791656494,
|
3316 |
+
"learning_rate": 3.0513176144244106e-06,
|
3317 |
+
"loss": 0.0604,
|
3318 |
+
"step": 4290
|
3319 |
+
},
|
3320 |
+
{
|
3321 |
+
"epoch": 9.773936548584436,
|
3322 |
+
"grad_norm": 0.22717022895812988,
|
3323 |
+
"learning_rate": 3.0584302428962625e-06,
|
3324 |
+
"loss": 0.0606,
|
3325 |
+
"step": 4300
|
3326 |
+
},
|
3327 |
+
{
|
3328 |
+
"epoch": 9.796699388248683,
|
3329 |
+
"grad_norm": 0.23610946536064148,
|
3330 |
+
"learning_rate": 3.0655428713681145e-06,
|
3331 |
+
"loss": 0.0622,
|
3332 |
+
"step": 4310
|
3333 |
+
},
|
3334 |
+
{
|
3335 |
+
"epoch": 9.819462227912933,
|
3336 |
+
"grad_norm": 0.2080880105495453,
|
3337 |
+
"learning_rate": 3.072655499839966e-06,
|
3338 |
+
"loss": 0.0598,
|
3339 |
+
"step": 4320
|
3340 |
+
},
|
3341 |
+
{
|
3342 |
+
"epoch": 9.84222506757718,
|
3343 |
+
"grad_norm": 0.2862449884414673,
|
3344 |
+
"learning_rate": 3.079768128311818e-06,
|
3345 |
+
"loss": 0.0611,
|
3346 |
+
"step": 4330
|
3347 |
+
},
|
3348 |
+
{
|
3349 |
+
"epoch": 9.864987907241428,
|
3350 |
+
"grad_norm": 0.2211073935031891,
|
3351 |
+
"learning_rate": 3.0868807567836695e-06,
|
3352 |
+
"loss": 0.0604,
|
3353 |
+
"step": 4340
|
3354 |
+
},
|
3355 |
+
{
|
3356 |
+
"epoch": 9.887750746905677,
|
3357 |
+
"grad_norm": 0.2399899959564209,
|
3358 |
+
"learning_rate": 3.0939933852555214e-06,
|
3359 |
+
"loss": 0.0598,
|
3360 |
+
"step": 4350
|
3361 |
+
},
|
3362 |
+
{
|
3363 |
+
"epoch": 9.910513586569925,
|
3364 |
+
"grad_norm": 0.2330579161643982,
|
3365 |
+
"learning_rate": 3.101106013727373e-06,
|
3366 |
+
"loss": 0.0626,
|
3367 |
+
"step": 4360
|
3368 |
+
},
|
3369 |
+
{
|
3370 |
+
"epoch": 9.933276426234173,
|
3371 |
+
"grad_norm": 0.23163940012454987,
|
3372 |
+
"learning_rate": 3.108218642199225e-06,
|
3373 |
+
"loss": 0.0624,
|
3374 |
+
"step": 4370
|
3375 |
+
},
|
3376 |
+
{
|
3377 |
+
"epoch": 9.95603926589842,
|
3378 |
+
"grad_norm": 0.2087012380361557,
|
3379 |
+
"learning_rate": 3.115331270671077e-06,
|
3380 |
+
"loss": 0.0617,
|
3381 |
+
"step": 4380
|
3382 |
+
},
|
3383 |
+
{
|
3384 |
+
"epoch": 9.97880210556267,
|
3385 |
+
"grad_norm": 0.24286577105522156,
|
3386 |
+
"learning_rate": 3.1224438991429284e-06,
|
3387 |
+
"loss": 0.0603,
|
3388 |
+
"step": 4390
|
3389 |
}
|
3390 |
],
|
3391 |
"logging_steps": 10,
|
|
|
3400 |
"should_evaluate": false,
|
3401 |
"should_log": false,
|
3402 |
"should_save": true,
|
3403 |
+
"should_training_stop": true
|
3404 |
},
|
3405 |
"attributes": {}
|
3406 |
}
|