CocoRoF committed (verified)
Commit 3e54693 · 1 Parent(s): 145f4ef

Training in progress, step 4390, checkpoint

last-checkpoint/2_Dense/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a59bfc4cd3767747c580ac670f0d6c48bfe9e402250467b22e693fdfc61b625d
+oid sha256:98502da6c4dbee1502fa8ebc31ff356b5762eb792a899d4e5339d3cd3a7c0ae4
 size 3149984
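
This and the binary files further down are Git LFS pointers: only the `oid sha256:` line changes, while the size stays the same. As a minimal sketch (assuming the weights have been downloaded locally to the path below; the helper name and constants are illustrative, with the oid taken from the new pointer above), the pointer can be checked against the actual file contents:

```python
import hashlib

# Expected digest from the new LFS pointer; local path of the downloaded file.
POINTER_OID = "98502da6c4dbee1502fa8ebc31ff356b5762eb792a899d4e5339d3cd3a7c0ae4"
LOCAL_FILE = "last-checkpoint/2_Dense/model.safetensors"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file through SHA-256 so large checkpoints do not load into memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

digest = sha256_of(LOCAL_FILE)
print("match" if digest == POINTER_OID else f"mismatch: {digest}")
```

The same check applies to model.safetensors, optimizer.pt, and scheduler.pt below; only the expected oid and the local path change.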
last-checkpoint/README.md CHANGED
@@ -830,6 +830,20 @@ You can finetune this model on your own dataset.
 | 9.6146 | 4230 | 0.061 | - | - |
 | 9.6374 | 4240 | 0.0616 | - | - |
 | 9.6601 | 4250 | 0.0613 | 0.0418 | 0.8282 |
+| 9.6829 | 4260 | 0.0623 | - | - |
+| 9.7056 | 4270 | 0.0605 | - | - |
+| 9.7284 | 4280 | 0.0637 | - | - |
+| 9.7512 | 4290 | 0.0604 | - | - |
+| 9.7739 | 4300 | 0.0606 | - | - |
+| 9.7967 | 4310 | 0.0622 | - | - |
+| 9.8195 | 4320 | 0.0598 | - | - |
+| 9.8422 | 4330 | 0.0611 | - | - |
+| 9.8650 | 4340 | 0.0604 | - | - |
+| 9.8878 | 4350 | 0.0598 | - | - |
+| 9.9105 | 4360 | 0.0626 | - | - |
+| 9.9333 | 4370 | 0.0624 | - | - |
+| 9.9560 | 4380 | 0.0617 | - | - |
+| 9.9788 | 4390 | 0.0603 | - | - |
 
 </details>
 
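The rows added to the README's training log cover steps 4260–4390 at a training loss around 0.06; the last recorded STS-dev evaluation (Spearman 0.8282) is still the one at step 4250. A minimal usage sketch, assuming `last-checkpoint/` is a complete sentence-transformers save directory (the `2_Dense` module updated above suggests it is):

```python
from sentence_transformers import SentenceTransformer

# Load the checkpoint directory committed here (transformer weights plus the 2_Dense head).
model = SentenceTransformer("last-checkpoint")

embeddings = model.encode([
    "A sample sentence to embed.",
    "Another sentence for comparison.",
])
print(embeddings.shape)  # (2, embedding_dim)
```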
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66daefb719ad12215c08363cf07f604053315b28142583dcc866c834327eca3f
+oid sha256:4533a4e396a4cedfa433a333dfd7f93b95b00042cf7dc09f5854fa0650746841
 size 735216376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:796b01c86922133da7b4702097cf156006e03e00f92d857ba3d2713e738810f2
+oid sha256:03f40734fc193c019c97cbc28b1ae04414cdc745d19240313f29ceea320dd5c0
 size 1476823354
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:451fe1a5f62f2f6eed0b67a70a5f8f0f813e8a38e58c106c948a6c2c9e79f8ef
+oid sha256:bbcc6d0c1acd705e5ef3f7c1bdfb510617f9ad5f9bd9d641c051fe36f40e8b31
 size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 9.660122350263196,
+  "epoch": 9.97880210556267,
   "eval_steps": 250,
-  "global_step": 4250,
+  "global_step": 4390,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3288,6 +3288,104 @@
       "eval_sts_dev_spearman_manhattan": 0.7996541111809876,
       "eval_sts_dev_spearman_max": 0.8282368218808581,
       "step": 4250
+    },
+    {
+      "epoch": 9.682885189927443,
+      "grad_norm": 0.2565889060497284,
+      "learning_rate": 3.0299797290088556e-06,
+      "loss": 0.0623,
+      "step": 4260
+    },
+    {
+      "epoch": 9.705648029591691,
+      "grad_norm": 0.2263515293598175,
+      "learning_rate": 3.037092357480707e-06,
+      "loss": 0.0605,
+      "step": 4270
+    },
+    {
+      "epoch": 9.72841086925594,
+      "grad_norm": 0.21705535054206848,
+      "learning_rate": 3.044204985952559e-06,
+      "loss": 0.0637,
+      "step": 4280
+    },
+    {
+      "epoch": 9.751173708920188,
+      "grad_norm": 0.21649038791656494,
+      "learning_rate": 3.0513176144244106e-06,
+      "loss": 0.0604,
+      "step": 4290
+    },
+    {
+      "epoch": 9.773936548584436,
+      "grad_norm": 0.22717022895812988,
+      "learning_rate": 3.0584302428962625e-06,
+      "loss": 0.0606,
+      "step": 4300
+    },
+    {
+      "epoch": 9.796699388248683,
+      "grad_norm": 0.23610946536064148,
+      "learning_rate": 3.0655428713681145e-06,
+      "loss": 0.0622,
+      "step": 4310
+    },
+    {
+      "epoch": 9.819462227912933,
+      "grad_norm": 0.2080880105495453,
+      "learning_rate": 3.072655499839966e-06,
+      "loss": 0.0598,
+      "step": 4320
+    },
+    {
+      "epoch": 9.84222506757718,
+      "grad_norm": 0.2862449884414673,
+      "learning_rate": 3.079768128311818e-06,
+      "loss": 0.0611,
+      "step": 4330
+    },
+    {
+      "epoch": 9.864987907241428,
+      "grad_norm": 0.2211073935031891,
+      "learning_rate": 3.0868807567836695e-06,
+      "loss": 0.0604,
+      "step": 4340
+    },
+    {
+      "epoch": 9.887750746905677,
+      "grad_norm": 0.2399899959564209,
+      "learning_rate": 3.0939933852555214e-06,
+      "loss": 0.0598,
+      "step": 4350
+    },
+    {
+      "epoch": 9.910513586569925,
+      "grad_norm": 0.2330579161643982,
+      "learning_rate": 3.101106013727373e-06,
+      "loss": 0.0626,
+      "step": 4360
+    },
+    {
+      "epoch": 9.933276426234173,
+      "grad_norm": 0.23163940012454987,
+      "learning_rate": 3.108218642199225e-06,
+      "loss": 0.0624,
+      "step": 4370
+    },
+    {
+      "epoch": 9.95603926589842,
+      "grad_norm": 0.2087012380361557,
+      "learning_rate": 3.115331270671077e-06,
+      "loss": 0.0617,
+      "step": 4380
+    },
+    {
+      "epoch": 9.97880210556267,
+      "grad_norm": 0.24286577105522156,
+      "learning_rate": 3.1224438991429284e-06,
+      "loss": 0.0603,
+      "step": 4390
     }
   ],
   "logging_steps": 10,
@@ -3302,7 +3400,7 @@
       "should_evaluate": false,
       "should_log": false,
       "should_save": true,
-      "should_training_stop": false
+      "should_training_stop": true
     },
     "attributes": {}
   }
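
With `global_step` now 4390, `epoch` at roughly 9.98, and `should_training_stop` flipped to `true`, this is the final checkpoint of the run. A small sketch for confirming that from the saved state file (plain JSON reading, no Trainer API assumed; the step records added above sit at the tail of the file's `log_history` list):

```python
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Top-level fields changed in this commit.
print(state["epoch"], state["global_step"])  # 9.97880210556267 4390

# Last logged training entry, matching the step-4390 record above.
last = state["log_history"][-1]
print(last["step"], last["loss"])  # 4390 0.0603
```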