ngwgsang commited on
Commit
fd15173
·
verified ·
1 Parent(s): f6d56f0

Training in progress, epoch 7, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:749271874a66a0bb44175c9c4e534fa913565b840fde7339f597ed30a42e479a
3
  size 442668636
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda5447e852aec1a88c6e1e170b80ba6fe9014a12dbe01e7be648c38a4de3614
3
  size 442668636
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5d3381cb7fd88fdbe7c1cd9bfd41f3e3c8cc97d2b4a55d635f79a83987983bb
3
  size 885457146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11b2622b740fa4e8b9bde46ad5633bd5f457d760c14266e230077a93270f722d
3
  size 885457146
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:473ecb09e5f106de8046a76cc9b1107489610a4ca8d22c8acd37629ea6ee333c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff6cc6d0cbab2c0d1b846e878224dd13dea331cdbd300a43f50d003878abb4b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4733e7616ec8d677f1ad74de3be0e8ae5883d8beda594b0ecd844ef01ca5ed9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbbf9fa8c1d04bf161b93e37605349a2dc1920d1f886b15f60d590d5c06678fa
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 5.4548899332682295,
3
  "best_model_checkpoint": "./results/checkpoint-5496",
4
- "epoch": 6.0,
5
  "eval_steps": 500,
6
- "global_step": 5496,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -457,6 +457,88 @@
457
  "eval_samples_per_second": 271.145,
458
  "eval_steps_per_second": 8.474,
459
  "step": 5496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
  }
461
  ],
462
  "logging_steps": 100,
@@ -476,7 +558,7 @@
476
  "attributes": {}
477
  }
478
  },
479
- "total_flos": 1.1566597400031744e+16,
480
  "train_batch_size": 32,
481
  "trial_name": null,
482
  "trial_params": null
 
1
  {
2
  "best_metric": 5.4548899332682295,
3
  "best_model_checkpoint": "./results/checkpoint-5496",
4
+ "epoch": 7.0,
5
  "eval_steps": 500,
6
+ "global_step": 6412,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
457
  "eval_samples_per_second": 271.145,
458
  "eval_steps_per_second": 8.474,
459
  "step": 5496
460
+ },
461
+ {
462
+ "epoch": 6.004366812227074,
463
+ "grad_norm": 27.099462509155273,
464
+ "learning_rate": 2.4945414847161576e-05,
465
+ "loss": 5.0121,
466
+ "step": 5500
467
+ },
468
+ {
469
+ "epoch": 6.11353711790393,
470
+ "grad_norm": 26.199542999267578,
471
+ "learning_rate": 2.3580786026200875e-05,
472
+ "loss": 4.6621,
473
+ "step": 5600
474
+ },
475
+ {
476
+ "epoch": 6.222707423580786,
477
+ "grad_norm": 31.04909324645996,
478
+ "learning_rate": 2.2216157205240178e-05,
479
+ "loss": 4.6792,
480
+ "step": 5700
481
+ },
482
+ {
483
+ "epoch": 6.331877729257642,
484
+ "grad_norm": 26.449748992919922,
485
+ "learning_rate": 2.0851528384279477e-05,
486
+ "loss": 4.6471,
487
+ "step": 5800
488
+ },
489
+ {
490
+ "epoch": 6.441048034934497,
491
+ "grad_norm": 27.806798934936523,
492
+ "learning_rate": 1.948689956331878e-05,
493
+ "loss": 4.5161,
494
+ "step": 5900
495
+ },
496
+ {
497
+ "epoch": 6.550218340611353,
498
+ "grad_norm": 39.113399505615234,
499
+ "learning_rate": 1.812227074235808e-05,
500
+ "loss": 4.5787,
501
+ "step": 6000
502
+ },
503
+ {
504
+ "epoch": 6.6593886462882095,
505
+ "grad_norm": 30.493192672729492,
506
+ "learning_rate": 1.675764192139738e-05,
507
+ "loss": 4.6041,
508
+ "step": 6100
509
+ },
510
+ {
511
+ "epoch": 6.7685589519650655,
512
+ "grad_norm": 35.86678695678711,
513
+ "learning_rate": 1.5393013100436683e-05,
514
+ "loss": 4.6345,
515
+ "step": 6200
516
+ },
517
+ {
518
+ "epoch": 6.877729257641922,
519
+ "grad_norm": 44.66313934326172,
520
+ "learning_rate": 1.4028384279475984e-05,
521
+ "loss": 4.6063,
522
+ "step": 6300
523
+ },
524
+ {
525
+ "epoch": 6.986899563318778,
526
+ "grad_norm": 34.84800720214844,
527
+ "learning_rate": 1.2663755458515283e-05,
528
+ "loss": 4.6152,
529
+ "step": 6400
530
+ },
531
+ {
532
+ "epoch": 7.0,
533
+ "eval_avg_mae": 5.481756210327148,
534
+ "eval_loss": 5.481756210327148,
535
+ "eval_mae_lex": 4.816911220550537,
536
+ "eval_mae_sem": 3.748530626296997,
537
+ "eval_mae_syn": 7.87982702255249,
538
+ "eval_runtime": 27.0121,
539
+ "eval_samples_per_second": 271.248,
540
+ "eval_steps_per_second": 8.478,
541
+ "step": 6412
542
  }
543
  ],
544
  "logging_steps": 100,
 
558
  "attributes": {}
559
  }
560
  },
561
+ "total_flos": 1.3494363633370368e+16,
562
  "train_batch_size": 32,
563
  "trial_name": null,
564
  "trial_params": null