leixa committed
Commit 0d20314 · verified · 1 Parent(s): 2bef165

Training in progress, step 264, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77e5602370f71396a4a9d536fa31db82d52fdb27311cc5775a584a26abcfbe95
+oid sha256:fc55da3a89171bf252e042ada42565e064487a2157541887e69eac3d97d442b2
 size 201892112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37bf22f63a7f59903a5094dea3f1ab6d7e03349c78dd0e1086bd5b31c9c1331
-size 102864548
+oid sha256:da2d2119e83f390d39071b1ec721709219117ccbb0dd644e82b4474f317dcb14
+size 102864868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:418e0ff9289d2a7afcb614530a676e000b998e80e40980bc423384419c5d5263
+oid sha256:9e1bd3ac95599b40f1305be1f0a64cd545e152ad7b38b9a60045306725bab973
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d40b4021f74e2e4610fc648a5c9341998feef6ab8f05cc4e06b8e0f1f8685d5
+oid sha256:93ded0d5cb9245a05d18914685d015def341481d1dc23b96d02a9408efc885cf
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.5,
+  "epoch": 2.75,
   "eval_steps": 24,
-  "global_step": 240,
+  "global_step": 264,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -655,6 +655,70 @@
       "eval_samples_per_second": 48.466,
       "eval_steps_per_second": 6.283,
       "step": 240
+    },
+    {
+      "epoch": 2.53125,
+      "grad_norm": 0.46080684661865234,
+      "learning_rate": 3.1634847988458615e-06,
+      "loss": 1.3173,
+      "step": 243
+    },
+    {
+      "epoch": 2.5625,
+      "grad_norm": 0.46182775497436523,
+      "learning_rate": 2.7634428858242995e-06,
+      "loss": 1.3378,
+      "step": 246
+    },
+    {
+      "epoch": 2.59375,
+      "grad_norm": 0.48507875204086304,
+      "learning_rate": 2.388956133279266e-06,
+      "loss": 1.339,
+      "step": 249
+    },
+    {
+      "epoch": 2.625,
+      "grad_norm": 0.5142520666122437,
+      "learning_rate": 2.040454916695972e-06,
+      "loss": 1.3046,
+      "step": 252
+    },
+    {
+      "epoch": 2.65625,
+      "grad_norm": 0.5162761807441711,
+      "learning_rate": 1.7183397479194174e-06,
+      "loss": 1.3473,
+      "step": 255
+    },
+    {
+      "epoch": 2.6875,
+      "grad_norm": 0.5032109022140503,
+      "learning_rate": 1.4229808148697733e-06,
+      "loss": 1.3258,
+      "step": 258
+    },
+    {
+      "epoch": 2.71875,
+      "grad_norm": 0.5153303146362305,
+      "learning_rate": 1.1547175561073154e-06,
+      "loss": 1.2873,
+      "step": 261
+    },
+    {
+      "epoch": 2.75,
+      "grad_norm": 0.5076745748519897,
+      "learning_rate": 9.13858270735743e-07,
+      "loss": 1.3198,
+      "step": 264
+    },
+    {
+      "epoch": 2.75,
+      "eval_loss": 1.3732330799102783,
+      "eval_runtime": 3.334,
+      "eval_samples_per_second": 48.591,
+      "eval_steps_per_second": 6.299,
+      "step": 264
     }
   ],
   "logging_steps": 3,
@@ -674,7 +738,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.420861980095283e+16,
+  "total_flos": 5.959481812844544e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null