plip committed on
Commit
1f2aae0
1 Parent(s): e9dfc8a

Training in progress, step 240000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9d08b16b8f0a6abe7e4ff06562f781b437ddd6c1464c0f95d747d51260c40e3
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dd6a017b241710845a02e0580947768eef1dfbc1f0827d4a1cd087465f14ae0
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf594ad85da8b14fc79f1305a9d38a155ee31dc640b1cde587a09e1650e1b112
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08123eb498e0376035cfc13e99904da381c8fd27d56ad427569e3314a31c3e52
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:183346140573b10db5b967d424c726559ad36fe06d5946002c429fd0425a547c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:525fecdf426cb91f007f65ad628cb2afdc5c951475490489af137b7537b71a0e
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:183346140573b10db5b967d424c726559ad36fe06d5946002c429fd0425a547c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:525fecdf426cb91f007f65ad628cb2afdc5c951475490489af137b7537b71a0e
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:183346140573b10db5b967d424c726559ad36fe06d5946002c429fd0425a547c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:525fecdf426cb91f007f65ad628cb2afdc5c951475490489af137b7537b71a0e
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:183346140573b10db5b967d424c726559ad36fe06d5946002c429fd0425a547c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:525fecdf426cb91f007f65ad628cb2afdc5c951475490489af137b7537b71a0e
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:183346140573b10db5b967d424c726559ad36fe06d5946002c429fd0425a547c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:525fecdf426cb91f007f65ad628cb2afdc5c951475490489af137b7537b71a0e
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:183346140573b10db5b967d424c726559ad36fe06d5946002c429fd0425a547c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:525fecdf426cb91f007f65ad628cb2afdc5c951475490489af137b7537b71a0e
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:183346140573b10db5b967d424c726559ad36fe06d5946002c429fd0425a547c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:525fecdf426cb91f007f65ad628cb2afdc5c951475490489af137b7537b71a0e
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:183346140573b10db5b967d424c726559ad36fe06d5946002c429fd0425a547c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:525fecdf426cb91f007f65ad628cb2afdc5c951475490489af137b7537b71a0e
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a2bb37359d05bc7917d91ab1261ba8c4d8f00648cd8cb2d11c677b6c91ddb27
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a310455c0ab00dbcb4f4c26c03e0304d748fbc8c3e547006fb15346d32ac70da
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.861664712778429,
5
- "global_step": 230000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4606,11 +4606,211 @@
4606
  "eval_samples_per_second": 764.442,
4607
  "eval_steps_per_second": 12.231,
4608
  "step": 230000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4609
  }
4610
  ],
4611
  "max_steps": 500000,
4612
  "num_train_epochs": 13,
4613
- "total_flos": 7.348136797248275e+21,
4614
  "trial_name": null,
4615
  "trial_params": null
4616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.116519700290534,
5
+ "global_step": 240000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4606
  "eval_samples_per_second": 764.442,
4607
  "eval_steps_per_second": 12.231,
4608
  "step": 230000
4609
+ },
4610
+ {
4611
+ "epoch": 5.87,
4612
+ "learning_rate": 0.00018545980690870903,
4613
+ "loss": 0.2899,
4614
+ "step": 230500
4615
+ },
4616
+ {
4617
+ "epoch": 5.89,
4618
+ "learning_rate": 0.00018499083425600648,
4619
+ "loss": 0.2898,
4620
+ "step": 231000
4621
+ },
4622
+ {
4623
+ "epoch": 5.89,
4624
+ "eval_loss": 0.8181861639022827,
4625
+ "eval_runtime": 1.2638,
4626
+ "eval_samples_per_second": 791.236,
4627
+ "eval_steps_per_second": 12.66,
4628
+ "step": 231000
4629
+ },
4630
+ {
4631
+ "epoch": 5.9,
4632
+ "learning_rate": 0.00018452153362834552,
4633
+ "loss": 0.2896,
4634
+ "step": 231500
4635
+ },
4636
+ {
4637
+ "epoch": 5.91,
4638
+ "learning_rate": 0.00018405191015792254,
4639
+ "loss": 0.29,
4640
+ "step": 232000
4641
+ },
4642
+ {
4643
+ "epoch": 5.91,
4644
+ "eval_loss": 0.8149720430374146,
4645
+ "eval_runtime": 1.2755,
4646
+ "eval_samples_per_second": 783.985,
4647
+ "eval_steps_per_second": 12.544,
4648
+ "step": 232000
4649
+ },
4650
+ {
4651
+ "epoch": 5.93,
4652
+ "learning_rate": 0.0001835819689804646,
4653
+ "loss": 0.2892,
4654
+ "step": 232500
4655
+ },
4656
+ {
4657
+ "epoch": 5.94,
4658
+ "learning_rate": 0.0001831117152351732,
4659
+ "loss": 0.2905,
4660
+ "step": 233000
4661
+ },
4662
+ {
4663
+ "epoch": 5.94,
4664
+ "eval_loss": 0.8126420378684998,
4665
+ "eval_runtime": 1.2766,
4666
+ "eval_samples_per_second": 783.351,
4667
+ "eval_steps_per_second": 12.534,
4668
+ "step": 233000
4669
+ },
4670
+ {
4671
+ "epoch": 5.95,
4672
+ "learning_rate": 0.00018264115406466778,
4673
+ "loss": 0.2893,
4674
+ "step": 233500
4675
+ },
4676
+ {
4677
+ "epoch": 5.96,
4678
+ "learning_rate": 0.00018217029061493007,
4679
+ "loss": 0.2894,
4680
+ "step": 234000
4681
+ },
4682
+ {
4683
+ "epoch": 5.96,
4684
+ "eval_loss": 0.82079017162323,
4685
+ "eval_runtime": 1.3447,
4686
+ "eval_samples_per_second": 743.651,
4687
+ "eval_steps_per_second": 11.898,
4688
+ "step": 234000
4689
+ },
4690
+ {
4691
+ "epoch": 5.98,
4692
+ "learning_rate": 0.00018169913003524717,
4693
+ "loss": 0.2893,
4694
+ "step": 234500
4695
+ },
4696
+ {
4697
+ "epoch": 5.99,
4698
+ "learning_rate": 0.00018122767747815594,
4699
+ "loss": 0.2894,
4700
+ "step": 235000
4701
+ },
4702
+ {
4703
+ "epoch": 5.99,
4704
+ "eval_loss": 0.8262202739715576,
4705
+ "eval_runtime": 1.2956,
4706
+ "eval_samples_per_second": 771.835,
4707
+ "eval_steps_per_second": 12.349,
4708
+ "step": 235000
4709
+ },
4710
+ {
4711
+ "epoch": 6.0,
4712
+ "learning_rate": 0.00018075593809938574,
4713
+ "loss": 0.2898,
4714
+ "step": 235500
4715
+ },
4716
+ {
4717
+ "epoch": 6.01,
4718
+ "learning_rate": 0.00018028391705780295,
4719
+ "loss": 0.2899,
4720
+ "step": 236000
4721
+ },
4722
+ {
4723
+ "epoch": 6.01,
4724
+ "eval_loss": 0.8133281469345093,
4725
+ "eval_runtime": 1.2572,
4726
+ "eval_samples_per_second": 795.402,
4727
+ "eval_steps_per_second": 12.726,
4728
+ "step": 236000
4729
+ },
4730
+ {
4731
+ "epoch": 6.03,
4732
+ "learning_rate": 0.0001798116195153541,
4733
+ "loss": 0.2895,
4734
+ "step": 236500
4735
+ },
4736
+ {
4737
+ "epoch": 6.04,
4738
+ "learning_rate": 0.0001793390506370094,
4739
+ "loss": 0.2891,
4740
+ "step": 237000
4741
+ },
4742
+ {
4743
+ "epoch": 6.04,
4744
+ "eval_loss": 0.8038508296012878,
4745
+ "eval_runtime": 1.2713,
4746
+ "eval_samples_per_second": 786.62,
4747
+ "eval_steps_per_second": 12.586,
4748
+ "step": 237000
4749
+ },
4750
+ {
4751
+ "epoch": 6.05,
4752
+ "learning_rate": 0.00017886621559070638,
4753
+ "loss": 0.289,
4754
+ "step": 237500
4755
+ },
4756
+ {
4757
+ "epoch": 6.07,
4758
+ "learning_rate": 0.00017839311954729337,
4759
+ "loss": 0.2887,
4760
+ "step": 238000
4761
+ },
4762
+ {
4763
+ "epoch": 6.07,
4764
+ "eval_loss": 0.8181630969047546,
4765
+ "eval_runtime": 1.3231,
4766
+ "eval_samples_per_second": 755.824,
4767
+ "eval_steps_per_second": 12.093,
4768
+ "step": 238000
4769
+ },
4770
+ {
4771
+ "epoch": 6.08,
4772
+ "learning_rate": 0.00017791976768047292,
4773
+ "loss": 0.2891,
4774
+ "step": 238500
4775
+ },
4776
+ {
4777
+ "epoch": 6.09,
4778
+ "learning_rate": 0.00017744616516674518,
4779
+ "loss": 0.2889,
4780
+ "step": 239000
4781
+ },
4782
+ {
4783
+ "epoch": 6.09,
4784
+ "eval_loss": 0.806620717048645,
4785
+ "eval_runtime": 1.2977,
4786
+ "eval_samples_per_second": 770.587,
4787
+ "eval_steps_per_second": 12.329,
4788
+ "step": 239000
4789
+ },
4790
+ {
4791
+ "epoch": 6.1,
4792
+ "learning_rate": 0.00017697231718535132,
4793
+ "loss": 0.2888,
4794
+ "step": 239500
4795
+ },
4796
+ {
4797
+ "epoch": 6.12,
4798
+ "learning_rate": 0.00017649822891821707,
4799
+ "loss": 0.2889,
4800
+ "step": 240000
4801
+ },
4802
+ {
4803
+ "epoch": 6.12,
4804
+ "eval_loss": 0.8128913640975952,
4805
+ "eval_runtime": 1.2865,
4806
+ "eval_samples_per_second": 777.28,
4807
+ "eval_steps_per_second": 12.436,
4808
+ "step": 240000
4809
  }
4810
  ],
4811
  "max_steps": 500000,
4812
  "num_train_epochs": 13,
4813
+ "total_flos": 7.66761045710973e+21,
4814
  "trial_name": null,
4815
  "trial_params": null
4816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf594ad85da8b14fc79f1305a9d38a155ee31dc640b1cde587a09e1650e1b112
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08123eb498e0376035cfc13e99904da381c8fd27d56ad427569e3314a31c3e52
3
  size 102501541