alicegoesdown commited on
Commit
3e26e42
·
verified ·
1 Parent(s): 6f24cd8

Training in progress, step 4950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbc27f9d670bf59613f0ad35c6ed0a61b0c42a921427780b714064faeedc7ce2
3
  size 100689176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8be8480941245af20b0c2571d02c9b94b1dda03625c942b82e1e2d9944ffeac2
3
  size 100689176
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:051de4a085c3b8b4a38172e6abbdb97e0fd9a905d72f7607924e791ed45fc1b3
3
  size 201488698
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:629c0bfd0b78b063757dd2cd9ca9c64341a7bd4190c47cd037e3cbc83e96caf0
3
  size 201488698
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3cc334a175d1898fa9cff12909aff991b240c7fc1ce24cdffc01c260075fe88
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d778eb59ce7dbca8136a7959dc2657baef2d7e03b4b7fda5ae3d63b697442a50
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daa57dde97277e3ea991e32bd89d96081667572958c56d89a2fb6b0a6f8e1923
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fe7f1bf39a2a21ec6453d647a430741165accfbd545b7df5e312cd5fa0c7360
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.0090162754058838,
3
- "best_model_checkpoint": "./output/checkpoint-4800",
4
- "epoch": 0.17368649587494572,
5
  "eval_steps": 150,
6
- "global_step": 4800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3623,6 +3623,119 @@
3623
  "eval_samples_per_second": 7.34,
3624
  "eval_steps_per_second": 7.34,
3625
  "step": 4800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3626
  }
3627
  ],
3628
  "logging_steps": 10,
@@ -3642,7 +3755,7 @@
3642
  "attributes": {}
3643
  }
3644
  },
3645
- "total_flos": 4.294189144565023e+17,
3646
  "train_batch_size": 4,
3647
  "trial_name": null,
3648
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.0089582204818726,
3
+ "best_model_checkpoint": "./output/checkpoint-4950",
4
+ "epoch": 0.17911419887103777,
5
  "eval_steps": 150,
6
+ "global_step": 4950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3623
  "eval_samples_per_second": 7.34,
3624
  "eval_steps_per_second": 7.34,
3625
  "step": 4800
3626
+ },
3627
+ {
3628
+ "epoch": 0.17404834274135186,
3629
+ "grad_norm": 3.6224241256713867,
3630
+ "learning_rate": 2.0378882491921159e-07,
3631
+ "loss": 1.0616,
3632
+ "step": 4810
3633
+ },
3634
+ {
3635
+ "epoch": 0.174410189607758,
3636
+ "grad_norm": 1.605273723602295,
3637
+ "learning_rate": 1.8292507030715362e-07,
3638
+ "loss": 1.1681,
3639
+ "step": 4820
3640
+ },
3641
+ {
3642
+ "epoch": 0.17477203647416414,
3643
+ "grad_norm": 1.325554609298706,
3644
+ "learning_rate": 1.6318421365179055e-07,
3645
+ "loss": 1.0721,
3646
+ "step": 4830
3647
+ },
3648
+ {
3649
+ "epoch": 0.17513388334057028,
3650
+ "grad_norm": 1.8882009983062744,
3651
+ "learning_rate": 1.4456706642242134e-07,
3652
+ "loss": 1.1359,
3653
+ "step": 4840
3654
+ },
3655
+ {
3656
+ "epoch": 0.1754957302069764,
3657
+ "grad_norm": 1.310950517654419,
3658
+ "learning_rate": 1.2707439389704867e-07,
3659
+ "loss": 1.0071,
3660
+ "step": 4850
3661
+ },
3662
+ {
3663
+ "epoch": 0.17585757707338254,
3664
+ "grad_norm": 1.9121229648590088,
3665
+ "learning_rate": 1.1070691513092563e-07,
3666
+ "loss": 1.1355,
3667
+ "step": 4860
3668
+ },
3669
+ {
3670
+ "epoch": 0.17621942393978868,
3671
+ "grad_norm": 1.402378797531128,
3672
+ "learning_rate": 9.546530292699863e-08,
3673
+ "loss": 1.1337,
3674
+ "step": 4870
3675
+ },
3676
+ {
3677
+ "epoch": 0.17658127080619482,
3678
+ "grad_norm": 1.7570936679840088,
3679
+ "learning_rate": 8.135018380824921e-08,
3680
+ "loss": 1.2101,
3681
+ "step": 4880
3682
+ },
3683
+ {
3684
+ "epoch": 0.17694311767260096,
3685
+ "grad_norm": 2.1342835426330566,
3686
+ "learning_rate": 6.836213799193497e-08,
3687
+ "loss": 0.9914,
3688
+ "step": 4890
3689
+ },
3690
+ {
3691
+ "epoch": 0.1773049645390071,
3692
+ "grad_norm": 1.0870988368988037,
3693
+ "learning_rate": 5.6501699365750784e-08,
3694
+ "loss": 1.1218,
3695
+ "step": 4900
3696
+ },
3697
+ {
3698
+ "epoch": 0.17766681140541324,
3699
+ "grad_norm": 1.0682313442230225,
3700
+ "learning_rate": 4.5769355465876964e-08,
3701
+ "loss": 1.0238,
3702
+ "step": 4910
3703
+ },
3704
+ {
3705
+ "epoch": 0.17802865827181938,
3706
+ "grad_norm": 1.6820406913757324,
3707
+ "learning_rate": 3.616554745692946e-08,
3708
+ "loss": 1.2016,
3709
+ "step": 4920
3710
+ },
3711
+ {
3712
+ "epoch": 0.1783905051382255,
3713
+ "grad_norm": 1.4748146533966064,
3714
+ "learning_rate": 2.7690670113848792e-08,
3715
+ "loss": 1.0566,
3716
+ "step": 4930
3717
+ },
3718
+ {
3719
+ "epoch": 0.17875235200463163,
3720
+ "grad_norm": 1.8944076299667358,
3721
+ "learning_rate": 2.034507180563916e-08,
3722
+ "loss": 1.1868,
3723
+ "step": 4940
3724
+ },
3725
+ {
3726
+ "epoch": 0.17911419887103777,
3727
+ "grad_norm": 1.0298928022384644,
3728
+ "learning_rate": 1.4129054481082926e-08,
3729
+ "loss": 0.9945,
3730
+ "step": 4950
3731
+ },
3732
+ {
3733
+ "epoch": 0.17911419887103777,
3734
+ "eval_loss": 1.0089582204818726,
3735
+ "eval_runtime": 68.6655,
3736
+ "eval_samples_per_second": 7.282,
3737
+ "eval_steps_per_second": 7.282,
3738
+ "step": 4950
3739
  }
3740
  ],
3741
  "logging_steps": 10,
 
3755
  "attributes": {}
3756
  }
3757
  },
3758
+ "total_flos": 4.430239058215649e+17,
3759
  "train_batch_size": 4,
3760
  "trial_name": null,
3761
  "trial_params": null