ncbateman committed on
Commit 11dbf71
1 Parent(s): f657c98

Training in progress, step 555, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4c1a22a5ad978a253e58570d5a6e4cbeec9bfa7a8ece40f8616e2220428cc7a
+oid sha256:bef787ba16eb3d41f4d13340c07bc05fd0530058b348be8288493c90ab94325f
 size 167832240
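The block above is a Git LFS pointer diff, not the weights themselves: overwriting the adapter at this checkpoint changes only the oid (the SHA-256 of the stored blob), while the payload stays 167832240 bytes. A minimal sketch, not part of this commit, of checking a locally downloaded copy against the new pointer (the local path is an assumption for illustration):

```python
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file so a large checkpoint never has to fit in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Assumed local path; oid and size copied from the new LFS pointer above.
path = Path("last-checkpoint/adapter_model.safetensors")
expected_oid = "bef787ba16eb3d41f4d13340c07bc05fd0530058b348be8288493c90ab94325f"
expected_size = 167832240

assert path.stat().st_size == expected_size, "size mismatch with LFS pointer"
assert sha256_of(path) == expected_oid, "sha256 mismatch with LFS pointer"
print("adapter_model.safetensors matches its LFS pointer")
```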
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9081acbf7e17111ebe4f8bbdc838b421d5fd63ae6342d91e8d860439a914c398
+oid sha256:f3635defef027975c19b220db2e7d50884b06413d7e99c178c0845e44afd1e88
 size 85723732
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a199493692cf25c16b518bf6e7216c3987b56836fc34a4b8f0b5e62bd338bbd
+oid sha256:9fd23d48d0a00f2bcef4e61e7b0ebc30691cd0946be2aa8316f2c61edcd7b2f1
 size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74615d30db2006579916bf75a005a1eafb18948a8ecb1d61a6161179b736d4f2
+oid sha256:59225b59d81ea40b5f6d48123628af66023e2b10a1af4f3a269ca2da584f937a
 size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5cccf180715c4b2e0d980973df8379f3cff06745ac6beabafbc8fc4345127563
+oid sha256:5fa5404a9d5a7f82472a09215ed58ac37185a39533d448975bd7ab0b239729ef
 size 1064
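Together with the adapter weights, the optimizer state, LR scheduler state, and per-process RNG states above are what lets transformers' Trainer resume this run exactly from step 555. A minimal sketch, not part of this commit, of loading the saved LoRA adapter for inference with PEFT; the base model id is a placeholder (the diff does not name it), and it assumes the checkpoint directory also contains its adapter_config.json, which is unchanged and therefore not shown here:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL_ID = "your-base-model-id"   # assumption: base model is not named in this diff
ADAPTER_DIR = "last-checkpoint"        # directory holding adapter_model.safetensors

base = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
model = PeftModel.from_pretrained(base, ADAPTER_DIR)  # attaches the LoRA adapter weights
model.eval()
```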
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.853932584269663,
+  "epoch": 4.98876404494382,
   "eval_steps": 56,
-  "global_step": 540,
+  "global_step": 555,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3867,6 +3867,111 @@
       "learning_rate": 2.1753260154906973e-07,
       "loss": 0.2876,
       "step": 540
+    },
+    {
+      "epoch": 4.8629213483146065,
+      "grad_norm": 1.6425896883010864,
+      "learning_rate": 1.8951279224599382e-07,
+      "loss": 0.2625,
+      "step": 541
+    },
+    {
+      "epoch": 4.871910112359551,
+      "grad_norm": 1.553606629371643,
+      "learning_rate": 1.6342066931995802e-07,
+      "loss": 0.2581,
+      "step": 542
+    },
+    {
+      "epoch": 4.880898876404494,
+      "grad_norm": 1.5650955438613892,
+      "learning_rate": 1.3925724254686413e-07,
+      "loss": 0.2533,
+      "step": 543
+    },
+    {
+      "epoch": 4.889887640449438,
+      "grad_norm": 1.6483417749404907,
+      "learning_rate": 1.1702344706129298e-07,
+      "loss": 0.2943,
+      "step": 544
+    },
+    {
+      "epoch": 4.898876404494382,
+      "grad_norm": 1.5799306631088257,
+      "learning_rate": 9.672014332028356e-08,
+      "loss": 0.2944,
+      "step": 545
+    },
+    {
+      "epoch": 4.907865168539326,
+      "grad_norm": 1.736416220664978,
+      "learning_rate": 7.834811707005951e-08,
+      "loss": 0.3674,
+      "step": 546
+    },
+    {
+      "epoch": 4.91685393258427,
+      "grad_norm": 1.6711561679840088,
+      "learning_rate": 6.190807931560349e-08,
+      "loss": 0.2691,
+      "step": 547
+    },
+    {
+      "epoch": 4.925842696629213,
+      "grad_norm": 1.663176417350769,
+      "learning_rate": 4.740066629315143e-08,
+      "loss": 0.2818,
+      "step": 548
+    },
+    {
+      "epoch": 4.9348314606741575,
+      "grad_norm": 1.7786431312561035,
+      "learning_rate": 3.482643944556219e-08,
+      "loss": 0.3012,
+      "step": 549
+    },
+    {
+      "epoch": 4.943820224719101,
+      "grad_norm": 1.6006958484649658,
+      "learning_rate": 2.4185885400596075e-08,
+      "loss": 0.2574,
+      "step": 550
+    },
+    {
+      "epoch": 4.952808988764045,
+      "grad_norm": 1.7665148973464966,
+      "learning_rate": 1.5479415952085463e-08,
+      "loss": 0.2547,
+      "step": 551
+    },
+    {
+      "epoch": 4.961797752808989,
+      "grad_norm": 1.5012476444244385,
+      "learning_rate": 8.707368043975317e-09,
+      "loss": 0.2478,
+      "step": 552
+    },
+    {
+      "epoch": 4.9707865168539325,
+      "grad_norm": 1.7341482639312744,
+      "learning_rate": 3.870003757311391e-09,
+      "loss": 0.2677,
+      "step": 553
+    },
+    {
+      "epoch": 4.979775280898877,
+      "grad_norm": 1.6566503047943115,
+      "learning_rate": 9.675103000872376e-10,
+      "loss": 0.2858,
+      "step": 554
+    },
+    {
+      "epoch": 4.98876404494382,
+      "grad_norm": 1.8078707456588745,
+      "learning_rate": 0.0,
+      "loss": 0.3493,
+      "step": 555
     }
   ],
   "logging_steps": 1,
@@ -3881,12 +3986,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.2049530649811354e+18,
+  "total_flos": 3.293979539008389e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null