oldiday committed on
Commit
b704882
·
verified ·
1 Parent(s): a378651

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad0bccb0ba793a49c0c6477d404b9362429826e2885aa40346d6d93bb8233774
3
  size 912336848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7da1d2ce40dbea8e73696e0f082744f02644f6b47dca5f7a06ce8cff602f451c
3
  size 912336848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b676ea1901ac1598f20b20dfb2d4d50dc3993f4f24c5acfc0d305758d706432
3
  size 463916756
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96d7c9708f2f3ce73426ae02cf52e9257dd9191c642c858297aa88b6b0f0e824
3
  size 463916756
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdbf4d797ce28163185f6bc64a5482e44de7a6b979c0ef6ed14c5c694f4353e4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2026cddff93bf50fc4330ef3e58cf48c9b2abea2d0d7a08ccf2d89915f727e3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9096f15f02bac6b0fc27aa7aa4986f85d87d53fca310a75657e0015357af5c5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:705cabf5cbc3a6ab0feb67c77b9b453d59efcc939ce90d310af96e621810f990
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.5596095323562622,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-350",
4
- "epoch": 0.14525835235526044,
5
  "eval_steps": 50,
6
- "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -883,6 +883,133 @@
883
  "eval_samples_per_second": 9.338,
884
  "eval_steps_per_second": 2.336,
885
  "step": 350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
886
  }
887
  ],
888
  "logging_steps": 3,
@@ -897,7 +1024,7 @@
897
  "early_stopping_threshold": 0.0
898
  },
899
  "attributes": {
900
- "early_stopping_patience_counter": 0
901
  }
902
  },
903
  "TrainerControl": {
@@ -906,12 +1033,12 @@
906
  "should_evaluate": false,
907
  "should_log": false,
908
  "should_save": true,
909
- "should_training_stop": false
910
  },
911
  "attributes": {}
912
  }
913
  },
914
- "total_flos": 8.128987503840461e+17,
915
  "train_batch_size": 8,
916
  "trial_name": null,
917
  "trial_params": null
 
1
  {
2
  "best_metric": 1.5596095323562622,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-350",
4
+ "epoch": 0.16600954554886907,
5
  "eval_steps": 50,
6
+ "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
883
  "eval_samples_per_second": 9.338,
884
  "eval_steps_per_second": 2.336,
885
  "step": 350
886
+ },
887
+ {
888
+ "epoch": 0.1456733762191326,
889
+ "grad_norm": 1.5290882587432861,
890
+ "learning_rate": 3.844650207332562e-06,
891
+ "loss": 5.579,
892
+ "step": 351
893
+ },
894
+ {
895
+ "epoch": 0.14691844781074911,
896
+ "grad_norm": 1.922317385673523,
897
+ "learning_rate": 3.393526721321616e-06,
898
+ "loss": 6.2572,
899
+ "step": 354
900
+ },
901
+ {
902
+ "epoch": 0.14816351940236563,
903
+ "grad_norm": 1.9951307773590088,
904
+ "learning_rate": 2.9696201032491434e-06,
905
+ "loss": 5.9967,
906
+ "step": 357
907
+ },
908
+ {
909
+ "epoch": 0.14940859099398215,
910
+ "grad_norm": 1.9137158393859863,
911
+ "learning_rate": 2.573177902642726e-06,
912
+ "loss": 6.0885,
913
+ "step": 360
914
+ },
915
+ {
916
+ "epoch": 0.15065366258559867,
917
+ "grad_norm": 1.8864800930023193,
918
+ "learning_rate": 2.204431630583548e-06,
919
+ "loss": 6.0631,
920
+ "step": 363
921
+ },
922
+ {
923
+ "epoch": 0.1518987341772152,
924
+ "grad_norm": 1.8956104516983032,
925
+ "learning_rate": 1.8635966245104664e-06,
926
+ "loss": 6.2917,
927
+ "step": 366
928
+ },
929
+ {
930
+ "epoch": 0.15314380576883171,
931
+ "grad_norm": 2.2040300369262695,
932
+ "learning_rate": 1.5508719224689717e-06,
933
+ "loss": 6.0451,
934
+ "step": 369
935
+ },
936
+ {
937
+ "epoch": 0.15438887736044823,
938
+ "grad_norm": 1.814900279045105,
939
+ "learning_rate": 1.2664401468786114e-06,
940
+ "loss": 6.3508,
941
+ "step": 372
942
+ },
943
+ {
944
+ "epoch": 0.15563394895206475,
945
+ "grad_norm": 2.195012331008911,
946
+ "learning_rate": 1.0104673978866164e-06,
947
+ "loss": 6.4296,
948
+ "step": 375
949
+ },
950
+ {
951
+ "epoch": 0.15687902054368127,
952
+ "grad_norm": 2.159865379333496,
953
+ "learning_rate": 7.83103156370113e-07,
954
+ "loss": 6.4639,
955
+ "step": 378
956
+ },
957
+ {
958
+ "epoch": 0.15812409213529777,
959
+ "grad_norm": 2.2066335678100586,
960
+ "learning_rate": 5.844801966434832e-07,
961
+ "loss": 6.6283,
962
+ "step": 381
963
+ },
964
+ {
965
+ "epoch": 0.1593691637269143,
966
+ "grad_norm": 2.2779653072357178,
967
+ "learning_rate": 4.1471450892189846e-07,
968
+ "loss": 6.2024,
969
+ "step": 384
970
+ },
971
+ {
972
+ "epoch": 0.1606142353185308,
973
+ "grad_norm": 2.2787067890167236,
974
+ "learning_rate": 2.7390523158633554e-07,
975
+ "loss": 6.4473,
976
+ "step": 387
977
+ },
978
+ {
979
+ "epoch": 0.16185930691014733,
980
+ "grad_norm": 2.420171022415161,
981
+ "learning_rate": 1.6213459328950352e-07,
982
+ "loss": 6.3441,
983
+ "step": 390
984
+ },
985
+ {
986
+ "epoch": 0.16310437850176385,
987
+ "grad_norm": 2.8213346004486084,
988
+ "learning_rate": 7.946786493666647e-08,
989
+ "loss": 6.279,
990
+ "step": 393
991
+ },
992
+ {
993
+ "epoch": 0.16434945009338037,
994
+ "grad_norm": 2.905611515045166,
995
+ "learning_rate": 2.595332156925534e-08,
996
+ "loss": 6.1803,
997
+ "step": 396
998
+ },
999
+ {
1000
+ "epoch": 0.1655945216849969,
1001
+ "grad_norm": 4.091871738433838,
1002
+ "learning_rate": 1.622214173602199e-09,
1003
+ "loss": 5.6065,
1004
+ "step": 399
1005
+ },
1006
+ {
1007
+ "epoch": 0.16600954554886907,
1008
+ "eval_loss": 1.561850905418396,
1009
+ "eval_runtime": 434.5611,
1010
+ "eval_samples_per_second": 9.338,
1011
+ "eval_steps_per_second": 2.336,
1012
+ "step": 400
1013
  }
1014
  ],
1015
  "logging_steps": 3,
 
1024
  "early_stopping_threshold": 0.0
1025
  },
1026
  "attributes": {
1027
+ "early_stopping_patience_counter": 1
1028
  }
1029
  },
1030
  "TrainerControl": {
 
1033
  "should_evaluate": false,
1034
  "should_log": false,
1035
  "should_save": true,
1036
+ "should_training_stop": true
1037
  },
1038
  "attributes": {}
1039
  }
1040
  },
1041
+ "total_flos": 9.289442536508621e+17,
1042
  "train_batch_size": 8,
1043
  "trial_name": null,
1044
  "trial_params": null