dq158 commited on
Commit
69944bf
1 Parent(s): 1a14740

Training in progress, epoch 11, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cc9ebe55774e98f2e8ad86775f276cd8ded7b6363c560e8cc4381d87b67445a
3
- size 2371770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b685eb47baafc7984a2354c64f3a633c89ac5489423119e83feaa1b146cbedb
3
+ size 2372346
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47450dc9e0b278d77248d05ef1c34bd97409b0d7111b42d8afdb381c61b6751c
3
  size 990409330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6600748b28844d6445286c2db32da87f8f4ae7df8cb2643db5bb76ae67233234
3
  size 990409330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:754c90914edc0d2f5f5e31691433ae62d48ae18f50b17f4f75d023288dda9b5a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4073b96953e05453b4b6e4a3030f1c740fa0e02670dbb4843214e79d4e7e84c5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:694cd02885166a75e3403878712069706f257db461ded2ae6ad6c41b84f22bf2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b44cd738bffad7be53bf491009475d442fa0e6452600c820ee5979a9a8ce3a05
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.5654487609863281,
3
  "best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323",
4
- "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 63230,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -953,13 +953,110 @@
953
  "eval_steps_per_second": 1.037,
954
  "eval_translation_length": 52485,
955
  "step": 63230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
956
  }
957
  ],
958
  "logging_steps": 500,
959
  "max_steps": 126460,
960
  "num_train_epochs": 20,
961
  "save_steps": 500,
962
- "total_flos": 1.7318198200762368e+17,
963
  "trial_name": null,
964
  "trial_params": null
965
  }
 
1
  {
2
  "best_metric": 1.5654487609863281,
3
  "best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323",
4
+ "epoch": 11.0,
5
  "eval_steps": 500,
6
+ "global_step": 69553,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
953
  "eval_steps_per_second": 1.037,
954
  "eval_translation_length": 52485,
955
  "step": 63230
956
+ },
957
+ {
958
+ "epoch": 10.04,
959
+ "learning_rate": 2.498752940408342e-06,
960
+ "loss": 1.7938,
961
+ "step": 63500
962
+ },
963
+ {
964
+ "epoch": 10.12,
965
+ "learning_rate": 2.4675773582364977e-06,
966
+ "loss": 1.7688,
967
+ "step": 64000
968
+ },
969
+ {
970
+ "epoch": 10.2,
971
+ "learning_rate": 2.436406818231583e-06,
972
+ "loss": 1.7701,
973
+ "step": 64500
974
+ },
975
+ {
976
+ "epoch": 10.28,
977
+ "learning_rate": 2.4052461678414753e-06,
978
+ "loss": 1.7821,
979
+ "step": 65000
980
+ },
981
+ {
982
+ "epoch": 10.36,
983
+ "learning_rate": 2.37410025297608e-06,
984
+ "loss": 1.8251,
985
+ "step": 65500
986
+ },
987
+ {
988
+ "epoch": 10.44,
989
+ "learning_rate": 2.342973917253726e-06,
990
+ "loss": 1.7384,
991
+ "step": 66000
992
+ },
993
+ {
994
+ "epoch": 10.52,
995
+ "learning_rate": 2.3118720012479183e-06,
996
+ "loss": 1.8001,
997
+ "step": 66500
998
+ },
999
+ {
1000
+ "epoch": 10.6,
1001
+ "learning_rate": 2.280799341734556e-06,
1002
+ "loss": 1.8386,
1003
+ "step": 67000
1004
+ },
1005
+ {
1006
+ "epoch": 10.68,
1007
+ "learning_rate": 2.249760770939754e-06,
1008
+ "loss": 1.8098,
1009
+ "step": 67500
1010
+ },
1011
+ {
1012
+ "epoch": 10.75,
1013
+ "learning_rate": 2.218761115788362e-06,
1014
+ "loss": 1.8059,
1015
+ "step": 68000
1016
+ },
1017
+ {
1018
+ "epoch": 10.83,
1019
+ "learning_rate": 2.1878051971533093e-06,
1020
+ "loss": 1.757,
1021
+ "step": 68500
1022
+ },
1023
+ {
1024
+ "epoch": 10.91,
1025
+ "learning_rate": 2.156897829105898e-06,
1026
+ "loss": 1.8037,
1027
+ "step": 69000
1028
+ },
1029
+ {
1030
+ "epoch": 10.99,
1031
+ "learning_rate": 2.1260438181671446e-06,
1032
+ "loss": 1.7714,
1033
+ "step": 69500
1034
+ },
1035
+ {
1036
+ "epoch": 11.0,
1037
+ "eval_bleu": 1.0,
1038
+ "eval_brevity_penalty": 1.0,
1039
+ "eval_length_ratio": 1.0,
1040
+ "eval_loss": 1.5735211372375488,
1041
+ "eval_precisions": [
1042
+ 1.0,
1043
+ 1.0,
1044
+ 1.0,
1045
+ 1.0
1046
+ ],
1047
+ "eval_reference_length": 52469,
1048
+ "eval_runtime": 678.026,
1049
+ "eval_samples_per_second": 4.146,
1050
+ "eval_steps_per_second": 1.037,
1051
+ "eval_translation_length": 52469,
1052
+ "step": 69553
1053
  }
1054
  ],
1055
  "logging_steps": 500,
1056
  "max_steps": 126460,
1057
  "num_train_epochs": 20,
1058
  "save_steps": 500,
1059
+ "total_flos": 1.9050018020838605e+17,
1060
  "trial_name": null,
1061
  "trial_params": null
1062
  }