leixa committed on
Commit
28b7714
·
verified ·
1 Parent(s): 6333056

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8451e588a45163d61a250025d9fe1becefe5afb2ebefc4d027e4e03754fa2eb3
3
  size 692136856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:068a3df9cd87acef2ce5ba2fe376fed7b85eb182b1bd144f0bb3410b20253fb7
3
  size 692136856
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d4f6611287b070b2030de06a9d530bcbb55107b364b6ac4e1e5598f37ed1871
3
  size 85723732
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a1a8637b1886dc9fc6067b2548d484b950030b8daccc9a50308a49e33a03be4
3
  size 85723732
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:852b4f618a18afb719aa9c5d0ac61182c6a8e953aadb45c389358df7f1d84b41
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52b58e724e3c19bafefbb2b1f7844b7398c0f381184ee71a09d79ed6d4442fb5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51094b5d327949483be134a2a7ce82f120d34a302bf097e81122d94eff7cf8c6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64297a6969c9113e6582dde9428f08d78a5599aec9c2adf99caa5d81625685a6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.025200458190148912,
5
  "eval_steps": 34,
6
- "global_step": 374,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -971,6 +971,69 @@
971
  "eval_samples_per_second": 14.042,
972
  "eval_steps_per_second": 1.756,
973
  "step": 374
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
974
  }
975
  ],
976
  "logging_steps": 3,
@@ -985,12 +1048,12 @@
985
  "should_evaluate": false,
986
  "should_log": false,
987
  "should_save": true,
988
- "should_training_stop": false
989
  },
990
  "attributes": {}
991
  }
992
  },
993
- "total_flos": 5.2594116619822694e+17,
994
  "train_batch_size": 8,
995
  "trial_name": null,
996
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.026952361700694025,
5
  "eval_steps": 34,
6
+ "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
971
  "eval_samples_per_second": 14.042,
972
  "eval_steps_per_second": 1.756,
973
  "step": 374
974
+ },
975
+ {
976
+ "epoch": 0.025267839094400647,
977
+ "grad_norm": 0.708363950252533,
978
+ "learning_rate": 5.052336989433082e-07,
979
+ "loss": 0.8262,
980
+ "step": 375
981
+ },
982
+ {
983
+ "epoch": 0.025469981807155852,
984
+ "grad_norm": 0.7722126841545105,
985
+ "learning_rate": 3.915515781850565e-07,
986
+ "loss": 0.8382,
987
+ "step": 378
988
+ },
989
+ {
990
+ "epoch": 0.025672124519911056,
991
+ "grad_norm": 0.6360299587249756,
992
+ "learning_rate": 2.922400983217416e-07,
993
+ "loss": 0.8027,
994
+ "step": 381
995
+ },
996
+ {
997
+ "epoch": 0.02587426723266626,
998
+ "grad_norm": 0.7291192412376404,
999
+ "learning_rate": 2.0735725446094923e-07,
1000
+ "loss": 0.8251,
1001
+ "step": 384
1002
+ },
1003
+ {
1004
+ "epoch": 0.02607640994542147,
1005
+ "grad_norm": 0.8587584495544434,
1006
+ "learning_rate": 1.3695261579316777e-07,
1007
+ "loss": 0.7955,
1008
+ "step": 387
1009
+ },
1010
+ {
1011
+ "epoch": 0.026278552658176673,
1012
+ "grad_norm": 0.6676596403121948,
1013
+ "learning_rate": 8.106729664475176e-08,
1014
+ "loss": 0.7629,
1015
+ "step": 390
1016
+ },
1017
+ {
1018
+ "epoch": 0.026480695370931878,
1019
+ "grad_norm": 0.6349416375160217,
1020
+ "learning_rate": 3.9733932468333234e-08,
1021
+ "loss": 0.8323,
1022
+ "step": 393
1023
+ },
1024
+ {
1025
+ "epoch": 0.026682838083687082,
1026
+ "grad_norm": 0.7458873987197876,
1027
+ "learning_rate": 1.297666078462767e-08,
1028
+ "loss": 0.7842,
1029
+ "step": 396
1030
+ },
1031
+ {
1032
+ "epoch": 0.02688498079644229,
1033
+ "grad_norm": 0.7006601691246033,
1034
+ "learning_rate": 8.111070868010995e-10,
1035
+ "loss": 0.8103,
1036
+ "step": 399
1037
  }
1038
  ],
1039
  "logging_steps": 3,
 
1048
  "should_evaluate": false,
1049
  "should_log": false,
1050
  "should_save": true,
1051
+ "should_training_stop": true
1052
  },
1053
  "attributes": {}
1054
  }
1055
  },
1056
+ "total_flos": 5.625039210676224e+17,
1057
  "train_batch_size": 8,
1058
  "trial_name": null,
1059
  "trial_params": null