Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +67 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8451e588a45163d61a250025d9fe1becefe5afb2ebefc4d027e4e03754fa2eb3
 size 692136856

 version https://git-lfs.github.com/spec/v1
+oid sha256:068a3df9cd87acef2ce5ba2fe376fed7b85eb182b1bd144f0bb3410b20253fb7
 size 692136856

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d4f6611287b070b2030de06a9d530bcbb55107b364b6ac4e1e5598f37ed1871
 size 85723732

 version https://git-lfs.github.com/spec/v1
+oid sha256:7a1a8637b1886dc9fc6067b2548d484b950030b8daccc9a50308a49e33a03be4
 size 85723732

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:852b4f618a18afb719aa9c5d0ac61182c6a8e953aadb45c389358df7f1d84b41
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:52b58e724e3c19bafefbb2b1f7844b7398c0f381184ee71a09d79ed6d4442fb5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51094b5d327949483be134a2a7ce82f120d34a302bf097e81122d94eff7cf8c6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:64297a6969c9113e6582dde9428f08d78a5599aec9c2adf99caa5d81625685a6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.025200458190148912,
   "eval_steps": 34,
-  "global_step": 374,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -971,6 +971,69 @@
       "eval_samples_per_second": 14.042,
       "eval_steps_per_second": 1.756,
       "step": 374
     }
   ],
   "logging_steps": 3,
@@ -985,12 +1048,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.2594116619822694e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.026952361700694025,
   "eval_steps": 34,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.042,
       "eval_steps_per_second": 1.756,
       "step": 374
+    },
+    {
+      "epoch": 0.025267839094400647,
+      "grad_norm": 0.708363950252533,
+      "learning_rate": 5.052336989433082e-07,
+      "loss": 0.8262,
+      "step": 375
+    },
+    {
+      "epoch": 0.025469981807155852,
+      "grad_norm": 0.7722126841545105,
+      "learning_rate": 3.915515781850565e-07,
+      "loss": 0.8382,
+      "step": 378
+    },
+    {
+      "epoch": 0.025672124519911056,
+      "grad_norm": 0.6360299587249756,
+      "learning_rate": 2.922400983217416e-07,
+      "loss": 0.8027,
+      "step": 381
+    },
+    {
+      "epoch": 0.02587426723266626,
+      "grad_norm": 0.7291192412376404,
+      "learning_rate": 2.0735725446094923e-07,
+      "loss": 0.8251,
+      "step": 384
+    },
+    {
+      "epoch": 0.02607640994542147,
+      "grad_norm": 0.8587584495544434,
+      "learning_rate": 1.3695261579316777e-07,
+      "loss": 0.7955,
+      "step": 387
+    },
+    {
+      "epoch": 0.026278552658176673,
+      "grad_norm": 0.6676596403121948,
+      "learning_rate": 8.106729664475176e-08,
+      "loss": 0.7629,
+      "step": 390
+    },
+    {
+      "epoch": 0.026480695370931878,
+      "grad_norm": 0.6349416375160217,
+      "learning_rate": 3.9733932468333234e-08,
+      "loss": 0.8323,
+      "step": 393
+    },
+    {
+      "epoch": 0.026682838083687082,
+      "grad_norm": 0.7458873987197876,
+      "learning_rate": 1.297666078462767e-08,
+      "loss": 0.7842,
+      "step": 396
+    },
+    {
+      "epoch": 0.02688498079644229,
+      "grad_norm": 0.7006601691246033,
+      "learning_rate": 8.111070868010995e-10,
+      "loss": 0.8103,
+      "step": 399
     }
   ],
   "logging_steps": 3,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.625039210676224e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null