Training in progress, step 7200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +122 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59e4deb0fd7483db8ad6ecbc4b685edbd17935d1da824196ba4fc901973119e0
 size 13982248

 version https://git-lfs.github.com/spec/v1
+oid sha256:d33ad5ad7297a3474565ac786f8f9d70a33eb5627d08f5a697c138ffc15d88d1
 size 13982248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d0a5a0084d24afc11f74c50b817d93674fb054ada62fbbef9c92566dd4897ab
 size 7062522

 version https://git-lfs.github.com/spec/v1
+oid sha256:6cbe5a6df51d7cf574700e32447f7c59e1f86ee74b6f42f6b5250d0259029ffa
 size 7062522

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:379130258049e0cbeef66efed7dd213cfd228d8c04c387dcbb54754f2a453a5a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ae43d9a0a1751b368da41b4d38a9e6b8912af0859b5bf0f0514e58b2fa49faf
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e4378be4b8a985777dc1876169a11bcb383075ed65c6a773225043d0598af8d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c777f1142b68b8b3ec3d325314bfa50ddccfacdeb4ee5ac4c1090831572e47da
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5488,
   "eval_steps": 500,
-  "global_step": 6860,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2408,6 +2408,125 @@
       "learning_rate": 2.293046993300198e-05,
       "loss": 3.567,
       "step": 6860
     }
   ],
   "logging_steps": 20,
@@ -2415,7 +2534,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
   "save_steps": 20,
-  "total_flos": 1.6194507519492096e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.576,
   "eval_steps": 500,
+  "global_step": 7200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.293046993300198e-05,
       "loss": 3.567,
       "step": 6860
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 21.349533081054688,
+      "learning_rate": 2.288919209281294e-05,
+      "loss": 3.4752,
+      "step": 6880
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 25.281597137451172,
+      "learning_rate": 2.284783149873195e-05,
+      "loss": 3.4934,
+      "step": 6900
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 32.13029861450195,
+      "learning_rate": 2.2806388584612067e-05,
+      "loss": 3.4793,
+      "step": 6920
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 21.672082901000977,
+      "learning_rate": 2.2764863785169857e-05,
+      "loss": 3.4366,
+      "step": 6940
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 29.802305221557617,
+      "learning_rate": 2.2723257535980804e-05,
+      "loss": 3.5174,
+      "step": 6960
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 43.310577392578125,
+      "learning_rate": 2.2681570273474783e-05,
+      "loss": 3.4745,
+      "step": 6980
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 32.417236328125,
+      "learning_rate": 2.2639802434931447e-05,
+      "loss": 3.438,
+      "step": 7000
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 42.29374313354492,
+      "learning_rate": 2.259795445847566e-05,
+      "loss": 3.5194,
+      "step": 7020
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 51.19217300415039,
+      "learning_rate": 2.2556026783072896e-05,
+      "loss": 3.496,
+      "step": 7040
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 24.400171279907227,
+      "learning_rate": 2.251401984852463e-05,
+      "loss": 3.4013,
+      "step": 7060
+    },
+    {
+      "epoch": 0.57,
+      "grad_norm": 26.298309326171875,
+      "learning_rate": 2.2471934095463724e-05,
+      "loss": 3.5607,
+      "step": 7080
+    },
+    {
+      "epoch": 0.57,
+      "grad_norm": 27.419946670532227,
+      "learning_rate": 2.2429769965349818e-05,
+      "loss": 3.4593,
+      "step": 7100
+    },
+    {
+      "epoch": 0.57,
+      "grad_norm": 29.470266342163086,
+      "learning_rate": 2.2387527900464676e-05,
+      "loss": 3.4388,
+      "step": 7120
+    },
+    {
+      "epoch": 0.57,
+      "grad_norm": 21.410829544067383,
+      "learning_rate": 2.2345208343907577e-05,
+      "loss": 3.5141,
+      "step": 7140
+    },
+    {
+      "epoch": 0.57,
+      "grad_norm": 32.501766204833984,
+      "learning_rate": 2.2302811739590642e-05,
+      "loss": 3.4647,
+      "step": 7160
+    },
+    {
+      "epoch": 0.57,
+      "grad_norm": 24.69274139404297,
+      "learning_rate": 2.2260338532234194e-05,
+      "loss": 3.4781,
+      "step": 7180
+    },
+    {
+      "epoch": 0.58,
+      "grad_norm": 25.721759796142578,
+      "learning_rate": 2.2217789167362078e-05,
+      "loss": 3.4405,
+      "step": 7200
     }
   ],
   "logging_steps": 20,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
   "save_steps": 20,
+  "total_flos": 1.7005830543409152e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null