Training in progress, step 18560, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1a89e01452a1f4a07786e13815583895e60d58274777765962f66cefaa61094
 size 13982248

 version https://git-lfs.github.com/spec/v1
+oid sha256:f3ead06ca64c097b468e4848a48dfe313e10e066882daadec4733b392f7d027f
 size 13982248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98428bd88fdf622ed27e54ec99ad6aff4a2635b140eb02e69ec3d2b2c59a6ea7
 size 7062522

 version https://git-lfs.github.com/spec/v1
+oid sha256:7406298021aee6db4278f2ee9543df6b8e99529b9e938b2035da9248006c804c
 size 7062522

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cac6c71d875067e541674f24a19d324074a4e353704dafdd9d6d6cd6edb82f1a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:47f34dc38cec7b95d2822f7dac5953198d47e6fd7a3a22a93769d88071de8848
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e0540d1cdcac50331660ae8f79c6485938552612b78b57c5285e9b6f7182b9b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b581f3feb3bd9bc8f3fdf9e12a5bd849da1459f5b42fe0464a7236b0311e0814
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2623721042129544,
   "eval_steps": 500,
-  "global_step": 18540,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6496,6 +6496,13 @@
       "learning_rate": 6.954854106421715e-07,
       "loss": 3.5348,
       "step": 18540
     }
   ],
   "logging_steps": 20,
@@ -6503,7 +6510,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 20,
-  "total_flos": 4.028930209962394e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2626551377665822,
   "eval_steps": 500,
+  "global_step": 18560,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.954854106421715e-07,
       "loss": 3.5348,
       "step": 18540
+    },
+    {
+      "epoch": 0.26,
+      "grad_norm": 28.641721725463867,
+      "learning_rate": 6.766473629355452e-07,
+      "loss": 3.5451,
+      "step": 18560
     }
   ],
   "logging_steps": 20,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 20,
+  "total_flos": 4.033483670627942e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null