Femboyuwu2000 committed on
Commit 4dcea30
1 Parent(s): 9829872

Training in progress, step 18540, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ee3c331e12cc90d2109f4e1480b06ec8c2e15119b962650d98dc8e9275f0114
+oid sha256:f1a89e01452a1f4a07786e13815583895e60d58274777765962f66cefaa61094
 size 13982248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3891f63e21d90247d0e51a91f6edee6475b1595b386689a4f0e8119dba22b45
+oid sha256:98428bd88fdf622ed27e54ec99ad6aff4a2635b140eb02e69ec3d2b2c59a6ea7
 size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe802ef5501ca89e4a80494bd85301488c2a808ed2678ad042735d4adc405db0
+oid sha256:cac6c71d875067e541674f24a19d324074a4e353704dafdd9d6d6cd6edb82f1a
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a28521b432517a7996bfa525c5ee610c6e329294777eab45a275397c9b6af395
+oid sha256:3e0540d1cdcac50331660ae8f79c6485938552612b78b57c5285e9b6f7182b9b
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2620890706593267,
+  "epoch": 0.2623721042129544,
   "eval_steps": 500,
-  "global_step": 18520,
+  "global_step": 18540,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6489,6 +6489,13 @@
       "learning_rate": 7.145784012061424e-07,
       "loss": 3.5844,
       "step": 18520
+    },
+    {
+      "epoch": 0.26,
+      "grad_norm": 22.156579971313477,
+      "learning_rate": 6.954854106421715e-07,
+      "loss": 3.5348,
+      "step": 18540
     }
   ],
   "logging_steps": 20,
@@ -6496,7 +6503,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 20,
-  "total_flos": 4.024222561507738e+16,
+  "total_flos": 4.028930209962394e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null