Training in progress, step 18520, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e31afd1c461e20344e65e088c22c1fa2b87f37ae229e186c8dd536291662f3e0
 size 13982248

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ee3c331e12cc90d2109f4e1480b06ec8c2e15119b962650d98dc8e9275f0114
 size 13982248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:017ad378478be21c5c3faea9ff1f7f16ea9d295e8dd33c419da242689e52ac4e
 size 7062522

 version https://git-lfs.github.com/spec/v1
+oid sha256:c3891f63e21d90247d0e51a91f6edee6475b1595b386689a4f0e8119dba22b45
 size 7062522

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a466f419d35745da96cceb5fb7566539c3bbb8c46cc1d8d9b9aaf268fdd5012c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe802ef5501ca89e4a80494bd85301488c2a808ed2678ad042735d4adc405db0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a13fce1c4fb8383036ea7f38eed4855194359f3326b9de22ec443d33e11a4ab9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a28521b432517a7996bfa525c5ee610c6e329294777eab45a275397c9b6af395
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2618060371056989,
   "eval_steps": 500,
-  "global_step": 18500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6482,6 +6482,13 @@
       "learning_rate": 7.339261343510206e-07,
       "loss": 3.4247,
       "step": 18500
     }
   ],
   "logging_steps": 20,
@@ -6489,7 +6496,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 20,
-  "total_flos": 4.019406653541581e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2620890706593267,
   "eval_steps": 500,
+  "global_step": 18520,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 7.339261343510206e-07,
       "loss": 3.4247,
       "step": 18500
+    },
+    {
+      "epoch": 0.26,
+      "grad_norm": 19.394601821899414,
+      "learning_rate": 7.145784012061424e-07,
+      "loss": 3.5844,
+      "step": 18520
     }
   ],
   "logging_steps": 20,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 20,
+  "total_flos": 4.024222561507738e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null