Training in progress, step 85000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc78a24e7476a4e461772f168f108199a2a77af9685517ffc74005501f0fad76
 size 747557272

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad9d161ccfe8557f774389914a0964e7cdf04d6e8ea6d5448a02a5477eba0971
 size 747557272

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b85145acd057826e904cc0e71967e4d1d1f560d39e6f6ca0aa843aa19167c3c
 size 1495236026

 version https://git-lfs.github.com/spec/v1
+oid sha256:51c751859bef95ec2c893271651707f5eb8ea5792b49f2773a99044eeec97c6e
 size 1495236026

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63d6594b348d0328bc13eba670203bc9f731b1fb8ce9bd11a213ec382ee291d3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8c5348d2e9fea1109e86f6b89c5d5fa153a55ab6c2c1017e0eb00abc8b2a78cf
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fbb70bff1a2c3af892703430f34f16a2bbe164c3de8d1d7040c801183f0a97c1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:61d291fc8147253f44bd142e317a0e7f96fef440dba6211ddac6ff2f9ed3f157
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.683033168997552,
   "eval_steps": 500,
-  "global_step": 80000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1127,6 +1127,76 @@
       "learning_rate": 2.1131122066829887e-06,
       "loss": 4.1045,
       "step": 80000
     }
   ],
   "logging_steps": 500,
@@ -1146,7 +1216,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.4629207046906e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.8507227420598986,
   "eval_steps": 500,
+  "global_step": 85000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.1131122066829887e-06,
       "loss": 4.1045,
       "step": 80000
+    },
+    {
+      "epoch": 2.6998021263037866,
+      "grad_norm": 16.334218978881836,
+      "learning_rate": 2.001319157974757e-06,
+      "loss": 4.1196,
+      "step": 80500
+    },
+    {
+      "epoch": 2.716571083610021,
+      "grad_norm": 15.715066909790039,
+      "learning_rate": 1.889526109266526e-06,
+      "loss": 4.0669,
+      "step": 81000
+    },
+    {
+      "epoch": 2.733340040916256,
+      "grad_norm": 19.521465301513672,
+      "learning_rate": 1.7777330605582947e-06,
+      "loss": 4.0515,
+      "step": 81500
+    },
+    {
+      "epoch": 2.7501089982224904,
+      "grad_norm": 14.903164863586426,
+      "learning_rate": 1.6659400118500632e-06,
+      "loss": 4.0894,
+      "step": 82000
+    },
+    {
+      "epoch": 2.766877955528725,
+      "grad_norm": 15.718372344970703,
+      "learning_rate": 1.554146963141832e-06,
+      "loss": 4.1012,
+      "step": 82500
+    },
+    {
+      "epoch": 2.7836469128349597,
+      "grad_norm": 18.20594024658203,
+      "learning_rate": 1.4423539144336007e-06,
+      "loss": 4.0898,
+      "step": 83000
+    },
+    {
+      "epoch": 2.8004158701411948,
+      "grad_norm": 26.69768524169922,
+      "learning_rate": 1.3305608657253694e-06,
+      "loss": 4.1673,
+      "step": 83500
+    },
+    {
+      "epoch": 2.8171848274474294,
+      "grad_norm": 15.399796485900879,
+      "learning_rate": 1.218767817017138e-06,
+      "loss": 4.0204,
+      "step": 84000
+    },
+    {
+      "epoch": 2.833953784753664,
+      "grad_norm": 14.401926040649414,
+      "learning_rate": 1.1069747683089067e-06,
+      "loss": 4.0974,
+      "step": 84500
+    },
+    {
+      "epoch": 2.8507227420598986,
+      "grad_norm": 18.283782958984375,
+      "learning_rate": 9.951817196006754e-07,
+      "loss": 4.1121,
+      "step": 85000
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 2.6168910452202e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null