Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +132 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad0bccb0ba793a49c0c6477d404b9362429826e2885aa40346d6d93bb8233774
 size 912336848

 version https://git-lfs.github.com/spec/v1
+oid sha256:7da1d2ce40dbea8e73696e0f082744f02644f6b47dca5f7a06ce8cff602f451c
 size 912336848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b676ea1901ac1598f20b20dfb2d4d50dc3993f4f24c5acfc0d305758d706432
 size 463916756

 version https://git-lfs.github.com/spec/v1
+oid sha256:96d7c9708f2f3ce73426ae02cf52e9257dd9191c642c858297aa88b6b0f0e824
 size 463916756

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fdbf4d797ce28163185f6bc64a5482e44de7a6b979c0ef6ed14c5c694f4353e4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2026cddff93bf50fc4330ef3e58cf48c9b2abea2d0d7a08ccf2d89915f727e3
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9096f15f02bac6b0fc27aa7aa4986f85d87d53fca310a75657e0015357af5c5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:705cabf5cbc3a6ab0feb67c77b9b453d59efcc939ce90d310af96e621810f990
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.5596095323562622,
   "best_model_checkpoint": "miner_id_24/checkpoint-350",
-  "epoch": 0.14525835235526044,
   "eval_steps": 50,
-  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -883,6 +883,133 @@
       "eval_samples_per_second": 9.338,
       "eval_steps_per_second": 2.336,
       "step": 350
     }
   ],
   "logging_steps": 3,
@@ -897,7 +1024,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -906,12 +1033,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 8.128987503840461e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.5596095323562622,
   "best_model_checkpoint": "miner_id_24/checkpoint-350",
+  "epoch": 0.16600954554886907,
   "eval_steps": 50,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.338,
       "eval_steps_per_second": 2.336,
       "step": 350
+    },
+    {
+      "epoch": 0.1456733762191326,
+      "grad_norm": 1.5290882587432861,
+      "learning_rate": 3.844650207332562e-06,
+      "loss": 5.579,
+      "step": 351
+    },
+    {
+      "epoch": 0.14691844781074911,
+      "grad_norm": 1.922317385673523,
+      "learning_rate": 3.393526721321616e-06,
+      "loss": 6.2572,
+      "step": 354
+    },
+    {
+      "epoch": 0.14816351940236563,
+      "grad_norm": 1.9951307773590088,
+      "learning_rate": 2.9696201032491434e-06,
+      "loss": 5.9967,
+      "step": 357
+    },
+    {
+      "epoch": 0.14940859099398215,
+      "grad_norm": 1.9137158393859863,
+      "learning_rate": 2.573177902642726e-06,
+      "loss": 6.0885,
+      "step": 360
+    },
+    {
+      "epoch": 0.15065366258559867,
+      "grad_norm": 1.8864800930023193,
+      "learning_rate": 2.204431630583548e-06,
+      "loss": 6.0631,
+      "step": 363
+    },
+    {
+      "epoch": 0.1518987341772152,
+      "grad_norm": 1.8956104516983032,
+      "learning_rate": 1.8635966245104664e-06,
+      "loss": 6.2917,
+      "step": 366
+    },
+    {
+      "epoch": 0.15314380576883171,
+      "grad_norm": 2.2040300369262695,
+      "learning_rate": 1.5508719224689717e-06,
+      "loss": 6.0451,
+      "step": 369
+    },
+    {
+      "epoch": 0.15438887736044823,
+      "grad_norm": 1.814900279045105,
+      "learning_rate": 1.2664401468786114e-06,
+      "loss": 6.3508,
+      "step": 372
+    },
+    {
+      "epoch": 0.15563394895206475,
+      "grad_norm": 2.195012331008911,
+      "learning_rate": 1.0104673978866164e-06,
+      "loss": 6.4296,
+      "step": 375
+    },
+    {
+      "epoch": 0.15687902054368127,
+      "grad_norm": 2.159865379333496,
+      "learning_rate": 7.83103156370113e-07,
+      "loss": 6.4639,
+      "step": 378
+    },
+    {
+      "epoch": 0.15812409213529777,
+      "grad_norm": 2.2066335678100586,
+      "learning_rate": 5.844801966434832e-07,
+      "loss": 6.6283,
+      "step": 381
+    },
+    {
+      "epoch": 0.1593691637269143,
+      "grad_norm": 2.2779653072357178,
+      "learning_rate": 4.1471450892189846e-07,
+      "loss": 6.2024,
+      "step": 384
+    },
+    {
+      "epoch": 0.1606142353185308,
+      "grad_norm": 2.2787067890167236,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 6.4473,
+      "step": 387
+    },
+    {
+      "epoch": 0.16185930691014733,
+      "grad_norm": 2.420171022415161,
+      "learning_rate": 1.6213459328950352e-07,
+      "loss": 6.3441,
+      "step": 390
+    },
+    {
+      "epoch": 0.16310437850176385,
+      "grad_norm": 2.8213346004486084,
+      "learning_rate": 7.946786493666647e-08,
+      "loss": 6.279,
+      "step": 393
+    },
+    {
+      "epoch": 0.16434945009338037,
+      "grad_norm": 2.905611515045166,
+      "learning_rate": 2.595332156925534e-08,
+      "loss": 6.1803,
+      "step": 396
+    },
+    {
+      "epoch": 0.1655945216849969,
+      "grad_norm": 4.091871738433838,
+      "learning_rate": 1.622214173602199e-09,
+      "loss": 5.6065,
+      "step": 399
+    },
+    {
+      "epoch": 0.16600954554886907,
+      "eval_loss": 1.561850905418396,
+      "eval_runtime": 434.5611,
+      "eval_samples_per_second": 9.338,
+      "eval_steps_per_second": 2.336,
+      "step": 400
     }
   ],
   "logging_steps": 3,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 9.289442536508621e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null