Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7647ae0627a7b725e322f0e4d5e33cd169586fc185828a0e8f595005b45fa3df
 size 2269195160

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c6e8289afbb5b7a6c01b2e2cfb6109769e332fba99d585b5a6d31019acc682e
 size 2269195160

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9f4f0d2f5d6b2ead9512276ab4388b03851af6e42e5e40a1fdb4e34188d3049
 size 335922386

 version https://git-lfs.github.com/spec/v1
+oid sha256:abdc5361dc010fed4b9e2c108e3ffd12a71982725995fff96bfbc2a3dbd54194
 size 335922386

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c36b216c590f6eca2a00a39d4b3b2093147d8c280798056ecb0b21597d90051e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5432f7d821ce4350169c76b75dea6eadce5659f64df6eb2118f93374d0ed33da
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.169845700263977,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.29708485486166986,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 6.292,
       "eval_steps_per_second": 6.292,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.364766999805952e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.1278622150421143,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.39611313981555984,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 6.292,
       "eval_steps_per_second": 6.292,
       "step": 150
+    },
+    {
+      "epoch": 0.29906542056074764,
+      "grad_norm": 0.5212217569351196,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 0.9336,
+      "step": 151
+    },
+    {
+      "epoch": 0.3010459862598255,
+      "grad_norm": 0.5553271770477295,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 0.9745,
+      "step": 152
+    },
+    {
+      "epoch": 0.30302655195890327,
+      "grad_norm": 0.6053627133369446,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 0.953,
+      "step": 153
+    },
+    {
+      "epoch": 0.30500711765798105,
+      "grad_norm": 0.5293852686882019,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 0.8579,
+      "step": 154
+    },
+    {
+      "epoch": 0.30698768335705884,
+      "grad_norm": 0.5316524505615234,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 0.8284,
+      "step": 155
+    },
+    {
+      "epoch": 0.3089682490561367,
+      "grad_norm": 0.5538764595985413,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 0.8725,
+      "step": 156
+    },
+    {
+      "epoch": 0.31094881475521446,
+      "grad_norm": 0.5353713631629944,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 0.8798,
+      "step": 157
+    },
+    {
+      "epoch": 0.31292938045429225,
+      "grad_norm": 0.45507702231407166,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 0.7345,
+      "step": 158
+    },
+    {
+      "epoch": 0.31490994615337004,
+      "grad_norm": 0.5768627524375916,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 0.9696,
+      "step": 159
+    },
+    {
+      "epoch": 0.3168905118524479,
+      "grad_norm": 0.4070928394794464,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 0.8171,
+      "step": 160
+    },
+    {
+      "epoch": 0.31887107755152566,
+      "grad_norm": 0.28896093368530273,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 0.6783,
+      "step": 161
+    },
+    {
+      "epoch": 0.32085164325060345,
+      "grad_norm": 0.292948842048645,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 0.8252,
+      "step": 162
+    },
+    {
+      "epoch": 0.32283220894968123,
+      "grad_norm": 0.373655766248703,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 0.8913,
+      "step": 163
+    },
+    {
+      "epoch": 0.3248127746487591,
+      "grad_norm": 0.2723099887371063,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 0.7866,
+      "step": 164
+    },
+    {
+      "epoch": 0.32679334034783686,
+      "grad_norm": 0.31054458022117615,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 0.7626,
+      "step": 165
+    },
+    {
+      "epoch": 0.32877390604691464,
+      "grad_norm": 0.31001657247543335,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 0.837,
+      "step": 166
+    },
+    {
+      "epoch": 0.33075447174599243,
+      "grad_norm": 0.3545086085796356,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 1.0232,
+      "step": 167
+    },
+    {
+      "epoch": 0.33273503744507027,
+      "grad_norm": 0.3655906915664673,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 0.9356,
+      "step": 168
+    },
+    {
+      "epoch": 0.33471560314414806,
+      "grad_norm": 0.42094361782073975,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 1.122,
+      "step": 169
+    },
+    {
+      "epoch": 0.33669616884322584,
+      "grad_norm": 0.4121546745300293,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 1.1692,
+      "step": 170
+    },
+    {
+      "epoch": 0.3386767345423036,
+      "grad_norm": 0.43121325969696045,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 1.1433,
+      "step": 171
+    },
+    {
+      "epoch": 0.34065730024138147,
+      "grad_norm": 0.44818028807640076,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 1.1544,
+      "step": 172
+    },
+    {
+      "epoch": 0.34263786594045925,
+      "grad_norm": 0.46644723415374756,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 1.1511,
+      "step": 173
+    },
+    {
+      "epoch": 0.34461843163953704,
+      "grad_norm": 0.5559659004211426,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 1.1325,
+      "step": 174
+    },
+    {
+      "epoch": 0.3465989973386148,
+      "grad_norm": 0.486060231924057,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 1.0881,
+      "step": 175
+    },
+    {
+      "epoch": 0.3465989973386148,
+      "eval_loss": 1.13009774684906,
+      "eval_runtime": 7.9539,
+      "eval_samples_per_second": 6.286,
+      "eval_steps_per_second": 6.286,
+      "step": 175
+    },
+    {
+      "epoch": 0.34857956303769266,
+      "grad_norm": 0.4915584921836853,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 1.1702,
+      "step": 176
+    },
+    {
+      "epoch": 0.35056012873677045,
+      "grad_norm": 0.5216694474220276,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 1.2237,
+      "step": 177
+    },
+    {
+      "epoch": 0.35254069443584823,
+      "grad_norm": 0.4730156660079956,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 1.1725,
+      "step": 178
+    },
+    {
+      "epoch": 0.354521260134926,
+      "grad_norm": 0.5110475420951843,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 1.2701,
+      "step": 179
+    },
+    {
+      "epoch": 0.35650182583400386,
+      "grad_norm": 0.5410056114196777,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 1.125,
+      "step": 180
+    },
+    {
+      "epoch": 0.35848239153308165,
+      "grad_norm": 0.4916069209575653,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 1.063,
+      "step": 181
+    },
+    {
+      "epoch": 0.36046295723215943,
+      "grad_norm": 0.5280264019966125,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 1.022,
+      "step": 182
+    },
+    {
+      "epoch": 0.3624435229312372,
+      "grad_norm": 0.4919007420539856,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 1.0365,
+      "step": 183
+    },
+    {
+      "epoch": 0.36442408863031506,
+      "grad_norm": 0.5240092873573303,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 1.1609,
+      "step": 184
+    },
+    {
+      "epoch": 0.36640465432939284,
+      "grad_norm": 0.6551080942153931,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 0.8449,
+      "step": 185
+    },
+    {
+      "epoch": 0.36838522002847063,
+      "grad_norm": 0.5417661666870117,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 1.0913,
+      "step": 186
+    },
+    {
+      "epoch": 0.3703657857275484,
+      "grad_norm": 0.5707260370254517,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 0.8799,
+      "step": 187
+    },
+    {
+      "epoch": 0.37234635142662625,
+      "grad_norm": 0.5540571808815002,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 0.9926,
+      "step": 188
+    },
+    {
+      "epoch": 0.37432691712570404,
+      "grad_norm": 0.5678872466087341,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 0.9895,
+      "step": 189
+    },
+    {
+      "epoch": 0.3763074828247818,
+      "grad_norm": 0.6278743743896484,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 0.9935,
+      "step": 190
+    },
+    {
+      "epoch": 0.3782880485238596,
+      "grad_norm": 0.6568822860717773,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 0.8738,
+      "step": 191
+    },
+    {
+      "epoch": 0.38026861422293745,
+      "grad_norm": 0.6540056467056274,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 1.1838,
+      "step": 192
+    },
+    {
+      "epoch": 0.38224917992201524,
+      "grad_norm": 0.7000465393066406,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 1.0445,
+      "step": 193
+    },
+    {
+      "epoch": 0.384229745621093,
+      "grad_norm": 0.5517255663871765,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 0.6349,
+      "step": 194
+    },
+    {
+      "epoch": 0.3862103113201708,
+      "grad_norm": 0.837535560131073,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 0.7306,
+      "step": 195
+    },
+    {
+      "epoch": 0.38819087701924865,
+      "grad_norm": 0.7388361096382141,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 0.9283,
+      "step": 196
+    },
+    {
+      "epoch": 0.39017144271832643,
+      "grad_norm": 1.2435849905014038,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 1.205,
+      "step": 197
+    },
+    {
+      "epoch": 0.3921520084174042,
+      "grad_norm": 1.1012904644012451,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 1.2314,
+      "step": 198
+    },
+    {
+      "epoch": 0.394132574116482,
+      "grad_norm": 1.3080377578735352,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 0.7755,
+      "step": 199
+    },
+    {
+      "epoch": 0.39611313981555984,
+      "grad_norm": 1.854987621307373,
+      "learning_rate": 0.0,
+      "loss": 0.8858,
+      "step": 200
+    },
+    {
+      "epoch": 0.39611313981555984,
+      "eval_loss": 1.1278622150421143,
+      "eval_runtime": 7.959,
+      "eval_samples_per_second": 6.282,
+      "eval_steps_per_second": 6.282,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.153022666407936e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null