Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f7e73f1c29ace5136d82fe0f039157229723dfd7d76a57fd47ea047b8836d08
 size 100690184

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4d4145452738077533a2dc33037a609e567c1c82420d26286d02634119dcf07
 size 100690184

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0c16c73317e63d9c9305b78938c8c48a1ea54708d2336f1afbc8dab29029742
 size 201488570

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ed74a06669bcdcc0fc654449f6bc2b7a6ad775562c411e284cee00a26b9ff39
 size 201488570

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8eef7980a892c95e91896f4dac40eb25e05f94b505d40628e8cb4a7796ef3e94
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e0dd4d09998dab61a9bcc42e73827008345b11ff51113c40563333288122a67d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4c9c807f0681c8b7e53ada9b6ec3dba530d303de7da0d0a0562a3d8d0bbba08
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.9649507999420166,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.10630758327427356,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 57.634,
       "eval_steps_per_second": 28.829,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.145624762253312e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.9374473094940186,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.14174344436569808,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 57.634,
       "eval_steps_per_second": 28.829,
       "step": 150
+    },
+    {
+      "epoch": 0.10701630049610206,
+      "grad_norm": 90.98662567138672,
+      "learning_rate": 1.7197048550474643e-05,
+      "loss": 14.4073,
+      "step": 151
+    },
+    {
+      "epoch": 0.10772501771793054,
+      "grad_norm": 85.76145935058594,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 14.0713,
+      "step": 152
+    },
+    {
+      "epoch": 0.10843373493975904,
+      "grad_norm": 125.13016510009766,
+      "learning_rate": 1.5900081996875083e-05,
+      "loss": 12.443,
+      "step": 153
+    },
+    {
+      "epoch": 0.10914245216158752,
+      "grad_norm": 77.14301300048828,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 12.6,
+      "step": 154
+    },
+    {
+      "epoch": 0.10985116938341602,
+      "grad_norm": 98.53907775878906,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 13.344,
+      "step": 155
+    },
+    {
+      "epoch": 0.1105598866052445,
+      "grad_norm": 86.58975219726562,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 12.8699,
+      "step": 156
+    },
+    {
+      "epoch": 0.111268603827073,
+      "grad_norm": 77.6010971069336,
+      "learning_rate": 1.3432314919041478e-05,
+      "loss": 11.9447,
+      "step": 157
+    },
+    {
+      "epoch": 0.11197732104890148,
+      "grad_norm": 93.79428100585938,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 12.5428,
+      "step": 158
+    },
+    {
+      "epoch": 0.11268603827072998,
+      "grad_norm": 76.28801727294922,
+      "learning_rate": 1.22645209888614e-05,
+      "loss": 11.8837,
+      "step": 159
+    },
+    {
+      "epoch": 0.11339475549255847,
+      "grad_norm": 75.04334259033203,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 11.9508,
+      "step": 160
+    },
+    {
+      "epoch": 0.11410347271438696,
+      "grad_norm": 78.6142807006836,
+      "learning_rate": 1.1142701927151456e-05,
+      "loss": 12.2202,
+      "step": 161
+    },
+    {
+      "epoch": 0.11481218993621545,
+      "grad_norm": 79.49354553222656,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 12.1025,
+      "step": 162
+    },
+    {
+      "epoch": 0.11552090715804395,
+      "grad_norm": 78.5987319946289,
+      "learning_rate": 1.006822449763537e-05,
+      "loss": 12.1806,
+      "step": 163
+    },
+    {
+      "epoch": 0.11622962437987243,
+      "grad_norm": 91.10774230957031,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 12.0004,
+      "step": 164
+    },
+    {
+      "epoch": 0.11693834160170093,
+      "grad_norm": 89.10466003417969,
+      "learning_rate": 9.042397785550405e-06,
+      "loss": 12.198,
+      "step": 165
+    },
+    {
+      "epoch": 0.11764705882352941,
+      "grad_norm": 72.3241195678711,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 11.4762,
+      "step": 166
+    },
+    {
+      "epoch": 0.11835577604535791,
+      "grad_norm": 74.09810638427734,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 11.804,
+      "step": 167
+    },
+    {
+      "epoch": 0.11906449326718639,
+      "grad_norm": 74.54590606689453,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 11.7306,
+      "step": 168
+    },
+    {
+      "epoch": 0.11977321048901489,
+      "grad_norm": 70.98832702636719,
+      "learning_rate": 7.1416349648943894e-06,
+      "loss": 12.4922,
+      "step": 169
+    },
+    {
+      "epoch": 0.12048192771084337,
+      "grad_norm": 56.892051696777344,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 12.0487,
+      "step": 170
+    },
+    {
+      "epoch": 0.12119064493267187,
+      "grad_norm": 74.62382507324219,
+      "learning_rate": 6.269014643030213e-06,
+      "loss": 12.7333,
+      "step": 171
+    },
+    {
+      "epoch": 0.12189936215450035,
+      "grad_norm": 57.8958740234375,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 11.6401,
+      "step": 172
+    },
+    {
+      "epoch": 0.12260807937632884,
+      "grad_norm": 67.43286895751953,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 11.5284,
+      "step": 173
+    },
+    {
+      "epoch": 0.12331679659815734,
+      "grad_norm": 66.57628631591797,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 12.0942,
+      "step": 174
+    },
+    {
+      "epoch": 0.12402551381998582,
+      "grad_norm": 64.6221923828125,
+      "learning_rate": 4.684610648167503e-06,
+      "loss": 11.3993,
+      "step": 175
+    },
+    {
+      "epoch": 0.12473423104181432,
+      "grad_norm": 62.38096237182617,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 12.2216,
+      "step": 176
+    },
+    {
+      "epoch": 0.1254429482636428,
+      "grad_norm": 60.59175491333008,
+      "learning_rate": 3.974757327377981e-06,
+      "loss": 12.2929,
+      "step": 177
+    },
+    {
+      "epoch": 0.1261516654854713,
+      "grad_norm": 63.0843391418457,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 11.0547,
+      "step": 178
+    },
+    {
+      "epoch": 0.1268603827072998,
+      "grad_norm": 64.60417175292969,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 11.8783,
+      "step": 179
+    },
+    {
+      "epoch": 0.12756909992912827,
+      "grad_norm": 70.0575942993164,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 11.5584,
+      "step": 180
+    },
+    {
+      "epoch": 0.12827781715095676,
+      "grad_norm": 80.77247619628906,
+      "learning_rate": 2.724071220034158e-06,
+      "loss": 11.4372,
+      "step": 181
+    },
+    {
+      "epoch": 0.12898653437278526,
+      "grad_norm": 65.36114501953125,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 10.4815,
+      "step": 182
+    },
+    {
+      "epoch": 0.12969525159461376,
+      "grad_norm": 68.62056732177734,
+      "learning_rate": 2.1847622018482283e-06,
+      "loss": 11.4395,
+      "step": 183
+    },
+    {
+      "epoch": 0.13040396881644223,
+      "grad_norm": 75.86769104003906,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 11.5,
+      "step": 184
+    },
+    {
+      "epoch": 0.13111268603827073,
+      "grad_norm": 62.48869323730469,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 11.9649,
+      "step": 185
+    },
+    {
+      "epoch": 0.13182140326009922,
+      "grad_norm": 65.78805541992188,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 11.1761,
+      "step": 186
+    },
+    {
+      "epoch": 0.13253012048192772,
+      "grad_norm": 60.84538269042969,
+      "learning_rate": 1.2814967607382432e-06,
+      "loss": 11.7782,
+      "step": 187
+    },
+    {
+      "epoch": 0.1332388377037562,
+      "grad_norm": 55.33953857421875,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 11.1253,
+      "step": 188
+    },
+    {
+      "epoch": 0.1339475549255847,
+      "grad_norm": 54.96763610839844,
+      "learning_rate": 9.186408276168013e-07,
+      "loss": 11.1227,
+      "step": 189
+    },
+    {
+      "epoch": 0.1346562721474132,
+      "grad_norm": 50.5203857421875,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 10.6525,
+      "step": 190
+    },
+    {
+      "epoch": 0.13536498936924168,
+      "grad_norm": 67.98164367675781,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 12.1145,
+      "step": 191
+    },
+    {
+      "epoch": 0.13607370659107015,
+      "grad_norm": 70.22998046875,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 11.1736,
+      "step": 192
+    },
+    {
+      "epoch": 0.13678242381289865,
+      "grad_norm": 67.49481201171875,
+      "learning_rate": 3.7269241793390085e-07,
+      "loss": 11.139,
+      "step": 193
+    },
+    {
+      "epoch": 0.13749114103472715,
+      "grad_norm": 89.0225830078125,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 11.5438,
+      "step": 194
+    },
+    {
+      "epoch": 0.13819985825655565,
+      "grad_norm": 66.20199584960938,
+      "learning_rate": 1.9026509541272275e-07,
+      "loss": 11.4346,
+      "step": 195
+    },
+    {
+      "epoch": 0.13890857547838412,
+      "grad_norm": 59.58319091796875,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 11.6106,
+      "step": 196
+    },
+    {
+      "epoch": 0.13961729270021261,
+      "grad_norm": 65.84984588623047,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 10.8105,
+      "step": 197
+    },
+    {
+      "epoch": 0.1403260099220411,
+      "grad_norm": 65.17884063720703,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 10.958,
+      "step": 198
+    },
+    {
+      "epoch": 0.1410347271438696,
+      "grad_norm": 70.02983093261719,
+      "learning_rate": 7.615242180436522e-09,
+      "loss": 10.967,
+      "step": 199
+    },
+    {
+      "epoch": 0.14174344436569808,
+      "grad_norm": 113.24897766113281,
+      "learning_rate": 0.0,
+      "loss": 11.5775,
+      "step": 200
+    },
+    {
+      "epoch": 0.14174344436569808,
+      "eval_loss": 2.9374473094940186,
+      "eval_runtime": 41.2541,
+      "eval_samples_per_second": 57.619,
+      "eval_steps_per_second": 28.821,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.5312252919873536e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null