Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:366beb98df22a32a15306b486f25c3c71c5b2dce1c0197d9041c950e040781f2
 size 295488936

 version https://git-lfs.github.com/spec/v1
+oid sha256:670cd73738fe931b56972a1fd07b89232b56dbef5e3890e88699cba070779538
 size 295488936

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:676fedce9f2052860ca18a1698365232b71fee713d52a420ea874fb70e36173c
 size 150486964

 version https://git-lfs.github.com/spec/v1
+oid sha256:df7ef194adb16e23a11ac8759bf4718725172f4c28dc43bae46fb0b65f2ed96f
 size 150486964

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d593b9819d4f398de4cf60927bbd9f7f2b218406371976e984313bb3bc908933
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:724d926601dc0a3ee8983fc2647744d5217cd5c8b8e49d5c6c803a32a18d0c0a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8064916729927063,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.035174111853675694,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 33.938,
       "eval_steps_per_second": 8.487,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.0821709602816e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8013890385627747,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.04689881580490093,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 33.938,
       "eval_steps_per_second": 8.487,
       "step": 150
+    },
+    {
+      "epoch": 0.0354086059327002,
+      "grad_norm": 0.9074562788009644,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 0.6811,
+      "step": 151
+    },
+    {
+      "epoch": 0.0356431000117247,
+      "grad_norm": 0.9699204564094543,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 0.792,
+      "step": 152
+    },
+    {
+      "epoch": 0.035877594090749206,
+      "grad_norm": 1.0767892599105835,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 0.8623,
+      "step": 153
+    },
+    {
+      "epoch": 0.036112088169773716,
+      "grad_norm": 1.102132797241211,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 0.8136,
+      "step": 154
+    },
+    {
+      "epoch": 0.03634658224879822,
+      "grad_norm": 1.1590954065322876,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 0.8561,
+      "step": 155
+    },
+    {
+      "epoch": 0.036581076327822724,
+      "grad_norm": 1.1245816946029663,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 0.8209,
+      "step": 156
+    },
+    {
+      "epoch": 0.03681557040684723,
+      "grad_norm": 0.9807021021842957,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 0.6628,
+      "step": 157
+    },
+    {
+      "epoch": 0.03705006448587173,
+      "grad_norm": 1.13668692111969,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 0.7741,
+      "step": 158
+    },
+    {
+      "epoch": 0.037284558564896236,
+      "grad_norm": 1.083509087562561,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 0.7008,
+      "step": 159
+    },
+    {
+      "epoch": 0.03751905264392074,
+      "grad_norm": 1.2992668151855469,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 0.8832,
+      "step": 160
+    },
+    {
+      "epoch": 0.037753546722945244,
+      "grad_norm": 1.1311757564544678,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 0.8002,
+      "step": 161
+    },
+    {
+      "epoch": 0.03798804080196975,
+      "grad_norm": 1.1364468336105347,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.6833,
+      "step": 162
+    },
+    {
+      "epoch": 0.03822253488099425,
+      "grad_norm": 1.1254762411117554,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 0.7505,
+      "step": 163
+    },
+    {
+      "epoch": 0.038457028960018756,
+      "grad_norm": 1.270178198814392,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 0.737,
+      "step": 164
+    },
+    {
+      "epoch": 0.03869152303904327,
+      "grad_norm": 1.0859111547470093,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 0.6538,
+      "step": 165
+    },
+    {
+      "epoch": 0.03892601711806777,
+      "grad_norm": 1.346832275390625,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 0.803,
+      "step": 166
+    },
+    {
+      "epoch": 0.039160511197092275,
+      "grad_norm": 1.1719223260879517,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 0.8512,
+      "step": 167
+    },
+    {
+      "epoch": 0.03939500527611678,
+      "grad_norm": 1.1551932096481323,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 0.7088,
+      "step": 168
+    },
+    {
+      "epoch": 0.03962949935514128,
+      "grad_norm": 1.3518060445785522,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 0.9165,
+      "step": 169
+    },
+    {
+      "epoch": 0.03986399343416579,
+      "grad_norm": 1.203717827796936,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 0.8045,
+      "step": 170
+    },
+    {
+      "epoch": 0.04009848751319029,
+      "grad_norm": 1.9292775392532349,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 0.8122,
+      "step": 171
+    },
+    {
+      "epoch": 0.040332981592214795,
+      "grad_norm": 1.4368733167648315,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 0.8494,
+      "step": 172
+    },
+    {
+      "epoch": 0.0405674756712393,
+      "grad_norm": 1.7008283138275146,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 0.9061,
+      "step": 173
+    },
+    {
+      "epoch": 0.0408019697502638,
+      "grad_norm": 1.3242393732070923,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 0.8308,
+      "step": 174
+    },
+    {
+      "epoch": 0.041036463829288314,
+      "grad_norm": 1.4106900691986084,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 0.7034,
+      "step": 175
+    },
+    {
+      "epoch": 0.04127095790831282,
+      "grad_norm": 1.361316442489624,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 0.803,
+      "step": 176
+    },
+    {
+      "epoch": 0.04150545198733732,
+      "grad_norm": 1.3642582893371582,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 0.7051,
+      "step": 177
+    },
+    {
+      "epoch": 0.041739946066361826,
+      "grad_norm": 1.269537091255188,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 0.6864,
+      "step": 178
+    },
+    {
+      "epoch": 0.04197444014538633,
+      "grad_norm": 1.382075548171997,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 0.7623,
+      "step": 179
+    },
+    {
+      "epoch": 0.042208934224410834,
+      "grad_norm": 1.6543961763381958,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 0.948,
+      "step": 180
+    },
+    {
+      "epoch": 0.04244342830343534,
+      "grad_norm": 1.5329254865646362,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.8895,
+      "step": 181
+    },
+    {
+      "epoch": 0.04267792238245984,
+      "grad_norm": 1.5828336477279663,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 0.853,
+      "step": 182
+    },
+    {
+      "epoch": 0.042912416461484346,
+      "grad_norm": 2.4510273933410645,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 0.8706,
+      "step": 183
+    },
+    {
+      "epoch": 0.04314691054050885,
+      "grad_norm": 1.5756542682647705,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 0.8614,
+      "step": 184
+    },
+    {
+      "epoch": 0.043381404619533354,
+      "grad_norm": 1.4439570903778076,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 0.6665,
+      "step": 185
+    },
+    {
+      "epoch": 0.043615898698557864,
+      "grad_norm": 1.446807622909546,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 0.8002,
+      "step": 186
+    },
+    {
+      "epoch": 0.04385039277758237,
+      "grad_norm": 1.592470407485962,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 0.8435,
+      "step": 187
+    },
+    {
+      "epoch": 0.04408488685660687,
+      "grad_norm": 1.6941490173339844,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 0.9267,
+      "step": 188
+    },
+    {
+      "epoch": 0.044319380935631376,
+      "grad_norm": 1.459783673286438,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 0.7305,
+      "step": 189
+    },
+    {
+      "epoch": 0.04455387501465588,
+      "grad_norm": 1.5384364128112793,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 0.9372,
+      "step": 190
+    },
+    {
+      "epoch": 0.044788369093680384,
+      "grad_norm": 1.5187623500823975,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 0.7094,
+      "step": 191
+    },
+    {
+      "epoch": 0.04502286317270489,
+      "grad_norm": 1.8162751197814941,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 0.8488,
+      "step": 192
+    },
+    {
+      "epoch": 0.04525735725172939,
+      "grad_norm": 1.4734644889831543,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 0.7385,
+      "step": 193
+    },
+    {
+      "epoch": 0.045491851330753896,
+      "grad_norm": 2.108508348464966,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 0.699,
+      "step": 194
+    },
+    {
+      "epoch": 0.0457263454097784,
+      "grad_norm": 1.665117859840393,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 0.8165,
+      "step": 195
+    },
+    {
+      "epoch": 0.045960839488802904,
+      "grad_norm": 1.8221404552459717,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 0.7502,
+      "step": 196
+    },
+    {
+      "epoch": 0.046195333567827415,
+      "grad_norm": 1.992260456085205,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 0.8191,
+      "step": 197
+    },
+    {
+      "epoch": 0.04642982764685192,
+      "grad_norm": 1.873140811920166,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 0.8301,
+      "step": 198
+    },
+    {
+      "epoch": 0.04666432172587642,
+      "grad_norm": 1.9807795286178589,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 0.8948,
+      "step": 199
+    },
+    {
+      "epoch": 0.04689881580490093,
+      "grad_norm": 2.010061740875244,
+      "learning_rate": 0.0,
+      "loss": 0.7609,
+      "step": 200
+    },
+    {
+      "epoch": 0.04689881580490093,
+      "eval_loss": 0.8013890385627747,
+      "eval_runtime": 211.6589,
+      "eval_samples_per_second": 33.932,
+      "eval_steps_per_second": 8.485,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.4428946137088e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null