Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddc6ff68e50c2eef2f78f235e393a13742a9c30a3f8ed3aed2ec5ef187490b1b
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:48b7d60c0b70a7744f58a94dd30f3d7a0ceb63d768a116a524fc5969d7a32fc5
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7db1de247d5f83c7055973b89dcb4cee136378d499bcdd300dd775b1bfe41ac9
 size 335922386

 version https://git-lfs.github.com/spec/v1
+oid sha256:5e55533f5ebd086f2e270634190442e06879f6ed8b45109372d2a352302549cd
 size 335922386

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cad3ee7f648c4f71c014b56b413664860b378abf8dcfb1f87ee46a0d77117f23
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4a230bcf7834fc6bc68d809e441cbac665c1f39db84495047c6155e3d55f56f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.2685512602329254,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 1.2314007183170856,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 11.892,
       "eval_steps_per_second": 1.665,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.364765716348928e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.23010525107383728,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 1.641867624422781,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.892,
       "eval_steps_per_second": 1.665,
       "step": 150
+    },
+    {
+      "epoch": 1.2396100564391996,
+      "grad_norm": 0.5411159992218018,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 0.0791,
+      "step": 151
+    },
+    {
+      "epoch": 1.2478193945613134,
+      "grad_norm": 0.6580871343612671,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 0.146,
+      "step": 152
+    },
+    {
+      "epoch": 1.2560287326834274,
+      "grad_norm": 1.062386155128479,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 0.2549,
+      "step": 153
+    },
+    {
+      "epoch": 1.2642380708055412,
+      "grad_norm": 0.8537935018539429,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 0.2096,
+      "step": 154
+    },
+    {
+      "epoch": 1.2724474089276552,
+      "grad_norm": 1.2839819192886353,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 0.3274,
+      "step": 155
+    },
+    {
+      "epoch": 1.2806567470497692,
+      "grad_norm": 0.8188867568969727,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 0.2216,
+      "step": 156
+    },
+    {
+      "epoch": 1.288866085171883,
+      "grad_norm": 0.7944577932357788,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 0.2609,
+      "step": 157
+    },
+    {
+      "epoch": 1.297075423293997,
+      "grad_norm": 1.1626592874526978,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 0.2893,
+      "step": 158
+    },
+    {
+      "epoch": 1.305284761416111,
+      "grad_norm": 0.763344407081604,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 0.1905,
+      "step": 159
+    },
+    {
+      "epoch": 1.3134940995382247,
+      "grad_norm": 1.0466609001159668,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 0.2593,
+      "step": 160
+    },
+    {
+      "epoch": 1.3217034376603387,
+      "grad_norm": 0.8457233309745789,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 0.2853,
+      "step": 161
+    },
+    {
+      "epoch": 1.3299127757824525,
+      "grad_norm": 0.9174469709396362,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 0.2524,
+      "step": 162
+    },
+    {
+      "epoch": 1.3381221139045665,
+      "grad_norm": 0.9040597081184387,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 0.2984,
+      "step": 163
+    },
+    {
+      "epoch": 1.3463314520266803,
+      "grad_norm": 0.8464344143867493,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 0.2039,
+      "step": 164
+    },
+    {
+      "epoch": 1.3545407901487942,
+      "grad_norm": 1.0829017162322998,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 0.2211,
+      "step": 165
+    },
+    {
+      "epoch": 1.3627501282709082,
+      "grad_norm": 0.8870744705200195,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 0.1614,
+      "step": 166
+    },
+    {
+      "epoch": 1.370959466393022,
+      "grad_norm": 0.6612769961357117,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 0.1575,
+      "step": 167
+    },
+    {
+      "epoch": 1.379168804515136,
+      "grad_norm": 0.7530766725540161,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 0.1883,
+      "step": 168
+    },
+    {
+      "epoch": 1.3873781426372498,
+      "grad_norm": 1.1085615158081055,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 0.1853,
+      "step": 169
+    },
+    {
+      "epoch": 1.3955874807593638,
+      "grad_norm": 1.5783230066299438,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 0.2529,
+      "step": 170
+    },
+    {
+      "epoch": 1.4037968188814776,
+      "grad_norm": 1.6519408226013184,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 0.2508,
+      "step": 171
+    },
+    {
+      "epoch": 1.4120061570035916,
+      "grad_norm": 0.9360213279724121,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 0.1858,
+      "step": 172
+    },
+    {
+      "epoch": 1.4202154951257056,
+      "grad_norm": 0.7246414422988892,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 0.1822,
+      "step": 173
+    },
+    {
+      "epoch": 1.4284248332478193,
+      "grad_norm": 1.5920621156692505,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 0.2298,
+      "step": 174
+    },
+    {
+      "epoch": 1.4366341713699333,
+      "grad_norm": 0.8744984865188599,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 0.1964,
+      "step": 175
+    },
+    {
+      "epoch": 1.4366341713699333,
+      "eval_loss": 0.21325525641441345,
+      "eval_runtime": 4.1909,
+      "eval_samples_per_second": 11.931,
+      "eval_steps_per_second": 1.67,
+      "step": 175
+    },
+    {
+      "epoch": 1.4448435094920473,
+      "grad_norm": 0.8040856719017029,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 0.2247,
+      "step": 176
+    },
+    {
+      "epoch": 1.453052847614161,
+      "grad_norm": 1.3336321115493774,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 0.2992,
+      "step": 177
+    },
+    {
+      "epoch": 1.461262185736275,
+      "grad_norm": 0.6860930323600769,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 0.146,
+      "step": 178
+    },
+    {
+      "epoch": 1.4694715238583889,
+      "grad_norm": 1.3084850311279297,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 0.1836,
+      "step": 179
+    },
+    {
+      "epoch": 1.4776808619805029,
+      "grad_norm": 0.8496176600456238,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 0.148,
+      "step": 180
+    },
+    {
+      "epoch": 1.4858902001026166,
+      "grad_norm": 0.6939314007759094,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 0.0918,
+      "step": 181
+    },
+    {
+      "epoch": 1.4940995382247306,
+      "grad_norm": 1.3061468601226807,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 0.1953,
+      "step": 182
+    },
+    {
+      "epoch": 1.5023088763468446,
+      "grad_norm": 0.8121980428695679,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 0.2595,
+      "step": 183
+    },
+    {
+      "epoch": 1.5105182144689584,
+      "grad_norm": 0.7984703183174133,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 0.1912,
+      "step": 184
+    },
+    {
+      "epoch": 1.5187275525910724,
+      "grad_norm": 0.9937928915023804,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 0.2912,
+      "step": 185
+    },
+    {
+      "epoch": 1.5269368907131864,
+      "grad_norm": 0.9170016050338745,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 0.2076,
+      "step": 186
+    },
+    {
+      "epoch": 1.5351462288353002,
+      "grad_norm": 0.8977094292640686,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 0.2685,
+      "step": 187
+    },
+    {
+      "epoch": 1.543355566957414,
+      "grad_norm": 0.8154568076133728,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 0.2685,
+      "step": 188
+    },
+    {
+      "epoch": 1.551564905079528,
+      "grad_norm": 0.6876551508903503,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 0.1567,
+      "step": 189
+    },
+    {
+      "epoch": 1.559774243201642,
+      "grad_norm": 0.7863226532936096,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 0.1835,
+      "step": 190
+    },
+    {
+      "epoch": 1.5679835813237557,
+      "grad_norm": 0.6986067295074463,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 0.2112,
+      "step": 191
+    },
+    {
+      "epoch": 1.5761929194458697,
+      "grad_norm": 0.7879580855369568,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 0.2008,
+      "step": 192
+    },
+    {
+      "epoch": 1.5844022575679837,
+      "grad_norm": 0.964294970035553,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 0.2572,
+      "step": 193
+    },
+    {
+      "epoch": 1.5926115956900975,
+      "grad_norm": 0.7760185599327087,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 0.2151,
+      "step": 194
+    },
+    {
+      "epoch": 1.6008209338122112,
+      "grad_norm": 0.8228120803833008,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 0.1982,
+      "step": 195
+    },
+    {
+      "epoch": 1.6090302719343252,
+      "grad_norm": 0.8305048942565918,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 0.1895,
+      "step": 196
+    },
+    {
+      "epoch": 1.6172396100564392,
+      "grad_norm": 0.5265530347824097,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 0.1337,
+      "step": 197
+    },
+    {
+      "epoch": 1.625448948178553,
+      "grad_norm": 0.6880905032157898,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 0.2148,
+      "step": 198
+    },
+    {
+      "epoch": 1.633658286300667,
+      "grad_norm": 0.8256459832191467,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 0.2136,
+      "step": 199
+    },
+    {
+      "epoch": 1.641867624422781,
+      "grad_norm": 0.989826500415802,
+      "learning_rate": 0.0,
+      "loss": 0.1931,
+      "step": 200
+    },
+    {
+      "epoch": 1.641867624422781,
+      "eval_loss": 0.23010525107383728,
+      "eval_runtime": 4.1903,
+      "eval_samples_per_second": 11.932,
+      "eval_steps_per_second": 1.671,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.153020955131904e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null