Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c63b95829c9f137f5979b56ebb18368ead4c0430a9c4aca13557b6f9fe7dc8f
 size 985240

 version https://git-lfs.github.com/spec/v1
+oid sha256:24bfd8c5cca1e4aa7cbef59c37fc2e65eb21c84f38ae64d7470bee4bdfa0d8d8
 size 985240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37cecee7fd5375e19f905de2aab67f8f077d37f68b9d50de682884600d8e619f
 size 520860

 version https://git-lfs.github.com/spec/v1
+oid sha256:c657ebcd1e287f996dece39ae0e6f6eef8b42e34c7615a340714fbc441782f29
 size 520860

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a95fc8665795720a1d2e549315e8c156b317790f1b4458d751ca369bedca4bdf
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e71dda85045fe0c73a3372432bb532f4b4f8a3225f1e6c65889ede356ca3524b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d606eeb1aa97b417de3c30d0a970be83ac979e2c7cc0fa41135c63d459909e5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba4af3b1b4fa156d60adeec70df709d1741ac2f3147c676ab2805007313fc707
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.880435943603516,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.08440005626670417,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 109.929,
       "eval_steps_per_second": 27.51,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 18333579608064.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.876670837402344,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.11253340835560557,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 109.929,
       "eval_steps_per_second": 27.51,
       "step": 150
+    },
+    {
+      "epoch": 0.0849627233084822,
+      "grad_norm": 0.8498581647872925,
+      "learning_rate": 2.6047368421052634e-05,
+      "loss": 43.6525,
+      "step": 151
+    },
+    {
+      "epoch": 0.08552539035026023,
+      "grad_norm": 0.6408432126045227,
+      "learning_rate": 2.5515789473684213e-05,
+      "loss": 43.7221,
+      "step": 152
+    },
+    {
+      "epoch": 0.08608805739203826,
+      "grad_norm": 0.7549567222595215,
+      "learning_rate": 2.4984210526315788e-05,
+      "loss": 43.6946,
+      "step": 153
+    },
+    {
+      "epoch": 0.08665072443381629,
+      "grad_norm": 0.9270240664482117,
+      "learning_rate": 2.445263157894737e-05,
+      "loss": 43.6104,
+      "step": 154
+    },
+    {
+      "epoch": 0.08721339147559432,
+      "grad_norm": 0.8804726600646973,
+      "learning_rate": 2.3921052631578946e-05,
+      "loss": 43.6234,
+      "step": 155
+    },
+    {
+      "epoch": 0.08777605851737234,
+      "grad_norm": 0.7767876386642456,
+      "learning_rate": 2.3389473684210528e-05,
+      "loss": 43.666,
+      "step": 156
+    },
+    {
+      "epoch": 0.08833872555915037,
+      "grad_norm": 0.7587795257568359,
+      "learning_rate": 2.2857894736842106e-05,
+      "loss": 43.6076,
+      "step": 157
+    },
+    {
+      "epoch": 0.0889013926009284,
+      "grad_norm": 0.779515266418457,
+      "learning_rate": 2.2326315789473685e-05,
+      "loss": 43.6458,
+      "step": 158
+    },
+    {
+      "epoch": 0.08946405964270643,
+      "grad_norm": 0.942866861820221,
+      "learning_rate": 2.1794736842105264e-05,
+      "loss": 43.6472,
+      "step": 159
+    },
+    {
+      "epoch": 0.09002672668448446,
+      "grad_norm": 0.7620813846588135,
+      "learning_rate": 2.1263157894736842e-05,
+      "loss": 43.6805,
+      "step": 160
+    },
+    {
+      "epoch": 0.09058939372626248,
+      "grad_norm": 0.8272588849067688,
+      "learning_rate": 2.073157894736842e-05,
+      "loss": 43.6039,
+      "step": 161
+    },
+    {
+      "epoch": 0.09115206076804051,
+      "grad_norm": 0.8112311363220215,
+      "learning_rate": 2.0200000000000003e-05,
+      "loss": 43.5755,
+      "step": 162
+    },
+    {
+      "epoch": 0.09171472780981854,
+      "grad_norm": 0.8061158061027527,
+      "learning_rate": 1.966842105263158e-05,
+      "loss": 43.6099,
+      "step": 163
+    },
+    {
+      "epoch": 0.09227739485159657,
+      "grad_norm": 0.907562255859375,
+      "learning_rate": 1.913684210526316e-05,
+      "loss": 43.5635,
+      "step": 164
+    },
+    {
+      "epoch": 0.0928400618933746,
+      "grad_norm": 0.883489191532135,
+      "learning_rate": 1.8605263157894736e-05,
+      "loss": 43.5016,
+      "step": 165
+    },
+    {
+      "epoch": 0.09340272893515263,
+      "grad_norm": 0.7994459271430969,
+      "learning_rate": 1.8073684210526318e-05,
+      "loss": 43.6176,
+      "step": 166
+    },
+    {
+      "epoch": 0.09396539597693065,
+      "grad_norm": 0.8139027953147888,
+      "learning_rate": 1.7542105263157897e-05,
+      "loss": 43.5808,
+      "step": 167
+    },
+    {
+      "epoch": 0.09452806301870868,
+      "grad_norm": 0.8115171790122986,
+      "learning_rate": 1.7010526315789475e-05,
+      "loss": 43.5916,
+      "step": 168
+    },
+    {
+      "epoch": 0.09509073006048671,
+      "grad_norm": 0.7153356075286865,
+      "learning_rate": 1.6478947368421054e-05,
+      "loss": 43.6374,
+      "step": 169
+    },
+    {
+      "epoch": 0.09565339710226474,
+      "grad_norm": 0.8724981546401978,
+      "learning_rate": 1.5947368421052633e-05,
+      "loss": 43.6232,
+      "step": 170
+    },
+    {
+      "epoch": 0.09621606414404277,
+      "grad_norm": 0.8424834609031677,
+      "learning_rate": 1.541578947368421e-05,
+      "loss": 43.5661,
+      "step": 171
+    },
+    {
+      "epoch": 0.0967787311858208,
+      "grad_norm": 0.8084411025047302,
+      "learning_rate": 1.4884210526315788e-05,
+      "loss": 43.4886,
+      "step": 172
+    },
+    {
+      "epoch": 0.09734139822759882,
+      "grad_norm": 0.8066278696060181,
+      "learning_rate": 1.4352631578947369e-05,
+      "loss": 43.6011,
+      "step": 173
+    },
+    {
+      "epoch": 0.09790406526937685,
+      "grad_norm": 0.9052120447158813,
+      "learning_rate": 1.3821052631578949e-05,
+      "loss": 43.5779,
+      "step": 174
+    },
+    {
+      "epoch": 0.09846673231115488,
+      "grad_norm": 0.8675753474235535,
+      "learning_rate": 1.3289473684210526e-05,
+      "loss": 43.587,
+      "step": 175
+    },
+    {
+      "epoch": 0.0990293993529329,
+      "grad_norm": 0.9872041344642639,
+      "learning_rate": 1.2757894736842106e-05,
+      "loss": 43.5783,
+      "step": 176
+    },
+    {
+      "epoch": 0.09959206639471092,
+      "grad_norm": 0.879370391368866,
+      "learning_rate": 1.2226315789473685e-05,
+      "loss": 43.6095,
+      "step": 177
+    },
+    {
+      "epoch": 0.10015473343648895,
+      "grad_norm": 0.8267509937286377,
+      "learning_rate": 1.1694736842105264e-05,
+      "loss": 43.6539,
+      "step": 178
+    },
+    {
+      "epoch": 0.10071740047826698,
+      "grad_norm": 1.0806152820587158,
+      "learning_rate": 1.1163157894736842e-05,
+      "loss": 43.4564,
+      "step": 179
+    },
+    {
+      "epoch": 0.10128006752004501,
+      "grad_norm": 1.0574450492858887,
+      "learning_rate": 1.0631578947368421e-05,
+      "loss": 43.5638,
+      "step": 180
+    },
+    {
+      "epoch": 0.10184273456182304,
+      "grad_norm": 1.0475265979766846,
+      "learning_rate": 1.0100000000000002e-05,
+      "loss": 43.4628,
+      "step": 181
+    },
+    {
+      "epoch": 0.10240540160360107,
+      "grad_norm": 0.9292538166046143,
+      "learning_rate": 9.56842105263158e-06,
+      "loss": 43.55,
+      "step": 182
+    },
+    {
+      "epoch": 0.1029680686453791,
+      "grad_norm": 1.3078457117080688,
+      "learning_rate": 9.036842105263159e-06,
+      "loss": 43.5677,
+      "step": 183
+    },
+    {
+      "epoch": 0.10353073568715712,
+      "grad_norm": 1.3080542087554932,
+      "learning_rate": 8.505263157894738e-06,
+      "loss": 43.2857,
+      "step": 184
+    },
+    {
+      "epoch": 0.10409340272893515,
+      "grad_norm": 1.1805113554000854,
+      "learning_rate": 7.973684210526316e-06,
+      "loss": 43.4206,
+      "step": 185
+    },
+    {
+      "epoch": 0.10465606977071318,
+      "grad_norm": 1.1277086734771729,
+      "learning_rate": 7.442105263157894e-06,
+      "loss": 43.3923,
+      "step": 186
+    },
+    {
+      "epoch": 0.1052187368124912,
+      "grad_norm": 1.2308294773101807,
+      "learning_rate": 6.9105263157894745e-06,
+      "loss": 43.3882,
+      "step": 187
+    },
+    {
+      "epoch": 0.10578140385426923,
+      "grad_norm": 1.1887234449386597,
+      "learning_rate": 6.378947368421053e-06,
+      "loss": 43.4451,
+      "step": 188
+    },
+    {
+      "epoch": 0.10634407089604726,
+      "grad_norm": 1.1450612545013428,
+      "learning_rate": 5.847368421052632e-06,
+      "loss": 43.4785,
+      "step": 189
+    },
+    {
+      "epoch": 0.10690673793782529,
+      "grad_norm": 1.217918038368225,
+      "learning_rate": 5.315789473684211e-06,
+      "loss": 43.5225,
+      "step": 190
+    },
+    {
+      "epoch": 0.10746940497960332,
+      "grad_norm": 1.1894441843032837,
+      "learning_rate": 4.78421052631579e-06,
+      "loss": 43.3129,
+      "step": 191
+    },
+    {
+      "epoch": 0.10803207202138135,
+      "grad_norm": 1.4247220754623413,
+      "learning_rate": 4.252631578947369e-06,
+      "loss": 43.3311,
+      "step": 192
+    },
+    {
+      "epoch": 0.10859473906315938,
+      "grad_norm": 1.664419174194336,
+      "learning_rate": 3.721052631578947e-06,
+      "loss": 43.2038,
+      "step": 193
+    },
+    {
+      "epoch": 0.1091574061049374,
+      "grad_norm": 1.5254756212234497,
+      "learning_rate": 3.1894736842105266e-06,
+      "loss": 43.3228,
+      "step": 194
+    },
+    {
+      "epoch": 0.10972007314671543,
+      "grad_norm": 1.9036180973052979,
+      "learning_rate": 2.6578947368421053e-06,
+      "loss": 43.1253,
+      "step": 195
+    },
+    {
+      "epoch": 0.11028274018849346,
+      "grad_norm": 1.8412998914718628,
+      "learning_rate": 2.1263157894736844e-06,
+      "loss": 43.4353,
+      "step": 196
+    },
+    {
+      "epoch": 0.11084540723027149,
+      "grad_norm": 2.01310396194458,
+      "learning_rate": 1.5947368421052633e-06,
+      "loss": 43.4062,
+      "step": 197
+    },
+    {
+      "epoch": 0.11140807427204952,
+      "grad_norm": 2.4822793006896973,
+      "learning_rate": 1.0631578947368422e-06,
+      "loss": 43.2878,
+      "step": 198
+    },
+    {
+      "epoch": 0.11197074131382755,
+      "grad_norm": 3.1609292030334473,
+      "learning_rate": 5.315789473684211e-07,
+      "loss": 43.5898,
+      "step": 199
+    },
+    {
+      "epoch": 0.11253340835560557,
+      "grad_norm": 5.270547389984131,
+      "learning_rate": 0.0,
+      "loss": 43.6679,
+      "step": 200
+    },
+    {
+      "epoch": 0.11253340835560557,
+      "eval_loss": 10.876670837402344,
+      "eval_runtime": 27.3482,
+      "eval_samples_per_second": 109.44,
+      "eval_steps_per_second": 27.388,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 24454725894144.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null