Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35a66ffb102bdb2e85c336a4bc15001f2ad31abc4df64b05936254cf01fc269e
 size 134235712

 version https://git-lfs.github.com/spec/v1
+oid sha256:48a76cc5bd6972cfab52bdd45db1f9e79080e999e4e7d03d8762e1985891a2c9
 size 134235712

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ab64a2e0982d1bf799548c77b066358c5a589a6906c75c46eed5fe989670aea
 size 268543610

 version https://git-lfs.github.com/spec/v1
+oid sha256:29104c6edc1219e971129d84e872f606a4ff9fc00950a5e3c6970743eb12d029
 size 268543610

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1894c8f7415479b6de062560fe0a0b24b2842fd36cdefaed8a22e126bd0b625f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:cd3d43fb645818d100589c8df8645f08b624ad45553476b28fdf12e86a74e257
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ece3236edcb393fe9fe067fb4c27aaaf2d1a125595517cb84b2456d9b62475c0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:49d60a69e2379be2053e816cbaff31e6c931b5922dd86c71c9eaf473299cbf62
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9772149920463562,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.06870491240123669,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 50.923,
       "eval_steps_per_second": 25.462,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.1393881576833024e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8876156806945801,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.13740982480247338,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 50.923,
       "eval_steps_per_second": 25.462,
       "step": 50
+    },
+    {
+      "epoch": 0.07007901064926142,
+      "grad_norm": 4.790940761566162,
+      "learning_rate": 7.938926261462366e-05,
+      "loss": 3.5343,
+      "step": 51
+    },
+    {
+      "epoch": 0.07145310889728615,
+      "grad_norm": 4.0827860832214355,
+      "learning_rate": 7.754484907260513e-05,
+      "loss": 3.5173,
+      "step": 52
+    },
+    {
+      "epoch": 0.0728272071453109,
+      "grad_norm": 2.454427480697632,
+      "learning_rate": 7.564496387029532e-05,
+      "loss": 2.9402,
+      "step": 53
+    },
+    {
+      "epoch": 0.07420130539333562,
+      "grad_norm": 1.7591845989227295,
+      "learning_rate": 7.369343312364993e-05,
+      "loss": 2.8061,
+      "step": 54
+    },
+    {
+      "epoch": 0.07557540364136035,
+      "grad_norm": 1.4820759296417236,
+      "learning_rate": 7.169418695587791e-05,
+      "loss": 2.6238,
+      "step": 55
+    },
+    {
+      "epoch": 0.0769495018893851,
+      "grad_norm": 1.361936092376709,
+      "learning_rate": 6.965125158269619e-05,
+      "loss": 2.8171,
+      "step": 56
+    },
+    {
+      "epoch": 0.07832360013740983,
+      "grad_norm": 1.1161038875579834,
+      "learning_rate": 6.756874120406714e-05,
+      "loss": 2.6354,
+      "step": 57
+    },
+    {
+      "epoch": 0.07969769838543456,
+      "grad_norm": 1.0470097064971924,
+      "learning_rate": 6.545084971874738e-05,
+      "loss": 2.7784,
+      "step": 58
+    },
+    {
+      "epoch": 0.08107179663345929,
+      "grad_norm": 1.0488992929458618,
+      "learning_rate": 6.330184227833376e-05,
+      "loss": 2.8162,
+      "step": 59
+    },
+    {
+      "epoch": 0.08244589488148403,
+      "grad_norm": 1.0387022495269775,
+      "learning_rate": 6.112604669781572e-05,
+      "loss": 2.8907,
+      "step": 60
+    },
+    {
+      "epoch": 0.08381999312950876,
+      "grad_norm": 1.0147078037261963,
+      "learning_rate": 5.8927844739931834e-05,
+      "loss": 2.8147,
+      "step": 61
+    },
+    {
+      "epoch": 0.08519409137753349,
+      "grad_norm": 1.0787792205810547,
+      "learning_rate": 5.6711663290882776e-05,
+      "loss": 3.0599,
+      "step": 62
+    },
+    {
+      "epoch": 0.08656818962555823,
+      "grad_norm": 0.9972174167633057,
+      "learning_rate": 5.448196544517168e-05,
+      "loss": 3.0402,
+      "step": 63
+    },
+    {
+      "epoch": 0.08794228787358296,
+      "grad_norm": 1.05092453956604,
+      "learning_rate": 5.2243241517525754e-05,
+      "loss": 3.1217,
+      "step": 64
+    },
+    {
+      "epoch": 0.0893163861216077,
+      "grad_norm": 0.9785769581794739,
+      "learning_rate": 5e-05,
+      "loss": 2.9106,
+      "step": 65
+    },
+    {
+      "epoch": 0.09069048436963242,
+      "grad_norm": 1.1027038097381592,
+      "learning_rate": 4.775675848247427e-05,
+      "loss": 3.1218,
+      "step": 66
+    },
+    {
+      "epoch": 0.09206458261765717,
+      "grad_norm": 1.075793743133545,
+      "learning_rate": 4.551803455482833e-05,
+      "loss": 3.4223,
+      "step": 67
+    },
+    {
+      "epoch": 0.0934386808656819,
+      "grad_norm": 1.0721230506896973,
+      "learning_rate": 4.328833670911724e-05,
+      "loss": 3.2137,
+      "step": 68
+    },
+    {
+      "epoch": 0.09481277911370663,
+      "grad_norm": 1.0705825090408325,
+      "learning_rate": 4.107215526006817e-05,
+      "loss": 3.2946,
+      "step": 69
+    },
+    {
+      "epoch": 0.09618687736173136,
+      "grad_norm": 1.1933683156967163,
+      "learning_rate": 3.887395330218429e-05,
+      "loss": 3.5703,
+      "step": 70
+    },
+    {
+      "epoch": 0.0975609756097561,
+      "grad_norm": 1.1190630197525024,
+      "learning_rate": 3.6698157721666246e-05,
+      "loss": 3.3662,
+      "step": 71
+    },
+    {
+      "epoch": 0.09893507385778083,
+      "grad_norm": 1.2145003080368042,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 3.4709,
+      "step": 72
+    },
+    {
+      "epoch": 0.10030917210580556,
+      "grad_norm": 1.1540323495864868,
+      "learning_rate": 3.243125879593286e-05,
+      "loss": 3.2167,
+      "step": 73
+    },
+    {
+      "epoch": 0.1016832703538303,
+      "grad_norm": 1.1874574422836304,
+      "learning_rate": 3.0348748417303823e-05,
+      "loss": 3.4655,
+      "step": 74
+    },
+    {
+      "epoch": 0.10305736860185503,
+      "grad_norm": 1.1657719612121582,
+      "learning_rate": 2.8305813044122097e-05,
+      "loss": 3.4028,
+      "step": 75
+    },
+    {
+      "epoch": 0.10443146684987976,
+      "grad_norm": 1.2047468423843384,
+      "learning_rate": 2.630656687635007e-05,
+      "loss": 3.4156,
+      "step": 76
+    },
+    {
+      "epoch": 0.1058055650979045,
+      "grad_norm": 1.2500311136245728,
+      "learning_rate": 2.43550361297047e-05,
+      "loss": 3.6538,
+      "step": 77
+    },
+    {
+      "epoch": 0.10717966334592924,
+      "grad_norm": 1.23727548122406,
+      "learning_rate": 2.245515092739488e-05,
+      "loss": 3.7062,
+      "step": 78
+    },
+    {
+      "epoch": 0.10855376159395397,
+      "grad_norm": 1.327557921409607,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 3.6038,
+      "step": 79
+    },
+    {
+      "epoch": 0.1099278598419787,
+      "grad_norm": 1.3304165601730347,
+      "learning_rate": 1.8825509907063327e-05,
+      "loss": 3.8635,
+      "step": 80
+    },
+    {
+      "epoch": 0.11130195809000344,
+      "grad_norm": 1.351146936416626,
+      "learning_rate": 1.7103063703014372e-05,
+      "loss": 3.7362,
+      "step": 81
+    },
+    {
+      "epoch": 0.11267605633802817,
+      "grad_norm": 1.374627709388733,
+      "learning_rate": 1.544686755065677e-05,
+      "loss": 3.7078,
+      "step": 82
+    },
+    {
+      "epoch": 0.1140501545860529,
+      "grad_norm": 1.3466027975082397,
+      "learning_rate": 1.3860256808630428e-05,
+      "loss": 3.7743,
+      "step": 83
+    },
+    {
+      "epoch": 0.11542425283407763,
+      "grad_norm": 1.4703048467636108,
+      "learning_rate": 1.2346426699819458e-05,
+      "loss": 4.009,
+      "step": 84
+    },
+    {
+      "epoch": 0.11679835108210238,
+      "grad_norm": 1.402172565460205,
+      "learning_rate": 1.090842587659851e-05,
+      "loss": 3.8147,
+      "step": 85
+    },
+    {
+      "epoch": 0.1181724493301271,
+      "grad_norm": 1.5001634359359741,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 3.9168,
+      "step": 86
+    },
+    {
+      "epoch": 0.11954654757815183,
+      "grad_norm": 1.468559980392456,
+      "learning_rate": 8.271337313934869e-06,
+      "loss": 4.349,
+      "step": 87
+    },
+    {
+      "epoch": 0.12092064582617656,
+      "grad_norm": 1.5293834209442139,
+      "learning_rate": 7.077560319906695e-06,
+      "loss": 3.8892,
+      "step": 88
+    },
+    {
+      "epoch": 0.12229474407420131,
+      "grad_norm": 1.5736913681030273,
+      "learning_rate": 5.9702234071631e-06,
+      "loss": 4.2034,
+      "step": 89
+    },
+    {
+      "epoch": 0.12366884232222604,
+      "grad_norm": 1.5629932880401611,
+      "learning_rate": 4.951556604879048e-06,
+      "loss": 4.0794,
+      "step": 90
+    },
+    {
+      "epoch": 0.12504294057025078,
+      "grad_norm": 1.7163255214691162,
+      "learning_rate": 4.023611372427471e-06,
+      "loss": 4.5224,
+      "step": 91
+    },
+    {
+      "epoch": 0.1264170388182755,
+      "grad_norm": 1.7329245805740356,
+      "learning_rate": 3.18825646801314e-06,
+      "loss": 4.1498,
+      "step": 92
+    },
+    {
+      "epoch": 0.12779113706630024,
+      "grad_norm": 1.832844853401184,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 4.4573,
+      "step": 93
+    },
+    {
+      "epoch": 0.12916523531432497,
+      "grad_norm": 1.913427710533142,
+      "learning_rate": 1.8018569652073381e-06,
+      "loss": 4.5025,
+      "step": 94
+    },
+    {
+      "epoch": 0.1305393335623497,
+      "grad_norm": 1.9010084867477417,
+      "learning_rate": 1.2536043909088191e-06,
+      "loss": 4.4059,
+      "step": 95
+    },
+    {
+      "epoch": 0.13191343181037443,
+      "grad_norm": 2.1113531589508057,
+      "learning_rate": 8.035205700685167e-07,
+      "loss": 4.5278,
+      "step": 96
+    },
+    {
+      "epoch": 0.13328753005839916,
+      "grad_norm": 2.1245055198669434,
+      "learning_rate": 4.52511911603265e-07,
+      "loss": 4.6188,
+      "step": 97
+    },
+    {
+      "epoch": 0.13466162830642392,
+      "grad_norm": 2.3013916015625,
+      "learning_rate": 2.012853002380466e-07,
+      "loss": 4.3355,
+      "step": 98
+    },
+    {
+      "epoch": 0.13603572655444865,
+      "grad_norm": 2.819425106048584,
+      "learning_rate": 5.0346672934270534e-08,
+      "loss": 4.9912,
+      "step": 99
+    },
+    {
+      "epoch": 0.13740982480247338,
+      "grad_norm": 3.633572816848755,
+      "learning_rate": 0.0,
+      "loss": 5.0884,
+      "step": 100
+    },
+    {
+      "epoch": 0.13740982480247338,
+      "eval_loss": 0.8876156806945801,
+      "eval_runtime": 24.0584,
+      "eval_samples_per_second": 50.959,
+      "eval_steps_per_second": 25.48,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.2973029520769024e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null