Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca0d2bde0beb9180fdb969dcd813db4729c4f9b38d65250613fc4778d60113ed
 size 4525640

 version https://git-lfs.github.com/spec/v1
+oid sha256:976f28a07b57bc2ab3325d4d44092e023caef8ad537c5b4a84af0fba088927e9
 size 4525640

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:602757339b258753a7cd3bf8c3a4758f7887ed79ab32cce3f420849784b1a6b1
 size 9067978

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c87eb9746963395a9bdd58a42c8bf254c2c42a07a8ce7a92fd008d3bf691a2b
 size 9067978

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31dcd05c711b18b5c50a4153f92bec8a65443056f7dc54fe5d934f49a08767d9
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa58e18ab15852f9626941c93dfbb4dc51e52641baa07bb39f9334d96fc65e06
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36f592008e58a167283058621021e8083b7e0494f9e4eaff17cb66d70f002d42
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:044f8ccc674155e92aea2e8413880d1c86692d5f2c4bee3bacf48d2e0ed42bb4
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1bc50b2235fb4c074a0bc96e9d7a27043db49236a29c6053060da0e95fab47f5
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:1358536e8292412bcf28025fb2b2a1e9c607bebf90ef030d60b25c8d0153fbea
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0225a23b8789ae53c3a1df7e021c24be3e3b0013447c2ab93b24493519813e0f
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d03314b7c174da2a6ece9d2238718a0b81407508624b7c24c389c7ddb0233827
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9260759353637695,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.45351473922902497,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 946.868,
       "eval_steps_per_second": 119.631,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1753141896806400.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7363215684890747,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.9070294784580499,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 946.868,
       "eval_steps_per_second": 119.631,
       "step": 25
+    },
+    {
+      "epoch": 0.47165532879818595,
+      "grad_norm": 11.000182151794434,
+      "learning_rate": 5e-05,
+      "loss": 0.6944,
+      "step": 26
+    },
+    {
+      "epoch": 0.4897959183673469,
+      "grad_norm": 7.899143695831299,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 0.6893,
+      "step": 27
+    },
+    {
+      "epoch": 0.5079365079365079,
+      "grad_norm": 3.3740994930267334,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 0.6363,
+      "step": 28
+    },
+    {
+      "epoch": 0.5260770975056689,
+      "grad_norm": 1.9741195440292358,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 0.6583,
+      "step": 29
+    },
+    {
+      "epoch": 0.54421768707483,
+      "grad_norm": 2.1237120628356934,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 0.6359,
+      "step": 30
+    },
+    {
+      "epoch": 0.562358276643991,
+      "grad_norm": 3.018322467803955,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 0.6581,
+      "step": 31
+    },
+    {
+      "epoch": 0.5804988662131519,
+      "grad_norm": 3.021416187286377,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 0.6479,
+      "step": 32
+    },
+    {
+      "epoch": 0.5986394557823129,
+      "grad_norm": 3.8875057697296143,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 0.7775,
+      "step": 33
+    },
+    {
+      "epoch": 0.6167800453514739,
+      "grad_norm": 3.475656270980835,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.7254,
+      "step": 34
+    },
+    {
+      "epoch": 0.6349206349206349,
+      "grad_norm": 4.547675132751465,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 0.7812,
+      "step": 35
+    },
+    {
+      "epoch": 0.6530612244897959,
+      "grad_norm": 4.685992240905762,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 0.8471,
+      "step": 36
+    },
+    {
+      "epoch": 0.671201814058957,
+      "grad_norm": 5.522526264190674,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 0.8515,
+      "step": 37
+    },
+    {
+      "epoch": 0.6893424036281179,
+      "grad_norm": 2.1352436542510986,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.585,
+      "step": 38
+    },
+    {
+      "epoch": 0.7074829931972789,
+      "grad_norm": 2.075186014175415,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 0.4525,
+      "step": 39
+    },
+    {
+      "epoch": 0.7256235827664399,
+      "grad_norm": 1.9144731760025024,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 0.494,
+      "step": 40
+    },
+    {
+      "epoch": 0.7437641723356009,
+      "grad_norm": 1.8125754594802856,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.4795,
+      "step": 41
+    },
+    {
+      "epoch": 0.7619047619047619,
+      "grad_norm": 1.9145684242248535,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.5169,
+      "step": 42
+    },
+    {
+      "epoch": 0.780045351473923,
+      "grad_norm": 1.7573060989379883,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.5226,
+      "step": 43
+    },
+    {
+      "epoch": 0.7981859410430839,
+      "grad_norm": 2.030489444732666,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.5625,
+      "step": 44
+    },
+    {
+      "epoch": 0.8163265306122449,
+      "grad_norm": 2.0532655715942383,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.5671,
+      "step": 45
+    },
+    {
+      "epoch": 0.8344671201814059,
+      "grad_norm": 3.2021546363830566,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.7115,
+      "step": 46
+    },
+    {
+      "epoch": 0.8526077097505669,
+      "grad_norm": 3.1948206424713135,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.7382,
+      "step": 47
+    },
+    {
+      "epoch": 0.8707482993197279,
+      "grad_norm": 2.8352816104888916,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.7598,
+      "step": 48
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 2.8769757747650146,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 0.7636,
+      "step": 49
+    },
+    {
+      "epoch": 0.9070294784580499,
+      "grad_norm": 7.174294948577881,
+      "learning_rate": 0.0,
+      "loss": 0.9811,
+      "step": 50
+    },
+    {
+      "epoch": 0.9070294784580499,
+      "eval_loss": 0.7363215684890747,
+      "eval_runtime": 0.393,
+      "eval_samples_per_second": 946.61,
+      "eval_steps_per_second": 119.599,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3506283793612800.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null