Training in progress, step 40, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +82 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d7e1956e00bca6a37ff14db2a30da9b68b0412270804d15c373d573d895130e
 size 619632

 version https://git-lfs.github.com/spec/v1
+oid sha256:38799347f65a7ed1a2cbacce2d9144aa3c914d18ccb1e3fdd4251542125db97d
 size 619632

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f68e5fcfec47e208e5db613b7916aebe5eb47e922e21e87d04bd4b90ef71e6c0
 size 1324026

 version https://git-lfs.github.com/spec/v1
+oid sha256:bab235c3bd36826b1a9f64b02b346ee5a6d20e57442da515f05163e4e5379acd
 size 1324026

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f1b42cbd14e4887332cb62e946119f0bcf0d9a9fcacc80ed8b66010e166bbd7
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:90021be0ad63cbcd6db2577a307ccc4130b2c06ca765264bfa98daa67579ad94
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c8d2f6e87f5b671dfa031e06947209073b518165329eeb9867cd4aaa16776ca
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:a402bfbd22dfb93faf811296f0988585bea76884af7cafd9bcf1eec8614fa602
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3e333deca01a6860a16a4bea1a2ebeb14f960dad45973fb2bf65501096c51e2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a2759c51def6f12ecf076b0cf2571dc6049a636d579f7f0a8bd8ae37daff96c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7565011820330969,
   "eval_steps": 10,
-  "global_step": 30,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -249,6 +249,84 @@
       "eval_samples_per_second": 92.045,
       "eval_steps_per_second": 23.011,
       "step": 30
     }
   ],
   "logging_steps": 1,
@@ -263,12 +341,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 487493216501760.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0110323089046493,
   "eval_steps": 10,
+  "global_step": 40,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 92.045,
       "eval_steps_per_second": 23.011,
       "step": 30
+    },
+    {
+      "epoch": 0.7817178881008668,
+      "grad_norm": 0.25170794129371643,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 11.9165,
+      "step": 31
+    },
+    {
+      "epoch": 0.8069345941686368,
+      "grad_norm": 0.2574658989906311,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 11.9158,
+      "step": 32
+    },
+    {
+      "epoch": 0.8321513002364066,
+      "grad_norm": 0.2503858506679535,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 11.9152,
+      "step": 33
+    },
+    {
+      "epoch": 0.8573680063041765,
+      "grad_norm": 0.27511805295944214,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 11.9153,
+      "step": 34
+    },
+    {
+      "epoch": 0.8825847123719465,
+      "grad_norm": 0.2627543807029724,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 11.9171,
+      "step": 35
+    },
+    {
+      "epoch": 0.9078014184397163,
+      "grad_norm": 0.22878780961036682,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 11.9206,
+      "step": 36
+    },
+    {
+      "epoch": 0.9330181245074862,
+      "grad_norm": 0.26924818754196167,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 11.9155,
+      "step": 37
+    },
+    {
+      "epoch": 0.9582348305752562,
+      "grad_norm": 0.25205621123313904,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 11.916,
+      "step": 38
+    },
+    {
+      "epoch": 0.983451536643026,
+      "grad_norm": 0.25344499945640564,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 11.9166,
+      "step": 39
+    },
+    {
+      "epoch": 1.0110323089046493,
+      "grad_norm": 0.42802441120147705,
+      "learning_rate": 0.0,
+      "loss": 17.6584,
+      "step": 40
+    },
+    {
+      "epoch": 1.0110323089046493,
+      "eval_loss": 11.915095329284668,
+      "eval_runtime": 2.9117,
+      "eval_samples_per_second": 92.042,
+      "eval_steps_per_second": 23.011,
+      "step": 40
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 649990955335680.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null