Training in progress, step 30, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +81 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bed482554efe123094943faf37455da3a996fdac36f56f8fb99db004c10f6e77
 size 619632

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d7e1956e00bca6a37ff14db2a30da9b68b0412270804d15c373d573d895130e
 size 619632

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:085da727568270ac2791a46b34838f3da7de5ded0312a74ee45c1fe08c97f73c
 size 1324026

 version https://git-lfs.github.com/spec/v1
+oid sha256:f68e5fcfec47e208e5db613b7916aebe5eb47e922e21e87d04bd4b90ef71e6c0
 size 1324026

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d27fe899a7839025729cf1fb7357f56d7faf3f1dbdc18ff29f9424d9d3fc5b7
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f1b42cbd14e4887332cb62e946119f0bcf0d9a9fcacc80ed8b66010e166bbd7
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17fc2c8c4ae755ef8015c8c7e0643c28af7cc038251fdb78f97f403908b497c7
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c8d2f6e87f5b671dfa031e06947209073b518165329eeb9867cd4aaa16776ca
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3af69dbf654c73d9cdaa3b7070f08391ce43134d6289c650afd77196fc9b0fe
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b3e333deca01a6860a16a4bea1a2ebeb14f960dad45973fb2bf65501096c51e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5043341213553979,
   "eval_steps": 10,
-  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -171,6 +171,84 @@
       "eval_samples_per_second": 92.192,
       "eval_steps_per_second": 23.048,
       "step": 20
     }
   ],
   "logging_steps": 1,
@@ -190,7 +268,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 324995477667840.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7565011820330969,
   "eval_steps": 10,
+  "global_step": 30,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 92.192,
       "eval_steps_per_second": 23.048,
       "step": 20
+    },
+    {
+      "epoch": 0.5295508274231678,
+      "grad_norm": 0.23226885497570038,
+      "learning_rate": 5e-05,
+      "loss": 11.9191,
+      "step": 21
+    },
+    {
+      "epoch": 0.5547675334909378,
+      "grad_norm": 0.23677287995815277,
+      "learning_rate": 4.5871032726383386e-05,
+      "loss": 11.9191,
+      "step": 22
+    },
+    {
+      "epoch": 0.5799842395587076,
+      "grad_norm": 0.24115414917469025,
+      "learning_rate": 4.17702704859633e-05,
+      "loss": 11.9169,
+      "step": 23
+    },
+    {
+      "epoch": 0.6052009456264775,
+      "grad_norm": 0.23872853815555573,
+      "learning_rate": 3.772572564296005e-05,
+      "loss": 11.9193,
+      "step": 24
+    },
+    {
+      "epoch": 0.6304176516942475,
+      "grad_norm": 0.24184449017047882,
+      "learning_rate": 3.3765026539765834e-05,
+      "loss": 11.9191,
+      "step": 25
+    },
+    {
+      "epoch": 0.6556343577620173,
+      "grad_norm": 0.2603144645690918,
+      "learning_rate": 2.991522876735154e-05,
+      "loss": 11.9174,
+      "step": 26
+    },
+    {
+      "epoch": 0.6808510638297872,
+      "grad_norm": 0.26540812849998474,
+      "learning_rate": 2.6202630348146324e-05,
+      "loss": 11.916,
+      "step": 27
+    },
+    {
+      "epoch": 0.7060677698975572,
+      "grad_norm": 0.2523845136165619,
+      "learning_rate": 2.2652592093878666e-05,
+      "loss": 11.9157,
+      "step": 28
+    },
+    {
+      "epoch": 0.731284475965327,
+      "grad_norm": 0.24367552995681763,
+      "learning_rate": 1.928936436551661e-05,
+      "loss": 11.9162,
+      "step": 29
+    },
+    {
+      "epoch": 0.7565011820330969,
+      "grad_norm": 0.25121253728866577,
+      "learning_rate": 1.6135921418712956e-05,
+      "loss": 11.9183,
+      "step": 30
+    },
+    {
+      "epoch": 0.7565011820330969,
+      "eval_loss": 11.915938377380371,
+      "eval_runtime": 2.9116,
+      "eval_samples_per_second": 92.045,
+      "eval_steps_per_second": 23.011,
+      "step": 30
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 487493216501760.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null