Training in progress, step 440, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a99c5b789b7a06f40811d3146f888da339adf9e702f36969d5b6a7ac3ab82ab6
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:e84e019c8851cde8bbc3291912c02b0d372f753097e404cd9c63df3635876d58
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7081fa4644e6edc9ed10c9a37bfc9b2e3b4dd975799975829b4c606230bf5f36
 size 85723732

 version https://git-lfs.github.com/spec/v1
+oid sha256:eb16128ba3713943c32c15e444ff5b9d0639d313830202e6e343341c474be781
 size 85723732

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f7bcb69931d483e25b0c94c58fcc965a98e3bae3e6c3bf085e4edd6e43f5a40
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:45bde99108f092870a3ddeadad368065f1715b17c0e73ecda20ac6b84eb7df46
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87bfdda30185873c0a9cb229d25fc230d1146196d05e81747db223517c4eafcf
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:62fb05d1751019ac24743a26e17e60ff4857e4af1f72362a4b410a6c8410a65b
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2cd61f66c9dd6299c5f3f99f7ceb5a152a1d64c4909020bec811205c4243a070
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:54547a23ea029fe03b5f58871d4527a748e7ac8f837926dd1a6e08b721fbdc33
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66172525291e0f467bfc09091521e14e32a3eaaef79955a2826a4348133fc8e0
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:bef39a40db1a483262af7570836839950da73fbe882edbfe93922ade4768db34
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79b607b3724fadd616761069f9d370b3b216d512a0fbae133eb5c1bd8e06f7d6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8323834fccc4192732a390e4554f47c9681faaee6040082de0c2920fe02fd450
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9712027103331451,
   "eval_steps": 222,
-  "global_step": 430,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3033,6 +3033,76 @@
       "learning_rate": 2.442712862748775e-07,
       "loss": 1.7898,
       "step": 430
     }
   ],
   "logging_steps": 1,
@@ -3052,7 +3122,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.5528341423288484e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9937888198757764,
   "eval_steps": 222,
+  "global_step": 440,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.442712862748775e-07,
       "loss": 1.7898,
       "step": 430
+    },
+    {
+      "epoch": 0.9734613212874083,
+      "grad_norm": 0.6884695291519165,
+      "learning_rate": 2.0816157625706545e-07,
+      "loss": 1.33,
+      "step": 431
+    },
+    {
+      "epoch": 0.9757199322416714,
+      "grad_norm": 0.5976503491401672,
+      "learning_rate": 1.749329488395124e-07,
+      "loss": 1.6437,
+      "step": 432
+    },
+    {
+      "epoch": 0.9779785431959345,
+      "grad_norm": 0.42137616872787476,
+      "learning_rate": 1.4458732671523977e-07,
+      "loss": 1.2883,
+      "step": 433
+    },
+    {
+      "epoch": 0.9802371541501976,
+      "grad_norm": 0.6945174932479858,
+      "learning_rate": 1.1712646575922637e-07,
+      "loss": 1.3545,
+      "step": 434
+    },
+    {
+      "epoch": 0.9824957651044608,
+      "grad_norm": 0.5032137036323547,
+      "learning_rate": 9.255195492685609e-08,
+      "loss": 1.4777,
+      "step": 435
+    },
+    {
+      "epoch": 0.9847543760587238,
+      "grad_norm": 0.6473844647407532,
+      "learning_rate": 7.086521616190279e-08,
+      "loss": 1.4482,
+      "step": 436
+    },
+    {
+      "epoch": 0.987012987012987,
+      "grad_norm": 1.0584694147109985,
+      "learning_rate": 5.2067504314323723e-08,
+      "loss": 1.2144,
+      "step": 437
+    },
+    {
+      "epoch": 0.9892715979672502,
+      "grad_norm": 0.4894103407859802,
+      "learning_rate": 3.6159907067601085e-08,
+      "loss": 1.3691,
+      "step": 438
+    },
+    {
+      "epoch": 0.9915302089215132,
+      "grad_norm": 0.6497173309326172,
+      "learning_rate": 2.3143344875831142e-08,
+      "loss": 1.3208,
+      "step": 439
+    },
+    {
+      "epoch": 0.9937888198757764,
+      "grad_norm": 0.5225093364715576,
+      "learning_rate": 1.3018570910466877e-08,
+      "loss": 1.1954,
+      "step": 440
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.6129270118945915e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null