ncbateman committed on
Commit 11dbf71
1 Parent(s): f657c98

Training in progress, step 555, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4c1a22a5ad978a253e58570d5a6e4cbeec9bfa7a8ece40f8616e2220428cc7a
+oid sha256:bef787ba16eb3d41f4d13340c07bc05fd0530058b348be8288493c90ab94325f
 size 167832240
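The block above is a Git LFS pointer diff, not the weights themselves: overwriting the adapter at this checkpoint changes only the oid (the SHA-256 of the stored blob), while the payload stays 167832240 bytes. A minimal sketch, not part of this commit, of checking a locally downloaded copy against the new pointer (the local path is an assumption for illustration):

```python
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file so a large checkpoint never has to fit in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Assumed local path; oid and size copied from the new LFS pointer above.
path = Path("last-checkpoint/adapter_model.safetensors")
expected_oid = "bef787ba16eb3d41f4d13340c07bc05fd0530058b348be8288493c90ab94325f"
expected_size = 167832240

assert path.stat().st_size == expected_size, "size mismatch with LFS pointer"
assert sha256_of(path) == expected_oid, "sha256 mismatch with LFS pointer"
print("adapter_model.safetensors matches its LFS pointer")
```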
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9081acbf7e17111ebe4f8bbdc838b421d5fd63ae6342d91e8d860439a914c398
+oid sha256:f3635defef027975c19b220db2e7d50884b06413d7e99c178c0845e44afd1e88
 size 85723732
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a199493692cf25c16b518bf6e7216c3987b56836fc34a4b8f0b5e62bd338bbd
+oid sha256:9fd23d48d0a00f2bcef4e61e7b0ebc30691cd0946be2aa8316f2c61edcd7b2f1
 size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74615d30db2006579916bf75a005a1eafb18948a8ecb1d61a6161179b736d4f2
+oid sha256:59225b59d81ea40b5f6d48123628af66023e2b10a1af4f3a269ca2da584f937a
 size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5cccf180715c4b2e0d980973df8379f3cff06745ac6beabafbc8fc4345127563
+oid sha256:5fa5404a9d5a7f82472a09215ed58ac37185a39533d448975bd7ab0b239729ef
 size 1064
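Together with the adapter weights, the optimizer state, LR scheduler state, and per-process RNG states above are what lets transformers' Trainer resume this run exactly from step 555. A minimal sketch, not part of this commit, of loading the saved LoRA adapter for inference with PEFT; the base model id is a placeholder (the diff does not name it), and it assumes the checkpoint directory also contains its adapter_config.json, which is unchanged and therefore not shown here:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL_ID = "your-base-model-id"   # assumption: base model is not named in this diff
ADAPTER_DIR = "last-checkpoint"        # directory holding adapter_model.safetensors

base = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
model = PeftModel.from_pretrained(base, ADAPTER_DIR)  # attaches the LoRA adapter weights
model.eval()
```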
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.853932584269663,
+  "epoch": 4.98876404494382,
   "eval_steps": 56,
-  "global_step": 540,
+  "global_step": 555,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3867,6 +3867,111 @@
       "learning_rate": 2.1753260154906973e-07,
       "loss": 0.2876,
       "step": 540
+    },
+    {
+      "epoch": 4.8629213483146065,
+      "grad_norm": 1.6425896883010864,
+      "learning_rate": 1.8951279224599382e-07,
+      "loss": 0.2625,
+      "step": 541
+    },
+    {
+      "epoch": 4.871910112359551,
+      "grad_norm": 1.553606629371643,
+      "learning_rate": 1.6342066931995802e-07,
+      "loss": 0.2581,
+      "step": 542
+    },
+    {
+      "epoch": 4.880898876404494,
+      "grad_norm": 1.5650955438613892,
+      "learning_rate": 1.3925724254686413e-07,
+      "loss": 0.2533,
+      "step": 543
+    },
+    {
+      "epoch": 4.889887640449438,
+      "grad_norm": 1.6483417749404907,
+      "learning_rate": 1.1702344706129298e-07,
+      "loss": 0.2943,
+      "step": 544
+    },
+    {
+      "epoch": 4.898876404494382,
+      "grad_norm": 1.5799306631088257,
+      "learning_rate": 9.672014332028356e-08,
+      "loss": 0.2944,
+      "step": 545
+    },
+    {
+      "epoch": 4.907865168539326,
+      "grad_norm": 1.736416220664978,
+      "learning_rate": 7.834811707005951e-08,
+      "loss": 0.3674,
+      "step": 546
+    },
+    {
+      "epoch": 4.91685393258427,
+      "grad_norm": 1.6711561679840088,
+      "learning_rate": 6.190807931560349e-08,
+      "loss": 0.2691,
+      "step": 547
+    },
+    {
+      "epoch": 4.925842696629213,
+      "grad_norm": 1.663176417350769,
+      "learning_rate": 4.740066629315143e-08,
+      "loss": 0.2818,
+      "step": 548
+    },
+    {
+      "epoch": 4.9348314606741575,
+      "grad_norm": 1.7786431312561035,
+      "learning_rate": 3.482643944556219e-08,
+      "loss": 0.3012,
+      "step": 549
+    },
+    {
+      "epoch": 4.943820224719101,
+      "grad_norm": 1.6006958484649658,
+      "learning_rate": 2.4185885400596075e-08,
+      "loss": 0.2574,
+      "step": 550
+    },
+    {
+      "epoch": 4.952808988764045,
+      "grad_norm": 1.7665148973464966,
+      "learning_rate": 1.5479415952085463e-08,
+      "loss": 0.2547,
+      "step": 551
+    },
+    {
+      "epoch": 4.961797752808989,
+      "grad_norm": 1.5012476444244385,
+      "learning_rate": 8.707368043975317e-09,
+      "loss": 0.2478,
+      "step": 552
+    },
+    {
+      "epoch": 4.9707865168539325,
+      "grad_norm": 1.7341482639312744,
+      "learning_rate": 3.870003757311391e-09,
+      "loss": 0.2677,
+      "step": 553
+    },
+    {
+      "epoch": 4.979775280898877,
+      "grad_norm": 1.6566503047943115,
+      "learning_rate": 9.675103000872376e-10,
+      "loss": 0.2858,
+      "step": 554
+    },
+    {
+      "epoch": 4.98876404494382,
+      "grad_norm": 1.8078707456588745,
+      "learning_rate": 0.0,
+      "loss": 0.3493,
+      "step": 555
     }
   ],
   "logging_steps": 1,
@@ -3881,12 +3986,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.2049530649811354e+18,
+  "total_flos": 3.293979539008389e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null