Training in progress, step 150, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +2 -2
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +71 -297
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1aec38ec987cade1694dd2adb977be799dc886de277db28a7617163fdb79093f
 size 100689176

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c0dcc370af7068987019d5ad7c55775cd4aa685f984103fd5fb8da8d0263a78
 size 100689176

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4cf02ff75d920a72a544ac9eb9e2180ab22d712f5963fbb3016e4a4418a0d6dd
 size 201488698

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c6a4f4139c8708380a359a5afcbe53de60e8b0bdaaaef9b89ffbc87e489ebb1
 size 201488698

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3fa9d6e7bb503576de38e4bc341860abcc3b9e550d7810ca32a03e7fcdb4512
-size 14308

 version https://git-lfs.github.com/spec/v1
+oid sha256:adee8db53dd522679041ac1e22258a9dccde0894ff7e7335c959c7a488f59fa9
+size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef8d2900de30e6031eb67496f65d84b5428252e5a56573254c12f627baa587a8
 size 1256

 version https://git-lfs.github.com/spec/v1
+oid sha256:e5f37e21fbe85e09b136734aed2deb2ce642b5bd3d64c65196a2c110d8c5ff3a
 size 1256

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,351 +1,125 @@
 {
-  "best_metric": 1.0556260347366333,
-  "best_model_checkpoint": "./output/checkpoint-450",
-  "epoch": 0.03256621797655232,
   "eval_steps": 150,
-  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.0007236937328122738,
-      "grad_norm": 1.5576016902923584,
-      "learning_rate": 1.25e-05,
-      "loss": 1.2444,
       "step": 10
     },
     {
-      "epoch": 0.0014473874656245477,
-      "grad_norm": 1.1290874481201172,
-      "learning_rate": 2.5e-05,
-      "loss": 1.3366,
       "step": 20
     },
     {
-      "epoch": 0.0021710811984368217,
-      "grad_norm": 1.8639086484909058,
-      "learning_rate": 3.75e-05,
-      "loss": 1.256,
       "step": 30
     },
     {
-      "epoch": 0.0028947749312490953,
-      "grad_norm": 1.134912133216858,
-      "learning_rate": 5e-05,
-      "loss": 1.1769,
       "step": 40
     },
     {
-      "epoch": 0.0036184686640613694,
-      "grad_norm": 1.3332692384719849,
-      "learning_rate": 6.25e-05,
-      "loss": 1.0865,
       "step": 50
     },
     {
-      "epoch": 0.004342162396873643,
-      "grad_norm": 1.3363752365112305,
-      "learning_rate": 7.5e-05,
-      "loss": 1.084,
       "step": 60
     },
     {
-      "epoch": 0.005065856129685917,
-      "grad_norm": 1.0881965160369873,
-      "learning_rate": 8.75e-05,
-      "loss": 1.0748,
       "step": 70
     },
     {
-      "epoch": 0.005789549862498191,
-      "grad_norm": 1.3386387825012207,
-      "learning_rate": 0.0001,
-      "loss": 1.1922,
       "step": 80
     },
     {
-      "epoch": 0.006513243595310464,
-      "grad_norm": 0.9421172142028809,
-      "learning_rate": 0.00011250000000000001,
-      "loss": 1.0653,
       "step": 90
     },
     {
-      "epoch": 0.007236937328122739,
-      "grad_norm": 1.0505911111831665,
-      "learning_rate": 0.000125,
-      "loss": 1.0001,
       "step": 100
     },
     {
-      "epoch": 0.007960631060935012,
-      "grad_norm": 1.0293700695037842,
-      "learning_rate": 0.00012499871543489787,
-      "loss": 1.1826,
       "step": 110
     },
     {
-      "epoch": 0.008684324793747287,
-      "grad_norm": 1.0566595792770386,
-      "learning_rate": 0.00012499486179239495,
-      "loss": 1.1505,
       "step": 120
     },
     {
-      "epoch": 0.00940801852655956,
-      "grad_norm": 0.7607803344726562,
-      "learning_rate": 0.00012498843923089938,
-      "loss": 1.1219,
       "step": 130
     },
     {
-      "epoch": 0.010131712259371834,
-      "grad_norm": 0.9567335844039917,
-      "learning_rate": 0.0001249794480144175,
-      "loss": 1.1675,
       "step": 140
     },
     {
-      "epoch": 0.010855405992184108,
-      "grad_norm": 0.9335429668426514,
-      "learning_rate": 0.000124967888512543,
-      "loss": 1.1177,
       "step": 150
     },
     {
-      "epoch": 0.010855405992184108,
-      "eval_loss": 1.084096074104309,
-      "eval_runtime": 68.3319,
-      "eval_samples_per_second": 7.317,
-      "eval_steps_per_second": 7.317,
       "step": 150
-    },
-    {
-      "epoch": 0.011579099724996381,
-      "grad_norm": 1.0022648572921753,
-      "learning_rate": 0.00012495376120044173,
-      "loss": 1.0762,
-      "step": 160
-    },
-    {
-      "epoch": 0.012302793457808655,
-      "grad_norm": 1.0098097324371338,
-      "learning_rate": 0.00012493706665883217,
-      "loss": 1.0473,
-      "step": 170
-    },
-    {
-      "epoch": 0.013026487190620929,
-      "grad_norm": 0.9342713952064514,
-      "learning_rate": 0.00012491780557396154,
-      "loss": 1.2133,
-      "step": 180
-    },
-    {
-      "epoch": 0.013750180923433204,
-      "grad_norm": 1.2415492534637451,
-      "learning_rate": 0.00012489597873757756,
-      "loss": 1.1635,
-      "step": 190
-    },
-    {
-      "epoch": 0.014473874656245478,
-      "grad_norm": 0.727070152759552,
-      "learning_rate": 0.00012487158704689602,
-      "loss": 1.1106,
-      "step": 200
-    },
-    {
-      "epoch": 0.015197568389057751,
-      "grad_norm": 1.039361596107483,
-      "learning_rate": 0.0001248446315045638,
-      "loss": 1.1199,
-      "step": 210
-    },
-    {
-      "epoch": 0.015921262121870023,
-      "grad_norm": 1.1260257959365845,
-      "learning_rate": 0.00012481511321861763,
-      "loss": 1.1924,
-      "step": 220
-    },
-    {
-      "epoch": 0.0166449558546823,
-      "grad_norm": 0.9752004742622375,
-      "learning_rate": 0.00012478303340243864,
-      "loss": 1.0699,
-      "step": 230
-    },
-    {
-      "epoch": 0.017368649587494574,
-      "grad_norm": 1.4011763334274292,
-      "learning_rate": 0.00012474839337470246,
-      "loss": 1.106,
-      "step": 240
-    },
-    {
-      "epoch": 0.018092343320306847,
-      "grad_norm": 0.9429338574409485,
-      "learning_rate": 0.0001247111945593249,
-      "loss": 1.161,
-      "step": 250
-    },
-    {
-      "epoch": 0.01881603705311912,
-      "grad_norm": 1.0999586582183838,
-      "learning_rate": 0.00012467143848540359,
-      "loss": 1.1911,
-      "step": 260
-    },
-    {
-      "epoch": 0.019539730785931395,
-      "grad_norm": 1.1027190685272217,
-      "learning_rate": 0.000124629126787155,
-      "loss": 1.2331,
-      "step": 270
-    },
-    {
-      "epoch": 0.020263424518743668,
-      "grad_norm": 0.9375354647636414,
-      "learning_rate": 0.00012458426120384738,
-      "loss": 1.0836,
-      "step": 280
-    },
-    {
-      "epoch": 0.020987118251555942,
-      "grad_norm": 2.151923894882202,
-      "learning_rate": 0.00012453684357972906,
-      "loss": 1.0584,
-      "step": 290
-    },
-    {
-      "epoch": 0.021710811984368215,
-      "grad_norm": 0.833605945110321,
-      "learning_rate": 0.00012448687586395289,
-      "loss": 1.1453,
-      "step": 300
-    },
-    {
-      "epoch": 0.021710811984368215,
-      "eval_loss": 1.061118483543396,
-      "eval_runtime": 67.9747,
-      "eval_samples_per_second": 7.356,
-      "eval_steps_per_second": 7.356,
-      "step": 300
-    },
-    {
-      "epoch": 0.02243450571718049,
-      "grad_norm": 1.0999945402145386,
-      "learning_rate": 0.00012443436011049593,
-      "loss": 1.2021,
-      "step": 310
-    },
-    {
-      "epoch": 0.023158199449992763,
-      "grad_norm": 1.1812015771865845,
-      "learning_rate": 0.0001243792984780751,
-      "loss": 1.0777,
-      "step": 320
-    },
-    {
-      "epoch": 0.023881893182805036,
-      "grad_norm": 1.6861599683761597,
-      "learning_rate": 0.00012432169323005853,
-      "loss": 1.1632,
-      "step": 330
-    },
-    {
-      "epoch": 0.02460558691561731,
-      "grad_norm": 1.1688460111618042,
-      "learning_rate": 0.00012426154673437223,
-      "loss": 1.07,
-      "step": 340
-    },
-    {
-      "epoch": 0.025329280648429583,
-      "grad_norm": 0.8406811952590942,
-      "learning_rate": 0.00012419886146340314,
-      "loss": 1.0175,
-      "step": 350
-    },
-    {
-      "epoch": 0.026052974381241857,
-      "grad_norm": 1.205870270729065,
-      "learning_rate": 0.0001241336399938972,
-      "loss": 1.2039,
-      "step": 360
-    },
-    {
-      "epoch": 0.02677666811405413,
-      "grad_norm": 0.7990264296531677,
-      "learning_rate": 0.00012406588500685355,
-      "loss": 1.0588,
-      "step": 370
-    },
-    {
-      "epoch": 0.027500361846866408,
-      "grad_norm": 1.1303527355194092,
-      "learning_rate": 0.00012399559928741435,
-      "loss": 1.1073,
-      "step": 380
-    },
-    {
-      "epoch": 0.02822405557967868,
-      "grad_norm": 0.7280349731445312,
-      "learning_rate": 0.00012392278572475023,
-      "loss": 1.0966,
-      "step": 390
-    },
-    {
-      "epoch": 0.028947749312490955,
-      "grad_norm": 1.076653242111206,
-      "learning_rate": 0.0001238474473119416,
-      "loss": 1.111,
-      "step": 400
-    },
-    {
-      "epoch": 0.02967144304530323,
-      "grad_norm": 0.9253267049789429,
-      "learning_rate": 0.00012376958714585545,
-      "loss": 1.1165,
-      "step": 410
-    },
-    {
-      "epoch": 0.030395136778115502,
-      "grad_norm": 0.9444619417190552,
-      "learning_rate": 0.0001236892084270183,
-      "loss": 1.1612,
-      "step": 420
-    },
-    {
-      "epoch": 0.031118830510927776,
-      "grad_norm": 1.2068166732788086,
-      "learning_rate": 0.00012360631445948448,
-      "loss": 1.1822,
-      "step": 430
-    },
-    {
-      "epoch": 0.031842524243740046,
-      "grad_norm": 0.8767175674438477,
-      "learning_rate": 0.00012352090865070026,
-      "loss": 0.9645,
-      "step": 440
-    },
-    {
-      "epoch": 0.03256621797655232,
-      "grad_norm": 0.8043785691261292,
-      "learning_rate": 0.00012343299451136397,
-      "loss": 1.1397,
-      "step": 450
-    },
-    {
-      "epoch": 0.03256621797655232,
-      "eval_loss": 1.0556260347366333,
-      "eval_runtime": 67.7421,
-      "eval_samples_per_second": 7.381,
-      "eval_steps_per_second": 7.381,
-      "step": 450
     }
   ],
   "logging_steps": 10,
@@ -365,8 +139,8 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.640123545491866e+16,
-  "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 1.0342717170715332,
+  "best_model_checkpoint": "./output/checkpoint-150",
+  "epoch": 0.005427702996092054,
   "eval_steps": 150,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.0003618468664061369,
+      "grad_norm": 1.2045749425888062,
+      "learning_rate": 5.500000000000001e-06,
+      "loss": 1.144,
       "step": 10
     },
     {
+      "epoch": 0.0007236937328122738,
+      "grad_norm": 1.50728178024292,
+      "learning_rate": 1.1000000000000001e-05,
+      "loss": 1.1809,
       "step": 20
     },
     {
+      "epoch": 0.0010855405992184109,
+      "grad_norm": 0.9494473934173584,
+      "learning_rate": 1.65e-05,
+      "loss": 1.0738,
       "step": 30
     },
     {
+      "epoch": 0.0014473874656245477,
+      "grad_norm": 0.957133948802948,
+      "learning_rate": 2.2000000000000003e-05,
+      "loss": 0.9573,
       "step": 40
     },
     {
+      "epoch": 0.0018092343320306847,
+      "grad_norm": 1.7268428802490234,
+      "learning_rate": 2.75e-05,
+      "loss": 1.0361,
       "step": 50
     },
     {
+      "epoch": 0.0021710811984368217,
+      "grad_norm": 1.1843866109848022,
+      "learning_rate": 3.3e-05,
+      "loss": 1.0351,
       "step": 60
     },
     {
+      "epoch": 0.0025329280648429585,
+      "grad_norm": 1.5817480087280273,
+      "learning_rate": 3.85e-05,
+      "loss": 1.1654,
       "step": 70
     },
     {
+      "epoch": 0.0028947749312490953,
+      "grad_norm": 0.8221575617790222,
+      "learning_rate": 4.4000000000000006e-05,
+      "loss": 1.1031,
       "step": 80
     },
     {
+      "epoch": 0.003256621797655232,
+      "grad_norm": 1.0210144519805908,
+      "learning_rate": 4.9500000000000004e-05,
+      "loss": 1.2083,
       "step": 90
     },
     {
+      "epoch": 0.0036184686640613694,
+      "grad_norm": 1.6523082256317139,
+      "learning_rate": 5.5e-05,
+      "loss": 1.1551,
       "step": 100
     },
     {
+      "epoch": 0.003980315530467506,
+      "grad_norm": 1.3959214687347412,
+      "learning_rate": 5.4999434791355066e-05,
+      "loss": 1.2088,
       "step": 110
     },
     {
+      "epoch": 0.004342162396873643,
+      "grad_norm": 1.7850854396820068,
+      "learning_rate": 5.4997739188653784e-05,
+      "loss": 1.0394,
       "step": 120
     },
     {
+      "epoch": 0.00470400926327978,
+      "grad_norm": 1.707861304283142,
+      "learning_rate": 5.4994913261595724e-05,
+      "loss": 1.0406,
       "step": 130
     },
     {
+      "epoch": 0.005065856129685917,
+      "grad_norm": 1.622674584388733,
+      "learning_rate": 5.49909571263437e-05,
+      "loss": 1.0781,
       "step": 140
     },
     {
+      "epoch": 0.005427702996092054,
+      "grad_norm": 1.135132908821106,
+      "learning_rate": 5.498587094551892e-05,
+      "loss": 1.2658,
       "step": 150
     },
     {
+      "epoch": 0.005427702996092054,
+      "eval_loss": 1.0342717170715332,
+      "eval_runtime": 68.3584,
+      "eval_samples_per_second": 7.314,
+      "eval_steps_per_second": 7.314,
       "step": 150
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.358716220940288e+16,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54d6539c84a7f0730a538274bf05d1d19242ee2f5b7307043f37a16d2ee393e6
 size 5496

 version https://git-lfs.github.com/spec/v1
+oid sha256:36ffe1bc7f9782a0960f25658b63a6896f6292d5fbd06dd9772d5fa0c7cd4b3b
 size 5496