Training in progress, step 50, checkpoint

Browse files

Files changed (7) hide show

checkpoint-50/adapter_config.json +2 -2
checkpoint-50/adapter_model.safetensors +2 -2
checkpoint-50/optimizer.pt +2 -2
checkpoint-50/rng_state.pth +1 -1
checkpoint-50/scheduler.pt +1 -1
checkpoint-50/trainer_state.json +229 -229
checkpoint-50/training_args.bin +1 -1

checkpoint-50/adapter_config.json CHANGED Viewed

@@ -10,7 +10,7 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 32,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
@@ -19,7 +19,7 @@
     "score"
   ],
   "peft_type": "LORA",
-  "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [

   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 64,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
     "score"
   ],
   "peft_type": "LORA",
+  "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [

checkpoint-50/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68a045468f0e9ab47ef0c1a9da28d023b5322f88b90ffe9027c78c0958eaa0c3
-size 27313024

 version https://git-lfs.github.com/spec/v1
+oid sha256:996abc775fa52798168a432665b684d09f9a487bb971709b2be0ff559b4bdc96
+size 54576048

checkpoint-50/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0c097f34464ee144ca4b18eec5321b0629e2eb07ddd8237a2644130bbf266fc
-size 54668218

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4eb0ea463df519c6e94818d460604ff6999bf25a011fb4edfc8bd8db72a47f9
+size 109196538

checkpoint-50/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a57775e97d734b1bf77f3d08cc2543cd48b452bdd29105d0c5cff1bb0e96183
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f9e3f686147a49a9f31c1e5e32f0f7a18a4f215b56c917e895e7df0f5e717c12
 size 14244

checkpoint-50/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8477ebdd9e98a050efaa98c4ee5fb26ea6ac9516d088fbb54c344ed67ff7e87
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9583dae960f1692f1c714672f2fe9e52e367d1e6c1734c325e522dd72e1d9343
 size 1064

checkpoint-50/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "best_metric": 0.7421436309814453,
   "best_model_checkpoint": "../artifacts/LlaMa3-QLoRA-PatentMatch-v0.1/checkpoint-50",
-  "epoch": 0.4716981132075472,
   "eval_steps": 10,
   "global_step": 50,
   "is_hyper_param_search": false,
@@ -9,398 +9,398 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.009433962264150943,
-      "grad_norm": 6.128956317901611,
-      "learning_rate": 9.905660377358492e-05,
-      "loss": 0.461,
       "step": 1
     },
     {
-      "epoch": 0.018867924528301886,
-      "grad_norm": 68.78094482421875,
-      "learning_rate": 9.811320754716981e-05,
-      "loss": 0.9442,
       "step": 2
     },
     {
-      "epoch": 0.02830188679245283,
-      "grad_norm": 35.7277946472168,
-      "learning_rate": 9.716981132075472e-05,
-      "loss": 0.6359,
       "step": 3
     },
     {
-      "epoch": 0.03773584905660377,
-      "grad_norm": 56.930320739746094,
-      "learning_rate": 9.622641509433963e-05,
-      "loss": 0.5856,
       "step": 4
     },
     {
-      "epoch": 0.04716981132075472,
-      "grad_norm": 68.45040130615234,
-      "learning_rate": 9.528301886792453e-05,
-      "loss": 0.7444,
       "step": 5
     },
     {
-      "epoch": 0.05660377358490566,
-      "grad_norm": 18.832008361816406,
-      "learning_rate": 9.433962264150944e-05,
-      "loss": 0.6504,
       "step": 6
     },
     {
-      "epoch": 0.0660377358490566,
-      "grad_norm": 11.086563110351562,
-      "learning_rate": 9.339622641509434e-05,
-      "loss": 0.5207,
       "step": 7
     },
     {
-      "epoch": 0.07547169811320754,
-      "grad_norm": 20.990877151489258,
-      "learning_rate": 9.245283018867925e-05,
-      "loss": 0.586,
       "step": 8
     },
     {
-      "epoch": 0.08490566037735849,
-      "grad_norm": 14.929924964904785,
-      "learning_rate": 9.150943396226416e-05,
-      "loss": 0.4919,
       "step": 9
     },
     {
-      "epoch": 0.09433962264150944,
-      "grad_norm": 13.368741989135742,
-      "learning_rate": 9.056603773584906e-05,
-      "loss": 0.7216,
       "step": 10
     },
     {
-      "epoch": 0.09433962264150944,
-      "eval_loss": 0.7321442365646362,
-      "eval_runtime": 17.8269,
-      "eval_samples_per_second": 16.548,
-      "eval_steps_per_second": 3.31,
       "step": 10
     },
     {
-      "epoch": 0.10377358490566038,
-      "grad_norm": 13.765029907226562,
-      "learning_rate": 8.962264150943397e-05,
-      "loss": 0.9187,
       "step": 11
     },
     {
-      "epoch": 0.11320754716981132,
-      "grad_norm": 23.503965377807617,
-      "learning_rate": 8.867924528301888e-05,
-      "loss": 0.742,
       "step": 12
     },
     {
-      "epoch": 0.12264150943396226,
-      "grad_norm": 11.606241226196289,
-      "learning_rate": 8.773584905660378e-05,
-      "loss": 0.5432,
       "step": 13
     },
     {
-      "epoch": 0.1320754716981132,
-      "grad_norm": 37.84874725341797,
-      "learning_rate": 8.679245283018869e-05,
-      "loss": 0.6434,
       "step": 14
     },
     {
-      "epoch": 0.14150943396226415,
-      "grad_norm": 7.491106033325195,
-      "learning_rate": 8.584905660377359e-05,
-      "loss": 0.5317,
       "step": 15
     },
     {
-      "epoch": 0.1509433962264151,
-      "grad_norm": 19.157922744750977,
-      "learning_rate": 8.49056603773585e-05,
-      "loss": 0.8891,
       "step": 16
     },
     {
-      "epoch": 0.16037735849056603,
-      "grad_norm": 29.841453552246094,
-      "learning_rate": 8.396226415094341e-05,
-      "loss": 0.5606,
       "step": 17
     },
     {
-      "epoch": 0.16981132075471697,
-      "grad_norm": 31.549617767333984,
-      "learning_rate": 8.30188679245283e-05,
-      "loss": 0.7426,
       "step": 18
     },
     {
-      "epoch": 0.1792452830188679,
-      "grad_norm": 12.463747024536133,
-      "learning_rate": 8.207547169811322e-05,
-      "loss": 0.7602,
       "step": 19
     },
     {
-      "epoch": 0.18867924528301888,
-      "grad_norm": 37.965084075927734,
-      "learning_rate": 8.113207547169813e-05,
-      "loss": 0.6794,
       "step": 20
     },
     {
-      "epoch": 0.18867924528301888,
-      "eval_loss": 0.7417612671852112,
-      "eval_runtime": 18.8468,
-      "eval_samples_per_second": 15.652,
-      "eval_steps_per_second": 3.13,
       "step": 20
     },
     {
-      "epoch": 0.19811320754716982,
-      "grad_norm": 13.589489936828613,
-      "learning_rate": 8.018867924528302e-05,
-      "loss": 0.648,
       "step": 21
     },
     {
-      "epoch": 0.20754716981132076,
-      "grad_norm": 39.87663269042969,
-      "learning_rate": 7.924528301886794e-05,
-      "loss": 0.7446,
       "step": 22
     },
     {
-      "epoch": 0.2169811320754717,
-      "grad_norm": 17.065126419067383,
-      "learning_rate": 7.830188679245283e-05,
-      "loss": 0.6289,
       "step": 23
     },
     {
-      "epoch": 0.22641509433962265,
-      "grad_norm": 8.414811134338379,
-      "learning_rate": 7.735849056603774e-05,
-      "loss": 0.462,
       "step": 24
     },
     {
-      "epoch": 0.2358490566037736,
-      "grad_norm": 30.71627426147461,
-      "learning_rate": 7.641509433962265e-05,
-      "loss": 0.5479,
       "step": 25
     },
     {
-      "epoch": 0.24528301886792453,
-      "grad_norm": 32.945404052734375,
-      "learning_rate": 7.547169811320755e-05,
-      "loss": 0.6388,
       "step": 26
     },
     {
-      "epoch": 0.25471698113207547,
-      "grad_norm": 8.858525276184082,
-      "learning_rate": 7.452830188679245e-05,
-      "loss": 0.6801,
       "step": 27
     },
     {
-      "epoch": 0.2641509433962264,
-      "grad_norm": 7.566395282745361,
-      "learning_rate": 7.358490566037736e-05,
-      "loss": 0.6122,
       "step": 28
     },
     {
-      "epoch": 0.27358490566037735,
-      "grad_norm": 17.68086051940918,
-      "learning_rate": 7.264150943396226e-05,
-      "loss": 0.5955,
       "step": 29
     },
     {
-      "epoch": 0.2830188679245283,
-      "grad_norm": 13.58435344696045,
-      "learning_rate": 7.169811320754717e-05,
-      "loss": 0.6361,
       "step": 30
     },
     {
-      "epoch": 0.2830188679245283,
-      "eval_loss": 0.8027433156967163,
-      "eval_runtime": 19.1354,
-      "eval_samples_per_second": 15.416,
-      "eval_steps_per_second": 3.083,
       "step": 30
     },
     {
-      "epoch": 0.29245283018867924,
-      "grad_norm": 35.713294982910156,
-      "learning_rate": 7.075471698113208e-05,
-      "loss": 0.8423,
       "step": 31
     },
     {
-      "epoch": 0.3018867924528302,
-      "grad_norm": 8.273999214172363,
-      "learning_rate": 6.981132075471698e-05,
-      "loss": 0.6246,
       "step": 32
     },
     {
-      "epoch": 0.3113207547169811,
-      "grad_norm": 24.59564208984375,
-      "learning_rate": 6.886792452830189e-05,
-      "loss": 0.6088,
       "step": 33
     },
     {
-      "epoch": 0.32075471698113206,
-      "grad_norm": 50.26318359375,
-      "learning_rate": 6.79245283018868e-05,
-      "loss": 0.6227,
       "step": 34
     },
     {
-      "epoch": 0.330188679245283,
-      "grad_norm": 8.71313190460205,
-      "learning_rate": 6.69811320754717e-05,
-      "loss": 0.6782,
       "step": 35
     },
     {
-      "epoch": 0.33962264150943394,
-      "grad_norm": 43.067962646484375,
-      "learning_rate": 6.60377358490566e-05,
-      "loss": 0.876,
       "step": 36
     },
     {
-      "epoch": 0.3490566037735849,
-      "grad_norm": 52.883399963378906,
-      "learning_rate": 6.50943396226415e-05,
-      "loss": 0.8668,
       "step": 37
     },
     {
-      "epoch": 0.3584905660377358,
-      "grad_norm": 23.537960052490234,
-      "learning_rate": 6.415094339622641e-05,
-      "loss": 0.6368,
       "step": 38
     },
     {
-      "epoch": 0.36792452830188677,
-      "grad_norm": 18.321443557739258,
-      "learning_rate": 6.320754716981132e-05,
-      "loss": 0.6782,
       "step": 39
     },
     {
-      "epoch": 0.37735849056603776,
-      "grad_norm": 14.678520202636719,
-      "learning_rate": 6.226415094339622e-05,
-      "loss": 0.8059,
       "step": 40
     },
     {
-      "epoch": 0.37735849056603776,
-      "eval_loss": 0.7866339683532715,
-      "eval_runtime": 19.074,
-      "eval_samples_per_second": 15.466,
-      "eval_steps_per_second": 3.093,
       "step": 40
     },
     {
-      "epoch": 0.3867924528301887,
-      "grad_norm": 53.35795593261719,
-      "learning_rate": 6.132075471698113e-05,
-      "loss": 0.8482,
       "step": 41
     },
     {
-      "epoch": 0.39622641509433965,
-      "grad_norm": 26.74378776550293,
-      "learning_rate": 6.037735849056604e-05,
-      "loss": 0.5963,
       "step": 42
     },
     {
-      "epoch": 0.4056603773584906,
-      "grad_norm": 51.7420539855957,
-      "learning_rate": 5.943396226415094e-05,
-      "loss": 0.6738,
       "step": 43
     },
     {
-      "epoch": 0.41509433962264153,
-      "grad_norm": 7.802835941314697,
-      "learning_rate": 5.849056603773585e-05,
-      "loss": 0.7359,
       "step": 44
     },
     {
-      "epoch": 0.42452830188679247,
-      "grad_norm": 46.24528121948242,
-      "learning_rate": 5.7547169811320756e-05,
-      "loss": 0.6698,
       "step": 45
     },
     {
-      "epoch": 0.4339622641509434,
-      "grad_norm": 11.004349708557129,
-      "learning_rate": 5.660377358490566e-05,
-      "loss": 0.7172,
       "step": 46
     },
     {
-      "epoch": 0.44339622641509435,
-      "grad_norm": 46.21535873413086,
-      "learning_rate": 5.5660377358490564e-05,
-      "loss": 0.7805,
       "step": 47
     },
     {
-      "epoch": 0.4528301886792453,
-      "grad_norm": 30.457117080688477,
-      "learning_rate": 5.4716981132075475e-05,
-      "loss": 0.6887,
       "step": 48
     },
     {
-      "epoch": 0.46226415094339623,
-      "grad_norm": 15.550039291381836,
-      "learning_rate": 5.377358490566038e-05,
-      "loss": 0.6272,
       "step": 49
     },
     {
-      "epoch": 0.4716981132075472,
-      "grad_norm": 22.72784996032715,
-      "learning_rate": 5.283018867924528e-05,
-      "loss": 0.6153,
       "step": 50
     },
     {
-      "epoch": 0.4716981132075472,
-      "eval_loss": 0.7421436309814453,
-      "eval_runtime": 19.1648,
-      "eval_samples_per_second": 15.393,
-      "eval_steps_per_second": 3.079,
       "step": 50
     }
   ],
   "logging_steps": 1,
-  "max_steps": 106,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 50,
@@ -416,7 +416,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.905280632127488e+16,
   "train_batch_size": 5,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.322121500968933,
   "best_model_checkpoint": "../artifacts/LlaMa3-QLoRA-PatentMatch-v0.1/checkpoint-50",
+  "epoch": 0.09433962264150944,
   "eval_steps": 10,
   "global_step": 50,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.0018867924528301887,
+      "grad_norm": 29.756061553955078,
+      "learning_rate": 1e-10,
+      "loss": 0.8469,
       "step": 1
     },
     {
+      "epoch": 0.0037735849056603774,
+      "grad_norm": 95.5372543334961,
+      "learning_rate": 2e-10,
+      "loss": 1.3855,
       "step": 2
     },
     {
+      "epoch": 0.005660377358490566,
+      "grad_norm": 31.052196502685547,
+      "learning_rate": 3e-10,
+      "loss": 1.0934,
       "step": 3
     },
     {
+      "epoch": 0.007547169811320755,
+      "grad_norm": 27.79388999938965,
+      "learning_rate": 4e-10,
+      "loss": 0.5449,
       "step": 4
     },
     {
+      "epoch": 0.009433962264150943,
+      "grad_norm": 104.76834869384766,
+      "learning_rate": 5e-10,
+      "loss": 2.7414,
       "step": 5
     },
     {
+      "epoch": 0.011320754716981131,
+      "grad_norm": 72.9654312133789,
+      "learning_rate": 6e-10,
+      "loss": 1.8,
       "step": 6
     },
     {
+      "epoch": 0.013207547169811321,
+      "grad_norm": 118.8756103515625,
+      "learning_rate": 7.000000000000001e-10,
+      "loss": 2.7953,
       "step": 7
     },
     {
+      "epoch": 0.01509433962264151,
+      "grad_norm": 38.9265251159668,
+      "learning_rate": 8e-10,
+      "loss": 0.9516,
       "step": 8
     },
     {
+      "epoch": 0.016981132075471698,
+      "grad_norm": 36.34098815917969,
+      "learning_rate": 9e-10,
+      "loss": 1.4047,
       "step": 9
     },
     {
+      "epoch": 0.018867924528301886,
+      "grad_norm": 73.90235900878906,
+      "learning_rate": 1e-09,
+      "loss": 0.842,
       "step": 10
     },
     {
+      "epoch": 0.018867924528301886,
+      "eval_loss": 1.3371663093566895,
+      "eval_runtime": 19.2573,
+      "eval_samples_per_second": 15.319,
+      "eval_steps_per_second": 1.921,
       "step": 10
     },
     {
+      "epoch": 0.020754716981132074,
+      "grad_norm": 23.1890926361084,
+      "learning_rate": 1.1000000000000001e-09,
+      "loss": 0.1208,
       "step": 11
     },
     {
+      "epoch": 0.022641509433962263,
+      "grad_norm": 22.793596267700195,
+      "learning_rate": 1.2e-09,
+      "loss": 0.9565,
       "step": 12
     },
     {
+      "epoch": 0.024528301886792454,
+      "grad_norm": 119.95530700683594,
+      "learning_rate": 1.3e-09,
+      "loss": 2.2288,
       "step": 13
     },
     {
+      "epoch": 0.026415094339622643,
+      "grad_norm": 65.44388580322266,
+      "learning_rate": 1.4000000000000001e-09,
+      "loss": 0.6767,
       "step": 14
     },
     {
+      "epoch": 0.02830188679245283,
+      "grad_norm": 58.65330123901367,
+      "learning_rate": 1.5e-09,
+      "loss": 1.082,
       "step": 15
     },
     {
+      "epoch": 0.03018867924528302,
+      "grad_norm": 32.0055046081543,
+      "learning_rate": 1.6e-09,
+      "loss": 0.1699,
       "step": 16
     },
     {
+      "epoch": 0.03207547169811321,
+      "grad_norm": 73.29571533203125,
+      "learning_rate": 1.7000000000000001e-09,
+      "loss": 0.8457,
       "step": 17
     },
     {
+      "epoch": 0.033962264150943396,
+      "grad_norm": 148.53343200683594,
+      "learning_rate": 1.8e-09,
+      "loss": 2.2594,
       "step": 18
     },
     {
+      "epoch": 0.035849056603773584,
+      "grad_norm": 41.90864181518555,
+      "learning_rate": 1.9e-09,
+      "loss": 0.8586,
       "step": 19
     },
     {
+      "epoch": 0.03773584905660377,
+      "grad_norm": 27.577930450439453,
+      "learning_rate": 2e-09,
+      "loss": 1.0981,
       "step": 20
     },
     {
+      "epoch": 0.03773584905660377,
+      "eval_loss": 1.3284127712249756,
+      "eval_runtime": 19.9562,
+      "eval_samples_per_second": 14.782,
+      "eval_steps_per_second": 1.854,
       "step": 20
     },
     {
+      "epoch": 0.03962264150943396,
+      "grad_norm": 42.17684555053711,
+      "learning_rate": 2.0999999999999998e-09,
+      "loss": 0.8403,
       "step": 21
     },
     {
+      "epoch": 0.04150943396226415,
+      "grad_norm": 23.889053344726562,
+      "learning_rate": 2.2000000000000003e-09,
+      "loss": 0.5028,
       "step": 22
     },
     {
+      "epoch": 0.04339622641509434,
+      "grad_norm": 123.04353332519531,
+      "learning_rate": 2.3000000000000003e-09,
+      "loss": 2.2626,
       "step": 23
     },
     {
+      "epoch": 0.045283018867924525,
+      "grad_norm": 28.47271156311035,
+      "learning_rate": 2.4e-09,
+      "loss": 0.1571,
       "step": 24
     },
     {
+      "epoch": 0.04716981132075472,
+      "grad_norm": 76.90851593017578,
+      "learning_rate": 2.5e-09,
+      "loss": 1.1323,
       "step": 25
     },
     {
+      "epoch": 0.04905660377358491,
+      "grad_norm": 76.9769058227539,
+      "learning_rate": 2.6e-09,
+      "loss": 1.2021,
       "step": 26
     },
     {
+      "epoch": 0.0509433962264151,
+      "grad_norm": 65.92312622070312,
+      "learning_rate": 2.7e-09,
+      "loss": 1.1695,
       "step": 27
     },
     {
+      "epoch": 0.052830188679245285,
+      "grad_norm": 68.66289520263672,
+      "learning_rate": 2.8000000000000003e-09,
+      "loss": 1.2923,
       "step": 28
     },
     {
+      "epoch": 0.05471698113207547,
+      "grad_norm": 60.09720230102539,
+      "learning_rate": 2.9e-09,
+      "loss": 0.8071,
       "step": 29
     },
     {
+      "epoch": 0.05660377358490566,
+      "grad_norm": 120.86378479003906,
+      "learning_rate": 3e-09,
+      "loss": 2.4214,
       "step": 30
     },
     {
+      "epoch": 0.05660377358490566,
+      "eval_loss": 1.322973370552063,
+      "eval_runtime": 20.2855,
+      "eval_samples_per_second": 14.542,
+      "eval_steps_per_second": 1.824,
       "step": 30
     },
     {
+      "epoch": 0.05849056603773585,
+      "grad_norm": 51.104766845703125,
+      "learning_rate": 3.1e-09,
+      "loss": 1.0263,
       "step": 31
     },
     {
+      "epoch": 0.06037735849056604,
+      "grad_norm": 74.6181640625,
+      "learning_rate": 3.2e-09,
+      "loss": 1.5383,
       "step": 32
     },
     {
+      "epoch": 0.062264150943396226,
+      "grad_norm": 30.15936279296875,
+      "learning_rate": 3.3e-09,
+      "loss": 0.3719,
       "step": 33
     },
     {
+      "epoch": 0.06415094339622641,
+      "grad_norm": 157.64108276367188,
+      "learning_rate": 3.4000000000000003e-09,
+      "loss": 3.4692,
       "step": 34
     },
     {
+      "epoch": 0.0660377358490566,
+      "grad_norm": 43.18593215942383,
+      "learning_rate": 3.5e-09,
+      "loss": 0.549,
       "step": 35
     },
     {
+      "epoch": 0.06792452830188679,
+      "grad_norm": 125.1178207397461,
+      "learning_rate": 3.6e-09,
+      "loss": 1.3121,
       "step": 36
     },
     {
+      "epoch": 0.06981132075471698,
+      "grad_norm": 111.42374420166016,
+      "learning_rate": 3.7e-09,
+      "loss": 1.8228,
       "step": 37
     },
     {
+      "epoch": 0.07169811320754717,
+      "grad_norm": 35.12282943725586,
+      "learning_rate": 3.8e-09,
+      "loss": 0.5382,
       "step": 38
     },
     {
+      "epoch": 0.07358490566037736,
+      "grad_norm": 159.79856872558594,
+      "learning_rate": 3.9e-09,
+      "loss": 2.6343,
       "step": 39
     },
     {
+      "epoch": 0.07547169811320754,
+      "grad_norm": 20.610780715942383,
+      "learning_rate": 4e-09,
+      "loss": 0.5307,
       "step": 40
     },
     {
+      "epoch": 0.07547169811320754,
+      "eval_loss": 1.3228683471679688,
+      "eval_runtime": 20.4238,
+      "eval_samples_per_second": 14.444,
+      "eval_steps_per_second": 1.812,
       "step": 40
     },
     {
+      "epoch": 0.07735849056603773,
+      "grad_norm": 25.3465633392334,
+      "learning_rate": 4.0999999999999995e-09,
+      "loss": 0.7819,
       "step": 41
     },
     {
+      "epoch": 0.07924528301886792,
+      "grad_norm": 76.80998229980469,
+      "learning_rate": 4.1999999999999996e-09,
+      "loss": 1.4672,
       "step": 42
     },
     {
+      "epoch": 0.08113207547169811,
+      "grad_norm": 27.21162223815918,
+      "learning_rate": 4.3e-09,
+      "loss": 1.2953,
       "step": 43
     },
     {
+      "epoch": 0.0830188679245283,
+      "grad_norm": 105.13359832763672,
+      "learning_rate": 4.4000000000000005e-09,
+      "loss": 1.9177,
       "step": 44
     },
     {
+      "epoch": 0.08490566037735849,
+      "grad_norm": 28.627273559570312,
+      "learning_rate": 4.500000000000001e-09,
+      "loss": 0.2641,
       "step": 45
     },
     {
+      "epoch": 0.08679245283018867,
+      "grad_norm": 39.927616119384766,
+      "learning_rate": 4.600000000000001e-09,
+      "loss": 1.7988,
       "step": 46
     },
     {
+      "epoch": 0.08867924528301886,
+      "grad_norm": 117.57942962646484,
+      "learning_rate": 4.7e-09,
+      "loss": 1.8186,
       "step": 47
     },
     {
+      "epoch": 0.09056603773584905,
+      "grad_norm": 65.71804809570312,
+      "learning_rate": 4.8e-09,
+      "loss": 1.1402,
       "step": 48
     },
     {
+      "epoch": 0.09245283018867924,
+      "grad_norm": 45.33087921142578,
+      "learning_rate": 4.9e-09,
+      "loss": 1.6036,
       "step": 49
     },
     {
+      "epoch": 0.09433962264150944,
+      "grad_norm": 76.15016174316406,
+      "learning_rate": 5e-09,
+      "loss": 1.1135,
       "step": 50
     },
     {
+      "epoch": 0.09433962264150944,
+      "eval_loss": 1.322121500968933,
+      "eval_runtime": 20.4665,
+      "eval_samples_per_second": 14.414,
+      "eval_steps_per_second": 1.808,
       "step": 50
     }
   ],
   "logging_steps": 1,
+  "max_steps": 530,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 3589628022374400.0,
   "train_batch_size": 5,
   "trial_name": null,
   "trial_params": null

checkpoint-50/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7639eddf8788426b3403c73375c624a0bd04ca994bbc9d09fd8030ef07a2dfc2
 size 5112

 version https://git-lfs.github.com/spec/v1
+oid sha256:30601ceab6d241a569cfd829c7955620a983f03985e8370f4a3f11ecd5ec3a95
 size 5112