|
{ |
|
"best_metric": 0.6827160120010376, |
|
"best_model_checkpoint": "../artifacts/LlaMa3-QLoRA-PatentMatch-v0.1/checkpoint-50", |
|
"epoch": 0.4716981132075472, |
|
"eval_steps": 10, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.009433962264150943, |
|
"grad_norm": 12.244806289672852, |
|
"learning_rate": 2e-08, |
|
"loss": 0.4715, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.018867924528301886, |
|
"grad_norm": 11.454357147216797, |
|
"learning_rate": 4e-08, |
|
"loss": 0.5527, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02830188679245283, |
|
"grad_norm": 7.327939510345459, |
|
"learning_rate": 6e-08, |
|
"loss": 0.5359, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.03773584905660377, |
|
"grad_norm": 8.935256958007812, |
|
"learning_rate": 8e-08, |
|
"loss": 0.4292, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.04716981132075472, |
|
"grad_norm": 17.576908111572266, |
|
"learning_rate": 1e-07, |
|
"loss": 0.5657, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.05660377358490566, |
|
"grad_norm": 22.42218780517578, |
|
"learning_rate": 1.2e-07, |
|
"loss": 0.7024, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0660377358490566, |
|
"grad_norm": 7.509771347045898, |
|
"learning_rate": 1.4e-07, |
|
"loss": 0.5426, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.07547169811320754, |
|
"grad_norm": 24.912858963012695, |
|
"learning_rate": 1.6e-07, |
|
"loss": 0.6312, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.08490566037735849, |
|
"grad_norm": 10.798696517944336, |
|
"learning_rate": 1.8e-07, |
|
"loss": 0.4632, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.09433962264150944, |
|
"grad_norm": 9.916950225830078, |
|
"learning_rate": 2e-07, |
|
"loss": 0.6934, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09433962264150944, |
|
"eval_loss": 0.6845090985298157, |
|
"eval_runtime": 18.8138, |
|
"eval_samples_per_second": 15.68, |
|
"eval_steps_per_second": 3.136, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.10377358490566038, |
|
"grad_norm": 8.111969947814941, |
|
"learning_rate": 2.1999999999999998e-07, |
|
"loss": 0.7586, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.11320754716981132, |
|
"grad_norm": 25.175071716308594, |
|
"learning_rate": 2.4e-07, |
|
"loss": 0.6298, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.12264150943396226, |
|
"grad_norm": 5.813445568084717, |
|
"learning_rate": 2.6e-07, |
|
"loss": 0.5559, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.1320754716981132, |
|
"grad_norm": 7.799736022949219, |
|
"learning_rate": 2.8e-07, |
|
"loss": 0.5321, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.14150943396226415, |
|
"grad_norm": 10.612166404724121, |
|
"learning_rate": 3e-07, |
|
"loss": 0.5567, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.1509433962264151, |
|
"grad_norm": 25.862613677978516, |
|
"learning_rate": 3.2e-07, |
|
"loss": 0.7949, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.16037735849056603, |
|
"grad_norm": 5.672112941741943, |
|
"learning_rate": 3.4000000000000003e-07, |
|
"loss": 0.5568, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.16981132075471697, |
|
"grad_norm": 22.59090805053711, |
|
"learning_rate": 3.6e-07, |
|
"loss": 0.651, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.1792452830188679, |
|
"grad_norm": 6.6907548904418945, |
|
"learning_rate": 3.7999999999999996e-07, |
|
"loss": 0.5429, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.18867924528301888, |
|
"grad_norm": 7.563165187835693, |
|
"learning_rate": 4e-07, |
|
"loss": 0.5338, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18867924528301888, |
|
"eval_loss": 0.6880346536636353, |
|
"eval_runtime": 19.0282, |
|
"eval_samples_per_second": 15.503, |
|
"eval_steps_per_second": 3.101, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19811320754716982, |
|
"grad_norm": 22.867984771728516, |
|
"learning_rate": 4.1999999999999995e-07, |
|
"loss": 0.6839, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.20754716981132076, |
|
"grad_norm": 12.407017707824707, |
|
"learning_rate": 4.3999999999999997e-07, |
|
"loss": 0.6577, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.2169811320754717, |
|
"grad_norm": 12.605359077453613, |
|
"learning_rate": 4.6e-07, |
|
"loss": 0.7215, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.22641509433962265, |
|
"grad_norm": 8.375327110290527, |
|
"learning_rate": 4.8e-07, |
|
"loss": 0.5053, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.2358490566037736, |
|
"grad_norm": 16.666528701782227, |
|
"learning_rate": 5e-07, |
|
"loss": 0.5431, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.24528301886792453, |
|
"grad_norm": 27.57564353942871, |
|
"learning_rate": 5.2e-07, |
|
"loss": 0.6242, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.25471698113207547, |
|
"grad_norm": 14.450230598449707, |
|
"learning_rate": 5.4e-07, |
|
"loss": 0.6718, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.2641509433962264, |
|
"grad_norm": 16.55278968811035, |
|
"learning_rate": 5.6e-07, |
|
"loss": 0.6649, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.27358490566037735, |
|
"grad_norm": 17.196575164794922, |
|
"learning_rate": 5.8e-07, |
|
"loss": 0.6084, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.2830188679245283, |
|
"grad_norm": 38.10641860961914, |
|
"learning_rate": 6e-07, |
|
"loss": 0.6538, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2830188679245283, |
|
"eval_loss": 0.6865962743759155, |
|
"eval_runtime": 19.122, |
|
"eval_samples_per_second": 15.427, |
|
"eval_steps_per_second": 3.085, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.29245283018867924, |
|
"grad_norm": 9.382880210876465, |
|
"learning_rate": 6.2e-07, |
|
"loss": 0.6686, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.3018867924528302, |
|
"grad_norm": 25.904178619384766, |
|
"learning_rate": 6.4e-07, |
|
"loss": 0.5842, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.3113207547169811, |
|
"grad_norm": 10.835689544677734, |
|
"learning_rate": 6.6e-07, |
|
"loss": 0.6862, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.32075471698113206, |
|
"grad_norm": 16.35777473449707, |
|
"learning_rate": 6.800000000000001e-07, |
|
"loss": 0.4901, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.330188679245283, |
|
"grad_norm": 11.801332473754883, |
|
"learning_rate": 7e-07, |
|
"loss": 0.7005, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.33962264150943394, |
|
"grad_norm": 28.929777145385742, |
|
"learning_rate": 7.2e-07, |
|
"loss": 0.7141, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.3490566037735849, |
|
"grad_norm": 33.3692512512207, |
|
"learning_rate": 7.4e-07, |
|
"loss": 0.7235, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.3584905660377358, |
|
"grad_norm": 14.086514472961426, |
|
"learning_rate": 7.599999999999999e-07, |
|
"loss": 0.5546, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.36792452830188677, |
|
"grad_norm": 8.276351928710938, |
|
"learning_rate": 7.799999999999999e-07, |
|
"loss": 0.5855, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.37735849056603776, |
|
"grad_norm": 8.203176498413086, |
|
"learning_rate": 8e-07, |
|
"loss": 0.6988, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.37735849056603776, |
|
"eval_loss": 0.6843137741088867, |
|
"eval_runtime": 19.1524, |
|
"eval_samples_per_second": 15.403, |
|
"eval_steps_per_second": 3.081, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3867924528301887, |
|
"grad_norm": 15.79111099243164, |
|
"learning_rate": 8.199999999999999e-07, |
|
"loss": 0.5881, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.39622641509433965, |
|
"grad_norm": 16.36391258239746, |
|
"learning_rate": 8.399999999999999e-07, |
|
"loss": 0.6394, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.4056603773584906, |
|
"grad_norm": 14.09928035736084, |
|
"learning_rate": 8.599999999999999e-07, |
|
"loss": 0.5188, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.41509433962264153, |
|
"grad_norm": 13.666457176208496, |
|
"learning_rate": 8.799999999999999e-07, |
|
"loss": 0.6493, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.42452830188679247, |
|
"grad_norm": 26.71883773803711, |
|
"learning_rate": 9e-07, |
|
"loss": 0.5879, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4339622641509434, |
|
"grad_norm": 7.5422844886779785, |
|
"learning_rate": 9.2e-07, |
|
"loss": 0.5821, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.44339622641509435, |
|
"grad_norm": 23.531204223632812, |
|
"learning_rate": 9.399999999999999e-07, |
|
"loss": 0.6332, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.4528301886792453, |
|
"grad_norm": 30.758493423461914, |
|
"learning_rate": 9.6e-07, |
|
"loss": 0.7319, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.46226415094339623, |
|
"grad_norm": 12.101729393005371, |
|
"learning_rate": 9.8e-07, |
|
"loss": 0.5698, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.4716981132075472, |
|
"grad_norm": 8.760655403137207, |
|
"learning_rate": 1e-06, |
|
"loss": 0.5976, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4716981132075472, |
|
"eval_loss": 0.6827160120010376, |
|
"eval_runtime": 19.1865, |
|
"eval_samples_per_second": 15.375, |
|
"eval_steps_per_second": 3.075, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 318, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.905280632127488e+16, |
|
"train_batch_size": 5, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|