|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.584501266479492, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5636, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7243107769423559, |
|
"eval_f1": 0.6354408930825969, |
|
"eval_loss": 0.507041335105896, |
|
"eval_precision": 0.6575309104533533, |
|
"eval_recall": 0.6274322604109838, |
|
"eval_runtime": 4.6793, |
|
"eval_samples_per_second": 85.269, |
|
"eval_steps_per_second": 10.685, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.185054302215576, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5128, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7343358395989975, |
|
"eval_f1": 0.6976408350014298, |
|
"eval_loss": 0.501506507396698, |
|
"eval_precision": 0.6911057692307692, |
|
"eval_recall": 0.7120385524640844, |
|
"eval_runtime": 5.0462, |
|
"eval_samples_per_second": 79.069, |
|
"eval_steps_per_second": 9.908, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.350452423095703, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4941, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7468671679197995, |
|
"eval_f1": 0.6969020059116857, |
|
"eval_loss": 0.470926433801651, |
|
"eval_precision": 0.6955197132616487, |
|
"eval_recall": 0.698399709038007, |
|
"eval_runtime": 5.0701, |
|
"eval_samples_per_second": 78.696, |
|
"eval_steps_per_second": 9.862, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.6379525661468506, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4702, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7744360902255639, |
|
"eval_f1": 0.7207099303135889, |
|
"eval_loss": 0.4496114253997803, |
|
"eval_precision": 0.7274676737933683, |
|
"eval_recall": 0.7154028005091835, |
|
"eval_runtime": 5.0568, |
|
"eval_samples_per_second": 78.904, |
|
"eval_steps_per_second": 9.888, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.561305046081543, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4704, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7719298245614035, |
|
"eval_f1": 0.7320072332730561, |
|
"eval_loss": 0.45210200548171997, |
|
"eval_precision": 0.7269805119926199, |
|
"eval_recall": 0.7386342971449354, |
|
"eval_runtime": 5.0475, |
|
"eval_samples_per_second": 79.049, |
|
"eval_steps_per_second": 9.906, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.5164988040924072, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.4616, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7644110275689223, |
|
"eval_f1": 0.7212527498662227, |
|
"eval_loss": 0.4490063786506653, |
|
"eval_precision": 0.7175328467153285, |
|
"eval_recall": 0.7258137843244226, |
|
"eval_runtime": 5.059, |
|
"eval_samples_per_second": 78.869, |
|
"eval_steps_per_second": 9.883, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.614987373352051, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.4543, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7819548872180451, |
|
"eval_f1": 0.7449392712550607, |
|
"eval_loss": 0.4381465017795563, |
|
"eval_precision": 0.7389190734915643, |
|
"eval_recall": 0.7532278596108384, |
|
"eval_runtime": 5.0493, |
|
"eval_samples_per_second": 79.021, |
|
"eval_steps_per_second": 9.902, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.577010631561279, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4532, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8070175438596491, |
|
"eval_f1": 0.7519478107817887, |
|
"eval_loss": 0.41970905661582947, |
|
"eval_precision": 0.7744157656086501, |
|
"eval_recall": 0.7384524458992544, |
|
"eval_runtime": 5.0679, |
|
"eval_samples_per_second": 78.731, |
|
"eval_steps_per_second": 9.866, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 3.4122800827026367, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.4517, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7969924812030075, |
|
"eval_f1": 0.7544988415815261, |
|
"eval_loss": 0.4194534420967102, |
|
"eval_precision": 0.7551480443523821, |
|
"eval_recall": 0.753864338970722, |
|
"eval_runtime": 5.0418, |
|
"eval_samples_per_second": 79.138, |
|
"eval_steps_per_second": 9.917, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.07973575592041, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.4438, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8170426065162907, |
|
"eval_f1": 0.7539886990379824, |
|
"eval_loss": 0.41020727157592773, |
|
"eval_precision": 0.8012931034482759, |
|
"eval_recall": 0.7330423713402436, |
|
"eval_runtime": 5.0907, |
|
"eval_samples_per_second": 78.378, |
|
"eval_steps_per_second": 9.822, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 3.512185573577881, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.4389, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8270676691729323, |
|
"eval_f1": 0.7875973059089472, |
|
"eval_loss": 0.41120436787605286, |
|
"eval_precision": 0.7933469644542308, |
|
"eval_recall": 0.7826422985997454, |
|
"eval_runtime": 5.0564, |
|
"eval_samples_per_second": 78.909, |
|
"eval_steps_per_second": 9.888, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 6.24143123626709, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4428, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7969924812030075, |
|
"eval_f1": 0.7603603603603604, |
|
"eval_loss": 0.417868971824646, |
|
"eval_precision": 0.7554945054945055, |
|
"eval_recall": 0.766366612111293, |
|
"eval_runtime": 5.0497, |
|
"eval_samples_per_second": 79.014, |
|
"eval_steps_per_second": 9.901, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.4772980213165283, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.4421, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8320802005012531, |
|
"eval_f1": 0.7828368575397437, |
|
"eval_loss": 0.4030059576034546, |
|
"eval_precision": 0.810950998442147, |
|
"eval_recall": 0.7661847608656118, |
|
"eval_runtime": 5.0599, |
|
"eval_samples_per_second": 78.855, |
|
"eval_steps_per_second": 9.882, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 4.054733753204346, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.4403, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8320802005012531, |
|
"eval_f1": 0.7915149151881459, |
|
"eval_loss": 0.4036868214607239, |
|
"eval_precision": 0.801371627277996, |
|
"eval_recall": 0.7836879432624113, |
|
"eval_runtime": 5.0499, |
|
"eval_samples_per_second": 79.012, |
|
"eval_steps_per_second": 9.901, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 2.8372957706451416, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.4392, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8220551378446115, |
|
"eval_f1": 0.7858811080458032, |
|
"eval_loss": 0.4077085256576538, |
|
"eval_precision": 0.7851800470474697, |
|
"eval_recall": 0.7865975631933079, |
|
"eval_runtime": 5.0546, |
|
"eval_samples_per_second": 78.938, |
|
"eval_steps_per_second": 9.892, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 3.2562994956970215, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4329, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8195488721804511, |
|
"eval_f1": 0.7834006876168647, |
|
"eval_loss": 0.40617087483406067, |
|
"eval_precision": 0.7820378151260504, |
|
"eval_recall": 0.7848245135479177, |
|
"eval_runtime": 5.0513, |
|
"eval_samples_per_second": 78.99, |
|
"eval_steps_per_second": 9.898, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.811532497406006, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.4338, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8145363408521303, |
|
"eval_f1": 0.7773840400506664, |
|
"eval_loss": 0.4058062434196472, |
|
"eval_precision": 0.7760504201680672, |
|
"eval_recall": 0.7787779596290234, |
|
"eval_runtime": 5.0681, |
|
"eval_samples_per_second": 78.728, |
|
"eval_steps_per_second": 9.866, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.1403231620788574, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4407, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8145363408521303, |
|
"eval_f1": 0.7762775050009092, |
|
"eval_loss": 0.40417465567588806, |
|
"eval_precision": 0.7762775050009092, |
|
"eval_recall": 0.7762775050009092, |
|
"eval_runtime": 5.0621, |
|
"eval_samples_per_second": 78.821, |
|
"eval_steps_per_second": 9.877, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 2.9727184772491455, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.4329, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8195488721804511, |
|
"eval_f1": 0.781223345924211, |
|
"eval_loss": 0.4033084809780121, |
|
"eval_precision": 0.7826852418860992, |
|
"eval_recall": 0.7798236042916894, |
|
"eval_runtime": 5.0559, |
|
"eval_samples_per_second": 78.918, |
|
"eval_steps_per_second": 9.889, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 5.092881202697754, |
|
"learning_rate": 0.0, |
|
"loss": 0.4292, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8145363408521303, |
|
"eval_f1": 0.7762775050009092, |
|
"eval_loss": 0.40420523285865784, |
|
"eval_precision": 0.7762775050009092, |
|
"eval_recall": 0.7762775050009092, |
|
"eval_runtime": 5.0503, |
|
"eval_samples_per_second": 79.006, |
|
"eval_steps_per_second": 9.9, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7588990440528000.0, |
|
"train_loss": 0.45743675075593543, |
|
"train_runtime": 1951.2471, |
|
"train_samples_per_second": 37.289, |
|
"train_steps_per_second": 1.25 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7588990440528000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|