|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.800751686096191, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5455, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7543859649122807, |
|
"eval_f1": 0.6639335808580858, |
|
"eval_loss": 0.48765844106674194, |
|
"eval_precision": 0.7053086419753087, |
|
"eval_recall": 0.6512093107837789, |
|
"eval_runtime": 1.7695, |
|
"eval_samples_per_second": 225.487, |
|
"eval_steps_per_second": 28.257, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.828364849090576, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4356, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8209674617878647, |
|
"eval_loss": 0.35374370217323303, |
|
"eval_precision": 0.810288627625787, |
|
"eval_recall": 0.8375613747954174, |
|
"eval_runtime": 1.7671, |
|
"eval_samples_per_second": 225.791, |
|
"eval_steps_per_second": 28.295, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 7.537293910980225, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3468, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8072960267885891, |
|
"eval_loss": 0.3415985107421875, |
|
"eval_precision": 0.8325657894736842, |
|
"eval_recall": 0.7910983815239134, |
|
"eval_runtime": 1.7644, |
|
"eval_samples_per_second": 226.134, |
|
"eval_steps_per_second": 28.338, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.427431106567383, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3049, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8180088078011953, |
|
"eval_loss": 0.3125925660133362, |
|
"eval_precision": 0.8323930726843348, |
|
"eval_recall": 0.8071467539552646, |
|
"eval_runtime": 1.7656, |
|
"eval_samples_per_second": 225.985, |
|
"eval_steps_per_second": 28.319, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.060295820236206, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2673, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8315338681464504, |
|
"eval_loss": 0.29185813665390015, |
|
"eval_precision": 0.8299369747899159, |
|
"eval_recall": 0.8331969448990726, |
|
"eval_runtime": 1.7632, |
|
"eval_samples_per_second": 226.299, |
|
"eval_steps_per_second": 28.358, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.6104700565338135, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2516, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8350734843845683, |
|
"eval_loss": 0.2822675108909607, |
|
"eval_precision": 0.8386812302741506, |
|
"eval_recall": 0.8317421349336243, |
|
"eval_runtime": 1.761, |
|
"eval_samples_per_second": 226.577, |
|
"eval_steps_per_second": 28.393, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.407837837934494, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2243, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8622085718274466, |
|
"eval_loss": 0.26883605122566223, |
|
"eval_precision": 0.8530168716042322, |
|
"eval_recall": 0.8741589379887251, |
|
"eval_runtime": 1.7626, |
|
"eval_samples_per_second": 226.37, |
|
"eval_steps_per_second": 28.367, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.6773055791854858, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2157, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8696646341463414, |
|
"eval_loss": 0.2641172409057617, |
|
"eval_precision": 0.8807130969146075, |
|
"eval_recall": 0.8605200945626478, |
|
"eval_runtime": 1.7614, |
|
"eval_samples_per_second": 226.526, |
|
"eval_steps_per_second": 28.387, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 3.9705936908721924, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2052, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8572517421602788, |
|
"eval_loss": 0.26267343759536743, |
|
"eval_precision": 0.8679426449878376, |
|
"eval_recall": 0.8484269867248591, |
|
"eval_runtime": 1.7598, |
|
"eval_samples_per_second": 226.73, |
|
"eval_steps_per_second": 28.412, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 13.646815299987793, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1864, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8548402404302435, |
|
"eval_loss": 0.288084477186203, |
|
"eval_precision": 0.8737291514386338, |
|
"eval_recall": 0.8409256228405164, |
|
"eval_runtime": 1.7593, |
|
"eval_samples_per_second": 226.79, |
|
"eval_steps_per_second": 28.42, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.454102039337158, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1928, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.867476104365797, |
|
"eval_loss": 0.2784619927406311, |
|
"eval_precision": 0.8592596863468634, |
|
"eval_recall": 0.8777050372795053, |
|
"eval_runtime": 1.7608, |
|
"eval_samples_per_second": 226.608, |
|
"eval_steps_per_second": 28.397, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 4.11415958404541, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1804, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8758710801393728, |
|
"eval_loss": 0.2506408393383026, |
|
"eval_precision": 0.8870983228779925, |
|
"eval_recall": 0.8665666484815421, |
|
"eval_runtime": 1.7598, |
|
"eval_samples_per_second": 226.734, |
|
"eval_steps_per_second": 28.413, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.9822263717651367, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1654, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8790689216221131, |
|
"eval_loss": 0.26640743017196655, |
|
"eval_precision": 0.8790689216221131, |
|
"eval_recall": 0.8790689216221131, |
|
"eval_runtime": 1.7621, |
|
"eval_samples_per_second": 226.429, |
|
"eval_steps_per_second": 28.375, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 4.247542381286621, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1567, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8878351186601172, |
|
"eval_loss": 0.2660583257675171, |
|
"eval_precision": 0.879776516905975, |
|
"eval_recall": 0.8976177486815784, |
|
"eval_runtime": 1.7643, |
|
"eval_samples_per_second": 226.146, |
|
"eval_steps_per_second": 28.339, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.1535786390304565, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1438, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9097744360902256, |
|
"eval_f1": 0.8917003438084323, |
|
"eval_loss": 0.2614538073539734, |
|
"eval_precision": 0.8898109243697478, |
|
"eval_recall": 0.893662484088016, |
|
"eval_runtime": 1.7625, |
|
"eval_samples_per_second": 226.385, |
|
"eval_steps_per_second": 28.369, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.5205868482589722, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1472, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8856836962422341, |
|
"eval_loss": 0.25552284717559814, |
|
"eval_precision": 0.8838235294117647, |
|
"eval_recall": 0.8876159301691217, |
|
"eval_runtime": 1.7611, |
|
"eval_samples_per_second": 226.564, |
|
"eval_steps_per_second": 28.392, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 6.205023288726807, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1394, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8790689216221131, |
|
"eval_loss": 0.2647987902164459, |
|
"eval_precision": 0.8790689216221131, |
|
"eval_recall": 0.8790689216221131, |
|
"eval_runtime": 1.7605, |
|
"eval_samples_per_second": 226.638, |
|
"eval_steps_per_second": 28.401, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 1.5985056161880493, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1387, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8862394957983193, |
|
"eval_loss": 0.26299694180488586, |
|
"eval_precision": 0.8826476009275225, |
|
"eval_recall": 0.8901163847972358, |
|
"eval_runtime": 1.7607, |
|
"eval_samples_per_second": 226.62, |
|
"eval_steps_per_second": 28.399, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 1.7635153532028198, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1378, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8845345436822225, |
|
"eval_loss": 0.26893848180770874, |
|
"eval_precision": 0.8865278628291489, |
|
"eval_recall": 0.8826150209128933, |
|
"eval_runtime": 1.7631, |
|
"eval_samples_per_second": 226.305, |
|
"eval_steps_per_second": 28.359, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.3988428115844727, |
|
"learning_rate": 0.0, |
|
"loss": 0.1365, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8817957385392532, |
|
"eval_loss": 0.2682853937149048, |
|
"eval_precision": 0.8827677592299257, |
|
"eval_recall": 0.8808419712675032, |
|
"eval_runtime": 1.7608, |
|
"eval_samples_per_second": 226.596, |
|
"eval_steps_per_second": 28.396, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.22609603365913766, |
|
"train_runtime": 618.465, |
|
"train_samples_per_second": 117.646, |
|
"train_steps_per_second": 3.945 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|