|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.953822374343872, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5448, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7243107769423559, |
|
"eval_f1": 0.6567661411411412, |
|
"eval_loss": 0.5046952366828918, |
|
"eval_precision": 0.6629251700680272, |
|
"eval_recall": 0.6524368066921259, |
|
"eval_runtime": 5.0686, |
|
"eval_samples_per_second": 78.72, |
|
"eval_steps_per_second": 9.865, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.6171164512634277, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4527, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7944862155388471, |
|
"eval_f1": 0.775152564736929, |
|
"eval_loss": 0.4319760501384735, |
|
"eval_precision": 0.7667055167055168, |
|
"eval_recall": 0.8121022004000726, |
|
"eval_runtime": 5.0454, |
|
"eval_samples_per_second": 79.082, |
|
"eval_steps_per_second": 9.91, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.439111709594727, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3603, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.7984931903796002, |
|
"eval_loss": 0.33696243166923523, |
|
"eval_precision": 0.8393082695594026, |
|
"eval_recall": 0.7768230587379523, |
|
"eval_runtime": 5.0496, |
|
"eval_samples_per_second": 79.016, |
|
"eval_steps_per_second": 9.902, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.9325271248817444, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3081, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8461962888779714, |
|
"eval_loss": 0.29947736859321594, |
|
"eval_precision": 0.8453465227094517, |
|
"eval_recall": 0.8470631023822512, |
|
"eval_runtime": 5.0527, |
|
"eval_samples_per_second": 78.967, |
|
"eval_steps_per_second": 9.896, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.1945856809616089, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2793, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8456742372671576, |
|
"eval_loss": 0.3008015751838684, |
|
"eval_precision": 0.8536697247706422, |
|
"eval_recall": 0.8388343335151845, |
|
"eval_runtime": 5.0466, |
|
"eval_samples_per_second": 79.063, |
|
"eval_steps_per_second": 9.908, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.8788158893585205, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2526, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8411818738518064, |
|
"eval_loss": 0.2986568510532379, |
|
"eval_precision": 0.8448542607834644, |
|
"eval_recall": 0.8377886888525186, |
|
"eval_runtime": 5.0571, |
|
"eval_samples_per_second": 78.899, |
|
"eval_steps_per_second": 9.887, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.2110929489135742, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2478, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.846679266293906, |
|
"eval_loss": 0.3030013144016266, |
|
"eval_precision": 0.8609191655801824, |
|
"eval_recall": 0.8356064739043463, |
|
"eval_runtime": 5.0491, |
|
"eval_samples_per_second": 79.023, |
|
"eval_steps_per_second": 9.903, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.5065371990203857, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2337, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8350789627607721, |
|
"eval_loss": 0.2974022924900055, |
|
"eval_precision": 0.8463358876939919, |
|
"eval_recall": 0.8260138206946717, |
|
"eval_runtime": 5.0637, |
|
"eval_samples_per_second": 78.796, |
|
"eval_steps_per_second": 9.874, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.127802848815918, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.217, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8395201930584144, |
|
"eval_loss": 0.2773597240447998, |
|
"eval_precision": 0.8562091503267973, |
|
"eval_recall": 0.8270594653573378, |
|
"eval_runtime": 5.0754, |
|
"eval_samples_per_second": 78.615, |
|
"eval_steps_per_second": 9.851, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 2.408611297607422, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1966, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.844327731092437, |
|
"eval_loss": 0.2846048176288605, |
|
"eval_precision": 0.8411320530352577, |
|
"eval_recall": 0.8477905073649754, |
|
"eval_runtime": 5.0388, |
|
"eval_samples_per_second": 79.185, |
|
"eval_steps_per_second": 9.923, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.3639743328094482, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.199, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8545433391506689, |
|
"eval_loss": 0.2909916341304779, |
|
"eval_precision": 0.8639270714012982, |
|
"eval_recall": 0.846653937079469, |
|
"eval_runtime": 5.0638, |
|
"eval_samples_per_second": 78.794, |
|
"eval_steps_per_second": 9.874, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 3.4640684127807617, |
|
"learning_rate": 2e-05, |
|
"loss": 0.187, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.846679266293906, |
|
"eval_loss": 0.2870851755142212, |
|
"eval_precision": 0.8609191655801824, |
|
"eval_recall": 0.8356064739043463, |
|
"eval_runtime": 5.0487, |
|
"eval_samples_per_second": 79.031, |
|
"eval_steps_per_second": 9.904, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 2.2104856967926025, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1812, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8526315789473684, |
|
"eval_loss": 0.2812826931476593, |
|
"eval_precision": 0.8585304054054055, |
|
"eval_recall": 0.8473813420621932, |
|
"eval_runtime": 5.0484, |
|
"eval_samples_per_second": 79.034, |
|
"eval_steps_per_second": 9.904, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 3.044590950012207, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1633, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8596342841745197, |
|
"eval_loss": 0.2956988215446472, |
|
"eval_precision": 0.8555364857667042, |
|
"eval_recall": 0.8641571194762684, |
|
"eval_runtime": 5.0481, |
|
"eval_samples_per_second": 79.04, |
|
"eval_steps_per_second": 9.905, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.9169403314590454, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1607, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8696722245432793, |
|
"eval_loss": 0.28752827644348145, |
|
"eval_precision": 0.8706135006701596, |
|
"eval_recall": 0.8687488634297145, |
|
"eval_runtime": 5.0457, |
|
"eval_samples_per_second": 79.078, |
|
"eval_steps_per_second": 9.91, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.9245001077651978, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1584, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8560793854229822, |
|
"eval_loss": 0.2859440743923187, |
|
"eval_precision": 0.8609538327526132, |
|
"eval_recall": 0.8516548463356974, |
|
"eval_runtime": 5.086, |
|
"eval_samples_per_second": 78.451, |
|
"eval_steps_per_second": 9.831, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.737988233566284, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1535, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8609292598654301, |
|
"eval_loss": 0.29243767261505127, |
|
"eval_precision": 0.8609292598654301, |
|
"eval_recall": 0.8609292598654301, |
|
"eval_runtime": 5.0479, |
|
"eval_samples_per_second": 79.042, |
|
"eval_steps_per_second": 9.905, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.7625505924224854, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1432, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8616171059774413, |
|
"eval_loss": 0.29657039046287537, |
|
"eval_precision": 0.859873949579832, |
|
"eval_recall": 0.8634297144935443, |
|
"eval_runtime": 5.0616, |
|
"eval_samples_per_second": 78.829, |
|
"eval_steps_per_second": 9.878, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.5611210465431213, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1466, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8568221901555235, |
|
"eval_loss": 0.29467570781707764, |
|
"eval_precision": 0.8596491228070176, |
|
"eval_recall": 0.8541553009638116, |
|
"eval_runtime": 5.0458, |
|
"eval_samples_per_second": 79.075, |
|
"eval_steps_per_second": 9.909, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 7.14449405670166, |
|
"learning_rate": 0.0, |
|
"loss": 0.1411, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8609292598654301, |
|
"eval_loss": 0.2950780689716339, |
|
"eval_precision": 0.8609292598654301, |
|
"eval_recall": 0.8609292598654301, |
|
"eval_runtime": 5.0549, |
|
"eval_samples_per_second": 78.933, |
|
"eval_steps_per_second": 9.891, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8460375995160000.0, |
|
"train_loss": 0.2363556080177182, |
|
"train_runtime": 1909.167, |
|
"train_samples_per_second": 38.184, |
|
"train_steps_per_second": 1.278 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8460375995160000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|