|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.062061786651611, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.566, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_f1": 0.6402562480227776, |
|
"eval_loss": 0.5205540657043457, |
|
"eval_precision": 0.6483653398896937, |
|
"eval_recall": 0.6353427895981087, |
|
"eval_runtime": 5.1194, |
|
"eval_samples_per_second": 77.939, |
|
"eval_steps_per_second": 9.767, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.104482173919678, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5117, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7343358395989975, |
|
"eval_f1": 0.69391211208893, |
|
"eval_loss": 0.5062463879585266, |
|
"eval_precision": 0.6880119239984399, |
|
"eval_recall": 0.7045371885797418, |
|
"eval_runtime": 5.0454, |
|
"eval_samples_per_second": 79.082, |
|
"eval_steps_per_second": 9.91, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.243982791900635, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4804, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7669172932330827, |
|
"eval_f1": 0.7152209115816456, |
|
"eval_loss": 0.46674054861068726, |
|
"eval_precision": 0.7182055749128919, |
|
"eval_recall": 0.7125841062011276, |
|
"eval_runtime": 5.0617, |
|
"eval_samples_per_second": 78.828, |
|
"eval_steps_per_second": 9.878, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.68826961517334, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4345, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7919799498746867, |
|
"eval_f1": 0.744501107107864, |
|
"eval_loss": 0.43496260046958923, |
|
"eval_precision": 0.7494180559924504, |
|
"eval_recall": 0.7403164211674851, |
|
"eval_runtime": 5.0625, |
|
"eval_samples_per_second": 78.814, |
|
"eval_steps_per_second": 9.876, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.9247703552246094, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4081, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7944862155388471, |
|
"eval_f1": 0.7660995138690305, |
|
"eval_loss": 0.43371620774269104, |
|
"eval_precision": 0.7565013111888113, |
|
"eval_recall": 0.7845971994908165, |
|
"eval_runtime": 5.0485, |
|
"eval_samples_per_second": 79.033, |
|
"eval_steps_per_second": 9.904, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.43408203125, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3793, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8195488721804511, |
|
"eval_f1": 0.7753378378378378, |
|
"eval_loss": 0.39230969548225403, |
|
"eval_precision": 0.7857142857142857, |
|
"eval_recall": 0.7673213311511184, |
|
"eval_runtime": 5.0697, |
|
"eval_samples_per_second": 78.702, |
|
"eval_steps_per_second": 9.862, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.72346031665802, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3665, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8295739348370927, |
|
"eval_f1": 0.7933776044839771, |
|
"eval_loss": 0.3765198886394501, |
|
"eval_precision": 0.7949020208205757, |
|
"eval_recall": 0.7919167121294781, |
|
"eval_runtime": 5.0595, |
|
"eval_samples_per_second": 78.862, |
|
"eval_steps_per_second": 9.882, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.3123555183410645, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3471, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.796615353247018, |
|
"eval_loss": 0.368134468793869, |
|
"eval_precision": 0.8088983050847458, |
|
"eval_recall": 0.7872340425531914, |
|
"eval_runtime": 5.1048, |
|
"eval_samples_per_second": 78.162, |
|
"eval_steps_per_second": 9.795, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.656528949737549, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3498, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8320802005012531, |
|
"eval_f1": 0.790357364116157, |
|
"eval_loss": 0.3676688075065613, |
|
"eval_precision": 0.8023956975228161, |
|
"eval_recall": 0.7811874886342971, |
|
"eval_runtime": 5.0584, |
|
"eval_samples_per_second": 78.879, |
|
"eval_steps_per_second": 9.885, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 6.076303482055664, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3282, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8345864661654135, |
|
"eval_f1": 0.7917273014868713, |
|
"eval_loss": 0.363395094871521, |
|
"eval_precision": 0.8074456774536514, |
|
"eval_recall": 0.780460083651573, |
|
"eval_runtime": 5.0503, |
|
"eval_samples_per_second": 79.005, |
|
"eval_steps_per_second": 9.9, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.567991256713867, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.3149, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8065409159159159, |
|
"eval_loss": 0.3537313938140869, |
|
"eval_precision": 0.8180272108843537, |
|
"eval_recall": 0.7975541007455902, |
|
"eval_runtime": 5.057, |
|
"eval_samples_per_second": 78.901, |
|
"eval_steps_per_second": 9.887, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 11.14825439453125, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3092, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8167483159828537, |
|
"eval_loss": 0.3528764247894287, |
|
"eval_precision": 0.8201621387462095, |
|
"eval_recall": 0.8136024731769412, |
|
"eval_runtime": 5.0562, |
|
"eval_samples_per_second": 78.913, |
|
"eval_steps_per_second": 9.889, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 4.867825031280518, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.3135, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8121903546212454, |
|
"eval_loss": 0.3471047580242157, |
|
"eval_precision": 0.8331751305173232, |
|
"eval_recall": 0.7978723404255319, |
|
"eval_runtime": 5.0923, |
|
"eval_samples_per_second": 78.353, |
|
"eval_steps_per_second": 9.819, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 12.051921844482422, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.3103, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8269335415335841, |
|
"eval_loss": 0.3426941931247711, |
|
"eval_precision": 0.8430382253911666, |
|
"eval_recall": 0.8149663575195489, |
|
"eval_runtime": 5.0532, |
|
"eval_samples_per_second": 78.96, |
|
"eval_steps_per_second": 9.895, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.2898627519607544, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2974, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8297847585805701, |
|
"eval_loss": 0.33716997504234314, |
|
"eval_precision": 0.8385357006491028, |
|
"eval_recall": 0.8224677214038916, |
|
"eval_runtime": 5.0847, |
|
"eval_samples_per_second": 78.471, |
|
"eval_steps_per_second": 9.833, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 6.9146409034729, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2905, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8386324041811847, |
|
"eval_loss": 0.3345378339290619, |
|
"eval_precision": 0.8487869670976828, |
|
"eval_recall": 0.830287324968176, |
|
"eval_runtime": 5.0913, |
|
"eval_samples_per_second": 78.368, |
|
"eval_steps_per_second": 9.821, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 4.737354278564453, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2895, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8269335415335841, |
|
"eval_loss": 0.3339170217514038, |
|
"eval_precision": 0.8430382253911666, |
|
"eval_recall": 0.8149663575195489, |
|
"eval_runtime": 5.0546, |
|
"eval_samples_per_second": 78.938, |
|
"eval_steps_per_second": 9.892, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.6233842372894287, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2922, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8386324041811847, |
|
"eval_loss": 0.3318663537502289, |
|
"eval_precision": 0.8487869670976828, |
|
"eval_recall": 0.830287324968176, |
|
"eval_runtime": 5.0825, |
|
"eval_samples_per_second": 78.505, |
|
"eval_steps_per_second": 9.838, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.824616432189941, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2843, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8269335415335841, |
|
"eval_loss": 0.3319249749183655, |
|
"eval_precision": 0.8430382253911666, |
|
"eval_recall": 0.8149663575195489, |
|
"eval_runtime": 5.0652, |
|
"eval_samples_per_second": 78.772, |
|
"eval_steps_per_second": 9.871, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 7.412130832672119, |
|
"learning_rate": 0.0, |
|
"loss": 0.287, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8279052989013229, |
|
"eval_loss": 0.33124828338623047, |
|
"eval_precision": 0.8414113428943938, |
|
"eval_recall": 0.8174668121476631, |
|
"eval_runtime": 5.0449, |
|
"eval_samples_per_second": 79.09, |
|
"eval_steps_per_second": 9.911, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7597037114448000.0, |
|
"train_loss": 0.35801424432973394, |
|
"train_runtime": 1954.4162, |
|
"train_samples_per_second": 37.229, |
|
"train_steps_per_second": 1.248 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7597037114448000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|