|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.332692623138428, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5551, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7167919799498746, |
|
"eval_f1": 0.6337397746565828, |
|
"eval_loss": 0.49750953912734985, |
|
"eval_precision": 0.6484917150545249, |
|
"eval_recall": 0.6271140207310419, |
|
"eval_runtime": 1.7156, |
|
"eval_samples_per_second": 232.578, |
|
"eval_steps_per_second": 29.145, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.58549690246582, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4753, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7418546365914787, |
|
"eval_f1": 0.7171128258430446, |
|
"eval_loss": 0.468201607465744, |
|
"eval_precision": 0.7117669953295278, |
|
"eval_recall": 0.747363156937625, |
|
"eval_runtime": 1.7118, |
|
"eval_samples_per_second": 233.092, |
|
"eval_steps_per_second": 29.21, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.9045073986053467, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.412, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8270676691729323, |
|
"eval_f1": 0.7803582113367107, |
|
"eval_loss": 0.38815850019454956, |
|
"eval_precision": 0.7993592785951591, |
|
"eval_recall": 0.7676395708310602, |
|
"eval_runtime": 1.7097, |
|
"eval_samples_per_second": 233.375, |
|
"eval_steps_per_second": 29.245, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 4.453057289123535, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3498, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8090546545634092, |
|
"eval_loss": 0.3690779209136963, |
|
"eval_precision": 0.8098422078713293, |
|
"eval_recall": 0.808283324240771, |
|
"eval_runtime": 1.7281, |
|
"eval_samples_per_second": 230.885, |
|
"eval_steps_per_second": 28.933, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 4.748023986816406, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.3361, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8145363408521303, |
|
"eval_f1": 0.7897487752079297, |
|
"eval_loss": 0.3794577121734619, |
|
"eval_precision": 0.7788623404833017, |
|
"eval_recall": 0.8112838697945082, |
|
"eval_runtime": 1.7098, |
|
"eval_samples_per_second": 233.358, |
|
"eval_steps_per_second": 29.243, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.2061963081359863, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3081, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7969924812030075, |
|
"eval_f1": 0.7760554889583348, |
|
"eval_loss": 0.4142220616340637, |
|
"eval_precision": 0.7665010460251046, |
|
"eval_recall": 0.8088743407892345, |
|
"eval_runtime": 1.7108, |
|
"eval_samples_per_second": 233.229, |
|
"eval_steps_per_second": 29.227, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.4639934301376343, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2918, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8234833375639119, |
|
"eval_loss": 0.3555023968219757, |
|
"eval_precision": 0.8130172220979647, |
|
"eval_recall": 0.8393344244408074, |
|
"eval_runtime": 1.7094, |
|
"eval_samples_per_second": 233.421, |
|
"eval_steps_per_second": 29.251, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 5.498388767242432, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2739, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8315033783783784, |
|
"eval_loss": 0.33167070150375366, |
|
"eval_precision": 0.8438775510204082, |
|
"eval_recall": 0.8217403164211674, |
|
"eval_runtime": 1.7093, |
|
"eval_samples_per_second": 233.426, |
|
"eval_steps_per_second": 29.251, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 10.206114768981934, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2586, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8336296003686878, |
|
"eval_loss": 0.35962939262390137, |
|
"eval_precision": 0.8243105209397344, |
|
"eval_recall": 0.8464266230223677, |
|
"eval_runtime": 1.7093, |
|
"eval_samples_per_second": 233.426, |
|
"eval_steps_per_second": 29.251, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 4.042139530181885, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2509, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8349466368826011, |
|
"eval_loss": 0.329887330532074, |
|
"eval_precision": 0.8325716845878136, |
|
"eval_recall": 0.8374704491725768, |
|
"eval_runtime": 1.7094, |
|
"eval_samples_per_second": 233.418, |
|
"eval_steps_per_second": 29.25, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 8.531843185424805, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2468, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8401647707947546, |
|
"eval_loss": 0.32244208455085754, |
|
"eval_precision": 0.8393298751432535, |
|
"eval_recall": 0.8410165484633569, |
|
"eval_runtime": 1.7111, |
|
"eval_samples_per_second": 233.186, |
|
"eval_steps_per_second": 29.221, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 8.214770317077637, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2372, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8313646980313647, |
|
"eval_loss": 0.32940539717674255, |
|
"eval_precision": 0.825091575091575, |
|
"eval_recall": 0.8389252591380251, |
|
"eval_runtime": 1.7104, |
|
"eval_samples_per_second": 233.277, |
|
"eval_steps_per_second": 29.233, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 3.748082160949707, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2305, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8411818738518064, |
|
"eval_loss": 0.31342318654060364, |
|
"eval_precision": 0.8448542607834644, |
|
"eval_recall": 0.8377886888525186, |
|
"eval_runtime": 1.7084, |
|
"eval_samples_per_second": 233.547, |
|
"eval_steps_per_second": 29.267, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 14.465522766113281, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2249, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8457993935430168, |
|
"eval_loss": 0.32253342866897583, |
|
"eval_precision": 0.8398540145985401, |
|
"eval_recall": 0.8527914166212038, |
|
"eval_runtime": 1.7086, |
|
"eval_samples_per_second": 233.518, |
|
"eval_steps_per_second": 29.263, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.3240478038787842, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2193, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8503151260504201, |
|
"eval_loss": 0.3187616467475891, |
|
"eval_precision": 0.8470628455912955, |
|
"eval_recall": 0.8538370612838698, |
|
"eval_runtime": 1.7079, |
|
"eval_samples_per_second": 233.622, |
|
"eval_steps_per_second": 29.276, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 5.576813697814941, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2061, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8277920424868874, |
|
"eval_loss": 0.3392154574394226, |
|
"eval_precision": 0.8186499829758257, |
|
"eval_recall": 0.8403800691034733, |
|
"eval_runtime": 1.7081, |
|
"eval_samples_per_second": 233.595, |
|
"eval_steps_per_second": 29.273, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 9.40192985534668, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.21, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8541488972828073, |
|
"eval_loss": 0.31219902634620667, |
|
"eval_precision": 0.8559859154929578, |
|
"eval_recall": 0.8523822513184216, |
|
"eval_runtime": 1.7119, |
|
"eval_samples_per_second": 233.08, |
|
"eval_steps_per_second": 29.208, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 7.254611492156982, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2112, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8303372434017595, |
|
"eval_loss": 0.3332211673259735, |
|
"eval_precision": 0.8215506437279252, |
|
"eval_recall": 0.8421531187488634, |
|
"eval_runtime": 1.7073, |
|
"eval_samples_per_second": 233.698, |
|
"eval_steps_per_second": 29.285, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.143869876861572, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2002, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8514869535493182, |
|
"eval_loss": 0.3120571970939636, |
|
"eval_precision": 0.8523821128305106, |
|
"eval_recall": 0.8506092016730314, |
|
"eval_runtime": 1.7097, |
|
"eval_samples_per_second": 233.375, |
|
"eval_steps_per_second": 29.245, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 7.309845924377441, |
|
"learning_rate": 0.0, |
|
"loss": 0.2041, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8495838108450449, |
|
"eval_loss": 0.3129415214061737, |
|
"eval_precision": 0.8478991596638655, |
|
"eval_recall": 0.8513366066557555, |
|
"eval_runtime": 1.7073, |
|
"eval_samples_per_second": 233.696, |
|
"eval_steps_per_second": 29.285, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7662265464912000.0, |
|
"train_loss": 0.28510660187142794, |
|
"train_runtime": 615.9137, |
|
"train_samples_per_second": 118.133, |
|
"train_steps_per_second": 3.962 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7662265464912000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|