|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.245405197143555, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5417, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7368421052631579, |
|
"eval_f1": 0.6531440162271805, |
|
"eval_loss": 0.4691583514213562, |
|
"eval_precision": 0.6762575228471654, |
|
"eval_recall": 0.6437988725222767, |
|
"eval_runtime": 5.2679, |
|
"eval_samples_per_second": 75.742, |
|
"eval_steps_per_second": 9.491, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.8213655948638916, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4301, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7769423558897243, |
|
"eval_f1": 0.7593078346448687, |
|
"eval_loss": 0.4378258287906647, |
|
"eval_precision": 0.7546743295019157, |
|
"eval_recall": 0.8021913075104565, |
|
"eval_runtime": 5.0861, |
|
"eval_samples_per_second": 78.449, |
|
"eval_steps_per_second": 9.831, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.3787574768066406, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3347, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8086360542112754, |
|
"eval_loss": 0.34514203667640686, |
|
"eval_precision": 0.8158019614046188, |
|
"eval_recall": 0.8025550100018185, |
|
"eval_runtime": 5.1159, |
|
"eval_samples_per_second": 77.992, |
|
"eval_steps_per_second": 9.773, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.8603061437606812, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2954, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8359175094431583, |
|
"eval_loss": 0.33369535207748413, |
|
"eval_precision": 0.8376607470912432, |
|
"eval_recall": 0.8342425895617385, |
|
"eval_runtime": 5.1268, |
|
"eval_samples_per_second": 77.826, |
|
"eval_steps_per_second": 9.753, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.168339967727661, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2632, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8321363988633428, |
|
"eval_loss": 0.33563923835754395, |
|
"eval_precision": 0.8247520756457565, |
|
"eval_recall": 0.8414257137661394, |
|
"eval_runtime": 5.117, |
|
"eval_samples_per_second": 77.976, |
|
"eval_steps_per_second": 9.771, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.1380066871643066, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2492, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8231484129253646, |
|
"eval_loss": 0.32611119747161865, |
|
"eval_precision": 0.8109975961538461, |
|
"eval_recall": 0.84506273867976, |
|
"eval_runtime": 5.0938, |
|
"eval_samples_per_second": 78.331, |
|
"eval_steps_per_second": 9.816, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.41156822443008423, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.227, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8602043795620438, |
|
"eval_loss": 0.2977831959724426, |
|
"eval_precision": 0.849624060150376, |
|
"eval_recall": 0.8748863429714493, |
|
"eval_runtime": 5.1152, |
|
"eval_samples_per_second": 78.003, |
|
"eval_steps_per_second": 9.775, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.2771873474121094, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2189, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8703663593044124, |
|
"eval_loss": 0.2742370069026947, |
|
"eval_precision": 0.8789149003479912, |
|
"eval_recall": 0.8630205491907619, |
|
"eval_runtime": 5.0805, |
|
"eval_samples_per_second": 78.536, |
|
"eval_steps_per_second": 9.842, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 7.185235023498535, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2068, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8715803025426456, |
|
"eval_loss": 0.28745463490486145, |
|
"eval_precision": 0.8673433153814287, |
|
"eval_recall": 0.8762502273140571, |
|
"eval_runtime": 5.257, |
|
"eval_samples_per_second": 75.898, |
|
"eval_steps_per_second": 9.511, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 6.480859756469727, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1935, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8872855539522206, |
|
"eval_loss": 0.2693336009979248, |
|
"eval_precision": 0.8903508771929824, |
|
"eval_recall": 0.8843880705582834, |
|
"eval_runtime": 5.0952, |
|
"eval_samples_per_second": 78.309, |
|
"eval_steps_per_second": 9.813, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 1.1324069499969482, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1729, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8900228699985846, |
|
"eval_loss": 0.27149420976638794, |
|
"eval_precision": 0.8840175953079179, |
|
"eval_recall": 0.8968903436988543, |
|
"eval_runtime": 5.1005, |
|
"eval_samples_per_second": 78.228, |
|
"eval_steps_per_second": 9.803, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.9420219659805298, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1639, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.882467302933899, |
|
"eval_loss": 0.2754858136177063, |
|
"eval_precision": 0.8732988802756245, |
|
"eval_recall": 0.8940716493907983, |
|
"eval_runtime": 5.1018, |
|
"eval_samples_per_second": 78.208, |
|
"eval_steps_per_second": 9.801, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 3.3185982704162598, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1564, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8817957385392532, |
|
"eval_loss": 0.2662343382835388, |
|
"eval_precision": 0.8827677592299257, |
|
"eval_recall": 0.8808419712675032, |
|
"eval_runtime": 5.116, |
|
"eval_samples_per_second": 77.991, |
|
"eval_steps_per_second": 9.773, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 6.053642272949219, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1495, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8835036496350365, |
|
"eval_loss": 0.29733341932296753, |
|
"eval_precision": 0.8721804511278195, |
|
"eval_recall": 0.8990725586470267, |
|
"eval_runtime": 5.3105, |
|
"eval_samples_per_second": 75.134, |
|
"eval_steps_per_second": 9.415, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.5649229884147644, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1487, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9097744360902256, |
|
"eval_f1": 0.8932457339913193, |
|
"eval_loss": 0.27316734194755554, |
|
"eval_precision": 0.8864525547445254, |
|
"eval_recall": 0.9011638479723586, |
|
"eval_runtime": 5.0557, |
|
"eval_samples_per_second": 78.921, |
|
"eval_steps_per_second": 9.89, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.219386100769043, |
|
"learning_rate": 1e-05, |
|
"loss": 0.141, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8888416422287391, |
|
"eval_loss": 0.2841818034648895, |
|
"eval_precision": 0.8783752990771334, |
|
"eval_recall": 0.9026186579378068, |
|
"eval_runtime": 5.1317, |
|
"eval_samples_per_second": 77.752, |
|
"eval_steps_per_second": 9.743, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.3799021244049072, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1276, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8878351186601172, |
|
"eval_loss": 0.2794151306152344, |
|
"eval_precision": 0.879776516905975, |
|
"eval_recall": 0.8976177486815784, |
|
"eval_runtime": 5.1403, |
|
"eval_samples_per_second": 77.623, |
|
"eval_steps_per_second": 9.727, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.4008965492248535, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1383, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8910359080340997, |
|
"eval_loss": 0.2787146270275116, |
|
"eval_precision": 0.8822647601476015, |
|
"eval_recall": 0.9018912529550827, |
|
"eval_runtime": 5.1249, |
|
"eval_samples_per_second": 77.855, |
|
"eval_steps_per_second": 9.756, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.290771245956421, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1371, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8856624319419237, |
|
"eval_loss": 0.2780250012874603, |
|
"eval_precision": 0.8758364312267658, |
|
"eval_recall": 0.8983451536643026, |
|
"eval_runtime": 5.1132, |
|
"eval_samples_per_second": 78.033, |
|
"eval_steps_per_second": 9.779, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 2.2898199558258057, |
|
"learning_rate": 0.0, |
|
"loss": 0.1248, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8856624319419237, |
|
"eval_loss": 0.2808963358402252, |
|
"eval_precision": 0.8758364312267658, |
|
"eval_recall": 0.8983451536643026, |
|
"eval_runtime": 5.079, |
|
"eval_samples_per_second": 78.559, |
|
"eval_steps_per_second": 9.845, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.22104478507745462, |
|
"train_runtime": 1939.3334, |
|
"train_samples_per_second": 37.518, |
|
"train_steps_per_second": 1.258 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|