|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.0894904136657715, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.541, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7293233082706767, |
|
"eval_f1": 0.6396989966555184, |
|
"eval_loss": 0.4985284209251404, |
|
"eval_precision": 0.664766661583041, |
|
"eval_recall": 0.630978359701764, |
|
"eval_runtime": 5.2157, |
|
"eval_samples_per_second": 76.5, |
|
"eval_steps_per_second": 9.586, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 5.95181941986084, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4477, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7644110275689223, |
|
"eval_f1": 0.7461557203963398, |
|
"eval_loss": 0.44652456045150757, |
|
"eval_precision": 0.7426785714285714, |
|
"eval_recall": 0.7883251500272777, |
|
"eval_runtime": 5.1119, |
|
"eval_samples_per_second": 78.053, |
|
"eval_steps_per_second": 9.781, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.869917154312134, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.347, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8255102040816327, |
|
"eval_loss": 0.323697566986084, |
|
"eval_precision": 0.8556293485135991, |
|
"eval_recall": 0.8067375886524822, |
|
"eval_runtime": 5.1398, |
|
"eval_samples_per_second": 77.63, |
|
"eval_steps_per_second": 9.728, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.4031054973602295, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3005, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8683279483657071, |
|
"eval_loss": 0.2991793751716614, |
|
"eval_precision": 0.873366724738676, |
|
"eval_recall": 0.863747954173486, |
|
"eval_runtime": 5.1616, |
|
"eval_samples_per_second": 77.302, |
|
"eval_steps_per_second": 9.687, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.774945020675659, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.281, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8342105263157895, |
|
"eval_loss": 0.2868594527244568, |
|
"eval_precision": 0.8398085585585586, |
|
"eval_recall": 0.82924168030551, |
|
"eval_runtime": 5.1483, |
|
"eval_samples_per_second": 77.502, |
|
"eval_steps_per_second": 9.712, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.3688924312591553, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2419, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8537390029325513, |
|
"eval_loss": 0.29453349113464355, |
|
"eval_precision": 0.8442805058676086, |
|
"eval_recall": 0.8663393344244408, |
|
"eval_runtime": 5.0969, |
|
"eval_samples_per_second": 78.284, |
|
"eval_steps_per_second": 9.81, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.5997357964515686, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2394, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8529524583135901, |
|
"eval_loss": 0.2835337221622467, |
|
"eval_precision": 0.8504480286738352, |
|
"eval_recall": 0.8556101109292599, |
|
"eval_runtime": 5.0928, |
|
"eval_samples_per_second": 78.346, |
|
"eval_steps_per_second": 9.818, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.8720760345458984, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2192, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.850729517396184, |
|
"eval_loss": 0.28028008341789246, |
|
"eval_precision": 0.8535087719298247, |
|
"eval_recall": 0.8481087470449173, |
|
"eval_runtime": 5.1964, |
|
"eval_samples_per_second": 76.784, |
|
"eval_steps_per_second": 9.622, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.954423904418945, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2144, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8480717680029244, |
|
"eval_loss": 0.28611448407173157, |
|
"eval_precision": 0.8498775260257195, |
|
"eval_recall": 0.8463356973995272, |
|
"eval_runtime": 5.1448, |
|
"eval_samples_per_second": 77.554, |
|
"eval_steps_per_second": 9.719, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 4.511594772338867, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2056, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8696722245432793, |
|
"eval_loss": 0.27244648337364197, |
|
"eval_precision": 0.8706135006701596, |
|
"eval_recall": 0.8687488634297145, |
|
"eval_runtime": 5.104, |
|
"eval_samples_per_second": 78.174, |
|
"eval_steps_per_second": 9.796, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.713789463043213, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1822, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8662440310793597, |
|
"eval_loss": 0.28133633732795715, |
|
"eval_precision": 0.8606158357771261, |
|
"eval_recall": 0.872704128023277, |
|
"eval_runtime": 5.1442, |
|
"eval_samples_per_second": 77.563, |
|
"eval_steps_per_second": 9.72, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 6.766155242919922, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1817, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8584001703456596, |
|
"eval_loss": 0.2900215685367584, |
|
"eval_precision": 0.8759655377302435, |
|
"eval_recall": 0.8451991271140207, |
|
"eval_runtime": 5.1374, |
|
"eval_samples_per_second": 77.665, |
|
"eval_steps_per_second": 9.733, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.9034644365310669, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1621, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8710526315789473, |
|
"eval_loss": 0.29263192415237427, |
|
"eval_precision": 0.8772522522522522, |
|
"eval_recall": 0.8655210038188761, |
|
"eval_runtime": 5.1149, |
|
"eval_samples_per_second": 78.008, |
|
"eval_steps_per_second": 9.775, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 6.05497932434082, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1577, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8709582797445791, |
|
"eval_loss": 0.29044803977012634, |
|
"eval_precision": 0.8683243727598566, |
|
"eval_recall": 0.8737497726859429, |
|
"eval_runtime": 5.096, |
|
"eval_samples_per_second": 78.297, |
|
"eval_steps_per_second": 9.812, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 3.013443946838379, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1612, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8587719298245614, |
|
"eval_loss": 0.2996305227279663, |
|
"eval_precision": 0.864771021021021, |
|
"eval_recall": 0.8534278959810875, |
|
"eval_runtime": 5.1189, |
|
"eval_samples_per_second": 77.946, |
|
"eval_steps_per_second": 9.768, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.775810718536377, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1496, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8649563392675828, |
|
"eval_loss": 0.29704856872558594, |
|
"eval_precision": 0.8623655913978494, |
|
"eval_recall": 0.8677032187670486, |
|
"eval_runtime": 5.2102, |
|
"eval_samples_per_second": 76.58, |
|
"eval_steps_per_second": 9.596, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.5526378154754639, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.149, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.858259325044405, |
|
"eval_loss": 0.29482966661453247, |
|
"eval_precision": 0.8573798178418481, |
|
"eval_recall": 0.8591562102200401, |
|
"eval_runtime": 5.1218, |
|
"eval_samples_per_second": 77.902, |
|
"eval_steps_per_second": 9.762, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.444484233856201, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1424, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8609292598654301, |
|
"eval_loss": 0.29769569635391235, |
|
"eval_precision": 0.8609292598654301, |
|
"eval_recall": 0.8609292598654301, |
|
"eval_runtime": 5.0934, |
|
"eval_samples_per_second": 78.336, |
|
"eval_steps_per_second": 9.817, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.0727450847625732, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1383, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8602260265626904, |
|
"eval_loss": 0.2990491986274719, |
|
"eval_precision": 0.8620943049601959, |
|
"eval_recall": 0.8584288052373159, |
|
"eval_runtime": 5.1176, |
|
"eval_samples_per_second": 77.966, |
|
"eval_steps_per_second": 9.77, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 4.4248151779174805, |
|
"learning_rate": 0.0, |
|
"loss": 0.1407, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.858259325044405, |
|
"eval_loss": 0.2988053262233734, |
|
"eval_precision": 0.8573798178418481, |
|
"eval_recall": 0.8591562102200401, |
|
"eval_runtime": 5.0823, |
|
"eval_samples_per_second": 78.508, |
|
"eval_steps_per_second": 9.838, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.2301350734272941, |
|
"train_runtime": 1951.0131, |
|
"train_samples_per_second": 37.293, |
|
"train_steps_per_second": 1.251 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|