|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.088006496429443, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5634, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7192982456140351, |
|
"eval_f1": 0.6524390243902439, |
|
"eval_loss": 0.5107927322387695, |
|
"eval_precision": 0.6572301881961337, |
|
"eval_recall": 0.6488907074013457, |
|
"eval_runtime": 5.127, |
|
"eval_samples_per_second": 77.824, |
|
"eval_steps_per_second": 9.752, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.8167991638183594, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5081, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7218045112781954, |
|
"eval_f1": 0.6887653635603403, |
|
"eval_loss": 0.5049471855163574, |
|
"eval_precision": 0.6829453441295547, |
|
"eval_recall": 0.7081742134933624, |
|
"eval_runtime": 5.0551, |
|
"eval_samples_per_second": 78.931, |
|
"eval_steps_per_second": 9.891, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.013192176818848, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4924, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7493734335839599, |
|
"eval_f1": 0.6976723040552828, |
|
"eval_loss": 0.46672317385673523, |
|
"eval_precision": 0.6976723040552828, |
|
"eval_recall": 0.6976723040552828, |
|
"eval_runtime": 5.1333, |
|
"eval_samples_per_second": 77.727, |
|
"eval_steps_per_second": 9.74, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.717806100845337, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4698, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7794486215538847, |
|
"eval_f1": 0.7206949217258496, |
|
"eval_loss": 0.43917685747146606, |
|
"eval_precision": 0.7348989898989899, |
|
"eval_recall": 0.711447535915621, |
|
"eval_runtime": 5.0551, |
|
"eval_samples_per_second": 78.93, |
|
"eval_steps_per_second": 9.891, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.4102962017059326, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4519, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7468671679197995, |
|
"eval_f1": 0.7226057806810438, |
|
"eval_loss": 0.4547964334487915, |
|
"eval_precision": 0.7169434353918007, |
|
"eval_recall": 0.7534097108565194, |
|
"eval_runtime": 5.0693, |
|
"eval_samples_per_second": 78.709, |
|
"eval_steps_per_second": 9.863, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.93445885181427, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.4356, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8145363408521303, |
|
"eval_f1": 0.7739895897121861, |
|
"eval_loss": 0.41105952858924866, |
|
"eval_precision": 0.7769509251810136, |
|
"eval_recall": 0.7712765957446808, |
|
"eval_runtime": 5.0773, |
|
"eval_samples_per_second": 78.585, |
|
"eval_steps_per_second": 9.848, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.6042848825454712, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.421, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7944862155388471, |
|
"eval_f1": 0.7611824817518249, |
|
"eval_loss": 0.41012144088745117, |
|
"eval_precision": 0.7537593984962405, |
|
"eval_recall": 0.7720949263502455, |
|
"eval_runtime": 5.0548, |
|
"eval_samples_per_second": 78.934, |
|
"eval_steps_per_second": 9.892, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 18.006345748901367, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4039, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8295739348370927, |
|
"eval_f1": 0.7933776044839771, |
|
"eval_loss": 0.38294023275375366, |
|
"eval_precision": 0.7949020208205757, |
|
"eval_recall": 0.7919167121294781, |
|
"eval_runtime": 5.0552, |
|
"eval_samples_per_second": 78.929, |
|
"eval_steps_per_second": 9.891, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.092437744140625, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3887, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8320802005012531, |
|
"eval_f1": 0.7979441442122369, |
|
"eval_loss": 0.3800281286239624, |
|
"eval_precision": 0.7972133421798662, |
|
"eval_recall": 0.7986906710310966, |
|
"eval_runtime": 5.0562, |
|
"eval_samples_per_second": 78.914, |
|
"eval_steps_per_second": 9.889, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 4.934494972229004, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3797, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.8019881353214686, |
|
"eval_loss": 0.37680280208587646, |
|
"eval_precision": 0.8043859649122806, |
|
"eval_recall": 0.7997363156937625, |
|
"eval_runtime": 5.0583, |
|
"eval_samples_per_second": 78.88, |
|
"eval_steps_per_second": 9.885, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 3.9598286151885986, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.368, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8220551378446115, |
|
"eval_f1": 0.791846991484015, |
|
"eval_loss": 0.38417863845825195, |
|
"eval_precision": 0.7845581927366314, |
|
"eval_recall": 0.801600290961993, |
|
"eval_runtime": 5.1148, |
|
"eval_samples_per_second": 78.009, |
|
"eval_steps_per_second": 9.776, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 11.679308891296387, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3598, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8270676691729323, |
|
"eval_f1": 0.7967966933608887, |
|
"eval_loss": 0.37781035900115967, |
|
"eval_precision": 0.7902444649446494, |
|
"eval_recall": 0.8051463902527732, |
|
"eval_runtime": 5.0765, |
|
"eval_samples_per_second": 78.598, |
|
"eval_steps_per_second": 9.849, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.9093892574310303, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.3548, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8141734808401475, |
|
"eval_loss": 0.3623768091201782, |
|
"eval_precision": 0.8166666666666667, |
|
"eval_recall": 0.8118294235315512, |
|
"eval_runtime": 5.069, |
|
"eval_samples_per_second": 78.714, |
|
"eval_steps_per_second": 9.864, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 8.159423828125, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.3469, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8134839254478557, |
|
"eval_loss": 0.36370429396629333, |
|
"eval_precision": 0.8119747899159664, |
|
"eval_recall": 0.8150572831423895, |
|
"eval_runtime": 5.0636, |
|
"eval_samples_per_second": 78.798, |
|
"eval_steps_per_second": 9.874, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.1732066869735718, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.3431, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8395989974937343, |
|
"eval_f1": 0.81021463820679, |
|
"eval_loss": 0.3684653341770172, |
|
"eval_precision": 0.8049051094890511, |
|
"eval_recall": 0.8165120931078378, |
|
"eval_runtime": 5.0578, |
|
"eval_samples_per_second": 78.888, |
|
"eval_steps_per_second": 9.886, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 6.608555316925049, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3275, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.8085765951950401, |
|
"eval_loss": 0.3663918673992157, |
|
"eval_precision": 0.8017470018450185, |
|
"eval_recall": 0.817239498090562, |
|
"eval_runtime": 5.0818, |
|
"eval_samples_per_second": 78.516, |
|
"eval_steps_per_second": 9.839, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.14452862739563, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.3288, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8395989974937343, |
|
"eval_f1": 0.8084033613445378, |
|
"eval_loss": 0.359037846326828, |
|
"eval_precision": 0.8055472976990309, |
|
"eval_recall": 0.8115111838516094, |
|
"eval_runtime": 5.0521, |
|
"eval_samples_per_second": 78.978, |
|
"eval_steps_per_second": 9.897, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 2.8248753547668457, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3335, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8186863532409097, |
|
"eval_loss": 0.36074599623680115, |
|
"eval_precision": 0.8138123167155425, |
|
"eval_recall": 0.8243316966721222, |
|
"eval_runtime": 5.0625, |
|
"eval_samples_per_second": 78.815, |
|
"eval_steps_per_second": 9.877, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.17168664932251, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.3239, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8161454307628278, |
|
"eval_loss": 0.36129772663116455, |
|
"eval_precision": 0.8107299270072992, |
|
"eval_recall": 0.8225586470267321, |
|
"eval_runtime": 5.1105, |
|
"eval_samples_per_second": 78.075, |
|
"eval_steps_per_second": 9.784, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 6.108452796936035, |
|
"learning_rate": 0.0, |
|
"loss": 0.327, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8186863532409097, |
|
"eval_loss": 0.3607771396636963, |
|
"eval_precision": 0.8138123167155425, |
|
"eval_recall": 0.8243316966721222, |
|
"eval_runtime": 5.0675, |
|
"eval_samples_per_second": 78.738, |
|
"eval_steps_per_second": 9.867, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7590599775312000.0, |
|
"train_loss": 0.39638843536376955, |
|
"train_runtime": 1955.6065, |
|
"train_samples_per_second": 37.206, |
|
"train_steps_per_second": 1.248 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7590599775312000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|