{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.9823102951049805, "learning_rate": 4.75e-05, "loss": 0.5456, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7468671679197995, "eval_f1": 0.6641666041651041, "eval_loss": 0.49161723256111145, "eval_precision": 0.6912698412698413, "eval_recall": 0.6533915257319513, "eval_runtime": 1.7896, "eval_samples_per_second": 222.952, "eval_steps_per_second": 27.939, "step": 122 }, { "epoch": 2.0, "grad_norm": 2.931857109069824, "learning_rate": 4.5e-05, "loss": 0.4369, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8120300751879699, "eval_f1": 0.7912087912087912, "eval_loss": 0.41080209612846375, "eval_precision": 0.7802706552706553, "eval_recall": 0.822013093289689, "eval_runtime": 1.8221, "eval_samples_per_second": 218.982, "eval_steps_per_second": 27.441, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.00280499458313, "learning_rate": 4.25e-05, "loss": 0.3316, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.81524926686217, "eval_loss": 0.3294452726840973, "eval_precision": 0.8462682339611953, "eval_recall": 0.7964175304600837, "eval_runtime": 1.7904, "eval_samples_per_second": 222.854, "eval_steps_per_second": 27.927, "step": 366 }, { "epoch": 4.0, "grad_norm": 3.0830881595611572, "learning_rate": 4e-05, "loss": 0.2909, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8499551039516197, "eval_loss": 0.30187976360321045, "eval_precision": 0.8547473867595818, "eval_recall": 0.8456082924168031, "eval_runtime": 1.8, "eval_samples_per_second": 221.672, "eval_steps_per_second": 27.778, "step": 488 }, { "epoch": 5.0, "grad_norm": 2.5169291496276855, "learning_rate": 3.7500000000000003e-05, "loss": 0.2584, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.842789598108747, "eval_loss": 0.3023463487625122, "eval_precision": 0.842789598108747, "eval_recall": 0.842789598108747, "eval_runtime": 1.7982, "eval_samples_per_second": 221.888, "eval_steps_per_second": 27.805, "step": 610 }, { "epoch": 6.0, "grad_norm": 1.894607424736023, "learning_rate": 3.5e-05, "loss": 0.237, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8375505157126486, "eval_loss": 0.30203157663345337, "eval_precision": 0.8359243697478991, "eval_recall": 0.8392434988179669, "eval_runtime": 1.7962, "eval_samples_per_second": 222.138, "eval_steps_per_second": 27.837, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.33639559149742126, "learning_rate": 3.2500000000000004e-05, "loss": 0.2186, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8491157824491158, "eval_loss": 0.2988818287849426, "eval_precision": 0.8424908424908425, "eval_recall": 0.8570649208947081, "eval_runtime": 1.8005, "eval_samples_per_second": 221.607, "eval_steps_per_second": 27.77, "step": 854 }, { "epoch": 8.0, "grad_norm": 1.405568242073059, "learning_rate": 3e-05, "loss": 0.2108, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8614765038536611, "eval_loss": 0.29612118005752563, "eval_precision": 0.8686536646744258, "eval_recall": 0.8552009456264775, "eval_runtime": 1.8282, "eval_samples_per_second": 218.244, "eval_steps_per_second": 27.349, "step": 976 }, { "epoch": 9.0, "grad_norm": 6.2904157638549805, "learning_rate": 2.7500000000000004e-05, "loss": 0.1898, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8480717680029244, "eval_loss": 0.30133265256881714, "eval_precision": 0.8498775260257195, "eval_recall": 0.8463356973995272, "eval_runtime": 1.7951, "eval_samples_per_second": 222.272, "eval_steps_per_second": 27.854, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.820188522338867, "learning_rate": 2.5e-05, "loss": 0.1894, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8456742372671576, "eval_loss": 0.32312536239624023, "eval_precision": 0.8536697247706422, "eval_recall": 0.8388343335151845, "eval_runtime": 1.8085, "eval_samples_per_second": 220.63, "eval_steps_per_second": 27.648, "step": 1220 }, { "epoch": 11.0, "grad_norm": 1.6913771629333496, "learning_rate": 2.25e-05, "loss": 0.1817, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8514869535493182, "eval_loss": 0.301199346780777, "eval_precision": 0.8523821128305106, "eval_recall": 0.8506092016730314, "eval_runtime": 1.8217, "eval_samples_per_second": 219.025, "eval_steps_per_second": 27.447, "step": 1342 }, { "epoch": 12.0, "grad_norm": 5.912967681884766, "learning_rate": 2e-05, "loss": 0.1723, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8359175094431583, "eval_loss": 0.29794374108314514, "eval_precision": 0.8376607470912432, "eval_recall": 0.8342425895617385, "eval_runtime": 1.7955, "eval_samples_per_second": 222.218, "eval_steps_per_second": 27.847, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.4766283929347992, "learning_rate": 1.75e-05, "loss": 0.1547, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8411818738518064, "eval_loss": 0.2937452793121338, "eval_precision": 0.8448542607834644, "eval_recall": 0.8377886888525186, "eval_runtime": 1.8242, "eval_samples_per_second": 218.73, "eval_steps_per_second": 27.41, "step": 1586 }, { "epoch": 14.0, "grad_norm": 4.687190532684326, "learning_rate": 1.5e-05, "loss": 0.1569, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8485547445255475, "eval_loss": 0.3064776360988617, "eval_precision": 0.8383458646616542, "eval_recall": 0.8627932351336607, "eval_runtime": 1.7921, "eval_samples_per_second": 222.641, "eval_steps_per_second": 27.9, "step": 1708 }, { "epoch": 15.0, "grad_norm": 4.828568458557129, "learning_rate": 1.25e-05, "loss": 0.1442, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8529524583135901, "eval_loss": 0.2884095013141632, "eval_precision": 0.8504480286738352, "eval_recall": 0.8556101109292599, "eval_runtime": 1.8008, "eval_samples_per_second": 221.563, "eval_steps_per_second": 27.765, "step": 1830 }, { "epoch": 16.0, "grad_norm": 3.8095035552978516, "learning_rate": 1e-05, "loss": 0.1435, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8563025210084034, "eval_loss": 0.30156683921813965, "eval_precision": 0.8529936381473334, "eval_recall": 0.8598836152027641, "eval_runtime": 1.8479, "eval_samples_per_second": 215.919, "eval_steps_per_second": 27.058, "step": 1952 }, { "epoch": 17.0, "grad_norm": 0.34168025851249695, "learning_rate": 7.5e-06, "loss": 0.1378, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8556962025316456, "eval_loss": 0.31144392490386963, "eval_precision": 0.8477571494464944, "eval_recall": 0.8656119294417166, "eval_runtime": 1.8294, "eval_samples_per_second": 218.106, "eval_steps_per_second": 27.332, "step": 2074 }, { "epoch": 18.0, "grad_norm": 0.2586296498775482, "learning_rate": 5e-06, "loss": 0.1377, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8550328105883662, "eval_loss": 0.3096340000629425, "eval_precision": 0.8482905982905984, "eval_recall": 0.8631114748136025, "eval_runtime": 1.8332, "eval_samples_per_second": 217.658, "eval_steps_per_second": 27.275, "step": 2196 }, { "epoch": 19.0, "grad_norm": 0.0663389191031456, "learning_rate": 2.5e-06, "loss": 0.1307, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8556004584112431, "eval_loss": 0.30646833777427673, "eval_precision": 0.8538865546218487, "eval_recall": 0.85738316057465, "eval_runtime": 1.8307, "eval_samples_per_second": 217.954, "eval_steps_per_second": 27.313, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.8260014057159424, "learning_rate": 0.0, "loss": 0.126, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8656072933585827, "eval_loss": 0.30540019273757935, "eval_precision": 0.8614399005740664, "eval_recall": 0.8702036733951628, "eval_runtime": 1.8294, "eval_samples_per_second": 218.099, "eval_steps_per_second": 27.331, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.21972156196344095, "train_runtime": 635.9425, "train_samples_per_second": 114.413, "train_steps_per_second": 3.837 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }