{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.0131800174713135, "learning_rate": 4.75e-05, "loss": 0.5623, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7167919799498746, "eval_f1": 0.5794790005316321, "eval_loss": 0.5053456425666809, "eval_precision": 0.6409822866344606, "eval_recall": 0.5796053827968721, "eval_runtime": 5.6071, "eval_samples_per_second": 71.159, "eval_steps_per_second": 8.917, "step": 122 }, { "epoch": 2.0, "grad_norm": 5.634490966796875, "learning_rate": 4.5e-05, "loss": 0.518, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7293233082706767, "eval_f1": 0.599784530797236, "eval_loss": 0.4860531687736511, "eval_precision": 0.6673625792811839, "eval_recall": 0.5959719949081652, "eval_runtime": 5.7755, "eval_samples_per_second": 69.085, "eval_steps_per_second": 8.657, "step": 244 }, { "epoch": 3.0, "grad_norm": 1.959808111190796, "learning_rate": 4.25e-05, "loss": 0.4835, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7694235588972431, "eval_f1": 0.7145034843205575, "eval_loss": 0.45518842339515686, "eval_precision": 0.7210824478299833, "eval_recall": 0.7093562465902892, "eval_runtime": 5.2584, "eval_samples_per_second": 75.878, "eval_steps_per_second": 9.509, "step": 366 }, { "epoch": 4.0, "grad_norm": 4.635169506072998, "learning_rate": 4e-05, "loss": 0.4497, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.7944862155388471, "eval_f1": 0.7520912893253319, "eval_loss": 0.4223441481590271, "eval_precision": 0.7520912893253319, "eval_recall": 0.7520912893253319, "eval_runtime": 5.0487, "eval_samples_per_second": 79.03, "eval_steps_per_second": 9.903, "step": 488 }, { "epoch": 5.0, "grad_norm": 8.219679832458496, "learning_rate": 3.7500000000000003e-05, "loss": 0.4266, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8170426065162907, "eval_f1": 0.7740779522978476, "eval_loss": 0.399569034576416, "eval_precision": 0.7814051164566629, "eval_recall": 0.7680487361338425, "eval_runtime": 5.0767, "eval_samples_per_second": 78.595, "eval_steps_per_second": 9.849, "step": 610 }, { "epoch": 6.0, "grad_norm": 4.150725841522217, "learning_rate": 3.5e-05, "loss": 0.3907, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8195488721804511, "eval_f1": 0.784453781512605, "eval_loss": 0.3830115497112274, "eval_precision": 0.7818241274748796, "eval_recall": 0.787324968176032, "eval_runtime": 5.0718, "eval_samples_per_second": 78.67, "eval_steps_per_second": 9.858, "step": 732 }, { "epoch": 7.0, "grad_norm": 3.297985076904297, "learning_rate": 3.2500000000000004e-05, "loss": 0.3742, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8345864661654135, "eval_f1": 0.798423147581139, "eval_loss": 0.3684135675430298, "eval_precision": 0.8016430472182685, "eval_recall": 0.7954628114202582, "eval_runtime": 5.0743, "eval_samples_per_second": 78.632, "eval_steps_per_second": 9.854, "step": 854 }, { "epoch": 8.0, "grad_norm": 8.395323753356934, "learning_rate": 3e-05, "loss": 0.3616, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8270676691729323, "eval_f1": 0.7967966933608887, "eval_loss": 0.3719731867313385, "eval_precision": 0.7902444649446494, "eval_recall": 0.8051463902527732, "eval_runtime": 5.0484, "eval_samples_per_second": 79.035, "eval_steps_per_second": 9.904, "step": 976 }, { "epoch": 9.0, "grad_norm": 3.748974561691284, "learning_rate": 2.7500000000000004e-05, "loss": 0.3294, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8076965854743632, "eval_loss": 0.36888691782951355, "eval_precision": 0.8018925518925519, "eval_recall": 0.8147390434624477, "eval_runtime": 5.0543, "eval_samples_per_second": 78.943, "eval_steps_per_second": 9.893, "step": 1098 }, { "epoch": 10.0, "grad_norm": 9.309541702270508, "learning_rate": 2.5e-05, "loss": 0.3207, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.8110907261644079, "eval_loss": 0.36315786838531494, "eval_precision": 0.8046983557202408, "eval_recall": 0.819012547735952, "eval_runtime": 5.0709, "eval_samples_per_second": 78.684, "eval_steps_per_second": 9.86, "step": 1220 }, { "epoch": 11.0, "grad_norm": 1.2568168640136719, "learning_rate": 2.25e-05, "loss": 0.3214, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8085765951950401, "eval_loss": 0.3577338457107544, "eval_precision": 0.8017470018450185, "eval_recall": 0.817239498090562, "eval_runtime": 5.1071, "eval_samples_per_second": 78.126, "eval_steps_per_second": 9.79, "step": 1342 }, { "epoch": 12.0, "grad_norm": 1.915198802947998, "learning_rate": 2e-05, "loss": 0.3167, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.8119476846942383, "eval_loss": 0.36069995164871216, "eval_precision": 0.8045650301464256, "eval_recall": 0.8215130023640662, "eval_runtime": 5.0598, "eval_samples_per_second": 78.857, "eval_steps_per_second": 9.882, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.9545631408691406, "learning_rate": 1.75e-05, "loss": 0.289, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8345864661654135, "eval_f1": 0.8060710498409331, "eval_loss": 0.3684280812740326, "eval_precision": 0.7988372093023256, "eval_recall": 0.8154664484451719, "eval_runtime": 5.1019, "eval_samples_per_second": 78.206, "eval_steps_per_second": 9.8, "step": 1586 }, { "epoch": 14.0, "grad_norm": 5.748187065124512, "learning_rate": 1.5e-05, "loss": 0.2997, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8176861216035092, "eval_loss": 0.3479882776737213, "eval_precision": 0.8193355786895284, "eval_recall": 0.8161029278050556, "eval_runtime": 5.0557, "eval_samples_per_second": 78.92, "eval_steps_per_second": 9.89, "step": 1708 }, { "epoch": 15.0, "grad_norm": 4.010083198547363, "learning_rate": 1.25e-05, "loss": 0.2986, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.821236559139785, "eval_loss": 0.35758015513420105, "eval_precision": 0.8169406150583245, "eval_recall": 0.8261047463175123, "eval_runtime": 5.0955, "eval_samples_per_second": 78.304, "eval_steps_per_second": 9.813, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.9220337271690369, "learning_rate": 1e-05, "loss": 0.2914, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8195005730140539, "eval_loss": 0.34965991973876953, "eval_precision": 0.8179621848739496, "eval_recall": 0.8211038370612839, "eval_runtime": 5.0617, "eval_samples_per_second": 78.827, "eval_steps_per_second": 9.878, "step": 1952 }, { "epoch": 17.0, "grad_norm": 1.7026562690734863, "learning_rate": 7.5e-06, "loss": 0.278, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.8229427559286084, "eval_loss": 0.3539772927761078, "eval_precision": 0.8206541218637993, "eval_recall": 0.8253773413347881, "eval_runtime": 5.1199, "eval_samples_per_second": 77.931, "eval_steps_per_second": 9.766, "step": 2074 }, { "epoch": 18.0, "grad_norm": 5.839470863342285, "learning_rate": 5e-06, "loss": 0.2887, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.8229427559286084, "eval_loss": 0.35161107778549194, "eval_precision": 0.8206541218637993, "eval_recall": 0.8253773413347881, "eval_runtime": 5.1154, "eval_samples_per_second": 77.999, "eval_steps_per_second": 9.774, "step": 2196 }, { "epoch": 19.0, "grad_norm": 7.782900810241699, "learning_rate": 2.5e-06, "loss": 0.2829, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.8229427559286084, "eval_loss": 0.35369938611984253, "eval_precision": 0.8206541218637993, "eval_recall": 0.8253773413347881, "eval_runtime": 5.0565, "eval_samples_per_second": 78.908, "eval_steps_per_second": 9.888, "step": 2318 }, { "epoch": 20.0, "grad_norm": 5.059621334075928, "learning_rate": 0.0, "loss": 0.2771, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8263655462184873, "eval_loss": 0.35401326417922974, "eval_precision": 0.8233396753671443, "eval_recall": 0.8296508456082925, "eval_runtime": 5.0854, "eval_samples_per_second": 78.459, "eval_steps_per_second": 9.832, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7597037114448000.0, "train_loss": 0.35800845193081215, "train_runtime": 2113.1391, "train_samples_per_second": 34.432, "train_steps_per_second": 1.155 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7597037114448000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }