{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.953822374343872, "learning_rate": 4.75e-05, "loss": 0.5448, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7243107769423559, "eval_f1": 0.6567661411411412, "eval_loss": 0.5046952366828918, "eval_precision": 0.6629251700680272, "eval_recall": 0.6524368066921259, "eval_runtime": 5.0686, "eval_samples_per_second": 78.72, "eval_steps_per_second": 9.865, "step": 122 }, { "epoch": 2.0, "grad_norm": 2.6171164512634277, "learning_rate": 4.5e-05, "loss": 0.4527, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7944862155388471, "eval_f1": 0.775152564736929, "eval_loss": 0.4319760501384735, "eval_precision": 0.7667055167055168, "eval_recall": 0.8121022004000726, "eval_runtime": 5.0454, "eval_samples_per_second": 79.082, "eval_steps_per_second": 9.91, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.439111709594727, "learning_rate": 4.25e-05, "loss": 0.3603, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.7984931903796002, "eval_loss": 0.33696243166923523, "eval_precision": 0.8393082695594026, "eval_recall": 0.7768230587379523, "eval_runtime": 5.0496, "eval_samples_per_second": 79.016, "eval_steps_per_second": 9.902, "step": 366 }, { "epoch": 4.0, "grad_norm": 0.9325271248817444, "learning_rate": 4e-05, "loss": 0.3081, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8461962888779714, "eval_loss": 0.29947736859321594, "eval_precision": 0.8453465227094517, "eval_recall": 0.8470631023822512, "eval_runtime": 5.0527, "eval_samples_per_second": 78.967, "eval_steps_per_second": 9.896, "step": 488 }, { "epoch": 5.0, "grad_norm": 1.1945856809616089, "learning_rate": 3.7500000000000003e-05, "loss": 0.2793, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8456742372671576, "eval_loss": 0.3008015751838684, "eval_precision": 0.8536697247706422, "eval_recall": 0.8388343335151845, "eval_runtime": 5.0466, "eval_samples_per_second": 79.063, "eval_steps_per_second": 9.908, "step": 610 }, { "epoch": 6.0, "grad_norm": 3.8788158893585205, "learning_rate": 3.5e-05, "loss": 0.2526, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8411818738518064, "eval_loss": 0.2986568510532379, "eval_precision": 0.8448542607834644, "eval_recall": 0.8377886888525186, "eval_runtime": 5.0571, "eval_samples_per_second": 78.899, "eval_steps_per_second": 9.887, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.2110929489135742, "learning_rate": 3.2500000000000004e-05, "loss": 0.2478, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.846679266293906, "eval_loss": 0.3030013144016266, "eval_precision": 0.8609191655801824, "eval_recall": 0.8356064739043463, "eval_runtime": 5.0491, "eval_samples_per_second": 79.023, "eval_steps_per_second": 9.903, "step": 854 }, { "epoch": 8.0, "grad_norm": 3.5065371990203857, "learning_rate": 3e-05, "loss": 0.2337, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8350789627607721, "eval_loss": 0.2974022924900055, "eval_precision": 0.8463358876939919, "eval_recall": 0.8260138206946717, "eval_runtime": 5.0637, "eval_samples_per_second": 78.796, "eval_steps_per_second": 9.874, "step": 976 }, { "epoch": 9.0, "grad_norm": 4.127802848815918, "learning_rate": 2.7500000000000004e-05, "loss": 0.217, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8395201930584144, "eval_loss": 0.2773597240447998, "eval_precision": 0.8562091503267973, "eval_recall": 0.8270594653573378, "eval_runtime": 5.0754, "eval_samples_per_second": 78.615, "eval_steps_per_second": 9.851, "step": 1098 }, { "epoch": 10.0, "grad_norm": 2.408611297607422, "learning_rate": 2.5e-05, "loss": 0.1966, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.844327731092437, "eval_loss": 0.2846048176288605, "eval_precision": 0.8411320530352577, "eval_recall": 0.8477905073649754, "eval_runtime": 5.0388, "eval_samples_per_second": 79.185, "eval_steps_per_second": 9.923, "step": 1220 }, { "epoch": 11.0, "grad_norm": 2.3639743328094482, "learning_rate": 2.25e-05, "loss": 0.199, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8545433391506689, "eval_loss": 0.2909916341304779, "eval_precision": 0.8639270714012982, "eval_recall": 0.846653937079469, "eval_runtime": 5.0638, "eval_samples_per_second": 78.794, "eval_steps_per_second": 9.874, "step": 1342 }, { "epoch": 12.0, "grad_norm": 3.4640684127807617, "learning_rate": 2e-05, "loss": 0.187, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.846679266293906, "eval_loss": 0.2870851755142212, "eval_precision": 0.8609191655801824, "eval_recall": 0.8356064739043463, "eval_runtime": 5.0487, "eval_samples_per_second": 79.031, "eval_steps_per_second": 9.904, "step": 1464 }, { "epoch": 13.0, "grad_norm": 2.2104856967926025, "learning_rate": 1.75e-05, "loss": 0.1812, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8526315789473684, "eval_loss": 0.2812826931476593, "eval_precision": 0.8585304054054055, "eval_recall": 0.8473813420621932, "eval_runtime": 5.0484, "eval_samples_per_second": 79.034, "eval_steps_per_second": 9.904, "step": 1586 }, { "epoch": 14.0, "grad_norm": 3.044590950012207, "learning_rate": 1.5e-05, "loss": 0.1633, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8596342841745197, "eval_loss": 0.2956988215446472, "eval_precision": 0.8555364857667042, "eval_recall": 0.8641571194762684, "eval_runtime": 5.0481, "eval_samples_per_second": 79.04, "eval_steps_per_second": 9.905, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.9169403314590454, "learning_rate": 1.25e-05, "loss": 0.1607, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8696722245432793, "eval_loss": 0.28752827644348145, "eval_precision": 0.8706135006701596, "eval_recall": 0.8687488634297145, "eval_runtime": 5.0457, "eval_samples_per_second": 79.078, "eval_steps_per_second": 9.91, "step": 1830 }, { "epoch": 16.0, "grad_norm": 1.9245001077651978, "learning_rate": 1e-05, "loss": 0.1584, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8560793854229822, "eval_loss": 0.2859440743923187, "eval_precision": 0.8609538327526132, "eval_recall": 0.8516548463356974, "eval_runtime": 5.086, "eval_samples_per_second": 78.451, "eval_steps_per_second": 9.831, "step": 1952 }, { "epoch": 17.0, "grad_norm": 3.737988233566284, "learning_rate": 7.5e-06, "loss": 0.1535, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8609292598654301, "eval_loss": 0.29243767261505127, "eval_precision": 0.8609292598654301, "eval_recall": 0.8609292598654301, "eval_runtime": 5.0479, "eval_samples_per_second": 79.042, "eval_steps_per_second": 9.905, "step": 2074 }, { "epoch": 18.0, "grad_norm": 3.7625505924224854, "learning_rate": 5e-06, "loss": 0.1432, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8616171059774413, "eval_loss": 0.29657039046287537, "eval_precision": 0.859873949579832, "eval_recall": 0.8634297144935443, "eval_runtime": 5.0616, "eval_samples_per_second": 78.829, "eval_steps_per_second": 9.878, "step": 2196 }, { "epoch": 19.0, "grad_norm": 0.5611210465431213, "learning_rate": 2.5e-06, "loss": 0.1466, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8568221901555235, "eval_loss": 0.29467570781707764, "eval_precision": 0.8596491228070176, "eval_recall": 0.8541553009638116, "eval_runtime": 5.0458, "eval_samples_per_second": 79.075, "eval_steps_per_second": 9.909, "step": 2318 }, { "epoch": 20.0, "grad_norm": 7.14449405670166, "learning_rate": 0.0, "loss": 0.1411, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8609292598654301, "eval_loss": 0.2950780689716339, "eval_precision": 0.8609292598654301, "eval_recall": 0.8609292598654301, "eval_runtime": 5.0549, "eval_samples_per_second": 78.933, "eval_steps_per_second": 9.891, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8460375995160000.0, "train_loss": 0.2363556080177182, "train_runtime": 1909.167, "train_samples_per_second": 38.184, "train_steps_per_second": 1.278 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8460375995160000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }