{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.112319469451904, "learning_rate": 4.75e-05, "loss": 0.5509, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7393483709273183, "eval_f1": 0.6507070707070707, "eval_loss": 0.4983255863189697, "eval_precision": 0.6800605637083625, "eval_recall": 0.6405710129114385, "eval_runtime": 1.7657, "eval_samples_per_second": 225.971, "eval_steps_per_second": 28.317, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.6866044998168945, "learning_rate": 4.5e-05, "loss": 0.4511, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7769423558897243, "eval_f1": 0.7593078346448687, "eval_loss": 0.4377373456954956, "eval_precision": 0.7546743295019157, "eval_recall": 0.8021913075104565, "eval_runtime": 1.769, "eval_samples_per_second": 225.555, "eval_steps_per_second": 28.265, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.584764242172241, "learning_rate": 4.25e-05, "loss": 0.368, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8196102381877741, "eval_loss": 0.32603567838668823, "eval_precision": 0.8381270903010034, "eval_recall": 0.8064193489725404, "eval_runtime": 1.7715, "eval_samples_per_second": 225.23, "eval_steps_per_second": 28.224, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.8483095169067383, "learning_rate": 4e-05, "loss": 0.3019, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8333281762485303, "eval_loss": 0.30364951491355896, "eval_precision": 0.8410471369819678, "eval_recall": 0.8267412256773959, "eval_runtime": 1.7702, "eval_samples_per_second": 225.393, "eval_steps_per_second": 28.245, "step": 488 }, { "epoch": 5.0, "grad_norm": 2.774143934249878, "learning_rate": 3.7500000000000003e-05, "loss": 0.2668, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8424651921601347, "eval_loss": 0.31921207904815674, "eval_precision": 0.8372140762463343, "eval_recall": 0.8485179123476996, "eval_runtime": 1.7714, "eval_samples_per_second": 225.248, "eval_steps_per_second": 28.227, "step": 610 }, { "epoch": 6.0, "grad_norm": 4.2327117919921875, "learning_rate": 3.5e-05, "loss": 0.2471, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8380263497804185, "eval_loss": 0.30589351058006287, "eval_precision": 0.830503344095941, "eval_recall": 0.8474722676850337, "eval_runtime": 1.7732, "eval_samples_per_second": 225.015, "eval_steps_per_second": 28.197, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.5115749835968018, "learning_rate": 3.2500000000000004e-05, "loss": 0.2422, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8524146298159436, "eval_loss": 0.2949831783771515, "eval_precision": 0.8451250578971746, "eval_recall": 0.8613384251682124, "eval_runtime": 1.7731, "eval_samples_per_second": 225.024, "eval_steps_per_second": 28.198, "step": 854 }, { "epoch": 8.0, "grad_norm": 1.2918312549591064, "learning_rate": 3e-05, "loss": 0.2258, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8454251965513313, "eval_loss": 0.29280924797058105, "eval_precision": 0.8463049835506276, "eval_recall": 0.8445626477541371, "eval_runtime": 1.7799, "eval_samples_per_second": 224.171, "eval_steps_per_second": 28.092, "step": 976 }, { "epoch": 9.0, "grad_norm": 5.160737037658691, "learning_rate": 2.7500000000000004e-05, "loss": 0.2054, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8533986527862829, "eval_loss": 0.30492648482322693, "eval_precision": 0.8572003218020917, "eval_recall": 0.8498817966903074, "eval_runtime": 1.779, "eval_samples_per_second": 224.288, "eval_steps_per_second": 28.106, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.917464017868042, "learning_rate": 2.5e-05, "loss": 0.2009, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8488361520276414, "eval_loss": 0.30127042531967163, "eval_precision": 0.8488361520276414, "eval_recall": 0.8488361520276414, "eval_runtime": 1.7757, "eval_samples_per_second": 224.7, "eval_steps_per_second": 28.158, "step": 1220 }, { "epoch": 11.0, "grad_norm": 6.667805194854736, "learning_rate": 2.25e-05, "loss": 0.1755, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.858259325044405, "eval_loss": 0.30701279640197754, "eval_precision": 0.8573798178418481, "eval_recall": 0.8591562102200401, "eval_runtime": 1.7942, "eval_samples_per_second": 222.38, "eval_steps_per_second": 27.867, "step": 1342 }, { "epoch": 12.0, "grad_norm": 8.611730575561523, "learning_rate": 2e-05, "loss": 0.1821, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8568221901555235, "eval_loss": 0.2995355427265167, "eval_precision": 0.8596491228070176, "eval_recall": 0.8541553009638116, "eval_runtime": 1.7796, "eval_samples_per_second": 224.202, "eval_steps_per_second": 28.095, "step": 1464 }, { "epoch": 13.0, "grad_norm": 2.71295428276062, "learning_rate": 1.75e-05, "loss": 0.1652, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.866029197080292, "eval_loss": 0.3272043764591217, "eval_precision": 0.8552631578947368, "eval_recall": 0.8809328968903437, "eval_runtime": 1.7775, "eval_samples_per_second": 224.467, "eval_steps_per_second": 28.129, "step": 1586 }, { "epoch": 14.0, "grad_norm": 5.373868942260742, "learning_rate": 1.5e-05, "loss": 0.1566, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8718540145985401, "eval_loss": 0.33357149362564087, "eval_precision": 0.8609022556390977, "eval_recall": 0.886979450809238, "eval_runtime": 1.7836, "eval_samples_per_second": 223.703, "eval_steps_per_second": 28.033, "step": 1708 }, { "epoch": 15.0, "grad_norm": 5.369639873504639, "learning_rate": 1.25e-05, "loss": 0.1634, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8622899159663866, "eval_loss": 0.314995676279068, "eval_precision": 0.8589244307033712, "eval_recall": 0.8659301691216585, "eval_runtime": 1.777, "eval_samples_per_second": 224.539, "eval_steps_per_second": 28.138, "step": 1830 }, { "epoch": 16.0, "grad_norm": 6.779192924499512, "learning_rate": 1e-05, "loss": 0.1496, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8696722245432793, "eval_loss": 0.3320792317390442, "eval_precision": 0.8706135006701596, "eval_recall": 0.8687488634297145, "eval_runtime": 1.7833, "eval_samples_per_second": 223.741, "eval_steps_per_second": 28.038, "step": 1952 }, { "epoch": 17.0, "grad_norm": 1.1515932083129883, "learning_rate": 7.5e-06, "loss": 0.1355, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8616171059774413, "eval_loss": 0.32759982347488403, "eval_precision": 0.859873949579832, "eval_recall": 0.8634297144935443, "eval_runtime": 1.7782, "eval_samples_per_second": 224.387, "eval_steps_per_second": 28.119, "step": 2074 }, { "epoch": 18.0, "grad_norm": 1.6571087837219238, "learning_rate": 5e-06, "loss": 0.1477, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8563025210084034, "eval_loss": 0.33653610944747925, "eval_precision": 0.8529936381473334, "eval_recall": 0.8598836152027641, "eval_runtime": 1.7851, "eval_samples_per_second": 223.518, "eval_steps_per_second": 28.01, "step": 2196 }, { "epoch": 19.0, "grad_norm": 2.6701011657714844, "learning_rate": 2.5e-06, "loss": 0.1317, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.858259325044405, "eval_loss": 0.3385031819343567, "eval_precision": 0.8573798178418481, "eval_recall": 0.8591562102200401, "eval_runtime": 1.7765, "eval_samples_per_second": 224.597, "eval_steps_per_second": 28.145, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.197312593460083, "learning_rate": 0.0, "loss": 0.1267, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.858259325044405, "eval_loss": 0.3389217257499695, "eval_precision": 0.8573798178418481, "eval_recall": 0.8591562102200401, "eval_runtime": 1.7779, "eval_samples_per_second": 224.423, "eval_steps_per_second": 28.123, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.22970127551282038, "train_runtime": 621.3103, "train_samples_per_second": 117.107, "train_steps_per_second": 3.927 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }