{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9992e-05, "loss": 2.3035, "step": 1 }, { "epoch": 0.5, "learning_rate": 1.9e-05, "loss": 2.1461, "step": 125 }, { "epoch": 0.5, "eval_accuracy": 0.5465, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.584102630615234, "eval_f1": 0.4711252488920671, "eval_gpu_ram_allocated": 0.3996291160583496, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 28, "eval_loss": 1.8486889600753784, "eval_precision": 0.5180898369172832, "eval_recall": 0.5465, "eval_runtime": 1.5234, "eval_samples_per_second": 1312.845, "eval_steps_per_second": 41.355, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8797760009765625, "step": 125 }, { "epoch": 1.0, "learning_rate": 1.8e-05, "loss": 1.6793, "step": 250 }, { "epoch": 1.0, "eval_accuracy": 0.615, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.584190368652344, "eval_f1": 0.5799151958404197, "eval_gpu_ram_allocated": 0.3996119499206543, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 28, "eval_loss": 1.5279773473739624, "eval_precision": 0.6206909881233017, "eval_recall": 0.615, "eval_runtime": 1.5357, "eval_samples_per_second": 1302.337, "eval_steps_per_second": 41.024, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8827476501464844, "step": 250 }, { "epoch": 1.5, "learning_rate": 1.7e-05, "loss": 1.4163, "step": 375 }, { "epoch": 1.5, "eval_accuracy": 0.6675, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.584190368652344, "eval_f1": 0.6507803054121275, "eval_gpu_ram_allocated": 0.3996291160583496, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 28, "eval_loss": 1.3395830392837524, "eval_precision": 0.6690538200035159, "eval_recall": 0.6675, "eval_runtime": 1.5175, "eval_samples_per_second": 1317.927, "eval_steps_per_second": 41.515, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.883075714111328, "step": 375 }, { "epoch": 2.0, "learning_rate": 1.6000000000000003e-05, "loss": 1.2855, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.6745, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.584270477294922, "eval_f1": 0.6633189544214071, "eval_gpu_ram_allocated": 0.3995933532714844, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 30, "eval_loss": 1.2413243055343628, "eval_precision": 0.6741774761001332, "eval_recall": 0.6745, "eval_runtime": 1.5166, "eval_samples_per_second": 1318.761, "eval_steps_per_second": 41.541, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.897541046142578, "step": 500 }, { "epoch": 2.5, "learning_rate": 1.5000000000000002e-05, "loss": 1.1364, "step": 625 }, { "epoch": 2.5, "eval_accuracy": 0.6725, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 25.457107543945312, "eval_f1": 0.665830199533775, "eval_gpu_ram_allocated": 0.3995990753173828, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 31, "eval_loss": 1.1794828176498413, "eval_precision": 0.6758089281502129, "eval_recall": 0.6725, "eval_runtime": 1.5306, "eval_samples_per_second": 1306.704, "eval_steps_per_second": 41.161, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.096714019775391, "step": 625 }, { "epoch": 3.0, "learning_rate": 1.4e-05, "loss": 1.0569, "step": 750 }, { "epoch": 3.0, "eval_accuracy": 0.6845, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 25.457260131835938, "eval_f1": 0.6785417667541389, "eval_gpu_ram_allocated": 0.3996105194091797, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 29, "eval_loss": 1.1167317628860474, "eval_precision": 0.6841220967269329, "eval_recall": 0.6845, "eval_runtime": 1.5379, "eval_samples_per_second": 1300.49, "eval_steps_per_second": 40.965, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.092338562011719, "step": 750 }, { "epoch": 3.5, "learning_rate": 1.3000000000000001e-05, "loss": 0.9596, "step": 875 }, { "epoch": 3.5, "eval_accuracy": 0.698, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 25.457298278808594, "eval_f1": 0.6882774085430846, "eval_gpu_ram_allocated": 0.3996591567993164, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 29, "eval_loss": 1.0866354703903198, "eval_precision": 0.692037318372609, "eval_recall": 0.698, "eval_runtime": 1.5638, "eval_samples_per_second": 1278.976, "eval_steps_per_second": 40.288, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.876514434814453, "step": 875 }, { "epoch": 4.0, "learning_rate": 1.2e-05, "loss": 0.917, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.6875, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 25.45730209350586, "eval_f1": 0.6796355379748251, "eval_gpu_ram_allocated": 0.3995933532714844, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 29, "eval_loss": 1.0703401565551758, "eval_precision": 0.6840559389245768, "eval_recall": 0.6875, "eval_runtime": 1.5204, "eval_samples_per_second": 1315.473, "eval_steps_per_second": 41.437, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8976211547851562, "step": 1000 }, { "epoch": 4.5, "learning_rate": 1.1008e-05, "loss": 0.8512, "step": 1125 }, { "epoch": 4.5, "eval_accuracy": 0.6915, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 25.830577850341797, "eval_f1": 0.6913447845710189, "eval_gpu_ram_allocated": 0.3996849060058594, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 28, "eval_loss": 1.0628596544265747, "eval_precision": 0.6944500140220231, "eval_recall": 0.6915, "eval_runtime": 1.5986, "eval_samples_per_second": 1251.121, "eval_steps_per_second": 39.41, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.0599517822265625, "step": 1125 }, { "epoch": 5.0, "learning_rate": 1.0008e-05, "loss": 0.8121, "step": 1250 }, { "epoch": 5.0, "eval_accuracy": 0.691, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 25.830596923828125, "eval_f1": 0.6838005278906992, "eval_gpu_ram_allocated": 0.39963626861572266, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 31, "eval_loss": 1.0575964450836182, "eval_precision": 0.6905089592227732, "eval_recall": 0.691, "eval_runtime": 1.5391, "eval_samples_per_second": 1299.447, "eval_steps_per_second": 40.933, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.043186187744141, "step": 1250 }, { "epoch": 5.5, "learning_rate": 9.008e-06, "loss": 0.7733, "step": 1375 }, { "epoch": 5.5, "eval_accuracy": 0.6805, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 25.83068084716797, "eval_f1": 0.6773653982197514, "eval_gpu_ram_allocated": 0.3996090888977051, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 28, "eval_loss": 1.0597690343856812, "eval_precision": 0.683800229871734, "eval_recall": 0.6805, "eval_runtime": 1.5511, "eval_samples_per_second": 1289.373, "eval_steps_per_second": 40.615, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.837909698486328, "step": 1375 }, { "epoch": 6.0, "learning_rate": 8.008e-06, "loss": 0.7431, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.702, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 25.830699920654297, "eval_f1": 0.6973584149072398, "eval_gpu_ram_allocated": 0.39962053298950195, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 31, "eval_loss": 1.0375711917877197, "eval_precision": 0.6975777506719404, "eval_recall": 0.702, "eval_runtime": 1.5383, "eval_samples_per_second": 1300.155, "eval_steps_per_second": 40.955, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8545989990234375, "step": 1500 }, { "epoch": 6.5, "learning_rate": 7.0080000000000005e-06, "loss": 0.7065, "step": 1625 }, { "epoch": 6.5, "eval_accuracy": 0.6995, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.203968048095703, "eval_f1": 0.6990180197515704, "eval_gpu_ram_allocated": 0.39959049224853516, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 28, "eval_loss": 1.0457407236099243, "eval_precision": 0.701399008937391, "eval_recall": 0.6995, "eval_runtime": 1.5796, "eval_samples_per_second": 1266.127, "eval_steps_per_second": 39.883, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.033943176269531, "step": 1625 }, { "epoch": 7.0, "learning_rate": 6.008000000000001e-06, "loss": 0.671, "step": 1750 }, { "epoch": 7.0, "eval_accuracy": 0.698, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.203968048095703, "eval_f1": 0.6956310421863513, "eval_gpu_ram_allocated": 0.39962339401245117, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 28, "eval_loss": 1.039590835571289, "eval_precision": 0.696566876815876, "eval_recall": 0.698, "eval_runtime": 1.5301, "eval_samples_per_second": 1307.136, "eval_steps_per_second": 41.175, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.038356781005859, "step": 1750 }, { "epoch": 7.5, "learning_rate": 5.008000000000001e-06, "loss": 0.6438, "step": 1875 }, { "epoch": 7.5, "eval_accuracy": 0.6925, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.203964233398438, "eval_f1": 0.6886960392924729, "eval_gpu_ram_allocated": 0.3995976448059082, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 28, "eval_loss": 1.0473765134811401, "eval_precision": 0.6906747949914751, "eval_recall": 0.6925, "eval_runtime": 1.5569, "eval_samples_per_second": 1284.611, "eval_steps_per_second": 40.465, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.827362060546875, "step": 1875 }, { "epoch": 8.0, "learning_rate": 4.008e-06, "loss": 0.6326, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.698, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.204063415527344, "eval_f1": 0.6972467602684949, "eval_gpu_ram_allocated": 0.39958906173706055, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 34, "eval_loss": 1.03837251663208, "eval_precision": 0.6982790583069545, "eval_recall": 0.698, "eval_runtime": 1.5106, "eval_samples_per_second": 1323.952, "eval_steps_per_second": 41.704, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8401947021484375, "step": 2000 }, { "epoch": 8.5, "learning_rate": 3.0080000000000003e-06, "loss": 0.6121, "step": 2125 }, { "epoch": 8.5, "eval_accuracy": 0.698, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.20423126220703, "eval_f1": 0.6962705560782918, "eval_gpu_ram_allocated": 0.39959049224853516, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 29, "eval_loss": 1.0439949035644531, "eval_precision": 0.6975979381837488, "eval_recall": 0.698, "eval_runtime": 1.5616, "eval_samples_per_second": 1280.706, "eval_steps_per_second": 40.342, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.016208648681641, "step": 2125 }, { "epoch": 9.0, "learning_rate": 2.008e-06, "loss": 0.5911, "step": 2250 }, { "epoch": 9.0, "eval_accuracy": 0.701, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.204296112060547, "eval_f1": 0.6995247290600446, "eval_gpu_ram_allocated": 0.3996105194091797, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 28, "eval_loss": 1.0518174171447754, "eval_precision": 0.7006323341620143, "eval_recall": 0.701, "eval_runtime": 1.5659, "eval_samples_per_second": 1277.26, "eval_steps_per_second": 40.234, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.033794403076172, "step": 2250 }, { "epoch": 9.5, "learning_rate": 1.0080000000000001e-06, "loss": 0.592, "step": 2375 }, { "epoch": 9.5, "eval_accuracy": 0.7035, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.20429229736328, "eval_f1": 0.7022917649865421, "eval_gpu_ram_allocated": 0.3996419906616211, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 27, "eval_loss": 1.0490039587020874, "eval_precision": 0.702476293906477, "eval_recall": 0.7035, "eval_runtime": 1.5585, "eval_samples_per_second": 1283.281, "eval_steps_per_second": 40.423, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8126220703125, "step": 2375 }, { "epoch": 10.0, "learning_rate": 8e-09, "loss": 0.5586, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.701, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.204303741455078, "eval_f1": 0.6984193396838297, "eval_gpu_ram_allocated": 0.3996291160583496, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 27, "eval_loss": 1.0510661602020264, "eval_precision": 0.700028711222529, "eval_recall": 0.701, "eval_runtime": 1.5162, "eval_samples_per_second": 1319.056, "eval_steps_per_second": 41.55, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.844818115234375, "step": 2500 } ], "max_steps": 2500, "num_train_epochs": 10, "total_flos": 1262611619041920.0, "trial_name": null, "trial_params": null }