|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9992e-05, |
|
"loss": 2.3035, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.9e-05, |
|
"loss": 2.1461, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.5465, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.584102630615234, |
|
"eval_f1": 0.4711252488920671, |
|
"eval_gpu_ram_allocated": 0.3996291160583496, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 28, |
|
"eval_loss": 1.8486889600753784, |
|
"eval_precision": 0.5180898369172832, |
|
"eval_recall": 0.5465, |
|
"eval_runtime": 1.5234, |
|
"eval_samples_per_second": 1312.845, |
|
"eval_steps_per_second": 41.355, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8797760009765625, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.8e-05, |
|
"loss": 1.6793, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.615, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.584190368652344, |
|
"eval_f1": 0.5799151958404197, |
|
"eval_gpu_ram_allocated": 0.3996119499206543, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 28, |
|
"eval_loss": 1.5279773473739624, |
|
"eval_precision": 0.6206909881233017, |
|
"eval_recall": 0.615, |
|
"eval_runtime": 1.5357, |
|
"eval_samples_per_second": 1302.337, |
|
"eval_steps_per_second": 41.024, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8827476501464844, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.7e-05, |
|
"loss": 1.4163, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_accuracy": 0.6675, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.584190368652344, |
|
"eval_f1": 0.6507803054121275, |
|
"eval_gpu_ram_allocated": 0.3996291160583496, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 28, |
|
"eval_loss": 1.3395830392837524, |
|
"eval_precision": 0.6690538200035159, |
|
"eval_recall": 0.6675, |
|
"eval_runtime": 1.5175, |
|
"eval_samples_per_second": 1317.927, |
|
"eval_steps_per_second": 41.515, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.883075714111328, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 1.2855, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6745, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.584270477294922, |
|
"eval_f1": 0.6633189544214071, |
|
"eval_gpu_ram_allocated": 0.3995933532714844, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 30, |
|
"eval_loss": 1.2413243055343628, |
|
"eval_precision": 0.6741774761001332, |
|
"eval_recall": 0.6745, |
|
"eval_runtime": 1.5166, |
|
"eval_samples_per_second": 1318.761, |
|
"eval_steps_per_second": 41.541, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.897541046142578, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 1.1364, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_accuracy": 0.6725, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 25.457107543945312, |
|
"eval_f1": 0.665830199533775, |
|
"eval_gpu_ram_allocated": 0.3995990753173828, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 31, |
|
"eval_loss": 1.1794828176498413, |
|
"eval_precision": 0.6758089281502129, |
|
"eval_recall": 0.6725, |
|
"eval_runtime": 1.5306, |
|
"eval_samples_per_second": 1306.704, |
|
"eval_steps_per_second": 41.161, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.096714019775391, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.4e-05, |
|
"loss": 1.0569, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6845, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 25.457260131835938, |
|
"eval_f1": 0.6785417667541389, |
|
"eval_gpu_ram_allocated": 0.3996105194091797, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 29, |
|
"eval_loss": 1.1167317628860474, |
|
"eval_precision": 0.6841220967269329, |
|
"eval_recall": 0.6845, |
|
"eval_runtime": 1.5379, |
|
"eval_samples_per_second": 1300.49, |
|
"eval_steps_per_second": 40.965, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.092338562011719, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.9596, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_accuracy": 0.698, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 25.457298278808594, |
|
"eval_f1": 0.6882774085430846, |
|
"eval_gpu_ram_allocated": 0.3996591567993164, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 29, |
|
"eval_loss": 1.0866354703903198, |
|
"eval_precision": 0.692037318372609, |
|
"eval_recall": 0.698, |
|
"eval_runtime": 1.5638, |
|
"eval_samples_per_second": 1278.976, |
|
"eval_steps_per_second": 40.288, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.876514434814453, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.917, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6875, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 25.45730209350586, |
|
"eval_f1": 0.6796355379748251, |
|
"eval_gpu_ram_allocated": 0.3995933532714844, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 29, |
|
"eval_loss": 1.0703401565551758, |
|
"eval_precision": 0.6840559389245768, |
|
"eval_recall": 0.6875, |
|
"eval_runtime": 1.5204, |
|
"eval_samples_per_second": 1315.473, |
|
"eval_steps_per_second": 41.437, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8976211547851562, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.1008e-05, |
|
"loss": 0.8512, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_accuracy": 0.6915, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 25.830577850341797, |
|
"eval_f1": 0.6913447845710189, |
|
"eval_gpu_ram_allocated": 0.3996849060058594, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 28, |
|
"eval_loss": 1.0628596544265747, |
|
"eval_precision": 0.6944500140220231, |
|
"eval_recall": 0.6915, |
|
"eval_runtime": 1.5986, |
|
"eval_samples_per_second": 1251.121, |
|
"eval_steps_per_second": 39.41, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.0599517822265625, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.0008e-05, |
|
"loss": 0.8121, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.691, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 25.830596923828125, |
|
"eval_f1": 0.6838005278906992, |
|
"eval_gpu_ram_allocated": 0.39963626861572266, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 31, |
|
"eval_loss": 1.0575964450836182, |
|
"eval_precision": 0.6905089592227732, |
|
"eval_recall": 0.691, |
|
"eval_runtime": 1.5391, |
|
"eval_samples_per_second": 1299.447, |
|
"eval_steps_per_second": 40.933, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.043186187744141, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 9.008e-06, |
|
"loss": 0.7733, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"eval_accuracy": 0.6805, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 25.83068084716797, |
|
"eval_f1": 0.6773653982197514, |
|
"eval_gpu_ram_allocated": 0.3996090888977051, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 28, |
|
"eval_loss": 1.0597690343856812, |
|
"eval_precision": 0.683800229871734, |
|
"eval_recall": 0.6805, |
|
"eval_runtime": 1.5511, |
|
"eval_samples_per_second": 1289.373, |
|
"eval_steps_per_second": 40.615, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.837909698486328, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 8.008e-06, |
|
"loss": 0.7431, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.702, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 25.830699920654297, |
|
"eval_f1": 0.6973584149072398, |
|
"eval_gpu_ram_allocated": 0.39962053298950195, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 31, |
|
"eval_loss": 1.0375711917877197, |
|
"eval_precision": 0.6975777506719404, |
|
"eval_recall": 0.702, |
|
"eval_runtime": 1.5383, |
|
"eval_samples_per_second": 1300.155, |
|
"eval_steps_per_second": 40.955, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8545989990234375, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 7.0080000000000005e-06, |
|
"loss": 0.7065, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"eval_accuracy": 0.6995, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.203968048095703, |
|
"eval_f1": 0.6990180197515704, |
|
"eval_gpu_ram_allocated": 0.39959049224853516, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 28, |
|
"eval_loss": 1.0457407236099243, |
|
"eval_precision": 0.701399008937391, |
|
"eval_recall": 0.6995, |
|
"eval_runtime": 1.5796, |
|
"eval_samples_per_second": 1266.127, |
|
"eval_steps_per_second": 39.883, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.033943176269531, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 6.008000000000001e-06, |
|
"loss": 0.671, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.698, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.203968048095703, |
|
"eval_f1": 0.6956310421863513, |
|
"eval_gpu_ram_allocated": 0.39962339401245117, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 28, |
|
"eval_loss": 1.039590835571289, |
|
"eval_precision": 0.696566876815876, |
|
"eval_recall": 0.698, |
|
"eval_runtime": 1.5301, |
|
"eval_samples_per_second": 1307.136, |
|
"eval_steps_per_second": 41.175, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.038356781005859, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 5.008000000000001e-06, |
|
"loss": 0.6438, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_accuracy": 0.6925, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.203964233398438, |
|
"eval_f1": 0.6886960392924729, |
|
"eval_gpu_ram_allocated": 0.3995976448059082, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 28, |
|
"eval_loss": 1.0473765134811401, |
|
"eval_precision": 0.6906747949914751, |
|
"eval_recall": 0.6925, |
|
"eval_runtime": 1.5569, |
|
"eval_samples_per_second": 1284.611, |
|
"eval_steps_per_second": 40.465, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.827362060546875, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.008e-06, |
|
"loss": 0.6326, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.698, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.204063415527344, |
|
"eval_f1": 0.6972467602684949, |
|
"eval_gpu_ram_allocated": 0.39958906173706055, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 34, |
|
"eval_loss": 1.03837251663208, |
|
"eval_precision": 0.6982790583069545, |
|
"eval_recall": 0.698, |
|
"eval_runtime": 1.5106, |
|
"eval_samples_per_second": 1323.952, |
|
"eval_steps_per_second": 41.704, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8401947021484375, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 3.0080000000000003e-06, |
|
"loss": 0.6121, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"eval_accuracy": 0.698, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.20423126220703, |
|
"eval_f1": 0.6962705560782918, |
|
"eval_gpu_ram_allocated": 0.39959049224853516, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 29, |
|
"eval_loss": 1.0439949035644531, |
|
"eval_precision": 0.6975979381837488, |
|
"eval_recall": 0.698, |
|
"eval_runtime": 1.5616, |
|
"eval_samples_per_second": 1280.706, |
|
"eval_steps_per_second": 40.342, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.016208648681641, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2.008e-06, |
|
"loss": 0.5911, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.701, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.204296112060547, |
|
"eval_f1": 0.6995247290600446, |
|
"eval_gpu_ram_allocated": 0.3996105194091797, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 28, |
|
"eval_loss": 1.0518174171447754, |
|
"eval_precision": 0.7006323341620143, |
|
"eval_recall": 0.701, |
|
"eval_runtime": 1.5659, |
|
"eval_samples_per_second": 1277.26, |
|
"eval_steps_per_second": 40.234, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.033794403076172, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 1.0080000000000001e-06, |
|
"loss": 0.592, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"eval_accuracy": 0.7035, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.20429229736328, |
|
"eval_f1": 0.7022917649865421, |
|
"eval_gpu_ram_allocated": 0.3996419906616211, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 27, |
|
"eval_loss": 1.0490039587020874, |
|
"eval_precision": 0.702476293906477, |
|
"eval_recall": 0.7035, |
|
"eval_runtime": 1.5585, |
|
"eval_samples_per_second": 1283.281, |
|
"eval_steps_per_second": 40.423, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8126220703125, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 8e-09, |
|
"loss": 0.5586, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.701, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.204303741455078, |
|
"eval_f1": 0.6984193396838297, |
|
"eval_gpu_ram_allocated": 0.3996291160583496, |
|
"eval_gpu_ram_cached": 12.9296875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 27, |
|
"eval_loss": 1.0510661602020264, |
|
"eval_precision": 0.700028711222529, |
|
"eval_recall": 0.701, |
|
"eval_runtime": 1.5162, |
|
"eval_samples_per_second": 1319.056, |
|
"eval_steps_per_second": 41.55, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.844818115234375, |
|
"step": 2500 |
|
} |
|
], |
|
"max_steps": 2500, |
|
"num_train_epochs": 10, |
|
"total_flos": 1262611619041920.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|