|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"global_step": 12500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.99984e-05, |
|
"loss": 2.3075, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.90016e-05, |
|
"loss": 1.5316, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.69285, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 25.717952728271484, |
|
"eval_f1": 0.6823748842925246, |
|
"eval_gpu_ram_allocated": 0.39347171783447266, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 33, |
|
"eval_loss": 1.1301642656326294, |
|
"eval_precision": 0.6859286816875455, |
|
"eval_recall": 0.69285, |
|
"eval_runtime": 15.5551, |
|
"eval_samples_per_second": 1285.751, |
|
"eval_steps_per_second": 40.18, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.108898162841797, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.80016e-05, |
|
"loss": 1.0615, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.7049, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.091278076171875, |
|
"eval_f1": 0.7010538468880997, |
|
"eval_gpu_ram_allocated": 0.39355039596557617, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 33, |
|
"eval_loss": 1.002210259437561, |
|
"eval_precision": 0.7064795769852438, |
|
"eval_recall": 0.7049, |
|
"eval_runtime": 15.2502, |
|
"eval_samples_per_second": 1311.462, |
|
"eval_steps_per_second": 40.983, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.858478546142578, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.70016e-05, |
|
"loss": 0.9804, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.71915, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.464637756347656, |
|
"eval_f1": 0.7157918638422971, |
|
"eval_gpu_ram_allocated": 0.39350461959838867, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 33, |
|
"eval_loss": 0.9258390665054321, |
|
"eval_precision": 0.7200804939824763, |
|
"eval_recall": 0.71915, |
|
"eval_runtime": 15.2672, |
|
"eval_samples_per_second": 1310.0, |
|
"eval_steps_per_second": 40.937, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.863994598388672, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.6001600000000003e-05, |
|
"loss": 0.9244, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7286, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.464874267578125, |
|
"eval_f1": 0.721912767172515, |
|
"eval_gpu_ram_allocated": 0.39352893829345703, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 32, |
|
"eval_loss": 0.8794927000999451, |
|
"eval_precision": 0.7265669055087627, |
|
"eval_recall": 0.7286, |
|
"eval_runtime": 15.2593, |
|
"eval_samples_per_second": 1310.678, |
|
"eval_steps_per_second": 40.959, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.881519317626953, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.50016e-05, |
|
"loss": 0.8471, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 0.73045, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.465320587158203, |
|
"eval_f1": 0.7243049440325827, |
|
"eval_gpu_ram_allocated": 0.39346885681152344, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 31, |
|
"eval_loss": 0.8885928988456726, |
|
"eval_precision": 0.7280280015075138, |
|
"eval_recall": 0.73045, |
|
"eval_runtime": 15.2207, |
|
"eval_samples_per_second": 1313.996, |
|
"eval_steps_per_second": 41.062, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.031795501708984, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.4001600000000002e-05, |
|
"loss": 0.8294, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_accuracy": 0.7303, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.465579986572266, |
|
"eval_f1": 0.7285125149375191, |
|
"eval_gpu_ram_allocated": 0.39354896545410156, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 33, |
|
"eval_loss": 0.8648403286933899, |
|
"eval_precision": 0.7304270054757978, |
|
"eval_recall": 0.7303, |
|
"eval_runtime": 15.1251, |
|
"eval_samples_per_second": 1322.302, |
|
"eval_steps_per_second": 41.322, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.822803497314453, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.30032e-05, |
|
"loss": 0.8229, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_accuracy": 0.7347, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.4658203125, |
|
"eval_f1": 0.7306008054961884, |
|
"eval_gpu_ram_allocated": 0.3934760093688965, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 32, |
|
"eval_loss": 0.8477036952972412, |
|
"eval_precision": 0.7313531828325577, |
|
"eval_recall": 0.7347, |
|
"eval_runtime": 15.2385, |
|
"eval_samples_per_second": 1312.463, |
|
"eval_steps_per_second": 41.014, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.870433807373047, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.2003200000000002e-05, |
|
"loss": 0.8227, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7321, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.466053009033203, |
|
"eval_f1": 0.729989989450906, |
|
"eval_gpu_ram_allocated": 0.3935103416442871, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 34, |
|
"eval_loss": 0.8513504862785339, |
|
"eval_precision": 0.7342944597310201, |
|
"eval_recall": 0.7321, |
|
"eval_runtime": 15.2256, |
|
"eval_samples_per_second": 1313.581, |
|
"eval_steps_per_second": 41.049, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8656463623046875, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.10048e-05, |
|
"loss": 0.7515, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_accuracy": 0.73265, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.46636199951172, |
|
"eval_f1": 0.7286075365290565, |
|
"eval_gpu_ram_allocated": 0.3934974670410156, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 32, |
|
"eval_loss": 0.857990026473999, |
|
"eval_precision": 0.7324340392002969, |
|
"eval_recall": 0.73265, |
|
"eval_runtime": 15.6042, |
|
"eval_samples_per_second": 1281.705, |
|
"eval_steps_per_second": 40.053, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.057643890380859, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.0004800000000001e-05, |
|
"loss": 0.7523, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_accuracy": 0.734, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.46658706665039, |
|
"eval_f1": 0.7295923418650617, |
|
"eval_gpu_ram_allocated": 0.39351463317871094, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 32, |
|
"eval_loss": 0.8498074412345886, |
|
"eval_precision": 0.7313935171526833, |
|
"eval_recall": 0.734, |
|
"eval_runtime": 15.7189, |
|
"eval_samples_per_second": 1272.356, |
|
"eval_steps_per_second": 39.761, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8655662536621094, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 9.0048e-06, |
|
"loss": 0.7396, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_accuracy": 0.73645, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.466869354248047, |
|
"eval_f1": 0.7326318327243708, |
|
"eval_gpu_ram_allocated": 0.3934817314147949, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 33, |
|
"eval_loss": 0.8402908444404602, |
|
"eval_precision": 0.7323211491997443, |
|
"eval_recall": 0.73645, |
|
"eval_runtime": 15.1367, |
|
"eval_samples_per_second": 1321.295, |
|
"eval_steps_per_second": 41.29, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8685684204101562, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.0048e-06, |
|
"loss": 0.7308, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.73775, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.467105865478516, |
|
"eval_f1": 0.7347713515400808, |
|
"eval_gpu_ram_allocated": 0.3934803009033203, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 26, |
|
"eval_loss": 0.8414311408996582, |
|
"eval_precision": 0.7338961618089487, |
|
"eval_recall": 0.73775, |
|
"eval_runtime": 15.1121, |
|
"eval_samples_per_second": 1323.444, |
|
"eval_steps_per_second": 41.358, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8610610961914062, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 7.0048e-06, |
|
"loss": 0.6929, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_accuracy": 0.73505, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.468040466308594, |
|
"eval_f1": 0.7321519443896675, |
|
"eval_gpu_ram_allocated": 0.39360761642456055, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 29, |
|
"eval_loss": 0.855096697807312, |
|
"eval_precision": 0.7376195934146232, |
|
"eval_recall": 0.73505, |
|
"eval_runtime": 15.2462, |
|
"eval_samples_per_second": 1311.799, |
|
"eval_steps_per_second": 40.994, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.056495666503906, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 6.0048000000000005e-06, |
|
"loss": 0.6772, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_accuracy": 0.738, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.468387603759766, |
|
"eval_f1": 0.7334835535994888, |
|
"eval_gpu_ram_allocated": 0.3935232162475586, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 31, |
|
"eval_loss": 0.84714674949646, |
|
"eval_precision": 0.7326757628965238, |
|
"eval_recall": 0.738, |
|
"eval_runtime": 15.3571, |
|
"eval_samples_per_second": 1302.33, |
|
"eval_steps_per_second": 40.698, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.835094451904297, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 5.0064e-06, |
|
"loss": 0.682, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_accuracy": 0.735, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.468624114990234, |
|
"eval_f1": 0.731102054402859, |
|
"eval_gpu_ram_allocated": 0.3934817314147949, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 34, |
|
"eval_loss": 0.8460220694541931, |
|
"eval_precision": 0.7310632962275837, |
|
"eval_recall": 0.735, |
|
"eval_runtime": 15.2336, |
|
"eval_samples_per_second": 1312.891, |
|
"eval_steps_per_second": 41.028, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8781509399414062, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.0064e-06, |
|
"loss": 0.6741, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7376, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.46900177001953, |
|
"eval_f1": 0.7334789109935773, |
|
"eval_gpu_ram_allocated": 0.393521785736084, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 31, |
|
"eval_loss": 0.8409038782119751, |
|
"eval_precision": 0.7329890286601207, |
|
"eval_recall": 0.7376, |
|
"eval_runtime": 15.1968, |
|
"eval_samples_per_second": 1316.067, |
|
"eval_steps_per_second": 41.127, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8847808837890625, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 3.0064000000000006e-06, |
|
"loss": 0.6247, |
|
"step": 10625 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"eval_accuracy": 0.736, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.469379425048828, |
|
"eval_f1": 0.7332127747065252, |
|
"eval_gpu_ram_allocated": 0.39348888397216797, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 32, |
|
"eval_loss": 0.8499526381492615, |
|
"eval_precision": 0.7324176547179257, |
|
"eval_recall": 0.736, |
|
"eval_runtime": 15.3802, |
|
"eval_samples_per_second": 1300.37, |
|
"eval_steps_per_second": 40.637, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.0838470458984375, |
|
"step": 10625 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 2.0064000000000004e-06, |
|
"loss": 0.6446, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_accuracy": 0.7358, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.469707489013672, |
|
"eval_f1": 0.7322672918855475, |
|
"eval_gpu_ram_allocated": 0.39359617233276367, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 31, |
|
"eval_loss": 0.8464106321334839, |
|
"eval_precision": 0.7319786149192336, |
|
"eval_recall": 0.7358, |
|
"eval_runtime": 15.2922, |
|
"eval_samples_per_second": 1307.853, |
|
"eval_steps_per_second": 40.87, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8687171936035156, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 1.0064e-06, |
|
"loss": 0.6355, |
|
"step": 11875 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_accuracy": 0.73495, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.470008850097656, |
|
"eval_f1": 0.7311394674076037, |
|
"eval_gpu_ram_allocated": 0.39347314834594727, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 30, |
|
"eval_loss": 0.850346565246582, |
|
"eval_precision": 0.7308376671248696, |
|
"eval_recall": 0.73495, |
|
"eval_runtime": 15.2775, |
|
"eval_samples_per_second": 1309.111, |
|
"eval_steps_per_second": 40.91, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8853225708007812, |
|
"step": 11875 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 6.400000000000001e-09, |
|
"loss": 0.6396, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.73565, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 26.47040557861328, |
|
"eval_f1": 0.732235722379434, |
|
"eval_gpu_ram_allocated": 0.3934931755065918, |
|
"eval_gpu_ram_cached": 16.05078125, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 33, |
|
"eval_loss": 0.8507665991783142, |
|
"eval_precision": 0.7317733808462167, |
|
"eval_recall": 0.73565, |
|
"eval_runtime": 15.2926, |
|
"eval_samples_per_second": 1307.826, |
|
"eval_steps_per_second": 40.87, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8995018005371094, |
|
"step": 12500 |
|
} |
|
], |
|
"max_steps": 12500, |
|
"num_train_epochs": 5, |
|
"total_flos": 6360760919761920.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|