{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 12500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.99984e-05, "loss": 2.3075, "step": 1 }, { "epoch": 0.25, "learning_rate": 1.90016e-05, "loss": 1.5316, "step": 625 }, { "epoch": 0.25, "eval_accuracy": 0.69285, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 25.717952728271484, "eval_f1": 0.6823748842925246, "eval_gpu_ram_allocated": 0.39347171783447266, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 33, "eval_loss": 1.1301642656326294, "eval_precision": 0.6859286816875455, "eval_recall": 0.69285, "eval_runtime": 15.5551, "eval_samples_per_second": 1285.751, "eval_steps_per_second": 40.18, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.108898162841797, "step": 625 }, { "epoch": 0.5, "learning_rate": 1.80016e-05, "loss": 1.0615, "step": 1250 }, { "epoch": 0.5, "eval_accuracy": 0.7049, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.091278076171875, "eval_f1": 0.7010538468880997, "eval_gpu_ram_allocated": 0.39355039596557617, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 33, "eval_loss": 1.002210259437561, "eval_precision": 0.7064795769852438, "eval_recall": 0.7049, "eval_runtime": 15.2502, "eval_samples_per_second": 1311.462, "eval_steps_per_second": 40.983, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.858478546142578, "step": 1250 }, { "epoch": 0.75, "learning_rate": 1.70016e-05, "loss": 0.9804, "step": 1875 }, { "epoch": 0.75, "eval_accuracy": 0.71915, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.464637756347656, "eval_f1": 0.7157918638422971, "eval_gpu_ram_allocated": 0.39350461959838867, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 33, "eval_loss": 0.9258390665054321, "eval_precision": 0.7200804939824763, "eval_recall": 0.71915, "eval_runtime": 15.2672, "eval_samples_per_second": 1310.0, "eval_steps_per_second": 40.937, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.863994598388672, "step": 1875 }, { "epoch": 1.0, "learning_rate": 1.6001600000000003e-05, "loss": 0.9244, "step": 2500 }, { "epoch": 1.0, "eval_accuracy": 0.7286, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.464874267578125, "eval_f1": 0.721912767172515, "eval_gpu_ram_allocated": 0.39352893829345703, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 32, "eval_loss": 0.8794927000999451, "eval_precision": 0.7265669055087627, "eval_recall": 0.7286, "eval_runtime": 15.2593, "eval_samples_per_second": 1310.678, "eval_steps_per_second": 40.959, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.881519317626953, "step": 2500 }, { "epoch": 1.25, "learning_rate": 1.50016e-05, "loss": 0.8471, "step": 3125 }, { "epoch": 1.25, "eval_accuracy": 0.73045, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.465320587158203, "eval_f1": 0.7243049440325827, "eval_gpu_ram_allocated": 0.39346885681152344, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 31, "eval_loss": 0.8885928988456726, "eval_precision": 0.7280280015075138, "eval_recall": 0.73045, "eval_runtime": 15.2207, "eval_samples_per_second": 1313.996, "eval_steps_per_second": 41.062, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.031795501708984, "step": 3125 }, { "epoch": 1.5, "learning_rate": 1.4001600000000002e-05, "loss": 0.8294, "step": 3750 }, { "epoch": 1.5, "eval_accuracy": 0.7303, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.465579986572266, "eval_f1": 0.7285125149375191, "eval_gpu_ram_allocated": 0.39354896545410156, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 33, "eval_loss": 0.8648403286933899, "eval_precision": 0.7304270054757978, "eval_recall": 0.7303, "eval_runtime": 15.1251, "eval_samples_per_second": 1322.302, "eval_steps_per_second": 41.322, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.822803497314453, "step": 3750 }, { "epoch": 1.75, "learning_rate": 1.30032e-05, "loss": 0.8229, "step": 4375 }, { "epoch": 1.75, "eval_accuracy": 0.7347, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.4658203125, "eval_f1": 0.7306008054961884, "eval_gpu_ram_allocated": 0.3934760093688965, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 32, "eval_loss": 0.8477036952972412, "eval_precision": 0.7313531828325577, "eval_recall": 0.7347, "eval_runtime": 15.2385, "eval_samples_per_second": 1312.463, "eval_steps_per_second": 41.014, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.870433807373047, "step": 4375 }, { "epoch": 2.0, "learning_rate": 1.2003200000000002e-05, "loss": 0.8227, "step": 5000 }, { "epoch": 2.0, "eval_accuracy": 0.7321, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.466053009033203, "eval_f1": 0.729989989450906, "eval_gpu_ram_allocated": 0.3935103416442871, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 34, "eval_loss": 0.8513504862785339, "eval_precision": 0.7342944597310201, "eval_recall": 0.7321, "eval_runtime": 15.2256, "eval_samples_per_second": 1313.581, "eval_steps_per_second": 41.049, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8656463623046875, "step": 5000 }, { "epoch": 2.25, "learning_rate": 1.10048e-05, "loss": 0.7515, "step": 5625 }, { "epoch": 2.25, "eval_accuracy": 0.73265, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.46636199951172, "eval_f1": 0.7286075365290565, "eval_gpu_ram_allocated": 0.3934974670410156, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 32, "eval_loss": 0.857990026473999, "eval_precision": 0.7324340392002969, "eval_recall": 0.73265, "eval_runtime": 15.6042, "eval_samples_per_second": 1281.705, "eval_steps_per_second": 40.053, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.057643890380859, "step": 5625 }, { "epoch": 2.5, "learning_rate": 1.0004800000000001e-05, "loss": 0.7523, "step": 6250 }, { "epoch": 2.5, "eval_accuracy": 0.734, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.46658706665039, "eval_f1": 0.7295923418650617, "eval_gpu_ram_allocated": 0.39351463317871094, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 32, "eval_loss": 0.8498074412345886, "eval_precision": 0.7313935171526833, "eval_recall": 0.734, "eval_runtime": 15.7189, "eval_samples_per_second": 1272.356, "eval_steps_per_second": 39.761, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8655662536621094, "step": 6250 }, { "epoch": 2.75, "learning_rate": 9.0048e-06, "loss": 0.7396, "step": 6875 }, { "epoch": 2.75, "eval_accuracy": 0.73645, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.466869354248047, "eval_f1": 0.7326318327243708, "eval_gpu_ram_allocated": 0.3934817314147949, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 33, "eval_loss": 0.8402908444404602, "eval_precision": 0.7323211491997443, "eval_recall": 0.73645, "eval_runtime": 15.1367, "eval_samples_per_second": 1321.295, "eval_steps_per_second": 41.29, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8685684204101562, "step": 6875 }, { "epoch": 3.0, "learning_rate": 8.0048e-06, "loss": 0.7308, "step": 7500 }, { "epoch": 3.0, "eval_accuracy": 0.73775, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.467105865478516, "eval_f1": 0.7347713515400808, "eval_gpu_ram_allocated": 0.3934803009033203, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 26, "eval_loss": 0.8414311408996582, "eval_precision": 0.7338961618089487, "eval_recall": 0.73775, "eval_runtime": 15.1121, "eval_samples_per_second": 1323.444, "eval_steps_per_second": 41.358, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8610610961914062, "step": 7500 }, { "epoch": 3.25, "learning_rate": 7.0048e-06, "loss": 0.6929, "step": 8125 }, { "epoch": 3.25, "eval_accuracy": 0.73505, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.468040466308594, "eval_f1": 0.7321519443896675, "eval_gpu_ram_allocated": 0.39360761642456055, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 29, "eval_loss": 0.855096697807312, "eval_precision": 0.7376195934146232, "eval_recall": 0.73505, "eval_runtime": 15.2462, "eval_samples_per_second": 1311.799, "eval_steps_per_second": 40.994, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.056495666503906, "step": 8125 }, { "epoch": 3.5, "learning_rate": 6.0048000000000005e-06, "loss": 0.6772, "step": 8750 }, { "epoch": 3.5, "eval_accuracy": 0.738, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.468387603759766, "eval_f1": 0.7334835535994888, "eval_gpu_ram_allocated": 0.3935232162475586, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 31, "eval_loss": 0.84714674949646, "eval_precision": 0.7326757628965238, "eval_recall": 0.738, "eval_runtime": 15.3571, "eval_samples_per_second": 1302.33, "eval_steps_per_second": 40.698, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.835094451904297, "step": 8750 }, { "epoch": 3.75, "learning_rate": 5.0064e-06, "loss": 0.682, "step": 9375 }, { "epoch": 3.75, "eval_accuracy": 0.735, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.468624114990234, "eval_f1": 0.731102054402859, "eval_gpu_ram_allocated": 0.3934817314147949, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 34, "eval_loss": 0.8460220694541931, "eval_precision": 0.7310632962275837, "eval_recall": 0.735, "eval_runtime": 15.2336, "eval_samples_per_second": 1312.891, "eval_steps_per_second": 41.028, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8781509399414062, "step": 9375 }, { "epoch": 4.0, "learning_rate": 4.0064e-06, "loss": 0.6741, "step": 10000 }, { "epoch": 4.0, "eval_accuracy": 0.7376, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.46900177001953, "eval_f1": 0.7334789109935773, "eval_gpu_ram_allocated": 0.393521785736084, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 31, "eval_loss": 0.8409038782119751, "eval_precision": 0.7329890286601207, "eval_recall": 0.7376, "eval_runtime": 15.1968, "eval_samples_per_second": 1316.067, "eval_steps_per_second": 41.127, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8847808837890625, "step": 10000 }, { "epoch": 4.25, "learning_rate": 3.0064000000000006e-06, "loss": 0.6247, "step": 10625 }, { "epoch": 4.25, "eval_accuracy": 0.736, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.469379425048828, "eval_f1": 0.7332127747065252, "eval_gpu_ram_allocated": 0.39348888397216797, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 32, "eval_loss": 0.8499526381492615, "eval_precision": 0.7324176547179257, "eval_recall": 0.736, "eval_runtime": 15.3802, "eval_samples_per_second": 1300.37, "eval_steps_per_second": 40.637, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.0838470458984375, "step": 10625 }, { "epoch": 4.5, "learning_rate": 2.0064000000000004e-06, "loss": 0.6446, "step": 11250 }, { "epoch": 4.5, "eval_accuracy": 0.7358, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.469707489013672, "eval_f1": 0.7322672918855475, "eval_gpu_ram_allocated": 0.39359617233276367, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 31, "eval_loss": 0.8464106321334839, "eval_precision": 0.7319786149192336, "eval_recall": 0.7358, "eval_runtime": 15.2922, "eval_samples_per_second": 1307.853, "eval_steps_per_second": 40.87, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8687171936035156, "step": 11250 }, { "epoch": 4.75, "learning_rate": 1.0064e-06, "loss": 0.6355, "step": 11875 }, { "epoch": 4.75, "eval_accuracy": 0.73495, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.470008850097656, "eval_f1": 0.7311394674076037, "eval_gpu_ram_allocated": 0.39347314834594727, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 30, "eval_loss": 0.850346565246582, "eval_precision": 0.7308376671248696, "eval_recall": 0.73495, "eval_runtime": 15.2775, "eval_samples_per_second": 1309.111, "eval_steps_per_second": 40.91, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8853225708007812, "step": 11875 }, { "epoch": 5.0, "learning_rate": 6.400000000000001e-09, "loss": 0.6396, "step": 12500 }, { "epoch": 5.0, "eval_accuracy": 0.73565, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 26.47040557861328, "eval_f1": 0.732235722379434, "eval_gpu_ram_allocated": 0.3934931755065918, "eval_gpu_ram_cached": 16.05078125, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 33, "eval_loss": 0.8507665991783142, "eval_precision": 0.7317733808462167, "eval_recall": 0.73565, "eval_runtime": 15.2926, "eval_samples_per_second": 1307.826, "eval_steps_per_second": 40.87, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8995018005371094, "step": 12500 } ], "max_steps": 12500, "num_train_epochs": 5, "total_flos": 6360760919761920.0, "trial_name": null, "trial_params": null }