diogopaes10's picture
Training in progress, step 12500
cd6047c
raw
history blame
17.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 12500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.99984e-05,
"loss": 2.3075,
"step": 1
},
{
"epoch": 0.25,
"learning_rate": 1.90016e-05,
"loss": 1.5316,
"step": 625
},
{
"epoch": 0.25,
"eval_accuracy": 0.69285,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 25.717952728271484,
"eval_f1": 0.6823748842925246,
"eval_gpu_ram_allocated": 0.39347171783447266,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 33,
"eval_loss": 1.1301642656326294,
"eval_precision": 0.6859286816875455,
"eval_recall": 0.69285,
"eval_runtime": 15.5551,
"eval_samples_per_second": 1285.751,
"eval_steps_per_second": 40.18,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.108898162841797,
"step": 625
},
{
"epoch": 0.5,
"learning_rate": 1.80016e-05,
"loss": 1.0615,
"step": 1250
},
{
"epoch": 0.5,
"eval_accuracy": 0.7049,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.091278076171875,
"eval_f1": 0.7010538468880997,
"eval_gpu_ram_allocated": 0.39355039596557617,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 33,
"eval_loss": 1.002210259437561,
"eval_precision": 0.7064795769852438,
"eval_recall": 0.7049,
"eval_runtime": 15.2502,
"eval_samples_per_second": 1311.462,
"eval_steps_per_second": 40.983,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.858478546142578,
"step": 1250
},
{
"epoch": 0.75,
"learning_rate": 1.70016e-05,
"loss": 0.9804,
"step": 1875
},
{
"epoch": 0.75,
"eval_accuracy": 0.71915,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.464637756347656,
"eval_f1": 0.7157918638422971,
"eval_gpu_ram_allocated": 0.39350461959838867,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 33,
"eval_loss": 0.9258390665054321,
"eval_precision": 0.7200804939824763,
"eval_recall": 0.71915,
"eval_runtime": 15.2672,
"eval_samples_per_second": 1310.0,
"eval_steps_per_second": 40.937,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.863994598388672,
"step": 1875
},
{
"epoch": 1.0,
"learning_rate": 1.6001600000000003e-05,
"loss": 0.9244,
"step": 2500
},
{
"epoch": 1.0,
"eval_accuracy": 0.7286,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.464874267578125,
"eval_f1": 0.721912767172515,
"eval_gpu_ram_allocated": 0.39352893829345703,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 32,
"eval_loss": 0.8794927000999451,
"eval_precision": 0.7265669055087627,
"eval_recall": 0.7286,
"eval_runtime": 15.2593,
"eval_samples_per_second": 1310.678,
"eval_steps_per_second": 40.959,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.881519317626953,
"step": 2500
},
{
"epoch": 1.25,
"learning_rate": 1.50016e-05,
"loss": 0.8471,
"step": 3125
},
{
"epoch": 1.25,
"eval_accuracy": 0.73045,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.465320587158203,
"eval_f1": 0.7243049440325827,
"eval_gpu_ram_allocated": 0.39346885681152344,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 31,
"eval_loss": 0.8885928988456726,
"eval_precision": 0.7280280015075138,
"eval_recall": 0.73045,
"eval_runtime": 15.2207,
"eval_samples_per_second": 1313.996,
"eval_steps_per_second": 41.062,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.031795501708984,
"step": 3125
},
{
"epoch": 1.5,
"learning_rate": 1.4001600000000002e-05,
"loss": 0.8294,
"step": 3750
},
{
"epoch": 1.5,
"eval_accuracy": 0.7303,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.465579986572266,
"eval_f1": 0.7285125149375191,
"eval_gpu_ram_allocated": 0.39354896545410156,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 33,
"eval_loss": 0.8648403286933899,
"eval_precision": 0.7304270054757978,
"eval_recall": 0.7303,
"eval_runtime": 15.1251,
"eval_samples_per_second": 1322.302,
"eval_steps_per_second": 41.322,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.822803497314453,
"step": 3750
},
{
"epoch": 1.75,
"learning_rate": 1.30032e-05,
"loss": 0.8229,
"step": 4375
},
{
"epoch": 1.75,
"eval_accuracy": 0.7347,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.4658203125,
"eval_f1": 0.7306008054961884,
"eval_gpu_ram_allocated": 0.3934760093688965,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 32,
"eval_loss": 0.8477036952972412,
"eval_precision": 0.7313531828325577,
"eval_recall": 0.7347,
"eval_runtime": 15.2385,
"eval_samples_per_second": 1312.463,
"eval_steps_per_second": 41.014,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.870433807373047,
"step": 4375
},
{
"epoch": 2.0,
"learning_rate": 1.2003200000000002e-05,
"loss": 0.8227,
"step": 5000
},
{
"epoch": 2.0,
"eval_accuracy": 0.7321,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.466053009033203,
"eval_f1": 0.729989989450906,
"eval_gpu_ram_allocated": 0.3935103416442871,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 34,
"eval_loss": 0.8513504862785339,
"eval_precision": 0.7342944597310201,
"eval_recall": 0.7321,
"eval_runtime": 15.2256,
"eval_samples_per_second": 1313.581,
"eval_steps_per_second": 41.049,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8656463623046875,
"step": 5000
},
{
"epoch": 2.25,
"learning_rate": 1.10048e-05,
"loss": 0.7515,
"step": 5625
},
{
"epoch": 2.25,
"eval_accuracy": 0.73265,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.46636199951172,
"eval_f1": 0.7286075365290565,
"eval_gpu_ram_allocated": 0.3934974670410156,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 32,
"eval_loss": 0.857990026473999,
"eval_precision": 0.7324340392002969,
"eval_recall": 0.73265,
"eval_runtime": 15.6042,
"eval_samples_per_second": 1281.705,
"eval_steps_per_second": 40.053,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.057643890380859,
"step": 5625
},
{
"epoch": 2.5,
"learning_rate": 1.0004800000000001e-05,
"loss": 0.7523,
"step": 6250
},
{
"epoch": 2.5,
"eval_accuracy": 0.734,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.46658706665039,
"eval_f1": 0.7295923418650617,
"eval_gpu_ram_allocated": 0.39351463317871094,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 32,
"eval_loss": 0.8498074412345886,
"eval_precision": 0.7313935171526833,
"eval_recall": 0.734,
"eval_runtime": 15.7189,
"eval_samples_per_second": 1272.356,
"eval_steps_per_second": 39.761,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8655662536621094,
"step": 6250
},
{
"epoch": 2.75,
"learning_rate": 9.0048e-06,
"loss": 0.7396,
"step": 6875
},
{
"epoch": 2.75,
"eval_accuracy": 0.73645,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.466869354248047,
"eval_f1": 0.7326318327243708,
"eval_gpu_ram_allocated": 0.3934817314147949,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 33,
"eval_loss": 0.8402908444404602,
"eval_precision": 0.7323211491997443,
"eval_recall": 0.73645,
"eval_runtime": 15.1367,
"eval_samples_per_second": 1321.295,
"eval_steps_per_second": 41.29,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8685684204101562,
"step": 6875
},
{
"epoch": 3.0,
"learning_rate": 8.0048e-06,
"loss": 0.7308,
"step": 7500
},
{
"epoch": 3.0,
"eval_accuracy": 0.73775,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.467105865478516,
"eval_f1": 0.7347713515400808,
"eval_gpu_ram_allocated": 0.3934803009033203,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 26,
"eval_loss": 0.8414311408996582,
"eval_precision": 0.7338961618089487,
"eval_recall": 0.73775,
"eval_runtime": 15.1121,
"eval_samples_per_second": 1323.444,
"eval_steps_per_second": 41.358,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8610610961914062,
"step": 7500
},
{
"epoch": 3.25,
"learning_rate": 7.0048e-06,
"loss": 0.6929,
"step": 8125
},
{
"epoch": 3.25,
"eval_accuracy": 0.73505,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.468040466308594,
"eval_f1": 0.7321519443896675,
"eval_gpu_ram_allocated": 0.39360761642456055,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 29,
"eval_loss": 0.855096697807312,
"eval_precision": 0.7376195934146232,
"eval_recall": 0.73505,
"eval_runtime": 15.2462,
"eval_samples_per_second": 1311.799,
"eval_steps_per_second": 40.994,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.056495666503906,
"step": 8125
},
{
"epoch": 3.5,
"learning_rate": 6.0048000000000005e-06,
"loss": 0.6772,
"step": 8750
},
{
"epoch": 3.5,
"eval_accuracy": 0.738,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.468387603759766,
"eval_f1": 0.7334835535994888,
"eval_gpu_ram_allocated": 0.3935232162475586,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 31,
"eval_loss": 0.84714674949646,
"eval_precision": 0.7326757628965238,
"eval_recall": 0.738,
"eval_runtime": 15.3571,
"eval_samples_per_second": 1302.33,
"eval_steps_per_second": 40.698,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.835094451904297,
"step": 8750
},
{
"epoch": 3.75,
"learning_rate": 5.0064e-06,
"loss": 0.682,
"step": 9375
},
{
"epoch": 3.75,
"eval_accuracy": 0.735,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.468624114990234,
"eval_f1": 0.731102054402859,
"eval_gpu_ram_allocated": 0.3934817314147949,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 34,
"eval_loss": 0.8460220694541931,
"eval_precision": 0.7310632962275837,
"eval_recall": 0.735,
"eval_runtime": 15.2336,
"eval_samples_per_second": 1312.891,
"eval_steps_per_second": 41.028,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8781509399414062,
"step": 9375
},
{
"epoch": 4.0,
"learning_rate": 4.0064e-06,
"loss": 0.6741,
"step": 10000
},
{
"epoch": 4.0,
"eval_accuracy": 0.7376,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.46900177001953,
"eval_f1": 0.7334789109935773,
"eval_gpu_ram_allocated": 0.393521785736084,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 31,
"eval_loss": 0.8409038782119751,
"eval_precision": 0.7329890286601207,
"eval_recall": 0.7376,
"eval_runtime": 15.1968,
"eval_samples_per_second": 1316.067,
"eval_steps_per_second": 41.127,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8847808837890625,
"step": 10000
},
{
"epoch": 4.25,
"learning_rate": 3.0064000000000006e-06,
"loss": 0.6247,
"step": 10625
},
{
"epoch": 4.25,
"eval_accuracy": 0.736,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.469379425048828,
"eval_f1": 0.7332127747065252,
"eval_gpu_ram_allocated": 0.39348888397216797,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 32,
"eval_loss": 0.8499526381492615,
"eval_precision": 0.7324176547179257,
"eval_recall": 0.736,
"eval_runtime": 15.3802,
"eval_samples_per_second": 1300.37,
"eval_steps_per_second": 40.637,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 4.0838470458984375,
"step": 10625
},
{
"epoch": 4.5,
"learning_rate": 2.0064000000000004e-06,
"loss": 0.6446,
"step": 11250
},
{
"epoch": 4.5,
"eval_accuracy": 0.7358,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.469707489013672,
"eval_f1": 0.7322672918855475,
"eval_gpu_ram_allocated": 0.39359617233276367,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 31,
"eval_loss": 0.8464106321334839,
"eval_precision": 0.7319786149192336,
"eval_recall": 0.7358,
"eval_runtime": 15.2922,
"eval_samples_per_second": 1307.853,
"eval_steps_per_second": 40.87,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8687171936035156,
"step": 11250
},
{
"epoch": 4.75,
"learning_rate": 1.0064e-06,
"loss": 0.6355,
"step": 11875
},
{
"epoch": 4.75,
"eval_accuracy": 0.73495,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.470008850097656,
"eval_f1": 0.7311394674076037,
"eval_gpu_ram_allocated": 0.39347314834594727,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 30,
"eval_loss": 0.850346565246582,
"eval_precision": 0.7308376671248696,
"eval_recall": 0.73495,
"eval_runtime": 15.2775,
"eval_samples_per_second": 1309.111,
"eval_steps_per_second": 40.91,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8853225708007812,
"step": 11875
},
{
"epoch": 5.0,
"learning_rate": 6.400000000000001e-09,
"loss": 0.6396,
"step": 12500
},
{
"epoch": 5.0,
"eval_accuracy": 0.73565,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 26.47040557861328,
"eval_f1": 0.732235722379434,
"eval_gpu_ram_allocated": 0.3934931755065918,
"eval_gpu_ram_cached": 16.05078125,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 33,
"eval_loss": 0.8507665991783142,
"eval_precision": 0.7317733808462167,
"eval_recall": 0.73565,
"eval_runtime": 15.2926,
"eval_samples_per_second": 1307.826,
"eval_steps_per_second": 40.87,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8995018005371094,
"step": 12500
}
],
"max_steps": 12500,
"num_train_epochs": 5,
"total_flos": 6360760919761920.0,
"trial_name": null,
"trial_params": null
}