diogopaes10's picture
Training in progress, step 500
693d611
raw
history blame
12.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 1.9968e-05,
"loss": 2.3062,
"step": 1
},
{
"epoch": 1.28,
"learning_rate": 1.8976000000000003e-05,
"loss": 2.3021,
"step": 32
},
{
"epoch": 1.28,
"eval_accuracy": 0.12,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.56061553955078,
"eval_f1": 0.05185185185185186,
"eval_gpu_ram_allocated": 0.3903946876525879,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 29,
"eval_loss": 2.2975096702575684,
"eval_precision": 0.11020408163265306,
"eval_recall": 0.12,
"eval_runtime": 0.2913,
"eval_samples_per_second": 686.621,
"eval_steps_per_second": 24.032,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8423919677734375,
"step": 32
},
{
"epoch": 2.56,
"learning_rate": 1.7952e-05,
"loss": 2.2615,
"step": 64
},
{
"epoch": 2.56,
"eval_accuracy": 0.31,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.56061553955078,
"eval_f1": 0.2339181338497344,
"eval_gpu_ram_allocated": 0.39037179946899414,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 30,
"eval_loss": 2.192563533782959,
"eval_precision": 0.4648550724637681,
"eval_recall": 0.31,
"eval_runtime": 0.2737,
"eval_samples_per_second": 730.737,
"eval_steps_per_second": 25.576,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8513946533203125,
"step": 64
},
{
"epoch": 3.84,
"learning_rate": 1.6928e-05,
"loss": 2.0677,
"step": 96
},
{
"epoch": 3.84,
"eval_accuracy": 0.51,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.56061553955078,
"eval_f1": 0.4301287248566463,
"eval_gpu_ram_allocated": 0.39051055908203125,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 22,
"eval_loss": 1.965756893157959,
"eval_precision": 0.3949837686822981,
"eval_recall": 0.51,
"eval_runtime": 0.3124,
"eval_samples_per_second": 640.226,
"eval_steps_per_second": 22.408,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8537025451660156,
"step": 96
},
{
"epoch": 5.12,
"learning_rate": 1.5904000000000002e-05,
"loss": 1.8562,
"step": 128
},
{
"epoch": 5.12,
"eval_accuracy": 0.545,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.560619354248047,
"eval_f1": 0.46549419445976403,
"eval_gpu_ram_allocated": 0.39040040969848633,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 41,
"eval_loss": 1.8382666110992432,
"eval_precision": 0.45874241633360424,
"eval_recall": 0.545,
"eval_runtime": 0.2734,
"eval_samples_per_second": 731.569,
"eval_steps_per_second": 25.605,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.857379913330078,
"step": 128
},
{
"epoch": 6.4,
"learning_rate": 1.4880000000000002e-05,
"loss": 1.6929,
"step": 160
},
{
"epoch": 6.4,
"eval_accuracy": 0.555,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.560688018798828,
"eval_f1": 0.4942300415011664,
"eval_gpu_ram_allocated": 0.39035606384277344,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 29,
"eval_loss": 1.7403417825698853,
"eval_precision": 0.5261452781574115,
"eval_recall": 0.555,
"eval_runtime": 0.2751,
"eval_samples_per_second": 727.05,
"eval_steps_per_second": 25.447,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.854930877685547,
"step": 160
},
{
"epoch": 7.68,
"learning_rate": 1.3856e-05,
"loss": 1.5569,
"step": 192
},
{
"epoch": 7.68,
"eval_accuracy": 0.585,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.560688018798828,
"eval_f1": 0.5466732171455438,
"eval_gpu_ram_allocated": 0.39036035537719727,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 37,
"eval_loss": 1.6662890911102295,
"eval_precision": 0.6495824550556009,
"eval_recall": 0.585,
"eval_runtime": 0.2754,
"eval_samples_per_second": 726.273,
"eval_steps_per_second": 25.42,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8549232482910156,
"step": 192
},
{
"epoch": 8.96,
"learning_rate": 1.2832e-05,
"loss": 1.4636,
"step": 224
},
{
"epoch": 8.96,
"eval_accuracy": 0.58,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.560688018798828,
"eval_f1": 0.5475206766586077,
"eval_gpu_ram_allocated": 0.39038610458374023,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 30,
"eval_loss": 1.612341284751892,
"eval_precision": 0.5538885280526147,
"eval_recall": 0.58,
"eval_runtime": 0.2831,
"eval_samples_per_second": 706.386,
"eval_steps_per_second": 24.724,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8539352416992188,
"step": 224
},
{
"epoch": 10.24,
"learning_rate": 1.1808000000000001e-05,
"loss": 1.3683,
"step": 256
},
{
"epoch": 10.24,
"eval_accuracy": 0.595,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.56069564819336,
"eval_f1": 0.5828575381901582,
"eval_gpu_ram_allocated": 0.39037179946899414,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 41,
"eval_loss": 1.5615015029907227,
"eval_precision": 0.6016443571048834,
"eval_recall": 0.595,
"eval_runtime": 0.2727,
"eval_samples_per_second": 733.478,
"eval_steps_per_second": 25.672,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8527297973632812,
"step": 256
},
{
"epoch": 11.52,
"learning_rate": 1.0784e-05,
"loss": 1.2649,
"step": 288
},
{
"epoch": 11.52,
"eval_accuracy": 0.61,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.560710906982422,
"eval_f1": 0.590376982026963,
"eval_gpu_ram_allocated": 0.3903775215148926,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 30,
"eval_loss": 1.526059627532959,
"eval_precision": 0.6242595289277212,
"eval_recall": 0.61,
"eval_runtime": 0.2757,
"eval_samples_per_second": 725.456,
"eval_steps_per_second": 25.391,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.864604949951172,
"step": 288
},
{
"epoch": 12.8,
"learning_rate": 9.760000000000001e-06,
"loss": 1.1968,
"step": 320
},
{
"epoch": 12.8,
"eval_accuracy": 0.615,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.560710906982422,
"eval_f1": 0.6011782661782662,
"eval_gpu_ram_allocated": 0.3903789520263672,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 45,
"eval_loss": 1.4975976943969727,
"eval_precision": 0.6069767600820232,
"eval_recall": 0.615,
"eval_runtime": 0.2757,
"eval_samples_per_second": 725.338,
"eval_steps_per_second": 25.387,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.876636505126953,
"step": 320
},
{
"epoch": 14.08,
"learning_rate": 8.736e-06,
"loss": 1.1291,
"step": 352
},
{
"epoch": 14.08,
"eval_accuracy": 0.615,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.560710906982422,
"eval_f1": 0.598349347882615,
"eval_gpu_ram_allocated": 0.39051055908203125,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 47,
"eval_loss": 1.4755542278289795,
"eval_precision": 0.6163792581687317,
"eval_recall": 0.615,
"eval_runtime": 0.2792,
"eval_samples_per_second": 716.432,
"eval_steps_per_second": 25.075,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.874866485595703,
"step": 352
},
{
"epoch": 15.36,
"learning_rate": 7.712e-06,
"loss": 1.0673,
"step": 384
},
{
"epoch": 15.36,
"eval_accuracy": 0.62,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.560714721679688,
"eval_f1": 0.6063605762373413,
"eval_gpu_ram_allocated": 0.39066219329833984,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 35,
"eval_loss": 1.4659814834594727,
"eval_precision": 0.6257950937950938,
"eval_recall": 0.62,
"eval_runtime": 0.2762,
"eval_samples_per_second": 724.01,
"eval_steps_per_second": 25.34,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.875171661376953,
"step": 384
},
{
"epoch": 16.64,
"learning_rate": 6.688e-06,
"loss": 0.9884,
"step": 416
},
{
"epoch": 16.64,
"eval_accuracy": 0.625,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.560771942138672,
"eval_f1": 0.6134740161239549,
"eval_gpu_ram_allocated": 0.3903632164001465,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 33,
"eval_loss": 1.4409570693969727,
"eval_precision": 0.6203985870473285,
"eval_recall": 0.625,
"eval_runtime": 0.2727,
"eval_samples_per_second": 733.533,
"eval_steps_per_second": 25.674,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8757286071777344,
"step": 416
},
{
"epoch": 17.92,
"learning_rate": 5.664e-06,
"loss": 0.9743,
"step": 448
},
{
"epoch": 17.92,
"eval_accuracy": 0.635,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.560775756835938,
"eval_f1": 0.6233316744944652,
"eval_gpu_ram_allocated": 0.3904705047607422,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 44,
"eval_loss": 1.4327807426452637,
"eval_precision": 0.6343307918132929,
"eval_recall": 0.635,
"eval_runtime": 0.2727,
"eval_samples_per_second": 733.287,
"eval_steps_per_second": 25.665,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.874713897705078,
"step": 448
},
{
"epoch": 19.2,
"learning_rate": 4.6400000000000005e-06,
"loss": 0.926,
"step": 480
},
{
"epoch": 19.2,
"eval_accuracy": 0.615,
"eval_disk_space_total": 78.1898422241211,
"eval_disk_space_used": 24.560775756835938,
"eval_f1": 0.6088320577269858,
"eval_gpu_ram_allocated": 0.39035749435424805,
"eval_gpu_ram_cached": 12.833984375,
"eval_gpu_ram_total": 39.56402587890625,
"eval_gpu_utilization": 31,
"eval_loss": 1.4344311952590942,
"eval_precision": 0.6237836346659876,
"eval_recall": 0.615,
"eval_runtime": 0.2758,
"eval_samples_per_second": 725.226,
"eval_steps_per_second": 25.383,
"eval_system_ram_total": 83.48074722290039,
"eval_system_ram_used": 3.8742332458496094,
"step": 480
}
],
"max_steps": 625,
"num_train_epochs": 25,
"total_flos": 287732237726976.0,
"trial_name": null,
"trial_params": null
}