|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9968e-05, |
|
"loss": 2.3062, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.8976000000000003e-05, |
|
"loss": 2.3021, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_accuracy": 0.12, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.56061553955078, |
|
"eval_f1": 0.05185185185185186, |
|
"eval_gpu_ram_allocated": 0.3903946876525879, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 29, |
|
"eval_loss": 2.2975096702575684, |
|
"eval_precision": 0.11020408163265306, |
|
"eval_recall": 0.12, |
|
"eval_runtime": 0.2913, |
|
"eval_samples_per_second": 686.621, |
|
"eval_steps_per_second": 24.032, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8423919677734375, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.7952e-05, |
|
"loss": 2.2615, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_accuracy": 0.31, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.56061553955078, |
|
"eval_f1": 0.2339181338497344, |
|
"eval_gpu_ram_allocated": 0.39037179946899414, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 30, |
|
"eval_loss": 2.192563533782959, |
|
"eval_precision": 0.4648550724637681, |
|
"eval_recall": 0.31, |
|
"eval_runtime": 0.2737, |
|
"eval_samples_per_second": 730.737, |
|
"eval_steps_per_second": 25.576, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8513946533203125, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 1.6928e-05, |
|
"loss": 2.0677, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_accuracy": 0.51, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.56061553955078, |
|
"eval_f1": 0.4301287248566463, |
|
"eval_gpu_ram_allocated": 0.39051055908203125, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 22, |
|
"eval_loss": 1.965756893157959, |
|
"eval_precision": 0.3949837686822981, |
|
"eval_recall": 0.51, |
|
"eval_runtime": 0.3124, |
|
"eval_samples_per_second": 640.226, |
|
"eval_steps_per_second": 22.408, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8537025451660156, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 1.5904000000000002e-05, |
|
"loss": 1.8562, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_accuracy": 0.545, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.560619354248047, |
|
"eval_f1": 0.46549419445976403, |
|
"eval_gpu_ram_allocated": 0.39040040969848633, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 41, |
|
"eval_loss": 1.8382666110992432, |
|
"eval_precision": 0.45874241633360424, |
|
"eval_recall": 0.545, |
|
"eval_runtime": 0.2734, |
|
"eval_samples_per_second": 731.569, |
|
"eval_steps_per_second": 25.605, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.857379913330078, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 1.4880000000000002e-05, |
|
"loss": 1.6929, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_accuracy": 0.555, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.560688018798828, |
|
"eval_f1": 0.4942300415011664, |
|
"eval_gpu_ram_allocated": 0.39035606384277344, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 29, |
|
"eval_loss": 1.7403417825698853, |
|
"eval_precision": 0.5261452781574115, |
|
"eval_recall": 0.555, |
|
"eval_runtime": 0.2751, |
|
"eval_samples_per_second": 727.05, |
|
"eval_steps_per_second": 25.447, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.854930877685547, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 1.3856e-05, |
|
"loss": 1.5569, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"eval_accuracy": 0.585, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.560688018798828, |
|
"eval_f1": 0.5466732171455438, |
|
"eval_gpu_ram_allocated": 0.39036035537719727, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 37, |
|
"eval_loss": 1.6662890911102295, |
|
"eval_precision": 0.6495824550556009, |
|
"eval_recall": 0.585, |
|
"eval_runtime": 0.2754, |
|
"eval_samples_per_second": 726.273, |
|
"eval_steps_per_second": 25.42, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8549232482910156, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 1.2832e-05, |
|
"loss": 1.4636, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_accuracy": 0.58, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.560688018798828, |
|
"eval_f1": 0.5475206766586077, |
|
"eval_gpu_ram_allocated": 0.39038610458374023, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 30, |
|
"eval_loss": 1.612341284751892, |
|
"eval_precision": 0.5538885280526147, |
|
"eval_recall": 0.58, |
|
"eval_runtime": 0.2831, |
|
"eval_samples_per_second": 706.386, |
|
"eval_steps_per_second": 24.724, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8539352416992188, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 1.1808000000000001e-05, |
|
"loss": 1.3683, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"eval_accuracy": 0.595, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.56069564819336, |
|
"eval_f1": 0.5828575381901582, |
|
"eval_gpu_ram_allocated": 0.39037179946899414, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 41, |
|
"eval_loss": 1.5615015029907227, |
|
"eval_precision": 0.6016443571048834, |
|
"eval_recall": 0.595, |
|
"eval_runtime": 0.2727, |
|
"eval_samples_per_second": 733.478, |
|
"eval_steps_per_second": 25.672, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8527297973632812, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 1.0784e-05, |
|
"loss": 1.2649, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"eval_accuracy": 0.61, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.560710906982422, |
|
"eval_f1": 0.590376982026963, |
|
"eval_gpu_ram_allocated": 0.3903775215148926, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 30, |
|
"eval_loss": 1.526059627532959, |
|
"eval_precision": 0.6242595289277212, |
|
"eval_recall": 0.61, |
|
"eval_runtime": 0.2757, |
|
"eval_samples_per_second": 725.456, |
|
"eval_steps_per_second": 25.391, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.864604949951172, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 9.760000000000001e-06, |
|
"loss": 1.1968, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"eval_accuracy": 0.615, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.560710906982422, |
|
"eval_f1": 0.6011782661782662, |
|
"eval_gpu_ram_allocated": 0.3903789520263672, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 45, |
|
"eval_loss": 1.4975976943969727, |
|
"eval_precision": 0.6069767600820232, |
|
"eval_recall": 0.615, |
|
"eval_runtime": 0.2757, |
|
"eval_samples_per_second": 725.338, |
|
"eval_steps_per_second": 25.387, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.876636505126953, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 8.736e-06, |
|
"loss": 1.1291, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"eval_accuracy": 0.615, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.560710906982422, |
|
"eval_f1": 0.598349347882615, |
|
"eval_gpu_ram_allocated": 0.39051055908203125, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 47, |
|
"eval_loss": 1.4755542278289795, |
|
"eval_precision": 0.6163792581687317, |
|
"eval_recall": 0.615, |
|
"eval_runtime": 0.2792, |
|
"eval_samples_per_second": 716.432, |
|
"eval_steps_per_second": 25.075, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.874866485595703, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 7.712e-06, |
|
"loss": 1.0673, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"eval_accuracy": 0.62, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.560714721679688, |
|
"eval_f1": 0.6063605762373413, |
|
"eval_gpu_ram_allocated": 0.39066219329833984, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 35, |
|
"eval_loss": 1.4659814834594727, |
|
"eval_precision": 0.6257950937950938, |
|
"eval_recall": 0.62, |
|
"eval_runtime": 0.2762, |
|
"eval_samples_per_second": 724.01, |
|
"eval_steps_per_second": 25.34, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.875171661376953, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"learning_rate": 6.688e-06, |
|
"loss": 0.9884, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"eval_accuracy": 0.625, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.560771942138672, |
|
"eval_f1": 0.6134740161239549, |
|
"eval_gpu_ram_allocated": 0.3903632164001465, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 33, |
|
"eval_loss": 1.4409570693969727, |
|
"eval_precision": 0.6203985870473285, |
|
"eval_recall": 0.625, |
|
"eval_runtime": 0.2727, |
|
"eval_samples_per_second": 733.533, |
|
"eval_steps_per_second": 25.674, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8757286071777344, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 5.664e-06, |
|
"loss": 0.9743, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"eval_accuracy": 0.635, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.560775756835938, |
|
"eval_f1": 0.6233316744944652, |
|
"eval_gpu_ram_allocated": 0.3904705047607422, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 44, |
|
"eval_loss": 1.4327807426452637, |
|
"eval_precision": 0.6343307918132929, |
|
"eval_recall": 0.635, |
|
"eval_runtime": 0.2727, |
|
"eval_samples_per_second": 733.287, |
|
"eval_steps_per_second": 25.665, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.874713897705078, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 4.6400000000000005e-06, |
|
"loss": 0.926, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"eval_accuracy": 0.615, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 24.560775756835938, |
|
"eval_f1": 0.6088320577269858, |
|
"eval_gpu_ram_allocated": 0.39035749435424805, |
|
"eval_gpu_ram_cached": 12.833984375, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 31, |
|
"eval_loss": 1.4344311952590942, |
|
"eval_precision": 0.6237836346659876, |
|
"eval_recall": 0.615, |
|
"eval_runtime": 0.2758, |
|
"eval_samples_per_second": 725.226, |
|
"eval_steps_per_second": 25.383, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 3.8742332458496094, |
|
"step": 480 |
|
} |
|
], |
|
"max_steps": 625, |
|
"num_train_epochs": 25, |
|
"total_flos": 287732237726976.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|