{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.9968e-05, "loss": 2.3062, "step": 1 }, { "epoch": 1.28, "learning_rate": 1.8976000000000003e-05, "loss": 2.3021, "step": 32 }, { "epoch": 1.28, "eval_accuracy": 0.12, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.56061553955078, "eval_f1": 0.05185185185185186, "eval_gpu_ram_allocated": 0.3903946876525879, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 29, "eval_loss": 2.2975096702575684, "eval_precision": 0.11020408163265306, "eval_recall": 0.12, "eval_runtime": 0.2913, "eval_samples_per_second": 686.621, "eval_steps_per_second": 24.032, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8423919677734375, "step": 32 }, { "epoch": 2.56, "learning_rate": 1.7952e-05, "loss": 2.2615, "step": 64 }, { "epoch": 2.56, "eval_accuracy": 0.31, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.56061553955078, "eval_f1": 0.2339181338497344, "eval_gpu_ram_allocated": 0.39037179946899414, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 30, "eval_loss": 2.192563533782959, "eval_precision": 0.4648550724637681, "eval_recall": 0.31, "eval_runtime": 0.2737, "eval_samples_per_second": 730.737, "eval_steps_per_second": 25.576, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8513946533203125, "step": 64 }, { "epoch": 3.84, "learning_rate": 1.6928e-05, "loss": 2.0677, "step": 96 }, { "epoch": 3.84, "eval_accuracy": 0.51, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.56061553955078, "eval_f1": 0.4301287248566463, "eval_gpu_ram_allocated": 0.39051055908203125, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 22, "eval_loss": 1.965756893157959, "eval_precision": 0.3949837686822981, "eval_recall": 0.51, "eval_runtime": 0.3124, "eval_samples_per_second": 640.226, "eval_steps_per_second": 22.408, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8537025451660156, "step": 96 }, { "epoch": 5.12, "learning_rate": 1.5904000000000002e-05, "loss": 1.8562, "step": 128 }, { "epoch": 5.12, "eval_accuracy": 0.545, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.560619354248047, "eval_f1": 0.46549419445976403, "eval_gpu_ram_allocated": 0.39040040969848633, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 41, "eval_loss": 1.8382666110992432, "eval_precision": 0.45874241633360424, "eval_recall": 0.545, "eval_runtime": 0.2734, "eval_samples_per_second": 731.569, "eval_steps_per_second": 25.605, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.857379913330078, "step": 128 }, { "epoch": 6.4, "learning_rate": 1.4880000000000002e-05, "loss": 1.6929, "step": 160 }, { "epoch": 6.4, "eval_accuracy": 0.555, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.560688018798828, "eval_f1": 0.4942300415011664, "eval_gpu_ram_allocated": 0.39035606384277344, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 29, "eval_loss": 1.7403417825698853, "eval_precision": 0.5261452781574115, "eval_recall": 0.555, "eval_runtime": 0.2751, "eval_samples_per_second": 727.05, "eval_steps_per_second": 25.447, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.854930877685547, "step": 160 }, { "epoch": 7.68, "learning_rate": 1.3856e-05, "loss": 1.5569, "step": 192 }, { "epoch": 7.68, "eval_accuracy": 0.585, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.560688018798828, "eval_f1": 0.5466732171455438, "eval_gpu_ram_allocated": 0.39036035537719727, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 37, "eval_loss": 1.6662890911102295, "eval_precision": 0.6495824550556009, "eval_recall": 0.585, "eval_runtime": 0.2754, "eval_samples_per_second": 726.273, "eval_steps_per_second": 25.42, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8549232482910156, "step": 192 }, { "epoch": 8.96, "learning_rate": 1.2832e-05, "loss": 1.4636, "step": 224 }, { "epoch": 8.96, "eval_accuracy": 0.58, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.560688018798828, "eval_f1": 0.5475206766586077, "eval_gpu_ram_allocated": 0.39038610458374023, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 30, "eval_loss": 1.612341284751892, "eval_precision": 0.5538885280526147, "eval_recall": 0.58, "eval_runtime": 0.2831, "eval_samples_per_second": 706.386, "eval_steps_per_second": 24.724, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8539352416992188, "step": 224 }, { "epoch": 10.24, "learning_rate": 1.1808000000000001e-05, "loss": 1.3683, "step": 256 }, { "epoch": 10.24, "eval_accuracy": 0.595, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.56069564819336, "eval_f1": 0.5828575381901582, "eval_gpu_ram_allocated": 0.39037179946899414, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 41, "eval_loss": 1.5615015029907227, "eval_precision": 0.6016443571048834, "eval_recall": 0.595, "eval_runtime": 0.2727, "eval_samples_per_second": 733.478, "eval_steps_per_second": 25.672, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8527297973632812, "step": 256 }, { "epoch": 11.52, "learning_rate": 1.0784e-05, "loss": 1.2649, "step": 288 }, { "epoch": 11.52, "eval_accuracy": 0.61, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.560710906982422, "eval_f1": 0.590376982026963, "eval_gpu_ram_allocated": 0.3903775215148926, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 30, "eval_loss": 1.526059627532959, "eval_precision": 0.6242595289277212, "eval_recall": 0.61, "eval_runtime": 0.2757, "eval_samples_per_second": 725.456, "eval_steps_per_second": 25.391, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.864604949951172, "step": 288 }, { "epoch": 12.8, "learning_rate": 9.760000000000001e-06, "loss": 1.1968, "step": 320 }, { "epoch": 12.8, "eval_accuracy": 0.615, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.560710906982422, "eval_f1": 0.6011782661782662, "eval_gpu_ram_allocated": 0.3903789520263672, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 45, "eval_loss": 1.4975976943969727, "eval_precision": 0.6069767600820232, "eval_recall": 0.615, "eval_runtime": 0.2757, "eval_samples_per_second": 725.338, "eval_steps_per_second": 25.387, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.876636505126953, "step": 320 }, { "epoch": 14.08, "learning_rate": 8.736e-06, "loss": 1.1291, "step": 352 }, { "epoch": 14.08, "eval_accuracy": 0.615, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.560710906982422, "eval_f1": 0.598349347882615, "eval_gpu_ram_allocated": 0.39051055908203125, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 47, "eval_loss": 1.4755542278289795, "eval_precision": 0.6163792581687317, "eval_recall": 0.615, "eval_runtime": 0.2792, "eval_samples_per_second": 716.432, "eval_steps_per_second": 25.075, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.874866485595703, "step": 352 }, { "epoch": 15.36, "learning_rate": 7.712e-06, "loss": 1.0673, "step": 384 }, { "epoch": 15.36, "eval_accuracy": 0.62, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.560714721679688, "eval_f1": 0.6063605762373413, "eval_gpu_ram_allocated": 0.39066219329833984, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 35, "eval_loss": 1.4659814834594727, "eval_precision": 0.6257950937950938, "eval_recall": 0.62, "eval_runtime": 0.2762, "eval_samples_per_second": 724.01, "eval_steps_per_second": 25.34, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.875171661376953, "step": 384 }, { "epoch": 16.64, "learning_rate": 6.688e-06, "loss": 0.9884, "step": 416 }, { "epoch": 16.64, "eval_accuracy": 0.625, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.560771942138672, "eval_f1": 0.6134740161239549, "eval_gpu_ram_allocated": 0.3903632164001465, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 33, "eval_loss": 1.4409570693969727, "eval_precision": 0.6203985870473285, "eval_recall": 0.625, "eval_runtime": 0.2727, "eval_samples_per_second": 733.533, "eval_steps_per_second": 25.674, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8757286071777344, "step": 416 }, { "epoch": 17.92, "learning_rate": 5.664e-06, "loss": 0.9743, "step": 448 }, { "epoch": 17.92, "eval_accuracy": 0.635, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.560775756835938, "eval_f1": 0.6233316744944652, "eval_gpu_ram_allocated": 0.3904705047607422, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 44, "eval_loss": 1.4327807426452637, "eval_precision": 0.6343307918132929, "eval_recall": 0.635, "eval_runtime": 0.2727, "eval_samples_per_second": 733.287, "eval_steps_per_second": 25.665, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.874713897705078, "step": 448 }, { "epoch": 19.2, "learning_rate": 4.6400000000000005e-06, "loss": 0.926, "step": 480 }, { "epoch": 19.2, "eval_accuracy": 0.615, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.560775756835938, "eval_f1": 0.6088320577269858, "eval_gpu_ram_allocated": 0.39035749435424805, "eval_gpu_ram_cached": 12.833984375, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 31, "eval_loss": 1.4344311952590942, "eval_precision": 0.6237836346659876, "eval_recall": 0.615, "eval_runtime": 0.2758, "eval_samples_per_second": 725.226, "eval_steps_per_second": 25.383, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8742332458496094, "step": 480 } ], "max_steps": 625, "num_train_epochs": 25, "total_flos": 287732237726976.0, "trial_name": null, "trial_params": null }