{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9992e-05, "loss": 2.3035, "step": 1 }, { "epoch": 0.5, "learning_rate": 1.9e-05, "loss": 2.1461, "step": 125 }, { "epoch": 0.5, "eval_accuracy": 0.5465, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.584102630615234, "eval_f1": 0.4711252488920671, "eval_gpu_ram_allocated": 0.3996291160583496, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 28, "eval_loss": 1.8486889600753784, "eval_precision": 0.5180898369172832, "eval_recall": 0.5465, "eval_runtime": 1.5234, "eval_samples_per_second": 1312.845, "eval_steps_per_second": 41.355, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8797760009765625, "step": 125 }, { "epoch": 1.0, "learning_rate": 1.8e-05, "loss": 1.6793, "step": 250 }, { "epoch": 1.0, "eval_accuracy": 0.615, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.584190368652344, "eval_f1": 0.5799151958404197, "eval_gpu_ram_allocated": 0.3996119499206543, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 28, "eval_loss": 1.5279773473739624, "eval_precision": 0.6206909881233017, "eval_recall": 0.615, "eval_runtime": 1.5357, "eval_samples_per_second": 1302.337, "eval_steps_per_second": 41.024, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8827476501464844, "step": 250 }, { "epoch": 1.5, "learning_rate": 1.7e-05, "loss": 1.4163, "step": 375 }, { "epoch": 1.5, "eval_accuracy": 0.6675, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.584190368652344, "eval_f1": 0.6507803054121275, "eval_gpu_ram_allocated": 0.3996291160583496, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 28, "eval_loss": 1.3395830392837524, "eval_precision": 0.6690538200035159, "eval_recall": 0.6675, "eval_runtime": 1.5175, "eval_samples_per_second": 1317.927, "eval_steps_per_second": 41.515, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.883075714111328, "step": 375 }, { "epoch": 2.0, "learning_rate": 1.6000000000000003e-05, "loss": 1.2855, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.6745, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 24.584270477294922, "eval_f1": 0.6633189544214071, "eval_gpu_ram_allocated": 0.3995933532714844, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 30, "eval_loss": 1.2413243055343628, "eval_precision": 0.6741774761001332, "eval_recall": 0.6745, "eval_runtime": 1.5166, "eval_samples_per_second": 1318.761, "eval_steps_per_second": 41.541, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.897541046142578, "step": 500 }, { "epoch": 2.5, "learning_rate": 1.5000000000000002e-05, "loss": 1.1364, "step": 625 }, { "epoch": 2.5, "eval_accuracy": 0.6725, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 25.457107543945312, "eval_f1": 0.665830199533775, "eval_gpu_ram_allocated": 0.3995990753173828, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 31, "eval_loss": 1.1794828176498413, "eval_precision": 0.6758089281502129, "eval_recall": 0.6725, "eval_runtime": 1.5306, "eval_samples_per_second": 1306.704, "eval_steps_per_second": 41.161, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.096714019775391, "step": 625 }, { "epoch": 3.0, "learning_rate": 1.4e-05, "loss": 1.0569, "step": 750 }, { "epoch": 3.0, "eval_accuracy": 0.6845, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 25.457260131835938, "eval_f1": 0.6785417667541389, "eval_gpu_ram_allocated": 0.3996105194091797, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 29, "eval_loss": 1.1167317628860474, "eval_precision": 0.6841220967269329, "eval_recall": 0.6845, "eval_runtime": 1.5379, "eval_samples_per_second": 1300.49, "eval_steps_per_second": 40.965, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.092338562011719, "step": 750 }, { "epoch": 3.5, "learning_rate": 1.3000000000000001e-05, "loss": 0.9596, "step": 875 }, { "epoch": 3.5, "eval_accuracy": 0.698, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 25.457298278808594, "eval_f1": 0.6882774085430846, "eval_gpu_ram_allocated": 0.3996591567993164, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 29, "eval_loss": 1.0866354703903198, "eval_precision": 0.692037318372609, "eval_recall": 0.698, "eval_runtime": 1.5638, "eval_samples_per_second": 1278.976, "eval_steps_per_second": 40.288, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.876514434814453, "step": 875 }, { "epoch": 4.0, "learning_rate": 1.2e-05, "loss": 0.917, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.6875, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 25.45730209350586, "eval_f1": 0.6796355379748251, "eval_gpu_ram_allocated": 0.3995933532714844, "eval_gpu_ram_cached": 12.9296875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 29, "eval_loss": 1.0703401565551758, "eval_precision": 0.6840559389245768, "eval_recall": 0.6875, "eval_runtime": 1.5204, "eval_samples_per_second": 1315.473, "eval_steps_per_second": 41.437, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 3.8976211547851562, "step": 1000 } ], "max_steps": 2500, "num_train_epochs": 10, "total_flos": 504019356005760.0, "trial_name": null, "trial_params": null }