{ "best_metric": 1.2198114395141602, "best_model_checkpoint": "./outputs/checkpoint-4000", "epoch": 2.914754098360656, "eval_steps": 100, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0002, "loss": 2.1823, "step": 100 }, { "epoch": 0.07, "eval_loss": 2.0118842124938965, "eval_runtime": 144.2983, "eval_samples_per_second": 43.479, "eval_steps_per_second": 5.44, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 1.962, "step": 200 }, { "epoch": 0.15, "eval_loss": 1.9193025827407837, "eval_runtime": 144.1022, "eval_samples_per_second": 43.539, "eval_steps_per_second": 5.448, "step": 200 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 1.8883, "step": 300 }, { "epoch": 0.22, "eval_loss": 1.8596361875534058, "eval_runtime": 144.0831, "eval_samples_per_second": 43.544, "eval_steps_per_second": 5.448, "step": 300 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 1.8371, "step": 400 }, { "epoch": 0.29, "eval_loss": 1.813263177871704, "eval_runtime": 144.1028, "eval_samples_per_second": 43.538, "eval_steps_per_second": 5.447, "step": 400 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 1.7855, "step": 500 }, { "epoch": 0.36, "eval_loss": 1.772437572479248, "eval_runtime": 144.0494, "eval_samples_per_second": 43.555, "eval_steps_per_second": 5.45, "step": 500 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 1.757, "step": 600 }, { "epoch": 0.44, "eval_loss": 1.7428079843521118, "eval_runtime": 144.0319, "eval_samples_per_second": 43.56, "eval_steps_per_second": 5.45, "step": 600 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 1.7183, "step": 700 }, { "epoch": 0.51, "eval_loss": 1.7120596170425415, "eval_runtime": 144.1455, "eval_samples_per_second": 43.525, "eval_steps_per_second": 5.446, "step": 700 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 1.6973, "step": 800 }, { "epoch": 0.58, "eval_loss": 1.6833879947662354, "eval_runtime": 144.034, "eval_samples_per_second": 43.559, "eval_steps_per_second": 5.45, "step": 800 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 1.662, "step": 900 }, { "epoch": 0.66, "eval_loss": 1.6580077409744263, "eval_runtime": 144.0204, "eval_samples_per_second": 43.563, "eval_steps_per_second": 5.451, "step": 900 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 1.6473, "step": 1000 }, { "epoch": 0.73, "eval_loss": 1.6349676847457886, "eval_runtime": 144.1987, "eval_samples_per_second": 43.509, "eval_steps_per_second": 5.444, "step": 1000 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 1.6273, "step": 1100 }, { "epoch": 0.8, "eval_loss": 1.6135053634643555, "eval_runtime": 144.1005, "eval_samples_per_second": 43.539, "eval_steps_per_second": 5.448, "step": 1100 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 1.5919, "step": 1200 }, { "epoch": 0.87, "eval_loss": 1.5944637060165405, "eval_runtime": 144.0899, "eval_samples_per_second": 43.542, "eval_steps_per_second": 5.448, "step": 1200 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 1.5994, "step": 1300 }, { "epoch": 0.95, "eval_loss": 1.5728504657745361, "eval_runtime": 144.043, "eval_samples_per_second": 43.556, "eval_steps_per_second": 5.45, "step": 1300 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 1.5528, "step": 1400 }, { "epoch": 1.02, "eval_loss": 1.552846074104309, "eval_runtime": 144.0891, "eval_samples_per_second": 43.543, "eval_steps_per_second": 5.448, "step": 1400 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 1.5246, "step": 1500 }, { "epoch": 1.09, "eval_loss": 1.5355615615844727, "eval_runtime": 144.0408, "eval_samples_per_second": 43.557, "eval_steps_per_second": 5.45, "step": 1500 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 1.5062, "step": 1600 }, { "epoch": 1.17, "eval_loss": 1.5179370641708374, "eval_runtime": 144.0242, "eval_samples_per_second": 43.562, "eval_steps_per_second": 5.45, "step": 1600 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 1.5038, "step": 1700 }, { "epoch": 1.24, "eval_loss": 1.5012134313583374, "eval_runtime": 144.0166, "eval_samples_per_second": 43.564, "eval_steps_per_second": 5.451, "step": 1700 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 1.5144, "step": 1800 }, { "epoch": 1.31, "eval_loss": 1.511275291442871, "eval_runtime": 133.6708, "eval_samples_per_second": 46.936, "eval_steps_per_second": 5.873, "step": 1800 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 1.4715, "step": 1900 }, { "epoch": 1.38, "eval_loss": 1.48880934715271, "eval_runtime": 133.6301, "eval_samples_per_second": 46.95, "eval_steps_per_second": 5.874, "step": 1900 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 1.4621, "step": 2000 }, { "epoch": 1.46, "eval_loss": 1.4694921970367432, "eval_runtime": 133.7845, "eval_samples_per_second": 46.896, "eval_steps_per_second": 5.868, "step": 2000 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 1.4364, "step": 2100 }, { "epoch": 1.53, "eval_loss": 1.4534579515457153, "eval_runtime": 133.5069, "eval_samples_per_second": 46.994, "eval_steps_per_second": 5.88, "step": 2100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 1.4388, "step": 2200 }, { "epoch": 1.6, "eval_loss": 1.4382610321044922, "eval_runtime": 133.6146, "eval_samples_per_second": 46.956, "eval_steps_per_second": 5.875, "step": 2200 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 1.4139, "step": 2300 }, { "epoch": 1.68, "eval_loss": 1.4250658750534058, "eval_runtime": 133.6118, "eval_samples_per_second": 46.957, "eval_steps_per_second": 5.875, "step": 2300 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 1.4145, "step": 2400 }, { "epoch": 1.75, "eval_loss": 1.408768892288208, "eval_runtime": 133.5488, "eval_samples_per_second": 46.979, "eval_steps_per_second": 5.878, "step": 2400 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 1.3897, "step": 2500 }, { "epoch": 1.82, "eval_loss": 1.3950382471084595, "eval_runtime": 133.6055, "eval_samples_per_second": 46.959, "eval_steps_per_second": 5.876, "step": 2500 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 1.3718, "step": 2600 }, { "epoch": 1.89, "eval_loss": 1.38124418258667, "eval_runtime": 133.4827, "eval_samples_per_second": 47.002, "eval_steps_per_second": 5.881, "step": 2600 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 1.3685, "step": 2700 }, { "epoch": 1.97, "eval_loss": 1.3680918216705322, "eval_runtime": 133.3719, "eval_samples_per_second": 47.041, "eval_steps_per_second": 5.886, "step": 2700 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 1.3321, "step": 2800 }, { "epoch": 2.04, "eval_loss": 1.356438159942627, "eval_runtime": 133.4175, "eval_samples_per_second": 47.025, "eval_steps_per_second": 5.884, "step": 2800 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 1.3105, "step": 2900 }, { "epoch": 2.11, "eval_loss": 1.3440583944320679, "eval_runtime": 133.4445, "eval_samples_per_second": 47.016, "eval_steps_per_second": 5.883, "step": 2900 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 1.3096, "step": 3000 }, { "epoch": 2.19, "eval_loss": 1.3319122791290283, "eval_runtime": 133.4813, "eval_samples_per_second": 47.003, "eval_steps_per_second": 5.881, "step": 3000 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 1.2963, "step": 3100 }, { "epoch": 2.26, "eval_loss": 1.320979356765747, "eval_runtime": 133.3932, "eval_samples_per_second": 47.034, "eval_steps_per_second": 5.885, "step": 3100 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 1.2953, "step": 3200 }, { "epoch": 2.33, "eval_loss": 1.3095366954803467, "eval_runtime": 138.3583, "eval_samples_per_second": 45.346, "eval_steps_per_second": 5.674, "step": 3200 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 1.2786, "step": 3300 }, { "epoch": 2.4, "eval_loss": 1.2959859371185303, "eval_runtime": 133.5245, "eval_samples_per_second": 46.988, "eval_steps_per_second": 5.879, "step": 3300 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 1.2585, "step": 3400 }, { "epoch": 2.48, "eval_loss": 1.2855974435806274, "eval_runtime": 133.542, "eval_samples_per_second": 46.981, "eval_steps_per_second": 5.878, "step": 3400 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 1.2586, "step": 3500 }, { "epoch": 2.55, "eval_loss": 1.2743042707443237, "eval_runtime": 133.3944, "eval_samples_per_second": 47.033, "eval_steps_per_second": 5.885, "step": 3500 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 1.2466, "step": 3600 }, { "epoch": 2.62, "eval_loss": 1.2626945972442627, "eval_runtime": 133.3673, "eval_samples_per_second": 47.043, "eval_steps_per_second": 5.886, "step": 3600 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 1.2303, "step": 3700 }, { "epoch": 2.7, "eval_loss": 1.2512463331222534, "eval_runtime": 133.4502, "eval_samples_per_second": 47.014, "eval_steps_per_second": 5.882, "step": 3700 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 1.2088, "step": 3800 }, { "epoch": 2.77, "eval_loss": 1.2429386377334595, "eval_runtime": 133.4443, "eval_samples_per_second": 47.016, "eval_steps_per_second": 5.883, "step": 3800 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 1.2171, "step": 3900 }, { "epoch": 2.84, "eval_loss": 1.2287747859954834, "eval_runtime": 133.4716, "eval_samples_per_second": 47.006, "eval_steps_per_second": 5.881, "step": 3900 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 1.1995, "step": 4000 }, { "epoch": 2.91, "eval_loss": 1.2198114395141602, "eval_runtime": 133.4977, "eval_samples_per_second": 46.997, "eval_steps_per_second": 5.88, "step": 4000 } ], "logging_steps": 100, "max_steps": 4116, "num_train_epochs": 3, "save_steps": 100, "total_flos": 2.384126359199662e+17, "trial_name": null, "trial_params": null }