{ "best_metric": 1.2501240968704224, "best_model_checkpoint": "./outputs/checkpoint-2300", "epoch": 1.6757741347905282, "eval_steps": 100, "global_step": 2300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0002, "loss": 1.7654, "step": 100 }, { "epoch": 0.07, "eval_loss": 1.6328892707824707, "eval_runtime": 419.2151, "eval_samples_per_second": 14.966, "eval_steps_per_second": 1.873, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 1.6081, "step": 200 }, { "epoch": 0.15, "eval_loss": 1.5872766971588135, "eval_runtime": 418.8615, "eval_samples_per_second": 14.979, "eval_steps_per_second": 1.874, "step": 200 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 1.5756, "step": 300 }, { "epoch": 0.22, "eval_loss": 1.556390643119812, "eval_runtime": 418.8265, "eval_samples_per_second": 14.98, "eval_steps_per_second": 1.874, "step": 300 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 1.5444, "step": 400 }, { "epoch": 0.29, "eval_loss": 1.5300668478012085, "eval_runtime": 418.8771, "eval_samples_per_second": 14.978, "eval_steps_per_second": 1.874, "step": 400 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 1.5114, "step": 500 }, { "epoch": 0.36, "eval_loss": 1.5074748992919922, "eval_runtime": 418.9574, "eval_samples_per_second": 14.975, "eval_steps_per_second": 1.874, "step": 500 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 1.4948, "step": 600 }, { "epoch": 0.44, "eval_loss": 1.4872732162475586, "eval_runtime": 419.0224, "eval_samples_per_second": 14.973, "eval_steps_per_second": 1.873, "step": 600 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 1.4723, "step": 700 }, { "epoch": 0.51, "eval_loss": 1.4686368703842163, "eval_runtime": 418.747, "eval_samples_per_second": 14.983, "eval_steps_per_second": 1.875, "step": 700 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 1.4628, "step": 800 }, { "epoch": 0.58, "eval_loss": 1.450691819190979, "eval_runtime": 418.912, "eval_samples_per_second": 14.977, "eval_steps_per_second": 1.874, "step": 800 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 1.4332, "step": 900 }, { "epoch": 0.66, "eval_loss": 1.432775616645813, "eval_runtime": 418.8955, "eval_samples_per_second": 14.977, "eval_steps_per_second": 1.874, "step": 900 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 1.4271, "step": 1000 }, { "epoch": 0.73, "eval_loss": 1.4167886972427368, "eval_runtime": 418.6904, "eval_samples_per_second": 14.985, "eval_steps_per_second": 1.875, "step": 1000 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 1.4129, "step": 1100 }, { "epoch": 0.8, "eval_loss": 1.402354121208191, "eval_runtime": 418.7796, "eval_samples_per_second": 14.982, "eval_steps_per_second": 1.874, "step": 1100 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 1.3853, "step": 1200 }, { "epoch": 0.87, "eval_loss": 1.3865565061569214, "eval_runtime": 418.6288, "eval_samples_per_second": 14.987, "eval_steps_per_second": 1.875, "step": 1200 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 1.394, "step": 1300 }, { "epoch": 0.95, "eval_loss": 1.372268795967102, "eval_runtime": 418.7001, "eval_samples_per_second": 14.984, "eval_steps_per_second": 1.875, "step": 1300 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 1.356, "step": 1400 }, { "epoch": 1.02, "eval_loss": 1.3581469058990479, "eval_runtime": 418.565, "eval_samples_per_second": 14.989, "eval_steps_per_second": 1.875, "step": 1400 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 1.3321, "step": 1500 }, { "epoch": 1.09, "eval_loss": 1.3450849056243896, "eval_runtime": 418.8093, "eval_samples_per_second": 14.981, "eval_steps_per_second": 1.874, "step": 1500 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 1.3214, "step": 1600 }, { "epoch": 1.17, "eval_loss": 1.3320348262786865, "eval_runtime": 418.8397, "eval_samples_per_second": 14.979, "eval_steps_per_second": 1.874, "step": 1600 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 1.3215, "step": 1700 }, { "epoch": 1.24, "eval_loss": 1.3184651136398315, "eval_runtime": 418.7321, "eval_samples_per_second": 14.983, "eval_steps_per_second": 1.875, "step": 1700 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 1.2973, "step": 1800 }, { "epoch": 1.31, "eval_loss": 1.306998610496521, "eval_runtime": 418.7252, "eval_samples_per_second": 14.984, "eval_steps_per_second": 1.875, "step": 1800 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 1.2893, "step": 1900 }, { "epoch": 1.38, "eval_loss": 1.293902039527893, "eval_runtime": 449.0285, "eval_samples_per_second": 13.972, "eval_steps_per_second": 1.748, "step": 1900 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 1.2763, "step": 2000 }, { "epoch": 1.46, "eval_loss": 1.2827019691467285, "eval_runtime": 418.4577, "eval_samples_per_second": 14.993, "eval_steps_per_second": 1.876, "step": 2000 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 1.2665, "step": 2100 }, { "epoch": 1.53, "eval_loss": 1.2712739706039429, "eval_runtime": 418.4388, "eval_samples_per_second": 14.994, "eval_steps_per_second": 1.876, "step": 2100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 1.2452, "step": 2200 }, { "epoch": 1.6, "eval_loss": 1.2593406438827515, "eval_runtime": 418.3773, "eval_samples_per_second": 14.996, "eval_steps_per_second": 1.876, "step": 2200 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 1.2438, "step": 2300 }, { "epoch": 1.68, "eval_loss": 1.2501240968704224, "eval_runtime": 418.3937, "eval_samples_per_second": 14.995, "eval_steps_per_second": 1.876, "step": 2300 } ], "logging_steps": 100, "max_steps": 4116, "num_train_epochs": 3, "save_steps": 100, "total_flos": 7.457059409239757e+17, "trial_name": null, "trial_params": null }