{ "best_metric": 1.408768892288208, "best_model_checkpoint": "./outputs/checkpoint-2400", "epoch": 1.7489981785063753, "eval_steps": 100, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0002, "loss": 2.1823, "step": 100 }, { "epoch": 0.07, "eval_loss": 2.0118842124938965, "eval_runtime": 144.2983, "eval_samples_per_second": 43.479, "eval_steps_per_second": 5.44, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 1.962, "step": 200 }, { "epoch": 0.15, "eval_loss": 1.9193025827407837, "eval_runtime": 144.1022, "eval_samples_per_second": 43.539, "eval_steps_per_second": 5.448, "step": 200 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 1.8883, "step": 300 }, { "epoch": 0.22, "eval_loss": 1.8596361875534058, "eval_runtime": 144.0831, "eval_samples_per_second": 43.544, "eval_steps_per_second": 5.448, "step": 300 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 1.8371, "step": 400 }, { "epoch": 0.29, "eval_loss": 1.813263177871704, "eval_runtime": 144.1028, "eval_samples_per_second": 43.538, "eval_steps_per_second": 5.447, "step": 400 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 1.7855, "step": 500 }, { "epoch": 0.36, "eval_loss": 1.772437572479248, "eval_runtime": 144.0494, "eval_samples_per_second": 43.555, "eval_steps_per_second": 5.45, "step": 500 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 1.757, "step": 600 }, { "epoch": 0.44, "eval_loss": 1.7428079843521118, "eval_runtime": 144.0319, "eval_samples_per_second": 43.56, "eval_steps_per_second": 5.45, "step": 600 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 1.7183, "step": 700 }, { "epoch": 0.51, "eval_loss": 1.7120596170425415, "eval_runtime": 144.1455, "eval_samples_per_second": 43.525, "eval_steps_per_second": 5.446, "step": 700 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 1.6973, "step": 800 }, { "epoch": 0.58, "eval_loss": 1.6833879947662354, "eval_runtime": 144.034, "eval_samples_per_second": 43.559, "eval_steps_per_second": 5.45, "step": 800 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 1.662, "step": 900 }, { "epoch": 0.66, "eval_loss": 1.6580077409744263, "eval_runtime": 144.0204, "eval_samples_per_second": 43.563, "eval_steps_per_second": 5.451, "step": 900 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 1.6473, "step": 1000 }, { "epoch": 0.73, "eval_loss": 1.6349676847457886, "eval_runtime": 144.1987, "eval_samples_per_second": 43.509, "eval_steps_per_second": 5.444, "step": 1000 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 1.6273, "step": 1100 }, { "epoch": 0.8, "eval_loss": 1.6135053634643555, "eval_runtime": 144.1005, "eval_samples_per_second": 43.539, "eval_steps_per_second": 5.448, "step": 1100 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 1.5919, "step": 1200 }, { "epoch": 0.87, "eval_loss": 1.5944637060165405, "eval_runtime": 144.0899, "eval_samples_per_second": 43.542, "eval_steps_per_second": 5.448, "step": 1200 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 1.5994, "step": 1300 }, { "epoch": 0.95, "eval_loss": 1.5728504657745361, "eval_runtime": 144.043, "eval_samples_per_second": 43.556, "eval_steps_per_second": 5.45, "step": 1300 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 1.5528, "step": 1400 }, { "epoch": 1.02, "eval_loss": 1.552846074104309, "eval_runtime": 144.0891, "eval_samples_per_second": 43.543, "eval_steps_per_second": 5.448, "step": 1400 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 1.5246, "step": 1500 }, { "epoch": 1.09, "eval_loss": 1.5355615615844727, "eval_runtime": 144.0408, "eval_samples_per_second": 43.557, "eval_steps_per_second": 5.45, "step": 1500 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 1.5062, "step": 1600 }, { "epoch": 1.17, "eval_loss": 1.5179370641708374, "eval_runtime": 144.0242, "eval_samples_per_second": 43.562, "eval_steps_per_second": 5.45, "step": 1600 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 1.5038, "step": 1700 }, { "epoch": 1.24, "eval_loss": 1.5012134313583374, "eval_runtime": 144.0166, "eval_samples_per_second": 43.564, "eval_steps_per_second": 5.451, "step": 1700 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 1.5144, "step": 1800 }, { "epoch": 1.31, "eval_loss": 1.511275291442871, "eval_runtime": 133.6708, "eval_samples_per_second": 46.936, "eval_steps_per_second": 5.873, "step": 1800 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 1.4715, "step": 1900 }, { "epoch": 1.38, "eval_loss": 1.48880934715271, "eval_runtime": 133.6301, "eval_samples_per_second": 46.95, "eval_steps_per_second": 5.874, "step": 1900 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 1.4621, "step": 2000 }, { "epoch": 1.46, "eval_loss": 1.4694921970367432, "eval_runtime": 133.7845, "eval_samples_per_second": 46.896, "eval_steps_per_second": 5.868, "step": 2000 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 1.4364, "step": 2100 }, { "epoch": 1.53, "eval_loss": 1.4534579515457153, "eval_runtime": 133.5069, "eval_samples_per_second": 46.994, "eval_steps_per_second": 5.88, "step": 2100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 1.4388, "step": 2200 }, { "epoch": 1.6, "eval_loss": 1.4382610321044922, "eval_runtime": 133.6146, "eval_samples_per_second": 46.956, "eval_steps_per_second": 5.875, "step": 2200 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 1.4139, "step": 2300 }, { "epoch": 1.68, "eval_loss": 1.4250658750534058, "eval_runtime": 133.6118, "eval_samples_per_second": 46.957, "eval_steps_per_second": 5.875, "step": 2300 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 1.4145, "step": 2400 }, { "epoch": 1.75, "eval_loss": 1.408768892288208, "eval_runtime": 133.5488, "eval_samples_per_second": 46.979, "eval_steps_per_second": 5.878, "step": 2400 } ], "logging_steps": 100, "max_steps": 4116, "num_train_epochs": 3, "save_steps": 100, "total_flos": 1.4305055049825485e+17, "trial_name": null, "trial_params": null }