{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8720549976018488, "eval_steps": 500, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4e-05, "loss": 4.4951, "step": 500 }, { "epoch": 0.03, "learning_rate": 8e-05, "loss": 3.777, "step": 1000 }, { "epoch": 0.04, "learning_rate": 7.999831247941866e-05, "loss": 3.6246, "step": 1500 }, { "epoch": 0.06, "learning_rate": 7.99932500600609e-05, "loss": 3.5067, "step": 2000 }, { "epoch": 0.07, "learning_rate": 7.998481316907362e-05, "loss": 3.4947, "step": 2500 }, { "epoch": 0.09, "learning_rate": 7.99730025183281e-05, "loss": 3.4452, "step": 3000 }, { "epoch": 0.1, "learning_rate": 7.995781910436019e-05, "loss": 3.3696, "step": 3500 }, { "epoch": 0.12, "learning_rate": 7.993926420828609e-05, "loss": 3.4226, "step": 4000 }, { "epoch": 0.13, "learning_rate": 7.991733939569422e-05, "loss": 3.3765, "step": 4500 }, { "epoch": 0.15, "learning_rate": 7.989204651651322e-05, "loss": 3.4237, "step": 5000 }, { "epoch": 0.16, "learning_rate": 7.986338770485576e-05, "loss": 3.3054, "step": 5500 }, { "epoch": 0.17, "learning_rate": 7.983136537883862e-05, "loss": 3.3544, "step": 6000 }, { "epoch": 0.19, "learning_rate": 7.97959822403785e-05, "loss": 3.3659, "step": 6500 }, { "epoch": 0.2, "learning_rate": 7.97572412749641e-05, "loss": 3.3426, "step": 7000 }, { "epoch": 0.22, "learning_rate": 7.971514575140424e-05, "loss": 3.3332, "step": 7500 }, { "epoch": 0.23, "learning_rate": 7.966969922155206e-05, "loss": 3.3163, "step": 8000 }, { "epoch": 0.25, "learning_rate": 7.962090552000528e-05, "loss": 3.3127, "step": 8500 }, { "epoch": 0.26, "learning_rate": 7.956876876378266e-05, "loss": 3.3187, "step": 9000 }, { "epoch": 0.28, "learning_rate": 7.951329335197668e-05, "loss": 3.3195, "step": 9500 }, { "epoch": 0.29, "learning_rate": 7.94544839653823e-05, "loss": 3.2599, "step": 10000 }, { "epoch": 0.31, "learning_rate": 7.939234556610201e-05, "loss": 3.3353, "step": 10500 }, { "epoch": 0.32, "learning_rate": 7.932688339712721e-05, "loss": 3.1893, "step": 11000 }, { "epoch": 0.33, "learning_rate": 7.925810298189578e-05, "loss": 3.2074, "step": 11500 }, { "epoch": 0.35, "learning_rate": 7.918601012382602e-05, "loss": 3.2427, "step": 12000 }, { "epoch": 0.36, "learning_rate": 7.911061090582703e-05, "loss": 3.3292, "step": 12500 }, { "epoch": 0.38, "learning_rate": 7.90319116897854e-05, "loss": 3.2498, "step": 13000 }, { "epoch": 0.39, "learning_rate": 7.89499191160285e-05, "loss": 3.2745, "step": 13500 }, { "epoch": 0.41, "learning_rate": 7.88646401027641e-05, "loss": 3.2645, "step": 14000 }, { "epoch": 0.42, "learning_rate": 7.87760818454967e-05, "loss": 3.2276, "step": 14500 }, { "epoch": 0.44, "learning_rate": 7.868425181642037e-05, "loss": 3.235, "step": 15000 }, { "epoch": 0.45, "learning_rate": 7.858915776378836e-05, "loss": 3.1867, "step": 15500 }, { "epoch": 0.47, "learning_rate": 7.849080771125918e-05, "loss": 3.1661, "step": 16000 }, { "epoch": 0.48, "learning_rate": 7.838920995721975e-05, "loss": 3.2233, "step": 16500 }, { "epoch": 0.49, "learning_rate": 7.828437307408509e-05, "loss": 3.1632, "step": 17000 }, { "epoch": 0.51, "learning_rate": 7.81763059075751e-05, "loss": 3.2981, "step": 17500 }, { "epoch": 0.52, "learning_rate": 7.806501757596819e-05, "loss": 3.2572, "step": 18000 }, { "epoch": 0.54, "learning_rate": 7.795051746933185e-05, "loss": 3.1959, "step": 18500 }, { "epoch": 0.55, "learning_rate": 7.783281524873039e-05, "loss": 3.2433, "step": 19000 }, { "epoch": 0.57, "learning_rate": 7.771192084540983e-05, "loss": 3.1956, "step": 19500 }, { "epoch": 0.58, "learning_rate": 7.75878444599598e-05, "loss": 3.2134, "step": 20000 }, { "epoch": 0.6, "learning_rate": 7.746059656145306e-05, "loss": 3.1633, "step": 20500 }, { "epoch": 0.61, "learning_rate": 7.733018788656199e-05, "loss": 3.2601, "step": 21000 }, { "epoch": 0.62, "learning_rate": 7.71966294386527e-05, "loss": 3.173, "step": 21500 }, { "epoch": 0.64, "learning_rate": 7.70599324868567e-05, "loss": 3.2264, "step": 22000 }, { "epoch": 0.65, "learning_rate": 7.692010856511996e-05, "loss": 3.1828, "step": 22500 }, { "epoch": 0.67, "learning_rate": 7.677716947122976e-05, "loss": 3.1522, "step": 23000 }, { "epoch": 0.68, "learning_rate": 7.663112726581924e-05, "loss": 3.2148, "step": 23500 }, { "epoch": 0.7, "learning_rate": 7.648199427134978e-05, "loss": 3.1741, "step": 24000 }, { "epoch": 0.71, "learning_rate": 7.632978307107125e-05, "loss": 3.2386, "step": 24500 }, { "epoch": 0.73, "learning_rate": 7.617450650796032e-05, "loss": 3.1865, "step": 25000 }, { "epoch": 0.74, "learning_rate": 7.601617768363678e-05, "loss": 3.2224, "step": 25500 }, { "epoch": 0.76, "learning_rate": 7.58548099572581e-05, "loss": 3.1192, "step": 26000 }, { "epoch": 0.77, "learning_rate": 7.569041694439229e-05, "loss": 3.1802, "step": 26500 }, { "epoch": 0.78, "learning_rate": 7.552301251586894e-05, "loss": 3.1781, "step": 27000 }, { "epoch": 0.8, "learning_rate": 7.5352610796609e-05, "loss": 3.1921, "step": 27500 }, { "epoch": 0.81, "learning_rate": 7.517922616443289e-05, "loss": 3.1896, "step": 28000 }, { "epoch": 0.83, "learning_rate": 7.500287324884736e-05, "loss": 3.1911, "step": 28500 }, { "epoch": 0.84, "learning_rate": 7.482356692981116e-05, "loss": 3.1367, "step": 29000 }, { "epoch": 0.86, "learning_rate": 7.464132233647945e-05, "loss": 3.1416, "step": 29500 }, { "epoch": 0.87, "learning_rate": 7.445615484592736e-05, "loss": 3.1682, "step": 30000 } ], "logging_steps": 500, "max_steps": 172005, "num_train_epochs": 5, "save_steps": 5000, "total_flos": 1.02801736728576e+18, "trial_name": null, "trial_params": null }