{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.34569480334286873, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9956788149582146e-05, "loss": 2.3181, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.991357629916428e-05, "loss": 2.2625, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.9870364448746425e-05, "loss": 2.1859, "step": 1500 }, { "epoch": 0.02, "learning_rate": 4.982715259832857e-05, "loss": 2.1855, "step": 2000 }, { "epoch": 0.02, "learning_rate": 4.9783940747910704e-05, "loss": 2.1539, "step": 2500 }, { "epoch": 0.03, "learning_rate": 4.974072889749285e-05, "loss": 2.1454, "step": 3000 }, { "epoch": 0.03, "learning_rate": 4.969751704707499e-05, "loss": 2.1311, "step": 3500 }, { "epoch": 0.03, "learning_rate": 4.9654305196657134e-05, "loss": 2.1232, "step": 4000 }, { "epoch": 0.04, "learning_rate": 4.961109334623927e-05, "loss": 2.0882, "step": 4500 }, { "epoch": 0.04, "learning_rate": 4.956788149582141e-05, "loss": 2.1255, "step": 5000 }, { "epoch": 0.05, "learning_rate": 4.9524669645403556e-05, "loss": 2.1109, "step": 5500 }, { "epoch": 0.05, "learning_rate": 4.94814577949857e-05, "loss": 2.0932, "step": 6000 }, { "epoch": 0.06, "learning_rate": 4.943824594456784e-05, "loss": 2.0806, "step": 6500 }, { "epoch": 0.06, "learning_rate": 4.9395034094149986e-05, "loss": 2.108, "step": 7000 }, { "epoch": 0.06, "learning_rate": 4.935182224373212e-05, "loss": 2.1137, "step": 7500 }, { "epoch": 0.07, "learning_rate": 4.9308610393314265e-05, "loss": 2.0663, "step": 8000 }, { "epoch": 0.07, "learning_rate": 4.926539854289641e-05, "loss": 2.0729, "step": 8500 }, { "epoch": 0.08, "learning_rate": 4.922218669247855e-05, "loss": 2.0571, "step": 9000 }, { "epoch": 0.08, "learning_rate": 4.917897484206069e-05, "loss": 2.1231, "step": 9500 }, { "epoch": 0.09, "learning_rate": 4.913576299164283e-05, "loss": 2.0631, "step": 10000 }, { "epoch": 0.09, "learning_rate": 4.9092551141224974e-05, "loss": 2.0889, "step": 10500 }, { "epoch": 0.1, "learning_rate": 4.904933929080711e-05, "loss": 2.0952, "step": 11000 }, { "epoch": 0.1, "learning_rate": 4.9006127440389254e-05, "loss": 2.0885, "step": 11500 }, { "epoch": 0.1, "learning_rate": 4.89629155899714e-05, "loss": 2.0642, "step": 12000 }, { "epoch": 0.11, "learning_rate": 4.891970373955353e-05, "loss": 2.0385, "step": 12500 }, { "epoch": 0.11, "learning_rate": 4.8876491889135676e-05, "loss": 2.0664, "step": 13000 }, { "epoch": 0.12, "learning_rate": 4.883328003871782e-05, "loss": 2.0678, "step": 13500 }, { "epoch": 0.12, "learning_rate": 4.879006818829996e-05, "loss": 2.0487, "step": 14000 }, { "epoch": 0.13, "learning_rate": 4.87468563378821e-05, "loss": 2.0119, "step": 14500 }, { "epoch": 0.13, "learning_rate": 4.870364448746424e-05, "loss": 2.0541, "step": 15000 }, { "epoch": 0.13, "learning_rate": 4.8660432637046385e-05, "loss": 2.0105, "step": 15500 }, { "epoch": 0.14, "learning_rate": 4.861722078662853e-05, "loss": 2.0374, "step": 16000 }, { "epoch": 0.14, "learning_rate": 4.857400893621067e-05, "loss": 2.0205, "step": 16500 }, { "epoch": 0.15, "learning_rate": 4.8530797085792814e-05, "loss": 2.0243, "step": 17000 }, { "epoch": 0.15, "learning_rate": 4.848758523537495e-05, "loss": 2.0473, "step": 17500 }, { "epoch": 0.16, "learning_rate": 4.8444373384957094e-05, "loss": 2.0331, "step": 18000 }, { "epoch": 0.16, "learning_rate": 4.840116153453924e-05, "loss": 2.0622, "step": 18500 }, { "epoch": 0.16, "learning_rate": 4.835794968412137e-05, "loss": 2.0277, "step": 19000 }, { "epoch": 0.17, "learning_rate": 4.8314737833703516e-05, "loss": 2.0355, "step": 19500 }, { "epoch": 0.17, "learning_rate": 4.827152598328566e-05, "loss": 2.0415, "step": 20000 }, { "epoch": 0.18, "learning_rate": 4.82283141328678e-05, "loss": 2.0355, "step": 20500 }, { "epoch": 0.18, "learning_rate": 4.818510228244994e-05, "loss": 1.9982, "step": 21000 }, { "epoch": 0.19, "learning_rate": 4.814189043203208e-05, "loss": 2.0237, "step": 21500 }, { "epoch": 0.19, "learning_rate": 4.8098678581614225e-05, "loss": 2.0248, "step": 22000 }, { "epoch": 0.19, "learning_rate": 4.805546673119636e-05, "loss": 2.0038, "step": 22500 }, { "epoch": 0.2, "learning_rate": 4.8012254880778505e-05, "loss": 2.0252, "step": 23000 }, { "epoch": 0.2, "learning_rate": 4.796904303036065e-05, "loss": 1.9888, "step": 23500 }, { "epoch": 0.21, "learning_rate": 4.7925831179942784e-05, "loss": 2.0121, "step": 24000 }, { "epoch": 0.21, "learning_rate": 4.788261932952493e-05, "loss": 1.9794, "step": 24500 }, { "epoch": 0.22, "learning_rate": 4.783940747910707e-05, "loss": 1.9893, "step": 25000 }, { "epoch": 0.22, "learning_rate": 4.7796195628689213e-05, "loss": 1.9933, "step": 25500 }, { "epoch": 0.22, "learning_rate": 4.7752983778271357e-05, "loss": 1.9897, "step": 26000 }, { "epoch": 0.23, "learning_rate": 4.77097719278535e-05, "loss": 2.0199, "step": 26500 }, { "epoch": 0.23, "learning_rate": 4.766656007743564e-05, "loss": 2.0092, "step": 27000 }, { "epoch": 0.24, "learning_rate": 4.762334822701778e-05, "loss": 1.9775, "step": 27500 }, { "epoch": 0.24, "learning_rate": 4.758013637659992e-05, "loss": 1.9796, "step": 28000 }, { "epoch": 0.25, "learning_rate": 4.7536924526182065e-05, "loss": 1.9855, "step": 28500 }, { "epoch": 0.25, "learning_rate": 4.74937126757642e-05, "loss": 2.0539, "step": 29000 }, { "epoch": 0.25, "learning_rate": 4.7450500825346345e-05, "loss": 2.0053, "step": 29500 }, { "epoch": 0.26, "learning_rate": 4.740728897492849e-05, "loss": 1.9682, "step": 30000 }, { "epoch": 0.26, "learning_rate": 4.736407712451063e-05, "loss": 2.0241, "step": 30500 }, { "epoch": 0.27, "learning_rate": 4.732086527409277e-05, "loss": 1.9906, "step": 31000 }, { "epoch": 0.27, "learning_rate": 4.727765342367491e-05, "loss": 1.999, "step": 31500 }, { "epoch": 0.28, "learning_rate": 4.7234441573257054e-05, "loss": 2.0129, "step": 32000 }, { "epoch": 0.28, "learning_rate": 4.719122972283919e-05, "loss": 2.0016, "step": 32500 }, { "epoch": 0.29, "learning_rate": 4.714801787242133e-05, "loss": 1.9669, "step": 33000 }, { "epoch": 0.29, "learning_rate": 4.7104806022003476e-05, "loss": 2.0073, "step": 33500 }, { "epoch": 0.29, "learning_rate": 4.706159417158561e-05, "loss": 1.9432, "step": 34000 }, { "epoch": 0.3, "learning_rate": 4.7018382321167756e-05, "loss": 1.9661, "step": 34500 }, { "epoch": 0.3, "learning_rate": 4.69751704707499e-05, "loss": 1.9904, "step": 35000 }, { "epoch": 0.31, "learning_rate": 4.693195862033204e-05, "loss": 1.9749, "step": 35500 }, { "epoch": 0.31, "learning_rate": 4.6888746769914185e-05, "loss": 1.9963, "step": 36000 }, { "epoch": 0.32, "learning_rate": 4.684553491949633e-05, "loss": 1.9604, "step": 36500 }, { "epoch": 0.32, "learning_rate": 4.680232306907847e-05, "loss": 2.0006, "step": 37000 }, { "epoch": 0.32, "learning_rate": 4.675911121866061e-05, "loss": 1.9637, "step": 37500 }, { "epoch": 0.33, "learning_rate": 4.671589936824275e-05, "loss": 1.9799, "step": 38000 }, { "epoch": 0.33, "learning_rate": 4.6672687517824894e-05, "loss": 1.9626, "step": 38500 }, { "epoch": 0.34, "learning_rate": 4.662947566740703e-05, "loss": 1.9352, "step": 39000 }, { "epoch": 0.34, "learning_rate": 4.6586263816989173e-05, "loss": 1.9919, "step": 39500 }, { "epoch": 0.35, "learning_rate": 4.6543051966571317e-05, "loss": 1.9473, "step": 40000 } ], "max_steps": 578545, "num_train_epochs": 5, "total_flos": 5.842369833311232e+16, "trial_name": null, "trial_params": null }