{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.47945205479452, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 0.0, "loss": 1.7614, "step": 1 }, { "epoch": 0.22, "learning_rate": 3.3333333333333333e-06, "loss": 1.7635, "step": 2 }, { "epoch": 0.33, "learning_rate": 6.666666666666667e-06, "loss": 1.8003, "step": 3 }, { "epoch": 0.44, "learning_rate": 1e-05, "loss": 1.5796, "step": 4 }, { "epoch": 0.55, "learning_rate": 1.3333333333333333e-05, "loss": 1.2226, "step": 5 }, { "epoch": 0.66, "learning_rate": 1.6666666666666667e-05, "loss": 1.0885, "step": 6 }, { "epoch": 0.77, "learning_rate": 2e-05, "loss": 0.9511, "step": 7 }, { "epoch": 0.88, "learning_rate": 1.9998370105646414e-05, "loss": 0.8972, "step": 8 }, { "epoch": 0.99, "learning_rate": 1.999348095389677e-05, "loss": 0.8481, "step": 9 }, { "epoch": 1.1, "learning_rate": 1.998533413851124e-05, "loss": 0.72, "step": 10 }, { "epoch": 1.21, "learning_rate": 1.9973932315179502e-05, "loss": 0.6775, "step": 11 }, { "epoch": 1.32, "learning_rate": 1.9959279200655044e-05, "loss": 0.6507, "step": 12 }, { "epoch": 1.42, "learning_rate": 1.9941379571543597e-05, "loss": 0.6566, "step": 13 }, { "epoch": 1.53, "learning_rate": 1.9920239262746045e-05, "loss": 0.6627, "step": 14 }, { "epoch": 1.64, "learning_rate": 1.9895865165556375e-05, "loss": 0.6135, "step": 15 }, { "epoch": 1.75, "learning_rate": 1.9868265225415263e-05, "loss": 0.6025, "step": 16 }, { "epoch": 1.86, "learning_rate": 1.9837448439320027e-05, "loss": 0.6242, "step": 17 }, { "epoch": 1.97, "learning_rate": 1.9803424852891803e-05, "loss": 0.5891, "step": 18 }, { "epoch": 2.08, "learning_rate": 1.976620555710087e-05, "loss": 0.4496, "step": 19 }, { "epoch": 2.19, "learning_rate": 1.9725802684651235e-05, "loss": 0.3549, "step": 20 }, { "epoch": 2.3, "learning_rate": 1.9682229406025635e-05, "loss": 0.3397, "step": 21 }, { "epoch": 2.41, "learning_rate": 1.963549992519223e-05, "loss": 0.3178, "step": 22 }, { "epoch": 2.52, "learning_rate": 1.9585629474974413e-05, "loss": 0.3091, "step": 23 }, { "epoch": 2.63, "learning_rate": 1.953263431208523e-05, "loss": 0.3234, "step": 24 }, { "epoch": 2.74, "learning_rate": 1.9476531711828027e-05, "loss": 0.3159, "step": 25 }, { "epoch": 2.85, "learning_rate": 1.9417339962465084e-05, "loss": 0.3129, "step": 26 }, { "epoch": 2.96, "learning_rate": 1.935507835925601e-05, "loss": 0.3669, "step": 27 }, { "epoch": 3.07, "learning_rate": 1.9289767198167918e-05, "loss": 0.2294, "step": 28 }, { "epoch": 3.18, "learning_rate": 1.9221427769259333e-05, "loss": 0.1704, "step": 29 }, { "epoch": 3.29, "learning_rate": 1.9150082349740123e-05, "loss": 0.1899, "step": 30 }, { "epoch": 3.4, "learning_rate": 1.9075754196709574e-05, "loss": 0.1905, "step": 31 }, { "epoch": 3.51, "learning_rate": 1.899846753957507e-05, "loss": 0.1895, "step": 32 }, { "epoch": 3.62, "learning_rate": 1.8918247572153822e-05, "loss": 0.1743, "step": 33 }, { "epoch": 3.73, "learning_rate": 1.883512044446023e-05, "loss": 0.1939, "step": 34 }, { "epoch": 3.84, "learning_rate": 1.8749113254181498e-05, "loss": 0.1848, "step": 35 }, { "epoch": 3.95, "learning_rate": 1.866025403784439e-05, "loss": 0.2112, "step": 36 }, { "epoch": 4.05, "learning_rate": 1.8568571761675893e-05, "loss": 0.15, "step": 37 }, { "epoch": 4.16, "learning_rate": 1.8474096312160866e-05, "loss": 0.1066, "step": 38 }, { "epoch": 4.27, "learning_rate": 1.837685848629965e-05, "loss": 0.1108, "step": 39 }, { "epoch": 4.38, "learning_rate": 1.827688998156891e-05, "loss": 0.1048, "step": 40 }, { "epoch": 4.49, "learning_rate": 1.817422338558892e-05, "loss": 0.1231, "step": 41 }, { "epoch": 4.6, "learning_rate": 1.8068892165500704e-05, "loss": 0.1145, "step": 42 }, { "epoch": 4.71, "learning_rate": 1.796093065705644e-05, "loss": 0.1314, "step": 43 }, { "epoch": 4.82, "learning_rate": 1.7850374053426725e-05, "loss": 0.1281, "step": 44 }, { "epoch": 4.93, "learning_rate": 1.7737258393728363e-05, "loss": 0.1389, "step": 45 }, { "epoch": 5.04, "learning_rate": 1.7621620551276366e-05, "loss": 0.0999, "step": 46 }, { "epoch": 5.15, "learning_rate": 1.7503498221564026e-05, "loss": 0.0716, "step": 47 }, { "epoch": 5.26, "learning_rate": 1.7382929909974988e-05, "loss": 0.088, "step": 48 }, { "epoch": 5.37, "learning_rate": 1.725995491923131e-05, "loss": 0.0809, "step": 49 }, { "epoch": 5.48, "learning_rate": 1.7134613336581602e-05, "loss": 0.0747, "step": 50 } ], "max_steps": 180, "num_train_epochs": 20, "total_flos": 1.32068633739264e+17, "trial_name": null, "trial_params": null }