{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.8219074598677998, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.0004, "loss": 2.8174, "step": 2 }, { "epoch": 0.05, "learning_rate": 0.0008, "loss": 2.8801, "step": 4 }, { "epoch": 0.07, "learning_rate": 0.0009999575185316995, "loss": 2.8676, "step": 6 }, { "epoch": 0.1, "learning_rate": 0.0009996177100962712, "loss": 2.8733, "step": 8 }, { "epoch": 0.12, "learning_rate": 0.000998938324184584, "loss": 2.9105, "step": 10 }, { "epoch": 0.15, "learning_rate": 0.0009979198225579969, "loss": 2.9035, "step": 12 }, { "epoch": 0.17, "learning_rate": 0.0009965628974662145, "loss": 2.8884, "step": 14 }, { "epoch": 0.19, "learning_rate": 0.00099486847117678, "loss": 2.879, "step": 16 }, { "epoch": 0.22, "learning_rate": 0.0009928376953482344, "loss": 2.9521, "step": 18 }, { "epoch": 0.24, "learning_rate": 0.0009904719502473634, "loss": 2.9526, "step": 20 }, { "epoch": 0.27, "learning_rate": 0.0009877728438110645, "loss": 2.9211, "step": 22 }, { "epoch": 0.29, "learning_rate": 0.0009847422105534738, "loss": 2.8804, "step": 24 }, { "epoch": 0.31, "learning_rate": 0.000981382110319093, "loss": 2.9045, "step": 26 }, { "epoch": 0.34, "learning_rate": 0.0009776948268827657, "loss": 2.9369, "step": 28 }, { "epoch": 0.36, "learning_rate": 0.0009736828663974526, "loss": 2.9049, "step": 30 }, { "epoch": 0.39, "learning_rate": 0.0009693489556908641, "loss": 2.9069, "step": 32 }, { "epoch": 0.41, "learning_rate": 0.0009646960404121041, "loss": 2.9257, "step": 34 }, { "epoch": 0.44, "learning_rate": 0.0009597272830295876, "loss": 2.8909, "step": 36 }, { "epoch": 0.46, "learning_rate": 0.00095444606068159, "loss": 2.9512, "step": 38 }, { "epoch": 0.48, "learning_rate": 0.0009488559628808938, "loss": 3.0004, "step": 40 }, { "epoch": 0.51, "learning_rate": 0.0009429607890750862, "loss": 3.1118, "step": 42 }, { "epoch": 0.53, "learning_rate": 0.0009367645460641715, "loss": 3.082, "step": 44 }, { "epoch": 0.56, "learning_rate": 0.0009302714452772515, "loss": 3.0348, "step": 46 }, { "epoch": 0.58, "learning_rate": 0.0009234858999101231, "loss": 3.0918, "step": 48 }, { "epoch": 0.6, "learning_rate": 0.0009164125219257418, "loss": 3.0042, "step": 50 }, { "epoch": 0.63, "learning_rate": 0.000909056118919587, "loss": 2.971, "step": 52 }, { "epoch": 0.65, "learning_rate": 0.0009014216908520619, "loss": 2.9717, "step": 54 }, { "epoch": 0.68, "learning_rate": 0.0008935144266501469, "loss": 3.0012, "step": 56 }, { "epoch": 0.7, "learning_rate": 0.0008853397006806182, "loss": 2.9478, "step": 58 }, { "epoch": 0.73, "learning_rate": 0.0008769030690972262, "loss": 2.9312, "step": 60 }, { "epoch": 0.75, "learning_rate": 0.0008682102660643197, "loss": 2.898, "step": 62 }, { "epoch": 0.77, "learning_rate": 0.0008592671998594795, "loss": 2.8673, "step": 64 }, { "epoch": 0.8, "learning_rate": 0.0008500799488578119, "loss": 2.8984, "step": 66 }, { "epoch": 0.82, "learning_rate": 0.0008406547574006325, "loss": 2.8778, "step": 68 }, { "epoch": 0.85, "learning_rate": 0.0008309980315513443, "loss": 2.8978, "step": 70 }, { "epoch": 0.87, "learning_rate": 0.0008211163347414004, "loss": 2.8657, "step": 72 }, { "epoch": 0.89, "learning_rate": 0.0008110163833093049, "loss": 2.881, "step": 74 }, { "epoch": 0.92, "learning_rate": 0.0008007050419356898, "loss": 2.8425, "step": 76 }, { "epoch": 0.94, "learning_rate": 0.0007901893189775639, "loss": 2.899, "step": 78 }, { "epoch": 0.97, "learning_rate": 0.0007794763617049123, "loss": 2.8116, "step": 80 }, { "epoch": 0.99, "learning_rate": 0.0007685734514428766, "loss": 2.8995, "step": 82 }, { "epoch": 1.02, "learning_rate": 0.0007574879986228245, "loss": 3.874, "step": 84 }, { "epoch": 1.05, "learning_rate": 0.000746227537745667, "loss": 2.7812, "step": 86 }, { "epoch": 1.07, "learning_rate": 0.0007347997222608493, "loss": 2.7706, "step": 88 }, { "epoch": 1.1, "learning_rate": 0.0007232123193644956, "loss": 2.727, "step": 90 }, { "epoch": 1.12, "learning_rate": 0.0007114732047202433, "loss": 2.7466, "step": 92 }, { "epoch": 1.15, "learning_rate": 0.000699590357106354, "loss": 2.7615, "step": 94 }, { "epoch": 1.17, "learning_rate": 0.0006875718529927403, "loss": 2.7312, "step": 96 }, { "epoch": 1.19, "learning_rate": 0.0006754258610515948, "loss": 2.7192, "step": 98 }, { "epoch": 1.22, "learning_rate": 0.0006631606366053507, "loss": 2.7178, "step": 100 }, { "epoch": 1.24, "learning_rate": 0.0006507845160157477, "loss": 2.7426, "step": 102 }, { "epoch": 1.27, "learning_rate": 0.0006383059110178205, "loss": 2.78, "step": 104 }, { "epoch": 1.29, "learning_rate": 0.0006257333030026537, "loss": 2.801, "step": 106 }, { "epoch": 1.31, "learning_rate": 0.0006130752372527982, "loss": 2.8101, "step": 108 }, { "epoch": 1.34, "learning_rate": 0.0006003403171342563, "loss": 2.8212, "step": 110 }, { "epoch": 1.36, "learning_rate": 0.0005875371982489958, "loss": 2.8073, "step": 112 }, { "epoch": 1.39, "learning_rate": 0.0005746745825519538, "loss": 2.7252, "step": 114 }, { "epoch": 1.41, "learning_rate": 0.0005617612124365409, "loss": 2.783, "step": 116 }, { "epoch": 1.44, "learning_rate": 0.0005488058647926577, "loss": 2.7034, "step": 118 }, { "epoch": 1.46, "learning_rate": 0.0005358173450412649, "loss": 2.7864, "step": 120 }, { "epoch": 1.48, "learning_rate": 0.0005228044811495631, "loss": 2.7136, "step": 122 }, { "epoch": 1.51, "learning_rate": 0.0005097761176308471, "loss": 2.762, "step": 124 }, { "epoch": 1.53, "learning_rate": 0.0004967411095331149, "loss": 2.7633, "step": 126 }, { "epoch": 1.56, "learning_rate": 0.0004837083164205159, "loss": 2.7863, "step": 128 }, { "epoch": 1.58, "learning_rate": 0.00047068659635173026, "loss": 2.7523, "step": 130 }, { "epoch": 1.6, "learning_rate": 0.00045768479985937196, "loss": 2.6834, "step": 132 }, { "epoch": 1.63, "learning_rate": 0.0004447117639345052, "loss": 2.7107, "step": 134 }, { "epoch": 1.65, "learning_rate": 0.0004317763060203664, "loss": 2.7554, "step": 136 }, { "epoch": 1.68, "learning_rate": 0.0004188872180193723, "loss": 2.7709, "step": 138 }, { "epoch": 1.7, "learning_rate": 0.00040605326031748645, "loss": 2.7199, "step": 140 }, { "epoch": 1.73, "learning_rate": 0.0003932831558300074, "loss": 2.702, "step": 142 }, { "epoch": 1.75, "learning_rate": 0.0003805855840728246, "loss": 2.7447, "step": 144 }, { "epoch": 1.77, "learning_rate": 0.00036796917526317154, "loss": 2.7329, "step": 146 }, { "epoch": 1.8, "learning_rate": 0.0003554425044538868, "loss": 2.7188, "step": 148 }, { "epoch": 1.82, "learning_rate": 0.00034301408570516746, "loss": 2.7646, "step": 150 } ], "max_steps": 246, "num_train_epochs": 3, "total_flos": 1.4229744023408476e+18, "trial_name": null, "trial_params": null }