{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 33774, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 2.9555871380351753e-05, "loss": 0.5739, "step": 500 }, { "epoch": 0.09, "learning_rate": 2.91117427607035e-05, "loss": 0.4701, "step": 1000 }, { "epoch": 0.13, "learning_rate": 2.8667614141055253e-05, "loss": 0.4401, "step": 1500 }, { "epoch": 0.18, "learning_rate": 2.8223485521406998e-05, "loss": 0.4007, "step": 2000 }, { "epoch": 0.22, "learning_rate": 2.777935690175875e-05, "loss": 0.3886, "step": 2500 }, { "epoch": 0.27, "learning_rate": 2.73352282821105e-05, "loss": 0.3668, "step": 3000 }, { "epoch": 0.31, "learning_rate": 2.689109966246225e-05, "loss": 0.3622, "step": 3500 }, { "epoch": 0.36, "learning_rate": 2.6446971042814e-05, "loss": 0.3513, "step": 4000 }, { "epoch": 0.4, "learning_rate": 2.600284242316575e-05, "loss": 0.3233, "step": 4500 }, { "epoch": 0.44, "learning_rate": 2.55587138035175e-05, "loss": 0.3341, "step": 5000 }, { "epoch": 0.49, "learning_rate": 2.5114585183869247e-05, "loss": 0.325, "step": 5500 }, { "epoch": 0.53, "learning_rate": 2.4670456564221e-05, "loss": 0.3164, "step": 6000 }, { "epoch": 0.58, "learning_rate": 2.4226327944572748e-05, "loss": 0.2953, "step": 6500 }, { "epoch": 0.62, "learning_rate": 2.37821993249245e-05, "loss": 0.3033, "step": 7000 }, { "epoch": 0.67, "learning_rate": 2.3338070705276248e-05, "loss": 0.2875, "step": 7500 }, { "epoch": 0.71, "learning_rate": 2.2893942085628e-05, "loss": 0.2848, "step": 8000 }, { "epoch": 0.76, "learning_rate": 2.244981346597975e-05, "loss": 0.27, "step": 8500 }, { "epoch": 0.8, "learning_rate": 2.20056848463315e-05, "loss": 0.2822, "step": 9000 }, { "epoch": 0.84, "learning_rate": 2.1561556226683245e-05, "loss": 0.2595, "step": 9500 }, { "epoch": 0.89, "learning_rate": 2.1117427607034997e-05, "loss": 0.2592, "step": 10000 }, { "epoch": 0.93, "learning_rate": 2.0673298987386746e-05, "loss": 0.2493, "step": 10500 }, { "epoch": 0.98, "learning_rate": 2.0229170367738498e-05, "loss": 0.2574, "step": 11000 }, { "epoch": 1.02, "learning_rate": 1.978504174809025e-05, "loss": 0.2059, "step": 11500 }, { "epoch": 1.07, "learning_rate": 1.9340913128441998e-05, "loss": 0.1537, "step": 12000 }, { "epoch": 1.11, "learning_rate": 1.889678450879375e-05, "loss": 0.1588, "step": 12500 }, { "epoch": 1.15, "learning_rate": 1.8452655889145495e-05, "loss": 0.1487, "step": 13000 }, { "epoch": 1.2, "learning_rate": 1.8008527269497247e-05, "loss": 0.1608, "step": 13500 }, { "epoch": 1.24, "learning_rate": 1.7564398649848995e-05, "loss": 0.1619, "step": 14000 }, { "epoch": 1.29, "learning_rate": 1.7120270030200747e-05, "loss": 0.155, "step": 14500 }, { "epoch": 1.33, "learning_rate": 1.6676141410552495e-05, "loss": 0.1418, "step": 15000 }, { "epoch": 1.38, "learning_rate": 1.6232012790904247e-05, "loss": 0.1558, "step": 15500 }, { "epoch": 1.42, "learning_rate": 1.5787884171255996e-05, "loss": 0.155, "step": 16000 }, { "epoch": 1.47, "learning_rate": 1.5343755551607744e-05, "loss": 0.1522, "step": 16500 }, { "epoch": 1.51, "learning_rate": 1.4899626931959496e-05, "loss": 0.1386, "step": 17000 }, { "epoch": 1.55, "learning_rate": 1.4455498312311245e-05, "loss": 0.1442, "step": 17500 }, { "epoch": 1.6, "learning_rate": 1.4011369692662995e-05, "loss": 0.1382, "step": 18000 }, { "epoch": 1.64, "learning_rate": 1.3567241073014745e-05, "loss": 0.1437, "step": 18500 }, { "epoch": 1.69, "learning_rate": 1.3123112453366497e-05, "loss": 0.1405, "step": 19000 }, { "epoch": 1.73, "learning_rate": 1.2678983833718245e-05, "loss": 0.1357, "step": 19500 }, { "epoch": 1.78, "learning_rate": 1.2234855214069995e-05, "loss": 0.125, "step": 20000 }, { "epoch": 1.82, "learning_rate": 1.1790726594421745e-05, "loss": 0.1376, "step": 20500 }, { "epoch": 1.87, "learning_rate": 1.1346597974773496e-05, "loss": 0.136, "step": 21000 }, { "epoch": 1.91, "learning_rate": 1.0902469355125244e-05, "loss": 0.1251, "step": 21500 }, { "epoch": 1.95, "learning_rate": 1.0458340735476994e-05, "loss": 0.1258, "step": 22000 }, { "epoch": 2.0, "learning_rate": 1.0014212115828744e-05, "loss": 0.1303, "step": 22500 }, { "epoch": 2.04, "learning_rate": 9.570083496180493e-06, "loss": 0.0792, "step": 23000 }, { "epoch": 2.09, "learning_rate": 9.125954876532243e-06, "loss": 0.0794, "step": 23500 }, { "epoch": 2.13, "learning_rate": 8.681826256883993e-06, "loss": 0.0781, "step": 24000 }, { "epoch": 2.18, "learning_rate": 8.237697637235745e-06, "loss": 0.0777, "step": 24500 }, { "epoch": 2.22, "learning_rate": 7.793569017587494e-06, "loss": 0.077, "step": 25000 }, { "epoch": 2.27, "learning_rate": 7.349440397939243e-06, "loss": 0.0792, "step": 25500 }, { "epoch": 2.31, "learning_rate": 6.905311778290994e-06, "loss": 0.0774, "step": 26000 }, { "epoch": 2.35, "learning_rate": 6.461183158642743e-06, "loss": 0.0762, "step": 26500 }, { "epoch": 2.4, "learning_rate": 6.017054538994493e-06, "loss": 0.0744, "step": 27000 }, { "epoch": 2.44, "learning_rate": 5.572925919346243e-06, "loss": 0.0725, "step": 27500 }, { "epoch": 2.49, "learning_rate": 5.128797299697992e-06, "loss": 0.076, "step": 28000 }, { "epoch": 2.53, "learning_rate": 4.684668680049743e-06, "loss": 0.0682, "step": 28500 }, { "epoch": 2.58, "learning_rate": 4.240540060401492e-06, "loss": 0.074, "step": 29000 }, { "epoch": 2.62, "learning_rate": 3.7964114407532424e-06, "loss": 0.0738, "step": 29500 }, { "epoch": 2.66, "learning_rate": 3.3522828211049918e-06, "loss": 0.0678, "step": 30000 }, { "epoch": 2.71, "learning_rate": 2.908154201456742e-06, "loss": 0.0668, "step": 30500 }, { "epoch": 2.75, "learning_rate": 2.4640255818084917e-06, "loss": 0.0685, "step": 31000 }, { "epoch": 2.8, "learning_rate": 2.0198969621602414e-06, "loss": 0.0668, "step": 31500 }, { "epoch": 2.84, "learning_rate": 1.5757683425119916e-06, "loss": 0.068, "step": 32000 }, { "epoch": 2.89, "learning_rate": 1.1316397228637413e-06, "loss": 0.0663, "step": 32500 }, { "epoch": 2.93, "learning_rate": 6.875111032154912e-07, "loss": 0.0629, "step": 33000 }, { "epoch": 2.98, "learning_rate": 2.4338248356724103e-07, "loss": 0.0676, "step": 33500 }, { "epoch": 3.0, "step": 33774, "train_runtime": 1945.6382, "train_samples_per_second": 17.359 } ], "max_steps": 33774, "num_train_epochs": 3, "total_flos": 52036968346583040, "trial_name": null, "trial_params": null }