{ "best_metric": 0.0171243567019701, "best_model_checkpoint": "D:\\models/outputsstar/checkpoint-1350", "epoch": 5.0, "eval_steps": 500, "global_step": 1350, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.037037037037037035, "grad_norm": 1.6629393100738525, "learning_rate": 1.9851851851851855e-05, "loss": 0.5627, "step": 10 }, { "epoch": 0.07407407407407407, "grad_norm": 1.3468966484069824, "learning_rate": 1.9703703703703704e-05, "loss": 0.3602, "step": 20 }, { "epoch": 0.1111111111111111, "grad_norm": 1.9818885326385498, "learning_rate": 1.9555555555555557e-05, "loss": 0.2409, "step": 30 }, { "epoch": 0.14814814814814814, "grad_norm": 0.6068008542060852, "learning_rate": 1.9407407407407407e-05, "loss": 0.1136, "step": 40 }, { "epoch": 0.18518518518518517, "grad_norm": 0.5792471170425415, "learning_rate": 1.925925925925926e-05, "loss": 0.0912, "step": 50 }, { "epoch": 0.2222222222222222, "grad_norm": 0.36238569021224976, "learning_rate": 1.9111111111111113e-05, "loss": 0.0793, "step": 60 }, { "epoch": 0.25925925925925924, "grad_norm": 0.40632012486457825, "learning_rate": 1.8962962962962966e-05, "loss": 0.0783, "step": 70 }, { "epoch": 0.2962962962962963, "grad_norm": 0.3358970582485199, "learning_rate": 1.8814814814814816e-05, "loss": 0.0976, "step": 80 }, { "epoch": 0.3333333333333333, "grad_norm": 0.22383123636245728, "learning_rate": 1.866666666666667e-05, "loss": 0.0453, "step": 90 }, { "epoch": 0.37037037037037035, "grad_norm": 0.17972876131534576, "learning_rate": 1.851851851851852e-05, "loss": 0.0435, "step": 100 }, { "epoch": 0.4074074074074074, "grad_norm": 0.17635494470596313, "learning_rate": 1.837037037037037e-05, "loss": 0.0255, "step": 110 }, { "epoch": 0.4444444444444444, "grad_norm": 2.2137696743011475, "learning_rate": 1.8222222222222224e-05, "loss": 0.0251, "step": 120 }, { "epoch": 0.48148148148148145, "grad_norm": 0.1810089647769928, "learning_rate": 1.8074074074074074e-05, "loss": 0.0588, "step": 130 }, { "epoch": 0.5185185185185185, "grad_norm": 0.1574196070432663, "learning_rate": 1.7925925925925927e-05, "loss": 0.043, "step": 140 }, { "epoch": 0.5555555555555556, "grad_norm": 0.13450497388839722, "learning_rate": 1.7777777777777777e-05, "loss": 0.0647, "step": 150 }, { "epoch": 0.5925925925925926, "grad_norm": 0.14055782556533813, "learning_rate": 1.7629629629629633e-05, "loss": 0.025, "step": 160 }, { "epoch": 0.6296296296296297, "grad_norm": 2.997166395187378, "learning_rate": 1.7481481481481483e-05, "loss": 0.0926, "step": 170 }, { "epoch": 0.6666666666666666, "grad_norm": 0.12140078842639923, "learning_rate": 1.7333333333333336e-05, "loss": 0.015, "step": 180 }, { "epoch": 0.7037037037037037, "grad_norm": 0.11859618127346039, "learning_rate": 1.7185185185185185e-05, "loss": 0.058, "step": 190 }, { "epoch": 0.7407407407407407, "grad_norm": 0.09671270847320557, "learning_rate": 1.7037037037037038e-05, "loss": 0.0127, "step": 200 }, { "epoch": 0.7777777777777778, "grad_norm": 0.100140281021595, "learning_rate": 1.688888888888889e-05, "loss": 0.0384, "step": 210 }, { "epoch": 0.8148148148148148, "grad_norm": 0.10083166509866714, "learning_rate": 1.674074074074074e-05, "loss": 0.0121, "step": 220 }, { "epoch": 0.8518518518518519, "grad_norm": 1.3739676475524902, "learning_rate": 1.6592592592592594e-05, "loss": 0.089, "step": 230 }, { "epoch": 0.8888888888888888, "grad_norm": 0.10838634520769119, "learning_rate": 1.6444444444444444e-05, "loss": 0.1098, "step": 240 }, { "epoch": 
0.9259259259259259, "grad_norm": 0.1810838133096695, "learning_rate": 1.6296296296296297e-05, "loss": 0.0287, "step": 250 }, { "epoch": 0.9629629629629629, "grad_norm": 0.11047308892011642, "learning_rate": 1.614814814814815e-05, "loss": 0.0266, "step": 260 }, { "epoch": 1.0, "grad_norm": 2.2907960414886475, "learning_rate": 1.6000000000000003e-05, "loss": 0.0394, "step": 270 }, { "epoch": 1.0, "eval_accuracy": 0.989501312335958, "eval_loss": 0.04065331816673279, "eval_runtime": 52.3322, "eval_samples_per_second": 7.28, "eval_steps_per_second": 0.917, "step": 270 }, { "epoch": 1.037037037037037, "grad_norm": 0.08815015852451324, "learning_rate": 1.5851851851851852e-05, "loss": 0.0255, "step": 280 }, { "epoch": 1.074074074074074, "grad_norm": 0.09346853941679001, "learning_rate": 1.5703703703703705e-05, "loss": 0.0107, "step": 290 }, { "epoch": 1.1111111111111112, "grad_norm": 0.07555428147315979, "learning_rate": 1.555555555555556e-05, "loss": 0.0188, "step": 300 }, { "epoch": 1.1481481481481481, "grad_norm": 0.08554250746965408, "learning_rate": 1.5407407407407408e-05, "loss": 0.056, "step": 310 }, { "epoch": 1.1851851851851851, "grad_norm": 0.07133428752422333, "learning_rate": 1.525925925925926e-05, "loss": 0.0376, "step": 320 }, { "epoch": 1.2222222222222223, "grad_norm": 0.07578225433826447, "learning_rate": 1.5111111111111112e-05, "loss": 0.0115, "step": 330 }, { "epoch": 1.2592592592592593, "grad_norm": 0.07077538967132568, "learning_rate": 1.4962962962962964e-05, "loss": 0.0091, "step": 340 }, { "epoch": 1.2962962962962963, "grad_norm": 0.06836975365877151, "learning_rate": 1.4814814814814815e-05, "loss": 0.0089, "step": 350 }, { "epoch": 1.3333333333333333, "grad_norm": 23.20952606201172, "learning_rate": 1.4666666666666666e-05, "loss": 0.0995, "step": 360 }, { "epoch": 1.3703703703703702, "grad_norm": 0.07452990114688873, "learning_rate": 1.4518518518518521e-05, "loss": 0.0089, "step": 370 }, { "epoch": 1.4074074074074074, "grad_norm": 0.07127617299556732, "learning_rate": 1.4370370370370372e-05, "loss": 0.0104, "step": 380 }, { "epoch": 1.4444444444444444, "grad_norm": 0.06184624508023262, "learning_rate": 1.4222222222222224e-05, "loss": 0.0081, "step": 390 }, { "epoch": 1.4814814814814814, "grad_norm": 0.08141285926103592, "learning_rate": 1.4074074074074075e-05, "loss": 0.0104, "step": 400 }, { "epoch": 1.5185185185185186, "grad_norm": 0.06248854473233223, "learning_rate": 1.3925925925925928e-05, "loss": 0.095, "step": 410 }, { "epoch": 1.5555555555555556, "grad_norm": 0.07375632971525192, "learning_rate": 1.377777777777778e-05, "loss": 0.0918, "step": 420 }, { "epoch": 1.5925925925925926, "grad_norm": 0.06345756351947784, "learning_rate": 1.362962962962963e-05, "loss": 0.0077, "step": 430 }, { "epoch": 1.6296296296296298, "grad_norm": 0.06055911257863045, "learning_rate": 1.3481481481481482e-05, "loss": 0.0237, "step": 440 }, { "epoch": 1.6666666666666665, "grad_norm": 0.05871434509754181, "learning_rate": 1.3333333333333333e-05, "loss": 0.0072, "step": 450 }, { "epoch": 1.7037037037037037, "grad_norm": 0.0945742130279541, "learning_rate": 1.3185185185185185e-05, "loss": 0.0674, "step": 460 }, { "epoch": 1.7407407407407407, "grad_norm": 0.0636238381266594, "learning_rate": 1.303703703703704e-05, "loss": 0.0623, "step": 470 }, { "epoch": 1.7777777777777777, "grad_norm": 0.08100485056638718, "learning_rate": 1.288888888888889e-05, "loss": 0.0103, "step": 480 }, { "epoch": 1.8148148148148149, "grad_norm": 0.06018243730068207, "learning_rate": 1.2740740740740742e-05, "loss": 
0.0072, "step": 490 }, { "epoch": 1.8518518518518519, "grad_norm": 0.06805320084095001, "learning_rate": 1.2592592592592593e-05, "loss": 0.0602, "step": 500 }, { "epoch": 1.8888888888888888, "grad_norm": 9.21893310546875, "learning_rate": 1.2444444444444446e-05, "loss": 0.0842, "step": 510 }, { "epoch": 1.925925925925926, "grad_norm": 0.07045555114746094, "learning_rate": 1.2296296296296298e-05, "loss": 0.1148, "step": 520 }, { "epoch": 1.9629629629629628, "grad_norm": 0.07674137502908707, "learning_rate": 1.2148148148148149e-05, "loss": 0.0094, "step": 530 }, { "epoch": 2.0, "grad_norm": 62.091854095458984, "learning_rate": 1.2e-05, "loss": 0.0973, "step": 540 }, { "epoch": 2.0, "eval_accuracy": 0.984251968503937, "eval_loss": 0.07090622931718826, "eval_runtime": 52.2703, "eval_samples_per_second": 7.289, "eval_steps_per_second": 0.918, "step": 540 }, { "epoch": 2.037037037037037, "grad_norm": 0.05459068343043327, "learning_rate": 1.1851851851851852e-05, "loss": 0.0663, "step": 550 }, { "epoch": 2.074074074074074, "grad_norm": 0.06096257269382477, "learning_rate": 1.1703703703703703e-05, "loss": 0.0113, "step": 560 }, { "epoch": 2.111111111111111, "grad_norm": 0.05534420534968376, "learning_rate": 1.1555555555555556e-05, "loss": 0.0072, "step": 570 }, { "epoch": 2.148148148148148, "grad_norm": 3.7594122886657715, "learning_rate": 1.1407407407407409e-05, "loss": 0.0592, "step": 580 }, { "epoch": 2.185185185185185, "grad_norm": 0.05278675630688667, "learning_rate": 1.125925925925926e-05, "loss": 0.007, "step": 590 }, { "epoch": 2.2222222222222223, "grad_norm": 0.050314392894506454, "learning_rate": 1.1111111111111113e-05, "loss": 0.0081, "step": 600 }, { "epoch": 2.259259259259259, "grad_norm": 0.0523914210498333, "learning_rate": 1.0962962962962965e-05, "loss": 0.0104, "step": 610 }, { "epoch": 2.2962962962962963, "grad_norm": 0.06894738972187042, "learning_rate": 1.0814814814814816e-05, "loss": 0.0071, "step": 620 }, { "epoch": 2.3333333333333335, "grad_norm": 0.05035197734832764, "learning_rate": 1.0666666666666667e-05, "loss": 0.0062, "step": 630 }, { "epoch": 2.3703703703703702, "grad_norm": 0.04814285784959793, "learning_rate": 1.0518518518518519e-05, "loss": 0.1157, "step": 640 }, { "epoch": 2.4074074074074074, "grad_norm": 2.7678489685058594, "learning_rate": 1.037037037037037e-05, "loss": 0.0565, "step": 650 }, { "epoch": 2.4444444444444446, "grad_norm": 3.185380220413208, "learning_rate": 1.0222222222222223e-05, "loss": 0.0508, "step": 660 }, { "epoch": 2.4814814814814814, "grad_norm": 0.05677470192313194, "learning_rate": 1.0074074074074074e-05, "loss": 0.0074, "step": 670 }, { "epoch": 2.5185185185185186, "grad_norm": 0.3886967599391937, "learning_rate": 9.925925925925927e-06, "loss": 0.0129, "step": 680 }, { "epoch": 2.5555555555555554, "grad_norm": 0.0454292856156826, "learning_rate": 9.777777777777779e-06, "loss": 0.0958, "step": 690 }, { "epoch": 2.5925925925925926, "grad_norm": 0.04909510165452957, "learning_rate": 9.62962962962963e-06, "loss": 0.0064, "step": 700 }, { "epoch": 2.6296296296296298, "grad_norm": 0.04817191883921623, "learning_rate": 9.481481481481483e-06, "loss": 0.0192, "step": 710 }, { "epoch": 2.6666666666666665, "grad_norm": 3.5723814964294434, "learning_rate": 9.333333333333334e-06, "loss": 0.2254, "step": 720 }, { "epoch": 2.7037037037037037, "grad_norm": 0.8698053359985352, "learning_rate": 9.185185185185186e-06, "loss": 0.0099, "step": 730 }, { "epoch": 2.7407407407407405, "grad_norm": 3.2689051628112793, "learning_rate": 9.037037037037037e-06, 
"loss": 0.0534, "step": 740 }, { "epoch": 2.7777777777777777, "grad_norm": 0.1893489956855774, "learning_rate": 8.888888888888888e-06, "loss": 0.0465, "step": 750 }, { "epoch": 2.814814814814815, "grad_norm": 0.05454478785395622, "learning_rate": 8.740740740740741e-06, "loss": 0.0106, "step": 760 }, { "epoch": 2.851851851851852, "grad_norm": 0.0487472228705883, "learning_rate": 8.592592592592593e-06, "loss": 0.0914, "step": 770 }, { "epoch": 2.888888888888889, "grad_norm": 0.05404801294207573, "learning_rate": 8.444444444444446e-06, "loss": 0.0168, "step": 780 }, { "epoch": 2.925925925925926, "grad_norm": 0.043936073780059814, "learning_rate": 8.296296296296297e-06, "loss": 0.0079, "step": 790 }, { "epoch": 2.962962962962963, "grad_norm": 0.05625942721962929, "learning_rate": 8.148148148148148e-06, "loss": 0.0148, "step": 800 }, { "epoch": 3.0, "grad_norm": 0.04617544263601303, "learning_rate": 8.000000000000001e-06, "loss": 0.0057, "step": 810 }, { "epoch": 3.0, "eval_accuracy": 0.9868766404199475, "eval_loss": 0.04252076894044876, "eval_runtime": 52.5906, "eval_samples_per_second": 7.245, "eval_steps_per_second": 0.913, "step": 810 }, { "epoch": 3.037037037037037, "grad_norm": 0.5136130452156067, "learning_rate": 7.851851851851853e-06, "loss": 0.0257, "step": 820 }, { "epoch": 3.074074074074074, "grad_norm": 2.9422848224639893, "learning_rate": 7.703703703703704e-06, "loss": 0.0544, "step": 830 }, { "epoch": 3.111111111111111, "grad_norm": 0.0419733040034771, "learning_rate": 7.555555555555556e-06, "loss": 0.0073, "step": 840 }, { "epoch": 3.148148148148148, "grad_norm": 0.052699312567710876, "learning_rate": 7.4074074074074075e-06, "loss": 0.0068, "step": 850 }, { "epoch": 3.185185185185185, "grad_norm": 0.04446011409163475, "learning_rate": 7.2592592592592605e-06, "loss": 0.0086, "step": 860 }, { "epoch": 3.2222222222222223, "grad_norm": 0.04285755380988121, "learning_rate": 7.111111111111112e-06, "loss": 0.0063, "step": 870 }, { "epoch": 3.259259259259259, "grad_norm": 0.04666002467274666, "learning_rate": 6.962962962962964e-06, "loss": 0.0586, "step": 880 }, { "epoch": 3.2962962962962963, "grad_norm": 0.04253947734832764, "learning_rate": 6.814814814814815e-06, "loss": 0.0561, "step": 890 }, { "epoch": 3.3333333333333335, "grad_norm": 0.04394235461950302, "learning_rate": 6.666666666666667e-06, "loss": 0.0059, "step": 900 }, { "epoch": 3.3703703703703702, "grad_norm": 0.04241425171494484, "learning_rate": 6.51851851851852e-06, "loss": 0.0056, "step": 910 }, { "epoch": 3.4074074074074074, "grad_norm": 0.08582015335559845, "learning_rate": 6.370370370370371e-06, "loss": 0.0067, "step": 920 }, { "epoch": 3.4444444444444446, "grad_norm": 0.03971382975578308, "learning_rate": 6.222222222222223e-06, "loss": 0.0393, "step": 930 }, { "epoch": 3.4814814814814814, "grad_norm": 0.04343000799417496, "learning_rate": 6.0740740740740745e-06, "loss": 0.0056, "step": 940 }, { "epoch": 3.5185185185185186, "grad_norm": 0.04006602242588997, "learning_rate": 5.925925925925926e-06, "loss": 0.0533, "step": 950 }, { "epoch": 3.5555555555555554, "grad_norm": 0.040533244609832764, "learning_rate": 5.777777777777778e-06, "loss": 0.0277, "step": 960 }, { "epoch": 3.5925925925925926, "grad_norm": 0.0381636768579483, "learning_rate": 5.62962962962963e-06, "loss": 0.022, "step": 970 }, { "epoch": 3.6296296296296298, "grad_norm": 0.0465327687561512, "learning_rate": 5.481481481481482e-06, "loss": 0.0086, "step": 980 }, { "epoch": 3.6666666666666665, "grad_norm": 0.039545219391584396, "learning_rate": 
5.333333333333334e-06, "loss": 0.0053, "step": 990 }, { "epoch": 3.7037037037037037, "grad_norm": 0.038673460483551025, "learning_rate": 5.185185185185185e-06, "loss": 0.0787, "step": 1000 }, { "epoch": 3.7407407407407405, "grad_norm": 0.04055389016866684, "learning_rate": 5.037037037037037e-06, "loss": 0.0062, "step": 1010 }, { "epoch": 3.7777777777777777, "grad_norm": 9.543475151062012, "learning_rate": 4.888888888888889e-06, "loss": 0.044, "step": 1020 }, { "epoch": 3.814814814814815, "grad_norm": 0.050516992807388306, "learning_rate": 4.7407407407407415e-06, "loss": 0.0531, "step": 1030 }, { "epoch": 3.851851851851852, "grad_norm": 0.040959686040878296, "learning_rate": 4.592592592592593e-06, "loss": 0.0054, "step": 1040 }, { "epoch": 3.888888888888889, "grad_norm": 0.2083148956298828, "learning_rate": 4.444444444444444e-06, "loss": 0.0055, "step": 1050 }, { "epoch": 3.925925925925926, "grad_norm": 0.04079532250761986, "learning_rate": 4.296296296296296e-06, "loss": 0.0758, "step": 1060 }, { "epoch": 3.962962962962963, "grad_norm": 3.1428794860839844, "learning_rate": 4.1481481481481485e-06, "loss": 0.1065, "step": 1070 }, { "epoch": 4.0, "grad_norm": 0.04060327261686325, "learning_rate": 4.000000000000001e-06, "loss": 0.0403, "step": 1080 }, { "epoch": 4.0, "eval_accuracy": 0.9868766404199475, "eval_loss": 0.04992913454771042, "eval_runtime": 52.4443, "eval_samples_per_second": 7.265, "eval_steps_per_second": 0.915, "step": 1080 }, { "epoch": 4.037037037037037, "grad_norm": 0.037297219038009644, "learning_rate": 3.851851851851852e-06, "loss": 0.005, "step": 1090 }, { "epoch": 4.074074074074074, "grad_norm": 0.040778644382953644, "learning_rate": 3.7037037037037037e-06, "loss": 0.005, "step": 1100 }, { "epoch": 4.111111111111111, "grad_norm": 0.03788766264915466, "learning_rate": 3.555555555555556e-06, "loss": 0.005, "step": 1110 }, { "epoch": 4.148148148148148, "grad_norm": 0.04125545546412468, "learning_rate": 3.4074074074074077e-06, "loss": 0.0047, "step": 1120 }, { "epoch": 4.185185185185185, "grad_norm": 0.03932119160890579, "learning_rate": 3.25925925925926e-06, "loss": 0.0209, "step": 1130 }, { "epoch": 4.222222222222222, "grad_norm": 0.040045417845249176, "learning_rate": 3.1111111111111116e-06, "loss": 0.005, "step": 1140 }, { "epoch": 4.2592592592592595, "grad_norm": 0.04132600873708725, "learning_rate": 2.962962962962963e-06, "loss": 0.0567, "step": 1150 }, { "epoch": 4.296296296296296, "grad_norm": 0.16079649329185486, "learning_rate": 2.814814814814815e-06, "loss": 0.0058, "step": 1160 }, { "epoch": 4.333333333333333, "grad_norm": 0.03831535950303078, "learning_rate": 2.666666666666667e-06, "loss": 0.0473, "step": 1170 }, { "epoch": 4.37037037037037, "grad_norm": 0.03685208782553673, "learning_rate": 2.5185185185185186e-06, "loss": 0.0051, "step": 1180 }, { "epoch": 4.407407407407407, "grad_norm": 0.08284825831651688, "learning_rate": 2.3703703703703707e-06, "loss": 0.0053, "step": 1190 }, { "epoch": 4.444444444444445, "grad_norm": 0.037671931087970734, "learning_rate": 2.222222222222222e-06, "loss": 0.0048, "step": 1200 }, { "epoch": 4.481481481481482, "grad_norm": 0.048201784491539, "learning_rate": 2.0740740740740742e-06, "loss": 0.0046, "step": 1210 }, { "epoch": 4.518518518518518, "grad_norm": 0.036273613572120667, "learning_rate": 1.925925925925926e-06, "loss": 0.008, "step": 1220 }, { "epoch": 4.555555555555555, "grad_norm": 0.03639749437570572, "learning_rate": 1.777777777777778e-06, "loss": 0.0268, "step": 1230 }, { "epoch": 4.592592592592593, "grad_norm": 
0.03778412565588951, "learning_rate": 1.62962962962963e-06, "loss": 0.0047, "step": 1240 }, { "epoch": 4.62962962962963, "grad_norm": 0.037292417138814926, "learning_rate": 1.4814814814814815e-06, "loss": 0.0522, "step": 1250 }, { "epoch": 4.666666666666667, "grad_norm": 0.03608781844377518, "learning_rate": 1.3333333333333334e-06, "loss": 0.0156, "step": 1260 }, { "epoch": 4.703703703703704, "grad_norm": 0.04082287847995758, "learning_rate": 1.1851851851851854e-06, "loss": 0.0072, "step": 1270 }, { "epoch": 4.7407407407407405, "grad_norm": 0.03990180045366287, "learning_rate": 1.0370370370370371e-06, "loss": 0.027, "step": 1280 }, { "epoch": 4.777777777777778, "grad_norm": 0.2720341086387634, "learning_rate": 8.88888888888889e-07, "loss": 0.0384, "step": 1290 }, { "epoch": 4.814814814814815, "grad_norm": 0.21008038520812988, "learning_rate": 7.407407407407407e-07, "loss": 0.0495, "step": 1300 }, { "epoch": 4.851851851851852, "grad_norm": 0.038784921169281006, "learning_rate": 5.925925925925927e-07, "loss": 0.0605, "step": 1310 }, { "epoch": 4.888888888888889, "grad_norm": 0.03550861403346062, "learning_rate": 4.444444444444445e-07, "loss": 0.007, "step": 1320 }, { "epoch": 4.925925925925926, "grad_norm": 0.03672279790043831, "learning_rate": 2.9629629629629634e-07, "loss": 0.005, "step": 1330 }, { "epoch": 4.962962962962963, "grad_norm": 0.04202316328883171, "learning_rate": 1.4814814814814817e-07, "loss": 0.0475, "step": 1340 }, { "epoch": 5.0, "grad_norm": 0.0387168787419796, "learning_rate": 0.0, "loss": 0.0608, "step": 1350 }, { "epoch": 5.0, "eval_accuracy": 0.994750656167979, "eval_loss": 0.0171243567019701, "eval_runtime": 52.6363, "eval_samples_per_second": 7.238, "eval_steps_per_second": 0.912, "step": 1350 }, { "epoch": 5.0, "step": 1350, "total_flos": 8.345887281491558e+17, "train_loss": 0.04418781167379132, "train_runtime": 3285.5702, "train_samples_per_second": 3.278, "train_steps_per_second": 0.411 } ], "logging_steps": 10, "max_steps": 1350, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.345887281491558e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }
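
A minimal sketch of how the log above could be consumed, assuming it is saved under the filename the Hugging Face Trainer itself writes, trainer_state.json, in the current directory (that filename and location are assumptions, not stated in the log). It only reads the fields that actually appear in log_history: per-step "loss" entries and per-epoch "eval_loss"/"eval_accuracy" entries, plus the recorded best checkpoint.

    # Sketch only: load trainer_state.json (assumed filename/path) and summarize it.
    import json

    with open("trainer_state.json", encoding="utf-8") as f:
        state = json.load(f)

    # Per-step training logs carry "loss"; per-epoch evaluation logs carry "eval_loss".
    train_logs = [e for e in state["log_history"] if "loss" in e]
    eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

    print(f"best metric {state['best_metric']:.4f} "
          f"at {state['best_model_checkpoint']}")
    for e in eval_logs:
        print(f"epoch {e['epoch']:.0f}: eval_loss={e['eval_loss']:.4f}, "
              f"eval_accuracy={e['eval_accuracy']:.4f}")

For this run the summary would show evaluation loss dropping from 0.0407 (epoch 1) to 0.0171 (epoch 5) with eval_accuracy rising to 0.9948, matching the best_metric and checkpoint-1350 recorded at the top of the file.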