{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.9867197875166, "global_step": 5640, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 9.411764705882353e-05, "loss": 1.501, "step": 20 }, { "epoch": 0.07, "learning_rate": 0.00018823529411764707, "loss": 1.4128, "step": 40 }, { "epoch": 0.11, "learning_rate": 0.0002823529411764706, "loss": 1.325, "step": 60 }, { "epoch": 0.14, "learning_rate": 0.00037647058823529414, "loss": 1.2828, "step": 80 }, { "epoch": 0.18, "learning_rate": 0.00047058823529411766, "loss": 1.2758, "step": 100 }, { "epoch": 0.21, "learning_rate": 0.0005647058823529412, "loss": 1.2667, "step": 120 }, { "epoch": 0.25, "learning_rate": 0.0006588235294117648, "loss": 1.2504, "step": 140 }, { "epoch": 0.28, "learning_rate": 0.0007529411764705883, "loss": 1.2394, "step": 160 }, { "epoch": 0.32, "learning_rate": 0.0007999934028874321, "loss": 1.228, "step": 180 }, { "epoch": 0.35, "learning_rate": 0.0007999406272925394, "loss": 1.2138, "step": 200 }, { "epoch": 0.39, "learning_rate": 0.0007998350830660272, "loss": 1.2122, "step": 220 }, { "epoch": 0.42, "learning_rate": 0.0007996767841335234, "loss": 1.219, "step": 240 }, { "epoch": 0.46, "learning_rate": 0.0007994657513811737, "loss": 1.1998, "step": 260 }, { "epoch": 0.5, "learning_rate": 0.0007992020126528848, "loss": 1.188, "step": 280 }, { "epoch": 0.53, "learning_rate": 0.0007988856027466511, "loss": 1.1931, "step": 300 }, { "epoch": 0.57, "learning_rate": 0.000798516563409964, "loss": 1.1753, "step": 320 }, { "epoch": 0.6, "learning_rate": 0.0007980949433343026, "loss": 1.173, "step": 340 }, { "epoch": 0.64, "learning_rate": 0.0007976207981487104, "loss": 1.1637, "step": 360 }, { "epoch": 0.67, "learning_rate": 0.0007970941904124546, "loss": 1.1651, "step": 380 }, { "epoch": 0.71, "learning_rate": 0.0007965151896067728, "loss": 1.1559, "step": 400 }, { "epoch": 0.74, "learning_rate": 0.0007958838721257046, "loss": 1.1658, "step": 420 }, { "epoch": 0.78, "learning_rate": 0.0007952003212660127, "loss": 1.1496, "step": 440 }, { "epoch": 0.81, "learning_rate": 0.0007944646272161933, "loss": 1.1471, "step": 460 }, { "epoch": 0.85, "learning_rate": 0.0007936768870445747, "loss": 1.1311, "step": 480 }, { "epoch": 0.89, "learning_rate": 0.0007928372046865116, "loss": 1.1301, "step": 500 }, { "epoch": 0.92, "learning_rate": 0.0007919456909306711, "loss": 1.134, "step": 520 }, { "epoch": 0.96, "learning_rate": 0.0007910024634044154, "loss": 1.1235, "step": 540 }, { "epoch": 0.99, "learning_rate": 0.0007900076465582816, "loss": 1.1239, "step": 560 }, { "epoch": 1.03, "learning_rate": 0.0007889613716495616, "loss": 1.0878, "step": 580 }, { "epoch": 1.06, "learning_rate": 0.0007878637767249839, "loss": 1.0879, "step": 600 }, { "epoch": 1.1, "learning_rate": 0.0007867150066024996, "loss": 1.0671, "step": 620 }, { "epoch": 1.13, "learning_rate": 0.0007855152128521754, "loss": 1.0689, "step": 640 }, { "epoch": 1.17, "learning_rate": 0.0007842645537761941, "loss": 1.0794, "step": 660 }, { "epoch": 1.2, "learning_rate": 0.0007829631943879694, "loss": 1.0653, "step": 680 }, { "epoch": 1.24, "learning_rate": 0.0007816113063903726, "loss": 1.066, "step": 700 }, { "epoch": 1.27, "learning_rate": 0.0007802090681530788, "loss": 1.0675, "step": 720 }, { "epoch": 1.31, "learning_rate": 0.0007787566646890325, "loss": 1.0598, "step": 740 }, { "epoch": 1.35, "learning_rate": 0.0007772542876300359, "loss": 1.0669, "step": 760 }, { "epoch": 1.38, "learning_rate": 0.0007757021352014663, "loss": 1.0558, "step": 780 }, { "epoch": 1.42, "learning_rate": 0.0007741004121961207, "loss": 1.0578, "step": 800 }, { "epoch": 1.45, "learning_rate": 0.0007724493299471956, "loss": 1.056, "step": 820 }, { "epoch": 1.49, "learning_rate": 0.0007707491063004035, "loss": 1.0491, "step": 840 }, { "epoch": 1.52, "learning_rate": 0.0007689999655852306, "loss": 1.0497, "step": 860 }, { "epoch": 1.56, "learning_rate": 0.0007672021385853376, "loss": 1.0393, "step": 880 }, { "epoch": 1.59, "learning_rate": 0.0007653558625081099, "loss": 1.0379, "step": 900 }, { "epoch": 1.63, "learning_rate": 0.0007634613809533613, "loss": 1.049, "step": 920 }, { "epoch": 1.66, "learning_rate": 0.0007615189438811918, "loss": 1.0594, "step": 940 }, { "epoch": 1.7, "learning_rate": 0.0007595288075790085, "loss": 1.0375, "step": 960 }, { "epoch": 1.74, "learning_rate": 0.0007574912346277103, "loss": 1.0453, "step": 980 }, { "epoch": 1.77, "learning_rate": 0.0007554064938670426, "loss": 1.0348, "step": 1000 }, { "epoch": 1.81, "learning_rate": 0.0007532748603601265, "loss": 1.0441, "step": 1020 }, { "epoch": 1.84, "learning_rate": 0.0007510966153571667, "loss": 1.0237, "step": 1040 }, { "epoch": 1.88, "learning_rate": 0.000748872046258343, "loss": 1.0478, "step": 1060 }, { "epoch": 1.91, "learning_rate": 0.0007466014465758899, "loss": 1.0289, "step": 1080 }, { "epoch": 1.95, "learning_rate": 0.0007442851158953712, "loss": 1.026, "step": 1100 }, { "epoch": 1.98, "learning_rate": 0.0007419233598361512, "loss": 1.0244, "step": 1120 }, { "epoch": 2.02, "learning_rate": 0.0007395164900110721, "loss": 0.9968, "step": 1140 }, { "epoch": 2.05, "learning_rate": 0.0007370648239853385, "loss": 0.9798, "step": 1160 }, { "epoch": 2.09, "learning_rate": 0.0007345686852346176, "loss": 0.9529, "step": 1180 }, { "epoch": 2.12, "learning_rate": 0.0007320284031023603, "loss": 0.9666, "step": 1200 }, { "epoch": 2.16, "learning_rate": 0.000729444312756346, "loss": 0.9797, "step": 1220 }, { "epoch": 2.2, "learning_rate": 0.0007268167551444611, "loss": 0.9674, "step": 1240 }, { "epoch": 2.23, "learning_rate": 0.0007241460769497138, "loss": 0.9666, "step": 1260 }, { "epoch": 2.27, "learning_rate": 0.0007214326305444917, "loss": 0.9785, "step": 1280 }, { "epoch": 2.3, "learning_rate": 0.0007186767739440701, "loss": 0.9629, "step": 1300 }, { "epoch": 2.34, "learning_rate": 0.0007158788707593748, "loss": 0.973, "step": 1320 }, { "epoch": 2.37, "learning_rate": 0.0007130392901490069, "loss": 0.9649, "step": 1340 }, { "epoch": 2.41, "learning_rate": 0.0007101584067705355, "loss": 0.9766, "step": 1360 }, { "epoch": 2.44, "learning_rate": 0.0007072366007310646, "loss": 0.954, "step": 1380 }, { "epoch": 2.48, "learning_rate": 0.0007042742575370822, "loss": 0.9576, "step": 1400 }, { "epoch": 2.51, "learning_rate": 0.0007012717680435956, "loss": 0.9783, "step": 1420 }, { "epoch": 2.55, "learning_rate": 0.0006982295284025612, "loss": 0.9553, "step": 1440 }, { "epoch": 2.59, "learning_rate": 0.0006951479400106161, "loss": 0.951, "step": 1460 }, { "epoch": 2.62, "learning_rate": 0.000692027409456118, "loss": 0.9647, "step": 1480 }, { "epoch": 2.66, "learning_rate": 0.0006888683484654981, "loss": 0.9656, "step": 1500 }, { "epoch": 2.69, "learning_rate": 0.0006856711738489386, "loss": 0.9507, "step": 1520 }, { "epoch": 2.73, "learning_rate": 0.0006824363074453778, "loss": 0.9496, "step": 1540 }, { "epoch": 2.76, "learning_rate": 0.0006791641760668519, "loss": 0.9571, "step": 1560 }, { "epoch": 2.8, "learning_rate": 0.0006758552114421815, "loss": 0.9494, "step": 1580 }, { "epoch": 2.83, "learning_rate": 0.0006725098501600088, "loss": 0.9556, "step": 1600 }, { "epoch": 2.87, "learning_rate": 0.0006691285336111928, "loss": 0.9411, "step": 1620 }, { "epoch": 2.9, "learning_rate": 0.0006657117079305725, "loss": 0.9501, "step": 1640 }, { "epoch": 2.94, "learning_rate": 0.0006622598239381033, "loss": 0.9598, "step": 1660 }, { "epoch": 2.97, "learning_rate": 0.0006587733370793743, "loss": 0.9599, "step": 1680 }, { "epoch": 3.01, "learning_rate": 0.0006552527073655178, "loss": 0.9306, "step": 1700 }, { "epoch": 3.05, "learning_rate": 0.0006516983993125138, "loss": 0.9013, "step": 1720 }, { "epoch": 3.08, "learning_rate": 0.0006481108818799015, "loss": 0.8798, "step": 1740 }, { "epoch": 3.12, "learning_rate": 0.0006444906284089044, "loss": 0.903, "step": 1760 }, { "epoch": 3.15, "learning_rate": 0.000640838116559977, "loss": 0.9017, "step": 1780 }, { "epoch": 3.19, "learning_rate": 0.0006371538282497815, "loss": 0.9051, "step": 1800 }, { "epoch": 3.22, "learning_rate": 0.0006334382495876036, "loss": 0.8966, "step": 1820 }, { "epoch": 3.26, "learning_rate": 0.0006296918708112143, "loss": 0.8863, "step": 1840 }, { "epoch": 3.29, "learning_rate": 0.0006259151862221875, "loss": 0.8926, "step": 1860 }, { "epoch": 3.33, "learning_rate": 0.0006221086941206817, "loss": 0.908, "step": 1880 }, { "epoch": 3.36, "learning_rate": 0.0006182728967396925, "loss": 0.9007, "step": 1900 }, { "epoch": 3.4, "learning_rate": 0.0006144083001787886, "loss": 0.8872, "step": 1920 }, { "epoch": 3.44, "learning_rate": 0.0006105154143373362, "loss": 0.8984, "step": 1940 }, { "epoch": 3.47, "learning_rate": 0.0006065947528472215, "loss": 0.9123, "step": 1960 }, { "epoch": 3.51, "learning_rate": 0.0006026468330050827, "loss": 0.8929, "step": 1980 }, { "epoch": 3.54, "learning_rate": 0.0005986721757040564, "loss": 0.9145, "step": 2000 }, { "epoch": 3.58, "learning_rate": 0.0005946713053650507, "loss": 0.8867, "step": 2020 }, { "epoch": 3.61, "learning_rate": 0.0005906447498675521, "loss": 0.8914, "step": 2040 }, { "epoch": 3.65, "learning_rate": 0.0005865930404799774, "loss": 0.8946, "step": 2060 }, { "epoch": 3.68, "learning_rate": 0.0005825167117895765, "loss": 0.892, "step": 2080 }, { "epoch": 3.72, "learning_rate": 0.0005784163016318987, "loss": 0.8875, "step": 2100 }, { "epoch": 3.75, "learning_rate": 0.0005742923510198303, "loss": 0.888, "step": 2120 }, { "epoch": 3.79, "learning_rate": 0.0005701454040722124, "loss": 0.9078, "step": 2140 }, { "epoch": 3.82, "learning_rate": 0.0005659760079420498, "loss": 0.9027, "step": 2160 }, { "epoch": 3.86, "learning_rate": 0.000561784712744318, "loss": 0.8997, "step": 2180 }, { "epoch": 3.9, "learning_rate": 0.0005575720714833808, "loss": 0.9053, "step": 2200 }, { "epoch": 3.93, "learning_rate": 0.0005533386399800275, "loss": 0.9054, "step": 2220 }, { "epoch": 3.97, "learning_rate": 0.0005490849767981348, "loss": 0.8988, "step": 2240 }, { "epoch": 4.0, "learning_rate": 0.0005448116431709716, "loss": 0.8903, "step": 2260 }, { "epoch": 4.04, "learning_rate": 0.0005405192029271477, "loss": 0.8373, "step": 2280 }, { "epoch": 4.07, "learning_rate": 0.0005362082224162223, "loss": 0.8336, "step": 2300 }, { "epoch": 4.11, "learning_rate": 0.0005318792704339792, "loss": 0.8483, "step": 2320 }, { "epoch": 4.14, "learning_rate": 0.0005275329181473787, "loss": 0.8453, "step": 2340 }, { "epoch": 4.18, "learning_rate": 0.0005231697390191976, "loss": 0.8351, "step": 2360 }, { "epoch": 4.21, "learning_rate": 0.000518790308732366, "loss": 0.8329, "step": 2380 }, { "epoch": 4.25, "learning_rate": 0.0005143952051140103, "loss": 0.8394, "step": 2400 }, { "epoch": 4.29, "learning_rate": 0.000509985008059215, "loss": 0.8526, "step": 2420 }, { "epoch": 4.32, "learning_rate": 0.0005055602994545098, "loss": 0.826, "step": 2440 }, { "epoch": 4.36, "learning_rate": 0.0005011216631010953, "loss": 0.849, "step": 2460 }, { "epoch": 4.39, "learning_rate": 0.0004966696846378156, "loss": 0.8507, "step": 2480 }, { "epoch": 4.43, "learning_rate": 0.000492204951463888, "loss": 0.8461, "step": 2500 }, { "epoch": 4.46, "learning_rate": 0.00048772805266140154, "loss": 0.8533, "step": 2520 }, { "epoch": 4.5, "learning_rate": 0.00048323957891759203, "loss": 0.8384, "step": 2540 }, { "epoch": 4.53, "learning_rate": 0.00047874012244690696, "loss": 0.842, "step": 2560 }, { "epoch": 4.57, "learning_rate": 0.000474230276912867, "loss": 0.8608, "step": 2580 }, { "epoch": 4.6, "learning_rate": 0.00046971063734973833, "loss": 0.8562, "step": 2600 }, { "epoch": 4.64, "learning_rate": 0.0004651818000840229, "loss": 0.8594, "step": 2620 }, { "epoch": 4.67, "learning_rate": 0.0004606443626557778, "loss": 0.8608, "step": 2640 }, { "epoch": 4.71, "learning_rate": 0.0004560989237397758, "loss": 0.8486, "step": 2660 }, { "epoch": 4.75, "learning_rate": 0.00045154608306651514, "loss": 0.869, "step": 2680 }, { "epoch": 4.78, "learning_rate": 0.0004469864413430907, "loss": 0.8482, "step": 2700 }, { "epoch": 4.82, "learning_rate": 0.00044242060017393573, "loss": 0.8583, "step": 2720 }, { "epoch": 4.85, "learning_rate": 0.00043784916198144543, "loss": 0.8582, "step": 2740 }, { "epoch": 4.89, "learning_rate": 0.00043327272992649317, "loss": 0.8504, "step": 2760 }, { "epoch": 4.92, "learning_rate": 0.00042869190782884794, "loss": 0.8592, "step": 2780 }, { "epoch": 4.96, "learning_rate": 0.00042410730008750623, "loss": 0.8545, "step": 2800 }, { "epoch": 4.99, "learning_rate": 0.00041951951160094664, "loss": 0.855, "step": 2820 }, { "epoch": 5.03, "learning_rate": 0.00041492914768731927, "loss": 0.7869, "step": 2840 }, { "epoch": 5.06, "learning_rate": 0.0004103368140045789, "loss": 0.8083, "step": 2860 }, { "epoch": 5.1, "learning_rate": 0.00040574311647057366, "loss": 0.8108, "step": 2880 }, { "epoch": 5.14, "learning_rate": 0.00040114866118310045, "loss": 0.7968, "step": 2900 }, { "epoch": 5.17, "learning_rate": 0.0003965540543399344, "loss": 0.8016, "step": 2920 }, { "epoch": 5.21, "learning_rate": 0.00039195990215884756, "loss": 0.7967, "step": 2940 }, { "epoch": 5.24, "learning_rate": 0.00038736681079762293, "loss": 0.8096, "step": 2960 }, { "epoch": 5.28, "learning_rate": 0.0003827753862740779, "loss": 0.8073, "step": 2980 }, { "epoch": 5.31, "learning_rate": 0.0003781862343861055, "loss": 0.804, "step": 3000 }, { "epoch": 5.35, "learning_rate": 0.00037359996063174425, "loss": 0.8119, "step": 3020 }, { "epoch": 5.38, "learning_rate": 0.0003690171701292887, "loss": 0.7997, "step": 3040 }, { "epoch": 5.42, "learning_rate": 0.0003644384675374489, "loss": 0.8202, "step": 3060 }, { "epoch": 5.45, "learning_rate": 0.0003598644569755713, "loss": 0.815, "step": 3080 }, { "epoch": 5.49, "learning_rate": 0.00035529574194393033, "loss": 0.825, "step": 3100 }, { "epoch": 5.52, "learning_rate": 0.00035073292524410207, "loss": 0.8171, "step": 3120 }, { "epoch": 5.56, "learning_rate": 0.00034617660889943, "loss": 0.7921, "step": 3140 }, { "epoch": 5.6, "learning_rate": 0.00034162739407559285, "loss": 0.8299, "step": 3160 }, { "epoch": 5.63, "learning_rate": 0.0003370858810012869, "loss": 0.811, "step": 3180 }, { "epoch": 5.67, "learning_rate": 0.00033255266888903006, "loss": 0.8093, "step": 3200 }, { "epoch": 5.7, "learning_rate": 0.00032802835585610225, "loss": 0.8106, "step": 3220 }, { "epoch": 5.74, "learning_rate": 0.00032351353884562783, "loss": 0.8053, "step": 3240 }, { "epoch": 5.77, "learning_rate": 0.00031900881354781556, "loss": 0.8161, "step": 3260 }, { "epoch": 5.81, "learning_rate": 0.00031451477432136154, "loss": 0.8186, "step": 3280 }, { "epoch": 5.84, "learning_rate": 0.0003100320141150293, "loss": 0.8046, "step": 3300 }, { "epoch": 5.88, "learning_rate": 0.00030556112438941526, "loss": 0.8236, "step": 3320 }, { "epoch": 5.91, "learning_rate": 0.00030110269503891084, "loss": 0.8057, "step": 3340 }, { "epoch": 5.95, "learning_rate": 0.0002966573143138713, "loss": 0.8109, "step": 3360 }, { "epoch": 5.98, "learning_rate": 0.00029222556874300036, "loss": 0.8163, "step": 3380 }, { "epoch": 6.02, "learning_rate": 0.0002878080430559646, "loss": 0.7901, "step": 3400 }, { "epoch": 6.06, "learning_rate": 0.0002834053201062417, "loss": 0.7749, "step": 3420 }, { "epoch": 6.09, "learning_rate": 0.00027901798079421977, "loss": 0.7775, "step": 3440 }, { "epoch": 6.13, "learning_rate": 0.0002746466039905513, "loss": 0.7589, "step": 3460 }, { "epoch": 6.16, "learning_rate": 0.000270291766459777, "loss": 0.7742, "step": 3480 }, { "epoch": 6.2, "learning_rate": 0.00026595404278422684, "loss": 0.7914, "step": 3500 }, { "epoch": 6.23, "learning_rate": 0.00026163400528820836, "loss": 0.7909, "step": 3520 }, { "epoch": 6.27, "learning_rate": 0.0002573322239624947, "loss": 0.7653, "step": 3540 }, { "epoch": 6.3, "learning_rate": 0.000253049266389118, "loss": 0.7779, "step": 3560 }, { "epoch": 6.34, "learning_rate": 0.0002487856976664831, "loss": 0.7771, "step": 3580 }, { "epoch": 6.37, "learning_rate": 0.00024454208033480683, "loss": 0.771, "step": 3600 }, { "epoch": 6.41, "learning_rate": 0.00024031897430189695, "loss": 0.7788, "step": 3620 }, { "epoch": 6.45, "learning_rate": 0.00023611693676927606, "loss": 0.7798, "step": 3640 }, { "epoch": 6.48, "learning_rate": 0.00023193652215866429, "loss": 0.7779, "step": 3660 }, { "epoch": 6.52, "learning_rate": 0.00022777828203882875, "loss": 0.7784, "step": 3680 }, { "epoch": 6.55, "learning_rate": 0.00022364276505280794, "loss": 0.7946, "step": 3700 }, { "epoch": 6.59, "learning_rate": 0.0002195305168455239, "loss": 0.774, "step": 3720 }, { "epoch": 6.62, "learning_rate": 0.00021544207999178917, "loss": 0.7823, "step": 3740 }, { "epoch": 6.66, "learning_rate": 0.00021137799392471814, "loss": 0.7779, "step": 3760 }, { "epoch": 6.69, "learning_rate": 0.00020733879486455433, "loss": 0.7867, "step": 3780 }, { "epoch": 6.73, "learning_rate": 0.0002033250157479206, "loss": 0.7738, "step": 3800 }, { "epoch": 6.76, "learning_rate": 0.0001993371861575028, "loss": 0.7814, "step": 3820 }, { "epoch": 6.8, "learning_rate": 0.00019537583225217605, "loss": 0.7695, "step": 3840 }, { "epoch": 6.83, "learning_rate": 0.00019144147669758322, "loss": 0.7846, "step": 3860 }, { "epoch": 6.87, "learning_rate": 0.00018753463859717283, "loss": 0.7779, "step": 3880 }, { "epoch": 6.91, "learning_rate": 0.0001836558334237088, "loss": 0.7769, "step": 3900 }, { "epoch": 6.94, "learning_rate": 0.0001798055729512579, "loss": 0.782, "step": 3920 }, { "epoch": 6.98, "learning_rate": 0.00017598436518766596, "loss": 0.7683, "step": 3940 }, { "epoch": 7.01, "learning_rate": 0.0001721927143075305, "loss": 0.7674, "step": 3960 }, { "epoch": 7.05, "learning_rate": 0.00016843112058567935, "loss": 0.7427, "step": 3980 }, { "epoch": 7.08, "learning_rate": 0.00016470008033116443, "loss": 0.7627, "step": 4000 }, { "epoch": 7.12, "learning_rate": 0.00016100008582177705, "loss": 0.7541, "step": 4020 }, { "epoch": 7.15, "learning_rate": 0.00015733162523909707, "loss": 0.7654, "step": 4040 }, { "epoch": 7.19, "learning_rate": 0.0001536951826040813, "loss": 0.7382, "step": 4060 }, { "epoch": 7.22, "learning_rate": 0.0001500912377132013, "loss": 0.7555, "step": 4080 }, { "epoch": 7.26, "learning_rate": 0.00014652026607513848, "loss": 0.748, "step": 4100 }, { "epoch": 7.3, "learning_rate": 0.00014298273884804478, "loss": 0.753, "step": 4120 }, { "epoch": 7.33, "learning_rate": 0.00013947912277737808, "loss": 0.7594, "step": 4140 }, { "epoch": 7.37, "learning_rate": 0.00013600988013431832, "loss": 0.7387, "step": 4160 }, { "epoch": 7.4, "learning_rate": 0.00013257546865477572, "loss": 0.7551, "step": 4180 }, { "epoch": 7.44, "learning_rate": 0.00012917634147899607, "loss": 0.7609, "step": 4200 }, { "epoch": 7.47, "learning_rate": 0.00012581294709177327, "loss": 0.762, "step": 4220 }, { "epoch": 7.51, "learning_rate": 0.00012248572926327537, "loss": 0.755, "step": 4240 }, { "epoch": 7.54, "learning_rate": 0.00011919512699049314, "loss": 0.753, "step": 4260 }, { "epoch": 7.58, "learning_rate": 0.00011594157443931872, "loss": 0.7603, "step": 4280 }, { "epoch": 7.61, "learning_rate": 0.0001127255008872604, "loss": 0.7565, "step": 4300 }, { "epoch": 7.65, "learning_rate": 0.00010954733066680401, "loss": 0.7542, "step": 4320 }, { "epoch": 7.68, "learning_rate": 0.00010640748310942559, "loss": 0.7674, "step": 4340 }, { "epoch": 7.72, "learning_rate": 0.00010330637249026445, "loss": 0.7447, "step": 4360 }, { "epoch": 7.76, "learning_rate": 0.00010024440797346324, "loss": 0.7582, "step": 4380 }, { "epoch": 7.79, "learning_rate": 9.722199355818227e-05, "loss": 0.7598, "step": 4400 }, { "epoch": 7.83, "learning_rate": 9.423952802529564e-05, "loss": 0.7389, "step": 4420 }, { "epoch": 7.86, "learning_rate": 9.129740488477518e-05, "loss": 0.759, "step": 4440 }, { "epoch": 7.9, "learning_rate": 8.83960123237706e-05, "loss": 0.7442, "step": 4460 }, { "epoch": 7.93, "learning_rate": 8.553573315539188e-05, "loss": 0.7627, "step": 4480 }, { "epoch": 7.97, "learning_rate": 8.271694476819956e-05, "loss": 0.7531, "step": 4500 }, { "epoch": 8.0, "learning_rate": 7.994001907641262e-05, "loss": 0.7567, "step": 4520 }, { "epoch": 8.04, "learning_rate": 7.720532247083743e-05, "loss": 0.7274, "step": 4540 }, { "epoch": 8.07, "learning_rate": 7.451321577052533e-05, "loss": 0.7347, "step": 4560 }, { "epoch": 8.11, "learning_rate": 7.18640541751661e-05, "loss": 0.752, "step": 4580 }, { "epoch": 8.15, "learning_rate": 6.925818721822239e-05, "loss": 0.7293, "step": 4600 }, { "epoch": 8.18, "learning_rate": 6.669595872081211e-05, "loss": 0.7213, "step": 4620 }, { "epoch": 8.22, "learning_rate": 6.417770674634365e-05, "loss": 0.7246, "step": 4640 }, { "epoch": 8.25, "learning_rate": 6.170376355591204e-05, "loss": 0.7353, "step": 4660 }, { "epoch": 8.29, "learning_rate": 5.9274455564459896e-05, "loss": 0.7396, "step": 4680 }, { "epoch": 8.32, "learning_rate": 5.689010329770965e-05, "loss": 0.7401, "step": 4700 }, { "epoch": 8.36, "learning_rate": 5.455102134987304e-05, "loss": 0.7399, "step": 4720 }, { "epoch": 8.39, "learning_rate": 5.225751834214339e-05, "loss": 0.7427, "step": 4740 }, { "epoch": 8.43, "learning_rate": 5.000989688197555e-05, "loss": 0.7289, "step": 4760 }, { "epoch": 8.46, "learning_rate": 4.780845352315968e-05, "loss": 0.7398, "step": 4780 }, { "epoch": 8.5, "learning_rate": 4.565347872669339e-05, "loss": 0.7332, "step": 4800 }, { "epoch": 8.53, "learning_rate": 4.3545256822458445e-05, "loss": 0.7365, "step": 4820 }, { "epoch": 8.57, "learning_rate": 4.148406597170529e-05, "loss": 0.7347, "step": 4840 }, { "epoch": 8.61, "learning_rate": 3.947017813035254e-05, "loss": 0.7515, "step": 4860 }, { "epoch": 8.64, "learning_rate": 3.7503859013104806e-05, "loss": 0.7397, "step": 4880 }, { "epoch": 8.68, "learning_rate": 3.5585368058393834e-05, "loss": 0.7597, "step": 4900 }, { "epoch": 8.71, "learning_rate": 3.3714958394147975e-05, "loss": 0.7391, "step": 4920 }, { "epoch": 8.75, "learning_rate": 3.1892876804394144e-05, "loss": 0.7359, "step": 4940 }, { "epoch": 8.78, "learning_rate": 3.0119363696697078e-05, "loss": 0.7412, "step": 4960 }, { "epoch": 8.82, "learning_rate": 2.839465307043927e-05, "loss": 0.7485, "step": 4980 }, { "epoch": 8.85, "learning_rate": 2.6718972485947037e-05, "loss": 0.7534, "step": 5000 }, { "epoch": 8.89, "learning_rate": 2.5092543034466264e-05, "loss": 0.7405, "step": 5020 }, { "epoch": 8.92, "learning_rate": 2.3515579308990597e-05, "loss": 0.7415, "step": 5040 }, { "epoch": 8.96, "learning_rate": 2.1988289375948524e-05, "loss": 0.7309, "step": 5060 }, { "epoch": 9.0, "learning_rate": 2.0510874747750575e-05, "loss": 0.7418, "step": 5080 }, { "epoch": 9.03, "learning_rate": 1.9083530356201407e-05, "loss": 0.7469, "step": 5100 }, { "epoch": 9.07, "learning_rate": 1.7706444526780585e-05, "loss": 0.7385, "step": 5120 }, { "epoch": 9.1, "learning_rate": 1.637979895379429e-05, "loss": 0.7333, "step": 5140 }, { "epoch": 9.14, "learning_rate": 1.5103768676402885e-05, "loss": 0.7314, "step": 5160 }, { "epoch": 9.17, "learning_rate": 1.38785220555254e-05, "loss": 0.74, "step": 5180 }, { "epoch": 9.21, "learning_rate": 1.270422075162645e-05, "loss": 0.7481, "step": 5200 }, { "epoch": 9.24, "learning_rate": 1.1581019703386143e-05, "loss": 0.7222, "step": 5220 }, { "epoch": 9.28, "learning_rate": 1.0509067107257365e-05, "loss": 0.7259, "step": 5240 }, { "epoch": 9.31, "learning_rate": 9.488504397912712e-06, "loss": 0.7309, "step": 5260 }, { "epoch": 9.35, "learning_rate": 8.51946622958324e-06, "loss": 0.7212, "step": 5280 }, { "epoch": 9.38, "learning_rate": 7.602080458292227e-06, "loss": 0.7254, "step": 5300 }, { "epoch": 9.42, "learning_rate": 6.7364681249854735e-06, "loss": 0.728, "step": 5320 }, { "epoch": 9.46, "learning_rate": 5.922743439561229e-06, "loss": 0.7341, "step": 5340 }, { "epoch": 9.49, "learning_rate": 5.161013765801137e-06, "loss": 0.721, "step": 5360 }, { "epoch": 9.53, "learning_rate": 4.451379607204453e-06, "loss": 0.725, "step": 5380 }, { "epoch": 9.56, "learning_rate": 3.7939345937275884e-06, "loss": 0.7399, "step": 5400 }, { "epoch": 9.6, "learning_rate": 3.1887654694303883e-06, "loss": 0.7301, "step": 5420 }, { "epoch": 9.63, "learning_rate": 2.635952081031201e-06, "loss": 0.7222, "step": 5440 }, { "epoch": 9.67, "learning_rate": 2.1355673673715716e-06, "loss": 0.7308, "step": 5460 }, { "epoch": 9.7, "learning_rate": 1.6876773497926046e-06, "loss": 0.7417, "step": 5480 }, { "epoch": 9.74, "learning_rate": 1.292341123424423e-06, "loss": 0.7348, "step": 5500 }, { "epoch": 9.77, "learning_rate": 9.496108493884936e-07, "loss": 0.7278, "step": 5520 }, { "epoch": 9.81, "learning_rate": 6.595317479159313e-07, "loss": 0.7257, "step": 5540 }, { "epoch": 9.85, "learning_rate": 4.2214209238085054e-07, "loss": 0.7296, "step": 5560 }, { "epoch": 9.88, "learning_rate": 2.3747320425053786e-07, "loss": 0.7301, "step": 5580 }, { "epoch": 9.92, "learning_rate": 1.0554944895293517e-07, "loss": 0.739, "step": 5600 }, { "epoch": 9.95, "learning_rate": 2.638823266174484e-08, "loss": 0.7284, "step": 5620 }, { "epoch": 9.99, "learning_rate": 0.0, "loss": 0.7294, "step": 5640 }, { "epoch": 9.99, "step": 5640, "total_flos": 1.4686171215861645e+19, "train_loss": 0.8784972819876163, "train_runtime": 13393.5014, "train_samples_per_second": 53.949, "train_steps_per_second": 0.421 } ], "max_steps": 5640, "num_train_epochs": 10, "total_flos": 1.4686171215861645e+19, "trial_name": null, "trial_params": null }