{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.807017543859649, "eval_steps": 50, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 2.478505000877347e-05, "loss": 2.8559, "step": 50 }, { "epoch": 0.04, "eval_loss": 2.195713520050049, "eval_runtime": 14.6436, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 50 }, { "epoch": 0.07, "learning_rate": 2.4565713283032112e-05, "loss": 2.2991, "step": 100 }, { "epoch": 0.07, "eval_loss": 1.9645323753356934, "eval_runtime": 14.6522, "eval_samples_per_second": 1.433, "eval_steps_per_second": 0.205, "step": 100 }, { "epoch": 0.11, "learning_rate": 2.4346376557290754e-05, "loss": 2.1318, "step": 150 }, { "epoch": 0.11, "eval_loss": 1.8834866285324097, "eval_runtime": 14.646, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 150 }, { "epoch": 0.14, "learning_rate": 2.4127039831549393e-05, "loss": 2.035, "step": 200 }, { "epoch": 0.14, "eval_loss": 1.8372365236282349, "eval_runtime": 14.6452, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 200 }, { "epoch": 0.18, "learning_rate": 2.390770310580804e-05, "loss": 2.0511, "step": 250 }, { "epoch": 0.18, "eval_loss": 1.8165128231048584, "eval_runtime": 14.6408, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 250 }, { "epoch": 0.21, "learning_rate": 2.3688366380066677e-05, "loss": 2.0054, "step": 300 }, { "epoch": 0.21, "eval_loss": 1.7788053750991821, "eval_runtime": 14.6416, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 300 }, { "epoch": 0.25, "learning_rate": 2.3469029654325323e-05, "loss": 1.944, "step": 350 }, { "epoch": 0.25, "eval_loss": 1.7566713094711304, "eval_runtime": 14.6486, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 350 }, { "epoch": 0.28, "learning_rate": 2.324969292858396e-05, "loss": 1.9487, "step": 400 }, { "epoch": 0.28, "eval_loss": 1.7560102939605713, "eval_runtime": 14.6501, "eval_samples_per_second": 1.433, "eval_steps_per_second": 0.205, "step": 400 }, { "epoch": 0.32, "learning_rate": 2.3030356202842604e-05, "loss": 1.9183, "step": 450 }, { "epoch": 0.32, "eval_loss": 1.7329343557357788, "eval_runtime": 14.6419, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 450 }, { "epoch": 0.35, "learning_rate": 2.2811019477101246e-05, "loss": 1.8684, "step": 500 }, { "epoch": 0.35, "eval_loss": 1.7263619899749756, "eval_runtime": 14.647, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 500 }, { "epoch": 0.39, "learning_rate": 2.2591682751359888e-05, "loss": 1.9194, "step": 550 }, { "epoch": 0.39, "eval_loss": 1.7277635335922241, "eval_runtime": 14.6488, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 550 }, { "epoch": 0.42, "learning_rate": 2.237234602561853e-05, "loss": 1.8842, "step": 600 }, { "epoch": 0.42, "eval_loss": 1.728346347808838, "eval_runtime": 14.6409, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 600 }, { "epoch": 0.46, "learning_rate": 2.2153009299877172e-05, "loss": 1.817, "step": 650 }, { "epoch": 0.46, "eval_loss": 1.709412693977356, "eval_runtime": 14.6427, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 650 }, { "epoch": 0.49, "learning_rate": 2.1933672574135815e-05, "loss": 1.8139, "step": 700 }, { "epoch": 0.49, "eval_loss": 1.7006276845932007, "eval_runtime": 14.6444, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 700 }, { "epoch": 0.53, "learning_rate": 2.1714335848394457e-05, "loss": 1.8657, "step": 750 }, { "epoch": 0.53, "eval_loss": 1.688984990119934, "eval_runtime": 14.6453, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 750 }, { "epoch": 0.56, "learning_rate": 2.14949991226531e-05, "loss": 1.8366, "step": 800 }, { "epoch": 0.56, "eval_loss": 1.6756386756896973, "eval_runtime": 14.6493, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 800 }, { "epoch": 0.6, "learning_rate": 2.1275662396911738e-05, "loss": 1.8521, "step": 850 }, { "epoch": 0.6, "eval_loss": 1.6723501682281494, "eval_runtime": 14.645, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 850 }, { "epoch": 0.63, "learning_rate": 2.1056325671170383e-05, "loss": 1.8357, "step": 900 }, { "epoch": 0.63, "eval_loss": 1.6671411991119385, "eval_runtime": 14.646, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 900 }, { "epoch": 0.67, "learning_rate": 2.0836988945429022e-05, "loss": 1.8667, "step": 950 }, { "epoch": 0.67, "eval_loss": 1.6564487218856812, "eval_runtime": 14.6438, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 950 }, { "epoch": 0.7, "learning_rate": 2.0617652219687667e-05, "loss": 1.8163, "step": 1000 }, { "epoch": 0.7, "eval_loss": 1.6533170938491821, "eval_runtime": 14.6431, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 1000 }, { "epoch": 0.74, "learning_rate": 2.0398315493946306e-05, "loss": 1.844, "step": 1050 }, { "epoch": 0.74, "eval_loss": 1.6446335315704346, "eval_runtime": 14.6477, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 1050 }, { "epoch": 0.77, "learning_rate": 2.017897876820495e-05, "loss": 1.8403, "step": 1100 }, { "epoch": 0.77, "eval_loss": 1.6377147436141968, "eval_runtime": 14.65, "eval_samples_per_second": 1.433, "eval_steps_per_second": 0.205, "step": 1100 }, { "epoch": 0.81, "learning_rate": 1.995964204246359e-05, "loss": 1.8436, "step": 1150 }, { "epoch": 0.81, "eval_loss": 1.6400964260101318, "eval_runtime": 14.6512, "eval_samples_per_second": 1.433, "eval_steps_per_second": 0.205, "step": 1150 }, { "epoch": 0.84, "learning_rate": 1.9740305316722233e-05, "loss": 1.8255, "step": 1200 }, { "epoch": 0.84, "eval_loss": 1.6376707553863525, "eval_runtime": 14.6441, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 1200 }, { "epoch": 0.88, "learning_rate": 1.9520968590980875e-05, "loss": 1.7917, "step": 1250 }, { "epoch": 0.88, "eval_loss": 1.6302813291549683, "eval_runtime": 14.6397, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 1250 }, { "epoch": 0.91, "learning_rate": 1.9301631865239517e-05, "loss": 1.8289, "step": 1300 }, { "epoch": 0.91, "eval_loss": 1.6255093812942505, "eval_runtime": 14.6406, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 1300 }, { "epoch": 0.95, "learning_rate": 1.9082295139498156e-05, "loss": 1.8736, "step": 1350 }, { "epoch": 0.95, "eval_loss": 1.617380976676941, "eval_runtime": 14.6455, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 1350 }, { "epoch": 0.98, "learning_rate": 1.88629584137568e-05, "loss": 1.748, "step": 1400 }, { "epoch": 0.98, "eval_loss": 1.6211485862731934, "eval_runtime": 14.6529, "eval_samples_per_second": 1.433, "eval_steps_per_second": 0.205, "step": 1400 }, { "epoch": 1.02, "learning_rate": 1.8643621688015444e-05, "loss": 1.7769, "step": 1450 }, { "epoch": 1.02, "eval_loss": 1.6201837062835693, "eval_runtime": 14.6729, "eval_samples_per_second": 1.431, "eval_steps_per_second": 0.204, "step": 1450 }, { "epoch": 1.05, "learning_rate": 1.8424284962274086e-05, "loss": 1.7295, "step": 1500 }, { "epoch": 1.05, "eval_loss": 1.6232948303222656, "eval_runtime": 14.6843, "eval_samples_per_second": 1.43, "eval_steps_per_second": 0.204, "step": 1500 }, { "epoch": 1.09, "learning_rate": 1.8204948236532728e-05, "loss": 1.6845, "step": 1550 }, { "epoch": 1.09, "eval_loss": 1.626202940940857, "eval_runtime": 14.6738, "eval_samples_per_second": 1.431, "eval_steps_per_second": 0.204, "step": 1550 }, { "epoch": 1.12, "learning_rate": 1.7985611510791367e-05, "loss": 1.6655, "step": 1600 }, { "epoch": 1.12, "eval_loss": 1.617616057395935, "eval_runtime": 14.6736, "eval_samples_per_second": 1.431, "eval_steps_per_second": 0.204, "step": 1600 }, { "epoch": 1.16, "learning_rate": 1.7766274785050012e-05, "loss": 1.6687, "step": 1650 }, { "epoch": 1.16, "eval_loss": 1.6166654825210571, "eval_runtime": 14.7006, "eval_samples_per_second": 1.429, "eval_steps_per_second": 0.204, "step": 1650 }, { "epoch": 1.19, "learning_rate": 1.754693805930865e-05, "loss": 1.704, "step": 1700 }, { "epoch": 1.19, "eval_loss": 1.6115292310714722, "eval_runtime": 14.6762, "eval_samples_per_second": 1.431, "eval_steps_per_second": 0.204, "step": 1700 }, { "epoch": 1.23, "learning_rate": 1.7327601333567293e-05, "loss": 1.6818, "step": 1750 }, { "epoch": 1.23, "eval_loss": 1.6096872091293335, "eval_runtime": 14.6731, "eval_samples_per_second": 1.431, "eval_steps_per_second": 0.204, "step": 1750 }, { "epoch": 1.26, "learning_rate": 1.7108264607825935e-05, "loss": 1.675, "step": 1800 }, { "epoch": 1.26, "eval_loss": 1.6069471836090088, "eval_runtime": 14.6844, "eval_samples_per_second": 1.43, "eval_steps_per_second": 0.204, "step": 1800 }, { "epoch": 1.3, "learning_rate": 1.6888927882084577e-05, "loss": 1.6865, "step": 1850 }, { "epoch": 1.3, "eval_loss": 1.6076393127441406, "eval_runtime": 14.648, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 1850 }, { "epoch": 1.33, "learning_rate": 1.666959115634322e-05, "loss": 1.6966, "step": 1900 }, { "epoch": 1.33, "eval_loss": 1.603057861328125, "eval_runtime": 14.6382, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 1900 }, { "epoch": 1.37, "learning_rate": 1.6450254430601862e-05, "loss": 1.7239, "step": 1950 }, { "epoch": 1.37, "eval_loss": 1.6086949110031128, "eval_runtime": 14.6442, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 1950 }, { "epoch": 1.4, "learning_rate": 1.62309177048605e-05, "loss": 1.677, "step": 2000 }, { "epoch": 1.4, "eval_loss": 1.6053651571273804, "eval_runtime": 14.6414, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 2000 }, { "epoch": 1.44, "learning_rate": 1.6011580979119146e-05, "loss": 1.6925, "step": 2050 }, { "epoch": 1.44, "eval_loss": 1.5961679220199585, "eval_runtime": 14.6373, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 2050 }, { "epoch": 1.47, "learning_rate": 1.5792244253377785e-05, "loss": 1.7188, "step": 2100 }, { "epoch": 1.47, "eval_loss": 1.591933250427246, "eval_runtime": 14.6374, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 2100 }, { "epoch": 1.51, "learning_rate": 1.557290752763643e-05, "loss": 1.6994, "step": 2150 }, { "epoch": 1.51, "eval_loss": 1.5993770360946655, "eval_runtime": 14.6433, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 2150 }, { "epoch": 1.54, "learning_rate": 1.535357080189507e-05, "loss": 1.6817, "step": 2200 }, { "epoch": 1.54, "eval_loss": 1.5951728820800781, "eval_runtime": 14.6423, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 2200 }, { "epoch": 1.58, "learning_rate": 1.5134234076153711e-05, "loss": 1.6576, "step": 2250 }, { "epoch": 1.58, "eval_loss": 1.595167636871338, "eval_runtime": 14.6398, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 2250 }, { "epoch": 1.61, "learning_rate": 1.4914897350412355e-05, "loss": 1.7117, "step": 2300 }, { "epoch": 1.61, "eval_loss": 1.594815969467163, "eval_runtime": 14.633, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 2300 }, { "epoch": 1.65, "learning_rate": 1.4695560624670996e-05, "loss": 1.6795, "step": 2350 }, { "epoch": 1.65, "eval_loss": 1.5858888626098633, "eval_runtime": 14.6449, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 2350 }, { "epoch": 1.68, "learning_rate": 1.4476223898929636e-05, "loss": 1.7084, "step": 2400 }, { "epoch": 1.68, "eval_loss": 1.584323525428772, "eval_runtime": 14.6394, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 2400 }, { "epoch": 1.72, "learning_rate": 1.425688717318828e-05, "loss": 1.6785, "step": 2450 }, { "epoch": 1.72, "eval_loss": 1.5876024961471558, "eval_runtime": 14.6397, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 2450 }, { "epoch": 1.75, "learning_rate": 1.403755044744692e-05, "loss": 1.6863, "step": 2500 }, { "epoch": 1.75, "eval_loss": 1.5827977657318115, "eval_runtime": 14.6332, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 2500 }, { "epoch": 1.79, "learning_rate": 1.3818213721705564e-05, "loss": 1.718, "step": 2550 }, { "epoch": 1.79, "eval_loss": 1.5843836069107056, "eval_runtime": 14.6334, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 2550 }, { "epoch": 1.82, "learning_rate": 1.3598876995964205e-05, "loss": 1.6811, "step": 2600 }, { "epoch": 1.82, "eval_loss": 1.5835539102554321, "eval_runtime": 14.6371, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 2600 }, { "epoch": 1.86, "learning_rate": 1.3379540270222845e-05, "loss": 1.6748, "step": 2650 }, { "epoch": 1.86, "eval_loss": 1.5856655836105347, "eval_runtime": 14.6358, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 2650 }, { "epoch": 1.89, "learning_rate": 1.3160203544481489e-05, "loss": 1.6614, "step": 2700 }, { "epoch": 1.89, "eval_loss": 1.5807286500930786, "eval_runtime": 14.6393, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 2700 }, { "epoch": 1.93, "learning_rate": 1.294086681874013e-05, "loss": 1.6587, "step": 2750 }, { "epoch": 1.93, "eval_loss": 1.5819549560546875, "eval_runtime": 14.6411, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 2750 }, { "epoch": 1.96, "learning_rate": 1.2721530092998773e-05, "loss": 1.7167, "step": 2800 }, { "epoch": 1.96, "eval_loss": 1.5828579664230347, "eval_runtime": 14.6359, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 2800 }, { "epoch": 2.0, "learning_rate": 1.2502193367257414e-05, "loss": 1.663, "step": 2850 }, { "epoch": 2.0, "eval_loss": 1.5743989944458008, "eval_runtime": 14.6401, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 2850 }, { "epoch": 2.04, "learning_rate": 1.2282856641516056e-05, "loss": 1.5641, "step": 2900 }, { "epoch": 2.04, "eval_loss": 1.5980640649795532, "eval_runtime": 14.6296, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 2900 }, { "epoch": 2.07, "learning_rate": 1.2063519915774696e-05, "loss": 1.6048, "step": 2950 }, { "epoch": 2.07, "eval_loss": 1.598111629486084, "eval_runtime": 14.6392, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 2950 }, { "epoch": 2.11, "learning_rate": 1.1844183190033339e-05, "loss": 1.542, "step": 3000 }, { "epoch": 2.11, "eval_loss": 1.5949300527572632, "eval_runtime": 14.6318, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 3000 }, { "epoch": 2.14, "learning_rate": 1.162484646429198e-05, "loss": 1.5717, "step": 3050 }, { "epoch": 2.14, "eval_loss": 1.5896787643432617, "eval_runtime": 14.6355, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 3050 }, { "epoch": 2.18, "learning_rate": 1.1405509738550623e-05, "loss": 1.5775, "step": 3100 }, { "epoch": 2.18, "eval_loss": 1.5945594310760498, "eval_runtime": 14.6318, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 3100 }, { "epoch": 2.21, "learning_rate": 1.1186173012809265e-05, "loss": 1.5514, "step": 3150 }, { "epoch": 2.21, "eval_loss": 1.596181035041809, "eval_runtime": 14.6312, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 3150 }, { "epoch": 2.25, "learning_rate": 1.0966836287067907e-05, "loss": 1.5479, "step": 3200 }, { "epoch": 2.25, "eval_loss": 1.5988695621490479, "eval_runtime": 14.6305, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 3200 }, { "epoch": 2.28, "learning_rate": 1.074749956132655e-05, "loss": 1.5567, "step": 3250 }, { "epoch": 2.28, "eval_loss": 1.5939878225326538, "eval_runtime": 14.6282, "eval_samples_per_second": 1.436, "eval_steps_per_second": 0.205, "step": 3250 }, { "epoch": 2.32, "learning_rate": 1.0528162835585192e-05, "loss": 1.5917, "step": 3300 }, { "epoch": 2.32, "eval_loss": 1.5928601026535034, "eval_runtime": 14.6304, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 3300 }, { "epoch": 2.35, "learning_rate": 1.0308826109843834e-05, "loss": 1.5635, "step": 3350 }, { "epoch": 2.35, "eval_loss": 1.5916301012039185, "eval_runtime": 14.6301, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 3350 }, { "epoch": 2.39, "learning_rate": 1.0089489384102474e-05, "loss": 1.6097, "step": 3400 }, { "epoch": 2.39, "eval_loss": 1.5879075527191162, "eval_runtime": 14.6343, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 3400 }, { "epoch": 2.42, "learning_rate": 9.870152658361116e-06, "loss": 1.5256, "step": 3450 }, { "epoch": 2.42, "eval_loss": 1.5953552722930908, "eval_runtime": 14.6382, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 3450 }, { "epoch": 2.46, "learning_rate": 9.650815932619759e-06, "loss": 1.5758, "step": 3500 }, { "epoch": 2.46, "eval_loss": 1.5901867151260376, "eval_runtime": 14.6575, "eval_samples_per_second": 1.433, "eval_steps_per_second": 0.205, "step": 3500 }, { "epoch": 2.49, "learning_rate": 9.4314792068784e-06, "loss": 1.5924, "step": 3550 }, { "epoch": 2.49, "eval_loss": 1.592368721961975, "eval_runtime": 14.6369, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 3550 }, { "epoch": 2.53, "learning_rate": 9.212142481137043e-06, "loss": 1.5442, "step": 3600 }, { "epoch": 2.53, "eval_loss": 1.5874643325805664, "eval_runtime": 14.6605, "eval_samples_per_second": 1.432, "eval_steps_per_second": 0.205, "step": 3600 }, { "epoch": 2.56, "learning_rate": 8.992805755395683e-06, "loss": 1.5633, "step": 3650 }, { "epoch": 2.56, "eval_loss": 1.5897241830825806, "eval_runtime": 14.6465, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 3650 }, { "epoch": 2.6, "learning_rate": 8.773469029654325e-06, "loss": 1.5578, "step": 3700 }, { "epoch": 2.6, "eval_loss": 1.5877512693405151, "eval_runtime": 14.6433, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 3700 }, { "epoch": 2.63, "learning_rate": 8.554132303912968e-06, "loss": 1.5486, "step": 3750 }, { "epoch": 2.63, "eval_loss": 1.5919996500015259, "eval_runtime": 14.6424, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 3750 }, { "epoch": 2.67, "learning_rate": 8.33479557817161e-06, "loss": 1.559, "step": 3800 }, { "epoch": 2.67, "eval_loss": 1.5883424282073975, "eval_runtime": 14.6464, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 3800 }, { "epoch": 2.7, "learning_rate": 8.11545885243025e-06, "loss": 1.5766, "step": 3850 }, { "epoch": 2.7, "eval_loss": 1.5866857767105103, "eval_runtime": 14.6428, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 3850 }, { "epoch": 2.74, "learning_rate": 7.896122126688892e-06, "loss": 1.5489, "step": 3900 }, { "epoch": 2.74, "eval_loss": 1.5900993347167969, "eval_runtime": 14.6566, "eval_samples_per_second": 1.433, "eval_steps_per_second": 0.205, "step": 3900 }, { "epoch": 2.77, "learning_rate": 7.676785400947535e-06, "loss": 1.5606, "step": 3950 }, { "epoch": 2.77, "eval_loss": 1.5870875120162964, "eval_runtime": 14.632, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.205, "step": 3950 }, { "epoch": 2.81, "learning_rate": 7.457448675206178e-06, "loss": 1.5128, "step": 4000 }, { "epoch": 2.81, "eval_loss": 1.586663007736206, "eval_runtime": 14.6412, "eval_samples_per_second": 1.434, "eval_steps_per_second": 0.205, "step": 4000 } ], "logging_steps": 50, "max_steps": 5700, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 400, "total_flos": 7.98304400105472e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }