{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.5347222222222223, "global_step": 730, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 9.88425925925926e-05, "loss": 2.0821, "step": 10 }, { "epoch": 0.03, "eval_accuracy": 0.1527777761220932, "eval_loss": 2.099255084991455, "eval_runtime": 90.7163, "eval_samples_per_second": 6.349, "eval_steps_per_second": 1.587, "step": 10 }, { "epoch": 0.07, "learning_rate": 9.768518518518519e-05, "loss": 1.9991, "step": 20 }, { "epoch": 0.07, "eval_accuracy": 0.2326388955116272, "eval_loss": 2.011439085006714, "eval_runtime": 90.9685, "eval_samples_per_second": 6.332, "eval_steps_per_second": 1.583, "step": 20 }, { "epoch": 0.1, "learning_rate": 9.664351851851853e-05, "loss": 2.0133, "step": 30 }, { "epoch": 0.1, "eval_accuracy": 0.1822916716337204, "eval_loss": 2.0611398220062256, "eval_runtime": 91.1422, "eval_samples_per_second": 6.32, "eval_steps_per_second": 1.58, "step": 30 }, { "epoch": 0.14, "learning_rate": 9.548611111111112e-05, "loss": 1.9912, "step": 40 }, { "epoch": 0.14, "eval_accuracy": 0.1944444477558136, "eval_loss": 1.9874335527420044, "eval_runtime": 91.2497, "eval_samples_per_second": 6.312, "eval_steps_per_second": 1.578, "step": 40 }, { "epoch": 0.17, "learning_rate": 9.432870370370372e-05, "loss": 1.9825, "step": 50 }, { "epoch": 0.17, "eval_accuracy": 0.1875, "eval_loss": 1.9108080863952637, "eval_runtime": 91.363, "eval_samples_per_second": 6.305, "eval_steps_per_second": 1.576, "step": 50 }, { "epoch": 0.21, "learning_rate": 9.31712962962963e-05, "loss": 1.8281, "step": 60 }, { "epoch": 0.21, "eval_accuracy": 0.2760416567325592, "eval_loss": 1.8094313144683838, "eval_runtime": 90.7999, "eval_samples_per_second": 6.344, "eval_steps_per_second": 1.586, "step": 60 }, { "epoch": 0.24, "learning_rate": 9.201388888888889e-05, "loss": 1.7768, "step": 70 }, { "epoch": 0.24, "eval_accuracy": 0.28125, "eval_loss": 1.8212822675704956, "eval_runtime": 90.3717, "eval_samples_per_second": 6.374, "eval_steps_per_second": 1.593, "step": 70 }, { "epoch": 0.28, "learning_rate": 9.085648148148149e-05, "loss": 1.8747, "step": 80 }, { "epoch": 0.28, "eval_accuracy": 0.2916666567325592, "eval_loss": 1.819143533706665, "eval_runtime": 89.6205, "eval_samples_per_second": 6.427, "eval_steps_per_second": 1.607, "step": 80 }, { "epoch": 0.31, "learning_rate": 8.969907407407407e-05, "loss": 1.8258, "step": 90 }, { "epoch": 0.31, "eval_accuracy": 0.2673611044883728, "eval_loss": 1.8833109140396118, "eval_runtime": 90.3547, "eval_samples_per_second": 6.375, "eval_steps_per_second": 1.594, "step": 90 }, { "epoch": 0.35, "learning_rate": 8.854166666666667e-05, "loss": 1.8466, "step": 100 }, { "epoch": 0.35, "eval_accuracy": 0.3020833432674408, "eval_loss": 1.794202208518982, "eval_runtime": 89.9211, "eval_samples_per_second": 6.406, "eval_steps_per_second": 1.601, "step": 100 }, { "epoch": 0.38, "learning_rate": 8.738425925925926e-05, "loss": 1.7949, "step": 110 }, { "epoch": 0.38, "eval_accuracy": 0.234375, "eval_loss": 1.737123727798462, "eval_runtime": 90.2733, "eval_samples_per_second": 6.381, "eval_steps_per_second": 1.595, "step": 110 }, { "epoch": 0.42, "learning_rate": 8.622685185185186e-05, "loss": 1.6993, "step": 120 }, { "epoch": 0.42, "eval_accuracy": 0.3333333432674408, "eval_loss": 1.714468240737915, "eval_runtime": 90.6521, "eval_samples_per_second": 6.354, "eval_steps_per_second": 1.588, "step": 120 }, { "epoch": 0.45, "learning_rate": 8.506944444444444e-05, "loss": 1.9949, "step": 130 }, { "epoch": 0.45, "eval_accuracy": 0.3628472089767456, "eval_loss": 1.7169595956802368, "eval_runtime": 89.657, "eval_samples_per_second": 6.424, "eval_steps_per_second": 1.606, "step": 130 }, { "epoch": 0.49, "learning_rate": 8.391203703703704e-05, "loss": 1.6402, "step": 140 }, { "epoch": 0.49, "eval_accuracy": 0.3506944477558136, "eval_loss": 1.7289636135101318, "eval_runtime": 90.3787, "eval_samples_per_second": 6.373, "eval_steps_per_second": 1.593, "step": 140 }, { "epoch": 0.52, "learning_rate": 8.275462962962963e-05, "loss": 1.7599, "step": 150 }, { "epoch": 0.52, "eval_accuracy": 0.3090277910232544, "eval_loss": 1.7477116584777832, "eval_runtime": 90.5736, "eval_samples_per_second": 6.359, "eval_steps_per_second": 1.59, "step": 150 }, { "epoch": 0.56, "learning_rate": 8.159722222222223e-05, "loss": 1.5776, "step": 160 }, { "epoch": 0.56, "eval_accuracy": 0.3715277910232544, "eval_loss": 1.6158109903335571, "eval_runtime": 89.7843, "eval_samples_per_second": 6.415, "eval_steps_per_second": 1.604, "step": 160 }, { "epoch": 0.59, "learning_rate": 8.043981481481482e-05, "loss": 1.7169, "step": 170 }, { "epoch": 0.59, "eval_accuracy": 0.3663194477558136, "eval_loss": 1.6075005531311035, "eval_runtime": 90.228, "eval_samples_per_second": 6.384, "eval_steps_per_second": 1.596, "step": 170 }, { "epoch": 0.62, "learning_rate": 7.928240740740742e-05, "loss": 1.653, "step": 180 }, { "epoch": 0.62, "eval_accuracy": 0.4201388955116272, "eval_loss": 1.524334192276001, "eval_runtime": 91.575, "eval_samples_per_second": 6.29, "eval_steps_per_second": 1.572, "step": 180 }, { "epoch": 0.66, "learning_rate": 7.8125e-05, "loss": 1.5733, "step": 190 }, { "epoch": 0.66, "eval_accuracy": 0.359375, "eval_loss": 1.7072927951812744, "eval_runtime": 90.2896, "eval_samples_per_second": 6.379, "eval_steps_per_second": 1.595, "step": 190 }, { "epoch": 0.69, "learning_rate": 7.69675925925926e-05, "loss": 1.6704, "step": 200 }, { "epoch": 0.69, "eval_accuracy": 0.4722222089767456, "eval_loss": 1.4267817735671997, "eval_runtime": 90.57, "eval_samples_per_second": 6.36, "eval_steps_per_second": 1.59, "step": 200 }, { "epoch": 0.73, "learning_rate": 7.581018518518519e-05, "loss": 1.4389, "step": 210 }, { "epoch": 0.73, "eval_accuracy": 0.3940972089767456, "eval_loss": 1.7218824625015259, "eval_runtime": 89.8269, "eval_samples_per_second": 6.412, "eval_steps_per_second": 1.603, "step": 210 }, { "epoch": 0.76, "learning_rate": 7.465277777777779e-05, "loss": 1.5342, "step": 220 }, { "epoch": 0.76, "eval_accuracy": 0.3940972089767456, "eval_loss": 1.5133135318756104, "eval_runtime": 89.6337, "eval_samples_per_second": 6.426, "eval_steps_per_second": 1.607, "step": 220 }, { "epoch": 0.8, "learning_rate": 7.349537037037037e-05, "loss": 1.5165, "step": 230 }, { "epoch": 0.8, "eval_accuracy": 0.4322916567325592, "eval_loss": 1.4692301750183105, "eval_runtime": 90.2666, "eval_samples_per_second": 6.381, "eval_steps_per_second": 1.595, "step": 230 }, { "epoch": 0.83, "learning_rate": 7.233796296296297e-05, "loss": 1.4743, "step": 240 }, { "epoch": 0.83, "eval_accuracy": 0.3975694477558136, "eval_loss": 1.5509642362594604, "eval_runtime": 90.8282, "eval_samples_per_second": 6.342, "eval_steps_per_second": 1.585, "step": 240 }, { "epoch": 0.87, "learning_rate": 7.118055555555556e-05, "loss": 1.4903, "step": 250 }, { "epoch": 0.87, "eval_accuracy": 0.4739583432674408, "eval_loss": 1.3426711559295654, "eval_runtime": 91.1121, "eval_samples_per_second": 6.322, "eval_steps_per_second": 1.58, "step": 250 }, { "epoch": 0.9, "learning_rate": 7.002314814814816e-05, "loss": 1.2193, "step": 260 }, { "epoch": 0.9, "eval_accuracy": 0.5329861044883728, "eval_loss": 1.3175561428070068, "eval_runtime": 89.5523, "eval_samples_per_second": 6.432, "eval_steps_per_second": 1.608, "step": 260 }, { "epoch": 0.94, "learning_rate": 6.886574074074074e-05, "loss": 1.56, "step": 270 }, { "epoch": 0.94, "eval_accuracy": 0.4635416567325592, "eval_loss": 1.4102365970611572, "eval_runtime": 89.6275, "eval_samples_per_second": 6.427, "eval_steps_per_second": 1.607, "step": 270 }, { "epoch": 0.97, "learning_rate": 6.770833333333334e-05, "loss": 1.4563, "step": 280 }, { "epoch": 0.97, "eval_accuracy": 0.5347222089767456, "eval_loss": 1.291764736175537, "eval_runtime": 89.5795, "eval_samples_per_second": 6.43, "eval_steps_per_second": 1.608, "step": 280 }, { "epoch": 1.01, "learning_rate": 6.655092592592593e-05, "loss": 1.3766, "step": 290 }, { "epoch": 1.01, "eval_accuracy": 0.4305555522441864, "eval_loss": 1.5331988334655762, "eval_runtime": 89.8777, "eval_samples_per_second": 6.409, "eval_steps_per_second": 1.602, "step": 290 }, { "epoch": 1.04, "learning_rate": 6.539351851851853e-05, "loss": 1.218, "step": 300 }, { "epoch": 1.04, "eval_accuracy": 0.4861111044883728, "eval_loss": 1.317336916923523, "eval_runtime": 91.03, "eval_samples_per_second": 6.328, "eval_steps_per_second": 1.582, "step": 300 }, { "epoch": 1.08, "learning_rate": 6.423611111111112e-05, "loss": 1.3211, "step": 310 }, { "epoch": 1.08, "eval_accuracy": 0.5034722089767456, "eval_loss": 1.263780951499939, "eval_runtime": 90.5509, "eval_samples_per_second": 6.361, "eval_steps_per_second": 1.59, "step": 310 }, { "epoch": 1.11, "learning_rate": 6.307870370370372e-05, "loss": 1.1933, "step": 320 }, { "epoch": 1.11, "eval_accuracy": 0.4982638955116272, "eval_loss": 1.3839294910430908, "eval_runtime": 90.7629, "eval_samples_per_second": 6.346, "eval_steps_per_second": 1.587, "step": 320 }, { "epoch": 1.15, "learning_rate": 6.192129629629629e-05, "loss": 1.2803, "step": 330 }, { "epoch": 1.15, "eval_accuracy": 0.5121527910232544, "eval_loss": 1.2145192623138428, "eval_runtime": 90.0175, "eval_samples_per_second": 6.399, "eval_steps_per_second": 1.6, "step": 330 }, { "epoch": 1.18, "learning_rate": 6.076388888888889e-05, "loss": 1.1112, "step": 340 }, { "epoch": 1.18, "eval_accuracy": 0.5711805820465088, "eval_loss": 1.1930283308029175, "eval_runtime": 89.5796, "eval_samples_per_second": 6.43, "eval_steps_per_second": 1.608, "step": 340 }, { "epoch": 1.22, "learning_rate": 5.960648148148148e-05, "loss": 1.0907, "step": 350 }, { "epoch": 1.22, "eval_accuracy": 0.5815972089767456, "eval_loss": 1.145843505859375, "eval_runtime": 90.3053, "eval_samples_per_second": 6.378, "eval_steps_per_second": 1.595, "step": 350 }, { "epoch": 1.25, "learning_rate": 5.844907407407407e-05, "loss": 1.013, "step": 360 }, { "epoch": 1.25, "eval_accuracy": 0.6180555820465088, "eval_loss": 1.0559839010238647, "eval_runtime": 89.8917, "eval_samples_per_second": 6.408, "eval_steps_per_second": 1.602, "step": 360 }, { "epoch": 1.28, "learning_rate": 5.7291666666666666e-05, "loss": 0.9308, "step": 370 }, { "epoch": 1.28, "eval_accuracy": 0.6163194179534912, "eval_loss": 1.0486806631088257, "eval_runtime": 90.7378, "eval_samples_per_second": 6.348, "eval_steps_per_second": 1.587, "step": 370 }, { "epoch": 1.32, "learning_rate": 5.613425925925926e-05, "loss": 1.3225, "step": 380 }, { "epoch": 1.32, "eval_accuracy": 0.6145833134651184, "eval_loss": 1.050321340560913, "eval_runtime": 89.4251, "eval_samples_per_second": 6.441, "eval_steps_per_second": 1.61, "step": 380 }, { "epoch": 1.35, "learning_rate": 5.497685185185185e-05, "loss": 1.0774, "step": 390 }, { "epoch": 1.35, "eval_accuracy": 0.5434027910232544, "eval_loss": 1.25161612033844, "eval_runtime": 89.4532, "eval_samples_per_second": 6.439, "eval_steps_per_second": 1.61, "step": 390 }, { "epoch": 1.39, "learning_rate": 5.3819444444444444e-05, "loss": 1.2251, "step": 400 }, { "epoch": 1.39, "eval_accuracy": 0.609375, "eval_loss": 1.076072335243225, "eval_runtime": 90.2082, "eval_samples_per_second": 6.385, "eval_steps_per_second": 1.596, "step": 400 }, { "epoch": 1.42, "learning_rate": 5.266203703703704e-05, "loss": 0.9848, "step": 410 }, { "epoch": 1.42, "eval_accuracy": 0.6440972089767456, "eval_loss": 1.0271832942962646, "eval_runtime": 89.9601, "eval_samples_per_second": 6.403, "eval_steps_per_second": 1.601, "step": 410 }, { "epoch": 1.46, "learning_rate": 5.150462962962963e-05, "loss": 0.9913, "step": 420 }, { "epoch": 1.46, "eval_accuracy": 0.6041666865348816, "eval_loss": 1.0503506660461426, "eval_runtime": 89.8786, "eval_samples_per_second": 6.409, "eval_steps_per_second": 1.602, "step": 420 }, { "epoch": 1.49, "learning_rate": 5.034722222222222e-05, "loss": 0.9081, "step": 430 }, { "epoch": 1.49, "eval_accuracy": 0.6666666865348816, "eval_loss": 0.9094821214675903, "eval_runtime": 91.6907, "eval_samples_per_second": 6.282, "eval_steps_per_second": 1.57, "step": 430 }, { "epoch": 1.53, "learning_rate": 4.9189814814814815e-05, "loss": 0.8339, "step": 440 }, { "epoch": 1.53, "eval_accuracy": 0.6631944179534912, "eval_loss": 0.9030921459197998, "eval_runtime": 90.547, "eval_samples_per_second": 6.361, "eval_steps_per_second": 1.59, "step": 440 }, { "epoch": 1.56, "learning_rate": 4.803240740740741e-05, "loss": 0.8893, "step": 450 }, { "epoch": 1.56, "eval_accuracy": 0.6423611044883728, "eval_loss": 0.9375382661819458, "eval_runtime": 90.2281, "eval_samples_per_second": 6.384, "eval_steps_per_second": 1.596, "step": 450 }, { "epoch": 1.6, "learning_rate": 4.6875e-05, "loss": 0.9362, "step": 460 }, { "epoch": 1.6, "eval_accuracy": 0.6197916865348816, "eval_loss": 0.9755175113677979, "eval_runtime": 91.0163, "eval_samples_per_second": 6.329, "eval_steps_per_second": 1.582, "step": 460 }, { "epoch": 1.63, "learning_rate": 4.5717592592592594e-05, "loss": 0.835, "step": 470 }, { "epoch": 1.63, "eval_accuracy": 0.6545138955116272, "eval_loss": 0.9399816989898682, "eval_runtime": 89.8555, "eval_samples_per_second": 6.41, "eval_steps_per_second": 1.603, "step": 470 }, { "epoch": 1.67, "learning_rate": 4.456018518518519e-05, "loss": 0.6733, "step": 480 }, { "epoch": 1.67, "eval_accuracy": 0.6927083134651184, "eval_loss": 0.8480438590049744, "eval_runtime": 89.8841, "eval_samples_per_second": 6.408, "eval_steps_per_second": 1.602, "step": 480 }, { "epoch": 1.7, "learning_rate": 4.340277777777778e-05, "loss": 1.0115, "step": 490 }, { "epoch": 1.7, "eval_accuracy": 0.6840277910232544, "eval_loss": 0.8332173824310303, "eval_runtime": 91.0011, "eval_samples_per_second": 6.33, "eval_steps_per_second": 1.582, "step": 490 }, { "epoch": 1.74, "learning_rate": 4.224537037037037e-05, "loss": 0.7473, "step": 500 }, { "epoch": 1.74, "eval_accuracy": 0.6475694179534912, "eval_loss": 0.9618370532989502, "eval_runtime": 90.2515, "eval_samples_per_second": 6.382, "eval_steps_per_second": 1.596, "step": 500 }, { "epoch": 1.77, "learning_rate": 4.1087962962962965e-05, "loss": 0.8355, "step": 510 }, { "epoch": 1.77, "eval_accuracy": 0.6840277910232544, "eval_loss": 0.8845413327217102, "eval_runtime": 89.7252, "eval_samples_per_second": 6.42, "eval_steps_per_second": 1.605, "step": 510 }, { "epoch": 1.81, "learning_rate": 3.993055555555556e-05, "loss": 0.8487, "step": 520 }, { "epoch": 1.81, "eval_accuracy": 0.6875, "eval_loss": 0.8297374844551086, "eval_runtime": 91.9685, "eval_samples_per_second": 6.263, "eval_steps_per_second": 1.566, "step": 520 }, { "epoch": 1.84, "learning_rate": 3.877314814814815e-05, "loss": 0.6038, "step": 530 }, { "epoch": 1.84, "eval_accuracy": 0.6493055820465088, "eval_loss": 0.9539130330085754, "eval_runtime": 90.0856, "eval_samples_per_second": 6.394, "eval_steps_per_second": 1.598, "step": 530 }, { "epoch": 1.88, "learning_rate": 3.7615740740740744e-05, "loss": 0.75, "step": 540 }, { "epoch": 1.88, "eval_accuracy": 0.6857638955116272, "eval_loss": 0.8455307483673096, "eval_runtime": 89.5522, "eval_samples_per_second": 6.432, "eval_steps_per_second": 1.608, "step": 540 }, { "epoch": 1.91, "learning_rate": 3.6458333333333336e-05, "loss": 0.8561, "step": 550 }, { "epoch": 1.91, "eval_accuracy": 0.7013888955116272, "eval_loss": 0.7813519239425659, "eval_runtime": 90.1129, "eval_samples_per_second": 6.392, "eval_steps_per_second": 1.598, "step": 550 }, { "epoch": 1.94, "learning_rate": 3.530092592592593e-05, "loss": 0.7552, "step": 560 }, { "epoch": 1.94, "eval_accuracy": 0.6822916865348816, "eval_loss": 0.8651251196861267, "eval_runtime": 89.9146, "eval_samples_per_second": 6.406, "eval_steps_per_second": 1.602, "step": 560 }, { "epoch": 1.98, "learning_rate": 3.414351851851852e-05, "loss": 0.6972, "step": 570 }, { "epoch": 1.98, "eval_accuracy": 0.71875, "eval_loss": 0.7325252890586853, "eval_runtime": 90.7375, "eval_samples_per_second": 6.348, "eval_steps_per_second": 1.587, "step": 570 }, { "epoch": 2.01, "learning_rate": 3.2986111111111115e-05, "loss": 0.7483, "step": 580 }, { "epoch": 2.01, "eval_accuracy": 0.7690972089767456, "eval_loss": 0.6722133159637451, "eval_runtime": 90.3148, "eval_samples_per_second": 6.378, "eval_steps_per_second": 1.594, "step": 580 }, { "epoch": 2.05, "learning_rate": 3.182870370370371e-05, "loss": 0.5419, "step": 590 }, { "epoch": 2.05, "eval_accuracy": 0.7326388955116272, "eval_loss": 0.7046216130256653, "eval_runtime": 92.1898, "eval_samples_per_second": 6.248, "eval_steps_per_second": 1.562, "step": 590 }, { "epoch": 2.08, "learning_rate": 3.06712962962963e-05, "loss": 0.5203, "step": 600 }, { "epoch": 2.08, "eval_accuracy": 0.7326388955116272, "eval_loss": 0.7062063813209534, "eval_runtime": 90.2661, "eval_samples_per_second": 6.381, "eval_steps_per_second": 1.595, "step": 600 }, { "epoch": 2.12, "learning_rate": 2.951388888888889e-05, "loss": 0.5235, "step": 610 }, { "epoch": 2.12, "eval_accuracy": 0.7534722089767456, "eval_loss": 0.6795992255210876, "eval_runtime": 91.0148, "eval_samples_per_second": 6.329, "eval_steps_per_second": 1.582, "step": 610 }, { "epoch": 2.15, "learning_rate": 2.8356481481481483e-05, "loss": 0.514, "step": 620 }, { "epoch": 2.15, "eval_accuracy": 0.7204861044883728, "eval_loss": 0.746653139591217, "eval_runtime": 89.8044, "eval_samples_per_second": 6.414, "eval_steps_per_second": 1.603, "step": 620 }, { "epoch": 2.19, "learning_rate": 2.7199074074074076e-05, "loss": 0.5402, "step": 630 }, { "epoch": 2.19, "eval_accuracy": 0.7447916865348816, "eval_loss": 0.716274619102478, "eval_runtime": 90.8833, "eval_samples_per_second": 6.338, "eval_steps_per_second": 1.584, "step": 630 }, { "epoch": 2.22, "learning_rate": 2.604166666666667e-05, "loss": 0.7235, "step": 640 }, { "epoch": 2.22, "eval_accuracy": 0.7222222089767456, "eval_loss": 0.7545790076255798, "eval_runtime": 89.6633, "eval_samples_per_second": 6.424, "eval_steps_per_second": 1.606, "step": 640 }, { "epoch": 2.26, "learning_rate": 2.488425925925926e-05, "loss": 0.551, "step": 650 }, { "epoch": 2.26, "eval_accuracy": 0.7534722089767456, "eval_loss": 0.6994116902351379, "eval_runtime": 89.8085, "eval_samples_per_second": 6.414, "eval_steps_per_second": 1.603, "step": 650 }, { "epoch": 2.29, "learning_rate": 2.3726851851851854e-05, "loss": 0.5769, "step": 660 }, { "epoch": 2.29, "eval_accuracy": 0.7534722089767456, "eval_loss": 0.7151244282722473, "eval_runtime": 90.9508, "eval_samples_per_second": 6.333, "eval_steps_per_second": 1.583, "step": 660 }, { "epoch": 2.33, "learning_rate": 2.2569444444444447e-05, "loss": 0.5501, "step": 670 }, { "epoch": 2.33, "eval_accuracy": 0.7604166865348816, "eval_loss": 0.695513129234314, "eval_runtime": 90.6092, "eval_samples_per_second": 6.357, "eval_steps_per_second": 1.589, "step": 670 }, { "epoch": 2.36, "learning_rate": 2.141203703703704e-05, "loss": 0.5416, "step": 680 }, { "epoch": 2.36, "eval_accuracy": 0.7725694179534912, "eval_loss": 0.6533116102218628, "eval_runtime": 89.82, "eval_samples_per_second": 6.413, "eval_steps_per_second": 1.603, "step": 680 }, { "epoch": 2.4, "learning_rate": 2.0254629629629632e-05, "loss": 0.5452, "step": 690 }, { "epoch": 2.4, "eval_accuracy": 0.7777777910232544, "eval_loss": 0.6232606172561646, "eval_runtime": 90.4116, "eval_samples_per_second": 6.371, "eval_steps_per_second": 1.593, "step": 690 }, { "epoch": 2.43, "learning_rate": 1.9097222222222222e-05, "loss": 0.8518, "step": 700 }, { "epoch": 2.43, "eval_accuracy": 0.7777777910232544, "eval_loss": 0.6136298179626465, "eval_runtime": 90.8648, "eval_samples_per_second": 6.339, "eval_steps_per_second": 1.585, "step": 700 }, { "epoch": 2.47, "learning_rate": 1.7939814814814815e-05, "loss": 0.3372, "step": 710 }, { "epoch": 2.47, "eval_accuracy": 0.7986111044883728, "eval_loss": 0.5700623393058777, "eval_runtime": 90.1438, "eval_samples_per_second": 6.39, "eval_steps_per_second": 1.597, "step": 710 }, { "epoch": 2.5, "learning_rate": 1.6782407407407408e-05, "loss": 0.4488, "step": 720 }, { "epoch": 2.5, "eval_accuracy": 0.7847222089767456, "eval_loss": 0.5789040327072144, "eval_runtime": 90.7633, "eval_samples_per_second": 6.346, "eval_steps_per_second": 1.587, "step": 720 }, { "epoch": 2.53, "learning_rate": 1.5625e-05, "loss": 0.3977, "step": 730 }, { "epoch": 2.53, "eval_accuracy": 0.7829861044883728, "eval_loss": 0.5748720169067383, "eval_runtime": 89.6735, "eval_samples_per_second": 6.423, "eval_steps_per_second": 1.606, "step": 730 } ], "max_steps": 864, "num_train_epochs": 3, "total_flos": 2.1573660231214095e+18, "trial_name": null, "trial_params": null }