{ "best_metric": 0.8142857142857143, "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-ADC-3cls-0922/checkpoint-310", "epoch": 200.0, "eval_steps": 500, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.4142857142857143, "eval_loss": 1.0693532228469849, "eval_runtime": 1.0286, "eval_samples_per_second": 68.056, "eval_steps_per_second": 1.944, "step": 2 }, { "epoch": 2.0, "eval_accuracy": 0.4142857142857143, "eval_loss": 1.0689432621002197, "eval_runtime": 1.4012, "eval_samples_per_second": 49.956, "eval_steps_per_second": 1.427, "step": 4 }, { "epoch": 3.0, "eval_accuracy": 0.4142857142857143, "eval_loss": 1.0682058334350586, "eval_runtime": 0.8857, "eval_samples_per_second": 79.031, "eval_steps_per_second": 2.258, "step": 6 }, { "epoch": 4.0, "eval_accuracy": 0.4142857142857143, "eval_loss": 1.0670955181121826, "eval_runtime": 1.6997, "eval_samples_per_second": 41.183, "eval_steps_per_second": 1.177, "step": 8 }, { "epoch": 5.0, "learning_rate": 1.25e-05, "loss": 1.096, "step": 10 }, { "epoch": 5.0, "eval_accuracy": 0.42857142857142855, "eval_loss": 1.065665602684021, "eval_runtime": 0.6117, "eval_samples_per_second": 114.441, "eval_steps_per_second": 3.27, "step": 10 }, { "epoch": 6.0, "eval_accuracy": 0.42857142857142855, "eval_loss": 1.0639806985855103, "eval_runtime": 0.6447, "eval_samples_per_second": 108.583, "eval_steps_per_second": 3.102, "step": 12 }, { "epoch": 7.0, "eval_accuracy": 0.4142857142857143, "eval_loss": 1.0620665550231934, "eval_runtime": 0.806, "eval_samples_per_second": 86.844, "eval_steps_per_second": 2.481, "step": 14 }, { "epoch": 8.0, "eval_accuracy": 0.4, "eval_loss": 1.0598403215408325, "eval_runtime": 0.6374, "eval_samples_per_second": 109.817, "eval_steps_per_second": 3.138, "step": 16 }, { "epoch": 9.0, "eval_accuracy": 0.4, "eval_loss": 1.0572247505187988, "eval_runtime": 0.6352, "eval_samples_per_second": 110.2, "eval_steps_per_second": 3.149, "step": 18 }, { "epoch": 10.0, "learning_rate": 2.5e-05, "loss": 1.0906, "step": 20 }, { "epoch": 10.0, "eval_accuracy": 0.4, "eval_loss": 1.0545086860656738, "eval_runtime": 0.8412, "eval_samples_per_second": 83.211, "eval_steps_per_second": 2.377, "step": 20 }, { "epoch": 11.0, "eval_accuracy": 0.4142857142857143, "eval_loss": 1.0516999959945679, "eval_runtime": 0.6382, "eval_samples_per_second": 109.685, "eval_steps_per_second": 3.134, "step": 22 }, { "epoch": 12.0, "eval_accuracy": 0.4142857142857143, "eval_loss": 1.0486340522766113, "eval_runtime": 0.6383, "eval_samples_per_second": 109.67, "eval_steps_per_second": 3.133, "step": 24 }, { "epoch": 13.0, "eval_accuracy": 0.4142857142857143, "eval_loss": 1.045298457145691, "eval_runtime": 0.8655, "eval_samples_per_second": 80.883, "eval_steps_per_second": 2.311, "step": 26 }, { "epoch": 14.0, "eval_accuracy": 0.4142857142857143, "eval_loss": 1.0417920351028442, "eval_runtime": 0.64, "eval_samples_per_second": 109.372, "eval_steps_per_second": 3.125, "step": 28 }, { "epoch": 15.0, "learning_rate": 3.7500000000000003e-05, "loss": 1.0647, "step": 30 }, { "epoch": 15.0, "eval_accuracy": 0.4142857142857143, "eval_loss": 1.0380207300186157, "eval_runtime": 0.6461, "eval_samples_per_second": 108.335, "eval_steps_per_second": 3.095, "step": 30 }, { "epoch": 16.0, "eval_accuracy": 0.4142857142857143, "eval_loss": 1.0343334674835205, "eval_runtime": 0.8283, "eval_samples_per_second": 84.515, "eval_steps_per_second": 2.415, "step": 32 }, { "epoch": 17.0, "eval_accuracy": 0.4142857142857143, "eval_loss": 1.030653953552246, "eval_runtime": 0.6353, "eval_samples_per_second": 110.181, "eval_steps_per_second": 3.148, "step": 34 }, { "epoch": 18.0, "eval_accuracy": 0.42857142857142855, "eval_loss": 1.0267900228500366, "eval_runtime": 0.6316, "eval_samples_per_second": 110.829, "eval_steps_per_second": 3.167, "step": 36 }, { "epoch": 19.0, "eval_accuracy": 0.42857142857142855, "eval_loss": 1.0229403972625732, "eval_runtime": 1.4107, "eval_samples_per_second": 49.619, "eval_steps_per_second": 1.418, "step": 38 }, { "epoch": 20.0, "learning_rate": 5e-05, "loss": 1.0451, "step": 40 }, { "epoch": 20.0, "eval_accuracy": 0.44285714285714284, "eval_loss": 1.0190969705581665, "eval_runtime": 0.6384, "eval_samples_per_second": 109.648, "eval_steps_per_second": 3.133, "step": 40 }, { "epoch": 21.0, "eval_accuracy": 0.45714285714285713, "eval_loss": 1.0152583122253418, "eval_runtime": 0.6302, "eval_samples_per_second": 111.074, "eval_steps_per_second": 3.174, "step": 42 }, { "epoch": 22.0, "eval_accuracy": 0.4714285714285714, "eval_loss": 1.011588454246521, "eval_runtime": 0.8016, "eval_samples_per_second": 87.325, "eval_steps_per_second": 2.495, "step": 44 }, { "epoch": 23.0, "eval_accuracy": 0.4714285714285714, "eval_loss": 1.0081889629364014, "eval_runtime": 0.6258, "eval_samples_per_second": 111.852, "eval_steps_per_second": 3.196, "step": 46 }, { "epoch": 24.0, "eval_accuracy": 0.4714285714285714, "eval_loss": 1.0049412250518799, "eval_runtime": 0.6247, "eval_samples_per_second": 112.06, "eval_steps_per_second": 3.202, "step": 48 }, { "epoch": 25.0, "learning_rate": 6.25e-05, "loss": 1.037, "step": 50 }, { "epoch": 25.0, "eval_accuracy": 0.4714285714285714, "eval_loss": 1.0015885829925537, "eval_runtime": 0.8268, "eval_samples_per_second": 84.66, "eval_steps_per_second": 2.419, "step": 50 }, { "epoch": 26.0, "eval_accuracy": 0.4714285714285714, "eval_loss": 0.9978756308555603, "eval_runtime": 0.635, "eval_samples_per_second": 110.241, "eval_steps_per_second": 3.15, "step": 52 }, { "epoch": 27.0, "eval_accuracy": 0.4714285714285714, "eval_loss": 0.9943951368331909, "eval_runtime": 0.6443, "eval_samples_per_second": 108.649, "eval_steps_per_second": 3.104, "step": 54 }, { "epoch": 28.0, "eval_accuracy": 0.4714285714285714, "eval_loss": 0.9912726283073425, "eval_runtime": 0.8063, "eval_samples_per_second": 86.821, "eval_steps_per_second": 2.481, "step": 56 }, { "epoch": 29.0, "eval_accuracy": 0.4714285714285714, "eval_loss": 0.9882513880729675, "eval_runtime": 0.635, "eval_samples_per_second": 110.236, "eval_steps_per_second": 3.15, "step": 58 }, { "epoch": 30.0, "learning_rate": 7.500000000000001e-05, "loss": 1.0214, "step": 60 }, { "epoch": 30.0, "eval_accuracy": 0.4714285714285714, "eval_loss": 0.9846696257591248, "eval_runtime": 0.6341, "eval_samples_per_second": 110.395, "eval_steps_per_second": 3.154, "step": 60 }, { "epoch": 31.0, "eval_accuracy": 0.45714285714285713, "eval_loss": 0.9809077382087708, "eval_runtime": 0.8112, "eval_samples_per_second": 86.297, "eval_steps_per_second": 2.466, "step": 62 }, { "epoch": 32.0, "eval_accuracy": 0.4714285714285714, "eval_loss": 0.9768250584602356, "eval_runtime": 0.6357, "eval_samples_per_second": 110.106, "eval_steps_per_second": 3.146, "step": 64 }, { "epoch": 33.0, "eval_accuracy": 0.4714285714285714, "eval_loss": 0.9722528457641602, "eval_runtime": 0.6387, "eval_samples_per_second": 109.603, "eval_steps_per_second": 3.132, "step": 66 }, { "epoch": 34.0, "eval_accuracy": 0.4714285714285714, "eval_loss": 0.9670786261558533, "eval_runtime": 0.83, "eval_samples_per_second": 84.335, "eval_steps_per_second": 2.41, "step": 68 }, { "epoch": 35.0, "learning_rate": 8.75e-05, "loss": 1.0181, "step": 70 }, { "epoch": 35.0, "eval_accuracy": 0.4714285714285714, "eval_loss": 0.9616244435310364, "eval_runtime": 0.6343, "eval_samples_per_second": 110.364, "eval_steps_per_second": 3.153, "step": 70 }, { "epoch": 36.0, "eval_accuracy": 0.4857142857142857, "eval_loss": 0.9561253190040588, "eval_runtime": 0.6551, "eval_samples_per_second": 106.853, "eval_steps_per_second": 3.053, "step": 72 }, { "epoch": 37.0, "eval_accuracy": 0.5, "eval_loss": 0.950469434261322, "eval_runtime": 0.8176, "eval_samples_per_second": 85.619, "eval_steps_per_second": 2.446, "step": 74 }, { "epoch": 38.0, "eval_accuracy": 0.5285714285714286, "eval_loss": 0.9445714950561523, "eval_runtime": 0.6352, "eval_samples_per_second": 110.194, "eval_steps_per_second": 3.148, "step": 76 }, { "epoch": 39.0, "eval_accuracy": 0.5285714285714286, "eval_loss": 0.9387871026992798, "eval_runtime": 0.629, "eval_samples_per_second": 111.283, "eval_steps_per_second": 3.18, "step": 78 }, { "epoch": 40.0, "learning_rate": 0.0001, "loss": 0.9646, "step": 80 }, { "epoch": 40.0, "eval_accuracy": 0.5285714285714286, "eval_loss": 0.9331315159797668, "eval_runtime": 0.8121, "eval_samples_per_second": 86.194, "eval_steps_per_second": 2.463, "step": 80 }, { "epoch": 41.0, "eval_accuracy": 0.5142857142857142, "eval_loss": 0.9276390075683594, "eval_runtime": 0.6356, "eval_samples_per_second": 110.129, "eval_steps_per_second": 3.147, "step": 82 }, { "epoch": 42.0, "eval_accuracy": 0.5285714285714286, "eval_loss": 0.9224144220352173, "eval_runtime": 0.6341, "eval_samples_per_second": 110.396, "eval_steps_per_second": 3.154, "step": 84 }, { "epoch": 43.0, "eval_accuracy": 0.5285714285714286, "eval_loss": 0.917235791683197, "eval_runtime": 0.8135, "eval_samples_per_second": 86.048, "eval_steps_per_second": 2.459, "step": 86 }, { "epoch": 44.0, "eval_accuracy": 0.5285714285714286, "eval_loss": 0.9120003581047058, "eval_runtime": 0.6333, "eval_samples_per_second": 110.541, "eval_steps_per_second": 3.158, "step": 88 }, { "epoch": 45.0, "learning_rate": 9.687500000000001e-05, "loss": 0.946, "step": 90 }, { "epoch": 45.0, "eval_accuracy": 0.5142857142857142, "eval_loss": 0.9070144891738892, "eval_runtime": 0.6382, "eval_samples_per_second": 109.685, "eval_steps_per_second": 3.134, "step": 90 }, { "epoch": 46.0, "eval_accuracy": 0.5285714285714286, "eval_loss": 0.9021272659301758, "eval_runtime": 0.8166, "eval_samples_per_second": 85.719, "eval_steps_per_second": 2.449, "step": 92 }, { "epoch": 47.0, "eval_accuracy": 0.5428571428571428, "eval_loss": 0.897597074508667, "eval_runtime": 0.634, "eval_samples_per_second": 110.411, "eval_steps_per_second": 3.155, "step": 94 }, { "epoch": 48.0, "eval_accuracy": 0.5428571428571428, "eval_loss": 0.8932848572731018, "eval_runtime": 0.6408, "eval_samples_per_second": 109.231, "eval_steps_per_second": 3.121, "step": 96 }, { "epoch": 49.0, "eval_accuracy": 0.5714285714285714, "eval_loss": 0.8890884518623352, "eval_runtime": 0.8162, "eval_samples_per_second": 85.76, "eval_steps_per_second": 2.45, "step": 98 }, { "epoch": 50.0, "learning_rate": 9.375e-05, "loss": 0.9244, "step": 100 }, { "epoch": 50.0, "eval_accuracy": 0.5714285714285714, "eval_loss": 0.8845995664596558, "eval_runtime": 0.6368, "eval_samples_per_second": 109.929, "eval_steps_per_second": 3.141, "step": 100 }, { "epoch": 51.0, "eval_accuracy": 0.5714285714285714, "eval_loss": 0.8802858591079712, "eval_runtime": 0.6401, "eval_samples_per_second": 109.353, "eval_steps_per_second": 3.124, "step": 102 }, { "epoch": 52.0, "eval_accuracy": 0.5714285714285714, "eval_loss": 0.8758621215820312, "eval_runtime": 0.8118, "eval_samples_per_second": 86.227, "eval_steps_per_second": 2.464, "step": 104 }, { "epoch": 53.0, "eval_accuracy": 0.5714285714285714, "eval_loss": 0.8715727925300598, "eval_runtime": 0.6434, "eval_samples_per_second": 108.8, "eval_steps_per_second": 3.109, "step": 106 }, { "epoch": 54.0, "eval_accuracy": 0.5714285714285714, "eval_loss": 0.8674018383026123, "eval_runtime": 0.63, "eval_samples_per_second": 111.108, "eval_steps_per_second": 3.175, "step": 108 }, { "epoch": 55.0, "learning_rate": 9.062500000000001e-05, "loss": 0.9228, "step": 110 }, { "epoch": 55.0, "eval_accuracy": 0.5857142857142857, "eval_loss": 0.86343914270401, "eval_runtime": 0.7324, "eval_samples_per_second": 95.574, "eval_steps_per_second": 2.731, "step": 110 }, { "epoch": 56.0, "eval_accuracy": 0.6, "eval_loss": 0.8597754240036011, "eval_runtime": 0.6359, "eval_samples_per_second": 110.082, "eval_steps_per_second": 3.145, "step": 112 }, { "epoch": 57.0, "eval_accuracy": 0.5857142857142857, "eval_loss": 0.8562148213386536, "eval_runtime": 0.6308, "eval_samples_per_second": 110.969, "eval_steps_per_second": 3.171, "step": 114 }, { "epoch": 58.0, "eval_accuracy": 0.6, "eval_loss": 0.852704644203186, "eval_runtime": 0.6483, "eval_samples_per_second": 107.968, "eval_steps_per_second": 3.085, "step": 116 }, { "epoch": 59.0, "eval_accuracy": 0.6, "eval_loss": 0.8491949439048767, "eval_runtime": 0.7037, "eval_samples_per_second": 99.48, "eval_steps_per_second": 2.842, "step": 118 }, { "epoch": 60.0, "learning_rate": 8.75e-05, "loss": 0.8956, "step": 120 }, { "epoch": 60.0, "eval_accuracy": 0.6142857142857143, "eval_loss": 0.8456201553344727, "eval_runtime": 0.6463, "eval_samples_per_second": 108.309, "eval_steps_per_second": 3.095, "step": 120 }, { "epoch": 61.0, "eval_accuracy": 0.6, "eval_loss": 0.8420506715774536, "eval_runtime": 0.63, "eval_samples_per_second": 111.119, "eval_steps_per_second": 3.175, "step": 122 }, { "epoch": 62.0, "eval_accuracy": 0.6, "eval_loss": 0.8385196924209595, "eval_runtime": 0.7958, "eval_samples_per_second": 87.963, "eval_steps_per_second": 2.513, "step": 124 }, { "epoch": 63.0, "eval_accuracy": 0.6, "eval_loss": 0.8351073861122131, "eval_runtime": 0.6308, "eval_samples_per_second": 110.971, "eval_steps_per_second": 3.171, "step": 126 }, { "epoch": 64.0, "eval_accuracy": 0.6142857142857143, "eval_loss": 0.8317676186561584, "eval_runtime": 0.6457, "eval_samples_per_second": 108.412, "eval_steps_per_second": 3.097, "step": 128 }, { "epoch": 65.0, "learning_rate": 8.4375e-05, "loss": 0.8943, "step": 130 }, { "epoch": 65.0, "eval_accuracy": 0.6142857142857143, "eval_loss": 0.8285678029060364, "eval_runtime": 0.8132, "eval_samples_per_second": 86.08, "eval_steps_per_second": 2.459, "step": 130 }, { "epoch": 66.0, "eval_accuracy": 0.6, "eval_loss": 0.825462281703949, "eval_runtime": 0.6417, "eval_samples_per_second": 109.078, "eval_steps_per_second": 3.117, "step": 132 }, { "epoch": 67.0, "eval_accuracy": 0.6285714285714286, "eval_loss": 0.8222988247871399, "eval_runtime": 0.6393, "eval_samples_per_second": 109.486, "eval_steps_per_second": 3.128, "step": 134 }, { "epoch": 68.0, "eval_accuracy": 0.6428571428571429, "eval_loss": 0.8190925121307373, "eval_runtime": 0.8213, "eval_samples_per_second": 85.231, "eval_steps_per_second": 2.435, "step": 136 }, { "epoch": 69.0, "eval_accuracy": 0.6285714285714286, "eval_loss": 0.8158699870109558, "eval_runtime": 0.6409, "eval_samples_per_second": 109.217, "eval_steps_per_second": 3.12, "step": 138 }, { "epoch": 70.0, "learning_rate": 8.125000000000001e-05, "loss": 0.854, "step": 140 }, { "epoch": 70.0, "eval_accuracy": 0.6428571428571429, "eval_loss": 0.8128588199615479, "eval_runtime": 0.6403, "eval_samples_per_second": 109.332, "eval_steps_per_second": 3.124, "step": 140 }, { "epoch": 71.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8099709749221802, "eval_runtime": 0.8076, "eval_samples_per_second": 86.675, "eval_steps_per_second": 2.476, "step": 142 }, { "epoch": 72.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8072643280029297, "eval_runtime": 0.6395, "eval_samples_per_second": 109.469, "eval_steps_per_second": 3.128, "step": 144 }, { "epoch": 73.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.804807722568512, "eval_runtime": 0.6346, "eval_samples_per_second": 110.311, "eval_steps_per_second": 3.152, "step": 146 }, { "epoch": 74.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8025286793708801, "eval_runtime": 0.7964, "eval_samples_per_second": 87.9, "eval_steps_per_second": 2.511, "step": 148 }, { "epoch": 75.0, "learning_rate": 7.8125e-05, "loss": 0.8615, "step": 150 }, { "epoch": 75.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.8000553250312805, "eval_runtime": 0.6302, "eval_samples_per_second": 111.077, "eval_steps_per_second": 3.174, "step": 150 }, { "epoch": 76.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.797595739364624, "eval_runtime": 0.6315, "eval_samples_per_second": 110.841, "eval_steps_per_second": 3.167, "step": 152 }, { "epoch": 77.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.795224130153656, "eval_runtime": 0.7988, "eval_samples_per_second": 87.627, "eval_steps_per_second": 2.504, "step": 154 }, { "epoch": 78.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.7928095459938049, "eval_runtime": 0.6405, "eval_samples_per_second": 109.291, "eval_steps_per_second": 3.123, "step": 156 }, { "epoch": 79.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.7904472351074219, "eval_runtime": 0.6375, "eval_samples_per_second": 109.802, "eval_steps_per_second": 3.137, "step": 158 }, { "epoch": 80.0, "learning_rate": 7.500000000000001e-05, "loss": 0.8507, "step": 160 }, { "epoch": 80.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.7881516218185425, "eval_runtime": 0.8024, "eval_samples_per_second": 87.237, "eval_steps_per_second": 2.492, "step": 160 }, { "epoch": 81.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.7857871055603027, "eval_runtime": 0.6344, "eval_samples_per_second": 110.338, "eval_steps_per_second": 3.153, "step": 162 }, { "epoch": 82.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.7834708094596863, "eval_runtime": 0.6376, "eval_samples_per_second": 109.781, "eval_steps_per_second": 3.137, "step": 164 }, { "epoch": 83.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.7811411023139954, "eval_runtime": 0.8171, "eval_samples_per_second": 85.665, "eval_steps_per_second": 2.448, "step": 166 }, { "epoch": 84.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.7788016200065613, "eval_runtime": 0.6448, "eval_samples_per_second": 108.552, "eval_steps_per_second": 3.101, "step": 168 }, { "epoch": 85.0, "learning_rate": 7.1875e-05, "loss": 0.838, "step": 170 }, { "epoch": 85.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.7765094041824341, "eval_runtime": 0.6436, "eval_samples_per_second": 108.77, "eval_steps_per_second": 3.108, "step": 170 }, { "epoch": 86.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.7743098139762878, "eval_runtime": 0.8392, "eval_samples_per_second": 83.412, "eval_steps_per_second": 2.383, "step": 172 }, { "epoch": 87.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.7722915410995483, "eval_runtime": 0.6421, "eval_samples_per_second": 109.013, "eval_steps_per_second": 3.115, "step": 174 }, { "epoch": 88.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.7703205943107605, "eval_runtime": 0.6583, "eval_samples_per_second": 106.337, "eval_steps_per_second": 3.038, "step": 176 }, { "epoch": 89.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.768402636051178, "eval_runtime": 0.8092, "eval_samples_per_second": 86.507, "eval_steps_per_second": 2.472, "step": 178 }, { "epoch": 90.0, "learning_rate": 6.875e-05, "loss": 0.8245, "step": 180 }, { "epoch": 90.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.7664420008659363, "eval_runtime": 0.6321, "eval_samples_per_second": 110.749, "eval_steps_per_second": 3.164, "step": 180 }, { "epoch": 91.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.7643933296203613, "eval_runtime": 0.6416, "eval_samples_per_second": 109.102, "eval_steps_per_second": 3.117, "step": 182 }, { "epoch": 92.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.7624573111534119, "eval_runtime": 0.8206, "eval_samples_per_second": 85.305, "eval_steps_per_second": 2.437, "step": 184 }, { "epoch": 93.0, "eval_accuracy": 0.7142857142857143, "eval_loss": 0.7605774998664856, "eval_runtime": 0.6303, "eval_samples_per_second": 111.066, "eval_steps_per_second": 3.173, "step": 186 }, { "epoch": 94.0, "eval_accuracy": 0.7142857142857143, "eval_loss": 0.7587181329727173, "eval_runtime": 0.6487, "eval_samples_per_second": 107.907, "eval_steps_per_second": 3.083, "step": 188 }, { "epoch": 95.0, "learning_rate": 6.562500000000001e-05, "loss": 0.8124, "step": 190 }, { "epoch": 95.0, "eval_accuracy": 0.7142857142857143, "eval_loss": 0.75688236951828, "eval_runtime": 0.83, "eval_samples_per_second": 84.337, "eval_steps_per_second": 2.41, "step": 190 }, { "epoch": 96.0, "eval_accuracy": 0.7285714285714285, "eval_loss": 0.755053699016571, "eval_runtime": 0.6434, "eval_samples_per_second": 108.8, "eval_steps_per_second": 3.109, "step": 192 }, { "epoch": 97.0, "eval_accuracy": 0.7285714285714285, "eval_loss": 0.7533228993415833, "eval_runtime": 0.6338, "eval_samples_per_second": 110.439, "eval_steps_per_second": 3.155, "step": 194 }, { "epoch": 98.0, "eval_accuracy": 0.7285714285714285, "eval_loss": 0.7516511082649231, "eval_runtime": 0.8125, "eval_samples_per_second": 86.157, "eval_steps_per_second": 2.462, "step": 196 }, { "epoch": 99.0, "eval_accuracy": 0.7428571428571429, "eval_loss": 0.7499614953994751, "eval_runtime": 0.6565, "eval_samples_per_second": 106.632, "eval_steps_per_second": 3.047, "step": 198 }, { "epoch": 100.0, "learning_rate": 6.25e-05, "loss": 0.8102, "step": 200 }, { "epoch": 100.0, "eval_accuracy": 0.7428571428571429, "eval_loss": 0.7482544183731079, "eval_runtime": 0.6344, "eval_samples_per_second": 110.346, "eval_steps_per_second": 3.153, "step": 200 }, { "epoch": 101.0, "eval_accuracy": 0.7428571428571429, "eval_loss": 0.7465088963508606, "eval_runtime": 0.8175, "eval_samples_per_second": 85.623, "eval_steps_per_second": 2.446, "step": 202 }, { "epoch": 102.0, "eval_accuracy": 0.7428571428571429, "eval_loss": 0.7449584007263184, "eval_runtime": 0.6372, "eval_samples_per_second": 109.854, "eval_steps_per_second": 3.139, "step": 204 }, { "epoch": 103.0, "eval_accuracy": 0.7428571428571429, "eval_loss": 0.7434430718421936, "eval_runtime": 0.6379, "eval_samples_per_second": 109.741, "eval_steps_per_second": 3.135, "step": 206 }, { "epoch": 104.0, "eval_accuracy": 0.7428571428571429, "eval_loss": 0.7419188618659973, "eval_runtime": 0.8331, "eval_samples_per_second": 84.027, "eval_steps_per_second": 2.401, "step": 208 }, { "epoch": 105.0, "learning_rate": 5.9375e-05, "loss": 0.821, "step": 210 }, { "epoch": 105.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7403832674026489, "eval_runtime": 0.6317, "eval_samples_per_second": 110.817, "eval_steps_per_second": 3.166, "step": 210 }, { "epoch": 106.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7388736605644226, "eval_runtime": 0.638, "eval_samples_per_second": 109.722, "eval_steps_per_second": 3.135, "step": 212 }, { "epoch": 107.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7374056577682495, "eval_runtime": 0.8271, "eval_samples_per_second": 84.63, "eval_steps_per_second": 2.418, "step": 214 }, { "epoch": 108.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7359411120414734, "eval_runtime": 0.6382, "eval_samples_per_second": 109.678, "eval_steps_per_second": 3.134, "step": 216 }, { "epoch": 109.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.734478235244751, "eval_runtime": 0.6424, "eval_samples_per_second": 108.966, "eval_steps_per_second": 3.113, "step": 218 }, { "epoch": 110.0, "learning_rate": 5.6250000000000005e-05, "loss": 0.7918, "step": 220 }, { "epoch": 110.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7330225110054016, "eval_runtime": 0.8294, "eval_samples_per_second": 84.398, "eval_steps_per_second": 2.411, "step": 220 }, { "epoch": 111.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7315928339958191, "eval_runtime": 0.6456, "eval_samples_per_second": 108.432, "eval_steps_per_second": 3.098, "step": 222 }, { "epoch": 112.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7302229404449463, "eval_runtime": 0.6368, "eval_samples_per_second": 109.931, "eval_steps_per_second": 3.141, "step": 224 }, { "epoch": 113.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7288532257080078, "eval_runtime": 0.8298, "eval_samples_per_second": 84.362, "eval_steps_per_second": 2.41, "step": 226 }, { "epoch": 114.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7275059223175049, "eval_runtime": 0.6447, "eval_samples_per_second": 108.584, "eval_steps_per_second": 3.102, "step": 228 }, { "epoch": 115.0, "learning_rate": 5.3125000000000004e-05, "loss": 0.8063, "step": 230 }, { "epoch": 115.0, "eval_accuracy": 0.7714285714285715, "eval_loss": 0.7261765599250793, "eval_runtime": 0.6376, "eval_samples_per_second": 109.779, "eval_steps_per_second": 3.137, "step": 230 }, { "epoch": 116.0, "eval_accuracy": 0.7714285714285715, "eval_loss": 0.7246890068054199, "eval_runtime": 0.8093, "eval_samples_per_second": 86.498, "eval_steps_per_second": 2.471, "step": 232 }, { "epoch": 117.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7232338190078735, "eval_runtime": 0.6473, "eval_samples_per_second": 108.146, "eval_steps_per_second": 3.09, "step": 234 }, { "epoch": 118.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7218143939971924, "eval_runtime": 0.639, "eval_samples_per_second": 109.553, "eval_steps_per_second": 3.13, "step": 236 }, { "epoch": 119.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7204232811927795, "eval_runtime": 0.8245, "eval_samples_per_second": 84.902, "eval_steps_per_second": 2.426, "step": 238 }, { "epoch": 120.0, "learning_rate": 5e-05, "loss": 0.7897, "step": 240 }, { "epoch": 120.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7191569209098816, "eval_runtime": 0.639, "eval_samples_per_second": 109.548, "eval_steps_per_second": 3.13, "step": 240 }, { "epoch": 121.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7179904580116272, "eval_runtime": 0.6313, "eval_samples_per_second": 110.887, "eval_steps_per_second": 3.168, "step": 242 }, { "epoch": 122.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7168429493904114, "eval_runtime": 0.8342, "eval_samples_per_second": 83.908, "eval_steps_per_second": 2.397, "step": 244 }, { "epoch": 123.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7157979011535645, "eval_runtime": 0.6333, "eval_samples_per_second": 110.541, "eval_steps_per_second": 3.158, "step": 246 }, { "epoch": 124.0, "eval_accuracy": 0.7714285714285715, "eval_loss": 0.714880645275116, "eval_runtime": 0.6368, "eval_samples_per_second": 109.924, "eval_steps_per_second": 3.141, "step": 248 }, { "epoch": 125.0, "learning_rate": 4.6875e-05, "loss": 0.7845, "step": 250 }, { "epoch": 125.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7140344381332397, "eval_runtime": 0.8287, "eval_samples_per_second": 84.466, "eval_steps_per_second": 2.413, "step": 250 }, { "epoch": 126.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7130730152130127, "eval_runtime": 0.6617, "eval_samples_per_second": 105.79, "eval_steps_per_second": 3.023, "step": 252 }, { "epoch": 127.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7120916247367859, "eval_runtime": 0.6392, "eval_samples_per_second": 109.509, "eval_steps_per_second": 3.129, "step": 254 }, { "epoch": 128.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7110173106193542, "eval_runtime": 0.8272, "eval_samples_per_second": 84.623, "eval_steps_per_second": 2.418, "step": 256 }, { "epoch": 129.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7099365592002869, "eval_runtime": 0.8513, "eval_samples_per_second": 82.229, "eval_steps_per_second": 2.349, "step": 258 }, { "epoch": 130.0, "learning_rate": 4.375e-05, "loss": 0.7781, "step": 260 }, { "epoch": 130.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7087655663490295, "eval_runtime": 0.6529, "eval_samples_per_second": 107.222, "eval_steps_per_second": 3.063, "step": 260 }, { "epoch": 131.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7076297402381897, "eval_runtime": 0.633, "eval_samples_per_second": 110.582, "eval_steps_per_second": 3.159, "step": 262 }, { "epoch": 132.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.706558883190155, "eval_runtime": 0.8161, "eval_samples_per_second": 85.772, "eval_steps_per_second": 2.451, "step": 264 }, { "epoch": 133.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7055317759513855, "eval_runtime": 0.6432, "eval_samples_per_second": 108.834, "eval_steps_per_second": 3.11, "step": 266 }, { "epoch": 134.0, "eval_accuracy": 0.7714285714285715, "eval_loss": 0.7044604420661926, "eval_runtime": 0.6426, "eval_samples_per_second": 108.936, "eval_steps_per_second": 3.112, "step": 268 }, { "epoch": 135.0, "learning_rate": 4.0625000000000005e-05, "loss": 0.7708, "step": 270 }, { "epoch": 135.0, "eval_accuracy": 0.7714285714285715, "eval_loss": 0.7034193873405457, "eval_runtime": 0.8178, "eval_samples_per_second": 85.591, "eval_steps_per_second": 2.445, "step": 270 }, { "epoch": 136.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7024958729743958, "eval_runtime": 0.6353, "eval_samples_per_second": 110.186, "eval_steps_per_second": 3.148, "step": 272 }, { "epoch": 137.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7016207575798035, "eval_runtime": 0.632, "eval_samples_per_second": 110.758, "eval_steps_per_second": 3.165, "step": 274 }, { "epoch": 138.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.7007526755332947, "eval_runtime": 0.8281, "eval_samples_per_second": 84.534, "eval_steps_per_second": 2.415, "step": 276 }, { "epoch": 139.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.6998603343963623, "eval_runtime": 0.6379, "eval_samples_per_second": 109.736, "eval_steps_per_second": 3.135, "step": 278 }, { "epoch": 140.0, "learning_rate": 3.7500000000000003e-05, "loss": 0.797, "step": 280 }, { "epoch": 140.0, "eval_accuracy": 0.7571428571428571, "eval_loss": 0.6989655494689941, "eval_runtime": 0.6404, "eval_samples_per_second": 109.303, "eval_steps_per_second": 3.123, "step": 280 }, { "epoch": 141.0, "eval_accuracy": 0.7714285714285715, "eval_loss": 0.6981316804885864, "eval_runtime": 0.8143, "eval_samples_per_second": 85.963, "eval_steps_per_second": 2.456, "step": 282 }, { "epoch": 142.0, "eval_accuracy": 0.7714285714285715, "eval_loss": 0.6973427534103394, "eval_runtime": 0.6382, "eval_samples_per_second": 109.681, "eval_steps_per_second": 3.134, "step": 284 }, { "epoch": 143.0, "eval_accuracy": 0.7714285714285715, "eval_loss": 0.6966080069541931, "eval_runtime": 0.6394, "eval_samples_per_second": 109.471, "eval_steps_per_second": 3.128, "step": 286 }, { "epoch": 144.0, "eval_accuracy": 0.7714285714285715, "eval_loss": 0.6958935856819153, "eval_runtime": 0.8183, "eval_samples_per_second": 85.546, "eval_steps_per_second": 2.444, "step": 288 }, { "epoch": 145.0, "learning_rate": 3.4375e-05, "loss": 0.7768, "step": 290 }, { "epoch": 145.0, "eval_accuracy": 0.7714285714285715, "eval_loss": 0.695166289806366, "eval_runtime": 0.6458, "eval_samples_per_second": 108.386, "eval_steps_per_second": 3.097, "step": 290 }, { "epoch": 146.0, "eval_accuracy": 0.7714285714285715, "eval_loss": 0.694381833076477, "eval_runtime": 0.6351, "eval_samples_per_second": 110.21, "eval_steps_per_second": 3.149, "step": 292 }, { "epoch": 147.0, "eval_accuracy": 0.7714285714285715, "eval_loss": 0.6935797333717346, "eval_runtime": 0.8207, "eval_samples_per_second": 85.29, "eval_steps_per_second": 2.437, "step": 294 }, { "epoch": 148.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6927558779716492, "eval_runtime": 0.6395, "eval_samples_per_second": 109.463, "eval_steps_per_second": 3.128, "step": 296 }, { "epoch": 149.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6919543147087097, "eval_runtime": 0.6322, "eval_samples_per_second": 110.727, "eval_steps_per_second": 3.164, "step": 298 }, { "epoch": 150.0, "learning_rate": 3.125e-05, "loss": 0.7569, "step": 300 }, { "epoch": 150.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6911686658859253, "eval_runtime": 0.8112, "eval_samples_per_second": 86.294, "eval_steps_per_second": 2.466, "step": 300 }, { "epoch": 151.0, "eval_accuracy": 0.8, "eval_loss": 0.6904271841049194, "eval_runtime": 0.6485, "eval_samples_per_second": 107.942, "eval_steps_per_second": 3.084, "step": 302 }, { "epoch": 152.0, "eval_accuracy": 0.8, "eval_loss": 0.6896898150444031, "eval_runtime": 0.6429, "eval_samples_per_second": 108.878, "eval_steps_per_second": 3.111, "step": 304 }, { "epoch": 153.0, "eval_accuracy": 0.8, "eval_loss": 0.688970148563385, "eval_runtime": 0.8248, "eval_samples_per_second": 84.872, "eval_steps_per_second": 2.425, "step": 306 }, { "epoch": 154.0, "eval_accuracy": 0.8, "eval_loss": 0.6882473826408386, "eval_runtime": 0.6473, "eval_samples_per_second": 108.141, "eval_steps_per_second": 3.09, "step": 308 }, { "epoch": 155.0, "learning_rate": 2.8125000000000003e-05, "loss": 0.7807, "step": 310 }, { "epoch": 155.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6875176429748535, "eval_runtime": 0.6211, "eval_samples_per_second": 112.707, "eval_steps_per_second": 3.22, "step": 310 }, { "epoch": 156.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6867862939834595, "eval_runtime": 0.8104, "eval_samples_per_second": 86.38, "eval_steps_per_second": 2.468, "step": 312 }, { "epoch": 157.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6860566139221191, "eval_runtime": 0.6376, "eval_samples_per_second": 109.79, "eval_steps_per_second": 3.137, "step": 314 }, { "epoch": 158.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6853832006454468, "eval_runtime": 0.6414, "eval_samples_per_second": 109.144, "eval_steps_per_second": 3.118, "step": 316 }, { "epoch": 159.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.684758186340332, "eval_runtime": 0.8168, "eval_samples_per_second": 85.701, "eval_steps_per_second": 2.449, "step": 318 }, { "epoch": 160.0, "learning_rate": 2.5e-05, "loss": 0.7472, "step": 320 }, { "epoch": 160.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6841580867767334, "eval_runtime": 0.6398, "eval_samples_per_second": 109.413, "eval_steps_per_second": 3.126, "step": 320 }, { "epoch": 161.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6836223006248474, "eval_runtime": 0.6529, "eval_samples_per_second": 107.211, "eval_steps_per_second": 3.063, "step": 322 }, { "epoch": 162.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.683104932308197, "eval_runtime": 0.8258, "eval_samples_per_second": 84.764, "eval_steps_per_second": 2.422, "step": 324 }, { "epoch": 163.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6826251745223999, "eval_runtime": 0.6361, "eval_samples_per_second": 110.054, "eval_steps_per_second": 3.144, "step": 326 }, { "epoch": 164.0, "eval_accuracy": 0.8142857142857143, "eval_loss": 0.6821797490119934, "eval_runtime": 0.6381, "eval_samples_per_second": 109.703, "eval_steps_per_second": 3.134, "step": 328 }, { "epoch": 165.0, "learning_rate": 2.1875e-05, "loss": 0.7665, "step": 330 }, { "epoch": 165.0, "eval_accuracy": 0.8, "eval_loss": 0.6817546486854553, "eval_runtime": 0.8246, "eval_samples_per_second": 84.889, "eval_steps_per_second": 2.425, "step": 330 }, { "epoch": 166.0, "eval_accuracy": 0.8, "eval_loss": 0.6813837885856628, "eval_runtime": 0.6625, "eval_samples_per_second": 105.654, "eval_steps_per_second": 3.019, "step": 332 }, { "epoch": 167.0, "eval_accuracy": 0.8, "eval_loss": 0.681039035320282, "eval_runtime": 0.6385, "eval_samples_per_second": 109.638, "eval_steps_per_second": 3.133, "step": 334 }, { "epoch": 168.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6806796193122864, "eval_runtime": 0.8294, "eval_samples_per_second": 84.403, "eval_steps_per_second": 2.412, "step": 336 }, { "epoch": 169.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.680313766002655, "eval_runtime": 0.6312, "eval_samples_per_second": 110.896, "eval_steps_per_second": 3.168, "step": 338 }, { "epoch": 170.0, "learning_rate": 1.8750000000000002e-05, "loss": 0.7684, "step": 340 }, { "epoch": 170.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6799898743629456, "eval_runtime": 0.6322, "eval_samples_per_second": 110.721, "eval_steps_per_second": 3.163, "step": 340 }, { "epoch": 171.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6796825528144836, "eval_runtime": 0.8381, "eval_samples_per_second": 83.525, "eval_steps_per_second": 2.386, "step": 342 }, { "epoch": 172.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6793543696403503, "eval_runtime": 0.6493, "eval_samples_per_second": 107.81, "eval_steps_per_second": 3.08, "step": 344 }, { "epoch": 173.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6790363192558289, "eval_runtime": 0.6451, "eval_samples_per_second": 108.504, "eval_steps_per_second": 3.1, "step": 346 }, { "epoch": 174.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6787369847297668, "eval_runtime": 0.8298, "eval_samples_per_second": 84.361, "eval_steps_per_second": 2.41, "step": 348 }, { "epoch": 175.0, "learning_rate": 1.5625e-05, "loss": 0.7459, "step": 350 }, { "epoch": 175.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6784414052963257, "eval_runtime": 0.6607, "eval_samples_per_second": 105.953, "eval_steps_per_second": 3.027, "step": 350 }, { "epoch": 176.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6781107783317566, "eval_runtime": 0.681, "eval_samples_per_second": 102.789, "eval_steps_per_second": 2.937, "step": 352 }, { "epoch": 177.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.677795946598053, "eval_runtime": 0.8379, "eval_samples_per_second": 83.54, "eval_steps_per_second": 2.387, "step": 354 }, { "epoch": 178.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6774783730506897, "eval_runtime": 0.6408, "eval_samples_per_second": 109.247, "eval_steps_per_second": 3.121, "step": 356 }, { "epoch": 179.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6771765351295471, "eval_runtime": 0.6403, "eval_samples_per_second": 109.315, "eval_steps_per_second": 3.123, "step": 358 }, { "epoch": 180.0, "learning_rate": 1.25e-05, "loss": 0.742, "step": 360 }, { "epoch": 180.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6768958568572998, "eval_runtime": 0.8321, "eval_samples_per_second": 84.126, "eval_steps_per_second": 2.404, "step": 360 }, { "epoch": 181.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6766448616981506, "eval_runtime": 0.6364, "eval_samples_per_second": 110.0, "eval_steps_per_second": 3.143, "step": 362 }, { "epoch": 182.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6764284372329712, "eval_runtime": 0.647, "eval_samples_per_second": 108.196, "eval_steps_per_second": 3.091, "step": 364 }, { "epoch": 183.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6762242913246155, "eval_runtime": 0.8597, "eval_samples_per_second": 81.422, "eval_steps_per_second": 2.326, "step": 366 }, { "epoch": 184.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6760057806968689, "eval_runtime": 0.651, "eval_samples_per_second": 107.527, "eval_steps_per_second": 3.072, "step": 368 }, { "epoch": 185.0, "learning_rate": 9.375000000000001e-06, "loss": 0.7642, "step": 370 }, { "epoch": 185.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6757904291152954, "eval_runtime": 0.6445, "eval_samples_per_second": 108.618, "eval_steps_per_second": 3.103, "step": 370 }, { "epoch": 186.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6755796670913696, "eval_runtime": 0.8335, "eval_samples_per_second": 83.986, "eval_steps_per_second": 2.4, "step": 372 }, { "epoch": 187.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6753801107406616, "eval_runtime": 0.6394, "eval_samples_per_second": 109.479, "eval_steps_per_second": 3.128, "step": 374 }, { "epoch": 188.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6752031445503235, "eval_runtime": 0.658, "eval_samples_per_second": 106.386, "eval_steps_per_second": 3.04, "step": 376 }, { "epoch": 189.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6750344634056091, "eval_runtime": 0.8422, "eval_samples_per_second": 83.112, "eval_steps_per_second": 2.375, "step": 378 }, { "epoch": 190.0, "learning_rate": 6.25e-06, "loss": 0.7277, "step": 380 }, { "epoch": 190.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.674885094165802, "eval_runtime": 0.6493, "eval_samples_per_second": 107.808, "eval_steps_per_second": 3.08, "step": 380 }, { "epoch": 191.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6747546195983887, "eval_runtime": 0.6429, "eval_samples_per_second": 108.875, "eval_steps_per_second": 3.111, "step": 382 }, { "epoch": 192.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6746455430984497, "eval_runtime": 0.8305, "eval_samples_per_second": 84.289, "eval_steps_per_second": 2.408, "step": 384 }, { "epoch": 193.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6745493412017822, "eval_runtime": 0.6551, "eval_samples_per_second": 106.855, "eval_steps_per_second": 3.053, "step": 386 }, { "epoch": 194.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6744527816772461, "eval_runtime": 0.6428, "eval_samples_per_second": 108.896, "eval_steps_per_second": 3.111, "step": 388 }, { "epoch": 195.0, "learning_rate": 3.125e-06, "loss": 0.764, "step": 390 }, { "epoch": 195.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6743654608726501, "eval_runtime": 0.8334, "eval_samples_per_second": 83.991, "eval_steps_per_second": 2.4, "step": 390 }, { "epoch": 196.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.674295961856842, "eval_runtime": 0.652, "eval_samples_per_second": 107.363, "eval_steps_per_second": 3.068, "step": 392 }, { "epoch": 197.0, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.6742398142814636, "eval_runtime": 0.6659, "eval_samples_per_second": 105.127, "eval_steps_per_second": 3.004, "step": 394 }, { "epoch": 198.0, "eval_accuracy": 0.8, "eval_loss": 0.6741960644721985, "eval_runtime": 0.8894, "eval_samples_per_second": 78.703, "eval_steps_per_second": 2.249, "step": 396 }, { "epoch": 199.0, "eval_accuracy": 0.8, "eval_loss": 0.674168586730957, "eval_runtime": 0.6565, "eval_samples_per_second": 106.623, "eval_steps_per_second": 3.046, "step": 398 }, { "epoch": 200.0, "learning_rate": 0.0, "loss": 0.7444, "step": 400 }, { "epoch": 200.0, "eval_accuracy": 0.8, "eval_loss": 0.6741567850112915, "eval_runtime": 0.6417, "eval_samples_per_second": 109.09, "eval_steps_per_second": 3.117, "step": 400 }, { "epoch": 200.0, "step": 400, "total_flos": 2.23710151698432e+18, "train_loss": 0.8548950719833374, "train_runtime": 1030.1946, "train_samples_per_second": 87.362, "train_steps_per_second": 0.388 } ], "logging_steps": 10, "max_steps": 400, "num_train_epochs": 200, "save_steps": 500, "total_flos": 2.23710151698432e+18, "trial_name": null, "trial_params": null }