{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.817491868449585, "global_step": 41000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "compression_loss": 0.0, "distillation_loss": 9.49934196472168, "epoch": 0.0, "learning_rate": 5.99999721557765e-05, "loss": 12.0065, "step": 10, "task_loss": 3.6033935546875 }, { "compression_loss": 0.0, "distillation_loss": 5.30767822265625, "epoch": 0.01, "learning_rate": 5.999980199682008e-05, "loss": 6.9237, "step": 20, "task_loss": 1.9013495445251465 }, { "compression_loss": 0.0, "distillation_loss": 2.916347026824951, "epoch": 0.01, "learning_rate": 5.9999477148796615e-05, "loss": 3.7725, "step": 30, "task_loss": 1.5464043617248535 }, { "compression_loss": 0.0, "distillation_loss": 2.6741557121276855, "epoch": 0.01, "learning_rate": 5.999899761338114e-05, "loss": 2.769, "step": 40, "task_loss": 1.156141996383667 }, { "compression_loss": 0.0, "distillation_loss": 2.1403422355651855, "epoch": 0.02, "learning_rate": 5.999836339304631e-05, "loss": 2.5443, "step": 50, "task_loss": 1.239074945449829 }, { "compression_loss": 0.0, "distillation_loss": 2.6367697715759277, "epoch": 0.02, "learning_rate": 5.9997574491062356e-05, "loss": 2.3399, "step": 60, "task_loss": 1.2156091928482056 }, { "compression_loss": 0.0, "distillation_loss": 1.863443374633789, "epoch": 0.03, "learning_rate": 5.999663091149714e-05, "loss": 2.2202, "step": 70, "task_loss": 1.239349126815796 }, { "compression_loss": 0.0, "distillation_loss": 2.0775623321533203, "epoch": 0.03, "learning_rate": 5.9995532659216076e-05, "loss": 2.0928, "step": 80, "task_loss": 1.497615098953247 }, { "compression_loss": 0.0, "distillation_loss": 2.05897855758667, "epoch": 0.03, "learning_rate": 5.9994279739882106e-05, "loss": 2.1269, "step": 90, "task_loss": 1.0416953563690186 }, { "compression_loss": 0.0, "distillation_loss": 1.7927725315093994, "epoch": 0.04, "learning_rate": 5.999287215995571e-05, "loss": 1.8854, "step": 100, "task_loss": 0.6938618421554565 }, { "compression_loss": 0.0, "distillation_loss": 1.8927876949310303, "epoch": 0.04, "learning_rate": 5.9991309926694865e-05, "loss": 1.8586, "step": 110, "task_loss": 1.012183666229248 }, { "compression_loss": 0.0, "distillation_loss": 2.0477404594421387, "epoch": 0.04, "learning_rate": 5.998959304815494e-05, "loss": 1.8545, "step": 120, "task_loss": 1.4201068878173828 }, { "compression_loss": 0.0, "distillation_loss": 1.5746378898620605, "epoch": 0.05, "learning_rate": 5.998772153318875e-05, "loss": 1.9129, "step": 130, "task_loss": 0.9690418243408203 }, { "compression_loss": 0.0, "distillation_loss": 1.8912169933319092, "epoch": 0.05, "learning_rate": 5.9985695391446455e-05, "loss": 1.76, "step": 140, "task_loss": 0.9980369806289673 }, { "compression_loss": 0.0, "distillation_loss": 1.6582715511322021, "epoch": 0.05, "learning_rate": 5.9983514633375525e-05, "loss": 1.8208, "step": 150, "task_loss": 0.9474058747291565 }, { "compression_loss": 0.0, "distillation_loss": 1.706885814666748, "epoch": 0.06, "learning_rate": 5.9981179270220685e-05, "loss": 1.6574, "step": 160, "task_loss": 0.8353824615478516 }, { "compression_loss": 0.0, "distillation_loss": 2.296937942504883, "epoch": 0.06, "learning_rate": 5.997868931402385e-05, "loss": 1.9116, "step": 170, "task_loss": 1.2023546695709229 }, { "compression_loss": 0.0, "distillation_loss": 1.843885064125061, "epoch": 0.07, "learning_rate": 5.997604477762407e-05, "loss": 1.75, "step": 180, "task_loss": 1.366844892501831 }, { "compression_loss": 0.0, "distillation_loss": 1.7979713678359985, "epoch": 0.07, "learning_rate": 5.997324567465745e-05, "loss": 1.7307, "step": 190, "task_loss": 0.9796158075332642 }, { "compression_loss": 0.0, "distillation_loss": 2.4352352619171143, "epoch": 0.07, "learning_rate": 5.997029201955714e-05, "loss": 1.6291, "step": 200, "task_loss": 1.3526904582977295 }, { "compression_loss": 0.0, "distillation_loss": 1.3992118835449219, "epoch": 0.08, "learning_rate": 5.996718382755315e-05, "loss": 1.6391, "step": 210, "task_loss": 0.7047762870788574 }, { "compression_loss": 0.0, "distillation_loss": 1.648317575454712, "epoch": 0.08, "learning_rate": 5.9963921114672385e-05, "loss": 1.5126, "step": 220, "task_loss": 1.3377418518066406 }, { "compression_loss": 0.0, "distillation_loss": 1.356798768043518, "epoch": 0.08, "learning_rate": 5.996050389773846e-05, "loss": 1.5781, "step": 230, "task_loss": 0.6622216105461121 }, { "compression_loss": 0.0, "distillation_loss": 1.5281479358673096, "epoch": 0.09, "learning_rate": 5.995693219437173e-05, "loss": 1.6074, "step": 240, "task_loss": 1.0915801525115967 }, { "compression_loss": 0.0, "distillation_loss": 1.4357974529266357, "epoch": 0.09, "learning_rate": 5.995320602298906e-05, "loss": 1.6211, "step": 250, "task_loss": 1.0162770748138428 }, { "epoch": 0.09, "eval_exact_match": 75.68590350047303, "eval_f1": 85.28012898809695, "step": 250 }, { "compression_loss": 0.0, "distillation_loss": 1.83945894241333, "epoch": 0.09, "learning_rate": 5.9949325402803876e-05, "loss": 1.5693, "step": 260, "task_loss": 1.0681490898132324 }, { "compression_loss": 0.0, "distillation_loss": 1.4410802125930786, "epoch": 0.1, "learning_rate": 5.9945290353825946e-05, "loss": 1.5176, "step": 270, "task_loss": 0.9294320940971375 }, { "compression_loss": 0.0, "distillation_loss": 1.778608798980713, "epoch": 0.1, "learning_rate": 5.994110089686134e-05, "loss": 1.5048, "step": 280, "task_loss": 1.2308306694030762 }, { "compression_loss": 0.0, "distillation_loss": 1.1298151016235352, "epoch": 0.1, "learning_rate": 5.993675705351232e-05, "loss": 1.5331, "step": 290, "task_loss": 0.5350617170333862 }, { "compression_loss": 0.0, "distillation_loss": 1.5367448329925537, "epoch": 0.11, "learning_rate": 5.993225884617717e-05, "loss": 1.5269, "step": 300, "task_loss": 0.7491666078567505 }, { "compression_loss": 0.0, "distillation_loss": 1.389266848564148, "epoch": 0.11, "learning_rate": 5.9927606298050194e-05, "loss": 1.4866, "step": 310, "task_loss": 0.8327745199203491 }, { "compression_loss": 0.0, "distillation_loss": 1.6235179901123047, "epoch": 0.12, "learning_rate": 5.9922799433121474e-05, "loss": 1.5065, "step": 320, "task_loss": 0.8440191149711609 }, { "compression_loss": 0.0, "distillation_loss": 1.564889669418335, "epoch": 0.12, "learning_rate": 5.9917838276176835e-05, "loss": 1.5237, "step": 330, "task_loss": 1.3443149328231812 }, { "compression_loss": 0.0, "distillation_loss": 1.447906494140625, "epoch": 0.12, "learning_rate": 5.9912722852797664e-05, "loss": 1.4332, "step": 340, "task_loss": 0.8888688087463379 }, { "compression_loss": 0.0, "distillation_loss": 1.415635108947754, "epoch": 0.13, "learning_rate": 5.99074531893608e-05, "loss": 1.4817, "step": 350, "task_loss": 0.8752025365829468 }, { "compression_loss": 0.0, "distillation_loss": 1.707700252532959, "epoch": 0.13, "learning_rate": 5.990202931303841e-05, "loss": 1.4789, "step": 360, "task_loss": 1.3695316314697266 }, { "compression_loss": 0.0, "distillation_loss": 1.2083714008331299, "epoch": 0.13, "learning_rate": 5.989645125179782e-05, "loss": 1.5234, "step": 370, "task_loss": 0.7723703384399414 }, { "compression_loss": 0.0, "distillation_loss": 1.7325220108032227, "epoch": 0.14, "learning_rate": 5.989071903440139e-05, "loss": 1.499, "step": 380, "task_loss": 1.0906333923339844 }, { "compression_loss": 0.0, "distillation_loss": 1.2659305334091187, "epoch": 0.14, "learning_rate": 5.988483269040635e-05, "loss": 1.4476, "step": 390, "task_loss": 0.84165358543396 }, { "compression_loss": 0.0, "distillation_loss": 1.5708644390106201, "epoch": 0.14, "learning_rate": 5.987879225016469e-05, "loss": 1.4519, "step": 400, "task_loss": 0.8206751346588135 }, { "compression_loss": 0.0, "distillation_loss": 1.410794734954834, "epoch": 0.15, "learning_rate": 5.987259774482292e-05, "loss": 1.4455, "step": 410, "task_loss": 0.7794616222381592 }, { "compression_loss": 0.0, "distillation_loss": 1.5559226274490356, "epoch": 0.15, "learning_rate": 5.986624920632203e-05, "loss": 1.5825, "step": 420, "task_loss": 1.1558645963668823 }, { "compression_loss": 0.0, "distillation_loss": 1.5958247184753418, "epoch": 0.16, "learning_rate": 5.9859746667397186e-05, "loss": 1.4642, "step": 430, "task_loss": 1.1452465057373047 }, { "compression_loss": 0.0, "distillation_loss": 1.0762372016906738, "epoch": 0.16, "learning_rate": 5.985309016157769e-05, "loss": 1.4096, "step": 440, "task_loss": 0.7453725337982178 }, { "compression_loss": 0.0, "distillation_loss": 1.3428254127502441, "epoch": 0.16, "learning_rate": 5.9846279723186714e-05, "loss": 1.3398, "step": 450, "task_loss": 0.6058871746063232 }, { "compression_loss": 0.0, "distillation_loss": 1.6145907640457153, "epoch": 0.17, "learning_rate": 5.983931538734117e-05, "loss": 1.4124, "step": 460, "task_loss": 1.0071673393249512 }, { "compression_loss": 0.0, "distillation_loss": 1.4324638843536377, "epoch": 0.17, "learning_rate": 5.983219718995151e-05, "loss": 1.3421, "step": 470, "task_loss": 0.8209178447723389 }, { "compression_loss": 0.0, "distillation_loss": 1.294832468032837, "epoch": 0.17, "learning_rate": 5.982492516772156e-05, "loss": 1.3454, "step": 480, "task_loss": 1.0602484941482544 }, { "compression_loss": 0.0, "distillation_loss": 1.3344955444335938, "epoch": 0.18, "learning_rate": 5.9817499358148303e-05, "loss": 1.3674, "step": 490, "task_loss": 1.2323447465896606 }, { "compression_loss": 0.0, "distillation_loss": 1.31858229637146, "epoch": 0.18, "learning_rate": 5.980991979952172e-05, "loss": 1.4034, "step": 500, "task_loss": 0.8082805871963501 }, { "epoch": 0.18, "eval_exact_match": 76.43330179754021, "eval_f1": 85.8826809188487, "step": 500 }, { "compression_loss": 0.0, "distillation_loss": 1.117767095565796, "epoch": 0.18, "learning_rate": 5.980218653092457e-05, "loss": 1.4255, "step": 510, "task_loss": 0.8356488943099976 }, { "compression_loss": 0.0, "distillation_loss": 1.4943193197250366, "epoch": 0.19, "learning_rate": 5.9794299592232164e-05, "loss": 1.3749, "step": 520, "task_loss": 0.6115718483924866 }, { "compression_loss": 0.0, "distillation_loss": 1.5185236930847168, "epoch": 0.19, "learning_rate": 5.978625902411224e-05, "loss": 1.37, "step": 530, "task_loss": 0.9306137561798096 }, { "compression_loss": 0.0, "distillation_loss": 1.3840581178665161, "epoch": 0.2, "learning_rate": 5.977806486802464e-05, "loss": 1.2457, "step": 540, "task_loss": 1.2043516635894775 }, { "compression_loss": 0.0, "distillation_loss": 1.6848634481430054, "epoch": 0.2, "learning_rate": 5.97697171662212e-05, "loss": 1.3547, "step": 550, "task_loss": 1.2931828498840332 }, { "compression_loss": 0.0, "distillation_loss": 1.0846725702285767, "epoch": 0.2, "learning_rate": 5.9761215961745495e-05, "loss": 1.1797, "step": 560, "task_loss": 0.9227917194366455 }, { "compression_loss": 0.0, "distillation_loss": 1.2260918617248535, "epoch": 0.21, "learning_rate": 5.9752561298432565e-05, "loss": 1.1848, "step": 570, "task_loss": 0.8526758551597595 }, { "compression_loss": 0.0, "distillation_loss": 1.9792821407318115, "epoch": 0.21, "learning_rate": 5.974375322090879e-05, "loss": 1.3254, "step": 580, "task_loss": 1.3365485668182373 }, { "compression_loss": 0.0, "distillation_loss": 1.0318635702133179, "epoch": 0.21, "learning_rate": 5.973479177459156e-05, "loss": 1.249, "step": 590, "task_loss": 0.5771055817604065 }, { "compression_loss": 0.0, "distillation_loss": 1.460340142250061, "epoch": 0.22, "learning_rate": 5.972567700568911e-05, "loss": 1.4128, "step": 600, "task_loss": 0.593090295791626 }, { "compression_loss": 0.0, "distillation_loss": 1.5659215450286865, "epoch": 0.22, "learning_rate": 5.9716408961200254e-05, "loss": 1.3882, "step": 610, "task_loss": 1.2778445482254028 }, { "compression_loss": 0.0, "distillation_loss": 1.173753023147583, "epoch": 0.22, "learning_rate": 5.970698768891416e-05, "loss": 1.191, "step": 620, "task_loss": 0.5027570128440857 }, { "compression_loss": 0.0, "distillation_loss": 1.417959451675415, "epoch": 0.23, "learning_rate": 5.9697413237410065e-05, "loss": 1.4367, "step": 630, "task_loss": 0.7332655787467957 }, { "compression_loss": 0.0, "distillation_loss": 1.5544065237045288, "epoch": 0.23, "learning_rate": 5.968768565605706e-05, "loss": 1.3264, "step": 640, "task_loss": 0.8936142921447754 }, { "compression_loss": 0.0, "distillation_loss": 1.5732910633087158, "epoch": 0.23, "learning_rate": 5.9677804995013815e-05, "loss": 1.3731, "step": 650, "task_loss": 1.1154710054397583 }, { "compression_loss": 0.0, "distillation_loss": 1.3809391260147095, "epoch": 0.24, "learning_rate": 5.966777130522836e-05, "loss": 1.4006, "step": 660, "task_loss": 0.9069052338600159 }, { "compression_loss": 0.0, "distillation_loss": 1.0899161100387573, "epoch": 0.24, "learning_rate": 5.965758463843777e-05, "loss": 1.2987, "step": 670, "task_loss": 0.8597618341445923 }, { "compression_loss": 0.0, "distillation_loss": 1.151376724243164, "epoch": 0.25, "learning_rate": 5.9647245047167914e-05, "loss": 1.305, "step": 680, "task_loss": 0.8611929416656494 }, { "compression_loss": 0.0, "distillation_loss": 1.3947758674621582, "epoch": 0.25, "learning_rate": 5.96367525847332e-05, "loss": 1.2468, "step": 690, "task_loss": 1.21012544631958 }, { "compression_loss": 0.0, "distillation_loss": 1.192865252494812, "epoch": 0.25, "learning_rate": 5.962610730523631e-05, "loss": 1.3553, "step": 700, "task_loss": 1.0180736780166626 }, { "compression_loss": 0.0, "distillation_loss": 0.9713619947433472, "epoch": 0.26, "learning_rate": 5.961530926356787e-05, "loss": 1.3005, "step": 710, "task_loss": 1.0774257183074951 }, { "compression_loss": 0.0, "distillation_loss": 1.5023868083953857, "epoch": 0.26, "learning_rate": 5.960435851540622e-05, "loss": 1.3108, "step": 720, "task_loss": 1.0222753286361694 }, { "compression_loss": 0.0, "distillation_loss": 1.454642415046692, "epoch": 0.26, "learning_rate": 5.9593255117217096e-05, "loss": 1.3155, "step": 730, "task_loss": 0.8456030488014221 }, { "compression_loss": 0.0, "distillation_loss": 0.9722533226013184, "epoch": 0.27, "learning_rate": 5.958199912625336e-05, "loss": 1.3135, "step": 740, "task_loss": 0.7934273481369019 }, { "compression_loss": 0.0, "distillation_loss": 1.4297173023223877, "epoch": 0.27, "learning_rate": 5.957059060055468e-05, "loss": 1.2445, "step": 750, "task_loss": 0.8182862997055054 }, { "epoch": 0.27, "eval_exact_match": 77.19962157048249, "eval_f1": 86.33401538349308, "step": 750 }, { "compression_loss": 0.0, "distillation_loss": 1.6894798278808594, "epoch": 0.27, "learning_rate": 5.955902959894726e-05, "loss": 1.2795, "step": 760, "task_loss": 1.0540611743927002 }, { "compression_loss": 0.0, "distillation_loss": 1.2129619121551514, "epoch": 0.28, "learning_rate": 5.954731618104348e-05, "loss": 1.1677, "step": 770, "task_loss": 1.3634464740753174 }, { "compression_loss": 0.0, "distillation_loss": 1.2262523174285889, "epoch": 0.28, "learning_rate": 5.953545040724171e-05, "loss": 1.2771, "step": 780, "task_loss": 0.7795358896255493 }, { "compression_loss": 0.0, "distillation_loss": 1.224480390548706, "epoch": 0.29, "learning_rate": 5.952343233872584e-05, "loss": 1.2326, "step": 790, "task_loss": 0.8323132991790771 }, { "compression_loss": 0.0, "distillation_loss": 1.2114362716674805, "epoch": 0.29, "learning_rate": 5.951126203746507e-05, "loss": 1.247, "step": 800, "task_loss": 0.9133867621421814 }, { "compression_loss": 0.0, "distillation_loss": 1.1349849700927734, "epoch": 0.29, "learning_rate": 5.949893956621358e-05, "loss": 1.4212, "step": 810, "task_loss": 0.7154116630554199 }, { "compression_loss": 0.0, "distillation_loss": 1.6616898775100708, "epoch": 0.3, "learning_rate": 5.9486464988510165e-05, "loss": 1.2637, "step": 820, "task_loss": 1.1065783500671387 }, { "compression_loss": 0.0, "distillation_loss": 1.537660837173462, "epoch": 0.3, "learning_rate": 5.9473838368677945e-05, "loss": 1.3276, "step": 830, "task_loss": 1.0488613843917847 }, { "compression_loss": 0.0, "distillation_loss": 1.110896110534668, "epoch": 0.3, "learning_rate": 5.9461059771824025e-05, "loss": 1.2912, "step": 840, "task_loss": 0.5585556030273438 }, { "compression_loss": 0.0, "distillation_loss": 0.865274965763092, "epoch": 0.31, "learning_rate": 5.944812926383914e-05, "loss": 1.2452, "step": 850, "task_loss": 0.7370738387107849 }, { "compression_loss": 0.0, "distillation_loss": 1.265105962753296, "epoch": 0.31, "learning_rate": 5.943504691139734e-05, "loss": 1.2056, "step": 860, "task_loss": 1.171694278717041 }, { "compression_loss": 0.0, "distillation_loss": 1.2932755947113037, "epoch": 0.31, "learning_rate": 5.942181278195563e-05, "loss": 1.2814, "step": 870, "task_loss": 0.8684425354003906 }, { "compression_loss": 0.0, "distillation_loss": 1.1378037929534912, "epoch": 0.32, "learning_rate": 5.940842694375364e-05, "loss": 1.1371, "step": 880, "task_loss": 0.4964973032474518 }, { "compression_loss": 0.0, "distillation_loss": 1.041467547416687, "epoch": 0.32, "learning_rate": 5.9394889465813246e-05, "loss": 1.3367, "step": 890, "task_loss": 0.9691845178604126 }, { "compression_loss": 0.0, "distillation_loss": 1.2773613929748535, "epoch": 0.33, "learning_rate": 5.9381200417938226e-05, "loss": 1.2445, "step": 900, "task_loss": 0.8152469992637634 }, { "compression_loss": 0.0, "distillation_loss": 1.2654523849487305, "epoch": 0.33, "learning_rate": 5.936735987071393e-05, "loss": 1.3448, "step": 910, "task_loss": 0.8703353404998779 }, { "compression_loss": 0.0, "distillation_loss": 1.325675368309021, "epoch": 0.33, "learning_rate": 5.935336789550688e-05, "loss": 1.2264, "step": 920, "task_loss": 0.905992865562439 }, { "compression_loss": 0.0, "distillation_loss": 1.1226941347122192, "epoch": 0.34, "learning_rate": 5.9339224564464384e-05, "loss": 1.2136, "step": 930, "task_loss": 0.8291558623313904 }, { "compression_loss": 0.0, "distillation_loss": 2.0841875076293945, "epoch": 0.34, "learning_rate": 5.932492995051423e-05, "loss": 1.3336, "step": 940, "task_loss": 1.5568690299987793 }, { "compression_loss": 0.0, "distillation_loss": 1.2928574085235596, "epoch": 0.34, "learning_rate": 5.9310484127364235e-05, "loss": 1.2545, "step": 950, "task_loss": 0.6856122612953186 }, { "compression_loss": 0.0, "distillation_loss": 1.5862665176391602, "epoch": 0.35, "learning_rate": 5.9295887169501945e-05, "loss": 1.2767, "step": 960, "task_loss": 0.9553361535072327 }, { "compression_loss": 0.0, "distillation_loss": 1.1855202913284302, "epoch": 0.35, "learning_rate": 5.9281139152194166e-05, "loss": 1.2932, "step": 970, "task_loss": 1.2013435363769531 }, { "compression_loss": 0.0, "distillation_loss": 1.276527762413025, "epoch": 0.35, "learning_rate": 5.9266240151486647e-05, "loss": 1.2817, "step": 980, "task_loss": 0.5954961776733398 }, { "compression_loss": 0.0, "distillation_loss": 1.2427090406417847, "epoch": 0.36, "learning_rate": 5.925119024420363e-05, "loss": 1.108, "step": 990, "task_loss": 0.9799728989601135 }, { "compression_loss": 0.0, "distillation_loss": 1.2510955333709717, "epoch": 0.36, "learning_rate": 5.9235989507947515e-05, "loss": 1.2554, "step": 1000, "task_loss": 1.1080634593963623 }, { "epoch": 0.36, "eval_exact_match": 77.90917691579943, "eval_f1": 86.79589371678661, "step": 1000 }, { "compression_loss": 0.0, "distillation_loss": 1.3496589660644531, "epoch": 0.37, "learning_rate": 5.922063802109839e-05, "loss": 1.3452, "step": 1010, "task_loss": 0.8422398567199707 }, { "compression_loss": 0.0, "distillation_loss": 1.35902738571167, "epoch": 0.37, "learning_rate": 5.92051358628137e-05, "loss": 1.1973, "step": 1020, "task_loss": 1.0076375007629395 }, { "compression_loss": 0.0, "distillation_loss": 0.9760231971740723, "epoch": 0.37, "learning_rate": 5.918948311302778e-05, "loss": 1.3059, "step": 1030, "task_loss": 0.6299121975898743 }, { "compression_loss": 0.0, "distillation_loss": 1.3105099201202393, "epoch": 0.38, "learning_rate": 5.917367985245147e-05, "loss": 1.242, "step": 1040, "task_loss": 1.0138111114501953 }, { "compression_loss": 0.0, "distillation_loss": 1.1091604232788086, "epoch": 0.38, "learning_rate": 5.91577261625717e-05, "loss": 1.1236, "step": 1050, "task_loss": 1.2920585870742798 }, { "compression_loss": 0.0, "distillation_loss": 1.125967264175415, "epoch": 0.38, "learning_rate": 5.914162212565106e-05, "loss": 1.1758, "step": 1060, "task_loss": 0.6104905605316162 }, { "compression_loss": 0.0, "distillation_loss": 1.1518621444702148, "epoch": 0.39, "learning_rate": 5.9125367824727376e-05, "loss": 1.2793, "step": 1070, "task_loss": 1.1348037719726562 }, { "compression_loss": 0.0, "distillation_loss": 1.4855108261108398, "epoch": 0.39, "learning_rate": 5.9108963343613286e-05, "loss": 1.2306, "step": 1080, "task_loss": 0.7915964722633362 }, { "compression_loss": 0.0, "distillation_loss": 1.2316033840179443, "epoch": 0.39, "learning_rate": 5.909240876689582e-05, "loss": 1.3181, "step": 1090, "task_loss": 1.3942852020263672 }, { "compression_loss": 0.0, "distillation_loss": 1.0107777118682861, "epoch": 0.4, "learning_rate": 5.9075704179935926e-05, "loss": 1.2463, "step": 1100, "task_loss": 0.9370169639587402 }, { "compression_loss": 0.0, "distillation_loss": 1.1373622417449951, "epoch": 0.4, "learning_rate": 5.9058849668868097e-05, "loss": 1.2639, "step": 1110, "task_loss": 0.5862497091293335 }, { "compression_loss": 0.0, "distillation_loss": 1.1231906414031982, "epoch": 0.4, "learning_rate": 5.904184532059984e-05, "loss": 1.1012, "step": 1120, "task_loss": 0.7552740573883057 }, { "compression_loss": 0.0, "distillation_loss": 1.4255497455596924, "epoch": 0.41, "learning_rate": 5.90246912228113e-05, "loss": 1.2399, "step": 1130, "task_loss": 1.2231707572937012 }, { "compression_loss": 0.0, "distillation_loss": 1.3374457359313965, "epoch": 0.41, "learning_rate": 5.9007387463954766e-05, "loss": 1.2758, "step": 1140, "task_loss": 0.9943552017211914 }, { "compression_loss": 0.0, "distillation_loss": 1.0493338108062744, "epoch": 0.42, "learning_rate": 5.898993413325424e-05, "loss": 1.1956, "step": 1150, "task_loss": 0.7132070064544678 }, { "compression_loss": 0.0, "distillation_loss": 1.372544288635254, "epoch": 0.42, "learning_rate": 5.897233132070498e-05, "loss": 1.3824, "step": 1160, "task_loss": 0.6550334095954895 }, { "compression_loss": 0.0, "distillation_loss": 0.7538251876831055, "epoch": 0.42, "learning_rate": 5.8954579117072986e-05, "loss": 1.1745, "step": 1170, "task_loss": 0.6548653841018677 }, { "compression_loss": 0.0, "distillation_loss": 1.3968942165374756, "epoch": 0.43, "learning_rate": 5.89366776138946e-05, "loss": 1.1757, "step": 1180, "task_loss": 1.0903644561767578 }, { "compression_loss": 0.0, "distillation_loss": 0.8698831796646118, "epoch": 0.43, "learning_rate": 5.8918626903475984e-05, "loss": 1.1275, "step": 1190, "task_loss": 0.9105096459388733 }, { "compression_loss": 0.0, "distillation_loss": 1.0546157360076904, "epoch": 0.43, "learning_rate": 5.8900427078892686e-05, "loss": 1.2295, "step": 1200, "task_loss": 1.0799559354782104 }, { "compression_loss": 0.0, "distillation_loss": 0.9607287645339966, "epoch": 0.44, "learning_rate": 5.888207823398911e-05, "loss": 1.071, "step": 1210, "task_loss": 0.8502562046051025 }, { "compression_loss": 0.0, "distillation_loss": 1.7898484468460083, "epoch": 0.44, "learning_rate": 5.886358046337806e-05, "loss": 1.1555, "step": 1220, "task_loss": 1.1786938905715942 }, { "compression_loss": 0.0, "distillation_loss": 0.9928610324859619, "epoch": 0.44, "learning_rate": 5.884493386244029e-05, "loss": 1.0441, "step": 1230, "task_loss": 0.6678194999694824 }, { "compression_loss": 0.0, "distillation_loss": 1.371687412261963, "epoch": 0.45, "learning_rate": 5.882613852732392e-05, "loss": 1.1489, "step": 1240, "task_loss": 0.8427919149398804 }, { "compression_loss": 0.0, "distillation_loss": 1.259211778640747, "epoch": 0.45, "learning_rate": 5.880719455494403e-05, "loss": 1.1599, "step": 1250, "task_loss": 0.7002037763595581 }, { "epoch": 0.45, "eval_exact_match": 78.89309366130558, "eval_f1": 87.30477997622836, "step": 1250 }, { "compression_loss": 0.0, "distillation_loss": 1.2537944316864014, "epoch": 0.46, "learning_rate": 5.87881020429821e-05, "loss": 1.2433, "step": 1260, "task_loss": 0.9072924852371216 }, { "compression_loss": 0.0, "distillation_loss": 1.542639970779419, "epoch": 0.46, "learning_rate": 5.8768861089885557e-05, "loss": 1.1954, "step": 1270, "task_loss": 0.7959115505218506 }, { "compression_loss": 0.0, "distillation_loss": 1.015520453453064, "epoch": 0.46, "learning_rate": 5.8749471794867215e-05, "loss": 1.0924, "step": 1280, "task_loss": 0.7978045344352722 }, { "compression_loss": 0.0, "distillation_loss": 1.0034399032592773, "epoch": 0.47, "learning_rate": 5.87299342579048e-05, "loss": 1.1828, "step": 1290, "task_loss": 0.656456470489502 }, { "compression_loss": 0.0, "distillation_loss": 1.1494542360305786, "epoch": 0.47, "learning_rate": 5.8710248579740436e-05, "loss": 1.1708, "step": 1300, "task_loss": 0.9302552342414856 }, { "compression_loss": 0.0, "distillation_loss": 1.231022834777832, "epoch": 0.47, "learning_rate": 5.8690414861880074e-05, "loss": 1.2117, "step": 1310, "task_loss": 1.1827170848846436 }, { "compression_loss": 0.0, "distillation_loss": 1.1523957252502441, "epoch": 0.48, "learning_rate": 5.867043320659306e-05, "loss": 1.0301, "step": 1320, "task_loss": 1.0149617195129395 }, { "compression_loss": 0.0, "distillation_loss": 1.0345340967178345, "epoch": 0.48, "learning_rate": 5.865030371691151e-05, "loss": 1.0718, "step": 1330, "task_loss": 0.6616455912590027 }, { "compression_loss": 0.0, "distillation_loss": 1.1779638528823853, "epoch": 0.48, "learning_rate": 5.863002649662984e-05, "loss": 1.1689, "step": 1340, "task_loss": 1.1839759349822998 }, { "compression_loss": 0.0, "distillation_loss": 1.4716678857803345, "epoch": 0.49, "learning_rate": 5.860960165030423e-05, "loss": 1.1607, "step": 1350, "task_loss": 1.0619685649871826 }, { "compression_loss": 0.0, "distillation_loss": 0.9555326700210571, "epoch": 0.49, "learning_rate": 5.858902928325205e-05, "loss": 1.1595, "step": 1360, "task_loss": 0.7998465299606323 }, { "compression_loss": 0.0, "distillation_loss": 1.0534662008285522, "epoch": 0.5, "learning_rate": 5.856830950155133e-05, "loss": 1.1149, "step": 1370, "task_loss": 0.737253725528717 }, { "compression_loss": 0.0, "distillation_loss": 1.4515752792358398, "epoch": 0.5, "learning_rate": 5.854744241204025e-05, "loss": 1.1515, "step": 1380, "task_loss": 1.2615107297897339 }, { "compression_loss": 0.0, "distillation_loss": 1.1234278678894043, "epoch": 0.5, "learning_rate": 5.852642812231654e-05, "loss": 1.1093, "step": 1390, "task_loss": 1.0702711343765259 }, { "compression_loss": 0.0, "distillation_loss": 1.302065134048462, "epoch": 0.51, "learning_rate": 5.850526674073692e-05, "loss": 1.1776, "step": 1400, "task_loss": 0.933853268623352 }, { "compression_loss": 0.0, "distillation_loss": 0.9075888395309448, "epoch": 0.51, "learning_rate": 5.848395837641663e-05, "loss": 0.9961, "step": 1410, "task_loss": 0.7664486169815063 }, { "compression_loss": 0.0, "distillation_loss": 1.0795106887817383, "epoch": 0.51, "learning_rate": 5.8462503139228716e-05, "loss": 1.1341, "step": 1420, "task_loss": 0.921210765838623 }, { "compression_loss": 0.0, "distillation_loss": 1.2469671964645386, "epoch": 0.52, "learning_rate": 5.844090113980363e-05, "loss": 1.1763, "step": 1430, "task_loss": 0.5581917762756348 }, { "compression_loss": 0.0, "distillation_loss": 1.1218199729919434, "epoch": 0.52, "learning_rate": 5.8419152489528536e-05, "loss": 1.1253, "step": 1440, "task_loss": 0.9810710549354553 }, { "compression_loss": 0.0, "distillation_loss": 1.1303911209106445, "epoch": 0.52, "learning_rate": 5.839725730054677e-05, "loss": 1.1801, "step": 1450, "task_loss": 0.9915053248405457 }, { "compression_loss": 0.0, "distillation_loss": 0.9778804779052734, "epoch": 0.53, "learning_rate": 5.837521568575732e-05, "loss": 1.1791, "step": 1460, "task_loss": 0.6165598630905151 }, { "compression_loss": 0.0, "distillation_loss": 0.9693446159362793, "epoch": 0.53, "learning_rate": 5.835302775881414e-05, "loss": 1.0693, "step": 1470, "task_loss": 0.7351759076118469 }, { "compression_loss": 0.0, "distillation_loss": 1.2816810607910156, "epoch": 0.53, "learning_rate": 5.8330693634125654e-05, "loss": 1.1872, "step": 1480, "task_loss": 0.8726886510848999 }, { "compression_loss": 0.0, "distillation_loss": 0.8330242037773132, "epoch": 0.54, "learning_rate": 5.830821342685411e-05, "loss": 1.0149, "step": 1490, "task_loss": 0.8250100612640381 }, { "compression_loss": 0.0, "distillation_loss": 1.049477219581604, "epoch": 0.54, "learning_rate": 5.8285587252915027e-05, "loss": 1.1407, "step": 1500, "task_loss": 1.0598646402359009 }, { "epoch": 0.54, "eval_exact_match": 79.03500473036897, "eval_f1": 87.58001380842249, "step": 1500 }, { "compression_loss": 0.0, "distillation_loss": 0.89173424243927, "epoch": 0.55, "learning_rate": 5.8262815228976555e-05, "loss": 1.2533, "step": 1510, "task_loss": 0.8165568113327026 }, { "compression_loss": 0.0, "distillation_loss": 1.0344847440719604, "epoch": 0.55, "learning_rate": 5.823989747245893e-05, "loss": 1.0947, "step": 1520, "task_loss": 0.7850322723388672 }, { "compression_loss": 0.0, "distillation_loss": 1.1379281282424927, "epoch": 0.55, "learning_rate": 5.821683410153377e-05, "loss": 1.1351, "step": 1530, "task_loss": 1.0284912586212158 }, { "compression_loss": 0.0, "distillation_loss": 1.4056248664855957, "epoch": 0.56, "learning_rate": 5.8193625235123606e-05, "loss": 1.2041, "step": 1540, "task_loss": 1.2799898386001587 }, { "compression_loss": 0.0, "distillation_loss": 1.1627094745635986, "epoch": 0.56, "learning_rate": 5.817027099290115e-05, "loss": 1.0303, "step": 1550, "task_loss": 1.232445240020752 }, { "compression_loss": 0.0, "distillation_loss": 1.2967350482940674, "epoch": 0.56, "learning_rate": 5.814677149528872e-05, "loss": 1.1817, "step": 1560, "task_loss": 1.5413718223571777 }, { "compression_loss": 0.0, "distillation_loss": 0.879601776599884, "epoch": 0.57, "learning_rate": 5.812312686345763e-05, "loss": 1.0928, "step": 1570, "task_loss": 0.42186737060546875 }, { "compression_loss": 0.0, "distillation_loss": 1.1352674961090088, "epoch": 0.57, "learning_rate": 5.809933721932756e-05, "loss": 1.1005, "step": 1580, "task_loss": 1.1184422969818115 }, { "compression_loss": 0.0, "distillation_loss": 1.5701181888580322, "epoch": 0.57, "learning_rate": 5.8075402685565904e-05, "loss": 1.2745, "step": 1590, "task_loss": 0.9984648823738098 }, { "compression_loss": 0.0, "distillation_loss": 1.1425367593765259, "epoch": 0.58, "learning_rate": 5.805132338558717e-05, "loss": 1.2407, "step": 1600, "task_loss": 0.7507245540618896 }, { "compression_loss": 0.0, "distillation_loss": 0.9696254730224609, "epoch": 0.58, "learning_rate": 5.8027099443552336e-05, "loss": 1.2035, "step": 1610, "task_loss": 0.4481644630432129 }, { "compression_loss": 0.0, "distillation_loss": 1.1334104537963867, "epoch": 0.59, "learning_rate": 5.800273098436818e-05, "loss": 1.1432, "step": 1620, "task_loss": 0.890143096446991 }, { "compression_loss": 0.0, "distillation_loss": 1.0847598314285278, "epoch": 0.59, "learning_rate": 5.7978218133686684e-05, "loss": 1.1767, "step": 1630, "task_loss": 1.5157380104064941 }, { "compression_loss": 0.0, "distillation_loss": 1.137290358543396, "epoch": 0.59, "learning_rate": 5.795356101790434e-05, "loss": 1.1079, "step": 1640, "task_loss": 1.1900906562805176 }, { "compression_loss": 0.0, "distillation_loss": 0.9380532503128052, "epoch": 0.6, "learning_rate": 5.792875976416152e-05, "loss": 1.1299, "step": 1650, "task_loss": 0.44667813181877136 }, { "compression_loss": 0.0, "distillation_loss": 1.4331817626953125, "epoch": 0.6, "learning_rate": 5.7903814500341845e-05, "loss": 1.0832, "step": 1660, "task_loss": 1.059866189956665 }, { "compression_loss": 0.0, "distillation_loss": 1.4325172901153564, "epoch": 0.6, "learning_rate": 5.7878725355071475e-05, "loss": 1.1974, "step": 1670, "task_loss": 1.4602153301239014 }, { "compression_loss": 0.0, "distillation_loss": 0.9795972108840942, "epoch": 0.61, "learning_rate": 5.785349245771848e-05, "loss": 0.9908, "step": 1680, "task_loss": 0.6619421243667603 }, { "compression_loss": 0.0, "distillation_loss": 0.9228504300117493, "epoch": 0.61, "learning_rate": 5.782811593839217e-05, "loss": 1.0195, "step": 1690, "task_loss": 0.7317041158676147 }, { "compression_loss": 0.0, "distillation_loss": 1.483938217163086, "epoch": 0.61, "learning_rate": 5.78025959279424e-05, "loss": 1.1531, "step": 1700, "task_loss": 1.3933497667312622 }, { "compression_loss": 0.0, "distillation_loss": 1.2623841762542725, "epoch": 0.62, "learning_rate": 5.777693255795894e-05, "loss": 1.1414, "step": 1710, "task_loss": 1.1138461828231812 }, { "compression_loss": 0.0, "distillation_loss": 1.1957447528839111, "epoch": 0.62, "learning_rate": 5.775112596077074e-05, "loss": 1.1567, "step": 1720, "task_loss": 0.7848667502403259 }, { "compression_loss": 0.0, "distillation_loss": 1.0970215797424316, "epoch": 0.63, "learning_rate": 5.772517626944531e-05, "loss": 1.1256, "step": 1730, "task_loss": 1.2455201148986816 }, { "compression_loss": 0.0, "distillation_loss": 1.2585291862487793, "epoch": 0.63, "learning_rate": 5.769908361778797e-05, "loss": 1.0587, "step": 1740, "task_loss": 1.0000256299972534 }, { "compression_loss": 0.0, "distillation_loss": 1.362696886062622, "epoch": 0.63, "learning_rate": 5.76728481403412e-05, "loss": 1.0007, "step": 1750, "task_loss": 0.9608546495437622 }, { "epoch": 0.63, "eval_exact_match": 79.28098391674551, "eval_f1": 87.6563371777527, "step": 1750 }, { "compression_loss": 0.0, "distillation_loss": 0.8324779272079468, "epoch": 0.64, "learning_rate": 5.764646997238398e-05, "loss": 1.134, "step": 1760, "task_loss": 0.6974785923957825 }, { "compression_loss": 0.0, "distillation_loss": 1.3671345710754395, "epoch": 0.64, "learning_rate": 5.761994924993097e-05, "loss": 1.0616, "step": 1770, "task_loss": 0.9882352948188782 }, { "compression_loss": 0.0, "distillation_loss": 1.3772859573364258, "epoch": 0.64, "learning_rate": 5.759328610973195e-05, "loss": 1.2092, "step": 1780, "task_loss": 1.0786349773406982 }, { "compression_loss": 0.0, "distillation_loss": 0.871800422668457, "epoch": 0.65, "learning_rate": 5.756648068927106e-05, "loss": 1.0171, "step": 1790, "task_loss": 0.830288827419281 }, { "compression_loss": 0.0, "distillation_loss": 1.5783218145370483, "epoch": 0.65, "learning_rate": 5.753953312676602e-05, "loss": 1.255, "step": 1800, "task_loss": 1.0050296783447266 }, { "compression_loss": 0.0, "distillation_loss": 1.3028333187103271, "epoch": 0.65, "learning_rate": 5.751244356116755e-05, "loss": 1.0499, "step": 1810, "task_loss": 1.112877607345581 }, { "compression_loss": 0.0, "distillation_loss": 1.003974437713623, "epoch": 0.66, "learning_rate": 5.7485212132158555e-05, "loss": 1.0624, "step": 1820, "task_loss": 0.4431191086769104 }, { "compression_loss": 0.0, "distillation_loss": 0.9127657413482666, "epoch": 0.66, "learning_rate": 5.7457838980153446e-05, "loss": 1.1427, "step": 1830, "task_loss": 0.7105373740196228 }, { "compression_loss": 0.0, "distillation_loss": 1.3443561792373657, "epoch": 0.66, "learning_rate": 5.7430324246297385e-05, "loss": 1.0933, "step": 1840, "task_loss": 1.1131591796875 }, { "compression_loss": 0.0, "distillation_loss": 1.2187169790267944, "epoch": 0.67, "learning_rate": 5.74026680724656e-05, "loss": 1.1155, "step": 1850, "task_loss": 1.3410766124725342 }, { "compression_loss": 0.0, "distillation_loss": 0.7574527263641357, "epoch": 0.67, "learning_rate": 5.737487060126263e-05, "loss": 1.0802, "step": 1860, "task_loss": 0.42934900522232056 }, { "compression_loss": 0.0, "distillation_loss": 1.1824791431427002, "epoch": 0.68, "learning_rate": 5.734693197602157e-05, "loss": 1.1564, "step": 1870, "task_loss": 0.9754879474639893 }, { "compression_loss": 0.0, "distillation_loss": 1.1144288778305054, "epoch": 0.68, "learning_rate": 5.731885234080337e-05, "loss": 1.0964, "step": 1880, "task_loss": 0.7791981101036072 }, { "compression_loss": 0.0, "distillation_loss": 1.101729154586792, "epoch": 0.68, "learning_rate": 5.729063184039608e-05, "loss": 1.073, "step": 1890, "task_loss": 0.9119380712509155 }, { "compression_loss": 0.0, "distillation_loss": 0.7584141492843628, "epoch": 0.69, "learning_rate": 5.7262270620314076e-05, "loss": 1.0943, "step": 1900, "task_loss": 0.6965209245681763 }, { "compression_loss": 0.0, "distillation_loss": 0.7631694078445435, "epoch": 0.69, "learning_rate": 5.7233768826797364e-05, "loss": 1.0534, "step": 1910, "task_loss": 0.8611739873886108 }, { "compression_loss": 0.0, "distillation_loss": 0.8627794981002808, "epoch": 0.69, "learning_rate": 5.720512660681077e-05, "loss": 1.1024, "step": 1920, "task_loss": 0.5158926248550415 }, { "compression_loss": 0.0, "distillation_loss": 1.1938802003860474, "epoch": 0.7, "learning_rate": 5.717634410804319e-05, "loss": 1.0845, "step": 1930, "task_loss": 1.100459337234497 }, { "compression_loss": 0.0, "distillation_loss": 0.9154514074325562, "epoch": 0.7, "learning_rate": 5.71474214789069e-05, "loss": 1.1354, "step": 1940, "task_loss": 0.6148239374160767 }, { "compression_loss": 0.0, "distillation_loss": 1.3251101970672607, "epoch": 0.7, "learning_rate": 5.711835886853667e-05, "loss": 1.2525, "step": 1950, "task_loss": 0.8512697219848633 }, { "compression_loss": 0.0, "distillation_loss": 0.8448692560195923, "epoch": 0.71, "learning_rate": 5.708915642678912e-05, "loss": 1.0824, "step": 1960, "task_loss": 0.6256545782089233 }, { "compression_loss": 0.0, "distillation_loss": 0.6884149312973022, "epoch": 0.71, "learning_rate": 5.705981430424184e-05, "loss": 0.9549, "step": 1970, "task_loss": 0.3767446279525757 }, { "compression_loss": 0.0, "distillation_loss": 1.3547323942184448, "epoch": 0.72, "learning_rate": 5.7030332652192685e-05, "loss": 1.0335, "step": 1980, "task_loss": 1.198007345199585 }, { "compression_loss": 0.0, "distillation_loss": 0.630234956741333, "epoch": 0.72, "learning_rate": 5.700071162265897e-05, "loss": 1.0725, "step": 1990, "task_loss": 0.7066274881362915 }, { "compression_loss": 0.0, "distillation_loss": 1.0253219604492188, "epoch": 0.72, "learning_rate": 5.697095136837669e-05, "loss": 1.1779, "step": 2000, "task_loss": 1.2108477354049683 }, { "epoch": 0.72, "eval_exact_match": 79.67833491012298, "eval_f1": 87.87620766435882, "step": 2000 }, { "compression_loss": 0.0, "distillation_loss": 0.978434145450592, "epoch": 0.73, "learning_rate": 5.6941052042799705e-05, "loss": 1.008, "step": 2010, "task_loss": 1.1101996898651123 }, { "compression_loss": 0.0, "distillation_loss": 1.021071195602417, "epoch": 0.73, "learning_rate": 5.6911013800099014e-05, "loss": 1.0764, "step": 2020, "task_loss": 0.9951556921005249 }, { "compression_loss": 0.0, "distillation_loss": 1.0078377723693848, "epoch": 0.73, "learning_rate": 5.688083679516188e-05, "loss": 1.1377, "step": 2030, "task_loss": 0.8278811573982239 }, { "compression_loss": 0.0, "distillation_loss": 0.949942409992218, "epoch": 0.74, "learning_rate": 5.6850521183591096e-05, "loss": 1.0232, "step": 2040, "task_loss": 1.2979423999786377 }, { "compression_loss": 0.0, "distillation_loss": 1.1799646615982056, "epoch": 0.74, "learning_rate": 5.6820067121704145e-05, "loss": 1.1882, "step": 2050, "task_loss": 0.8740980625152588 }, { "compression_loss": 0.0, "distillation_loss": 1.05833101272583, "epoch": 0.74, "learning_rate": 5.678947476653241e-05, "loss": 1.0395, "step": 2060, "task_loss": 0.6604486703872681 }, { "compression_loss": 0.0, "distillation_loss": 1.5961391925811768, "epoch": 0.75, "learning_rate": 5.675874427582037e-05, "loss": 1.1881, "step": 2070, "task_loss": 1.4038045406341553 }, { "compression_loss": 0.0, "distillation_loss": 0.8555396795272827, "epoch": 0.75, "learning_rate": 5.6727875808024764e-05, "loss": 1.0642, "step": 2080, "task_loss": 0.6731501221656799 }, { "compression_loss": 0.0, "distillation_loss": 0.9887014627456665, "epoch": 0.76, "learning_rate": 5.669686952231379e-05, "loss": 1.0166, "step": 2090, "task_loss": 0.642594039440155 }, { "compression_loss": 0.0, "distillation_loss": 0.7086762189865112, "epoch": 0.76, "learning_rate": 5.6665725578566285e-05, "loss": 1.0289, "step": 2100, "task_loss": 0.5841311812400818 }, { "compression_loss": 0.0, "distillation_loss": 1.1380422115325928, "epoch": 0.76, "learning_rate": 5.663444413737089e-05, "loss": 1.1594, "step": 2110, "task_loss": 0.7255545258522034 }, { "compression_loss": 0.0, "distillation_loss": 0.8511086702346802, "epoch": 0.77, "learning_rate": 5.660302536002525e-05, "loss": 1.2183, "step": 2120, "task_loss": 0.7066528797149658 }, { "compression_loss": 0.0, "distillation_loss": 1.0993759632110596, "epoch": 0.77, "learning_rate": 5.657146940853514e-05, "loss": 1.008, "step": 2130, "task_loss": 0.6117959022521973 }, { "compression_loss": 0.0, "distillation_loss": 1.3931245803833008, "epoch": 0.77, "learning_rate": 5.6539776445613644e-05, "loss": 1.1547, "step": 2140, "task_loss": 0.8887019157409668 }, { "compression_loss": 0.0, "distillation_loss": 0.7988817691802979, "epoch": 0.78, "learning_rate": 5.6507946634680345e-05, "loss": 0.9199, "step": 2150, "task_loss": 0.5578777194023132 }, { "compression_loss": 0.0, "distillation_loss": 0.683596134185791, "epoch": 0.78, "learning_rate": 5.647598013986046e-05, "loss": 1.0362, "step": 2160, "task_loss": 0.5091426968574524 }, { "compression_loss": 0.0, "distillation_loss": 1.0852502584457397, "epoch": 0.78, "learning_rate": 5.644387712598397e-05, "loss": 1.0067, "step": 2170, "task_loss": 1.3019379377365112 }, { "compression_loss": 0.0, "distillation_loss": 1.2326734066009521, "epoch": 0.79, "learning_rate": 5.641163775858481e-05, "loss": 1.0256, "step": 2180, "task_loss": 1.3112915754318237 }, { "compression_loss": 0.0, "distillation_loss": 0.7502934336662292, "epoch": 0.79, "learning_rate": 5.637926220390003e-05, "loss": 1.1512, "step": 2190, "task_loss": 0.5557284951210022 }, { "compression_loss": 0.0, "distillation_loss": 1.5017701387405396, "epoch": 0.8, "learning_rate": 5.6346750628868835e-05, "loss": 1.1388, "step": 2200, "task_loss": 1.1129333972930908 }, { "compression_loss": 0.0, "distillation_loss": 0.8690292835235596, "epoch": 0.8, "learning_rate": 5.631410320113186e-05, "loss": 1.0063, "step": 2210, "task_loss": 0.8763360977172852 }, { "compression_loss": 0.0, "distillation_loss": 1.0584996938705444, "epoch": 0.8, "learning_rate": 5.628132008903022e-05, "loss": 1.0087, "step": 2220, "task_loss": 0.5723036527633667 }, { "compression_loss": 0.0, "distillation_loss": 1.3691318035125732, "epoch": 0.81, "learning_rate": 5.624840146160466e-05, "loss": 1.0832, "step": 2230, "task_loss": 1.132154941558838 }, { "compression_loss": 0.0, "distillation_loss": 1.2483141422271729, "epoch": 0.81, "learning_rate": 5.62153474885947e-05, "loss": 1.1432, "step": 2240, "task_loss": 0.8840174674987793 }, { "compression_loss": 0.0, "distillation_loss": 1.1159393787384033, "epoch": 0.81, "learning_rate": 5.618215834043773e-05, "loss": 1.1353, "step": 2250, "task_loss": 0.7122650146484375 }, { "epoch": 0.81, "eval_exact_match": 79.82970671712394, "eval_f1": 87.98350972516127, "step": 2250 }, { "compression_loss": 0.0, "distillation_loss": 0.971450686454773, "epoch": 0.82, "learning_rate": 5.614883418826817e-05, "loss": 0.9947, "step": 2260, "task_loss": 1.4125139713287354 }, { "compression_loss": 0.0, "distillation_loss": 1.3253803253173828, "epoch": 0.82, "learning_rate": 5.6115375203916534e-05, "loss": 1.0477, "step": 2270, "task_loss": 1.3677072525024414 }, { "compression_loss": 0.0, "distillation_loss": 1.0992299318313599, "epoch": 0.82, "learning_rate": 5.6081781559908616e-05, "loss": 1.0079, "step": 2280, "task_loss": 0.9259253144264221 }, { "compression_loss": 0.0, "distillation_loss": 0.9781779646873474, "epoch": 0.83, "learning_rate": 5.604805342946454e-05, "loss": 1.0634, "step": 2290, "task_loss": 0.6949033737182617 }, { "compression_loss": 0.0, "distillation_loss": 0.9337608218193054, "epoch": 0.83, "learning_rate": 5.601419098649789e-05, "loss": 1.0581, "step": 2300, "task_loss": 0.7444375157356262 }, { "compression_loss": 0.0, "distillation_loss": 0.783240556716919, "epoch": 0.83, "learning_rate": 5.5980194405614785e-05, "loss": 1.016, "step": 2310, "task_loss": 0.6494660377502441 }, { "compression_loss": 0.0, "distillation_loss": 1.2547146081924438, "epoch": 0.84, "learning_rate": 5.5946063862113056e-05, "loss": 1.0121, "step": 2320, "task_loss": 1.5860847234725952 }, { "compression_loss": 0.0, "distillation_loss": 1.198014497756958, "epoch": 0.84, "learning_rate": 5.591179953198124e-05, "loss": 0.9877, "step": 2330, "task_loss": 0.8051793575286865 }, { "compression_loss": 0.0, "distillation_loss": 0.6500747799873352, "epoch": 0.85, "learning_rate": 5.587740159189777e-05, "loss": 1.06, "step": 2340, "task_loss": 0.9356144666671753 }, { "compression_loss": 0.0, "distillation_loss": 0.9654289484024048, "epoch": 0.85, "learning_rate": 5.584287021922997e-05, "loss": 1.2073, "step": 2350, "task_loss": 0.7282505035400391 }, { "compression_loss": 0.0, "distillation_loss": 1.4243820905685425, "epoch": 0.85, "learning_rate": 5.5808205592033224e-05, "loss": 1.0644, "step": 2360, "task_loss": 1.230690360069275 }, { "compression_loss": 0.0, "distillation_loss": 1.6079635620117188, "epoch": 0.86, "learning_rate": 5.5773407889049986e-05, "loss": 1.07, "step": 2370, "task_loss": 1.2446208000183105 }, { "compression_loss": 0.0, "distillation_loss": 0.937513530254364, "epoch": 0.86, "learning_rate": 5.573847728970894e-05, "loss": 1.0576, "step": 2380, "task_loss": 0.9011927247047424 }, { "compression_loss": 0.0, "distillation_loss": 1.1456040143966675, "epoch": 0.86, "learning_rate": 5.570341397412398e-05, "loss": 1.1172, "step": 2390, "task_loss": 1.1431217193603516 }, { "compression_loss": 0.0, "distillation_loss": 0.9871031045913696, "epoch": 0.87, "learning_rate": 5.5668218123093375e-05, "loss": 1.0467, "step": 2400, "task_loss": 1.1605985164642334 }, { "compression_loss": 0.0, "distillation_loss": 0.9994000196456909, "epoch": 0.87, "learning_rate": 5.563288991809875e-05, "loss": 1.2362, "step": 2410, "task_loss": 0.7171075940132141 }, { "compression_loss": 0.0, "distillation_loss": 1.3609836101531982, "epoch": 0.87, "learning_rate": 5.559742954130421e-05, "loss": 1.147, "step": 2420, "task_loss": 1.1994080543518066 }, { "compression_loss": 0.0, "distillation_loss": 0.9730468988418579, "epoch": 0.88, "learning_rate": 5.556183717555539e-05, "loss": 0.9454, "step": 2430, "task_loss": 0.6406092047691345 }, { "compression_loss": 0.0, "distillation_loss": 1.6829783916473389, "epoch": 0.88, "learning_rate": 5.552611300437849e-05, "loss": 1.0889, "step": 2440, "task_loss": 1.5461273193359375 }, { "compression_loss": 0.0, "distillation_loss": 1.061495065689087, "epoch": 0.89, "learning_rate": 5.549025721197935e-05, "loss": 1.0418, "step": 2450, "task_loss": 0.6560171842575073 }, { "compression_loss": 0.0, "distillation_loss": 0.8872650861740112, "epoch": 0.89, "learning_rate": 5.54542699832425e-05, "loss": 0.9725, "step": 2460, "task_loss": 0.8630092740058899 }, { "compression_loss": 0.0, "distillation_loss": 0.9294049739837646, "epoch": 0.89, "learning_rate": 5.5418151503730185e-05, "loss": 0.9812, "step": 2470, "task_loss": 0.8304383158683777 }, { "compression_loss": 0.0, "distillation_loss": 1.1200015544891357, "epoch": 0.9, "learning_rate": 5.538190195968143e-05, "loss": 0.8769, "step": 2480, "task_loss": 0.8949685096740723 }, { "compression_loss": 0.0, "distillation_loss": 1.1685872077941895, "epoch": 0.9, "learning_rate": 5.534552153801109e-05, "loss": 1.06, "step": 2490, "task_loss": 0.9758301973342896 }, { "compression_loss": 0.0, "distillation_loss": 1.159787893295288, "epoch": 0.9, "learning_rate": 5.530901042630882e-05, "loss": 1.0887, "step": 2500, "task_loss": 0.6261301040649414 }, { "epoch": 0.9, "eval_exact_match": 80.66225165562913, "eval_f1": 88.41245606075655, "step": 2500 }, { "compression_loss": 0.0, "distillation_loss": 0.9179141521453857, "epoch": 0.91, "learning_rate": 5.527236881283822e-05, "loss": 1.0642, "step": 2510, "task_loss": 0.7565745711326599 }, { "compression_loss": 0.0, "distillation_loss": 1.0321706533432007, "epoch": 0.91, "learning_rate": 5.523559688653577e-05, "loss": 1.0146, "step": 2520, "task_loss": 0.9108026027679443 }, { "compression_loss": 0.0, "distillation_loss": 0.8541651368141174, "epoch": 0.91, "learning_rate": 5.519869483700986e-05, "loss": 0.937, "step": 2530, "task_loss": 0.887135922908783 }, { "compression_loss": 0.0, "distillation_loss": 0.9746845960617065, "epoch": 0.92, "learning_rate": 5.516166285453989e-05, "loss": 0.9061, "step": 2540, "task_loss": 1.0225167274475098 }, { "compression_loss": 0.0, "distillation_loss": 1.2189579010009766, "epoch": 0.92, "learning_rate": 5.5124501130075204e-05, "loss": 1.0058, "step": 2550, "task_loss": 1.3460352420806885 }, { "compression_loss": 0.0, "distillation_loss": 0.8514137864112854, "epoch": 0.93, "learning_rate": 5.508720985523416e-05, "loss": 1.0001, "step": 2560, "task_loss": 0.9920178651809692 }, { "compression_loss": 0.0, "distillation_loss": 0.7871240377426147, "epoch": 0.93, "learning_rate": 5.504978922230309e-05, "loss": 1.011, "step": 2570, "task_loss": 0.8328629732131958 }, { "compression_loss": 0.0, "distillation_loss": 0.8508641719818115, "epoch": 0.93, "learning_rate": 5.501223942423539e-05, "loss": 0.9933, "step": 2580, "task_loss": 0.5792371034622192 }, { "compression_loss": 0.0, "distillation_loss": 0.9439315795898438, "epoch": 0.94, "learning_rate": 5.4974560654650424e-05, "loss": 1.0669, "step": 2590, "task_loss": 0.7067208290100098 }, { "compression_loss": 0.0, "distillation_loss": 0.9048341512680054, "epoch": 0.94, "learning_rate": 5.4936753107832604e-05, "loss": 1.0051, "step": 2600, "task_loss": 0.78888338804245 }, { "compression_loss": 0.0, "distillation_loss": 0.8586219549179077, "epoch": 0.94, "learning_rate": 5.489881697873035e-05, "loss": 0.9638, "step": 2610, "task_loss": 0.556171178817749 }, { "compression_loss": 0.0, "distillation_loss": 0.9864273071289062, "epoch": 0.95, "learning_rate": 5.486075246295511e-05, "loss": 1.0877, "step": 2620, "task_loss": 0.6050583124160767 }, { "compression_loss": 0.0, "distillation_loss": 0.968224048614502, "epoch": 0.95, "learning_rate": 5.482255975678033e-05, "loss": 1.0996, "step": 2630, "task_loss": 1.148437261581421 }, { "compression_loss": 0.0, "distillation_loss": 0.8083895444869995, "epoch": 0.95, "learning_rate": 5.4784239057140426e-05, "loss": 1.0497, "step": 2640, "task_loss": 1.3358066082000732 }, { "compression_loss": 0.0, "distillation_loss": 1.0225319862365723, "epoch": 0.96, "learning_rate": 5.474579056162983e-05, "loss": 1.0384, "step": 2650, "task_loss": 0.7764133810997009 }, { "compression_loss": 0.0, "distillation_loss": 1.1559662818908691, "epoch": 0.96, "learning_rate": 5.470721446850191e-05, "loss": 1.0839, "step": 2660, "task_loss": 0.9642658233642578 }, { "compression_loss": 0.0, "distillation_loss": 0.7244634032249451, "epoch": 0.96, "learning_rate": 5.466851097666796e-05, "loss": 1.0632, "step": 2670, "task_loss": 0.8971454501152039 }, { "compression_loss": 0.0, "distillation_loss": 0.644745409488678, "epoch": 0.97, "learning_rate": 5.462968028569621e-05, "loss": 1.0227, "step": 2680, "task_loss": 0.508349597454071 }, { "compression_loss": 0.0, "distillation_loss": 0.6737414002418518, "epoch": 0.97, "learning_rate": 5.459072259581078e-05, "loss": 1.0816, "step": 2690, "task_loss": 0.4952778220176697 }, { "compression_loss": 0.0, "distillation_loss": 0.9121610522270203, "epoch": 0.98, "learning_rate": 5.455163810789059e-05, "loss": 0.9735, "step": 2700, "task_loss": 0.49527156352996826 }, { "compression_loss": 0.0, "distillation_loss": 0.9224643707275391, "epoch": 0.98, "learning_rate": 5.451242702346841e-05, "loss": 1.0023, "step": 2710, "task_loss": 0.9647371768951416 }, { "compression_loss": 0.0, "distillation_loss": 0.8482234477996826, "epoch": 0.98, "learning_rate": 5.447308954472978e-05, "loss": 1.0031, "step": 2720, "task_loss": 0.7513360977172852 }, { "compression_loss": 0.0, "distillation_loss": 0.9275264739990234, "epoch": 0.99, "learning_rate": 5.443362587451196e-05, "loss": 1.0583, "step": 2730, "task_loss": 0.7999730110168457 }, { "compression_loss": 0.0, "distillation_loss": 2.1053638458251953, "epoch": 0.99, "learning_rate": 5.4394036216302914e-05, "loss": 1.1403, "step": 2740, "task_loss": 1.4788403511047363 }, { "compression_loss": 0.0, "distillation_loss": 1.2016551494598389, "epoch": 0.99, "learning_rate": 5.4354320774240234e-05, "loss": 0.8911, "step": 2750, "task_loss": 0.817990243434906 }, { "epoch": 0.99, "eval_exact_match": 80.32166508987702, "eval_f1": 88.34998419616365, "step": 2750 }, { "compression_loss": 0.0, "distillation_loss": 1.011232614517212, "epoch": 1.0, "learning_rate": 5.431447975311009e-05, "loss": 1.0087, "step": 2760, "task_loss": 0.7390681505203247 }, { "compression_loss": 0.0, "distillation_loss": 0.7077229022979736, "epoch": 1.0, "learning_rate": 5.427451335834618e-05, "loss": 0.8541, "step": 2770, "task_loss": 0.632800817489624 }, { "compression_loss": 0.0, "distillation_loss": 0.930655837059021, "epoch": 1.0, "learning_rate": 5.4234421796028677e-05, "loss": 0.8487, "step": 2780, "task_loss": 0.698851466178894 }, { "compression_loss": 0.0, "distillation_loss": 0.7608747482299805, "epoch": 1.01, "learning_rate": 5.419420527288317e-05, "loss": 0.7985, "step": 2790, "task_loss": 0.4454770088195801 }, { "compression_loss": 0.0, "distillation_loss": 0.916797399520874, "epoch": 1.01, "learning_rate": 5.415386399627955e-05, "loss": 0.8375, "step": 2800, "task_loss": 0.9108859896659851 }, { "compression_loss": 0.0, "distillation_loss": 1.0602134466171265, "epoch": 1.02, "learning_rate": 5.411339817423103e-05, "loss": 0.8843, "step": 2810, "task_loss": 1.0451345443725586 }, { "compression_loss": 0.0, "distillation_loss": 0.8087363839149475, "epoch": 1.02, "learning_rate": 5.4072808015392984e-05, "loss": 0.8028, "step": 2820, "task_loss": 0.8453482389450073 }, { "compression_loss": 0.0, "distillation_loss": 0.7011986374855042, "epoch": 1.02, "learning_rate": 5.403209372906192e-05, "loss": 0.8078, "step": 2830, "task_loss": 0.6264968514442444 }, { "compression_loss": 0.0, "distillation_loss": 0.8319069147109985, "epoch": 1.03, "learning_rate": 5.3991255525174396e-05, "loss": 0.8133, "step": 2840, "task_loss": 0.9526384472846985 }, { "compression_loss": 0.0, "distillation_loss": 1.0532593727111816, "epoch": 1.03, "learning_rate": 5.395029361430591e-05, "loss": 0.798, "step": 2850, "task_loss": 1.0793445110321045 }, { "compression_loss": 0.0, "distillation_loss": 1.1473658084869385, "epoch": 1.03, "learning_rate": 5.3909208207669864e-05, "loss": 0.8647, "step": 2860, "task_loss": 0.8664270639419556 }, { "compression_loss": 0.0, "distillation_loss": 1.2761093378067017, "epoch": 1.04, "learning_rate": 5.3867999517116424e-05, "loss": 0.8462, "step": 2870, "task_loss": 0.9831790924072266 }, { "compression_loss": 0.0, "distillation_loss": 0.8598681688308716, "epoch": 1.04, "learning_rate": 5.382666775513145e-05, "loss": 0.7603, "step": 2880, "task_loss": 0.5603002309799194 }, { "compression_loss": 0.0, "distillation_loss": 0.5312719941139221, "epoch": 1.04, "learning_rate": 5.378521313483541e-05, "loss": 0.7213, "step": 2890, "task_loss": 0.5749783515930176 }, { "compression_loss": 0.0, "distillation_loss": 0.8793454766273499, "epoch": 1.05, "learning_rate": 5.3743635869982276e-05, "loss": 0.872, "step": 2900, "task_loss": 0.5542865991592407 }, { "compression_loss": 0.0, "distillation_loss": 0.6059684753417969, "epoch": 1.05, "learning_rate": 5.370193617495839e-05, "loss": 0.7783, "step": 2910, "task_loss": 1.0900731086730957 }, { "compression_loss": 0.0, "distillation_loss": 0.6760936379432678, "epoch": 1.06, "learning_rate": 5.3660114264781405e-05, "loss": 0.7037, "step": 2920, "task_loss": 0.3957022428512573 }, { "compression_loss": 0.0, "distillation_loss": 0.9233886003494263, "epoch": 1.06, "learning_rate": 5.361817035509917e-05, "loss": 0.8746, "step": 2930, "task_loss": 1.043057918548584 }, { "compression_loss": 0.0, "distillation_loss": 0.6635424494743347, "epoch": 1.06, "learning_rate": 5.357610466218856e-05, "loss": 0.8787, "step": 2940, "task_loss": 0.712294340133667 }, { "compression_loss": 0.0, "distillation_loss": 0.6662827730178833, "epoch": 1.07, "learning_rate": 5.3533917402954456e-05, "loss": 0.7418, "step": 2950, "task_loss": 0.5668191313743591 }, { "compression_loss": 0.0, "distillation_loss": 0.7443856000900269, "epoch": 1.07, "learning_rate": 5.349160879492854e-05, "loss": 0.8513, "step": 2960, "task_loss": 0.8575096726417542 }, { "compression_loss": 0.0, "distillation_loss": 0.9767590761184692, "epoch": 1.07, "learning_rate": 5.3449179056268235e-05, "loss": 0.7926, "step": 2970, "task_loss": 0.8986884951591492 }, { "compression_loss": 0.0, "distillation_loss": 1.0205166339874268, "epoch": 1.08, "learning_rate": 5.340662840575553e-05, "loss": 0.8059, "step": 2980, "task_loss": 1.0653899908065796 }, { "compression_loss": 0.0, "distillation_loss": 0.7430188655853271, "epoch": 1.08, "learning_rate": 5.336395706279589e-05, "loss": 0.818, "step": 2990, "task_loss": 0.44489508867263794 }, { "compression_loss": 0.0, "distillation_loss": 0.6891306638717651, "epoch": 1.08, "learning_rate": 5.33211652474171e-05, "loss": 0.8024, "step": 3000, "task_loss": 0.9418755769729614 }, { "epoch": 1.08, "eval_exact_match": 80.1608325449385, "eval_f1": 88.15996436894764, "step": 3000 }, { "compression_loss": 0.0, "distillation_loss": 0.876007080078125, "epoch": 1.09, "learning_rate": 5.327825318026816e-05, "loss": 0.7745, "step": 3010, "task_loss": 0.6779855489730835 }, { "compression_loss": 0.0, "distillation_loss": 0.8885762095451355, "epoch": 1.09, "learning_rate": 5.323522108261813e-05, "loss": 0.8159, "step": 3020, "task_loss": 0.6267703175544739 }, { "compression_loss": 0.0, "distillation_loss": 0.872785747051239, "epoch": 1.1, "learning_rate": 5.319206917635494e-05, "loss": 0.8425, "step": 3030, "task_loss": 0.7042969465255737 }, { "compression_loss": 0.0, "distillation_loss": 0.7279747128486633, "epoch": 1.1, "learning_rate": 5.314879768398437e-05, "loss": 0.8151, "step": 3040, "task_loss": 0.6893462538719177 }, { "compression_loss": 0.0, "distillation_loss": 0.8534276485443115, "epoch": 1.1, "learning_rate": 5.310540682862876e-05, "loss": 0.9091, "step": 3050, "task_loss": 1.0376639366149902 }, { "compression_loss": 0.0, "distillation_loss": 0.9732213020324707, "epoch": 1.11, "learning_rate": 5.306189683402595e-05, "loss": 0.8051, "step": 3060, "task_loss": 1.1177102327346802 }, { "compression_loss": 0.0, "distillation_loss": 0.8836879730224609, "epoch": 1.11, "learning_rate": 5.3018267924528124e-05, "loss": 0.8283, "step": 3070, "task_loss": 1.0020360946655273 }, { "compression_loss": 0.0, "distillation_loss": 0.6803722381591797, "epoch": 1.11, "learning_rate": 5.29745203251006e-05, "loss": 0.8084, "step": 3080, "task_loss": 0.797760546207428 }, { "compression_loss": 0.0, "distillation_loss": 0.9686804413795471, "epoch": 1.12, "learning_rate": 5.293065426132069e-05, "loss": 0.8542, "step": 3090, "task_loss": 0.6910236477851868 }, { "compression_loss": 0.0, "distillation_loss": 0.6933484077453613, "epoch": 1.12, "learning_rate": 5.28866699593766e-05, "loss": 0.7601, "step": 3100, "task_loss": 0.6895896196365356 }, { "compression_loss": 0.0, "distillation_loss": 0.9216572046279907, "epoch": 1.12, "learning_rate": 5.284256764606617e-05, "loss": 0.7732, "step": 3110, "task_loss": 0.7823605537414551 }, { "compression_loss": 0.0, "distillation_loss": 0.677863597869873, "epoch": 1.13, "learning_rate": 5.279834754879575e-05, "loss": 0.7433, "step": 3120, "task_loss": 0.5516537427902222 }, { "compression_loss": 0.0, "distillation_loss": 0.7393268346786499, "epoch": 1.13, "learning_rate": 5.275400989557905e-05, "loss": 0.8662, "step": 3130, "task_loss": 0.4517717659473419 }, { "compression_loss": 0.0, "distillation_loss": 0.9628137350082397, "epoch": 1.13, "learning_rate": 5.270955491503589e-05, "loss": 0.8509, "step": 3140, "task_loss": 0.9863475561141968 }, { "compression_loss": 0.0, "distillation_loss": 0.8606938719749451, "epoch": 1.14, "learning_rate": 5.266498283639113e-05, "loss": 0.8131, "step": 3150, "task_loss": 0.9285368919372559 }, { "compression_loss": 0.0, "distillation_loss": 0.9628182649612427, "epoch": 1.14, "learning_rate": 5.2620293889473387e-05, "loss": 0.8319, "step": 3160, "task_loss": 1.1551557779312134 }, { "compression_loss": 0.0, "distillation_loss": 0.8328995704650879, "epoch": 1.15, "learning_rate": 5.257548830471388e-05, "loss": 0.8528, "step": 3170, "task_loss": 0.7933187484741211 }, { "compression_loss": 0.0, "distillation_loss": 0.6802824139595032, "epoch": 1.15, "learning_rate": 5.25305663131453e-05, "loss": 0.8096, "step": 3180, "task_loss": 0.6108599305152893 }, { "compression_loss": 0.0, "distillation_loss": 0.7289232015609741, "epoch": 1.15, "learning_rate": 5.2485528146400505e-05, "loss": 0.8256, "step": 3190, "task_loss": 1.2673640251159668 }, { "compression_loss": 0.0, "distillation_loss": 0.7009760141372681, "epoch": 1.16, "learning_rate": 5.244037403671146e-05, "loss": 0.7034, "step": 3200, "task_loss": 0.6759889125823975 }, { "compression_loss": 0.0, "distillation_loss": 0.6823523044586182, "epoch": 1.16, "learning_rate": 5.2395104216907926e-05, "loss": 0.8028, "step": 3210, "task_loss": 0.513982892036438 }, { "compression_loss": 0.0, "distillation_loss": 0.5610631704330444, "epoch": 1.16, "learning_rate": 5.234971892041632e-05, "loss": 0.7435, "step": 3220, "task_loss": 0.5406886339187622 }, { "compression_loss": 0.0, "distillation_loss": 0.918226420879364, "epoch": 1.17, "learning_rate": 5.230421838125847e-05, "loss": 0.8205, "step": 3230, "task_loss": 1.1968251466751099 }, { "compression_loss": 0.0, "distillation_loss": 0.8683892488479614, "epoch": 1.17, "learning_rate": 5.22586028340505e-05, "loss": 0.7288, "step": 3240, "task_loss": 0.45106273889541626 }, { "compression_loss": 0.0, "distillation_loss": 0.9409477710723877, "epoch": 1.17, "learning_rate": 5.2212872514001466e-05, "loss": 0.8127, "step": 3250, "task_loss": 0.9111529588699341 }, { "epoch": 1.17, "eval_exact_match": 80.80416272469253, "eval_f1": 88.54957233403387, "step": 3250 }, { "compression_loss": 0.0, "distillation_loss": 0.5494179725646973, "epoch": 1.18, "learning_rate": 5.2167027656912314e-05, "loss": 0.7938, "step": 3260, "task_loss": 0.5460392236709595 }, { "compression_loss": 0.0, "distillation_loss": 0.6427707672119141, "epoch": 1.18, "learning_rate": 5.212106849917452e-05, "loss": 0.777, "step": 3270, "task_loss": 0.673181414604187 }, { "compression_loss": 0.0, "distillation_loss": 0.8692964315414429, "epoch": 1.19, "learning_rate": 5.2074995277768956e-05, "loss": 0.8644, "step": 3280, "task_loss": 0.860024094581604 }, { "compression_loss": 0.0, "distillation_loss": 0.891358494758606, "epoch": 1.19, "learning_rate": 5.202880823026465e-05, "loss": 0.8198, "step": 3290, "task_loss": 0.9117802381515503 }, { "compression_loss": 0.0, "distillation_loss": 0.6767855882644653, "epoch": 1.19, "learning_rate": 5.198250759481754e-05, "loss": 0.8081, "step": 3300, "task_loss": 0.46542519330978394 }, { "compression_loss": 0.0, "distillation_loss": 0.6959289908409119, "epoch": 1.2, "learning_rate": 5.193609361016929e-05, "loss": 0.7415, "step": 3310, "task_loss": 0.86083984375 }, { "compression_loss": 0.0, "distillation_loss": 1.0413265228271484, "epoch": 1.2, "learning_rate": 5.188956651564602e-05, "loss": 0.8289, "step": 3320, "task_loss": 1.4512548446655273 }, { "compression_loss": 0.0, "distillation_loss": 0.7771068215370178, "epoch": 1.2, "learning_rate": 5.184292655115703e-05, "loss": 0.8058, "step": 3330, "task_loss": 0.5583703517913818 }, { "compression_loss": 0.0, "distillation_loss": 1.1072239875793457, "epoch": 1.21, "learning_rate": 5.1796173957193705e-05, "loss": 0.8859, "step": 3340, "task_loss": 1.0872106552124023 }, { "compression_loss": 0.0, "distillation_loss": 0.532997727394104, "epoch": 1.21, "learning_rate": 5.174930897482812e-05, "loss": 0.6922, "step": 3350, "task_loss": 0.584416389465332 }, { "compression_loss": 0.0, "distillation_loss": 0.8668481111526489, "epoch": 1.21, "learning_rate": 5.1702331845711894e-05, "loss": 0.7652, "step": 3360, "task_loss": 1.1341835260391235 }, { "compression_loss": 0.0, "distillation_loss": 0.8142274618148804, "epoch": 1.22, "learning_rate": 5.165524281207489e-05, "loss": 0.796, "step": 3370, "task_loss": 1.4565706253051758 }, { "compression_loss": 0.0, "distillation_loss": 0.4797128438949585, "epoch": 1.22, "learning_rate": 5.1608042116724006e-05, "loss": 0.8104, "step": 3380, "task_loss": 0.45489227771759033 }, { "compression_loss": 0.0, "distillation_loss": 0.8843653202056885, "epoch": 1.23, "learning_rate": 5.1560730003041896e-05, "loss": 0.9012, "step": 3390, "task_loss": 1.0831022262573242 }, { "compression_loss": 0.0, "distillation_loss": 0.5749521255493164, "epoch": 1.23, "learning_rate": 5.1513306714985725e-05, "loss": 0.693, "step": 3400, "task_loss": 0.51276695728302 }, { "compression_loss": 0.0, "distillation_loss": 0.558767557144165, "epoch": 1.23, "learning_rate": 5.146577249708593e-05, "loss": 0.807, "step": 3410, "task_loss": 0.6122146844863892 }, { "compression_loss": 0.0, "distillation_loss": 0.4809660017490387, "epoch": 1.24, "learning_rate": 5.1418127594444906e-05, "loss": 0.7465, "step": 3420, "task_loss": 0.35081037878990173 }, { "compression_loss": 0.0, "distillation_loss": 0.6747417449951172, "epoch": 1.24, "learning_rate": 5.1370372252735796e-05, "loss": 0.7898, "step": 3430, "task_loss": 0.8822129964828491 }, { "compression_loss": 0.0, "distillation_loss": 0.6882492303848267, "epoch": 1.24, "learning_rate": 5.1322506718201206e-05, "loss": 0.7297, "step": 3440, "task_loss": 0.6773614883422852 }, { "compression_loss": 0.0, "distillation_loss": 0.8323681354522705, "epoch": 1.25, "learning_rate": 5.1274531237651915e-05, "loss": 0.8202, "step": 3450, "task_loss": 1.0216150283813477 }, { "compression_loss": 0.0, "distillation_loss": 0.9863547682762146, "epoch": 1.25, "learning_rate": 5.122644605846565e-05, "loss": 0.8462, "step": 3460, "task_loss": 1.3056061267852783 }, { "compression_loss": 0.0, "distillation_loss": 0.7683500647544861, "epoch": 1.25, "learning_rate": 5.117825142858575e-05, "loss": 0.7162, "step": 3470, "task_loss": 0.978795051574707 }, { "compression_loss": 0.0, "distillation_loss": 0.9813886880874634, "epoch": 1.26, "learning_rate": 5.1129947596519946e-05, "loss": 0.7575, "step": 3480, "task_loss": 0.8878324031829834 }, { "compression_loss": 0.0, "distillation_loss": 1.039483666419983, "epoch": 1.26, "learning_rate": 5.1081534811339035e-05, "loss": 0.7397, "step": 3490, "task_loss": 0.7671569585800171 }, { "compression_loss": 0.0, "distillation_loss": 0.8197644948959351, "epoch": 1.26, "learning_rate": 5.10330133226756e-05, "loss": 0.8642, "step": 3500, "task_loss": 0.5664966702461243 }, { "epoch": 1.26, "eval_exact_match": 80.7379375591296, "eval_f1": 88.59756699218569, "step": 3500 }, { "compression_loss": 0.0, "distillation_loss": 0.7040039300918579, "epoch": 1.27, "learning_rate": 5.0984383380722776e-05, "loss": 0.779, "step": 3510, "task_loss": 1.0531933307647705 }, { "compression_loss": 0.0, "distillation_loss": 0.8066257834434509, "epoch": 1.27, "learning_rate": 5.0935645236232885e-05, "loss": 0.7653, "step": 3520, "task_loss": 0.7610957622528076 }, { "compression_loss": 0.0, "distillation_loss": 1.0015265941619873, "epoch": 1.28, "learning_rate": 5.088679914051619e-05, "loss": 0.7878, "step": 3530, "task_loss": 0.990369439125061 }, { "compression_loss": 0.0, "distillation_loss": 0.5788767337799072, "epoch": 1.28, "learning_rate": 5.0837845345439585e-05, "loss": 0.8144, "step": 3540, "task_loss": 0.6293957829475403 }, { "compression_loss": 0.0, "distillation_loss": 0.6693412065505981, "epoch": 1.28, "learning_rate": 5.078878410342531e-05, "loss": 0.7938, "step": 3550, "task_loss": 0.5734085440635681 }, { "compression_loss": 0.0, "distillation_loss": 0.8092402219772339, "epoch": 1.29, "learning_rate": 5.073961566744961e-05, "loss": 0.7598, "step": 3560, "task_loss": 0.8599215745925903 }, { "compression_loss": 0.0, "distillation_loss": 0.7224042415618896, "epoch": 1.29, "learning_rate": 5.069034029104148e-05, "loss": 0.7476, "step": 3570, "task_loss": 0.6502309441566467 }, { "compression_loss": 0.0, "distillation_loss": 0.5168977975845337, "epoch": 1.29, "learning_rate": 5.064095822828134e-05, "loss": 0.7329, "step": 3580, "task_loss": 0.6600958704948425 }, { "compression_loss": 0.0, "distillation_loss": 0.7091388702392578, "epoch": 1.3, "learning_rate": 5.05914697337997e-05, "loss": 0.8189, "step": 3590, "task_loss": 0.43178802728652954 }, { "compression_loss": 0.0, "distillation_loss": 0.6240181922912598, "epoch": 1.3, "learning_rate": 5.054187506277588e-05, "loss": 0.7433, "step": 3600, "task_loss": 0.5513472557067871 }, { "compression_loss": 0.0, "distillation_loss": 1.044684886932373, "epoch": 1.3, "learning_rate": 5.0492174470936685e-05, "loss": 0.8185, "step": 3610, "task_loss": 0.6776243448257446 }, { "compression_loss": 0.0, "distillation_loss": 0.6448076963424683, "epoch": 1.31, "learning_rate": 5.044236821455508e-05, "loss": 0.7551, "step": 3620, "task_loss": 0.62340247631073 }, { "compression_loss": 0.0, "distillation_loss": 1.0077701807022095, "epoch": 1.31, "learning_rate": 5.039245655044887e-05, "loss": 0.8528, "step": 3630, "task_loss": 0.9782606363296509 }, { "compression_loss": 0.0, "distillation_loss": 0.989992082118988, "epoch": 1.32, "learning_rate": 5.0342439735979367e-05, "loss": 0.8235, "step": 3640, "task_loss": 0.947731614112854 }, { "compression_loss": 0.0, "distillation_loss": 0.6484651565551758, "epoch": 1.32, "learning_rate": 5.029231802905011e-05, "loss": 0.8563, "step": 3650, "task_loss": 0.8938379883766174 }, { "compression_loss": 0.0, "distillation_loss": 0.9811206459999084, "epoch": 1.32, "learning_rate": 5.024209168810545e-05, "loss": 0.8731, "step": 3660, "task_loss": 0.7048652172088623 }, { "compression_loss": 0.0, "distillation_loss": 0.9399855136871338, "epoch": 1.33, "learning_rate": 5.0191760972129305e-05, "loss": 0.8083, "step": 3670, "task_loss": 0.49602335691452026 }, { "compression_loss": 0.0, "distillation_loss": 0.6922057271003723, "epoch": 1.33, "learning_rate": 5.014132614064376e-05, "loss": 0.7944, "step": 3680, "task_loss": 0.680495023727417 }, { "compression_loss": 0.0, "distillation_loss": 0.685440182685852, "epoch": 1.33, "learning_rate": 5.009078745370778e-05, "loss": 0.8661, "step": 3690, "task_loss": 0.7724747657775879 }, { "compression_loss": 0.0, "distillation_loss": 0.8706653714179993, "epoch": 1.34, "learning_rate": 5.004014517191581e-05, "loss": 0.7439, "step": 3700, "task_loss": 0.8229076862335205 }, { "compression_loss": 0.0, "distillation_loss": 0.8825914263725281, "epoch": 1.34, "learning_rate": 4.998939955639651e-05, "loss": 0.7976, "step": 3710, "task_loss": 0.4416514039039612 }, { "compression_loss": 0.0, "distillation_loss": 1.1340502500534058, "epoch": 1.34, "learning_rate": 4.993855086881132e-05, "loss": 0.8594, "step": 3720, "task_loss": 1.094773769378662 }, { "compression_loss": 0.0, "distillation_loss": 0.7764121294021606, "epoch": 1.35, "learning_rate": 4.988759937135318e-05, "loss": 0.7709, "step": 3730, "task_loss": 1.3073153495788574 }, { "compression_loss": 0.0, "distillation_loss": 0.9309306740760803, "epoch": 1.35, "learning_rate": 4.983654532674513e-05, "loss": 0.7804, "step": 3740, "task_loss": 0.9863795638084412 }, { "compression_loss": 0.0, "distillation_loss": 0.6571683287620544, "epoch": 1.36, "learning_rate": 4.9785388998239e-05, "loss": 0.838, "step": 3750, "task_loss": 1.0076504945755005 }, { "epoch": 1.36, "eval_exact_match": 80.55818353831599, "eval_f1": 88.49080617027154, "step": 3750 }, { "compression_loss": 0.0, "distillation_loss": 0.7349100112915039, "epoch": 1.36, "learning_rate": 4.9734130649614034e-05, "loss": 0.8363, "step": 3760, "task_loss": 0.6905062794685364 }, { "compression_loss": 0.0, "distillation_loss": 0.6799863576889038, "epoch": 1.36, "learning_rate": 4.96827705451755e-05, "loss": 0.7666, "step": 3770, "task_loss": 0.8045142292976379 }, { "compression_loss": 0.0, "distillation_loss": 0.96070396900177, "epoch": 1.37, "learning_rate": 4.963130894975336e-05, "loss": 0.8555, "step": 3780, "task_loss": 0.9497197270393372 }, { "compression_loss": 0.0, "distillation_loss": 0.7121753692626953, "epoch": 1.37, "learning_rate": 4.9579746128700925e-05, "loss": 0.8341, "step": 3790, "task_loss": 0.6066538095474243 }, { "compression_loss": 0.0, "distillation_loss": 0.7798603177070618, "epoch": 1.37, "learning_rate": 4.952808234789343e-05, "loss": 0.8343, "step": 3800, "task_loss": 1.0046956539154053 }, { "compression_loss": 0.0, "distillation_loss": 0.8791223764419556, "epoch": 1.38, "learning_rate": 4.9476317873726695e-05, "loss": 0.7689, "step": 3810, "task_loss": 0.8858872652053833 }, { "compression_loss": 0.0, "distillation_loss": 0.5737714767456055, "epoch": 1.38, "learning_rate": 4.942445297311577e-05, "loss": 0.7408, "step": 3820, "task_loss": 0.548050045967102 }, { "compression_loss": 0.0, "distillation_loss": 0.8374737501144409, "epoch": 1.38, "learning_rate": 4.9372487913493503e-05, "loss": 0.7491, "step": 3830, "task_loss": 0.9598158597946167 }, { "compression_loss": 0.0, "distillation_loss": 0.8589112758636475, "epoch": 1.39, "learning_rate": 4.9320422962809235e-05, "loss": 0.7824, "step": 3840, "task_loss": 1.1727523803710938 }, { "compression_loss": 0.0, "distillation_loss": 0.9275731444358826, "epoch": 1.39, "learning_rate": 4.926825838952736e-05, "loss": 0.7659, "step": 3850, "task_loss": 1.0407650470733643 }, { "compression_loss": 0.0, "distillation_loss": 0.5879838466644287, "epoch": 1.4, "learning_rate": 4.921599446262594e-05, "loss": 0.7324, "step": 3860, "task_loss": 0.4965895414352417 }, { "compression_loss": 0.0, "distillation_loss": 0.7945241928100586, "epoch": 1.4, "learning_rate": 4.916363145159537e-05, "loss": 0.8039, "step": 3870, "task_loss": 0.9777019023895264 }, { "compression_loss": 0.0, "distillation_loss": 0.6736628413200378, "epoch": 1.4, "learning_rate": 4.911116962643693e-05, "loss": 0.7911, "step": 3880, "task_loss": 0.9778854250907898 }, { "compression_loss": 0.0, "distillation_loss": 0.5995903015136719, "epoch": 1.41, "learning_rate": 4.9058609257661456e-05, "loss": 0.7825, "step": 3890, "task_loss": 0.6814795732498169 }, { "compression_loss": 0.0, "distillation_loss": 0.7362528443336487, "epoch": 1.41, "learning_rate": 4.9005950616287865e-05, "loss": 0.9157, "step": 3900, "task_loss": 0.9911819100379944 }, { "compression_loss": 0.0, "distillation_loss": 0.7359405159950256, "epoch": 1.41, "learning_rate": 4.895319397384182e-05, "loss": 0.7861, "step": 3910, "task_loss": 0.6138168573379517 }, { "compression_loss": 0.0, "distillation_loss": 1.0004394054412842, "epoch": 1.42, "learning_rate": 4.8900339602354324e-05, "loss": 0.7014, "step": 3920, "task_loss": 1.2659577131271362 }, { "compression_loss": 0.0, "distillation_loss": 0.566203773021698, "epoch": 1.42, "learning_rate": 4.884738777436027e-05, "loss": 0.7089, "step": 3930, "task_loss": 0.6886886358261108 }, { "compression_loss": 0.0, "distillation_loss": 0.6759481430053711, "epoch": 1.42, "learning_rate": 4.87943387628971e-05, "loss": 0.78, "step": 3940, "task_loss": 0.7935036420822144 }, { "compression_loss": 0.0, "distillation_loss": 0.6697784066200256, "epoch": 1.43, "learning_rate": 4.874119284150336e-05, "loss": 0.7388, "step": 3950, "task_loss": 0.602413535118103 }, { "compression_loss": 0.0, "distillation_loss": 0.8146666288375854, "epoch": 1.43, "learning_rate": 4.868795028421728e-05, "loss": 0.8598, "step": 3960, "task_loss": 0.9612588882446289 }, { "compression_loss": 0.0, "distillation_loss": 0.995978057384491, "epoch": 1.43, "learning_rate": 4.86346113655754e-05, "loss": 0.8505, "step": 3970, "task_loss": 0.7699757814407349 }, { "compression_loss": 0.0, "distillation_loss": 0.8782490491867065, "epoch": 1.44, "learning_rate": 4.8581176360611114e-05, "loss": 0.8492, "step": 3980, "task_loss": 0.879914402961731 }, { "compression_loss": 0.0, "distillation_loss": 0.8815962076187134, "epoch": 1.44, "learning_rate": 4.852764554485328e-05, "loss": 0.8009, "step": 3990, "task_loss": 0.573132336139679 }, { "compression_loss": 0.0, "distillation_loss": 0.8560042381286621, "epoch": 1.45, "learning_rate": 4.8474019194324777e-05, "loss": 0.842, "step": 4000, "task_loss": 1.0868232250213623 }, { "epoch": 1.45, "eval_exact_match": 80.65279091769158, "eval_f1": 88.59402264707015, "step": 4000 }, { "compression_loss": 0.0, "distillation_loss": 0.4224177896976471, "epoch": 1.45, "learning_rate": 4.8420297585541116e-05, "loss": 0.7367, "step": 4010, "task_loss": 0.4378919005393982 }, { "compression_loss": 0.0, "distillation_loss": 0.5388481616973877, "epoch": 1.45, "learning_rate": 4.836648099550896e-05, "loss": 0.7675, "step": 4020, "task_loss": 0.44602516293525696 }, { "compression_loss": 0.0, "distillation_loss": 0.8967180252075195, "epoch": 1.46, "learning_rate": 4.8312569701724754e-05, "loss": 0.7752, "step": 4030, "task_loss": 0.7708736062049866 }, { "compression_loss": 0.0, "distillation_loss": 0.8984067440032959, "epoch": 1.46, "learning_rate": 4.8258563982173244e-05, "loss": 0.9208, "step": 4040, "task_loss": 0.9523441791534424 }, { "compression_loss": 0.0, "distillation_loss": 0.6791067123413086, "epoch": 1.46, "learning_rate": 4.82044641153261e-05, "loss": 0.7119, "step": 4050, "task_loss": 0.957298219203949 }, { "compression_loss": 0.0, "distillation_loss": 0.5926834940910339, "epoch": 1.47, "learning_rate": 4.8150270380140414e-05, "loss": 0.8366, "step": 4060, "task_loss": 0.86881023645401 }, { "compression_loss": 0.0, "distillation_loss": 0.8107943534851074, "epoch": 1.47, "learning_rate": 4.809598305605732e-05, "loss": 0.7996, "step": 4070, "task_loss": 0.8448442220687866 }, { "compression_loss": 0.0, "distillation_loss": 1.0969449281692505, "epoch": 1.47, "learning_rate": 4.8041602423000505e-05, "loss": 0.8194, "step": 4080, "task_loss": 1.0413978099822998 }, { "compression_loss": 0.0, "distillation_loss": 0.8810731172561646, "epoch": 1.48, "learning_rate": 4.79871287613748e-05, "loss": 0.8494, "step": 4090, "task_loss": 0.8740400075912476 }, { "compression_loss": 0.0, "distillation_loss": 0.5368291735649109, "epoch": 1.48, "learning_rate": 4.793256235206473e-05, "loss": 0.7184, "step": 4100, "task_loss": 0.465472012758255 }, { "compression_loss": 0.0, "distillation_loss": 0.8813285827636719, "epoch": 1.49, "learning_rate": 4.787790347643305e-05, "loss": 0.7444, "step": 4110, "task_loss": 0.8482736349105835 }, { "compression_loss": 0.0, "distillation_loss": 0.8207933902740479, "epoch": 1.49, "learning_rate": 4.782315241631929e-05, "loss": 0.7815, "step": 4120, "task_loss": 0.8252826929092407 }, { "compression_loss": 0.0, "distillation_loss": 0.7789289355278015, "epoch": 1.49, "learning_rate": 4.776830945403833e-05, "loss": 0.7304, "step": 4130, "task_loss": 1.0397837162017822 }, { "compression_loss": 0.0, "distillation_loss": 1.0559360980987549, "epoch": 1.5, "learning_rate": 4.771337487237894e-05, "loss": 0.741, "step": 4140, "task_loss": 1.10592520236969 }, { "compression_loss": 0.0, "distillation_loss": 0.9466273784637451, "epoch": 1.5, "learning_rate": 4.7658348954602285e-05, "loss": 0.7681, "step": 4150, "task_loss": 0.8297045230865479 }, { "compression_loss": 0.0, "distillation_loss": 0.6521314382553101, "epoch": 1.5, "learning_rate": 4.76032319844405e-05, "loss": 0.8071, "step": 4160, "task_loss": 0.7315813302993774 }, { "compression_loss": 0.0, "distillation_loss": 0.860609769821167, "epoch": 1.51, "learning_rate": 4.754802424609521e-05, "loss": 0.8721, "step": 4170, "task_loss": 0.8967682123184204 }, { "compression_loss": 0.0, "distillation_loss": 0.6362369060516357, "epoch": 1.51, "learning_rate": 4.74927260242361e-05, "loss": 0.7326, "step": 4180, "task_loss": 0.5996143221855164 }, { "compression_loss": 0.0, "distillation_loss": 0.7511730194091797, "epoch": 1.51, "learning_rate": 4.7437337603999376e-05, "loss": 0.7683, "step": 4190, "task_loss": 0.43765193223953247 }, { "compression_loss": 0.0, "distillation_loss": 0.8653720617294312, "epoch": 1.52, "learning_rate": 4.7381859270986374e-05, "loss": 0.8353, "step": 4200, "task_loss": 1.0502616167068481 }, { "compression_loss": 0.0, "distillation_loss": 0.6381678581237793, "epoch": 1.52, "learning_rate": 4.732629131126202e-05, "loss": 0.7639, "step": 4210, "task_loss": 0.5875723361968994 }, { "compression_loss": 0.0, "distillation_loss": 0.7945320010185242, "epoch": 1.53, "learning_rate": 4.727063401135341e-05, "loss": 0.7863, "step": 4220, "task_loss": 1.58515465259552 }, { "compression_loss": 0.0, "distillation_loss": 0.6202449798583984, "epoch": 1.53, "learning_rate": 4.7214887658248296e-05, "loss": 0.7453, "step": 4230, "task_loss": 0.8521810173988342 }, { "compression_loss": 0.0, "distillation_loss": 0.7749199867248535, "epoch": 1.53, "learning_rate": 4.715905253939361e-05, "loss": 0.8201, "step": 4240, "task_loss": 0.766315221786499 }, { "compression_loss": 0.0, "distillation_loss": 0.729579746723175, "epoch": 1.54, "learning_rate": 4.710312894269402e-05, "loss": 0.8902, "step": 4250, "task_loss": 0.6951254606246948 }, { "epoch": 1.54, "eval_exact_match": 81.32450331125828, "eval_f1": 88.97617818833929, "step": 4250 }, { "compression_loss": 0.0, "distillation_loss": 0.9440791606903076, "epoch": 1.54, "learning_rate": 4.704711715651038e-05, "loss": 0.7623, "step": 4260, "task_loss": 1.0249818563461304 }, { "compression_loss": 0.0, "distillation_loss": 0.8210508227348328, "epoch": 1.54, "learning_rate": 4.699101746965829e-05, "loss": 0.8148, "step": 4270, "task_loss": 0.9564526081085205 }, { "compression_loss": 0.0, "distillation_loss": 0.7049989104270935, "epoch": 1.55, "learning_rate": 4.6934830171406636e-05, "loss": 0.7944, "step": 4280, "task_loss": 0.941627025604248 }, { "compression_loss": 0.0, "distillation_loss": 0.6714322566986084, "epoch": 1.55, "learning_rate": 4.687855555147597e-05, "loss": 0.7231, "step": 4290, "task_loss": 1.0258185863494873 }, { "compression_loss": 0.0, "distillation_loss": 0.8125296831130981, "epoch": 1.55, "learning_rate": 4.682219390003719e-05, "loss": 0.6968, "step": 4300, "task_loss": 0.7712159156799316 }, { "compression_loss": 0.0, "distillation_loss": 0.9884834885597229, "epoch": 1.56, "learning_rate": 4.676574550770991e-05, "loss": 0.7995, "step": 4310, "task_loss": 0.8624418377876282 }, { "compression_loss": 0.0, "distillation_loss": 1.195237398147583, "epoch": 1.56, "learning_rate": 4.6709210665561035e-05, "loss": 0.8232, "step": 4320, "task_loss": 0.8453245162963867 }, { "compression_loss": 0.0, "distillation_loss": 0.87236487865448, "epoch": 1.56, "learning_rate": 4.6652589665103204e-05, "loss": 0.7264, "step": 4330, "task_loss": 0.9678608179092407 }, { "compression_loss": 0.0, "distillation_loss": 0.7140805721282959, "epoch": 1.57, "learning_rate": 4.659588279829335e-05, "loss": 0.8526, "step": 4340, "task_loss": 0.8649274706840515 }, { "compression_loss": 0.0, "distillation_loss": 0.8390883207321167, "epoch": 1.57, "learning_rate": 4.653909035753114e-05, "loss": 0.7946, "step": 4350, "task_loss": 0.8922809362411499 }, { "compression_loss": 0.0, "distillation_loss": 0.6814520955085754, "epoch": 1.58, "learning_rate": 4.648221263565751e-05, "loss": 0.7372, "step": 4360, "task_loss": 0.740043580532074 }, { "compression_loss": 0.0, "distillation_loss": 0.922714114189148, "epoch": 1.58, "learning_rate": 4.642524992595309e-05, "loss": 0.8215, "step": 4370, "task_loss": 0.5661073327064514 }, { "compression_loss": 0.0, "distillation_loss": 0.942134439945221, "epoch": 1.58, "learning_rate": 4.636820252213679e-05, "loss": 0.7335, "step": 4380, "task_loss": 0.8119366765022278 }, { "compression_loss": 0.0, "distillation_loss": 1.0408191680908203, "epoch": 1.59, "learning_rate": 4.63110707183642e-05, "loss": 0.8429, "step": 4390, "task_loss": 0.9176040887832642 }, { "compression_loss": 0.0, "distillation_loss": 0.9637014865875244, "epoch": 1.59, "learning_rate": 4.6253854809226115e-05, "loss": 0.75, "step": 4400, "task_loss": 1.2698314189910889 }, { "compression_loss": 0.0, "distillation_loss": 0.9256500005722046, "epoch": 1.59, "learning_rate": 4.6196555089747e-05, "loss": 0.8744, "step": 4410, "task_loss": 0.9903556704521179 }, { "compression_loss": 0.0, "distillation_loss": 0.8625351190567017, "epoch": 1.6, "learning_rate": 4.6139171855383473e-05, "loss": 0.7773, "step": 4420, "task_loss": 0.6395310163497925 }, { "compression_loss": 0.0, "distillation_loss": 0.5867180824279785, "epoch": 1.6, "learning_rate": 4.608170540202279e-05, "loss": 0.7754, "step": 4430, "task_loss": 0.774559736251831 }, { "compression_loss": 0.0, "distillation_loss": 0.5111645460128784, "epoch": 1.6, "learning_rate": 4.602415602598132e-05, "loss": 0.8272, "step": 4440, "task_loss": 0.4568556547164917 }, { "compression_loss": 0.0, "distillation_loss": 0.6454306840896606, "epoch": 1.61, "learning_rate": 4.5966524024002976e-05, "loss": 0.831, "step": 4450, "task_loss": 0.40431562066078186 }, { "compression_loss": 0.0, "distillation_loss": 0.5694042444229126, "epoch": 1.61, "learning_rate": 4.590880969325774e-05, "loss": 0.7259, "step": 4460, "task_loss": 0.7366609573364258 }, { "compression_loss": 0.0, "distillation_loss": 0.6696236729621887, "epoch": 1.62, "learning_rate": 4.585101333134013e-05, "loss": 0.7484, "step": 4470, "task_loss": 0.5174720883369446 }, { "compression_loss": 0.0, "distillation_loss": 0.6274579167366028, "epoch": 1.62, "learning_rate": 4.5793135236267626e-05, "loss": 0.7402, "step": 4480, "task_loss": 0.747776985168457 }, { "compression_loss": 0.0, "distillation_loss": 0.7382104396820068, "epoch": 1.62, "learning_rate": 4.573517570647912e-05, "loss": 0.7127, "step": 4490, "task_loss": 0.7014381885528564 }, { "compression_loss": 0.0, "distillation_loss": 0.7991716861724854, "epoch": 1.63, "learning_rate": 4.567713504083346e-05, "loss": 0.7075, "step": 4500, "task_loss": 0.9051458835601807 }, { "epoch": 1.63, "eval_exact_match": 81.36234626300852, "eval_f1": 88.86679422849299, "step": 4500 }, { "compression_loss": 0.0, "distillation_loss": 0.7607914209365845, "epoch": 1.63, "learning_rate": 4.5619013538607814e-05, "loss": 0.7915, "step": 4510, "task_loss": 0.7439800500869751 }, { "compression_loss": 0.0, "distillation_loss": 0.7172999382019043, "epoch": 1.63, "learning_rate": 4.556081149949621e-05, "loss": 0.7687, "step": 4520, "task_loss": 0.8847224116325378 }, { "compression_loss": 0.0, "distillation_loss": 0.7143849730491638, "epoch": 1.64, "learning_rate": 4.5502529223607936e-05, "loss": 0.815, "step": 4530, "task_loss": 0.8012955784797668 }, { "compression_loss": 0.0, "distillation_loss": 1.046454668045044, "epoch": 1.64, "learning_rate": 4.5444167011466e-05, "loss": 0.8533, "step": 4540, "task_loss": 0.7980386018753052 }, { "compression_loss": 0.0, "distillation_loss": 0.649091362953186, "epoch": 1.64, "learning_rate": 4.5385725164005585e-05, "loss": 0.7456, "step": 4550, "task_loss": 0.49612122774124146 }, { "compression_loss": 0.0, "distillation_loss": 0.7305445075035095, "epoch": 1.65, "learning_rate": 4.5327203982572514e-05, "loss": 0.7565, "step": 4560, "task_loss": 0.6083104610443115 }, { "compression_loss": 0.0, "distillation_loss": 1.2499300241470337, "epoch": 1.65, "learning_rate": 4.526860376892167e-05, "loss": 0.9337, "step": 4570, "task_loss": 1.3941344022750854 }, { "compression_loss": 0.0, "distillation_loss": 1.0218126773834229, "epoch": 1.66, "learning_rate": 4.5209924825215474e-05, "loss": 0.7354, "step": 4580, "task_loss": 0.9629853963851929 }, { "compression_loss": 0.0, "distillation_loss": 0.7129672169685364, "epoch": 1.66, "learning_rate": 4.515116745402229e-05, "loss": 0.7491, "step": 4590, "task_loss": 0.6554641127586365 }, { "compression_loss": 0.0, "distillation_loss": 0.7573808431625366, "epoch": 1.66, "learning_rate": 4.509233195831487e-05, "loss": 0.8387, "step": 4600, "task_loss": 0.813507080078125 }, { "compression_loss": 0.0, "distillation_loss": 0.7598761320114136, "epoch": 1.67, "learning_rate": 4.5033418641468816e-05, "loss": 0.7462, "step": 4610, "task_loss": 0.8632051348686218 }, { "compression_loss": 0.0, "distillation_loss": 0.4978368580341339, "epoch": 1.67, "learning_rate": 4.497442780726101e-05, "loss": 0.6874, "step": 4620, "task_loss": 0.6175663471221924 }, { "compression_loss": 0.0, "distillation_loss": 0.6145604848861694, "epoch": 1.67, "learning_rate": 4.4915359759868034e-05, "loss": 0.713, "step": 4630, "task_loss": 0.6943603157997131 }, { "compression_loss": 0.0, "distillation_loss": 0.7277992963790894, "epoch": 1.68, "learning_rate": 4.485621480386459e-05, "loss": 0.8655, "step": 4640, "task_loss": 0.6401335000991821 }, { "compression_loss": 0.0, "distillation_loss": 0.5838598608970642, "epoch": 1.68, "learning_rate": 4.479699324422195e-05, "loss": 0.6839, "step": 4650, "task_loss": 0.43302324414253235 }, { "compression_loss": 0.0, "distillation_loss": 0.5886027216911316, "epoch": 1.68, "learning_rate": 4.473769538630642e-05, "loss": 0.6724, "step": 4660, "task_loss": 0.7869935631752014 }, { "compression_loss": 0.0, "distillation_loss": 0.8098095655441284, "epoch": 1.69, "learning_rate": 4.4678321535877664e-05, "loss": 0.7717, "step": 4670, "task_loss": 1.0552672147750854 }, { "compression_loss": 0.0, "distillation_loss": 0.6143741607666016, "epoch": 1.69, "learning_rate": 4.4618871999087255e-05, "loss": 0.825, "step": 4680, "task_loss": 0.6005398035049438 }, { "compression_loss": 0.0, "distillation_loss": 0.659191370010376, "epoch": 1.69, "learning_rate": 4.455934708247696e-05, "loss": 0.7628, "step": 4690, "task_loss": 0.5167052745819092 }, { "compression_loss": 0.0, "distillation_loss": 0.686668872833252, "epoch": 1.7, "learning_rate": 4.449974709297729e-05, "loss": 0.7712, "step": 4700, "task_loss": 0.7116361856460571 }, { "compression_loss": 0.0, "distillation_loss": 0.6677272915840149, "epoch": 1.7, "learning_rate": 4.4440072337905815e-05, "loss": 0.7906, "step": 4710, "task_loss": 0.5613526701927185 }, { "compression_loss": 0.0, "distillation_loss": 0.45507311820983887, "epoch": 1.71, "learning_rate": 4.438032312496565e-05, "loss": 0.8587, "step": 4720, "task_loss": 0.553399920463562 }, { "compression_loss": 0.0, "distillation_loss": 1.2598834037780762, "epoch": 1.71, "learning_rate": 4.432049976224383e-05, "loss": 0.9358, "step": 4730, "task_loss": 1.3447682857513428 }, { "compression_loss": 0.0, "distillation_loss": 0.7433596849441528, "epoch": 1.71, "learning_rate": 4.4260602558209715e-05, "loss": 0.7002, "step": 4740, "task_loss": 0.9043145179748535 }, { "compression_loss": 0.0, "distillation_loss": 0.8138759136199951, "epoch": 1.72, "learning_rate": 4.420063182171345e-05, "loss": 0.8739, "step": 4750, "task_loss": 0.9384928941726685 }, { "epoch": 1.72, "eval_exact_match": 81.15421002838221, "eval_f1": 88.77749919883216, "step": 4750 }, { "compression_loss": 0.0, "distillation_loss": 0.6397891044616699, "epoch": 1.72, "learning_rate": 4.414058786198431e-05, "loss": 0.7098, "step": 4760, "task_loss": 0.641806423664093 }, { "compression_loss": 0.0, "distillation_loss": 0.9567407965660095, "epoch": 1.72, "learning_rate": 4.408047098862914e-05, "loss": 0.7674, "step": 4770, "task_loss": 0.8469678163528442 }, { "compression_loss": 0.0, "distillation_loss": 1.1057536602020264, "epoch": 1.73, "learning_rate": 4.402028151163076e-05, "loss": 0.7887, "step": 4780, "task_loss": 0.9880897998809814 }, { "compression_loss": 0.0, "distillation_loss": 0.706554114818573, "epoch": 1.73, "learning_rate": 4.396001974134634e-05, "loss": 0.7421, "step": 4790, "task_loss": 0.516491711139679 }, { "compression_loss": 0.0, "distillation_loss": 0.7376039028167725, "epoch": 1.73, "learning_rate": 4.389968598850585e-05, "loss": 0.7885, "step": 4800, "task_loss": 0.7677445411682129 }, { "compression_loss": 0.0, "distillation_loss": 0.9835151433944702, "epoch": 1.74, "learning_rate": 4.38392805642104e-05, "loss": 0.7134, "step": 4810, "task_loss": 0.8267749547958374 }, { "compression_loss": 0.0, "distillation_loss": 0.9274640083312988, "epoch": 1.74, "learning_rate": 4.3778803779930655e-05, "loss": 0.7763, "step": 4820, "task_loss": 0.7708688974380493 }, { "compression_loss": 0.0, "distillation_loss": 0.807561993598938, "epoch": 1.75, "learning_rate": 4.371825594750528e-05, "loss": 0.663, "step": 4830, "task_loss": 1.1051645278930664 }, { "compression_loss": 0.0, "distillation_loss": 0.8475966453552246, "epoch": 1.75, "learning_rate": 4.365763737913924e-05, "loss": 0.6866, "step": 4840, "task_loss": 0.7194445133209229 }, { "compression_loss": 0.0, "distillation_loss": 0.662378191947937, "epoch": 1.75, "learning_rate": 4.359694838740225e-05, "loss": 0.7215, "step": 4850, "task_loss": 0.9244500398635864 }, { "compression_loss": 0.0, "distillation_loss": 0.8736042976379395, "epoch": 1.76, "learning_rate": 4.353618928522718e-05, "loss": 0.6807, "step": 4860, "task_loss": 0.5476257801055908 }, { "compression_loss": 0.0, "distillation_loss": 0.9758906960487366, "epoch": 1.76, "learning_rate": 4.3475360385908385e-05, "loss": 0.7897, "step": 4870, "task_loss": 1.1290806531906128 }, { "compression_loss": 0.0, "distillation_loss": 0.5853473544120789, "epoch": 1.76, "learning_rate": 4.3414462003100126e-05, "loss": 0.7186, "step": 4880, "task_loss": 0.6802440881729126 }, { "compression_loss": 0.0, "distillation_loss": 0.8614143133163452, "epoch": 1.77, "learning_rate": 4.335349445081493e-05, "loss": 0.77, "step": 4890, "task_loss": 0.674217939376831 }, { "compression_loss": 0.0, "distillation_loss": 0.6680730581283569, "epoch": 1.77, "learning_rate": 4.329245804342201e-05, "loss": 0.7647, "step": 4900, "task_loss": 0.5699944496154785 }, { "compression_loss": 0.0, "distillation_loss": 0.8439807891845703, "epoch": 1.77, "learning_rate": 4.3231353095645604e-05, "loss": 0.7738, "step": 4910, "task_loss": 0.8963417410850525 }, { "compression_loss": 0.0, "distillation_loss": 1.0166106224060059, "epoch": 1.78, "learning_rate": 4.317017992256337e-05, "loss": 0.7552, "step": 4920, "task_loss": 0.746208667755127 }, { "compression_loss": 0.0, "distillation_loss": 1.114891529083252, "epoch": 1.78, "learning_rate": 4.310893883960476e-05, "loss": 0.7747, "step": 4930, "task_loss": 1.3238515853881836 }, { "compression_loss": 0.0, "distillation_loss": 0.9112187623977661, "epoch": 1.79, "learning_rate": 4.304763016254939e-05, "loss": 0.742, "step": 4940, "task_loss": 1.0595370531082153 }, { "compression_loss": 0.0, "distillation_loss": 0.6720353364944458, "epoch": 1.79, "learning_rate": 4.298625420752541e-05, "loss": 0.7768, "step": 4950, "task_loss": 0.7741559147834778 }, { "compression_loss": 0.0, "distillation_loss": 1.14748215675354, "epoch": 1.79, "learning_rate": 4.29248112910079e-05, "loss": 0.7824, "step": 4960, "task_loss": 0.944894552230835 }, { "compression_loss": 0.0, "distillation_loss": 0.9729048013687134, "epoch": 1.8, "learning_rate": 4.286330172981718e-05, "loss": 0.8742, "step": 4970, "task_loss": 1.025977611541748 }, { "compression_loss": 0.0, "distillation_loss": 0.939768373966217, "epoch": 1.8, "learning_rate": 4.2801725841117244e-05, "loss": 0.7605, "step": 4980, "task_loss": 0.6698074340820312 }, { "compression_loss": 0.0, "distillation_loss": 0.5564023852348328, "epoch": 1.8, "learning_rate": 4.274008394241407e-05, "loss": 0.8018, "step": 4990, "task_loss": 0.4944656491279602 }, { "compression_loss": 0.0, "distillation_loss": 0.8198148012161255, "epoch": 1.81, "learning_rate": 4.267837635155402e-05, "loss": 0.7426, "step": 5000, "task_loss": 0.6710585355758667 }, { "epoch": 1.81, "eval_exact_match": 81.48533585619678, "eval_f1": 88.84117960148326, "step": 5000 }, { "compression_loss": 0.0, "distillation_loss": 1.1788697242736816, "epoch": 1.81, "learning_rate": 4.2616603386722185e-05, "loss": 0.7101, "step": 5010, "task_loss": 0.7871378660202026 }, { "compression_loss": 0.0, "distillation_loss": 0.8743175268173218, "epoch": 1.81, "learning_rate": 4.2554765366440736e-05, "loss": 0.7741, "step": 5020, "task_loss": 1.2785630226135254 }, { "compression_loss": 0.0, "distillation_loss": 0.9149342775344849, "epoch": 1.82, "learning_rate": 4.249286260956732e-05, "loss": 0.8054, "step": 5030, "task_loss": 0.8568088412284851 }, { "compression_loss": 0.0, "distillation_loss": 0.6473352909088135, "epoch": 1.82, "learning_rate": 4.2430895435293335e-05, "loss": 0.8112, "step": 5040, "task_loss": 0.44709980487823486 }, { "compression_loss": 0.0, "distillation_loss": 0.807868242263794, "epoch": 1.83, "learning_rate": 4.2368864163142396e-05, "loss": 0.7528, "step": 5050, "task_loss": 0.8173571825027466 }, { "compression_loss": 0.0, "distillation_loss": 0.8056721091270447, "epoch": 1.83, "learning_rate": 4.230676911296859e-05, "loss": 0.7581, "step": 5060, "task_loss": 1.0143595933914185 }, { "compression_loss": 0.0, "distillation_loss": 0.8686327934265137, "epoch": 1.83, "learning_rate": 4.2244610604954874e-05, "loss": 0.7872, "step": 5070, "task_loss": 0.8795457482337952 }, { "compression_loss": 0.0, "distillation_loss": 0.8215399980545044, "epoch": 1.84, "learning_rate": 4.218238895961143e-05, "loss": 0.8153, "step": 5080, "task_loss": 0.6292321085929871 }, { "compression_loss": 0.0, "distillation_loss": 0.6530864238739014, "epoch": 1.84, "learning_rate": 4.212010449777397e-05, "loss": 0.7122, "step": 5090, "task_loss": 0.908234715461731 }, { "compression_loss": 0.0, "distillation_loss": 0.6864101886749268, "epoch": 1.84, "learning_rate": 4.2057757540602134e-05, "loss": 0.7769, "step": 5100, "task_loss": 0.761084794998169 }, { "compression_loss": 0.0, "distillation_loss": 0.9541764855384827, "epoch": 1.85, "learning_rate": 4.199534840957779e-05, "loss": 0.7948, "step": 5110, "task_loss": 0.8573384284973145 }, { "compression_loss": 0.0, "distillation_loss": 0.5462681651115417, "epoch": 1.85, "learning_rate": 4.193287742650341e-05, "loss": 0.6368, "step": 5120, "task_loss": 0.4982544779777527 }, { "compression_loss": 0.0, "distillation_loss": 0.6552547216415405, "epoch": 1.85, "learning_rate": 4.1870344913500387e-05, "loss": 0.8185, "step": 5130, "task_loss": 1.1584696769714355 }, { "compression_loss": 0.0, "distillation_loss": 0.7229572534561157, "epoch": 1.86, "learning_rate": 4.180775119300738e-05, "loss": 0.7605, "step": 5140, "task_loss": 0.7089170813560486 }, { "compression_loss": 0.0, "distillation_loss": 0.43640923500061035, "epoch": 1.86, "learning_rate": 4.174509658777867e-05, "loss": 0.7827, "step": 5150, "task_loss": 0.9247764945030212 }, { "compression_loss": 0.0, "distillation_loss": 0.969160258769989, "epoch": 1.86, "learning_rate": 4.168238142088247e-05, "loss": 0.7693, "step": 5160, "task_loss": 0.8934771418571472 }, { "compression_loss": 0.0, "distillation_loss": 0.7915940284729004, "epoch": 1.87, "learning_rate": 4.1619606015699254e-05, "loss": 0.7791, "step": 5170, "task_loss": 0.7979614734649658 }, { "compression_loss": 0.0, "distillation_loss": 0.5941789150238037, "epoch": 1.87, "learning_rate": 4.155677069592015e-05, "loss": 0.7402, "step": 5180, "task_loss": 0.8741079568862915 }, { "compression_loss": 0.0, "distillation_loss": 0.63032066822052, "epoch": 1.88, "learning_rate": 4.149387578554516e-05, "loss": 0.8416, "step": 5190, "task_loss": 0.6060689687728882 }, { "compression_loss": 0.0, "distillation_loss": 0.7978523373603821, "epoch": 1.88, "learning_rate": 4.143092160888162e-05, "loss": 0.7329, "step": 5200, "task_loss": 1.083559274673462 }, { "compression_loss": 0.0, "distillation_loss": 0.7854770421981812, "epoch": 1.88, "learning_rate": 4.1367908490542424e-05, "loss": 0.6658, "step": 5210, "task_loss": 0.6225592494010925 }, { "compression_loss": 0.0, "distillation_loss": 0.5963532328605652, "epoch": 1.89, "learning_rate": 4.1304836755444396e-05, "loss": 0.768, "step": 5220, "task_loss": 0.6593186855316162 }, { "compression_loss": 0.0, "distillation_loss": 0.8601577281951904, "epoch": 1.89, "learning_rate": 4.1241706728806614e-05, "loss": 0.6606, "step": 5230, "task_loss": 0.8627996444702148 }, { "compression_loss": 0.0, "distillation_loss": 0.6065561175346375, "epoch": 1.89, "learning_rate": 4.1178518736148726e-05, "loss": 0.6936, "step": 5240, "task_loss": 0.49574264883995056 }, { "compression_loss": 0.0, "distillation_loss": 0.8394991159439087, "epoch": 1.9, "learning_rate": 4.111527310328926e-05, "loss": 0.809, "step": 5250, "task_loss": 0.6712428331375122 }, { "epoch": 1.9, "eval_exact_match": 81.60832544938505, "eval_f1": 88.89492666071773, "step": 5250 }, { "compression_loss": 0.0, "distillation_loss": 0.8298504948616028, "epoch": 1.9, "learning_rate": 4.105197015634399e-05, "loss": 0.7804, "step": 5260, "task_loss": 0.8732647895812988 }, { "compression_loss": 0.0, "distillation_loss": 0.6441981196403503, "epoch": 1.9, "learning_rate": 4.0988610221724165e-05, "loss": 0.7308, "step": 5270, "task_loss": 0.6386420726776123 }, { "compression_loss": 0.0, "distillation_loss": 1.327653408050537, "epoch": 1.91, "learning_rate": 4.092519362613494e-05, "loss": 0.8782, "step": 5280, "task_loss": 1.0381779670715332 }, { "compression_loss": 0.0, "distillation_loss": 0.6743829250335693, "epoch": 1.91, "learning_rate": 4.08617206965736e-05, "loss": 0.7572, "step": 5290, "task_loss": 0.6540873050689697 }, { "compression_loss": 0.0, "distillation_loss": 1.1734296083450317, "epoch": 1.92, "learning_rate": 4.079819176032791e-05, "loss": 0.7435, "step": 5300, "task_loss": 0.9436138868331909 }, { "compression_loss": 0.0, "distillation_loss": 0.97495037317276, "epoch": 1.92, "learning_rate": 4.073460714497443e-05, "loss": 0.7854, "step": 5310, "task_loss": 1.2246060371398926 }, { "compression_loss": 0.0, "distillation_loss": 0.8801750540733337, "epoch": 1.92, "learning_rate": 4.067096717837681e-05, "loss": 0.854, "step": 5320, "task_loss": 0.799035370349884 }, { "compression_loss": 0.0, "distillation_loss": 1.0151453018188477, "epoch": 1.93, "learning_rate": 4.060727218868413e-05, "loss": 0.7464, "step": 5330, "task_loss": 0.9292905330657959 }, { "compression_loss": 0.0, "distillation_loss": 0.8881112337112427, "epoch": 1.93, "learning_rate": 4.054352250432917e-05, "loss": 0.6831, "step": 5340, "task_loss": 0.8586100339889526 }, { "compression_loss": 0.0, "distillation_loss": 0.9265180230140686, "epoch": 1.93, "learning_rate": 4.047971845402674e-05, "loss": 0.811, "step": 5350, "task_loss": 0.8724481463432312 }, { "compression_loss": 0.0, "distillation_loss": 0.686070442199707, "epoch": 1.94, "learning_rate": 4.0415860366771986e-05, "loss": 0.754, "step": 5360, "task_loss": 0.8045363426208496 }, { "compression_loss": 0.0, "distillation_loss": 0.9585437774658203, "epoch": 1.94, "learning_rate": 4.0351948571838665e-05, "loss": 0.7982, "step": 5370, "task_loss": 0.6699115037918091 }, { "compression_loss": 0.0, "distillation_loss": 0.8560003638267517, "epoch": 1.94, "learning_rate": 4.0287983398777485e-05, "loss": 0.7212, "step": 5380, "task_loss": 0.46158933639526367 }, { "compression_loss": 0.0, "distillation_loss": 0.8578959107398987, "epoch": 1.95, "learning_rate": 4.02239651774144e-05, "loss": 0.8158, "step": 5390, "task_loss": 0.8129777908325195 }, { "compression_loss": 0.0, "distillation_loss": 0.7851342558860779, "epoch": 1.95, "learning_rate": 4.015989423784887e-05, "loss": 0.7846, "step": 5400, "task_loss": 0.6260675191879272 }, { "compression_loss": 0.0, "distillation_loss": 0.6622868180274963, "epoch": 1.96, "learning_rate": 4.009577091045222e-05, "loss": 0.6604, "step": 5410, "task_loss": 0.5617303252220154 }, { "compression_loss": 0.0, "distillation_loss": 0.6715734601020813, "epoch": 1.96, "learning_rate": 4.003159552586588e-05, "loss": 0.7204, "step": 5420, "task_loss": 0.5333033204078674 }, { "compression_loss": 0.0, "distillation_loss": 0.6720329523086548, "epoch": 1.96, "learning_rate": 3.996736841499972e-05, "loss": 0.77, "step": 5430, "task_loss": 0.5945698618888855 }, { "compression_loss": 0.0, "distillation_loss": 0.7461095452308655, "epoch": 1.97, "learning_rate": 3.990308990903031e-05, "loss": 0.7432, "step": 5440, "task_loss": 0.89726722240448 }, { "compression_loss": 0.0, "distillation_loss": 0.6053512096405029, "epoch": 1.97, "learning_rate": 3.983876033939925e-05, "loss": 0.7936, "step": 5450, "task_loss": 0.46585309505462646 }, { "compression_loss": 0.0, "distillation_loss": 0.5714142322540283, "epoch": 1.97, "learning_rate": 3.977438003781144e-05, "loss": 0.7128, "step": 5460, "task_loss": 0.5288292169570923 }, { "compression_loss": 0.0, "distillation_loss": 0.734135627746582, "epoch": 1.98, "learning_rate": 3.970994933623334e-05, "loss": 0.7645, "step": 5470, "task_loss": 0.9016965627670288 }, { "compression_loss": 0.0, "distillation_loss": 0.7678335905075073, "epoch": 1.98, "learning_rate": 3.9645468566891326e-05, "loss": 0.7166, "step": 5480, "task_loss": 0.5996376276016235 }, { "compression_loss": 0.0, "distillation_loss": 0.6975915431976318, "epoch": 1.98, "learning_rate": 3.958093806226994e-05, "loss": 0.7699, "step": 5490, "task_loss": 0.5111979246139526 }, { "compression_loss": 0.0, "distillation_loss": 0.6483110785484314, "epoch": 1.99, "learning_rate": 3.951635815511014e-05, "loss": 0.6991, "step": 5500, "task_loss": 1.0150556564331055 }, { "epoch": 1.99, "eval_exact_match": 81.40964995269631, "eval_f1": 88.93790583717866, "step": 5500 }, { "compression_loss": 0.0, "distillation_loss": 0.6153757572174072, "epoch": 1.99, "learning_rate": 3.9451729178407676e-05, "loss": 0.7601, "step": 5510, "task_loss": 0.49160200357437134 }, { "compression_loss": 0.0, "distillation_loss": 0.6790378093719482, "epoch": 1.99, "learning_rate": 3.9387051465411245e-05, "loss": 0.7548, "step": 5520, "task_loss": 0.7084499597549438 }, { "compression_loss": 0.0, "distillation_loss": 0.648993730545044, "epoch": 2.0, "learning_rate": 3.93223253496209e-05, "loss": 0.8321, "step": 5530, "task_loss": 0.7858644723892212 }, { "compression_loss": 0.0, "distillation_loss": 0.6678321361541748, "epoch": 2.0, "learning_rate": 3.925755116478628e-05, "loss": 0.644, "step": 5540, "task_loss": 0.40885019302368164 }, { "compression_loss": 0.0, "distillation_loss": 0.5563902854919434, "epoch": 2.01, "learning_rate": 3.919272924490484e-05, "loss": 0.584, "step": 5550, "task_loss": 0.6156301498413086 }, { "compression_loss": 0.0, "distillation_loss": 0.48990246653556824, "epoch": 2.01, "learning_rate": 3.91278599242202e-05, "loss": 0.597, "step": 5560, "task_loss": 0.5672547221183777 }, { "compression_loss": 0.0, "distillation_loss": 0.9053139686584473, "epoch": 2.01, "learning_rate": 3.9062943537220394e-05, "loss": 0.6525, "step": 5570, "task_loss": 1.018937587738037 }, { "compression_loss": 0.0, "distillation_loss": 0.643484354019165, "epoch": 2.02, "learning_rate": 3.899798041863615e-05, "loss": 0.6214, "step": 5580, "task_loss": 0.5043469667434692 }, { "compression_loss": 0.0, "distillation_loss": 0.9523886442184448, "epoch": 2.02, "learning_rate": 3.8932970903439134e-05, "loss": 0.6711, "step": 5590, "task_loss": 1.3548312187194824 }, { "compression_loss": 0.0, "distillation_loss": 0.502714216709137, "epoch": 2.02, "learning_rate": 3.886791532684028e-05, "loss": 0.5996, "step": 5600, "task_loss": 1.051832675933838 }, { "compression_loss": 0.0, "distillation_loss": 0.5625650882720947, "epoch": 2.03, "learning_rate": 3.880281402428802e-05, "loss": 0.5483, "step": 5610, "task_loss": 0.5134047269821167 }, { "compression_loss": 0.0, "distillation_loss": 0.6442995071411133, "epoch": 2.03, "learning_rate": 3.8737667331466554e-05, "loss": 0.6055, "step": 5620, "task_loss": 0.7023986577987671 }, { "compression_loss": 0.0, "distillation_loss": 0.5948624610900879, "epoch": 2.03, "learning_rate": 3.8672475584294126e-05, "loss": 0.6392, "step": 5630, "task_loss": 0.9365327954292297 }, { "compression_loss": 0.0, "distillation_loss": 0.9390955567359924, "epoch": 2.04, "learning_rate": 3.860723911892134e-05, "loss": 0.6544, "step": 5640, "task_loss": 1.2662063837051392 }, { "compression_loss": 0.0, "distillation_loss": 0.612360954284668, "epoch": 2.04, "learning_rate": 3.8541958271729304e-05, "loss": 0.5981, "step": 5650, "task_loss": 0.834581732749939 }, { "compression_loss": 0.0, "distillation_loss": 0.5364567041397095, "epoch": 2.05, "learning_rate": 3.847663337932806e-05, "loss": 0.577, "step": 5660, "task_loss": 0.5253503918647766 }, { "compression_loss": 0.0, "distillation_loss": 0.7174457907676697, "epoch": 2.05, "learning_rate": 3.841126477855469e-05, "loss": 0.6112, "step": 5670, "task_loss": 0.977493405342102 }, { "compression_loss": 0.0, "distillation_loss": 0.7490948438644409, "epoch": 2.05, "learning_rate": 3.834585280647169e-05, "loss": 0.6091, "step": 5680, "task_loss": 0.9850953817367554 }, { "compression_loss": 0.0, "distillation_loss": 0.5830691456794739, "epoch": 2.06, "learning_rate": 3.82803978003652e-05, "loss": 0.6088, "step": 5690, "task_loss": 0.850995659828186 }, { "compression_loss": 0.0, "distillation_loss": 0.5099242925643921, "epoch": 2.06, "learning_rate": 3.821490009774321e-05, "loss": 0.6365, "step": 5700, "task_loss": 0.6015151739120483 }, { "compression_loss": 0.0, "distillation_loss": 0.781893253326416, "epoch": 2.06, "learning_rate": 3.814936003633393e-05, "loss": 0.6158, "step": 5710, "task_loss": 0.7863786220550537 }, { "compression_loss": 0.0, "distillation_loss": 0.4905242323875427, "epoch": 2.07, "learning_rate": 3.808377795408394e-05, "loss": 0.5719, "step": 5720, "task_loss": 0.5809369087219238 }, { "compression_loss": 0.0, "distillation_loss": 0.6399200558662415, "epoch": 2.07, "learning_rate": 3.80181541891565e-05, "loss": 0.6363, "step": 5730, "task_loss": 0.7770982384681702 }, { "compression_loss": 0.0, "distillation_loss": 0.7491112947463989, "epoch": 2.07, "learning_rate": 3.7952489079929826e-05, "loss": 0.6044, "step": 5740, "task_loss": 0.6825676560401917 }, { "compression_loss": 0.0, "distillation_loss": 0.5090913772583008, "epoch": 2.08, "learning_rate": 3.7886782964995304e-05, "loss": 0.6148, "step": 5750, "task_loss": 1.064110279083252 }, { "epoch": 2.08, "eval_exact_match": 81.86376537369915, "eval_f1": 89.1819885255817, "step": 5750 }, { "compression_loss": 0.0, "distillation_loss": 0.5572813749313354, "epoch": 2.08, "learning_rate": 3.782103618315575e-05, "loss": 0.6215, "step": 5760, "task_loss": 0.6721667051315308 }, { "compression_loss": 0.0, "distillation_loss": 0.674677848815918, "epoch": 2.09, "learning_rate": 3.775524907342367e-05, "loss": 0.5723, "step": 5770, "task_loss": 0.8653391003608704 }, { "compression_loss": 0.0, "distillation_loss": 0.45973920822143555, "epoch": 2.09, "learning_rate": 3.768942197501955e-05, "loss": 0.6199, "step": 5780, "task_loss": 0.8878629207611084 }, { "compression_loss": 0.0, "distillation_loss": 0.7025313377380371, "epoch": 2.09, "learning_rate": 3.7623555227370017e-05, "loss": 0.5915, "step": 5790, "task_loss": 0.6296009421348572 }, { "compression_loss": 0.0, "distillation_loss": 0.6605579853057861, "epoch": 2.1, "learning_rate": 3.755764917010618e-05, "loss": 0.5127, "step": 5800, "task_loss": 0.660987377166748 }, { "compression_loss": 0.0, "distillation_loss": 0.5550613403320312, "epoch": 2.1, "learning_rate": 3.749170414306184e-05, "loss": 0.6212, "step": 5810, "task_loss": 0.5019931793212891 }, { "compression_loss": 0.0, "distillation_loss": 0.5897912383079529, "epoch": 2.1, "learning_rate": 3.7425720486271726e-05, "loss": 0.6035, "step": 5820, "task_loss": 0.6660510301589966 }, { "compression_loss": 0.0, "distillation_loss": 0.5896255970001221, "epoch": 2.11, "learning_rate": 3.735969853996976e-05, "loss": 0.5614, "step": 5830, "task_loss": 0.5327462553977966 }, { "compression_loss": 0.0, "distillation_loss": 0.6656285524368286, "epoch": 2.11, "learning_rate": 3.72936386445873e-05, "loss": 0.5375, "step": 5840, "task_loss": 0.6203778386116028 }, { "compression_loss": 0.0, "distillation_loss": 0.5610683560371399, "epoch": 2.11, "learning_rate": 3.722754114075137e-05, "loss": 0.6618, "step": 5850, "task_loss": 0.43519946932792664 }, { "compression_loss": 0.0, "distillation_loss": 0.7016445398330688, "epoch": 2.12, "learning_rate": 3.716140636928295e-05, "loss": 0.648, "step": 5860, "task_loss": 0.7705813646316528 }, { "compression_loss": 0.0, "distillation_loss": 0.496102511882782, "epoch": 2.12, "learning_rate": 3.709523467119514e-05, "loss": 0.5707, "step": 5870, "task_loss": 0.6810226440429688 }, { "compression_loss": 0.0, "distillation_loss": 0.5184519290924072, "epoch": 2.13, "learning_rate": 3.7029026387691464e-05, "loss": 0.5762, "step": 5880, "task_loss": 0.5692154169082642 }, { "compression_loss": 0.0, "distillation_loss": 0.7326685786247253, "epoch": 2.13, "learning_rate": 3.696278186016411e-05, "loss": 0.6707, "step": 5890, "task_loss": 0.6973918676376343 }, { "compression_loss": 0.0, "distillation_loss": 0.6160091161727905, "epoch": 2.13, "learning_rate": 3.6896501430192134e-05, "loss": 0.617, "step": 5900, "task_loss": 0.6182623505592346 }, { "compression_loss": 0.0, "distillation_loss": 0.5769556164741516, "epoch": 2.14, "learning_rate": 3.6830185439539726e-05, "loss": 0.598, "step": 5910, "task_loss": 0.6873341202735901 }, { "compression_loss": 0.0, "distillation_loss": 0.46607810258865356, "epoch": 2.14, "learning_rate": 3.676383423015442e-05, "loss": 0.5983, "step": 5920, "task_loss": 0.8500930666923523 }, { "compression_loss": 0.0, "distillation_loss": 0.46026360988616943, "epoch": 2.14, "learning_rate": 3.6697448144165357e-05, "loss": 0.5742, "step": 5930, "task_loss": 0.8118923902511597 }, { "compression_loss": 0.0, "distillation_loss": 0.6993731260299683, "epoch": 2.15, "learning_rate": 3.663102752388155e-05, "loss": 0.6208, "step": 5940, "task_loss": 0.3766775131225586 }, { "compression_loss": 0.0, "distillation_loss": 0.5157681703567505, "epoch": 2.15, "learning_rate": 3.656457271179003e-05, "loss": 0.5637, "step": 5950, "task_loss": 0.42299726605415344 }, { "compression_loss": 0.0, "distillation_loss": 0.7117334604263306, "epoch": 2.15, "learning_rate": 3.6498084050554164e-05, "loss": 0.691, "step": 5960, "task_loss": 0.8287671804428101 }, { "compression_loss": 0.0, "distillation_loss": 0.41603410243988037, "epoch": 2.16, "learning_rate": 3.643156188301183e-05, "loss": 0.5911, "step": 5970, "task_loss": 0.4762575626373291 }, { "compression_loss": 0.0, "distillation_loss": 0.5428121089935303, "epoch": 2.16, "learning_rate": 3.636500655217371e-05, "loss": 0.5762, "step": 5980, "task_loss": 0.6505820751190186 }, { "compression_loss": 0.0, "distillation_loss": 0.3971426486968994, "epoch": 2.16, "learning_rate": 3.629841840122147e-05, "loss": 0.6486, "step": 5990, "task_loss": 0.5191299915313721 }, { "compression_loss": 0.0, "distillation_loss": 0.7955690622329712, "epoch": 2.17, "learning_rate": 3.6231797773505994e-05, "loss": 0.6275, "step": 6000, "task_loss": 0.656362771987915 }, { "epoch": 2.17, "eval_exact_match": 81.8070009460738, "eval_f1": 89.1233677854583, "step": 6000 }, { "compression_loss": 0.0, "distillation_loss": 0.6281618475914001, "epoch": 2.17, "learning_rate": 3.616514501254567e-05, "loss": 0.5901, "step": 6010, "task_loss": 0.6404191255569458 }, { "compression_loss": 0.0, "distillation_loss": 0.5767370462417603, "epoch": 2.18, "learning_rate": 3.6098460462024506e-05, "loss": 0.6066, "step": 6020, "task_loss": 0.506596565246582 }, { "compression_loss": 0.0, "distillation_loss": 0.7444403171539307, "epoch": 2.18, "learning_rate": 3.6031744465790495e-05, "loss": 0.6459, "step": 6030, "task_loss": 0.8166441917419434 }, { "compression_loss": 0.0, "distillation_loss": 0.6288504600524902, "epoch": 2.18, "learning_rate": 3.5964997367853755e-05, "loss": 0.5967, "step": 6040, "task_loss": 0.7653200030326843 }, { "compression_loss": 0.0, "distillation_loss": 0.6448060274124146, "epoch": 2.19, "learning_rate": 3.589821951238474e-05, "loss": 0.5961, "step": 6050, "task_loss": 1.2137736082077026 }, { "compression_loss": 0.0, "distillation_loss": 0.6648958921432495, "epoch": 2.19, "learning_rate": 3.5831411243712555e-05, "loss": 0.5949, "step": 6060, "task_loss": 0.5861155986785889 }, { "compression_loss": 0.0, "distillation_loss": 0.6277318596839905, "epoch": 2.19, "learning_rate": 3.5764572906323075e-05, "loss": 0.5954, "step": 6070, "task_loss": 0.6117310523986816 }, { "compression_loss": 0.0, "distillation_loss": 0.5213579535484314, "epoch": 2.2, "learning_rate": 3.569770484485726e-05, "loss": 0.5722, "step": 6080, "task_loss": 0.9827824831008911 }, { "compression_loss": 0.0, "distillation_loss": 0.4223155379295349, "epoch": 2.2, "learning_rate": 3.563080740410932e-05, "loss": 0.6018, "step": 6090, "task_loss": 0.6889990568161011 }, { "compression_loss": 0.0, "distillation_loss": 0.562755286693573, "epoch": 2.2, "learning_rate": 3.556388092902494e-05, "loss": 0.6329, "step": 6100, "task_loss": 0.5587911605834961 }, { "compression_loss": 0.0, "distillation_loss": 0.5433652997016907, "epoch": 2.21, "learning_rate": 3.549692576469955e-05, "loss": 0.602, "step": 6110, "task_loss": 0.4544098377227783 }, { "compression_loss": 0.0, "distillation_loss": 0.4736385941505432, "epoch": 2.21, "learning_rate": 3.542994225637648e-05, "loss": 0.58, "step": 6120, "task_loss": 0.4461681544780731 }, { "compression_loss": 0.0, "distillation_loss": 0.6638973951339722, "epoch": 2.22, "learning_rate": 3.536293074944522e-05, "loss": 0.6859, "step": 6130, "task_loss": 0.5688951015472412 }, { "compression_loss": 0.0, "distillation_loss": 0.7005724906921387, "epoch": 2.22, "learning_rate": 3.529589158943965e-05, "loss": 0.625, "step": 6140, "task_loss": 0.5859721899032593 }, { "compression_loss": 0.0, "distillation_loss": 0.6913973093032837, "epoch": 2.22, "learning_rate": 3.522882512203621e-05, "loss": 0.6089, "step": 6150, "task_loss": 1.2206425666809082 }, { "compression_loss": 0.0, "distillation_loss": 0.7091802358627319, "epoch": 2.23, "learning_rate": 3.516173169305216e-05, "loss": 0.6443, "step": 6160, "task_loss": 0.4974295496940613 }, { "compression_loss": 0.0, "distillation_loss": 0.7010023593902588, "epoch": 2.23, "learning_rate": 3.5094611648443773e-05, "loss": 0.654, "step": 6170, "task_loss": 0.5480866432189941 }, { "compression_loss": 0.0, "distillation_loss": 0.6212794780731201, "epoch": 2.23, "learning_rate": 3.5027465334304586e-05, "loss": 0.6342, "step": 6180, "task_loss": 0.7250664830207825 }, { "compression_loss": 0.0, "distillation_loss": 0.5260363221168518, "epoch": 2.24, "learning_rate": 3.496029309686356e-05, "loss": 0.5235, "step": 6190, "task_loss": 0.6064286231994629 }, { "compression_loss": 0.0, "distillation_loss": 0.546513020992279, "epoch": 2.24, "learning_rate": 3.4893095282483346e-05, "loss": 0.5609, "step": 6200, "task_loss": 0.3039868474006653 }, { "compression_loss": 0.0, "distillation_loss": 0.5084608793258667, "epoch": 2.24, "learning_rate": 3.482587223765847e-05, "loss": 0.6002, "step": 6210, "task_loss": 0.36580145359039307 }, { "compression_loss": 0.0, "distillation_loss": 0.7696444392204285, "epoch": 2.25, "learning_rate": 3.475862430901355e-05, "loss": 0.6239, "step": 6220, "task_loss": 1.048229694366455 }, { "compression_loss": 0.0, "distillation_loss": 0.38577717542648315, "epoch": 2.25, "learning_rate": 3.469135184330153e-05, "loss": 0.5504, "step": 6230, "task_loss": 0.4784127473831177 }, { "compression_loss": 0.0, "distillation_loss": 0.7590066194534302, "epoch": 2.26, "learning_rate": 3.462405518740186e-05, "loss": 0.6185, "step": 6240, "task_loss": 0.6026236414909363 }, { "compression_loss": 0.0, "distillation_loss": 0.7041105031967163, "epoch": 2.26, "learning_rate": 3.455673468831872e-05, "loss": 0.6119, "step": 6250, "task_loss": 0.9708863496780396 }, { "epoch": 2.26, "eval_exact_match": 81.5042573320719, "eval_f1": 88.78182464091005, "step": 6250 }, { "compression_loss": 0.0, "distillation_loss": 0.612851619720459, "epoch": 2.26, "learning_rate": 3.448939069317926e-05, "loss": 0.5884, "step": 6260, "task_loss": 1.315657615661621 }, { "compression_loss": 0.0, "distillation_loss": 0.5629523992538452, "epoch": 2.27, "learning_rate": 3.442202354923175e-05, "loss": 0.5514, "step": 6270, "task_loss": 0.5583246946334839 }, { "compression_loss": 0.0, "distillation_loss": 0.7930963039398193, "epoch": 2.27, "learning_rate": 3.435463360384383e-05, "loss": 0.6183, "step": 6280, "task_loss": 0.959362268447876 }, { "compression_loss": 0.0, "distillation_loss": 0.8136299848556519, "epoch": 2.27, "learning_rate": 3.428722120450074e-05, "loss": 0.644, "step": 6290, "task_loss": 0.8650078773498535 }, { "compression_loss": 0.0, "distillation_loss": 0.6255238056182861, "epoch": 2.28, "learning_rate": 3.421978669880346e-05, "loss": 0.594, "step": 6300, "task_loss": 1.1138728857040405 }, { "compression_loss": 0.0, "distillation_loss": 0.700169026851654, "epoch": 2.28, "learning_rate": 3.4152330434466994e-05, "loss": 0.5617, "step": 6310, "task_loss": 0.5409881472587585 }, { "compression_loss": 0.0, "distillation_loss": 0.5610506534576416, "epoch": 2.28, "learning_rate": 3.408485275931851e-05, "loss": 0.6282, "step": 6320, "task_loss": 0.4196653366088867 }, { "compression_loss": 0.0, "distillation_loss": 0.504428505897522, "epoch": 2.29, "learning_rate": 3.40173540212956e-05, "loss": 0.5916, "step": 6330, "task_loss": 0.4185686707496643 }, { "compression_loss": 0.0, "distillation_loss": 0.5590067505836487, "epoch": 2.29, "learning_rate": 3.394983456844446e-05, "loss": 0.6516, "step": 6340, "task_loss": 0.6237858533859253 }, { "compression_loss": 0.0, "distillation_loss": 0.6239622235298157, "epoch": 2.29, "learning_rate": 3.3882294748918074e-05, "loss": 0.6038, "step": 6350, "task_loss": 0.54815673828125 }, { "compression_loss": 0.0, "distillation_loss": 0.6079332828521729, "epoch": 2.3, "learning_rate": 3.381473491097448e-05, "loss": 0.5802, "step": 6360, "task_loss": 0.738688051700592 }, { "compression_loss": 0.0, "distillation_loss": 0.6828926801681519, "epoch": 2.3, "learning_rate": 3.374715540297492e-05, "loss": 0.5867, "step": 6370, "task_loss": 0.9791734218597412 }, { "compression_loss": 0.0, "distillation_loss": 0.5750732421875, "epoch": 2.31, "learning_rate": 3.367955657338205e-05, "loss": 0.5862, "step": 6380, "task_loss": 0.6127207279205322 }, { "compression_loss": 0.0, "distillation_loss": 0.5833463668823242, "epoch": 2.31, "learning_rate": 3.3611938770758186e-05, "loss": 0.6614, "step": 6390, "task_loss": 0.5630807876586914 }, { "compression_loss": 0.0, "distillation_loss": 0.6061692237854004, "epoch": 2.31, "learning_rate": 3.354430234376344e-05, "loss": 0.5972, "step": 6400, "task_loss": 0.6525106430053711 }, { "compression_loss": 0.0, "distillation_loss": 0.6266899704933167, "epoch": 2.32, "learning_rate": 3.347664764115398e-05, "loss": 0.537, "step": 6410, "task_loss": 0.9735203385353088 }, { "compression_loss": 0.0, "distillation_loss": 0.7700897455215454, "epoch": 2.32, "learning_rate": 3.340897501178019e-05, "loss": 0.6051, "step": 6420, "task_loss": 0.7603612542152405 }, { "compression_loss": 0.0, "distillation_loss": 0.541512131690979, "epoch": 2.32, "learning_rate": 3.334128480458492e-05, "loss": 0.6205, "step": 6430, "task_loss": 0.633751392364502 }, { "compression_loss": 0.0, "distillation_loss": 0.47812074422836304, "epoch": 2.33, "learning_rate": 3.3273577368601644e-05, "loss": 0.6212, "step": 6440, "task_loss": 0.5502046346664429 }, { "compression_loss": 0.0, "distillation_loss": 0.574849545955658, "epoch": 2.33, "learning_rate": 3.320585305295264e-05, "loss": 0.5781, "step": 6450, "task_loss": 0.6506343483924866 }, { "compression_loss": 0.0, "distillation_loss": 0.7085522413253784, "epoch": 2.33, "learning_rate": 3.3138112206847296e-05, "loss": 0.5527, "step": 6460, "task_loss": 0.9275124073028564 }, { "compression_loss": 0.0, "distillation_loss": 0.44274505972862244, "epoch": 2.34, "learning_rate": 3.307035517958015e-05, "loss": 0.5773, "step": 6470, "task_loss": 0.32708221673965454 }, { "compression_loss": 0.0, "distillation_loss": 0.3988785147666931, "epoch": 2.34, "learning_rate": 3.3002582320529247e-05, "loss": 0.6215, "step": 6480, "task_loss": 0.6397969126701355 }, { "compression_loss": 0.0, "distillation_loss": 0.5385051965713501, "epoch": 2.35, "learning_rate": 3.293479397915423e-05, "loss": 0.5694, "step": 6490, "task_loss": 0.43270617723464966 }, { "compression_loss": 0.0, "distillation_loss": 0.5102781653404236, "epoch": 2.35, "learning_rate": 3.286699050499458e-05, "loss": 0.5186, "step": 6500, "task_loss": 0.4291302561759949 }, { "epoch": 2.35, "eval_exact_match": 81.9678334910123, "eval_f1": 89.18938641074499, "step": 6500 }, { "compression_loss": 0.0, "distillation_loss": 0.5587887763977051, "epoch": 2.35, "learning_rate": 3.279917224766781e-05, "loss": 0.5673, "step": 6510, "task_loss": 0.38369613885879517 }, { "compression_loss": 0.0, "distillation_loss": 0.4702199697494507, "epoch": 2.36, "learning_rate": 3.273133955686768e-05, "loss": 0.6397, "step": 6520, "task_loss": 0.7154326438903809 }, { "compression_loss": 0.0, "distillation_loss": 0.5072304010391235, "epoch": 2.36, "learning_rate": 3.2663492782362315e-05, "loss": 0.597, "step": 6530, "task_loss": 0.7844618558883667 }, { "compression_loss": 0.0, "distillation_loss": 0.45684850215911865, "epoch": 2.36, "learning_rate": 3.259563227399253e-05, "loss": 0.5534, "step": 6540, "task_loss": 0.30611854791641235 }, { "compression_loss": 0.0, "distillation_loss": 0.8612761497497559, "epoch": 2.37, "learning_rate": 3.252775838166991e-05, "loss": 0.6271, "step": 6550, "task_loss": 0.923941969871521 }, { "compression_loss": 0.0, "distillation_loss": 0.5912673473358154, "epoch": 2.37, "learning_rate": 3.2459871455375075e-05, "loss": 0.5532, "step": 6560, "task_loss": 0.7655602097511292 }, { "compression_loss": 0.0, "distillation_loss": 0.6388885974884033, "epoch": 2.37, "learning_rate": 3.239197184515584e-05, "loss": 0.5633, "step": 6570, "task_loss": 1.067481517791748 }, { "compression_loss": 0.0, "distillation_loss": 0.51146000623703, "epoch": 2.38, "learning_rate": 3.232405990112543e-05, "loss": 0.5882, "step": 6580, "task_loss": 0.6557249426841736 }, { "compression_loss": 0.0, "distillation_loss": 0.5170506238937378, "epoch": 2.38, "learning_rate": 3.225613597346068e-05, "loss": 0.5887, "step": 6590, "task_loss": 0.25539761781692505 }, { "compression_loss": 0.0, "distillation_loss": 0.4798586368560791, "epoch": 2.39, "learning_rate": 3.218820041240017e-05, "loss": 0.6124, "step": 6600, "task_loss": 0.6159725785255432 }, { "compression_loss": 0.0, "distillation_loss": 0.5324605703353882, "epoch": 2.39, "learning_rate": 3.212025356824251e-05, "loss": 0.5845, "step": 6610, "task_loss": 0.5922216176986694 }, { "compression_loss": 0.0, "distillation_loss": 0.40312764048576355, "epoch": 2.39, "learning_rate": 3.20522957913445e-05, "loss": 0.5713, "step": 6620, "task_loss": 0.4075431227684021 }, { "compression_loss": 0.0, "distillation_loss": 0.5648410320281982, "epoch": 2.4, "learning_rate": 3.198432743211925e-05, "loss": 0.6679, "step": 6630, "task_loss": 0.5671501755714417 }, { "compression_loss": 0.0, "distillation_loss": 0.4727499186992645, "epoch": 2.4, "learning_rate": 3.191634884103451e-05, "loss": 0.631, "step": 6640, "task_loss": 0.527313768863678 }, { "compression_loss": 0.0, "distillation_loss": 0.5744331479072571, "epoch": 2.4, "learning_rate": 3.184836036861074e-05, "loss": 0.566, "step": 6650, "task_loss": 0.8066692352294922 }, { "compression_loss": 0.0, "distillation_loss": 0.5417715311050415, "epoch": 2.41, "learning_rate": 3.178036236541936e-05, "loss": 0.5379, "step": 6660, "task_loss": 0.3417711555957794 }, { "compression_loss": 0.0, "distillation_loss": 0.4441830515861511, "epoch": 2.41, "learning_rate": 3.171235518208096e-05, "loss": 0.5847, "step": 6670, "task_loss": 0.5670948028564453 }, { "compression_loss": 0.0, "distillation_loss": 0.6484647393226624, "epoch": 2.41, "learning_rate": 3.164433916926342e-05, "loss": 0.6377, "step": 6680, "task_loss": 0.8303718566894531 }, { "compression_loss": 0.0, "distillation_loss": 0.4492584764957428, "epoch": 2.42, "learning_rate": 3.15763146776802e-05, "loss": 0.5715, "step": 6690, "task_loss": 0.5020931959152222 }, { "compression_loss": 0.0, "distillation_loss": 0.7698655128479004, "epoch": 2.42, "learning_rate": 3.1508282058088424e-05, "loss": 0.6297, "step": 6700, "task_loss": 0.8664661645889282 }, { "compression_loss": 0.0, "distillation_loss": 0.5374277234077454, "epoch": 2.43, "learning_rate": 3.144024166128718e-05, "loss": 0.5943, "step": 6710, "task_loss": 0.8371603488922119 }, { "compression_loss": 0.0, "distillation_loss": 0.44918084144592285, "epoch": 2.43, "learning_rate": 3.137219383811562e-05, "loss": 0.5709, "step": 6720, "task_loss": 0.6834157109260559 }, { "compression_loss": 0.0, "distillation_loss": 0.5904335975646973, "epoch": 2.43, "learning_rate": 3.130413893945121e-05, "loss": 0.5909, "step": 6730, "task_loss": 0.6250230073928833 }, { "compression_loss": 0.0, "distillation_loss": 0.5855615139007568, "epoch": 2.44, "learning_rate": 3.12360773162079e-05, "loss": 0.5643, "step": 6740, "task_loss": 0.7704566121101379 }, { "compression_loss": 0.0, "distillation_loss": 0.5405184626579285, "epoch": 2.44, "learning_rate": 3.1168009319334276e-05, "loss": 0.5897, "step": 6750, "task_loss": 0.7256063222885132 }, { "epoch": 2.44, "eval_exact_match": 81.759697256386, "eval_f1": 89.06270353602996, "step": 6750 }, { "compression_loss": 0.0, "distillation_loss": 0.8909243941307068, "epoch": 2.44, "learning_rate": 3.1099935299811844e-05, "loss": 0.7078, "step": 6760, "task_loss": 0.8761190176010132 }, { "compression_loss": 0.0, "distillation_loss": 0.5284467935562134, "epoch": 2.45, "learning_rate": 3.103185560865314e-05, "loss": 0.5888, "step": 6770, "task_loss": 0.5950831770896912 }, { "compression_loss": 0.0, "distillation_loss": 0.6189389228820801, "epoch": 2.45, "learning_rate": 3.096377059689994e-05, "loss": 0.5407, "step": 6780, "task_loss": 0.8411575555801392 }, { "compression_loss": 0.0, "distillation_loss": 0.5810912847518921, "epoch": 2.45, "learning_rate": 3.089568061562146e-05, "loss": 0.6317, "step": 6790, "task_loss": 0.706413745880127 }, { "compression_loss": 0.0, "distillation_loss": 0.7708436250686646, "epoch": 2.46, "learning_rate": 3.082758601591253e-05, "loss": 0.6281, "step": 6800, "task_loss": 0.8470696806907654 }, { "compression_loss": 0.0, "distillation_loss": 0.7277690172195435, "epoch": 2.46, "learning_rate": 3.075948714889181e-05, "loss": 0.647, "step": 6810, "task_loss": 0.6179013848304749 }, { "compression_loss": 0.0, "distillation_loss": 0.43784040212631226, "epoch": 2.46, "learning_rate": 3.069138436569997e-05, "loss": 0.6279, "step": 6820, "task_loss": 0.5682166814804077 }, { "compression_loss": 0.0, "distillation_loss": 0.6058757305145264, "epoch": 2.47, "learning_rate": 3.062327801749784e-05, "loss": 0.5572, "step": 6830, "task_loss": 0.6574521064758301 }, { "compression_loss": 0.0, "distillation_loss": 0.45435667037963867, "epoch": 2.47, "learning_rate": 3.055516845546468e-05, "loss": 0.5397, "step": 6840, "task_loss": 0.5073586106300354 }, { "compression_loss": 0.0, "distillation_loss": 0.5500397682189941, "epoch": 2.48, "learning_rate": 3.0487056030796262e-05, "loss": 0.586, "step": 6850, "task_loss": 0.6582145094871521 }, { "compression_loss": 0.0, "distillation_loss": 0.46652621030807495, "epoch": 2.48, "learning_rate": 3.0418941094703176e-05, "loss": 0.5826, "step": 6860, "task_loss": 0.367644339799881 }, { "compression_loss": 0.0, "distillation_loss": 0.583972692489624, "epoch": 2.48, "learning_rate": 3.0350823998408926e-05, "loss": 0.5876, "step": 6870, "task_loss": 1.2166520357131958 }, { "compression_loss": 0.0, "distillation_loss": 0.73996502161026, "epoch": 2.49, "learning_rate": 3.028270509314817e-05, "loss": 0.6818, "step": 6880, "task_loss": 0.9112734198570251 }, { "compression_loss": 0.0, "distillation_loss": 0.38398513197898865, "epoch": 2.49, "learning_rate": 3.0214584730164898e-05, "loss": 0.5749, "step": 6890, "task_loss": 0.844524085521698 }, { "compression_loss": 0.0, "distillation_loss": 0.40584832429885864, "epoch": 2.49, "learning_rate": 3.014646326071059e-05, "loss": 0.5586, "step": 6900, "task_loss": 0.5562839508056641 }, { "compression_loss": 0.0, "distillation_loss": 0.5574773550033569, "epoch": 2.5, "learning_rate": 3.0078341036042457e-05, "loss": 0.644, "step": 6910, "task_loss": 0.594291090965271 }, { "compression_loss": 0.0, "distillation_loss": 0.5767573118209839, "epoch": 2.5, "learning_rate": 3.0010218407421613e-05, "loss": 0.5991, "step": 6920, "task_loss": 0.8964383006095886 }, { "compression_loss": 0.0, "distillation_loss": 0.5444501638412476, "epoch": 2.5, "learning_rate": 2.9942095726111204e-05, "loss": 0.5118, "step": 6930, "task_loss": 0.5208333730697632 }, { "compression_loss": 0.0, "distillation_loss": 0.6177530288696289, "epoch": 2.51, "learning_rate": 2.9873973343374728e-05, "loss": 0.6443, "step": 6940, "task_loss": 0.48882049322128296 }, { "compression_loss": 0.0, "distillation_loss": 0.5032823085784912, "epoch": 2.51, "learning_rate": 2.9805851610474044e-05, "loss": 0.5943, "step": 6950, "task_loss": 0.5606658458709717 }, { "compression_loss": 0.0, "distillation_loss": 0.6921489238739014, "epoch": 2.52, "learning_rate": 2.9737730878667765e-05, "loss": 0.6433, "step": 6960, "task_loss": 0.5478000044822693 }, { "compression_loss": 0.0, "distillation_loss": 0.40710246562957764, "epoch": 2.52, "learning_rate": 2.9669611499209254e-05, "loss": 0.644, "step": 6970, "task_loss": 0.2990033030509949 }, { "compression_loss": 0.0, "distillation_loss": 0.5349779725074768, "epoch": 2.52, "learning_rate": 2.960149382334496e-05, "loss": 0.5927, "step": 6980, "task_loss": 0.5776247978210449 }, { "compression_loss": 0.0, "distillation_loss": 0.6616565585136414, "epoch": 2.53, "learning_rate": 2.9533378202312512e-05, "loss": 0.5778, "step": 6990, "task_loss": 0.7887768745422363 }, { "compression_loss": 0.0, "distillation_loss": 0.8387987613677979, "epoch": 2.53, "learning_rate": 2.9465264987338966e-05, "loss": 0.6088, "step": 7000, "task_loss": 0.5467420220375061 }, { "epoch": 2.53, "eval_exact_match": 82.07190160832545, "eval_f1": 89.27410906455074, "step": 7000 }, { "compression_loss": 0.0, "distillation_loss": 0.47956252098083496, "epoch": 2.53, "learning_rate": 2.939715452963896e-05, "loss": 0.5898, "step": 7010, "task_loss": 0.6686305999755859 }, { "compression_loss": 0.0, "distillation_loss": 0.4508477747440338, "epoch": 2.54, "learning_rate": 2.9329047180412914e-05, "loss": 0.5906, "step": 7020, "task_loss": 0.5521897077560425 }, { "compression_loss": 0.0, "distillation_loss": 0.47773054242134094, "epoch": 2.54, "learning_rate": 2.9260943290845216e-05, "loss": 0.5783, "step": 7030, "task_loss": 0.7009822130203247 }, { "compression_loss": 0.0, "distillation_loss": 0.5153251886367798, "epoch": 2.54, "learning_rate": 2.919284321210245e-05, "loss": 0.5826, "step": 7040, "task_loss": 0.7742196321487427 }, { "compression_loss": 0.0, "distillation_loss": 0.5433927178382874, "epoch": 2.55, "learning_rate": 2.9124747295331482e-05, "loss": 0.5769, "step": 7050, "task_loss": 0.5901714563369751 }, { "compression_loss": 0.0, "distillation_loss": 0.7135424613952637, "epoch": 2.55, "learning_rate": 2.9056655891657793e-05, "loss": 0.6148, "step": 7060, "task_loss": 0.9341310262680054 }, { "compression_loss": 0.0, "distillation_loss": 0.7931948304176331, "epoch": 2.56, "learning_rate": 2.8988569352183534e-05, "loss": 0.6634, "step": 7070, "task_loss": 1.1662893295288086 }, { "compression_loss": 0.0, "distillation_loss": 0.30930817127227783, "epoch": 2.56, "learning_rate": 2.8920488027985812e-05, "loss": 0.5629, "step": 7080, "task_loss": 0.2516777217388153 }, { "compression_loss": 0.0, "distillation_loss": 0.6308006644248962, "epoch": 2.56, "learning_rate": 2.8852412270114817e-05, "loss": 0.6637, "step": 7090, "task_loss": 0.7488775253295898 }, { "compression_loss": 0.0, "distillation_loss": 0.8185533285140991, "epoch": 2.57, "learning_rate": 2.8784342429592058e-05, "loss": 0.6215, "step": 7100, "task_loss": 0.9923599362373352 }, { "compression_loss": 0.0, "distillation_loss": 0.7322626113891602, "epoch": 2.57, "learning_rate": 2.871627885740851e-05, "loss": 0.5773, "step": 7110, "task_loss": 0.755662739276886 }, { "compression_loss": 0.0, "distillation_loss": 0.5336388349533081, "epoch": 2.57, "learning_rate": 2.8648221904522858e-05, "loss": 0.574, "step": 7120, "task_loss": 0.44107335805892944 }, { "compression_loss": 0.0, "distillation_loss": 0.5362550020217896, "epoch": 2.58, "learning_rate": 2.8580171921859606e-05, "loss": 0.6477, "step": 7130, "task_loss": 0.5944764614105225 }, { "compression_loss": 0.0, "distillation_loss": 0.5030221939086914, "epoch": 2.58, "learning_rate": 2.851212926030738e-05, "loss": 0.6169, "step": 7140, "task_loss": 0.8135521411895752 }, { "compression_loss": 0.0, "distillation_loss": 0.742468535900116, "epoch": 2.58, "learning_rate": 2.8450897414439317e-05, "loss": 0.623, "step": 7150, "task_loss": 0.9540405869483948 }, { "compression_loss": 0.0, "distillation_loss": 0.7146216630935669, "epoch": 2.59, "learning_rate": 2.838286962955943e-05, "loss": 0.6073, "step": 7160, "task_loss": 0.8092219233512878 }, { "compression_loss": 0.0, "distillation_loss": 0.5073987245559692, "epoch": 2.59, "learning_rate": 2.8314850183147516e-05, "loss": 0.5979, "step": 7170, "task_loss": 0.6714034080505371 }, { "compression_loss": 0.0, "distillation_loss": 0.5770305395126343, "epoch": 2.59, "learning_rate": 2.8246839425934724e-05, "loss": 0.4854, "step": 7180, "task_loss": 0.33490344882011414 }, { "compression_loss": 0.0, "distillation_loss": 0.5794994831085205, "epoch": 2.6, "learning_rate": 2.817883770860737e-05, "loss": 0.6159, "step": 7190, "task_loss": 0.6019021272659302 }, { "compression_loss": 0.0, "distillation_loss": 0.5001416206359863, "epoch": 2.6, "learning_rate": 2.81108453818052e-05, "loss": 0.5719, "step": 7200, "task_loss": 0.525328516960144 }, { "compression_loss": 0.0, "distillation_loss": 0.6738238334655762, "epoch": 2.61, "learning_rate": 2.8042862796119482e-05, "loss": 0.6662, "step": 7210, "task_loss": 0.6700210571289062 }, { "compression_loss": 0.0, "distillation_loss": 0.48849350214004517, "epoch": 2.61, "learning_rate": 2.7974890302091327e-05, "loss": 0.5327, "step": 7220, "task_loss": 0.6737467050552368 }, { "compression_loss": 0.0, "distillation_loss": 0.593249499797821, "epoch": 2.61, "learning_rate": 2.7906928250209743e-05, "loss": 0.6226, "step": 7230, "task_loss": 0.7583106160163879 }, { "compression_loss": 0.0, "distillation_loss": 0.6394388675689697, "epoch": 2.62, "learning_rate": 2.783897699090994e-05, "loss": 0.5993, "step": 7240, "task_loss": 0.9611802101135254 }, { "compression_loss": 0.0, "distillation_loss": 0.814205527305603, "epoch": 2.62, "learning_rate": 2.7771036874571443e-05, "loss": 0.5924, "step": 7250, "task_loss": 0.9047456979751587 }, { "epoch": 2.62, "eval_exact_match": 81.57048249763481, "eval_f1": 88.98593489499765, "step": 7250 }, { "compression_loss": 0.0, "distillation_loss": 0.5149030685424805, "epoch": 2.62, "learning_rate": 2.770310825151635e-05, "loss": 0.6186, "step": 7260, "task_loss": 0.5792198777198792 }, { "compression_loss": 0.0, "distillation_loss": 0.6406042575836182, "epoch": 2.63, "learning_rate": 2.763519147200748e-05, "loss": 0.6658, "step": 7270, "task_loss": 0.5885680913925171 }, { "compression_loss": 0.0, "distillation_loss": 0.702430009841919, "epoch": 2.63, "learning_rate": 2.7567286886246593e-05, "loss": 0.5986, "step": 7280, "task_loss": 0.6880907416343689 }, { "compression_loss": 0.0, "distillation_loss": 0.5257836580276489, "epoch": 2.63, "learning_rate": 2.749939484437255e-05, "loss": 0.5546, "step": 7290, "task_loss": 0.5597729086875916 }, { "compression_loss": 0.0, "distillation_loss": 0.583315372467041, "epoch": 2.64, "learning_rate": 2.7431515696459577e-05, "loss": 0.5803, "step": 7300, "task_loss": 0.6037495732307434 }, { "compression_loss": 0.0, "distillation_loss": 0.5714931488037109, "epoch": 2.64, "learning_rate": 2.736364979251535e-05, "loss": 0.5847, "step": 7310, "task_loss": 0.6359392404556274 }, { "compression_loss": 0.0, "distillation_loss": 0.5830150842666626, "epoch": 2.65, "learning_rate": 2.7295797482479327e-05, "loss": 0.5466, "step": 7320, "task_loss": 0.561113715171814 }, { "compression_loss": 0.0, "distillation_loss": 0.7011891603469849, "epoch": 2.65, "learning_rate": 2.7227959116220803e-05, "loss": 0.674, "step": 7330, "task_loss": 1.1329269409179688 }, { "compression_loss": 0.0, "distillation_loss": 1.028478980064392, "epoch": 2.65, "learning_rate": 2.7160135043537236e-05, "loss": 0.6032, "step": 7340, "task_loss": 0.9812748432159424 }, { "compression_loss": 0.0, "distillation_loss": 0.8232661485671997, "epoch": 2.66, "learning_rate": 2.7092325614152328e-05, "loss": 0.5801, "step": 7350, "task_loss": 0.5196477770805359 }, { "compression_loss": 0.0, "distillation_loss": 0.47645464539527893, "epoch": 2.66, "learning_rate": 2.7024531177714316e-05, "loss": 0.5197, "step": 7360, "task_loss": 0.3743845224380493 }, { "compression_loss": 0.0, "distillation_loss": 0.5509925484657288, "epoch": 2.66, "learning_rate": 2.6956752083794094e-05, "loss": 0.6336, "step": 7370, "task_loss": 0.6567373871803284 }, { "compression_loss": 0.0, "distillation_loss": 0.6547894477844238, "epoch": 2.67, "learning_rate": 2.688898868188348e-05, "loss": 0.632, "step": 7380, "task_loss": 0.632176399230957 }, { "compression_loss": 0.0, "distillation_loss": 0.624300479888916, "epoch": 2.67, "learning_rate": 2.682124132139334e-05, "loss": 0.5946, "step": 7390, "task_loss": 0.5805683135986328 }, { "compression_loss": 0.0, "distillation_loss": 0.5678684115409851, "epoch": 2.67, "learning_rate": 2.675351035165188e-05, "loss": 0.5989, "step": 7400, "task_loss": 0.570992112159729 }, { "compression_loss": 0.0, "distillation_loss": 0.8260520696640015, "epoch": 2.68, "learning_rate": 2.668579612190271e-05, "loss": 0.5988, "step": 7410, "task_loss": 0.6632480621337891 }, { "compression_loss": 0.0, "distillation_loss": 0.5764724612236023, "epoch": 2.68, "learning_rate": 2.6618098981303204e-05, "loss": 0.6097, "step": 7420, "task_loss": 0.6332680583000183 }, { "compression_loss": 0.0, "distillation_loss": 0.5330133438110352, "epoch": 2.69, "learning_rate": 2.655041927892257e-05, "loss": 0.5412, "step": 7430, "task_loss": 0.6299136281013489 }, { "compression_loss": 0.0, "distillation_loss": 0.5107343196868896, "epoch": 2.69, "learning_rate": 2.6482757363740117e-05, "loss": 0.4791, "step": 7440, "task_loss": 0.7189974188804626 }, { "compression_loss": 0.0, "distillation_loss": 0.48826032876968384, "epoch": 2.69, "learning_rate": 2.6415113584643424e-05, "loss": 0.562, "step": 7450, "task_loss": 0.5355274677276611 }, { "compression_loss": 0.0, "distillation_loss": 1.131540060043335, "epoch": 2.7, "learning_rate": 2.6347488290426573e-05, "loss": 0.6699, "step": 7460, "task_loss": 0.9076927900314331 }, { "compression_loss": 0.0, "distillation_loss": 0.5723243355751038, "epoch": 2.7, "learning_rate": 2.627988182978831e-05, "loss": 0.5461, "step": 7470, "task_loss": 0.6198363304138184 }, { "compression_loss": 0.0, "distillation_loss": 0.4197266101837158, "epoch": 2.7, "learning_rate": 2.6212294551330293e-05, "loss": 0.5525, "step": 7480, "task_loss": 0.34966617822647095 }, { "compression_loss": 0.0, "distillation_loss": 0.46035024523735046, "epoch": 2.71, "learning_rate": 2.6144726803555232e-05, "loss": 0.6712, "step": 7490, "task_loss": 0.39124152064323425 }, { "compression_loss": 0.0, "distillation_loss": 0.46283459663391113, "epoch": 2.71, "learning_rate": 2.6077178934865193e-05, "loss": 0.5959, "step": 7500, "task_loss": 0.8845962882041931 }, { "epoch": 2.71, "eval_exact_match": 82.52601702932829, "eval_f1": 89.60594328334614, "step": 7500 }, { "compression_loss": 0.0, "distillation_loss": 0.5695556402206421, "epoch": 2.71, "learning_rate": 2.6009651293559663e-05, "loss": 0.5596, "step": 7510, "task_loss": 0.4987695515155792 }, { "compression_loss": 0.0, "distillation_loss": 0.6206524968147278, "epoch": 2.72, "learning_rate": 2.59421442278339e-05, "loss": 0.6555, "step": 7520, "task_loss": 0.6371747255325317 }, { "compression_loss": 0.0, "distillation_loss": 0.431548535823822, "epoch": 2.72, "learning_rate": 2.5874658085777014e-05, "loss": 0.535, "step": 7530, "task_loss": 0.4861009418964386 }, { "compression_loss": 0.0, "distillation_loss": 0.4274836778640747, "epoch": 2.72, "learning_rate": 2.580719321537026e-05, "loss": 0.5952, "step": 7540, "task_loss": 0.7763680219650269 }, { "compression_loss": 0.0, "distillation_loss": 0.43078041076660156, "epoch": 2.73, "learning_rate": 2.5739749964485183e-05, "loss": 0.6147, "step": 7550, "task_loss": 0.37037426233291626 }, { "compression_loss": 0.0, "distillation_loss": 0.5834618806838989, "epoch": 2.73, "learning_rate": 2.5672328680881876e-05, "loss": 0.6496, "step": 7560, "task_loss": 0.8014817833900452 }, { "compression_loss": 0.0, "distillation_loss": 0.8053905963897705, "epoch": 2.74, "learning_rate": 2.5604929712207137e-05, "loss": 0.6696, "step": 7570, "task_loss": 0.8470737934112549 }, { "compression_loss": 0.0, "distillation_loss": 0.7601429224014282, "epoch": 2.74, "learning_rate": 2.5537553405992723e-05, "loss": 0.5971, "step": 7580, "task_loss": 1.2324957847595215 }, { "compression_loss": 0.0, "distillation_loss": 0.46784117817878723, "epoch": 2.74, "learning_rate": 2.547020010965351e-05, "loss": 0.558, "step": 7590, "task_loss": 0.6156980991363525 }, { "compression_loss": 0.0, "distillation_loss": 0.8725177049636841, "epoch": 2.75, "learning_rate": 2.5402870170485775e-05, "loss": 0.6351, "step": 7600, "task_loss": 0.8051373958587646 }, { "compression_loss": 0.0, "distillation_loss": 0.5979294776916504, "epoch": 2.75, "learning_rate": 2.533556393566528e-05, "loss": 0.583, "step": 7610, "task_loss": 0.5045502781867981 }, { "compression_loss": 0.0, "distillation_loss": 0.422203004360199, "epoch": 2.75, "learning_rate": 2.5268281752245642e-05, "loss": 0.5823, "step": 7620, "task_loss": 0.6445773243904114 }, { "compression_loss": 0.0, "distillation_loss": 0.5774694085121155, "epoch": 2.76, "learning_rate": 2.520102396715641e-05, "loss": 0.6053, "step": 7630, "task_loss": 0.4244164824485779 }, { "compression_loss": 0.0, "distillation_loss": 0.4557744264602661, "epoch": 2.76, "learning_rate": 2.513379092720134e-05, "loss": 0.566, "step": 7640, "task_loss": 0.6151514053344727 }, { "compression_loss": 0.0, "distillation_loss": 0.5988390445709229, "epoch": 2.76, "learning_rate": 2.5066582979056587e-05, "loss": 0.5673, "step": 7650, "task_loss": 1.005041480064392 }, { "compression_loss": 0.0, "distillation_loss": 0.8742297887802124, "epoch": 2.77, "learning_rate": 2.4999400469268948e-05, "loss": 0.6041, "step": 7660, "task_loss": 0.8448403477668762 }, { "compression_loss": 0.0, "distillation_loss": 0.42271387577056885, "epoch": 2.77, "learning_rate": 2.493224374425402e-05, "loss": 0.5586, "step": 7670, "task_loss": 0.44839856028556824 }, { "compression_loss": 0.0, "distillation_loss": 0.3397362232208252, "epoch": 2.78, "learning_rate": 2.486511315029447e-05, "loss": 0.5687, "step": 7680, "task_loss": 0.346187949180603 }, { "compression_loss": 0.0, "distillation_loss": 0.3857961893081665, "epoch": 2.78, "learning_rate": 2.47980090335382e-05, "loss": 0.5529, "step": 7690, "task_loss": 0.4324136972427368 }, { "compression_loss": 0.0, "distillation_loss": 0.5957242250442505, "epoch": 2.78, "learning_rate": 2.4730931739996625e-05, "loss": 0.6139, "step": 7700, "task_loss": 0.9946978092193604 }, { "compression_loss": 0.0, "distillation_loss": 0.7779924869537354, "epoch": 2.79, "learning_rate": 2.4663881615542794e-05, "loss": 0.6516, "step": 7710, "task_loss": 0.8503498435020447 }, { "compression_loss": 0.0, "distillation_loss": 0.4511723816394806, "epoch": 2.79, "learning_rate": 2.4596859005909728e-05, "loss": 0.6176, "step": 7720, "task_loss": 0.6886883974075317 }, { "compression_loss": 0.0, "distillation_loss": 0.49279749393463135, "epoch": 2.79, "learning_rate": 2.4529864256688515e-05, "loss": 0.5525, "step": 7730, "task_loss": 0.6275307536125183 }, { "compression_loss": 0.0, "distillation_loss": 0.4928208589553833, "epoch": 2.8, "learning_rate": 2.4462897713326633e-05, "loss": 0.5676, "step": 7740, "task_loss": 0.784211277961731 }, { "compression_loss": 0.0, "distillation_loss": 0.6046674847602844, "epoch": 2.8, "learning_rate": 2.4395959721126073e-05, "loss": 0.5396, "step": 7750, "task_loss": 0.780253529548645 }, { "epoch": 2.8, "eval_exact_match": 82.2705771050142, "eval_f1": 89.53313416894864, "step": 7750 }, { "compression_loss": 0.0, "distillation_loss": 0.83915114402771, "epoch": 2.8, "learning_rate": 2.432905062524165e-05, "loss": 0.5805, "step": 7760, "task_loss": 1.0383943319320679 }, { "compression_loss": 0.0, "distillation_loss": 0.5021464228630066, "epoch": 2.81, "learning_rate": 2.426217077067916e-05, "loss": 0.544, "step": 7770, "task_loss": 0.6696529388427734 }, { "compression_loss": 0.0, "distillation_loss": 0.6492622494697571, "epoch": 2.81, "learning_rate": 2.419532050229361e-05, "loss": 0.6244, "step": 7780, "task_loss": 0.7537462115287781 }, { "compression_loss": 0.0, "distillation_loss": 0.5134307146072388, "epoch": 2.82, "learning_rate": 2.412850016478747e-05, "loss": 0.6005, "step": 7790, "task_loss": 0.8974807262420654 }, { "compression_loss": 0.0, "distillation_loss": 0.4806973338127136, "epoch": 2.82, "learning_rate": 2.4061710102708885e-05, "loss": 0.5571, "step": 7800, "task_loss": 0.3275541663169861 }, { "compression_loss": 0.0, "distillation_loss": 0.535984218120575, "epoch": 2.82, "learning_rate": 2.3994950660449844e-05, "loss": 0.542, "step": 7810, "task_loss": 0.9069480895996094 }, { "compression_loss": 0.0, "distillation_loss": 0.5129786133766174, "epoch": 2.83, "learning_rate": 2.3928222182244508e-05, "loss": 0.5683, "step": 7820, "task_loss": 0.3967326581478119 }, { "compression_loss": 0.0, "distillation_loss": 0.5826820135116577, "epoch": 2.83, "learning_rate": 2.3861525012167334e-05, "loss": 0.581, "step": 7830, "task_loss": 0.7916654348373413 }, { "compression_loss": 0.0, "distillation_loss": 0.8494963645935059, "epoch": 2.83, "learning_rate": 2.379485949413137e-05, "loss": 0.6048, "step": 7840, "task_loss": 1.0144283771514893 }, { "compression_loss": 0.0, "distillation_loss": 0.5546177625656128, "epoch": 2.84, "learning_rate": 2.3728225971886433e-05, "loss": 0.581, "step": 7850, "task_loss": 0.45552924275398254 }, { "compression_loss": 0.0, "distillation_loss": 0.581791877746582, "epoch": 2.84, "learning_rate": 2.366162478901738e-05, "loss": 0.5915, "step": 7860, "task_loss": 0.5242971777915955 }, { "compression_loss": 0.0, "distillation_loss": 0.4776926040649414, "epoch": 2.84, "learning_rate": 2.359505628894229e-05, "loss": 0.5925, "step": 7870, "task_loss": 0.7510772943496704 }, { "compression_loss": 0.0, "distillation_loss": 0.4774831235408783, "epoch": 2.85, "learning_rate": 2.3528520814910756e-05, "loss": 0.612, "step": 7880, "task_loss": 0.41770559549331665 }, { "compression_loss": 0.0, "distillation_loss": 0.5438613891601562, "epoch": 2.85, "learning_rate": 2.346201871000203e-05, "loss": 0.5752, "step": 7890, "task_loss": 0.6803189516067505 }, { "compression_loss": 0.0, "distillation_loss": 0.6210540533065796, "epoch": 2.86, "learning_rate": 2.3395550317123355e-05, "loss": 0.5516, "step": 7900, "task_loss": 0.5259459614753723 }, { "compression_loss": 0.0, "distillation_loss": 0.7965078353881836, "epoch": 2.86, "learning_rate": 2.3329115979008098e-05, "loss": 0.6016, "step": 7910, "task_loss": 0.9792982935905457 }, { "compression_loss": 0.0, "distillation_loss": 0.7713618278503418, "epoch": 2.86, "learning_rate": 2.3262716038214055e-05, "loss": 0.5892, "step": 7920, "task_loss": 0.8038817048072815 }, { "compression_loss": 0.0, "distillation_loss": 0.6315901875495911, "epoch": 2.87, "learning_rate": 2.319635083712164e-05, "loss": 0.5492, "step": 7930, "task_loss": 0.800554096698761 }, { "compression_loss": 0.0, "distillation_loss": 0.7543363571166992, "epoch": 2.87, "learning_rate": 2.3130020717932155e-05, "loss": 0.5724, "step": 7940, "task_loss": 0.6426233649253845 }, { "compression_loss": 0.0, "distillation_loss": 0.7032104134559631, "epoch": 2.87, "learning_rate": 2.306372602266599e-05, "loss": 0.6558, "step": 7950, "task_loss": 0.6244052648544312 }, { "compression_loss": 0.0, "distillation_loss": 0.8306782245635986, "epoch": 2.88, "learning_rate": 2.29974670931609e-05, "loss": 0.6546, "step": 7960, "task_loss": 0.9088537096977234 }, { "compression_loss": 0.0, "distillation_loss": 0.951589822769165, "epoch": 2.88, "learning_rate": 2.2931244271070183e-05, "loss": 0.6349, "step": 7970, "task_loss": 1.1932218074798584 }, { "compression_loss": 0.0, "distillation_loss": 0.524719774723053, "epoch": 2.88, "learning_rate": 2.2865057897861014e-05, "loss": 0.5249, "step": 7980, "task_loss": 0.6670576333999634 }, { "compression_loss": 0.0, "distillation_loss": 0.5125932693481445, "epoch": 2.89, "learning_rate": 2.2798908314812546e-05, "loss": 0.5753, "step": 7990, "task_loss": 0.8458143472671509 }, { "compression_loss": 0.0, "distillation_loss": 0.6288058161735535, "epoch": 2.89, "learning_rate": 2.273279586301431e-05, "loss": 0.5709, "step": 8000, "task_loss": 0.5643374919891357 }, { "epoch": 2.89, "eval_exact_match": 82.39356669820246, "eval_f1": 89.43276664538762, "step": 8000 }, { "compression_loss": 0.0, "distillation_loss": 0.42751121520996094, "epoch": 2.89, "learning_rate": 2.2666720883364317e-05, "loss": 0.4948, "step": 8010, "task_loss": 0.41463011503219604 }, { "compression_loss": 0.0, "distillation_loss": 0.6234784126281738, "epoch": 2.9, "learning_rate": 2.26006837165674e-05, "loss": 0.5213, "step": 8020, "task_loss": 0.8985073566436768 }, { "compression_loss": 0.0, "distillation_loss": 0.35889315605163574, "epoch": 2.9, "learning_rate": 2.2534684703133375e-05, "loss": 0.593, "step": 8030, "task_loss": 0.40073755383491516 }, { "compression_loss": 0.0, "distillation_loss": 0.3940616250038147, "epoch": 2.91, "learning_rate": 2.2468724183375365e-05, "loss": 0.608, "step": 8040, "task_loss": 0.7963597774505615 }, { "compression_loss": 0.0, "distillation_loss": 0.5547673106193542, "epoch": 2.91, "learning_rate": 2.240280249740798e-05, "loss": 0.5817, "step": 8050, "task_loss": 0.8905491828918457 }, { "compression_loss": 0.0, "distillation_loss": 0.5244134664535522, "epoch": 2.91, "learning_rate": 2.233691998514561e-05, "loss": 0.5501, "step": 8060, "task_loss": 0.6890424489974976 }, { "compression_loss": 0.0, "distillation_loss": 0.5362120270729065, "epoch": 2.92, "learning_rate": 2.2271076986300626e-05, "loss": 0.5356, "step": 8070, "task_loss": 0.6972655057907104 }, { "compression_loss": 0.0, "distillation_loss": 0.5454753637313843, "epoch": 2.92, "learning_rate": 2.2205273840381697e-05, "loss": 0.5795, "step": 8080, "task_loss": 0.44935011863708496 }, { "compression_loss": 0.0, "distillation_loss": 0.5678622722625732, "epoch": 2.92, "learning_rate": 2.213951088669194e-05, "loss": 0.5637, "step": 8090, "task_loss": 0.5042102336883545 }, { "compression_loss": 0.0, "distillation_loss": 0.8571547269821167, "epoch": 2.93, "learning_rate": 2.2073788464327286e-05, "loss": 0.6529, "step": 8100, "task_loss": 0.9279406666755676 }, { "compression_loss": 0.0, "distillation_loss": 0.4891819357872009, "epoch": 2.93, "learning_rate": 2.2008106912174626e-05, "loss": 0.5716, "step": 8110, "task_loss": 0.863795280456543 }, { "compression_loss": 0.0, "distillation_loss": 0.9461628198623657, "epoch": 2.93, "learning_rate": 2.1942466568910142e-05, "loss": 0.6349, "step": 8120, "task_loss": 0.8306816220283508 }, { "compression_loss": 0.0, "distillation_loss": 0.49649107456207275, "epoch": 2.94, "learning_rate": 2.18768677729975e-05, "loss": 0.6054, "step": 8130, "task_loss": 0.5083526372909546 }, { "compression_loss": 0.0, "distillation_loss": 0.6678476929664612, "epoch": 2.94, "learning_rate": 2.1811310862686177e-05, "loss": 0.5735, "step": 8140, "task_loss": 0.7546992301940918 }, { "compression_loss": 0.0, "distillation_loss": 0.657577633857727, "epoch": 2.95, "learning_rate": 2.1745796176009618e-05, "loss": 0.632, "step": 8150, "task_loss": 0.7159422636032104 }, { "compression_loss": 0.0, "distillation_loss": 0.6389050483703613, "epoch": 2.95, "learning_rate": 2.1680324050783598e-05, "loss": 0.6101, "step": 8160, "task_loss": 0.4867662787437439 }, { "compression_loss": 0.0, "distillation_loss": 0.3546678125858307, "epoch": 2.95, "learning_rate": 2.161489482460439e-05, "loss": 0.5069, "step": 8170, "task_loss": 0.4260442852973938 }, { "compression_loss": 0.0, "distillation_loss": 0.49699175357818604, "epoch": 2.96, "learning_rate": 2.1549508834847124e-05, "loss": 0.5537, "step": 8180, "task_loss": 0.6937697529792786 }, { "compression_loss": 0.0, "distillation_loss": 0.6141911149024963, "epoch": 2.96, "learning_rate": 2.1484166418663904e-05, "loss": 0.5792, "step": 8190, "task_loss": 0.7282106876373291 }, { "compression_loss": 0.0, "distillation_loss": 0.7464488744735718, "epoch": 2.96, "learning_rate": 2.1418867912982233e-05, "loss": 0.6254, "step": 8200, "task_loss": 0.807386040687561 }, { "compression_loss": 0.0, "distillation_loss": 0.712665855884552, "epoch": 2.97, "learning_rate": 2.135361365450315e-05, "loss": 0.6006, "step": 8210, "task_loss": 1.1894166469573975 }, { "compression_loss": 0.0, "distillation_loss": 0.6086498498916626, "epoch": 2.97, "learning_rate": 2.1288403979699563e-05, "loss": 0.5498, "step": 8220, "task_loss": 0.7981095910072327 }, { "compression_loss": 0.0, "distillation_loss": 0.646178126335144, "epoch": 2.97, "learning_rate": 2.122323922481447e-05, "loss": 0.5831, "step": 8230, "task_loss": 0.7211898565292358 }, { "compression_loss": 0.0, "distillation_loss": 0.522659420967102, "epoch": 2.98, "learning_rate": 2.1158119725859282e-05, "loss": 0.5215, "step": 8240, "task_loss": 0.5190514326095581 }, { "compression_loss": 0.0, "distillation_loss": 0.5700294971466064, "epoch": 2.98, "learning_rate": 2.1093045818612006e-05, "loss": 0.5412, "step": 8250, "task_loss": 0.9010043144226074 }, { "epoch": 2.98, "eval_exact_match": 82.32734153263955, "eval_f1": 89.52267203340489, "step": 8250 }, { "compression_loss": 0.0, "distillation_loss": 0.5628690123558044, "epoch": 2.99, "learning_rate": 2.102801783861561e-05, "loss": 0.5405, "step": 8260, "task_loss": 0.7926698327064514 }, { "compression_loss": 0.0, "distillation_loss": 0.41890618205070496, "epoch": 2.99, "learning_rate": 2.0963036121176206e-05, "loss": 0.5562, "step": 8270, "task_loss": 0.4853794574737549 }, { "compression_loss": 0.0, "distillation_loss": 0.6962077021598816, "epoch": 2.99, "learning_rate": 2.0898101001361418e-05, "loss": 0.5628, "step": 8280, "task_loss": 0.6412039995193481 }, { "compression_loss": 0.0, "distillation_loss": 0.629459023475647, "epoch": 3.0, "learning_rate": 2.083321281399851e-05, "loss": 0.6054, "step": 8290, "task_loss": 0.4869459867477417 }, { "compression_loss": 0.0, "distillation_loss": 0.5349444150924683, "epoch": 3.0, "learning_rate": 2.0768371893672834e-05, "loss": 0.5765, "step": 8300, "task_loss": 0.7212353944778442 }, { "compression_loss": 0.0, "distillation_loss": 0.4701830744743347, "epoch": 3.0, "learning_rate": 2.0703578574725962e-05, "loss": 0.4958, "step": 8310, "task_loss": 0.6365481615066528 }, { "compression_loss": 0.0, "distillation_loss": 0.37688684463500977, "epoch": 3.01, "learning_rate": 2.0638833191254032e-05, "loss": 0.5041, "step": 8320, "task_loss": 0.551572859287262 }, { "compression_loss": 0.0, "distillation_loss": 0.5466549396514893, "epoch": 3.01, "learning_rate": 2.0574136077106028e-05, "loss": 0.5433, "step": 8330, "task_loss": 0.3961504399776459 }, { "compression_loss": 0.0, "distillation_loss": 0.522490382194519, "epoch": 3.01, "learning_rate": 2.0509487565882006e-05, "loss": 0.5166, "step": 8340, "task_loss": 0.9316266775131226 }, { "compression_loss": 0.0, "distillation_loss": 0.549529492855072, "epoch": 3.02, "learning_rate": 2.0444887990931454e-05, "loss": 0.5098, "step": 8350, "task_loss": 0.7794489860534668 }, { "compression_loss": 0.0, "distillation_loss": 0.4621466398239136, "epoch": 3.02, "learning_rate": 2.038033768535147e-05, "loss": 0.4768, "step": 8360, "task_loss": 0.4732331335544586 }, { "compression_loss": 0.0, "distillation_loss": 0.6309295892715454, "epoch": 3.02, "learning_rate": 2.0315836981985175e-05, "loss": 0.5065, "step": 8370, "task_loss": 1.0547935962677002 }, { "compression_loss": 0.0, "distillation_loss": 0.31558752059936523, "epoch": 3.03, "learning_rate": 2.025138621341985e-05, "loss": 0.5306, "step": 8380, "task_loss": 0.29626554250717163 }, { "compression_loss": 0.0, "distillation_loss": 0.6012607216835022, "epoch": 3.03, "learning_rate": 2.0186985711985356e-05, "loss": 0.502, "step": 8390, "task_loss": 1.0542261600494385 }, { "compression_loss": 0.0, "distillation_loss": 0.427781879901886, "epoch": 3.04, "learning_rate": 2.012263580975231e-05, "loss": 0.4638, "step": 8400, "task_loss": 0.7197548747062683 }, { "compression_loss": 0.0, "distillation_loss": 0.5939445495605469, "epoch": 3.04, "learning_rate": 2.005833683853047e-05, "loss": 0.4944, "step": 8410, "task_loss": 0.7222771048545837 }, { "compression_loss": 0.0, "distillation_loss": 0.603698194026947, "epoch": 3.04, "learning_rate": 1.999408912986694e-05, "loss": 0.5148, "step": 8420, "task_loss": 0.7187478542327881 }, { "compression_loss": 0.0, "distillation_loss": 0.3711569011211395, "epoch": 3.05, "learning_rate": 1.9929893015044516e-05, "loss": 0.4703, "step": 8430, "task_loss": 0.48197200894355774 }, { "compression_loss": 0.0, "distillation_loss": 0.3467344641685486, "epoch": 3.05, "learning_rate": 1.986574882507994e-05, "loss": 0.5674, "step": 8440, "task_loss": 0.2590097486972809 }, { "compression_loss": 0.0, "distillation_loss": 0.38601693511009216, "epoch": 3.05, "learning_rate": 1.9801656890722253e-05, "loss": 0.4763, "step": 8450, "task_loss": 0.44248801469802856 }, { "compression_loss": 0.0, "distillation_loss": 0.3432322144508362, "epoch": 3.06, "learning_rate": 1.973761754245098e-05, "loss": 0.4624, "step": 8460, "task_loss": 0.3968687653541565 }, { "compression_loss": 0.0, "distillation_loss": 0.37857741117477417, "epoch": 3.06, "learning_rate": 1.9673631110474576e-05, "loss": 0.4573, "step": 8470, "task_loss": 0.5128192901611328 }, { "compression_loss": 0.0, "distillation_loss": 0.6123422980308533, "epoch": 3.06, "learning_rate": 1.9609697924728577e-05, "loss": 0.4654, "step": 8480, "task_loss": 0.9716970324516296 }, { "compression_loss": 0.0, "distillation_loss": 0.6166114807128906, "epoch": 3.07, "learning_rate": 1.9545818314873996e-05, "loss": 0.5385, "step": 8490, "task_loss": 0.4100545644760132 }, { "compression_loss": 0.0, "distillation_loss": 0.5452293157577515, "epoch": 3.07, "learning_rate": 1.9481992610295572e-05, "loss": 0.5531, "step": 8500, "task_loss": 0.3822357654571533 }, { "epoch": 3.07, "eval_exact_match": 82.11920529801324, "eval_f1": 89.28049962544765, "step": 8500 }, { "compression_loss": 0.0, "distillation_loss": 0.7301047444343567, "epoch": 3.08, "learning_rate": 1.941822114010011e-05, "loss": 0.486, "step": 8510, "task_loss": 0.9077910780906677 }, { "compression_loss": 0.0, "distillation_loss": 0.7157807350158691, "epoch": 3.08, "learning_rate": 1.9354504233114756e-05, "loss": 0.5203, "step": 8520, "task_loss": 0.7050743103027344 }, { "compression_loss": 0.0, "distillation_loss": 0.5539354681968689, "epoch": 3.08, "learning_rate": 1.92908422178853e-05, "loss": 0.4978, "step": 8530, "task_loss": 0.8747463226318359 }, { "compression_loss": 0.0, "distillation_loss": 0.3879743814468384, "epoch": 3.09, "learning_rate": 1.9227235422674496e-05, "loss": 0.4905, "step": 8540, "task_loss": 0.4578686058521271 }, { "compression_loss": 0.0, "distillation_loss": 0.47939151525497437, "epoch": 3.09, "learning_rate": 1.9163684175460395e-05, "loss": 0.5013, "step": 8550, "task_loss": 0.5444331765174866 }, { "compression_loss": 0.0, "distillation_loss": 0.6490421891212463, "epoch": 3.09, "learning_rate": 1.9100188803934563e-05, "loss": 0.5392, "step": 8560, "task_loss": 0.6370527744293213 }, { "compression_loss": 0.0, "distillation_loss": 0.5446510314941406, "epoch": 3.1, "learning_rate": 1.9036749635500517e-05, "loss": 0.518, "step": 8570, "task_loss": 0.7046805620193481 }, { "compression_loss": 0.0, "distillation_loss": 0.5223091840744019, "epoch": 3.1, "learning_rate": 1.897336699727192e-05, "loss": 0.5085, "step": 8580, "task_loss": 0.8283286094665527 }, { "compression_loss": 0.0, "distillation_loss": 0.5764731764793396, "epoch": 3.1, "learning_rate": 1.8910041216070985e-05, "loss": 0.5382, "step": 8590, "task_loss": 0.5241403579711914 }, { "compression_loss": 0.0, "distillation_loss": 0.44843530654907227, "epoch": 3.11, "learning_rate": 1.884677261842672e-05, "loss": 0.5134, "step": 8600, "task_loss": 0.47629907727241516 }, { "compression_loss": 0.0, "distillation_loss": 0.5873540639877319, "epoch": 3.11, "learning_rate": 1.8783561530573297e-05, "loss": 0.4634, "step": 8610, "task_loss": 0.49033936858177185 }, { "compression_loss": 0.0, "distillation_loss": 0.5004246234893799, "epoch": 3.12, "learning_rate": 1.8720408278448332e-05, "loss": 0.4896, "step": 8620, "task_loss": 0.4907362759113312 }, { "compression_loss": 0.0, "distillation_loss": 0.3840811252593994, "epoch": 3.12, "learning_rate": 1.8657313187691257e-05, "loss": 0.5091, "step": 8630, "task_loss": 0.6038985252380371 }, { "compression_loss": 0.0, "distillation_loss": 0.3785248398780823, "epoch": 3.12, "learning_rate": 1.8594276583641534e-05, "loss": 0.52, "step": 8640, "task_loss": 0.36846432089805603 }, { "compression_loss": 0.0, "distillation_loss": 0.5661005973815918, "epoch": 3.13, "learning_rate": 1.8531298791337127e-05, "loss": 0.5173, "step": 8650, "task_loss": 0.5859448909759521 }, { "compression_loss": 0.0, "distillation_loss": 0.5267096757888794, "epoch": 3.13, "learning_rate": 1.8468380135512688e-05, "loss": 0.4774, "step": 8660, "task_loss": 0.985637903213501 }, { "compression_loss": 0.0, "distillation_loss": 0.5141612887382507, "epoch": 3.13, "learning_rate": 1.8405520940597985e-05, "loss": 0.5026, "step": 8670, "task_loss": 0.5452569723129272 }, { "compression_loss": 0.0, "distillation_loss": 0.4393519163131714, "epoch": 3.14, "learning_rate": 1.834272153071614e-05, "loss": 0.488, "step": 8680, "task_loss": 0.5240843892097473 }, { "compression_loss": 0.0, "distillation_loss": 0.4073109030723572, "epoch": 3.14, "learning_rate": 1.827998222968205e-05, "loss": 0.4844, "step": 8690, "task_loss": 0.4562574625015259 }, { "compression_loss": 0.0, "distillation_loss": 0.4827924966812134, "epoch": 3.14, "learning_rate": 1.8217303361000625e-05, "loss": 0.5465, "step": 8700, "task_loss": 0.5933099985122681 }, { "compression_loss": 0.0, "distillation_loss": 0.5707128047943115, "epoch": 3.15, "learning_rate": 1.8154685247865215e-05, "loss": 0.4999, "step": 8710, "task_loss": 0.6060525178909302 }, { "compression_loss": 0.0, "distillation_loss": 0.4365973472595215, "epoch": 3.15, "learning_rate": 1.809212821315584e-05, "loss": 0.4649, "step": 8720, "task_loss": 0.8164606094360352 }, { "compression_loss": 0.0, "distillation_loss": 0.3885442614555359, "epoch": 3.16, "learning_rate": 1.802963257943764e-05, "loss": 0.4712, "step": 8730, "task_loss": 0.46396589279174805 }, { "compression_loss": 0.0, "distillation_loss": 0.5628999471664429, "epoch": 3.16, "learning_rate": 1.7967198668959077e-05, "loss": 0.4756, "step": 8740, "task_loss": 0.540549635887146 }, { "compression_loss": 0.0, "distillation_loss": 0.6016356945037842, "epoch": 3.16, "learning_rate": 1.7904826803650426e-05, "loss": 0.5062, "step": 8750, "task_loss": 0.46754416823387146 }, { "epoch": 3.16, "eval_exact_match": 82.5449385052034, "eval_f1": 89.74640245653393, "step": 8750 }, { "compression_loss": 0.0, "distillation_loss": 0.6363502740859985, "epoch": 3.17, "learning_rate": 1.7842517305121973e-05, "loss": 0.5155, "step": 8760, "task_loss": 0.5670214891433716 }, { "compression_loss": 0.0, "distillation_loss": 0.6069682836532593, "epoch": 3.17, "learning_rate": 1.778027049466246e-05, "loss": 0.4794, "step": 8770, "task_loss": 0.5283325910568237 }, { "compression_loss": 0.0, "distillation_loss": 0.5104897022247314, "epoch": 3.17, "learning_rate": 1.7718086693237365e-05, "loss": 0.4731, "step": 8780, "task_loss": 0.604925274848938 }, { "compression_loss": 0.0, "distillation_loss": 0.5205997228622437, "epoch": 3.18, "learning_rate": 1.7655966221487286e-05, "loss": 0.5561, "step": 8790, "task_loss": 0.7673854231834412 }, { "compression_loss": 0.0, "distillation_loss": 0.43104785680770874, "epoch": 3.18, "learning_rate": 1.759390939972625e-05, "loss": 0.4787, "step": 8800, "task_loss": 0.4828697443008423 }, { "compression_loss": 0.0, "distillation_loss": 0.39554059505462646, "epoch": 3.18, "learning_rate": 1.753191654794012e-05, "loss": 0.4671, "step": 8810, "task_loss": 0.33180779218673706 }, { "compression_loss": 0.0, "distillation_loss": 0.5428735613822937, "epoch": 3.19, "learning_rate": 1.7469987985784853e-05, "loss": 0.4931, "step": 8820, "task_loss": 1.0688862800598145 }, { "compression_loss": 0.0, "distillation_loss": 0.40573301911354065, "epoch": 3.19, "learning_rate": 1.7408124032584977e-05, "loss": 0.5403, "step": 8830, "task_loss": 0.7760818004608154 }, { "compression_loss": 0.0, "distillation_loss": 0.39982184767723083, "epoch": 3.19, "learning_rate": 1.7346325007331814e-05, "loss": 0.4537, "step": 8840, "task_loss": 0.48054105043411255 }, { "compression_loss": 0.0, "distillation_loss": 0.5107048153877258, "epoch": 3.2, "learning_rate": 1.7284591228681924e-05, "loss": 0.4919, "step": 8850, "task_loss": 0.7338130474090576 }, { "compression_loss": 0.0, "distillation_loss": 0.28509578108787537, "epoch": 3.2, "learning_rate": 1.722292301495543e-05, "loss": 0.4804, "step": 8860, "task_loss": 0.32352930307388306 }, { "compression_loss": 0.0, "distillation_loss": 0.46888577938079834, "epoch": 3.21, "learning_rate": 1.716132068413439e-05, "loss": 0.5633, "step": 8870, "task_loss": 0.8163585662841797 }, { "compression_loss": 0.0, "distillation_loss": 0.4140506684780121, "epoch": 3.21, "learning_rate": 1.70997845538611e-05, "loss": 0.5328, "step": 8880, "task_loss": 0.5608940124511719 }, { "compression_loss": 0.0, "distillation_loss": 0.4530109763145447, "epoch": 3.21, "learning_rate": 1.7038314941436593e-05, "loss": 0.5011, "step": 8890, "task_loss": 0.6756296157836914 }, { "compression_loss": 0.0, "distillation_loss": 0.45567673444747925, "epoch": 3.22, "learning_rate": 1.697691216381882e-05, "loss": 0.51, "step": 8900, "task_loss": 0.7713099122047424 }, { "compression_loss": 0.0, "distillation_loss": 0.602544903755188, "epoch": 3.22, "learning_rate": 1.691557653762117e-05, "loss": 0.5366, "step": 8910, "task_loss": 0.8172224164009094 }, { "compression_loss": 0.0, "distillation_loss": 0.6989117860794067, "epoch": 3.22, "learning_rate": 1.685430837911074e-05, "loss": 0.5546, "step": 8920, "task_loss": 0.6363785862922668 }, { "compression_loss": 0.0, "distillation_loss": 0.4007064700126648, "epoch": 3.23, "learning_rate": 1.6793108004206788e-05, "loss": 0.4931, "step": 8930, "task_loss": 0.4201916456222534 }, { "compression_loss": 0.0, "distillation_loss": 0.4066169261932373, "epoch": 3.23, "learning_rate": 1.6731975728478994e-05, "loss": 0.4984, "step": 8940, "task_loss": 0.5423001050949097 }, { "compression_loss": 0.0, "distillation_loss": 0.5652238130569458, "epoch": 3.23, "learning_rate": 1.667091186714594e-05, "loss": 0.5475, "step": 8950, "task_loss": 0.619628369808197 }, { "compression_loss": 0.0, "distillation_loss": 0.5001813769340515, "epoch": 3.24, "learning_rate": 1.6609916735073432e-05, "loss": 0.4621, "step": 8960, "task_loss": 0.90772545337677 }, { "compression_loss": 0.0, "distillation_loss": 0.3538113236427307, "epoch": 3.24, "learning_rate": 1.654899064677289e-05, "loss": 0.5043, "step": 8970, "task_loss": 0.443660706281662 }, { "compression_loss": 0.0, "distillation_loss": 0.48391348123550415, "epoch": 3.25, "learning_rate": 1.648813391639968e-05, "loss": 0.5245, "step": 8980, "task_loss": 0.739136815071106 }, { "compression_loss": 0.0, "distillation_loss": 0.45259201526641846, "epoch": 3.25, "learning_rate": 1.6427346857751618e-05, "loss": 0.4996, "step": 8990, "task_loss": 0.3973962962627411 }, { "compression_loss": 0.0, "distillation_loss": 0.4466731548309326, "epoch": 3.25, "learning_rate": 1.6366629784267172e-05, "loss": 0.4757, "step": 9000, "task_loss": 0.7052876949310303 }, { "epoch": 3.25, "eval_exact_match": 82.45979186376537, "eval_f1": 89.55562803988978, "step": 9000 }, { "compression_loss": 0.0, "distillation_loss": 0.5209038257598877, "epoch": 3.26, "learning_rate": 1.6305983009024018e-05, "loss": 0.4895, "step": 9010, "task_loss": 0.9570509195327759 }, { "compression_loss": 0.0, "distillation_loss": 0.5868513584136963, "epoch": 3.26, "learning_rate": 1.624540684473731e-05, "loss": 0.5143, "step": 9020, "task_loss": 0.8736361861228943 }, { "compression_loss": 0.0, "distillation_loss": 0.4569542706012726, "epoch": 3.26, "learning_rate": 1.6184901603758137e-05, "loss": 0.4714, "step": 9030, "task_loss": 0.41086018085479736 }, { "compression_loss": 0.0, "distillation_loss": 0.3990710973739624, "epoch": 3.27, "learning_rate": 1.612446759807184e-05, "loss": 0.47, "step": 9040, "task_loss": 0.8022845387458801 }, { "compression_loss": 0.0, "distillation_loss": 0.5180515646934509, "epoch": 3.27, "learning_rate": 1.606410513929649e-05, "loss": 0.5147, "step": 9050, "task_loss": 0.989239513874054 }, { "compression_loss": 0.0, "distillation_loss": 0.41727307438850403, "epoch": 3.27, "learning_rate": 1.600381453868123e-05, "loss": 0.51, "step": 9060, "task_loss": 0.6814265251159668 }, { "compression_loss": 0.0, "distillation_loss": 0.6537793278694153, "epoch": 3.28, "learning_rate": 1.594359610710467e-05, "loss": 0.4618, "step": 9070, "task_loss": 1.0158696174621582 }, { "compression_loss": 0.0, "distillation_loss": 0.31821686029434204, "epoch": 3.28, "learning_rate": 1.5883450155073262e-05, "loss": 0.4612, "step": 9080, "task_loss": 0.2660524845123291 }, { "compression_loss": 0.0, "distillation_loss": 0.5743929147720337, "epoch": 3.29, "learning_rate": 1.5823376992719805e-05, "loss": 0.5242, "step": 9090, "task_loss": 0.4032166600227356 }, { "compression_loss": 0.0, "distillation_loss": 0.42509496212005615, "epoch": 3.29, "learning_rate": 1.5763376929801693e-05, "loss": 0.5119, "step": 9100, "task_loss": 0.5546834468841553 }, { "compression_loss": 0.0, "distillation_loss": 0.3368126153945923, "epoch": 3.29, "learning_rate": 1.5703450275699436e-05, "loss": 0.5213, "step": 9110, "task_loss": 0.5160356163978577 }, { "compression_loss": 0.0, "distillation_loss": 0.46083498001098633, "epoch": 3.3, "learning_rate": 1.5643597339415015e-05, "loss": 0.5059, "step": 9120, "task_loss": 0.8579056262969971 }, { "compression_loss": 0.0, "distillation_loss": 0.43022578954696655, "epoch": 3.3, "learning_rate": 1.5583818429570307e-05, "loss": 0.5503, "step": 9130, "task_loss": 0.37180399894714355 }, { "compression_loss": 0.0, "distillation_loss": 0.4419669508934021, "epoch": 3.3, "learning_rate": 1.5524113854405446e-05, "loss": 0.5065, "step": 9140, "task_loss": 0.5681513547897339 }, { "compression_loss": 0.0, "distillation_loss": 0.4496462941169739, "epoch": 3.31, "learning_rate": 1.546448392177732e-05, "loss": 0.5143, "step": 9150, "task_loss": 0.5917080640792847 }, { "compression_loss": 0.0, "distillation_loss": 0.4904141128063202, "epoch": 3.31, "learning_rate": 1.54049289391579e-05, "loss": 0.5138, "step": 9160, "task_loss": 0.5543509721755981 }, { "compression_loss": 0.0, "distillation_loss": 0.44269993901252747, "epoch": 3.31, "learning_rate": 1.5345449213632725e-05, "loss": 0.5018, "step": 9170, "task_loss": 0.5917487144470215 }, { "compression_loss": 0.0, "distillation_loss": 0.4732619524002075, "epoch": 3.32, "learning_rate": 1.528604505189922e-05, "loss": 0.4905, "step": 9180, "task_loss": 0.40163156390190125 }, { "compression_loss": 0.0, "distillation_loss": 0.5729936361312866, "epoch": 3.32, "learning_rate": 1.5226716760265265e-05, "loss": 0.4991, "step": 9190, "task_loss": 0.8118762969970703 }, { "compression_loss": 0.0, "distillation_loss": 0.4613145589828491, "epoch": 3.32, "learning_rate": 1.5167464644647453e-05, "loss": 0.4727, "step": 9200, "task_loss": 0.69233238697052 }, { "compression_loss": 0.0, "distillation_loss": 0.6504011154174805, "epoch": 3.33, "learning_rate": 1.5108289010569618e-05, "loss": 0.5479, "step": 9210, "task_loss": 0.8994011282920837 }, { "compression_loss": 0.0, "distillation_loss": 0.6478760242462158, "epoch": 3.33, "learning_rate": 1.504919016316124e-05, "loss": 0.475, "step": 9220, "task_loss": 0.603376030921936 }, { "compression_loss": 0.0, "distillation_loss": 0.7874225974082947, "epoch": 3.34, "learning_rate": 1.4990168407155853e-05, "loss": 0.4952, "step": 9230, "task_loss": 0.5990192890167236 }, { "compression_loss": 0.0, "distillation_loss": 0.34686243534088135, "epoch": 3.34, "learning_rate": 1.4931224046889456e-05, "loss": 0.4821, "step": 9240, "task_loss": 0.4154958128929138 }, { "compression_loss": 0.0, "distillation_loss": 0.4504637122154236, "epoch": 3.34, "learning_rate": 1.4872357386299005e-05, "loss": 0.5222, "step": 9250, "task_loss": 0.7193425297737122 }, { "epoch": 3.34, "eval_exact_match": 81.93945127719962, "eval_f1": 89.31012659211146, "step": 9250 }, { "compression_loss": 0.0, "distillation_loss": 0.632270872592926, "epoch": 3.35, "learning_rate": 1.4813568728920795e-05, "loss": 0.5471, "step": 9260, "task_loss": 0.6672326326370239 }, { "compression_loss": 0.0, "distillation_loss": 0.453116774559021, "epoch": 3.35, "learning_rate": 1.4754858377888926e-05, "loss": 0.5107, "step": 9270, "task_loss": 0.31693387031555176 }, { "compression_loss": 0.0, "distillation_loss": 0.5828425288200378, "epoch": 3.35, "learning_rate": 1.4696226635933674e-05, "loss": 0.4849, "step": 9280, "task_loss": 0.9231215119361877 }, { "compression_loss": 0.0, "distillation_loss": 0.4946627616882324, "epoch": 3.36, "learning_rate": 1.4637673805380065e-05, "loss": 0.5012, "step": 9290, "task_loss": 0.6342258453369141 }, { "compression_loss": 0.0, "distillation_loss": 0.41230204701423645, "epoch": 3.36, "learning_rate": 1.4579200188146144e-05, "loss": 0.482, "step": 9300, "task_loss": 0.5047462582588196 }, { "compression_loss": 0.0, "distillation_loss": 0.5421258807182312, "epoch": 3.36, "learning_rate": 1.4520806085741555e-05, "loss": 0.5333, "step": 9310, "task_loss": 0.9924592971801758 }, { "compression_loss": 0.0, "distillation_loss": 0.4269621968269348, "epoch": 3.37, "learning_rate": 1.4462491799265932e-05, "loss": 0.4559, "step": 9320, "task_loss": 0.7256890535354614 }, { "compression_loss": 0.0, "distillation_loss": 0.5573655366897583, "epoch": 3.37, "learning_rate": 1.4404257629407353e-05, "loss": 0.5239, "step": 9330, "task_loss": 0.46378791332244873 }, { "compression_loss": 0.0, "distillation_loss": 0.3567560911178589, "epoch": 3.38, "learning_rate": 1.4346103876440755e-05, "loss": 0.5374, "step": 9340, "task_loss": 0.4994860887527466 }, { "compression_loss": 0.0, "distillation_loss": 0.45855897665023804, "epoch": 3.38, "learning_rate": 1.4288030840226461e-05, "loss": 0.5311, "step": 9350, "task_loss": 0.6456738710403442 }, { "compression_loss": 0.0, "distillation_loss": 0.39899706840515137, "epoch": 3.38, "learning_rate": 1.4230038820208569e-05, "loss": 0.4944, "step": 9360, "task_loss": 0.5475515723228455 }, { "compression_loss": 0.0, "distillation_loss": 0.4496174454689026, "epoch": 3.39, "learning_rate": 1.4172128115413446e-05, "loss": 0.4966, "step": 9370, "task_loss": 0.5722992420196533 }, { "compression_loss": 0.0, "distillation_loss": 0.48511573672294617, "epoch": 3.39, "learning_rate": 1.4114299024448127e-05, "loss": 0.4852, "step": 9380, "task_loss": 0.5806999206542969 }, { "compression_loss": 0.0, "distillation_loss": 0.3434864580631256, "epoch": 3.39, "learning_rate": 1.4056551845498899e-05, "loss": 0.473, "step": 9390, "task_loss": 0.28214818239212036 }, { "compression_loss": 0.0, "distillation_loss": 0.520751953125, "epoch": 3.4, "learning_rate": 1.3998886876329597e-05, "loss": 0.4578, "step": 9400, "task_loss": 0.6089061498641968 }, { "compression_loss": 0.0, "distillation_loss": 0.49395525455474854, "epoch": 3.4, "learning_rate": 1.3941304414280207e-05, "loss": 0.461, "step": 9410, "task_loss": 0.9712688326835632 }, { "compression_loss": 0.0, "distillation_loss": 0.33728528022766113, "epoch": 3.4, "learning_rate": 1.3883804756265264e-05, "loss": 0.4602, "step": 9420, "task_loss": 0.5431016087532043 }, { "compression_loss": 0.0, "distillation_loss": 0.47305771708488464, "epoch": 3.41, "learning_rate": 1.382638819877235e-05, "loss": 0.448, "step": 9430, "task_loss": 0.8087161183357239 }, { "compression_loss": 0.0, "distillation_loss": 0.47628888487815857, "epoch": 3.41, "learning_rate": 1.3769055037860525e-05, "loss": 0.5081, "step": 9440, "task_loss": 0.7864973545074463 }, { "compression_loss": 0.0, "distillation_loss": 0.7246733903884888, "epoch": 3.42, "learning_rate": 1.3711805569158852e-05, "loss": 0.5485, "step": 9450, "task_loss": 0.620114803314209 }, { "compression_loss": 0.0, "distillation_loss": 0.41885077953338623, "epoch": 3.42, "learning_rate": 1.3654640087864848e-05, "loss": 0.4701, "step": 9460, "task_loss": 0.7940152883529663 }, { "compression_loss": 0.0, "distillation_loss": 0.33186885714530945, "epoch": 3.42, "learning_rate": 1.3597558888742954e-05, "loss": 0.4616, "step": 9470, "task_loss": 0.5817943811416626 }, { "compression_loss": 0.0, "distillation_loss": 0.3579007387161255, "epoch": 3.43, "learning_rate": 1.3540562266123025e-05, "loss": 0.4819, "step": 9480, "task_loss": 0.36475780606269836 }, { "compression_loss": 0.0, "distillation_loss": 0.32448476552963257, "epoch": 3.43, "learning_rate": 1.3483650513898832e-05, "loss": 0.469, "step": 9490, "task_loss": 0.7694838047027588 }, { "compression_loss": 0.0, "distillation_loss": 0.5285438299179077, "epoch": 3.43, "learning_rate": 1.3426823925526479e-05, "loss": 0.5106, "step": 9500, "task_loss": 0.8708394765853882 }, { "epoch": 3.43, "eval_exact_match": 82.31788079470199, "eval_f1": 89.50688669947229, "step": 9500 }, { "compression_loss": 0.0, "distillation_loss": 0.4109702706336975, "epoch": 3.44, "learning_rate": 1.3370082794022974e-05, "loss": 0.5511, "step": 9510, "task_loss": 0.45803433656692505 }, { "compression_loss": 0.0, "distillation_loss": 0.3952751159667969, "epoch": 3.44, "learning_rate": 1.3313427411964676e-05, "loss": 0.4921, "step": 9520, "task_loss": 0.49687451124191284 }, { "compression_loss": 0.0, "distillation_loss": 0.5060081481933594, "epoch": 3.44, "learning_rate": 1.3256858071485795e-05, "loss": 0.4775, "step": 9530, "task_loss": 0.594099223613739 }, { "compression_loss": 0.0, "distillation_loss": 0.426449716091156, "epoch": 3.45, "learning_rate": 1.3200375064276841e-05, "loss": 0.492, "step": 9540, "task_loss": 0.5237439274787903 }, { "compression_loss": 0.0, "distillation_loss": 0.5308140516281128, "epoch": 3.45, "learning_rate": 1.3143978681583236e-05, "loss": 0.5023, "step": 9550, "task_loss": 1.1491860151290894 }, { "compression_loss": 0.0, "distillation_loss": 0.6118658781051636, "epoch": 3.46, "learning_rate": 1.3087669214203657e-05, "loss": 0.5216, "step": 9560, "task_loss": 0.6933400630950928 }, { "compression_loss": 0.0, "distillation_loss": 0.4912259578704834, "epoch": 3.46, "learning_rate": 1.303144695248867e-05, "loss": 0.4434, "step": 9570, "task_loss": 0.86939936876297 }, { "compression_loss": 0.0, "distillation_loss": 0.7513962388038635, "epoch": 3.46, "learning_rate": 1.2975312186339156e-05, "loss": 0.5468, "step": 9580, "task_loss": 1.1207659244537354 }, { "compression_loss": 0.0, "distillation_loss": 0.6973851919174194, "epoch": 3.47, "learning_rate": 1.2919265205204863e-05, "loss": 0.4847, "step": 9590, "task_loss": 0.9924426078796387 }, { "compression_loss": 0.0, "distillation_loss": 0.5232359766960144, "epoch": 3.47, "learning_rate": 1.2863306298082847e-05, "loss": 0.5701, "step": 9600, "task_loss": 1.4724518060684204 }, { "compression_loss": 0.0, "distillation_loss": 0.49260246753692627, "epoch": 3.47, "learning_rate": 1.2807435753516059e-05, "loss": 0.5398, "step": 9610, "task_loss": 1.156559944152832 }, { "compression_loss": 0.0, "distillation_loss": 0.4523448348045349, "epoch": 3.48, "learning_rate": 1.2751653859591815e-05, "loss": 0.4651, "step": 9620, "task_loss": 0.5585622787475586 }, { "compression_loss": 0.0, "distillation_loss": 0.40095406770706177, "epoch": 3.48, "learning_rate": 1.2695960903940331e-05, "loss": 0.5763, "step": 9630, "task_loss": 0.6868075132369995 }, { "compression_loss": 0.0, "distillation_loss": 0.39510098099708557, "epoch": 3.48, "learning_rate": 1.2640357173733184e-05, "loss": 0.4757, "step": 9640, "task_loss": 0.5561718940734863 }, { "compression_loss": 0.0, "distillation_loss": 0.6113804578781128, "epoch": 3.49, "learning_rate": 1.258484295568194e-05, "loss": 0.4769, "step": 9650, "task_loss": 0.5155743956565857 }, { "compression_loss": 0.0, "distillation_loss": 0.38609778881073, "epoch": 3.49, "learning_rate": 1.252941853603655e-05, "loss": 0.4734, "step": 9660, "task_loss": 0.5266010761260986 }, { "compression_loss": 0.0, "distillation_loss": 0.5123001933097839, "epoch": 3.49, "learning_rate": 1.2474084200583964e-05, "loss": 0.4694, "step": 9670, "task_loss": 0.6387827396392822 }, { "compression_loss": 0.0, "distillation_loss": 0.4852139353752136, "epoch": 3.5, "learning_rate": 1.2418840234646627e-05, "loss": 0.4779, "step": 9680, "task_loss": 0.48199230432510376 }, { "compression_loss": 0.0, "distillation_loss": 0.5093953013420105, "epoch": 3.5, "learning_rate": 1.2363686923081015e-05, "loss": 0.5113, "step": 9690, "task_loss": 0.34967291355133057 }, { "compression_loss": 0.0, "distillation_loss": 0.4891633689403534, "epoch": 3.51, "learning_rate": 1.2308624550276123e-05, "loss": 0.5272, "step": 9700, "task_loss": 0.5109860897064209 }, { "compression_loss": 0.0, "distillation_loss": 0.37727656960487366, "epoch": 3.51, "learning_rate": 1.2253653400152073e-05, "loss": 0.5071, "step": 9710, "task_loss": 0.5218365788459778 }, { "compression_loss": 0.0, "distillation_loss": 0.43080103397369385, "epoch": 3.51, "learning_rate": 1.2198773756158604e-05, "loss": 0.5266, "step": 9720, "task_loss": 0.6511129140853882 }, { "compression_loss": 0.0, "distillation_loss": 0.42583030462265015, "epoch": 3.52, "learning_rate": 1.2143985901273618e-05, "loss": 0.5077, "step": 9730, "task_loss": 0.5220778584480286 }, { "compression_loss": 0.0, "distillation_loss": 0.5337873697280884, "epoch": 3.52, "learning_rate": 1.2089290118001697e-05, "loss": 0.4677, "step": 9740, "task_loss": 0.7937961220741272 }, { "compression_loss": 0.0, "distillation_loss": 0.6372555494308472, "epoch": 3.52, "learning_rate": 1.2034686688372724e-05, "loss": 0.6156, "step": 9750, "task_loss": 0.818684458732605 }, { "epoch": 3.52, "eval_exact_match": 82.30842005676443, "eval_f1": 89.48111611207813, "step": 9750 }, { "compression_loss": 0.0, "distillation_loss": 0.6067251563072205, "epoch": 3.53, "learning_rate": 1.1980175893940316e-05, "loss": 0.5211, "step": 9760, "task_loss": 0.7992770671844482 }, { "compression_loss": 0.0, "distillation_loss": 0.5251370072364807, "epoch": 3.53, "learning_rate": 1.192575801578047e-05, "loss": 0.491, "step": 9770, "task_loss": 0.5564746856689453 }, { "compression_loss": 0.0, "distillation_loss": 0.46255046129226685, "epoch": 3.53, "learning_rate": 1.1871433334490063e-05, "loss": 0.5013, "step": 9780, "task_loss": 0.593856930732727 }, { "compression_loss": 0.0, "distillation_loss": 0.6018144488334656, "epoch": 3.54, "learning_rate": 1.181720213018543e-05, "loss": 0.5316, "step": 9790, "task_loss": 0.5894225239753723 }, { "compression_loss": 0.0, "distillation_loss": 0.49758362770080566, "epoch": 3.54, "learning_rate": 1.1763064682500879e-05, "loss": 0.5543, "step": 9800, "task_loss": 0.5570957660675049 }, { "compression_loss": 0.0, "distillation_loss": 0.536584734916687, "epoch": 3.55, "learning_rate": 1.1709021270587304e-05, "loss": 0.4794, "step": 9810, "task_loss": 0.5450813174247742 }, { "compression_loss": 0.0, "distillation_loss": 0.3794777989387512, "epoch": 3.55, "learning_rate": 1.1655072173110707e-05, "loss": 0.4492, "step": 9820, "task_loss": 0.3681301474571228 }, { "compression_loss": 0.0, "distillation_loss": 0.4257237911224365, "epoch": 3.55, "learning_rate": 1.1601217668250789e-05, "loss": 0.4717, "step": 9830, "task_loss": 0.7739952802658081 }, { "compression_loss": 0.0, "distillation_loss": 0.5841401815414429, "epoch": 3.56, "learning_rate": 1.1547458033699452e-05, "loss": 0.4669, "step": 9840, "task_loss": 0.7634145021438599 }, { "compression_loss": 0.0, "distillation_loss": 0.44450539350509644, "epoch": 3.56, "learning_rate": 1.1493793546659497e-05, "loss": 0.4667, "step": 9850, "task_loss": 0.6069711446762085 }, { "compression_loss": 0.0, "distillation_loss": 0.5876635313034058, "epoch": 3.56, "learning_rate": 1.1440224483843031e-05, "loss": 0.4881, "step": 9860, "task_loss": 0.7760765552520752 }, { "compression_loss": 0.0, "distillation_loss": 0.5074859857559204, "epoch": 3.57, "learning_rate": 1.1386751121470161e-05, "loss": 0.5055, "step": 9870, "task_loss": 0.43628257513046265 }, { "compression_loss": 0.0, "distillation_loss": 0.40634429454803467, "epoch": 3.57, "learning_rate": 1.1333373735267528e-05, "loss": 0.5235, "step": 9880, "task_loss": 0.5263395309448242 }, { "compression_loss": 0.0, "distillation_loss": 0.5098382234573364, "epoch": 3.57, "learning_rate": 1.1280092600466898e-05, "loss": 0.4973, "step": 9890, "task_loss": 0.7390273809432983 }, { "compression_loss": 0.0, "distillation_loss": 0.49760282039642334, "epoch": 3.58, "learning_rate": 1.1226907991803694e-05, "loss": 0.4334, "step": 9900, "task_loss": 0.48946425318717957 }, { "compression_loss": 0.0, "distillation_loss": 0.3809628188610077, "epoch": 3.58, "learning_rate": 1.1173820183515652e-05, "loss": 0.491, "step": 9910, "task_loss": 0.5488647222518921 }, { "compression_loss": 0.0, "distillation_loss": 0.5812700986862183, "epoch": 3.59, "learning_rate": 1.112082944934137e-05, "loss": 0.5098, "step": 9920, "task_loss": 0.656021237373352 }, { "compression_loss": 0.0, "distillation_loss": 0.32435011863708496, "epoch": 3.59, "learning_rate": 1.1067936062518893e-05, "loss": 0.4879, "step": 9930, "task_loss": 0.2461182326078415 }, { "compression_loss": 0.0, "distillation_loss": 0.7479100227355957, "epoch": 3.59, "learning_rate": 1.1015140295784286e-05, "loss": 0.5034, "step": 9940, "task_loss": 1.272243618965149 }, { "compression_loss": 0.0, "distillation_loss": 0.6995210647583008, "epoch": 3.6, "learning_rate": 1.0962442421370318e-05, "loss": 0.4988, "step": 9950, "task_loss": 1.0233780145645142 }, { "compression_loss": 0.0, "distillation_loss": 0.48262107372283936, "epoch": 3.6, "learning_rate": 1.0909842711004913e-05, "loss": 0.472, "step": 9960, "task_loss": 0.5522131323814392 }, { "compression_loss": 0.0, "distillation_loss": 0.5361160039901733, "epoch": 3.6, "learning_rate": 1.0857341435909876e-05, "loss": 0.4965, "step": 9970, "task_loss": 0.7532282471656799 }, { "compression_loss": 0.0, "distillation_loss": 0.4412672221660614, "epoch": 3.61, "learning_rate": 1.0804938866799441e-05, "loss": 0.5342, "step": 9980, "task_loss": 0.8714137077331543 }, { "compression_loss": 0.0, "distillation_loss": 0.49830132722854614, "epoch": 3.61, "learning_rate": 1.075263527387888e-05, "loss": 0.5097, "step": 9990, "task_loss": 0.6957769989967346 }, { "compression_loss": 0.0, "distillation_loss": 0.5538281798362732, "epoch": 3.61, "learning_rate": 1.0700430926843094e-05, "loss": 0.4753, "step": 10000, "task_loss": 0.6752587556838989 }, { "epoch": 3.61, "eval_exact_match": 82.52601702932829, "eval_f1": 89.68187205330648, "step": 10000 }, { "compression_loss": 0.0, "distillation_loss": 0.5624034404754639, "epoch": 3.62, "learning_rate": 1.0648326094875253e-05, "loss": 0.4598, "step": 10010, "task_loss": 0.692568302154541 }, { "compression_loss": 0.0, "distillation_loss": 0.4360852837562561, "epoch": 3.62, "learning_rate": 1.0596321046645394e-05, "loss": 0.4773, "step": 10020, "task_loss": 0.4527958631515503 }, { "compression_loss": 0.0, "distillation_loss": 0.4201253056526184, "epoch": 3.62, "learning_rate": 1.0544416050309043e-05, "loss": 0.4736, "step": 10030, "task_loss": 0.4331403374671936 }, { "compression_loss": 0.0, "distillation_loss": 0.4714534282684326, "epoch": 3.63, "learning_rate": 1.0492611373505785e-05, "loss": 0.4866, "step": 10040, "task_loss": 0.6479029655456543 }, { "compression_loss": 0.0, "distillation_loss": 0.5169629454612732, "epoch": 3.63, "learning_rate": 1.0440907283357999e-05, "loss": 0.4508, "step": 10050, "task_loss": 0.6727709770202637 }, { "compression_loss": 0.0, "distillation_loss": 0.4062344431877136, "epoch": 3.64, "learning_rate": 1.0389304046469323e-05, "loss": 0.4978, "step": 10060, "task_loss": 0.5695604085922241 }, { "compression_loss": 0.0, "distillation_loss": 0.5002131462097168, "epoch": 3.64, "learning_rate": 1.033780192892341e-05, "loss": 0.4988, "step": 10070, "task_loss": 0.6775874495506287 }, { "compression_loss": 0.0, "distillation_loss": 0.4262256622314453, "epoch": 3.64, "learning_rate": 1.0286401196282502e-05, "loss": 0.5113, "step": 10080, "task_loss": 0.5431799292564392 }, { "compression_loss": 0.0, "distillation_loss": 0.3821599781513214, "epoch": 3.65, "learning_rate": 1.0235102113586063e-05, "loss": 0.4595, "step": 10090, "task_loss": 0.6419239044189453 }, { "compression_loss": 0.0, "distillation_loss": 0.5522717237472534, "epoch": 3.65, "learning_rate": 1.01839049453494e-05, "loss": 0.481, "step": 10100, "task_loss": 0.4990994334220886 }, { "compression_loss": 0.0, "distillation_loss": 0.5535731315612793, "epoch": 3.65, "learning_rate": 1.0132809955562337e-05, "loss": 0.4963, "step": 10110, "task_loss": 0.6528249979019165 }, { "compression_loss": 0.0, "distillation_loss": 0.5421990156173706, "epoch": 3.66, "learning_rate": 1.0081817407687819e-05, "loss": 0.519, "step": 10120, "task_loss": 0.9474392533302307 }, { "compression_loss": 0.0, "distillation_loss": 0.5861812829971313, "epoch": 3.66, "learning_rate": 1.0030927564660585e-05, "loss": 0.4887, "step": 10130, "task_loss": 0.5796793103218079 }, { "compression_loss": 0.0, "distillation_loss": 0.37179023027420044, "epoch": 3.66, "learning_rate": 9.980140688885745e-06, "loss": 0.4896, "step": 10140, "task_loss": 0.537672221660614 }, { "compression_loss": 0.0, "distillation_loss": 0.513761579990387, "epoch": 3.67, "learning_rate": 9.929457042237548e-06, "loss": 0.5025, "step": 10150, "task_loss": 0.6180034279823303 }, { "compression_loss": 0.0, "distillation_loss": 0.5233842134475708, "epoch": 3.67, "learning_rate": 9.878876886057894e-06, "loss": 0.4689, "step": 10160, "task_loss": 0.5639371871948242 }, { "compression_loss": 0.0, "distillation_loss": 0.5454587936401367, "epoch": 3.68, "learning_rate": 9.828400481155083e-06, "loss": 0.5063, "step": 10170, "task_loss": 0.8569517731666565 }, { "compression_loss": 0.0, "distillation_loss": 0.5449087023735046, "epoch": 3.68, "learning_rate": 9.77802808780243e-06, "loss": 0.5119, "step": 10180, "task_loss": 0.47911345958709717 }, { "compression_loss": 0.0, "distillation_loss": 0.5373599529266357, "epoch": 3.68, "learning_rate": 9.727759965736939e-06, "loss": 0.4877, "step": 10190, "task_loss": 0.7237974405288696 }, { "compression_loss": 0.0, "distillation_loss": 0.3350166082382202, "epoch": 3.69, "learning_rate": 9.677596374157957e-06, "loss": 0.4623, "step": 10200, "task_loss": 0.43222576379776 }, { "compression_loss": 0.0, "distillation_loss": 0.47038358449935913, "epoch": 3.69, "learning_rate": 9.627537571725803e-06, "loss": 0.5102, "step": 10210, "task_loss": 0.5583560466766357 }, { "compression_loss": 0.0, "distillation_loss": 0.6115131974220276, "epoch": 3.69, "learning_rate": 9.577583816560534e-06, "loss": 0.5384, "step": 10220, "task_loss": 0.6348150968551636 }, { "compression_loss": 0.0, "distillation_loss": 0.537125825881958, "epoch": 3.7, "learning_rate": 9.527735366240478e-06, "loss": 0.4803, "step": 10230, "task_loss": 0.3411647081375122 }, { "compression_loss": 0.0, "distillation_loss": 0.38711780309677124, "epoch": 3.7, "learning_rate": 9.477992477801019e-06, "loss": 0.4531, "step": 10240, "task_loss": 0.2967953681945801 }, { "compression_loss": 0.0, "distillation_loss": 0.419807493686676, "epoch": 3.7, "learning_rate": 9.428355407733219e-06, "loss": 0.4769, "step": 10250, "task_loss": 0.7556804418563843 }, { "epoch": 3.7, "eval_exact_match": 82.36518448438979, "eval_f1": 89.59782459360834, "step": 10250 }, { "compression_loss": 0.0, "distillation_loss": 0.4036286771297455, "epoch": 3.71, "learning_rate": 9.378824411982507e-06, "loss": 0.4836, "step": 10260, "task_loss": 1.1075952053070068 }, { "compression_loss": 0.0, "distillation_loss": 0.637839674949646, "epoch": 3.71, "learning_rate": 9.329399745947339e-06, "loss": 0.4865, "step": 10270, "task_loss": 1.0219868421554565 }, { "compression_loss": 0.0, "distillation_loss": 0.387042760848999, "epoch": 3.72, "learning_rate": 9.28008166447792e-06, "loss": 0.4587, "step": 10280, "task_loss": 0.7554829120635986 }, { "compression_loss": 0.0, "distillation_loss": 0.519973635673523, "epoch": 3.72, "learning_rate": 9.230870421874859e-06, "loss": 0.5099, "step": 10290, "task_loss": 0.7394765615463257 }, { "compression_loss": 0.0, "distillation_loss": 0.4160851836204529, "epoch": 3.72, "learning_rate": 9.181766271887887e-06, "loss": 0.4725, "step": 10300, "task_loss": 0.6241825819015503 }, { "compression_loss": 0.0, "distillation_loss": 0.5004531741142273, "epoch": 3.73, "learning_rate": 9.132769467714479e-06, "loss": 0.4985, "step": 10310, "task_loss": 0.6873956322669983 }, { "compression_loss": 0.0, "distillation_loss": 0.3454711437225342, "epoch": 3.73, "learning_rate": 9.083880261998675e-06, "loss": 0.4592, "step": 10320, "task_loss": 0.4680253863334656 }, { "compression_loss": 0.0, "distillation_loss": 0.4951745271682739, "epoch": 3.73, "learning_rate": 9.035098906829628e-06, "loss": 0.462, "step": 10330, "task_loss": 0.4511879086494446 }, { "compression_loss": 0.0, "distillation_loss": 0.7201694250106812, "epoch": 3.74, "learning_rate": 8.986425653740421e-06, "loss": 0.5548, "step": 10340, "task_loss": 0.6285726428031921 }, { "compression_loss": 0.0, "distillation_loss": 0.6020553112030029, "epoch": 3.74, "learning_rate": 8.937860753706713e-06, "loss": 0.4861, "step": 10350, "task_loss": 0.7205938696861267 }, { "compression_loss": 0.0, "distillation_loss": 0.40691256523132324, "epoch": 3.74, "learning_rate": 8.88940445714547e-06, "loss": 0.4598, "step": 10360, "task_loss": 0.44143691658973694 }, { "compression_loss": 0.0, "distillation_loss": 0.5514005422592163, "epoch": 3.75, "learning_rate": 8.841057013913629e-06, "loss": 0.473, "step": 10370, "task_loss": 1.1116174459457397 }, { "compression_loss": 0.0, "distillation_loss": 0.6847667694091797, "epoch": 3.75, "learning_rate": 8.79281867330688e-06, "loss": 0.5555, "step": 10380, "task_loss": 1.004784107208252 }, { "compression_loss": 0.0, "distillation_loss": 0.5778000354766846, "epoch": 3.75, "learning_rate": 8.744689684058324e-06, "loss": 0.4873, "step": 10390, "task_loss": 0.9104892015457153 }, { "compression_loss": 0.0, "distillation_loss": 0.41177552938461304, "epoch": 3.76, "learning_rate": 8.696670294337223e-06, "loss": 0.4595, "step": 10400, "task_loss": 0.8145915865898132 }, { "compression_loss": 0.0, "distillation_loss": 0.34814390540122986, "epoch": 3.76, "learning_rate": 8.648760751747671e-06, "loss": 0.5126, "step": 10410, "task_loss": 0.520122766494751 }, { "compression_loss": 0.0, "distillation_loss": 0.41130712628364563, "epoch": 3.77, "learning_rate": 8.600961303327416e-06, "loss": 0.4893, "step": 10420, "task_loss": 0.43144863843917847 }, { "compression_loss": 0.0, "distillation_loss": 0.34951236844062805, "epoch": 3.77, "learning_rate": 8.553272195546463e-06, "loss": 0.459, "step": 10430, "task_loss": 0.6265913248062134 }, { "compression_loss": 0.0, "distillation_loss": 0.4187074899673462, "epoch": 3.77, "learning_rate": 8.505693674305892e-06, "loss": 0.4915, "step": 10440, "task_loss": 0.4581117033958435 }, { "compression_loss": 0.0, "distillation_loss": 0.4234219193458557, "epoch": 3.78, "learning_rate": 8.458225984936562e-06, "loss": 0.4749, "step": 10450, "task_loss": 0.49265989661216736 }, { "compression_loss": 0.0, "distillation_loss": 0.5711984634399414, "epoch": 3.78, "learning_rate": 8.410869372197849e-06, "loss": 0.4633, "step": 10460, "task_loss": 0.7391009330749512 }, { "compression_loss": 0.0, "distillation_loss": 0.6687216758728027, "epoch": 3.78, "learning_rate": 8.363624080276351e-06, "loss": 0.516, "step": 10470, "task_loss": 0.6271883249282837 }, { "compression_loss": 0.0, "distillation_loss": 0.4507598876953125, "epoch": 3.79, "learning_rate": 8.316490352784698e-06, "loss": 0.4851, "step": 10480, "task_loss": 0.7498517036437988 }, { "compression_loss": 0.0, "distillation_loss": 0.376162052154541, "epoch": 3.79, "learning_rate": 8.269468432760227e-06, "loss": 0.4669, "step": 10490, "task_loss": 0.399840384721756 }, { "compression_loss": 0.0, "distillation_loss": 0.39421695470809937, "epoch": 3.79, "learning_rate": 8.222558562663788e-06, "loss": 0.5207, "step": 10500, "task_loss": 0.44343698024749756 }, { "epoch": 3.79, "eval_exact_match": 82.3841059602649, "eval_f1": 89.54316499056209, "step": 10500 }, { "compression_loss": 0.0, "distillation_loss": 0.4426266551017761, "epoch": 3.8, "learning_rate": 8.175760984378414e-06, "loss": 0.4776, "step": 10510, "task_loss": 0.5466393232345581 }, { "compression_loss": 0.0, "distillation_loss": 0.4493921101093292, "epoch": 3.8, "learning_rate": 8.129075939208192e-06, "loss": 0.4836, "step": 10520, "task_loss": 0.5781248211860657 }, { "compression_loss": 0.0, "distillation_loss": 0.44374221563339233, "epoch": 3.81, "learning_rate": 8.082503667876887e-06, "loss": 0.5203, "step": 10530, "task_loss": 0.3778543472290039 }, { "compression_loss": 0.0, "distillation_loss": 0.43476569652557373, "epoch": 3.81, "learning_rate": 8.036044410526801e-06, "loss": 0.4235, "step": 10540, "task_loss": 0.45925962924957275 }, { "compression_loss": 0.0, "distillation_loss": 0.6843395829200745, "epoch": 3.81, "learning_rate": 7.989698406717485e-06, "loss": 0.476, "step": 10550, "task_loss": 0.8984243869781494 }, { "compression_loss": 0.0, "distillation_loss": 0.4233591556549072, "epoch": 3.82, "learning_rate": 7.943465895424528e-06, "loss": 0.4784, "step": 10560, "task_loss": 0.6609106063842773 }, { "compression_loss": 0.0, "distillation_loss": 0.35403668880462646, "epoch": 3.82, "learning_rate": 7.897347115038289e-06, "loss": 0.4811, "step": 10570, "task_loss": 0.6570682525634766 }, { "compression_loss": 0.0, "distillation_loss": 0.45888036489486694, "epoch": 3.82, "learning_rate": 7.851342303362713e-06, "loss": 0.4827, "step": 10580, "task_loss": 0.8530523777008057 }, { "compression_loss": 0.0, "distillation_loss": 0.538689374923706, "epoch": 3.83, "learning_rate": 7.805451697614085e-06, "loss": 0.485, "step": 10590, "task_loss": 0.692087709903717 }, { "compression_loss": 0.0, "distillation_loss": 0.4368409812450409, "epoch": 3.83, "learning_rate": 7.759675534419805e-06, "loss": 0.546, "step": 10600, "task_loss": 0.5097652077674866 }, { "compression_loss": 0.0, "distillation_loss": 0.4115678071975708, "epoch": 3.83, "learning_rate": 7.714014049817136e-06, "loss": 0.4959, "step": 10610, "task_loss": 0.3430953919887543 }, { "compression_loss": 0.0, "distillation_loss": 0.3076716959476471, "epoch": 3.84, "learning_rate": 7.668467479252084e-06, "loss": 0.4085, "step": 10620, "task_loss": 0.44386640191078186 }, { "compression_loss": 0.0, "distillation_loss": 0.37531307339668274, "epoch": 3.84, "learning_rate": 7.623036057578053e-06, "loss": 0.4485, "step": 10630, "task_loss": 0.4283076524734497 }, { "compression_loss": 0.0, "distillation_loss": 0.4523438811302185, "epoch": 3.85, "learning_rate": 7.5777200190547465e-06, "loss": 0.5024, "step": 10640, "task_loss": 0.8270664215087891 }, { "compression_loss": 0.0, "distillation_loss": 0.40491363406181335, "epoch": 3.85, "learning_rate": 7.532519597346889e-06, "loss": 0.5433, "step": 10650, "task_loss": 0.5985238552093506 }, { "compression_loss": 0.0, "distillation_loss": 0.4389151334762573, "epoch": 3.85, "learning_rate": 7.487435025523069e-06, "loss": 0.4752, "step": 10660, "task_loss": 0.7840195894241333 }, { "compression_loss": 0.0, "distillation_loss": 0.4390360713005066, "epoch": 3.86, "learning_rate": 7.442466536054479e-06, "loss": 0.4214, "step": 10670, "task_loss": 1.005985140800476 }, { "compression_loss": 0.0, "distillation_loss": 0.4800419211387634, "epoch": 3.86, "learning_rate": 7.397614360813781e-06, "loss": 0.4512, "step": 10680, "task_loss": 0.5574986338615417 }, { "compression_loss": 0.0, "distillation_loss": 0.5794166326522827, "epoch": 3.86, "learning_rate": 7.352878731073872e-06, "loss": 0.4872, "step": 10690, "task_loss": 0.7646304368972778 }, { "compression_loss": 0.0, "distillation_loss": 0.38002362847328186, "epoch": 3.87, "learning_rate": 7.308259877506705e-06, "loss": 0.5005, "step": 10700, "task_loss": 0.547897219657898 }, { "compression_loss": 0.0, "distillation_loss": 0.377015620470047, "epoch": 3.87, "learning_rate": 7.26375803018209e-06, "loss": 0.4254, "step": 10710, "task_loss": 0.2385042905807495 }, { "compression_loss": 0.0, "distillation_loss": 0.5748364925384521, "epoch": 3.87, "learning_rate": 7.219373418566526e-06, "loss": 0.5412, "step": 10720, "task_loss": 0.6794745326042175 }, { "compression_loss": 0.0, "distillation_loss": 0.34330910444259644, "epoch": 3.88, "learning_rate": 7.175106271521979e-06, "loss": 0.4973, "step": 10730, "task_loss": 0.5638716220855713 }, { "compression_loss": 0.0, "distillation_loss": 0.5264981985092163, "epoch": 3.88, "learning_rate": 7.130956817304751e-06, "loss": 0.4934, "step": 10740, "task_loss": 0.7932864427566528 }, { "compression_loss": 0.0, "distillation_loss": 0.4208604693412781, "epoch": 3.89, "learning_rate": 7.0869252835642775e-06, "loss": 0.407, "step": 10750, "task_loss": 0.8152490854263306 }, { "epoch": 3.89, "eval_exact_match": 82.64900662251655, "eval_f1": 89.68607828698693, "step": 10750 }, { "compression_loss": 0.0, "distillation_loss": 0.846092939376831, "epoch": 3.89, "learning_rate": 7.043011897341959e-06, "loss": 0.5306, "step": 10760, "task_loss": 1.4170434474945068 }, { "compression_loss": 0.0, "distillation_loss": 0.5525998473167419, "epoch": 3.89, "learning_rate": 6.999216885069956e-06, "loss": 0.4667, "step": 10770, "task_loss": 0.9994626045227051 }, { "compression_loss": 0.0, "distillation_loss": 0.4946367144584656, "epoch": 3.9, "learning_rate": 6.955540472570116e-06, "loss": 0.4602, "step": 10780, "task_loss": 0.696071982383728 }, { "compression_loss": 0.0, "distillation_loss": 0.529938817024231, "epoch": 3.9, "learning_rate": 6.911982885052676e-06, "loss": 0.4975, "step": 10790, "task_loss": 0.6633660197257996 }, { "compression_loss": 0.0, "distillation_loss": 0.5889250040054321, "epoch": 3.9, "learning_rate": 6.868544347115216e-06, "loss": 0.5195, "step": 10800, "task_loss": 0.9271608591079712 }, { "compression_loss": 0.0, "distillation_loss": 0.5101796388626099, "epoch": 3.91, "learning_rate": 6.82522508274145e-06, "loss": 0.4942, "step": 10810, "task_loss": 0.4550142288208008 }, { "compression_loss": 0.0, "distillation_loss": 0.41657310724258423, "epoch": 3.91, "learning_rate": 6.7820253153000725e-06, "loss": 0.453, "step": 10820, "task_loss": 0.6524810791015625 }, { "compression_loss": 0.0, "distillation_loss": 0.42488449811935425, "epoch": 3.91, "learning_rate": 6.7389452675436e-06, "loss": 0.4542, "step": 10830, "task_loss": 0.4262767732143402 }, { "compression_loss": 0.0, "distillation_loss": 0.4936716854572296, "epoch": 3.92, "learning_rate": 6.695985161607251e-06, "loss": 0.4998, "step": 10840, "task_loss": 0.932105541229248 }, { "compression_loss": 0.0, "distillation_loss": 0.6871744990348816, "epoch": 3.92, "learning_rate": 6.6531452190077755e-06, "loss": 0.5045, "step": 10850, "task_loss": 1.0812252759933472 }, { "compression_loss": 0.0, "distillation_loss": 0.3607264459133148, "epoch": 3.92, "learning_rate": 6.610425660642335e-06, "loss": 0.4217, "step": 10860, "task_loss": 0.3593289852142334 }, { "compression_loss": 0.0, "distillation_loss": 0.7296729683876038, "epoch": 3.93, "learning_rate": 6.567826706787313e-06, "loss": 0.4889, "step": 10870, "task_loss": 0.8488539457321167 }, { "compression_loss": 0.0, "distillation_loss": 0.5310675501823425, "epoch": 3.93, "learning_rate": 6.525348577097271e-06, "loss": 0.5384, "step": 10880, "task_loss": 0.5912023782730103 }, { "compression_loss": 0.0, "distillation_loss": 0.4117245376110077, "epoch": 3.94, "learning_rate": 6.482991490603705e-06, "loss": 0.4646, "step": 10890, "task_loss": 0.33494138717651367 }, { "compression_loss": 0.0, "distillation_loss": 0.39375028014183044, "epoch": 3.94, "learning_rate": 6.440755665714006e-06, "loss": 0.5194, "step": 10900, "task_loss": 0.352378249168396 }, { "compression_loss": 0.0, "distillation_loss": 0.5667198896408081, "epoch": 3.94, "learning_rate": 6.398641320210292e-06, "loss": 0.4856, "step": 10910, "task_loss": 0.6015273332595825 }, { "compression_loss": 0.0, "distillation_loss": 0.408721923828125, "epoch": 3.95, "learning_rate": 6.356648671248292e-06, "loss": 0.5528, "step": 10920, "task_loss": 0.5410779714584351 }, { "compression_loss": 0.0, "distillation_loss": 0.4362407326698303, "epoch": 3.95, "learning_rate": 6.314777935356211e-06, "loss": 0.4703, "step": 10930, "task_loss": 0.7694103717803955 }, { "compression_loss": 0.0, "distillation_loss": 0.42711496353149414, "epoch": 3.95, "learning_rate": 6.273029328433653e-06, "loss": 0.4835, "step": 10940, "task_loss": 0.690986692905426 }, { "compression_loss": 0.0, "distillation_loss": 0.49706920981407166, "epoch": 3.96, "learning_rate": 6.231403065750473e-06, "loss": 0.4767, "step": 10950, "task_loss": 0.5795883536338806 }, { "compression_loss": 0.0, "distillation_loss": 0.44640737771987915, "epoch": 3.96, "learning_rate": 6.189899361945688e-06, "loss": 0.4741, "step": 10960, "task_loss": 0.7339274883270264 }, { "compression_loss": 0.0, "distillation_loss": 0.5700512528419495, "epoch": 3.96, "learning_rate": 6.148518431026324e-06, "loss": 0.4344, "step": 10970, "task_loss": 0.7164508700370789 }, { "compression_loss": 0.0, "distillation_loss": 0.3941676616668701, "epoch": 3.97, "learning_rate": 6.10726048636641e-06, "loss": 0.4584, "step": 10980, "task_loss": 0.9847264885902405 }, { "compression_loss": 0.0, "distillation_loss": 0.5191637277603149, "epoch": 3.97, "learning_rate": 6.066125740705759e-06, "loss": 0.4964, "step": 10990, "task_loss": 0.5576518774032593 }, { "compression_loss": 0.0, "distillation_loss": 0.4768173098564148, "epoch": 3.98, "learning_rate": 6.02511440614896e-06, "loss": 0.4876, "step": 11000, "task_loss": 0.7509768009185791 }, { "epoch": 3.98, "eval_exact_match": 82.52601702932829, "eval_f1": 89.64100301643371, "step": 11000 }, { "compression_loss": 0.0, "distillation_loss": 0.5284594297409058, "epoch": 3.98, "learning_rate": 5.984226694164241e-06, "loss": 0.5149, "step": 11010, "task_loss": 0.827378511428833 }, { "compression_loss": 0.0, "distillation_loss": 0.495005339384079, "epoch": 3.98, "learning_rate": 5.9434628155824066e-06, "loss": 0.5033, "step": 11020, "task_loss": 0.8550190925598145 }, { "compression_loss": 0.0, "distillation_loss": 0.43676552176475525, "epoch": 3.99, "learning_rate": 5.902822980595704e-06, "loss": 0.508, "step": 11030, "task_loss": 0.7428649663925171 }, { "compression_loss": 0.0, "distillation_loss": 0.48685529828071594, "epoch": 3.99, "learning_rate": 5.8623073987567965e-06, "loss": 0.4647, "step": 11040, "task_loss": 0.7491306066513062 }, { "compression_loss": 0.0, "distillation_loss": 0.42806217074394226, "epoch": 3.99, "learning_rate": 5.821916278977651e-06, "loss": 0.4953, "step": 11050, "task_loss": 0.973045825958252 }, { "compression_loss": 0.0, "distillation_loss": 0.46617090702056885, "epoch": 4.0, "learning_rate": 5.781649829528467e-06, "loss": 0.4884, "step": 11060, "task_loss": 0.6378223299980164 }, { "compression_loss": 0.0, "distillation_loss": 0.5613276958465576, "epoch": 4.0, "learning_rate": 5.741508258036575e-06, "loss": 0.549, "step": 11070, "task_loss": 0.9049055576324463 }, { "compression_loss": 0.0, "distillation_loss": 0.3290178179740906, "epoch": 4.0, "learning_rate": 5.701491771485448e-06, "loss": 0.4582, "step": 11080, "task_loss": 0.7116979360580444 }, { "compression_loss": 0.0, "distillation_loss": 0.5921218395233154, "epoch": 4.01, "learning_rate": 5.661600576213527e-06, "loss": 0.4359, "step": 11090, "task_loss": 0.6299700140953064 }, { "compression_loss": 0.0, "distillation_loss": 0.44644230604171753, "epoch": 4.01, "learning_rate": 5.62183487791323e-06, "loss": 0.5159, "step": 11100, "task_loss": 0.5687563419342041 }, { "compression_loss": 0.0, "distillation_loss": 0.38395532965660095, "epoch": 4.02, "learning_rate": 5.582194881629875e-06, "loss": 0.4807, "step": 11110, "task_loss": 0.677031934261322 }, { "compression_loss": 0.0, "distillation_loss": 0.39592987298965454, "epoch": 4.02, "learning_rate": 5.542680791760618e-06, "loss": 0.4683, "step": 11120, "task_loss": 0.31899696588516235 }, { "compression_loss": 0.0, "distillation_loss": 0.42465940117836, "epoch": 4.02, "learning_rate": 5.5032928120533776e-06, "loss": 0.4734, "step": 11130, "task_loss": 1.0328463315963745 }, { "compression_loss": 0.0, "distillation_loss": 0.5964382886886597, "epoch": 4.03, "learning_rate": 5.464031145605829e-06, "loss": 0.4592, "step": 11140, "task_loss": 0.6344958543777466 }, { "compression_loss": 0.0, "distillation_loss": 0.38633590936660767, "epoch": 4.03, "learning_rate": 5.424895994864332e-06, "loss": 0.4756, "step": 11150, "task_loss": 0.6650869846343994 }, { "compression_loss": 0.0, "distillation_loss": 0.36032289266586304, "epoch": 4.03, "learning_rate": 5.385887561622884e-06, "loss": 0.4349, "step": 11160, "task_loss": 0.5552083849906921 }, { "compression_loss": 0.0, "distillation_loss": 0.4399193525314331, "epoch": 4.04, "learning_rate": 5.347006047022067e-06, "loss": 0.4414, "step": 11170, "task_loss": 0.614956796169281 }, { "compression_loss": 0.0, "distillation_loss": 0.285051554441452, "epoch": 4.04, "learning_rate": 5.3082516515480715e-06, "loss": 0.43, "step": 11180, "task_loss": 0.4062471389770508 }, { "compression_loss": 0.0, "distillation_loss": 0.3828444480895996, "epoch": 4.04, "learning_rate": 5.269624575031573e-06, "loss": 0.464, "step": 11190, "task_loss": 0.5335018634796143 }, { "compression_loss": 0.0, "distillation_loss": 0.4207053780555725, "epoch": 4.05, "learning_rate": 5.231125016646768e-06, "loss": 0.4634, "step": 11200, "task_loss": 0.5893898010253906 }, { "compression_loss": 0.0, "distillation_loss": 0.4219140410423279, "epoch": 4.05, "learning_rate": 5.192753174910334e-06, "loss": 0.4549, "step": 11210, "task_loss": 0.4798952341079712 }, { "compression_loss": 0.0, "distillation_loss": 0.4892703890800476, "epoch": 4.05, "learning_rate": 5.154509247680397e-06, "loss": 0.4225, "step": 11220, "task_loss": 0.4158070981502533 }, { "compression_loss": 0.0, "distillation_loss": 0.5082884430885315, "epoch": 4.06, "learning_rate": 5.116393432155484e-06, "loss": 0.4373, "step": 11230, "task_loss": 0.8525248765945435 }, { "compression_loss": 0.0, "distillation_loss": 0.45040473341941833, "epoch": 4.06, "learning_rate": 5.078405924873568e-06, "loss": 0.4378, "step": 11240, "task_loss": 0.8633182644844055 }, { "compression_loss": 0.0, "distillation_loss": 0.4001632332801819, "epoch": 4.07, "learning_rate": 5.040546921711011e-06, "loss": 0.4203, "step": 11250, "task_loss": 0.9423239827156067 }, { "epoch": 4.07, "eval_exact_match": 82.61116367076632, "eval_f1": 89.66884691314182, "step": 11250 }, { "compression_loss": 0.0, "distillation_loss": 0.33361560106277466, "epoch": 4.07, "learning_rate": 5.002816617881575e-06, "loss": 0.4725, "step": 11260, "task_loss": 0.45950907468795776 }, { "compression_loss": 0.0, "distillation_loss": 0.3659554123878479, "epoch": 4.07, "learning_rate": 4.965215207935365e-06, "loss": 0.4997, "step": 11270, "task_loss": 0.5733004808425903 }, { "compression_loss": 0.0, "distillation_loss": 0.41657739877700806, "epoch": 4.08, "learning_rate": 4.931484303514092e-06, "loss": 0.4386, "step": 11280, "task_loss": 0.5786995887756348 }, { "compression_loss": 0.0, "distillation_loss": 0.4608357846736908, "epoch": 4.08, "learning_rate": 4.8941283255505295e-06, "loss": 0.4184, "step": 11290, "task_loss": 0.7504065036773682 }, { "compression_loss": 0.0, "distillation_loss": 0.42020905017852783, "epoch": 4.08, "learning_rate": 4.856901801903594e-06, "loss": 0.4813, "step": 11300, "task_loss": 0.8636704683303833 }, { "compression_loss": 0.0, "distillation_loss": 0.6672564744949341, "epoch": 4.09, "learning_rate": 4.8198049245257615e-06, "loss": 0.4454, "step": 11310, "task_loss": 1.0013171434402466 }, { "compression_loss": 0.0, "distillation_loss": 0.4561736583709717, "epoch": 4.09, "learning_rate": 4.782837884701011e-06, "loss": 0.4985, "step": 11320, "task_loss": 0.4815346598625183 }, { "compression_loss": 0.0, "distillation_loss": 0.4631105065345764, "epoch": 4.09, "learning_rate": 4.746000873043818e-06, "loss": 0.4496, "step": 11330, "task_loss": 0.5157427787780762 }, { "compression_loss": 0.0, "distillation_loss": 0.4157012403011322, "epoch": 4.1, "learning_rate": 4.709294079498207e-06, "loss": 0.4962, "step": 11340, "task_loss": 0.353486031293869 }, { "compression_loss": 0.0, "distillation_loss": 0.41181379556655884, "epoch": 4.1, "learning_rate": 4.672717693336749e-06, "loss": 0.4367, "step": 11350, "task_loss": 0.79695725440979 }, { "compression_loss": 0.0, "distillation_loss": 0.4717938303947449, "epoch": 4.11, "learning_rate": 4.6362719031596e-06, "loss": 0.4518, "step": 11360, "task_loss": 0.6884281635284424 }, { "compression_loss": 0.0, "distillation_loss": 0.430459201335907, "epoch": 4.11, "learning_rate": 4.5999568968934854e-06, "loss": 0.4505, "step": 11370, "task_loss": 0.7808297872543335 }, { "compression_loss": 0.0, "distillation_loss": 0.42840248346328735, "epoch": 4.11, "learning_rate": 4.5637728617908196e-06, "loss": 0.4285, "step": 11380, "task_loss": 1.0069183111190796 }, { "compression_loss": 0.0, "distillation_loss": 0.38663750886917114, "epoch": 4.12, "learning_rate": 4.527719984428635e-06, "loss": 0.4425, "step": 11390, "task_loss": 0.5139755010604858 }, { "compression_loss": 0.0, "distillation_loss": 0.5585314035415649, "epoch": 4.12, "learning_rate": 4.491798450707693e-06, "loss": 0.465, "step": 11400, "task_loss": 0.5836506485939026 }, { "compression_loss": 0.0, "distillation_loss": 0.42551636695861816, "epoch": 4.12, "learning_rate": 4.456008445851504e-06, "loss": 0.4585, "step": 11410, "task_loss": 0.4914487600326538 }, { "compression_loss": 0.0, "distillation_loss": 0.5012587904930115, "epoch": 4.13, "learning_rate": 4.420350154405373e-06, "loss": 0.4651, "step": 11420, "task_loss": 0.9073992371559143 }, { "compression_loss": 0.0, "distillation_loss": 0.49892178177833557, "epoch": 4.13, "learning_rate": 4.384823760235432e-06, "loss": 0.4523, "step": 11430, "task_loss": 0.6013965606689453 }, { "compression_loss": 0.0, "distillation_loss": 0.4007934331893921, "epoch": 4.13, "learning_rate": 4.3494294465277165e-06, "loss": 0.4745, "step": 11440, "task_loss": 0.5818295478820801 }, { "compression_loss": 0.0, "distillation_loss": 0.4963274896144867, "epoch": 4.14, "learning_rate": 4.314167395787213e-06, "loss": 0.4859, "step": 11450, "task_loss": 0.552923858165741 }, { "compression_loss": 0.0, "distillation_loss": 0.46873417496681213, "epoch": 4.14, "learning_rate": 4.279037789836915e-06, "loss": 0.5081, "step": 11460, "task_loss": 0.44107770919799805 }, { "compression_loss": 0.0, "distillation_loss": 0.6212577819824219, "epoch": 4.15, "learning_rate": 4.244040809816882e-06, "loss": 0.5153, "step": 11470, "task_loss": 0.7792346477508545 }, { "compression_loss": 0.0, "distillation_loss": 0.3834245800971985, "epoch": 4.15, "learning_rate": 4.209176636183313e-06, "loss": 0.4428, "step": 11480, "task_loss": 0.35695308446884155 }, { "compression_loss": 0.0, "distillation_loss": 0.4868013858795166, "epoch": 4.15, "learning_rate": 4.174445448707604e-06, "loss": 0.4634, "step": 11490, "task_loss": 0.5800308585166931 }, { "compression_loss": 0.0, "distillation_loss": 0.4813215434551239, "epoch": 4.16, "learning_rate": 4.139847426475443e-06, "loss": 0.4672, "step": 11500, "task_loss": 0.6517878174781799 }, { "epoch": 4.16, "eval_exact_match": 82.51655629139073, "eval_f1": 89.63512158721852, "step": 11500 }, { "compression_loss": 0.0, "distillation_loss": 0.45381027460098267, "epoch": 4.16, "learning_rate": 4.105382747885863e-06, "loss": 0.4824, "step": 11510, "task_loss": 0.8077258467674255 }, { "compression_loss": 0.0, "distillation_loss": 0.47750887274742126, "epoch": 4.16, "learning_rate": 4.071051590650343e-06, "loss": 0.4829, "step": 11520, "task_loss": 0.5288639664649963 }, { "compression_loss": 0.0, "distillation_loss": 0.417965829372406, "epoch": 4.17, "learning_rate": 4.036854131791856e-06, "loss": 0.4444, "step": 11530, "task_loss": 0.3982042670249939 }, { "compression_loss": 0.0, "distillation_loss": 0.4850964844226837, "epoch": 4.17, "learning_rate": 4.002790547644029e-06, "loss": 0.4159, "step": 11540, "task_loss": 0.42431122064590454 }, { "compression_loss": 0.0, "distillation_loss": 0.4070306420326233, "epoch": 4.17, "learning_rate": 3.96886101385013e-06, "loss": 0.4419, "step": 11550, "task_loss": 0.5825086832046509 }, { "compression_loss": 0.0, "distillation_loss": 0.358112633228302, "epoch": 4.18, "learning_rate": 3.935065705362258e-06, "loss": 0.4052, "step": 11560, "task_loss": 0.5550183057785034 }, { "compression_loss": 0.0, "distillation_loss": 0.49228614568710327, "epoch": 4.18, "learning_rate": 3.9014047964403805e-06, "loss": 0.431, "step": 11570, "task_loss": 0.819994330406189 }, { "compression_loss": 0.0, "distillation_loss": 0.5448801517486572, "epoch": 4.19, "learning_rate": 3.867878460651475e-06, "loss": 0.4863, "step": 11580, "task_loss": 0.5822548866271973 }, { "compression_loss": 0.0, "distillation_loss": 0.44271165132522583, "epoch": 4.19, "learning_rate": 3.834486870868585e-06, "loss": 0.4002, "step": 11590, "task_loss": 0.501520574092865 }, { "compression_loss": 0.0, "distillation_loss": 0.40002530813217163, "epoch": 4.19, "learning_rate": 3.8012301992699906e-06, "loss": 0.4177, "step": 11600, "task_loss": 0.6586291790008545 }, { "compression_loss": 0.0, "distillation_loss": 0.3801167607307434, "epoch": 4.2, "learning_rate": 3.768108617338265e-06, "loss": 0.4417, "step": 11610, "task_loss": 0.7325814962387085 }, { "compression_loss": 0.0, "distillation_loss": 0.4070740342140198, "epoch": 4.2, "learning_rate": 3.735122295859431e-06, "loss": 0.4995, "step": 11620, "task_loss": 0.35257917642593384 }, { "compression_loss": 0.0, "distillation_loss": 0.4214155673980713, "epoch": 4.2, "learning_rate": 3.7022714049220387e-06, "loss": 0.4334, "step": 11630, "task_loss": 0.5078192949295044 }, { "compression_loss": 0.0, "distillation_loss": 0.444728285074234, "epoch": 4.21, "learning_rate": 3.669556113916349e-06, "loss": 0.4291, "step": 11640, "task_loss": 0.6134557127952576 }, { "compression_loss": 0.0, "distillation_loss": 0.6096001863479614, "epoch": 4.21, "learning_rate": 3.6369765915333876e-06, "loss": 0.4695, "step": 11650, "task_loss": 0.7519853115081787 }, { "compression_loss": 0.0, "distillation_loss": 0.4547508955001831, "epoch": 4.21, "learning_rate": 3.6045330057641344e-06, "loss": 0.4402, "step": 11660, "task_loss": 0.5918570756912231 }, { "compression_loss": 0.0, "distillation_loss": 0.5541609525680542, "epoch": 4.22, "learning_rate": 3.5722255238986255e-06, "loss": 0.464, "step": 11670, "task_loss": 0.573555588722229 }, { "compression_loss": 0.0, "distillation_loss": 0.3390219807624817, "epoch": 4.22, "learning_rate": 3.5400543125251037e-06, "loss": 0.4442, "step": 11680, "task_loss": 0.5026131272315979 }, { "compression_loss": 0.0, "distillation_loss": 0.34992164373397827, "epoch": 4.22, "learning_rate": 3.5112168706785487e-06, "loss": 0.4619, "step": 11690, "task_loss": 0.9039815664291382 }, { "compression_loss": 0.0, "distillation_loss": 0.42350032925605774, "epoch": 4.23, "learning_rate": 3.4793050296731365e-06, "loss": 0.4914, "step": 11700, "task_loss": 0.5576711893081665 }, { "compression_loss": 0.0, "distillation_loss": 0.432574987411499, "epoch": 4.23, "learning_rate": 3.447529938289038e-06, "loss": 0.455, "step": 11710, "task_loss": 0.30313971638679504 }, { "compression_loss": 0.0, "distillation_loss": 0.3797840476036072, "epoch": 4.24, "learning_rate": 3.4158917603692885e-06, "loss": 0.4161, "step": 11720, "task_loss": 0.35903841257095337 }, { "compression_loss": 0.0, "distillation_loss": 0.405576229095459, "epoch": 4.24, "learning_rate": 3.384390659050979e-06, "loss": 0.4451, "step": 11730, "task_loss": 0.584861695766449 }, { "compression_loss": 0.0, "distillation_loss": 0.51385897397995, "epoch": 4.24, "learning_rate": 3.353026796764378e-06, "loss": 0.4533, "step": 11740, "task_loss": 0.6303707361221313 }, { "compression_loss": 0.0, "distillation_loss": 0.4929048418998718, "epoch": 4.25, "learning_rate": 3.321800335232118e-06, "loss": 0.5089, "step": 11750, "task_loss": 0.545328676700592 }, { "epoch": 4.25, "eval_exact_match": 82.50709555345317, "eval_f1": 89.55435656315157, "step": 11750 }, { "compression_loss": 0.0, "distillation_loss": 0.3543594181537628, "epoch": 4.25, "learning_rate": 3.2907114354683175e-06, "loss": 0.4477, "step": 11760, "task_loss": 0.4058670699596405 }, { "compression_loss": 0.0, "distillation_loss": 0.6473177075386047, "epoch": 4.25, "learning_rate": 3.2597602577778065e-06, "loss": 0.4899, "step": 11770, "task_loss": 0.8452356457710266 }, { "compression_loss": 0.0, "distillation_loss": 0.48143479228019714, "epoch": 4.26, "learning_rate": 3.2289469617552613e-06, "loss": 0.4521, "step": 11780, "task_loss": 0.6364303231239319 }, { "compression_loss": 0.0, "distillation_loss": 0.3805115818977356, "epoch": 4.26, "learning_rate": 3.198271706284409e-06, "loss": 0.4724, "step": 11790, "task_loss": 0.5545732975006104 }, { "compression_loss": 0.0, "distillation_loss": 0.42327409982681274, "epoch": 4.26, "learning_rate": 3.1677346495371616e-06, "loss": 0.4401, "step": 11800, "task_loss": 0.3729352653026581 }, { "compression_loss": 0.0, "distillation_loss": 0.49267518520355225, "epoch": 4.27, "learning_rate": 3.1373359489728783e-06, "loss": 0.4854, "step": 11810, "task_loss": 0.7921924591064453 }, { "compression_loss": 0.0, "distillation_loss": 0.4918190538883209, "epoch": 4.27, "learning_rate": 3.107075761337458e-06, "loss": 0.4366, "step": 11820, "task_loss": 0.6186762452125549 }, { "compression_loss": 0.0, "distillation_loss": 0.3143673539161682, "epoch": 4.28, "learning_rate": 3.076954242662615e-06, "loss": 0.4444, "step": 11830, "task_loss": 0.41375160217285156 }, { "compression_loss": 0.0, "distillation_loss": 0.4199334979057312, "epoch": 4.28, "learning_rate": 3.0469715482650264e-06, "loss": 0.4666, "step": 11840, "task_loss": 0.3213299512863159 }, { "compression_loss": 0.0, "distillation_loss": 0.45714980363845825, "epoch": 4.28, "learning_rate": 3.01712783274555e-06, "loss": 0.4897, "step": 11850, "task_loss": 0.6414152383804321 }, { "compression_loss": 0.0, "distillation_loss": 0.4583025276660919, "epoch": 4.29, "learning_rate": 2.987423249988411e-06, "loss": 0.4416, "step": 11860, "task_loss": 0.6744894981384277 }, { "compression_loss": 0.0, "distillation_loss": 0.4339994788169861, "epoch": 4.29, "learning_rate": 2.9578579531604335e-06, "loss": 0.5102, "step": 11870, "task_loss": 0.7543153762817383 }, { "compression_loss": 0.0, "distillation_loss": 0.5392136573791504, "epoch": 4.29, "learning_rate": 2.9284320947102227e-06, "loss": 0.4107, "step": 11880, "task_loss": 0.8216215372085571 }, { "compression_loss": 0.0, "distillation_loss": 0.3788997232913971, "epoch": 4.3, "learning_rate": 2.899145826367412e-06, "loss": 0.4469, "step": 11890, "task_loss": 1.0387654304504395 }, { "compression_loss": 0.0, "distillation_loss": 0.596594512462616, "epoch": 4.3, "learning_rate": 2.869999299141829e-06, "loss": 0.4655, "step": 11900, "task_loss": 0.8393262624740601 }, { "compression_loss": 0.0, "distillation_loss": 0.5400757789611816, "epoch": 4.3, "learning_rate": 2.8409926633227947e-06, "loss": 0.4444, "step": 11910, "task_loss": 0.5543546676635742 }, { "compression_loss": 0.0, "distillation_loss": 0.3381040394306183, "epoch": 4.31, "learning_rate": 2.8121260684782567e-06, "loss": 0.4609, "step": 11920, "task_loss": 0.7791619300842285 }, { "compression_loss": 0.0, "distillation_loss": 0.4478145241737366, "epoch": 4.31, "learning_rate": 2.7833996634540914e-06, "loss": 0.4413, "step": 11930, "task_loss": 0.5504406094551086 }, { "compression_loss": 0.0, "distillation_loss": 0.3867071866989136, "epoch": 4.32, "learning_rate": 2.7548135963733057e-06, "loss": 0.4504, "step": 11940, "task_loss": 0.35446274280548096 }, { "compression_loss": 0.0, "distillation_loss": 0.4211112856864929, "epoch": 4.32, "learning_rate": 2.726368014635275e-06, "loss": 0.3832, "step": 11950, "task_loss": 0.7330440282821655 }, { "compression_loss": 0.0, "distillation_loss": 0.450991690158844, "epoch": 4.32, "learning_rate": 2.6980630649149797e-06, "loss": 0.4979, "step": 11960, "task_loss": 0.9241546392440796 }, { "compression_loss": 0.0, "distillation_loss": 0.427333265542984, "epoch": 4.33, "learning_rate": 2.669898893162257e-06, "loss": 0.4731, "step": 11970, "task_loss": 0.7334015369415283 }, { "compression_loss": 0.0, "distillation_loss": 0.6288331747055054, "epoch": 4.33, "learning_rate": 2.641875644601047e-06, "loss": 0.4594, "step": 11980, "task_loss": 0.7425243854522705 }, { "compression_loss": 0.0, "distillation_loss": 0.41228044033050537, "epoch": 4.33, "learning_rate": 2.6139934637286546e-06, "loss": 0.4227, "step": 11990, "task_loss": 0.4954274892807007 }, { "compression_loss": 0.0, "distillation_loss": 0.39298591017723083, "epoch": 4.34, "learning_rate": 2.586252494314961e-06, "loss": 0.461, "step": 12000, "task_loss": 0.3948507010936737 }, { "epoch": 4.34, "eval_exact_match": 82.52601702932829, "eval_f1": 89.65617956341971, "step": 12000 }, { "compression_loss": 0.0, "distillation_loss": 0.38348010182380676, "epoch": 4.34, "learning_rate": 2.558652879401753e-06, "loss": 0.4474, "step": 12010, "task_loss": 0.36755043268203735 }, { "compression_loss": 0.0, "distillation_loss": 0.44406384229660034, "epoch": 4.34, "learning_rate": 2.531194761301907e-06, "loss": 0.3773, "step": 12020, "task_loss": 0.4231364130973816 }, { "compression_loss": 0.0, "distillation_loss": 0.3030942678451538, "epoch": 4.35, "learning_rate": 2.503878281598726e-06, "loss": 0.4116, "step": 12030, "task_loss": 0.23304599523544312 }, { "compression_loss": 0.0, "distillation_loss": 0.42190343141555786, "epoch": 4.35, "learning_rate": 2.476703581145162e-06, "loss": 0.4454, "step": 12040, "task_loss": 0.5864576101303101 }, { "compression_loss": 0.0, "distillation_loss": 0.3552584648132324, "epoch": 4.35, "learning_rate": 2.4496708000631094e-06, "loss": 0.4174, "step": 12050, "task_loss": 0.28264883160591125 }, { "compression_loss": 0.0, "distillation_loss": 0.3596591651439667, "epoch": 4.36, "learning_rate": 2.4227800777426746e-06, "loss": 0.3788, "step": 12060, "task_loss": 0.5582944750785828 }, { "compression_loss": 0.0, "distillation_loss": 0.4927133619785309, "epoch": 4.36, "learning_rate": 2.396031552841462e-06, "loss": 0.4512, "step": 12070, "task_loss": 0.9744982123374939 }, { "compression_loss": 0.0, "distillation_loss": 0.4307158589363098, "epoch": 4.37, "learning_rate": 2.369425363283865e-06, "loss": 0.4468, "step": 12080, "task_loss": 0.7413559556007385 }, { "compression_loss": 0.0, "distillation_loss": 0.5698738098144531, "epoch": 4.37, "learning_rate": 2.3429616462603477e-06, "loss": 0.4575, "step": 12090, "task_loss": 0.5306029915809631 }, { "compression_loss": 0.0, "distillation_loss": 0.44272828102111816, "epoch": 4.37, "learning_rate": 2.316640538226721e-06, "loss": 0.4357, "step": 12100, "task_loss": 0.6407321691513062 }, { "compression_loss": 0.0, "distillation_loss": 0.3783756196498871, "epoch": 4.38, "learning_rate": 2.290462174903486e-06, "loss": 0.4917, "step": 12110, "task_loss": 0.6652238368988037 }, { "compression_loss": 0.0, "distillation_loss": 0.4510440230369568, "epoch": 4.38, "learning_rate": 2.2644266912750733e-06, "loss": 0.4379, "step": 12120, "task_loss": 0.5491682291030884 }, { "compression_loss": 0.0, "distillation_loss": 0.6077476739883423, "epoch": 4.38, "learning_rate": 2.238534221589196e-06, "loss": 0.4289, "step": 12130, "task_loss": 0.8977972269058228 }, { "compression_loss": 0.0, "distillation_loss": 0.5110172033309937, "epoch": 4.39, "learning_rate": 2.212784899356136e-06, "loss": 0.4378, "step": 12140, "task_loss": 0.40151098370552063 }, { "compression_loss": 0.0, "distillation_loss": 0.4892664849758148, "epoch": 4.39, "learning_rate": 2.187178857348061e-06, "loss": 0.4869, "step": 12150, "task_loss": 0.7194719314575195 }, { "compression_loss": 0.0, "distillation_loss": 0.3257119953632355, "epoch": 4.39, "learning_rate": 2.1617162275983217e-06, "loss": 0.3906, "step": 12160, "task_loss": 0.4725964069366455 }, { "compression_loss": 0.0, "distillation_loss": 0.46231818199157715, "epoch": 4.4, "learning_rate": 2.1363971414008097e-06, "loss": 0.4395, "step": 12170, "task_loss": 0.3184768557548523 }, { "compression_loss": 0.0, "distillation_loss": 0.6130329966545105, "epoch": 4.4, "learning_rate": 2.1112217293092405e-06, "loss": 0.4499, "step": 12180, "task_loss": 0.6699998378753662 }, { "compression_loss": 0.0, "distillation_loss": 0.48698151111602783, "epoch": 4.41, "learning_rate": 2.0861901211365177e-06, "loss": 0.431, "step": 12190, "task_loss": 0.5479468703269958 }, { "compression_loss": 0.0, "distillation_loss": 0.6351044178009033, "epoch": 4.41, "learning_rate": 2.0613024459540076e-06, "loss": 0.4908, "step": 12200, "task_loss": 0.5488524436950684 }, { "compression_loss": 0.0, "distillation_loss": 0.47335565090179443, "epoch": 4.41, "learning_rate": 2.0365588320909576e-06, "loss": 0.4721, "step": 12210, "task_loss": 0.5685126781463623 }, { "compression_loss": 0.0, "distillation_loss": 0.36355626583099365, "epoch": 4.42, "learning_rate": 2.0119594071337433e-06, "loss": 0.3822, "step": 12220, "task_loss": 0.5155311822891235 }, { "compression_loss": 0.0, "distillation_loss": 0.5141717195510864, "epoch": 4.42, "learning_rate": 1.98750429792528e-06, "loss": 0.4493, "step": 12230, "task_loss": 0.6456845998764038 }, { "compression_loss": 0.0, "distillation_loss": 0.3986155092716217, "epoch": 4.42, "learning_rate": 1.9631936305643294e-06, "loss": 0.4575, "step": 12240, "task_loss": 0.7241479158401489 }, { "compression_loss": 0.0, "distillation_loss": 0.4736446142196655, "epoch": 4.43, "learning_rate": 1.9390275304048755e-06, "loss": 0.4109, "step": 12250, "task_loss": 0.35844236612319946 }, { "epoch": 4.43, "eval_exact_match": 82.72469252601702, "eval_f1": 89.86566403216932, "step": 12250 }, { "compression_loss": 0.0, "distillation_loss": 0.46747684478759766, "epoch": 4.43, "learning_rate": 1.915006122055445e-06, "loss": 0.4611, "step": 12260, "task_loss": 0.9917988777160645 }, { "compression_loss": 0.0, "distillation_loss": 0.31288278102874756, "epoch": 4.43, "learning_rate": 1.891129529378508e-06, "loss": 0.4293, "step": 12270, "task_loss": 0.5523203015327454 }, { "compression_loss": 0.0, "distillation_loss": 0.4253838360309601, "epoch": 4.44, "learning_rate": 1.867397875489799e-06, "loss": 0.4312, "step": 12280, "task_loss": 0.4688680171966553 }, { "compression_loss": 0.0, "distillation_loss": 0.3778352737426758, "epoch": 4.44, "learning_rate": 1.8438112827577068e-06, "loss": 0.4365, "step": 12290, "task_loss": 0.704608142375946 }, { "compression_loss": 0.0, "distillation_loss": 0.43521445989608765, "epoch": 4.45, "learning_rate": 1.8203698728026386e-06, "loss": 0.4443, "step": 12300, "task_loss": 0.73112952709198 }, { "compression_loss": 0.0, "distillation_loss": 0.3996022641658783, "epoch": 4.45, "learning_rate": 1.7970737664963832e-06, "loss": 0.4243, "step": 12310, "task_loss": 0.5031775832176208 }, { "compression_loss": 0.0, "distillation_loss": 0.47173169255256653, "epoch": 4.45, "learning_rate": 1.7739230839614962e-06, "loss": 0.4668, "step": 12320, "task_loss": 0.6459155082702637 }, { "compression_loss": 0.0, "distillation_loss": 0.38863933086395264, "epoch": 4.46, "learning_rate": 1.7509179445706858e-06, "loss": 0.4154, "step": 12330, "task_loss": 0.5454069375991821 }, { "compression_loss": 0.0, "distillation_loss": 0.5435532331466675, "epoch": 4.46, "learning_rate": 1.7280584669461808e-06, "loss": 0.4891, "step": 12340, "task_loss": 0.40724098682403564 }, { "compression_loss": 0.0, "distillation_loss": 0.44464439153671265, "epoch": 4.46, "learning_rate": 1.7053447689591473e-06, "loss": 0.4487, "step": 12350, "task_loss": 0.31382906436920166 }, { "compression_loss": 0.0, "distillation_loss": 0.3412976861000061, "epoch": 4.47, "learning_rate": 1.6827769677290294e-06, "loss": 0.4525, "step": 12360, "task_loss": 0.5036389827728271 }, { "compression_loss": 0.0, "distillation_loss": 0.3674119710922241, "epoch": 4.47, "learning_rate": 1.6603551796230232e-06, "loss": 0.4865, "step": 12370, "task_loss": 0.5364322662353516 }, { "compression_loss": 0.0, "distillation_loss": 0.3727283477783203, "epoch": 4.47, "learning_rate": 1.6380795202553866e-06, "loss": 0.4124, "step": 12380, "task_loss": 0.6606693863868713 }, { "compression_loss": 0.0, "distillation_loss": 0.5182126760482788, "epoch": 4.48, "learning_rate": 1.615950104486924e-06, "loss": 0.4459, "step": 12390, "task_loss": 0.6920464038848877 }, { "compression_loss": 0.0, "distillation_loss": 0.4384608864784241, "epoch": 4.48, "learning_rate": 1.5939670464243362e-06, "loss": 0.486, "step": 12400, "task_loss": 0.5016846656799316 }, { "compression_loss": 0.0, "distillation_loss": 0.3930404484272003, "epoch": 4.49, "learning_rate": 1.572130459419674e-06, "loss": 0.4514, "step": 12410, "task_loss": 0.5308805704116821 }, { "compression_loss": 0.0, "distillation_loss": 0.5489310622215271, "epoch": 4.49, "learning_rate": 1.5504404560697093e-06, "loss": 0.4661, "step": 12420, "task_loss": 0.7806792259216309 }, { "compression_loss": 0.0, "distillation_loss": 0.6114611625671387, "epoch": 4.49, "learning_rate": 1.5288971482153957e-06, "loss": 0.4664, "step": 12430, "task_loss": 0.9652453660964966 }, { "compression_loss": 0.0, "distillation_loss": 0.32875922322273254, "epoch": 4.5, "learning_rate": 1.5075006469412778e-06, "loss": 0.4655, "step": 12440, "task_loss": 0.2918616533279419 }, { "compression_loss": 0.0, "distillation_loss": 0.3842153549194336, "epoch": 4.5, "learning_rate": 1.486251062574916e-06, "loss": 0.4458, "step": 12450, "task_loss": 0.4864546060562134 }, { "compression_loss": 0.0, "distillation_loss": 0.5039974451065063, "epoch": 4.5, "learning_rate": 1.4651485046862933e-06, "loss": 0.4366, "step": 12460, "task_loss": 0.816196858882904 }, { "compression_loss": 0.0, "distillation_loss": 0.3842373192310333, "epoch": 4.51, "learning_rate": 1.4441930820873195e-06, "loss": 0.4098, "step": 12470, "task_loss": 0.7222305536270142 }, { "compression_loss": 0.0, "distillation_loss": 0.5461684465408325, "epoch": 4.51, "learning_rate": 1.4233849028311808e-06, "loss": 0.491, "step": 12480, "task_loss": 0.6540833711624146 }, { "compression_loss": 0.0, "distillation_loss": 0.38882261514663696, "epoch": 4.51, "learning_rate": 1.4027240742118542e-06, "loss": 0.4521, "step": 12490, "task_loss": 1.0828897953033447 }, { "compression_loss": 0.0, "distillation_loss": 0.3728446364402771, "epoch": 4.52, "learning_rate": 1.3822107027635178e-06, "loss": 0.4369, "step": 12500, "task_loss": 0.6404829025268555 }, { "epoch": 4.52, "eval_exact_match": 82.67738883632923, "eval_f1": 89.75424790980458, "step": 12500 }, { "compression_loss": 0.0, "distillation_loss": 0.8121786713600159, "epoch": 4.52, "learning_rate": 1.3618448942600182e-06, "loss": 0.4755, "step": 12510, "task_loss": 0.6036579012870789 }, { "compression_loss": 0.0, "distillation_loss": 0.3688916862010956, "epoch": 4.52, "learning_rate": 1.3416267537143035e-06, "loss": 0.4518, "step": 12520, "task_loss": 0.3554149866104126 }, { "compression_loss": 0.0, "distillation_loss": 0.5656636953353882, "epoch": 4.53, "learning_rate": 1.3215563853779112e-06, "loss": 0.4684, "step": 12530, "task_loss": 0.6867916584014893 }, { "compression_loss": 0.0, "distillation_loss": 0.3812546730041504, "epoch": 4.53, "learning_rate": 1.3016338927404047e-06, "loss": 0.4268, "step": 12540, "task_loss": 0.5564362406730652 }, { "compression_loss": 0.0, "distillation_loss": 0.3769798278808594, "epoch": 4.54, "learning_rate": 1.2818593785288645e-06, "loss": 0.4809, "step": 12550, "task_loss": 0.5600254535675049 }, { "compression_loss": 0.0, "distillation_loss": 0.5981583595275879, "epoch": 4.54, "learning_rate": 1.262232944707321e-06, "loss": 0.4021, "step": 12560, "task_loss": 0.5983515977859497 }, { "compression_loss": 0.0, "distillation_loss": 0.35735106468200684, "epoch": 4.54, "learning_rate": 1.2427546924762823e-06, "loss": 0.3927, "step": 12570, "task_loss": 0.3935161828994751 }, { "compression_loss": 0.0, "distillation_loss": 0.4289091229438782, "epoch": 4.55, "learning_rate": 1.2234247222721573e-06, "loss": 0.4543, "step": 12580, "task_loss": 0.548987090587616 }, { "compression_loss": 0.0, "distillation_loss": 0.42020517587661743, "epoch": 4.55, "learning_rate": 1.2042431337667704e-06, "loss": 0.4656, "step": 12590, "task_loss": 0.9573279619216919 }, { "compression_loss": 0.0, "distillation_loss": 0.43121659755706787, "epoch": 4.55, "learning_rate": 1.1852100258668507e-06, "loss": 0.4789, "step": 12600, "task_loss": 0.45250964164733887 }, { "compression_loss": 0.0, "distillation_loss": 0.49477794766426086, "epoch": 4.56, "learning_rate": 1.1663254967134973e-06, "loss": 0.4642, "step": 12610, "task_loss": 1.1415183544158936 }, { "compression_loss": 0.0, "distillation_loss": 0.38228267431259155, "epoch": 4.56, "learning_rate": 1.1475896436816947e-06, "loss": 0.4493, "step": 12620, "task_loss": 0.3687947392463684 }, { "compression_loss": 0.0, "distillation_loss": 0.400796115398407, "epoch": 4.56, "learning_rate": 1.1290025633797973e-06, "loss": 0.4358, "step": 12630, "task_loss": 0.3746778964996338 }, { "compression_loss": 0.0, "distillation_loss": 0.3107837438583374, "epoch": 4.57, "learning_rate": 1.1105643516490438e-06, "loss": 0.4588, "step": 12640, "task_loss": 0.5554065108299255 }, { "compression_loss": 0.0, "distillation_loss": 0.4274769425392151, "epoch": 4.57, "learning_rate": 1.0922751035630595e-06, "loss": 0.4397, "step": 12650, "task_loss": 0.5595571994781494 }, { "compression_loss": 0.0, "distillation_loss": 0.5575343370437622, "epoch": 4.58, "learning_rate": 1.0741349134273448e-06, "loss": 0.4558, "step": 12660, "task_loss": 0.8480510711669922 }, { "compression_loss": 0.0, "distillation_loss": 0.4240317940711975, "epoch": 4.58, "learning_rate": 1.0561438747788377e-06, "loss": 0.4562, "step": 12670, "task_loss": 0.5816398859024048 }, { "compression_loss": 0.0, "distillation_loss": 0.4905956983566284, "epoch": 4.58, "learning_rate": 1.0383020803853682e-06, "loss": 0.4599, "step": 12680, "task_loss": 0.7364996671676636 }, { "compression_loss": 0.0, "distillation_loss": 0.5033512115478516, "epoch": 4.59, "learning_rate": 1.0206096222452321e-06, "loss": 0.4555, "step": 12690, "task_loss": 0.5982179641723633 }, { "compression_loss": 0.0, "distillation_loss": 0.3769073486328125, "epoch": 4.59, "learning_rate": 1.0030665915866944e-06, "loss": 0.4675, "step": 12700, "task_loss": 0.6038902997970581 }, { "compression_loss": 0.0, "distillation_loss": 0.5291876196861267, "epoch": 4.59, "learning_rate": 9.856730788675228e-07, "loss": 0.4573, "step": 12710, "task_loss": 0.5451856851577759 }, { "compression_loss": 0.0, "distillation_loss": 0.3201839327812195, "epoch": 4.6, "learning_rate": 9.68429173774512e-07, "loss": 0.4138, "step": 12720, "task_loss": 0.4877792000770569 }, { "compression_loss": 0.0, "distillation_loss": 0.4193733334541321, "epoch": 4.6, "learning_rate": 9.513349652230407e-07, "loss": 0.4541, "step": 12730, "task_loss": 0.6895813345909119 }, { "compression_loss": 0.0, "distillation_loss": 0.3674892783164978, "epoch": 4.6, "learning_rate": 9.343905413565878e-07, "loss": 0.4354, "step": 12740, "task_loss": 0.5178918838500977 }, { "compression_loss": 0.0, "distillation_loss": 0.49814528226852417, "epoch": 4.61, "learning_rate": 9.175959895463138e-07, "loss": 0.4446, "step": 12750, "task_loss": 1.101963996887207 }, { "epoch": 4.61, "eval_exact_match": 82.61116367076632, "eval_f1": 89.66291770374835, "step": 12750 }, { "compression_loss": 0.0, "distillation_loss": 0.4390224516391754, "epoch": 4.61, "learning_rate": 9.009513963905602e-07, "loss": 0.435, "step": 12760, "task_loss": 0.6204954385757446 }, { "compression_loss": 0.0, "distillation_loss": 0.35951822996139526, "epoch": 4.62, "learning_rate": 8.844568477144644e-07, "loss": 0.4543, "step": 12770, "task_loss": 0.5188806056976318 }, { "compression_loss": 0.0, "distillation_loss": 0.5767937898635864, "epoch": 4.62, "learning_rate": 8.681124285694486e-07, "loss": 0.442, "step": 12780, "task_loss": 0.7311397790908813 }, { "compression_loss": 0.0, "distillation_loss": 0.4548581838607788, "epoch": 4.62, "learning_rate": 8.519182232328415e-07, "loss": 0.4579, "step": 12790, "task_loss": 0.5679984092712402 }, { "compression_loss": 0.0, "distillation_loss": 0.5455349087715149, "epoch": 4.63, "learning_rate": 8.358743152074111e-07, "loss": 0.451, "step": 12800, "task_loss": 0.6041518449783325 }, { "compression_loss": 0.0, "distillation_loss": 0.33094823360443115, "epoch": 4.63, "learning_rate": 8.199807872209452e-07, "loss": 0.39, "step": 12810, "task_loss": 0.2264111191034317 }, { "compression_loss": 0.0, "distillation_loss": 0.38882723450660706, "epoch": 4.63, "learning_rate": 8.042377212258123e-07, "loss": 0.4509, "step": 12820, "task_loss": 0.685081958770752 }, { "compression_loss": 0.0, "distillation_loss": 0.5015546083450317, "epoch": 4.64, "learning_rate": 7.886451983985576e-07, "loss": 0.4549, "step": 12830, "task_loss": 0.4656003713607788 }, { "compression_loss": 0.0, "distillation_loss": 0.5418596267700195, "epoch": 4.64, "learning_rate": 7.73203299139471e-07, "loss": 0.491, "step": 12840, "task_loss": 0.481580913066864 }, { "compression_loss": 0.0, "distillation_loss": 0.6147862672805786, "epoch": 4.64, "learning_rate": 7.579121030721837e-07, "loss": 0.4705, "step": 12850, "task_loss": 0.7846791744232178 }, { "compression_loss": 0.0, "distillation_loss": 0.39587533473968506, "epoch": 4.65, "learning_rate": 7.427716890432346e-07, "loss": 0.464, "step": 12860, "task_loss": 0.40945491194725037 }, { "compression_loss": 0.0, "distillation_loss": 0.3885082006454468, "epoch": 4.65, "learning_rate": 7.277821351216984e-07, "loss": 0.4694, "step": 12870, "task_loss": 0.507175087928772 }, { "compression_loss": 0.0, "distillation_loss": 0.4322596788406372, "epoch": 4.65, "learning_rate": 7.129435185987487e-07, "loss": 0.4253, "step": 12880, "task_loss": 0.6645572185516357 }, { "compression_loss": 0.0, "distillation_loss": 0.4641607999801636, "epoch": 4.66, "learning_rate": 6.982559159872881e-07, "loss": 0.435, "step": 12890, "task_loss": 0.5848242044448853 }, { "compression_loss": 0.0, "distillation_loss": 0.42792952060699463, "epoch": 4.66, "learning_rate": 6.837194030215288e-07, "loss": 0.4249, "step": 12900, "task_loss": 0.37192124128341675 }, { "compression_loss": 0.0, "distillation_loss": 0.36941879987716675, "epoch": 4.67, "learning_rate": 6.693340546566263e-07, "loss": 0.4517, "step": 12910, "task_loss": 0.48606473207473755 }, { "compression_loss": 0.0, "distillation_loss": 0.5151842832565308, "epoch": 4.67, "learning_rate": 6.550999450682693e-07, "loss": 0.474, "step": 12920, "task_loss": 0.6494313478469849 }, { "compression_loss": 0.0, "distillation_loss": 0.41002902388572693, "epoch": 4.67, "learning_rate": 6.410171476523141e-07, "loss": 0.4543, "step": 12930, "task_loss": 0.6349010467529297 }, { "compression_loss": 0.0, "distillation_loss": 0.4869542717933655, "epoch": 4.68, "learning_rate": 6.270857350243974e-07, "loss": 0.4688, "step": 12940, "task_loss": 0.5892748832702637 }, { "compression_loss": 0.0, "distillation_loss": 0.4189644455909729, "epoch": 4.68, "learning_rate": 6.133057790195773e-07, "loss": 0.4564, "step": 12950, "task_loss": 0.7822315096855164 }, { "compression_loss": 0.0, "distillation_loss": 0.5754233598709106, "epoch": 4.68, "learning_rate": 5.996773506919262e-07, "loss": 0.457, "step": 12960, "task_loss": 0.5739059448242188 }, { "compression_loss": 0.0, "distillation_loss": 0.5854544639587402, "epoch": 4.69, "learning_rate": 5.862005203142151e-07, "loss": 0.4339, "step": 12970, "task_loss": 0.8967189788818359 }, { "compression_loss": 0.0, "distillation_loss": 0.4532425105571747, "epoch": 4.69, "learning_rate": 5.728753573775069e-07, "loss": 0.449, "step": 12980, "task_loss": 0.6138538122177124 }, { "compression_loss": 0.0, "distillation_loss": 0.3805669844150543, "epoch": 4.69, "learning_rate": 5.597019305908235e-07, "loss": 0.4256, "step": 12990, "task_loss": 0.49195724725723267 }, { "compression_loss": 0.0, "distillation_loss": 0.5049669742584229, "epoch": 4.7, "learning_rate": 5.466803078807859e-07, "loss": 0.4906, "step": 13000, "task_loss": 0.5219841003417969 }, { "epoch": 4.7, "eval_exact_match": 82.639545884579, "eval_f1": 89.75210989680092, "step": 13000 }, { "compression_loss": 0.0, "distillation_loss": 0.327298104763031, "epoch": 4.7, "learning_rate": 5.33810556391261e-07, "loss": 0.461, "step": 13010, "task_loss": 0.7025531530380249 }, { "compression_loss": 0.0, "distillation_loss": 0.4368177652359009, "epoch": 4.71, "learning_rate": 5.210927424830092e-07, "loss": 0.4248, "step": 13020, "task_loss": 0.5124354958534241 }, { "compression_loss": 0.0, "distillation_loss": 0.3765318989753723, "epoch": 4.71, "learning_rate": 5.085269317333574e-07, "loss": 0.4322, "step": 13030, "task_loss": 0.44112464785575867 }, { "compression_loss": 0.0, "distillation_loss": 0.41373926401138306, "epoch": 4.71, "learning_rate": 4.961131889358528e-07, "loss": 0.4575, "step": 13040, "task_loss": 1.0655057430267334 }, { "compression_loss": 0.0, "distillation_loss": 0.42176538705825806, "epoch": 4.72, "learning_rate": 4.838515780999264e-07, "loss": 0.4261, "step": 13050, "task_loss": 0.6866442561149597 }, { "compression_loss": 0.0, "distillation_loss": 0.4533381462097168, "epoch": 4.72, "learning_rate": 4.717421624505669e-07, "loss": 0.5023, "step": 13060, "task_loss": 0.5832757353782654 }, { "compression_loss": 0.0, "distillation_loss": 0.5116744041442871, "epoch": 4.72, "learning_rate": 4.597850044279972e-07, "loss": 0.4487, "step": 13070, "task_loss": 0.9590458869934082 }, { "compression_loss": 0.0, "distillation_loss": 0.48838239908218384, "epoch": 4.73, "learning_rate": 4.4798016568733837e-07, "loss": 0.43, "step": 13080, "task_loss": 0.8301137089729309 }, { "compression_loss": 0.0, "distillation_loss": 0.4273003339767456, "epoch": 4.73, "learning_rate": 4.3632770709831293e-07, "loss": 0.4554, "step": 13090, "task_loss": 0.5610930919647217 }, { "compression_loss": 0.0, "distillation_loss": 0.47295600175857544, "epoch": 4.73, "learning_rate": 4.248276887449154e-07, "loss": 0.4865, "step": 13100, "task_loss": 0.6153036952018738 }, { "compression_loss": 0.0, "distillation_loss": 0.47993203997612, "epoch": 4.74, "learning_rate": 4.1348016992510895e-07, "loss": 0.4574, "step": 13110, "task_loss": 0.5558942556381226 }, { "compression_loss": 0.0, "distillation_loss": 0.39272961020469666, "epoch": 4.74, "learning_rate": 4.0228520915050915e-07, "loss": 0.4293, "step": 13120, "task_loss": 0.4004030227661133 }, { "compression_loss": 0.0, "distillation_loss": 0.6470414400100708, "epoch": 4.75, "learning_rate": 3.912428641461041e-07, "loss": 0.4491, "step": 13130, "task_loss": 0.7483572959899902 }, { "compression_loss": 0.0, "distillation_loss": 0.3939260244369507, "epoch": 4.75, "learning_rate": 3.8035319184993813e-07, "loss": 0.4283, "step": 13140, "task_loss": 0.3680633306503296 }, { "compression_loss": 0.0, "distillation_loss": 0.558957576751709, "epoch": 4.75, "learning_rate": 3.6961624841282516e-07, "loss": 0.4946, "step": 13150, "task_loss": 0.9486407041549683 }, { "compression_loss": 0.0, "distillation_loss": 0.4826211929321289, "epoch": 4.76, "learning_rate": 3.590320891980492e-07, "loss": 0.435, "step": 13160, "task_loss": 0.8779466152191162 }, { "compression_loss": 0.0, "distillation_loss": 0.42434561252593994, "epoch": 4.76, "learning_rate": 3.4860076878110103e-07, "loss": 0.4788, "step": 13170, "task_loss": 0.6615882515907288 }, { "compression_loss": 0.0, "distillation_loss": 0.4156636595726013, "epoch": 4.76, "learning_rate": 3.383223409493719e-07, "loss": 0.4392, "step": 13180, "task_loss": 0.5900173187255859 }, { "compression_loss": 0.0, "distillation_loss": 0.39588701725006104, "epoch": 4.77, "learning_rate": 3.281968587018902e-07, "loss": 0.4215, "step": 13190, "task_loss": 0.5002977848052979 }, { "compression_loss": 0.0, "distillation_loss": 0.4153065085411072, "epoch": 4.77, "learning_rate": 3.1822437424905536e-07, "loss": 0.4643, "step": 13200, "task_loss": 0.506074845790863 }, { "compression_loss": 0.0, "distillation_loss": 0.4411586821079254, "epoch": 4.77, "learning_rate": 3.084049390123478e-07, "loss": 0.4208, "step": 13210, "task_loss": 0.49065306782722473 }, { "compression_loss": 0.0, "distillation_loss": 0.5167171359062195, "epoch": 4.78, "learning_rate": 2.9873860362407244e-07, "loss": 0.459, "step": 13220, "task_loss": 0.6649248003959656 }, { "compression_loss": 0.0, "distillation_loss": 0.3746606707572937, "epoch": 4.78, "learning_rate": 2.892254179271059e-07, "loss": 0.4396, "step": 13230, "task_loss": 0.3278374671936035 }, { "compression_loss": 0.0, "distillation_loss": 0.482988178730011, "epoch": 4.78, "learning_rate": 2.798654309746396e-07, "loss": 0.4777, "step": 13240, "task_loss": 0.7824634313583374 }, { "compression_loss": 0.0, "distillation_loss": 0.3645211458206177, "epoch": 4.79, "learning_rate": 2.706586910299069e-07, "loss": 0.423, "step": 13250, "task_loss": 0.4886607527732849 }, { "epoch": 4.79, "eval_exact_match": 82.56385998107852, "eval_f1": 89.66224189133082, "step": 13250 }, { "compression_loss": 0.0, "distillation_loss": 0.4217372536659241, "epoch": 4.79, "learning_rate": 2.616052455659568e-07, "loss": 0.4264, "step": 13260, "task_loss": 0.5518814325332642 }, { "compression_loss": 0.0, "distillation_loss": 0.29746928811073303, "epoch": 4.8, "learning_rate": 2.5270514126540025e-07, "loss": 0.4416, "step": 13270, "task_loss": 0.2353348284959793 }, { "compression_loss": 0.0, "distillation_loss": 0.40688103437423706, "epoch": 4.8, "learning_rate": 2.4395842402016756e-07, "loss": 0.4465, "step": 13280, "task_loss": 0.3618243336677551 }, { "compression_loss": 0.0, "distillation_loss": 0.42863696813583374, "epoch": 4.8, "learning_rate": 2.3536513893127166e-07, "loss": 0.423, "step": 13290, "task_loss": 0.7000741958618164 }, { "compression_loss": 0.0, "distillation_loss": 0.6176955699920654, "epoch": 4.81, "learning_rate": 2.2692533030857832e-07, "loss": 0.4629, "step": 13300, "task_loss": 0.5734840631484985 }, { "compression_loss": 0.0, "distillation_loss": 0.48465853929519653, "epoch": 4.81, "learning_rate": 2.1863904167058634e-07, "loss": 0.4697, "step": 13310, "task_loss": 1.253157377243042 }, { "compression_loss": 0.0, "distillation_loss": 0.5344210863113403, "epoch": 4.81, "learning_rate": 2.1050631574418112e-07, "loss": 0.4755, "step": 13320, "task_loss": 0.5893040895462036 }, { "compression_loss": 0.0, "distillation_loss": 0.673054575920105, "epoch": 4.82, "learning_rate": 2.0252719446443135e-07, "loss": 0.5033, "step": 13330, "task_loss": 0.8126822710037231 }, { "compression_loss": 0.0, "distillation_loss": 0.34842216968536377, "epoch": 4.82, "learning_rate": 1.9470171897437273e-07, "loss": 0.444, "step": 13340, "task_loss": 0.5029393434524536 }, { "compression_loss": 0.0, "distillation_loss": 0.5423133969306946, "epoch": 4.82, "learning_rate": 1.8702992962478792e-07, "loss": 0.4286, "step": 13350, "task_loss": 0.41326332092285156 }, { "compression_loss": 0.0, "distillation_loss": 0.43724340200424194, "epoch": 4.83, "learning_rate": 1.7951186597399693e-07, "loss": 0.5312, "step": 13360, "task_loss": 0.6493289470672607 }, { "compression_loss": 0.0, "distillation_loss": 0.38860416412353516, "epoch": 4.83, "learning_rate": 1.7214756678767042e-07, "loss": 0.4399, "step": 13370, "task_loss": 0.623653769493103 }, { "compression_loss": 0.0, "distillation_loss": 0.40182918310165405, "epoch": 4.84, "learning_rate": 1.649370700386099e-07, "loss": 0.4528, "step": 13380, "task_loss": 0.3763955235481262 }, { "compression_loss": 0.0, "distillation_loss": 0.34956735372543335, "epoch": 4.84, "learning_rate": 1.578804129065614e-07, "loss": 0.4706, "step": 13390, "task_loss": 0.48613160848617554 }, { "compression_loss": 0.0, "distillation_loss": 0.5024135112762451, "epoch": 4.84, "learning_rate": 1.5097763177802205e-07, "loss": 0.463, "step": 13400, "task_loss": 0.7361462712287903 }, { "compression_loss": 0.0, "distillation_loss": 0.3231491148471832, "epoch": 4.85, "learning_rate": 1.4422876224605365e-07, "loss": 0.4349, "step": 13410, "task_loss": 0.4247916638851166 }, { "compression_loss": 0.0, "distillation_loss": 0.6355755925178528, "epoch": 4.85, "learning_rate": 1.3763383911009287e-07, "loss": 0.4766, "step": 13420, "task_loss": 0.9337074160575867 }, { "compression_loss": 0.0, "distillation_loss": 0.5704454183578491, "epoch": 4.85, "learning_rate": 1.3119289637578135e-07, "loss": 0.461, "step": 13430, "task_loss": 1.1079574823379517 }, { "compression_loss": 0.0, "distillation_loss": 0.48450767993927, "epoch": 4.86, "learning_rate": 1.249059672547892e-07, "loss": 0.4929, "step": 13440, "task_loss": 0.8369407653808594 }, { "compression_loss": 0.0, "distillation_loss": 0.419539213180542, "epoch": 4.86, "learning_rate": 1.1877308416463506e-07, "loss": 0.4397, "step": 13450, "task_loss": 0.6441231369972229 }, { "compression_loss": 0.0, "distillation_loss": 0.4916451573371887, "epoch": 4.86, "learning_rate": 1.1279427872852965e-07, "loss": 0.4731, "step": 13460, "task_loss": 0.6786421537399292 }, { "compression_loss": 0.0, "distillation_loss": 0.41480889916419983, "epoch": 4.87, "learning_rate": 1.0696958177519922e-07, "loss": 0.4472, "step": 13470, "task_loss": 0.6673970222473145 }, { "compression_loss": 0.0, "distillation_loss": 0.4530647397041321, "epoch": 4.87, "learning_rate": 1.0129902333874563e-07, "loss": 0.4267, "step": 13480, "task_loss": 1.1996808052062988 }, { "compression_loss": 0.0, "distillation_loss": 0.37082067131996155, "epoch": 4.88, "learning_rate": 9.578263265846655e-08, "loss": 0.4798, "step": 13490, "task_loss": 0.3775365352630615 }, { "compression_loss": 0.0, "distillation_loss": 0.25928795337677, "epoch": 4.88, "learning_rate": 9.042043817873547e-08, "loss": 0.4649, "step": 13500, "task_loss": 0.15395194292068481 }, { "epoch": 4.88, "eval_exact_match": 82.80983916745507, "eval_f1": 89.66839121824178, "step": 13500 }, { "compression_loss": 0.0, "distillation_loss": 0.5914658308029175, "epoch": 4.88, "learning_rate": 8.521246754882528e-08, "loss": 0.4885, "step": 13510, "task_loss": 0.7391992807388306 }, { "compression_loss": 0.0, "distillation_loss": 0.38663071393966675, "epoch": 4.89, "learning_rate": 8.01587476227883e-08, "loss": 0.3928, "step": 13520, "task_loss": 0.24446828663349152 }, { "compression_loss": 0.0, "distillation_loss": 0.4410104751586914, "epoch": 4.89, "learning_rate": 7.525930445929974e-08, "loss": 0.4671, "step": 13530, "task_loss": 0.339232861995697 }, { "compression_loss": 0.0, "distillation_loss": 0.4469359517097473, "epoch": 4.89, "learning_rate": 7.051416332153781e-08, "loss": 0.4552, "step": 13540, "task_loss": 0.6987203359603882 }, { "compression_loss": 0.0, "distillation_loss": 0.5052058100700378, "epoch": 4.9, "learning_rate": 6.592334867704719e-08, "loss": 0.4769, "step": 13550, "task_loss": 0.6112401485443115 }, { "compression_loss": 0.0, "distillation_loss": 0.4418012499809265, "epoch": 4.9, "learning_rate": 6.148688419760906e-08, "loss": 0.4485, "step": 13560, "task_loss": 0.6320434212684631 }, { "compression_loss": 0.0, "distillation_loss": 0.4680473804473877, "epoch": 4.9, "learning_rate": 5.7204792759127936e-08, "loss": 0.4803, "step": 13570, "task_loss": 0.8190406560897827 }, { "compression_loss": 0.0, "distillation_loss": 0.37423837184906006, "epoch": 4.91, "learning_rate": 5.307709644150505e-08, "loss": 0.4563, "step": 13580, "task_loss": 0.4952273368835449 }, { "compression_loss": 0.0, "distillation_loss": 0.5851823091506958, "epoch": 4.91, "learning_rate": 4.910381652853513e-08, "loss": 0.4465, "step": 13590, "task_loss": 0.7107750177383423 }, { "compression_loss": 0.0, "distillation_loss": 0.3553691506385803, "epoch": 4.92, "learning_rate": 4.528497350777983e-08, "loss": 0.4799, "step": 13600, "task_loss": 0.5710075497627258 }, { "compression_loss": 0.0, "distillation_loss": 0.3894813358783722, "epoch": 4.92, "learning_rate": 4.162058707048444e-08, "loss": 0.4332, "step": 13610, "task_loss": 0.5974895358085632 }, { "compression_loss": 0.0, "distillation_loss": 0.37952518463134766, "epoch": 4.92, "learning_rate": 3.8110676111451357e-08, "loss": 0.4155, "step": 13620, "task_loss": 0.8900310397148132 }, { "compression_loss": 0.0, "distillation_loss": 0.4508509933948517, "epoch": 4.93, "learning_rate": 3.4755258728963455e-08, "loss": 0.4591, "step": 13630, "task_loss": 0.43794721364974976 }, { "compression_loss": 0.0, "distillation_loss": 0.40247878432273865, "epoch": 4.93, "learning_rate": 3.155435222468417e-08, "loss": 0.4891, "step": 13640, "task_loss": 0.7496728897094727 }, { "compression_loss": 0.0, "distillation_loss": 0.47712230682373047, "epoch": 4.93, "learning_rate": 2.8507973103560903e-08, "loss": 0.4467, "step": 13650, "task_loss": 0.9178839325904846 }, { "compression_loss": 0.0, "distillation_loss": 0.4095407724380493, "epoch": 4.94, "learning_rate": 2.5616137073748436e-08, "loss": 0.4325, "step": 13660, "task_loss": 0.5698050856590271 }, { "compression_loss": 0.0, "distillation_loss": 0.5157955884933472, "epoch": 4.94, "learning_rate": 2.2878859046525648e-08, "loss": 0.4647, "step": 13670, "task_loss": 0.4228496551513672 }, { "compression_loss": 0.0, "distillation_loss": 0.3722189664840698, "epoch": 4.94, "learning_rate": 2.029615313622224e-08, "loss": 0.462, "step": 13680, "task_loss": 0.534970760345459 }, { "compression_loss": 0.0, "distillation_loss": 0.4381513297557831, "epoch": 4.95, "learning_rate": 1.786803266013548e-08, "loss": 0.428, "step": 13690, "task_loss": 0.7130058407783508 }, { "compression_loss": 0.0, "distillation_loss": 0.43109041452407837, "epoch": 4.95, "learning_rate": 1.559451013847024e-08, "loss": 0.4274, "step": 13700, "task_loss": 0.47240149974823 }, { "compression_loss": 0.0, "distillation_loss": 0.5169810056686401, "epoch": 4.95, "learning_rate": 1.3475597294275722e-08, "loss": 0.421, "step": 13710, "task_loss": 0.9785813689231873 }, { "compression_loss": 0.0, "distillation_loss": 0.4849017262458801, "epoch": 4.96, "learning_rate": 1.1511305053375498e-08, "loss": 0.4566, "step": 13720, "task_loss": 0.9021215438842773 }, { "compression_loss": 0.0, "distillation_loss": 0.4900825619697571, "epoch": 4.96, "learning_rate": 9.701643544327565e-09, "loss": 0.5105, "step": 13730, "task_loss": 0.8299789428710938 }, { "compression_loss": 0.0, "distillation_loss": 0.3275885581970215, "epoch": 4.97, "learning_rate": 8.046622098347723e-09, "loss": 0.4322, "step": 13740, "task_loss": 0.4920736849308014 }, { "compression_loss": 0.0, "distillation_loss": 0.4971184730529785, "epoch": 4.97, "learning_rate": 6.5462492492829316e-09, "loss": 0.4324, "step": 13750, "task_loss": 0.6894551515579224 }, { "epoch": 4.97, "eval_exact_match": 82.4314096499527, "eval_f1": 89.51060111859181, "step": 13750 }, { "compression_loss": 0.0, "distillation_loss": 0.2823774218559265, "epoch": 4.97, "learning_rate": 5.2005327335680196e-09, "loss": 0.4704, "step": 13760, "task_loss": 0.2854418456554413 }, { "compression_loss": 0.0, "distillation_loss": 0.4087982773780823, "epoch": 4.98, "learning_rate": 4.0094794901623934e-09, "loss": 0.4415, "step": 13770, "task_loss": 0.6700844168663025 }, { "compression_loss": 0.0, "distillation_loss": 0.41539302468299866, "epoch": 4.98, "learning_rate": 2.973095660540048e-09, "loss": 0.4678, "step": 13780, "task_loss": 0.6645501852035522 }, { "compression_loss": 0.0, "distillation_loss": 0.6088934540748596, "epoch": 4.98, "learning_rate": 2.0913865886462714e-09, "loss": 0.4834, "step": 13790, "task_loss": 0.9101829528808594 }, { "compression_loss": 0.0, "distillation_loss": 0.562792181968689, "epoch": 4.99, "learning_rate": 1.364356820864332e-09, "loss": 0.4619, "step": 13800, "task_loss": 0.7332584261894226 }, { "compression_loss": 0.0, "distillation_loss": 0.3973432183265686, "epoch": 4.99, "learning_rate": 7.920101060088225e-10, "loss": 0.4212, "step": 13810, "task_loss": 0.6076244711875916 }, { "compression_loss": 0.0, "distillation_loss": 0.3704431653022766, "epoch": 4.99, "learning_rate": 3.743493952890198e-10, "loss": 0.4176, "step": 13820, "task_loss": 0.6046047806739807 }, { "compression_loss": 0.0, "distillation_loss": 0.4479011595249176, "epoch": 5.0, "learning_rate": 1.1137684230888567e-10, "loss": 0.491, "step": 13830, "task_loss": 0.41174694895744324 }, { "compression_loss": 0.0, "distillation_loss": 0.6051190495491028, "epoch": 5.0, "learning_rate": 3.0938030370908365e-12, "loss": 0.4473, "step": 13840, "task_loss": 0.9124219417572021 }, { "compression_loss": 0.0, "distillation_loss": 0.36170339584350586, "epoch": 5.01, "learning_rate": 5.9999950499164185e-05, "loss": 0.409, "step": 13850, "task_loss": 0.36121177673339844 }, { "compression_loss": 0.0, "distillation_loss": 0.537726104259491, "epoch": 5.01, "learning_rate": 5.999974940229864e-05, "loss": 0.4383, "step": 13860, "task_loss": 0.6074551343917847 }, { "compression_loss": 0.0, "distillation_loss": 0.5061550140380859, "epoch": 5.01, "learning_rate": 5.999939361663725e-05, "loss": 0.431, "step": 13870, "task_loss": 0.4759042263031006 }, { "compression_loss": 0.0, "distillation_loss": 0.4849228262901306, "epoch": 5.02, "learning_rate": 5.999888314401456e-05, "loss": 0.4848, "step": 13880, "task_loss": 0.6583101749420166 }, { "compression_loss": 0.0, "distillation_loss": 0.4311343729496002, "epoch": 5.02, "learning_rate": 5.9998217987062765e-05, "loss": 0.4236, "step": 13890, "task_loss": 0.6400418281555176 }, { "compression_loss": 0.0, "distillation_loss": 0.6894408464431763, "epoch": 5.02, "learning_rate": 5.999739814921161e-05, "loss": 0.4667, "step": 13900, "task_loss": 0.9435676336288452 }, { "compression_loss": 0.0, "distillation_loss": 0.6396845579147339, "epoch": 5.03, "learning_rate": 5.9996423634688465e-05, "loss": 0.5015, "step": 13910, "task_loss": 0.7678850293159485 }, { "compression_loss": 0.0, "distillation_loss": 0.39257675409317017, "epoch": 5.03, "learning_rate": 5.999529444851826e-05, "loss": 0.4825, "step": 13920, "task_loss": 0.5393377542495728 }, { "compression_loss": 0.0, "distillation_loss": 0.3359961211681366, "epoch": 5.03, "learning_rate": 5.999401059652345e-05, "loss": 0.4539, "step": 13930, "task_loss": 0.7673816680908203 }, { "compression_loss": 0.0, "distillation_loss": 0.39863264560699463, "epoch": 5.04, "learning_rate": 5.999257208532401e-05, "loss": 0.483, "step": 13940, "task_loss": 1.0004619359970093 }, { "compression_loss": 0.0, "distillation_loss": 0.38244152069091797, "epoch": 5.04, "learning_rate": 5.99909789223374e-05, "loss": 0.4579, "step": 13950, "task_loss": 0.3268355429172516 }, { "compression_loss": 0.0, "distillation_loss": 0.40854525566101074, "epoch": 5.05, "learning_rate": 5.998923111577847e-05, "loss": 0.4941, "step": 13960, "task_loss": 0.5148742198944092 }, { "compression_loss": 0.0, "distillation_loss": 0.3848707675933838, "epoch": 5.05, "learning_rate": 5.998732867465954e-05, "loss": 0.4979, "step": 13970, "task_loss": 0.5055022835731506 }, { "compression_loss": 0.0, "distillation_loss": 0.7005259990692139, "epoch": 5.05, "learning_rate": 5.998527160879021e-05, "loss": 0.5703, "step": 13980, "task_loss": 0.7080357074737549 }, { "compression_loss": 0.0, "distillation_loss": 0.4665435552597046, "epoch": 5.06, "learning_rate": 5.998305992877741e-05, "loss": 0.4853, "step": 13990, "task_loss": 0.7121034860610962 }, { "compression_loss": 0.0, "distillation_loss": 0.4767329692840576, "epoch": 5.06, "learning_rate": 5.99806936460253e-05, "loss": 0.4968, "step": 14000, "task_loss": 0.7108612060546875 }, { "epoch": 5.06, "eval_exact_match": 82.15704824976348, "eval_f1": 89.43257264133547, "step": 14000 }, { "compression_loss": 0.0, "distillation_loss": 0.3834998607635498, "epoch": 5.06, "learning_rate": 5.997817277273525e-05, "loss": 0.4928, "step": 14010, "task_loss": 0.45428478717803955 }, { "compression_loss": 0.0, "distillation_loss": 0.7256457805633545, "epoch": 5.07, "learning_rate": 5.997549732190571e-05, "loss": 0.525, "step": 14020, "task_loss": 0.8021411895751953 }, { "compression_loss": 0.0, "distillation_loss": 0.5225358009338379, "epoch": 5.07, "learning_rate": 5.997266730733221e-05, "loss": 0.537, "step": 14030, "task_loss": 0.5008682012557983 }, { "compression_loss": 0.0, "distillation_loss": 0.6417495012283325, "epoch": 5.07, "learning_rate": 5.996968274360725e-05, "loss": 0.5379, "step": 14040, "task_loss": 0.9481985569000244 }, { "compression_loss": 0.0, "distillation_loss": 0.4337199330329895, "epoch": 5.08, "learning_rate": 5.996654364612026e-05, "loss": 0.5181, "step": 14050, "task_loss": 0.4173719882965088 }, { "compression_loss": 0.0, "distillation_loss": 0.5246778726577759, "epoch": 5.08, "learning_rate": 5.996325003105747e-05, "loss": 0.5021, "step": 14060, "task_loss": 1.0055538415908813 }, { "compression_loss": 0.0, "distillation_loss": 0.41551411151885986, "epoch": 5.08, "learning_rate": 5.9959801915401866e-05, "loss": 0.477, "step": 14070, "task_loss": 0.45110952854156494 }, { "compression_loss": 0.0, "distillation_loss": 0.5802291631698608, "epoch": 5.09, "learning_rate": 5.99561993169331e-05, "loss": 0.5067, "step": 14080, "task_loss": 0.8387892246246338 }, { "compression_loss": 0.0, "distillation_loss": 0.4913982152938843, "epoch": 5.09, "learning_rate": 5.995244225422738e-05, "loss": 0.5391, "step": 14090, "task_loss": 0.5127557516098022 }, { "compression_loss": 0.0, "distillation_loss": 0.465750515460968, "epoch": 5.1, "learning_rate": 5.994853074665739e-05, "loss": 0.4964, "step": 14100, "task_loss": 0.8444522619247437 }, { "compression_loss": 0.0, "distillation_loss": 0.6058213710784912, "epoch": 5.1, "learning_rate": 5.9944464814392157e-05, "loss": 0.5417, "step": 14110, "task_loss": 0.7191112637519836 }, { "compression_loss": 0.0, "distillation_loss": 0.4102645516395569, "epoch": 5.1, "learning_rate": 5.9940244478397005e-05, "loss": 0.4553, "step": 14120, "task_loss": 0.39053791761398315 }, { "compression_loss": 0.0, "distillation_loss": 0.6364562511444092, "epoch": 5.11, "learning_rate": 5.9935869760433415e-05, "loss": 0.5512, "step": 14130, "task_loss": 0.624964714050293 }, { "compression_loss": 0.0, "distillation_loss": 0.43969202041625977, "epoch": 5.11, "learning_rate": 5.99313406830589e-05, "loss": 0.4893, "step": 14140, "task_loss": 0.36895930767059326 }, { "compression_loss": 0.0, "distillation_loss": 0.4144252836704254, "epoch": 5.11, "learning_rate": 5.99266572696269e-05, "loss": 0.5299, "step": 14150, "task_loss": 0.5147717595100403 }, { "compression_loss": 0.0, "distillation_loss": 0.3856452703475952, "epoch": 5.12, "learning_rate": 5.992181954428667e-05, "loss": 0.499, "step": 14160, "task_loss": 0.5266073346138 }, { "compression_loss": 0.0, "distillation_loss": 0.5156189799308777, "epoch": 5.12, "learning_rate": 5.9916827531983165e-05, "loss": 0.4847, "step": 14170, "task_loss": 0.4401143789291382 }, { "compression_loss": 0.0, "distillation_loss": 0.4977889657020569, "epoch": 5.12, "learning_rate": 5.991168125845685e-05, "loss": 0.4558, "step": 14180, "task_loss": 0.5201467275619507 }, { "compression_loss": 0.0, "distillation_loss": 0.4669112265110016, "epoch": 5.13, "learning_rate": 5.990638075024366e-05, "loss": 0.4877, "step": 14190, "task_loss": 0.5265516042709351 }, { "compression_loss": 0.0, "distillation_loss": 0.32335522770881653, "epoch": 5.13, "learning_rate": 5.990092603467481e-05, "loss": 0.5017, "step": 14200, "task_loss": 0.3526550233364105 }, { "compression_loss": 0.0, "distillation_loss": 0.41797494888305664, "epoch": 5.14, "learning_rate": 5.989531713987662e-05, "loss": 0.4715, "step": 14210, "task_loss": 0.4137646555900574 }, { "compression_loss": 0.0, "distillation_loss": 0.4363604187965393, "epoch": 5.14, "learning_rate": 5.988955409477045e-05, "loss": 0.5498, "step": 14220, "task_loss": 0.6443294286727905 }, { "compression_loss": 0.0, "distillation_loss": 0.6083539724349976, "epoch": 5.14, "learning_rate": 5.98836369290725e-05, "loss": 0.5433, "step": 14230, "task_loss": 0.6782771348953247 }, { "compression_loss": 0.0, "distillation_loss": 0.43323948979377747, "epoch": 5.15, "learning_rate": 5.987756567329367e-05, "loss": 0.4889, "step": 14240, "task_loss": 0.440801739692688 }, { "compression_loss": 0.0, "distillation_loss": 0.5146920680999756, "epoch": 5.15, "learning_rate": 5.9871340358739385e-05, "loss": 0.4781, "step": 14250, "task_loss": 0.48200464248657227 }, { "epoch": 5.15, "eval_exact_match": 82.10974456007568, "eval_f1": 89.35325173039678, "step": 14250 }, { "compression_loss": 0.0, "distillation_loss": 0.4706941545009613, "epoch": 5.15, "learning_rate": 5.9864961017509476e-05, "loss": 0.5426, "step": 14260, "task_loss": 0.9112202525138855 }, { "compression_loss": 0.0, "distillation_loss": 0.4482850432395935, "epoch": 5.16, "learning_rate": 5.985842768249795e-05, "loss": 0.4626, "step": 14270, "task_loss": 0.46659618616104126 }, { "compression_loss": 0.0, "distillation_loss": 0.47765567898750305, "epoch": 5.16, "learning_rate": 5.98517403873929e-05, "loss": 0.5386, "step": 14280, "task_loss": 0.7766150832176208 }, { "compression_loss": 0.0, "distillation_loss": 0.6142876148223877, "epoch": 5.16, "learning_rate": 5.984489916667626e-05, "loss": 0.5518, "step": 14290, "task_loss": 0.7481546998023987 }, { "compression_loss": 0.0, "distillation_loss": 0.5589594841003418, "epoch": 5.17, "learning_rate": 5.983790405562367e-05, "loss": 0.4997, "step": 14300, "task_loss": 0.8356878757476807 }, { "compression_loss": 0.0, "distillation_loss": 0.5612353086471558, "epoch": 5.17, "learning_rate": 5.983075509030426e-05, "loss": 0.5829, "step": 14310, "task_loss": 0.865998387336731 }, { "compression_loss": 0.0, "distillation_loss": 0.8121389150619507, "epoch": 5.18, "learning_rate": 5.982345230758051e-05, "loss": 0.5064, "step": 14320, "task_loss": 1.0466325283050537 }, { "compression_loss": 0.0, "distillation_loss": 0.5815309286117554, "epoch": 5.18, "learning_rate": 5.981599574510802e-05, "loss": 0.5482, "step": 14330, "task_loss": 0.8093647956848145 }, { "compression_loss": 0.0, "distillation_loss": 0.49722617864608765, "epoch": 5.18, "learning_rate": 5.980838544133534e-05, "loss": 0.5367, "step": 14340, "task_loss": 0.6952990889549255 }, { "compression_loss": 0.0, "distillation_loss": 0.5625362396240234, "epoch": 5.19, "learning_rate": 5.980062143550375e-05, "loss": 0.5873, "step": 14350, "task_loss": 0.5973076820373535 }, { "compression_loss": 0.0, "distillation_loss": 0.8071750402450562, "epoch": 5.19, "learning_rate": 5.979270376764708e-05, "loss": 0.5362, "step": 14360, "task_loss": 0.801953911781311 }, { "compression_loss": 0.0, "distillation_loss": 0.5167639255523682, "epoch": 5.19, "learning_rate": 5.9784632478591475e-05, "loss": 0.5556, "step": 14370, "task_loss": 0.4926624298095703 }, { "compression_loss": 0.0, "distillation_loss": 0.5415273308753967, "epoch": 5.2, "learning_rate": 5.977640760995523e-05, "loss": 0.5254, "step": 14380, "task_loss": 0.8653746843338013 }, { "compression_loss": 0.0, "distillation_loss": 0.5194505453109741, "epoch": 5.2, "learning_rate": 5.9768029204148513e-05, "loss": 0.5942, "step": 14390, "task_loss": 0.769846498966217 }, { "compression_loss": 0.0, "distillation_loss": 0.5619365572929382, "epoch": 5.2, "learning_rate": 5.9759497304373234e-05, "loss": 0.5572, "step": 14400, "task_loss": 0.6389682292938232 }, { "compression_loss": 0.0, "distillation_loss": 0.6544053554534912, "epoch": 5.21, "learning_rate": 5.9750811954622714e-05, "loss": 0.5505, "step": 14410, "task_loss": 0.6094022989273071 }, { "compression_loss": 0.0, "distillation_loss": 0.9797204732894897, "epoch": 5.21, "learning_rate": 5.974197319968153e-05, "loss": 0.5554, "step": 14420, "task_loss": 0.9586498141288757 }, { "compression_loss": 0.0, "distillation_loss": 0.4406765103340149, "epoch": 5.22, "learning_rate": 5.973298108512531e-05, "loss": 0.5066, "step": 14430, "task_loss": 0.542404055595398 }, { "compression_loss": 0.0, "distillation_loss": 0.5614707469940186, "epoch": 5.22, "learning_rate": 5.972383565732038e-05, "loss": 0.5418, "step": 14440, "task_loss": 0.6197214126586914 }, { "compression_loss": 0.0, "distillation_loss": 0.529656708240509, "epoch": 5.22, "learning_rate": 5.971453696342367e-05, "loss": 0.5157, "step": 14450, "task_loss": 0.7224357724189758 }, { "compression_loss": 0.0, "distillation_loss": 0.49263083934783936, "epoch": 5.23, "learning_rate": 5.9705085051382344e-05, "loss": 0.4777, "step": 14460, "task_loss": 0.3264583349227905 }, { "compression_loss": 0.0, "distillation_loss": 0.4123901426792145, "epoch": 5.23, "learning_rate": 5.969547996993367e-05, "loss": 0.4791, "step": 14470, "task_loss": 0.6501359343528748 }, { "compression_loss": 0.0, "distillation_loss": 0.5078509449958801, "epoch": 5.23, "learning_rate": 5.9685721768604656e-05, "loss": 0.6034, "step": 14480, "task_loss": 0.6295353174209595 }, { "compression_loss": 0.0, "distillation_loss": 0.3895311951637268, "epoch": 5.24, "learning_rate": 5.967581049771188e-05, "loss": 0.4871, "step": 14490, "task_loss": 0.34086859226226807 }, { "compression_loss": 0.0, "distillation_loss": 0.5105054974555969, "epoch": 5.24, "learning_rate": 5.966574620836119e-05, "loss": 0.4953, "step": 14500, "task_loss": 0.6598679423332214 }, { "epoch": 5.24, "eval_exact_match": 81.95837275307474, "eval_f1": 89.23343113339236, "step": 14500 }, { "compression_loss": 0.0, "distillation_loss": 0.5998846292495728, "epoch": 5.24, "learning_rate": 5.9655528952447426e-05, "loss": 0.5945, "step": 14510, "task_loss": 0.5025860071182251 }, { "compression_loss": 0.0, "distillation_loss": 0.6489298939704895, "epoch": 5.25, "learning_rate": 5.9645158782654216e-05, "loss": 0.5164, "step": 14520, "task_loss": 0.7763657569885254 }, { "compression_loss": 0.0, "distillation_loss": 0.480061411857605, "epoch": 5.25, "learning_rate": 5.9634635752453644e-05, "loss": 0.4953, "step": 14530, "task_loss": 0.626976728439331 }, { "compression_loss": 0.0, "distillation_loss": 0.5622721910476685, "epoch": 5.25, "learning_rate": 5.962395991610599e-05, "loss": 0.5129, "step": 14540, "task_loss": 0.6075005531311035 }, { "compression_loss": 0.0, "distillation_loss": 0.4024551510810852, "epoch": 5.26, "learning_rate": 5.961313132865944e-05, "loss": 0.5174, "step": 14550, "task_loss": 0.892763614654541 }, { "compression_loss": 0.0, "distillation_loss": 0.48125165700912476, "epoch": 5.26, "learning_rate": 5.960215004594984e-05, "loss": 0.5541, "step": 14560, "task_loss": 0.6092730164527893 }, { "compression_loss": 0.0, "distillation_loss": 0.38604000210762024, "epoch": 5.27, "learning_rate": 5.959101612460038e-05, "loss": 0.4897, "step": 14570, "task_loss": 0.7240785360336304 }, { "compression_loss": 0.0, "distillation_loss": 0.516532301902771, "epoch": 5.27, "learning_rate": 5.957972962202131e-05, "loss": 0.5445, "step": 14580, "task_loss": 0.7919718027114868 }, { "compression_loss": 0.0, "distillation_loss": 0.537828266620636, "epoch": 5.27, "learning_rate": 5.9568290596409616e-05, "loss": 0.5246, "step": 14590, "task_loss": 0.6629247069358826 }, { "compression_loss": 0.0, "distillation_loss": 0.49570807814598083, "epoch": 5.28, "learning_rate": 5.955669910674877e-05, "loss": 0.5261, "step": 14600, "task_loss": 0.3058420419692993 }, { "compression_loss": 0.0, "distillation_loss": 0.6100714206695557, "epoch": 5.28, "learning_rate": 5.95449552128084e-05, "loss": 0.524, "step": 14610, "task_loss": 1.2158831357955933 }, { "compression_loss": 0.0, "distillation_loss": 0.5747349262237549, "epoch": 5.28, "learning_rate": 5.9533058975143956e-05, "loss": 0.5039, "step": 14620, "task_loss": 0.4315424859523773 }, { "compression_loss": 0.0, "distillation_loss": 0.5765594840049744, "epoch": 5.29, "learning_rate": 5.9521010455096446e-05, "loss": 0.5584, "step": 14630, "task_loss": 0.831333339214325 }, { "compression_loss": 0.0, "distillation_loss": 0.6396365761756897, "epoch": 5.29, "learning_rate": 5.950880971479208e-05, "loss": 0.5345, "step": 14640, "task_loss": 0.8963925242424011 }, { "compression_loss": 0.0, "distillation_loss": 0.594346821308136, "epoch": 5.29, "learning_rate": 5.9496456817141996e-05, "loss": 0.5928, "step": 14650, "task_loss": 1.1544039249420166 }, { "compression_loss": 0.0, "distillation_loss": 0.4301066994667053, "epoch": 5.3, "learning_rate": 5.948395182584189e-05, "loss": 0.4974, "step": 14660, "task_loss": 0.5060799717903137 }, { "compression_loss": 0.0, "distillation_loss": 0.46475663781166077, "epoch": 5.3, "learning_rate": 5.9471294805371684e-05, "loss": 0.5466, "step": 14670, "task_loss": 0.6515345573425293 }, { "compression_loss": 0.0, "distillation_loss": 0.43622440099716187, "epoch": 5.31, "learning_rate": 5.9458485820995235e-05, "loss": 0.5506, "step": 14680, "task_loss": 0.7932460308074951 }, { "compression_loss": 0.0, "distillation_loss": 0.4005524516105652, "epoch": 5.31, "learning_rate": 5.9445524938759986e-05, "loss": 0.4265, "step": 14690, "task_loss": 0.30546653270721436 }, { "compression_loss": 0.0, "distillation_loss": 0.6993730068206787, "epoch": 5.31, "learning_rate": 5.9432412225496586e-05, "loss": 0.4971, "step": 14700, "task_loss": 0.7546347379684448 }, { "compression_loss": 0.0, "distillation_loss": 0.8860001564025879, "epoch": 5.32, "learning_rate": 5.941914774881862e-05, "loss": 0.5512, "step": 14710, "task_loss": 0.7079243063926697 }, { "compression_loss": 0.0, "distillation_loss": 0.6870177984237671, "epoch": 5.32, "learning_rate": 5.940573157712217e-05, "loss": 0.5731, "step": 14720, "task_loss": 0.8747744560241699 }, { "compression_loss": 0.0, "distillation_loss": 0.5104433298110962, "epoch": 5.32, "learning_rate": 5.9392163779585525e-05, "loss": 0.5506, "step": 14730, "task_loss": 1.362548589706421 }, { "compression_loss": 0.0, "distillation_loss": 0.40730664134025574, "epoch": 5.33, "learning_rate": 5.937844442616883e-05, "loss": 0.518, "step": 14740, "task_loss": 0.5095131397247314 }, { "compression_loss": 0.0, "distillation_loss": 0.5867040157318115, "epoch": 5.33, "learning_rate": 5.936457358761367e-05, "loss": 0.5429, "step": 14750, "task_loss": 0.6059485673904419 }, { "epoch": 5.33, "eval_exact_match": 81.59886471144749, "eval_f1": 88.89803036593699, "step": 14750 }, { "compression_loss": 0.0, "distillation_loss": 0.39228177070617676, "epoch": 5.33, "learning_rate": 5.935055133544276e-05, "loss": 0.5219, "step": 14760, "task_loss": 0.5120740532875061 }, { "compression_loss": 0.0, "distillation_loss": 0.4774217903614044, "epoch": 5.34, "learning_rate": 5.933637774195955e-05, "loss": 0.5376, "step": 14770, "task_loss": 0.6802091002464294 }, { "compression_loss": 0.0, "distillation_loss": 0.6107138395309448, "epoch": 5.34, "learning_rate": 5.932205288024784e-05, "loss": 0.5498, "step": 14780, "task_loss": 0.6426236033439636 }, { "compression_loss": 0.0, "distillation_loss": 0.6453840732574463, "epoch": 5.35, "learning_rate": 5.930757682417145e-05, "loss": 0.5544, "step": 14790, "task_loss": 0.845929741859436 }, { "compression_loss": 0.0, "distillation_loss": 0.6528950929641724, "epoch": 5.35, "learning_rate": 5.929294964837378e-05, "loss": 0.514, "step": 14800, "task_loss": 0.9554112553596497 }, { "compression_loss": 0.0, "distillation_loss": 0.6820924878120422, "epoch": 5.35, "learning_rate": 5.9278171428277475e-05, "loss": 0.6105, "step": 14810, "task_loss": 0.9136593341827393 }, { "compression_loss": 0.0, "distillation_loss": 0.4658266603946686, "epoch": 5.36, "learning_rate": 5.926324224008401e-05, "loss": 0.5843, "step": 14820, "task_loss": 0.46199867129325867 }, { "compression_loss": 0.0, "distillation_loss": 0.7525842189788818, "epoch": 5.36, "learning_rate": 5.9248162160773286e-05, "loss": 0.5121, "step": 14830, "task_loss": 0.7043994665145874 }, { "compression_loss": 0.0, "distillation_loss": 0.5841355323791504, "epoch": 5.36, "learning_rate": 5.923293126810328e-05, "loss": 0.5481, "step": 14840, "task_loss": 0.6969549655914307 }, { "compression_loss": 0.0, "distillation_loss": 0.6712848544120789, "epoch": 5.37, "learning_rate": 5.921754964060959e-05, "loss": 0.588, "step": 14850, "task_loss": 0.48286545276641846 }, { "compression_loss": 0.0, "distillation_loss": 0.5721571445465088, "epoch": 5.37, "learning_rate": 5.9202017357605045e-05, "loss": 0.5374, "step": 14860, "task_loss": 0.847301721572876 }, { "compression_loss": 0.0, "distillation_loss": 0.47767120599746704, "epoch": 5.37, "learning_rate": 5.918633449917934e-05, "loss": 0.5735, "step": 14870, "task_loss": 0.924615204334259 }, { "compression_loss": 0.0, "distillation_loss": 0.47853583097457886, "epoch": 5.38, "learning_rate": 5.917050114619856e-05, "loss": 0.5108, "step": 14880, "task_loss": 0.7048487663269043 }, { "compression_loss": 0.0, "distillation_loss": 0.47235754132270813, "epoch": 5.38, "learning_rate": 5.9154517380304794e-05, "loss": 0.5083, "step": 14890, "task_loss": 0.7272685766220093 }, { "compression_loss": 0.0, "distillation_loss": 0.5390362739562988, "epoch": 5.38, "learning_rate": 5.9138383283915716e-05, "loss": 0.5101, "step": 14900, "task_loss": 0.7805506587028503 }, { "compression_loss": 0.0, "distillation_loss": 0.42291706800460815, "epoch": 5.39, "learning_rate": 5.912209894022415e-05, "loss": 0.5247, "step": 14910, "task_loss": 0.4546523988246918 }, { "compression_loss": 0.0, "distillation_loss": 0.6531658172607422, "epoch": 5.39, "learning_rate": 5.910566443319765e-05, "loss": 0.5639, "step": 14920, "task_loss": 0.6974441409111023 }, { "compression_loss": 0.0, "distillation_loss": 0.5830737948417664, "epoch": 5.4, "learning_rate": 5.908907984757806e-05, "loss": 0.5388, "step": 14930, "task_loss": 0.6396021842956543 }, { "compression_loss": 0.0, "distillation_loss": 0.5406855344772339, "epoch": 5.4, "learning_rate": 5.9072345268881085e-05, "loss": 0.5475, "step": 14940, "task_loss": 0.7636957168579102 }, { "compression_loss": 0.0, "distillation_loss": 0.7267158031463623, "epoch": 5.4, "learning_rate": 5.905715597527462e-05, "loss": 0.5783, "step": 14950, "task_loss": 0.6421911120414734 }, { "compression_loss": 0.0, "distillation_loss": 0.5212079286575317, "epoch": 5.41, "learning_rate": 5.9040136648095986e-05, "loss": 0.5195, "step": 14960, "task_loss": 0.9378068447113037 }, { "compression_loss": 0.0, "distillation_loss": 0.41629695892333984, "epoch": 5.41, "learning_rate": 5.902296758020755e-05, "loss": 0.4998, "step": 14970, "task_loss": 0.8873476982116699 }, { "compression_loss": 0.0, "distillation_loss": 0.5604190826416016, "epoch": 5.41, "learning_rate": 5.900564886013882e-05, "loss": 0.5128, "step": 14980, "task_loss": 0.5965309143066406 }, { "compression_loss": 0.0, "distillation_loss": 0.6193476915359497, "epoch": 5.42, "learning_rate": 5.8988180577190914e-05, "loss": 0.5533, "step": 14990, "task_loss": 0.8003284335136414 }, { "compression_loss": 0.0, "distillation_loss": 0.3812726140022278, "epoch": 5.42, "learning_rate": 5.8970562821436184e-05, "loss": 0.5454, "step": 15000, "task_loss": 0.5890715718269348 }, { "epoch": 5.42, "eval_exact_match": 82.06244087038789, "eval_f1": 89.37476650775072, "step": 15000 }, { "compression_loss": 0.0, "distillation_loss": 0.4163830280303955, "epoch": 5.42, "learning_rate": 5.895279568371772e-05, "loss": 0.5529, "step": 15010, "task_loss": 0.8374828696250916 }, { "compression_loss": 0.0, "distillation_loss": 0.5462802648544312, "epoch": 5.43, "learning_rate": 5.8934879255648834e-05, "loss": 0.5532, "step": 15020, "task_loss": 0.939515233039856 }, { "compression_loss": 0.0, "distillation_loss": 0.4633031487464905, "epoch": 5.43, "learning_rate": 5.8916813629612655e-05, "loss": 0.5588, "step": 15030, "task_loss": 0.38705766201019287 }, { "compression_loss": 0.0, "distillation_loss": 0.5264443755149841, "epoch": 5.44, "learning_rate": 5.8898598898761656e-05, "loss": 0.4643, "step": 15040, "task_loss": 0.5500625967979431 }, { "compression_loss": 0.0, "distillation_loss": 0.4520212411880493, "epoch": 5.44, "learning_rate": 5.8880235157017074e-05, "loss": 0.4999, "step": 15050, "task_loss": 0.41272589564323425 }, { "compression_loss": 0.0, "distillation_loss": 0.4026266038417816, "epoch": 5.44, "learning_rate": 5.886172249906856e-05, "loss": 0.4997, "step": 15060, "task_loss": 0.30943921208381653 }, { "compression_loss": 0.0, "distillation_loss": 0.7537382245063782, "epoch": 5.45, "learning_rate": 5.8843061020373594e-05, "loss": 0.5524, "step": 15070, "task_loss": 0.9021023511886597 }, { "compression_loss": 0.0, "distillation_loss": 0.4830913245677948, "epoch": 5.45, "learning_rate": 5.882425081715705e-05, "loss": 0.5472, "step": 15080, "task_loss": 0.48394033312797546 }, { "compression_loss": 0.0, "distillation_loss": 0.45110899209976196, "epoch": 5.45, "learning_rate": 5.8805291986410646e-05, "loss": 0.5265, "step": 15090, "task_loss": 0.49047964811325073 }, { "compression_loss": 0.0, "distillation_loss": 0.39607346057891846, "epoch": 5.46, "learning_rate": 5.878618462589249e-05, "loss": 0.5501, "step": 15100, "task_loss": 0.5193868279457092 }, { "compression_loss": 0.0, "distillation_loss": 0.475017786026001, "epoch": 5.46, "learning_rate": 5.876692883412656e-05, "loss": 0.4851, "step": 15110, "task_loss": 0.7011887431144714 }, { "compression_loss": 0.0, "distillation_loss": 0.6903802156448364, "epoch": 5.46, "learning_rate": 5.87475247104022e-05, "loss": 0.5406, "step": 15120, "task_loss": 0.7297502756118774 }, { "compression_loss": 0.0, "distillation_loss": 0.4474151134490967, "epoch": 5.47, "learning_rate": 5.872797235477359e-05, "loss": 0.5009, "step": 15130, "task_loss": 0.7269125580787659 }, { "compression_loss": 0.0, "distillation_loss": 0.38013216853141785, "epoch": 5.47, "learning_rate": 5.8708271868059255e-05, "loss": 0.5268, "step": 15140, "task_loss": 0.738182544708252 }, { "compression_loss": 0.0, "distillation_loss": 0.503143310546875, "epoch": 5.48, "learning_rate": 5.868842335184153e-05, "loss": 0.5503, "step": 15150, "task_loss": 1.0320183038711548 }, { "compression_loss": 0.0, "distillation_loss": 0.5389174222946167, "epoch": 5.48, "learning_rate": 5.866842690846602e-05, "loss": 0.5421, "step": 15160, "task_loss": 0.44373756647109985 }, { "compression_loss": 0.0, "distillation_loss": 0.4785197377204895, "epoch": 5.48, "learning_rate": 5.864828264104114e-05, "loss": 0.5715, "step": 15170, "task_loss": 0.76427161693573 }, { "compression_loss": 0.0, "distillation_loss": 0.40499380230903625, "epoch": 5.49, "learning_rate": 5.862799065343749e-05, "loss": 0.529, "step": 15180, "task_loss": 0.8471218347549438 }, { "compression_loss": 0.0, "distillation_loss": 0.34510934352874756, "epoch": 5.49, "learning_rate": 5.860755105028738e-05, "loss": 0.4904, "step": 15190, "task_loss": 0.42475417256355286 }, { "compression_loss": 0.0, "distillation_loss": 0.6029459238052368, "epoch": 5.49, "learning_rate": 5.858696393698428e-05, "loss": 0.5395, "step": 15200, "task_loss": 0.6130906343460083 }, { "compression_loss": 0.0, "distillation_loss": 0.6409763097763062, "epoch": 5.5, "learning_rate": 5.8566229419682275e-05, "loss": 0.5447, "step": 15210, "task_loss": 0.3819064199924469 }, { "compression_loss": 0.0, "distillation_loss": 0.3984764814376831, "epoch": 5.5, "learning_rate": 5.8545347605295516e-05, "loss": 0.571, "step": 15220, "task_loss": 0.6237053871154785 }, { "compression_loss": 0.0, "distillation_loss": 0.44126883149147034, "epoch": 5.5, "learning_rate": 5.852431860149763e-05, "loss": 0.5209, "step": 15230, "task_loss": 0.5754768252372742 }, { "compression_loss": 0.0, "distillation_loss": 0.4907762110233307, "epoch": 5.51, "learning_rate": 5.850314251672126e-05, "loss": 0.552, "step": 15240, "task_loss": 0.34236305952072144 }, { "compression_loss": 0.0, "distillation_loss": 0.552409291267395, "epoch": 5.51, "learning_rate": 5.848181946015741e-05, "loss": 0.5916, "step": 15250, "task_loss": 0.6347699165344238 }, { "epoch": 5.51, "eval_exact_match": 82.09082308420057, "eval_f1": 89.29649475160107, "step": 15250 }, { "compression_loss": 0.0, "distillation_loss": 0.4228856861591339, "epoch": 5.51, "learning_rate": 5.846034954175493e-05, "loss": 0.5081, "step": 15260, "task_loss": 0.397327721118927 }, { "compression_loss": 0.0, "distillation_loss": 0.39670825004577637, "epoch": 5.52, "learning_rate": 5.8438732872219934e-05, "loss": 0.5519, "step": 15270, "task_loss": 0.37487858533859253 }, { "compression_loss": 0.0, "distillation_loss": 0.5530394911766052, "epoch": 5.52, "learning_rate": 5.8416969563015246e-05, "loss": 0.494, "step": 15280, "task_loss": 0.7215574979782104 }, { "compression_loss": 0.0, "distillation_loss": 0.5375179648399353, "epoch": 5.53, "learning_rate": 5.83950597263598e-05, "loss": 0.4888, "step": 15290, "task_loss": 0.5879909992218018 }, { "compression_loss": 0.0, "distillation_loss": 0.6529196500778198, "epoch": 5.53, "learning_rate": 5.837300347522809e-05, "loss": 0.5204, "step": 15300, "task_loss": 1.0084820985794067 }, { "compression_loss": 0.0, "distillation_loss": 0.4595330059528351, "epoch": 5.53, "learning_rate": 5.8350800923349556e-05, "loss": 0.5129, "step": 15310, "task_loss": 0.5193516612052917 }, { "compression_loss": 0.0, "distillation_loss": 0.5306529998779297, "epoch": 5.54, "learning_rate": 5.8328452185208034e-05, "loss": 0.5032, "step": 15320, "task_loss": 0.6539807915687561 }, { "compression_loss": 0.0, "distillation_loss": 0.35273468494415283, "epoch": 5.54, "learning_rate": 5.830595737604112e-05, "loss": 0.5855, "step": 15330, "task_loss": 0.36837586760520935 }, { "compression_loss": 0.0, "distillation_loss": 0.5520907640457153, "epoch": 5.54, "learning_rate": 5.828331661183961e-05, "loss": 0.5686, "step": 15340, "task_loss": 0.6090099215507507 }, { "compression_loss": 0.0, "distillation_loss": 0.5058830380439758, "epoch": 5.55, "learning_rate": 5.826053000934692e-05, "loss": 0.516, "step": 15350, "task_loss": 0.4280429780483246 }, { "compression_loss": 0.0, "distillation_loss": 0.5331200361251831, "epoch": 5.55, "learning_rate": 5.8237597686058406e-05, "loss": 0.5455, "step": 15360, "task_loss": 0.46258166432380676 }, { "compression_loss": 0.0, "distillation_loss": 0.49761390686035156, "epoch": 5.55, "learning_rate": 5.821451976022086e-05, "loss": 0.5249, "step": 15370, "task_loss": 0.8308390974998474 }, { "compression_loss": 0.0, "distillation_loss": 0.662971019744873, "epoch": 5.56, "learning_rate": 5.819129635083182e-05, "loss": 0.5582, "step": 15380, "task_loss": 0.6953208446502686 }, { "compression_loss": 0.0, "distillation_loss": 0.6624317765235901, "epoch": 5.56, "learning_rate": 5.8167927577638994e-05, "loss": 0.5488, "step": 15390, "task_loss": 0.9981436729431152 }, { "compression_loss": 0.0, "distillation_loss": 0.5271157026290894, "epoch": 5.57, "learning_rate": 5.8144413561139636e-05, "loss": 0.5159, "step": 15400, "task_loss": 0.5372161865234375 }, { "compression_loss": 0.0, "distillation_loss": 0.7370679378509521, "epoch": 5.57, "learning_rate": 5.812075442257992e-05, "loss": 0.5604, "step": 15410, "task_loss": 1.0659924745559692 }, { "compression_loss": 0.0, "distillation_loss": 0.6820415258407593, "epoch": 5.57, "learning_rate": 5.8096950283954324e-05, "loss": 0.5597, "step": 15420, "task_loss": 0.9119454622268677 }, { "compression_loss": 0.0, "distillation_loss": 0.39658281207084656, "epoch": 5.58, "learning_rate": 5.8073001268004986e-05, "loss": 0.5198, "step": 15430, "task_loss": 0.8047652840614319 }, { "compression_loss": 0.0, "distillation_loss": 0.42770448327064514, "epoch": 5.58, "learning_rate": 5.804890749822109e-05, "loss": 0.4929, "step": 15440, "task_loss": 1.5828943252563477 }, { "compression_loss": 0.0, "distillation_loss": 0.3862127363681793, "epoch": 5.58, "learning_rate": 5.8024669098838224e-05, "loss": 0.5447, "step": 15450, "task_loss": 0.45715686678886414 }, { "compression_loss": 0.0, "distillation_loss": 0.4763340353965759, "epoch": 5.59, "learning_rate": 5.8000286194837706e-05, "loss": 0.5788, "step": 15460, "task_loss": 0.5869323015213013 }, { "compression_loss": 0.0, "distillation_loss": 0.6561774015426636, "epoch": 5.59, "learning_rate": 5.7975758911945995e-05, "loss": 0.5451, "step": 15470, "task_loss": 0.7080704569816589 }, { "compression_loss": 0.0, "distillation_loss": 0.40479183197021484, "epoch": 5.59, "learning_rate": 5.795108737663401e-05, "loss": 0.5185, "step": 15480, "task_loss": 0.6346590518951416 }, { "compression_loss": 0.0, "distillation_loss": 0.734893262386322, "epoch": 5.6, "learning_rate": 5.79262717161165e-05, "loss": 0.5102, "step": 15490, "task_loss": 0.9744662046432495 }, { "compression_loss": 0.0, "distillation_loss": 0.4641631841659546, "epoch": 5.6, "learning_rate": 5.7901312058351334e-05, "loss": 0.5243, "step": 15500, "task_loss": 0.44881314039230347 }, { "epoch": 5.6, "eval_exact_match": 82.25165562913908, "eval_f1": 89.3428073483407, "step": 15500 }, { "compression_loss": 0.0, "distillation_loss": 0.44037681818008423, "epoch": 5.61, "learning_rate": 5.78762085320389e-05, "loss": 0.4944, "step": 15510, "task_loss": 0.3445591330528259 }, { "compression_loss": 0.0, "distillation_loss": 0.6916956901550293, "epoch": 5.61, "learning_rate": 5.7850961266621434e-05, "loss": 0.5736, "step": 15520, "task_loss": 0.8610516786575317 }, { "compression_loss": 0.0, "distillation_loss": 0.41196784377098083, "epoch": 5.61, "learning_rate": 5.782557039228231e-05, "loss": 0.5498, "step": 15530, "task_loss": 0.3979591131210327 }, { "compression_loss": 0.0, "distillation_loss": 0.48362213373184204, "epoch": 5.62, "learning_rate": 5.780003603994543e-05, "loss": 0.5389, "step": 15540, "task_loss": 0.5863582491874695 }, { "compression_loss": 0.0, "distillation_loss": 0.508769154548645, "epoch": 5.62, "learning_rate": 5.77743583412745e-05, "loss": 0.4957, "step": 15550, "task_loss": 0.9604640007019043 }, { "compression_loss": 0.0, "distillation_loss": 0.5353500247001648, "epoch": 5.62, "learning_rate": 5.774853742867237e-05, "loss": 0.5351, "step": 15560, "task_loss": 0.4285392165184021 }, { "compression_loss": 0.0, "distillation_loss": 0.6010510921478271, "epoch": 5.63, "learning_rate": 5.772257343528033e-05, "loss": 0.5232, "step": 15570, "task_loss": 0.6189045906066895 }, { "compression_loss": 0.0, "distillation_loss": 0.5353314876556396, "epoch": 5.63, "learning_rate": 5.7696466494977485e-05, "loss": 0.5602, "step": 15580, "task_loss": 0.7458304762840271 }, { "compression_loss": 0.0, "distillation_loss": 0.7870986461639404, "epoch": 5.63, "learning_rate": 5.767021674237998e-05, "loss": 0.5116, "step": 15590, "task_loss": 0.6424393057823181 }, { "compression_loss": 0.0, "distillation_loss": 0.443065881729126, "epoch": 5.64, "learning_rate": 5.7643824312840375e-05, "loss": 0.4851, "step": 15600, "task_loss": 0.6533344984054565 }, { "compression_loss": 0.0, "distillation_loss": 0.3721536099910736, "epoch": 5.64, "learning_rate": 5.76172893424469e-05, "loss": 0.5041, "step": 15610, "task_loss": 0.5399680733680725 }, { "compression_loss": 0.0, "distillation_loss": 0.7717987298965454, "epoch": 5.65, "learning_rate": 5.75906119680228e-05, "loss": 0.5318, "step": 15620, "task_loss": 1.1942107677459717 }, { "compression_loss": 0.0, "distillation_loss": 0.49710613489151, "epoch": 5.65, "learning_rate": 5.756379232712559e-05, "loss": 0.5569, "step": 15630, "task_loss": 0.30331313610076904 }, { "compression_loss": 0.0, "distillation_loss": 0.42363065481185913, "epoch": 5.65, "learning_rate": 5.753683055804634e-05, "loss": 0.5337, "step": 15640, "task_loss": 0.6503758430480957 }, { "compression_loss": 0.0, "distillation_loss": 0.6108335256576538, "epoch": 5.66, "learning_rate": 5.750972679980902e-05, "loss": 0.572, "step": 15650, "task_loss": 0.9395371079444885 }, { "compression_loss": 0.0, "distillation_loss": 0.40696799755096436, "epoch": 5.66, "learning_rate": 5.7482481192169704e-05, "loss": 0.5518, "step": 15660, "task_loss": 0.6489532589912415 }, { "compression_loss": 0.0, "distillation_loss": 0.45866596698760986, "epoch": 5.66, "learning_rate": 5.745509387561591e-05, "loss": 0.5466, "step": 15670, "task_loss": 0.814210832118988 }, { "compression_loss": 0.0, "distillation_loss": 0.5743582248687744, "epoch": 5.67, "learning_rate": 5.742756499136585e-05, "loss": 0.5609, "step": 15680, "task_loss": 0.7212681174278259 }, { "compression_loss": 0.0, "distillation_loss": 0.4180968999862671, "epoch": 5.67, "learning_rate": 5.739989468136772e-05, "loss": 0.6103, "step": 15690, "task_loss": 0.3201150894165039 }, { "compression_loss": 0.0, "distillation_loss": 0.38410964608192444, "epoch": 5.67, "learning_rate": 5.7372083088298935e-05, "loss": 0.5811, "step": 15700, "task_loss": 0.44139623641967773 }, { "compression_loss": 0.0, "distillation_loss": 0.6996259093284607, "epoch": 5.68, "learning_rate": 5.734413035556541e-05, "loss": 0.5444, "step": 15710, "task_loss": 0.8911246061325073 }, { "compression_loss": 0.0, "distillation_loss": 0.5756803750991821, "epoch": 5.68, "learning_rate": 5.731603662730085e-05, "loss": 0.5422, "step": 15720, "task_loss": 0.8120925426483154 }, { "compression_loss": 0.0, "distillation_loss": 0.5546524524688721, "epoch": 5.68, "learning_rate": 5.7287802048365964e-05, "loss": 0.5102, "step": 15730, "task_loss": 0.6311538219451904 }, { "compression_loss": 0.0, "distillation_loss": 0.8656566143035889, "epoch": 5.69, "learning_rate": 5.725942676434773e-05, "loss": 0.5599, "step": 15740, "task_loss": 0.7161672115325928 }, { "compression_loss": 0.0, "distillation_loss": 0.4685131311416626, "epoch": 5.69, "learning_rate": 5.723091092155865e-05, "loss": 0.5562, "step": 15750, "task_loss": 0.3831646144390106 }, { "epoch": 5.69, "eval_exact_match": 82.02459791863765, "eval_f1": 89.1522756044948, "step": 15750 }, { "compression_loss": 0.0, "distillation_loss": 0.7688452005386353, "epoch": 5.7, "learning_rate": 5.720225466703601e-05, "loss": 0.6282, "step": 15760, "task_loss": 0.5565420389175415 }, { "compression_loss": 0.0, "distillation_loss": 0.47321581840515137, "epoch": 5.7, "learning_rate": 5.7173458148541094e-05, "loss": 0.6024, "step": 15770, "task_loss": 0.37659692764282227 }, { "compression_loss": 0.0, "distillation_loss": 0.4902139902114868, "epoch": 5.7, "learning_rate": 5.714452151455841e-05, "loss": 0.6281, "step": 15780, "task_loss": 0.9982061386108398 }, { "compression_loss": 0.0, "distillation_loss": 0.44813448190689087, "epoch": 5.71, "learning_rate": 5.711544491429501e-05, "loss": 0.5142, "step": 15790, "task_loss": 0.7714956998825073 }, { "compression_loss": 0.0, "distillation_loss": 0.4338916838169098, "epoch": 5.71, "learning_rate": 5.708622849767961e-05, "loss": 0.5278, "step": 15800, "task_loss": 0.4820694327354431 }, { "compression_loss": 0.0, "distillation_loss": 0.4372040629386902, "epoch": 5.71, "learning_rate": 5.705687241536186e-05, "loss": 0.5699, "step": 15810, "task_loss": 0.41295021772384644 }, { "compression_loss": 0.0, "distillation_loss": 0.3634551465511322, "epoch": 5.72, "learning_rate": 5.702737681871161e-05, "loss": 0.4913, "step": 15820, "task_loss": 0.31981927156448364 }, { "compression_loss": 0.0, "distillation_loss": 0.47144439816474915, "epoch": 5.72, "learning_rate": 5.699774185981806e-05, "loss": 0.5732, "step": 15830, "task_loss": 0.3408505916595459 }, { "compression_loss": 0.0, "distillation_loss": 0.664949893951416, "epoch": 5.72, "learning_rate": 5.696796769148905e-05, "loss": 0.5635, "step": 15840, "task_loss": 0.5504066348075867 }, { "compression_loss": 0.0, "distillation_loss": 0.5689000487327576, "epoch": 5.73, "learning_rate": 5.693805446725018e-05, "loss": 0.6337, "step": 15850, "task_loss": 0.7448662519454956 }, { "compression_loss": 0.0, "distillation_loss": 0.5832275152206421, "epoch": 5.73, "learning_rate": 5.69080023413441e-05, "loss": 0.5428, "step": 15860, "task_loss": 0.5787875056266785 }, { "compression_loss": 0.0, "distillation_loss": 0.4936749339103699, "epoch": 5.74, "learning_rate": 5.687781146872967e-05, "loss": 0.5589, "step": 15870, "task_loss": 0.5635548830032349 }, { "compression_loss": 0.0, "distillation_loss": 0.4124121069908142, "epoch": 5.74, "learning_rate": 5.6847482005081207e-05, "loss": 0.5222, "step": 15880, "task_loss": 1.0899685621261597 }, { "compression_loss": 0.0, "distillation_loss": 0.390001118183136, "epoch": 5.74, "learning_rate": 5.681701410678759e-05, "loss": 0.4966, "step": 15890, "task_loss": 0.5182036757469177 }, { "compression_loss": 0.0, "distillation_loss": 0.38561293482780457, "epoch": 5.75, "learning_rate": 5.678640793095156e-05, "loss": 0.5762, "step": 15900, "task_loss": 0.7452099323272705 }, { "compression_loss": 0.0, "distillation_loss": 0.7303899526596069, "epoch": 5.75, "learning_rate": 5.6755663635388865e-05, "loss": 0.5726, "step": 15910, "task_loss": 0.6906106472015381 }, { "compression_loss": 0.0, "distillation_loss": 0.5508267283439636, "epoch": 5.75, "learning_rate": 5.6724781378627425e-05, "loss": 0.6166, "step": 15920, "task_loss": 0.4653738737106323 }, { "compression_loss": 0.0, "distillation_loss": 0.3965685963630676, "epoch": 5.76, "learning_rate": 5.669376131990653e-05, "loss": 0.5519, "step": 15930, "task_loss": 0.5775076746940613 }, { "compression_loss": 0.0, "distillation_loss": 0.8321218490600586, "epoch": 5.76, "learning_rate": 5.666260361917604e-05, "loss": 0.5691, "step": 15940, "task_loss": 0.9377269148826599 }, { "compression_loss": 0.0, "distillation_loss": 0.712748110294342, "epoch": 5.76, "learning_rate": 5.6631308437095546e-05, "loss": 0.5579, "step": 15950, "task_loss": 0.767340898513794 }, { "compression_loss": 0.0, "distillation_loss": 0.8006727695465088, "epoch": 5.77, "learning_rate": 5.6599875935033516e-05, "loss": 0.6169, "step": 15960, "task_loss": 0.9040712714195251 }, { "compression_loss": 0.0, "distillation_loss": 0.5734758973121643, "epoch": 5.77, "learning_rate": 5.656830627506651e-05, "loss": 0.5505, "step": 15970, "task_loss": 0.7179234027862549 }, { "compression_loss": 0.0, "distillation_loss": 0.5710708498954773, "epoch": 5.78, "learning_rate": 5.6536599619978306e-05, "loss": 0.5686, "step": 15980, "task_loss": 0.7058345079421997 }, { "compression_loss": 0.0, "distillation_loss": 0.49838507175445557, "epoch": 5.78, "learning_rate": 5.650475613325907e-05, "loss": 0.5444, "step": 15990, "task_loss": 0.4772479236125946 }, { "compression_loss": 0.0, "distillation_loss": 0.6272715330123901, "epoch": 5.78, "learning_rate": 5.647277597910456e-05, "loss": 0.5258, "step": 16000, "task_loss": 0.5559104681015015 }, { "epoch": 5.78, "eval_exact_match": 81.86376537369915, "eval_f1": 89.00954211887367, "step": 16000 }, { "compression_loss": 0.0, "distillation_loss": 0.45378392934799194, "epoch": 5.79, "learning_rate": 5.644065932241517e-05, "loss": 0.5763, "step": 16010, "task_loss": 0.5300541520118713 }, { "compression_loss": 0.0, "distillation_loss": 0.493353933095932, "epoch": 5.79, "learning_rate": 5.6408406328795195e-05, "loss": 0.5179, "step": 16020, "task_loss": 0.4974842071533203 }, { "compression_loss": 0.0, "distillation_loss": 0.7274896502494812, "epoch": 5.79, "learning_rate": 5.6376017164551915e-05, "loss": 0.59, "step": 16030, "task_loss": 0.7033414840698242 }, { "compression_loss": 0.0, "distillation_loss": 0.4151965379714966, "epoch": 5.8, "learning_rate": 5.6343491996694754e-05, "loss": 0.5246, "step": 16040, "task_loss": 0.7589873671531677 }, { "compression_loss": 0.0, "distillation_loss": 0.40979576110839844, "epoch": 5.8, "learning_rate": 5.631083099293441e-05, "loss": 0.531, "step": 16050, "task_loss": 0.45069462060928345 }, { "compression_loss": 0.0, "distillation_loss": 0.45870572328567505, "epoch": 5.8, "learning_rate": 5.627803432168202e-05, "loss": 0.5516, "step": 16060, "task_loss": 0.5535123348236084 }, { "compression_loss": 0.0, "distillation_loss": 0.47782111167907715, "epoch": 5.81, "learning_rate": 5.6245102152048215e-05, "loss": 0.4949, "step": 16070, "task_loss": 0.8046015501022339 }, { "compression_loss": 0.0, "distillation_loss": 0.5272634029388428, "epoch": 5.81, "learning_rate": 5.621203465384236e-05, "loss": 0.5679, "step": 16080, "task_loss": 0.5130640268325806 }, { "compression_loss": 0.0, "distillation_loss": 0.40280604362487793, "epoch": 5.81, "learning_rate": 5.617883199757159e-05, "loss": 0.4614, "step": 16090, "task_loss": 0.4750958979129791 }, { "compression_loss": 0.0, "distillation_loss": 0.6462051868438721, "epoch": 5.82, "learning_rate": 5.6145494354439964e-05, "loss": 0.5555, "step": 16100, "task_loss": 0.9935194253921509 }, { "compression_loss": 0.0, "distillation_loss": 0.4317300319671631, "epoch": 5.82, "learning_rate": 5.611202189634758e-05, "loss": 0.5154, "step": 16110, "task_loss": 0.36278975009918213 }, { "compression_loss": 0.0, "distillation_loss": 0.5830767154693604, "epoch": 5.83, "learning_rate": 5.6078414795889694e-05, "loss": 0.5193, "step": 16120, "task_loss": 0.2859882414340973 }, { "compression_loss": 0.0, "distillation_loss": 0.39172929525375366, "epoch": 5.83, "learning_rate": 5.604467322635582e-05, "loss": 0.4694, "step": 16130, "task_loss": 0.44212770462036133 }, { "compression_loss": 0.0, "distillation_loss": 0.6151372790336609, "epoch": 5.83, "learning_rate": 5.6010797361728816e-05, "loss": 0.4802, "step": 16140, "task_loss": 0.8025726079940796 }, { "compression_loss": 0.0, "distillation_loss": 0.612571656703949, "epoch": 5.84, "learning_rate": 5.597678737668405e-05, "loss": 0.5408, "step": 16150, "task_loss": 0.6253871917724609 }, { "compression_loss": 0.0, "distillation_loss": 0.34145790338516235, "epoch": 5.84, "learning_rate": 5.594264344658843e-05, "loss": 0.4514, "step": 16160, "task_loss": 0.4706138074398041 }, { "compression_loss": 0.0, "distillation_loss": 0.56342613697052, "epoch": 5.84, "learning_rate": 5.5908365747499555e-05, "loss": 0.6194, "step": 16170, "task_loss": 0.40212613344192505 }, { "compression_loss": 0.0, "distillation_loss": 0.4632798433303833, "epoch": 5.85, "learning_rate": 5.587395445616475e-05, "loss": 0.5395, "step": 16180, "task_loss": 0.6225385665893555 }, { "compression_loss": 0.0, "distillation_loss": 0.5093396902084351, "epoch": 5.85, "learning_rate": 5.583940975002023e-05, "loss": 0.5351, "step": 16190, "task_loss": 0.6102060079574585 }, { "compression_loss": 0.0, "distillation_loss": 0.3960115313529968, "epoch": 5.85, "learning_rate": 5.5804731807190085e-05, "loss": 0.5119, "step": 16200, "task_loss": 0.3458710014820099 }, { "compression_loss": 0.0, "distillation_loss": 0.7457716464996338, "epoch": 5.86, "learning_rate": 5.576992080648548e-05, "loss": 0.4945, "step": 16210, "task_loss": 1.0185015201568604 }, { "compression_loss": 0.0, "distillation_loss": 0.7339679002761841, "epoch": 5.86, "learning_rate": 5.57349769274036e-05, "loss": 0.6524, "step": 16220, "task_loss": 0.9575223922729492 }, { "compression_loss": 0.0, "distillation_loss": 0.48004066944122314, "epoch": 5.87, "learning_rate": 5.569990035012688e-05, "loss": 0.5535, "step": 16230, "task_loss": 0.6020759344100952 }, { "compression_loss": 0.0, "distillation_loss": 0.37398168444633484, "epoch": 5.87, "learning_rate": 5.566469125552193e-05, "loss": 0.4726, "step": 16240, "task_loss": 0.555140495300293 }, { "compression_loss": 0.0, "distillation_loss": 0.49550220370292664, "epoch": 5.87, "learning_rate": 5.562934982513867e-05, "loss": 0.6053, "step": 16250, "task_loss": 0.5213397741317749 }, { "epoch": 5.87, "eval_exact_match": 82.0151371807001, "eval_f1": 89.09085178965859, "step": 16250 }, { "compression_loss": 0.0, "distillation_loss": 0.49956950545310974, "epoch": 5.88, "learning_rate": 5.559387624120942e-05, "loss": 0.487, "step": 16260, "task_loss": 0.537997841835022 }, { "compression_loss": 0.0, "distillation_loss": 0.43931645154953003, "epoch": 5.88, "learning_rate": 5.5558270686647886e-05, "loss": 0.5127, "step": 16270, "task_loss": 0.6900993585586548 }, { "compression_loss": 0.0, "distillation_loss": 0.49603772163391113, "epoch": 5.88, "learning_rate": 5.55225333450483e-05, "loss": 0.4836, "step": 16280, "task_loss": 0.4945855438709259 }, { "compression_loss": 0.0, "distillation_loss": 0.5353456139564514, "epoch": 5.89, "learning_rate": 5.5486664400684396e-05, "loss": 0.6114, "step": 16290, "task_loss": 0.5326354503631592 }, { "compression_loss": 0.0, "distillation_loss": 0.6305358409881592, "epoch": 5.89, "learning_rate": 5.5450664038508526e-05, "loss": 0.5082, "step": 16300, "task_loss": 0.6449335813522339 }, { "compression_loss": 0.0, "distillation_loss": 0.48550814390182495, "epoch": 5.89, "learning_rate": 5.541453244415065e-05, "loss": 0.418, "step": 16310, "task_loss": 0.724381685256958 }, { "compression_loss": 0.0, "distillation_loss": 0.5628600120544434, "epoch": 5.9, "learning_rate": 5.5378269803917425e-05, "loss": 0.509, "step": 16320, "task_loss": 0.6630486249923706 }, { "compression_loss": 0.0, "distillation_loss": 0.5237171053886414, "epoch": 5.9, "learning_rate": 5.534187630479123e-05, "loss": 0.5697, "step": 16330, "task_loss": 0.5837256908416748 }, { "compression_loss": 0.0, "distillation_loss": 0.4340498745441437, "epoch": 5.91, "learning_rate": 5.530535213442916e-05, "loss": 0.6226, "step": 16340, "task_loss": 0.5448073148727417 }, { "compression_loss": 0.0, "distillation_loss": 0.557012677192688, "epoch": 5.91, "learning_rate": 5.5268697481162134e-05, "loss": 0.5754, "step": 16350, "task_loss": 0.3911644220352173 }, { "compression_loss": 0.0, "distillation_loss": 0.5198036432266235, "epoch": 5.91, "learning_rate": 5.523191253399388e-05, "loss": 0.521, "step": 16360, "task_loss": 0.6677054166793823 }, { "compression_loss": 0.0, "distillation_loss": 0.9684406518936157, "epoch": 5.92, "learning_rate": 5.519499748259993e-05, "loss": 0.5403, "step": 16370, "task_loss": 0.9720715284347534 }, { "compression_loss": 0.0, "distillation_loss": 0.5757874250411987, "epoch": 5.92, "learning_rate": 5.515795251732672e-05, "loss": 0.4946, "step": 16380, "task_loss": 0.8197060227394104 }, { "compression_loss": 0.0, "distillation_loss": 0.5475500822067261, "epoch": 5.92, "learning_rate": 5.5120777829190543e-05, "loss": 0.4821, "step": 16390, "task_loss": 0.47533178329467773 }, { "compression_loss": 0.0, "distillation_loss": 0.4618098735809326, "epoch": 5.93, "learning_rate": 5.50834736098766e-05, "loss": 0.5336, "step": 16400, "task_loss": 0.6744464039802551 }, { "compression_loss": 0.0, "distillation_loss": 0.5418887734413147, "epoch": 5.93, "learning_rate": 5.5046040051737976e-05, "loss": 0.5763, "step": 16410, "task_loss": 0.4931833744049072 }, { "compression_loss": 0.0, "distillation_loss": 0.5653811693191528, "epoch": 5.93, "learning_rate": 5.500847734779469e-05, "loss": 0.504, "step": 16420, "task_loss": 0.5599584579467773 }, { "compression_loss": 0.0, "distillation_loss": 0.384804904460907, "epoch": 5.94, "learning_rate": 5.497078569173267e-05, "loss": 0.5194, "step": 16430, "task_loss": 1.2085001468658447 }, { "compression_loss": 0.0, "distillation_loss": 0.6292977333068848, "epoch": 5.94, "learning_rate": 5.4932965277902784e-05, "loss": 0.4963, "step": 16440, "task_loss": 0.8269913792610168 }, { "compression_loss": 0.0, "distillation_loss": 0.611187756061554, "epoch": 5.95, "learning_rate": 5.4895016301319786e-05, "loss": 0.5997, "step": 16450, "task_loss": 0.6286755204200745 }, { "compression_loss": 0.0, "distillation_loss": 0.5451836585998535, "epoch": 5.95, "learning_rate": 5.4856938957661377e-05, "loss": 0.6417, "step": 16460, "task_loss": 0.48253700137138367 }, { "compression_loss": 0.0, "distillation_loss": 0.36945682764053345, "epoch": 5.95, "learning_rate": 5.481873344326713e-05, "loss": 0.4661, "step": 16470, "task_loss": 0.24678575992584229 }, { "compression_loss": 0.0, "distillation_loss": 0.6069915294647217, "epoch": 5.96, "learning_rate": 5.478039995513753e-05, "loss": 0.5643, "step": 16480, "task_loss": 1.2377862930297852 }, { "compression_loss": 0.0, "distillation_loss": 0.5773873925209045, "epoch": 5.96, "learning_rate": 5.474193869093293e-05, "loss": 0.5338, "step": 16490, "task_loss": 0.890839159488678 }, { "compression_loss": 0.0, "distillation_loss": 0.5319252014160156, "epoch": 5.96, "learning_rate": 5.4703349848972554e-05, "loss": 0.5673, "step": 16500, "task_loss": 0.6719323992729187 }, { "epoch": 5.96, "eval_exact_match": 82.07190160832545, "eval_f1": 89.23505876854658, "step": 16500 }, { "compression_loss": 0.0, "distillation_loss": 0.4537544846534729, "epoch": 5.97, "learning_rate": 5.466463362823343e-05, "loss": 0.5421, "step": 16510, "task_loss": 0.8447656035423279 }, { "compression_loss": 0.0, "distillation_loss": 0.3538123071193695, "epoch": 5.97, "learning_rate": 5.4625790228349424e-05, "loss": 0.5304, "step": 16520, "task_loss": 0.41902589797973633 }, { "compression_loss": 0.0, "distillation_loss": 0.5340615510940552, "epoch": 5.97, "learning_rate": 5.458681984961015e-05, "loss": 0.5857, "step": 16530, "task_loss": 0.5699688196182251 }, { "compression_loss": 0.0, "distillation_loss": 0.5087348222732544, "epoch": 5.98, "learning_rate": 5.4547722692960005e-05, "loss": 0.4812, "step": 16540, "task_loss": 0.8394026756286621 }, { "compression_loss": 0.0, "distillation_loss": 0.4404750168323517, "epoch": 5.98, "learning_rate": 5.450849895999707e-05, "loss": 0.5653, "step": 16550, "task_loss": 0.5041899085044861 }, { "compression_loss": 0.0, "distillation_loss": 0.2825411558151245, "epoch": 5.98, "learning_rate": 5.44691488529721e-05, "loss": 0.499, "step": 16560, "task_loss": 0.20059698820114136 }, { "compression_loss": 0.0, "distillation_loss": 0.6841438412666321, "epoch": 5.99, "learning_rate": 5.442967257478748e-05, "loss": 0.5434, "step": 16570, "task_loss": 0.7802176475524902 }, { "compression_loss": 0.0, "distillation_loss": 0.8112098574638367, "epoch": 5.99, "learning_rate": 5.4390070328996166e-05, "loss": 0.6064, "step": 16580, "task_loss": 0.8238754272460938 }, { "compression_loss": 0.0, "distillation_loss": 0.42704349756240845, "epoch": 6.0, "learning_rate": 5.435034231980067e-05, "loss": 0.5143, "step": 16590, "task_loss": 0.6214593052864075 }, { "compression_loss": 0.0, "distillation_loss": 0.4533708691596985, "epoch": 6.0, "learning_rate": 5.431048875205195e-05, "loss": 0.5248, "step": 16600, "task_loss": 0.4769049882888794 }, { "compression_loss": 0.0, "distillation_loss": 0.5088812112808228, "epoch": 6.0, "learning_rate": 5.427050983124843e-05, "loss": 0.4649, "step": 16610, "task_loss": 0.9433021545410156 }, { "compression_loss": 0.0, "distillation_loss": 0.3258495628833771, "epoch": 6.01, "learning_rate": 5.423040576353483e-05, "loss": 0.4545, "step": 16620, "task_loss": 0.5422816276550293 }, { "compression_loss": 0.0, "distillation_loss": 0.5611860752105713, "epoch": 6.01, "learning_rate": 5.4190176755701236e-05, "loss": 0.4954, "step": 16630, "task_loss": 0.4522503614425659 }, { "compression_loss": 0.0, "distillation_loss": 0.37038782238960266, "epoch": 6.01, "learning_rate": 5.414982301518194e-05, "loss": 0.486, "step": 16640, "task_loss": 0.4855346083641052 }, { "compression_loss": 0.0, "distillation_loss": 0.4425680339336395, "epoch": 6.02, "learning_rate": 5.410934475005439e-05, "loss": 0.435, "step": 16650, "task_loss": 0.4484761953353882 }, { "compression_loss": 0.0, "distillation_loss": 0.3988327980041504, "epoch": 6.02, "learning_rate": 5.4068742169038124e-05, "loss": 0.4155, "step": 16660, "task_loss": 0.4220232367515564 }, { "compression_loss": 0.0, "distillation_loss": 0.43297821283340454, "epoch": 6.02, "learning_rate": 5.402801548149372e-05, "loss": 0.4468, "step": 16670, "task_loss": 1.0445358753204346 }, { "compression_loss": 0.0, "distillation_loss": 0.5155505537986755, "epoch": 6.03, "learning_rate": 5.398716489742166e-05, "loss": 0.4967, "step": 16680, "task_loss": 0.43968135118484497 }, { "compression_loss": 0.0, "distillation_loss": 0.3766743838787079, "epoch": 6.03, "learning_rate": 5.3946190627461314e-05, "loss": 0.4799, "step": 16690, "task_loss": 0.3420065939426422 }, { "compression_loss": 0.0, "distillation_loss": 0.34114527702331543, "epoch": 6.04, "learning_rate": 5.390509288288977e-05, "loss": 0.4431, "step": 16700, "task_loss": 0.38847649097442627 }, { "compression_loss": 0.0, "distillation_loss": 0.3855423331260681, "epoch": 6.04, "learning_rate": 5.386387187562082e-05, "loss": 0.4372, "step": 16710, "task_loss": 0.21255548298358917 }, { "compression_loss": 0.0, "distillation_loss": 0.3481213450431824, "epoch": 6.04, "learning_rate": 5.3822527818203865e-05, "loss": 0.4527, "step": 16720, "task_loss": 0.4471372365951538 }, { "compression_loss": 0.0, "distillation_loss": 0.44838201999664307, "epoch": 6.05, "learning_rate": 5.378106092382275e-05, "loss": 0.4386, "step": 16730, "task_loss": 0.743310809135437 }, { "compression_loss": 0.0, "distillation_loss": 0.644596517086029, "epoch": 6.05, "learning_rate": 5.3739471406294725e-05, "loss": 0.473, "step": 16740, "task_loss": 0.6514346599578857 }, { "compression_loss": 0.0, "distillation_loss": 0.2982652187347412, "epoch": 6.05, "learning_rate": 5.369775948006933e-05, "loss": 0.4327, "step": 16750, "task_loss": 0.28472357988357544 }, { "epoch": 6.05, "eval_exact_match": 81.88268684957427, "eval_f1": 89.02189324828987, "step": 16750 }, { "compression_loss": 0.0, "distillation_loss": 0.4026915431022644, "epoch": 6.06, "learning_rate": 5.365592536022728e-05, "loss": 0.4691, "step": 16760, "task_loss": 0.688092827796936 }, { "compression_loss": 0.0, "distillation_loss": 0.408430278301239, "epoch": 6.06, "learning_rate": 5.361396926247936e-05, "loss": 0.4222, "step": 16770, "task_loss": 0.34195762872695923 }, { "compression_loss": 0.0, "distillation_loss": 0.5037705302238464, "epoch": 6.06, "learning_rate": 5.3571891403165345e-05, "loss": 0.4151, "step": 16780, "task_loss": 0.6692633032798767 }, { "compression_loss": 0.0, "distillation_loss": 0.33279550075531006, "epoch": 6.07, "learning_rate": 5.3529691999252797e-05, "loss": 0.4216, "step": 16790, "task_loss": 0.763120174407959 }, { "compression_loss": 0.0, "distillation_loss": 0.3310759961605072, "epoch": 6.07, "learning_rate": 5.348737126833605e-05, "loss": 0.4252, "step": 16800, "task_loss": 0.38220885396003723 }, { "compression_loss": 0.0, "distillation_loss": 0.4700435698032379, "epoch": 6.08, "learning_rate": 5.344492942863501e-05, "loss": 0.4713, "step": 16810, "task_loss": 0.7272550463676453 }, { "compression_loss": 0.0, "distillation_loss": 0.49290114641189575, "epoch": 6.08, "learning_rate": 5.340236669899409e-05, "loss": 0.4825, "step": 16820, "task_loss": 0.7776498794555664 }, { "compression_loss": 0.0, "distillation_loss": 0.4069538712501526, "epoch": 6.08, "learning_rate": 5.3359683298881014e-05, "loss": 0.4863, "step": 16830, "task_loss": 0.6780685782432556 }, { "compression_loss": 0.0, "distillation_loss": 0.4466571509838104, "epoch": 6.09, "learning_rate": 5.331687944838578e-05, "loss": 0.4775, "step": 16840, "task_loss": 0.6824575662612915 }, { "compression_loss": 0.0, "distillation_loss": 0.6595938205718994, "epoch": 6.09, "learning_rate": 5.327395536821941e-05, "loss": 0.4408, "step": 16850, "task_loss": 1.1058135032653809 }, { "compression_loss": 0.0, "distillation_loss": 0.39724981784820557, "epoch": 6.09, "learning_rate": 5.323091127971289e-05, "loss": 0.4776, "step": 16860, "task_loss": 0.5060951709747314 }, { "compression_loss": 0.0, "distillation_loss": 0.4856202006340027, "epoch": 6.1, "learning_rate": 5.318774740481603e-05, "loss": 0.4653, "step": 16870, "task_loss": 0.4803681969642639 }, { "compression_loss": 0.0, "distillation_loss": 0.401211142539978, "epoch": 6.1, "learning_rate": 5.314446396609628e-05, "loss": 0.4307, "step": 16880, "task_loss": 0.3891042470932007 }, { "compression_loss": 0.0, "distillation_loss": 0.3375917673110962, "epoch": 6.1, "learning_rate": 5.310106118673761e-05, "loss": 0.458, "step": 16890, "task_loss": 0.7722040414810181 }, { "compression_loss": 0.0, "distillation_loss": 0.3473578989505768, "epoch": 6.11, "learning_rate": 5.305753929053932e-05, "loss": 0.4412, "step": 16900, "task_loss": 0.7287085056304932 }, { "compression_loss": 0.0, "distillation_loss": 0.680449366569519, "epoch": 6.11, "learning_rate": 5.301389850191497e-05, "loss": 0.4862, "step": 16910, "task_loss": 0.8735339045524597 }, { "compression_loss": 0.0, "distillation_loss": 0.42973363399505615, "epoch": 6.11, "learning_rate": 5.297013904589114e-05, "loss": 0.4532, "step": 16920, "task_loss": 0.4185020625591278 }, { "compression_loss": 0.0, "distillation_loss": 0.5793424844741821, "epoch": 6.12, "learning_rate": 5.292626114810628e-05, "loss": 0.4858, "step": 16930, "task_loss": 0.8029798269271851 }, { "compression_loss": 0.0, "distillation_loss": 0.3389170169830322, "epoch": 6.12, "learning_rate": 5.28822650348096e-05, "loss": 0.4769, "step": 16940, "task_loss": 0.3712247610092163 }, { "compression_loss": 0.0, "distillation_loss": 0.4885207414627075, "epoch": 6.13, "learning_rate": 5.2838150932859857e-05, "loss": 0.4678, "step": 16950, "task_loss": 0.6370507478713989 }, { "compression_loss": 0.0, "distillation_loss": 0.316753625869751, "epoch": 6.13, "learning_rate": 5.279391906972418e-05, "loss": 0.4271, "step": 16960, "task_loss": 0.5604386329650879 }, { "compression_loss": 0.0, "distillation_loss": 0.41553622484207153, "epoch": 6.13, "learning_rate": 5.274956967347694e-05, "loss": 0.4632, "step": 16970, "task_loss": 0.49455636739730835 }, { "compression_loss": 0.0, "distillation_loss": 0.3349422514438629, "epoch": 6.14, "learning_rate": 5.270510297279855e-05, "loss": 0.4382, "step": 16980, "task_loss": 0.6961212158203125 }, { "compression_loss": 0.0, "distillation_loss": 0.4425978660583496, "epoch": 6.14, "learning_rate": 5.266051919697425e-05, "loss": 0.4478, "step": 16990, "task_loss": 0.5341584086418152 }, { "compression_loss": 0.0, "distillation_loss": 0.570014238357544, "epoch": 6.14, "learning_rate": 5.2615818575892986e-05, "loss": 0.4476, "step": 17000, "task_loss": 0.5794532895088196 }, { "epoch": 6.14, "eval_exact_match": 82.07190160832545, "eval_f1": 89.09099844346169, "step": 17000 }, { "compression_loss": 0.0, "distillation_loss": 0.5344833135604858, "epoch": 6.15, "learning_rate": 5.2571001340046186e-05, "loss": 0.4784, "step": 17010, "task_loss": 0.5975182056427002 }, { "compression_loss": 0.0, "distillation_loss": 0.43689098954200745, "epoch": 6.15, "learning_rate": 5.2526067720526606e-05, "loss": 0.4314, "step": 17020, "task_loss": 0.8588857650756836 }, { "compression_loss": 0.0, "distillation_loss": 0.36046090722084045, "epoch": 6.15, "learning_rate": 5.2481017949027086e-05, "loss": 0.4663, "step": 17030, "task_loss": 0.5059746503829956 }, { "compression_loss": 0.0, "distillation_loss": 0.4314071536064148, "epoch": 6.16, "learning_rate": 5.2435852257839397e-05, "loss": 0.4958, "step": 17040, "task_loss": 0.6044473648071289 }, { "compression_loss": 0.0, "distillation_loss": 0.4972867965698242, "epoch": 6.16, "learning_rate": 5.239057087985303e-05, "loss": 0.3913, "step": 17050, "task_loss": 0.7851539850234985 }, { "compression_loss": 0.0, "distillation_loss": 0.4286130368709564, "epoch": 6.17, "learning_rate": 5.234517404855399e-05, "loss": 0.4531, "step": 17060, "task_loss": 0.44648587703704834 }, { "compression_loss": 0.0, "distillation_loss": 0.4931001365184784, "epoch": 6.17, "learning_rate": 5.229966199802363e-05, "loss": 0.4906, "step": 17070, "task_loss": 0.4792635142803192 }, { "compression_loss": 0.0, "distillation_loss": 0.4335430860519409, "epoch": 6.17, "learning_rate": 5.225403496293736e-05, "loss": 0.4316, "step": 17080, "task_loss": 0.2784348130226135 }, { "compression_loss": 0.0, "distillation_loss": 0.5062400102615356, "epoch": 6.18, "learning_rate": 5.2208293178563524e-05, "loss": 0.4188, "step": 17090, "task_loss": 0.8841861486434937 }, { "compression_loss": 0.0, "distillation_loss": 0.42178788781166077, "epoch": 6.18, "learning_rate": 5.216243688076215e-05, "loss": 0.4311, "step": 17100, "task_loss": 0.5392696261405945 }, { "compression_loss": 0.0, "distillation_loss": 0.48605018854141235, "epoch": 6.18, "learning_rate": 5.211646630598372e-05, "loss": 0.492, "step": 17110, "task_loss": 0.7885700464248657 }, { "compression_loss": 0.0, "distillation_loss": 0.43214869499206543, "epoch": 6.19, "learning_rate": 5.207038169126799e-05, "loss": 0.4787, "step": 17120, "task_loss": 0.8961579203605652 }, { "compression_loss": 0.0, "distillation_loss": 0.4661581516265869, "epoch": 6.19, "learning_rate": 5.2024183274242715e-05, "loss": 0.4191, "step": 17130, "task_loss": 0.6008694171905518 }, { "compression_loss": 0.0, "distillation_loss": 0.5632531642913818, "epoch": 6.19, "learning_rate": 5.197787129312247e-05, "loss": 0.4638, "step": 17140, "task_loss": 0.6885539293289185 }, { "compression_loss": 0.0, "distillation_loss": 0.343812495470047, "epoch": 6.2, "learning_rate": 5.193144598670741e-05, "loss": 0.4286, "step": 17150, "task_loss": 0.5359995365142822 }, { "compression_loss": 0.0, "distillation_loss": 0.4356578588485718, "epoch": 6.2, "learning_rate": 5.188490759438202e-05, "loss": 0.4945, "step": 17160, "task_loss": 0.7381937503814697 }, { "compression_loss": 0.0, "distillation_loss": 0.3725375533103943, "epoch": 6.21, "learning_rate": 5.18382563561139e-05, "loss": 0.4391, "step": 17170, "task_loss": 0.5693670511245728 }, { "compression_loss": 0.0, "distillation_loss": 0.38266217708587646, "epoch": 6.21, "learning_rate": 5.179149251245253e-05, "loss": 0.4688, "step": 17180, "task_loss": 0.5379250049591064 }, { "compression_loss": 0.0, "distillation_loss": 0.5168706178665161, "epoch": 6.21, "learning_rate": 5.174461630452802e-05, "loss": 0.4385, "step": 17190, "task_loss": 0.6097722053527832 }, { "compression_loss": 0.0, "distillation_loss": 0.3769886791706085, "epoch": 6.22, "learning_rate": 5.169762797404983e-05, "loss": 0.4674, "step": 17200, "task_loss": 0.3455650210380554 }, { "compression_loss": 0.0, "distillation_loss": 0.590087890625, "epoch": 6.22, "learning_rate": 5.165052776330562e-05, "loss": 0.4498, "step": 17210, "task_loss": 0.5600070357322693 }, { "compression_loss": 0.0, "distillation_loss": 0.5225914716720581, "epoch": 6.22, "learning_rate": 5.16033159151599e-05, "loss": 0.4534, "step": 17220, "task_loss": 1.0604463815689087 }, { "compression_loss": 0.0, "distillation_loss": 0.4769077003002167, "epoch": 6.23, "learning_rate": 5.1555992673052845e-05, "loss": 0.4587, "step": 17230, "task_loss": 0.7149950265884399 }, { "compression_loss": 0.0, "distillation_loss": 0.35209062695503235, "epoch": 6.23, "learning_rate": 5.150855828099901e-05, "loss": 0.449, "step": 17240, "task_loss": 0.9201719164848328 }, { "compression_loss": 0.0, "distillation_loss": 0.2952001988887787, "epoch": 6.23, "learning_rate": 5.146101298358604e-05, "loss": 0.4543, "step": 17250, "task_loss": 0.3284863233566284 }, { "epoch": 6.23, "eval_exact_match": 82.06244087038789, "eval_f1": 89.28534999216927, "step": 17250 }, { "compression_loss": 0.0, "distillation_loss": 0.45337045192718506, "epoch": 6.24, "learning_rate": 5.1413357025973514e-05, "loss": 0.3967, "step": 17260, "task_loss": 0.4615285396575928 }, { "compression_loss": 0.0, "distillation_loss": 0.5968666076660156, "epoch": 6.24, "learning_rate": 5.1365590653891554e-05, "loss": 0.4762, "step": 17270, "task_loss": 0.6431958675384521 }, { "compression_loss": 0.0, "distillation_loss": 0.47852158546447754, "epoch": 6.25, "learning_rate": 5.131771411363964e-05, "loss": 0.4462, "step": 17280, "task_loss": 0.8368129730224609 }, { "compression_loss": 0.0, "distillation_loss": 0.4786396026611328, "epoch": 6.25, "learning_rate": 5.1269727652085315e-05, "loss": 0.4775, "step": 17290, "task_loss": 0.828134298324585 }, { "compression_loss": 0.0, "distillation_loss": 0.2864540219306946, "epoch": 6.25, "learning_rate": 5.1221631516662914e-05, "loss": 0.4184, "step": 17300, "task_loss": 0.5259373188018799 }, { "compression_loss": 0.0, "distillation_loss": 0.6025077104568481, "epoch": 6.26, "learning_rate": 5.117342595537228e-05, "loss": 0.4859, "step": 17310, "task_loss": 0.6346871256828308 }, { "compression_loss": 0.0, "distillation_loss": 0.37674152851104736, "epoch": 6.26, "learning_rate": 5.1125111216777496e-05, "loss": 0.4247, "step": 17320, "task_loss": 0.42210888862609863 }, { "compression_loss": 0.0, "distillation_loss": 0.39654457569122314, "epoch": 6.26, "learning_rate": 5.10766875500056e-05, "loss": 0.4151, "step": 17330, "task_loss": 0.45702940225601196 }, { "compression_loss": 0.0, "distillation_loss": 0.4761405289173126, "epoch": 6.27, "learning_rate": 5.10281552047453e-05, "loss": 0.4388, "step": 17340, "task_loss": 0.564466118812561 }, { "compression_loss": 0.0, "distillation_loss": 0.4193245470523834, "epoch": 6.27, "learning_rate": 5.09795144312457e-05, "loss": 0.4997, "step": 17350, "task_loss": 0.3380679190158844 }, { "compression_loss": 0.0, "distillation_loss": 0.3163435161113739, "epoch": 6.27, "learning_rate": 5.0930765480314974e-05, "loss": 0.4753, "step": 17360, "task_loss": 0.28184810280799866 }, { "compression_loss": 0.0, "distillation_loss": 0.4957866668701172, "epoch": 6.28, "learning_rate": 5.0881908603319105e-05, "loss": 0.4457, "step": 17370, "task_loss": 0.8675940036773682 }, { "compression_loss": 0.0, "distillation_loss": 0.3084317445755005, "epoch": 6.28, "learning_rate": 5.083294405218059e-05, "loss": 0.4188, "step": 17380, "task_loss": 0.35364723205566406 }, { "compression_loss": 0.0, "distillation_loss": 0.41741424798965454, "epoch": 6.28, "learning_rate": 5.07838720793771e-05, "loss": 0.5694, "step": 17390, "task_loss": 0.5752847194671631 }, { "compression_loss": 0.0, "distillation_loss": 0.3092053532600403, "epoch": 6.29, "learning_rate": 5.073469293794025e-05, "loss": 0.4577, "step": 17400, "task_loss": 0.5759758949279785 }, { "compression_loss": 0.0, "distillation_loss": 0.40555286407470703, "epoch": 6.29, "learning_rate": 5.0685406881454215e-05, "loss": 0.4448, "step": 17410, "task_loss": 0.7658182382583618 }, { "compression_loss": 0.0, "distillation_loss": 0.3874298334121704, "epoch": 6.3, "learning_rate": 5.0636014164054486e-05, "loss": 0.4463, "step": 17420, "task_loss": 0.6377713680267334 }, { "compression_loss": 0.0, "distillation_loss": 0.686197817325592, "epoch": 6.3, "learning_rate": 5.0586515040426506e-05, "loss": 0.4981, "step": 17430, "task_loss": 0.7246427536010742 }, { "compression_loss": 0.0, "distillation_loss": 0.37397500872612, "epoch": 6.3, "learning_rate": 5.053690976580443e-05, "loss": 0.4869, "step": 17440, "task_loss": 0.693129301071167 }, { "compression_loss": 0.0, "distillation_loss": 0.4268683195114136, "epoch": 6.31, "learning_rate": 5.0487198595969706e-05, "loss": 0.4614, "step": 17450, "task_loss": 0.3804856538772583 }, { "compression_loss": 0.0, "distillation_loss": 0.45956653356552124, "epoch": 6.31, "learning_rate": 5.0437381787249864e-05, "loss": 0.4549, "step": 17460, "task_loss": 0.8962446451187134 }, { "compression_loss": 0.0, "distillation_loss": 0.4504271149635315, "epoch": 6.31, "learning_rate": 5.03874595965171e-05, "loss": 0.5028, "step": 17470, "task_loss": 0.4768270254135132 }, { "compression_loss": 0.0, "distillation_loss": 0.4447179436683655, "epoch": 6.32, "learning_rate": 5.033743228118704e-05, "loss": 0.4214, "step": 17480, "task_loss": 0.49524736404418945 }, { "compression_loss": 0.0, "distillation_loss": 0.6099137663841248, "epoch": 6.32, "learning_rate": 5.028730009921733e-05, "loss": 0.5059, "step": 17490, "task_loss": 0.5523144006729126 }, { "compression_loss": 0.0, "distillation_loss": 0.5245805978775024, "epoch": 6.32, "learning_rate": 5.023706330910635e-05, "loss": 0.4337, "step": 17500, "task_loss": 0.48322606086730957 }, { "epoch": 6.32, "eval_exact_match": 82.15704824976348, "eval_f1": 89.3212773932139, "step": 17500 }, { "compression_loss": 0.0, "distillation_loss": 0.6089495420455933, "epoch": 6.33, "learning_rate": 5.0186722169891904e-05, "loss": 0.4971, "step": 17510, "task_loss": 1.3261122703552246 }, { "compression_loss": 0.0, "distillation_loss": 0.42009443044662476, "epoch": 6.33, "learning_rate": 5.013627694114982e-05, "loss": 0.4783, "step": 17520, "task_loss": 0.6886181831359863 }, { "compression_loss": 0.0, "distillation_loss": 0.3833591938018799, "epoch": 6.34, "learning_rate": 5.008572788299266e-05, "loss": 0.4261, "step": 17530, "task_loss": 0.498070627450943 }, { "compression_loss": 0.0, "distillation_loss": 0.4751524329185486, "epoch": 6.34, "learning_rate": 5.0035075256068377e-05, "loss": 0.4438, "step": 17540, "task_loss": 0.8459548950195312 }, { "compression_loss": 0.0, "distillation_loss": 0.5234730839729309, "epoch": 6.34, "learning_rate": 4.998431932155894e-05, "loss": 0.4572, "step": 17550, "task_loss": 0.5055698752403259 }, { "compression_loss": 0.0, "distillation_loss": 0.4609374403953552, "epoch": 6.35, "learning_rate": 4.993346034117901e-05, "loss": 0.4893, "step": 17560, "task_loss": 0.8448480367660522 }, { "compression_loss": 0.0, "distillation_loss": 0.3884456157684326, "epoch": 6.35, "learning_rate": 4.9882498577174596e-05, "loss": 0.4746, "step": 17570, "task_loss": 0.4255600571632385 }, { "compression_loss": 0.0, "distillation_loss": 0.5293577909469604, "epoch": 6.35, "learning_rate": 4.98314342923217e-05, "loss": 0.4517, "step": 17580, "task_loss": 0.5622125864028931 }, { "compression_loss": 0.0, "distillation_loss": 0.45298856496810913, "epoch": 6.36, "learning_rate": 4.9780267749924934e-05, "loss": 0.4589, "step": 17590, "task_loss": 0.5631417036056519 }, { "compression_loss": 0.0, "distillation_loss": 0.5164973735809326, "epoch": 6.36, "learning_rate": 4.9728999213816197e-05, "loss": 0.4505, "step": 17600, "task_loss": 0.5395184755325317 }, { "compression_loss": 0.0, "distillation_loss": 0.36571723222732544, "epoch": 6.36, "learning_rate": 4.9677628948353304e-05, "loss": 0.4707, "step": 17610, "task_loss": 0.5570918321609497 }, { "compression_loss": 0.0, "distillation_loss": 0.408827006816864, "epoch": 6.37, "learning_rate": 4.962615721841863e-05, "loss": 0.43, "step": 17620, "task_loss": 0.5332542657852173 }, { "compression_loss": 0.0, "distillation_loss": 0.383866548538208, "epoch": 6.37, "learning_rate": 4.9574584289417695e-05, "loss": 0.4391, "step": 17630, "task_loss": 0.4717841148376465 }, { "compression_loss": 0.0, "distillation_loss": 0.3984270393848419, "epoch": 6.38, "learning_rate": 4.952291042727788e-05, "loss": 0.4708, "step": 17640, "task_loss": 0.685359001159668 }, { "compression_loss": 0.0, "distillation_loss": 0.5164908170700073, "epoch": 6.38, "learning_rate": 4.9471135898447e-05, "loss": 0.5033, "step": 17650, "task_loss": 0.6516079902648926 }, { "compression_loss": 0.0, "distillation_loss": 0.5407741069793701, "epoch": 6.38, "learning_rate": 4.9419260969891926e-05, "loss": 0.4793, "step": 17660, "task_loss": 0.7795162200927734 }, { "compression_loss": 0.0, "distillation_loss": 0.3706479072570801, "epoch": 6.39, "learning_rate": 4.936728590909724e-05, "loss": 0.4658, "step": 17670, "task_loss": 0.6481907367706299 }, { "compression_loss": 0.0, "distillation_loss": 0.3029181957244873, "epoch": 6.39, "learning_rate": 4.9320422962809235e-05, "loss": 0.4577, "step": 17680, "task_loss": 0.4625958800315857 }, { "compression_loss": 0.0, "distillation_loss": 0.3568081259727478, "epoch": 6.39, "learning_rate": 4.926825838952736e-05, "loss": 0.4723, "step": 17690, "task_loss": 0.5693346261978149 }, { "compression_loss": 0.0, "distillation_loss": 0.387743204832077, "epoch": 6.4, "learning_rate": 4.921599446262594e-05, "loss": 0.4436, "step": 17700, "task_loss": 0.5526187419891357 }, { "compression_loss": 0.0, "distillation_loss": 0.28367936611175537, "epoch": 6.4, "learning_rate": 4.916363145159537e-05, "loss": 0.459, "step": 17710, "task_loss": 0.28557759523391724 }, { "compression_loss": 0.0, "distillation_loss": 0.5074309706687927, "epoch": 6.4, "learning_rate": 4.911116962643693e-05, "loss": 0.4521, "step": 17720, "task_loss": 0.7370518445968628 }, { "compression_loss": 0.0, "distillation_loss": 0.42953354120254517, "epoch": 6.41, "learning_rate": 4.9058609257661456e-05, "loss": 0.4991, "step": 17730, "task_loss": 0.4267071485519409 }, { "compression_loss": 0.0, "distillation_loss": 0.42934396862983704, "epoch": 6.41, "learning_rate": 4.9005950616287865e-05, "loss": 0.4038, "step": 17740, "task_loss": 0.5813796520233154 }, { "compression_loss": 0.0, "distillation_loss": 0.425101637840271, "epoch": 6.41, "learning_rate": 4.895319397384182e-05, "loss": 0.4605, "step": 17750, "task_loss": 0.6897194385528564 }, { "epoch": 6.41, "eval_exact_match": 82.23273415326395, "eval_f1": 89.3422644153395, "step": 17750 }, { "compression_loss": 0.0, "distillation_loss": 0.4546336531639099, "epoch": 6.42, "learning_rate": 4.8900339602354324e-05, "loss": 0.4959, "step": 17760, "task_loss": 0.6193094253540039 }, { "compression_loss": 0.0, "distillation_loss": 0.5773555040359497, "epoch": 6.42, "learning_rate": 4.884738777436027e-05, "loss": 0.5355, "step": 17770, "task_loss": 0.8573752641677856 }, { "compression_loss": 0.0, "distillation_loss": 0.4337666630744934, "epoch": 6.43, "learning_rate": 4.87943387628971e-05, "loss": 0.4405, "step": 17780, "task_loss": 0.5380644202232361 }, { "compression_loss": 0.0, "distillation_loss": 0.5035710334777832, "epoch": 6.43, "learning_rate": 4.874119284150336e-05, "loss": 0.4727, "step": 17790, "task_loss": 0.7339311838150024 }, { "compression_loss": 0.0, "distillation_loss": 0.39960795640945435, "epoch": 6.43, "learning_rate": 4.868795028421728e-05, "loss": 0.4856, "step": 17800, "task_loss": 0.28274768590927124 }, { "compression_loss": 0.0, "distillation_loss": 0.42659035325050354, "epoch": 6.44, "learning_rate": 4.86346113655754e-05, "loss": 0.4335, "step": 17810, "task_loss": 0.7971694469451904 }, { "compression_loss": 0.0, "distillation_loss": 0.509783148765564, "epoch": 6.44, "learning_rate": 4.8581176360611114e-05, "loss": 0.474, "step": 17820, "task_loss": 0.7136898636817932 }, { "compression_loss": 0.0, "distillation_loss": 0.5505311489105225, "epoch": 6.44, "learning_rate": 4.852764554485328e-05, "loss": 0.4862, "step": 17830, "task_loss": 0.5040520429611206 }, { "compression_loss": 0.0, "distillation_loss": 0.3778039515018463, "epoch": 6.45, "learning_rate": 4.8474019194324777e-05, "loss": 0.4955, "step": 17840, "task_loss": 0.4302838444709778 }, { "compression_loss": 0.0, "distillation_loss": 0.38377246260643005, "epoch": 6.45, "learning_rate": 4.8420297585541116e-05, "loss": 0.4369, "step": 17850, "task_loss": 0.646184504032135 }, { "compression_loss": 0.0, "distillation_loss": 0.47774505615234375, "epoch": 6.45, "learning_rate": 4.836648099550896e-05, "loss": 0.4616, "step": 17860, "task_loss": 0.9298821091651917 }, { "compression_loss": 0.0, "distillation_loss": 0.43539664149284363, "epoch": 6.46, "learning_rate": 4.8312569701724754e-05, "loss": 0.432, "step": 17870, "task_loss": 0.5792849063873291 }, { "compression_loss": 0.0, "distillation_loss": 0.4134996831417084, "epoch": 6.46, "learning_rate": 4.8258563982173244e-05, "loss": 0.4159, "step": 17880, "task_loss": 0.7238243818283081 }, { "compression_loss": 0.0, "distillation_loss": 0.37172645330429077, "epoch": 6.47, "learning_rate": 4.82044641153261e-05, "loss": 0.4143, "step": 17890, "task_loss": 0.4079241156578064 }, { "compression_loss": 0.0, "distillation_loss": 0.44674891233444214, "epoch": 6.47, "learning_rate": 4.8150270380140414e-05, "loss": 0.4914, "step": 17900, "task_loss": 0.38229721784591675 }, { "compression_loss": 0.0, "distillation_loss": 0.5314511060714722, "epoch": 6.47, "learning_rate": 4.809598305605732e-05, "loss": 0.4783, "step": 17910, "task_loss": 0.935704231262207 }, { "compression_loss": 0.0, "distillation_loss": 0.5900159478187561, "epoch": 6.48, "learning_rate": 4.8041602423000505e-05, "loss": 0.4613, "step": 17920, "task_loss": 0.8068616986274719 }, { "compression_loss": 0.0, "distillation_loss": 0.3018052577972412, "epoch": 6.48, "learning_rate": 4.79871287613748e-05, "loss": 0.4789, "step": 17930, "task_loss": 0.6351147890090942 }, { "compression_loss": 0.0, "distillation_loss": 0.44554102420806885, "epoch": 6.48, "learning_rate": 4.793256235206473e-05, "loss": 0.4141, "step": 17940, "task_loss": 0.5927938222885132 }, { "compression_loss": 0.0, "distillation_loss": 0.40561315417289734, "epoch": 6.49, "learning_rate": 4.787790347643305e-05, "loss": 0.4314, "step": 17950, "task_loss": 0.4989495277404785 }, { "compression_loss": 0.0, "distillation_loss": 0.5982292294502258, "epoch": 6.49, "learning_rate": 4.782315241631929e-05, "loss": 0.483, "step": 17960, "task_loss": 1.0493371486663818 }, { "compression_loss": 0.0, "distillation_loss": 0.46976473927497864, "epoch": 6.49, "learning_rate": 4.776830945403833e-05, "loss": 0.4004, "step": 17970, "task_loss": 0.8933740854263306 }, { "compression_loss": 0.0, "distillation_loss": 0.36101314425468445, "epoch": 6.5, "learning_rate": 4.771337487237894e-05, "loss": 0.4111, "step": 17980, "task_loss": 0.6764540672302246 }, { "compression_loss": 0.0, "distillation_loss": 0.38094139099121094, "epoch": 6.5, "learning_rate": 4.7658348954602285e-05, "loss": 0.4418, "step": 17990, "task_loss": 0.8435592651367188 }, { "compression_loss": 0.0, "distillation_loss": 0.31571972370147705, "epoch": 6.51, "learning_rate": 4.76032319844405e-05, "loss": 0.4856, "step": 18000, "task_loss": 0.4472782015800476 }, { "epoch": 6.51, "eval_exact_match": 82.02459791863765, "eval_f1": 89.19706431714104, "step": 18000 }, { "compression_loss": 0.0, "distillation_loss": 0.3489418029785156, "epoch": 6.51, "learning_rate": 4.754802424609521e-05, "loss": 0.4659, "step": 18010, "task_loss": 0.3588530421257019 }, { "compression_loss": 0.0, "distillation_loss": 0.47085440158843994, "epoch": 6.51, "learning_rate": 4.74927260242361e-05, "loss": 0.4414, "step": 18020, "task_loss": 0.8189518451690674 }, { "compression_loss": 0.0, "distillation_loss": 0.49984803795814514, "epoch": 6.52, "learning_rate": 4.7437337603999376e-05, "loss": 0.423, "step": 18030, "task_loss": 0.3147749900817871 }, { "compression_loss": 0.0, "distillation_loss": 0.34126660227775574, "epoch": 6.52, "learning_rate": 4.7381859270986374e-05, "loss": 0.4315, "step": 18040, "task_loss": 0.3727048337459564 }, { "compression_loss": 0.0, "distillation_loss": 0.48777493834495544, "epoch": 6.52, "learning_rate": 4.732629131126202e-05, "loss": 0.4487, "step": 18050, "task_loss": 0.4514695405960083 }, { "compression_loss": 0.0, "distillation_loss": 0.44182878732681274, "epoch": 6.53, "learning_rate": 4.727063401135341e-05, "loss": 0.4873, "step": 18060, "task_loss": 0.5817041397094727 }, { "compression_loss": 0.0, "distillation_loss": 0.49510326981544495, "epoch": 6.53, "learning_rate": 4.7214887658248296e-05, "loss": 0.4561, "step": 18070, "task_loss": 0.5546749830245972 }, { "compression_loss": 0.0, "distillation_loss": 0.33820611238479614, "epoch": 6.53, "learning_rate": 4.715905253939361e-05, "loss": 0.4116, "step": 18080, "task_loss": 0.45475584268569946 }, { "compression_loss": 0.0, "distillation_loss": 0.35808318853378296, "epoch": 6.54, "learning_rate": 4.710312894269402e-05, "loss": 0.4597, "step": 18090, "task_loss": 0.5782244205474854 }, { "compression_loss": 0.0, "distillation_loss": 0.3415292203426361, "epoch": 6.54, "learning_rate": 4.704711715651038e-05, "loss": 0.4549, "step": 18100, "task_loss": 0.5431405901908875 }, { "compression_loss": 0.0, "distillation_loss": 0.350860595703125, "epoch": 6.54, "learning_rate": 4.699101746965829e-05, "loss": 0.4275, "step": 18110, "task_loss": 0.34007489681243896 }, { "compression_loss": 0.0, "distillation_loss": 0.4397695064544678, "epoch": 6.55, "learning_rate": 4.6934830171406636e-05, "loss": 0.4853, "step": 18120, "task_loss": 0.5518962144851685 }, { "compression_loss": 0.0, "distillation_loss": 0.4137437045574188, "epoch": 6.55, "learning_rate": 4.687855555147597e-05, "loss": 0.4352, "step": 18130, "task_loss": 0.4553642272949219 }, { "compression_loss": 0.0, "distillation_loss": 0.45643168687820435, "epoch": 6.56, "learning_rate": 4.682219390003719e-05, "loss": 0.4659, "step": 18140, "task_loss": 0.5998508930206299 }, { "compression_loss": 0.0, "distillation_loss": 0.5669125914573669, "epoch": 6.56, "learning_rate": 4.676574550770991e-05, "loss": 0.4963, "step": 18150, "task_loss": 0.7089822292327881 }, { "compression_loss": 0.0, "distillation_loss": 0.45733779668807983, "epoch": 6.56, "learning_rate": 4.6709210665561035e-05, "loss": 0.4983, "step": 18160, "task_loss": 0.47134196758270264 }, { "compression_loss": 0.0, "distillation_loss": 0.4086102843284607, "epoch": 6.57, "learning_rate": 4.6652589665103204e-05, "loss": 0.4618, "step": 18170, "task_loss": 0.4143943786621094 }, { "compression_loss": 0.0, "distillation_loss": 0.5107593536376953, "epoch": 6.57, "learning_rate": 4.659588279829335e-05, "loss": 0.4485, "step": 18180, "task_loss": 0.5046135187149048 }, { "compression_loss": 0.0, "distillation_loss": 0.3629281222820282, "epoch": 6.57, "learning_rate": 4.653909035753114e-05, "loss": 0.431, "step": 18190, "task_loss": 0.6013154983520508 }, { "compression_loss": 0.0, "distillation_loss": 0.4908069372177124, "epoch": 6.58, "learning_rate": 4.648221263565751e-05, "loss": 0.4443, "step": 18200, "task_loss": 0.5517657995223999 }, { "compression_loss": 0.0, "distillation_loss": 0.36872684955596924, "epoch": 6.58, "learning_rate": 4.642524992595309e-05, "loss": 0.4264, "step": 18210, "task_loss": 0.3264022171497345 }, { "compression_loss": 0.0, "distillation_loss": 0.5466926097869873, "epoch": 6.58, "learning_rate": 4.636820252213679e-05, "loss": 0.4298, "step": 18220, "task_loss": 0.9148642420768738 }, { "compression_loss": 0.0, "distillation_loss": 0.5774287581443787, "epoch": 6.59, "learning_rate": 4.63110707183642e-05, "loss": 0.4806, "step": 18230, "task_loss": 0.44062086939811707 }, { "compression_loss": 0.0, "distillation_loss": 0.3985748887062073, "epoch": 6.59, "learning_rate": 4.6253854809226115e-05, "loss": 0.4406, "step": 18240, "task_loss": 0.4817260801792145 }, { "compression_loss": 0.0, "distillation_loss": 0.2817017734050751, "epoch": 6.6, "learning_rate": 4.6196555089747e-05, "loss": 0.425, "step": 18250, "task_loss": 0.5607198476791382 }, { "epoch": 6.6, "eval_exact_match": 82.28003784295176, "eval_f1": 89.29688426511828, "step": 18250 }, { "compression_loss": 0.0, "distillation_loss": 0.38135290145874023, "epoch": 6.6, "learning_rate": 4.6139171855383473e-05, "loss": 0.4354, "step": 18260, "task_loss": 0.7876352667808533 }, { "compression_loss": 0.0, "distillation_loss": 0.40633225440979004, "epoch": 6.6, "learning_rate": 4.608170540202279e-05, "loss": 0.4319, "step": 18270, "task_loss": 0.35161352157592773 }, { "compression_loss": 0.0, "distillation_loss": 0.43270987272262573, "epoch": 6.61, "learning_rate": 4.602415602598132e-05, "loss": 0.4343, "step": 18280, "task_loss": 0.5352205634117126 }, { "compression_loss": 0.0, "distillation_loss": 0.4512401223182678, "epoch": 6.61, "learning_rate": 4.5966524024002976e-05, "loss": 0.49, "step": 18290, "task_loss": 0.3575047552585602 }, { "compression_loss": 0.0, "distillation_loss": 0.5585800409317017, "epoch": 6.61, "learning_rate": 4.590880969325774e-05, "loss": 0.4595, "step": 18300, "task_loss": 0.6749407052993774 }, { "compression_loss": 0.0, "distillation_loss": 0.39090049266815186, "epoch": 6.62, "learning_rate": 4.585101333134013e-05, "loss": 0.491, "step": 18310, "task_loss": 0.5440311431884766 }, { "compression_loss": 0.0, "distillation_loss": 0.7067247033119202, "epoch": 6.62, "learning_rate": 4.5793135236267626e-05, "loss": 0.5154, "step": 18320, "task_loss": 0.8724928498268127 }, { "compression_loss": 0.0, "distillation_loss": 0.36196187138557434, "epoch": 6.62, "learning_rate": 4.573517570647912e-05, "loss": 0.4102, "step": 18330, "task_loss": 0.6686896681785583 }, { "compression_loss": 0.0, "distillation_loss": 0.620430052280426, "epoch": 6.63, "learning_rate": 4.567713504083346e-05, "loss": 0.4554, "step": 18340, "task_loss": 0.8327699899673462 }, { "compression_loss": 0.0, "distillation_loss": 0.40227943658828735, "epoch": 6.63, "learning_rate": 4.5619013538607814e-05, "loss": 0.5117, "step": 18350, "task_loss": 0.7482736110687256 }, { "compression_loss": 0.0, "distillation_loss": 0.47123730182647705, "epoch": 6.64, "learning_rate": 4.556081149949621e-05, "loss": 0.456, "step": 18360, "task_loss": 0.5184352993965149 }, { "compression_loss": 0.0, "distillation_loss": 0.3152047395706177, "epoch": 6.64, "learning_rate": 4.5502529223607936e-05, "loss": 0.4324, "step": 18370, "task_loss": 0.5678387880325317 }, { "compression_loss": 0.0, "distillation_loss": 0.45514464378356934, "epoch": 6.64, "learning_rate": 4.5444167011466e-05, "loss": 0.4807, "step": 18380, "task_loss": 0.612543523311615 }, { "compression_loss": 0.0, "distillation_loss": 0.3955601453781128, "epoch": 6.65, "learning_rate": 4.5385725164005585e-05, "loss": 0.4173, "step": 18390, "task_loss": 0.455796480178833 }, { "compression_loss": 0.0, "distillation_loss": 0.396673321723938, "epoch": 6.65, "learning_rate": 4.5327203982572514e-05, "loss": 0.4673, "step": 18400, "task_loss": 0.5756609439849854 }, { "compression_loss": 0.0, "distillation_loss": 0.3982319235801697, "epoch": 6.65, "learning_rate": 4.526860376892167e-05, "loss": 0.4573, "step": 18410, "task_loss": 0.7715901136398315 }, { "compression_loss": 0.0, "distillation_loss": 0.5171875953674316, "epoch": 6.66, "learning_rate": 4.5209924825215474e-05, "loss": 0.4395, "step": 18420, "task_loss": 0.8181601166725159 }, { "compression_loss": 0.0, "distillation_loss": 0.4994076192378998, "epoch": 6.66, "learning_rate": 4.515116745402229e-05, "loss": 0.4798, "step": 18430, "task_loss": 0.5784910917282104 }, { "compression_loss": 0.0, "distillation_loss": 0.44892919063568115, "epoch": 6.66, "learning_rate": 4.509233195831487e-05, "loss": 0.4927, "step": 18440, "task_loss": 0.552018940448761 }, { "compression_loss": 0.0, "distillation_loss": 0.6276687383651733, "epoch": 6.67, "learning_rate": 4.5033418641468816e-05, "loss": 0.4696, "step": 18450, "task_loss": 0.43832433223724365 }, { "compression_loss": 0.0, "distillation_loss": 0.5821511745452881, "epoch": 6.67, "learning_rate": 4.497442780726101e-05, "loss": 0.5283, "step": 18460, "task_loss": 0.7905117869377136 }, { "compression_loss": 0.0, "distillation_loss": 0.4565989673137665, "epoch": 6.68, "learning_rate": 4.4915359759868034e-05, "loss": 0.4865, "step": 18470, "task_loss": 0.676786482334137 }, { "compression_loss": 0.0, "distillation_loss": 0.4760429859161377, "epoch": 6.68, "learning_rate": 4.485621480386459e-05, "loss": 0.4953, "step": 18480, "task_loss": 0.6198767423629761 }, { "compression_loss": 0.0, "distillation_loss": 0.3461025059223175, "epoch": 6.68, "learning_rate": 4.479699324422195e-05, "loss": 0.4824, "step": 18490, "task_loss": 0.5995087623596191 }, { "compression_loss": 0.0, "distillation_loss": 0.40634581446647644, "epoch": 6.69, "learning_rate": 4.473769538630642e-05, "loss": 0.4957, "step": 18500, "task_loss": 0.4016622006893158 }, { "epoch": 6.69, "eval_exact_match": 82.42194891201514, "eval_f1": 89.54146936438647, "step": 18500 }, { "compression_loss": 0.0, "distillation_loss": 0.386337012052536, "epoch": 6.69, "learning_rate": 4.4678321535877664e-05, "loss": 0.4908, "step": 18510, "task_loss": 0.4025079607963562 }, { "compression_loss": 0.0, "distillation_loss": 0.48946017026901245, "epoch": 6.69, "learning_rate": 4.4618871999087255e-05, "loss": 0.4565, "step": 18520, "task_loss": 0.6569441556930542 }, { "compression_loss": 0.0, "distillation_loss": 0.3229324221611023, "epoch": 6.7, "learning_rate": 4.455934708247696e-05, "loss": 0.4545, "step": 18530, "task_loss": 0.5276088714599609 }, { "compression_loss": 0.0, "distillation_loss": 0.5329983830451965, "epoch": 6.7, "learning_rate": 4.449974709297729e-05, "loss": 0.49, "step": 18540, "task_loss": 0.4105309545993805 }, { "compression_loss": 0.0, "distillation_loss": 0.5233172178268433, "epoch": 6.7, "learning_rate": 4.4440072337905815e-05, "loss": 0.4177, "step": 18550, "task_loss": 0.7585268616676331 }, { "compression_loss": 0.0, "distillation_loss": 0.5894426107406616, "epoch": 6.71, "learning_rate": 4.438032312496565e-05, "loss": 0.4819, "step": 18560, "task_loss": 0.8770580291748047 }, { "compression_loss": 0.0, "distillation_loss": 0.46357041597366333, "epoch": 6.71, "learning_rate": 4.432049976224383e-05, "loss": 0.5125, "step": 18570, "task_loss": 0.943016529083252 }, { "compression_loss": 0.0, "distillation_loss": 0.3945166766643524, "epoch": 6.71, "learning_rate": 4.4260602558209715e-05, "loss": 0.4636, "step": 18580, "task_loss": 0.4682915210723877 }, { "compression_loss": 0.0, "distillation_loss": 0.5548945665359497, "epoch": 6.72, "learning_rate": 4.420063182171345e-05, "loss": 0.4443, "step": 18590, "task_loss": 0.2853912115097046 }, { "compression_loss": 0.0, "distillation_loss": 0.5039093494415283, "epoch": 6.72, "learning_rate": 4.414058786198431e-05, "loss": 0.4431, "step": 18600, "task_loss": 0.5872310400009155 }, { "compression_loss": 0.0, "distillation_loss": 0.4797692894935608, "epoch": 6.73, "learning_rate": 4.408047098862914e-05, "loss": 0.5034, "step": 18610, "task_loss": 0.5820059776306152 }, { "compression_loss": 0.0, "distillation_loss": 0.361568421125412, "epoch": 6.73, "learning_rate": 4.402028151163076e-05, "loss": 0.45, "step": 18620, "task_loss": 0.9699276089668274 }, { "compression_loss": 0.0, "distillation_loss": 0.3789913058280945, "epoch": 6.73, "learning_rate": 4.396001974134634e-05, "loss": 0.4786, "step": 18630, "task_loss": 0.5539405941963196 }, { "compression_loss": 0.0, "distillation_loss": 0.5141831636428833, "epoch": 6.74, "learning_rate": 4.389968598850585e-05, "loss": 0.4835, "step": 18640, "task_loss": 1.006083607673645 }, { "compression_loss": 0.0, "distillation_loss": 0.37998679280281067, "epoch": 6.74, "learning_rate": 4.38392805642104e-05, "loss": 0.4461, "step": 18650, "task_loss": 0.8515222668647766 }, { "compression_loss": 0.0, "distillation_loss": 0.4921640157699585, "epoch": 6.74, "learning_rate": 4.3778803779930655e-05, "loss": 0.4936, "step": 18660, "task_loss": 0.9974778890609741 }, { "compression_loss": 0.0, "distillation_loss": 0.4045303463935852, "epoch": 6.75, "learning_rate": 4.371825594750528e-05, "loss": 0.438, "step": 18670, "task_loss": 0.9138513803482056 }, { "compression_loss": 0.0, "distillation_loss": 0.4021495580673218, "epoch": 6.75, "learning_rate": 4.365763737913924e-05, "loss": 0.4834, "step": 18680, "task_loss": 0.5079997777938843 }, { "compression_loss": 0.0, "distillation_loss": 0.5624068975448608, "epoch": 6.75, "learning_rate": 4.359694838740225e-05, "loss": 0.4978, "step": 18690, "task_loss": 0.5158367156982422 }, { "compression_loss": 0.0, "distillation_loss": 0.4766731858253479, "epoch": 6.76, "learning_rate": 4.353618928522718e-05, "loss": 0.5086, "step": 18700, "task_loss": 0.3754899501800537 }, { "compression_loss": 0.0, "distillation_loss": 0.3693751096725464, "epoch": 6.76, "learning_rate": 4.3475360385908385e-05, "loss": 0.4476, "step": 18710, "task_loss": 0.4243190884590149 }, { "compression_loss": 0.0, "distillation_loss": 0.4787391126155853, "epoch": 6.77, "learning_rate": 4.3414462003100126e-05, "loss": 0.4959, "step": 18720, "task_loss": 0.8037554025650024 }, { "compression_loss": 0.0, "distillation_loss": 0.5594469308853149, "epoch": 6.77, "learning_rate": 4.335349445081493e-05, "loss": 0.5292, "step": 18730, "task_loss": 0.5515763759613037 }, { "compression_loss": 0.0, "distillation_loss": 0.49907881021499634, "epoch": 6.77, "learning_rate": 4.329245804342201e-05, "loss": 0.4717, "step": 18740, "task_loss": 0.6888168454170227 }, { "compression_loss": 0.0, "distillation_loss": 0.3339458107948303, "epoch": 6.78, "learning_rate": 4.3231353095645604e-05, "loss": 0.413, "step": 18750, "task_loss": 0.3833264112472534 }, { "epoch": 6.78, "eval_exact_match": 82.639545884579, "eval_f1": 89.63747801805509, "step": 18750 }, { "compression_loss": 0.0, "distillation_loss": 0.44776710867881775, "epoch": 6.78, "learning_rate": 4.317017992256337e-05, "loss": 0.4343, "step": 18760, "task_loss": 0.36081254482269287 }, { "compression_loss": 0.0, "distillation_loss": 0.5654640197753906, "epoch": 6.78, "learning_rate": 4.310893883960476e-05, "loss": 0.4711, "step": 18770, "task_loss": 0.7916117906570435 }, { "compression_loss": 0.0, "distillation_loss": 0.47904425859451294, "epoch": 6.79, "learning_rate": 4.304763016254939e-05, "loss": 0.4494, "step": 18780, "task_loss": 0.6122930645942688 }, { "compression_loss": 0.0, "distillation_loss": 0.5881616473197937, "epoch": 6.79, "learning_rate": 4.298625420752541e-05, "loss": 0.4592, "step": 18790, "task_loss": 0.9475069642066956 }, { "compression_loss": 0.0, "distillation_loss": 0.49008673429489136, "epoch": 6.79, "learning_rate": 4.29248112910079e-05, "loss": 0.4432, "step": 18800, "task_loss": 0.3713112473487854 }, { "compression_loss": 0.0, "distillation_loss": 0.5173578262329102, "epoch": 6.8, "learning_rate": 4.286330172981718e-05, "loss": 0.437, "step": 18810, "task_loss": 0.8030617237091064 }, { "compression_loss": 0.0, "distillation_loss": 0.5325472950935364, "epoch": 6.8, "learning_rate": 4.2801725841117244e-05, "loss": 0.4291, "step": 18820, "task_loss": 0.7275118827819824 }, { "compression_loss": 0.0, "distillation_loss": 0.41245532035827637, "epoch": 6.81, "learning_rate": 4.274008394241407e-05, "loss": 0.4659, "step": 18830, "task_loss": 0.6393299102783203 }, { "compression_loss": 0.0, "distillation_loss": 0.3188970685005188, "epoch": 6.81, "learning_rate": 4.267837635155402e-05, "loss": 0.4185, "step": 18840, "task_loss": 0.23499251902103424 }, { "compression_loss": 0.0, "distillation_loss": 0.442760169506073, "epoch": 6.81, "learning_rate": 4.2616603386722185e-05, "loss": 0.4602, "step": 18850, "task_loss": 0.6652241945266724 }, { "compression_loss": 0.0, "distillation_loss": 0.46997952461242676, "epoch": 6.82, "learning_rate": 4.2554765366440736e-05, "loss": 0.4335, "step": 18860, "task_loss": 0.46291276812553406 }, { "compression_loss": 0.0, "distillation_loss": 0.4596181809902191, "epoch": 6.82, "learning_rate": 4.249286260956732e-05, "loss": 0.4364, "step": 18870, "task_loss": 0.43191826343536377 }, { "compression_loss": 0.0, "distillation_loss": 0.3417094945907593, "epoch": 6.82, "learning_rate": 4.2430895435293335e-05, "loss": 0.4439, "step": 18880, "task_loss": 0.45981624722480774 }, { "compression_loss": 0.0, "distillation_loss": 0.42703115940093994, "epoch": 6.83, "learning_rate": 4.2368864163142396e-05, "loss": 0.4596, "step": 18890, "task_loss": 0.5582728385925293 }, { "compression_loss": 0.0, "distillation_loss": 0.4443010091781616, "epoch": 6.83, "learning_rate": 4.230676911296859e-05, "loss": 0.443, "step": 18900, "task_loss": 0.6267966032028198 }, { "compression_loss": 0.0, "distillation_loss": 0.4458591938018799, "epoch": 6.83, "learning_rate": 4.2244610604954874e-05, "loss": 0.4731, "step": 18910, "task_loss": 0.81801837682724 }, { "compression_loss": 0.0, "distillation_loss": 0.38323748111724854, "epoch": 6.84, "learning_rate": 4.218238895961143e-05, "loss": 0.477, "step": 18920, "task_loss": 1.1212983131408691 }, { "compression_loss": 0.0, "distillation_loss": 0.7608765363693237, "epoch": 6.84, "learning_rate": 4.212010449777397e-05, "loss": 0.509, "step": 18930, "task_loss": 0.9213683009147644 }, { "compression_loss": 0.0, "distillation_loss": 0.38869547843933105, "epoch": 6.84, "learning_rate": 4.2057757540602134e-05, "loss": 0.4825, "step": 18940, "task_loss": 0.5943039655685425 }, { "compression_loss": 0.0, "distillation_loss": 0.355135053396225, "epoch": 6.85, "learning_rate": 4.199534840957779e-05, "loss": 0.4582, "step": 18950, "task_loss": 0.9915359020233154 }, { "compression_loss": 0.0, "distillation_loss": 0.4248897135257721, "epoch": 6.85, "learning_rate": 4.193287742650341e-05, "loss": 0.4378, "step": 18960, "task_loss": 0.6342843174934387 }, { "compression_loss": 0.0, "distillation_loss": 0.4118010401725769, "epoch": 6.86, "learning_rate": 4.1870344913500387e-05, "loss": 0.4238, "step": 18970, "task_loss": 0.6452910900115967 }, { "compression_loss": 0.0, "distillation_loss": 0.39560335874557495, "epoch": 6.86, "learning_rate": 4.180775119300738e-05, "loss": 0.4288, "step": 18980, "task_loss": 0.6294822096824646 }, { "compression_loss": 0.0, "distillation_loss": 0.3214435577392578, "epoch": 6.86, "learning_rate": 4.174509658777867e-05, "loss": 0.4134, "step": 18990, "task_loss": 0.2638058662414551 }, { "compression_loss": 0.0, "distillation_loss": 0.5299546122550964, "epoch": 6.87, "learning_rate": 4.168238142088247e-05, "loss": 0.459, "step": 19000, "task_loss": 0.5968713760375977 }, { "epoch": 6.87, "eval_exact_match": 82.23273415326395, "eval_f1": 89.16111651632492, "step": 19000 }, { "compression_loss": 0.0, "distillation_loss": 0.40809687972068787, "epoch": 6.87, "learning_rate": 4.1619606015699254e-05, "loss": 0.4248, "step": 19010, "task_loss": 0.7575485706329346 }, { "compression_loss": 0.0, "distillation_loss": 0.5880671739578247, "epoch": 6.87, "learning_rate": 4.155677069592015e-05, "loss": 0.4437, "step": 19020, "task_loss": 0.7457284927368164 }, { "compression_loss": 0.0, "distillation_loss": 0.3408096432685852, "epoch": 6.88, "learning_rate": 4.149387578554516e-05, "loss": 0.4678, "step": 19030, "task_loss": 0.8817156553268433 }, { "compression_loss": 0.0, "distillation_loss": 0.374206006526947, "epoch": 6.88, "learning_rate": 4.143092160888162e-05, "loss": 0.4213, "step": 19040, "task_loss": 0.5227565169334412 }, { "compression_loss": 0.0, "distillation_loss": 0.3797913193702698, "epoch": 6.88, "learning_rate": 4.1367908490542424e-05, "loss": 0.409, "step": 19050, "task_loss": 0.44340643286705017 }, { "compression_loss": 0.0, "distillation_loss": 0.3299967646598816, "epoch": 6.89, "learning_rate": 4.1304836755444396e-05, "loss": 0.4099, "step": 19060, "task_loss": 0.6511759757995605 }, { "compression_loss": 0.0, "distillation_loss": 0.6297036409378052, "epoch": 6.89, "learning_rate": 4.1241706728806614e-05, "loss": 0.4631, "step": 19070, "task_loss": 0.9518231749534607 }, { "compression_loss": 0.0, "distillation_loss": 0.27615126967430115, "epoch": 6.9, "learning_rate": 4.1178518736148726e-05, "loss": 0.4206, "step": 19080, "task_loss": 0.2932681739330292 }, { "compression_loss": 0.0, "distillation_loss": 0.3559025526046753, "epoch": 6.9, "learning_rate": 4.111527310328926e-05, "loss": 0.4576, "step": 19090, "task_loss": 0.3928833603858948 }, { "compression_loss": 0.0, "distillation_loss": 0.6219375133514404, "epoch": 6.9, "learning_rate": 4.105197015634399e-05, "loss": 0.4262, "step": 19100, "task_loss": 0.6940836906433105 }, { "compression_loss": 0.0, "distillation_loss": 0.34312307834625244, "epoch": 6.91, "learning_rate": 4.0988610221724165e-05, "loss": 0.4914, "step": 19110, "task_loss": 0.5049322247505188 }, { "compression_loss": 0.0, "distillation_loss": 0.5630295276641846, "epoch": 6.91, "learning_rate": 4.092519362613494e-05, "loss": 0.4919, "step": 19120, "task_loss": 0.8349260091781616 }, { "compression_loss": 0.0, "distillation_loss": 0.3788623809814453, "epoch": 6.91, "learning_rate": 4.08617206965736e-05, "loss": 0.4542, "step": 19130, "task_loss": 0.4850049614906311 }, { "compression_loss": 0.0, "distillation_loss": 0.4747163653373718, "epoch": 6.92, "learning_rate": 4.079819176032791e-05, "loss": 0.5041, "step": 19140, "task_loss": 0.7553776502609253 }, { "compression_loss": 0.0, "distillation_loss": 0.47813260555267334, "epoch": 6.92, "learning_rate": 4.073460714497443e-05, "loss": 0.4269, "step": 19150, "task_loss": 0.6414305567741394 }, { "compression_loss": 0.0, "distillation_loss": 0.34697532653808594, "epoch": 6.92, "learning_rate": 4.067096717837681e-05, "loss": 0.4815, "step": 19160, "task_loss": 0.41636407375335693 }, { "compression_loss": 0.0, "distillation_loss": 0.4089597165584564, "epoch": 6.93, "learning_rate": 4.060727218868413e-05, "loss": 0.4731, "step": 19170, "task_loss": 0.5453172326087952 }, { "compression_loss": 0.0, "distillation_loss": 0.504246711730957, "epoch": 6.93, "learning_rate": 4.054352250432917e-05, "loss": 0.4974, "step": 19180, "task_loss": 0.4027463495731354 }, { "compression_loss": 0.0, "distillation_loss": 0.4585569500923157, "epoch": 6.94, "learning_rate": 4.047971845402674e-05, "loss": 0.4536, "step": 19190, "task_loss": 0.49631595611572266 }, { "compression_loss": 0.0, "distillation_loss": 0.3411811590194702, "epoch": 6.94, "learning_rate": 4.0415860366771986e-05, "loss": 0.4268, "step": 19200, "task_loss": 0.20603856444358826 }, { "compression_loss": 0.0, "distillation_loss": 0.3674144744873047, "epoch": 6.94, "learning_rate": 4.0351948571838665e-05, "loss": 0.4593, "step": 19210, "task_loss": 0.7646399736404419 }, { "compression_loss": 0.0, "distillation_loss": 0.5466402173042297, "epoch": 6.95, "learning_rate": 4.0287983398777485e-05, "loss": 0.512, "step": 19220, "task_loss": 1.0787473917007446 }, { "compression_loss": 0.0, "distillation_loss": 0.3193405270576477, "epoch": 6.95, "learning_rate": 4.02239651774144e-05, "loss": 0.4307, "step": 19230, "task_loss": 0.415851354598999 }, { "compression_loss": 0.0, "distillation_loss": 0.3150692582130432, "epoch": 6.95, "learning_rate": 4.015989423784887e-05, "loss": 0.4961, "step": 19240, "task_loss": 0.33591964840888977 }, { "compression_loss": 0.0, "distillation_loss": 0.658031165599823, "epoch": 6.96, "learning_rate": 4.009577091045222e-05, "loss": 0.4653, "step": 19250, "task_loss": 1.2693332433700562 }, { "epoch": 6.96, "eval_exact_match": 82.82876064333018, "eval_f1": 89.67725624937343, "step": 19250 }, { "compression_loss": 0.0, "distillation_loss": 0.4980925917625427, "epoch": 6.96, "learning_rate": 4.003159552586588e-05, "loss": 0.4392, "step": 19260, "task_loss": 0.6456292867660522 }, { "compression_loss": 0.0, "distillation_loss": 0.4320065379142761, "epoch": 6.96, "learning_rate": 3.996736841499972e-05, "loss": 0.4594, "step": 19270, "task_loss": 0.566118597984314 }, { "compression_loss": 0.0, "distillation_loss": 0.4771836996078491, "epoch": 6.97, "learning_rate": 3.990308990903031e-05, "loss": 0.4181, "step": 19280, "task_loss": 0.8952730298042297 }, { "compression_loss": 0.0, "distillation_loss": 0.4210527837276459, "epoch": 6.97, "learning_rate": 3.983876033939925e-05, "loss": 0.465, "step": 19290, "task_loss": 0.7912062406539917 }, { "compression_loss": 0.0, "distillation_loss": 0.43877533078193665, "epoch": 6.98, "learning_rate": 3.977438003781144e-05, "loss": 0.4059, "step": 19300, "task_loss": 0.8035497069358826 }, { "compression_loss": 0.0, "distillation_loss": 0.3345944285392761, "epoch": 6.98, "learning_rate": 3.970994933623334e-05, "loss": 0.4686, "step": 19310, "task_loss": 0.6707329750061035 }, { "compression_loss": 0.0, "distillation_loss": 0.6642370820045471, "epoch": 6.98, "learning_rate": 3.9645468566891326e-05, "loss": 0.4572, "step": 19320, "task_loss": 1.2499730587005615 }, { "compression_loss": 0.0, "distillation_loss": 0.2801411747932434, "epoch": 6.99, "learning_rate": 3.958093806226994e-05, "loss": 0.4667, "step": 19330, "task_loss": 0.9053614139556885 }, { "compression_loss": 0.0, "distillation_loss": 0.6855303049087524, "epoch": 6.99, "learning_rate": 3.951635815511014e-05, "loss": 0.4742, "step": 19340, "task_loss": 0.8596073985099792 }, { "compression_loss": 0.0, "distillation_loss": 0.26637864112854004, "epoch": 6.99, "learning_rate": 3.9451729178407676e-05, "loss": 0.4094, "step": 19350, "task_loss": 0.24470390379428864 }, { "compression_loss": 0.0, "distillation_loss": 0.5194672346115112, "epoch": 7.0, "learning_rate": 3.9387051465411245e-05, "loss": 0.4304, "step": 19360, "task_loss": 0.4968796968460083 }, { "compression_loss": 0.0, "distillation_loss": 0.33181172609329224, "epoch": 7.0, "learning_rate": 3.93223253496209e-05, "loss": 0.4214, "step": 19370, "task_loss": 0.5487986207008362 }, { "compression_loss": 0.0, "distillation_loss": 0.3815804421901703, "epoch": 7.0, "learning_rate": 3.925755116478628e-05, "loss": 0.3593, "step": 19380, "task_loss": 0.4483642876148224 }, { "compression_loss": 0.0, "distillation_loss": 0.33547675609588623, "epoch": 7.01, "learning_rate": 3.919272924490484e-05, "loss": 0.4288, "step": 19390, "task_loss": 0.5775176286697388 }, { "compression_loss": 0.0, "distillation_loss": 0.36918535828590393, "epoch": 7.01, "learning_rate": 3.91278599242202e-05, "loss": 0.3726, "step": 19400, "task_loss": 0.6206930875778198 }, { "compression_loss": 0.0, "distillation_loss": 0.4281493127346039, "epoch": 7.01, "learning_rate": 3.9062943537220394e-05, "loss": 0.3883, "step": 19410, "task_loss": 0.4776683449745178 }, { "compression_loss": 0.0, "distillation_loss": 0.3090469241142273, "epoch": 7.02, "learning_rate": 3.899798041863615e-05, "loss": 0.3848, "step": 19420, "task_loss": 0.5897292494773865 }, { "compression_loss": 0.0, "distillation_loss": 0.37905633449554443, "epoch": 7.02, "learning_rate": 3.8932970903439134e-05, "loss": 0.4084, "step": 19430, "task_loss": 0.6165533661842346 }, { "compression_loss": 0.0, "distillation_loss": 0.42853739857673645, "epoch": 7.03, "learning_rate": 3.886791532684028e-05, "loss": 0.452, "step": 19440, "task_loss": 0.6996277570724487 }, { "compression_loss": 0.0, "distillation_loss": 0.44645875692367554, "epoch": 7.03, "learning_rate": 3.880281402428802e-05, "loss": 0.3729, "step": 19450, "task_loss": 0.6104946732521057 }, { "compression_loss": 0.0, "distillation_loss": 0.43968871235847473, "epoch": 7.03, "learning_rate": 3.8737667331466554e-05, "loss": 0.4113, "step": 19460, "task_loss": 1.4070665836334229 }, { "compression_loss": 0.0, "distillation_loss": 0.32989373803138733, "epoch": 7.04, "learning_rate": 3.8672475584294126e-05, "loss": 0.3745, "step": 19470, "task_loss": 0.7486312389373779 }, { "compression_loss": 0.0, "distillation_loss": 0.3704969584941864, "epoch": 7.04, "learning_rate": 3.860723911892134e-05, "loss": 0.3999, "step": 19480, "task_loss": 0.90283203125 }, { "compression_loss": 0.0, "distillation_loss": 0.41549235582351685, "epoch": 7.04, "learning_rate": 3.8541958271729304e-05, "loss": 0.398, "step": 19490, "task_loss": 0.6458536386489868 }, { "compression_loss": 0.0, "distillation_loss": 0.3486079275608063, "epoch": 7.05, "learning_rate": 3.847663337932806e-05, "loss": 0.3815, "step": 19500, "task_loss": 0.5868500471115112 }, { "epoch": 7.05, "eval_exact_match": 82.79091769157995, "eval_f1": 89.71267092502406, "step": 19500 }, { "compression_loss": 0.0, "distillation_loss": 0.34197574853897095, "epoch": 7.05, "learning_rate": 3.841126477855469e-05, "loss": 0.4191, "step": 19510, "task_loss": 0.7607687711715698 }, { "compression_loss": 0.0, "distillation_loss": 0.39318233728408813, "epoch": 7.05, "learning_rate": 3.834585280647169e-05, "loss": 0.368, "step": 19520, "task_loss": 0.4875791072845459 }, { "compression_loss": 0.0, "distillation_loss": 0.28019875288009644, "epoch": 7.06, "learning_rate": 3.82803978003652e-05, "loss": 0.4371, "step": 19530, "task_loss": 0.4103456437587738 }, { "compression_loss": 0.0, "distillation_loss": 0.40750378370285034, "epoch": 7.06, "learning_rate": 3.821490009774321e-05, "loss": 0.38, "step": 19540, "task_loss": 0.7055575251579285 }, { "compression_loss": 0.0, "distillation_loss": 0.349598228931427, "epoch": 7.07, "learning_rate": 3.814936003633393e-05, "loss": 0.4285, "step": 19550, "task_loss": 0.4073978066444397 }, { "compression_loss": 0.0, "distillation_loss": 0.46198201179504395, "epoch": 7.07, "learning_rate": 3.808377795408394e-05, "loss": 0.3817, "step": 19560, "task_loss": 0.9234828948974609 }, { "compression_loss": 0.0, "distillation_loss": 0.40304064750671387, "epoch": 7.07, "learning_rate": 3.80181541891565e-05, "loss": 0.3844, "step": 19570, "task_loss": 0.6997017860412598 }, { "compression_loss": 0.0, "distillation_loss": 0.4080723822116852, "epoch": 7.08, "learning_rate": 3.7952489079929826e-05, "loss": 0.3802, "step": 19580, "task_loss": 0.5474996566772461 }, { "compression_loss": 0.0, "distillation_loss": 0.30232366919517517, "epoch": 7.08, "learning_rate": 3.7886782964995304e-05, "loss": 0.3964, "step": 19590, "task_loss": 0.6640204191207886 }, { "compression_loss": 0.0, "distillation_loss": 0.4523887634277344, "epoch": 7.08, "learning_rate": 3.782103618315575e-05, "loss": 0.4024, "step": 19600, "task_loss": 0.5164095163345337 }, { "compression_loss": 0.0, "distillation_loss": 0.4048759639263153, "epoch": 7.09, "learning_rate": 3.775524907342367e-05, "loss": 0.4032, "step": 19610, "task_loss": 0.7620391845703125 }, { "compression_loss": 0.0, "distillation_loss": 0.3249843120574951, "epoch": 7.09, "learning_rate": 3.768942197501955e-05, "loss": 0.3736, "step": 19620, "task_loss": 0.4513287842273712 }, { "compression_loss": 0.0, "distillation_loss": 0.3706258237361908, "epoch": 7.09, "learning_rate": 3.7623555227370017e-05, "loss": 0.3994, "step": 19630, "task_loss": 0.6358516216278076 }, { "compression_loss": 0.0, "distillation_loss": 0.41293206810951233, "epoch": 7.1, "learning_rate": 3.755764917010618e-05, "loss": 0.3806, "step": 19640, "task_loss": 0.4972880482673645 }, { "compression_loss": 0.0, "distillation_loss": 0.4146842360496521, "epoch": 7.1, "learning_rate": 3.749170414306184e-05, "loss": 0.4166, "step": 19650, "task_loss": 0.4463941156864166 }, { "compression_loss": 0.0, "distillation_loss": 0.30623093247413635, "epoch": 7.11, "learning_rate": 3.7425720486271726e-05, "loss": 0.3611, "step": 19660, "task_loss": 0.24231675267219543 }, { "compression_loss": 0.0, "distillation_loss": 0.5473644733428955, "epoch": 7.11, "learning_rate": 3.735969853996976e-05, "loss": 0.4418, "step": 19670, "task_loss": 0.8937996625900269 }, { "compression_loss": 0.0, "distillation_loss": 0.3078344464302063, "epoch": 7.11, "learning_rate": 3.72936386445873e-05, "loss": 0.3904, "step": 19680, "task_loss": 0.4181814193725586 }, { "compression_loss": 0.0, "distillation_loss": 0.3279660940170288, "epoch": 7.12, "learning_rate": 3.722754114075137e-05, "loss": 0.393, "step": 19690, "task_loss": 0.41047608852386475 }, { "compression_loss": 0.0, "distillation_loss": 0.30717504024505615, "epoch": 7.12, "learning_rate": 3.716140636928295e-05, "loss": 0.3757, "step": 19700, "task_loss": 0.40464282035827637 }, { "compression_loss": 0.0, "distillation_loss": 0.37334302067756653, "epoch": 7.12, "learning_rate": 3.709523467119514e-05, "loss": 0.3663, "step": 19710, "task_loss": 0.7159653902053833 }, { "compression_loss": 0.0, "distillation_loss": 0.29783928394317627, "epoch": 7.13, "learning_rate": 3.7029026387691464e-05, "loss": 0.3925, "step": 19720, "task_loss": 0.36046409606933594 }, { "compression_loss": 0.0, "distillation_loss": 0.33988526463508606, "epoch": 7.13, "learning_rate": 3.696278186016411e-05, "loss": 0.3777, "step": 19730, "task_loss": 0.3282432556152344 }, { "compression_loss": 0.0, "distillation_loss": 0.520359218120575, "epoch": 7.13, "learning_rate": 3.6896501430192134e-05, "loss": 0.3646, "step": 19740, "task_loss": 0.7145998477935791 }, { "compression_loss": 0.0, "distillation_loss": 0.3622116446495056, "epoch": 7.14, "learning_rate": 3.6830185439539726e-05, "loss": 0.4161, "step": 19750, "task_loss": 0.8804785013198853 }, { "epoch": 7.14, "eval_exact_match": 82.51655629139073, "eval_f1": 89.55102082659974, "step": 19750 }, { "compression_loss": 0.0, "distillation_loss": 0.35625964403152466, "epoch": 7.14, "learning_rate": 3.676383423015442e-05, "loss": 0.3932, "step": 19760, "task_loss": 0.3122161626815796 }, { "compression_loss": 0.0, "distillation_loss": 0.3781408965587616, "epoch": 7.14, "learning_rate": 3.6697448144165357e-05, "loss": 0.3812, "step": 19770, "task_loss": 0.32182449102401733 }, { "compression_loss": 0.0, "distillation_loss": 0.5030004978179932, "epoch": 7.15, "learning_rate": 3.663102752388155e-05, "loss": 0.3671, "step": 19780, "task_loss": 0.6021063327789307 }, { "compression_loss": 0.0, "distillation_loss": 0.3663809299468994, "epoch": 7.15, "learning_rate": 3.656457271179003e-05, "loss": 0.4523, "step": 19790, "task_loss": 0.5944013595581055 }, { "compression_loss": 0.0, "distillation_loss": 0.3215387165546417, "epoch": 7.16, "learning_rate": 3.6498084050554164e-05, "loss": 0.403, "step": 19800, "task_loss": 0.6697392463684082 }, { "compression_loss": 0.0, "distillation_loss": 0.487884521484375, "epoch": 7.16, "learning_rate": 3.643156188301183e-05, "loss": 0.3921, "step": 19810, "task_loss": 0.4826103448867798 }, { "compression_loss": 0.0, "distillation_loss": 0.31640633940696716, "epoch": 7.16, "learning_rate": 3.636500655217371e-05, "loss": 0.4237, "step": 19820, "task_loss": 0.24287016689777374 }, { "compression_loss": 0.0, "distillation_loss": 0.4092869162559509, "epoch": 7.17, "learning_rate": 3.629841840122147e-05, "loss": 0.3917, "step": 19830, "task_loss": 0.7395460605621338 }, { "compression_loss": 0.0, "distillation_loss": 0.3161425292491913, "epoch": 7.17, "learning_rate": 3.6231797773505994e-05, "loss": 0.3842, "step": 19840, "task_loss": 0.4677432179450989 }, { "compression_loss": 0.0, "distillation_loss": 0.45856085419654846, "epoch": 7.17, "learning_rate": 3.616514501254567e-05, "loss": 0.3748, "step": 19850, "task_loss": 0.6139996647834778 }, { "compression_loss": 0.0, "distillation_loss": 0.4803004860877991, "epoch": 7.18, "learning_rate": 3.6098460462024506e-05, "loss": 0.4153, "step": 19860, "task_loss": 0.6763205528259277 }, { "compression_loss": 0.0, "distillation_loss": 0.4521372318267822, "epoch": 7.18, "learning_rate": 3.6031744465790495e-05, "loss": 0.4046, "step": 19870, "task_loss": 0.5324259996414185 }, { "compression_loss": 0.0, "distillation_loss": 0.34895187616348267, "epoch": 7.18, "learning_rate": 3.5964997367853755e-05, "loss": 0.3662, "step": 19880, "task_loss": 0.3761727213859558 }, { "compression_loss": 0.0, "distillation_loss": 0.3079893887042999, "epoch": 7.19, "learning_rate": 3.589821951238474e-05, "loss": 0.3581, "step": 19890, "task_loss": 0.48900896310806274 }, { "compression_loss": 0.0, "distillation_loss": 0.375984251499176, "epoch": 7.19, "learning_rate": 3.5831411243712555e-05, "loss": 0.4284, "step": 19900, "task_loss": 0.38719305396080017 }, { "compression_loss": 0.0, "distillation_loss": 0.4510999023914337, "epoch": 7.2, "learning_rate": 3.5764572906323075e-05, "loss": 0.4194, "step": 19910, "task_loss": 0.7633450031280518 }, { "compression_loss": 0.0, "distillation_loss": 0.4013802409172058, "epoch": 7.2, "learning_rate": 3.569770484485726e-05, "loss": 0.4213, "step": 19920, "task_loss": 0.39382585883140564 }, { "compression_loss": 0.0, "distillation_loss": 0.4381757974624634, "epoch": 7.2, "learning_rate": 3.563080740410932e-05, "loss": 0.405, "step": 19930, "task_loss": 0.4762646555900574 }, { "compression_loss": 0.0, "distillation_loss": 0.36635851860046387, "epoch": 7.21, "learning_rate": 3.556388092902494e-05, "loss": 0.4212, "step": 19940, "task_loss": 0.4292539358139038 }, { "compression_loss": 0.0, "distillation_loss": 0.4319227933883667, "epoch": 7.21, "learning_rate": 3.549692576469955e-05, "loss": 0.4012, "step": 19950, "task_loss": 0.705822229385376 }, { "compression_loss": 0.0, "distillation_loss": 0.2866457402706146, "epoch": 7.21, "learning_rate": 3.542994225637648e-05, "loss": 0.422, "step": 19960, "task_loss": 0.3992174565792084 }, { "compression_loss": 0.0, "distillation_loss": 0.43599292635917664, "epoch": 7.22, "learning_rate": 3.536293074944522e-05, "loss": 0.369, "step": 19970, "task_loss": 0.9676334857940674 }, { "compression_loss": 0.0, "distillation_loss": 0.48719683289527893, "epoch": 7.22, "learning_rate": 3.529589158943965e-05, "loss": 0.386, "step": 19980, "task_loss": 0.6881073713302612 }, { "compression_loss": 0.0, "distillation_loss": 0.3196744918823242, "epoch": 7.22, "learning_rate": 3.522882512203621e-05, "loss": 0.4417, "step": 19990, "task_loss": 0.5994395017623901 }, { "compression_loss": 0.0, "distillation_loss": 0.3680725693702698, "epoch": 7.23, "learning_rate": 3.516173169305216e-05, "loss": 0.3802, "step": 20000, "task_loss": 0.521813154220581 }, { "epoch": 7.23, "eval_exact_match": 82.36518448438979, "eval_f1": 89.49724844867848, "step": 20000 }, { "compression_loss": 0.0, "distillation_loss": 0.39387252926826477, "epoch": 7.23, "learning_rate": 3.5094611648443773e-05, "loss": 0.4165, "step": 20010, "task_loss": 0.5132284164428711 }, { "compression_loss": 0.0, "distillation_loss": 0.3651062548160553, "epoch": 7.24, "learning_rate": 3.5034181137980914e-05, "loss": 0.3956, "step": 20020, "task_loss": 0.5812886357307434 }, { "compression_loss": 0.0, "distillation_loss": 0.2903411388397217, "epoch": 7.24, "learning_rate": 3.496701147728595e-05, "loss": 0.3921, "step": 20030, "task_loss": 0.5770473480224609 }, { "compression_loss": 0.0, "distillation_loss": 0.321413516998291, "epoch": 7.24, "learning_rate": 3.489981620500956e-05, "loss": 0.4927, "step": 20040, "task_loss": 0.3058428168296814 }, { "compression_loss": 0.0, "distillation_loss": 0.36443406343460083, "epoch": 7.25, "learning_rate": 3.483259566763317e-05, "loss": 0.3565, "step": 20050, "task_loss": 0.5052045583724976 }, { "compression_loss": 0.0, "distillation_loss": 0.3698991537094116, "epoch": 7.25, "learning_rate": 3.476535021176848e-05, "loss": 0.3733, "step": 20060, "task_loss": 0.6003679037094116 }, { "compression_loss": 0.0, "distillation_loss": 0.34568488597869873, "epoch": 7.25, "learning_rate": 3.4698080184155674e-05, "loss": 0.3943, "step": 20070, "task_loss": 0.5871903896331787 }, { "compression_loss": 0.0, "distillation_loss": 0.373020738363266, "epoch": 7.26, "learning_rate": 3.463078593166162e-05, "loss": 0.394, "step": 20080, "task_loss": 0.3602370023727417 }, { "compression_loss": 0.0, "distillation_loss": 0.33881717920303345, "epoch": 7.26, "learning_rate": 3.456346780127812e-05, "loss": 0.4087, "step": 20090, "task_loss": 0.5115830898284912 }, { "compression_loss": 0.0, "distillation_loss": 0.5135064125061035, "epoch": 7.26, "learning_rate": 3.44961261401201e-05, "loss": 0.3805, "step": 20100, "task_loss": 0.4804335832595825 }, { "compression_loss": 0.0, "distillation_loss": 0.43249940872192383, "epoch": 7.27, "learning_rate": 3.4428761295423806e-05, "loss": 0.3812, "step": 20110, "task_loss": 0.7190282940864563 }, { "compression_loss": 0.0, "distillation_loss": 0.3258872926235199, "epoch": 7.27, "learning_rate": 3.436137361454502e-05, "loss": 0.4134, "step": 20120, "task_loss": 0.613054633140564 }, { "compression_loss": 0.0, "distillation_loss": 0.3143617510795593, "epoch": 7.28, "learning_rate": 3.42939634449573e-05, "loss": 0.3406, "step": 20130, "task_loss": 0.3883563280105591 }, { "compression_loss": 0.0, "distillation_loss": 0.34468793869018555, "epoch": 7.28, "learning_rate": 3.422653113425013e-05, "loss": 0.3866, "step": 20140, "task_loss": 0.5091181993484497 }, { "compression_loss": 0.0, "distillation_loss": 0.4208349585533142, "epoch": 7.28, "learning_rate": 3.415907703012719e-05, "loss": 0.3896, "step": 20150, "task_loss": 0.5660240650177002 }, { "compression_loss": 0.0, "distillation_loss": 0.3679567873477936, "epoch": 7.29, "learning_rate": 3.4091601480404535e-05, "loss": 0.3947, "step": 20160, "task_loss": 0.6140090227127075 }, { "compression_loss": 0.0, "distillation_loss": 0.43558749556541443, "epoch": 7.29, "learning_rate": 3.402410483300877e-05, "loss": 0.4325, "step": 20170, "task_loss": 0.9385803937911987 }, { "compression_loss": 0.0, "distillation_loss": 0.4312199354171753, "epoch": 7.29, "learning_rate": 3.395658743597531e-05, "loss": 0.3422, "step": 20180, "task_loss": 0.6504771709442139 }, { "compression_loss": 0.0, "distillation_loss": 0.333647757768631, "epoch": 7.3, "learning_rate": 3.388904963744656e-05, "loss": 0.4073, "step": 20190, "task_loss": 0.4446883201599121 }, { "compression_loss": 0.0, "distillation_loss": 0.3883225917816162, "epoch": 7.3, "learning_rate": 3.382149178567012e-05, "loss": 0.403, "step": 20200, "task_loss": 0.622755229473114 }, { "compression_loss": 0.0, "distillation_loss": 0.32313084602355957, "epoch": 7.3, "learning_rate": 3.3753914228997e-05, "loss": 0.4027, "step": 20210, "task_loss": 0.6717528104782104 }, { "compression_loss": 0.0, "distillation_loss": 0.3080373704433441, "epoch": 7.31, "learning_rate": 3.3686317315879785e-05, "loss": 0.3907, "step": 20220, "task_loss": 0.4064512848854065 }, { "compression_loss": 0.0, "distillation_loss": 0.32508981227874756, "epoch": 7.31, "learning_rate": 3.361870139487092e-05, "loss": 0.3981, "step": 20230, "task_loss": 0.2644849419593811 }, { "compression_loss": 0.0, "distillation_loss": 0.2956303358078003, "epoch": 7.31, "learning_rate": 3.3551066814620796e-05, "loss": 0.3662, "step": 20240, "task_loss": 0.3841955065727234 }, { "compression_loss": 0.0, "distillation_loss": 0.3077021837234497, "epoch": 7.32, "learning_rate": 3.3483413923876075e-05, "loss": 0.3706, "step": 20250, "task_loss": 0.4486422538757324 }, { "epoch": 7.32, "eval_exact_match": 82.73415326395458, "eval_f1": 89.86822273734353, "step": 20250 }, { "compression_loss": 0.0, "distillation_loss": 0.35297924280166626, "epoch": 7.32, "learning_rate": 3.3415743071477814e-05, "loss": 0.3836, "step": 20260, "task_loss": 0.4488530457019806 }, { "compression_loss": 0.0, "distillation_loss": 0.4628410041332245, "epoch": 7.33, "learning_rate": 3.334805460635966e-05, "loss": 0.4589, "step": 20270, "task_loss": 0.9875482320785522 }, { "compression_loss": 0.0, "distillation_loss": 0.416703999042511, "epoch": 7.33, "learning_rate": 3.328034887754613e-05, "loss": 0.3884, "step": 20280, "task_loss": 0.3616025447845459 }, { "compression_loss": 0.0, "distillation_loss": 0.3806164264678955, "epoch": 7.33, "learning_rate": 3.321262623415071e-05, "loss": 0.4103, "step": 20290, "task_loss": 0.4707099199295044 }, { "compression_loss": 0.0, "distillation_loss": 0.4472636580467224, "epoch": 7.34, "learning_rate": 3.3144887025374125e-05, "loss": 0.4004, "step": 20300, "task_loss": 0.48857200145721436 }, { "compression_loss": 0.0, "distillation_loss": 0.3303232789039612, "epoch": 7.34, "learning_rate": 3.307713160050252e-05, "loss": 0.3606, "step": 20310, "task_loss": 0.4707809388637543 }, { "compression_loss": 0.0, "distillation_loss": 0.4367483854293823, "epoch": 7.34, "learning_rate": 3.300936030890564e-05, "loss": 0.4182, "step": 20320, "task_loss": 0.7906453609466553 }, { "compression_loss": 0.0, "distillation_loss": 0.28419390320777893, "epoch": 7.35, "learning_rate": 3.294157350003507e-05, "loss": 0.3636, "step": 20330, "task_loss": 0.2948117256164551 }, { "compression_loss": 0.0, "distillation_loss": 0.339834988117218, "epoch": 7.35, "learning_rate": 3.287377152342236e-05, "loss": 0.3916, "step": 20340, "task_loss": 0.34666329622268677 }, { "compression_loss": 0.0, "distillation_loss": 0.2876173257827759, "epoch": 7.35, "learning_rate": 3.280595472867733e-05, "loss": 0.4121, "step": 20350, "task_loss": 0.3110664188861847 }, { "compression_loss": 0.0, "distillation_loss": 0.317335307598114, "epoch": 7.36, "learning_rate": 3.273812346548617e-05, "loss": 0.3872, "step": 20360, "task_loss": 0.4784393012523651 }, { "compression_loss": 0.0, "distillation_loss": 0.4668826460838318, "epoch": 7.36, "learning_rate": 3.2670278083609685e-05, "loss": 0.3571, "step": 20370, "task_loss": 0.6749038100242615 }, { "compression_loss": 0.0, "distillation_loss": 0.3103598654270172, "epoch": 7.37, "learning_rate": 3.260241893288147e-05, "loss": 0.3929, "step": 20380, "task_loss": 0.7942910194396973 }, { "compression_loss": 0.0, "distillation_loss": 0.332332581281662, "epoch": 7.37, "learning_rate": 3.253454636320613e-05, "loss": 0.3819, "step": 20390, "task_loss": 0.4826880097389221 }, { "compression_loss": 0.0, "distillation_loss": 0.40551626682281494, "epoch": 7.37, "learning_rate": 3.246666072455746e-05, "loss": 0.3616, "step": 20400, "task_loss": 0.43429872393608093 }, { "compression_loss": 0.0, "distillation_loss": 0.3160107135772705, "epoch": 7.38, "learning_rate": 3.2398762366976647e-05, "loss": 0.3852, "step": 20410, "task_loss": 0.43068432807922363 }, { "compression_loss": 0.0, "distillation_loss": 0.4586879014968872, "epoch": 7.38, "learning_rate": 3.2330851640570427e-05, "loss": 0.4322, "step": 20420, "task_loss": 0.702597975730896 }, { "compression_loss": 0.0, "distillation_loss": 0.33801770210266113, "epoch": 7.38, "learning_rate": 3.226292889550938e-05, "loss": 0.4115, "step": 20430, "task_loss": 1.104683756828308 }, { "compression_loss": 0.0, "distillation_loss": 0.4458228051662445, "epoch": 7.39, "learning_rate": 3.219499448202598e-05, "loss": 0.3881, "step": 20440, "task_loss": 0.5195741057395935 }, { "compression_loss": 0.0, "distillation_loss": 0.3442004323005676, "epoch": 7.39, "learning_rate": 3.212704875041293e-05, "loss": 0.3945, "step": 20450, "task_loss": 0.6913373470306396 }, { "compression_loss": 0.0, "distillation_loss": 0.420304536819458, "epoch": 7.39, "learning_rate": 3.205909205102128e-05, "loss": 0.4567, "step": 20460, "task_loss": 0.4324960708618164 }, { "compression_loss": 0.0, "distillation_loss": 0.43511003255844116, "epoch": 7.4, "learning_rate": 3.19911247342586e-05, "loss": 0.3947, "step": 20470, "task_loss": 0.8736984729766846 }, { "compression_loss": 0.0, "distillation_loss": 0.4031512141227722, "epoch": 7.4, "learning_rate": 3.192314715058724e-05, "loss": 0.4193, "step": 20480, "task_loss": 0.5991595983505249 }, { "compression_loss": 0.0, "distillation_loss": 0.5769762992858887, "epoch": 7.41, "learning_rate": 3.185515965052248e-05, "loss": 0.4721, "step": 20490, "task_loss": 0.5272440314292908 }, { "compression_loss": 0.0, "distillation_loss": 0.32588744163513184, "epoch": 7.41, "learning_rate": 3.1787162584630735e-05, "loss": 0.3777, "step": 20500, "task_loss": 0.5834881067276001 }, { "epoch": 7.41, "eval_exact_match": 82.74361400189214, "eval_f1": 89.73354102474433, "step": 20500 }, { "compression_loss": 0.0, "distillation_loss": 0.34094029664993286, "epoch": 7.41, "learning_rate": 3.1719156303527744e-05, "loss": 0.3924, "step": 20510, "task_loss": 0.6706292629241943 }, { "compression_loss": 0.0, "distillation_loss": 0.47249582409858704, "epoch": 7.42, "learning_rate": 3.1651141157876755e-05, "loss": 0.4, "step": 20520, "task_loss": 0.5581311583518982 }, { "compression_loss": 0.0, "distillation_loss": 0.3450727164745331, "epoch": 7.42, "learning_rate": 3.1583117498386725e-05, "loss": 0.4057, "step": 20530, "task_loss": 0.6178710460662842 }, { "compression_loss": 0.0, "distillation_loss": 0.3991689383983612, "epoch": 7.42, "learning_rate": 3.1515085675810534e-05, "loss": 0.3887, "step": 20540, "task_loss": 0.5536105632781982 }, { "compression_loss": 0.0, "distillation_loss": 0.36258408427238464, "epoch": 7.43, "learning_rate": 3.144704604094312e-05, "loss": 0.3549, "step": 20550, "task_loss": 0.48555445671081543 }, { "compression_loss": 0.0, "distillation_loss": 0.4089629650115967, "epoch": 7.43, "learning_rate": 3.137899894461973e-05, "loss": 0.3655, "step": 20560, "task_loss": 0.2968985438346863 }, { "compression_loss": 0.0, "distillation_loss": 0.45736321806907654, "epoch": 7.43, "learning_rate": 3.131094473771406e-05, "loss": 0.3927, "step": 20570, "task_loss": 0.938363790512085 }, { "compression_loss": 0.0, "distillation_loss": 0.35800325870513916, "epoch": 7.44, "learning_rate": 3.12428837711365e-05, "loss": 0.4001, "step": 20580, "task_loss": 0.7353466749191284 }, { "compression_loss": 0.0, "distillation_loss": 0.4800975024700165, "epoch": 7.44, "learning_rate": 3.117481639583228e-05, "loss": 0.4031, "step": 20590, "task_loss": 0.35843586921691895 }, { "compression_loss": 0.0, "distillation_loss": 0.2761758267879486, "epoch": 7.44, "learning_rate": 3.110674296277967e-05, "loss": 0.3579, "step": 20600, "task_loss": 0.5030902028083801 }, { "compression_loss": 0.0, "distillation_loss": 0.5137525200843811, "epoch": 7.45, "learning_rate": 3.103866382298818e-05, "loss": 0.3511, "step": 20610, "task_loss": 0.6685386896133423 }, { "compression_loss": 0.0, "distillation_loss": 0.45088014006614685, "epoch": 7.45, "learning_rate": 3.097057932749675e-05, "loss": 0.4005, "step": 20620, "task_loss": 0.695102334022522 }, { "compression_loss": 0.0, "distillation_loss": 0.3517519533634186, "epoch": 7.46, "learning_rate": 3.0902489827371924e-05, "loss": 0.3538, "step": 20630, "task_loss": 0.6277685165405273 }, { "compression_loss": 0.0, "distillation_loss": 0.4504373073577881, "epoch": 7.46, "learning_rate": 3.083439567370609e-05, "loss": 0.4344, "step": 20640, "task_loss": 0.8747158050537109 }, { "compression_loss": 0.0, "distillation_loss": 0.5259716510772705, "epoch": 7.46, "learning_rate": 3.076629721761557e-05, "loss": 0.4092, "step": 20650, "task_loss": 0.684150218963623 }, { "compression_loss": 0.0, "distillation_loss": 0.33155426383018494, "epoch": 7.47, "learning_rate": 3.069819481023892e-05, "loss": 0.3818, "step": 20660, "task_loss": 0.4055613875389099 }, { "compression_loss": 0.0, "distillation_loss": 0.3492274284362793, "epoch": 7.47, "learning_rate": 3.0630088802735046e-05, "loss": 0.4271, "step": 20670, "task_loss": 0.657691478729248 }, { "compression_loss": 0.0, "distillation_loss": 0.47741013765335083, "epoch": 7.47, "learning_rate": 3.056197954628143e-05, "loss": 0.4267, "step": 20680, "task_loss": 0.8212167620658875 }, { "compression_loss": 0.0, "distillation_loss": 0.2485727220773697, "epoch": 7.48, "learning_rate": 3.0493867392072296e-05, "loss": 0.3732, "step": 20690, "task_loss": 0.37689927220344543 }, { "compression_loss": 0.0, "distillation_loss": 0.47409436106681824, "epoch": 7.48, "learning_rate": 3.0425752691316814e-05, "loss": 0.4012, "step": 20700, "task_loss": 0.818795382976532 }, { "compression_loss": 0.0, "distillation_loss": 0.3214324116706848, "epoch": 7.48, "learning_rate": 3.03576357952373e-05, "loss": 0.3844, "step": 20710, "task_loss": 0.9982783198356628 }, { "compression_loss": 0.0, "distillation_loss": 0.49584317207336426, "epoch": 7.49, "learning_rate": 3.028951705506736e-05, "loss": 0.4084, "step": 20720, "task_loss": 0.7639179229736328 }, { "compression_loss": 0.0, "distillation_loss": 0.2755817770957947, "epoch": 7.49, "learning_rate": 3.0221396822050114e-05, "loss": 0.3612, "step": 20730, "task_loss": 0.32425567507743835 }, { "compression_loss": 0.0, "distillation_loss": 0.3090718388557434, "epoch": 7.5, "learning_rate": 3.0153275447436423e-05, "loss": 0.3943, "step": 20740, "task_loss": 0.2633516192436218 }, { "compression_loss": 0.0, "distillation_loss": 0.41136348247528076, "epoch": 7.5, "learning_rate": 3.0085153282482977e-05, "loss": 0.368, "step": 20750, "task_loss": 0.3509225845336914 }, { "epoch": 7.5, "eval_exact_match": 82.76253547776727, "eval_f1": 89.78687617165846, "step": 20750 }, { "compression_loss": 0.0, "distillation_loss": 0.2749996483325958, "epoch": 7.5, "learning_rate": 3.0017030678450578e-05, "loss": 0.3833, "step": 20760, "task_loss": 0.2450357973575592 }, { "compression_loss": 0.0, "distillation_loss": 0.3595663905143738, "epoch": 7.51, "learning_rate": 2.9948907986602262e-05, "loss": 0.3604, "step": 20770, "task_loss": 0.8052037954330444 }, { "compression_loss": 0.0, "distillation_loss": 0.3556064963340759, "epoch": 7.51, "learning_rate": 2.988078555820156e-05, "loss": 0.3699, "step": 20780, "task_loss": 0.8142600059509277 }, { "compression_loss": 0.0, "distillation_loss": 0.3247981071472168, "epoch": 7.51, "learning_rate": 2.98126637445106e-05, "loss": 0.4256, "step": 20790, "task_loss": 0.5774292945861816 }, { "compression_loss": 0.0, "distillation_loss": 0.39211082458496094, "epoch": 7.52, "learning_rate": 2.9744542896788375e-05, "loss": 0.3755, "step": 20800, "task_loss": 0.44028204679489136 }, { "compression_loss": 0.0, "distillation_loss": 0.3495498597621918, "epoch": 7.52, "learning_rate": 2.967642336628887e-05, "loss": 0.3667, "step": 20810, "task_loss": 0.7542967796325684 }, { "compression_loss": 0.0, "distillation_loss": 0.5816289186477661, "epoch": 7.52, "learning_rate": 2.9608305504259303e-05, "loss": 0.464, "step": 20820, "task_loss": 0.7060995101928711 }, { "compression_loss": 0.0, "distillation_loss": 0.38165950775146484, "epoch": 7.53, "learning_rate": 2.9540189661938263e-05, "loss": 0.3788, "step": 20830, "task_loss": 0.652009129524231 }, { "compression_loss": 0.0, "distillation_loss": 0.2923566699028015, "epoch": 7.53, "learning_rate": 2.9472076190553952e-05, "loss": 0.3983, "step": 20840, "task_loss": 0.5197100043296814 }, { "compression_loss": 0.0, "distillation_loss": 0.4252726435661316, "epoch": 7.54, "learning_rate": 2.9403965441322315e-05, "loss": 0.3834, "step": 20850, "task_loss": 0.48940667510032654 }, { "compression_loss": 0.0, "distillation_loss": 0.33503347635269165, "epoch": 7.54, "learning_rate": 2.9335857765445304e-05, "loss": 0.3915, "step": 20860, "task_loss": 1.0881128311157227 }, { "compression_loss": 0.0, "distillation_loss": 0.3840358257293701, "epoch": 7.54, "learning_rate": 2.9267753514108968e-05, "loss": 0.3975, "step": 20870, "task_loss": 0.38832956552505493 }, { "compression_loss": 0.0, "distillation_loss": 0.5155704021453857, "epoch": 7.55, "learning_rate": 2.9199653038481753e-05, "loss": 0.3877, "step": 20880, "task_loss": 0.7354077100753784 }, { "compression_loss": 0.0, "distillation_loss": 0.3932916820049286, "epoch": 7.55, "learning_rate": 2.9131556689712597e-05, "loss": 0.3647, "step": 20890, "task_loss": 0.3581256866455078 }, { "compression_loss": 0.0, "distillation_loss": 0.48106008768081665, "epoch": 7.55, "learning_rate": 2.9063464818929186e-05, "loss": 0.4336, "step": 20900, "task_loss": 0.7806867361068726 }, { "compression_loss": 0.0, "distillation_loss": 0.30515527725219727, "epoch": 7.56, "learning_rate": 2.8995377777236085e-05, "loss": 0.3727, "step": 20910, "task_loss": 0.5825417637825012 }, { "compression_loss": 0.0, "distillation_loss": 0.347696989774704, "epoch": 7.56, "learning_rate": 2.8927295915712994e-05, "loss": 0.384, "step": 20920, "task_loss": 0.5347048044204712 }, { "compression_loss": 0.0, "distillation_loss": 0.31037092208862305, "epoch": 7.56, "learning_rate": 2.885921958541287e-05, "loss": 0.3555, "step": 20930, "task_loss": 0.301509827375412 }, { "compression_loss": 0.0, "distillation_loss": 0.42140477895736694, "epoch": 7.57, "learning_rate": 2.8791149137360176e-05, "loss": 0.3656, "step": 20940, "task_loss": 0.45308709144592285 }, { "compression_loss": 0.0, "distillation_loss": 0.44975745677948, "epoch": 7.57, "learning_rate": 2.872308492254901e-05, "loss": 0.4172, "step": 20950, "task_loss": 0.6830534934997559 }, { "compression_loss": 0.0, "distillation_loss": 0.35245752334594727, "epoch": 7.57, "learning_rate": 2.865502729194139e-05, "loss": 0.3779, "step": 20960, "task_loss": 0.3799573481082916 }, { "compression_loss": 0.0, "distillation_loss": 0.36846715211868286, "epoch": 7.58, "learning_rate": 2.8586976596465308e-05, "loss": 0.3959, "step": 20970, "task_loss": 0.5989868640899658 }, { "compression_loss": 0.0, "distillation_loss": 0.33940085768699646, "epoch": 7.58, "learning_rate": 2.8518933187013053e-05, "loss": 0.3268, "step": 20980, "task_loss": 0.5030498504638672 }, { "compression_loss": 0.0, "distillation_loss": 0.4226445257663727, "epoch": 7.59, "learning_rate": 2.8450897414439317e-05, "loss": 0.3917, "step": 20990, "task_loss": 0.596397876739502 }, { "compression_loss": 0.0, "distillation_loss": 0.39020735025405884, "epoch": 7.59, "learning_rate": 2.838286962955943e-05, "loss": 0.3701, "step": 21000, "task_loss": 0.7234203815460205 }, { "epoch": 7.59, "eval_exact_match": 82.58278145695364, "eval_f1": 89.55032112084302, "step": 21000 }, { "compression_loss": 0.0, "distillation_loss": 0.5218778848648071, "epoch": 7.59, "learning_rate": 2.8314850183147516e-05, "loss": 0.3957, "step": 21010, "task_loss": 0.6053802967071533 }, { "compression_loss": 0.0, "distillation_loss": 0.348112016916275, "epoch": 7.6, "learning_rate": 2.8246839425934724e-05, "loss": 0.377, "step": 21020, "task_loss": 0.6146667003631592 }, { "compression_loss": 0.0, "distillation_loss": 0.3650135397911072, "epoch": 7.6, "learning_rate": 2.817883770860737e-05, "loss": 0.3546, "step": 21030, "task_loss": 0.37088972330093384 }, { "compression_loss": 0.0, "distillation_loss": 0.3635476231575012, "epoch": 7.6, "learning_rate": 2.81108453818052e-05, "loss": 0.3664, "step": 21040, "task_loss": 0.4450761079788208 }, { "compression_loss": 0.0, "distillation_loss": 0.4935896098613739, "epoch": 7.61, "learning_rate": 2.8042862796119482e-05, "loss": 0.4058, "step": 21050, "task_loss": 0.5729918479919434 }, { "compression_loss": 0.0, "distillation_loss": 0.3340076804161072, "epoch": 7.61, "learning_rate": 2.7974890302091327e-05, "loss": 0.4005, "step": 21060, "task_loss": 0.7589318752288818 }, { "compression_loss": 0.0, "distillation_loss": 0.27468156814575195, "epoch": 7.61, "learning_rate": 2.7906928250209743e-05, "loss": 0.4049, "step": 21070, "task_loss": 0.6373112797737122 }, { "compression_loss": 0.0, "distillation_loss": 0.6113014817237854, "epoch": 7.62, "learning_rate": 2.783897699090994e-05, "loss": 0.4058, "step": 21080, "task_loss": 1.0075609683990479 }, { "compression_loss": 0.0, "distillation_loss": 0.3902479410171509, "epoch": 7.62, "learning_rate": 2.7771036874571443e-05, "loss": 0.4069, "step": 21090, "task_loss": 0.6849322319030762 }, { "compression_loss": 0.0, "distillation_loss": 0.31239408254623413, "epoch": 7.63, "learning_rate": 2.770310825151635e-05, "loss": 0.3778, "step": 21100, "task_loss": 0.46959567070007324 }, { "compression_loss": 0.0, "distillation_loss": 0.33670780062675476, "epoch": 7.63, "learning_rate": 2.763519147200748e-05, "loss": 0.3693, "step": 21110, "task_loss": 0.45546576380729675 }, { "compression_loss": 0.0, "distillation_loss": 0.4706672430038452, "epoch": 7.63, "learning_rate": 2.7567286886246593e-05, "loss": 0.3903, "step": 21120, "task_loss": 0.5884417295455933 }, { "compression_loss": 0.0, "distillation_loss": 0.3736233115196228, "epoch": 7.64, "learning_rate": 2.749939484437255e-05, "loss": 0.4197, "step": 21130, "task_loss": 0.3115198016166687 }, { "compression_loss": 0.0, "distillation_loss": 0.39010971784591675, "epoch": 7.64, "learning_rate": 2.7431515696459577e-05, "loss": 0.4015, "step": 21140, "task_loss": 0.5224123001098633 }, { "compression_loss": 0.0, "distillation_loss": 0.29932332038879395, "epoch": 7.64, "learning_rate": 2.736364979251535e-05, "loss": 0.3853, "step": 21150, "task_loss": 0.4086366891860962 }, { "compression_loss": 0.0, "distillation_loss": 0.42739713191986084, "epoch": 7.65, "learning_rate": 2.7295797482479327e-05, "loss": 0.4023, "step": 21160, "task_loss": 0.9876140356063843 }, { "compression_loss": 0.0, "distillation_loss": 0.3685208857059479, "epoch": 7.65, "learning_rate": 2.7227959116220803e-05, "loss": 0.3945, "step": 21170, "task_loss": 0.5753424167633057 }, { "compression_loss": 0.0, "distillation_loss": 0.47203847765922546, "epoch": 7.65, "learning_rate": 2.7160135043537236e-05, "loss": 0.3561, "step": 21180, "task_loss": 0.7574573755264282 }, { "compression_loss": 0.0, "distillation_loss": 0.3211335837841034, "epoch": 7.66, "learning_rate": 2.7092325614152328e-05, "loss": 0.4059, "step": 21190, "task_loss": 0.621513843536377 }, { "compression_loss": 0.0, "distillation_loss": 0.4134974479675293, "epoch": 7.66, "learning_rate": 2.7024531177714316e-05, "loss": 0.3693, "step": 21200, "task_loss": 0.39401504397392273 }, { "compression_loss": 0.0, "distillation_loss": 0.29084134101867676, "epoch": 7.67, "learning_rate": 2.6956752083794094e-05, "loss": 0.3972, "step": 21210, "task_loss": 0.49650153517723083 }, { "compression_loss": 0.0, "distillation_loss": 0.5855056047439575, "epoch": 7.67, "learning_rate": 2.688898868188348e-05, "loss": 0.425, "step": 21220, "task_loss": 0.6136555075645447 }, { "compression_loss": 0.0, "distillation_loss": 0.4040588140487671, "epoch": 7.67, "learning_rate": 2.682124132139334e-05, "loss": 0.3688, "step": 21230, "task_loss": 0.5768276453018188 }, { "compression_loss": 0.0, "distillation_loss": 0.3762333393096924, "epoch": 7.68, "learning_rate": 2.675351035165188e-05, "loss": 0.41, "step": 21240, "task_loss": 0.6931823492050171 }, { "compression_loss": 0.0, "distillation_loss": 0.5697497725486755, "epoch": 7.68, "learning_rate": 2.668579612190271e-05, "loss": 0.4016, "step": 21250, "task_loss": 0.7728415131568909 }, { "epoch": 7.68, "eval_exact_match": 83.15042573320719, "eval_f1": 90.07272671513242, "step": 21250 }, { "compression_loss": 0.0, "distillation_loss": 0.39408552646636963, "epoch": 7.68, "learning_rate": 2.6618098981303204e-05, "loss": 0.3603, "step": 21260, "task_loss": 0.6829339265823364 }, { "compression_loss": 0.0, "distillation_loss": 0.4046620726585388, "epoch": 7.69, "learning_rate": 2.655041927892257e-05, "loss": 0.3888, "step": 21270, "task_loss": 0.6104872822761536 }, { "compression_loss": 0.0, "distillation_loss": 0.32940006256103516, "epoch": 7.69, "learning_rate": 2.6482757363740117e-05, "loss": 0.3446, "step": 21280, "task_loss": 0.4668557047843933 }, { "compression_loss": 0.0, "distillation_loss": 0.4409995675086975, "epoch": 7.69, "learning_rate": 2.6415113584643424e-05, "loss": 0.4242, "step": 21290, "task_loss": 0.6875514984130859 }, { "compression_loss": 0.0, "distillation_loss": 0.2878825068473816, "epoch": 7.7, "learning_rate": 2.6347488290426573e-05, "loss": 0.3609, "step": 21300, "task_loss": 0.7109676599502563 }, { "compression_loss": 0.0, "distillation_loss": 0.3507508635520935, "epoch": 7.7, "learning_rate": 2.627988182978831e-05, "loss": 0.4358, "step": 21310, "task_loss": 0.6243581771850586 }, { "compression_loss": 0.0, "distillation_loss": 0.3260875344276428, "epoch": 7.71, "learning_rate": 2.6212294551330293e-05, "loss": 0.4248, "step": 21320, "task_loss": 0.5162962675094604 }, { "compression_loss": 0.0, "distillation_loss": 0.39942505955696106, "epoch": 7.71, "learning_rate": 2.6144726803555232e-05, "loss": 0.4051, "step": 21330, "task_loss": 0.8916963338851929 }, { "compression_loss": 0.0, "distillation_loss": 0.4103178083896637, "epoch": 7.71, "learning_rate": 2.6077178934865193e-05, "loss": 0.3919, "step": 21340, "task_loss": 0.9658797979354858 }, { "compression_loss": 0.0, "distillation_loss": 0.2807236313819885, "epoch": 7.72, "learning_rate": 2.6009651293559663e-05, "loss": 0.3637, "step": 21350, "task_loss": 0.9017109870910645 }, { "compression_loss": 0.0, "distillation_loss": 0.23867225646972656, "epoch": 7.72, "learning_rate": 2.59421442278339e-05, "loss": 0.3577, "step": 21360, "task_loss": 0.29913216829299927 }, { "compression_loss": 0.0, "distillation_loss": 0.2474673092365265, "epoch": 7.72, "learning_rate": 2.5874658085777014e-05, "loss": 0.3767, "step": 21370, "task_loss": 0.40880587697029114 }, { "compression_loss": 0.0, "distillation_loss": 0.441650390625, "epoch": 7.73, "learning_rate": 2.580719321537026e-05, "loss": 0.3848, "step": 21380, "task_loss": 1.2267597913742065 }, { "compression_loss": 0.0, "distillation_loss": 0.4035109877586365, "epoch": 7.73, "learning_rate": 2.5739749964485183e-05, "loss": 0.401, "step": 21390, "task_loss": 0.6787598133087158 }, { "compression_loss": 0.0, "distillation_loss": 0.3127707839012146, "epoch": 7.73, "learning_rate": 2.5672328680881876e-05, "loss": 0.3999, "step": 21400, "task_loss": 0.43720167875289917 }, { "compression_loss": 0.0, "distillation_loss": 0.7328001856803894, "epoch": 7.74, "learning_rate": 2.5604929712207137e-05, "loss": 0.4533, "step": 21410, "task_loss": 0.9978082776069641 }, { "compression_loss": 0.0, "distillation_loss": 0.3034532368183136, "epoch": 7.74, "learning_rate": 2.5537553405992723e-05, "loss": 0.3571, "step": 21420, "task_loss": 0.33651202917099 }, { "compression_loss": 0.0, "distillation_loss": 0.4111151695251465, "epoch": 7.74, "learning_rate": 2.547020010965351e-05, "loss": 0.3843, "step": 21430, "task_loss": 0.40663081407546997 }, { "compression_loss": 0.0, "distillation_loss": 0.46831807494163513, "epoch": 7.75, "learning_rate": 2.5402870170485775e-05, "loss": 0.3962, "step": 21440, "task_loss": 0.5977736711502075 }, { "compression_loss": 0.0, "distillation_loss": 0.30742383003234863, "epoch": 7.75, "learning_rate": 2.533556393566528e-05, "loss": 0.3719, "step": 21450, "task_loss": 0.30203139781951904 }, { "compression_loss": 0.0, "distillation_loss": 0.3444630801677704, "epoch": 7.76, "learning_rate": 2.5268281752245642e-05, "loss": 0.3626, "step": 21460, "task_loss": 0.5389599800109863 }, { "compression_loss": 0.0, "distillation_loss": 0.33669689297676086, "epoch": 7.76, "learning_rate": 2.520102396715641e-05, "loss": 0.3656, "step": 21470, "task_loss": 0.5374178290367126 }, { "compression_loss": 0.0, "distillation_loss": 0.38278454542160034, "epoch": 7.76, "learning_rate": 2.513379092720134e-05, "loss": 0.3485, "step": 21480, "task_loss": 0.6707999110221863 }, { "compression_loss": 0.0, "distillation_loss": 0.3585819900035858, "epoch": 7.77, "learning_rate": 2.5066582979056587e-05, "loss": 0.361, "step": 21490, "task_loss": 0.27591443061828613 }, { "compression_loss": 0.0, "distillation_loss": 0.24885764718055725, "epoch": 7.77, "learning_rate": 2.4999400469268948e-05, "loss": 0.3919, "step": 21500, "task_loss": 0.283647745847702 }, { "epoch": 7.77, "eval_exact_match": 83.03689687795648, "eval_f1": 90.03524090780901, "step": 21500 }, { "compression_loss": 0.0, "distillation_loss": 0.29895299673080444, "epoch": 7.77, "learning_rate": 2.493224374425402e-05, "loss": 0.3684, "step": 21510, "task_loss": 0.42891183495521545 }, { "compression_loss": 0.0, "distillation_loss": 0.4570951461791992, "epoch": 7.78, "learning_rate": 2.486511315029447e-05, "loss": 0.3693, "step": 21520, "task_loss": 0.3669062554836273 }, { "compression_loss": 0.0, "distillation_loss": 0.2935447692871094, "epoch": 7.78, "learning_rate": 2.47980090335382e-05, "loss": 0.3801, "step": 21530, "task_loss": 0.5963490009307861 }, { "compression_loss": 0.0, "distillation_loss": 0.34089094400405884, "epoch": 7.78, "learning_rate": 2.4730931739996625e-05, "loss": 0.3634, "step": 21540, "task_loss": 0.725817084312439 }, { "compression_loss": 0.0, "distillation_loss": 0.2687324285507202, "epoch": 7.79, "learning_rate": 2.4663881615542794e-05, "loss": 0.3626, "step": 21550, "task_loss": 0.6789768934249878 }, { "compression_loss": 0.0, "distillation_loss": 0.4002509117126465, "epoch": 7.79, "learning_rate": 2.4596859005909728e-05, "loss": 0.4158, "step": 21560, "task_loss": 0.43731197714805603 }, { "compression_loss": 0.0, "distillation_loss": 0.33176618814468384, "epoch": 7.8, "learning_rate": 2.4529864256688515e-05, "loss": 0.3768, "step": 21570, "task_loss": 0.32815253734588623 }, { "compression_loss": 0.0, "distillation_loss": 0.3556719720363617, "epoch": 7.8, "learning_rate": 2.4462897713326633e-05, "loss": 0.3839, "step": 21580, "task_loss": 0.42700621485710144 }, { "compression_loss": 0.0, "distillation_loss": 0.39052969217300415, "epoch": 7.8, "learning_rate": 2.4395959721126073e-05, "loss": 0.4138, "step": 21590, "task_loss": 0.3304116129875183 }, { "compression_loss": 0.0, "distillation_loss": 0.2801629900932312, "epoch": 7.81, "learning_rate": 2.432905062524165e-05, "loss": 0.3815, "step": 21600, "task_loss": 0.41076505184173584 }, { "compression_loss": 0.0, "distillation_loss": 0.29806920886039734, "epoch": 7.81, "learning_rate": 2.426217077067916e-05, "loss": 0.4044, "step": 21610, "task_loss": 0.513504147529602 }, { "compression_loss": 0.0, "distillation_loss": 0.3114551901817322, "epoch": 7.81, "learning_rate": 2.419532050229361e-05, "loss": 0.375, "step": 21620, "task_loss": 0.46315598487854004 }, { "compression_loss": 0.0, "distillation_loss": 0.2388986349105835, "epoch": 7.82, "learning_rate": 2.412850016478747e-05, "loss": 0.3385, "step": 21630, "task_loss": 0.29883062839508057 }, { "compression_loss": 0.0, "distillation_loss": 0.2899854779243469, "epoch": 7.82, "learning_rate": 2.4061710102708885e-05, "loss": 0.3532, "step": 21640, "task_loss": 0.5468807816505432 }, { "compression_loss": 0.0, "distillation_loss": 0.5108985304832458, "epoch": 7.82, "learning_rate": 2.3994950660449844e-05, "loss": 0.394, "step": 21650, "task_loss": 0.38056838512420654 }, { "compression_loss": 0.0, "distillation_loss": 0.33521783351898193, "epoch": 7.83, "learning_rate": 2.3928222182244508e-05, "loss": 0.3658, "step": 21660, "task_loss": 0.3311670422554016 }, { "compression_loss": 0.0, "distillation_loss": 0.4127667546272278, "epoch": 7.83, "learning_rate": 2.3861525012167334e-05, "loss": 0.4362, "step": 21670, "task_loss": 0.7293896079063416 }, { "compression_loss": 0.0, "distillation_loss": 0.45115426182746887, "epoch": 7.84, "learning_rate": 2.379485949413137e-05, "loss": 0.4158, "step": 21680, "task_loss": 0.564669668674469 }, { "compression_loss": 0.0, "distillation_loss": 0.38690847158432007, "epoch": 7.84, "learning_rate": 2.3728225971886433e-05, "loss": 0.3499, "step": 21690, "task_loss": 0.669680118560791 }, { "compression_loss": 0.0, "distillation_loss": 0.4596896767616272, "epoch": 7.84, "learning_rate": 2.366162478901738e-05, "loss": 0.3607, "step": 21700, "task_loss": 0.5676548480987549 }, { "compression_loss": 0.0, "distillation_loss": 0.36195582151412964, "epoch": 7.85, "learning_rate": 2.359505628894229e-05, "loss": 0.3731, "step": 21710, "task_loss": 0.8088686466217041 }, { "compression_loss": 0.0, "distillation_loss": 0.4107312858104706, "epoch": 7.85, "learning_rate": 2.3528520814910756e-05, "loss": 0.3817, "step": 21720, "task_loss": 0.7661799788475037 }, { "compression_loss": 0.0, "distillation_loss": 0.44738367199897766, "epoch": 7.85, "learning_rate": 2.346201871000203e-05, "loss": 0.4273, "step": 21730, "task_loss": 0.3972681760787964 }, { "compression_loss": 0.0, "distillation_loss": 0.38369739055633545, "epoch": 7.86, "learning_rate": 2.3395550317123355e-05, "loss": 0.3842, "step": 21740, "task_loss": 0.83580482006073 }, { "compression_loss": 0.0, "distillation_loss": 0.26634058356285095, "epoch": 7.86, "learning_rate": 2.3329115979008098e-05, "loss": 0.3654, "step": 21750, "task_loss": 0.47266823053359985 }, { "epoch": 7.86, "eval_exact_match": 82.86660359508042, "eval_f1": 89.9055183315166, "step": 21750 }, { "compression_loss": 0.0, "distillation_loss": 0.3641436696052551, "epoch": 7.86, "learning_rate": 2.3262716038214055e-05, "loss": 0.3793, "step": 21760, "task_loss": 0.44877809286117554 }, { "compression_loss": 0.0, "distillation_loss": 0.5512512922286987, "epoch": 7.87, "learning_rate": 2.319635083712164e-05, "loss": 0.397, "step": 21770, "task_loss": 1.344954013824463 }, { "compression_loss": 0.0, "distillation_loss": 0.34335392713546753, "epoch": 7.87, "learning_rate": 2.3130020717932155e-05, "loss": 0.4084, "step": 21780, "task_loss": 0.48081788420677185 }, { "compression_loss": 0.0, "distillation_loss": 0.352206826210022, "epoch": 7.87, "learning_rate": 2.306372602266599e-05, "loss": 0.3501, "step": 21790, "task_loss": 0.6583988666534424 }, { "compression_loss": 0.0, "distillation_loss": 0.46836334466934204, "epoch": 7.88, "learning_rate": 2.29974670931609e-05, "loss": 0.4028, "step": 21800, "task_loss": 0.6334257125854492 }, { "compression_loss": 0.0, "distillation_loss": 0.36540892720222473, "epoch": 7.88, "learning_rate": 2.2931244271070183e-05, "loss": 0.3821, "step": 21810, "task_loss": 0.39173829555511475 }, { "compression_loss": 0.0, "distillation_loss": 0.46499329805374146, "epoch": 7.89, "learning_rate": 2.2865057897861014e-05, "loss": 0.3748, "step": 21820, "task_loss": 0.4624268412590027 }, { "compression_loss": 0.0, "distillation_loss": 0.5395506024360657, "epoch": 7.89, "learning_rate": 2.2798908314812546e-05, "loss": 0.3834, "step": 21830, "task_loss": 0.9067790508270264 }, { "compression_loss": 0.0, "distillation_loss": 0.33177289366722107, "epoch": 7.89, "learning_rate": 2.273279586301431e-05, "loss": 0.348, "step": 21840, "task_loss": 0.4920841157436371 }, { "compression_loss": 0.0, "distillation_loss": 0.3163832426071167, "epoch": 7.9, "learning_rate": 2.2666720883364317e-05, "loss": 0.3463, "step": 21850, "task_loss": 0.6887298822402954 }, { "compression_loss": 0.0, "distillation_loss": 0.39988842606544495, "epoch": 7.9, "learning_rate": 2.26006837165674e-05, "loss": 0.3928, "step": 21860, "task_loss": 0.6893279552459717 }, { "compression_loss": 0.0, "distillation_loss": 0.35277271270751953, "epoch": 7.9, "learning_rate": 2.2534684703133375e-05, "loss": 0.3875, "step": 21870, "task_loss": 0.3911385238170624 }, { "compression_loss": 0.0, "distillation_loss": 0.2951965630054474, "epoch": 7.91, "learning_rate": 2.2468724183375365e-05, "loss": 0.3776, "step": 21880, "task_loss": 0.3761315941810608 }, { "compression_loss": 0.0, "distillation_loss": 0.3309294879436493, "epoch": 7.91, "learning_rate": 2.240280249740798e-05, "loss": 0.376, "step": 21890, "task_loss": 0.522919774055481 }, { "compression_loss": 0.0, "distillation_loss": 0.5310372114181519, "epoch": 7.91, "learning_rate": 2.233691998514561e-05, "loss": 0.4073, "step": 21900, "task_loss": 1.1187676191329956 }, { "compression_loss": 0.0, "distillation_loss": 0.33567559719085693, "epoch": 7.92, "learning_rate": 2.2271076986300626e-05, "loss": 0.4021, "step": 21910, "task_loss": 0.7209323644638062 }, { "compression_loss": 0.0, "distillation_loss": 0.46349138021469116, "epoch": 7.92, "learning_rate": 2.2205273840381697e-05, "loss": 0.3781, "step": 21920, "task_loss": 0.9030737280845642 }, { "compression_loss": 0.0, "distillation_loss": 0.5550073981285095, "epoch": 7.93, "learning_rate": 2.213951088669194e-05, "loss": 0.407, "step": 21930, "task_loss": 0.8301783800125122 }, { "compression_loss": 0.0, "distillation_loss": 0.4752506613731384, "epoch": 7.93, "learning_rate": 2.2073788464327286e-05, "loss": 0.3842, "step": 21940, "task_loss": 0.5708151459693909 }, { "compression_loss": 0.0, "distillation_loss": 0.30064135789871216, "epoch": 7.93, "learning_rate": 2.2008106912174626e-05, "loss": 0.3408, "step": 21950, "task_loss": 0.5206139087677002 }, { "compression_loss": 0.0, "distillation_loss": 0.4654524326324463, "epoch": 7.94, "learning_rate": 2.1942466568910142e-05, "loss": 0.3693, "step": 21960, "task_loss": 0.6942557096481323 }, { "compression_loss": 0.0, "distillation_loss": 0.47742265462875366, "epoch": 7.94, "learning_rate": 2.18768677729975e-05, "loss": 0.3627, "step": 21970, "task_loss": 0.5125290155410767 }, { "compression_loss": 0.0, "distillation_loss": 0.45493432879447937, "epoch": 7.94, "learning_rate": 2.1811310862686177e-05, "loss": 0.4222, "step": 21980, "task_loss": 0.7441220879554749 }, { "compression_loss": 0.0, "distillation_loss": 0.2649460434913635, "epoch": 7.95, "learning_rate": 2.1745796176009618e-05, "loss": 0.3469, "step": 21990, "task_loss": 0.3829834759235382 }, { "compression_loss": 0.0, "distillation_loss": 0.41353529691696167, "epoch": 7.95, "learning_rate": 2.1680324050783598e-05, "loss": 0.3821, "step": 22000, "task_loss": 0.5880372524261475 }, { "epoch": 7.95, "eval_exact_match": 83.01797540208136, "eval_f1": 89.93481086824103, "step": 22000 }, { "compression_loss": 0.0, "distillation_loss": 0.33622804284095764, "epoch": 7.95, "learning_rate": 2.161489482460439e-05, "loss": 0.3528, "step": 22010, "task_loss": 0.4933108687400818 }, { "compression_loss": 0.0, "distillation_loss": 0.46350884437561035, "epoch": 7.96, "learning_rate": 2.1549508834847124e-05, "loss": 0.386, "step": 22020, "task_loss": 0.8076874613761902 }, { "compression_loss": 0.0, "distillation_loss": 0.27121564745903015, "epoch": 7.96, "learning_rate": 2.1484166418663904e-05, "loss": 0.3791, "step": 22030, "task_loss": 0.5695164799690247 }, { "compression_loss": 0.0, "distillation_loss": 0.31125015020370483, "epoch": 7.97, "learning_rate": 2.1418867912982233e-05, "loss": 0.3831, "step": 22040, "task_loss": 0.41965681314468384 }, { "compression_loss": 0.0, "distillation_loss": 0.41130608320236206, "epoch": 7.97, "learning_rate": 2.135361365450315e-05, "loss": 0.3684, "step": 22050, "task_loss": 0.39245760440826416 }, { "compression_loss": 0.0, "distillation_loss": 0.3798828721046448, "epoch": 7.97, "learning_rate": 2.1288403979699563e-05, "loss": 0.3601, "step": 22060, "task_loss": 0.4798113703727722 }, { "compression_loss": 0.0, "distillation_loss": 0.32262685894966125, "epoch": 7.98, "learning_rate": 2.122323922481447e-05, "loss": 0.3987, "step": 22070, "task_loss": 0.7292386293411255 }, { "compression_loss": 0.0, "distillation_loss": 0.24890893697738647, "epoch": 7.98, "learning_rate": 2.1158119725859282e-05, "loss": 0.3586, "step": 22080, "task_loss": 0.5080243945121765 }, { "compression_loss": 0.0, "distillation_loss": 0.4089392423629761, "epoch": 7.98, "learning_rate": 2.1093045818612006e-05, "loss": 0.347, "step": 22090, "task_loss": 0.41398802399635315 }, { "compression_loss": 0.0, "distillation_loss": 0.4324132800102234, "epoch": 7.99, "learning_rate": 2.102801783861561e-05, "loss": 0.3558, "step": 22100, "task_loss": 0.5894123315811157 }, { "compression_loss": 0.0, "distillation_loss": 0.47795045375823975, "epoch": 7.99, "learning_rate": 2.0963036121176206e-05, "loss": 0.3962, "step": 22110, "task_loss": 0.7632349729537964 }, { "compression_loss": 0.0, "distillation_loss": 0.4554988145828247, "epoch": 7.99, "learning_rate": 2.0898101001361418e-05, "loss": 0.4285, "step": 22120, "task_loss": 0.838047981262207 }, { "compression_loss": 0.0, "distillation_loss": 0.4030979871749878, "epoch": 8.0, "learning_rate": 2.083321281399851e-05, "loss": 0.4023, "step": 22130, "task_loss": 0.6322430372238159 }, { "compression_loss": 0.0, "distillation_loss": 0.46217092871665955, "epoch": 8.0, "learning_rate": 2.0768371893672834e-05, "loss": 0.3616, "step": 22140, "task_loss": 0.5198900103569031 }, { "compression_loss": 0.0, "distillation_loss": 0.3689584732055664, "epoch": 8.01, "learning_rate": 2.0703578574725962e-05, "loss": 0.3295, "step": 22150, "task_loss": 0.5603437423706055 }, { "compression_loss": 0.0, "distillation_loss": 0.3704279661178589, "epoch": 8.01, "learning_rate": 2.0638833191254032e-05, "loss": 0.3244, "step": 22160, "task_loss": 0.4228596091270447 }, { "compression_loss": 0.0, "distillation_loss": 0.4226457476615906, "epoch": 8.01, "learning_rate": 2.0574136077106028e-05, "loss": 0.3405, "step": 22170, "task_loss": 0.663974404335022 }, { "compression_loss": 0.0, "distillation_loss": 0.2713632583618164, "epoch": 8.02, "learning_rate": 2.0509487565882006e-05, "loss": 0.3215, "step": 22180, "task_loss": 0.1982375979423523 }, { "compression_loss": 0.0, "distillation_loss": 0.18484729528427124, "epoch": 8.02, "learning_rate": 2.0444887990931454e-05, "loss": 0.3104, "step": 22190, "task_loss": 0.37546926736831665 }, { "compression_loss": 0.0, "distillation_loss": 0.2333574742078781, "epoch": 8.02, "learning_rate": 2.0386790489299768e-05, "loss": 0.326, "step": 22200, "task_loss": 0.40536680817604065 }, { "compression_loss": 0.0, "distillation_loss": 0.3073742091655731, "epoch": 8.03, "learning_rate": 2.0322284810741438e-05, "loss": 0.3456, "step": 22210, "task_loss": 0.6669155359268188 }, { "compression_loss": 0.0, "distillation_loss": 0.31559669971466064, "epoch": 8.03, "learning_rate": 2.0257829033736913e-05, "loss": 0.3496, "step": 22220, "task_loss": 0.4252294898033142 }, { "compression_loss": 0.0, "distillation_loss": 0.3236352205276489, "epoch": 8.03, "learning_rate": 2.0193423490641865e-05, "loss": 0.346, "step": 22230, "task_loss": 0.31383252143859863 }, { "compression_loss": 0.0, "distillation_loss": 0.28318703174591064, "epoch": 8.04, "learning_rate": 2.012906851355292e-05, "loss": 0.3354, "step": 22240, "task_loss": 0.45789241790771484 }, { "compression_loss": 0.0, "distillation_loss": 0.2784233093261719, "epoch": 8.04, "learning_rate": 2.0064764434306e-05, "loss": 0.3764, "step": 22250, "task_loss": 0.5130239725112915 }, { "epoch": 8.04, "eval_exact_match": 82.99905392620624, "eval_f1": 89.85647992545962, "step": 22250 }, { "compression_loss": 0.0, "distillation_loss": 0.35418766736984253, "epoch": 8.04, "learning_rate": 2.0000511584474526e-05, "loss": 0.3849, "step": 22260, "task_loss": 0.6872138977050781 }, { "compression_loss": 0.0, "distillation_loss": 0.3738144338130951, "epoch": 8.05, "learning_rate": 1.9936310295367844e-05, "loss": 0.3544, "step": 22270, "task_loss": 1.0411014556884766 }, { "compression_loss": 0.0, "distillation_loss": 0.3013811707496643, "epoch": 8.05, "learning_rate": 1.9872160898029345e-05, "loss": 0.3503, "step": 22280, "task_loss": 1.0129387378692627 }, { "compression_loss": 0.0, "distillation_loss": 0.37890174984931946, "epoch": 8.06, "learning_rate": 1.9808063723234918e-05, "loss": 0.3773, "step": 22290, "task_loss": 0.4624231457710266 }, { "compression_loss": 0.0, "distillation_loss": 0.3464246988296509, "epoch": 8.06, "learning_rate": 1.9744019101491147e-05, "loss": 0.3466, "step": 22300, "task_loss": 0.8617278337478638 }, { "compression_loss": 0.0, "distillation_loss": 0.3005381226539612, "epoch": 8.06, "learning_rate": 1.968002736303364e-05, "loss": 0.3255, "step": 22310, "task_loss": 0.4841776490211487 }, { "compression_loss": 0.0, "distillation_loss": 0.38545113801956177, "epoch": 8.07, "learning_rate": 1.9616088837825307e-05, "loss": 0.3392, "step": 22320, "task_loss": 0.839179277420044 }, { "compression_loss": 0.0, "distillation_loss": 0.2746634781360626, "epoch": 8.07, "learning_rate": 1.9552203855554702e-05, "loss": 0.3239, "step": 22330, "task_loss": 0.5544817447662354 }, { "compression_loss": 0.0, "distillation_loss": 0.3266233503818512, "epoch": 8.07, "learning_rate": 1.948837274563426e-05, "loss": 0.3442, "step": 22340, "task_loss": 0.3120766282081604 }, { "compression_loss": 0.0, "distillation_loss": 0.2832903563976288, "epoch": 8.08, "learning_rate": 1.9424595837198654e-05, "loss": 0.3225, "step": 22350, "task_loss": 0.5503286719322205 }, { "compression_loss": 0.0, "distillation_loss": 0.2415936142206192, "epoch": 8.08, "learning_rate": 1.936087345910306e-05, "loss": 0.3388, "step": 22360, "task_loss": 0.4857402443885803 }, { "compression_loss": 0.0, "distillation_loss": 0.40143972635269165, "epoch": 8.08, "learning_rate": 1.929720593992152e-05, "loss": 0.332, "step": 22370, "task_loss": 0.700149655342102 }, { "compression_loss": 0.0, "distillation_loss": 0.3912586569786072, "epoch": 8.09, "learning_rate": 1.9233593607945127e-05, "loss": 0.3277, "step": 22380, "task_loss": 0.6662502288818359 }, { "compression_loss": 0.0, "distillation_loss": 0.24969267845153809, "epoch": 8.09, "learning_rate": 1.917003679118049e-05, "loss": 0.3495, "step": 22390, "task_loss": 0.33860787749290466 }, { "compression_loss": 0.0, "distillation_loss": 0.4209668040275574, "epoch": 8.1, "learning_rate": 1.9106535817347912e-05, "loss": 0.3384, "step": 22400, "task_loss": 0.3553839325904846 }, { "compression_loss": 0.0, "distillation_loss": 0.2749325633049011, "epoch": 8.1, "learning_rate": 1.9043091013879773e-05, "loss": 0.3445, "step": 22410, "task_loss": 0.48762959241867065 }, { "compression_loss": 0.0, "distillation_loss": 0.3994620144367218, "epoch": 8.1, "learning_rate": 1.897970270791881e-05, "loss": 0.3522, "step": 22420, "task_loss": 0.5795477628707886 }, { "compression_loss": 0.0, "distillation_loss": 0.3599868714809418, "epoch": 8.11, "learning_rate": 1.891637122631645e-05, "loss": 0.3621, "step": 22430, "task_loss": 0.8509477376937866 }, { "compression_loss": 0.0, "distillation_loss": 0.29197603464126587, "epoch": 8.11, "learning_rate": 1.88530968956311e-05, "loss": 0.3364, "step": 22440, "task_loss": 0.39719563722610474 }, { "compression_loss": 0.0, "distillation_loss": 0.24607793986797333, "epoch": 8.11, "learning_rate": 1.8789880042126502e-05, "loss": 0.3425, "step": 22450, "task_loss": 0.2874683737754822 }, { "compression_loss": 0.0, "distillation_loss": 0.3210759162902832, "epoch": 8.12, "learning_rate": 1.8726720991769983e-05, "loss": 0.3485, "step": 22460, "task_loss": 0.6553330421447754 }, { "compression_loss": 0.0, "distillation_loss": 0.34106212854385376, "epoch": 8.12, "learning_rate": 1.8663620070230873e-05, "loss": 0.3472, "step": 22470, "task_loss": 0.47652125358581543 }, { "compression_loss": 0.0, "distillation_loss": 0.35027986764907837, "epoch": 8.12, "learning_rate": 1.8600577602878722e-05, "loss": 0.3235, "step": 22480, "task_loss": 0.4614375829696655 }, { "compression_loss": 0.0, "distillation_loss": 0.36589890718460083, "epoch": 8.13, "learning_rate": 1.8537593914781706e-05, "loss": 0.3435, "step": 22490, "task_loss": 0.7123420834541321 }, { "compression_loss": 0.0, "distillation_loss": 0.38203728199005127, "epoch": 8.13, "learning_rate": 1.847466933070489e-05, "loss": 0.3821, "step": 22500, "task_loss": 0.4685204327106476 }, { "epoch": 8.13, "eval_exact_match": 82.86660359508042, "eval_f1": 89.73513596202447, "step": 22500 }, { "compression_loss": 0.0, "distillation_loss": 0.373041033744812, "epoch": 8.14, "learning_rate": 1.8411804175108595e-05, "loss": 0.3285, "step": 22510, "task_loss": 0.7189540863037109 }, { "compression_loss": 0.0, "distillation_loss": 0.26004570722579956, "epoch": 8.14, "learning_rate": 1.8348998772146698e-05, "loss": 0.338, "step": 22520, "task_loss": 0.36693066358566284 }, { "compression_loss": 0.0, "distillation_loss": 0.34126996994018555, "epoch": 8.14, "learning_rate": 1.828625344566498e-05, "loss": 0.3419, "step": 22530, "task_loss": 0.47086426615715027 }, { "compression_loss": 0.0, "distillation_loss": 0.3550693988800049, "epoch": 8.15, "learning_rate": 1.822356851919943e-05, "loss": 0.3521, "step": 22540, "task_loss": 0.7392463684082031 }, { "compression_loss": 0.0, "distillation_loss": 0.3915178179740906, "epoch": 8.15, "learning_rate": 1.816094431597464e-05, "loss": 0.363, "step": 22550, "task_loss": 0.5709539651870728 }, { "compression_loss": 0.0, "distillation_loss": 0.41465628147125244, "epoch": 8.15, "learning_rate": 1.8098381158902023e-05, "loss": 0.3423, "step": 22560, "task_loss": 0.49347808957099915 }, { "compression_loss": 0.0, "distillation_loss": 0.2688755393028259, "epoch": 8.16, "learning_rate": 1.803587937057828e-05, "loss": 0.3412, "step": 22570, "task_loss": 0.5061269402503967 }, { "compression_loss": 0.0, "distillation_loss": 0.3253447115421295, "epoch": 8.16, "learning_rate": 1.7973439273283633e-05, "loss": 0.3537, "step": 22580, "task_loss": 0.4826359748840332 }, { "compression_loss": 0.0, "distillation_loss": 0.4960862994194031, "epoch": 8.16, "learning_rate": 1.791106118898024e-05, "loss": 0.3537, "step": 22590, "task_loss": 0.66794753074646 }, { "compression_loss": 0.0, "distillation_loss": 0.308212548494339, "epoch": 8.17, "learning_rate": 1.7848745439310454e-05, "loss": 0.3229, "step": 22600, "task_loss": 0.4701980650424957 }, { "compression_loss": 0.0, "distillation_loss": 0.3355109393596649, "epoch": 8.17, "learning_rate": 1.7786492345595258e-05, "loss": 0.31, "step": 22610, "task_loss": 0.3176344037055969 }, { "compression_loss": 0.0, "distillation_loss": 0.37522727251052856, "epoch": 8.17, "learning_rate": 1.7724302228832518e-05, "loss": 0.3228, "step": 22620, "task_loss": 0.7154313325881958 }, { "compression_loss": 0.0, "distillation_loss": 0.30252838134765625, "epoch": 8.18, "learning_rate": 1.7662175409695403e-05, "loss": 0.3255, "step": 22630, "task_loss": 0.3516947627067566 }, { "compression_loss": 0.0, "distillation_loss": 0.2813971936702728, "epoch": 8.18, "learning_rate": 1.760011220853067e-05, "loss": 0.3469, "step": 22640, "task_loss": 0.3032105267047882 }, { "compression_loss": 0.0, "distillation_loss": 0.3111063241958618, "epoch": 8.19, "learning_rate": 1.7538112945357074e-05, "loss": 0.3466, "step": 22650, "task_loss": 0.6328559517860413 }, { "compression_loss": 0.0, "distillation_loss": 0.3012014627456665, "epoch": 8.19, "learning_rate": 1.747617793986364e-05, "loss": 0.3383, "step": 22660, "task_loss": 0.7136552333831787 }, { "compression_loss": 0.0, "distillation_loss": 0.4168001413345337, "epoch": 8.19, "learning_rate": 1.7414307511408105e-05, "loss": 0.33, "step": 22670, "task_loss": 0.7946159839630127 }, { "compression_loss": 0.0, "distillation_loss": 0.3220928907394409, "epoch": 8.2, "learning_rate": 1.7352501979015185e-05, "loss": 0.3734, "step": 22680, "task_loss": 0.6289501786231995 }, { "compression_loss": 0.0, "distillation_loss": 0.3598100543022156, "epoch": 8.2, "learning_rate": 1.7290761661374998e-05, "loss": 0.3356, "step": 22690, "task_loss": 0.5484864711761475 }, { "compression_loss": 0.0, "distillation_loss": 0.39641427993774414, "epoch": 8.2, "learning_rate": 1.722908687684138e-05, "loss": 0.3569, "step": 22700, "task_loss": 0.34807297587394714 }, { "compression_loss": 0.0, "distillation_loss": 0.3675978183746338, "epoch": 8.21, "learning_rate": 1.7167477943430275e-05, "loss": 0.3319, "step": 22710, "task_loss": 0.27945399284362793 }, { "compression_loss": 0.0, "distillation_loss": 0.37253180146217346, "epoch": 8.21, "learning_rate": 1.710593517881802e-05, "loss": 0.3363, "step": 22720, "task_loss": 0.5771284103393555 }, { "compression_loss": 0.0, "distillation_loss": 0.46653449535369873, "epoch": 8.21, "learning_rate": 1.7044458900339855e-05, "loss": 0.3938, "step": 22730, "task_loss": 0.6149200201034546 }, { "compression_loss": 0.0, "distillation_loss": 0.32485833764076233, "epoch": 8.22, "learning_rate": 1.6983049424988106e-05, "loss": 0.3088, "step": 22740, "task_loss": 0.6905471086502075 }, { "compression_loss": 0.0, "distillation_loss": 0.32625168561935425, "epoch": 8.22, "learning_rate": 1.6921707069410698e-05, "loss": 0.3635, "step": 22750, "task_loss": 0.6236306428909302 }, { "epoch": 8.22, "eval_exact_match": 82.82876064333018, "eval_f1": 89.90657086280171, "step": 22750 }, { "compression_loss": 0.0, "distillation_loss": 0.36564576625823975, "epoch": 8.23, "learning_rate": 1.686043214990943e-05, "loss": 0.3627, "step": 22760, "task_loss": 0.611585259437561 }, { "compression_loss": 0.0, "distillation_loss": 0.323543906211853, "epoch": 8.23, "learning_rate": 1.6799224982438413e-05, "loss": 0.3304, "step": 22770, "task_loss": 0.7676356434822083 }, { "compression_loss": 0.0, "distillation_loss": 0.29762110114097595, "epoch": 8.23, "learning_rate": 1.6738085882602358e-05, "loss": 0.3379, "step": 22780, "task_loss": 0.8042917251586914 }, { "compression_loss": 0.0, "distillation_loss": 0.3024720549583435, "epoch": 8.24, "learning_rate": 1.6677015165655034e-05, "loss": 0.3291, "step": 22790, "task_loss": 0.48665961623191833 }, { "compression_loss": 0.0, "distillation_loss": 0.4022981524467468, "epoch": 8.24, "learning_rate": 1.6616013146497598e-05, "loss": 0.3783, "step": 22800, "task_loss": 0.40880414843559265 }, { "compression_loss": 0.0, "distillation_loss": 0.4667377173900604, "epoch": 8.24, "learning_rate": 1.6555080139676972e-05, "loss": 0.3679, "step": 22810, "task_loss": 0.6365650296211243 }, { "compression_loss": 0.0, "distillation_loss": 0.3593342900276184, "epoch": 8.25, "learning_rate": 1.6494216459384225e-05, "loss": 0.3606, "step": 22820, "task_loss": 0.582703709602356 }, { "compression_loss": 0.0, "distillation_loss": 0.31626957654953003, "epoch": 8.25, "learning_rate": 1.6433422419452973e-05, "loss": 0.3391, "step": 22830, "task_loss": 0.41589945554733276 }, { "compression_loss": 0.0, "distillation_loss": 0.2823561429977417, "epoch": 8.25, "learning_rate": 1.6372698333357705e-05, "loss": 0.3464, "step": 22840, "task_loss": 0.6627315282821655 }, { "compression_loss": 0.0, "distillation_loss": 0.3531639873981476, "epoch": 8.26, "learning_rate": 1.6312044514212245e-05, "loss": 0.3758, "step": 22850, "task_loss": 0.809797465801239 }, { "compression_loss": 0.0, "distillation_loss": 0.3731263279914856, "epoch": 8.26, "learning_rate": 1.625146127476807e-05, "loss": 0.3786, "step": 22860, "task_loss": 0.7332509160041809 }, { "compression_loss": 0.0, "distillation_loss": 0.2990288734436035, "epoch": 8.27, "learning_rate": 1.6190948927412758e-05, "loss": 0.3298, "step": 22870, "task_loss": 0.38085365295410156 }, { "compression_loss": 0.0, "distillation_loss": 0.2953346371650696, "epoch": 8.27, "learning_rate": 1.613050778416828e-05, "loss": 0.3321, "step": 22880, "task_loss": 0.7593106031417847 }, { "compression_loss": 0.0, "distillation_loss": 0.30572813749313354, "epoch": 8.27, "learning_rate": 1.607013815668954e-05, "loss": 0.3467, "step": 22890, "task_loss": 0.40899914503097534 }, { "compression_loss": 0.0, "distillation_loss": 0.3036983907222748, "epoch": 8.28, "learning_rate": 1.6009840356262603e-05, "loss": 0.3755, "step": 22900, "task_loss": 0.4268397092819214 }, { "compression_loss": 0.0, "distillation_loss": 0.37761154770851135, "epoch": 8.28, "learning_rate": 1.594961469380322e-05, "loss": 0.3332, "step": 22910, "task_loss": 0.41044527292251587 }, { "compression_loss": 0.0, "distillation_loss": 0.2791655659675598, "epoch": 8.28, "learning_rate": 1.5889461479855153e-05, "loss": 0.3374, "step": 22920, "task_loss": 0.546173095703125 }, { "compression_loss": 0.0, "distillation_loss": 0.30385681986808777, "epoch": 8.29, "learning_rate": 1.5829381024588614e-05, "loss": 0.3145, "step": 22930, "task_loss": 0.5504984855651855 }, { "compression_loss": 0.0, "distillation_loss": 0.31653857231140137, "epoch": 8.29, "learning_rate": 1.576937363779861e-05, "loss": 0.3167, "step": 22940, "task_loss": 0.4941592812538147 }, { "compression_loss": 0.0, "distillation_loss": 0.3911159634590149, "epoch": 8.29, "learning_rate": 1.570943962890342e-05, "loss": 0.3725, "step": 22950, "task_loss": 0.6922523975372314 }, { "compression_loss": 0.0, "distillation_loss": 0.3799233138561249, "epoch": 8.3, "learning_rate": 1.5649579306942943e-05, "loss": 0.3422, "step": 22960, "task_loss": 0.794413685798645 }, { "compression_loss": 0.0, "distillation_loss": 0.33418601751327515, "epoch": 8.3, "learning_rate": 1.558979298057715e-05, "loss": 0.3407, "step": 22970, "task_loss": 0.6918811798095703 }, { "compression_loss": 0.0, "distillation_loss": 0.48559606075286865, "epoch": 8.31, "learning_rate": 1.55300809580844e-05, "loss": 0.3429, "step": 22980, "task_loss": 0.6242055892944336 }, { "compression_loss": 0.0, "distillation_loss": 0.3377108573913574, "epoch": 8.31, "learning_rate": 1.5470443547360003e-05, "loss": 0.3321, "step": 22990, "task_loss": 0.6736431121826172 }, { "compression_loss": 0.0, "distillation_loss": 0.2609238624572754, "epoch": 8.31, "learning_rate": 1.5410881055914478e-05, "loss": 0.3416, "step": 23000, "task_loss": 0.43551382422447205 }, { "epoch": 8.31, "eval_exact_match": 83.0558183538316, "eval_f1": 89.9465346529477, "step": 23000 }, { "compression_loss": 0.0, "distillation_loss": 0.31109219789505005, "epoch": 8.32, "learning_rate": 1.5351393790872065e-05, "loss": 0.342, "step": 23010, "task_loss": 0.5256096124649048 }, { "compression_loss": 0.0, "distillation_loss": 0.33370792865753174, "epoch": 8.32, "learning_rate": 1.529198205896911e-05, "loss": 0.3416, "step": 23020, "task_loss": 0.42889517545700073 }, { "compression_loss": 0.0, "distillation_loss": 0.323059618473053, "epoch": 8.32, "learning_rate": 1.5232646166552486e-05, "loss": 0.3426, "step": 23030, "task_loss": 0.44258755445480347 }, { "compression_loss": 0.0, "distillation_loss": 0.36747613549232483, "epoch": 8.33, "learning_rate": 1.517338641957799e-05, "loss": 0.373, "step": 23040, "task_loss": 0.6945732235908508 }, { "compression_loss": 0.0, "distillation_loss": 0.3251588046550751, "epoch": 8.33, "learning_rate": 1.5114203123608817e-05, "loss": 0.3609, "step": 23050, "task_loss": 0.5549402236938477 }, { "compression_loss": 0.0, "distillation_loss": 0.29842013120651245, "epoch": 8.33, "learning_rate": 1.5055096583813937e-05, "loss": 0.3394, "step": 23060, "task_loss": 0.3050425946712494 }, { "compression_loss": 0.0, "distillation_loss": 0.3361862599849701, "epoch": 8.34, "learning_rate": 1.499606710496656e-05, "loss": 0.3192, "step": 23070, "task_loss": 1.0102906227111816 }, { "compression_loss": 0.0, "distillation_loss": 0.3161877989768982, "epoch": 8.34, "learning_rate": 1.4937114991442491e-05, "loss": 0.3482, "step": 23080, "task_loss": 0.4595767557621002 }, { "compression_loss": 0.0, "distillation_loss": 0.39450502395629883, "epoch": 8.34, "learning_rate": 1.48782405472187e-05, "loss": 0.3569, "step": 23090, "task_loss": 0.48541557788848877 }, { "compression_loss": 0.0, "distillation_loss": 0.4245753586292267, "epoch": 8.35, "learning_rate": 1.4819444075871585e-05, "loss": 0.3323, "step": 23100, "task_loss": 0.6281993389129639 }, { "compression_loss": 0.0, "distillation_loss": 0.29610657691955566, "epoch": 8.35, "learning_rate": 1.4760725880575534e-05, "loss": 0.3332, "step": 23110, "task_loss": 0.373093843460083 }, { "compression_loss": 0.0, "distillation_loss": 0.353634774684906, "epoch": 8.36, "learning_rate": 1.4702086264101305e-05, "loss": 0.3962, "step": 23120, "task_loss": 0.8853891491889954 }, { "compression_loss": 0.0, "distillation_loss": 0.29741644859313965, "epoch": 8.36, "learning_rate": 1.4643525528814492e-05, "loss": 0.2991, "step": 23130, "task_loss": 0.7390473484992981 }, { "compression_loss": 0.0, "distillation_loss": 0.37111955881118774, "epoch": 8.36, "learning_rate": 1.4585043976673916e-05, "loss": 0.334, "step": 23140, "task_loss": 0.7053943872451782 }, { "compression_loss": 0.0, "distillation_loss": 0.3489043712615967, "epoch": 8.37, "learning_rate": 1.4526641909230136e-05, "loss": 0.3145, "step": 23150, "task_loss": 0.5620697736740112 }, { "compression_loss": 0.0, "distillation_loss": 0.35167962312698364, "epoch": 8.37, "learning_rate": 1.446831962762385e-05, "loss": 0.3207, "step": 23160, "task_loss": 0.5666888356208801 }, { "compression_loss": 0.0, "distillation_loss": 0.3380488157272339, "epoch": 8.37, "learning_rate": 1.4410077432584366e-05, "loss": 0.3348, "step": 23170, "task_loss": 0.8974223732948303 }, { "compression_loss": 0.0, "distillation_loss": 0.29322537779808044, "epoch": 8.38, "learning_rate": 1.435191562442799e-05, "loss": 0.3445, "step": 23180, "task_loss": 0.5745025277137756 }, { "compression_loss": 0.0, "distillation_loss": 0.31768181920051575, "epoch": 8.38, "learning_rate": 1.4293834503056611e-05, "loss": 0.3563, "step": 23190, "task_loss": 0.39405155181884766 }, { "compression_loss": 0.0, "distillation_loss": 0.41235482692718506, "epoch": 8.38, "learning_rate": 1.4235834367955988e-05, "loss": 0.3429, "step": 23200, "task_loss": 1.0112557411193848 }, { "compression_loss": 0.0, "distillation_loss": 0.27087026834487915, "epoch": 8.39, "learning_rate": 1.417791551819433e-05, "loss": 0.3267, "step": 23210, "task_loss": 0.3743332028388977 }, { "compression_loss": 0.0, "distillation_loss": 0.49186843633651733, "epoch": 8.39, "learning_rate": 1.4120078252420704e-05, "loss": 0.3509, "step": 23220, "task_loss": 0.6274521350860596 }, { "compression_loss": 0.0, "distillation_loss": 0.3783082962036133, "epoch": 8.4, "learning_rate": 1.4062322868863515e-05, "loss": 0.3594, "step": 23230, "task_loss": 0.38089364767074585 }, { "compression_loss": 0.0, "distillation_loss": 0.21668830513954163, "epoch": 8.4, "learning_rate": 1.4004649665328914e-05, "loss": 0.316, "step": 23240, "task_loss": 0.3113134503364563 }, { "compression_loss": 0.0, "distillation_loss": 0.32121360301971436, "epoch": 8.4, "learning_rate": 1.3947058939199343e-05, "loss": 0.3575, "step": 23250, "task_loss": 0.3753986656665802 }, { "epoch": 8.4, "eval_exact_match": 83.12204351939451, "eval_f1": 89.97779373367567, "step": 23250 }, { "compression_loss": 0.0, "distillation_loss": 0.32461658120155334, "epoch": 8.41, "learning_rate": 1.388955098743196e-05, "loss": 0.3335, "step": 23260, "task_loss": 0.4626964330673218 }, { "compression_loss": 0.0, "distillation_loss": 0.31419169902801514, "epoch": 8.41, "learning_rate": 1.3832126106557103e-05, "loss": 0.3385, "step": 23270, "task_loss": 0.5672830939292908 }, { "compression_loss": 0.0, "distillation_loss": 0.28030550479888916, "epoch": 8.41, "learning_rate": 1.377478459267674e-05, "loss": 0.3333, "step": 23280, "task_loss": 0.6520091891288757 }, { "compression_loss": 0.0, "distillation_loss": 0.3894043564796448, "epoch": 8.42, "learning_rate": 1.3717526741463045e-05, "loss": 0.3725, "step": 23290, "task_loss": 0.9674755334854126 }, { "compression_loss": 0.0, "distillation_loss": 0.3570999801158905, "epoch": 8.42, "learning_rate": 1.3660352848156717e-05, "loss": 0.3313, "step": 23300, "task_loss": 0.33604758977890015 }, { "compression_loss": 0.0, "distillation_loss": 0.3844010829925537, "epoch": 8.42, "learning_rate": 1.3603263207565584e-05, "loss": 0.3143, "step": 23310, "task_loss": 0.5411736369132996 }, { "compression_loss": 0.0, "distillation_loss": 0.2638304829597473, "epoch": 8.43, "learning_rate": 1.3546258114063033e-05, "loss": 0.3217, "step": 23320, "task_loss": 0.6530275344848633 }, { "compression_loss": 0.0, "distillation_loss": 0.3618456721305847, "epoch": 8.43, "learning_rate": 1.3489337861586507e-05, "loss": 0.341, "step": 23330, "task_loss": 0.48471835255622864 }, { "compression_loss": 0.0, "distillation_loss": 0.4442026615142822, "epoch": 8.44, "learning_rate": 1.3432502743635948e-05, "loss": 0.3354, "step": 23340, "task_loss": 0.5538107752799988 }, { "compression_loss": 0.0, "distillation_loss": 0.28205060958862305, "epoch": 8.44, "learning_rate": 1.3375753053272343e-05, "loss": 0.3587, "step": 23350, "task_loss": 0.4763302206993103 }, { "compression_loss": 0.0, "distillation_loss": 0.3960496783256531, "epoch": 8.44, "learning_rate": 1.3319089083116176e-05, "loss": 0.3454, "step": 23360, "task_loss": 0.461125910282135 }, { "compression_loss": 0.0, "distillation_loss": 0.2497769594192505, "epoch": 8.45, "learning_rate": 1.326251112534595e-05, "loss": 0.353, "step": 23370, "task_loss": 0.6359678506851196 }, { "compression_loss": 0.0, "distillation_loss": 0.4250258803367615, "epoch": 8.45, "learning_rate": 1.32060194716966e-05, "loss": 0.3319, "step": 23380, "task_loss": 0.5756826400756836 }, { "compression_loss": 0.0, "distillation_loss": 0.4019028842449188, "epoch": 8.45, "learning_rate": 1.3149614413458139e-05, "loss": 0.3462, "step": 23390, "task_loss": 0.9020615220069885 }, { "compression_loss": 0.0, "distillation_loss": 0.3332751989364624, "epoch": 8.46, "learning_rate": 1.3093296241473975e-05, "loss": 0.3435, "step": 23400, "task_loss": 0.681817889213562 }, { "compression_loss": 0.0, "distillation_loss": 0.2737824618816376, "epoch": 8.46, "learning_rate": 1.3037065246139558e-05, "loss": 0.3495, "step": 23410, "task_loss": 0.40974706411361694 }, { "compression_loss": 0.0, "distillation_loss": 0.3022178113460541, "epoch": 8.46, "learning_rate": 1.2980921717400803e-05, "loss": 0.3158, "step": 23420, "task_loss": 0.33448854088783264 }, { "compression_loss": 0.0, "distillation_loss": 0.31435340642929077, "epoch": 8.47, "learning_rate": 1.292486594475264e-05, "loss": 0.3092, "step": 23430, "task_loss": 0.4791732430458069 }, { "compression_loss": 0.0, "distillation_loss": 0.311881422996521, "epoch": 8.47, "learning_rate": 1.2868898217237457e-05, "loss": 0.3531, "step": 23440, "task_loss": 0.34755033254623413 }, { "compression_loss": 0.0, "distillation_loss": 0.28101277351379395, "epoch": 8.47, "learning_rate": 1.2813018823443685e-05, "loss": 0.3431, "step": 23450, "task_loss": 0.3975358009338379 }, { "compression_loss": 0.0, "distillation_loss": 0.36834776401519775, "epoch": 8.48, "learning_rate": 1.2757228051504276e-05, "loss": 0.3747, "step": 23460, "task_loss": 0.6111078262329102 }, { "compression_loss": 0.0, "distillation_loss": 0.32710838317871094, "epoch": 8.48, "learning_rate": 1.2701526189095216e-05, "loss": 0.3374, "step": 23470, "task_loss": 0.6565884351730347 }, { "compression_loss": 0.0, "distillation_loss": 0.27171874046325684, "epoch": 8.49, "learning_rate": 1.2645913523434012e-05, "loss": 0.3174, "step": 23480, "task_loss": 0.7133381962776184 }, { "compression_loss": 0.0, "distillation_loss": 0.28032228350639343, "epoch": 8.49, "learning_rate": 1.2590390341278302e-05, "loss": 0.3107, "step": 23490, "task_loss": 0.2568185031414032 }, { "compression_loss": 0.0, "distillation_loss": 0.33454135060310364, "epoch": 8.49, "learning_rate": 1.2534956928924263e-05, "loss": 0.3542, "step": 23500, "task_loss": 0.3434966206550598 }, { "epoch": 8.49, "eval_exact_match": 83.09366130558183, "eval_f1": 89.88354014153792, "step": 23500 }, { "compression_loss": 0.0, "distillation_loss": 0.2474634200334549, "epoch": 8.5, "learning_rate": 1.2479613572205212e-05, "loss": 0.3277, "step": 23510, "task_loss": 0.43857425451278687 }, { "compression_loss": 0.0, "distillation_loss": 0.36067289113998413, "epoch": 8.5, "learning_rate": 1.2424360556490109e-05, "loss": 0.3517, "step": 23520, "task_loss": 0.5712844133377075 }, { "compression_loss": 0.0, "distillation_loss": 0.2622072398662567, "epoch": 8.5, "learning_rate": 1.2369198166682094e-05, "loss": 0.321, "step": 23530, "task_loss": 0.4479938745498657 }, { "compression_loss": 0.0, "distillation_loss": 0.24256464838981628, "epoch": 8.51, "learning_rate": 1.2314126687216974e-05, "loss": 0.3231, "step": 23540, "task_loss": 0.3161258101463318 }, { "compression_loss": 0.0, "distillation_loss": 0.24894395470619202, "epoch": 8.51, "learning_rate": 1.2259146402061829e-05, "loss": 0.3464, "step": 23550, "task_loss": 0.3210480809211731 }, { "compression_loss": 0.0, "distillation_loss": 0.3175157904624939, "epoch": 8.51, "learning_rate": 1.2204257594713501e-05, "loss": 0.3401, "step": 23560, "task_loss": 0.35496532917022705 }, { "compression_loss": 0.0, "distillation_loss": 0.2891998589038849, "epoch": 8.52, "learning_rate": 1.2149460548197143e-05, "loss": 0.3373, "step": 23570, "task_loss": 0.4902755618095398 }, { "compression_loss": 0.0, "distillation_loss": 0.38131245970726013, "epoch": 8.52, "learning_rate": 1.2094755545064719e-05, "loss": 0.3375, "step": 23580, "task_loss": 0.6391329765319824 }, { "compression_loss": 0.0, "distillation_loss": 0.3201846480369568, "epoch": 8.53, "learning_rate": 1.2040142867393671e-05, "loss": 0.3589, "step": 23590, "task_loss": 0.46474406123161316 }, { "compression_loss": 0.0, "distillation_loss": 0.32470813393592834, "epoch": 8.53, "learning_rate": 1.1985622796785295e-05, "loss": 0.3063, "step": 23600, "task_loss": 0.8694692850112915 }, { "compression_loss": 0.0, "distillation_loss": 0.3770723044872284, "epoch": 8.53, "learning_rate": 1.1931195614363417e-05, "loss": 0.3198, "step": 23610, "task_loss": 0.585353434085846 }, { "compression_loss": 0.0, "distillation_loss": 0.3081551492214203, "epoch": 8.54, "learning_rate": 1.1876861600772893e-05, "loss": 0.3371, "step": 23620, "task_loss": 0.6682502627372742 }, { "compression_loss": 0.0, "distillation_loss": 0.41357192397117615, "epoch": 8.54, "learning_rate": 1.1822621036178173e-05, "loss": 0.359, "step": 23630, "task_loss": 0.5059230327606201 }, { "compression_loss": 0.0, "distillation_loss": 0.31322067975997925, "epoch": 8.54, "learning_rate": 1.1768474200261838e-05, "loss": 0.3108, "step": 23640, "task_loss": 0.473344087600708 }, { "compression_loss": 0.0, "distillation_loss": 0.2495288848876953, "epoch": 8.55, "learning_rate": 1.1714421372223179e-05, "loss": 0.3173, "step": 23650, "task_loss": 0.38251379132270813 }, { "compression_loss": 0.0, "distillation_loss": 0.21137635409832, "epoch": 8.55, "learning_rate": 1.1660462830776766e-05, "loss": 0.3165, "step": 23660, "task_loss": 0.6774376630783081 }, { "compression_loss": 0.0, "distillation_loss": 0.25261959433555603, "epoch": 8.55, "learning_rate": 1.1606598854150983e-05, "loss": 0.3302, "step": 23670, "task_loss": 0.6455027461051941 }, { "compression_loss": 0.0, "distillation_loss": 0.38040322065353394, "epoch": 8.56, "learning_rate": 1.1552829720086581e-05, "loss": 0.366, "step": 23680, "task_loss": 0.4775557518005371 }, { "compression_loss": 0.0, "distillation_loss": 0.3587028384208679, "epoch": 8.56, "learning_rate": 1.1499155705835338e-05, "loss": 0.3393, "step": 23690, "task_loss": 0.6567662358283997 }, { "compression_loss": 0.0, "distillation_loss": 0.4207734167575836, "epoch": 8.57, "learning_rate": 1.1445577088158486e-05, "loss": 0.347, "step": 23700, "task_loss": 0.6872299909591675 }, { "compression_loss": 0.0, "distillation_loss": 0.3458840250968933, "epoch": 8.57, "learning_rate": 1.1392094143325404e-05, "loss": 0.3251, "step": 23710, "task_loss": 0.5791058540344238 }, { "compression_loss": 0.0, "distillation_loss": 0.2893916368484497, "epoch": 8.57, "learning_rate": 1.1338707147112145e-05, "loss": 0.3497, "step": 23720, "task_loss": 0.3764035403728485 }, { "compression_loss": 0.0, "distillation_loss": 0.3328968584537506, "epoch": 8.58, "learning_rate": 1.1285416374800018e-05, "loss": 0.3678, "step": 23730, "task_loss": 0.46390044689178467 }, { "compression_loss": 0.0, "distillation_loss": 0.2898051142692566, "epoch": 8.58, "learning_rate": 1.1232222101174148e-05, "loss": 0.3194, "step": 23740, "task_loss": 0.8645293712615967 }, { "compression_loss": 0.0, "distillation_loss": 0.2928909659385681, "epoch": 8.58, "learning_rate": 1.1179124600522107e-05, "loss": 0.3093, "step": 23750, "task_loss": 0.472074955701828 }, { "epoch": 8.58, "eval_exact_match": 83.28287606433302, "eval_f1": 90.04093742477677, "step": 23750 }, { "compression_loss": 0.0, "distillation_loss": 0.3210963010787964, "epoch": 8.59, "learning_rate": 1.1126124146632464e-05, "loss": 0.3379, "step": 23760, "task_loss": 0.5641353130340576 }, { "compression_loss": 0.0, "distillation_loss": 0.32252565026283264, "epoch": 8.59, "learning_rate": 1.1073221012793393e-05, "loss": 0.3445, "step": 23770, "task_loss": 0.6479802131652832 }, { "compression_loss": 0.0, "distillation_loss": 0.290593683719635, "epoch": 8.59, "learning_rate": 1.102041547179121e-05, "loss": 0.3177, "step": 23780, "task_loss": 0.4076792597770691 }, { "compression_loss": 0.0, "distillation_loss": 0.30375468730926514, "epoch": 8.6, "learning_rate": 1.0967707795909077e-05, "loss": 0.3838, "step": 23790, "task_loss": 0.4609778821468353 }, { "compression_loss": 0.0, "distillation_loss": 0.2941964268684387, "epoch": 8.6, "learning_rate": 1.0915098256925474e-05, "loss": 0.3539, "step": 23800, "task_loss": 0.5235286355018616 }, { "compression_loss": 0.0, "distillation_loss": 0.33607012033462524, "epoch": 8.6, "learning_rate": 1.0862587126112873e-05, "loss": 0.3128, "step": 23810, "task_loss": 0.6910580396652222 }, { "compression_loss": 0.0, "distillation_loss": 0.321811318397522, "epoch": 8.61, "learning_rate": 1.0810174674236335e-05, "loss": 0.2864, "step": 23820, "task_loss": 0.5703722834587097 }, { "compression_loss": 0.0, "distillation_loss": 0.37454020977020264, "epoch": 8.61, "learning_rate": 1.0757861171552074e-05, "loss": 0.3266, "step": 23830, "task_loss": 0.5501848459243774 }, { "compression_loss": 0.0, "distillation_loss": 0.32955604791641235, "epoch": 8.62, "learning_rate": 1.0705646887806119e-05, "loss": 0.3291, "step": 23840, "task_loss": 0.6074403524398804 }, { "compression_loss": 0.0, "distillation_loss": 0.3324754238128662, "epoch": 8.62, "learning_rate": 1.0653532092232843e-05, "loss": 0.3458, "step": 23850, "task_loss": 0.34126532077789307 }, { "compression_loss": 0.0, "distillation_loss": 0.33597028255462646, "epoch": 8.62, "learning_rate": 1.0601517053553697e-05, "loss": 0.3562, "step": 23860, "task_loss": 0.7301780581474304 }, { "compression_loss": 0.0, "distillation_loss": 0.3327868580818176, "epoch": 8.63, "learning_rate": 1.0549602039975683e-05, "loss": 0.3614, "step": 23870, "task_loss": 0.520077645778656 }, { "compression_loss": 0.0, "distillation_loss": 0.2888182997703552, "epoch": 8.63, "learning_rate": 1.0497787319190076e-05, "loss": 0.3371, "step": 23880, "task_loss": 0.4426148235797882 }, { "compression_loss": 0.0, "distillation_loss": 0.25183236598968506, "epoch": 8.63, "learning_rate": 1.0446073158370996e-05, "loss": 0.3579, "step": 23890, "task_loss": 0.7299506664276123 }, { "compression_loss": 0.0, "distillation_loss": 0.29746419191360474, "epoch": 8.64, "learning_rate": 1.0394459824174065e-05, "loss": 0.3397, "step": 23900, "task_loss": 0.5756563544273376 }, { "compression_loss": 0.0, "distillation_loss": 0.3072645664215088, "epoch": 8.64, "learning_rate": 1.0342947582734962e-05, "loss": 0.3284, "step": 23910, "task_loss": 0.5769457817077637 }, { "compression_loss": 0.0, "distillation_loss": 0.35788294672966003, "epoch": 8.64, "learning_rate": 1.029153669966814e-05, "loss": 0.3455, "step": 23920, "task_loss": 0.6118526458740234 }, { "compression_loss": 0.0, "distillation_loss": 0.2843881845474243, "epoch": 8.65, "learning_rate": 1.02402274400654e-05, "loss": 0.34, "step": 23930, "task_loss": 0.4474141299724579 }, { "compression_loss": 0.0, "distillation_loss": 0.3228676915168762, "epoch": 8.65, "learning_rate": 1.0189020068494543e-05, "loss": 0.3477, "step": 23940, "task_loss": 0.43117979168891907 }, { "compression_loss": 0.0, "distillation_loss": 0.32196763157844543, "epoch": 8.66, "learning_rate": 1.0137914848997974e-05, "loss": 0.3313, "step": 23950, "task_loss": 0.8009005784988403 }, { "compression_loss": 0.0, "distillation_loss": 0.321696400642395, "epoch": 8.66, "learning_rate": 1.0086912045091422e-05, "loss": 0.3255, "step": 23960, "task_loss": 0.3024410009384155 }, { "compression_loss": 0.0, "distillation_loss": 0.34478169679641724, "epoch": 8.66, "learning_rate": 1.0036011919762471e-05, "loss": 0.3573, "step": 23970, "task_loss": 0.5820472836494446 }, { "compression_loss": 0.0, "distillation_loss": 0.30829232931137085, "epoch": 8.67, "learning_rate": 9.98521473546929e-06, "loss": 0.3552, "step": 23980, "task_loss": 0.7267586588859558 }, { "compression_loss": 0.0, "distillation_loss": 0.3902008533477783, "epoch": 8.67, "learning_rate": 9.93452075413924e-06, "loss": 0.3605, "step": 23990, "task_loss": 0.5337940454483032 }, { "compression_loss": 0.0, "distillation_loss": 0.3022475242614746, "epoch": 8.67, "learning_rate": 9.88393023716755e-06, "loss": 0.3119, "step": 24000, "task_loss": 0.2621081471443176 }, { "epoch": 8.67, "eval_exact_match": 83.20719016083254, "eval_f1": 89.99360400964093, "step": 24000 }, { "compression_loss": 0.0, "distillation_loss": 0.3147343397140503, "epoch": 8.68, "learning_rate": 9.833443445415917e-06, "loss": 0.3734, "step": 24010, "task_loss": 0.4403849244117737 }, { "compression_loss": 0.0, "distillation_loss": 0.3691180348396301, "epoch": 8.68, "learning_rate": 9.783060639211225e-06, "loss": 0.3333, "step": 24020, "task_loss": 0.7338327169418335 }, { "compression_loss": 0.0, "distillation_loss": 0.34679627418518066, "epoch": 8.68, "learning_rate": 9.732782078344166e-06, "loss": 0.3361, "step": 24030, "task_loss": 0.6814612746238708 }, { "compression_loss": 0.0, "distillation_loss": 0.24168024957180023, "epoch": 8.69, "learning_rate": 9.682608022067903e-06, "loss": 0.3084, "step": 24040, "task_loss": 0.3496478199958801 }, { "compression_loss": 0.0, "distillation_loss": 0.25266405940055847, "epoch": 8.69, "learning_rate": 9.632538729096749e-06, "loss": 0.3407, "step": 24050, "task_loss": 0.8575865030288696 }, { "compression_loss": 0.0, "distillation_loss": 0.2826876640319824, "epoch": 8.7, "learning_rate": 9.58257445760482e-06, "loss": 0.3108, "step": 24060, "task_loss": 0.5487475395202637 }, { "compression_loss": 0.0, "distillation_loss": 0.3644624948501587, "epoch": 8.7, "learning_rate": 9.532715465224678e-06, "loss": 0.3213, "step": 24070, "task_loss": 0.26308804750442505 }, { "compression_loss": 0.0, "distillation_loss": 0.37130007147789, "epoch": 8.7, "learning_rate": 9.482962009046073e-06, "loss": 0.3427, "step": 24080, "task_loss": 0.6274560689926147 }, { "compression_loss": 0.0, "distillation_loss": 0.34187111258506775, "epoch": 8.71, "learning_rate": 9.433314345614548e-06, "loss": 0.3268, "step": 24090, "task_loss": 0.6488524675369263 }, { "compression_loss": 0.0, "distillation_loss": 0.3473740220069885, "epoch": 8.71, "learning_rate": 9.383772730930163e-06, "loss": 0.3466, "step": 24100, "task_loss": 0.5115723609924316 }, { "compression_loss": 0.0, "distillation_loss": 0.34605729579925537, "epoch": 8.71, "learning_rate": 9.334337420446116e-06, "loss": 0.3619, "step": 24110, "task_loss": 0.5676177740097046 }, { "compression_loss": 0.0, "distillation_loss": 0.27803748846054077, "epoch": 8.72, "learning_rate": 9.285008669067524e-06, "loss": 0.3174, "step": 24120, "task_loss": 1.14910089969635 }, { "compression_loss": 0.0, "distillation_loss": 0.31176722049713135, "epoch": 8.72, "learning_rate": 9.23578673114999e-06, "loss": 0.3318, "step": 24130, "task_loss": 0.6742380857467651 }, { "compression_loss": 0.0, "distillation_loss": 0.412189245223999, "epoch": 8.72, "learning_rate": 9.18667186049839e-06, "loss": 0.3511, "step": 24140, "task_loss": 0.9542233943939209 }, { "compression_loss": 0.0, "distillation_loss": 0.30980244278907776, "epoch": 8.73, "learning_rate": 9.13766431036551e-06, "loss": 0.354, "step": 24150, "task_loss": 0.554745078086853 }, { "compression_loss": 0.0, "distillation_loss": 0.3011392056941986, "epoch": 8.73, "learning_rate": 9.088764333450769e-06, "loss": 0.3053, "step": 24160, "task_loss": 0.3188933730125427 }, { "compression_loss": 0.0, "distillation_loss": 0.31323549151420593, "epoch": 8.74, "learning_rate": 9.039972181898872e-06, "loss": 0.3359, "step": 24170, "task_loss": 0.7727498412132263 }, { "compression_loss": 0.0, "distillation_loss": 0.375557541847229, "epoch": 8.74, "learning_rate": 8.991288107298571e-06, "loss": 0.3463, "step": 24180, "task_loss": 0.512423574924469 }, { "compression_loss": 0.0, "distillation_loss": 0.33595043420791626, "epoch": 8.74, "learning_rate": 8.942712360681321e-06, "loss": 0.337, "step": 24190, "task_loss": 0.27191513776779175 }, { "compression_loss": 0.0, "distillation_loss": 0.33838656544685364, "epoch": 8.75, "learning_rate": 8.894245192520018e-06, "loss": 0.3529, "step": 24200, "task_loss": 0.5304921865463257 }, { "compression_loss": 0.0, "distillation_loss": 0.3112967610359192, "epoch": 8.75, "learning_rate": 8.845886852727651e-06, "loss": 0.3491, "step": 24210, "task_loss": 0.6719498634338379 }, { "compression_loss": 0.0, "distillation_loss": 0.24464872479438782, "epoch": 8.75, "learning_rate": 8.79763759065612e-06, "loss": 0.3141, "step": 24220, "task_loss": 0.7396454215049744 }, { "compression_loss": 0.0, "distillation_loss": 0.2790934443473816, "epoch": 8.76, "learning_rate": 8.749497655094812e-06, "loss": 0.3297, "step": 24230, "task_loss": 0.21234217286109924 }, { "compression_loss": 0.0, "distillation_loss": 0.2472725212574005, "epoch": 8.76, "learning_rate": 8.701467294269437e-06, "loss": 0.3184, "step": 24240, "task_loss": 0.30988842248916626 }, { "compression_loss": 0.0, "distillation_loss": 0.2908933162689209, "epoch": 8.76, "learning_rate": 8.653546755840686e-06, "loss": 0.3048, "step": 24250, "task_loss": 0.3451705873012543 }, { "epoch": 8.76, "eval_exact_match": 83.28287606433302, "eval_f1": 90.04894867144425, "step": 24250 }, { "compression_loss": 0.0, "distillation_loss": 0.3388180434703827, "epoch": 8.77, "learning_rate": 8.605736286902976e-06, "loss": 0.3563, "step": 24260, "task_loss": 0.44518616795539856 }, { "compression_loss": 0.0, "distillation_loss": 0.3412635624408722, "epoch": 8.77, "learning_rate": 8.558036133983143e-06, "loss": 0.3404, "step": 24270, "task_loss": 1.031734585762024 }, { "compression_loss": 0.0, "distillation_loss": 0.2612943649291992, "epoch": 8.77, "learning_rate": 8.510446543039228e-06, "loss": 0.3304, "step": 24280, "task_loss": 0.7055977582931519 }, { "compression_loss": 0.0, "distillation_loss": 0.28402096033096313, "epoch": 8.78, "learning_rate": 8.462967759459164e-06, "loss": 0.3342, "step": 24290, "task_loss": 0.5784477591514587 }, { "compression_loss": 0.0, "distillation_loss": 0.39517825841903687, "epoch": 8.78, "learning_rate": 8.415600028059531e-06, "loss": 0.3588, "step": 24300, "task_loss": 0.700530469417572 }, { "compression_loss": 0.0, "distillation_loss": 0.3836681544780731, "epoch": 8.79, "learning_rate": 8.368343593084255e-06, "loss": 0.3471, "step": 24310, "task_loss": 0.4598133862018585 }, { "compression_loss": 0.0, "distillation_loss": 0.30892038345336914, "epoch": 8.79, "learning_rate": 8.32119869820344e-06, "loss": 0.3385, "step": 24320, "task_loss": 0.5151838660240173 }, { "compression_loss": 0.0, "distillation_loss": 0.358475923538208, "epoch": 8.79, "learning_rate": 8.274165586511993e-06, "loss": 0.3616, "step": 24330, "task_loss": 0.7977138757705688 }, { "compression_loss": 0.0, "distillation_loss": 0.4102436900138855, "epoch": 8.8, "learning_rate": 8.227244500528464e-06, "loss": 0.386, "step": 24340, "task_loss": 0.9865103960037231 }, { "compression_loss": 0.0, "distillation_loss": 0.25304460525512695, "epoch": 8.8, "learning_rate": 8.180435682193744e-06, "loss": 0.3081, "step": 24350, "task_loss": 0.3072149455547333 }, { "compression_loss": 0.0, "distillation_loss": 0.2997356057167053, "epoch": 8.8, "learning_rate": 8.133739372869856e-06, "loss": 0.3325, "step": 24360, "task_loss": 0.4807734489440918 }, { "compression_loss": 0.0, "distillation_loss": 0.30522841215133667, "epoch": 8.81, "learning_rate": 8.08715581333865e-06, "loss": 0.3412, "step": 24370, "task_loss": 0.40793800354003906 }, { "compression_loss": 0.0, "distillation_loss": 0.3938402235507965, "epoch": 8.81, "learning_rate": 8.040685243800636e-06, "loss": 0.334, "step": 24380, "task_loss": 0.4832313358783722 }, { "compression_loss": 0.0, "distillation_loss": 0.38264673948287964, "epoch": 8.81, "learning_rate": 7.994327903873696e-06, "loss": 0.3277, "step": 24390, "task_loss": 0.7113062143325806 }, { "compression_loss": 0.0, "distillation_loss": 0.2893344759941101, "epoch": 8.82, "learning_rate": 7.948084032591872e-06, "loss": 0.3337, "step": 24400, "task_loss": 0.28811150789260864 }, { "compression_loss": 0.0, "distillation_loss": 0.18665549159049988, "epoch": 8.82, "learning_rate": 7.901953868404086e-06, "loss": 0.3474, "step": 24410, "task_loss": 0.5828348398208618 }, { "compression_loss": 0.0, "distillation_loss": 0.2941107153892517, "epoch": 8.83, "learning_rate": 7.855937649173017e-06, "loss": 0.3248, "step": 24420, "task_loss": 0.45032960176467896 }, { "compression_loss": 0.0, "distillation_loss": 0.2906193137168884, "epoch": 8.83, "learning_rate": 7.810035612173736e-06, "loss": 0.3679, "step": 24430, "task_loss": 0.7031286954879761 }, { "compression_loss": 0.0, "distillation_loss": 0.39351212978363037, "epoch": 8.83, "learning_rate": 7.764247994092586e-06, "loss": 0.3464, "step": 24440, "task_loss": 0.3175731897354126 }, { "compression_loss": 0.0, "distillation_loss": 0.2753757834434509, "epoch": 8.84, "learning_rate": 7.718575031025928e-06, "loss": 0.3265, "step": 24450, "task_loss": 0.5854820013046265 }, { "compression_loss": 0.0, "distillation_loss": 0.3078010082244873, "epoch": 8.84, "learning_rate": 7.673016958478925e-06, "loss": 0.3592, "step": 24460, "task_loss": 0.7642509341239929 }, { "compression_loss": 0.0, "distillation_loss": 0.286521852016449, "epoch": 8.84, "learning_rate": 7.627574011364303e-06, "loss": 0.3358, "step": 24470, "task_loss": 0.549680233001709 }, { "compression_loss": 0.0, "distillation_loss": 0.2735077142715454, "epoch": 8.85, "learning_rate": 7.582246424001182e-06, "loss": 0.3227, "step": 24480, "task_loss": 0.30459415912628174 }, { "compression_loss": 0.0, "distillation_loss": 0.33238837122917175, "epoch": 8.85, "learning_rate": 7.5370344301138494e-06, "loss": 0.3252, "step": 24490, "task_loss": 0.2824766933917999 }, { "compression_loss": 0.0, "distillation_loss": 0.29000821709632874, "epoch": 8.85, "learning_rate": 7.491938262830554e-06, "loss": 0.2903, "step": 24500, "task_loss": 0.8501275777816772 }, { "epoch": 8.85, "eval_exact_match": 83.41532639545885, "eval_f1": 89.98208448301646, "step": 24500 }, { "compression_loss": 0.0, "distillation_loss": 0.3185809850692749, "epoch": 8.86, "learning_rate": 7.446958154682279e-06, "loss": 0.3237, "step": 24510, "task_loss": 0.9856406450271606 }, { "compression_loss": 0.0, "distillation_loss": 0.3409211039543152, "epoch": 8.86, "learning_rate": 7.402094337601612e-06, "loss": 0.329, "step": 24520, "task_loss": 0.4515220522880554 }, { "compression_loss": 0.0, "distillation_loss": 0.22276683151721954, "epoch": 8.87, "learning_rate": 7.357347042921463e-06, "loss": 0.3529, "step": 24530, "task_loss": 0.22912217676639557 }, { "compression_loss": 0.0, "distillation_loss": 0.4352920949459076, "epoch": 8.87, "learning_rate": 7.312716501373936e-06, "loss": 0.3475, "step": 24540, "task_loss": 0.6683515310287476 }, { "compression_loss": 0.0, "distillation_loss": 0.2936110496520996, "epoch": 8.87, "learning_rate": 7.26820294308911e-06, "loss": 0.3313, "step": 24550, "task_loss": 0.36033982038497925 }, { "compression_loss": 0.0, "distillation_loss": 0.3248167037963867, "epoch": 8.88, "learning_rate": 7.223806597593867e-06, "loss": 0.3343, "step": 24560, "task_loss": 0.5283554792404175 }, { "compression_loss": 0.0, "distillation_loss": 0.35730212926864624, "epoch": 8.88, "learning_rate": 7.179527693810683e-06, "loss": 0.3508, "step": 24570, "task_loss": 1.0318670272827148 }, { "compression_loss": 0.0, "distillation_loss": 0.5101932287216187, "epoch": 8.88, "learning_rate": 7.135366460056477e-06, "loss": 0.3355, "step": 24580, "task_loss": 0.7086995840072632 }, { "compression_loss": 0.0, "distillation_loss": 0.3750256896018982, "epoch": 8.89, "learning_rate": 7.091323124041422e-06, "loss": 0.3953, "step": 24590, "task_loss": 0.8333643674850464 }, { "compression_loss": 0.0, "distillation_loss": 0.2565135657787323, "epoch": 8.89, "learning_rate": 7.047397912867779e-06, "loss": 0.3184, "step": 24600, "task_loss": 0.31886714696884155 }, { "compression_loss": 0.0, "distillation_loss": 0.3164982199668884, "epoch": 8.89, "learning_rate": 7.003591053028682e-06, "loss": 0.3167, "step": 24610, "task_loss": 0.41132819652557373 }, { "compression_loss": 0.0, "distillation_loss": 0.40736451745033264, "epoch": 8.9, "learning_rate": 6.959902770407063e-06, "loss": 0.3695, "step": 24620, "task_loss": 0.8851571083068848 }, { "compression_loss": 0.0, "distillation_loss": 0.27555185556411743, "epoch": 8.9, "learning_rate": 6.916333290274374e-06, "loss": 0.3252, "step": 24630, "task_loss": 0.5711570978164673 }, { "compression_loss": 0.0, "distillation_loss": 0.2528682053089142, "epoch": 8.9, "learning_rate": 6.872882837289513e-06, "loss": 0.3101, "step": 24640, "task_loss": 0.5852243900299072 }, { "compression_loss": 0.0, "distillation_loss": 0.3240984380245209, "epoch": 8.91, "learning_rate": 6.829551635497628e-06, "loss": 0.3337, "step": 24650, "task_loss": 0.9316442012786865 }, { "compression_loss": 0.0, "distillation_loss": 0.23819328844547272, "epoch": 8.91, "learning_rate": 6.786339908328972e-06, "loss": 0.3461, "step": 24660, "task_loss": 0.5214189291000366 }, { "compression_loss": 0.0, "distillation_loss": 0.45258206129074097, "epoch": 8.92, "learning_rate": 6.743247878597731e-06, "loss": 0.3531, "step": 24670, "task_loss": 0.5347813367843628 }, { "compression_loss": 0.0, "distillation_loss": 0.30123329162597656, "epoch": 8.92, "learning_rate": 6.7002757685009e-06, "loss": 0.3493, "step": 24680, "task_loss": 0.7872360944747925 }, { "compression_loss": 0.0, "distillation_loss": 0.2336387187242508, "epoch": 8.92, "learning_rate": 6.657423799617136e-06, "loss": 0.3438, "step": 24690, "task_loss": 0.49200066924095154 }, { "compression_loss": 0.0, "distillation_loss": 0.35450315475463867, "epoch": 8.93, "learning_rate": 6.614692192905601e-06, "loss": 0.3303, "step": 24700, "task_loss": 0.516528844833374 }, { "compression_loss": 0.0, "distillation_loss": 0.300314337015152, "epoch": 8.93, "learning_rate": 6.572081168704813e-06, "loss": 0.3105, "step": 24710, "task_loss": 0.5251568555831909 }, { "compression_loss": 0.0, "distillation_loss": 0.31300753355026245, "epoch": 8.93, "learning_rate": 6.529590946731566e-06, "loss": 0.3362, "step": 24720, "task_loss": 0.43648332357406616 }, { "compression_loss": 0.0, "distillation_loss": 0.35132792592048645, "epoch": 8.94, "learning_rate": 6.487221746079714e-06, "loss": 0.3731, "step": 24730, "task_loss": 0.6553832292556763 }, { "compression_loss": 0.0, "distillation_loss": 0.22395357489585876, "epoch": 8.94, "learning_rate": 6.444973785219107e-06, "loss": 0.3393, "step": 24740, "task_loss": 0.3021842837333679 }, { "compression_loss": 0.0, "distillation_loss": 0.315970242023468, "epoch": 8.94, "learning_rate": 6.402847281994443e-06, "loss": 0.2966, "step": 24750, "task_loss": 0.5357693433761597 }, { "epoch": 8.94, "eval_exact_match": 83.23557237464522, "eval_f1": 90.0436443008999, "step": 24750 }, { "compression_loss": 0.0, "distillation_loss": 0.3409693241119385, "epoch": 8.95, "learning_rate": 6.360842453624137e-06, "loss": 0.3445, "step": 24760, "task_loss": 0.9291391372680664 }, { "compression_loss": 0.0, "distillation_loss": 0.38368433713912964, "epoch": 8.95, "learning_rate": 6.318959516699202e-06, "loss": 0.3723, "step": 24770, "task_loss": 0.6492195129394531 }, { "compression_loss": 0.0, "distillation_loss": 0.25230035185813904, "epoch": 8.96, "learning_rate": 6.277198687182152e-06, "loss": 0.3391, "step": 24780, "task_loss": 0.8180399537086487 }, { "compression_loss": 0.0, "distillation_loss": 0.3488203287124634, "epoch": 8.96, "learning_rate": 6.235560180405858e-06, "loss": 0.3587, "step": 24790, "task_loss": 0.5325171947479248 }, { "compression_loss": 0.0, "distillation_loss": 0.4679383635520935, "epoch": 8.96, "learning_rate": 6.1940442110724805e-06, "loss": 0.3328, "step": 24800, "task_loss": 0.6240459680557251 }, { "compression_loss": 0.0, "distillation_loss": 0.39593812823295593, "epoch": 8.97, "learning_rate": 6.1526509932522904e-06, "loss": 0.3521, "step": 24810, "task_loss": 0.3513973355293274 }, { "compression_loss": 0.0, "distillation_loss": 0.39898785948753357, "epoch": 8.97, "learning_rate": 6.111380740382668e-06, "loss": 0.3396, "step": 24820, "task_loss": 0.6773884892463684 }, { "compression_loss": 0.0, "distillation_loss": 0.33439552783966064, "epoch": 8.97, "learning_rate": 6.070233665266894e-06, "loss": 0.323, "step": 24830, "task_loss": 0.5181454420089722 }, { "compression_loss": 0.0, "distillation_loss": 0.23859478533267975, "epoch": 8.98, "learning_rate": 6.029209980073129e-06, "loss": 0.3325, "step": 24840, "task_loss": 0.2097017616033554 }, { "compression_loss": 0.0, "distillation_loss": 0.2746514678001404, "epoch": 8.98, "learning_rate": 5.988309896333292e-06, "loss": 0.3399, "step": 24850, "task_loss": 0.3311827480792999 }, { "compression_loss": 0.0, "distillation_loss": 0.3430408239364624, "epoch": 8.98, "learning_rate": 5.947533624941972e-06, "loss": 0.337, "step": 24860, "task_loss": 0.6250514984130859 }, { "compression_loss": 0.0, "distillation_loss": 0.30967992544174194, "epoch": 8.99, "learning_rate": 5.906881376155325e-06, "loss": 0.3168, "step": 24870, "task_loss": 0.4880766272544861 }, { "compression_loss": 0.0, "distillation_loss": 0.34915846586227417, "epoch": 8.99, "learning_rate": 5.866353359590022e-06, "loss": 0.3472, "step": 24880, "task_loss": 0.41612374782562256 }, { "compression_loss": 0.0, "distillation_loss": 0.326454758644104, "epoch": 9.0, "learning_rate": 5.825949784222142e-06, "loss": 0.3226, "step": 24890, "task_loss": 0.3217955529689789 }, { "compression_loss": 0.0, "distillation_loss": 0.32856497168540955, "epoch": 9.0, "learning_rate": 5.785670858386115e-06, "loss": 0.3293, "step": 24900, "task_loss": 0.561482310295105 }, { "compression_loss": 0.0, "distillation_loss": 0.29845374822616577, "epoch": 9.0, "learning_rate": 5.745516789773605e-06, "loss": 0.334, "step": 24910, "task_loss": 0.6011270880699158 }, { "compression_loss": 0.0, "distillation_loss": 0.26393237709999084, "epoch": 9.01, "learning_rate": 5.705487785432524e-06, "loss": 0.3155, "step": 24920, "task_loss": 0.40632134675979614 }, { "compression_loss": 0.0, "distillation_loss": 0.32185572385787964, "epoch": 9.01, "learning_rate": 5.665584051765853e-06, "loss": 0.2986, "step": 24930, "task_loss": 1.1531330347061157 }, { "compression_loss": 0.0, "distillation_loss": 0.2393801510334015, "epoch": 9.01, "learning_rate": 5.62580579453067e-06, "loss": 0.3166, "step": 24940, "task_loss": 0.6275138854980469 }, { "compression_loss": 0.0, "distillation_loss": 0.2692786753177643, "epoch": 9.02, "learning_rate": 5.586153218837046e-06, "loss": 0.3394, "step": 24950, "task_loss": 0.508584201335907 }, { "compression_loss": 0.0, "distillation_loss": 0.5188926458358765, "epoch": 9.02, "learning_rate": 5.546626529147002e-06, "loss": 0.3341, "step": 24960, "task_loss": 0.7853003740310669 }, { "compression_loss": 0.0, "distillation_loss": 0.287403404712677, "epoch": 9.02, "learning_rate": 5.507225929273426e-06, "loss": 0.2979, "step": 24970, "task_loss": 0.2948596477508545 }, { "compression_loss": 0.0, "distillation_loss": 0.2628954350948334, "epoch": 9.03, "learning_rate": 5.4679516223790685e-06, "loss": 0.3202, "step": 24980, "task_loss": 0.34732797741889954 }, { "compression_loss": 0.0, "distillation_loss": 0.21323145925998688, "epoch": 9.03, "learning_rate": 5.428803810975462e-06, "loss": 0.3072, "step": 24990, "task_loss": 0.38041430711746216 }, { "compression_loss": 0.0, "distillation_loss": 0.3073531687259674, "epoch": 9.04, "learning_rate": 5.389782696921894e-06, "loss": 0.3485, "step": 25000, "task_loss": 0.7490738034248352 }, { "epoch": 9.04, "eval_exact_match": 83.09366130558183, "eval_f1": 89.96078940144051, "step": 25000 }, { "compression_loss": 0.0, "distillation_loss": 0.3114092946052551, "epoch": 9.04, "learning_rate": 5.3508884814243295e-06, "loss": 0.3131, "step": 25010, "task_loss": 0.709526538848877 }, { "compression_loss": 0.0, "distillation_loss": 0.19256383180618286, "epoch": 9.04, "learning_rate": 5.31212136503445e-06, "loss": 0.3359, "step": 25020, "task_loss": 0.2658892273902893 }, { "compression_loss": 0.0, "distillation_loss": 0.3078978955745697, "epoch": 9.05, "learning_rate": 5.2734815476485186e-06, "loss": 0.3117, "step": 25030, "task_loss": 0.3521191477775574 }, { "compression_loss": 0.0, "distillation_loss": 0.24306003749370575, "epoch": 9.05, "learning_rate": 5.23496922850644e-06, "loss": 0.333, "step": 25040, "task_loss": 0.3750509023666382 }, { "compression_loss": 0.0, "distillation_loss": 0.2680177688598633, "epoch": 9.05, "learning_rate": 5.1965846061906805e-06, "loss": 0.309, "step": 25050, "task_loss": 0.37960493564605713 }, { "compression_loss": 0.0, "distillation_loss": 0.2974720597267151, "epoch": 9.06, "learning_rate": 5.158327878625265e-06, "loss": 0.313, "step": 25060, "task_loss": 0.557094156742096 }, { "compression_loss": 0.0, "distillation_loss": 0.292024165391922, "epoch": 9.06, "learning_rate": 5.120199243074735e-06, "loss": 0.3009, "step": 25070, "task_loss": 0.4877658486366272 }, { "compression_loss": 0.0, "distillation_loss": 0.321384072303772, "epoch": 9.06, "learning_rate": 5.082198896143158e-06, "loss": 0.3207, "step": 25080, "task_loss": 0.7667458653450012 }, { "compression_loss": 0.0, "distillation_loss": 0.35278797149658203, "epoch": 9.07, "learning_rate": 5.044327033773109e-06, "loss": 0.3105, "step": 25090, "task_loss": 0.5815832614898682 }, { "compression_loss": 0.0, "distillation_loss": 0.2632955312728882, "epoch": 9.07, "learning_rate": 5.0065838512446464e-06, "loss": 0.325, "step": 25100, "task_loss": 0.34696850180625916 }, { "compression_loss": 0.0, "distillation_loss": 0.3683552145957947, "epoch": 9.07, "learning_rate": 4.9689695431742876e-06, "loss": 0.3271, "step": 25110, "task_loss": 0.5845701694488525 }, { "compression_loss": 0.0, "distillation_loss": 0.2908926010131836, "epoch": 9.08, "learning_rate": 4.931484303514092e-06, "loss": 0.3412, "step": 25120, "task_loss": 0.5417877435684204 }, { "compression_loss": 0.0, "distillation_loss": 0.341631144285202, "epoch": 9.08, "learning_rate": 4.8941283255505295e-06, "loss": 0.3059, "step": 25130, "task_loss": 0.5817184448242188 }, { "compression_loss": 0.0, "distillation_loss": 0.2900015115737915, "epoch": 9.09, "learning_rate": 4.856901801903594e-06, "loss": 0.3146, "step": 25140, "task_loss": 0.41760894656181335 }, { "compression_loss": 0.0, "distillation_loss": 0.2963072657585144, "epoch": 9.09, "learning_rate": 4.8198049245257615e-06, "loss": 0.3642, "step": 25150, "task_loss": 0.4821188449859619 }, { "compression_loss": 0.0, "distillation_loss": 0.414448082447052, "epoch": 9.09, "learning_rate": 4.782837884701011e-06, "loss": 0.3301, "step": 25160, "task_loss": 0.5216705203056335 }, { "compression_loss": 0.0, "distillation_loss": 0.24655842781066895, "epoch": 9.1, "learning_rate": 4.746000873043818e-06, "loss": 0.2995, "step": 25170, "task_loss": 0.814030647277832 }, { "compression_loss": 0.0, "distillation_loss": 0.3046429455280304, "epoch": 9.1, "learning_rate": 4.709294079498207e-06, "loss": 0.3073, "step": 25180, "task_loss": 0.6938967704772949 }, { "compression_loss": 0.0, "distillation_loss": 0.38174983859062195, "epoch": 9.1, "learning_rate": 4.672717693336749e-06, "loss": 0.3216, "step": 25190, "task_loss": 0.9253899455070496 }, { "compression_loss": 0.0, "distillation_loss": 0.43978115916252136, "epoch": 9.11, "learning_rate": 4.6362719031596e-06, "loss": 0.3133, "step": 25200, "task_loss": 0.6062501072883606 }, { "compression_loss": 0.0, "distillation_loss": 0.3401195704936981, "epoch": 9.11, "learning_rate": 4.5999568968934854e-06, "loss": 0.3333, "step": 25210, "task_loss": 0.42778706550598145 }, { "compression_loss": 0.0, "distillation_loss": 0.30857905745506287, "epoch": 9.11, "learning_rate": 4.5637728617908196e-06, "loss": 0.3275, "step": 25220, "task_loss": 0.6282418966293335 }, { "compression_loss": 0.0, "distillation_loss": 0.3164075016975403, "epoch": 9.12, "learning_rate": 4.527719984428635e-06, "loss": 0.281, "step": 25230, "task_loss": 0.4652021527290344 }, { "compression_loss": 0.0, "distillation_loss": 0.36166757345199585, "epoch": 9.12, "learning_rate": 4.491798450707693e-06, "loss": 0.3245, "step": 25240, "task_loss": 0.38490480184555054 }, { "compression_loss": 0.0, "distillation_loss": 0.2693134546279907, "epoch": 9.13, "learning_rate": 4.456008445851504e-06, "loss": 0.3044, "step": 25250, "task_loss": 0.4064496159553528 }, { "epoch": 9.13, "eval_exact_match": 83.34910122989594, "eval_f1": 90.04081962102016, "step": 25250 }, { "compression_loss": 0.0, "distillation_loss": 0.5317705869674683, "epoch": 9.13, "learning_rate": 4.420350154405373e-06, "loss": 0.3251, "step": 25260, "task_loss": 0.7308588027954102 }, { "compression_loss": 0.0, "distillation_loss": 0.3306049108505249, "epoch": 9.13, "learning_rate": 4.384823760235432e-06, "loss": 0.3609, "step": 25270, "task_loss": 0.578725278377533 }, { "compression_loss": 0.0, "distillation_loss": 0.3114280700683594, "epoch": 9.14, "learning_rate": 4.3494294465277165e-06, "loss": 0.3169, "step": 25280, "task_loss": 0.4894099533557892 }, { "compression_loss": 0.0, "distillation_loss": 0.25917303562164307, "epoch": 9.14, "learning_rate": 4.314167395787213e-06, "loss": 0.2984, "step": 25290, "task_loss": 0.49306440353393555 }, { "compression_loss": 0.0, "distillation_loss": 0.24381668865680695, "epoch": 9.14, "learning_rate": 4.279037789836915e-06, "loss": 0.3322, "step": 25300, "task_loss": 0.25567013025283813 }, { "compression_loss": 0.0, "distillation_loss": 0.28523534536361694, "epoch": 9.15, "learning_rate": 4.244040809816882e-06, "loss": 0.3257, "step": 25310, "task_loss": 0.6707668304443359 }, { "compression_loss": 0.0, "distillation_loss": 0.3397090435028076, "epoch": 9.15, "learning_rate": 4.209176636183313e-06, "loss": 0.3319, "step": 25320, "task_loss": 0.5605945587158203 }, { "compression_loss": 0.0, "distillation_loss": 0.28864046931266785, "epoch": 9.15, "learning_rate": 4.174445448707604e-06, "loss": 0.3127, "step": 25330, "task_loss": 0.5202219486236572 }, { "compression_loss": 0.0, "distillation_loss": 0.32487910985946655, "epoch": 9.16, "learning_rate": 4.139847426475443e-06, "loss": 0.3338, "step": 25340, "task_loss": 0.44852226972579956 }, { "compression_loss": 0.0, "distillation_loss": 0.3215107321739197, "epoch": 9.16, "learning_rate": 4.105382747885863e-06, "loss": 0.3108, "step": 25350, "task_loss": 0.5720582008361816 }, { "compression_loss": 0.0, "distillation_loss": 0.2655651569366455, "epoch": 9.17, "learning_rate": 4.071051590650343e-06, "loss": 0.3315, "step": 25360, "task_loss": 0.8129599094390869 }, { "compression_loss": 0.0, "distillation_loss": 0.28251737356185913, "epoch": 9.17, "learning_rate": 4.036854131791856e-06, "loss": 0.2958, "step": 25370, "task_loss": 0.7780440449714661 }, { "compression_loss": 0.0, "distillation_loss": 0.3596683144569397, "epoch": 9.17, "learning_rate": 4.002790547644029e-06, "loss": 0.3233, "step": 25380, "task_loss": 0.7711316347122192 }, { "compression_loss": 0.0, "distillation_loss": 0.32312220335006714, "epoch": 9.18, "learning_rate": 3.96886101385013e-06, "loss": 0.3196, "step": 25390, "task_loss": 0.5299769043922424 }, { "compression_loss": 0.0, "distillation_loss": 0.2755924463272095, "epoch": 9.18, "learning_rate": 3.935065705362258e-06, "loss": 0.3004, "step": 25400, "task_loss": 0.8743758201599121 }, { "compression_loss": 0.0, "distillation_loss": 0.19421358406543732, "epoch": 9.18, "learning_rate": 3.9014047964403805e-06, "loss": 0.301, "step": 25410, "task_loss": 0.40134578943252563 }, { "compression_loss": 0.0, "distillation_loss": 0.3234480023384094, "epoch": 9.19, "learning_rate": 3.867878460651475e-06, "loss": 0.319, "step": 25420, "task_loss": 0.5269187688827515 }, { "compression_loss": 0.0, "distillation_loss": 0.35207459330558777, "epoch": 9.19, "learning_rate": 3.834486870868585e-06, "loss": 0.3288, "step": 25430, "task_loss": 0.5806119441986084 }, { "compression_loss": 0.0, "distillation_loss": 0.24220982193946838, "epoch": 9.19, "learning_rate": 3.8012301992699906e-06, "loss": 0.3152, "step": 25440, "task_loss": 0.3459148406982422 }, { "compression_loss": 0.0, "distillation_loss": 0.35971611738204956, "epoch": 9.2, "learning_rate": 3.768108617338265e-06, "loss": 0.3115, "step": 25450, "task_loss": 0.9299747347831726 }, { "compression_loss": 0.0, "distillation_loss": 0.3165518641471863, "epoch": 9.2, "learning_rate": 3.735122295859431e-06, "loss": 0.2978, "step": 25460, "task_loss": 0.8229607343673706 }, { "compression_loss": 0.0, "distillation_loss": 0.3041304349899292, "epoch": 9.2, "learning_rate": 3.7022714049220387e-06, "loss": 0.3304, "step": 25470, "task_loss": 0.6964329481124878 }, { "compression_loss": 0.0, "distillation_loss": 0.2823697328567505, "epoch": 9.21, "learning_rate": 3.669556113916349e-06, "loss": 0.3331, "step": 25480, "task_loss": 0.44915440678596497 }, { "compression_loss": 0.0, "distillation_loss": 0.3072703182697296, "epoch": 9.21, "learning_rate": 3.6369765915333876e-06, "loss": 0.3241, "step": 25490, "task_loss": 0.34987956285476685 }, { "compression_loss": 0.0, "distillation_loss": 0.2890113294124603, "epoch": 9.22, "learning_rate": 3.6045330057641344e-06, "loss": 0.3328, "step": 25500, "task_loss": 0.8481899499893188 }, { "epoch": 9.22, "eval_exact_match": 83.29233680227058, "eval_f1": 90.02785814666667, "step": 25500 }, { "compression_loss": 0.0, "distillation_loss": 0.29243725538253784, "epoch": 9.22, "learning_rate": 3.5722255238986255e-06, "loss": 0.3104, "step": 25510, "task_loss": 0.5860704183578491 }, { "compression_loss": 0.0, "distillation_loss": 0.25151023268699646, "epoch": 9.22, "learning_rate": 3.5400543125251037e-06, "loss": 0.2972, "step": 25520, "task_loss": 0.5240755081176758 }, { "compression_loss": 0.0, "distillation_loss": 0.2432774305343628, "epoch": 9.23, "learning_rate": 3.508019537529148e-06, "loss": 0.3093, "step": 25530, "task_loss": 0.18137642741203308 }, { "compression_loss": 0.0, "distillation_loss": 0.2751896381378174, "epoch": 9.23, "learning_rate": 3.4761213640928357e-06, "loss": 0.3322, "step": 25540, "task_loss": 0.48760008811950684 }, { "compression_loss": 0.0, "distillation_loss": 0.404532253742218, "epoch": 9.23, "learning_rate": 3.4443599566938687e-06, "loss": 0.3474, "step": 25550, "task_loss": 0.6081807017326355 }, { "compression_loss": 0.0, "distillation_loss": 0.2790014445781708, "epoch": 9.24, "learning_rate": 3.412735479104756e-06, "loss": 0.2977, "step": 25560, "task_loss": 0.5392050743103027 }, { "compression_loss": 0.0, "distillation_loss": 0.28828808665275574, "epoch": 9.24, "learning_rate": 3.3812480943919176e-06, "loss": 0.3254, "step": 25570, "task_loss": 0.46198827028274536 }, { "compression_loss": 0.0, "distillation_loss": 0.28925055265426636, "epoch": 9.24, "learning_rate": 3.349897964914923e-06, "loss": 0.31, "step": 25580, "task_loss": 0.7260234355926514 }, { "compression_loss": 0.0, "distillation_loss": 0.29103171825408936, "epoch": 9.25, "learning_rate": 3.318685252325564e-06, "loss": 0.3192, "step": 25590, "task_loss": 0.7414866089820862 }, { "compression_loss": 0.0, "distillation_loss": 0.30816346406936646, "epoch": 9.25, "learning_rate": 3.287610117567086e-06, "loss": 0.3264, "step": 25600, "task_loss": 0.8291851282119751 }, { "compression_loss": 0.0, "distillation_loss": 0.32630425691604614, "epoch": 9.26, "learning_rate": 3.256672720873335e-06, "loss": 0.3499, "step": 25610, "task_loss": 0.6567143201828003 }, { "compression_loss": 0.0, "distillation_loss": 0.3918631970882416, "epoch": 9.26, "learning_rate": 3.2258732217679388e-06, "loss": 0.3278, "step": 25620, "task_loss": 0.5021631717681885 }, { "compression_loss": 0.0, "distillation_loss": 0.3493354916572571, "epoch": 9.26, "learning_rate": 3.195211779063457e-06, "loss": 0.3106, "step": 25630, "task_loss": 0.4202045798301697 }, { "compression_loss": 0.0, "distillation_loss": 0.2900541424751282, "epoch": 9.27, "learning_rate": 3.164688550860606e-06, "loss": 0.3223, "step": 25640, "task_loss": 0.37236881256103516 }, { "compression_loss": 0.0, "distillation_loss": 0.30690574645996094, "epoch": 9.27, "learning_rate": 3.1343036945474114e-06, "loss": 0.3456, "step": 25650, "task_loss": 0.6118491888046265 }, { "compression_loss": 0.0, "distillation_loss": 0.3027680218219757, "epoch": 9.27, "learning_rate": 3.1040573667984164e-06, "loss": 0.339, "step": 25660, "task_loss": 0.5271258354187012 }, { "compression_loss": 0.0, "distillation_loss": 0.26531606912612915, "epoch": 9.28, "learning_rate": 3.073949723573852e-06, "loss": 0.3036, "step": 25670, "task_loss": 0.37352457642555237 }, { "compression_loss": 0.0, "distillation_loss": 0.19538961350917816, "epoch": 9.28, "learning_rate": 3.043980920118853e-06, "loss": 0.3216, "step": 25680, "task_loss": 0.29823341965675354 }, { "compression_loss": 0.0, "distillation_loss": 0.3124743103981018, "epoch": 9.28, "learning_rate": 3.014151110962654e-06, "loss": 0.3246, "step": 25690, "task_loss": 0.2712612748146057 }, { "compression_loss": 0.0, "distillation_loss": 0.33055031299591064, "epoch": 9.29, "learning_rate": 2.9844604499177686e-06, "loss": 0.3172, "step": 25700, "task_loss": 0.5724518299102783 }, { "compression_loss": 0.0, "distillation_loss": 0.307403564453125, "epoch": 9.29, "learning_rate": 2.954909090079233e-06, "loss": 0.3294, "step": 25710, "task_loss": 0.3865295648574829 }, { "compression_loss": 0.0, "distillation_loss": 0.2786659002304077, "epoch": 9.3, "learning_rate": 2.925497183823802e-06, "loss": 0.3085, "step": 25720, "task_loss": 0.5146081447601318 }, { "compression_loss": 0.0, "distillation_loss": 0.30434009432792664, "epoch": 9.3, "learning_rate": 2.8962248828091555e-06, "loss": 0.3249, "step": 25730, "task_loss": 0.5385090112686157 }, { "compression_loss": 0.0, "distillation_loss": 0.287394642829895, "epoch": 9.3, "learning_rate": 2.867092337973105e-06, "loss": 0.314, "step": 25740, "task_loss": 0.7510730028152466 }, { "compression_loss": 0.0, "distillation_loss": 0.2954567074775696, "epoch": 9.31, "learning_rate": 2.83809969953287e-06, "loss": 0.3224, "step": 25750, "task_loss": 0.4567757248878479 }, { "epoch": 9.31, "eval_exact_match": 83.20719016083254, "eval_f1": 90.01758651781414, "step": 25750 }, { "compression_loss": 0.0, "distillation_loss": 0.370003879070282, "epoch": 9.31, "learning_rate": 2.809247116984226e-06, "loss": 0.3028, "step": 25760, "task_loss": 0.6199772357940674 }, { "compression_loss": 0.0, "distillation_loss": 0.2925732433795929, "epoch": 9.31, "learning_rate": 2.7805347391007963e-06, "loss": 0.3122, "step": 25770, "task_loss": 0.4094921350479126 }, { "compression_loss": 0.0, "distillation_loss": 0.3006725311279297, "epoch": 9.32, "learning_rate": 2.7519627139332594e-06, "loss": 0.336, "step": 25780, "task_loss": 0.9209330081939697 }, { "compression_loss": 0.0, "distillation_loss": 0.3157416582107544, "epoch": 9.32, "learning_rate": 2.723531188808589e-06, "loss": 0.3277, "step": 25790, "task_loss": 0.8077236413955688 }, { "compression_loss": 0.0, "distillation_loss": 0.28249573707580566, "epoch": 9.32, "learning_rate": 2.695240310329279e-06, "loss": 0.3234, "step": 25800, "task_loss": 0.369956374168396 }, { "compression_loss": 0.0, "distillation_loss": 0.2779986262321472, "epoch": 9.33, "learning_rate": 2.6670902243726127e-06, "loss": 0.3028, "step": 25810, "task_loss": 0.7005301713943481 }, { "compression_loss": 0.0, "distillation_loss": 0.26586776971817017, "epoch": 9.33, "learning_rate": 2.6390810760899043e-06, "loss": 0.3389, "step": 25820, "task_loss": 0.5821472406387329 }, { "compression_loss": 0.0, "distillation_loss": 0.2681348919868469, "epoch": 9.34, "learning_rate": 2.6112130099057465e-06, "loss": 0.3343, "step": 25830, "task_loss": 0.43781211972236633 }, { "compression_loss": 0.0, "distillation_loss": 0.28912442922592163, "epoch": 9.34, "learning_rate": 2.5834861695172394e-06, "loss": 0.2981, "step": 25840, "task_loss": 0.5437923669815063 }, { "compression_loss": 0.0, "distillation_loss": 0.2632666528224945, "epoch": 9.34, "learning_rate": 2.555900697893321e-06, "loss": 0.3046, "step": 25850, "task_loss": 0.8168158531188965 }, { "compression_loss": 0.0, "distillation_loss": 0.4091622233390808, "epoch": 9.35, "learning_rate": 2.52845673727394e-06, "loss": 0.3188, "step": 25860, "task_loss": 0.5111245512962341 }, { "compression_loss": 0.0, "distillation_loss": 0.26104307174682617, "epoch": 9.35, "learning_rate": 2.501154429169393e-06, "loss": 0.3321, "step": 25870, "task_loss": 0.3140479326248169 }, { "compression_loss": 0.0, "distillation_loss": 0.3605012893676758, "epoch": 9.35, "learning_rate": 2.4739939143595626e-06, "loss": 0.3232, "step": 25880, "task_loss": 0.478623628616333 }, { "compression_loss": 0.0, "distillation_loss": 0.38676559925079346, "epoch": 9.36, "learning_rate": 2.4469753328932e-06, "loss": 0.3311, "step": 25890, "task_loss": 0.5500181317329407 }, { "compression_loss": 0.0, "distillation_loss": 0.26764678955078125, "epoch": 9.36, "learning_rate": 2.4200988240871837e-06, "loss": 0.352, "step": 25900, "task_loss": 0.387978196144104 }, { "compression_loss": 0.0, "distillation_loss": 0.2940652370452881, "epoch": 9.36, "learning_rate": 2.3933645265258407e-06, "loss": 0.3374, "step": 25910, "task_loss": 0.5337430238723755 }, { "compression_loss": 0.0, "distillation_loss": 0.23789635300636292, "epoch": 9.37, "learning_rate": 2.366772578060199e-06, "loss": 0.3429, "step": 25920, "task_loss": 0.5305555462837219 }, { "compression_loss": 0.0, "distillation_loss": 0.2854630947113037, "epoch": 9.37, "learning_rate": 2.3403231158072893e-06, "loss": 0.341, "step": 25930, "task_loss": 0.6268982887268066 }, { "compression_loss": 0.0, "distillation_loss": 0.2674894630908966, "epoch": 9.37, "learning_rate": 2.3140162761494166e-06, "loss": 0.3222, "step": 25940, "task_loss": 0.48702147603034973 }, { "compression_loss": 0.0, "distillation_loss": 0.302157998085022, "epoch": 9.38, "learning_rate": 2.287852194733515e-06, "loss": 0.3122, "step": 25950, "task_loss": 0.3152402639389038 }, { "compression_loss": 0.0, "distillation_loss": 0.43852946162223816, "epoch": 9.38, "learning_rate": 2.2618310064703707e-06, "loss": 0.3381, "step": 25960, "task_loss": 0.6149742603302002 }, { "compression_loss": 0.0, "distillation_loss": 0.27004188299179077, "epoch": 9.39, "learning_rate": 2.235952845533985e-06, "loss": 0.3265, "step": 25970, "task_loss": 0.5028146505355835 }, { "compression_loss": 0.0, "distillation_loss": 0.507034957408905, "epoch": 9.39, "learning_rate": 2.21021784536086e-06, "loss": 0.317, "step": 25980, "task_loss": 0.6118849515914917 }, { "compression_loss": 0.0, "distillation_loss": 0.24345892667770386, "epoch": 9.39, "learning_rate": 2.184626138649315e-06, "loss": 0.3307, "step": 25990, "task_loss": 0.4160289764404297 }, { "compression_loss": 0.0, "distillation_loss": 0.30164164304733276, "epoch": 9.4, "learning_rate": 2.1591778573587795e-06, "loss": 0.2989, "step": 26000, "task_loss": 0.6134577989578247 }, { "epoch": 9.4, "eval_exact_match": 83.24503311258277, "eval_f1": 90.07899218127025, "step": 26000 }, { "compression_loss": 0.0, "distillation_loss": 0.3065416216850281, "epoch": 9.4, "learning_rate": 2.133873132709162e-06, "loss": 0.2929, "step": 26010, "task_loss": 0.5050661563873291 }, { "compression_loss": 0.0, "distillation_loss": 0.20736584067344666, "epoch": 9.4, "learning_rate": 2.1087120951801298e-06, "loss": 0.3064, "step": 26020, "task_loss": 0.6142216920852661 }, { "compression_loss": 0.0, "distillation_loss": 0.30738502740859985, "epoch": 9.41, "learning_rate": 2.0836948745104554e-06, "loss": 0.3022, "step": 26030, "task_loss": 0.42188823223114014 }, { "compression_loss": 0.0, "distillation_loss": 0.34054452180862427, "epoch": 9.41, "learning_rate": 2.058821599697326e-06, "loss": 0.3338, "step": 26040, "task_loss": 0.4355012774467468 }, { "compression_loss": 0.0, "distillation_loss": 0.287036657333374, "epoch": 9.41, "learning_rate": 2.0340923989957337e-06, "loss": 0.3057, "step": 26050, "task_loss": 0.6002825498580933 }, { "compression_loss": 0.0, "distillation_loss": 0.34066513180732727, "epoch": 9.42, "learning_rate": 2.0095073999177405e-06, "loss": 0.3077, "step": 26060, "task_loss": 0.42962759733200073 }, { "compression_loss": 0.0, "distillation_loss": 0.26722410321235657, "epoch": 9.42, "learning_rate": 1.985066729231865e-06, "loss": 0.3017, "step": 26070, "task_loss": 0.2320828139781952 }, { "compression_loss": 0.0, "distillation_loss": 0.2443457543849945, "epoch": 9.43, "learning_rate": 1.960770512962431e-06, "loss": 0.2898, "step": 26080, "task_loss": 0.3011275827884674 }, { "compression_loss": 0.0, "distillation_loss": 0.2698020935058594, "epoch": 9.43, "learning_rate": 1.9366188763889026e-06, "loss": 0.3416, "step": 26090, "task_loss": 0.8731992244720459 }, { "compression_loss": 0.0, "distillation_loss": 0.3889712989330292, "epoch": 9.43, "learning_rate": 1.9126119440452293e-06, "loss": 0.3149, "step": 26100, "task_loss": 0.667431116104126 }, { "compression_loss": 0.0, "distillation_loss": 0.41749489307403564, "epoch": 9.44, "learning_rate": 1.8887498397192316e-06, "loss": 0.3004, "step": 26110, "task_loss": 1.0217292308807373 }, { "compression_loss": 0.0, "distillation_loss": 0.2434772253036499, "epoch": 9.44, "learning_rate": 1.8650326864519428e-06, "loss": 0.3241, "step": 26120, "task_loss": 0.27343904972076416 }, { "compression_loss": 0.0, "distillation_loss": 0.2682727873325348, "epoch": 9.44, "learning_rate": 1.8414606065369855e-06, "loss": 0.3026, "step": 26130, "task_loss": 0.7200096845626831 }, { "compression_loss": 0.0, "distillation_loss": 0.30232781171798706, "epoch": 9.45, "learning_rate": 1.818033721519916e-06, "loss": 0.3156, "step": 26140, "task_loss": 0.41904714703559875 }, { "compression_loss": 0.0, "distillation_loss": 0.31530797481536865, "epoch": 9.45, "learning_rate": 1.7970737664963832e-06, "loss": 0.302, "step": 26150, "task_loss": 0.4437169134616852 }, { "compression_loss": 0.0, "distillation_loss": 0.28844863176345825, "epoch": 9.45, "learning_rate": 1.7739230839614962e-06, "loss": 0.2845, "step": 26160, "task_loss": 0.5498650074005127 }, { "compression_loss": 0.0, "distillation_loss": 0.38394349813461304, "epoch": 9.46, "learning_rate": 1.7509179445706858e-06, "loss": 0.321, "step": 26170, "task_loss": 0.843754768371582 }, { "compression_loss": 0.0, "distillation_loss": 0.29749709367752075, "epoch": 9.46, "learning_rate": 1.7280584669461808e-06, "loss": 0.3067, "step": 26180, "task_loss": 0.2624616026878357 }, { "compression_loss": 0.0, "distillation_loss": 0.2989083528518677, "epoch": 9.47, "learning_rate": 1.7053447689591473e-06, "loss": 0.3149, "step": 26190, "task_loss": 0.6814029812812805 }, { "compression_loss": 0.0, "distillation_loss": 0.24795949459075928, "epoch": 9.47, "learning_rate": 1.6827769677290294e-06, "loss": 0.2966, "step": 26200, "task_loss": 0.6931198239326477 }, { "compression_loss": 0.0, "distillation_loss": 0.31588611006736755, "epoch": 9.47, "learning_rate": 1.6603551796230232e-06, "loss": 0.3074, "step": 26210, "task_loss": 0.7078069448471069 }, { "compression_loss": 0.0, "distillation_loss": 0.2802581191062927, "epoch": 9.48, "learning_rate": 1.6380795202553866e-06, "loss": 0.312, "step": 26220, "task_loss": 0.7879334688186646 }, { "compression_loss": 0.0, "distillation_loss": 0.3153989911079407, "epoch": 9.48, "learning_rate": 1.615950104486924e-06, "loss": 0.3211, "step": 26230, "task_loss": 0.593709409236908 }, { "compression_loss": 0.0, "distillation_loss": 0.2652713358402252, "epoch": 9.48, "learning_rate": 1.5939670464243362e-06, "loss": 0.3022, "step": 26240, "task_loss": 0.5030221939086914 }, { "compression_loss": 0.0, "distillation_loss": 0.2979765236377716, "epoch": 9.49, "learning_rate": 1.572130459419674e-06, "loss": 0.3224, "step": 26250, "task_loss": 0.3495621085166931 }, { "epoch": 9.49, "eval_exact_match": 83.15988647114474, "eval_f1": 90.06487572380685, "step": 26250 }, { "compression_loss": 0.0, "distillation_loss": 0.30114901065826416, "epoch": 9.49, "learning_rate": 1.5504404560697093e-06, "loss": 0.3202, "step": 26260, "task_loss": 0.4952106177806854 }, { "compression_loss": 0.0, "distillation_loss": 0.30627989768981934, "epoch": 9.49, "learning_rate": 1.5288971482153957e-06, "loss": 0.324, "step": 26270, "task_loss": 0.6019161939620972 }, { "compression_loss": 0.0, "distillation_loss": 0.27386802434921265, "epoch": 9.5, "learning_rate": 1.5075006469412778e-06, "loss": 0.3126, "step": 26280, "task_loss": 0.6094711422920227 }, { "compression_loss": 0.0, "distillation_loss": 0.3032553195953369, "epoch": 9.5, "learning_rate": 1.486251062574916e-06, "loss": 0.2942, "step": 26290, "task_loss": 0.475771963596344 }, { "compression_loss": 0.0, "distillation_loss": 0.2948029637336731, "epoch": 9.5, "learning_rate": 1.4651485046862933e-06, "loss": 0.3229, "step": 26300, "task_loss": 0.44787514209747314 }, { "compression_loss": 0.0, "distillation_loss": 0.3109568953514099, "epoch": 9.51, "learning_rate": 1.4441930820873195e-06, "loss": 0.3074, "step": 26310, "task_loss": 0.4297507107257843 }, { "compression_loss": 0.0, "distillation_loss": 0.2993125319480896, "epoch": 9.51, "learning_rate": 1.4233849028311808e-06, "loss": 0.3203, "step": 26320, "task_loss": 0.41013583540916443 }, { "compression_loss": 0.0, "distillation_loss": 0.3109281659126282, "epoch": 9.52, "learning_rate": 1.4027240742118542e-06, "loss": 0.31, "step": 26330, "task_loss": 0.48224520683288574 }, { "compression_loss": 0.0, "distillation_loss": 0.320933073759079, "epoch": 9.52, "learning_rate": 1.3822107027635178e-06, "loss": 0.3106, "step": 26340, "task_loss": 0.3480777144432068 }, { "compression_loss": 0.0, "distillation_loss": 0.40795719623565674, "epoch": 9.52, "learning_rate": 1.3618448942600182e-06, "loss": 0.3197, "step": 26350, "task_loss": 0.6005345582962036 }, { "compression_loss": 0.0, "distillation_loss": 0.2971874475479126, "epoch": 9.53, "learning_rate": 1.3416267537143035e-06, "loss": 0.3062, "step": 26360, "task_loss": 0.45181214809417725 }, { "compression_loss": 0.0, "distillation_loss": 0.3023059368133545, "epoch": 9.53, "learning_rate": 1.3215563853779112e-06, "loss": 0.2991, "step": 26370, "task_loss": 0.5861034989356995 }, { "compression_loss": 0.0, "distillation_loss": 0.34587642550468445, "epoch": 9.53, "learning_rate": 1.3016338927404047e-06, "loss": 0.3595, "step": 26380, "task_loss": 0.5939334034919739 }, { "compression_loss": 0.0, "distillation_loss": 0.28393590450286865, "epoch": 9.54, "learning_rate": 1.2818593785288645e-06, "loss": 0.3235, "step": 26390, "task_loss": 0.6647424697875977 }, { "compression_loss": 0.0, "distillation_loss": 0.35412973165512085, "epoch": 9.54, "learning_rate": 1.262232944707321e-06, "loss": 0.3574, "step": 26400, "task_loss": 0.7059704065322876 }, { "compression_loss": 0.0, "distillation_loss": 0.22645823657512665, "epoch": 9.54, "learning_rate": 1.2427546924762823e-06, "loss": 0.2852, "step": 26410, "task_loss": 0.5531166791915894 }, { "compression_loss": 0.0, "distillation_loss": 0.2105254828929901, "epoch": 9.55, "learning_rate": 1.2234247222721573e-06, "loss": 0.2749, "step": 26420, "task_loss": 0.22269278764724731 }, { "compression_loss": 0.0, "distillation_loss": 0.39202356338500977, "epoch": 9.55, "learning_rate": 1.2042431337667704e-06, "loss": 0.3171, "step": 26430, "task_loss": 0.6872595548629761 }, { "compression_loss": 0.0, "distillation_loss": 0.2564923167228699, "epoch": 9.56, "learning_rate": 1.1852100258668507e-06, "loss": 0.3454, "step": 26440, "task_loss": 0.50974440574646 }, { "compression_loss": 0.0, "distillation_loss": 0.3733943998813629, "epoch": 9.56, "learning_rate": 1.1663254967134973e-06, "loss": 0.3358, "step": 26450, "task_loss": 0.4759913980960846 }, { "compression_loss": 0.0, "distillation_loss": 0.34948068857192993, "epoch": 9.56, "learning_rate": 1.1475896436816947e-06, "loss": 0.3324, "step": 26460, "task_loss": 0.8849142789840698 }, { "compression_loss": 0.0, "distillation_loss": 0.4483783543109894, "epoch": 9.57, "learning_rate": 1.1290025633797973e-06, "loss": 0.3227, "step": 26470, "task_loss": 1.083753228187561 }, { "compression_loss": 0.0, "distillation_loss": 0.3329492211341858, "epoch": 9.57, "learning_rate": 1.1105643516490438e-06, "loss": 0.2947, "step": 26480, "task_loss": 0.70804762840271 }, { "compression_loss": 0.0, "distillation_loss": 0.3900536298751831, "epoch": 9.57, "learning_rate": 1.0922751035630595e-06, "loss": 0.3534, "step": 26490, "task_loss": 0.8057717084884644 }, { "compression_loss": 0.0, "distillation_loss": 0.38479679822921753, "epoch": 9.58, "learning_rate": 1.0741349134273448e-06, "loss": 0.324, "step": 26500, "task_loss": 0.5944019556045532 }, { "epoch": 9.58, "eval_exact_match": 83.19772942289498, "eval_f1": 89.94716840298422, "step": 26500 }, { "compression_loss": 0.0, "distillation_loss": 0.31899452209472656, "epoch": 9.58, "learning_rate": 1.0561438747788377e-06, "loss": 0.3297, "step": 26510, "task_loss": 0.6771483421325684 }, { "compression_loss": 0.0, "distillation_loss": 0.2978406250476837, "epoch": 9.58, "learning_rate": 1.0383020803853682e-06, "loss": 0.3061, "step": 26520, "task_loss": 0.3691001534461975 }, { "compression_loss": 0.0, "distillation_loss": 0.20832061767578125, "epoch": 9.59, "learning_rate": 1.0206096222452321e-06, "loss": 0.3142, "step": 26530, "task_loss": 0.4117594361305237 }, { "compression_loss": 0.0, "distillation_loss": 0.3379862904548645, "epoch": 9.59, "learning_rate": 1.0030665915866944e-06, "loss": 0.3164, "step": 26540, "task_loss": 0.4368290305137634 }, { "compression_loss": 0.0, "distillation_loss": 0.24347561597824097, "epoch": 9.6, "learning_rate": 9.856730788675228e-07, "loss": 0.3003, "step": 26550, "task_loss": 0.31799739599227905 }, { "compression_loss": 0.0, "distillation_loss": 0.2570602595806122, "epoch": 9.6, "learning_rate": 9.68429173774512e-07, "loss": 0.3272, "step": 26560, "task_loss": 0.6868414282798767 }, { "compression_loss": 0.0, "distillation_loss": 0.29512378573417664, "epoch": 9.6, "learning_rate": 9.513349652230407e-07, "loss": 0.3032, "step": 26570, "task_loss": 0.7735044956207275 }, { "compression_loss": 0.0, "distillation_loss": 0.216871976852417, "epoch": 9.61, "learning_rate": 9.343905413565878e-07, "loss": 0.307, "step": 26580, "task_loss": 0.2056950479745865 }, { "compression_loss": 0.0, "distillation_loss": 0.2929142713546753, "epoch": 9.61, "learning_rate": 9.175959895463138e-07, "loss": 0.3184, "step": 26590, "task_loss": 0.566573977470398 }, { "compression_loss": 0.0, "distillation_loss": 0.3073238134384155, "epoch": 9.61, "learning_rate": 9.009513963905602e-07, "loss": 0.3183, "step": 26600, "task_loss": 0.6305901408195496 }, { "compression_loss": 0.0, "distillation_loss": 0.2523428201675415, "epoch": 9.62, "learning_rate": 8.844568477144644e-07, "loss": 0.3194, "step": 26610, "task_loss": 0.4756616950035095 }, { "compression_loss": 0.0, "distillation_loss": 0.40382206439971924, "epoch": 9.62, "learning_rate": 8.681124285694486e-07, "loss": 0.3168, "step": 26620, "task_loss": 0.5824015736579895 }, { "compression_loss": 0.0, "distillation_loss": 0.25531330704689026, "epoch": 9.62, "learning_rate": 8.519182232328415e-07, "loss": 0.3202, "step": 26630, "task_loss": 0.5715607404708862 }, { "compression_loss": 0.0, "distillation_loss": 0.3254801630973816, "epoch": 9.63, "learning_rate": 8.358743152074111e-07, "loss": 0.3626, "step": 26640, "task_loss": 0.4549102187156677 }, { "compression_loss": 0.0, "distillation_loss": 0.25074678659439087, "epoch": 9.63, "learning_rate": 8.199807872209452e-07, "loss": 0.3284, "step": 26650, "task_loss": 0.33278608322143555 }, { "compression_loss": 0.0, "distillation_loss": 0.23102590441703796, "epoch": 9.63, "learning_rate": 8.042377212258123e-07, "loss": 0.2935, "step": 26660, "task_loss": 0.5513738989830017 }, { "compression_loss": 0.0, "distillation_loss": 0.47359585762023926, "epoch": 9.64, "learning_rate": 7.886451983985576e-07, "loss": 0.308, "step": 26670, "task_loss": 0.6858689785003662 }, { "compression_loss": 0.0, "distillation_loss": 0.29826098680496216, "epoch": 9.64, "learning_rate": 7.73203299139471e-07, "loss": 0.3315, "step": 26680, "task_loss": 0.3836401104927063 }, { "compression_loss": 0.0, "distillation_loss": 0.3404310345649719, "epoch": 9.65, "learning_rate": 7.579121030721837e-07, "loss": 0.3221, "step": 26690, "task_loss": 0.625718355178833 }, { "compression_loss": 0.0, "distillation_loss": 0.27862346172332764, "epoch": 9.65, "learning_rate": 7.427716890432346e-07, "loss": 0.2999, "step": 26700, "task_loss": 0.26387932896614075 }, { "compression_loss": 0.0, "distillation_loss": 0.30959299206733704, "epoch": 9.65, "learning_rate": 7.277821351216984e-07, "loss": 0.3153, "step": 26710, "task_loss": 0.7208259105682373 }, { "compression_loss": 0.0, "distillation_loss": 0.3370169401168823, "epoch": 9.66, "learning_rate": 7.129435185987487e-07, "loss": 0.2994, "step": 26720, "task_loss": 0.43289870023727417 }, { "compression_loss": 0.0, "distillation_loss": 0.36385589838027954, "epoch": 9.66, "learning_rate": 6.982559159872881e-07, "loss": 0.328, "step": 26730, "task_loss": 0.5257211923599243 }, { "compression_loss": 0.0, "distillation_loss": 0.2223806381225586, "epoch": 9.66, "learning_rate": 6.837194030215288e-07, "loss": 0.3035, "step": 26740, "task_loss": 0.39960020780563354 }, { "compression_loss": 0.0, "distillation_loss": 0.3810596168041229, "epoch": 9.67, "learning_rate": 6.693340546566263e-07, "loss": 0.3517, "step": 26750, "task_loss": 0.6381791830062866 }, { "epoch": 9.67, "eval_exact_match": 83.3112582781457, "eval_f1": 90.07123919343461, "step": 26750 }, { "compression_loss": 0.0, "distillation_loss": 0.2689059376716614, "epoch": 9.67, "learning_rate": 6.550999450682693e-07, "loss": 0.2918, "step": 26760, "task_loss": 0.48646509647369385 }, { "compression_loss": 0.0, "distillation_loss": 0.22755245864391327, "epoch": 9.67, "learning_rate": 6.410171476523141e-07, "loss": 0.2787, "step": 26770, "task_loss": 0.27000099420547485 }, { "compression_loss": 0.0, "distillation_loss": 0.2955330014228821, "epoch": 9.68, "learning_rate": 6.270857350243974e-07, "loss": 0.3318, "step": 26780, "task_loss": 0.44846802949905396 }, { "compression_loss": 0.0, "distillation_loss": 0.3734288513660431, "epoch": 9.68, "learning_rate": 6.133057790195773e-07, "loss": 0.3282, "step": 26790, "task_loss": 0.5023435950279236 }, { "compression_loss": 0.0, "distillation_loss": 0.3733587861061096, "epoch": 9.69, "learning_rate": 5.996773506919262e-07, "loss": 0.3225, "step": 26800, "task_loss": 0.8738093972206116 }, { "compression_loss": 0.0, "distillation_loss": 0.3161894977092743, "epoch": 9.69, "learning_rate": 5.862005203142151e-07, "loss": 0.3023, "step": 26810, "task_loss": 0.5742534399032593 }, { "compression_loss": 0.0, "distillation_loss": 0.2755066156387329, "epoch": 9.69, "learning_rate": 5.728753573775069e-07, "loss": 0.2993, "step": 26820, "task_loss": 0.5461592078208923 }, { "compression_loss": 0.0, "distillation_loss": 0.28466445207595825, "epoch": 9.7, "learning_rate": 5.597019305908235e-07, "loss": 0.283, "step": 26830, "task_loss": 0.4959646463394165 }, { "compression_loss": 0.0, "distillation_loss": 0.2563675343990326, "epoch": 9.7, "learning_rate": 5.466803078807859e-07, "loss": 0.3423, "step": 26840, "task_loss": 0.4334827959537506 }, { "compression_loss": 0.0, "distillation_loss": 0.26834434270858765, "epoch": 9.7, "learning_rate": 5.33810556391261e-07, "loss": 0.2885, "step": 26850, "task_loss": 0.8743736147880554 }, { "compression_loss": 0.0, "distillation_loss": 0.2733617424964905, "epoch": 9.71, "learning_rate": 5.210927424830092e-07, "loss": 0.2869, "step": 26860, "task_loss": 0.5635610222816467 }, { "compression_loss": 0.0, "distillation_loss": 0.3465721309185028, "epoch": 9.71, "learning_rate": 5.085269317333574e-07, "loss": 0.2863, "step": 26870, "task_loss": 0.5172914266586304 }, { "compression_loss": 0.0, "distillation_loss": 0.24393369257450104, "epoch": 9.71, "learning_rate": 4.961131889358528e-07, "loss": 0.2913, "step": 26880, "task_loss": 0.544761598110199 }, { "compression_loss": 0.0, "distillation_loss": 0.304984450340271, "epoch": 9.72, "learning_rate": 4.838515780999264e-07, "loss": 0.3326, "step": 26890, "task_loss": 0.49490851163864136 }, { "compression_loss": 0.0, "distillation_loss": 0.31869620084762573, "epoch": 9.72, "learning_rate": 4.717421624505669e-07, "loss": 0.308, "step": 26900, "task_loss": 0.4404585361480713 }, { "compression_loss": 0.0, "distillation_loss": 0.26529282331466675, "epoch": 9.73, "learning_rate": 4.597850044279972e-07, "loss": 0.3086, "step": 26910, "task_loss": 0.6393947601318359 }, { "compression_loss": 0.0, "distillation_loss": 0.3640422821044922, "epoch": 9.73, "learning_rate": 4.4798016568733837e-07, "loss": 0.3077, "step": 26920, "task_loss": 0.7868601083755493 }, { "compression_loss": 0.0, "distillation_loss": 0.2870105504989624, "epoch": 9.73, "learning_rate": 4.3632770709831293e-07, "loss": 0.3128, "step": 26930, "task_loss": 0.6231682300567627 }, { "compression_loss": 0.0, "distillation_loss": 0.2648237347602844, "epoch": 9.74, "learning_rate": 4.248276887449154e-07, "loss": 0.2981, "step": 26940, "task_loss": 0.6860414147377014 }, { "compression_loss": 0.0, "distillation_loss": 0.30803900957107544, "epoch": 9.74, "learning_rate": 4.1348016992510895e-07, "loss": 0.3192, "step": 26950, "task_loss": 0.6218051910400391 }, { "compression_loss": 0.0, "distillation_loss": 0.30648428201675415, "epoch": 9.74, "learning_rate": 4.0228520915050915e-07, "loss": 0.319, "step": 26960, "task_loss": 0.6169790625572205 }, { "compression_loss": 0.0, "distillation_loss": 0.29523447155952454, "epoch": 9.75, "learning_rate": 3.912428641461041e-07, "loss": 0.3132, "step": 26970, "task_loss": 0.9747434854507446 }, { "compression_loss": 0.0, "distillation_loss": 0.23645155131816864, "epoch": 9.75, "learning_rate": 3.8035319184993813e-07, "loss": 0.2862, "step": 26980, "task_loss": 0.47767388820648193 }, { "compression_loss": 0.0, "distillation_loss": 0.30601510405540466, "epoch": 9.75, "learning_rate": 3.6961624841282516e-07, "loss": 0.3058, "step": 26990, "task_loss": 0.3921341598033905 }, { "compression_loss": 0.0, "distillation_loss": 0.3483054041862488, "epoch": 9.76, "learning_rate": 3.590320891980492e-07, "loss": 0.3432, "step": 27000, "task_loss": 0.7122894525527954 }, { "epoch": 9.76, "eval_exact_match": 83.19772942289498, "eval_f1": 89.93300652023898, "step": 27000 }, { "compression_loss": 0.0, "distillation_loss": 0.32802069187164307, "epoch": 9.76, "learning_rate": 3.4860076878110103e-07, "loss": 0.2999, "step": 27010, "task_loss": 0.7228002548217773 }, { "compression_loss": 0.0, "distillation_loss": 0.286615788936615, "epoch": 9.77, "learning_rate": 3.383223409493719e-07, "loss": 0.3082, "step": 27020, "task_loss": 0.4433107376098633 }, { "compression_loss": 0.0, "distillation_loss": 0.3046417832374573, "epoch": 9.77, "learning_rate": 3.281968587018902e-07, "loss": 0.3324, "step": 27030, "task_loss": 0.54556804895401 }, { "compression_loss": 0.0, "distillation_loss": 0.271384596824646, "epoch": 9.77, "learning_rate": 3.1822437424905536e-07, "loss": 0.3071, "step": 27040, "task_loss": 0.44324588775634766 }, { "compression_loss": 0.0, "distillation_loss": 0.28623712062835693, "epoch": 9.78, "learning_rate": 3.084049390123478e-07, "loss": 0.3245, "step": 27050, "task_loss": 0.7106075882911682 }, { "compression_loss": 0.0, "distillation_loss": 0.31551676988601685, "epoch": 9.78, "learning_rate": 2.9873860362407244e-07, "loss": 0.3025, "step": 27060, "task_loss": 0.47528547048568726 }, { "compression_loss": 0.0, "distillation_loss": 0.28199514746665955, "epoch": 9.78, "learning_rate": 2.892254179271059e-07, "loss": 0.3273, "step": 27070, "task_loss": 0.40891680121421814 }, { "compression_loss": 0.0, "distillation_loss": 0.3044975996017456, "epoch": 9.79, "learning_rate": 2.798654309746396e-07, "loss": 0.2912, "step": 27080, "task_loss": 0.4443657398223877 }, { "compression_loss": 0.0, "distillation_loss": 0.25702059268951416, "epoch": 9.79, "learning_rate": 2.706586910299069e-07, "loss": 0.2979, "step": 27090, "task_loss": 0.4479978084564209 }, { "compression_loss": 0.0, "distillation_loss": 0.2585090100765228, "epoch": 9.79, "learning_rate": 2.616052455659568e-07, "loss": 0.3239, "step": 27100, "task_loss": 0.38295847177505493 }, { "compression_loss": 0.0, "distillation_loss": 0.3074435293674469, "epoch": 9.8, "learning_rate": 2.5270514126540025e-07, "loss": 0.2986, "step": 27110, "task_loss": 0.5449491739273071 }, { "compression_loss": 0.0, "distillation_loss": 0.3268190026283264, "epoch": 9.8, "learning_rate": 2.4395842402016756e-07, "loss": 0.3059, "step": 27120, "task_loss": 0.49111929535865784 }, { "compression_loss": 0.0, "distillation_loss": 0.2732863426208496, "epoch": 9.8, "learning_rate": 2.3536513893127166e-07, "loss": 0.302, "step": 27130, "task_loss": 0.46516484022140503 }, { "compression_loss": 0.0, "distillation_loss": 0.3503320813179016, "epoch": 9.81, "learning_rate": 2.2692533030857832e-07, "loss": 0.3421, "step": 27140, "task_loss": 0.8032338619232178 }, { "compression_loss": 0.0, "distillation_loss": 0.4571889638900757, "epoch": 9.81, "learning_rate": 2.1863904167058634e-07, "loss": 0.3358, "step": 27150, "task_loss": 0.7194252014160156 }, { "compression_loss": 0.0, "distillation_loss": 0.28522342443466187, "epoch": 9.82, "learning_rate": 2.1050631574418112e-07, "loss": 0.3302, "step": 27160, "task_loss": 0.594832181930542 }, { "compression_loss": 0.0, "distillation_loss": 0.2757059335708618, "epoch": 9.82, "learning_rate": 2.0252719446443135e-07, "loss": 0.3168, "step": 27170, "task_loss": 0.7021136283874512 }, { "compression_loss": 0.0, "distillation_loss": 0.35544660687446594, "epoch": 9.82, "learning_rate": 1.9470171897437273e-07, "loss": 0.2886, "step": 27180, "task_loss": 0.5380356907844543 }, { "compression_loss": 0.0, "distillation_loss": 0.26072177290916443, "epoch": 9.83, "learning_rate": 1.8702992962478792e-07, "loss": 0.3091, "step": 27190, "task_loss": 0.5083783864974976 }, { "compression_loss": 0.0, "distillation_loss": 0.2878473997116089, "epoch": 9.83, "learning_rate": 1.7951186597399693e-07, "loss": 0.3031, "step": 27200, "task_loss": 0.4856301248073578 }, { "compression_loss": 0.0, "distillation_loss": 0.3149654269218445, "epoch": 9.83, "learning_rate": 1.7214756678767042e-07, "loss": 0.3054, "step": 27210, "task_loss": 0.7618269324302673 }, { "compression_loss": 0.0, "distillation_loss": 0.292486310005188, "epoch": 9.84, "learning_rate": 1.649370700386099e-07, "loss": 0.2823, "step": 27220, "task_loss": 0.2056034803390503 }, { "compression_loss": 0.0, "distillation_loss": 0.30289211869239807, "epoch": 9.84, "learning_rate": 1.578804129065614e-07, "loss": 0.3203, "step": 27230, "task_loss": 0.478914737701416 }, { "compression_loss": 0.0, "distillation_loss": 0.248458594083786, "epoch": 9.84, "learning_rate": 1.5097763177802205e-07, "loss": 0.3146, "step": 27240, "task_loss": 0.2979784309864044 }, { "compression_loss": 0.0, "distillation_loss": 0.34328991174697876, "epoch": 9.85, "learning_rate": 1.4422876224605365e-07, "loss": 0.335, "step": 27250, "task_loss": 0.7470317482948303 }, { "epoch": 9.85, "eval_exact_match": 83.09366130558183, "eval_f1": 89.97577505817762, "step": 27250 }, { "compression_loss": 0.0, "distillation_loss": 0.3134426176548004, "epoch": 9.85, "learning_rate": 1.3763383911009287e-07, "loss": 0.3113, "step": 27260, "task_loss": 0.5885034799575806 }, { "compression_loss": 0.0, "distillation_loss": 0.3346371352672577, "epoch": 9.86, "learning_rate": 1.3119289637578135e-07, "loss": 0.3353, "step": 27270, "task_loss": 0.4166927635669708 }, { "compression_loss": 0.0, "distillation_loss": 0.3193387985229492, "epoch": 9.86, "learning_rate": 1.249059672547892e-07, "loss": 0.3067, "step": 27280, "task_loss": 0.41660696268081665 }, { "compression_loss": 0.0, "distillation_loss": 0.2841346859931946, "epoch": 9.86, "learning_rate": 1.1877308416463506e-07, "loss": 0.2894, "step": 27290, "task_loss": 0.28571581840515137 }, { "compression_loss": 0.0, "distillation_loss": 0.2764800786972046, "epoch": 9.87, "learning_rate": 1.1279427872852965e-07, "loss": 0.3167, "step": 27300, "task_loss": 0.3838335871696472 }, { "compression_loss": 0.0, "distillation_loss": 0.3414124846458435, "epoch": 9.87, "learning_rate": 1.0696958177519922e-07, "loss": 0.3141, "step": 27310, "task_loss": 0.6372711658477783 }, { "compression_loss": 0.0, "distillation_loss": 0.23128125071525574, "epoch": 9.87, "learning_rate": 1.0129902333874563e-07, "loss": 0.2906, "step": 27320, "task_loss": 0.5092746019363403 }, { "compression_loss": 0.0, "distillation_loss": 0.4284539818763733, "epoch": 9.88, "learning_rate": 9.578263265846655e-08, "loss": 0.328, "step": 27330, "task_loss": 0.3266478180885315 }, { "compression_loss": 0.0, "distillation_loss": 0.2824637293815613, "epoch": 9.88, "learning_rate": 9.042043817873547e-08, "loss": 0.3189, "step": 27340, "task_loss": 0.4688670337200165 }, { "compression_loss": 0.0, "distillation_loss": 0.40793055295944214, "epoch": 9.88, "learning_rate": 8.521246754882528e-08, "loss": 0.3287, "step": 27350, "task_loss": 0.45810216665267944 }, { "compression_loss": 0.0, "distillation_loss": 0.24485069513320923, "epoch": 9.89, "learning_rate": 8.01587476227883e-08, "loss": 0.3213, "step": 27360, "task_loss": 0.4046506881713867 }, { "compression_loss": 0.0, "distillation_loss": 0.2961767911911011, "epoch": 9.89, "learning_rate": 7.525930445929974e-08, "loss": 0.3041, "step": 27370, "task_loss": 1.140902042388916 }, { "compression_loss": 0.0, "distillation_loss": 0.2372909039258957, "epoch": 9.9, "learning_rate": 7.051416332153781e-08, "loss": 0.2999, "step": 27380, "task_loss": 0.7308323979377747 }, { "compression_loss": 0.0, "distillation_loss": 0.34356093406677246, "epoch": 9.9, "learning_rate": 6.592334867704719e-08, "loss": 0.3112, "step": 27390, "task_loss": 0.5305260419845581 }, { "compression_loss": 0.0, "distillation_loss": 0.23644784092903137, "epoch": 9.9, "learning_rate": 6.148688419760906e-08, "loss": 0.314, "step": 27400, "task_loss": 0.43768665194511414 }, { "compression_loss": 0.0, "distillation_loss": 0.29776376485824585, "epoch": 9.91, "learning_rate": 5.7204792759127936e-08, "loss": 0.3048, "step": 27410, "task_loss": 0.7872896194458008 }, { "compression_loss": 0.0, "distillation_loss": 0.3038090467453003, "epoch": 9.91, "learning_rate": 5.307709644150505e-08, "loss": 0.2959, "step": 27420, "task_loss": 0.8730282783508301 }, { "compression_loss": 0.0, "distillation_loss": 0.3143189549446106, "epoch": 9.91, "learning_rate": 4.910381652853513e-08, "loss": 0.3617, "step": 27430, "task_loss": 0.34668901562690735 }, { "compression_loss": 0.0, "distillation_loss": 0.2636554539203644, "epoch": 9.92, "learning_rate": 4.528497350777983e-08, "loss": 0.3384, "step": 27440, "task_loss": 0.6265454292297363 }, { "compression_loss": 0.0, "distillation_loss": 0.32342687249183655, "epoch": 9.92, "learning_rate": 4.162058707048444e-08, "loss": 0.3056, "step": 27450, "task_loss": 0.8568477630615234 }, { "compression_loss": 0.0, "distillation_loss": 0.3245423436164856, "epoch": 9.92, "learning_rate": 3.8110676111451357e-08, "loss": 0.3082, "step": 27460, "task_loss": 0.5641300678253174 }, { "compression_loss": 0.0, "distillation_loss": 0.25845956802368164, "epoch": 9.93, "learning_rate": 3.4755258728963455e-08, "loss": 0.2974, "step": 27470, "task_loss": 0.5490345358848572 }, { "compression_loss": 0.0, "distillation_loss": 0.2910792827606201, "epoch": 9.93, "learning_rate": 3.155435222468417e-08, "loss": 0.3161, "step": 27480, "task_loss": 0.5065259337425232 }, { "compression_loss": 0.0, "distillation_loss": 0.20717984437942505, "epoch": 9.93, "learning_rate": 2.8507973103560903e-08, "loss": 0.3401, "step": 27490, "task_loss": 0.6428301334381104 }, { "compression_loss": 0.0, "distillation_loss": 0.27313390374183655, "epoch": 9.94, "learning_rate": 2.5616137073748436e-08, "loss": 0.3013, "step": 27500, "task_loss": 0.7006524801254272 }, { "epoch": 9.94, "eval_exact_match": 83.30179754020814, "eval_f1": 90.00136438136511, "step": 27500 }, { "compression_loss": 0.0, "distillation_loss": 0.4248490631580353, "epoch": 9.94, "learning_rate": 2.2878859046525648e-08, "loss": 0.3218, "step": 27510, "task_loss": 0.45274466276168823 }, { "compression_loss": 0.0, "distillation_loss": 0.2887372672557831, "epoch": 9.95, "learning_rate": 2.029615313622224e-08, "loss": 0.3149, "step": 27520, "task_loss": 0.3250759541988373 }, { "compression_loss": 0.0, "distillation_loss": 0.3086904287338257, "epoch": 9.95, "learning_rate": 1.786803266013548e-08, "loss": 0.3138, "step": 27530, "task_loss": 0.5649579167366028 }, { "compression_loss": 0.0, "distillation_loss": 0.27589845657348633, "epoch": 9.95, "learning_rate": 1.559451013847024e-08, "loss": 0.3084, "step": 27540, "task_loss": 0.6957834362983704 }, { "compression_loss": 0.0, "distillation_loss": 0.2580502927303314, "epoch": 9.96, "learning_rate": 1.3475597294275722e-08, "loss": 0.2937, "step": 27550, "task_loss": 0.3605492115020752 }, { "compression_loss": 0.0, "distillation_loss": 0.3019343316555023, "epoch": 9.96, "learning_rate": 1.1511305053375498e-08, "loss": 0.3341, "step": 27560, "task_loss": 0.6453779935836792 }, { "compression_loss": 0.0, "distillation_loss": 0.22303266823291779, "epoch": 9.96, "learning_rate": 9.701643544327565e-09, "loss": 0.2955, "step": 27570, "task_loss": 0.48411405086517334 }, { "compression_loss": 0.0, "distillation_loss": 0.29931584000587463, "epoch": 9.97, "learning_rate": 8.046622098347723e-09, "loss": 0.3183, "step": 27580, "task_loss": 0.4647744596004486 }, { "compression_loss": 0.0, "distillation_loss": 0.30010122060775757, "epoch": 9.97, "learning_rate": 6.5462492492829316e-09, "loss": 0.3054, "step": 27590, "task_loss": 0.8768385648727417 }, { "compression_loss": 0.0, "distillation_loss": 0.23750996589660645, "epoch": 9.97, "learning_rate": 5.2005327335680196e-09, "loss": 0.3119, "step": 27600, "task_loss": 0.33597642183303833 }, { "compression_loss": 0.0, "distillation_loss": 0.2556949853897095, "epoch": 9.98, "learning_rate": 4.0094794901623934e-09, "loss": 0.3085, "step": 27610, "task_loss": 0.19064223766326904 }, { "compression_loss": 0.0, "distillation_loss": 0.27123573422431946, "epoch": 9.98, "learning_rate": 2.973095660540048e-09, "loss": 0.2973, "step": 27620, "task_loss": 0.4306481182575226 }, { "compression_loss": 0.0, "distillation_loss": 0.30214256048202515, "epoch": 9.99, "learning_rate": 2.0913865886462714e-09, "loss": 0.3515, "step": 27630, "task_loss": 0.6253249645233154 }, { "compression_loss": 0.0, "distillation_loss": 0.25861015915870667, "epoch": 9.99, "learning_rate": 1.364356820864332e-09, "loss": 0.3419, "step": 27640, "task_loss": 0.4915951192378998 }, { "compression_loss": 0.0, "distillation_loss": 0.2867693603038788, "epoch": 9.99, "learning_rate": 7.920101060088225e-10, "loss": 0.3529, "step": 27650, "task_loss": 0.38751643896102905 }, { "compression_loss": 0.0, "distillation_loss": 0.2829661965370178, "epoch": 10.0, "learning_rate": 3.743493952890198e-10, "loss": 0.2897, "step": 27660, "task_loss": 0.3813509941101074 }, { "compression_loss": 0.0, "distillation_loss": 0.1939045637845993, "epoch": 10.0, "learning_rate": 1.1137684230888567e-10, "loss": 0.3115, "step": 27670, "task_loss": 0.2901547849178314 }, { "compression_loss": 0.0, "distillation_loss": 0.2560853064060211, "epoch": 10.0, "learning_rate": 3.0938030370908365e-12, "loss": 0.3379, "step": 27680, "task_loss": 0.46912720799446106 }, { "compression_loss": 0.0, "distillation_loss": 0.2885116934776306, "epoch": 10.01, "learning_rate": 5.9999950499164185e-05, "loss": 0.3083, "step": 27690, "task_loss": 0.39502573013305664 }, { "compression_loss": 0.0, "distillation_loss": 0.3206736445426941, "epoch": 10.01, "learning_rate": 5.999974940229864e-05, "loss": 0.3122, "step": 27700, "task_loss": 0.4612738788127899 }, { "compression_loss": 0.0, "distillation_loss": 0.33336344361305237, "epoch": 10.01, "learning_rate": 5.999939361663725e-05, "loss": 0.3242, "step": 27710, "task_loss": 0.4723026752471924 }, { "compression_loss": 0.0, "distillation_loss": 0.3171226382255554, "epoch": 10.02, "learning_rate": 5.999888314401456e-05, "loss": 0.3177, "step": 27720, "task_loss": 0.48637786507606506 }, { "compression_loss": 0.0, "distillation_loss": 0.26646995544433594, "epoch": 10.02, "learning_rate": 5.9998217987062765e-05, "loss": 0.3087, "step": 27730, "task_loss": 0.5959435701370239 }, { "compression_loss": 0.0, "distillation_loss": 0.35628658533096313, "epoch": 10.03, "learning_rate": 5.999739814921161e-05, "loss": 0.3672, "step": 27740, "task_loss": 0.9028927683830261 }, { "compression_loss": 0.0, "distillation_loss": 0.35965442657470703, "epoch": 10.03, "learning_rate": 5.9996423634688465e-05, "loss": 0.3404, "step": 27750, "task_loss": 0.8516654968261719 }, { "epoch": 10.03, "eval_exact_match": 82.55439924314096, "eval_f1": 89.54112319609712, "step": 27750 }, { "compression_loss": 0.0, "distillation_loss": 0.2923398017883301, "epoch": 10.03, "learning_rate": 5.999529444851826e-05, "loss": 0.3326, "step": 27760, "task_loss": 0.4506998658180237 }, { "compression_loss": 0.0, "distillation_loss": 0.39279308915138245, "epoch": 10.04, "learning_rate": 5.999401059652345e-05, "loss": 0.3135, "step": 27770, "task_loss": 0.7872811555862427 }, { "compression_loss": 0.0, "distillation_loss": 0.368738055229187, "epoch": 10.04, "learning_rate": 5.999257208532401e-05, "loss": 0.3256, "step": 27780, "task_loss": 0.42682385444641113 }, { "compression_loss": 0.0, "distillation_loss": 0.3173050284385681, "epoch": 10.04, "learning_rate": 5.99909789223374e-05, "loss": 0.3742, "step": 27790, "task_loss": 0.6968157291412354 }, { "compression_loss": 0.0, "distillation_loss": 0.3928011655807495, "epoch": 10.05, "learning_rate": 5.998923111577847e-05, "loss": 0.3723, "step": 27800, "task_loss": 0.42035573720932007 }, { "compression_loss": 0.0, "distillation_loss": 0.3175928294658661, "epoch": 10.05, "learning_rate": 5.998732867465954e-05, "loss": 0.3445, "step": 27810, "task_loss": 0.7033043503761292 }, { "compression_loss": 0.0, "distillation_loss": 0.28419843316078186, "epoch": 10.05, "learning_rate": 5.998527160879021e-05, "loss": 0.3125, "step": 27820, "task_loss": 0.340492844581604 }, { "compression_loss": 0.0, "distillation_loss": 0.3261581063270569, "epoch": 10.06, "learning_rate": 5.998305992877741e-05, "loss": 0.3205, "step": 27830, "task_loss": 0.5372412204742432 }, { "compression_loss": 0.0, "distillation_loss": 0.36411145329475403, "epoch": 10.06, "learning_rate": 5.99806936460253e-05, "loss": 0.3273, "step": 27840, "task_loss": 0.5985209345817566 }, { "compression_loss": 0.0, "distillation_loss": 0.38092899322509766, "epoch": 10.07, "learning_rate": 5.997817277273525e-05, "loss": 0.3682, "step": 27850, "task_loss": 0.6099511384963989 }, { "compression_loss": 0.0, "distillation_loss": 0.2801337242126465, "epoch": 10.07, "learning_rate": 5.997549732190571e-05, "loss": 0.3201, "step": 27860, "task_loss": 0.6733810901641846 }, { "compression_loss": 0.0, "distillation_loss": 0.3363659381866455, "epoch": 10.07, "learning_rate": 5.997266730733221e-05, "loss": 0.3784, "step": 27870, "task_loss": 0.6295250654220581 }, { "compression_loss": 0.0, "distillation_loss": 0.45163094997406006, "epoch": 10.08, "learning_rate": 5.996968274360725e-05, "loss": 0.366, "step": 27880, "task_loss": 0.9575765132904053 }, { "compression_loss": 0.0, "distillation_loss": 0.3112249970436096, "epoch": 10.08, "learning_rate": 5.996654364612026e-05, "loss": 0.3643, "step": 27890, "task_loss": 0.45321786403656006 }, { "compression_loss": 0.0, "distillation_loss": 0.32966169714927673, "epoch": 10.08, "learning_rate": 5.996325003105747e-05, "loss": 0.3322, "step": 27900, "task_loss": 0.330741822719574 }, { "compression_loss": 0.0, "distillation_loss": 0.3450155556201935, "epoch": 10.09, "learning_rate": 5.9959801915401866e-05, "loss": 0.3285, "step": 27910, "task_loss": 0.3840063810348511 }, { "compression_loss": 0.0, "distillation_loss": 0.4525710344314575, "epoch": 10.09, "learning_rate": 5.99561993169331e-05, "loss": 0.3503, "step": 27920, "task_loss": 0.7056595087051392 }, { "compression_loss": 0.0, "distillation_loss": 0.36409515142440796, "epoch": 10.09, "learning_rate": 5.995244225422738e-05, "loss": 0.347, "step": 27930, "task_loss": 0.591515302658081 }, { "compression_loss": 0.0, "distillation_loss": 0.4213048815727234, "epoch": 10.1, "learning_rate": 5.994853074665739e-05, "loss": 0.3146, "step": 27940, "task_loss": 0.4202615022659302 }, { "compression_loss": 0.0, "distillation_loss": 0.38822439312934875, "epoch": 10.1, "learning_rate": 5.9944464814392157e-05, "loss": 0.3537, "step": 27950, "task_loss": 0.646776556968689 }, { "compression_loss": 0.0, "distillation_loss": 0.38637375831604004, "epoch": 10.1, "learning_rate": 5.9940244478397005e-05, "loss": 0.3575, "step": 27960, "task_loss": 0.4372355341911316 }, { "compression_loss": 0.0, "distillation_loss": 0.3227386474609375, "epoch": 10.11, "learning_rate": 5.9935869760433415e-05, "loss": 0.3577, "step": 27970, "task_loss": 0.44878262281417847 }, { "compression_loss": 0.0, "distillation_loss": 0.322928786277771, "epoch": 10.11, "learning_rate": 5.99313406830589e-05, "loss": 0.3376, "step": 27980, "task_loss": 0.7455002665519714 }, { "compression_loss": 0.0, "distillation_loss": 0.40133416652679443, "epoch": 10.12, "learning_rate": 5.99266572696269e-05, "loss": 0.3633, "step": 27990, "task_loss": 0.8214153051376343 }, { "compression_loss": 0.0, "distillation_loss": 0.31198346614837646, "epoch": 10.12, "learning_rate": 5.992181954428667e-05, "loss": 0.3622, "step": 28000, "task_loss": 0.37712812423706055 }, { "epoch": 10.12, "eval_exact_match": 82.68684957426679, "eval_f1": 89.56443195465796, "step": 28000 }, { "compression_loss": 0.0, "distillation_loss": 0.24220602214336395, "epoch": 10.12, "learning_rate": 5.9916827531983165e-05, "loss": 0.3767, "step": 28010, "task_loss": 0.3110200762748718 }, { "compression_loss": 0.0, "distillation_loss": 0.24042615294456482, "epoch": 10.13, "learning_rate": 5.991168125845685e-05, "loss": 0.3189, "step": 28020, "task_loss": 0.5726457238197327 }, { "compression_loss": 0.0, "distillation_loss": 0.37541890144348145, "epoch": 10.13, "learning_rate": 5.990638075024366e-05, "loss": 0.3511, "step": 28030, "task_loss": 0.4889417290687561 }, { "compression_loss": 0.0, "distillation_loss": 0.28294020891189575, "epoch": 10.13, "learning_rate": 5.990092603467481e-05, "loss": 0.3717, "step": 28040, "task_loss": 0.3334283232688904 }, { "compression_loss": 0.0, "distillation_loss": 0.3576716184616089, "epoch": 10.14, "learning_rate": 5.989531713987662e-05, "loss": 0.3557, "step": 28050, "task_loss": 0.8734741806983948 }, { "compression_loss": 0.0, "distillation_loss": 0.4389265179634094, "epoch": 10.14, "learning_rate": 5.988955409477045e-05, "loss": 0.3551, "step": 28060, "task_loss": 0.5746753215789795 }, { "compression_loss": 0.0, "distillation_loss": 0.4083833694458008, "epoch": 10.14, "learning_rate": 5.98836369290725e-05, "loss": 0.3335, "step": 28070, "task_loss": 0.6252074241638184 }, { "compression_loss": 0.0, "distillation_loss": 0.4334869384765625, "epoch": 10.15, "learning_rate": 5.987756567329367e-05, "loss": 0.3735, "step": 28080, "task_loss": 0.5796908140182495 }, { "compression_loss": 0.0, "distillation_loss": 0.2732439339160919, "epoch": 10.15, "learning_rate": 5.9871340358739385e-05, "loss": 0.3554, "step": 28090, "task_loss": 0.26658836007118225 }, { "compression_loss": 0.0, "distillation_loss": 0.3145688772201538, "epoch": 10.16, "learning_rate": 5.9864961017509476e-05, "loss": 0.3682, "step": 28100, "task_loss": 0.4686429500579834 }, { "compression_loss": 0.0, "distillation_loss": 0.29603311419487, "epoch": 10.16, "learning_rate": 5.985842768249795e-05, "loss": 0.3878, "step": 28110, "task_loss": 0.2740791440010071 }, { "compression_loss": 0.0, "distillation_loss": 0.4538814425468445, "epoch": 10.16, "learning_rate": 5.98517403873929e-05, "loss": 0.3946, "step": 28120, "task_loss": 0.6446802616119385 }, { "compression_loss": 0.0, "distillation_loss": 0.3252262473106384, "epoch": 10.17, "learning_rate": 5.984489916667626e-05, "loss": 0.3301, "step": 28130, "task_loss": 0.6380620002746582 }, { "compression_loss": 0.0, "distillation_loss": 0.3742297887802124, "epoch": 10.17, "learning_rate": 5.983790405562367e-05, "loss": 0.3774, "step": 28140, "task_loss": 0.592261016368866 }, { "compression_loss": 0.0, "distillation_loss": 0.38454627990722656, "epoch": 10.17, "learning_rate": 5.983075509030426e-05, "loss": 0.3663, "step": 28150, "task_loss": 0.7711337804794312 }, { "compression_loss": 0.0, "distillation_loss": 0.3592371344566345, "epoch": 10.18, "learning_rate": 5.982345230758051e-05, "loss": 0.3388, "step": 28160, "task_loss": 0.875689685344696 }, { "compression_loss": 0.0, "distillation_loss": 0.29503804445266724, "epoch": 10.18, "learning_rate": 5.981599574510802e-05, "loss": 0.3679, "step": 28170, "task_loss": 0.408102810382843 }, { "compression_loss": 0.0, "distillation_loss": 0.32741719484329224, "epoch": 10.18, "learning_rate": 5.980838544133534e-05, "loss": 0.3239, "step": 28180, "task_loss": 0.49565500020980835 }, { "compression_loss": 0.0, "distillation_loss": 0.3471035957336426, "epoch": 10.19, "learning_rate": 5.980062143550375e-05, "loss": 0.363, "step": 28190, "task_loss": 0.9158684015274048 }, { "compression_loss": 0.0, "distillation_loss": 0.5277878046035767, "epoch": 10.19, "learning_rate": 5.979270376764708e-05, "loss": 0.3955, "step": 28200, "task_loss": 0.8009511232376099 }, { "compression_loss": 0.0, "distillation_loss": 0.30897581577301025, "epoch": 10.2, "learning_rate": 5.9784632478591475e-05, "loss": 0.3419, "step": 28210, "task_loss": 0.5038349032402039 }, { "compression_loss": 0.0, "distillation_loss": 0.3534333407878876, "epoch": 10.2, "learning_rate": 5.977640760995523e-05, "loss": 0.3718, "step": 28220, "task_loss": 0.5224078297615051 }, { "compression_loss": 0.0, "distillation_loss": 0.3860829472541809, "epoch": 10.2, "learning_rate": 5.9768029204148513e-05, "loss": 0.362, "step": 28230, "task_loss": 0.5800853967666626 }, { "compression_loss": 0.0, "distillation_loss": 0.3779979348182678, "epoch": 10.21, "learning_rate": 5.9759497304373234e-05, "loss": 0.4041, "step": 28240, "task_loss": 1.0521299839019775 }, { "compression_loss": 0.0, "distillation_loss": 0.36808979511260986, "epoch": 10.21, "learning_rate": 5.9750811954622714e-05, "loss": 0.3753, "step": 28250, "task_loss": 0.42708760499954224 }, { "epoch": 10.21, "eval_exact_match": 82.14758751182592, "eval_f1": 89.47133153334977, "step": 28250 }, { "compression_loss": 0.0, "distillation_loss": 0.3896569013595581, "epoch": 10.21, "learning_rate": 5.974197319968153e-05, "loss": 0.3771, "step": 28260, "task_loss": 0.5128267407417297 }, { "compression_loss": 0.0, "distillation_loss": 0.38547414541244507, "epoch": 10.22, "learning_rate": 5.973298108512531e-05, "loss": 0.3917, "step": 28270, "task_loss": 0.6350282430648804 }, { "compression_loss": 0.0, "distillation_loss": 0.2885977029800415, "epoch": 10.22, "learning_rate": 5.972383565732038e-05, "loss": 0.349, "step": 28280, "task_loss": 0.38273125886917114 }, { "compression_loss": 0.0, "distillation_loss": 0.3749898374080658, "epoch": 10.22, "learning_rate": 5.9715473728427166e-05, "loss": 0.3714, "step": 28290, "task_loss": 0.29564017057418823 }, { "compression_loss": 0.0, "distillation_loss": 0.36325937509536743, "epoch": 10.23, "learning_rate": 5.970603713602016e-05, "loss": 0.3571, "step": 28300, "task_loss": 0.5103201866149902 }, { "compression_loss": 0.0, "distillation_loss": 0.4782273471355438, "epoch": 10.23, "learning_rate": 5.9696447369296534e-05, "loss": 0.3782, "step": 28310, "task_loss": 0.811413586139679 }, { "compression_loss": 0.0, "distillation_loss": 0.25456976890563965, "epoch": 10.23, "learning_rate": 5.968670447770433e-05, "loss": 0.3548, "step": 28320, "task_loss": 0.374428927898407 }, { "compression_loss": 0.0, "distillation_loss": 0.33408117294311523, "epoch": 10.24, "learning_rate": 5.96768085114812e-05, "loss": 0.3829, "step": 28330, "task_loss": 0.4150253236293793 }, { "compression_loss": 0.0, "distillation_loss": 0.32442450523376465, "epoch": 10.24, "learning_rate": 5.966675952165403e-05, "loss": 0.3675, "step": 28340, "task_loss": 0.7664307355880737 }, { "compression_loss": 0.0, "distillation_loss": 0.4130185842514038, "epoch": 10.25, "learning_rate": 5.965655756003884e-05, "loss": 0.3821, "step": 28350, "task_loss": 0.812406063079834 }, { "compression_loss": 0.0, "distillation_loss": 0.3772509694099426, "epoch": 10.25, "learning_rate": 5.964620267924034e-05, "loss": 0.3437, "step": 28360, "task_loss": 0.6886706352233887 }, { "compression_loss": 0.0, "distillation_loss": 0.3586800694465637, "epoch": 10.25, "learning_rate": 5.963569493265179e-05, "loss": 0.3691, "step": 28370, "task_loss": 0.726294994354248 }, { "compression_loss": 0.0, "distillation_loss": 0.33081382513046265, "epoch": 10.26, "learning_rate": 5.9625034374454664e-05, "loss": 0.3461, "step": 28380, "task_loss": 0.8060112595558167 }, { "compression_loss": 0.0, "distillation_loss": 0.39789003133773804, "epoch": 10.26, "learning_rate": 5.961422105961839e-05, "loss": 0.3707, "step": 28390, "task_loss": 0.5250519514083862 }, { "compression_loss": 0.0, "distillation_loss": 0.4983270764350891, "epoch": 10.26, "learning_rate": 5.9603255043900047e-05, "loss": 0.3661, "step": 28400, "task_loss": 0.4081471264362335 }, { "compression_loss": 0.0, "distillation_loss": 0.256054162979126, "epoch": 10.27, "learning_rate": 5.95921363838441e-05, "loss": 0.373, "step": 28410, "task_loss": 0.4245930314064026 }, { "compression_loss": 0.0, "distillation_loss": 0.26962295174598694, "epoch": 10.27, "learning_rate": 5.95808651367821e-05, "loss": 0.3395, "step": 28420, "task_loss": 0.26497402787208557 }, { "compression_loss": 0.0, "distillation_loss": 0.2624947726726532, "epoch": 10.27, "learning_rate": 5.956944136083239e-05, "loss": 0.3957, "step": 28430, "task_loss": 0.25945717096328735 }, { "compression_loss": 0.0, "distillation_loss": 0.34345728158950806, "epoch": 10.28, "learning_rate": 5.95578651148998e-05, "loss": 0.3927, "step": 28440, "task_loss": 0.41743308305740356 }, { "compression_loss": 0.0, "distillation_loss": 0.34728893637657166, "epoch": 10.28, "learning_rate": 5.954613645867534e-05, "loss": 0.3566, "step": 28450, "task_loss": 0.5428339838981628 }, { "compression_loss": 0.0, "distillation_loss": 0.3047831058502197, "epoch": 10.29, "learning_rate": 5.9534255452635915e-05, "loss": 0.386, "step": 28460, "task_loss": 0.3285978436470032 }, { "compression_loss": 0.0, "distillation_loss": 0.44751378893852234, "epoch": 10.29, "learning_rate": 5.9522222158043986e-05, "loss": 0.3781, "step": 28470, "task_loss": 0.32941681146621704 }, { "compression_loss": 0.0, "distillation_loss": 0.3645317554473877, "epoch": 10.29, "learning_rate": 5.9510036636947265e-05, "loss": 0.381, "step": 28480, "task_loss": 0.7505057454109192 }, { "compression_loss": 0.0, "distillation_loss": 0.3114251494407654, "epoch": 10.3, "learning_rate": 5.9497698952178385e-05, "loss": 0.3428, "step": 28490, "task_loss": 0.47057342529296875 }, { "compression_loss": 0.0, "distillation_loss": 0.3543356657028198, "epoch": 10.3, "learning_rate": 5.948520916735462e-05, "loss": 0.4157, "step": 28500, "task_loss": 0.4492913484573364 }, { "epoch": 10.3, "eval_exact_match": 82.74361400189214, "eval_f1": 89.51669420473074, "step": 28500 }, { "compression_loss": 0.0, "distillation_loss": 0.3564971089363098, "epoch": 10.3, "learning_rate": 5.9472567346877475e-05, "loss": 0.3685, "step": 28510, "task_loss": 0.3423532545566559 }, { "compression_loss": 0.0, "distillation_loss": 0.28164559602737427, "epoch": 10.31, "learning_rate": 5.9459773555932445e-05, "loss": 0.3434, "step": 28520, "task_loss": 0.7573295831680298 }, { "compression_loss": 0.0, "distillation_loss": 0.24590784311294556, "epoch": 10.31, "learning_rate": 5.9446827860488616e-05, "loss": 0.3907, "step": 28530, "task_loss": 0.570406436920166 }, { "compression_loss": 0.0, "distillation_loss": 0.34086698293685913, "epoch": 10.31, "learning_rate": 5.943373032729835e-05, "loss": 0.3792, "step": 28540, "task_loss": 0.24719828367233276 }, { "compression_loss": 0.0, "distillation_loss": 0.37996360659599304, "epoch": 10.32, "learning_rate": 5.942048102389691e-05, "loss": 0.3728, "step": 28550, "task_loss": 0.6601479053497314 }, { "compression_loss": 0.0, "distillation_loss": 0.2580659091472626, "epoch": 10.32, "learning_rate": 5.940708001860219e-05, "loss": 0.3737, "step": 28560, "task_loss": 0.340899795293808 }, { "compression_loss": 0.0, "distillation_loss": 0.43038126826286316, "epoch": 10.33, "learning_rate": 5.9393527380514265e-05, "loss": 0.3748, "step": 28570, "task_loss": 0.684032678604126 }, { "compression_loss": 0.0, "distillation_loss": 0.30722588300704956, "epoch": 10.33, "learning_rate": 5.937982317951509e-05, "loss": 0.3499, "step": 28580, "task_loss": 0.2970711290836334 }, { "compression_loss": 0.0, "distillation_loss": 0.38105618953704834, "epoch": 10.33, "learning_rate": 5.936596748626814e-05, "loss": 0.3456, "step": 28590, "task_loss": 0.3015686273574829 }, { "compression_loss": 0.0, "distillation_loss": 0.4224506616592407, "epoch": 10.34, "learning_rate": 5.935196037221803e-05, "loss": 0.3923, "step": 28600, "task_loss": 0.6144254207611084 }, { "compression_loss": 0.0, "distillation_loss": 0.2678179442882538, "epoch": 10.34, "learning_rate": 5.933780190959015e-05, "loss": 0.3617, "step": 28610, "task_loss": 0.404130220413208 }, { "compression_loss": 0.0, "distillation_loss": 0.40702712535858154, "epoch": 10.34, "learning_rate": 5.932349217139029e-05, "loss": 0.3927, "step": 28620, "task_loss": 0.5161640048027039 }, { "compression_loss": 0.0, "distillation_loss": 0.39445358514785767, "epoch": 10.35, "learning_rate": 5.930903123140427e-05, "loss": 0.3783, "step": 28630, "task_loss": 0.7414616346359253 }, { "compression_loss": 0.0, "distillation_loss": 0.30149519443511963, "epoch": 10.35, "learning_rate": 5.9294419164197565e-05, "loss": 0.3494, "step": 28640, "task_loss": 0.5813414454460144 }, { "compression_loss": 0.0, "distillation_loss": 0.43674802780151367, "epoch": 10.35, "learning_rate": 5.927965604511491e-05, "loss": 0.3455, "step": 28650, "task_loss": 0.9160444736480713 }, { "compression_loss": 0.0, "distillation_loss": 0.4207167625427246, "epoch": 10.36, "learning_rate": 5.9264741950279895e-05, "loss": 0.4298, "step": 28660, "task_loss": 0.6424660682678223 }, { "compression_loss": 0.0, "distillation_loss": 0.38436251878738403, "epoch": 10.36, "learning_rate": 5.924967695659463e-05, "loss": 0.3763, "step": 28670, "task_loss": 0.8374863862991333 }, { "compression_loss": 0.0, "distillation_loss": 0.25715458393096924, "epoch": 10.37, "learning_rate": 5.923446114173928e-05, "loss": 0.3494, "step": 28680, "task_loss": 0.34484949707984924 }, { "compression_loss": 0.0, "distillation_loss": 0.3070823550224304, "epoch": 10.37, "learning_rate": 5.9219094584171695e-05, "loss": 0.3655, "step": 28690, "task_loss": 0.45581191778182983 }, { "compression_loss": 0.0, "distillation_loss": 0.392423152923584, "epoch": 10.37, "learning_rate": 5.9203577363127016e-05, "loss": 0.3727, "step": 28700, "task_loss": 0.5320243835449219 }, { "compression_loss": 0.0, "distillation_loss": 0.44943130016326904, "epoch": 10.38, "learning_rate": 5.918790955861727e-05, "loss": 0.3844, "step": 28710, "task_loss": 0.5158875584602356 }, { "compression_loss": 0.0, "distillation_loss": 0.36688247323036194, "epoch": 10.38, "learning_rate": 5.91720912514309e-05, "loss": 0.3352, "step": 28720, "task_loss": 0.4710213243961334 }, { "compression_loss": 0.0, "distillation_loss": 0.30681973695755005, "epoch": 10.38, "learning_rate": 5.915612252313243e-05, "loss": 0.3831, "step": 28730, "task_loss": 0.39453649520874023 }, { "compression_loss": 0.0, "distillation_loss": 0.3728558421134949, "epoch": 10.39, "learning_rate": 5.914000345606199e-05, "loss": 0.4136, "step": 28740, "task_loss": 0.4394037425518036 }, { "compression_loss": 0.0, "distillation_loss": 0.388075053691864, "epoch": 10.39, "learning_rate": 5.9123734133334916e-05, "loss": 0.3804, "step": 28750, "task_loss": 0.9567576050758362 }, { "epoch": 10.39, "eval_exact_match": 82.3841059602649, "eval_f1": 89.3657178878086, "step": 28750 }, { "compression_loss": 0.0, "distillation_loss": 0.4076917767524719, "epoch": 10.39, "learning_rate": 5.91073146388413e-05, "loss": 0.3567, "step": 28760, "task_loss": 0.8818804025650024 }, { "compression_loss": 0.0, "distillation_loss": 0.3763560950756073, "epoch": 10.4, "learning_rate": 5.909074505724557e-05, "loss": 0.3752, "step": 28770, "task_loss": 0.5184281468391418 }, { "compression_loss": 0.0, "distillation_loss": 0.41304516792297363, "epoch": 10.4, "learning_rate": 5.907402547398609e-05, "loss": 0.3807, "step": 28780, "task_loss": 0.6674418449401855 }, { "compression_loss": 0.0, "distillation_loss": 0.5048227906227112, "epoch": 10.4, "learning_rate": 5.905715597527462e-05, "loss": 0.3916, "step": 28790, "task_loss": 1.2223567962646484 }, { "compression_loss": 0.0, "distillation_loss": 0.45865944027900696, "epoch": 10.41, "learning_rate": 5.9040136648095986e-05, "loss": 0.3883, "step": 28800, "task_loss": 0.4730258882045746 }, { "compression_loss": 0.0, "distillation_loss": 0.3826584219932556, "epoch": 10.41, "learning_rate": 5.902296758020755e-05, "loss": 0.4247, "step": 28810, "task_loss": 0.3842507004737854 }, { "compression_loss": 0.0, "distillation_loss": 0.3068469762802124, "epoch": 10.42, "learning_rate": 5.900564886013882e-05, "loss": 0.3572, "step": 28820, "task_loss": 0.348866730928421 }, { "compression_loss": 0.0, "distillation_loss": 0.30432772636413574, "epoch": 10.42, "learning_rate": 5.8988180577190914e-05, "loss": 0.3867, "step": 28830, "task_loss": 0.32357197999954224 }, { "compression_loss": 0.0, "distillation_loss": 0.25718772411346436, "epoch": 10.42, "learning_rate": 5.8970562821436184e-05, "loss": 0.327, "step": 28840, "task_loss": 0.38030368089675903 }, { "compression_loss": 0.0, "distillation_loss": 0.3453293442726135, "epoch": 10.43, "learning_rate": 5.895279568371772e-05, "loss": 0.336, "step": 28850, "task_loss": 0.5080013275146484 }, { "compression_loss": 0.0, "distillation_loss": 0.35937240719795227, "epoch": 10.43, "learning_rate": 5.8934879255648834e-05, "loss": 0.39, "step": 28860, "task_loss": 0.3022608458995819 }, { "compression_loss": 0.0, "distillation_loss": 0.30319833755493164, "epoch": 10.43, "learning_rate": 5.8916813629612655e-05, "loss": 0.4019, "step": 28870, "task_loss": 0.4994625151157379 }, { "compression_loss": 0.0, "distillation_loss": 0.37237548828125, "epoch": 10.44, "learning_rate": 5.8898598898761656e-05, "loss": 0.4214, "step": 28880, "task_loss": 0.6839677095413208 }, { "compression_loss": 0.0, "distillation_loss": 0.4111763834953308, "epoch": 10.44, "learning_rate": 5.8880235157017074e-05, "loss": 0.4021, "step": 28890, "task_loss": 0.46749764680862427 }, { "compression_loss": 0.0, "distillation_loss": 0.34317219257354736, "epoch": 10.44, "learning_rate": 5.886172249906856e-05, "loss": 0.3653, "step": 28900, "task_loss": 0.7069752216339111 }, { "compression_loss": 0.0, "distillation_loss": 0.47758495807647705, "epoch": 10.45, "learning_rate": 5.8843061020373594e-05, "loss": 0.3454, "step": 28910, "task_loss": 0.5334576964378357 }, { "compression_loss": 0.0, "distillation_loss": 0.3918111324310303, "epoch": 10.45, "learning_rate": 5.882425081715705e-05, "loss": 0.4134, "step": 28920, "task_loss": 0.47633877396583557 }, { "compression_loss": 0.0, "distillation_loss": 0.42572399973869324, "epoch": 10.46, "learning_rate": 5.8805291986410646e-05, "loss": 0.3451, "step": 28930, "task_loss": 0.44892263412475586 }, { "compression_loss": 0.0, "distillation_loss": 0.3907076120376587, "epoch": 10.46, "learning_rate": 5.878618462589249e-05, "loss": 0.417, "step": 28940, "task_loss": 0.8796771168708801 }, { "compression_loss": 0.0, "distillation_loss": 0.49205639958381653, "epoch": 10.46, "learning_rate": 5.876692883412656e-05, "loss": 0.374, "step": 28950, "task_loss": 0.7883171439170837 }, { "compression_loss": 0.0, "distillation_loss": 0.23329684138298035, "epoch": 10.47, "learning_rate": 5.87475247104022e-05, "loss": 0.3493, "step": 28960, "task_loss": 0.2832324504852295 }, { "compression_loss": 0.0, "distillation_loss": 0.4535112977027893, "epoch": 10.47, "learning_rate": 5.872797235477359e-05, "loss": 0.3867, "step": 28970, "task_loss": 0.45803922414779663 }, { "compression_loss": 0.0, "distillation_loss": 0.397172749042511, "epoch": 10.47, "learning_rate": 5.8708271868059255e-05, "loss": 0.3743, "step": 28980, "task_loss": 0.587207019329071 }, { "compression_loss": 0.0, "distillation_loss": 0.33365657925605774, "epoch": 10.48, "learning_rate": 5.868842335184153e-05, "loss": 0.3598, "step": 28990, "task_loss": 0.5650234818458557 }, { "compression_loss": 0.0, "distillation_loss": 0.395450234413147, "epoch": 10.48, "learning_rate": 5.866842690846602e-05, "loss": 0.4087, "step": 29000, "task_loss": 0.5595653057098389 }, { "epoch": 10.48, "eval_exact_match": 82.42194891201514, "eval_f1": 89.48886603082968, "step": 29000 }, { "compression_loss": 0.0, "distillation_loss": 0.3053317666053772, "epoch": 10.48, "learning_rate": 5.864828264104114e-05, "loss": 0.3498, "step": 29010, "task_loss": 0.34997349977493286 }, { "compression_loss": 0.0, "distillation_loss": 0.3626655340194702, "epoch": 10.49, "learning_rate": 5.862799065343749e-05, "loss": 0.3818, "step": 29020, "task_loss": 0.5937673449516296 }, { "compression_loss": 0.0, "distillation_loss": 0.42667001485824585, "epoch": 10.49, "learning_rate": 5.860755105028738e-05, "loss": 0.3867, "step": 29030, "task_loss": 0.9323544502258301 }, { "compression_loss": 0.0, "distillation_loss": 0.42904749512672424, "epoch": 10.5, "learning_rate": 5.858696393698428e-05, "loss": 0.3985, "step": 29040, "task_loss": 0.46694380044937134 }, { "compression_loss": 0.0, "distillation_loss": 0.39898544549942017, "epoch": 10.5, "learning_rate": 5.8566229419682275e-05, "loss": 0.4042, "step": 29050, "task_loss": 0.5771615505218506 }, { "compression_loss": 0.0, "distillation_loss": 0.20150551199913025, "epoch": 10.5, "learning_rate": 5.8545347605295516e-05, "loss": 0.3512, "step": 29060, "task_loss": 0.5410712957382202 }, { "compression_loss": 0.0, "distillation_loss": 0.3072255253791809, "epoch": 10.51, "learning_rate": 5.852431860149763e-05, "loss": 0.3717, "step": 29070, "task_loss": 0.4550820589065552 }, { "compression_loss": 0.0, "distillation_loss": 0.4038892388343811, "epoch": 10.51, "learning_rate": 5.850314251672126e-05, "loss": 0.398, "step": 29080, "task_loss": 0.5738189220428467 }, { "compression_loss": 0.0, "distillation_loss": 0.3132917284965515, "epoch": 10.51, "learning_rate": 5.848181946015741e-05, "loss": 0.3839, "step": 29090, "task_loss": 0.5845606923103333 }, { "compression_loss": 0.0, "distillation_loss": 0.4155728816986084, "epoch": 10.52, "learning_rate": 5.846034954175493e-05, "loss": 0.3925, "step": 29100, "task_loss": 0.6973046660423279 }, { "compression_loss": 0.0, "distillation_loss": 0.31962496042251587, "epoch": 10.52, "learning_rate": 5.8438732872219934e-05, "loss": 0.3677, "step": 29110, "task_loss": 0.6435040235519409 }, { "compression_loss": 0.0, "distillation_loss": 0.34306031465530396, "epoch": 10.52, "learning_rate": 5.8416969563015246e-05, "loss": 0.3737, "step": 29120, "task_loss": 0.42822667956352234 }, { "compression_loss": 0.0, "distillation_loss": 0.25659072399139404, "epoch": 10.53, "learning_rate": 5.83950597263598e-05, "loss": 0.3953, "step": 29130, "task_loss": 0.5244855880737305 }, { "compression_loss": 0.0, "distillation_loss": 0.3888118863105774, "epoch": 10.53, "learning_rate": 5.837300347522809e-05, "loss": 0.4479, "step": 29140, "task_loss": 0.445207417011261 }, { "compression_loss": 0.0, "distillation_loss": 0.3718984127044678, "epoch": 10.53, "learning_rate": 5.8350800923349556e-05, "loss": 0.3726, "step": 29150, "task_loss": 0.8107948303222656 }, { "compression_loss": 0.0, "distillation_loss": 0.3401542007923126, "epoch": 10.54, "learning_rate": 5.8328452185208034e-05, "loss": 0.3725, "step": 29160, "task_loss": 0.29308414459228516 }, { "compression_loss": 0.0, "distillation_loss": 0.3953520655632019, "epoch": 10.54, "learning_rate": 5.830595737604112e-05, "loss": 0.3662, "step": 29170, "task_loss": 0.6135333776473999 }, { "compression_loss": 0.0, "distillation_loss": 0.33224552869796753, "epoch": 10.55, "learning_rate": 5.828331661183961e-05, "loss": 0.4135, "step": 29180, "task_loss": 0.6807444095611572 }, { "compression_loss": 0.0, "distillation_loss": 0.46813124418258667, "epoch": 10.55, "learning_rate": 5.826053000934692e-05, "loss": 0.4053, "step": 29190, "task_loss": 0.48048609495162964 }, { "compression_loss": 0.0, "distillation_loss": 0.3617834448814392, "epoch": 10.55, "learning_rate": 5.8237597686058406e-05, "loss": 0.3684, "step": 29200, "task_loss": 0.684455156326294 }, { "compression_loss": 0.0, "distillation_loss": 0.37988534569740295, "epoch": 10.56, "learning_rate": 5.821451976022086e-05, "loss": 0.3826, "step": 29210, "task_loss": 0.44116660952568054 }, { "compression_loss": 0.0, "distillation_loss": 0.3315032124519348, "epoch": 10.56, "learning_rate": 5.819129635083182e-05, "loss": 0.4355, "step": 29220, "task_loss": 0.8068733215332031 }, { "compression_loss": 0.0, "distillation_loss": 0.32461968064308167, "epoch": 10.56, "learning_rate": 5.8167927577638994e-05, "loss": 0.4045, "step": 29230, "task_loss": 0.49032747745513916 }, { "compression_loss": 0.0, "distillation_loss": 0.4072458744049072, "epoch": 10.57, "learning_rate": 5.8144413561139636e-05, "loss": 0.3736, "step": 29240, "task_loss": 0.5367380976676941 }, { "compression_loss": 0.0, "distillation_loss": 0.4317589998245239, "epoch": 10.57, "learning_rate": 5.812075442257992e-05, "loss": 0.4024, "step": 29250, "task_loss": 0.4210602045059204 }, { "epoch": 10.57, "eval_exact_match": 82.28949858088932, "eval_f1": 89.47220420737969, "step": 29250 }, { "compression_loss": 0.0, "distillation_loss": 0.30034422874450684, "epoch": 10.57, "learning_rate": 5.8096950283954324e-05, "loss": 0.3691, "step": 29260, "task_loss": 0.6281427145004272 }, { "compression_loss": 0.0, "distillation_loss": 0.6280544996261597, "epoch": 10.58, "learning_rate": 5.8073001268004986e-05, "loss": 0.433, "step": 29270, "task_loss": 0.9039506316184998 }, { "compression_loss": 0.0, "distillation_loss": 0.3547070026397705, "epoch": 10.58, "learning_rate": 5.804890749822109e-05, "loss": 0.379, "step": 29280, "task_loss": 0.6923799514770508 }, { "compression_loss": 0.0, "distillation_loss": 0.4179234504699707, "epoch": 10.59, "learning_rate": 5.8024669098838224e-05, "loss": 0.4061, "step": 29290, "task_loss": 0.5038123726844788 }, { "compression_loss": 0.0, "distillation_loss": 0.4669201970100403, "epoch": 10.59, "learning_rate": 5.8000286194837706e-05, "loss": 0.3888, "step": 29300, "task_loss": 0.8348492383956909 }, { "compression_loss": 0.0, "distillation_loss": 0.354667067527771, "epoch": 10.59, "learning_rate": 5.7975758911945995e-05, "loss": 0.4108, "step": 29310, "task_loss": 0.4626341164112091 }, { "compression_loss": 0.0, "distillation_loss": 0.2784455418586731, "epoch": 10.6, "learning_rate": 5.795108737663401e-05, "loss": 0.3752, "step": 29320, "task_loss": 0.5510279536247253 }, { "compression_loss": 0.0, "distillation_loss": 0.32893824577331543, "epoch": 10.6, "learning_rate": 5.79262717161165e-05, "loss": 0.3742, "step": 29330, "task_loss": 0.7631270885467529 }, { "compression_loss": 0.0, "distillation_loss": 0.3505815863609314, "epoch": 10.6, "learning_rate": 5.7901312058351334e-05, "loss": 0.3496, "step": 29340, "task_loss": 0.40224701166152954 }, { "compression_loss": 0.0, "distillation_loss": 0.4556928277015686, "epoch": 10.61, "learning_rate": 5.78762085320389e-05, "loss": 0.3705, "step": 29350, "task_loss": 0.6299753189086914 }, { "compression_loss": 0.0, "distillation_loss": 0.40033918619155884, "epoch": 10.61, "learning_rate": 5.7850961266621434e-05, "loss": 0.3959, "step": 29360, "task_loss": 0.7346347570419312 }, { "compression_loss": 0.0, "distillation_loss": 0.43684858083724976, "epoch": 10.61, "learning_rate": 5.782557039228231e-05, "loss": 0.3916, "step": 29370, "task_loss": 0.645737886428833 }, { "compression_loss": 0.0, "distillation_loss": 0.4691216051578522, "epoch": 10.62, "learning_rate": 5.780003603994543e-05, "loss": 0.4144, "step": 29380, "task_loss": 0.3999781012535095 }, { "compression_loss": 0.0, "distillation_loss": 0.46305355429649353, "epoch": 10.62, "learning_rate": 5.77743583412745e-05, "loss": 0.4086, "step": 29390, "task_loss": 0.7214027643203735 }, { "compression_loss": 0.0, "distillation_loss": 0.3314850926399231, "epoch": 10.63, "learning_rate": 5.774853742867237e-05, "loss": 0.378, "step": 29400, "task_loss": 0.6907333731651306 }, { "compression_loss": 0.0, "distillation_loss": 0.33742809295654297, "epoch": 10.63, "learning_rate": 5.772257343528033e-05, "loss": 0.3629, "step": 29410, "task_loss": 0.6695260405540466 }, { "compression_loss": 0.0, "distillation_loss": 0.36523616313934326, "epoch": 10.63, "learning_rate": 5.7696466494977485e-05, "loss": 0.4139, "step": 29420, "task_loss": 0.6513307094573975 }, { "compression_loss": 0.0, "distillation_loss": 0.4402333199977875, "epoch": 10.64, "learning_rate": 5.767021674237998e-05, "loss": 0.4005, "step": 29430, "task_loss": 0.6502874493598938 }, { "compression_loss": 0.0, "distillation_loss": 0.2928636968135834, "epoch": 10.64, "learning_rate": 5.7643824312840375e-05, "loss": 0.4047, "step": 29440, "task_loss": 0.4543102979660034 }, { "compression_loss": 0.0, "distillation_loss": 0.3670118749141693, "epoch": 10.64, "learning_rate": 5.76172893424469e-05, "loss": 0.412, "step": 29450, "task_loss": 0.467820942401886 }, { "compression_loss": 0.0, "distillation_loss": 0.5537813901901245, "epoch": 10.65, "learning_rate": 5.75906119680228e-05, "loss": 0.3999, "step": 29460, "task_loss": 0.6507628560066223 }, { "compression_loss": 0.0, "distillation_loss": 0.27481332421302795, "epoch": 10.65, "learning_rate": 5.756379232712559e-05, "loss": 0.3434, "step": 29470, "task_loss": 0.5384325385093689 }, { "compression_loss": 0.0, "distillation_loss": 0.38477927446365356, "epoch": 10.65, "learning_rate": 5.753683055804634e-05, "loss": 0.3771, "step": 29480, "task_loss": 0.5689960718154907 }, { "compression_loss": 0.0, "distillation_loss": 0.31757038831710815, "epoch": 10.66, "learning_rate": 5.750972679980902e-05, "loss": 0.4164, "step": 29490, "task_loss": 0.6483100056648254 }, { "compression_loss": 0.0, "distillation_loss": 0.44253355264663696, "epoch": 10.66, "learning_rate": 5.7482481192169704e-05, "loss": 0.376, "step": 29500, "task_loss": 0.624772310256958 }, { "epoch": 10.66, "eval_exact_match": 82.56385998107852, "eval_f1": 89.60784348253303, "step": 29500 }, { "compression_loss": 0.0, "distillation_loss": 0.44806426763534546, "epoch": 10.66, "learning_rate": 5.745509387561591e-05, "loss": 0.3933, "step": 29510, "task_loss": 0.5298267602920532 }, { "compression_loss": 0.0, "distillation_loss": 0.3329993486404419, "epoch": 10.67, "learning_rate": 5.742756499136585e-05, "loss": 0.3558, "step": 29520, "task_loss": 0.5523538589477539 }, { "compression_loss": 0.0, "distillation_loss": 0.3392196297645569, "epoch": 10.67, "learning_rate": 5.739989468136772e-05, "loss": 0.3664, "step": 29530, "task_loss": 0.44542059302330017 }, { "compression_loss": 0.0, "distillation_loss": 0.3531342148780823, "epoch": 10.68, "learning_rate": 5.7372083088298935e-05, "loss": 0.3518, "step": 29540, "task_loss": 0.43197929859161377 }, { "compression_loss": 0.0, "distillation_loss": 0.4389444887638092, "epoch": 10.68, "learning_rate": 5.734413035556541e-05, "loss": 0.3831, "step": 29550, "task_loss": 0.48105964064598083 }, { "compression_loss": 0.0, "distillation_loss": 0.35376453399658203, "epoch": 10.68, "learning_rate": 5.731603662730085e-05, "loss": 0.391, "step": 29560, "task_loss": 0.582084059715271 }, { "compression_loss": 0.0, "distillation_loss": 0.38745567202568054, "epoch": 10.69, "learning_rate": 5.7287802048365964e-05, "loss": 0.3715, "step": 29570, "task_loss": 0.6971489191055298 }, { "compression_loss": 0.0, "distillation_loss": 0.3742871582508087, "epoch": 10.69, "learning_rate": 5.725942676434773e-05, "loss": 0.3688, "step": 29580, "task_loss": 0.3916323482990265 }, { "compression_loss": 0.0, "distillation_loss": 0.5940446853637695, "epoch": 10.69, "learning_rate": 5.723091092155865e-05, "loss": 0.453, "step": 29590, "task_loss": 0.8099455833435059 }, { "compression_loss": 0.0, "distillation_loss": 0.3112245798110962, "epoch": 10.7, "learning_rate": 5.720225466703601e-05, "loss": 0.3629, "step": 29600, "task_loss": 0.631229043006897 }, { "compression_loss": 0.0, "distillation_loss": 0.32478272914886475, "epoch": 10.7, "learning_rate": 5.7173458148541094e-05, "loss": 0.4228, "step": 29610, "task_loss": 0.4904170632362366 }, { "compression_loss": 0.0, "distillation_loss": 0.41761791706085205, "epoch": 10.7, "learning_rate": 5.714452151455841e-05, "loss": 0.3698, "step": 29620, "task_loss": 1.1223629713058472 }, { "compression_loss": 0.0, "distillation_loss": 0.3478744626045227, "epoch": 10.71, "learning_rate": 5.711544491429501e-05, "loss": 0.3702, "step": 29630, "task_loss": 0.19475889205932617 }, { "compression_loss": 0.0, "distillation_loss": 0.41987693309783936, "epoch": 10.71, "learning_rate": 5.708622849767961e-05, "loss": 0.4604, "step": 29640, "task_loss": 0.6271519660949707 }, { "compression_loss": 0.0, "distillation_loss": 0.2809098958969116, "epoch": 10.72, "learning_rate": 5.705687241536186e-05, "loss": 0.4281, "step": 29650, "task_loss": 0.6296674013137817 }, { "compression_loss": 0.0, "distillation_loss": 0.2818308472633362, "epoch": 10.72, "learning_rate": 5.702737681871161e-05, "loss": 0.3584, "step": 29660, "task_loss": 0.39925143122673035 }, { "compression_loss": 0.0, "distillation_loss": 0.35148537158966064, "epoch": 10.72, "learning_rate": 5.699774185981806e-05, "loss": 0.3706, "step": 29670, "task_loss": 0.5052469372749329 }, { "compression_loss": 0.0, "distillation_loss": 0.35409435629844666, "epoch": 10.73, "learning_rate": 5.696796769148905e-05, "loss": 0.418, "step": 29680, "task_loss": 0.5041142702102661 }, { "compression_loss": 0.0, "distillation_loss": 0.3384610414505005, "epoch": 10.73, "learning_rate": 5.693805446725018e-05, "loss": 0.4002, "step": 29690, "task_loss": 0.8012888431549072 }, { "compression_loss": 0.0, "distillation_loss": 0.41421744227409363, "epoch": 10.73, "learning_rate": 5.69080023413441e-05, "loss": 0.398, "step": 29700, "task_loss": 0.4893695116043091 }, { "compression_loss": 0.0, "distillation_loss": 0.30970844626426697, "epoch": 10.74, "learning_rate": 5.687781146872967e-05, "loss": 0.3823, "step": 29710, "task_loss": 0.433634489774704 }, { "compression_loss": 0.0, "distillation_loss": 0.20200911164283752, "epoch": 10.74, "learning_rate": 5.6847482005081207e-05, "loss": 0.3594, "step": 29720, "task_loss": 0.3214665651321411 }, { "compression_loss": 0.0, "distillation_loss": 0.366502583026886, "epoch": 10.74, "learning_rate": 5.681701410678759e-05, "loss": 0.4093, "step": 29730, "task_loss": 0.535211443901062 }, { "compression_loss": 0.0, "distillation_loss": 0.3267229199409485, "epoch": 10.75, "learning_rate": 5.678640793095156e-05, "loss": 0.3556, "step": 29740, "task_loss": 0.5452145934104919 }, { "compression_loss": 0.0, "distillation_loss": 0.39769360423088074, "epoch": 10.75, "learning_rate": 5.6755663635388865e-05, "loss": 0.3696, "step": 29750, "task_loss": 0.32848280668258667 }, { "epoch": 10.75, "eval_exact_match": 82.94228949858089, "eval_f1": 89.87294387331669, "step": 29750 }, { "compression_loss": 0.0, "distillation_loss": 0.3788045048713684, "epoch": 10.76, "learning_rate": 5.6724781378627425e-05, "loss": 0.3952, "step": 29760, "task_loss": 0.6843670606613159 }, { "compression_loss": 0.0, "distillation_loss": 0.46630746126174927, "epoch": 10.76, "learning_rate": 5.669376131990653e-05, "loss": 0.402, "step": 29770, "task_loss": 0.6446393132209778 }, { "compression_loss": 0.0, "distillation_loss": 0.35927873849868774, "epoch": 10.76, "learning_rate": 5.666260361917604e-05, "loss": 0.3987, "step": 29780, "task_loss": 0.5452622771263123 }, { "compression_loss": 0.0, "distillation_loss": 0.44025394320487976, "epoch": 10.77, "learning_rate": 5.6631308437095546e-05, "loss": 0.3841, "step": 29790, "task_loss": 0.5754637122154236 }, { "compression_loss": 0.0, "distillation_loss": 0.3944118916988373, "epoch": 10.77, "learning_rate": 5.6599875935033516e-05, "loss": 0.3838, "step": 29800, "task_loss": 0.34058427810668945 }, { "compression_loss": 0.0, "distillation_loss": 0.45717811584472656, "epoch": 10.77, "learning_rate": 5.656830627506651e-05, "loss": 0.3812, "step": 29810, "task_loss": 0.5898969173431396 }, { "compression_loss": 0.0, "distillation_loss": 0.3032148778438568, "epoch": 10.78, "learning_rate": 5.6536599619978306e-05, "loss": 0.3668, "step": 29820, "task_loss": 0.6649981141090393 }, { "compression_loss": 0.0, "distillation_loss": 0.40282613039016724, "epoch": 10.78, "learning_rate": 5.650475613325907e-05, "loss": 0.3864, "step": 29830, "task_loss": 0.959244966506958 }, { "compression_loss": 0.0, "distillation_loss": 0.25969162583351135, "epoch": 10.78, "learning_rate": 5.647277597910456e-05, "loss": 0.3826, "step": 29840, "task_loss": 0.3896484673023224 }, { "compression_loss": 0.0, "distillation_loss": 0.3874444365501404, "epoch": 10.79, "learning_rate": 5.644065932241517e-05, "loss": 0.3751, "step": 29850, "task_loss": 0.5907013416290283 }, { "compression_loss": 0.0, "distillation_loss": 0.355577677488327, "epoch": 10.79, "learning_rate": 5.6408406328795195e-05, "loss": 0.3604, "step": 29860, "task_loss": 0.539420485496521 }, { "compression_loss": 0.0, "distillation_loss": 0.33990269899368286, "epoch": 10.8, "learning_rate": 5.6376017164551915e-05, "loss": 0.3428, "step": 29870, "task_loss": 0.47408655285835266 }, { "compression_loss": 0.0, "distillation_loss": 0.295065313577652, "epoch": 10.8, "learning_rate": 5.6343491996694754e-05, "loss": 0.4273, "step": 29880, "task_loss": 0.3917389512062073 }, { "compression_loss": 0.0, "distillation_loss": 0.3400767147541046, "epoch": 10.8, "learning_rate": 5.631083099293441e-05, "loss": 0.3782, "step": 29890, "task_loss": 0.29112324118614197 }, { "compression_loss": 0.0, "distillation_loss": 0.30475717782974243, "epoch": 10.81, "learning_rate": 5.627803432168202e-05, "loss": 0.3963, "step": 29900, "task_loss": 0.6054189205169678 }, { "compression_loss": 0.0, "distillation_loss": 0.38359367847442627, "epoch": 10.81, "learning_rate": 5.6245102152048215e-05, "loss": 0.3931, "step": 29910, "task_loss": 0.5336320400238037 }, { "compression_loss": 0.0, "distillation_loss": 0.38667869567871094, "epoch": 10.81, "learning_rate": 5.621203465384236e-05, "loss": 0.3865, "step": 29920, "task_loss": 0.47453516721725464 }, { "compression_loss": 0.0, "distillation_loss": 0.2894802391529083, "epoch": 10.82, "learning_rate": 5.617883199757159e-05, "loss": 0.393, "step": 29930, "task_loss": 0.37323397397994995 }, { "compression_loss": 0.0, "distillation_loss": 0.3388747572898865, "epoch": 10.82, "learning_rate": 5.6145494354439964e-05, "loss": 0.3999, "step": 29940, "task_loss": 0.6223235130310059 }, { "compression_loss": 0.0, "distillation_loss": 0.3649522066116333, "epoch": 10.82, "learning_rate": 5.611202189634758e-05, "loss": 0.3712, "step": 29950, "task_loss": 0.5318911075592041 }, { "compression_loss": 0.0, "distillation_loss": 0.46085262298583984, "epoch": 10.83, "learning_rate": 5.6078414795889694e-05, "loss": 0.3899, "step": 29960, "task_loss": 0.5634099245071411 }, { "compression_loss": 0.0, "distillation_loss": 0.4994548559188843, "epoch": 10.83, "learning_rate": 5.604467322635582e-05, "loss": 0.3865, "step": 29970, "task_loss": 0.6656855344772339 }, { "compression_loss": 0.0, "distillation_loss": 0.3881114721298218, "epoch": 10.83, "learning_rate": 5.6010797361728816e-05, "loss": 0.4178, "step": 29980, "task_loss": 0.48665136098861694 }, { "compression_loss": 0.0, "distillation_loss": 0.3745848834514618, "epoch": 10.84, "learning_rate": 5.597678737668405e-05, "loss": 0.3859, "step": 29990, "task_loss": 0.7532287836074829 }, { "compression_loss": 0.0, "distillation_loss": 0.34191638231277466, "epoch": 10.84, "learning_rate": 5.594264344658843e-05, "loss": 0.3605, "step": 30000, "task_loss": 1.0175566673278809 }, { "epoch": 10.84, "eval_exact_match": 82.94228949858089, "eval_f1": 89.72641922575288, "step": 30000 }, { "compression_loss": 0.0, "distillation_loss": 0.3670957684516907, "epoch": 10.85, "learning_rate": 5.5908365747499555e-05, "loss": 0.3526, "step": 30010, "task_loss": 0.5466479659080505 }, { "compression_loss": 0.0, "distillation_loss": 0.3133726716041565, "epoch": 10.85, "learning_rate": 5.587395445616475e-05, "loss": 0.3785, "step": 30020, "task_loss": 0.41326943039894104 }, { "compression_loss": 0.0, "distillation_loss": 0.38958606123924255, "epoch": 10.85, "learning_rate": 5.583940975002023e-05, "loss": 0.3621, "step": 30030, "task_loss": 0.5244191884994507 }, { "compression_loss": 0.0, "distillation_loss": 0.34110361337661743, "epoch": 10.86, "learning_rate": 5.5804731807190085e-05, "loss": 0.3703, "step": 30040, "task_loss": 0.4226912260055542 }, { "compression_loss": 0.0, "distillation_loss": 0.3145621418952942, "epoch": 10.86, "learning_rate": 5.576992080648548e-05, "loss": 0.431, "step": 30050, "task_loss": 0.4115636944770813 }, { "compression_loss": 0.0, "distillation_loss": 0.4243555963039398, "epoch": 10.86, "learning_rate": 5.57349769274036e-05, "loss": 0.3942, "step": 30060, "task_loss": 0.42534300684928894 }, { "compression_loss": 0.0, "distillation_loss": 0.4849332571029663, "epoch": 10.87, "learning_rate": 5.569990035012688e-05, "loss": 0.3818, "step": 30070, "task_loss": 0.6155873537063599 }, { "compression_loss": 0.0, "distillation_loss": 0.19062018394470215, "epoch": 10.87, "learning_rate": 5.566469125552193e-05, "loss": 0.398, "step": 30080, "task_loss": 0.18269407749176025 }, { "compression_loss": 0.0, "distillation_loss": 0.3313145339488983, "epoch": 10.87, "learning_rate": 5.562934982513867e-05, "loss": 0.3657, "step": 30090, "task_loss": 0.5402665138244629 }, { "compression_loss": 0.0, "distillation_loss": 0.4278787076473236, "epoch": 10.88, "learning_rate": 5.559387624120942e-05, "loss": 0.3884, "step": 30100, "task_loss": 0.4952021837234497 }, { "compression_loss": 0.0, "distillation_loss": 0.32850050926208496, "epoch": 10.88, "learning_rate": 5.5558270686647886e-05, "loss": 0.3914, "step": 30110, "task_loss": 0.5060421824455261 }, { "compression_loss": 0.0, "distillation_loss": 0.5403910279273987, "epoch": 10.89, "learning_rate": 5.55225333450483e-05, "loss": 0.3696, "step": 30120, "task_loss": 0.9411796927452087 }, { "compression_loss": 0.0, "distillation_loss": 0.3275948464870453, "epoch": 10.89, "learning_rate": 5.5486664400684396e-05, "loss": 0.4234, "step": 30130, "task_loss": 0.7505064010620117 }, { "compression_loss": 0.0, "distillation_loss": 0.2951309084892273, "epoch": 10.89, "learning_rate": 5.5450664038508526e-05, "loss": 0.403, "step": 30140, "task_loss": 0.41326138377189636 }, { "compression_loss": 0.0, "distillation_loss": 0.5003862977027893, "epoch": 10.9, "learning_rate": 5.541453244415065e-05, "loss": 0.3756, "step": 30150, "task_loss": 0.6409727334976196 }, { "compression_loss": 0.0, "distillation_loss": 0.28051674365997314, "epoch": 10.9, "learning_rate": 5.5378269803917425e-05, "loss": 0.3686, "step": 30160, "task_loss": 0.228929340839386 }, { "compression_loss": 0.0, "distillation_loss": 0.3465709388256073, "epoch": 10.9, "learning_rate": 5.534187630479123e-05, "loss": 0.3693, "step": 30170, "task_loss": 0.365300714969635 }, { "compression_loss": 0.0, "distillation_loss": 0.41735100746154785, "epoch": 10.91, "learning_rate": 5.530535213442916e-05, "loss": 0.3856, "step": 30180, "task_loss": 0.6544727683067322 }, { "compression_loss": 0.0, "distillation_loss": 0.3385262191295624, "epoch": 10.91, "learning_rate": 5.5268697481162134e-05, "loss": 0.3742, "step": 30190, "task_loss": 0.4145801365375519 }, { "compression_loss": 0.0, "distillation_loss": 0.2984004616737366, "epoch": 10.91, "learning_rate": 5.523191253399388e-05, "loss": 0.3715, "step": 30200, "task_loss": 0.7145050764083862 }, { "compression_loss": 0.0, "distillation_loss": 0.3871958255767822, "epoch": 10.92, "learning_rate": 5.519499748259993e-05, "loss": 0.3578, "step": 30210, "task_loss": 0.8310413360595703 }, { "compression_loss": 0.0, "distillation_loss": 0.5353628396987915, "epoch": 10.92, "learning_rate": 5.515795251732672e-05, "loss": 0.4082, "step": 30220, "task_loss": 0.6640851497650146 }, { "compression_loss": 0.0, "distillation_loss": 0.4021126627922058, "epoch": 10.93, "learning_rate": 5.5120777829190543e-05, "loss": 0.3779, "step": 30230, "task_loss": 0.7048424482345581 }, { "compression_loss": 0.0, "distillation_loss": 0.34633779525756836, "epoch": 10.93, "learning_rate": 5.50834736098766e-05, "loss": 0.3843, "step": 30240, "task_loss": 0.9538134336471558 }, { "compression_loss": 0.0, "distillation_loss": 0.3343544006347656, "epoch": 10.93, "learning_rate": 5.5046040051737976e-05, "loss": 0.3904, "step": 30250, "task_loss": 0.4619567394256592 }, { "epoch": 10.93, "eval_exact_match": 82.96121097445601, "eval_f1": 89.80941939893061, "step": 30250 }, { "compression_loss": 0.0, "distillation_loss": 0.453721821308136, "epoch": 10.94, "learning_rate": 5.500847734779469e-05, "loss": 0.4285, "step": 30260, "task_loss": 0.5236670970916748 }, { "compression_loss": 0.0, "distillation_loss": 0.2954332232475281, "epoch": 10.94, "learning_rate": 5.497078569173267e-05, "loss": 0.3821, "step": 30270, "task_loss": 0.24834580719470978 }, { "compression_loss": 0.0, "distillation_loss": 0.3309508264064789, "epoch": 10.94, "learning_rate": 5.4932965277902784e-05, "loss": 0.4257, "step": 30280, "task_loss": 0.7076093554496765 }, { "compression_loss": 0.0, "distillation_loss": 0.39211854338645935, "epoch": 10.95, "learning_rate": 5.4895016301319786e-05, "loss": 0.3832, "step": 30290, "task_loss": 0.6536873579025269 }, { "compression_loss": 0.0, "distillation_loss": 0.3901174068450928, "epoch": 10.95, "learning_rate": 5.4856938957661377e-05, "loss": 0.4079, "step": 30300, "task_loss": 0.6007660627365112 }, { "compression_loss": 0.0, "distillation_loss": 0.4000302851200104, "epoch": 10.95, "learning_rate": 5.481873344326713e-05, "loss": 0.443, "step": 30310, "task_loss": 0.7138227224349976 }, { "compression_loss": 0.0, "distillation_loss": 0.4617716073989868, "epoch": 10.96, "learning_rate": 5.478039995513753e-05, "loss": 0.4003, "step": 30320, "task_loss": 0.7568254470825195 }, { "compression_loss": 0.0, "distillation_loss": 0.4206737279891968, "epoch": 10.96, "learning_rate": 5.474193869093293e-05, "loss": 0.39, "step": 30330, "task_loss": 0.7496936917304993 }, { "compression_loss": 0.0, "distillation_loss": 0.34480810165405273, "epoch": 10.96, "learning_rate": 5.4703349848972554e-05, "loss": 0.3514, "step": 30340, "task_loss": 0.44453567266464233 }, { "compression_loss": 0.0, "distillation_loss": 0.2850736379623413, "epoch": 10.97, "learning_rate": 5.466463362823343e-05, "loss": 0.3435, "step": 30350, "task_loss": 0.6559290289878845 }, { "compression_loss": 0.0, "distillation_loss": 0.3789885342121124, "epoch": 10.97, "learning_rate": 5.4625790228349424e-05, "loss": 0.3775, "step": 30360, "task_loss": 1.417602300643921 }, { "compression_loss": 0.0, "distillation_loss": 0.37427616119384766, "epoch": 10.98, "learning_rate": 5.458681984961015e-05, "loss": 0.4092, "step": 30370, "task_loss": 0.6762957572937012 }, { "compression_loss": 0.0, "distillation_loss": 0.3822694420814514, "epoch": 10.98, "learning_rate": 5.4547722692960005e-05, "loss": 0.373, "step": 30380, "task_loss": 0.40661516785621643 }, { "compression_loss": 0.0, "distillation_loss": 0.28816527128219604, "epoch": 10.98, "learning_rate": 5.450849895999707e-05, "loss": 0.3938, "step": 30390, "task_loss": 0.5435937643051147 }, { "compression_loss": 0.0, "distillation_loss": 0.43996739387512207, "epoch": 10.99, "learning_rate": 5.44691488529721e-05, "loss": 0.4486, "step": 30400, "task_loss": 0.7547143697738647 }, { "compression_loss": 0.0, "distillation_loss": 0.33609914779663086, "epoch": 10.99, "learning_rate": 5.442967257478748e-05, "loss": 0.3759, "step": 30410, "task_loss": 0.41951659321784973 }, { "compression_loss": 0.0, "distillation_loss": 0.34954917430877686, "epoch": 10.99, "learning_rate": 5.4390070328996166e-05, "loss": 0.3761, "step": 30420, "task_loss": 0.5150220990180969 }, { "compression_loss": 0.0, "distillation_loss": 0.40067267417907715, "epoch": 11.0, "learning_rate": 5.435034231980067e-05, "loss": 0.3763, "step": 30430, "task_loss": 0.7470228672027588 }, { "compression_loss": 0.0, "distillation_loss": 0.2949705123901367, "epoch": 11.0, "learning_rate": 5.431048875205195e-05, "loss": 0.4451, "step": 30440, "task_loss": 0.6367085576057434 }, { "compression_loss": 0.0, "distillation_loss": 0.29878896474838257, "epoch": 11.0, "learning_rate": 5.427050983124843e-05, "loss": 0.3842, "step": 30450, "task_loss": 0.4922108054161072 }, { "compression_loss": 0.0, "distillation_loss": 0.44266384840011597, "epoch": 11.01, "learning_rate": 5.423040576353483e-05, "loss": 0.3627, "step": 30460, "task_loss": 0.9143210053443909 }, { "compression_loss": 0.0, "distillation_loss": 0.29100504517555237, "epoch": 11.01, "learning_rate": 5.4190176755701236e-05, "loss": 0.3293, "step": 30470, "task_loss": 0.46341586112976074 }, { "compression_loss": 0.0, "distillation_loss": 0.38211023807525635, "epoch": 11.02, "learning_rate": 5.414982301518194e-05, "loss": 0.349, "step": 30480, "task_loss": 0.6259920001029968 }, { "compression_loss": 0.0, "distillation_loss": 0.28083837032318115, "epoch": 11.02, "learning_rate": 5.410934475005439e-05, "loss": 0.3214, "step": 30490, "task_loss": 0.5484700202941895 }, { "compression_loss": 0.0, "distillation_loss": 0.3642995059490204, "epoch": 11.02, "learning_rate": 5.4068742169038124e-05, "loss": 0.3605, "step": 30500, "task_loss": 0.8151582479476929 }, { "epoch": 11.02, "eval_exact_match": 82.61116367076632, "eval_f1": 89.53006867143404, "step": 30500 }, { "compression_loss": 0.0, "distillation_loss": 0.29346784949302673, "epoch": 11.03, "learning_rate": 5.402801548149372e-05, "loss": 0.3385, "step": 30510, "task_loss": 0.5374734997749329 }, { "compression_loss": 0.0, "distillation_loss": 0.4059075117111206, "epoch": 11.03, "learning_rate": 5.398716489742166e-05, "loss": 0.3405, "step": 30520, "task_loss": 0.6881134510040283 }, { "compression_loss": 0.0, "distillation_loss": 0.36759153008461, "epoch": 11.03, "learning_rate": 5.3946190627461314e-05, "loss": 0.3453, "step": 30530, "task_loss": 0.667883038520813 }, { "compression_loss": 0.0, "distillation_loss": 0.49028345942497253, "epoch": 11.04, "learning_rate": 5.390509288288977e-05, "loss": 0.3501, "step": 30540, "task_loss": 0.9341843128204346 }, { "compression_loss": 0.0, "distillation_loss": 0.25190505385398865, "epoch": 11.04, "learning_rate": 5.386387187562082e-05, "loss": 0.2926, "step": 30550, "task_loss": 0.5309723615646362 }, { "compression_loss": 0.0, "distillation_loss": 0.287572979927063, "epoch": 11.04, "learning_rate": 5.3822527818203865e-05, "loss": 0.3514, "step": 30560, "task_loss": 0.4287169575691223 }, { "compression_loss": 0.0, "distillation_loss": 0.25738686323165894, "epoch": 11.05, "learning_rate": 5.378106092382275e-05, "loss": 0.3649, "step": 30570, "task_loss": 0.6980565786361694 }, { "compression_loss": 0.0, "distillation_loss": 0.2965894043445587, "epoch": 11.05, "learning_rate": 5.3739471406294725e-05, "loss": 0.3187, "step": 30580, "task_loss": 0.43664753437042236 }, { "compression_loss": 0.0, "distillation_loss": 0.3387247323989868, "epoch": 11.06, "learning_rate": 5.369775948006933e-05, "loss": 0.335, "step": 30590, "task_loss": 0.5256164073944092 }, { "compression_loss": 0.0, "distillation_loss": 0.3025720715522766, "epoch": 11.06, "learning_rate": 5.365592536022728e-05, "loss": 0.3239, "step": 30600, "task_loss": 0.5954160094261169 }, { "compression_loss": 0.0, "distillation_loss": 0.23759621381759644, "epoch": 11.06, "learning_rate": 5.361396926247936e-05, "loss": 0.3192, "step": 30610, "task_loss": 0.5290464162826538 }, { "compression_loss": 0.0, "distillation_loss": 0.3050764203071594, "epoch": 11.07, "learning_rate": 5.3571891403165345e-05, "loss": 0.3963, "step": 30620, "task_loss": 0.6192272901535034 }, { "compression_loss": 0.0, "distillation_loss": 0.37906011939048767, "epoch": 11.07, "learning_rate": 5.3529691999252797e-05, "loss": 0.367, "step": 30630, "task_loss": 0.738808274269104 }, { "compression_loss": 0.0, "distillation_loss": 0.28784462809562683, "epoch": 11.07, "learning_rate": 5.348737126833605e-05, "loss": 0.3624, "step": 30640, "task_loss": 0.6045219898223877 }, { "compression_loss": 0.0, "distillation_loss": 0.3597484529018402, "epoch": 11.08, "learning_rate": 5.344492942863501e-05, "loss": 0.3306, "step": 30650, "task_loss": 0.46364304423332214 }, { "compression_loss": 0.0, "distillation_loss": 0.28757065534591675, "epoch": 11.08, "learning_rate": 5.340236669899409e-05, "loss": 0.3167, "step": 30660, "task_loss": 0.5059828162193298 }, { "compression_loss": 0.0, "distillation_loss": 0.4648352861404419, "epoch": 11.08, "learning_rate": 5.336395706279589e-05, "loss": 0.369, "step": 30670, "task_loss": 1.1628811359405518 }, { "compression_loss": 0.0, "distillation_loss": 0.3188636302947998, "epoch": 11.09, "learning_rate": 5.33211652474171e-05, "loss": 0.2976, "step": 30680, "task_loss": 0.8980743885040283 }, { "compression_loss": 0.0, "distillation_loss": 0.39204472303390503, "epoch": 11.09, "learning_rate": 5.327825318026816e-05, "loss": 0.3738, "step": 30690, "task_loss": 0.52778559923172 }, { "compression_loss": 0.0, "distillation_loss": 0.3116000294685364, "epoch": 11.1, "learning_rate": 5.323522108261813e-05, "loss": 0.3696, "step": 30700, "task_loss": 0.4787866771221161 }, { "compression_loss": 0.0, "distillation_loss": 0.32855403423309326, "epoch": 11.1, "learning_rate": 5.319206917635494e-05, "loss": 0.3564, "step": 30710, "task_loss": 0.4829432964324951 }, { "compression_loss": 0.0, "distillation_loss": 0.33017492294311523, "epoch": 11.1, "learning_rate": 5.314879768398437e-05, "loss": 0.3741, "step": 30720, "task_loss": 0.5279051065444946 }, { "compression_loss": 0.0, "distillation_loss": 0.2766365706920624, "epoch": 11.11, "learning_rate": 5.310540682862876e-05, "loss": 0.3449, "step": 30730, "task_loss": 0.4771583378314972 }, { "compression_loss": 0.0, "distillation_loss": 0.4740075469017029, "epoch": 11.11, "learning_rate": 5.306189683402595e-05, "loss": 0.3884, "step": 30740, "task_loss": 0.8812153935432434 }, { "compression_loss": 0.0, "distillation_loss": 0.3995136320590973, "epoch": 11.11, "learning_rate": 5.3018267924528124e-05, "loss": 0.3768, "step": 30750, "task_loss": 0.6199120283126831 }, { "epoch": 11.11, "eval_exact_match": 82.72469252601702, "eval_f1": 89.62041686562053, "step": 30750 }, { "compression_loss": 0.0, "distillation_loss": 0.27484428882598877, "epoch": 11.12, "learning_rate": 5.29745203251006e-05, "loss": 0.3488, "step": 30760, "task_loss": 0.3609499931335449 }, { "compression_loss": 0.0, "distillation_loss": 0.37529778480529785, "epoch": 11.12, "learning_rate": 5.293065426132069e-05, "loss": 0.3489, "step": 30770, "task_loss": 0.41195905208587646 }, { "compression_loss": 0.0, "distillation_loss": 0.35930025577545166, "epoch": 11.12, "learning_rate": 5.28866699593766e-05, "loss": 0.3429, "step": 30780, "task_loss": 0.5321624279022217 }, { "compression_loss": 0.0, "distillation_loss": 0.46943289041519165, "epoch": 11.13, "learning_rate": 5.284256764606617e-05, "loss": 0.3324, "step": 30790, "task_loss": 0.6085718870162964 }, { "compression_loss": 0.0, "distillation_loss": 0.2779424488544464, "epoch": 11.13, "learning_rate": 5.279834754879575e-05, "loss": 0.353, "step": 30800, "task_loss": 0.33586573600769043 }, { "compression_loss": 0.0, "distillation_loss": 0.38808631896972656, "epoch": 11.13, "learning_rate": 5.275400989557905e-05, "loss": 0.346, "step": 30810, "task_loss": 1.155708909034729 }, { "compression_loss": 0.0, "distillation_loss": 0.412808895111084, "epoch": 11.14, "learning_rate": 5.270955491503589e-05, "loss": 0.3386, "step": 30820, "task_loss": 0.46611833572387695 }, { "compression_loss": 0.0, "distillation_loss": 0.2601992189884186, "epoch": 11.14, "learning_rate": 5.266498283639113e-05, "loss": 0.3435, "step": 30830, "task_loss": 0.25323614478111267 }, { "compression_loss": 0.0, "distillation_loss": 0.47554054856300354, "epoch": 11.15, "learning_rate": 5.2620293889473387e-05, "loss": 0.3747, "step": 30840, "task_loss": 0.6405948400497437 }, { "compression_loss": 0.0, "distillation_loss": 0.38526564836502075, "epoch": 11.15, "learning_rate": 5.257548830471388e-05, "loss": 0.3412, "step": 30850, "task_loss": 0.32339832186698914 }, { "compression_loss": 0.0, "distillation_loss": 0.27890390157699585, "epoch": 11.15, "learning_rate": 5.25305663131453e-05, "loss": 0.3417, "step": 30860, "task_loss": 0.5287346243858337 }, { "compression_loss": 0.0, "distillation_loss": 0.3683282136917114, "epoch": 11.16, "learning_rate": 5.2485528146400505e-05, "loss": 0.3392, "step": 30870, "task_loss": 0.29345154762268066 }, { "compression_loss": 0.0, "distillation_loss": 0.40986067056655884, "epoch": 11.16, "learning_rate": 5.244037403671146e-05, "loss": 0.3427, "step": 30880, "task_loss": 0.7018733620643616 }, { "compression_loss": 0.0, "distillation_loss": 0.3679884970188141, "epoch": 11.16, "learning_rate": 5.2395104216907926e-05, "loss": 0.3656, "step": 30890, "task_loss": 0.49413740634918213 }, { "compression_loss": 0.0, "distillation_loss": 0.3055950999259949, "epoch": 11.17, "learning_rate": 5.234971892041632e-05, "loss": 0.3658, "step": 30900, "task_loss": 0.9225125312805176 }, { "compression_loss": 0.0, "distillation_loss": 0.3304484784603119, "epoch": 11.17, "learning_rate": 5.230421838125847e-05, "loss": 0.3447, "step": 30910, "task_loss": 0.45518964529037476 }, { "compression_loss": 0.0, "distillation_loss": 0.44711834192276, "epoch": 11.17, "learning_rate": 5.2263169557434734e-05, "loss": 0.3952, "step": 30920, "task_loss": 0.3594067394733429 }, { "compression_loss": 0.0, "distillation_loss": 0.2703867256641388, "epoch": 11.18, "learning_rate": 5.221745070406853e-05, "loss": 0.3152, "step": 30930, "task_loss": 0.4056801497936249 }, { "compression_loss": 0.0, "distillation_loss": 0.2892513573169708, "epoch": 11.18, "learning_rate": 5.21716172900555e-05, "loss": 0.3801, "step": 30940, "task_loss": 0.38734328746795654 }, { "compression_loss": 0.0, "distillation_loss": 0.5101137161254883, "epoch": 11.19, "learning_rate": 5.2125669551728134e-05, "loss": 0.3908, "step": 30950, "task_loss": 0.9189091920852661 }, { "compression_loss": 0.0, "distillation_loss": 0.2777116894721985, "epoch": 11.19, "learning_rate": 5.207960772600845e-05, "loss": 0.3743, "step": 30960, "task_loss": 0.5410131216049194 }, { "compression_loss": 0.0, "distillation_loss": 0.3328074812889099, "epoch": 11.19, "learning_rate": 5.203343205040667e-05, "loss": 0.3374, "step": 30970, "task_loss": 0.3766447603702545 }, { "compression_loss": 0.0, "distillation_loss": 0.4837460517883301, "epoch": 11.2, "learning_rate": 5.198714276302013e-05, "loss": 0.3569, "step": 30980, "task_loss": 0.7223513722419739 }, { "compression_loss": 0.0, "distillation_loss": 0.29842859506607056, "epoch": 11.2, "learning_rate": 5.194074010253195e-05, "loss": 0.3412, "step": 30990, "task_loss": 0.35386359691619873 }, { "compression_loss": 0.0, "distillation_loss": 0.36791229248046875, "epoch": 11.2, "learning_rate": 5.189422430820986e-05, "loss": 0.3493, "step": 31000, "task_loss": 0.574812650680542 }, { "epoch": 11.2, "eval_exact_match": 82.3841059602649, "eval_f1": 89.51719049738145, "step": 31000 }, { "compression_loss": 0.0, "distillation_loss": 0.32887816429138184, "epoch": 11.21, "learning_rate": 5.1847595619904946e-05, "loss": 0.3514, "step": 31010, "task_loss": 0.3748660683631897 }, { "compression_loss": 0.0, "distillation_loss": 0.2734318971633911, "epoch": 11.21, "learning_rate": 5.180085427805038e-05, "loss": 0.3117, "step": 31020, "task_loss": 0.5383692979812622 }, { "compression_loss": 0.0, "distillation_loss": 0.4769822061061859, "epoch": 11.21, "learning_rate": 5.175400052366024e-05, "loss": 0.3758, "step": 31030, "task_loss": 1.0459678173065186 }, { "compression_loss": 0.0, "distillation_loss": 0.4481627941131592, "epoch": 11.22, "learning_rate": 5.170703459832827e-05, "loss": 0.3415, "step": 31040, "task_loss": 0.712192714214325 }, { "compression_loss": 0.0, "distillation_loss": 0.4470297396183014, "epoch": 11.22, "learning_rate": 5.165995674422654e-05, "loss": 0.3709, "step": 31050, "task_loss": 0.6727391481399536 }, { "compression_loss": 0.0, "distillation_loss": 0.3790176510810852, "epoch": 11.23, "learning_rate": 5.1612767204104316e-05, "loss": 0.3639, "step": 31060, "task_loss": 0.6856481432914734 }, { "compression_loss": 0.0, "distillation_loss": 0.3655795156955719, "epoch": 11.23, "learning_rate": 5.156546622128673e-05, "loss": 0.3633, "step": 31070, "task_loss": 0.962065577507019 }, { "compression_loss": 0.0, "distillation_loss": 0.31923535466194153, "epoch": 11.23, "learning_rate": 5.1518054039673544e-05, "loss": 0.3486, "step": 31080, "task_loss": 0.6942268013954163 }, { "compression_loss": 0.0, "distillation_loss": 0.3935302495956421, "epoch": 11.24, "learning_rate": 5.1470530903737924e-05, "loss": 0.3564, "step": 31090, "task_loss": 0.3999423384666443 }, { "compression_loss": 0.0, "distillation_loss": 0.34719374775886536, "epoch": 11.24, "learning_rate": 5.142289705852514e-05, "loss": 0.3651, "step": 31100, "task_loss": 0.6013666391372681 }, { "compression_loss": 0.0, "distillation_loss": 0.34161674976348877, "epoch": 11.24, "learning_rate": 5.1375152749651315e-05, "loss": 0.3532, "step": 31110, "task_loss": 0.5428796410560608 }, { "compression_loss": 0.0, "distillation_loss": 0.3403222858905792, "epoch": 11.25, "learning_rate": 5.132729822330215e-05, "loss": 0.3699, "step": 31120, "task_loss": 0.34771913290023804 }, { "compression_loss": 0.0, "distillation_loss": 0.27695637941360474, "epoch": 11.25, "learning_rate": 5.1279333726231676e-05, "loss": 0.3289, "step": 31130, "task_loss": 0.3937012553215027 }, { "compression_loss": 0.0, "distillation_loss": 0.3218735456466675, "epoch": 11.25, "learning_rate": 5.123125950576098e-05, "loss": 0.3141, "step": 31140, "task_loss": 0.4355730414390564 }, { "compression_loss": 0.0, "distillation_loss": 0.26944002509117126, "epoch": 11.26, "learning_rate": 5.11830758097769e-05, "loss": 0.3284, "step": 31150, "task_loss": 0.3624120354652405 }, { "compression_loss": 0.0, "distillation_loss": 0.3208332061767578, "epoch": 11.26, "learning_rate": 5.113478288673078e-05, "loss": 0.3409, "step": 31160, "task_loss": 0.4535531997680664 }, { "compression_loss": 0.0, "distillation_loss": 0.25942161679267883, "epoch": 11.26, "learning_rate": 5.108638098563717e-05, "loss": 0.3449, "step": 31170, "task_loss": 0.2241821587085724 }, { "compression_loss": 0.0, "distillation_loss": 0.4324563145637512, "epoch": 11.27, "learning_rate": 5.1037870356072545e-05, "loss": 0.3713, "step": 31180, "task_loss": 0.6393023729324341 }, { "compression_loss": 0.0, "distillation_loss": 0.4508543014526367, "epoch": 11.27, "learning_rate": 5.0989251248174015e-05, "loss": 0.3414, "step": 31190, "task_loss": 0.34567761421203613 }, { "compression_loss": 0.0, "distillation_loss": 0.32019156217575073, "epoch": 11.28, "learning_rate": 5.094052391263807e-05, "loss": 0.3471, "step": 31200, "task_loss": 0.3136403262615204 }, { "compression_loss": 0.0, "distillation_loss": 0.3434525728225708, "epoch": 11.28, "learning_rate": 5.0891688600719215e-05, "loss": 0.3478, "step": 31210, "task_loss": 0.7961330413818359 }, { "compression_loss": 0.0, "distillation_loss": 0.3131732642650604, "epoch": 11.28, "learning_rate": 5.084274556422876e-05, "loss": 0.3376, "step": 31220, "task_loss": 0.5231847167015076 }, { "compression_loss": 0.0, "distillation_loss": 0.37508511543273926, "epoch": 11.29, "learning_rate": 5.0793695055533446e-05, "loss": 0.3595, "step": 31230, "task_loss": 0.5824639797210693 }, { "compression_loss": 0.0, "distillation_loss": 0.43298447132110596, "epoch": 11.29, "learning_rate": 5.074453732755419e-05, "loss": 0.3633, "step": 31240, "task_loss": 0.7249249219894409 }, { "compression_loss": 0.0, "distillation_loss": 0.3672495484352112, "epoch": 11.29, "learning_rate": 5.069527263376478e-05, "loss": 0.3543, "step": 31250, "task_loss": 0.9668033123016357 }, { "epoch": 11.29, "eval_exact_match": 82.45033112582782, "eval_f1": 89.4760442654279, "step": 31250 }, { "compression_loss": 0.0, "distillation_loss": 0.3237718343734741, "epoch": 11.3, "learning_rate": 5.0645901228190525e-05, "loss": 0.3553, "step": 31260, "task_loss": 0.513779878616333 }, { "compression_loss": 0.0, "distillation_loss": 0.36057525873184204, "epoch": 11.3, "learning_rate": 5.059642336540702e-05, "loss": 0.3848, "step": 31270, "task_loss": 0.2133713960647583 }, { "compression_loss": 0.0, "distillation_loss": 0.294028639793396, "epoch": 11.3, "learning_rate": 5.054683930053873e-05, "loss": 0.3499, "step": 31280, "task_loss": 0.3805010914802551 }, { "compression_loss": 0.0, "distillation_loss": 0.24722647666931152, "epoch": 11.31, "learning_rate": 5.0497149289257796e-05, "loss": 0.3333, "step": 31290, "task_loss": 0.3370016813278198 }, { "compression_loss": 0.0, "distillation_loss": 0.4259105324745178, "epoch": 11.31, "learning_rate": 5.044735358778261e-05, "loss": 0.3769, "step": 31300, "task_loss": 0.6986902952194214 }, { "compression_loss": 0.0, "distillation_loss": 0.26834627985954285, "epoch": 11.32, "learning_rate": 5.039745245287655e-05, "loss": 0.3284, "step": 31310, "task_loss": 0.721771240234375 }, { "compression_loss": 0.0, "distillation_loss": 0.3485707640647888, "epoch": 11.32, "learning_rate": 5.0347446141846656e-05, "loss": 0.345, "step": 31320, "task_loss": 0.5485772490501404 }, { "compression_loss": 0.0, "distillation_loss": 0.38015708327293396, "epoch": 11.32, "learning_rate": 5.029733491254229e-05, "loss": 0.3858, "step": 31330, "task_loss": 0.9029421806335449 }, { "compression_loss": 0.0, "distillation_loss": 0.3657987713813782, "epoch": 11.33, "learning_rate": 5.024711902335379e-05, "loss": 0.3339, "step": 31340, "task_loss": 0.5325937271118164 }, { "compression_loss": 0.0, "distillation_loss": 0.28831857442855835, "epoch": 11.33, "learning_rate": 5.019679873321117e-05, "loss": 0.3236, "step": 31350, "task_loss": 0.4304049015045166 }, { "compression_loss": 0.0, "distillation_loss": 0.373060405254364, "epoch": 11.33, "learning_rate": 5.014637430158276e-05, "loss": 0.381, "step": 31360, "task_loss": 0.4949566423892975 }, { "compression_loss": 0.0, "distillation_loss": 0.4010653495788574, "epoch": 11.34, "learning_rate": 5.0095845988473896e-05, "loss": 0.3504, "step": 31370, "task_loss": 0.5861020088195801 }, { "compression_loss": 0.0, "distillation_loss": 0.3670312762260437, "epoch": 11.34, "learning_rate": 5.0045214054425545e-05, "loss": 0.3616, "step": 31380, "task_loss": 0.5232160091400146 }, { "compression_loss": 0.0, "distillation_loss": 0.2984731197357178, "epoch": 11.34, "learning_rate": 4.9994478760512994e-05, "loss": 0.3671, "step": 31390, "task_loss": 0.5723747611045837 }, { "compression_loss": 0.0, "distillation_loss": 0.3843269348144531, "epoch": 11.35, "learning_rate": 4.9943640368344464e-05, "loss": 0.3533, "step": 31400, "task_loss": 0.664061427116394 }, { "compression_loss": 0.0, "distillation_loss": 0.2829972505569458, "epoch": 11.35, "learning_rate": 4.989269914005981e-05, "loss": 0.3357, "step": 31410, "task_loss": 0.541733980178833 }, { "compression_loss": 0.0, "distillation_loss": 0.2264384627342224, "epoch": 11.36, "learning_rate": 4.984165533832913e-05, "loss": 0.3432, "step": 31420, "task_loss": 0.3720897436141968 }, { "compression_loss": 0.0, "distillation_loss": 0.3180156350135803, "epoch": 11.36, "learning_rate": 4.979050922635144e-05, "loss": 0.365, "step": 31430, "task_loss": 0.7879629135131836 }, { "compression_loss": 0.0, "distillation_loss": 0.32890772819519043, "epoch": 11.36, "learning_rate": 4.973926106785329e-05, "loss": 0.3623, "step": 31440, "task_loss": 0.5077107548713684 }, { "compression_loss": 0.0, "distillation_loss": 0.35433271527290344, "epoch": 11.37, "learning_rate": 4.96879111270874e-05, "loss": 0.4012, "step": 31450, "task_loss": 0.822060227394104 }, { "compression_loss": 0.0, "distillation_loss": 0.28777164220809937, "epoch": 11.37, "learning_rate": 4.9636459668831357e-05, "loss": 0.3549, "step": 31460, "task_loss": 0.5379538536071777 }, { "compression_loss": 0.0, "distillation_loss": 0.34426411986351013, "epoch": 11.37, "learning_rate": 4.958490695838617e-05, "loss": 0.3264, "step": 31470, "task_loss": 0.3982798457145691 }, { "compression_loss": 0.0, "distillation_loss": 0.4349164366722107, "epoch": 11.38, "learning_rate": 4.953325326157494e-05, "loss": 0.3671, "step": 31480, "task_loss": 0.3434685468673706 }, { "compression_loss": 0.0, "distillation_loss": 0.33134931325912476, "epoch": 11.38, "learning_rate": 4.948149884474152e-05, "loss": 0.3144, "step": 31490, "task_loss": 0.5727837085723877 }, { "compression_loss": 0.0, "distillation_loss": 0.30214446783065796, "epoch": 11.38, "learning_rate": 4.942964397474906e-05, "loss": 0.3607, "step": 31500, "task_loss": 0.4910222291946411 }, { "epoch": 11.38, "eval_exact_match": 82.57332071901608, "eval_f1": 89.62623525440266, "step": 31500 }, { "compression_loss": 0.0, "distillation_loss": 0.2808883786201477, "epoch": 11.39, "learning_rate": 4.937768891897872e-05, "loss": 0.3645, "step": 31510, "task_loss": 0.4864407777786255 }, { "compression_loss": 0.0, "distillation_loss": 0.3554116487503052, "epoch": 11.39, "learning_rate": 4.9325633945328245e-05, "loss": 0.3451, "step": 31520, "task_loss": 0.6188965439796448 }, { "compression_loss": 0.0, "distillation_loss": 0.31889331340789795, "epoch": 11.4, "learning_rate": 4.927347932221058e-05, "loss": 0.3408, "step": 31530, "task_loss": 0.4872661232948303 }, { "compression_loss": 0.0, "distillation_loss": 0.4375583529472351, "epoch": 11.4, "learning_rate": 4.922122531855249e-05, "loss": 0.3419, "step": 31540, "task_loss": 0.7491167187690735 }, { "compression_loss": 0.0, "distillation_loss": 0.26411664485931396, "epoch": 11.4, "learning_rate": 4.916887220379319e-05, "loss": 0.3321, "step": 31550, "task_loss": 0.3491521179676056 }, { "compression_loss": 0.0, "distillation_loss": 0.2768267095088959, "epoch": 11.41, "learning_rate": 4.911642024788296e-05, "loss": 0.3543, "step": 31560, "task_loss": 0.4296256899833679 }, { "compression_loss": 0.0, "distillation_loss": 0.3377191722393036, "epoch": 11.41, "learning_rate": 4.90638697212817e-05, "loss": 0.3446, "step": 31570, "task_loss": 0.6762540936470032 }, { "compression_loss": 0.0, "distillation_loss": 0.29912063479423523, "epoch": 11.41, "learning_rate": 4.901122089495762e-05, "loss": 0.3145, "step": 31580, "task_loss": 0.6021625995635986 }, { "compression_loss": 0.0, "distillation_loss": 0.41637420654296875, "epoch": 11.42, "learning_rate": 4.895847404038576e-05, "loss": 0.3784, "step": 31590, "task_loss": 0.5565124154090881 }, { "compression_loss": 0.0, "distillation_loss": 0.42575061321258545, "epoch": 11.42, "learning_rate": 4.890562942954664e-05, "loss": 0.3568, "step": 31600, "task_loss": 0.6394255757331848 }, { "compression_loss": 0.0, "distillation_loss": 0.38120606541633606, "epoch": 11.42, "learning_rate": 4.885268733492484e-05, "loss": 0.3373, "step": 31610, "task_loss": 0.5133061408996582 }, { "compression_loss": 0.0, "distillation_loss": 0.29386794567108154, "epoch": 11.43, "learning_rate": 4.87996480295076e-05, "loss": 0.3409, "step": 31620, "task_loss": 0.475729376077652 }, { "compression_loss": 0.0, "distillation_loss": 0.3488690257072449, "epoch": 11.43, "learning_rate": 4.8746511786783434e-05, "loss": 0.3525, "step": 31630, "task_loss": 0.7107182145118713 }, { "compression_loss": 0.0, "distillation_loss": 0.32025083899497986, "epoch": 11.43, "learning_rate": 4.8693278880740654e-05, "loss": 0.3548, "step": 31640, "task_loss": 0.4914471507072449 }, { "compression_loss": 0.0, "distillation_loss": 0.3770049214363098, "epoch": 11.44, "learning_rate": 4.863994958586604e-05, "loss": 0.3559, "step": 31650, "task_loss": 0.6165828704833984 }, { "compression_loss": 0.0, "distillation_loss": 0.26748329401016235, "epoch": 11.44, "learning_rate": 4.858652417714335e-05, "loss": 0.3236, "step": 31660, "task_loss": 0.08374525606632233 }, { "compression_loss": 0.0, "distillation_loss": 0.27789509296417236, "epoch": 11.45, "learning_rate": 4.853300293005198e-05, "loss": 0.3334, "step": 31670, "task_loss": 0.46393516659736633 }, { "compression_loss": 0.0, "distillation_loss": 0.3311939239501953, "epoch": 11.45, "learning_rate": 4.847938612056547e-05, "loss": 0.3449, "step": 31680, "task_loss": 0.7730482816696167 }, { "compression_loss": 0.0, "distillation_loss": 0.395574152469635, "epoch": 11.45, "learning_rate": 4.842567402515011e-05, "loss": 0.3831, "step": 31690, "task_loss": 1.0190998315811157 }, { "compression_loss": 0.0, "distillation_loss": 0.33000457286834717, "epoch": 11.46, "learning_rate": 4.837186692076353e-05, "loss": 0.3674, "step": 31700, "task_loss": 1.0033828020095825 }, { "compression_loss": 0.0, "distillation_loss": 0.29676058888435364, "epoch": 11.46, "learning_rate": 4.831796508485326e-05, "loss": 0.419, "step": 31710, "task_loss": 0.2888988256454468 }, { "compression_loss": 0.0, "distillation_loss": 0.33335405588150024, "epoch": 11.46, "learning_rate": 4.8263968795355294e-05, "loss": 0.3478, "step": 31720, "task_loss": 0.5477027893066406 }, { "compression_loss": 0.0, "distillation_loss": 0.2935606837272644, "epoch": 11.47, "learning_rate": 4.820987833069265e-05, "loss": 0.3257, "step": 31730, "task_loss": 0.8460813164710999 }, { "compression_loss": 0.0, "distillation_loss": 0.355922669172287, "epoch": 11.47, "learning_rate": 4.8155693969773935e-05, "loss": 0.3805, "step": 31740, "task_loss": 0.4853879511356354 }, { "compression_loss": 0.0, "distillation_loss": 0.30375123023986816, "epoch": 11.47, "learning_rate": 4.8101415991991965e-05, "loss": 0.3192, "step": 31750, "task_loss": 0.6044105291366577 }, { "epoch": 11.47, "eval_exact_match": 82.60170293282876, "eval_f1": 89.55296899531102, "step": 31750 }, { "compression_loss": 0.0, "distillation_loss": 0.39717626571655273, "epoch": 11.48, "learning_rate": 4.804704467722223e-05, "loss": 0.3636, "step": 31760, "task_loss": 0.6207161545753479 }, { "compression_loss": 0.0, "distillation_loss": 0.49976447224617004, "epoch": 11.48, "learning_rate": 4.799258030582152e-05, "loss": 0.3847, "step": 31770, "task_loss": 0.6690294742584229 }, { "compression_loss": 0.0, "distillation_loss": 0.3473104238510132, "epoch": 11.49, "learning_rate": 4.793802315862644e-05, "loss": 0.3781, "step": 31780, "task_loss": 0.3822641968727112 }, { "compression_loss": 0.0, "distillation_loss": 0.2778242826461792, "epoch": 11.49, "learning_rate": 4.788337351695199e-05, "loss": 0.3443, "step": 31790, "task_loss": 0.7948096990585327 }, { "compression_loss": 0.0, "distillation_loss": 0.3013157248497009, "epoch": 11.49, "learning_rate": 4.78286316625901e-05, "loss": 0.3315, "step": 31800, "task_loss": 0.8763753175735474 }, { "compression_loss": 0.0, "distillation_loss": 0.28930824995040894, "epoch": 11.5, "learning_rate": 4.777379787780818e-05, "loss": 0.361, "step": 31810, "task_loss": 0.3076488971710205 }, { "compression_loss": 0.0, "distillation_loss": 0.38833189010620117, "epoch": 11.5, "learning_rate": 4.771887244534766e-05, "loss": 0.3049, "step": 31820, "task_loss": 0.661001443862915 }, { "compression_loss": 0.0, "distillation_loss": 0.3576197326183319, "epoch": 11.5, "learning_rate": 4.766385564842255e-05, "loss": 0.3281, "step": 31830, "task_loss": 0.7011573314666748 }, { "compression_loss": 0.0, "distillation_loss": 0.25735771656036377, "epoch": 11.51, "learning_rate": 4.760874777071794e-05, "loss": 0.3408, "step": 31840, "task_loss": 0.612379789352417 }, { "compression_loss": 0.0, "distillation_loss": 0.2917349934577942, "epoch": 11.51, "learning_rate": 4.755354909638858e-05, "loss": 0.3483, "step": 31850, "task_loss": 0.38969987630844116 }, { "compression_loss": 0.0, "distillation_loss": 0.2821897864341736, "epoch": 11.51, "learning_rate": 4.74982599100574e-05, "loss": 0.3279, "step": 31860, "task_loss": 0.46879321336746216 }, { "compression_loss": 0.0, "distillation_loss": 0.38328707218170166, "epoch": 11.52, "learning_rate": 4.744288049681405e-05, "loss": 0.3868, "step": 31870, "task_loss": 0.33758458495140076 }, { "compression_loss": 0.0, "distillation_loss": 0.3531540036201477, "epoch": 11.52, "learning_rate": 4.738741114221338e-05, "loss": 0.3493, "step": 31880, "task_loss": 0.6903041005134583 }, { "compression_loss": 0.0, "distillation_loss": 0.5110961198806763, "epoch": 11.53, "learning_rate": 4.733185213227406e-05, "loss": 0.3551, "step": 31890, "task_loss": 0.7856677770614624 }, { "compression_loss": 0.0, "distillation_loss": 0.320343017578125, "epoch": 11.53, "learning_rate": 4.7276203753477014e-05, "loss": 0.3306, "step": 31900, "task_loss": 0.22347959876060486 }, { "compression_loss": 0.0, "distillation_loss": 0.3419710099697113, "epoch": 11.53, "learning_rate": 4.7220466292763996e-05, "loss": 0.3564, "step": 31910, "task_loss": 0.6967019438743591 }, { "compression_loss": 0.0, "distillation_loss": 0.3632848560810089, "epoch": 11.54, "learning_rate": 4.7164640037536104e-05, "loss": 0.3353, "step": 31920, "task_loss": 0.6160529851913452 }, { "compression_loss": 0.0, "distillation_loss": 0.36113396286964417, "epoch": 11.54, "learning_rate": 4.7108725275652284e-05, "loss": 0.3068, "step": 31930, "task_loss": 0.5054973363876343 }, { "compression_loss": 0.0, "distillation_loss": 0.315176784992218, "epoch": 11.54, "learning_rate": 4.7052722295427844e-05, "loss": 0.3345, "step": 31940, "task_loss": 0.4096016585826874 }, { "compression_loss": 0.0, "distillation_loss": 0.27597475051879883, "epoch": 11.55, "learning_rate": 4.6996631385632974e-05, "loss": 0.3571, "step": 31950, "task_loss": 0.5770153999328613 }, { "compression_loss": 0.0, "distillation_loss": 0.3908612132072449, "epoch": 11.55, "learning_rate": 4.6940452835491274e-05, "loss": 0.3732, "step": 31960, "task_loss": 0.758826732635498 }, { "compression_loss": 0.0, "distillation_loss": 0.2762240171432495, "epoch": 11.55, "learning_rate": 4.688418693467824e-05, "loss": 0.3733, "step": 31970, "task_loss": 0.26867347955703735 }, { "compression_loss": 0.0, "distillation_loss": 0.4848494529724121, "epoch": 11.56, "learning_rate": 4.682783397331978e-05, "loss": 0.3885, "step": 31980, "task_loss": 0.517472505569458 }, { "compression_loss": 0.0, "distillation_loss": 0.3382634222507477, "epoch": 11.56, "learning_rate": 4.677139424199072e-05, "loss": 0.3462, "step": 31990, "task_loss": 0.5025910139083862 }, { "compression_loss": 0.0, "distillation_loss": 0.28954753279685974, "epoch": 11.56, "learning_rate": 4.671486803171327e-05, "loss": 0.376, "step": 32000, "task_loss": 0.3863941431045532 }, { "epoch": 11.56, "eval_exact_match": 82.68684957426679, "eval_f1": 89.58686709901265, "step": 32000 }, { "compression_loss": 0.0, "distillation_loss": 0.34954577684402466, "epoch": 11.57, "learning_rate": 4.665825563395559e-05, "loss": 0.3468, "step": 32010, "task_loss": 0.48891741037368774 }, { "compression_loss": 0.0, "distillation_loss": 0.24595540761947632, "epoch": 11.57, "learning_rate": 4.6601557340630255e-05, "loss": 0.3359, "step": 32020, "task_loss": 0.3322390615940094 }, { "compression_loss": 0.0, "distillation_loss": 0.3430486023426056, "epoch": 11.58, "learning_rate": 4.654477344409271e-05, "loss": 0.376, "step": 32030, "task_loss": 0.38788700103759766 }, { "compression_loss": 0.0, "distillation_loss": 0.35017162561416626, "epoch": 11.58, "learning_rate": 4.648790423713982e-05, "loss": 0.3646, "step": 32040, "task_loss": 0.512260913848877 }, { "compression_loss": 0.0, "distillation_loss": 0.37731289863586426, "epoch": 11.58, "learning_rate": 4.643095001300836e-05, "loss": 0.3785, "step": 32050, "task_loss": 0.5028886198997498 }, { "compression_loss": 0.0, "distillation_loss": 0.3077854514122009, "epoch": 11.59, "learning_rate": 4.637391106537343e-05, "loss": 0.3383, "step": 32060, "task_loss": 0.4493955969810486 }, { "compression_loss": 0.0, "distillation_loss": 0.2899521291255951, "epoch": 11.59, "learning_rate": 4.631678768834706e-05, "loss": 0.339, "step": 32070, "task_loss": 0.5738735198974609 }, { "compression_loss": 0.0, "distillation_loss": 0.42707955837249756, "epoch": 11.59, "learning_rate": 4.625958017647657e-05, "loss": 0.3544, "step": 32080, "task_loss": 0.5231696367263794 }, { "compression_loss": 0.0, "distillation_loss": 0.34892863035202026, "epoch": 11.6, "learning_rate": 4.620228882474312e-05, "loss": 0.3458, "step": 32090, "task_loss": 0.6890476942062378 }, { "compression_loss": 0.0, "distillation_loss": 0.29222404956817627, "epoch": 11.6, "learning_rate": 4.61449139285602e-05, "loss": 0.3261, "step": 32100, "task_loss": 0.31315627694129944 }, { "compression_loss": 0.0, "distillation_loss": 0.23572243750095367, "epoch": 11.6, "learning_rate": 4.608745578377208e-05, "loss": 0.346, "step": 32110, "task_loss": 0.3648180663585663 }, { "compression_loss": 0.0, "distillation_loss": 0.29230839014053345, "epoch": 11.61, "learning_rate": 4.602991468665224e-05, "loss": 0.3495, "step": 32120, "task_loss": 0.45920950174331665 }, { "compression_loss": 0.0, "distillation_loss": 0.32993003726005554, "epoch": 11.61, "learning_rate": 4.597229093390195e-05, "loss": 0.3658, "step": 32130, "task_loss": 0.6793662309646606 }, { "compression_loss": 0.0, "distillation_loss": 0.45977842807769775, "epoch": 11.62, "learning_rate": 4.591458482264866e-05, "loss": 0.3503, "step": 32140, "task_loss": 0.6161090731620789 }, { "compression_loss": 0.0, "distillation_loss": 0.2830163836479187, "epoch": 11.62, "learning_rate": 4.585679665044447e-05, "loss": 0.3435, "step": 32150, "task_loss": 0.5471289157867432 }, { "compression_loss": 0.0, "distillation_loss": 0.32702142000198364, "epoch": 11.62, "learning_rate": 4.579892671526463e-05, "loss": 0.3666, "step": 32160, "task_loss": 0.3628914952278137 }, { "compression_loss": 0.0, "distillation_loss": 0.38225293159484863, "epoch": 11.63, "learning_rate": 4.5740975315505996e-05, "loss": 0.3323, "step": 32170, "task_loss": 0.8414508104324341 }, { "compression_loss": 0.0, "distillation_loss": 0.5750049352645874, "epoch": 11.63, "learning_rate": 4.5682942749985466e-05, "loss": 0.3754, "step": 32180, "task_loss": 0.532424807548523 }, { "compression_loss": 0.0, "distillation_loss": 0.3244228959083557, "epoch": 11.63, "learning_rate": 4.562482931793846e-05, "loss": 0.3462, "step": 32190, "task_loss": 0.7852087616920471 }, { "compression_loss": 0.0, "distillation_loss": 0.39638885855674744, "epoch": 11.64, "learning_rate": 4.5566635319017376e-05, "loss": 0.3636, "step": 32200, "task_loss": 0.9977461099624634 }, { "compression_loss": 0.0, "distillation_loss": 0.25993040204048157, "epoch": 11.64, "learning_rate": 4.550836105329005e-05, "loss": 0.3724, "step": 32210, "task_loss": 0.39915382862091064 }, { "compression_loss": 0.0, "distillation_loss": 0.27179035544395447, "epoch": 11.64, "learning_rate": 4.545000682123818e-05, "loss": 0.3409, "step": 32220, "task_loss": 0.43176907300949097 }, { "compression_loss": 0.0, "distillation_loss": 0.29763340950012207, "epoch": 11.65, "learning_rate": 4.539157292375581e-05, "loss": 0.3388, "step": 32230, "task_loss": 0.5615702867507935 }, { "compression_loss": 0.0, "distillation_loss": 0.3821195662021637, "epoch": 11.65, "learning_rate": 4.5333059662147775e-05, "loss": 0.3492, "step": 32240, "task_loss": 0.5893045663833618 }, { "compression_loss": 0.0, "distillation_loss": 0.3731650710105896, "epoch": 11.66, "learning_rate": 4.527446733812812e-05, "loss": 0.3697, "step": 32250, "task_loss": 0.7143180966377258 }, { "epoch": 11.66, "eval_exact_match": 82.80037842951751, "eval_f1": 89.72915392440778, "step": 32250 }, { "compression_loss": 0.0, "distillation_loss": 0.3984615206718445, "epoch": 11.66, "learning_rate": 4.521579625381856e-05, "loss": 0.3311, "step": 32260, "task_loss": 0.6453840732574463 }, { "compression_loss": 0.0, "distillation_loss": 0.3678897023200989, "epoch": 11.66, "learning_rate": 4.515704671174696e-05, "loss": 0.3751, "step": 32270, "task_loss": 0.7683279514312744 }, { "compression_loss": 0.0, "distillation_loss": 0.3974093198776245, "epoch": 11.67, "learning_rate": 4.509821901484569e-05, "loss": 0.3603, "step": 32280, "task_loss": 0.5658100247383118 }, { "compression_loss": 0.0, "distillation_loss": 0.2496042549610138, "epoch": 11.67, "learning_rate": 4.503931346645016e-05, "loss": 0.3419, "step": 32290, "task_loss": 0.3385689854621887 }, { "compression_loss": 0.0, "distillation_loss": 0.27575236558914185, "epoch": 11.67, "learning_rate": 4.498033037029717e-05, "loss": 0.3434, "step": 32300, "task_loss": 0.25666338205337524 }, { "compression_loss": 0.0, "distillation_loss": 0.3864726424217224, "epoch": 11.68, "learning_rate": 4.49212700305234e-05, "loss": 0.3361, "step": 32310, "task_loss": 0.4089565873146057 }, { "compression_loss": 0.0, "distillation_loss": 0.3019556999206543, "epoch": 11.68, "learning_rate": 4.486213275166382e-05, "loss": 0.38, "step": 32320, "task_loss": 0.3994622230529785 }, { "compression_loss": 0.0, "distillation_loss": 0.22430866956710815, "epoch": 11.68, "learning_rate": 4.480291883865013e-05, "loss": 0.3506, "step": 32330, "task_loss": 0.40575936436653137 }, { "compression_loss": 0.0, "distillation_loss": 0.31596285104751587, "epoch": 11.69, "learning_rate": 4.474362859680918e-05, "loss": 0.3414, "step": 32340, "task_loss": 0.5828789472579956 }, { "compression_loss": 0.0, "distillation_loss": 0.35511207580566406, "epoch": 11.69, "learning_rate": 4.468426233186139e-05, "loss": 0.3421, "step": 32350, "task_loss": 0.6438747644424438 }, { "compression_loss": 0.0, "distillation_loss": 0.3512818217277527, "epoch": 11.69, "learning_rate": 4.4624820349919185e-05, "loss": 0.3627, "step": 32360, "task_loss": 0.7819494009017944 }, { "compression_loss": 0.0, "distillation_loss": 0.5698807239532471, "epoch": 11.7, "learning_rate": 4.456530295748541e-05, "loss": 0.375, "step": 32370, "task_loss": 1.0448747873306274 }, { "compression_loss": 0.0, "distillation_loss": 0.36504194140434265, "epoch": 11.7, "learning_rate": 4.450571046145175e-05, "loss": 0.3415, "step": 32380, "task_loss": 0.3004506826400757 }, { "compression_loss": 0.0, "distillation_loss": 0.3651716411113739, "epoch": 11.71, "learning_rate": 4.444604316909717e-05, "loss": 0.366, "step": 32390, "task_loss": 0.4216272234916687 }, { "compression_loss": 0.0, "distillation_loss": 0.3165162205696106, "epoch": 11.71, "learning_rate": 4.438630138808628e-05, "loss": 0.3705, "step": 32400, "task_loss": 0.45583444833755493 }, { "compression_loss": 0.0, "distillation_loss": 0.24389581382274628, "epoch": 11.71, "learning_rate": 4.4326485426467796e-05, "loss": 0.3477, "step": 32410, "task_loss": 0.5839788913726807 }, { "compression_loss": 0.0, "distillation_loss": 0.34841057658195496, "epoch": 11.72, "learning_rate": 4.426659559267294e-05, "loss": 0.3751, "step": 32420, "task_loss": 0.4604908227920532 }, { "compression_loss": 0.0, "distillation_loss": 0.2817525565624237, "epoch": 11.72, "learning_rate": 4.420663219551381e-05, "loss": 0.3287, "step": 32430, "task_loss": 0.4700966477394104 }, { "compression_loss": 0.0, "distillation_loss": 0.4372870624065399, "epoch": 11.72, "learning_rate": 4.414659554418186e-05, "loss": 0.3625, "step": 32440, "task_loss": 1.404410481452942 }, { "compression_loss": 0.0, "distillation_loss": 0.29228758811950684, "epoch": 11.73, "learning_rate": 4.408648594824626e-05, "loss": 0.3602, "step": 32450, "task_loss": 0.46710091829299927 }, { "compression_loss": 0.0, "distillation_loss": 0.3614046573638916, "epoch": 11.73, "learning_rate": 4.402630371765228e-05, "loss": 0.3403, "step": 32460, "task_loss": 0.9066839814186096 }, { "compression_loss": 0.0, "distillation_loss": 0.4615110158920288, "epoch": 11.73, "learning_rate": 4.396604916271977e-05, "loss": 0.4101, "step": 32470, "task_loss": 0.623814582824707 }, { "compression_loss": 0.0, "distillation_loss": 0.3592243194580078, "epoch": 11.74, "learning_rate": 4.3905722594141444e-05, "loss": 0.3632, "step": 32480, "task_loss": 0.442016065120697 }, { "compression_loss": 0.0, "distillation_loss": 0.2413320541381836, "epoch": 11.74, "learning_rate": 4.384532432298139e-05, "loss": 0.3191, "step": 32490, "task_loss": 0.45909905433654785 }, { "compression_loss": 0.0, "distillation_loss": 0.30714505910873413, "epoch": 11.75, "learning_rate": 4.37848546606734e-05, "loss": 0.3199, "step": 32500, "task_loss": 0.4241373538970947 }, { "epoch": 11.75, "eval_exact_match": 82.95175023651845, "eval_f1": 89.72995053074105, "step": 32500 }, { "compression_loss": 0.0, "distillation_loss": 0.34651437401771545, "epoch": 11.75, "learning_rate": 4.372431391901939e-05, "loss": 0.324, "step": 32510, "task_loss": 0.47919905185699463 }, { "compression_loss": 0.0, "distillation_loss": 0.3726678788661957, "epoch": 11.75, "learning_rate": 4.3663702410187786e-05, "loss": 0.3411, "step": 32520, "task_loss": 0.3535993993282318 }, { "compression_loss": 0.0, "distillation_loss": 0.25239893794059753, "epoch": 11.76, "learning_rate": 4.36030204467119e-05, "loss": 0.3611, "step": 32530, "task_loss": 0.40208229422569275 }, { "compression_loss": 0.0, "distillation_loss": 0.3526584506034851, "epoch": 11.76, "learning_rate": 4.354226834148834e-05, "loss": 0.3648, "step": 32540, "task_loss": 0.5418181419372559 }, { "compression_loss": 0.0, "distillation_loss": 0.442671537399292, "epoch": 11.76, "learning_rate": 4.34814464077754e-05, "loss": 0.3529, "step": 32550, "task_loss": 0.5203021764755249 }, { "compression_loss": 0.0, "distillation_loss": 0.3775908350944519, "epoch": 11.77, "learning_rate": 4.3420554959191415e-05, "loss": 0.3386, "step": 32560, "task_loss": 0.4405955672264099 }, { "compression_loss": 0.0, "distillation_loss": 0.39824825525283813, "epoch": 11.77, "learning_rate": 4.335959430971318e-05, "loss": 0.3693, "step": 32570, "task_loss": 0.5429366230964661 }, { "compression_loss": 0.0, "distillation_loss": 0.3338078260421753, "epoch": 11.77, "learning_rate": 4.3298564773674284e-05, "loss": 0.3439, "step": 32580, "task_loss": 0.3427931070327759 }, { "compression_loss": 0.0, "distillation_loss": 0.27158862352371216, "epoch": 11.78, "learning_rate": 4.3237466665763554e-05, "loss": 0.3539, "step": 32590, "task_loss": 0.9125320911407471 }, { "compression_loss": 0.0, "distillation_loss": 0.28898924589157104, "epoch": 11.78, "learning_rate": 4.317630030102336e-05, "loss": 0.3263, "step": 32600, "task_loss": 0.6775388717651367 }, { "compression_loss": 0.0, "distillation_loss": 0.4036529064178467, "epoch": 11.79, "learning_rate": 4.311506599484807e-05, "loss": 0.3472, "step": 32610, "task_loss": 0.6710329055786133 }, { "compression_loss": 0.0, "distillation_loss": 0.26878538727760315, "epoch": 11.79, "learning_rate": 4.3053764062982354e-05, "loss": 0.3293, "step": 32620, "task_loss": 0.46130526065826416 }, { "compression_loss": 0.0, "distillation_loss": 0.2734406590461731, "epoch": 11.79, "learning_rate": 4.299239482151957e-05, "loss": 0.3186, "step": 32630, "task_loss": 0.4390842318534851 }, { "compression_loss": 0.0, "distillation_loss": 0.30930137634277344, "epoch": 11.8, "learning_rate": 4.293095858690018e-05, "loss": 0.3809, "step": 32640, "task_loss": 0.4176652431488037 }, { "compression_loss": 0.0, "distillation_loss": 0.24716287851333618, "epoch": 11.8, "learning_rate": 4.286945567591006e-05, "loss": 0.3327, "step": 32650, "task_loss": 0.5557129979133606 }, { "compression_loss": 0.0, "distillation_loss": 0.38499587774276733, "epoch": 11.8, "learning_rate": 4.2807886405678904e-05, "loss": 0.3916, "step": 32660, "task_loss": 0.5830663442611694 }, { "compression_loss": 0.0, "distillation_loss": 0.36196571588516235, "epoch": 11.81, "learning_rate": 4.274625109367857e-05, "loss": 0.35, "step": 32670, "task_loss": 0.6983354687690735 }, { "compression_loss": 0.0, "distillation_loss": 0.330838143825531, "epoch": 11.81, "learning_rate": 4.268455005772145e-05, "loss": 0.3793, "step": 32680, "task_loss": 0.48391640186309814 }, { "compression_loss": 0.0, "distillation_loss": 0.4563385844230652, "epoch": 11.81, "learning_rate": 4.262278361595885e-05, "loss": 0.3782, "step": 32690, "task_loss": 0.7140378952026367 }, { "compression_loss": 0.0, "distillation_loss": 0.3044314682483673, "epoch": 11.82, "learning_rate": 4.25609520868793e-05, "loss": 0.3713, "step": 32700, "task_loss": 0.5128190517425537 }, { "compression_loss": 0.0, "distillation_loss": 0.30874133110046387, "epoch": 11.82, "learning_rate": 4.249905578930695e-05, "loss": 0.3483, "step": 32710, "task_loss": 0.3443719148635864 }, { "compression_loss": 0.0, "distillation_loss": 0.3269858658313751, "epoch": 11.83, "learning_rate": 4.2437095042399935e-05, "loss": 0.3142, "step": 32720, "task_loss": 0.5143279433250427 }, { "compression_loss": 0.0, "distillation_loss": 0.3339177370071411, "epoch": 11.83, "learning_rate": 4.2375070165648695e-05, "loss": 0.3226, "step": 32730, "task_loss": 0.38503554463386536 }, { "compression_loss": 0.0, "distillation_loss": 0.35822218656539917, "epoch": 11.83, "learning_rate": 4.231298147887436e-05, "loss": 0.3998, "step": 32740, "task_loss": 0.5768170356750488 }, { "compression_loss": 0.0, "distillation_loss": 0.32746079564094543, "epoch": 11.84, "learning_rate": 4.225082930222706e-05, "loss": 0.342, "step": 32750, "task_loss": 0.8048028945922852 }, { "epoch": 11.84, "eval_exact_match": 82.92336802270577, "eval_f1": 89.59989360234766, "step": 32750 }, { "compression_loss": 0.0, "distillation_loss": 0.4053548276424408, "epoch": 11.84, "learning_rate": 4.218861395618434e-05, "loss": 0.322, "step": 32760, "task_loss": 0.5083299875259399 }, { "compression_loss": 0.0, "distillation_loss": 0.34768015146255493, "epoch": 11.84, "learning_rate": 4.212633576154943e-05, "loss": 0.3578, "step": 32770, "task_loss": 0.8128582239151001 }, { "compression_loss": 0.0, "distillation_loss": 0.33670443296432495, "epoch": 11.85, "learning_rate": 4.206399503944965e-05, "loss": 0.3744, "step": 32780, "task_loss": 0.5336966514587402 }, { "compression_loss": 0.0, "distillation_loss": 0.37081536650657654, "epoch": 11.85, "learning_rate": 4.200159211133473e-05, "loss": 0.3778, "step": 32790, "task_loss": 0.44150346517562866 }, { "compression_loss": 0.0, "distillation_loss": 0.38825803995132446, "epoch": 11.85, "learning_rate": 4.193912729897513e-05, "loss": 0.3604, "step": 32800, "task_loss": 0.6093878149986267 }, { "compression_loss": 0.0, "distillation_loss": 0.30060669779777527, "epoch": 11.86, "learning_rate": 4.187660092446045e-05, "loss": 0.3095, "step": 32810, "task_loss": 0.5632023811340332 }, { "compression_loss": 0.0, "distillation_loss": 0.27266132831573486, "epoch": 11.86, "learning_rate": 4.18140133101977e-05, "loss": 0.3467, "step": 32820, "task_loss": 0.47467291355133057 }, { "compression_loss": 0.0, "distillation_loss": 0.3054879307746887, "epoch": 11.86, "learning_rate": 4.1751364778909655e-05, "loss": 0.3512, "step": 32830, "task_loss": 0.48678484559059143 }, { "compression_loss": 0.0, "distillation_loss": 0.32673510909080505, "epoch": 11.87, "learning_rate": 4.1688655653633224e-05, "loss": 0.3892, "step": 32840, "task_loss": 0.4992232024669647 }, { "compression_loss": 0.0, "distillation_loss": 0.3742809295654297, "epoch": 11.87, "learning_rate": 4.162588625771773e-05, "loss": 0.3305, "step": 32850, "task_loss": 0.8625702857971191 }, { "compression_loss": 0.0, "distillation_loss": 0.423870325088501, "epoch": 11.88, "learning_rate": 4.156305691482328e-05, "loss": 0.3461, "step": 32860, "task_loss": 0.5573567748069763 }, { "compression_loss": 0.0, "distillation_loss": 0.45885270833969116, "epoch": 11.88, "learning_rate": 4.150016794891912e-05, "loss": 0.3838, "step": 32870, "task_loss": 0.5527869462966919 }, { "compression_loss": 0.0, "distillation_loss": 0.23280248045921326, "epoch": 11.88, "learning_rate": 4.143721968428188e-05, "loss": 0.3861, "step": 32880, "task_loss": 0.41633132100105286 }, { "compression_loss": 0.0, "distillation_loss": 0.3545595705509186, "epoch": 11.89, "learning_rate": 4.1374212445494e-05, "loss": 0.336, "step": 32890, "task_loss": 0.5708621740341187 }, { "compression_loss": 0.0, "distillation_loss": 0.3292989730834961, "epoch": 11.89, "learning_rate": 4.131114655744196e-05, "loss": 0.3401, "step": 32900, "task_loss": 0.6946372985839844 }, { "compression_loss": 0.0, "distillation_loss": 0.43926069140434265, "epoch": 11.89, "learning_rate": 4.124802234531472e-05, "loss": 0.3562, "step": 32910, "task_loss": 0.786125123500824 }, { "compression_loss": 0.0, "distillation_loss": 0.3756178021430969, "epoch": 11.9, "learning_rate": 4.118484013460192e-05, "loss": 0.3703, "step": 32920, "task_loss": 0.4792003035545349 }, { "compression_loss": 0.0, "distillation_loss": 0.5044987797737122, "epoch": 11.9, "learning_rate": 4.112160025109231e-05, "loss": 0.3538, "step": 32930, "task_loss": 0.7191258072853088 }, { "compression_loss": 0.0, "distillation_loss": 0.2991417944431305, "epoch": 11.9, "learning_rate": 4.1058303020871964e-05, "loss": 0.3681, "step": 32940, "task_loss": 0.38584643602371216 }, { "compression_loss": 0.0, "distillation_loss": 0.3430123031139374, "epoch": 11.91, "learning_rate": 4.099494877032271e-05, "loss": 0.3623, "step": 32950, "task_loss": 0.599341630935669 }, { "compression_loss": 0.0, "distillation_loss": 0.3016449809074402, "epoch": 11.91, "learning_rate": 4.093153782612035e-05, "loss": 0.3564, "step": 32960, "task_loss": 0.49827057123184204 }, { "compression_loss": 0.0, "distillation_loss": 0.26733893156051636, "epoch": 11.92, "learning_rate": 4.086807051523305e-05, "loss": 0.3167, "step": 32970, "task_loss": 0.39013808965682983 }, { "compression_loss": 0.0, "distillation_loss": 0.30923688411712646, "epoch": 11.92, "learning_rate": 4.08045471649196e-05, "loss": 0.3393, "step": 32980, "task_loss": 0.455595463514328 }, { "compression_loss": 0.0, "distillation_loss": 0.31686368584632874, "epoch": 11.92, "learning_rate": 4.074096810272776e-05, "loss": 0.3616, "step": 32990, "task_loss": 0.6364704966545105 }, { "compression_loss": 0.0, "distillation_loss": 0.36765047907829285, "epoch": 11.93, "learning_rate": 4.067733365649255e-05, "loss": 0.3513, "step": 33000, "task_loss": 0.4931289255619049 }, { "epoch": 11.93, "eval_exact_match": 82.8476821192053, "eval_f1": 89.65882395133605, "step": 33000 }, { "compression_loss": 0.0, "distillation_loss": 0.422896146774292, "epoch": 11.93, "learning_rate": 4.061364415433457e-05, "loss": 0.3568, "step": 33010, "task_loss": 0.4513043165206909 }, { "compression_loss": 0.0, "distillation_loss": 0.3466213643550873, "epoch": 11.93, "learning_rate": 4.0549899924658306e-05, "loss": 0.3668, "step": 33020, "task_loss": 0.47706955671310425 }, { "compression_loss": 0.0, "distillation_loss": 0.35656729340553284, "epoch": 11.94, "learning_rate": 4.0486101296150455e-05, "loss": 0.3532, "step": 33030, "task_loss": 0.7009148597717285 }, { "compression_loss": 0.0, "distillation_loss": 0.40172943472862244, "epoch": 11.94, "learning_rate": 4.042224859777819e-05, "loss": 0.3756, "step": 33040, "task_loss": 0.6676845550537109 }, { "compression_loss": 0.0, "distillation_loss": 0.28709399700164795, "epoch": 11.94, "learning_rate": 4.03583421587875e-05, "loss": 0.3515, "step": 33050, "task_loss": 0.5011777877807617 }, { "compression_loss": 0.0, "distillation_loss": 0.26920461654663086, "epoch": 11.95, "learning_rate": 4.029438230870147e-05, "loss": 0.343, "step": 33060, "task_loss": 0.8642024993896484 }, { "compression_loss": 0.0, "distillation_loss": 0.330618292093277, "epoch": 11.95, "learning_rate": 4.02303693773186e-05, "loss": 0.3859, "step": 33070, "task_loss": 0.4772784411907196 }, { "compression_loss": 0.0, "distillation_loss": 0.3623301386833191, "epoch": 11.96, "learning_rate": 4.016630369471107e-05, "loss": 0.3878, "step": 33080, "task_loss": 0.6183096766471863 }, { "compression_loss": 0.0, "distillation_loss": 0.4232385456562042, "epoch": 11.96, "learning_rate": 4.010218559122312e-05, "loss": 0.3692, "step": 33090, "task_loss": 0.5985369682312012 }, { "compression_loss": 0.0, "distillation_loss": 0.4274103343486786, "epoch": 11.96, "learning_rate": 4.003801539746922e-05, "loss": 0.36, "step": 33100, "task_loss": 0.637836217880249 }, { "compression_loss": 0.0, "distillation_loss": 0.31120359897613525, "epoch": 11.97, "learning_rate": 3.997379344433248e-05, "loss": 0.3336, "step": 33110, "task_loss": 0.4128878116607666 }, { "compression_loss": 0.0, "distillation_loss": 0.35576552152633667, "epoch": 11.97, "learning_rate": 3.990952006296288e-05, "loss": 0.3998, "step": 33120, "task_loss": 1.0541032552719116 }, { "compression_loss": 0.0, "distillation_loss": 0.399566113948822, "epoch": 11.97, "learning_rate": 3.98451955847756e-05, "loss": 0.3525, "step": 33130, "task_loss": 0.8153650760650635 }, { "compression_loss": 0.0, "distillation_loss": 0.2788093388080597, "epoch": 11.98, "learning_rate": 3.978082034144926e-05, "loss": 0.3263, "step": 33140, "task_loss": 0.2653265595436096 }, { "compression_loss": 0.0, "distillation_loss": 0.3298949599266052, "epoch": 11.98, "learning_rate": 3.971639466492429e-05, "loss": 0.3387, "step": 33150, "task_loss": 0.5124013423919678 }, { "compression_loss": 0.0, "distillation_loss": 0.2869107127189636, "epoch": 11.98, "learning_rate": 3.9651918887401116e-05, "loss": 0.3311, "step": 33160, "task_loss": 0.3431087136268616 }, { "compression_loss": 0.0, "distillation_loss": 0.45431074500083923, "epoch": 11.99, "learning_rate": 3.958739334133853e-05, "loss": 0.3839, "step": 33170, "task_loss": 1.1194489002227783 }, { "compression_loss": 0.0, "distillation_loss": 0.34249794483184814, "epoch": 11.99, "learning_rate": 3.952281835945196e-05, "loss": 0.3492, "step": 33180, "task_loss": 0.924396812915802 }, { "compression_loss": 0.0, "distillation_loss": 0.4952884912490845, "epoch": 11.99, "learning_rate": 3.9458194274711716e-05, "loss": 0.3321, "step": 33190, "task_loss": 0.9026622772216797 }, { "compression_loss": 0.0, "distillation_loss": 0.34919559955596924, "epoch": 12.0, "learning_rate": 3.939352142034133e-05, "loss": 0.3531, "step": 33200, "task_loss": 0.6708678007125854 }, { "compression_loss": 0.0, "distillation_loss": 0.30084478855133057, "epoch": 12.0, "learning_rate": 3.932880012981575e-05, "loss": 0.3387, "step": 33210, "task_loss": 0.6755636930465698 }, { "compression_loss": 0.0, "distillation_loss": 0.2988947331905365, "epoch": 12.01, "learning_rate": 3.926403073685974e-05, "loss": 0.289, "step": 33220, "task_loss": 0.42057114839553833 }, { "compression_loss": 0.0, "distillation_loss": 0.33406442403793335, "epoch": 12.01, "learning_rate": 3.919921357544606e-05, "loss": 0.3132, "step": 33230, "task_loss": 0.5716870427131653 }, { "compression_loss": 0.0, "distillation_loss": 0.4645366072654724, "epoch": 12.01, "learning_rate": 3.91343489797938e-05, "loss": 0.3505, "step": 33240, "task_loss": 0.8373175263404846 }, { "compression_loss": 0.0, "distillation_loss": 0.23620998859405518, "epoch": 12.02, "learning_rate": 3.906943728436662e-05, "loss": 0.3047, "step": 33250, "task_loss": 0.5662074089050293 }, { "epoch": 12.02, "eval_exact_match": 82.88552507095554, "eval_f1": 89.60898535282445, "step": 33250 }, { "compression_loss": 0.0, "distillation_loss": 0.35468730330467224, "epoch": 12.02, "learning_rate": 3.900447882387106e-05, "loss": 0.3293, "step": 33260, "task_loss": 0.5851520895957947 }, { "compression_loss": 0.0, "distillation_loss": 0.3405912518501282, "epoch": 12.02, "learning_rate": 3.8939473933254764e-05, "loss": 0.316, "step": 33270, "task_loss": 0.5842773914337158 }, { "compression_loss": 0.0, "distillation_loss": 0.29812487959861755, "epoch": 12.03, "learning_rate": 3.8874422947704814e-05, "loss": 0.3099, "step": 33280, "task_loss": 0.353479266166687 }, { "compression_loss": 0.0, "distillation_loss": 0.28859126567840576, "epoch": 12.03, "learning_rate": 3.8809326202645984e-05, "loss": 0.3409, "step": 33290, "task_loss": 0.20747502148151398 }, { "compression_loss": 0.0, "distillation_loss": 0.28653669357299805, "epoch": 12.03, "learning_rate": 3.874418403373896e-05, "loss": 0.3129, "step": 33300, "task_loss": 0.6832212209701538 }, { "compression_loss": 0.0, "distillation_loss": 0.33109143376350403, "epoch": 12.04, "learning_rate": 3.867899677687868e-05, "loss": 0.3081, "step": 33310, "task_loss": 0.6277453899383545 }, { "compression_loss": 0.0, "distillation_loss": 0.3134967088699341, "epoch": 12.04, "learning_rate": 3.8613764768192565e-05, "loss": 0.2934, "step": 33320, "task_loss": 0.6228721737861633 }, { "compression_loss": 0.0, "distillation_loss": 0.24643732607364655, "epoch": 12.05, "learning_rate": 3.8548488344038775e-05, "loss": 0.294, "step": 33330, "task_loss": 0.4370988607406616 }, { "compression_loss": 0.0, "distillation_loss": 0.27333444356918335, "epoch": 12.05, "learning_rate": 3.8483167841004506e-05, "loss": 0.2789, "step": 33340, "task_loss": 0.3062555193901062 }, { "compression_loss": 0.0, "distillation_loss": 0.2717025876045227, "epoch": 12.05, "learning_rate": 3.8417803595904245e-05, "loss": 0.3134, "step": 33350, "task_loss": 0.8133729100227356 }, { "compression_loss": 0.0, "distillation_loss": 0.35719507932662964, "epoch": 12.06, "learning_rate": 3.835239594577801e-05, "loss": 0.3114, "step": 33360, "task_loss": 0.638519287109375 }, { "compression_loss": 0.0, "distillation_loss": 0.2419562190771103, "epoch": 12.06, "learning_rate": 3.8286945227889645e-05, "loss": 0.3095, "step": 33370, "task_loss": 0.40532201528549194 }, { "compression_loss": 0.0, "distillation_loss": 0.21433952450752258, "epoch": 12.06, "learning_rate": 3.8221451779725066e-05, "loss": 0.3122, "step": 33380, "task_loss": 0.5453925728797913 }, { "compression_loss": 0.0, "distillation_loss": 0.5214677453041077, "epoch": 12.07, "learning_rate": 3.815591593899051e-05, "loss": 0.3375, "step": 33390, "task_loss": 0.7860684394836426 }, { "compression_loss": 0.0, "distillation_loss": 0.25638237595558167, "epoch": 12.07, "learning_rate": 3.80903380436108e-05, "loss": 0.2805, "step": 33400, "task_loss": 0.2972143292427063 }, { "compression_loss": 0.0, "distillation_loss": 0.2858333885669708, "epoch": 12.07, "learning_rate": 3.802471843172762e-05, "loss": 0.3611, "step": 33410, "task_loss": 0.7213091850280762 }, { "compression_loss": 0.0, "distillation_loss": 0.24611973762512207, "epoch": 12.08, "learning_rate": 3.795905744169777e-05, "loss": 0.3028, "step": 33420, "task_loss": 0.353307843208313 }, { "compression_loss": 0.0, "distillation_loss": 0.39217904210090637, "epoch": 12.08, "learning_rate": 3.789335541209138e-05, "loss": 0.3213, "step": 33430, "task_loss": 0.5517551898956299 }, { "compression_loss": 0.0, "distillation_loss": 0.2604614496231079, "epoch": 12.09, "learning_rate": 3.782761268169021e-05, "loss": 0.3053, "step": 33440, "task_loss": 0.36389464139938354 }, { "compression_loss": 0.0, "distillation_loss": 0.24966314435005188, "epoch": 12.09, "learning_rate": 3.7761829589485886e-05, "loss": 0.3254, "step": 33450, "task_loss": 0.5294799208641052 }, { "compression_loss": 0.0, "distillation_loss": 0.20053541660308838, "epoch": 12.09, "learning_rate": 3.769600647467815e-05, "loss": 0.2726, "step": 33460, "task_loss": 0.25484806299209595 }, { "compression_loss": 0.0, "distillation_loss": 0.22263674437999725, "epoch": 12.1, "learning_rate": 3.7630143676673115e-05, "loss": 0.3163, "step": 33470, "task_loss": 0.4103575050830841 }, { "compression_loss": 0.0, "distillation_loss": 0.2307630330324173, "epoch": 12.1, "learning_rate": 3.756424153508152e-05, "loss": 0.2862, "step": 33480, "task_loss": 0.2939828634262085 }, { "compression_loss": 0.0, "distillation_loss": 0.27595192193984985, "epoch": 12.1, "learning_rate": 3.749830038971697e-05, "loss": 0.3034, "step": 33490, "task_loss": 0.576181173324585 }, { "compression_loss": 0.0, "distillation_loss": 0.26516520977020264, "epoch": 12.11, "learning_rate": 3.743892029168103e-05, "loss": 0.3007, "step": 33500, "task_loss": 0.4581066966056824 }, { "epoch": 12.11, "eval_exact_match": 82.83822138126774, "eval_f1": 89.7388509210768, "step": 33500 }, { "compression_loss": 0.0, "distillation_loss": 0.33838415145874023, "epoch": 12.11, "learning_rate": 3.737290597605322e-05, "loss": 0.3144, "step": 33510, "task_loss": 0.6935557126998901 }, { "compression_loss": 0.0, "distillation_loss": 0.27680984139442444, "epoch": 12.11, "learning_rate": 3.730685364324294e-05, "loss": 0.2987, "step": 33520, "task_loss": 0.5029197931289673 }, { "compression_loss": 0.0, "distillation_loss": 0.3041922450065613, "epoch": 12.12, "learning_rate": 3.724076363383821e-05, "loss": 0.2933, "step": 33530, "task_loss": 0.7183068990707397 }, { "compression_loss": 0.0, "distillation_loss": 0.3190416991710663, "epoch": 12.12, "learning_rate": 3.7174636288621354e-05, "loss": 0.3141, "step": 33540, "task_loss": 0.7233753204345703 }, { "compression_loss": 0.0, "distillation_loss": 0.3400724232196808, "epoch": 12.13, "learning_rate": 3.710847194856721e-05, "loss": 0.3335, "step": 33550, "task_loss": 0.4207264184951782 }, { "compression_loss": 0.0, "distillation_loss": 0.22366857528686523, "epoch": 12.13, "learning_rate": 3.704227095484135e-05, "loss": 0.3038, "step": 33560, "task_loss": 0.3199766278266907 }, { "compression_loss": 0.0, "distillation_loss": 0.313113272190094, "epoch": 12.13, "learning_rate": 3.697603364879837e-05, "loss": 0.2895, "step": 33570, "task_loss": 0.9284161329269409 }, { "compression_loss": 0.0, "distillation_loss": 0.28061339259147644, "epoch": 12.14, "learning_rate": 3.6909760371980076e-05, "loss": 0.312, "step": 33580, "task_loss": 0.40282338857650757 }, { "compression_loss": 0.0, "distillation_loss": 0.304638534784317, "epoch": 12.14, "learning_rate": 3.684345146611379e-05, "loss": 0.3121, "step": 33590, "task_loss": 0.33339840173721313 }, { "compression_loss": 0.0, "distillation_loss": 0.2917397916316986, "epoch": 12.14, "learning_rate": 3.677710727311052e-05, "loss": 0.3095, "step": 33600, "task_loss": 0.5817675590515137 }, { "compression_loss": 0.0, "distillation_loss": 0.25793755054473877, "epoch": 12.15, "learning_rate": 3.6710728135063216e-05, "loss": 0.3059, "step": 33610, "task_loss": 0.40517011284828186 }, { "compression_loss": 0.0, "distillation_loss": 0.2668699622154236, "epoch": 12.15, "learning_rate": 3.6644314394245064e-05, "loss": 0.3173, "step": 33620, "task_loss": 0.5295995473861694 }, { "compression_loss": 0.0, "distillation_loss": 0.2723209261894226, "epoch": 12.15, "learning_rate": 3.657786639310763e-05, "loss": 0.3043, "step": 33630, "task_loss": 0.49763721227645874 }, { "compression_loss": 0.0, "distillation_loss": 0.2693908214569092, "epoch": 12.16, "learning_rate": 3.651138447427916e-05, "loss": 0.2886, "step": 33640, "task_loss": 0.4400332570075989 }, { "compression_loss": 0.0, "distillation_loss": 0.5167297124862671, "epoch": 12.16, "learning_rate": 3.644486898056278e-05, "loss": 0.3416, "step": 33650, "task_loss": 0.9263020753860474 }, { "compression_loss": 0.0, "distillation_loss": 0.2826722264289856, "epoch": 12.16, "learning_rate": 3.637832025493472e-05, "loss": 0.3065, "step": 33660, "task_loss": 0.5701964497566223 }, { "compression_loss": 0.0, "distillation_loss": 0.29284870624542236, "epoch": 12.17, "learning_rate": 3.631173864054263e-05, "loss": 0.3013, "step": 33670, "task_loss": 0.5212148427963257 }, { "compression_loss": 0.0, "distillation_loss": 0.2942213714122772, "epoch": 12.17, "learning_rate": 3.624512448070367e-05, "loss": 0.3022, "step": 33680, "task_loss": 0.4938852787017822 }, { "compression_loss": 0.0, "distillation_loss": 0.2890348434448242, "epoch": 12.18, "learning_rate": 3.617847811890286e-05, "loss": 0.3044, "step": 33690, "task_loss": 0.3784128427505493 }, { "compression_loss": 0.0, "distillation_loss": 0.2744063138961792, "epoch": 12.18, "learning_rate": 3.6111799898791254e-05, "loss": 0.3688, "step": 33700, "task_loss": 0.38440898060798645 }, { "compression_loss": 0.0, "distillation_loss": 0.32158753275871277, "epoch": 12.18, "learning_rate": 3.604509016418417e-05, "loss": 0.3187, "step": 33710, "task_loss": 0.6017839908599854 }, { "compression_loss": 0.0, "distillation_loss": 0.2738969624042511, "epoch": 12.19, "learning_rate": 3.597834925905944e-05, "loss": 0.3011, "step": 33720, "task_loss": 0.653806746006012 }, { "compression_loss": 0.0, "distillation_loss": 0.31608352065086365, "epoch": 12.19, "learning_rate": 3.59115775275556e-05, "loss": 0.3181, "step": 33730, "task_loss": 0.9595538377761841 }, { "compression_loss": 0.0, "distillation_loss": 0.3204280138015747, "epoch": 12.19, "learning_rate": 3.584477531397016e-05, "loss": 0.3341, "step": 33740, "task_loss": 0.6531627774238586 }, { "compression_loss": 0.0, "distillation_loss": 0.2891020178794861, "epoch": 12.2, "learning_rate": 3.577794296275781e-05, "loss": 0.3018, "step": 33750, "task_loss": 0.5011224150657654 }, { "epoch": 12.2, "eval_exact_match": 82.99905392620624, "eval_f1": 89.93215898637722, "step": 33750 }, { "compression_loss": 0.0, "distillation_loss": 0.27659574151039124, "epoch": 12.2, "learning_rate": 3.57110808185286e-05, "loss": 0.3311, "step": 33760, "task_loss": 0.37247174978256226 }, { "compression_loss": 0.0, "distillation_loss": 0.22908209264278412, "epoch": 12.2, "learning_rate": 3.564418922604625e-05, "loss": 0.331, "step": 33770, "task_loss": 0.4124646484851837 }, { "compression_loss": 0.0, "distillation_loss": 0.2758791148662567, "epoch": 12.21, "learning_rate": 3.5577268530226296e-05, "loss": 0.3204, "step": 33780, "task_loss": 0.4327734708786011 }, { "compression_loss": 0.0, "distillation_loss": 0.28634101152420044, "epoch": 12.21, "learning_rate": 3.551031907613435e-05, "loss": 0.3361, "step": 33790, "task_loss": 0.5430434942245483 }, { "compression_loss": 0.0, "distillation_loss": 0.2788851261138916, "epoch": 12.22, "learning_rate": 3.544334120898431e-05, "loss": 0.3317, "step": 33800, "task_loss": 0.3666086792945862 }, { "compression_loss": 0.0, "distillation_loss": 0.2842410206794739, "epoch": 12.22, "learning_rate": 3.5376335274136567e-05, "loss": 0.3154, "step": 33810, "task_loss": 0.3724209666252136 }, { "compression_loss": 0.0, "distillation_loss": 0.27617254853248596, "epoch": 12.22, "learning_rate": 3.530930161709628e-05, "loss": 0.2959, "step": 33820, "task_loss": 0.8962105512619019 }, { "compression_loss": 0.0, "distillation_loss": 0.28928452730178833, "epoch": 12.23, "learning_rate": 3.5242240583511506e-05, "loss": 0.319, "step": 33830, "task_loss": 0.5773432850837708 }, { "compression_loss": 0.0, "distillation_loss": 0.34535133838653564, "epoch": 12.23, "learning_rate": 3.5175152519171495e-05, "loss": 0.3082, "step": 33840, "task_loss": 0.5388073921203613 }, { "compression_loss": 0.0, "distillation_loss": 0.30456995964050293, "epoch": 12.23, "learning_rate": 3.5108037770004867e-05, "loss": 0.2933, "step": 33850, "task_loss": 0.4720555543899536 }, { "compression_loss": 0.0, "distillation_loss": 0.2527344524860382, "epoch": 12.24, "learning_rate": 3.5040896682077835e-05, "loss": 0.3107, "step": 33860, "task_loss": 0.48990774154663086 }, { "compression_loss": 0.0, "distillation_loss": 0.2734881639480591, "epoch": 12.24, "learning_rate": 3.497372960159241e-05, "loss": 0.3044, "step": 33870, "task_loss": 0.3934549391269684 }, { "compression_loss": 0.0, "distillation_loss": 0.2221376895904541, "epoch": 12.24, "learning_rate": 3.490653687488467e-05, "loss": 0.3214, "step": 33880, "task_loss": 0.2999269366264343 }, { "compression_loss": 0.0, "distillation_loss": 0.3240625262260437, "epoch": 12.25, "learning_rate": 3.4839318848422894e-05, "loss": 0.2987, "step": 33890, "task_loss": 0.44523006677627563 }, { "compression_loss": 0.0, "distillation_loss": 0.37814152240753174, "epoch": 12.25, "learning_rate": 3.477207586880583e-05, "loss": 0.3273, "step": 33900, "task_loss": 0.7390934228897095 }, { "compression_loss": 0.0, "distillation_loss": 0.28805142641067505, "epoch": 12.26, "learning_rate": 3.47048082827609e-05, "loss": 0.2712, "step": 33910, "task_loss": 0.47077667713165283 }, { "compression_loss": 0.0, "distillation_loss": 0.24550151824951172, "epoch": 12.26, "learning_rate": 3.463751643714238e-05, "loss": 0.2862, "step": 33920, "task_loss": 0.31704485416412354 }, { "compression_loss": 0.0, "distillation_loss": 0.37711456418037415, "epoch": 12.26, "learning_rate": 3.457020067892969e-05, "loss": 0.3118, "step": 33930, "task_loss": 0.8705230951309204 }, { "compression_loss": 0.0, "distillation_loss": 0.2998274862766266, "epoch": 12.27, "learning_rate": 3.450286135522547e-05, "loss": 0.3065, "step": 33940, "task_loss": 0.455272376537323 }, { "compression_loss": 0.0, "distillation_loss": 0.2974916100502014, "epoch": 12.27, "learning_rate": 3.443549881325395e-05, "loss": 0.3035, "step": 33950, "task_loss": 0.31812000274658203 }, { "compression_loss": 0.0, "distillation_loss": 0.3739232122898102, "epoch": 12.27, "learning_rate": 3.4368113400359036e-05, "loss": 0.318, "step": 33960, "task_loss": 1.0148547887802124 }, { "compression_loss": 0.0, "distillation_loss": 0.2662932276725769, "epoch": 12.28, "learning_rate": 3.430070546400257e-05, "loss": 0.3136, "step": 33970, "task_loss": 0.4627422094345093 }, { "compression_loss": 0.0, "distillation_loss": 0.32645705342292786, "epoch": 12.28, "learning_rate": 3.423327535176256e-05, "loss": 0.3071, "step": 33980, "task_loss": 0.5508854389190674 }, { "compression_loss": 0.0, "distillation_loss": 0.3119924068450928, "epoch": 12.28, "learning_rate": 3.4165823411331315e-05, "loss": 0.3126, "step": 33990, "task_loss": 0.5757681727409363 }, { "compression_loss": 0.0, "distillation_loss": 0.23790660500526428, "epoch": 12.29, "learning_rate": 3.409834999051374e-05, "loss": 0.2965, "step": 34000, "task_loss": 0.7489010691642761 }, { "epoch": 12.29, "eval_exact_match": 82.72469252601702, "eval_f1": 89.51422301711462, "step": 34000 }, { "compression_loss": 0.0, "distillation_loss": 0.24514292180538177, "epoch": 12.29, "learning_rate": 3.403085543722547e-05, "loss": 0.3226, "step": 34010, "task_loss": 0.48626774549484253 }, { "compression_loss": 0.0, "distillation_loss": 0.38149791955947876, "epoch": 12.29, "learning_rate": 3.396334009949112e-05, "loss": 0.2977, "step": 34020, "task_loss": 0.31508100032806396 }, { "compression_loss": 0.0, "distillation_loss": 0.3336488604545593, "epoch": 12.3, "learning_rate": 3.389580432544248e-05, "loss": 0.3386, "step": 34030, "task_loss": 0.5556856393814087 }, { "compression_loss": 0.0, "distillation_loss": 0.20330330729484558, "epoch": 12.3, "learning_rate": 3.3828248463316716e-05, "loss": 0.3154, "step": 34040, "task_loss": 0.5865062475204468 }, { "compression_loss": 0.0, "distillation_loss": 0.25431859493255615, "epoch": 12.31, "learning_rate": 3.376067286145455e-05, "loss": 0.3229, "step": 34050, "task_loss": 0.32260221242904663 }, { "compression_loss": 0.0, "distillation_loss": 0.47123128175735474, "epoch": 12.31, "learning_rate": 3.3693077868298524e-05, "loss": 0.3253, "step": 34060, "task_loss": 0.4305189251899719 }, { "compression_loss": 0.0, "distillation_loss": 0.27410605549812317, "epoch": 12.31, "learning_rate": 3.362546383239115e-05, "loss": 0.3286, "step": 34070, "task_loss": 0.6105645895004272 }, { "compression_loss": 0.0, "distillation_loss": 0.31597283482551575, "epoch": 12.32, "learning_rate": 3.355783110237313e-05, "loss": 0.2906, "step": 34080, "task_loss": 0.6624294519424438 }, { "compression_loss": 0.0, "distillation_loss": 0.2440500557422638, "epoch": 12.32, "learning_rate": 3.349018002698157e-05, "loss": 0.2856, "step": 34090, "task_loss": 0.4859399199485779 }, { "compression_loss": 0.0, "distillation_loss": 0.2693696618080139, "epoch": 12.32, "learning_rate": 3.342251095504816e-05, "loss": 0.3057, "step": 34100, "task_loss": 0.5549313426017761 }, { "compression_loss": 0.0, "distillation_loss": 0.29348957538604736, "epoch": 12.33, "learning_rate": 3.335482423549738e-05, "loss": 0.3031, "step": 34110, "task_loss": 0.5355805158615112 }, { "compression_loss": 0.0, "distillation_loss": 0.3276856541633606, "epoch": 12.33, "learning_rate": 3.3287120217344734e-05, "loss": 0.3146, "step": 34120, "task_loss": 1.2835772037506104 }, { "compression_loss": 0.0, "distillation_loss": 0.36418938636779785, "epoch": 12.33, "learning_rate": 3.321939924969489e-05, "loss": 0.3095, "step": 34130, "task_loss": 0.4305654466152191 }, { "compression_loss": 0.0, "distillation_loss": 0.48510342836380005, "epoch": 12.34, "learning_rate": 3.315166168173994e-05, "loss": 0.3438, "step": 34140, "task_loss": 0.6050916314125061 }, { "compression_loss": 0.0, "distillation_loss": 0.24135936796665192, "epoch": 12.34, "learning_rate": 3.308390786275757e-05, "loss": 0.2987, "step": 34150, "task_loss": 0.5380813479423523 }, { "compression_loss": 0.0, "distillation_loss": 0.2821900546550751, "epoch": 12.35, "learning_rate": 3.3016138142109226e-05, "loss": 0.3237, "step": 34160, "task_loss": 0.503202497959137 }, { "compression_loss": 0.0, "distillation_loss": 0.32364422082901, "epoch": 12.35, "learning_rate": 3.29483528692384e-05, "loss": 0.3045, "step": 34170, "task_loss": 0.6083263754844666 }, { "compression_loss": 0.0, "distillation_loss": 0.2627584934234619, "epoch": 12.35, "learning_rate": 3.2880552393668754e-05, "loss": 0.3165, "step": 34180, "task_loss": 0.13859528303146362 }, { "compression_loss": 0.0, "distillation_loss": 0.2632201910018921, "epoch": 12.36, "learning_rate": 3.2812737065002324e-05, "loss": 0.3, "step": 34190, "task_loss": 0.669399082660675 }, { "compression_loss": 0.0, "distillation_loss": 0.25493764877319336, "epoch": 12.36, "learning_rate": 3.274490723291776e-05, "loss": 0.2976, "step": 34200, "task_loss": 0.6728479862213135 }, { "compression_loss": 0.0, "distillation_loss": 0.33259302377700806, "epoch": 12.36, "learning_rate": 3.267706324716848e-05, "loss": 0.3314, "step": 34210, "task_loss": 0.9753580689430237 }, { "compression_loss": 0.0, "distillation_loss": 0.31175535917282104, "epoch": 12.37, "learning_rate": 3.2609205457580886e-05, "loss": 0.3139, "step": 34220, "task_loss": 0.3914549946784973 }, { "compression_loss": 0.0, "distillation_loss": 0.3147822618484497, "epoch": 12.37, "learning_rate": 3.254133421405257e-05, "loss": 0.2919, "step": 34230, "task_loss": 0.8125878572463989 }, { "compression_loss": 0.0, "distillation_loss": 0.24442313611507416, "epoch": 12.37, "learning_rate": 3.247344986655048e-05, "loss": 0.3332, "step": 34240, "task_loss": 0.5116071105003357 }, { "compression_loss": 0.0, "distillation_loss": 0.2911091446876526, "epoch": 12.38, "learning_rate": 3.2405552765109136e-05, "loss": 0.3303, "step": 34250, "task_loss": 1.3424382209777832 }, { "epoch": 12.38, "eval_exact_match": 82.68684957426679, "eval_f1": 89.51533730161242, "step": 34250 }, { "compression_loss": 0.0, "distillation_loss": 0.3006014823913574, "epoch": 12.38, "learning_rate": 3.2337643259828825e-05, "loss": 0.2999, "step": 34260, "task_loss": 0.33499881625175476 }, { "compression_loss": 0.0, "distillation_loss": 0.2646566927433014, "epoch": 12.39, "learning_rate": 3.226972170087379e-05, "loss": 0.2945, "step": 34270, "task_loss": 0.45225051045417786 }, { "compression_loss": 0.0, "distillation_loss": 0.34713464975357056, "epoch": 12.39, "learning_rate": 3.220178843847045e-05, "loss": 0.3283, "step": 34280, "task_loss": 0.6201308369636536 }, { "compression_loss": 0.0, "distillation_loss": 0.28564396500587463, "epoch": 12.39, "learning_rate": 3.213384382290552e-05, "loss": 0.2633, "step": 34290, "task_loss": 0.5016683340072632 }, { "compression_loss": 0.0, "distillation_loss": 0.24384666979312897, "epoch": 12.4, "learning_rate": 3.20658882045243e-05, "loss": 0.3113, "step": 34300, "task_loss": 0.4746541976928711 }, { "compression_loss": 0.0, "distillation_loss": 0.23822401463985443, "epoch": 12.4, "learning_rate": 3.199792193372881e-05, "loss": 0.2974, "step": 34310, "task_loss": 0.3712196946144104 }, { "compression_loss": 0.0, "distillation_loss": 0.2704951763153076, "epoch": 12.4, "learning_rate": 3.192994536097598e-05, "loss": 0.2739, "step": 34320, "task_loss": 0.5020608901977539 }, { "compression_loss": 0.0, "distillation_loss": 0.25836482644081116, "epoch": 12.41, "learning_rate": 3.186195883677591e-05, "loss": 0.3205, "step": 34330, "task_loss": 0.4176018238067627 }, { "compression_loss": 0.0, "distillation_loss": 0.22267907857894897, "epoch": 12.41, "learning_rate": 3.1793962711689954e-05, "loss": 0.3038, "step": 34340, "task_loss": 0.36904817819595337 }, { "compression_loss": 0.0, "distillation_loss": 0.24134516716003418, "epoch": 12.41, "learning_rate": 3.172595733632902e-05, "loss": 0.3062, "step": 34350, "task_loss": 0.5529676079750061 }, { "compression_loss": 0.0, "distillation_loss": 0.2891293168067932, "epoch": 12.42, "learning_rate": 3.1657943061351665e-05, "loss": 0.2842, "step": 34360, "task_loss": 0.40239569544792175 }, { "compression_loss": 0.0, "distillation_loss": 0.25356581807136536, "epoch": 12.42, "learning_rate": 3.158992023746236e-05, "loss": 0.3037, "step": 34370, "task_loss": 0.5014570355415344 }, { "compression_loss": 0.0, "distillation_loss": 0.32125765085220337, "epoch": 12.43, "learning_rate": 3.15218892154097e-05, "loss": 0.3079, "step": 34380, "task_loss": 0.4384068250656128 }, { "compression_loss": 0.0, "distillation_loss": 0.3878769874572754, "epoch": 12.43, "learning_rate": 3.145385034598447e-05, "loss": 0.3303, "step": 34390, "task_loss": 0.8171356916427612 }, { "compression_loss": 0.0, "distillation_loss": 0.28572502732276917, "epoch": 12.43, "learning_rate": 3.138580398001798e-05, "loss": 0.3081, "step": 34400, "task_loss": 0.39703404903411865 }, { "compression_loss": 0.0, "distillation_loss": 0.23337849974632263, "epoch": 12.44, "learning_rate": 3.131775046838017e-05, "loss": 0.2909, "step": 34410, "task_loss": 0.33157849311828613 }, { "compression_loss": 0.0, "distillation_loss": 0.3653300404548645, "epoch": 12.44, "learning_rate": 3.124969016197782e-05, "loss": 0.3113, "step": 34420, "task_loss": 0.7859286069869995 }, { "compression_loss": 0.0, "distillation_loss": 0.2919321358203888, "epoch": 12.44, "learning_rate": 3.1181623411752774e-05, "loss": 0.3266, "step": 34430, "task_loss": 0.43368104100227356 }, { "compression_loss": 0.0, "distillation_loss": 0.20942334830760956, "epoch": 12.45, "learning_rate": 3.111355056868007e-05, "loss": 0.2836, "step": 34440, "task_loss": 0.4100843667984009 }, { "compression_loss": 0.0, "distillation_loss": 0.3563976287841797, "epoch": 12.45, "learning_rate": 3.104547198376618e-05, "loss": 0.32, "step": 34450, "task_loss": 0.28328973054885864 }, { "compression_loss": 0.0, "distillation_loss": 0.2783935070037842, "epoch": 12.45, "learning_rate": 3.09773880080472e-05, "loss": 0.3252, "step": 34460, "task_loss": 0.48669523000717163 }, { "compression_loss": 0.0, "distillation_loss": 0.28177738189697266, "epoch": 12.46, "learning_rate": 3.0909298992586974e-05, "loss": 0.3014, "step": 34470, "task_loss": 0.44701170921325684 }, { "compression_loss": 0.0, "distillation_loss": 0.2627007067203522, "epoch": 12.46, "learning_rate": 3.084120528847538e-05, "loss": 0.3006, "step": 34480, "task_loss": 0.44245171546936035 }, { "compression_loss": 0.0, "distillation_loss": 0.3353618383407593, "epoch": 12.46, "learning_rate": 3.077310724682645e-05, "loss": 0.3143, "step": 34490, "task_loss": 0.4516112208366394 }, { "compression_loss": 0.0, "distillation_loss": 0.32477229833602905, "epoch": 12.47, "learning_rate": 3.070500521877658e-05, "loss": 0.301, "step": 34500, "task_loss": 0.6717529892921448 }, { "epoch": 12.47, "eval_exact_match": 82.96121097445601, "eval_f1": 89.7702056217889, "step": 34500 }, { "compression_loss": 0.0, "distillation_loss": 0.27970433235168457, "epoch": 12.47, "learning_rate": 3.063689955548274e-05, "loss": 0.2976, "step": 34510, "task_loss": 0.5146293640136719 }, { "compression_loss": 0.0, "distillation_loss": 0.2640838921070099, "epoch": 12.48, "learning_rate": 3.056879060812061e-05, "loss": 0.3187, "step": 34520, "task_loss": 0.29868653416633606 }, { "compression_loss": 0.0, "distillation_loss": 0.2819470167160034, "epoch": 12.48, "learning_rate": 3.0500678727882856e-05, "loss": 0.2936, "step": 34530, "task_loss": 0.43330201506614685 }, { "compression_loss": 0.0, "distillation_loss": 0.3299875855445862, "epoch": 12.48, "learning_rate": 3.043256426597721e-05, "loss": 0.3005, "step": 34540, "task_loss": 0.4487469792366028 }, { "compression_loss": 0.0, "distillation_loss": 0.31510305404663086, "epoch": 12.49, "learning_rate": 3.036444757362475e-05, "loss": 0.3098, "step": 34550, "task_loss": 0.3750832676887512 }, { "compression_loss": 0.0, "distillation_loss": 0.31631648540496826, "epoch": 12.49, "learning_rate": 3.029632900205806e-05, "loss": 0.3136, "step": 34560, "task_loss": 0.29234394431114197 }, { "compression_loss": 0.0, "distillation_loss": 0.25344857573509216, "epoch": 12.49, "learning_rate": 3.0228208902519374e-05, "loss": 0.3155, "step": 34570, "task_loss": 0.4813919961452484 }, { "compression_loss": 0.0, "distillation_loss": 0.23850274085998535, "epoch": 12.5, "learning_rate": 3.016008762625886e-05, "loss": 0.3022, "step": 34580, "task_loss": 0.5706331729888916 }, { "compression_loss": 0.0, "distillation_loss": 0.23559576272964478, "epoch": 12.5, "learning_rate": 3.00919655245327e-05, "loss": 0.2813, "step": 34590, "task_loss": 0.3056916892528534 }, { "compression_loss": 0.0, "distillation_loss": 0.3584052324295044, "epoch": 12.5, "learning_rate": 3.002384294860138e-05, "loss": 0.3686, "step": 34600, "task_loss": 0.783943235874176 }, { "compression_loss": 0.0, "distillation_loss": 0.2597944736480713, "epoch": 12.51, "learning_rate": 2.9955720249727796e-05, "loss": 0.334, "step": 34610, "task_loss": 0.4639328718185425 }, { "compression_loss": 0.0, "distillation_loss": 0.2866797149181366, "epoch": 12.51, "learning_rate": 2.9887597779175498e-05, "loss": 0.3025, "step": 34620, "task_loss": 0.8007729053497314 }, { "compression_loss": 0.0, "distillation_loss": 0.3565661311149597, "epoch": 12.52, "learning_rate": 2.9819475888206846e-05, "loss": 0.3179, "step": 34630, "task_loss": 0.7241135239601135 }, { "compression_loss": 0.0, "distillation_loss": 0.30475327372550964, "epoch": 12.52, "learning_rate": 2.975135492808122e-05, "loss": 0.3257, "step": 34640, "task_loss": 0.4078003466129303 }, { "compression_loss": 0.0, "distillation_loss": 0.3345334827899933, "epoch": 12.52, "learning_rate": 2.9683235250053192e-05, "loss": 0.3087, "step": 34650, "task_loss": 0.4602591395378113 }, { "compression_loss": 0.0, "distillation_loss": 0.49751919507980347, "epoch": 12.53, "learning_rate": 2.9615117205370738e-05, "loss": 0.3164, "step": 34660, "task_loss": 0.6007466316223145 }, { "compression_loss": 0.0, "distillation_loss": 0.2072526514530182, "epoch": 12.53, "learning_rate": 2.9547001145273384e-05, "loss": 0.3216, "step": 34670, "task_loss": 0.7436428070068359 }, { "compression_loss": 0.0, "distillation_loss": 0.34686729311943054, "epoch": 12.53, "learning_rate": 2.9478887420990473e-05, "loss": 0.3079, "step": 34680, "task_loss": 0.9251314401626587 }, { "compression_loss": 0.0, "distillation_loss": 0.27641862630844116, "epoch": 12.54, "learning_rate": 2.9410776383739235e-05, "loss": 0.3094, "step": 34690, "task_loss": 0.45291346311569214 }, { "compression_loss": 0.0, "distillation_loss": 0.2968369126319885, "epoch": 12.54, "learning_rate": 2.934266838472311e-05, "loss": 0.2977, "step": 34700, "task_loss": 0.524625301361084 }, { "compression_loss": 0.0, "distillation_loss": 0.32808536291122437, "epoch": 12.54, "learning_rate": 2.9274563775129826e-05, "loss": 0.3131, "step": 34710, "task_loss": 0.7311720848083496 }, { "compression_loss": 0.0, "distillation_loss": 0.37095907330513, "epoch": 12.55, "learning_rate": 2.920646290612966e-05, "loss": 0.2997, "step": 34720, "task_loss": 0.9131951332092285 }, { "compression_loss": 0.0, "distillation_loss": 0.23286473751068115, "epoch": 12.55, "learning_rate": 2.913836612887359e-05, "loss": 0.3163, "step": 34730, "task_loss": 0.5092307329177856 }, { "compression_loss": 0.0, "distillation_loss": 0.2292613983154297, "epoch": 12.56, "learning_rate": 2.9070273794491498e-05, "loss": 0.2968, "step": 34740, "task_loss": 0.39329832792282104 }, { "compression_loss": 0.0, "distillation_loss": 0.24411755800247192, "epoch": 12.56, "learning_rate": 2.900218625409036e-05, "loss": 0.3213, "step": 34750, "task_loss": 0.35126519203186035 }, { "epoch": 12.56, "eval_exact_match": 83.33964049195838, "eval_f1": 90.00245261288558, "step": 34750 }, { "compression_loss": 0.0, "distillation_loss": 0.4118926227092743, "epoch": 12.56, "learning_rate": 2.8934103858752427e-05, "loss": 0.332, "step": 34760, "task_loss": 0.562553882598877 }, { "compression_loss": 0.0, "distillation_loss": 0.2720871865749359, "epoch": 12.57, "learning_rate": 2.8866026959533416e-05, "loss": 0.2967, "step": 34770, "task_loss": 0.33152109384536743 }, { "compression_loss": 0.0, "distillation_loss": 0.2610456645488739, "epoch": 12.57, "learning_rate": 2.8797955907460733e-05, "loss": 0.2883, "step": 34780, "task_loss": 0.835556149482727 }, { "compression_loss": 0.0, "distillation_loss": 0.24886228144168854, "epoch": 12.57, "learning_rate": 2.8729891053531584e-05, "loss": 0.2994, "step": 34790, "task_loss": 0.4246406555175781 }, { "compression_loss": 0.0, "distillation_loss": 0.31593984365463257, "epoch": 12.58, "learning_rate": 2.866183274871127e-05, "loss": 0.3201, "step": 34800, "task_loss": 0.6031840443611145 }, { "compression_loss": 0.0, "distillation_loss": 0.289052814245224, "epoch": 12.58, "learning_rate": 2.859378134393128e-05, "loss": 0.2935, "step": 34810, "task_loss": 0.4136536717414856 }, { "compression_loss": 0.0, "distillation_loss": 0.25019770860671997, "epoch": 12.58, "learning_rate": 2.8525737190087548e-05, "loss": 0.3172, "step": 34820, "task_loss": 0.5181039571762085 }, { "compression_loss": 0.0, "distillation_loss": 0.2552999258041382, "epoch": 12.59, "learning_rate": 2.8457700638038606e-05, "loss": 0.2592, "step": 34830, "task_loss": 0.4570186734199524 }, { "compression_loss": 0.0, "distillation_loss": 0.25203728675842285, "epoch": 12.59, "learning_rate": 2.8389672038603805e-05, "loss": 0.2763, "step": 34840, "task_loss": 0.5494889616966248 }, { "compression_loss": 0.0, "distillation_loss": 0.3487963080406189, "epoch": 12.59, "learning_rate": 2.8321651742561465e-05, "loss": 0.3192, "step": 34850, "task_loss": 0.5809800624847412 }, { "compression_loss": 0.0, "distillation_loss": 0.36887413263320923, "epoch": 12.6, "learning_rate": 2.8253640100647124e-05, "loss": 0.3091, "step": 34860, "task_loss": 0.24406704306602478 }, { "compression_loss": 0.0, "distillation_loss": 0.3776524066925049, "epoch": 12.6, "learning_rate": 2.818563746355165e-05, "loss": 0.3065, "step": 34870, "task_loss": 0.6552656888961792 }, { "compression_loss": 0.0, "distillation_loss": 0.27506256103515625, "epoch": 12.61, "learning_rate": 2.8117644181919547e-05, "loss": 0.3169, "step": 34880, "task_loss": 0.5139168500900269 }, { "compression_loss": 0.0, "distillation_loss": 0.35786789655685425, "epoch": 12.61, "learning_rate": 2.8049660606346994e-05, "loss": 0.3498, "step": 34890, "task_loss": 0.5621675252914429 }, { "compression_loss": 0.0, "distillation_loss": 0.3235039710998535, "epoch": 12.61, "learning_rate": 2.79816870873802e-05, "loss": 0.2973, "step": 34900, "task_loss": 0.4850617051124573 }, { "compression_loss": 0.0, "distillation_loss": 0.2791781425476074, "epoch": 12.62, "learning_rate": 2.7913723975513473e-05, "loss": 0.3168, "step": 34910, "task_loss": 0.44785845279693604 }, { "compression_loss": 0.0, "distillation_loss": 0.3183351457118988, "epoch": 12.62, "learning_rate": 2.7845771621187476e-05, "loss": 0.3185, "step": 34920, "task_loss": 0.26022273302078247 }, { "compression_loss": 0.0, "distillation_loss": 0.24664825201034546, "epoch": 12.62, "learning_rate": 2.7777830374787396e-05, "loss": 0.2517, "step": 34930, "task_loss": 0.3348398208618164 }, { "compression_loss": 0.0, "distillation_loss": 0.2722611427307129, "epoch": 12.63, "learning_rate": 2.770990058664115e-05, "loss": 0.3454, "step": 34940, "task_loss": 0.5424426794052124 }, { "compression_loss": 0.0, "distillation_loss": 0.280570924282074, "epoch": 12.63, "learning_rate": 2.7641982607017554e-05, "loss": 0.2948, "step": 34950, "task_loss": 0.5146224498748779 }, { "compression_loss": 0.0, "distillation_loss": 0.2772490680217743, "epoch": 12.63, "learning_rate": 2.757407678612457e-05, "loss": 0.2776, "step": 34960, "task_loss": 0.44447261095046997 }, { "compression_loss": 0.0, "distillation_loss": 0.24304187297821045, "epoch": 12.64, "learning_rate": 2.7506183474107415e-05, "loss": 0.274, "step": 34970, "task_loss": 0.3981664776802063 }, { "compression_loss": 0.0, "distillation_loss": 0.2865305542945862, "epoch": 12.64, "learning_rate": 2.743830302104688e-05, "loss": 0.3251, "step": 34980, "task_loss": 0.4353870749473572 }, { "compression_loss": 0.0, "distillation_loss": 0.2837364673614502, "epoch": 12.65, "learning_rate": 2.7370435776957363e-05, "loss": 0.2872, "step": 34990, "task_loss": 0.5949020385742188 }, { "compression_loss": 0.0, "distillation_loss": 0.3170028626918793, "epoch": 12.65, "learning_rate": 2.7302582091785228e-05, "loss": 0.3451, "step": 35000, "task_loss": 0.6154573559761047 }, { "epoch": 12.65, "eval_exact_match": 83.18826868495742, "eval_f1": 89.9089136876254, "step": 35000 }, { "compression_loss": 0.0, "distillation_loss": 0.23709344863891602, "epoch": 12.65, "learning_rate": 2.723474231540687e-05, "loss": 0.2959, "step": 35010, "task_loss": 0.532070517539978 }, { "compression_loss": 0.0, "distillation_loss": 0.2065018117427826, "epoch": 12.66, "learning_rate": 2.7166916797627012e-05, "loss": 0.2954, "step": 35020, "task_loss": 0.36863160133361816 }, { "compression_loss": 0.0, "distillation_loss": 0.34484899044036865, "epoch": 12.66, "learning_rate": 2.7099105888176807e-05, "loss": 0.3376, "step": 35030, "task_loss": 0.5684521794319153 }, { "compression_loss": 0.0, "distillation_loss": 0.20965680480003357, "epoch": 12.66, "learning_rate": 2.7031309936712125e-05, "loss": 0.2896, "step": 35040, "task_loss": 0.5758606195449829 }, { "compression_loss": 0.0, "distillation_loss": 0.2636858820915222, "epoch": 12.67, "learning_rate": 2.6963529292811673e-05, "loss": 0.2785, "step": 35050, "task_loss": 0.4598173499107361 }, { "compression_loss": 0.0, "distillation_loss": 0.23789474368095398, "epoch": 12.67, "learning_rate": 2.689576430597526e-05, "loss": 0.3038, "step": 35060, "task_loss": 0.490090548992157 }, { "compression_loss": 0.0, "distillation_loss": 0.29482781887054443, "epoch": 12.67, "learning_rate": 2.682801532562192e-05, "loss": 0.2974, "step": 35070, "task_loss": 0.9217762351036072 }, { "compression_loss": 0.0, "distillation_loss": 0.25767529010772705, "epoch": 12.68, "learning_rate": 2.6760282701088214e-05, "loss": 0.3068, "step": 35080, "task_loss": 0.642894983291626 }, { "compression_loss": 0.0, "distillation_loss": 0.2678022086620331, "epoch": 12.68, "learning_rate": 2.66925667816263e-05, "loss": 0.3014, "step": 35090, "task_loss": 0.6086078882217407 }, { "compression_loss": 0.0, "distillation_loss": 0.2076597809791565, "epoch": 12.69, "learning_rate": 2.662486791640225e-05, "loss": 0.2907, "step": 35100, "task_loss": 0.23006045818328857 }, { "compression_loss": 0.0, "distillation_loss": 0.33956581354141235, "epoch": 12.69, "learning_rate": 2.655718645449416e-05, "loss": 0.297, "step": 35110, "task_loss": 0.8203099966049194 }, { "compression_loss": 0.0, "distillation_loss": 0.2641342282295227, "epoch": 12.69, "learning_rate": 2.648952274489042e-05, "loss": 0.3053, "step": 35120, "task_loss": 0.3075989782810211 }, { "compression_loss": 0.0, "distillation_loss": 0.24056673049926758, "epoch": 12.7, "learning_rate": 2.6421877136487866e-05, "loss": 0.2915, "step": 35130, "task_loss": 1.0190541744232178 }, { "compression_loss": 0.0, "distillation_loss": 0.21946954727172852, "epoch": 12.7, "learning_rate": 2.6354249978089998e-05, "loss": 0.2835, "step": 35140, "task_loss": 0.5367399454116821 }, { "compression_loss": 0.0, "distillation_loss": 0.3418148458003998, "epoch": 12.7, "learning_rate": 2.6286641618405177e-05, "loss": 0.3303, "step": 35150, "task_loss": 0.3500261902809143 }, { "compression_loss": 0.0, "distillation_loss": 0.283519446849823, "epoch": 12.71, "learning_rate": 2.6219052406044874e-05, "loss": 0.3565, "step": 35160, "task_loss": 0.4854230284690857 }, { "compression_loss": 0.0, "distillation_loss": 0.3122413158416748, "epoch": 12.71, "learning_rate": 2.615148268952175e-05, "loss": 0.3211, "step": 35170, "task_loss": 0.6190265417098999 }, { "compression_loss": 0.0, "distillation_loss": 0.2543804943561554, "epoch": 12.71, "learning_rate": 2.6083932817248023e-05, "loss": 0.3016, "step": 35180, "task_loss": 0.2701253890991211 }, { "compression_loss": 0.0, "distillation_loss": 0.3085487484931946, "epoch": 12.72, "learning_rate": 2.6016403137533524e-05, "loss": 0.3235, "step": 35190, "task_loss": 0.6589609980583191 }, { "compression_loss": 0.0, "distillation_loss": 0.28512585163116455, "epoch": 12.72, "learning_rate": 2.594889399858401e-05, "loss": 0.3019, "step": 35200, "task_loss": 0.3173820972442627 }, { "compression_loss": 0.0, "distillation_loss": 0.2618717551231384, "epoch": 12.72, "learning_rate": 2.5881405748499288e-05, "loss": 0.3216, "step": 35210, "task_loss": 0.3481888771057129 }, { "compression_loss": 0.0, "distillation_loss": 0.24822674691677094, "epoch": 12.73, "learning_rate": 2.5813938735271474e-05, "loss": 0.3037, "step": 35220, "task_loss": 0.6111527681350708 }, { "compression_loss": 0.0, "distillation_loss": 0.33503878116607666, "epoch": 12.73, "learning_rate": 2.5746493306783173e-05, "loss": 0.2805, "step": 35230, "task_loss": 0.28723621368408203 }, { "compression_loss": 0.0, "distillation_loss": 0.21374112367630005, "epoch": 12.74, "learning_rate": 2.56790698108057e-05, "loss": 0.2953, "step": 35240, "task_loss": 0.32919079065322876 }, { "compression_loss": 0.0, "distillation_loss": 0.26212984323501587, "epoch": 12.74, "learning_rate": 2.5611668594997254e-05, "loss": 0.2846, "step": 35250, "task_loss": 0.3463735580444336 }, { "epoch": 12.74, "eval_exact_match": 83.28287606433302, "eval_f1": 90.13822188130554, "step": 35250 }, { "compression_loss": 0.0, "distillation_loss": 0.4006456732749939, "epoch": 12.74, "learning_rate": 2.5544290006901206e-05, "loss": 0.2915, "step": 35260, "task_loss": 0.35442763566970825 }, { "compression_loss": 0.0, "distillation_loss": 0.24394173920154572, "epoch": 12.75, "learning_rate": 2.5476934393944168e-05, "loss": 0.3141, "step": 35270, "task_loss": 0.5410995483398438 }, { "compression_loss": 0.0, "distillation_loss": 0.27239710092544556, "epoch": 12.75, "learning_rate": 2.5409602103434368e-05, "loss": 0.3226, "step": 35280, "task_loss": 0.3332737386226654 }, { "compression_loss": 0.0, "distillation_loss": 0.26623234152793884, "epoch": 12.75, "learning_rate": 2.5342293482559712e-05, "loss": 0.314, "step": 35290, "task_loss": 0.45213043689727783 }, { "compression_loss": 0.0, "distillation_loss": 0.28726357221603394, "epoch": 12.76, "learning_rate": 2.5275008878386102e-05, "loss": 0.2934, "step": 35300, "task_loss": 0.5393993854522705 }, { "compression_loss": 0.0, "distillation_loss": 0.3673667311668396, "epoch": 12.76, "learning_rate": 2.5207748637855563e-05, "loss": 0.2806, "step": 35310, "task_loss": 0.4093773663043976 }, { "compression_loss": 0.0, "distillation_loss": 0.334319144487381, "epoch": 12.76, "learning_rate": 2.5140513107784532e-05, "loss": 0.3143, "step": 35320, "task_loss": 1.018317461013794 }, { "compression_loss": 0.0, "distillation_loss": 0.28693056106567383, "epoch": 12.77, "learning_rate": 2.5073302634861993e-05, "loss": 0.2935, "step": 35330, "task_loss": 0.6353425979614258 }, { "compression_loss": 0.0, "distillation_loss": 0.2879865765571594, "epoch": 12.77, "learning_rate": 2.5006117565647766e-05, "loss": 0.2832, "step": 35340, "task_loss": 0.2560500502586365 }, { "compression_loss": 0.0, "distillation_loss": 0.2619374394416809, "epoch": 12.78, "learning_rate": 2.4938958246570628e-05, "loss": 0.3187, "step": 35350, "task_loss": 0.5173212289810181 }, { "compression_loss": 0.0, "distillation_loss": 0.26497191190719604, "epoch": 12.78, "learning_rate": 2.4871825023926654e-05, "loss": 0.2858, "step": 35360, "task_loss": 0.5506118535995483 }, { "compression_loss": 0.0, "distillation_loss": 0.2945820689201355, "epoch": 12.78, "learning_rate": 2.4804718243877275e-05, "loss": 0.2953, "step": 35370, "task_loss": 0.6132495403289795 }, { "compression_loss": 0.0, "distillation_loss": 0.3165275752544403, "epoch": 12.79, "learning_rate": 2.4737638252447642e-05, "loss": 0.3079, "step": 35380, "task_loss": 0.4461279511451721 }, { "compression_loss": 0.0, "distillation_loss": 0.3094812035560608, "epoch": 12.79, "learning_rate": 2.4670585395524735e-05, "loss": 0.3315, "step": 35390, "task_loss": 0.4002085328102112 }, { "compression_loss": 0.0, "distillation_loss": 0.39851170778274536, "epoch": 12.79, "learning_rate": 2.4603560018855634e-05, "loss": 0.3087, "step": 35400, "task_loss": 0.39463096857070923 }, { "compression_loss": 0.0, "distillation_loss": 0.3484683632850647, "epoch": 12.8, "learning_rate": 2.4536562468045718e-05, "loss": 0.2993, "step": 35410, "task_loss": 0.7016239166259766 }, { "compression_loss": 0.0, "distillation_loss": 0.27489396929740906, "epoch": 12.8, "learning_rate": 2.4469593088556896e-05, "loss": 0.2889, "step": 35420, "task_loss": 0.5056446194648743 }, { "compression_loss": 0.0, "distillation_loss": 0.25945621728897095, "epoch": 12.8, "learning_rate": 2.4402652225705794e-05, "loss": 0.3146, "step": 35430, "task_loss": 0.49554070830345154 }, { "compression_loss": 0.0, "distillation_loss": 0.3142120838165283, "epoch": 12.81, "learning_rate": 2.4335740224662032e-05, "loss": 0.2934, "step": 35440, "task_loss": 0.4447091817855835 }, { "compression_loss": 0.0, "distillation_loss": 0.33970868587493896, "epoch": 12.81, "learning_rate": 2.4268857430446355e-05, "loss": 0.3232, "step": 35450, "task_loss": 0.654474675655365 }, { "compression_loss": 0.0, "distillation_loss": 0.3706507682800293, "epoch": 12.82, "learning_rate": 2.4202004187928985e-05, "loss": 0.2983, "step": 35460, "task_loss": 0.5285034775733948 }, { "compression_loss": 0.0, "distillation_loss": 0.2316771298646927, "epoch": 12.82, "learning_rate": 2.413518084182767e-05, "loss": 0.3351, "step": 35470, "task_loss": 0.49286916851997375 }, { "compression_loss": 0.0, "distillation_loss": 0.2711471915245056, "epoch": 12.82, "learning_rate": 2.4068387736706095e-05, "loss": 0.2861, "step": 35480, "task_loss": 0.4698110818862915 }, { "compression_loss": 0.0, "distillation_loss": 0.2512504756450653, "epoch": 12.83, "learning_rate": 2.400162521697195e-05, "loss": 0.294, "step": 35490, "task_loss": 0.32535380125045776 }, { "compression_loss": 0.0, "distillation_loss": 0.3419429659843445, "epoch": 12.83, "learning_rate": 2.3934893626875248e-05, "loss": 0.3093, "step": 35500, "task_loss": 0.4717838764190674 }, { "epoch": 12.83, "eval_exact_match": 83.36802270577105, "eval_f1": 89.98336633831875, "step": 35500 }, { "compression_loss": 0.0, "distillation_loss": 0.26069217920303345, "epoch": 12.83, "learning_rate": 2.3868193310506494e-05, "loss": 0.3388, "step": 35510, "task_loss": 0.7599429488182068 }, { "compression_loss": 0.0, "distillation_loss": 0.22465810179710388, "epoch": 12.84, "learning_rate": 2.3801524611794955e-05, "loss": 0.2862, "step": 35520, "task_loss": 0.5255027413368225 }, { "compression_loss": 0.0, "distillation_loss": 0.3821221590042114, "epoch": 12.84, "learning_rate": 2.373488787450686e-05, "loss": 0.2972, "step": 35530, "task_loss": 0.5774866938591003 }, { "compression_loss": 0.0, "distillation_loss": 0.2761344909667969, "epoch": 12.84, "learning_rate": 2.3668283442243642e-05, "loss": 0.2888, "step": 35540, "task_loss": 0.48153436183929443 }, { "compression_loss": 0.0, "distillation_loss": 0.30200979113578796, "epoch": 12.85, "learning_rate": 2.3601711658440125e-05, "loss": 0.3074, "step": 35550, "task_loss": 0.48879027366638184 }, { "compression_loss": 0.0, "distillation_loss": 0.27535152435302734, "epoch": 12.85, "learning_rate": 2.353517286636285e-05, "loss": 0.291, "step": 35560, "task_loss": 0.36273616552352905 }, { "compression_loss": 0.0, "distillation_loss": 0.36965012550354004, "epoch": 12.86, "learning_rate": 2.3468667409108163e-05, "loss": 0.3443, "step": 35570, "task_loss": 0.4493734836578369 }, { "compression_loss": 0.0, "distillation_loss": 0.3609243631362915, "epoch": 12.86, "learning_rate": 2.34021956296006e-05, "loss": 0.3018, "step": 35580, "task_loss": 0.5398873686790466 }, { "compression_loss": 0.0, "distillation_loss": 0.2682161331176758, "epoch": 12.86, "learning_rate": 2.333575787059099e-05, "loss": 0.2933, "step": 35590, "task_loss": 0.4194197952747345 }, { "compression_loss": 0.0, "distillation_loss": 0.28163596987724304, "epoch": 12.87, "learning_rate": 2.3269354474654765e-05, "loss": 0.3208, "step": 35600, "task_loss": 0.28600168228149414 }, { "compression_loss": 0.0, "distillation_loss": 0.19686777889728546, "epoch": 12.87, "learning_rate": 2.320298578419016e-05, "loss": 0.2912, "step": 35610, "task_loss": 0.33698779344558716 }, { "compression_loss": 0.0, "distillation_loss": 0.3002614378929138, "epoch": 12.87, "learning_rate": 2.3136652141416464e-05, "loss": 0.3026, "step": 35620, "task_loss": 0.7368478775024414 }, { "compression_loss": 0.0, "distillation_loss": 0.27924036979675293, "epoch": 12.88, "learning_rate": 2.3070353888372245e-05, "loss": 0.2995, "step": 35630, "task_loss": 0.7203919887542725 }, { "compression_loss": 0.0, "distillation_loss": 0.32518601417541504, "epoch": 12.88, "learning_rate": 2.300409136691359e-05, "loss": 0.2785, "step": 35640, "task_loss": 0.8650093078613281 }, { "compression_loss": 0.0, "distillation_loss": 0.31950339674949646, "epoch": 12.88, "learning_rate": 2.2937864918712325e-05, "loss": 0.3148, "step": 35650, "task_loss": 0.5481790900230408 }, { "compression_loss": 0.0, "distillation_loss": 0.3315272927284241, "epoch": 12.89, "learning_rate": 2.287167488525432e-05, "loss": 0.2794, "step": 35660, "task_loss": 0.5987747311592102 }, { "compression_loss": 0.0, "distillation_loss": 0.3169107437133789, "epoch": 12.89, "learning_rate": 2.2805521607837617e-05, "loss": 0.3253, "step": 35670, "task_loss": 0.4451930522918701 }, { "compression_loss": 0.0, "distillation_loss": 0.28908562660217285, "epoch": 12.89, "learning_rate": 2.2739405427570775e-05, "loss": 0.3056, "step": 35680, "task_loss": 0.5222287178039551 }, { "compression_loss": 0.0, "distillation_loss": 0.2468404620885849, "epoch": 12.9, "learning_rate": 2.267332668537104e-05, "loss": 0.3056, "step": 35690, "task_loss": 0.6599795818328857 }, { "compression_loss": 0.0, "distillation_loss": 0.3190767168998718, "epoch": 12.9, "learning_rate": 2.260728572196264e-05, "loss": 0.3096, "step": 35700, "task_loss": 0.4459420442581177 }, { "compression_loss": 0.0, "distillation_loss": 0.33378738164901733, "epoch": 12.91, "learning_rate": 2.2541282877874973e-05, "loss": 0.331, "step": 35710, "task_loss": 0.7016539573669434 }, { "compression_loss": 0.0, "distillation_loss": 0.36465150117874146, "epoch": 12.91, "learning_rate": 2.2475318493440918e-05, "loss": 0.3183, "step": 35720, "task_loss": 0.37818336486816406 }, { "compression_loss": 0.0, "distillation_loss": 0.2557653784751892, "epoch": 12.91, "learning_rate": 2.2409392908794996e-05, "loss": 0.3124, "step": 35730, "task_loss": 0.4792822599411011 }, { "compression_loss": 0.0, "distillation_loss": 0.26674747467041016, "epoch": 12.92, "learning_rate": 2.2343506463871732e-05, "loss": 0.2902, "step": 35740, "task_loss": 0.28261226415634155 }, { "compression_loss": 0.0, "distillation_loss": 0.29377883672714233, "epoch": 12.92, "learning_rate": 2.227765949840374e-05, "loss": 0.3053, "step": 35750, "task_loss": 0.5766406059265137 }, { "epoch": 12.92, "eval_exact_match": 83.22611163670766, "eval_f1": 90.02511649594013, "step": 35750 }, { "compression_loss": 0.0, "distillation_loss": 0.23586252331733704, "epoch": 12.92, "learning_rate": 2.2211852351920155e-05, "loss": 0.275, "step": 35760, "task_loss": 0.46377095580101013 }, { "compression_loss": 0.0, "distillation_loss": 0.25810280442237854, "epoch": 12.93, "learning_rate": 2.214608536374474e-05, "loss": 0.3178, "step": 35770, "task_loss": 0.4173620641231537 }, { "compression_loss": 0.0, "distillation_loss": 0.2911950349807739, "epoch": 12.93, "learning_rate": 2.2080358872994204e-05, "loss": 0.3009, "step": 35780, "task_loss": 0.7365570664405823 }, { "compression_loss": 0.0, "distillation_loss": 0.3325164318084717, "epoch": 12.93, "learning_rate": 2.201467321857642e-05, "loss": 0.3085, "step": 35790, "task_loss": 0.7347078323364258 }, { "compression_loss": 0.0, "distillation_loss": 0.21316677331924438, "epoch": 12.94, "learning_rate": 2.1949028739188735e-05, "loss": 0.3124, "step": 35800, "task_loss": 0.2687242031097412 }, { "compression_loss": 0.0, "distillation_loss": 0.3000262379646301, "epoch": 12.94, "learning_rate": 2.188342577331614e-05, "loss": 0.3113, "step": 35810, "task_loss": 0.494425892829895 }, { "compression_loss": 0.0, "distillation_loss": 0.22712643444538116, "epoch": 12.95, "learning_rate": 2.1817864659229586e-05, "loss": 0.2945, "step": 35820, "task_loss": 0.3827125132083893 }, { "compression_loss": 0.0, "distillation_loss": 0.2773548364639282, "epoch": 12.95, "learning_rate": 2.175234573498424e-05, "loss": 0.3116, "step": 35830, "task_loss": 0.37510883808135986 }, { "compression_loss": 0.0, "distillation_loss": 0.34838151931762695, "epoch": 12.95, "learning_rate": 2.1686869338417688e-05, "loss": 0.2949, "step": 35840, "task_loss": 0.8953801989555359 }, { "compression_loss": 0.0, "distillation_loss": 0.28336194157600403, "epoch": 12.96, "learning_rate": 2.162143580714827e-05, "loss": 0.3152, "step": 35850, "task_loss": 0.4684157371520996 }, { "compression_loss": 0.0, "distillation_loss": 0.27176231145858765, "epoch": 12.96, "learning_rate": 2.1556045478573257e-05, "loss": 0.2948, "step": 35860, "task_loss": 0.5649899244308472 }, { "compression_loss": 0.0, "distillation_loss": 0.3015035390853882, "epoch": 12.96, "learning_rate": 2.149069868986719e-05, "loss": 0.2936, "step": 35870, "task_loss": 0.7198494672775269 }, { "compression_loss": 0.0, "distillation_loss": 0.2527949810028076, "epoch": 12.97, "learning_rate": 2.1425395777980063e-05, "loss": 0.3064, "step": 35880, "task_loss": 0.45133841037750244 }, { "compression_loss": 0.0, "distillation_loss": 0.2298678755760193, "epoch": 12.97, "learning_rate": 2.136013707963567e-05, "loss": 0.2759, "step": 35890, "task_loss": 0.46662408113479614 }, { "compression_loss": 0.0, "distillation_loss": 0.2663559913635254, "epoch": 12.97, "learning_rate": 2.1294922931329776e-05, "loss": 0.3132, "step": 35900, "task_loss": 0.3944147825241089 }, { "compression_loss": 0.0, "distillation_loss": 0.28284984827041626, "epoch": 12.98, "learning_rate": 2.12297536693285e-05, "loss": 0.2986, "step": 35910, "task_loss": 0.6652886867523193 }, { "compression_loss": 0.0, "distillation_loss": 0.316769540309906, "epoch": 12.98, "learning_rate": 2.116462962966643e-05, "loss": 0.3136, "step": 35920, "task_loss": 0.6186679601669312 }, { "compression_loss": 0.0, "distillation_loss": 0.26545512676239014, "epoch": 12.99, "learning_rate": 2.1099551148145046e-05, "loss": 0.3145, "step": 35930, "task_loss": 0.32127320766448975 }, { "compression_loss": 0.0, "distillation_loss": 0.3045346140861511, "epoch": 12.99, "learning_rate": 2.103451856033086e-05, "loss": 0.3368, "step": 35940, "task_loss": 0.5651464462280273 }, { "compression_loss": 0.0, "distillation_loss": 0.3307114243507385, "epoch": 12.99, "learning_rate": 2.0969532201553784e-05, "loss": 0.3048, "step": 35950, "task_loss": 0.7461162209510803 }, { "compression_loss": 0.0, "distillation_loss": 0.23095571994781494, "epoch": 13.0, "learning_rate": 2.0904592406905308e-05, "loss": 0.3029, "step": 35960, "task_loss": 0.5822739601135254 }, { "compression_loss": 0.0, "distillation_loss": 0.2591657042503357, "epoch": 13.0, "learning_rate": 2.083969951123688e-05, "loss": 0.3287, "step": 35970, "task_loss": 0.45596399903297424 }, { "compression_loss": 0.0, "distillation_loss": 0.2697155773639679, "epoch": 13.0, "learning_rate": 2.077485384915807e-05, "loss": 0.3053, "step": 35980, "task_loss": 0.38982054591178894 }, { "compression_loss": 0.0, "distillation_loss": 0.25783872604370117, "epoch": 13.01, "learning_rate": 2.071005575503493e-05, "loss": 0.2639, "step": 35990, "task_loss": 0.4523618221282959 }, { "compression_loss": 0.0, "distillation_loss": 0.2397623360157013, "epoch": 13.01, "learning_rate": 2.0645305562988208e-05, "loss": 0.3041, "step": 36000, "task_loss": 0.26546239852905273 }, { "epoch": 13.01, "eval_exact_match": 83.08420056764427, "eval_f1": 89.96743931947285, "step": 36000 }, { "compression_loss": 0.0, "distillation_loss": 0.29155221581459045, "epoch": 13.01, "learning_rate": 2.0580603606891696e-05, "loss": 0.3011, "step": 36010, "task_loss": 0.455023854970932 }, { "compression_loss": 0.0, "distillation_loss": 0.25799697637557983, "epoch": 13.02, "learning_rate": 2.0515950220370395e-05, "loss": 0.2589, "step": 36020, "task_loss": 0.9454857110977173 }, { "compression_loss": 0.0, "distillation_loss": 0.24174681305885315, "epoch": 13.02, "learning_rate": 2.0451345736798946e-05, "loss": 0.2773, "step": 36030, "task_loss": 0.7607046365737915 }, { "compression_loss": 0.0, "distillation_loss": 0.32882970571517944, "epoch": 13.02, "learning_rate": 2.0386790489299768e-05, "loss": 0.2842, "step": 36040, "task_loss": 0.4914087653160095 }, { "compression_loss": 0.0, "distillation_loss": 0.37801340222358704, "epoch": 13.03, "learning_rate": 2.0322284810741438e-05, "loss": 0.2852, "step": 36050, "task_loss": 0.872128963470459 }, { "compression_loss": 0.0, "distillation_loss": 0.29121482372283936, "epoch": 13.03, "learning_rate": 2.0257829033736913e-05, "loss": 0.2849, "step": 36060, "task_loss": 0.5817961096763611 }, { "compression_loss": 0.0, "distillation_loss": 0.30120038986206055, "epoch": 13.04, "learning_rate": 2.0193423490641865e-05, "loss": 0.2789, "step": 36070, "task_loss": 0.8409297466278076 }, { "compression_loss": 0.0, "distillation_loss": 0.2733917534351349, "epoch": 13.04, "learning_rate": 2.012906851355292e-05, "loss": 0.2832, "step": 36080, "task_loss": 0.7088493704795837 }, { "compression_loss": 0.0, "distillation_loss": 0.34920522570610046, "epoch": 13.04, "learning_rate": 2.0064764434306e-05, "loss": 0.2774, "step": 36090, "task_loss": 0.8275156617164612 }, { "compression_loss": 0.0, "distillation_loss": 0.2293846607208252, "epoch": 13.05, "learning_rate": 2.0000511584474526e-05, "loss": 0.2758, "step": 36100, "task_loss": 0.28478482365608215 }, { "compression_loss": 0.0, "distillation_loss": 0.21923667192459106, "epoch": 13.05, "learning_rate": 1.9936310295367844e-05, "loss": 0.2872, "step": 36110, "task_loss": 0.6393476128578186 }, { "compression_loss": 0.0, "distillation_loss": 0.23265597224235535, "epoch": 13.05, "learning_rate": 1.9872160898029345e-05, "loss": 0.2657, "step": 36120, "task_loss": 0.3859668970108032 }, { "compression_loss": 0.0, "distillation_loss": 0.3707078695297241, "epoch": 13.06, "learning_rate": 1.9808063723234918e-05, "loss": 0.275, "step": 36130, "task_loss": 0.7055017948150635 }, { "compression_loss": 0.0, "distillation_loss": 0.2488030344247818, "epoch": 13.06, "learning_rate": 1.9744019101491147e-05, "loss": 0.3085, "step": 36140, "task_loss": 0.3865114450454712 }, { "compression_loss": 0.0, "distillation_loss": 0.2739602327346802, "epoch": 13.06, "learning_rate": 1.968002736303364e-05, "loss": 0.3111, "step": 36150, "task_loss": 0.4336225986480713 }, { "compression_loss": 0.0, "distillation_loss": 0.24673353135585785, "epoch": 13.07, "learning_rate": 1.9616088837825307e-05, "loss": 0.277, "step": 36160, "task_loss": 0.626995325088501 }, { "compression_loss": 0.0, "distillation_loss": 0.24530833959579468, "epoch": 13.07, "learning_rate": 1.9552203855554702e-05, "loss": 0.2805, "step": 36170, "task_loss": 0.31983304023742676 }, { "compression_loss": 0.0, "distillation_loss": 0.37968188524246216, "epoch": 13.08, "learning_rate": 1.948837274563426e-05, "loss": 0.2758, "step": 36180, "task_loss": 0.33091604709625244 }, { "compression_loss": 0.0, "distillation_loss": 0.20261365175247192, "epoch": 13.08, "learning_rate": 1.9424595837198654e-05, "loss": 0.2734, "step": 36190, "task_loss": 0.5553550124168396 }, { "compression_loss": 0.0, "distillation_loss": 0.2725498080253601, "epoch": 13.08, "learning_rate": 1.936087345910306e-05, "loss": 0.2921, "step": 36200, "task_loss": 0.3602065443992615 }, { "compression_loss": 0.0, "distillation_loss": 0.22300642728805542, "epoch": 13.09, "learning_rate": 1.929720593992152e-05, "loss": 0.2541, "step": 36210, "task_loss": 0.37333545088768005 }, { "compression_loss": 0.0, "distillation_loss": 0.2175058275461197, "epoch": 13.09, "learning_rate": 1.9233593607945127e-05, "loss": 0.2579, "step": 36220, "task_loss": 0.2774814963340759 }, { "compression_loss": 0.0, "distillation_loss": 0.24256110191345215, "epoch": 13.09, "learning_rate": 1.917003679118049e-05, "loss": 0.2731, "step": 36230, "task_loss": 0.3967154920101166 }, { "compression_loss": 0.0, "distillation_loss": 0.20085860788822174, "epoch": 13.1, "learning_rate": 1.9106535817347912e-05, "loss": 0.2741, "step": 36240, "task_loss": 0.39077961444854736 }, { "compression_loss": 0.0, "distillation_loss": 0.25992414355278015, "epoch": 13.1, "learning_rate": 1.9043091013879773e-05, "loss": 0.2689, "step": 36250, "task_loss": 0.6975187063217163 }, { "epoch": 13.1, "eval_exact_match": 83.15042573320719, "eval_f1": 90.02105443305646, "step": 36250 }, { "compression_loss": 0.0, "distillation_loss": 0.27337756752967834, "epoch": 13.1, "learning_rate": 1.897970270791881e-05, "loss": 0.2923, "step": 36260, "task_loss": 0.6318705081939697 }, { "compression_loss": 0.0, "distillation_loss": 0.25147658586502075, "epoch": 13.11, "learning_rate": 1.891637122631645e-05, "loss": 0.2631, "step": 36270, "task_loss": 0.4380658268928528 }, { "compression_loss": 0.0, "distillation_loss": 0.24042510986328125, "epoch": 13.11, "learning_rate": 1.88530968956311e-05, "loss": 0.2943, "step": 36280, "task_loss": 0.4693765640258789 }, { "compression_loss": 0.0, "distillation_loss": 0.2659061551094055, "epoch": 13.12, "learning_rate": 1.8789880042126502e-05, "loss": 0.2875, "step": 36290, "task_loss": 0.5884963274002075 }, { "compression_loss": 0.0, "distillation_loss": 0.28706324100494385, "epoch": 13.12, "learning_rate": 1.8726720991769983e-05, "loss": 0.2792, "step": 36300, "task_loss": 0.6943851709365845 }, { "compression_loss": 0.0, "distillation_loss": 0.27372080087661743, "epoch": 13.12, "learning_rate": 1.8663620070230873e-05, "loss": 0.3017, "step": 36310, "task_loss": 0.6589608192443848 }, { "compression_loss": 0.0, "distillation_loss": 0.26573216915130615, "epoch": 13.13, "learning_rate": 1.8600577602878722e-05, "loss": 0.2827, "step": 36320, "task_loss": 0.418401300907135 }, { "compression_loss": 0.0, "distillation_loss": 0.2142913043498993, "epoch": 13.13, "learning_rate": 1.8537593914781706e-05, "loss": 0.2583, "step": 36330, "task_loss": 1.0775578022003174 }, { "compression_loss": 0.0, "distillation_loss": 0.33948519825935364, "epoch": 13.13, "learning_rate": 1.847466933070489e-05, "loss": 0.2959, "step": 36340, "task_loss": 0.8234223127365112 }, { "compression_loss": 0.0, "distillation_loss": 0.2795005738735199, "epoch": 13.14, "learning_rate": 1.8411804175108595e-05, "loss": 0.2732, "step": 36350, "task_loss": 0.662077784538269 }, { "compression_loss": 0.0, "distillation_loss": 0.287386417388916, "epoch": 13.14, "learning_rate": 1.8348998772146698e-05, "loss": 0.2564, "step": 36360, "task_loss": 0.8269932270050049 }, { "compression_loss": 0.0, "distillation_loss": 0.23500356078147888, "epoch": 13.14, "learning_rate": 1.828625344566498e-05, "loss": 0.2576, "step": 36370, "task_loss": 0.5284491777420044 }, { "compression_loss": 0.0, "distillation_loss": 0.2579915523529053, "epoch": 13.15, "learning_rate": 1.822356851919943e-05, "loss": 0.2789, "step": 36380, "task_loss": 0.5953745245933533 }, { "compression_loss": 0.0, "distillation_loss": 0.27623817324638367, "epoch": 13.15, "learning_rate": 1.816094431597464e-05, "loss": 0.2708, "step": 36390, "task_loss": 0.582027792930603 }, { "compression_loss": 0.0, "distillation_loss": 0.23820829391479492, "epoch": 13.16, "learning_rate": 1.8098381158902023e-05, "loss": 0.2726, "step": 36400, "task_loss": 0.6655611395835876 }, { "compression_loss": 0.0, "distillation_loss": 0.24597719311714172, "epoch": 13.16, "learning_rate": 1.803587937057828e-05, "loss": 0.2771, "step": 36410, "task_loss": 0.2989462912082672 }, { "compression_loss": 0.0, "distillation_loss": 0.2530842423439026, "epoch": 13.16, "learning_rate": 1.7973439273283633e-05, "loss": 0.2647, "step": 36420, "task_loss": 0.3660958409309387 }, { "compression_loss": 0.0, "distillation_loss": 0.2751024663448334, "epoch": 13.17, "learning_rate": 1.791106118898024e-05, "loss": 0.273, "step": 36430, "task_loss": 0.7918814420700073 }, { "compression_loss": 0.0, "distillation_loss": 0.21775180101394653, "epoch": 13.17, "learning_rate": 1.7848745439310454e-05, "loss": 0.251, "step": 36440, "task_loss": 0.6314330697059631 }, { "compression_loss": 0.0, "distillation_loss": 0.23425433039665222, "epoch": 13.17, "learning_rate": 1.7786492345595258e-05, "loss": 0.2819, "step": 36450, "task_loss": 0.5131993293762207 }, { "compression_loss": 0.0, "distillation_loss": 0.24818137288093567, "epoch": 13.18, "learning_rate": 1.7724302228832518e-05, "loss": 0.2778, "step": 36460, "task_loss": 0.5289952754974365 }, { "compression_loss": 0.0, "distillation_loss": 0.2537691295146942, "epoch": 13.18, "learning_rate": 1.7662175409695403e-05, "loss": 0.3113, "step": 36470, "task_loss": 0.515838623046875 }, { "compression_loss": 0.0, "distillation_loss": 0.4750899076461792, "epoch": 13.18, "learning_rate": 1.760011220853067e-05, "loss": 0.3064, "step": 36480, "task_loss": 0.7261573672294617 }, { "compression_loss": 0.0, "distillation_loss": 0.26949992775917053, "epoch": 13.19, "learning_rate": 1.7538112945357074e-05, "loss": 0.2587, "step": 36490, "task_loss": 0.7647272348403931 }, { "compression_loss": 0.0, "distillation_loss": 0.22216796875, "epoch": 13.19, "learning_rate": 1.747617793986364e-05, "loss": 0.273, "step": 36500, "task_loss": 0.3125956952571869 }, { "epoch": 13.19, "eval_exact_match": 83.4247871333964, "eval_f1": 90.10237204205706, "step": 36500 }, { "compression_loss": 0.0, "distillation_loss": 0.2604176998138428, "epoch": 13.19, "learning_rate": 1.7414307511408105e-05, "loss": 0.2708, "step": 36510, "task_loss": 0.7602921724319458 }, { "compression_loss": 0.0, "distillation_loss": 0.32731837034225464, "epoch": 13.2, "learning_rate": 1.7352501979015185e-05, "loss": 0.2674, "step": 36520, "task_loss": 0.29305794835090637 }, { "compression_loss": 0.0, "distillation_loss": 0.30296820402145386, "epoch": 13.2, "learning_rate": 1.7290761661374998e-05, "loss": 0.2957, "step": 36530, "task_loss": 0.5144740343093872 }, { "compression_loss": 0.0, "distillation_loss": 0.23465166985988617, "epoch": 13.21, "learning_rate": 1.722908687684138e-05, "loss": 0.254, "step": 36540, "task_loss": 0.3443576693534851 }, { "compression_loss": 0.0, "distillation_loss": 0.28396230936050415, "epoch": 13.21, "learning_rate": 1.7167477943430275e-05, "loss": 0.2687, "step": 36550, "task_loss": 0.6265298128128052 }, { "compression_loss": 0.0, "distillation_loss": 0.24885854125022888, "epoch": 13.21, "learning_rate": 1.710593517881802e-05, "loss": 0.2967, "step": 36560, "task_loss": 0.49757009744644165 }, { "compression_loss": 0.0, "distillation_loss": 0.2213917374610901, "epoch": 13.22, "learning_rate": 1.7044458900339855e-05, "loss": 0.2624, "step": 36570, "task_loss": 0.45970025658607483 }, { "compression_loss": 0.0, "distillation_loss": 0.23916515707969666, "epoch": 13.22, "learning_rate": 1.6983049424988106e-05, "loss": 0.2659, "step": 36580, "task_loss": 0.7370936870574951 }, { "compression_loss": 0.0, "distillation_loss": 0.2173445075750351, "epoch": 13.22, "learning_rate": 1.6921707069410698e-05, "loss": 0.2602, "step": 36590, "task_loss": 0.3339085578918457 }, { "compression_loss": 0.0, "distillation_loss": 0.25310006737709045, "epoch": 13.23, "learning_rate": 1.686043214990943e-05, "loss": 0.2725, "step": 36600, "task_loss": 0.5834509134292603 }, { "compression_loss": 0.0, "distillation_loss": 0.2566218674182892, "epoch": 13.23, "learning_rate": 1.6799224982438413e-05, "loss": 0.2721, "step": 36610, "task_loss": 0.3749829828739166 }, { "compression_loss": 0.0, "distillation_loss": 0.2179889976978302, "epoch": 13.23, "learning_rate": 1.6738085882602358e-05, "loss": 0.3091, "step": 36620, "task_loss": 0.4416264295578003 }, { "compression_loss": 0.0, "distillation_loss": 0.25685155391693115, "epoch": 13.24, "learning_rate": 1.6677015165655034e-05, "loss": 0.2846, "step": 36630, "task_loss": 0.3986138701438904 }, { "compression_loss": 0.0, "distillation_loss": 0.3165907859802246, "epoch": 13.24, "learning_rate": 1.6616013146497598e-05, "loss": 0.3063, "step": 36640, "task_loss": 0.36311423778533936 }, { "compression_loss": 0.0, "distillation_loss": 0.21515513956546783, "epoch": 13.25, "learning_rate": 1.6555080139676972e-05, "loss": 0.2745, "step": 36650, "task_loss": 0.3444489538669586 }, { "compression_loss": 0.0, "distillation_loss": 0.28814902901649475, "epoch": 13.25, "learning_rate": 1.6494216459384225e-05, "loss": 0.2534, "step": 36660, "task_loss": 0.6024624705314636 }, { "compression_loss": 0.0, "distillation_loss": 0.24540041387081146, "epoch": 13.25, "learning_rate": 1.6433422419452973e-05, "loss": 0.2658, "step": 36670, "task_loss": 0.6741258502006531 }, { "compression_loss": 0.0, "distillation_loss": 0.22696809470653534, "epoch": 13.26, "learning_rate": 1.6372698333357705e-05, "loss": 0.266, "step": 36680, "task_loss": 0.4832145571708679 }, { "compression_loss": 0.0, "distillation_loss": 0.31842273473739624, "epoch": 13.26, "learning_rate": 1.6312044514212245e-05, "loss": 0.2864, "step": 36690, "task_loss": 0.5585686564445496 }, { "compression_loss": 0.0, "distillation_loss": 0.28510862588882446, "epoch": 13.26, "learning_rate": 1.625146127476807e-05, "loss": 0.2635, "step": 36700, "task_loss": 0.5174915790557861 }, { "compression_loss": 0.0, "distillation_loss": 0.26577115058898926, "epoch": 13.27, "learning_rate": 1.6190948927412758e-05, "loss": 0.2706, "step": 36710, "task_loss": 0.7755802869796753 }, { "compression_loss": 0.0, "distillation_loss": 0.21998828649520874, "epoch": 13.27, "learning_rate": 1.613050778416828e-05, "loss": 0.2745, "step": 36720, "task_loss": 0.4503459632396698 }, { "compression_loss": 0.0, "distillation_loss": 0.34111788868904114, "epoch": 13.27, "learning_rate": 1.607013815668954e-05, "loss": 0.3072, "step": 36730, "task_loss": 0.6373797655105591 }, { "compression_loss": 0.0, "distillation_loss": 0.18357953429222107, "epoch": 13.28, "learning_rate": 1.6009840356262603e-05, "loss": 0.2499, "step": 36740, "task_loss": 0.23571716248989105 }, { "compression_loss": 0.0, "distillation_loss": 0.19039833545684814, "epoch": 13.28, "learning_rate": 1.594961469380322e-05, "loss": 0.2918, "step": 36750, "task_loss": 0.5455808043479919 }, { "epoch": 13.28, "eval_exact_match": 82.98959318826869, "eval_f1": 89.86127481224247, "step": 36750 }, { "compression_loss": 0.0, "distillation_loss": 0.2975670099258423, "epoch": 13.29, "learning_rate": 1.5889461479855153e-05, "loss": 0.2895, "step": 36760, "task_loss": 0.6404368281364441 }, { "compression_loss": 0.0, "distillation_loss": 0.2604735493659973, "epoch": 13.29, "learning_rate": 1.5829381024588614e-05, "loss": 0.2847, "step": 36770, "task_loss": 0.5445793867111206 }, { "compression_loss": 0.0, "distillation_loss": 0.23690715432167053, "epoch": 13.29, "learning_rate": 1.576937363779861e-05, "loss": 0.2696, "step": 36780, "task_loss": 0.5591181516647339 }, { "compression_loss": 0.0, "distillation_loss": 0.2577180862426758, "epoch": 13.3, "learning_rate": 1.570943962890342e-05, "loss": 0.2738, "step": 36790, "task_loss": 0.5541560649871826 }, { "compression_loss": 0.0, "distillation_loss": 0.2612329125404358, "epoch": 13.3, "learning_rate": 1.5649579306942943e-05, "loss": 0.27, "step": 36800, "task_loss": 0.48154765367507935 }, { "compression_loss": 0.0, "distillation_loss": 0.25569772720336914, "epoch": 13.3, "learning_rate": 1.558979298057715e-05, "loss": 0.2397, "step": 36810, "task_loss": 0.5663192868232727 }, { "compression_loss": 0.0, "distillation_loss": 0.24582552909851074, "epoch": 13.31, "learning_rate": 1.55300809580844e-05, "loss": 0.2712, "step": 36820, "task_loss": 0.39500463008880615 }, { "compression_loss": 0.0, "distillation_loss": 0.26157790422439575, "epoch": 13.31, "learning_rate": 1.5470443547360003e-05, "loss": 0.2886, "step": 36830, "task_loss": 0.5957146883010864 }, { "compression_loss": 0.0, "distillation_loss": 0.3169706463813782, "epoch": 13.31, "learning_rate": 1.5410881055914478e-05, "loss": 0.3012, "step": 36840, "task_loss": 0.5682312846183777 }, { "compression_loss": 0.0, "distillation_loss": 0.2886982858181, "epoch": 13.32, "learning_rate": 1.5351393790872065e-05, "loss": 0.2565, "step": 36850, "task_loss": 0.5027368664741516 }, { "compression_loss": 0.0, "distillation_loss": 0.2950834631919861, "epoch": 13.32, "learning_rate": 1.529198205896911e-05, "loss": 0.2677, "step": 36860, "task_loss": 0.46899813413619995 }, { "compression_loss": 0.0, "distillation_loss": 0.22698701918125153, "epoch": 13.32, "learning_rate": 1.5232646166552486e-05, "loss": 0.2933, "step": 36870, "task_loss": 0.334617555141449 }, { "compression_loss": 0.0, "distillation_loss": 0.2680467665195465, "epoch": 13.33, "learning_rate": 1.517338641957799e-05, "loss": 0.2625, "step": 36880, "task_loss": 0.32778269052505493 }, { "compression_loss": 0.0, "distillation_loss": 0.18022421002388, "epoch": 13.33, "learning_rate": 1.5114203123608817e-05, "loss": 0.3041, "step": 36890, "task_loss": 0.47484010457992554 }, { "compression_loss": 0.0, "distillation_loss": 0.2711172103881836, "epoch": 13.34, "learning_rate": 1.5055096583813937e-05, "loss": 0.2648, "step": 36900, "task_loss": 0.6222798824310303 }, { "compression_loss": 0.0, "distillation_loss": 0.33530765771865845, "epoch": 13.34, "learning_rate": 1.499606710496656e-05, "loss": 0.2891, "step": 36910, "task_loss": 0.5925235152244568 }, { "compression_loss": 0.0, "distillation_loss": 0.33584457635879517, "epoch": 13.34, "learning_rate": 1.4937114991442491e-05, "loss": 0.2857, "step": 36920, "task_loss": 0.4298863410949707 }, { "compression_loss": 0.0, "distillation_loss": 0.28718897700309753, "epoch": 13.35, "learning_rate": 1.48782405472187e-05, "loss": 0.3004, "step": 36930, "task_loss": 0.34843045473098755 }, { "compression_loss": 0.0, "distillation_loss": 0.3173147737979889, "epoch": 13.35, "learning_rate": 1.4819444075871585e-05, "loss": 0.2667, "step": 36940, "task_loss": 0.42160385847091675 }, { "compression_loss": 0.0, "distillation_loss": 0.27303415536880493, "epoch": 13.35, "learning_rate": 1.4760725880575534e-05, "loss": 0.2751, "step": 36950, "task_loss": 0.508614718914032 }, { "compression_loss": 0.0, "distillation_loss": 0.23166467249393463, "epoch": 13.36, "learning_rate": 1.4702086264101305e-05, "loss": 0.2968, "step": 36960, "task_loss": 0.6358349323272705 }, { "compression_loss": 0.0, "distillation_loss": 0.2538238763809204, "epoch": 13.36, "learning_rate": 1.4643525528814492e-05, "loss": 0.299, "step": 36970, "task_loss": 0.6361325979232788 }, { "compression_loss": 0.0, "distillation_loss": 0.16733571887016296, "epoch": 13.36, "learning_rate": 1.4585043976673916e-05, "loss": 0.2509, "step": 36980, "task_loss": 0.2583733797073364 }, { "compression_loss": 0.0, "distillation_loss": 0.30147695541381836, "epoch": 13.37, "learning_rate": 1.4526641909230136e-05, "loss": 0.3064, "step": 36990, "task_loss": 0.45961910486221313 }, { "compression_loss": 0.0, "distillation_loss": 0.33842676877975464, "epoch": 13.37, "learning_rate": 1.446831962762385e-05, "loss": 0.265, "step": 37000, "task_loss": 0.7715430855751038 }, { "epoch": 13.37, "eval_exact_match": 83.28287606433302, "eval_f1": 90.0359133389191, "step": 37000 }, { "compression_loss": 0.0, "distillation_loss": 0.24696779251098633, "epoch": 13.38, "learning_rate": 1.4410077432584366e-05, "loss": 0.2908, "step": 37010, "task_loss": 0.3677746057510376 }, { "compression_loss": 0.0, "distillation_loss": 0.2108057141304016, "epoch": 13.38, "learning_rate": 1.435191562442799e-05, "loss": 0.257, "step": 37020, "task_loss": 0.38845571875572205 }, { "compression_loss": 0.0, "distillation_loss": 0.18310099840164185, "epoch": 13.38, "learning_rate": 1.4293834503056611e-05, "loss": 0.2736, "step": 37030, "task_loss": 0.3544849753379822 }, { "compression_loss": 0.0, "distillation_loss": 0.26856935024261475, "epoch": 13.39, "learning_rate": 1.4235834367955988e-05, "loss": 0.2669, "step": 37040, "task_loss": 0.813249945640564 }, { "compression_loss": 0.0, "distillation_loss": 0.27939939498901367, "epoch": 13.39, "learning_rate": 1.417791551819433e-05, "loss": 0.2738, "step": 37050, "task_loss": 0.4761803448200226 }, { "compression_loss": 0.0, "distillation_loss": 0.2665058970451355, "epoch": 13.39, "learning_rate": 1.4120078252420704e-05, "loss": 0.2915, "step": 37060, "task_loss": 0.35540011525154114 }, { "compression_loss": 0.0, "distillation_loss": 0.2548675835132599, "epoch": 13.4, "learning_rate": 1.4062322868863515e-05, "loss": 0.3536, "step": 37070, "task_loss": 0.5708368420600891 }, { "compression_loss": 0.0, "distillation_loss": 0.2190103977918625, "epoch": 13.4, "learning_rate": 1.4004649665328914e-05, "loss": 0.2515, "step": 37080, "task_loss": 0.5469504594802856 }, { "compression_loss": 0.0, "distillation_loss": 0.1838226318359375, "epoch": 13.4, "learning_rate": 1.3947058939199343e-05, "loss": 0.2486, "step": 37090, "task_loss": 0.3871685862541199 }, { "compression_loss": 0.0, "distillation_loss": 0.1938687115907669, "epoch": 13.41, "learning_rate": 1.388955098743196e-05, "loss": 0.2762, "step": 37100, "task_loss": 0.6385823488235474 }, { "compression_loss": 0.0, "distillation_loss": 0.26518717408180237, "epoch": 13.41, "learning_rate": 1.3832126106557103e-05, "loss": 0.288, "step": 37110, "task_loss": 0.784468412399292 }, { "compression_loss": 0.0, "distillation_loss": 0.20624689757823944, "epoch": 13.42, "learning_rate": 1.377478459267674e-05, "loss": 0.2598, "step": 37120, "task_loss": 0.3261072635650635 }, { "compression_loss": 0.0, "distillation_loss": 0.1749204695224762, "epoch": 13.42, "learning_rate": 1.3717526741463045e-05, "loss": 0.2694, "step": 37130, "task_loss": 0.3123036324977875 }, { "compression_loss": 0.0, "distillation_loss": 0.3422029912471771, "epoch": 13.42, "learning_rate": 1.3660352848156717e-05, "loss": 0.2659, "step": 37140, "task_loss": 0.37898117303848267 }, { "compression_loss": 0.0, "distillation_loss": 0.21857774257659912, "epoch": 13.43, "learning_rate": 1.3603263207565584e-05, "loss": 0.2627, "step": 37150, "task_loss": 0.3548792600631714 }, { "compression_loss": 0.0, "distillation_loss": 0.3651176691055298, "epoch": 13.43, "learning_rate": 1.3546258114063033e-05, "loss": 0.2737, "step": 37160, "task_loss": 0.5343203544616699 }, { "compression_loss": 0.0, "distillation_loss": 0.24481065571308136, "epoch": 13.43, "learning_rate": 1.3489337861586507e-05, "loss": 0.2748, "step": 37170, "task_loss": 0.40071243047714233 }, { "compression_loss": 0.0, "distillation_loss": 0.2896316349506378, "epoch": 13.44, "learning_rate": 1.3432502743635948e-05, "loss": 0.2827, "step": 37180, "task_loss": 0.4923239052295685 }, { "compression_loss": 0.0, "distillation_loss": 0.23948924243450165, "epoch": 13.44, "learning_rate": 1.3375753053272343e-05, "loss": 0.274, "step": 37190, "task_loss": 0.47201618552207947 }, { "compression_loss": 0.0, "distillation_loss": 0.23711787164211273, "epoch": 13.44, "learning_rate": 1.3319089083116176e-05, "loss": 0.2608, "step": 37200, "task_loss": 0.49309107661247253 }, { "compression_loss": 0.0, "distillation_loss": 0.33351051807403564, "epoch": 13.45, "learning_rate": 1.326251112534595e-05, "loss": 0.2907, "step": 37210, "task_loss": 0.7690310478210449 }, { "compression_loss": 0.0, "distillation_loss": 0.30798155069351196, "epoch": 13.45, "learning_rate": 1.32060194716966e-05, "loss": 0.2686, "step": 37220, "task_loss": 0.9489891529083252 }, { "compression_loss": 0.0, "distillation_loss": 0.2473529577255249, "epoch": 13.46, "learning_rate": 1.3149614413458139e-05, "loss": 0.2617, "step": 37230, "task_loss": 0.7801364660263062 }, { "compression_loss": 0.0, "distillation_loss": 0.3227131962776184, "epoch": 13.46, "learning_rate": 1.3093296241473975e-05, "loss": 0.2915, "step": 37240, "task_loss": 0.5432208180427551 }, { "compression_loss": 0.0, "distillation_loss": 0.24519944190979004, "epoch": 13.46, "learning_rate": 1.3037065246139558e-05, "loss": 0.2713, "step": 37250, "task_loss": 0.3368620276451111 }, { "epoch": 13.46, "eval_exact_match": 83.4247871333964, "eval_f1": 90.01338222798685, "step": 37250 }, { "compression_loss": 0.0, "distillation_loss": 0.31522136926651, "epoch": 13.47, "learning_rate": 1.2980921717400803e-05, "loss": 0.2731, "step": 37260, "task_loss": 0.7898993492126465 }, { "compression_loss": 0.0, "distillation_loss": 0.27051877975463867, "epoch": 13.47, "learning_rate": 1.292486594475264e-05, "loss": 0.2496, "step": 37270, "task_loss": 0.5739043354988098 }, { "compression_loss": 0.0, "distillation_loss": 0.33544689416885376, "epoch": 13.47, "learning_rate": 1.2868898217237457e-05, "loss": 0.2742, "step": 37280, "task_loss": 0.44129830598831177 }, { "compression_loss": 0.0, "distillation_loss": 0.20862698554992676, "epoch": 13.48, "learning_rate": 1.2813018823443685e-05, "loss": 0.2452, "step": 37290, "task_loss": 0.27721577882766724 }, { "compression_loss": 0.0, "distillation_loss": 0.24522057175636292, "epoch": 13.48, "learning_rate": 1.2757228051504276e-05, "loss": 0.2757, "step": 37300, "task_loss": 0.5336194038391113 }, { "compression_loss": 0.0, "distillation_loss": 0.2989899516105652, "epoch": 13.48, "learning_rate": 1.2701526189095216e-05, "loss": 0.2576, "step": 37310, "task_loss": 0.4906134009361267 }, { "compression_loss": 0.0, "distillation_loss": 0.3206063210964203, "epoch": 13.49, "learning_rate": 1.2645913523434012e-05, "loss": 0.2517, "step": 37320, "task_loss": 0.6886972188949585 }, { "compression_loss": 0.0, "distillation_loss": 0.2796739339828491, "epoch": 13.49, "learning_rate": 1.2590390341278302e-05, "loss": 0.2925, "step": 37330, "task_loss": 0.3325718641281128 }, { "compression_loss": 0.0, "distillation_loss": 0.3092641830444336, "epoch": 13.49, "learning_rate": 1.2534956928924263e-05, "loss": 0.2733, "step": 37340, "task_loss": 0.41676968336105347 }, { "compression_loss": 0.0, "distillation_loss": 0.23041902482509613, "epoch": 13.5, "learning_rate": 1.2479613572205212e-05, "loss": 0.2796, "step": 37350, "task_loss": 0.4734857678413391 }, { "compression_loss": 0.0, "distillation_loss": 0.2079801857471466, "epoch": 13.5, "learning_rate": 1.2424360556490109e-05, "loss": 0.2577, "step": 37360, "task_loss": 0.3639516532421112 }, { "compression_loss": 0.0, "distillation_loss": 0.24474698305130005, "epoch": 13.51, "learning_rate": 1.2369198166682094e-05, "loss": 0.276, "step": 37370, "task_loss": 0.4654710292816162 }, { "compression_loss": 0.0, "distillation_loss": 0.2466435730457306, "epoch": 13.51, "learning_rate": 1.2314126687216974e-05, "loss": 0.2784, "step": 37380, "task_loss": 0.3153854012489319 }, { "compression_loss": 0.0, "distillation_loss": 0.2671513855457306, "epoch": 13.51, "learning_rate": 1.2259146402061829e-05, "loss": 0.2773, "step": 37390, "task_loss": 0.6717286109924316 }, { "compression_loss": 0.0, "distillation_loss": 0.31550562381744385, "epoch": 13.52, "learning_rate": 1.2204257594713501e-05, "loss": 0.2574, "step": 37400, "task_loss": 0.389668345451355 }, { "compression_loss": 0.0, "distillation_loss": 0.2816436290740967, "epoch": 13.52, "learning_rate": 1.2149460548197143e-05, "loss": 0.2817, "step": 37410, "task_loss": 0.5112844705581665 }, { "compression_loss": 0.0, "distillation_loss": 0.2804926037788391, "epoch": 13.52, "learning_rate": 1.2094755545064719e-05, "loss": 0.2612, "step": 37420, "task_loss": 0.4115559458732605 }, { "compression_loss": 0.0, "distillation_loss": 0.3953973054885864, "epoch": 13.53, "learning_rate": 1.2040142867393671e-05, "loss": 0.2852, "step": 37430, "task_loss": 0.3008333444595337 }, { "compression_loss": 0.0, "distillation_loss": 0.3041832149028778, "epoch": 13.53, "learning_rate": 1.1985622796785295e-05, "loss": 0.2876, "step": 37440, "task_loss": 0.303343802690506 }, { "compression_loss": 0.0, "distillation_loss": 0.19918349385261536, "epoch": 13.53, "learning_rate": 1.1931195614363417e-05, "loss": 0.2762, "step": 37450, "task_loss": 0.2883751392364502 }, { "compression_loss": 0.0, "distillation_loss": 0.3426469564437866, "epoch": 13.54, "learning_rate": 1.1876861600772893e-05, "loss": 0.3045, "step": 37460, "task_loss": 0.582063615322113 }, { "compression_loss": 0.0, "distillation_loss": 0.1825534999370575, "epoch": 13.54, "learning_rate": 1.1822621036178173e-05, "loss": 0.2802, "step": 37470, "task_loss": 0.3473503589630127 }, { "compression_loss": 0.0, "distillation_loss": 0.2241651713848114, "epoch": 13.55, "learning_rate": 1.1768474200261838e-05, "loss": 0.2738, "step": 37480, "task_loss": 0.5678466558456421 }, { "compression_loss": 0.0, "distillation_loss": 0.288207471370697, "epoch": 13.55, "learning_rate": 1.1714421372223179e-05, "loss": 0.275, "step": 37490, "task_loss": 0.5218504667282104 }, { "compression_loss": 0.0, "distillation_loss": 0.24978035688400269, "epoch": 13.55, "learning_rate": 1.1660462830776766e-05, "loss": 0.2603, "step": 37500, "task_loss": 0.4382033050060272 }, { "epoch": 13.55, "eval_exact_match": 83.32071901608326, "eval_f1": 89.90574640557605, "step": 37500 }, { "compression_loss": 0.0, "distillation_loss": 0.21151039004325867, "epoch": 13.56, "learning_rate": 1.1606598854150983e-05, "loss": 0.2647, "step": 37510, "task_loss": 0.38994455337524414 }, { "compression_loss": 0.0, "distillation_loss": 0.2562404274940491, "epoch": 13.56, "learning_rate": 1.1552829720086581e-05, "loss": 0.2566, "step": 37520, "task_loss": 0.6831725835800171 }, { "compression_loss": 0.0, "distillation_loss": 0.21861696243286133, "epoch": 13.56, "learning_rate": 1.1499155705835338e-05, "loss": 0.2516, "step": 37530, "task_loss": 0.24222993850708008 }, { "compression_loss": 0.0, "distillation_loss": 0.2870420217514038, "epoch": 13.57, "learning_rate": 1.1445577088158486e-05, "loss": 0.2781, "step": 37540, "task_loss": 0.4957524240016937 }, { "compression_loss": 0.0, "distillation_loss": 0.25489604473114014, "epoch": 13.57, "learning_rate": 1.1392094143325404e-05, "loss": 0.2559, "step": 37550, "task_loss": 0.3785157799720764 }, { "compression_loss": 0.0, "distillation_loss": 0.23829561471939087, "epoch": 13.57, "learning_rate": 1.1338707147112145e-05, "loss": 0.2596, "step": 37560, "task_loss": 0.49412262439727783 }, { "compression_loss": 0.0, "distillation_loss": 0.2603868544101715, "epoch": 13.58, "learning_rate": 1.1285416374800018e-05, "loss": 0.2809, "step": 37570, "task_loss": 0.42271482944488525 }, { "compression_loss": 0.0, "distillation_loss": 0.2052895724773407, "epoch": 13.58, "learning_rate": 1.1232222101174148e-05, "loss": 0.2781, "step": 37580, "task_loss": 0.18088670074939728 }, { "compression_loss": 0.0, "distillation_loss": 0.3106672763824463, "epoch": 13.59, "learning_rate": 1.1179124600522107e-05, "loss": 0.273, "step": 37590, "task_loss": 0.551022469997406 }, { "compression_loss": 0.0, "distillation_loss": 0.29829642176628113, "epoch": 13.59, "learning_rate": 1.1126124146632464e-05, "loss": 0.2803, "step": 37600, "task_loss": 0.4130031168460846 }, { "compression_loss": 0.0, "distillation_loss": 0.29676663875579834, "epoch": 13.59, "learning_rate": 1.1073221012793393e-05, "loss": 0.29, "step": 37610, "task_loss": 0.2675638198852539 }, { "compression_loss": 0.0, "distillation_loss": 0.31227415800094604, "epoch": 13.6, "learning_rate": 1.102041547179121e-05, "loss": 0.2634, "step": 37620, "task_loss": 0.5230624675750732 }, { "compression_loss": 0.0, "distillation_loss": 0.22656656801700592, "epoch": 13.6, "learning_rate": 1.0967707795909077e-05, "loss": 0.264, "step": 37630, "task_loss": 0.4179735481739044 }, { "compression_loss": 0.0, "distillation_loss": 0.2572036385536194, "epoch": 13.6, "learning_rate": 1.0915098256925474e-05, "loss": 0.2931, "step": 37640, "task_loss": 0.5777921676635742 }, { "compression_loss": 0.0, "distillation_loss": 0.22712549567222595, "epoch": 13.61, "learning_rate": 1.0862587126112873e-05, "loss": 0.261, "step": 37650, "task_loss": 0.4374583065509796 }, { "compression_loss": 0.0, "distillation_loss": 0.3408048748970032, "epoch": 13.61, "learning_rate": 1.0810174674236335e-05, "loss": 0.3011, "step": 37660, "task_loss": 0.6254220008850098 }, { "compression_loss": 0.0, "distillation_loss": 0.2701667547225952, "epoch": 13.61, "learning_rate": 1.0757861171552074e-05, "loss": 0.2735, "step": 37670, "task_loss": 0.7898134589195251 }, { "compression_loss": 0.0, "distillation_loss": 0.2562268078327179, "epoch": 13.62, "learning_rate": 1.0705646887806119e-05, "loss": 0.2414, "step": 37680, "task_loss": 0.7383904457092285 }, { "compression_loss": 0.0, "distillation_loss": 0.2719931900501251, "epoch": 13.62, "learning_rate": 1.0653532092232843e-05, "loss": 0.2863, "step": 37690, "task_loss": 0.7737250924110413 }, { "compression_loss": 0.0, "distillation_loss": 0.27693867683410645, "epoch": 13.62, "learning_rate": 1.0601517053553697e-05, "loss": 0.3029, "step": 37700, "task_loss": 0.4310859739780426 }, { "compression_loss": 0.0, "distillation_loss": 0.2542468011379242, "epoch": 13.63, "learning_rate": 1.0549602039975683e-05, "loss": 0.2914, "step": 37710, "task_loss": 0.4154024124145508 }, { "compression_loss": 0.0, "distillation_loss": 0.2769840955734253, "epoch": 13.63, "learning_rate": 1.0497787319190076e-05, "loss": 0.2727, "step": 37720, "task_loss": 0.5301538705825806 }, { "compression_loss": 0.0, "distillation_loss": 0.23731346428394318, "epoch": 13.64, "learning_rate": 1.0446073158370996e-05, "loss": 0.287, "step": 37730, "task_loss": 0.6987727880477905 }, { "compression_loss": 0.0, "distillation_loss": 0.3362783193588257, "epoch": 13.64, "learning_rate": 1.0394459824174065e-05, "loss": 0.2774, "step": 37740, "task_loss": 1.005785584449768 }, { "compression_loss": 0.0, "distillation_loss": 0.33067774772644043, "epoch": 13.64, "learning_rate": 1.0342947582734962e-05, "loss": 0.2619, "step": 37750, "task_loss": 0.7543010711669922 }, { "epoch": 13.64, "eval_exact_match": 83.12204351939451, "eval_f1": 89.86274805388804, "step": 37750 }, { "compression_loss": 0.0, "distillation_loss": 0.2364063858985901, "epoch": 13.65, "learning_rate": 1.029153669966814e-05, "loss": 0.2825, "step": 37760, "task_loss": 0.892463207244873 }, { "compression_loss": 0.0, "distillation_loss": 0.25400644540786743, "epoch": 13.65, "learning_rate": 1.02402274400654e-05, "loss": 0.2813, "step": 37770, "task_loss": 0.4577575623989105 }, { "compression_loss": 0.0, "distillation_loss": 0.18836143612861633, "epoch": 13.65, "learning_rate": 1.0189020068494543e-05, "loss": 0.2781, "step": 37780, "task_loss": 0.47670626640319824 }, { "compression_loss": 0.0, "distillation_loss": 0.3060493469238281, "epoch": 13.66, "learning_rate": 1.0137914848997974e-05, "loss": 0.2802, "step": 37790, "task_loss": 0.5032558441162109 }, { "compression_loss": 0.0, "distillation_loss": 0.21806681156158447, "epoch": 13.66, "learning_rate": 1.0086912045091422e-05, "loss": 0.2802, "step": 37800, "task_loss": 0.6232914328575134 }, { "compression_loss": 0.0, "distillation_loss": 0.3554821014404297, "epoch": 13.66, "learning_rate": 1.0036011919762471e-05, "loss": 0.2783, "step": 37810, "task_loss": 1.0437252521514893 }, { "compression_loss": 0.0, "distillation_loss": 0.17857030034065247, "epoch": 13.67, "learning_rate": 9.98521473546929e-06, "loss": 0.2551, "step": 37820, "task_loss": 0.293536901473999 }, { "compression_loss": 0.0, "distillation_loss": 0.2957502007484436, "epoch": 13.67, "learning_rate": 9.93452075413924e-06, "loss": 0.2708, "step": 37830, "task_loss": 0.6464605331420898 }, { "compression_loss": 0.0, "distillation_loss": 0.266282856464386, "epoch": 13.68, "learning_rate": 9.88393023716755e-06, "loss": 0.2543, "step": 37840, "task_loss": 0.849335789680481 }, { "compression_loss": 0.0, "distillation_loss": 0.233785018324852, "epoch": 13.68, "learning_rate": 9.833443445415917e-06, "loss": 0.2606, "step": 37850, "task_loss": 1.1107016801834106 }, { "compression_loss": 0.0, "distillation_loss": 0.24783793091773987, "epoch": 13.68, "learning_rate": 9.783060639211225e-06, "loss": 0.3038, "step": 37860, "task_loss": 0.616372287273407 }, { "compression_loss": 0.0, "distillation_loss": 0.24006393551826477, "epoch": 13.69, "learning_rate": 9.732782078344166e-06, "loss": 0.2648, "step": 37870, "task_loss": 0.7905951142311096 }, { "compression_loss": 0.0, "distillation_loss": 0.4207942485809326, "epoch": 13.69, "learning_rate": 9.682608022067903e-06, "loss": 0.3115, "step": 37880, "task_loss": 1.0007433891296387 }, { "compression_loss": 0.0, "distillation_loss": 0.2837132215499878, "epoch": 13.69, "learning_rate": 9.632538729096749e-06, "loss": 0.25, "step": 37890, "task_loss": 0.38253968954086304 }, { "compression_loss": 0.0, "distillation_loss": 0.23701059818267822, "epoch": 13.7, "learning_rate": 9.58257445760482e-06, "loss": 0.2581, "step": 37900, "task_loss": 0.3679601550102234 }, { "compression_loss": 0.0, "distillation_loss": 0.24502034485340118, "epoch": 13.7, "learning_rate": 9.532715465224678e-06, "loss": 0.2914, "step": 37910, "task_loss": 0.435784250497818 }, { "compression_loss": 0.0, "distillation_loss": 0.28633737564086914, "epoch": 13.7, "learning_rate": 9.482962009046073e-06, "loss": 0.2968, "step": 37920, "task_loss": 0.4898184537887573 }, { "compression_loss": 0.0, "distillation_loss": 0.2632879912853241, "epoch": 13.71, "learning_rate": 9.433314345614548e-06, "loss": 0.2611, "step": 37930, "task_loss": 0.6624996066093445 }, { "compression_loss": 0.0, "distillation_loss": 0.1981096863746643, "epoch": 13.71, "learning_rate": 9.383772730930163e-06, "loss": 0.2542, "step": 37940, "task_loss": 0.4208466112613678 }, { "compression_loss": 0.0, "distillation_loss": 0.25894618034362793, "epoch": 13.72, "learning_rate": 9.334337420446116e-06, "loss": 0.3009, "step": 37950, "task_loss": 0.699665367603302 }, { "compression_loss": 0.0, "distillation_loss": 0.32491040229797363, "epoch": 13.72, "learning_rate": 9.285008669067524e-06, "loss": 0.2866, "step": 37960, "task_loss": 0.42975303530693054 }, { "compression_loss": 0.0, "distillation_loss": 0.23986268043518066, "epoch": 13.72, "learning_rate": 9.23578673114999e-06, "loss": 0.265, "step": 37970, "task_loss": 0.4938848614692688 }, { "compression_loss": 0.0, "distillation_loss": 0.2247878909111023, "epoch": 13.73, "learning_rate": 9.18667186049839e-06, "loss": 0.2574, "step": 37980, "task_loss": 0.4352090060710907 }, { "compression_loss": 0.0, "distillation_loss": 0.3065471649169922, "epoch": 13.73, "learning_rate": 9.14256022874918e-06, "loss": 0.2714, "step": 37990, "task_loss": 0.6363136768341064 }, { "compression_loss": 0.0, "distillation_loss": 0.27550625801086426, "epoch": 13.73, "learning_rate": 9.09364948315695e-06, "loss": 0.2897, "step": 38000, "task_loss": 0.688079833984375 }, { "epoch": 13.73, "eval_exact_match": 83.15988647114474, "eval_f1": 89.95458217564638, "step": 38000 }, { "compression_loss": 0.0, "distillation_loss": 0.22112786769866943, "epoch": 13.74, "learning_rate": 9.044846537738092e-06, "loss": 0.2722, "step": 38010, "task_loss": 0.36097025871276855 }, { "compression_loss": 0.0, "distillation_loss": 0.20671901106834412, "epoch": 13.74, "learning_rate": 8.996151644137009e-06, "loss": 0.2845, "step": 38020, "task_loss": 0.3453246057033539 }, { "compression_loss": 0.0, "distillation_loss": 0.21796171367168427, "epoch": 13.74, "learning_rate": 8.947565053440948e-06, "loss": 0.2786, "step": 38030, "task_loss": 0.25846922397613525 }, { "compression_loss": 0.0, "distillation_loss": 0.2829256057739258, "epoch": 13.75, "learning_rate": 8.89908701617872e-06, "loss": 0.2858, "step": 38040, "task_loss": 0.7385716438293457 }, { "compression_loss": 0.0, "distillation_loss": 0.28261667490005493, "epoch": 13.75, "learning_rate": 8.850717782319354e-06, "loss": 0.2765, "step": 38050, "task_loss": 0.36890166997909546 }, { "compression_loss": 0.0, "distillation_loss": 0.23844432830810547, "epoch": 13.75, "learning_rate": 8.802457601270925e-06, "loss": 0.2739, "step": 38060, "task_loss": 0.4469187259674072 }, { "compression_loss": 0.0, "distillation_loss": 0.2905295789241791, "epoch": 13.76, "learning_rate": 8.754306721879127e-06, "loss": 0.2875, "step": 38070, "task_loss": 0.6334402561187744 }, { "compression_loss": 0.0, "distillation_loss": 0.25112390518188477, "epoch": 13.76, "learning_rate": 8.706265392426096e-06, "loss": 0.2441, "step": 38080, "task_loss": 0.2148028463125229 }, { "compression_loss": 0.0, "distillation_loss": 0.18571755290031433, "epoch": 13.77, "learning_rate": 8.658333860629078e-06, "loss": 0.2691, "step": 38090, "task_loss": 0.4117751121520996 }, { "compression_loss": 0.0, "distillation_loss": 0.29254400730133057, "epoch": 13.77, "learning_rate": 8.610512373639182e-06, "loss": 0.2785, "step": 38100, "task_loss": 0.7656481862068176 }, { "compression_loss": 0.0, "distillation_loss": 0.31580865383148193, "epoch": 13.77, "learning_rate": 8.562801178040054e-06, "loss": 0.2624, "step": 38110, "task_loss": 0.42948007583618164 }, { "compression_loss": 0.0, "distillation_loss": 0.28428199887275696, "epoch": 13.78, "learning_rate": 8.515200519846676e-06, "loss": 0.2668, "step": 38120, "task_loss": 0.7075586915016174 }, { "compression_loss": 0.0, "distillation_loss": 0.20711860060691833, "epoch": 13.78, "learning_rate": 8.46771064450404e-06, "loss": 0.2627, "step": 38130, "task_loss": 0.5416389107704163 }, { "compression_loss": 0.0, "distillation_loss": 0.24480155110359192, "epoch": 13.78, "learning_rate": 8.420331796885931e-06, "loss": 0.2857, "step": 38140, "task_loss": 0.6288049221038818 }, { "compression_loss": 0.0, "distillation_loss": 0.18393155932426453, "epoch": 13.79, "learning_rate": 8.373064221293584e-06, "loss": 0.2625, "step": 38150, "task_loss": 0.23807212710380554 }, { "compression_loss": 0.0, "distillation_loss": 0.23062025010585785, "epoch": 13.79, "learning_rate": 8.325908161454554e-06, "loss": 0.2629, "step": 38160, "task_loss": 0.3707573115825653 }, { "compression_loss": 0.0, "distillation_loss": 0.24218732118606567, "epoch": 13.79, "learning_rate": 8.278863860521317e-06, "loss": 0.2774, "step": 38170, "task_loss": 0.48844727873802185 }, { "compression_loss": 0.0, "distillation_loss": 0.16815409064292908, "epoch": 13.8, "learning_rate": 8.231931561070113e-06, "loss": 0.2581, "step": 38180, "task_loss": 0.25500476360321045 }, { "compression_loss": 0.0, "distillation_loss": 0.24209289252758026, "epoch": 13.8, "learning_rate": 8.185111505099662e-06, "loss": 0.2689, "step": 38190, "task_loss": 0.7029504776000977 }, { "compression_loss": 0.0, "distillation_loss": 0.23915712535381317, "epoch": 13.81, "learning_rate": 8.138403934029924e-06, "loss": 0.2745, "step": 38200, "task_loss": 0.34845811128616333 }, { "compression_loss": 0.0, "distillation_loss": 0.25952520966529846, "epoch": 13.81, "learning_rate": 8.091809088700822e-06, "loss": 0.2516, "step": 38210, "task_loss": 0.8645590543746948 }, { "compression_loss": 0.0, "distillation_loss": 0.25939249992370605, "epoch": 13.81, "learning_rate": 8.045327209371062e-06, "loss": 0.3073, "step": 38220, "task_loss": 0.5040200352668762 }, { "compression_loss": 0.0, "distillation_loss": 0.2844274044036865, "epoch": 13.82, "learning_rate": 7.998958535716842e-06, "loss": 0.2743, "step": 38230, "task_loss": 0.5570114254951477 }, { "compression_loss": 0.0, "distillation_loss": 0.27089565992355347, "epoch": 13.82, "learning_rate": 7.95270330683064e-06, "loss": 0.2819, "step": 38240, "task_loss": 0.4997580647468567 }, { "compression_loss": 0.0, "distillation_loss": 0.3141476511955261, "epoch": 13.82, "learning_rate": 7.906561761219948e-06, "loss": 0.2522, "step": 38250, "task_loss": 1.0347281694412231 }, { "epoch": 13.82, "eval_exact_match": 83.3112582781457, "eval_f1": 90.02387764179856, "step": 38250 }, { "compression_loss": 0.0, "distillation_loss": 0.2614123225212097, "epoch": 13.83, "learning_rate": 7.860534136806123e-06, "loss": 0.2562, "step": 38260, "task_loss": 0.4036688804626465 }, { "compression_loss": 0.0, "distillation_loss": 0.23341913521289825, "epoch": 13.83, "learning_rate": 7.814620670923052e-06, "loss": 0.2655, "step": 38270, "task_loss": 0.3119831681251526 }, { "compression_loss": 0.0, "distillation_loss": 0.34925973415374756, "epoch": 13.83, "learning_rate": 7.768821600316012e-06, "loss": 0.2829, "step": 38280, "task_loss": 0.6085324287414551 }, { "compression_loss": 0.0, "distillation_loss": 0.27616429328918457, "epoch": 13.84, "learning_rate": 7.723137161140407e-06, "loss": 0.2932, "step": 38290, "task_loss": 0.49988824129104614 }, { "compression_loss": 0.0, "distillation_loss": 0.25335800647735596, "epoch": 13.84, "learning_rate": 7.677567588960583e-06, "loss": 0.2753, "step": 38300, "task_loss": 0.4380761384963989 }, { "compression_loss": 0.0, "distillation_loss": 0.25001299381256104, "epoch": 13.85, "learning_rate": 7.632113118748554e-06, "loss": 0.2591, "step": 38310, "task_loss": 0.5579447746276855 }, { "compression_loss": 0.0, "distillation_loss": 0.28439947962760925, "epoch": 13.85, "learning_rate": 7.586773984882869e-06, "loss": 0.2777, "step": 38320, "task_loss": 0.4338080883026123 }, { "compression_loss": 0.0, "distillation_loss": 0.3441578149795532, "epoch": 13.85, "learning_rate": 7.541550421147339e-06, "loss": 0.2936, "step": 38330, "task_loss": 0.6278230547904968 }, { "compression_loss": 0.0, "distillation_loss": 0.29420727491378784, "epoch": 13.86, "learning_rate": 7.496442660729885e-06, "loss": 0.2811, "step": 38340, "task_loss": 0.472691148519516 }, { "compression_loss": 0.0, "distillation_loss": 0.28853827714920044, "epoch": 13.86, "learning_rate": 7.451450936221256e-06, "loss": 0.2634, "step": 38350, "task_loss": 0.36157310009002686 }, { "compression_loss": 0.0, "distillation_loss": 0.2415386438369751, "epoch": 13.86, "learning_rate": 7.4065754796139465e-06, "loss": 0.2792, "step": 38360, "task_loss": 0.5524470806121826 }, { "compression_loss": 0.0, "distillation_loss": 0.23241202533245087, "epoch": 13.87, "learning_rate": 7.3618165223008746e-06, "loss": 0.2783, "step": 38370, "task_loss": 0.27023470401763916 }, { "compression_loss": 0.0, "distillation_loss": 0.20150120556354523, "epoch": 13.87, "learning_rate": 7.31717429507429e-06, "loss": 0.2511, "step": 38380, "task_loss": 0.30635228753089905 }, { "compression_loss": 0.0, "distillation_loss": 0.20298174023628235, "epoch": 13.87, "learning_rate": 7.272649028124523e-06, "loss": 0.2488, "step": 38390, "task_loss": 0.5815449953079224 }, { "compression_loss": 0.0, "distillation_loss": 0.21220584213733673, "epoch": 13.88, "learning_rate": 7.228240951038831e-06, "loss": 0.2501, "step": 38400, "task_loss": 0.5942157506942749 }, { "compression_loss": 0.0, "distillation_loss": 0.23308081924915314, "epoch": 13.88, "learning_rate": 7.183950292800176e-06, "loss": 0.2973, "step": 38410, "task_loss": 0.3363405466079712 }, { "compression_loss": 0.0, "distillation_loss": 0.3139108419418335, "epoch": 13.89, "learning_rate": 7.139777281786092e-06, "loss": 0.2944, "step": 38420, "task_loss": 0.492350697517395 }, { "compression_loss": 0.0, "distillation_loss": 0.25158751010894775, "epoch": 13.89, "learning_rate": 7.095722145767483e-06, "loss": 0.2719, "step": 38430, "task_loss": 0.5319156646728516 }, { "compression_loss": 0.0, "distillation_loss": 0.2320326417684555, "epoch": 13.89, "learning_rate": 7.051785111907449e-06, "loss": 0.2772, "step": 38440, "task_loss": 0.39209455251693726 }, { "compression_loss": 0.0, "distillation_loss": 0.21443305909633636, "epoch": 13.9, "learning_rate": 7.007966406760085e-06, "loss": 0.2823, "step": 38450, "task_loss": 0.20772241055965424 }, { "compression_loss": 0.0, "distillation_loss": 0.26291579008102417, "epoch": 13.9, "learning_rate": 6.964266256269405e-06, "loss": 0.2433, "step": 38460, "task_loss": 0.31175604462623596 }, { "compression_loss": 0.0, "distillation_loss": 0.2669048011302948, "epoch": 13.9, "learning_rate": 6.920684885768056e-06, "loss": 0.2731, "step": 38470, "task_loss": 0.7086321115493774 }, { "compression_loss": 0.0, "distillation_loss": 0.18979918956756592, "epoch": 13.91, "learning_rate": 6.877222519976244e-06, "loss": 0.2844, "step": 38480, "task_loss": 0.2133408784866333 }, { "compression_loss": 0.0, "distillation_loss": 0.28547948598861694, "epoch": 13.91, "learning_rate": 6.833879383000548e-06, "loss": 0.2716, "step": 38490, "task_loss": 0.504301905632019 }, { "compression_loss": 0.0, "distillation_loss": 0.1712222397327423, "epoch": 13.91, "learning_rate": 6.790655698332759e-06, "loss": 0.2693, "step": 38500, "task_loss": 0.39858031272888184 }, { "epoch": 13.91, "eval_exact_match": 83.33964049195838, "eval_f1": 90.03027688318437, "step": 38500 }, { "compression_loss": 0.0, "distillation_loss": 0.20968683063983917, "epoch": 13.92, "learning_rate": 6.747551688848714e-06, "loss": 0.2652, "step": 38510, "task_loss": 0.19403359293937683 }, { "compression_loss": 0.0, "distillation_loss": 0.31207019090652466, "epoch": 13.92, "learning_rate": 6.704567576807191e-06, "loss": 0.276, "step": 38520, "task_loss": 0.6474623680114746 }, { "compression_loss": 0.0, "distillation_loss": 0.2539207935333252, "epoch": 13.92, "learning_rate": 6.661703583848728e-06, "loss": 0.239, "step": 38530, "task_loss": 0.2865660488605499 }, { "compression_loss": 0.0, "distillation_loss": 0.22099719941616058, "epoch": 13.93, "learning_rate": 6.61895993099449e-06, "loss": 0.2723, "step": 38540, "task_loss": 0.3822336792945862 }, { "compression_loss": 0.0, "distillation_loss": 0.18672585487365723, "epoch": 13.93, "learning_rate": 6.576336838645102e-06, "loss": 0.2508, "step": 38550, "task_loss": 0.49510657787323 }, { "compression_loss": 0.0, "distillation_loss": 0.22289982438087463, "epoch": 13.94, "learning_rate": 6.533834526579591e-06, "loss": 0.2732, "step": 38560, "task_loss": 0.4035664200782776 }, { "compression_loss": 0.0, "distillation_loss": 0.19927158951759338, "epoch": 13.94, "learning_rate": 6.4914532139541505e-06, "loss": 0.2728, "step": 38570, "task_loss": 0.21784666180610657 }, { "compression_loss": 0.0, "distillation_loss": 0.25364750623703003, "epoch": 13.94, "learning_rate": 6.449193119301083e-06, "loss": 0.2487, "step": 38580, "task_loss": 0.5803826451301575 }, { "compression_loss": 0.0, "distillation_loss": 0.27665311098098755, "epoch": 13.95, "learning_rate": 6.407054460527651e-06, "loss": 0.2664, "step": 38590, "task_loss": 0.6068881154060364 }, { "compression_loss": 0.0, "distillation_loss": 0.27936381101608276, "epoch": 13.95, "learning_rate": 6.365037454914958e-06, "loss": 0.2908, "step": 38600, "task_loss": 0.3354106843471527 }, { "compression_loss": 0.0, "distillation_loss": 0.30930522084236145, "epoch": 13.95, "learning_rate": 6.323142319116794e-06, "loss": 0.2636, "step": 38610, "task_loss": 0.5985520482063293 }, { "compression_loss": 0.0, "distillation_loss": 0.29672688245773315, "epoch": 13.96, "learning_rate": 6.2813692691585755e-06, "loss": 0.2853, "step": 38620, "task_loss": 0.8483359813690186 }, { "compression_loss": 0.0, "distillation_loss": 0.2600293755531311, "epoch": 13.96, "learning_rate": 6.239718520436195e-06, "loss": 0.2688, "step": 38630, "task_loss": 0.6306997537612915 }, { "compression_loss": 0.0, "distillation_loss": 0.26006078720092773, "epoch": 13.96, "learning_rate": 6.198190287714922e-06, "loss": 0.2678, "step": 38640, "task_loss": 0.49998217821121216 }, { "compression_loss": 0.0, "distillation_loss": 0.19626963138580322, "epoch": 13.97, "learning_rate": 6.156784785128284e-06, "loss": 0.2674, "step": 38650, "task_loss": 0.5400117635726929 }, { "compression_loss": 0.0, "distillation_loss": 0.25180870294570923, "epoch": 13.97, "learning_rate": 6.115502226176989e-06, "loss": 0.261, "step": 38660, "task_loss": 0.32281187176704407 }, { "compression_loss": 0.0, "distillation_loss": 0.20361928641796112, "epoch": 13.98, "learning_rate": 6.074342823727773e-06, "loss": 0.266, "step": 38670, "task_loss": 0.43746501207351685 }, { "compression_loss": 0.0, "distillation_loss": 0.29030585289001465, "epoch": 13.98, "learning_rate": 6.0333067900123664e-06, "loss": 0.2883, "step": 38680, "task_loss": 0.5268990993499756 }, { "compression_loss": 0.0, "distillation_loss": 0.23285171389579773, "epoch": 13.98, "learning_rate": 5.992394336626352e-06, "loss": 0.2654, "step": 38690, "task_loss": 0.3039249777793884 }, { "compression_loss": 0.0, "distillation_loss": 0.34479501843452454, "epoch": 13.99, "learning_rate": 5.951605674528116e-06, "loss": 0.2851, "step": 38700, "task_loss": 0.7995741963386536 }, { "compression_loss": 0.0, "distillation_loss": 0.2292272448539734, "epoch": 13.99, "learning_rate": 5.910941014037681e-06, "loss": 0.2899, "step": 38710, "task_loss": 0.6517189145088196 }, { "compression_loss": 0.0, "distillation_loss": 0.23348368704319, "epoch": 13.99, "learning_rate": 5.870400564835748e-06, "loss": 0.2841, "step": 38720, "task_loss": 0.2811996340751648 }, { "compression_loss": 0.0, "distillation_loss": 0.21506094932556152, "epoch": 14.0, "learning_rate": 5.8299845359624806e-06, "loss": 0.3014, "step": 38730, "task_loss": 0.37734872102737427 }, { "compression_loss": 0.0, "distillation_loss": 0.21314091980457306, "epoch": 14.0, "learning_rate": 5.789693135816522e-06, "loss": 0.2496, "step": 38740, "task_loss": 0.38001203536987305 }, { "compression_loss": 0.0, "distillation_loss": 0.2878772020339966, "epoch": 14.0, "learning_rate": 5.7495265721538805e-06, "loss": 0.2594, "step": 38750, "task_loss": 0.5972883701324463 }, { "epoch": 14.0, "eval_exact_match": 83.13150425733207, "eval_f1": 89.9718447883911, "step": 38750 }, { "compression_loss": 0.0, "distillation_loss": 0.20778994262218475, "epoch": 14.01, "learning_rate": 5.709485052086874e-06, "loss": 0.2567, "step": 38760, "task_loss": 0.30834150314331055 }, { "compression_loss": 0.0, "distillation_loss": 0.20597517490386963, "epoch": 14.01, "learning_rate": 5.669568782083027e-06, "loss": 0.2411, "step": 38770, "task_loss": 0.4118894338607788 }, { "compression_loss": 0.0, "distillation_loss": 0.18355685472488403, "epoch": 14.02, "learning_rate": 5.629777967964059e-06, "loss": 0.2841, "step": 38780, "task_loss": 0.43749743700027466 }, { "compression_loss": 0.0, "distillation_loss": 0.29135215282440186, "epoch": 14.02, "learning_rate": 5.5901128149047855e-06, "loss": 0.2679, "step": 38790, "task_loss": 0.6453703045845032 }, { "compression_loss": 0.0, "distillation_loss": 0.25022754073143005, "epoch": 14.02, "learning_rate": 5.550573527432088e-06, "loss": 0.2557, "step": 38800, "task_loss": 0.7301109433174133 }, { "compression_loss": 0.0, "distillation_loss": 0.3784531354904175, "epoch": 14.03, "learning_rate": 5.511160309423795e-06, "loss": 0.2771, "step": 38810, "task_loss": 1.0786703824996948 }, { "compression_loss": 0.0, "distillation_loss": 0.22921237349510193, "epoch": 14.03, "learning_rate": 5.471873364107755e-06, "loss": 0.2695, "step": 38820, "task_loss": 0.885239839553833 }, { "compression_loss": 0.0, "distillation_loss": 0.27633237838745117, "epoch": 14.03, "learning_rate": 5.432712894060635e-06, "loss": 0.2674, "step": 38830, "task_loss": 0.9187273979187012 }, { "compression_loss": 0.0, "distillation_loss": 0.22725428640842438, "epoch": 14.04, "learning_rate": 5.3936791012070005e-06, "loss": 0.2401, "step": 38840, "task_loss": 0.5373157858848572 }, { "compression_loss": 0.0, "distillation_loss": 0.22272822260856628, "epoch": 14.04, "learning_rate": 5.354772186818211e-06, "loss": 0.2611, "step": 38850, "task_loss": 0.3665705621242523 }, { "compression_loss": 0.0, "distillation_loss": 0.17846062779426575, "epoch": 14.04, "learning_rate": 5.315992351511408e-06, "loss": 0.2476, "step": 38860, "task_loss": 0.30240577459335327 }, { "compression_loss": 0.0, "distillation_loss": 0.2393602430820465, "epoch": 14.05, "learning_rate": 5.277339795248451e-06, "loss": 0.2667, "step": 38870, "task_loss": 0.34489595890045166 }, { "compression_loss": 0.0, "distillation_loss": 0.20027129352092743, "epoch": 14.05, "learning_rate": 5.238814717334919e-06, "loss": 0.2618, "step": 38880, "task_loss": 0.476666122674942 }, { "compression_loss": 0.0, "distillation_loss": 0.28304845094680786, "epoch": 14.05, "learning_rate": 5.2004173164190746e-06, "loss": 0.2651, "step": 38890, "task_loss": 0.8417985439300537 }, { "compression_loss": 0.0, "distillation_loss": 0.27796077728271484, "epoch": 14.06, "learning_rate": 5.16214779049083e-06, "loss": 0.2548, "step": 38900, "task_loss": 0.38383617997169495 }, { "compression_loss": 0.0, "distillation_loss": 0.24468521773815155, "epoch": 14.06, "learning_rate": 5.124006336880716e-06, "loss": 0.2648, "step": 38910, "task_loss": 0.6111380457878113 }, { "compression_loss": 0.0, "distillation_loss": 0.2580738961696625, "epoch": 14.07, "learning_rate": 5.085993152258912e-06, "loss": 0.2666, "step": 38920, "task_loss": 0.5941046476364136 }, { "compression_loss": 0.0, "distillation_loss": 0.21132975816726685, "epoch": 14.07, "learning_rate": 5.048108432634165e-06, "loss": 0.2466, "step": 38930, "task_loss": 0.42760226130485535 }, { "compression_loss": 0.0, "distillation_loss": 0.26778656244277954, "epoch": 14.07, "learning_rate": 5.010352373352838e-06, "loss": 0.2682, "step": 38940, "task_loss": 0.5522580146789551 }, { "compression_loss": 0.0, "distillation_loss": 0.2655670642852783, "epoch": 14.08, "learning_rate": 4.972725169097863e-06, "loss": 0.2532, "step": 38950, "task_loss": 0.710306704044342 }, { "compression_loss": 0.0, "distillation_loss": 0.21947705745697021, "epoch": 14.08, "learning_rate": 4.935227013887771e-06, "loss": 0.2299, "step": 38960, "task_loss": 0.4309462904930115 }, { "compression_loss": 0.0, "distillation_loss": 0.22299286723136902, "epoch": 14.08, "learning_rate": 4.89785810107564e-06, "loss": 0.28, "step": 38970, "task_loss": 0.21669545769691467 }, { "compression_loss": 0.0, "distillation_loss": 0.22194522619247437, "epoch": 14.09, "learning_rate": 4.8606186233481585e-06, "loss": 0.2666, "step": 38980, "task_loss": 0.44074350595474243 }, { "compression_loss": 0.0, "distillation_loss": 0.2186507284641266, "epoch": 14.09, "learning_rate": 4.823508772724598e-06, "loss": 0.2435, "step": 38990, "task_loss": 0.41085922718048096 }, { "compression_loss": 0.0, "distillation_loss": 0.22034092247486115, "epoch": 14.09, "learning_rate": 4.786528740555834e-06, "loss": 0.2503, "step": 39000, "task_loss": 0.5862570405006409 }, { "epoch": 14.09, "eval_exact_match": 83.65184484389782, "eval_f1": 90.2313400355417, "step": 39000 }, { "compression_loss": 0.0, "distillation_loss": 0.210486501455307, "epoch": 14.1, "learning_rate": 4.749678717523327e-06, "loss": 0.2644, "step": 39010, "task_loss": 0.37090587615966797 }, { "compression_loss": 0.0, "distillation_loss": 0.2620948255062103, "epoch": 14.1, "learning_rate": 4.7129588936382125e-06, "loss": 0.2383, "step": 39020, "task_loss": 0.5931086540222168 }, { "compression_loss": 0.0, "distillation_loss": 0.22131627798080444, "epoch": 14.11, "learning_rate": 4.676369458240229e-06, "loss": 0.2592, "step": 39030, "task_loss": 0.47717371582984924 }, { "compression_loss": 0.0, "distillation_loss": 0.20887985825538635, "epoch": 14.11, "learning_rate": 4.639910599996822e-06, "loss": 0.2599, "step": 39040, "task_loss": 0.3629271984100342 }, { "compression_loss": 0.0, "distillation_loss": 0.2679504156112671, "epoch": 14.11, "learning_rate": 4.6035825069021165e-06, "loss": 0.2778, "step": 39050, "task_loss": 0.27707189321517944 }, { "compression_loss": 0.0, "distillation_loss": 0.23027104139328003, "epoch": 14.12, "learning_rate": 4.56738536627599e-06, "loss": 0.2934, "step": 39060, "task_loss": 0.5485252141952515 }, { "compression_loss": 0.0, "distillation_loss": 0.19164533913135529, "epoch": 14.12, "learning_rate": 4.531319364763053e-06, "loss": 0.2653, "step": 39070, "task_loss": 0.44614046812057495 }, { "compression_loss": 0.0, "distillation_loss": 0.2256176471710205, "epoch": 14.12, "learning_rate": 4.495384688331747e-06, "loss": 0.2532, "step": 39080, "task_loss": 0.36074715852737427 }, { "compression_loss": 0.0, "distillation_loss": 0.28316569328308105, "epoch": 14.13, "learning_rate": 4.459581522273347e-06, "loss": 0.2566, "step": 39090, "task_loss": 0.43225523829460144 }, { "compression_loss": 0.0, "distillation_loss": 0.1975765973329544, "epoch": 14.13, "learning_rate": 4.423910051201023e-06, "loss": 0.2412, "step": 39100, "task_loss": 0.46456313133239746 }, { "compression_loss": 0.0, "distillation_loss": 0.31945353746414185, "epoch": 14.13, "learning_rate": 4.388370459048854e-06, "loss": 0.2579, "step": 39110, "task_loss": 0.428020179271698 }, { "compression_loss": 0.0, "distillation_loss": 0.21575048565864563, "epoch": 14.14, "learning_rate": 4.352962929070954e-06, "loss": 0.2495, "step": 39120, "task_loss": 0.5014318227767944 }, { "compression_loss": 0.0, "distillation_loss": 0.23357847332954407, "epoch": 14.14, "learning_rate": 4.317687643840437e-06, "loss": 0.2673, "step": 39130, "task_loss": 0.34480270743370056 }, { "compression_loss": 0.0, "distillation_loss": 0.2436312735080719, "epoch": 14.15, "learning_rate": 4.2825447852485375e-06, "loss": 0.2711, "step": 39140, "task_loss": 0.42724448442459106 }, { "compression_loss": 0.0, "distillation_loss": 0.227299302816391, "epoch": 14.15, "learning_rate": 4.247534534503656e-06, "loss": 0.2647, "step": 39150, "task_loss": 0.5893539190292358 }, { "compression_loss": 0.0, "distillation_loss": 0.2549132704734802, "epoch": 14.15, "learning_rate": 4.212657072130425e-06, "loss": 0.259, "step": 39160, "task_loss": 0.4166608452796936 }, { "compression_loss": 0.0, "distillation_loss": 0.2273089587688446, "epoch": 14.16, "learning_rate": 4.177912577968752e-06, "loss": 0.2439, "step": 39170, "task_loss": 0.30703485012054443 }, { "compression_loss": 0.0, "distillation_loss": 0.27625080943107605, "epoch": 14.16, "learning_rate": 4.143301231172939e-06, "loss": 0.2615, "step": 39180, "task_loss": 0.5613874793052673 }, { "compression_loss": 0.0, "distillation_loss": 0.2191055864095688, "epoch": 14.16, "learning_rate": 4.108823210210733e-06, "loss": 0.247, "step": 39190, "task_loss": 0.4466383755207062 }, { "compression_loss": 0.0, "distillation_loss": 0.17954121530056, "epoch": 14.17, "learning_rate": 4.0744786928624065e-06, "loss": 0.2321, "step": 39200, "task_loss": 0.3059490919113159 }, { "compression_loss": 0.0, "distillation_loss": 0.20783843100070953, "epoch": 14.17, "learning_rate": 4.040267856219822e-06, "loss": 0.235, "step": 39210, "task_loss": 0.41658928990364075 }, { "compression_loss": 0.0, "distillation_loss": 0.2153421938419342, "epoch": 14.17, "learning_rate": 4.006190876685586e-06, "loss": 0.2544, "step": 39220, "task_loss": 0.39804089069366455 }, { "compression_loss": 0.0, "distillation_loss": 0.24418562650680542, "epoch": 14.18, "learning_rate": 3.9722479299720385e-06, "loss": 0.2599, "step": 39230, "task_loss": 0.6925318837165833 }, { "compression_loss": 0.0, "distillation_loss": 0.2603607177734375, "epoch": 14.18, "learning_rate": 3.938439191100437e-06, "loss": 0.2733, "step": 39240, "task_loss": 0.6092582941055298 }, { "compression_loss": 0.0, "distillation_loss": 0.23545420169830322, "epoch": 14.19, "learning_rate": 3.904764834400003e-06, "loss": 0.2586, "step": 39250, "task_loss": 0.40439313650131226 }, { "epoch": 14.19, "eval_exact_match": 83.64238410596026, "eval_f1": 90.21788917699949, "step": 39250 }, { "compression_loss": 0.0, "distillation_loss": 0.2226608842611313, "epoch": 14.19, "learning_rate": 3.8712250335070584e-06, "loss": 0.2538, "step": 39260, "task_loss": 0.7255889177322388 }, { "compression_loss": 0.0, "distillation_loss": 0.15965428948402405, "epoch": 14.19, "learning_rate": 3.837819961364074e-06, "loss": 0.2443, "step": 39270, "task_loss": 0.4545014500617981 }, { "compression_loss": 0.0, "distillation_loss": 0.26116734743118286, "epoch": 14.2, "learning_rate": 3.8045497902188455e-06, "loss": 0.2566, "step": 39280, "task_loss": 0.4564734697341919 }, { "compression_loss": 0.0, "distillation_loss": 0.26344382762908936, "epoch": 14.2, "learning_rate": 3.771414691623565e-06, "loss": 0.251, "step": 39290, "task_loss": 1.0150420665740967 }, { "compression_loss": 0.0, "distillation_loss": 0.19946512579917908, "epoch": 14.2, "learning_rate": 3.7384148364339423e-06, "loss": 0.2671, "step": 39300, "task_loss": 0.4203642010688782 }, { "compression_loss": 0.0, "distillation_loss": 0.2514525353908539, "epoch": 14.21, "learning_rate": 3.7055503948083112e-06, "loss": 0.2652, "step": 39310, "task_loss": 0.6718653440475464 }, { "compression_loss": 0.0, "distillation_loss": 0.27354827523231506, "epoch": 14.21, "learning_rate": 3.6728215362068086e-06, "loss": 0.2628, "step": 39320, "task_loss": 0.5433547496795654 }, { "compression_loss": 0.0, "distillation_loss": 0.2853875756263733, "epoch": 14.21, "learning_rate": 3.640228429390429e-06, "loss": 0.2409, "step": 39330, "task_loss": 0.5214777588844299 }, { "compression_loss": 0.0, "distillation_loss": 0.2823842465877533, "epoch": 14.22, "learning_rate": 3.6077712424201805e-06, "loss": 0.231, "step": 39340, "task_loss": 0.6327749490737915 }, { "compression_loss": 0.0, "distillation_loss": 0.2311672866344452, "epoch": 14.22, "learning_rate": 3.5754501426562393e-06, "loss": 0.2442, "step": 39350, "task_loss": 0.5876401662826538 }, { "compression_loss": 0.0, "distillation_loss": 0.2697458863258362, "epoch": 14.22, "learning_rate": 3.5432652967570623e-06, "loss": 0.2538, "step": 39360, "task_loss": 0.5169529914855957 }, { "compression_loss": 0.0, "distillation_loss": 0.21121466159820557, "epoch": 14.23, "learning_rate": 3.5112168706785487e-06, "loss": 0.2612, "step": 39370, "task_loss": 0.3359443247318268 }, { "compression_loss": 0.0, "distillation_loss": 0.1496531367301941, "epoch": 14.23, "learning_rate": 3.4793050296731365e-06, "loss": 0.2507, "step": 39380, "task_loss": 0.36530622839927673 }, { "compression_loss": 0.0, "distillation_loss": 0.2131895124912262, "epoch": 14.24, "learning_rate": 3.447529938289038e-06, "loss": 0.2535, "step": 39390, "task_loss": 0.32073259353637695 }, { "compression_loss": 0.0, "distillation_loss": 0.24818633496761322, "epoch": 14.24, "learning_rate": 3.4158917603692885e-06, "loss": 0.2409, "step": 39400, "task_loss": 0.3351534307003021 }, { "compression_loss": 0.0, "distillation_loss": 0.23910020291805267, "epoch": 14.24, "learning_rate": 3.384390659050979e-06, "loss": 0.2667, "step": 39410, "task_loss": 0.5424809455871582 }, { "compression_loss": 0.0, "distillation_loss": 0.2993757724761963, "epoch": 14.25, "learning_rate": 3.353026796764378e-06, "loss": 0.2607, "step": 39420, "task_loss": 0.5096408724784851 }, { "compression_loss": 0.0, "distillation_loss": 0.2651025056838989, "epoch": 14.25, "learning_rate": 3.321800335232118e-06, "loss": 0.2468, "step": 39430, "task_loss": 0.3386688232421875 }, { "compression_loss": 0.0, "distillation_loss": 0.24977470934391022, "epoch": 14.25, "learning_rate": 3.2907114354683175e-06, "loss": 0.2505, "step": 39440, "task_loss": 0.3789813816547394 }, { "compression_loss": 0.0, "distillation_loss": 0.23737601935863495, "epoch": 14.26, "learning_rate": 3.2597602577778065e-06, "loss": 0.239, "step": 39450, "task_loss": 0.4992079734802246 }, { "compression_loss": 0.0, "distillation_loss": 0.20579993724822998, "epoch": 14.26, "learning_rate": 3.2289469617552613e-06, "loss": 0.2446, "step": 39460, "task_loss": 0.5167773962020874 }, { "compression_loss": 0.0, "distillation_loss": 0.18076182901859283, "epoch": 14.26, "learning_rate": 3.198271706284409e-06, "loss": 0.2441, "step": 39470, "task_loss": 0.6046625375747681 }, { "compression_loss": 0.0, "distillation_loss": 0.19197119772434235, "epoch": 14.27, "learning_rate": 3.1677346495371616e-06, "loss": 0.248, "step": 39480, "task_loss": 0.26388877630233765 }, { "compression_loss": 0.0, "distillation_loss": 0.22745785117149353, "epoch": 14.27, "learning_rate": 3.1373359489728783e-06, "loss": 0.2513, "step": 39490, "task_loss": 0.3964017629623413 }, { "compression_loss": 0.0, "distillation_loss": 0.29309511184692383, "epoch": 14.28, "learning_rate": 3.107075761337458e-06, "loss": 0.2644, "step": 39500, "task_loss": 0.5477885007858276 }, { "epoch": 14.28, "eval_exact_match": 83.14096499526963, "eval_f1": 90.00285618886593, "step": 39500 }, { "compression_loss": 0.0, "distillation_loss": 0.19775673747062683, "epoch": 14.28, "learning_rate": 3.076954242662615e-06, "loss": 0.2366, "step": 39510, "task_loss": 0.5608834028244019 }, { "compression_loss": 0.0, "distillation_loss": 0.3160851001739502, "epoch": 14.28, "learning_rate": 3.0469715482650264e-06, "loss": 0.2756, "step": 39520, "task_loss": 0.6379892826080322 }, { "compression_loss": 0.0, "distillation_loss": 0.2946835458278656, "epoch": 14.29, "learning_rate": 3.01712783274555e-06, "loss": 0.2552, "step": 39530, "task_loss": 0.5279828906059265 }, { "compression_loss": 0.0, "distillation_loss": 0.24564538896083832, "epoch": 14.29, "learning_rate": 2.987423249988411e-06, "loss": 0.2377, "step": 39540, "task_loss": 0.45563802123069763 }, { "compression_loss": 0.0, "distillation_loss": 0.315203458070755, "epoch": 14.29, "learning_rate": 2.9578579531604335e-06, "loss": 0.2574, "step": 39550, "task_loss": 0.4949073791503906 }, { "compression_loss": 0.0, "distillation_loss": 0.2737749218940735, "epoch": 14.3, "learning_rate": 2.9284320947102227e-06, "loss": 0.2593, "step": 39560, "task_loss": 0.41951417922973633 }, { "compression_loss": 0.0, "distillation_loss": 0.21220695972442627, "epoch": 14.3, "learning_rate": 2.899145826367412e-06, "loss": 0.2522, "step": 39570, "task_loss": 0.40739068388938904 }, { "compression_loss": 0.0, "distillation_loss": 0.18755120038986206, "epoch": 14.3, "learning_rate": 2.869999299141829e-06, "loss": 0.265, "step": 39580, "task_loss": 0.7724450826644897 }, { "compression_loss": 0.0, "distillation_loss": 0.21923671662807465, "epoch": 14.31, "learning_rate": 2.8409926633227947e-06, "loss": 0.2635, "step": 39590, "task_loss": 0.39632830023765564 }, { "compression_loss": 0.0, "distillation_loss": 0.22970925271511078, "epoch": 14.31, "learning_rate": 2.8121260684782567e-06, "loss": 0.246, "step": 39600, "task_loss": 0.34349530935287476 }, { "compression_loss": 0.0, "distillation_loss": 0.2519911229610443, "epoch": 14.32, "learning_rate": 2.7833996634540914e-06, "loss": 0.2908, "step": 39610, "task_loss": 0.5939441919326782 }, { "compression_loss": 0.0, "distillation_loss": 0.16203315556049347, "epoch": 14.32, "learning_rate": 2.7548135963733057e-06, "loss": 0.2331, "step": 39620, "task_loss": 0.20400336384773254 }, { "compression_loss": 0.0, "distillation_loss": 0.23254115879535675, "epoch": 14.32, "learning_rate": 2.726368014635275e-06, "loss": 0.2454, "step": 39630, "task_loss": 0.6872649192810059 }, { "compression_loss": 0.0, "distillation_loss": 0.22584383189678192, "epoch": 14.33, "learning_rate": 2.6980630649149797e-06, "loss": 0.2486, "step": 39640, "task_loss": 0.4566018581390381 }, { "compression_loss": 0.0, "distillation_loss": 0.175010085105896, "epoch": 14.33, "learning_rate": 2.669898893162257e-06, "loss": 0.2542, "step": 39650, "task_loss": 0.36080285906791687 }, { "compression_loss": 0.0, "distillation_loss": 0.2514447569847107, "epoch": 14.33, "learning_rate": 2.641875644601047e-06, "loss": 0.2486, "step": 39660, "task_loss": 0.7753199338912964 }, { "compression_loss": 0.0, "distillation_loss": 0.2958347201347351, "epoch": 14.34, "learning_rate": 2.6139934637286546e-06, "loss": 0.2493, "step": 39670, "task_loss": 0.58829665184021 }, { "compression_loss": 0.0, "distillation_loss": 0.3366328477859497, "epoch": 14.34, "learning_rate": 2.586252494314961e-06, "loss": 0.2687, "step": 39680, "task_loss": 0.6414390802383423 }, { "compression_loss": 0.0, "distillation_loss": 0.2517812252044678, "epoch": 14.34, "learning_rate": 2.558652879401753e-06, "loss": 0.2972, "step": 39690, "task_loss": 0.31333988904953003 }, { "compression_loss": 0.0, "distillation_loss": 0.23453620076179504, "epoch": 14.35, "learning_rate": 2.531194761301907e-06, "loss": 0.2559, "step": 39700, "task_loss": 0.5426726937294006 }, { "compression_loss": 0.0, "distillation_loss": 0.2716495990753174, "epoch": 14.35, "learning_rate": 2.503878281598726e-06, "loss": 0.2516, "step": 39710, "task_loss": 0.7860714197158813 }, { "compression_loss": 0.0, "distillation_loss": 0.18287578225135803, "epoch": 14.35, "learning_rate": 2.476703581145162e-06, "loss": 0.2595, "step": 39720, "task_loss": 0.48666173219680786 }, { "compression_loss": 0.0, "distillation_loss": 0.24697816371917725, "epoch": 14.36, "learning_rate": 2.4496708000631094e-06, "loss": 0.2729, "step": 39730, "task_loss": 0.5118921995162964 }, { "compression_loss": 0.0, "distillation_loss": 0.3017914891242981, "epoch": 14.36, "learning_rate": 2.4227800777426746e-06, "loss": 0.262, "step": 39740, "task_loss": 0.6162266135215759 }, { "compression_loss": 0.0, "distillation_loss": 0.27042317390441895, "epoch": 14.37, "learning_rate": 2.396031552841462e-06, "loss": 0.2544, "step": 39750, "task_loss": 1.038049340248108 }, { "epoch": 14.37, "eval_exact_match": 83.23557237464522, "eval_f1": 89.99843780981955, "step": 39750 }, { "compression_loss": 0.0, "distillation_loss": 0.21385975182056427, "epoch": 14.37, "learning_rate": 2.369425363283865e-06, "loss": 0.2569, "step": 39760, "task_loss": 0.37484338879585266 }, { "compression_loss": 0.0, "distillation_loss": 0.2144310027360916, "epoch": 14.37, "learning_rate": 2.3429616462603477e-06, "loss": 0.253, "step": 39770, "task_loss": 0.592105507850647 }, { "compression_loss": 0.0, "distillation_loss": 0.18579715490341187, "epoch": 14.38, "learning_rate": 2.316640538226721e-06, "loss": 0.2585, "step": 39780, "task_loss": 0.5446082949638367 }, { "compression_loss": 0.0, "distillation_loss": 0.20289719104766846, "epoch": 14.38, "learning_rate": 2.290462174903486e-06, "loss": 0.2512, "step": 39790, "task_loss": 0.3528170883655548 }, { "compression_loss": 0.0, "distillation_loss": 0.23953914642333984, "epoch": 14.38, "learning_rate": 2.2644266912750733e-06, "loss": 0.2547, "step": 39800, "task_loss": 0.43491581082344055 }, { "compression_loss": 0.0, "distillation_loss": 0.22709573805332184, "epoch": 14.39, "learning_rate": 2.238534221589196e-06, "loss": 0.2312, "step": 39810, "task_loss": 0.40255218744277954 }, { "compression_loss": 0.0, "distillation_loss": 0.27763283252716064, "epoch": 14.39, "learning_rate": 2.212784899356136e-06, "loss": 0.2786, "step": 39820, "task_loss": 0.830498218536377 }, { "compression_loss": 0.0, "distillation_loss": 0.2324332594871521, "epoch": 14.39, "learning_rate": 2.187178857348061e-06, "loss": 0.2695, "step": 39830, "task_loss": 0.6241767406463623 }, { "compression_loss": 0.0, "distillation_loss": 0.2514335513114929, "epoch": 14.4, "learning_rate": 2.1617162275983217e-06, "loss": 0.2693, "step": 39840, "task_loss": 0.6369235515594482 }, { "compression_loss": 0.0, "distillation_loss": 0.3284478187561035, "epoch": 14.4, "learning_rate": 2.1363971414008097e-06, "loss": 0.2641, "step": 39850, "task_loss": 0.7467869520187378 }, { "compression_loss": 0.0, "distillation_loss": 0.28706008195877075, "epoch": 14.41, "learning_rate": 2.1112217293092405e-06, "loss": 0.2763, "step": 39860, "task_loss": 0.45901063084602356 }, { "compression_loss": 0.0, "distillation_loss": 0.22485694289207458, "epoch": 14.41, "learning_rate": 2.0861901211365177e-06, "loss": 0.2772, "step": 39870, "task_loss": 0.40547189116477966 }, { "compression_loss": 0.0, "distillation_loss": 0.3032034635543823, "epoch": 14.41, "learning_rate": 2.0613024459540076e-06, "loss": 0.2529, "step": 39880, "task_loss": 0.6667108535766602 }, { "compression_loss": 0.0, "distillation_loss": 0.3540099263191223, "epoch": 14.42, "learning_rate": 2.0365588320909576e-06, "loss": 0.2639, "step": 39890, "task_loss": 0.9680027961730957 }, { "compression_loss": 0.0, "distillation_loss": 0.25109708309173584, "epoch": 14.42, "learning_rate": 2.0119594071337433e-06, "loss": 0.251, "step": 39900, "task_loss": 0.5497735738754272 }, { "compression_loss": 0.0, "distillation_loss": 0.21858660876750946, "epoch": 14.42, "learning_rate": 1.98750429792528e-06, "loss": 0.2511, "step": 39910, "task_loss": 0.22567731142044067 }, { "compression_loss": 0.0, "distillation_loss": 0.2529667913913727, "epoch": 14.43, "learning_rate": 1.9631936305643294e-06, "loss": 0.2542, "step": 39920, "task_loss": 0.6540744304656982 }, { "compression_loss": 0.0, "distillation_loss": 0.2905096411705017, "epoch": 14.43, "learning_rate": 1.9390275304048755e-06, "loss": 0.2536, "step": 39930, "task_loss": 0.6890720129013062 }, { "compression_loss": 0.0, "distillation_loss": 0.21479448676109314, "epoch": 14.43, "learning_rate": 1.915006122055445e-06, "loss": 0.2467, "step": 39940, "task_loss": 0.41722339391708374 }, { "compression_loss": 0.0, "distillation_loss": 0.22225235402584076, "epoch": 14.44, "learning_rate": 1.891129529378508e-06, "loss": 0.2533, "step": 39950, "task_loss": 0.3307686448097229 }, { "compression_loss": 0.0, "distillation_loss": 0.24084031581878662, "epoch": 14.44, "learning_rate": 1.867397875489799e-06, "loss": 0.2514, "step": 39960, "task_loss": 0.584737241268158 }, { "compression_loss": 0.0, "distillation_loss": 0.23029622435569763, "epoch": 14.45, "learning_rate": 1.8438112827577068e-06, "loss": 0.2666, "step": 39970, "task_loss": 0.8088452816009521 }, { "compression_loss": 0.0, "distillation_loss": 0.2523832619190216, "epoch": 14.45, "learning_rate": 1.8203698728026386e-06, "loss": 0.2525, "step": 39980, "task_loss": 0.4110991060733795 }, { "compression_loss": 0.0, "distillation_loss": 0.250593900680542, "epoch": 14.45, "learning_rate": 1.7970737664963832e-06, "loss": 0.2674, "step": 39990, "task_loss": 0.4489569067955017 }, { "compression_loss": 0.0, "distillation_loss": 0.24846208095550537, "epoch": 14.46, "learning_rate": 1.7739230839614962e-06, "loss": 0.273, "step": 40000, "task_loss": 0.5540671944618225 }, { "epoch": 14.46, "eval_exact_match": 83.58561967833491, "eval_f1": 90.22663507791502, "step": 40000 }, { "compression_loss": 0.0, "distillation_loss": 0.24182166159152985, "epoch": 14.46, "learning_rate": 1.7509179445706858e-06, "loss": 0.2589, "step": 40010, "task_loss": 0.34240439534187317 }, { "compression_loss": 0.0, "distillation_loss": 0.19622673094272614, "epoch": 14.46, "learning_rate": 1.7280584669461808e-06, "loss": 0.2581, "step": 40020, "task_loss": 0.298112690448761 }, { "compression_loss": 0.0, "distillation_loss": 0.23341301083564758, "epoch": 14.47, "learning_rate": 1.7053447689591473e-06, "loss": 0.2296, "step": 40030, "task_loss": 0.5251190662384033 }, { "compression_loss": 0.0, "distillation_loss": 0.18002331256866455, "epoch": 14.47, "learning_rate": 1.6827769677290294e-06, "loss": 0.2664, "step": 40040, "task_loss": 0.36037880182266235 }, { "compression_loss": 0.0, "distillation_loss": 0.2654402256011963, "epoch": 14.47, "learning_rate": 1.6603551796230232e-06, "loss": 0.254, "step": 40050, "task_loss": 0.7449613809585571 }, { "compression_loss": 0.0, "distillation_loss": 0.2447563111782074, "epoch": 14.48, "learning_rate": 1.6380795202553866e-06, "loss": 0.2711, "step": 40060, "task_loss": 0.5265692472457886 }, { "compression_loss": 0.0, "distillation_loss": 0.27043670415878296, "epoch": 14.48, "learning_rate": 1.615950104486924e-06, "loss": 0.2571, "step": 40070, "task_loss": 0.5612872838973999 }, { "compression_loss": 0.0, "distillation_loss": 0.27174508571624756, "epoch": 14.49, "learning_rate": 1.5939670464243362e-06, "loss": 0.2778, "step": 40080, "task_loss": 0.26313579082489014 }, { "compression_loss": 0.0, "distillation_loss": 0.23366330564022064, "epoch": 14.49, "learning_rate": 1.572130459419674e-06, "loss": 0.2423, "step": 40090, "task_loss": 0.49717170000076294 }, { "compression_loss": 0.0, "distillation_loss": 0.22687816619873047, "epoch": 14.49, "learning_rate": 1.5504404560697093e-06, "loss": 0.2412, "step": 40100, "task_loss": 0.37348806858062744 }, { "compression_loss": 0.0, "distillation_loss": 0.24465885758399963, "epoch": 14.5, "learning_rate": 1.5288971482153957e-06, "loss": 0.2711, "step": 40110, "task_loss": 0.3896139860153198 }, { "compression_loss": 0.0, "distillation_loss": 0.2719659209251404, "epoch": 14.5, "learning_rate": 1.5075006469412778e-06, "loss": 0.2532, "step": 40120, "task_loss": 0.400590181350708 }, { "compression_loss": 0.0, "distillation_loss": 0.23399712145328522, "epoch": 14.5, "learning_rate": 1.486251062574916e-06, "loss": 0.267, "step": 40130, "task_loss": 0.3120698034763336 }, { "compression_loss": 0.0, "distillation_loss": 0.24053291976451874, "epoch": 14.51, "learning_rate": 1.4651485046862933e-06, "loss": 0.2828, "step": 40140, "task_loss": 0.2387630045413971 }, { "compression_loss": 0.0, "distillation_loss": 0.21442466974258423, "epoch": 14.51, "learning_rate": 1.4441930820873195e-06, "loss": 0.2591, "step": 40150, "task_loss": 0.3812238574028015 }, { "compression_loss": 0.0, "distillation_loss": 0.3263953924179077, "epoch": 14.51, "learning_rate": 1.4233849028311808e-06, "loss": 0.2577, "step": 40160, "task_loss": 0.6377623677253723 }, { "compression_loss": 0.0, "distillation_loss": 0.3390432596206665, "epoch": 14.52, "learning_rate": 1.4027240742118542e-06, "loss": 0.259, "step": 40170, "task_loss": 0.5427772402763367 }, { "compression_loss": 0.0, "distillation_loss": 0.2371370792388916, "epoch": 14.52, "learning_rate": 1.3822107027635178e-06, "loss": 0.2576, "step": 40180, "task_loss": 0.49518412351608276 }, { "compression_loss": 0.0, "distillation_loss": 0.21536654233932495, "epoch": 14.52, "learning_rate": 1.3618448942600182e-06, "loss": 0.2747, "step": 40190, "task_loss": 0.5360449552536011 }, { "compression_loss": 0.0, "distillation_loss": 0.2513597011566162, "epoch": 14.53, "learning_rate": 1.3416267537143035e-06, "loss": 0.2498, "step": 40200, "task_loss": 0.5347855091094971 }, { "compression_loss": 0.0, "distillation_loss": 0.23619955778121948, "epoch": 14.53, "learning_rate": 1.3215563853779112e-06, "loss": 0.2524, "step": 40210, "task_loss": 0.4691365361213684 }, { "compression_loss": 0.0, "distillation_loss": 0.24675723910331726, "epoch": 14.54, "learning_rate": 1.3016338927404047e-06, "loss": 0.2325, "step": 40220, "task_loss": 0.5890612602233887 }, { "compression_loss": 0.0, "distillation_loss": 0.2255321443080902, "epoch": 14.54, "learning_rate": 1.2818593785288645e-06, "loss": 0.2666, "step": 40230, "task_loss": 0.4460175037384033 }, { "compression_loss": 0.0, "distillation_loss": 0.2343456894159317, "epoch": 14.54, "learning_rate": 1.262232944707321e-06, "loss": 0.2699, "step": 40240, "task_loss": 0.2739362120628357 }, { "compression_loss": 0.0, "distillation_loss": 0.24591998755931854, "epoch": 14.55, "learning_rate": 1.2427546924762823e-06, "loss": 0.2648, "step": 40250, "task_loss": 0.5649697780609131 }, { "epoch": 14.55, "eval_exact_match": 83.50993377483444, "eval_f1": 90.18612000703668, "step": 40250 }, { "compression_loss": 0.0, "distillation_loss": 0.28279536962509155, "epoch": 14.55, "learning_rate": 1.2234247222721573e-06, "loss": 0.2494, "step": 40260, "task_loss": 0.7293515205383301 }, { "compression_loss": 0.0, "distillation_loss": 0.275890976190567, "epoch": 14.55, "learning_rate": 1.2042431337667704e-06, "loss": 0.2645, "step": 40270, "task_loss": 0.5459344983100891 }, { "compression_loss": 0.0, "distillation_loss": 0.3029371201992035, "epoch": 14.56, "learning_rate": 1.1852100258668507e-06, "loss": 0.2717, "step": 40280, "task_loss": 0.5998193025588989 }, { "compression_loss": 0.0, "distillation_loss": 0.2221463918685913, "epoch": 14.56, "learning_rate": 1.1663254967134973e-06, "loss": 0.2615, "step": 40290, "task_loss": 0.6425062417984009 }, { "compression_loss": 0.0, "distillation_loss": 0.2234063744544983, "epoch": 14.56, "learning_rate": 1.1475896436816947e-06, "loss": 0.2291, "step": 40300, "task_loss": 0.43196287751197815 }, { "compression_loss": 0.0, "distillation_loss": 0.2834321856498718, "epoch": 14.57, "learning_rate": 1.1290025633797973e-06, "loss": 0.2739, "step": 40310, "task_loss": 0.6205756664276123 }, { "compression_loss": 0.0, "distillation_loss": 0.19937190413475037, "epoch": 14.57, "learning_rate": 1.1105643516490438e-06, "loss": 0.2409, "step": 40320, "task_loss": 0.2715055048465729 }, { "compression_loss": 0.0, "distillation_loss": 0.2276768982410431, "epoch": 14.58, "learning_rate": 1.0922751035630595e-06, "loss": 0.2689, "step": 40330, "task_loss": 0.4545585513114929 }, { "compression_loss": 0.0, "distillation_loss": 0.2939984202384949, "epoch": 14.58, "learning_rate": 1.0741349134273448e-06, "loss": 0.2416, "step": 40340, "task_loss": 0.833946704864502 }, { "compression_loss": 0.0, "distillation_loss": 0.18050755560398102, "epoch": 14.58, "learning_rate": 1.0561438747788377e-06, "loss": 0.2358, "step": 40350, "task_loss": 0.5802313685417175 }, { "compression_loss": 0.0, "distillation_loss": 0.17724663019180298, "epoch": 14.59, "learning_rate": 1.0383020803853682e-06, "loss": 0.2495, "step": 40360, "task_loss": 0.6058967709541321 }, { "compression_loss": 0.0, "distillation_loss": 0.20336687564849854, "epoch": 14.59, "learning_rate": 1.0206096222452321e-06, "loss": 0.2634, "step": 40370, "task_loss": 0.6848382949829102 }, { "compression_loss": 0.0, "distillation_loss": 0.1893875151872635, "epoch": 14.59, "learning_rate": 1.0030665915866944e-06, "loss": 0.2509, "step": 40380, "task_loss": 0.7192158699035645 }, { "compression_loss": 0.0, "distillation_loss": 0.2207334339618683, "epoch": 14.6, "learning_rate": 9.856730788675228e-07, "loss": 0.2711, "step": 40390, "task_loss": 0.6762465238571167 }, { "compression_loss": 0.0, "distillation_loss": 0.2893259525299072, "epoch": 14.6, "learning_rate": 9.68429173774512e-07, "loss": 0.2469, "step": 40400, "task_loss": 0.8992417454719543 }, { "compression_loss": 0.0, "distillation_loss": 0.25874942541122437, "epoch": 14.6, "learning_rate": 9.513349652230407e-07, "loss": 0.278, "step": 40410, "task_loss": 0.3645339608192444 }, { "compression_loss": 0.0, "distillation_loss": 0.21643847227096558, "epoch": 14.61, "learning_rate": 9.343905413565878e-07, "loss": 0.2496, "step": 40420, "task_loss": 0.4934597611427307 }, { "compression_loss": 0.0, "distillation_loss": 0.20562595129013062, "epoch": 14.61, "learning_rate": 9.175959895463138e-07, "loss": 0.263, "step": 40430, "task_loss": 0.4362022578716278 }, { "compression_loss": 0.0, "distillation_loss": 0.2967050075531006, "epoch": 14.62, "learning_rate": 9.009513963905602e-07, "loss": 0.2735, "step": 40440, "task_loss": 1.1383086442947388 }, { "compression_loss": 0.0, "distillation_loss": 0.1844678521156311, "epoch": 14.62, "learning_rate": 8.844568477144644e-07, "loss": 0.248, "step": 40450, "task_loss": 0.38792985677719116 }, { "compression_loss": 0.0, "distillation_loss": 0.25160741806030273, "epoch": 14.62, "learning_rate": 8.681124285694486e-07, "loss": 0.2813, "step": 40460, "task_loss": 0.45143911242485046 }, { "compression_loss": 0.0, "distillation_loss": 0.296841561794281, "epoch": 14.63, "learning_rate": 8.519182232328415e-07, "loss": 0.2561, "step": 40470, "task_loss": 0.6558849811553955 }, { "compression_loss": 0.0, "distillation_loss": 0.2747701108455658, "epoch": 14.63, "learning_rate": 8.358743152074111e-07, "loss": 0.2519, "step": 40480, "task_loss": 0.3261622190475464 }, { "compression_loss": 0.0, "distillation_loss": 0.1828879565000534, "epoch": 14.63, "learning_rate": 8.199807872209452e-07, "loss": 0.2396, "step": 40490, "task_loss": 0.2521137595176697 }, { "compression_loss": 0.0, "distillation_loss": 0.23122264444828033, "epoch": 14.64, "learning_rate": 8.042377212258123e-07, "loss": 0.284, "step": 40500, "task_loss": 0.34270986914634705 }, { "epoch": 14.64, "eval_exact_match": 83.66130558183538, "eval_f1": 90.23675007307362, "step": 40500 }, { "compression_loss": 0.0, "distillation_loss": 0.32861417531967163, "epoch": 14.64, "learning_rate": 7.886451983985576e-07, "loss": 0.2855, "step": 40510, "task_loss": 0.5027445554733276 }, { "compression_loss": 0.0, "distillation_loss": 0.28874510526657104, "epoch": 14.64, "learning_rate": 7.73203299139471e-07, "loss": 0.3079, "step": 40520, "task_loss": 0.6847516894340515 }, { "compression_loss": 0.0, "distillation_loss": 0.24555762112140656, "epoch": 14.65, "learning_rate": 7.579121030721837e-07, "loss": 0.254, "step": 40530, "task_loss": 0.565000057220459 }, { "compression_loss": 0.0, "distillation_loss": 0.24243924021720886, "epoch": 14.65, "learning_rate": 7.427716890432346e-07, "loss": 0.2675, "step": 40540, "task_loss": 0.38796621561050415 }, { "compression_loss": 0.0, "distillation_loss": 0.2042592316865921, "epoch": 14.65, "learning_rate": 7.277821351216984e-07, "loss": 0.2418, "step": 40550, "task_loss": 0.22988463938236237 }, { "compression_loss": 0.0, "distillation_loss": 0.22411584854125977, "epoch": 14.66, "learning_rate": 7.129435185987487e-07, "loss": 0.2477, "step": 40560, "task_loss": 0.4908447861671448 }, { "compression_loss": 0.0, "distillation_loss": 0.1977735459804535, "epoch": 14.66, "learning_rate": 6.982559159872881e-07, "loss": 0.255, "step": 40570, "task_loss": 0.4119817018508911 }, { "compression_loss": 0.0, "distillation_loss": 0.1954713761806488, "epoch": 14.67, "learning_rate": 6.837194030215288e-07, "loss": 0.2546, "step": 40580, "task_loss": 0.5656400918960571 }, { "compression_loss": 0.0, "distillation_loss": 0.26718318462371826, "epoch": 14.67, "learning_rate": 6.693340546566263e-07, "loss": 0.2438, "step": 40590, "task_loss": 0.43914592266082764 }, { "compression_loss": 0.0, "distillation_loss": 0.23150411248207092, "epoch": 14.67, "learning_rate": 6.550999450682693e-07, "loss": 0.2456, "step": 40600, "task_loss": 0.3785105347633362 }, { "compression_loss": 0.0, "distillation_loss": 0.30644112825393677, "epoch": 14.68, "learning_rate": 6.410171476523141e-07, "loss": 0.2634, "step": 40610, "task_loss": 0.5664448738098145 }, { "compression_loss": 0.0, "distillation_loss": 0.265768438577652, "epoch": 14.68, "learning_rate": 6.270857350243974e-07, "loss": 0.2696, "step": 40620, "task_loss": 0.4568207561969757 }, { "compression_loss": 0.0, "distillation_loss": 0.25326547026634216, "epoch": 14.68, "learning_rate": 6.133057790195773e-07, "loss": 0.2648, "step": 40630, "task_loss": 0.4748091697692871 }, { "compression_loss": 0.0, "distillation_loss": 0.20044632256031036, "epoch": 14.69, "learning_rate": 5.996773506919262e-07, "loss": 0.2603, "step": 40640, "task_loss": 0.3128846287727356 }, { "compression_loss": 0.0, "distillation_loss": 0.204111248254776, "epoch": 14.69, "learning_rate": 5.862005203142151e-07, "loss": 0.2759, "step": 40650, "task_loss": 0.6005997657775879 }, { "compression_loss": 0.0, "distillation_loss": 0.253496915102005, "epoch": 14.69, "learning_rate": 5.728753573775069e-07, "loss": 0.2404, "step": 40660, "task_loss": 0.39388108253479004 }, { "compression_loss": 0.0, "distillation_loss": 0.2911907732486725, "epoch": 14.7, "learning_rate": 5.597019305908235e-07, "loss": 0.2716, "step": 40670, "task_loss": 0.5800839066505432 }, { "compression_loss": 0.0, "distillation_loss": 0.2877765893936157, "epoch": 14.7, "learning_rate": 5.466803078807859e-07, "loss": 0.2683, "step": 40680, "task_loss": 0.4347482919692993 }, { "compression_loss": 0.0, "distillation_loss": 0.26411518454551697, "epoch": 14.71, "learning_rate": 5.33810556391261e-07, "loss": 0.2645, "step": 40690, "task_loss": 0.559246301651001 }, { "compression_loss": 0.0, "distillation_loss": 0.27388328313827515, "epoch": 14.71, "learning_rate": 5.210927424830092e-07, "loss": 0.2675, "step": 40700, "task_loss": 1.1375054121017456 }, { "compression_loss": 0.0, "distillation_loss": 0.2683138847351074, "epoch": 14.71, "learning_rate": 5.085269317333574e-07, "loss": 0.2511, "step": 40710, "task_loss": 0.8775959014892578 }, { "compression_loss": 0.0, "distillation_loss": 0.2723231613636017, "epoch": 14.72, "learning_rate": 4.961131889358528e-07, "loss": 0.2553, "step": 40720, "task_loss": 0.2622547149658203 }, { "compression_loss": 0.0, "distillation_loss": 0.24217526614665985, "epoch": 14.72, "learning_rate": 4.838515780999264e-07, "loss": 0.2746, "step": 40730, "task_loss": 0.6096961498260498 }, { "compression_loss": 0.0, "distillation_loss": 0.2224244624376297, "epoch": 14.72, "learning_rate": 4.717421624505669e-07, "loss": 0.2642, "step": 40740, "task_loss": 0.5393527746200562 }, { "compression_loss": 0.0, "distillation_loss": 0.2478673756122589, "epoch": 14.73, "learning_rate": 4.597850044279972e-07, "loss": 0.2461, "step": 40750, "task_loss": 0.46678709983825684 }, { "epoch": 14.73, "eval_exact_match": 83.37748344370861, "eval_f1": 90.06774265907103, "step": 40750 }, { "compression_loss": 0.0, "distillation_loss": 0.20128917694091797, "epoch": 14.73, "learning_rate": 4.4798016568733837e-07, "loss": 0.2472, "step": 40760, "task_loss": 0.41552314162254333 }, { "compression_loss": 0.0, "distillation_loss": 0.2630329430103302, "epoch": 14.73, "learning_rate": 4.3632770709831293e-07, "loss": 0.2575, "step": 40770, "task_loss": 0.25791311264038086 }, { "compression_loss": 0.0, "distillation_loss": 0.1728416383266449, "epoch": 14.74, "learning_rate": 4.248276887449154e-07, "loss": 0.2627, "step": 40780, "task_loss": 0.49050474166870117 }, { "compression_loss": 0.0, "distillation_loss": 0.2832787036895752, "epoch": 14.74, "learning_rate": 4.1348016992510895e-07, "loss": 0.2755, "step": 40790, "task_loss": 0.6007558107376099 }, { "compression_loss": 0.0, "distillation_loss": 0.2201354056596756, "epoch": 14.75, "learning_rate": 4.0228520915050915e-07, "loss": 0.2605, "step": 40800, "task_loss": 0.5979535579681396 }, { "compression_loss": 0.0, "distillation_loss": 0.2848866283893585, "epoch": 14.75, "learning_rate": 3.912428641461041e-07, "loss": 0.2484, "step": 40810, "task_loss": 0.6617125272750854 }, { "compression_loss": 0.0, "distillation_loss": 0.20860302448272705, "epoch": 14.75, "learning_rate": 3.8035319184993813e-07, "loss": 0.2533, "step": 40820, "task_loss": 0.6819936037063599 }, { "compression_loss": 0.0, "distillation_loss": 0.2802797555923462, "epoch": 14.76, "learning_rate": 3.6961624841282516e-07, "loss": 0.2516, "step": 40830, "task_loss": 0.6139797568321228 }, { "compression_loss": 0.0, "distillation_loss": 0.1864747554063797, "epoch": 14.76, "learning_rate": 3.590320891980492e-07, "loss": 0.2661, "step": 40840, "task_loss": 0.2509109675884247 }, { "compression_loss": 0.0, "distillation_loss": 0.2201501429080963, "epoch": 14.76, "learning_rate": 3.4860076878110103e-07, "loss": 0.2353, "step": 40850, "task_loss": 0.45859768986701965 }, { "compression_loss": 0.0, "distillation_loss": 0.2391672134399414, "epoch": 14.77, "learning_rate": 3.383223409493719e-07, "loss": 0.2631, "step": 40860, "task_loss": 0.41706597805023193 }, { "compression_loss": 0.0, "distillation_loss": 0.183127760887146, "epoch": 14.77, "learning_rate": 3.281968587018902e-07, "loss": 0.2552, "step": 40870, "task_loss": 0.5521631836891174 }, { "compression_loss": 0.0, "distillation_loss": 0.22473424673080444, "epoch": 14.77, "learning_rate": 3.1822437424905536e-07, "loss": 0.2545, "step": 40880, "task_loss": 0.358811616897583 }, { "compression_loss": 0.0, "distillation_loss": 0.20832541584968567, "epoch": 14.78, "learning_rate": 3.084049390123478e-07, "loss": 0.2506, "step": 40890, "task_loss": 0.40031999349594116 }, { "compression_loss": 0.0, "distillation_loss": 0.18158313632011414, "epoch": 14.78, "learning_rate": 2.9873860362407244e-07, "loss": 0.2465, "step": 40900, "task_loss": 0.39136573672294617 }, { "compression_loss": 0.0, "distillation_loss": 0.2597076892852783, "epoch": 14.78, "learning_rate": 2.892254179271059e-07, "loss": 0.2613, "step": 40910, "task_loss": 0.5301886796951294 }, { "compression_loss": 0.0, "distillation_loss": 0.2583584189414978, "epoch": 14.79, "learning_rate": 2.798654309746396e-07, "loss": 0.2467, "step": 40920, "task_loss": 0.5840583443641663 }, { "compression_loss": 0.0, "distillation_loss": 0.18077127635478973, "epoch": 14.79, "learning_rate": 2.706586910299069e-07, "loss": 0.2714, "step": 40930, "task_loss": 0.4754323959350586 }, { "compression_loss": 0.0, "distillation_loss": 0.2263426035642624, "epoch": 14.8, "learning_rate": 2.616052455659568e-07, "loss": 0.2638, "step": 40940, "task_loss": 0.34484654664993286 }, { "compression_loss": 0.0, "distillation_loss": 0.3615092635154724, "epoch": 14.8, "learning_rate": 2.5270514126540025e-07, "loss": 0.2714, "step": 40950, "task_loss": 0.6930694580078125 }, { "compression_loss": 0.0, "distillation_loss": 0.22011780738830566, "epoch": 14.8, "learning_rate": 2.4395842402016756e-07, "loss": 0.253, "step": 40960, "task_loss": 0.5014408230781555 }, { "compression_loss": 0.0, "distillation_loss": 0.2508123815059662, "epoch": 14.81, "learning_rate": 2.3536513893127166e-07, "loss": 0.2388, "step": 40970, "task_loss": 0.49518001079559326 }, { "compression_loss": 0.0, "distillation_loss": 0.2500151991844177, "epoch": 14.81, "learning_rate": 2.2692533030857832e-07, "loss": 0.2742, "step": 40980, "task_loss": 0.4772796630859375 }, { "compression_loss": 0.0, "distillation_loss": 0.19878563284873962, "epoch": 14.81, "learning_rate": 2.1863904167058634e-07, "loss": 0.2475, "step": 40990, "task_loss": 0.3303414583206177 }, { "compression_loss": 0.0, "distillation_loss": 0.23328831791877747, "epoch": 14.82, "learning_rate": 2.1050631574418112e-07, "loss": 0.2655, "step": 41000, "task_loss": 0.6365581154823303 }, { "epoch": 14.82, "eval_exact_match": 83.66130558183538, "eval_f1": 90.30829859300735, "step": 41000 } ], "max_steps": 41505, "num_train_epochs": 15, "total_flos": 3.128973983327232e+16, "trial_name": null, "trial_params": null }