{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.998535871156662, "eval_steps": 500, "global_step": 3072, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.9998692735249987e-05, "loss": 0.9611, "step": 10 }, { "epoch": 0.02, "learning_rate": 4.9994771077715244e-05, "loss": 0.94, "step": 20 }, { "epoch": 0.03, "learning_rate": 4.998823543752733e-05, "loss": 0.9521, "step": 30 }, { "epoch": 0.04, "learning_rate": 4.9979086498191216e-05, "loss": 0.9413, "step": 40 }, { "epoch": 0.05, "learning_rate": 4.9967325216513785e-05, "loss": 0.8717, "step": 50 }, { "epoch": 0.06, "learning_rate": 4.995295282250373e-05, "loss": 0.9099, "step": 60 }, { "epoch": 0.07, "learning_rate": 4.993597081924299e-05, "loss": 0.9671, "step": 70 }, { "epoch": 0.08, "learning_rate": 4.991638098272951e-05, "loss": 0.9971, "step": 80 }, { "epoch": 0.09, "learning_rate": 4.98941853616915e-05, "loss": 0.9215, "step": 90 }, { "epoch": 0.1, "learning_rate": 4.98693862773732e-05, "loss": 0.9008, "step": 100 }, { "epoch": 0.11, "learning_rate": 4.98419863232921e-05, "loss": 0.8907, "step": 110 }, { "epoch": 0.12, "learning_rate": 4.981198836496775e-05, "loss": 0.8838, "step": 120 }, { "epoch": 0.13, "learning_rate": 4.9779395539622024e-05, "loss": 0.9156, "step": 130 }, { "epoch": 0.14, "learning_rate": 4.9744211255851046e-05, "loss": 0.8241, "step": 140 }, { "epoch": 0.15, "learning_rate": 4.9706439193268736e-05, "loss": 0.8777, "step": 150 }, { "epoch": 0.16, "learning_rate": 4.966608330212198e-05, "loss": 0.8757, "step": 160 }, { "epoch": 0.17, "learning_rate": 4.962314780287748e-05, "loss": 0.8574, "step": 170 }, { "epoch": 0.18, "learning_rate": 4.957763718578041e-05, "loss": 0.8419, "step": 180 }, { "epoch": 0.19, "learning_rate": 4.952955621038482e-05, "loss": 0.8368, "step": 190 }, { "epoch": 0.2, "learning_rate": 4.9478909905055845e-05, "loss": 0.9646, "step": 200 }, { "epoch": 0.2, "learning_rate": 4.942570356644386e-05, "loss": 0.9666, "step": 210 }, { "epoch": 0.21, "learning_rate": 4.936994275893054e-05, "loss": 0.9451, "step": 220 }, { "epoch": 0.22, "learning_rate": 4.931163331404694e-05, "loss": 0.8937, "step": 230 }, { "epoch": 0.23, "learning_rate": 4.9250781329863606e-05, "loss": 0.9066, "step": 240 }, { "epoch": 0.24, "learning_rate": 4.9187393170352844e-05, "loss": 0.8378, "step": 250 }, { "epoch": 0.25, "learning_rate": 4.9121475464723175e-05, "loss": 0.8696, "step": 260 }, { "epoch": 0.26, "learning_rate": 4.9053035106726045e-05, "loss": 0.8534, "step": 270 }, { "epoch": 0.27, "learning_rate": 4.898207925393484e-05, "loss": 0.8417, "step": 280 }, { "epoch": 0.28, "learning_rate": 4.890861532699639e-05, "loss": 0.8718, "step": 290 }, { "epoch": 0.29, "learning_rate": 4.8832651008854845e-05, "loss": 0.9024, "step": 300 }, { "epoch": 0.3, "learning_rate": 4.8754194243948245e-05, "loss": 0.8846, "step": 310 }, { "epoch": 0.31, "learning_rate": 4.867325323737765e-05, "loss": 0.8569, "step": 320 }, { "epoch": 0.32, "learning_rate": 4.858983645404901e-05, "loss": 0.863, "step": 330 }, { "epoch": 0.33, "learning_rate": 4.8503952617787983e-05, "loss": 0.7955, "step": 340 }, { "epoch": 0.34, "learning_rate": 4.84156107104275e-05, "loss": 0.913, "step": 350 }, { "epoch": 0.35, "learning_rate": 4.8324819970868473e-05, "loss": 0.8538, "step": 360 }, { "epoch": 0.36, "learning_rate": 4.8231589894113584e-05, "loss": 0.9323, "step": 370 }, { "epoch": 0.37, "learning_rate": 4.813593023027427e-05, "loss": 0.8345, "step": 380 }, { "epoch": 0.38, "learning_rate": 4.803785098355105e-05, "loss": 0.8482, "step": 390 }, { "epoch": 0.39, "learning_rate": 4.793736241118728e-05, "loss": 0.9007, "step": 400 }, { "epoch": 0.4, "learning_rate": 4.783447502239645e-05, "loss": 0.8718, "step": 410 }, { "epoch": 0.41, "learning_rate": 4.7729199577263065e-05, "loss": 0.8733, "step": 420 }, { "epoch": 0.42, "learning_rate": 4.7621547085617406e-05, "loss": 0.7678, "step": 430 }, { "epoch": 0.43, "learning_rate": 4.751152880588409e-05, "loss": 0.8189, "step": 440 }, { "epoch": 0.44, "learning_rate": 4.739915624390463e-05, "loss": 0.9303, "step": 450 }, { "epoch": 0.45, "learning_rate": 4.728444115173415e-05, "loss": 0.8666, "step": 460 }, { "epoch": 0.46, "learning_rate": 4.716739552641237e-05, "loss": 0.8999, "step": 470 }, { "epoch": 0.47, "learning_rate": 4.7048031608708876e-05, "loss": 0.8813, "step": 480 }, { "epoch": 0.48, "learning_rate": 4.692636188184305e-05, "loss": 0.9063, "step": 490 }, { "epoch": 0.49, "learning_rate": 4.680239907017849e-05, "loss": 0.8109, "step": 500 }, { "epoch": 0.5, "learning_rate": 4.667615613789232e-05, "loss": 0.825, "step": 510 }, { "epoch": 0.51, "learning_rate": 4.654764628761936e-05, "loss": 0.9117, "step": 520 }, { "epoch": 0.52, "learning_rate": 4.6416882959071395e-05, "loss": 0.8901, "step": 530 }, { "epoch": 0.53, "learning_rate": 4.628387982763163e-05, "loss": 0.8362, "step": 540 }, { "epoch": 0.54, "learning_rate": 4.614865080292449e-05, "loss": 0.9148, "step": 550 }, { "epoch": 0.55, "learning_rate": 4.601121002736095e-05, "loss": 0.9449, "step": 560 }, { "epoch": 0.56, "learning_rate": 4.58715718746595e-05, "loss": 0.7937, "step": 570 }, { "epoch": 0.57, "learning_rate": 4.5729750948342926e-05, "loss": 0.835, "step": 580 }, { "epoch": 0.58, "learning_rate": 4.558576208021105e-05, "loss": 0.9117, "step": 590 }, { "epoch": 0.59, "learning_rate": 4.5439620328789593e-05, "loss": 0.7855, "step": 600 }, { "epoch": 0.6, "learning_rate": 4.529134097775539e-05, "loss": 0.8816, "step": 610 }, { "epoch": 0.61, "learning_rate": 4.514093953433792e-05, "loss": 0.8344, "step": 620 }, { "epoch": 0.61, "learning_rate": 4.498843172769763e-05, "loss": 0.8936, "step": 630 }, { "epoch": 0.62, "learning_rate": 4.4833833507280884e-05, "loss": 0.9126, "step": 640 }, { "epoch": 0.63, "learning_rate": 4.4677161041152005e-05, "loss": 0.8691, "step": 650 }, { "epoch": 0.64, "learning_rate": 4.451843071430236e-05, "loss": 0.8383, "step": 660 }, { "epoch": 0.65, "learning_rate": 4.435765912693686e-05, "loss": 0.9212, "step": 670 }, { "epoch": 0.66, "learning_rate": 4.419486309273781e-05, "loss": 0.8768, "step": 680 }, { "epoch": 0.67, "learning_rate": 4.403005963710655e-05, "loss": 0.9103, "step": 690 }, { "epoch": 0.68, "learning_rate": 4.386326599538295e-05, "loss": 0.8308, "step": 700 }, { "epoch": 0.69, "learning_rate": 4.369449961104289e-05, "loss": 0.9243, "step": 710 }, { "epoch": 0.7, "learning_rate": 4.352377813387398e-05, "loss": 0.8994, "step": 720 }, { "epoch": 0.71, "learning_rate": 4.335111941812975e-05, "loss": 0.9656, "step": 730 }, { "epoch": 0.72, "learning_rate": 4.3176541520662426e-05, "loss": 0.901, "step": 740 }, { "epoch": 0.73, "learning_rate": 4.3000062699034544e-05, "loss": 0.924, "step": 750 }, { "epoch": 0.74, "learning_rate": 4.28217014096095e-05, "loss": 0.8821, "step": 760 }, { "epoch": 0.75, "learning_rate": 4.264147630562141e-05, "loss": 0.8631, "step": 770 }, { "epoch": 0.76, "learning_rate": 4.245940623522433e-05, "loss": 0.9012, "step": 780 }, { "epoch": 0.77, "learning_rate": 4.227551023952104e-05, "loss": 0.9543, "step": 790 }, { "epoch": 0.78, "learning_rate": 4.208980755057178e-05, "loss": 0.8243, "step": 800 }, { "epoch": 0.79, "learning_rate": 4.19023175893829e-05, "loss": 0.8568, "step": 810 }, { "epoch": 0.8, "learning_rate": 4.171305996387577e-05, "loss": 0.9434, "step": 820 }, { "epoch": 0.81, "learning_rate": 4.1522054466836186e-05, "loss": 0.8362, "step": 830 }, { "epoch": 0.82, "learning_rate": 4.1329321073844415e-05, "loss": 0.8904, "step": 840 }, { "epoch": 0.83, "learning_rate": 4.1134879941186135e-05, "loss": 0.9025, "step": 850 }, { "epoch": 0.84, "learning_rate": 4.093875140374443e-05, "loss": 0.872, "step": 860 }, { "epoch": 0.85, "learning_rate": 4.074095597287318e-05, "loss": 0.8268, "step": 870 }, { "epoch": 0.86, "learning_rate": 4.054151433425194e-05, "loss": 0.8755, "step": 880 }, { "epoch": 0.87, "learning_rate": 4.034044734572262e-05, "loss": 0.8389, "step": 890 }, { "epoch": 0.88, "learning_rate": 4.013777603510814e-05, "loss": 0.8528, "step": 900 }, { "epoch": 0.89, "learning_rate": 3.993352159801332e-05, "loss": 0.8036, "step": 910 }, { "epoch": 0.9, "learning_rate": 3.9727705395608205e-05, "loss": 0.8854, "step": 920 }, { "epoch": 0.91, "learning_rate": 3.9520348952394115e-05, "loss": 0.8261, "step": 930 }, { "epoch": 0.92, "learning_rate": 3.931147395395256e-05, "loss": 0.842, "step": 940 }, { "epoch": 0.93, "learning_rate": 3.9101102244677335e-05, "loss": 0.8749, "step": 950 }, { "epoch": 0.94, "learning_rate": 3.888925582549006e-05, "loss": 0.8799, "step": 960 }, { "epoch": 0.95, "learning_rate": 3.867595685153922e-05, "loss": 0.8573, "step": 970 }, { "epoch": 0.96, "learning_rate": 3.846122762988319e-05, "loss": 0.8694, "step": 980 }, { "epoch": 0.97, "learning_rate": 3.824509061715738e-05, "loss": 0.927, "step": 990 }, { "epoch": 0.98, "learning_rate": 3.802756841722559e-05, "loss": 0.8816, "step": 1000 }, { "epoch": 0.99, "learning_rate": 3.780868377881619e-05, "loss": 0.9129, "step": 1010 }, { "epoch": 1.0, "learning_rate": 3.758845959314294e-05, "loss": 0.8362, "step": 1020 }, { "epoch": 1.01, "learning_rate": 3.7366918891511046e-05, "loss": 0.8907, "step": 1030 }, { "epoch": 1.02, "learning_rate": 3.7144084842908505e-05, "loss": 0.9378, "step": 1040 }, { "epoch": 1.02, "learning_rate": 3.691998075158306e-05, "loss": 0.9064, "step": 1050 }, { "epoch": 1.03, "learning_rate": 3.669463005460502e-05, "loss": 0.9101, "step": 1060 }, { "epoch": 1.04, "learning_rate": 3.646805631941619e-05, "loss": 0.8307, "step": 1070 }, { "epoch": 1.05, "learning_rate": 3.624028324136517e-05, "loss": 0.8958, "step": 1080 }, { "epoch": 1.06, "learning_rate": 3.6011334641229215e-05, "loss": 0.8681, "step": 1090 }, { "epoch": 1.07, "learning_rate": 3.578123446272309e-05, "loss": 0.8711, "step": 1100 }, { "epoch": 1.08, "learning_rate": 3.5550006769994996e-05, "loss": 0.8607, "step": 1110 }, { "epoch": 1.09, "learning_rate": 3.5317675745109866e-05, "loss": 0.9532, "step": 1120 }, { "epoch": 1.1, "learning_rate": 3.508426568552045e-05, "loss": 0.848, "step": 1130 }, { "epoch": 1.11, "learning_rate": 3.484980100152621e-05, "loss": 0.8097, "step": 1140 }, { "epoch": 1.12, "learning_rate": 3.461430621372044e-05, "loss": 0.8612, "step": 1150 }, { "epoch": 1.13, "learning_rate": 3.437780595042595e-05, "loss": 0.8281, "step": 1160 }, { "epoch": 1.14, "learning_rate": 3.414032494511935e-05, "loss": 0.8297, "step": 1170 }, { "epoch": 1.15, "learning_rate": 3.390188803384439e-05, "loss": 0.8607, "step": 1180 }, { "epoch": 1.16, "learning_rate": 3.3662520152614615e-05, "loss": 0.8986, "step": 1190 }, { "epoch": 1.17, "learning_rate": 3.34222463348055e-05, "loss": 0.8884, "step": 1200 }, { "epoch": 1.18, "learning_rate": 3.3181091708536435e-05, "loss": 0.8901, "step": 1210 }, { "epoch": 1.19, "learning_rate": 3.293908149404279e-05, "loss": 0.8583, "step": 1220 }, { "epoch": 1.2, "learning_rate": 3.2696241001038374e-05, "loss": 0.7928, "step": 1230 }, { "epoch": 1.21, "learning_rate": 3.245259562606851e-05, "loss": 0.8508, "step": 1240 }, { "epoch": 1.22, "learning_rate": 3.220817084985398e-05, "loss": 0.8245, "step": 1250 }, { "epoch": 1.23, "learning_rate": 3.196299223462633e-05, "loss": 0.8746, "step": 1260 }, { "epoch": 1.24, "learning_rate": 3.171708542145445e-05, "loss": 0.8666, "step": 1270 }, { "epoch": 1.25, "learning_rate": 3.147047612756302e-05, "loss": 0.8807, "step": 1280 }, { "epoch": 1.26, "learning_rate": 3.1223190143643014e-05, "loss": 0.8557, "step": 1290 }, { "epoch": 1.27, "learning_rate": 3.09752533311544e-05, "loss": 0.842, "step": 1300 }, { "epoch": 1.28, "learning_rate": 3.0726691619621617e-05, "loss": 0.8701, "step": 1310 }, { "epoch": 1.29, "learning_rate": 3.0477531003921745e-05, "loss": 0.8755, "step": 1320 }, { "epoch": 1.3, "learning_rate": 3.0227797541566e-05, "loss": 0.9009, "step": 1330 }, { "epoch": 1.31, "learning_rate": 2.9977517349974544e-05, "loss": 0.8578, "step": 1340 }, { "epoch": 1.32, "learning_rate": 2.9726716603745157e-05, "loss": 0.8067, "step": 1350 }, { "epoch": 1.33, "learning_rate": 2.9475421531915827e-05, "loss": 0.8139, "step": 1360 }, { "epoch": 1.34, "learning_rate": 2.922365841522167e-05, "loss": 0.8382, "step": 1370 }, { "epoch": 1.35, "learning_rate": 2.8971453583346536e-05, "loss": 0.9073, "step": 1380 }, { "epoch": 1.36, "learning_rate": 2.871883341216934e-05, "loss": 0.8783, "step": 1390 }, { "epoch": 1.37, "learning_rate": 2.846582432100567e-05, "loss": 0.8555, "step": 1400 }, { "epoch": 1.38, "learning_rate": 2.8212452769844834e-05, "loss": 0.8236, "step": 1410 }, { "epoch": 1.39, "learning_rate": 2.795874525658263e-05, "loss": 0.8816, "step": 1420 }, { "epoch": 1.4, "learning_rate": 2.7704728314250168e-05, "loss": 0.8199, "step": 1430 }, { "epoch": 1.41, "learning_rate": 2.7450428508239024e-05, "loss": 0.8271, "step": 1440 }, { "epoch": 1.42, "learning_rate": 2.7195872433522974e-05, "loss": 0.8444, "step": 1450 }, { "epoch": 1.43, "learning_rate": 2.694108671187669e-05, "loss": 0.8232, "step": 1460 }, { "epoch": 1.43, "learning_rate": 2.66860979890916e-05, "loss": 0.7959, "step": 1470 }, { "epoch": 1.44, "learning_rate": 2.6430932932189224e-05, "loss": 0.8368, "step": 1480 }, { "epoch": 1.45, "learning_rate": 2.617561822663228e-05, "loss": 0.8336, "step": 1490 }, { "epoch": 1.46, "learning_rate": 2.5920180573533976e-05, "loss": 0.8431, "step": 1500 }, { "epoch": 1.47, "learning_rate": 2.566464668686547e-05, "loss": 0.8309, "step": 1510 }, { "epoch": 1.48, "learning_rate": 2.5409043290662173e-05, "loss": 0.9332, "step": 1520 }, { "epoch": 1.49, "learning_rate": 2.5153397116228865e-05, "loss": 0.8416, "step": 1530 }, { "epoch": 1.5, "learning_rate": 2.489773489934413e-05, "loss": 0.8978, "step": 1540 }, { "epoch": 1.51, "learning_rate": 2.4642083377464305e-05, "loss": 0.8724, "step": 1550 }, { "epoch": 1.52, "learning_rate": 2.4386469286927196e-05, "loss": 0.9087, "step": 1560 }, { "epoch": 1.53, "learning_rate": 2.413091936015603e-05, "loss": 0.8749, "step": 1570 }, { "epoch": 1.54, "learning_rate": 2.3875460322863697e-05, "loss": 0.8781, "step": 1580 }, { "epoch": 1.55, "learning_rate": 2.3620118891257754e-05, "loss": 0.8544, "step": 1590 }, { "epoch": 1.56, "learning_rate": 2.3364921769246423e-05, "loss": 0.8244, "step": 1600 }, { "epoch": 1.57, "learning_rate": 2.3109895645645867e-05, "loss": 0.8656, "step": 1610 }, { "epoch": 1.58, "learning_rate": 2.2855067191389006e-05, "loss": 0.888, "step": 1620 }, { "epoch": 1.59, "learning_rate": 2.2600463056736293e-05, "loss": 0.8219, "step": 1630 }, { "epoch": 1.6, "learning_rate": 2.2346109868488567e-05, "loss": 0.8732, "step": 1640 }, { "epoch": 1.61, "learning_rate": 2.2092034227202383e-05, "loss": 0.8984, "step": 1650 }, { "epoch": 1.62, "learning_rate": 2.183826270440812e-05, "loss": 0.8474, "step": 1660 }, { "epoch": 1.63, "learning_rate": 2.158482183983109e-05, "loss": 0.8423, "step": 1670 }, { "epoch": 1.64, "learning_rate": 2.1331738138615958e-05, "loss": 0.8716, "step": 1680 }, { "epoch": 1.65, "learning_rate": 2.1079038068554842e-05, "loss": 0.8783, "step": 1690 }, { "epoch": 1.66, "learning_rate": 2.082674805731926e-05, "loss": 0.7759, "step": 1700 }, { "epoch": 1.67, "learning_rate": 2.0574894489696285e-05, "loss": 0.8975, "step": 1710 }, { "epoch": 1.68, "learning_rate": 2.03235037048292e-05, "loss": 0.8425, "step": 1720 }, { "epoch": 1.69, "learning_rate": 2.0072601993462946e-05, "loss": 0.824, "step": 1730 }, { "epoch": 1.7, "learning_rate": 1.982221559519454e-05, "loss": 0.7989, "step": 1740 }, { "epoch": 1.71, "learning_rate": 1.957237069572898e-05, "loss": 0.8145, "step": 1750 }, { "epoch": 1.72, "learning_rate": 1.932309342414067e-05, "loss": 0.8581, "step": 1760 }, { "epoch": 1.73, "learning_rate": 1.907440985014082e-05, "loss": 0.8886, "step": 1770 }, { "epoch": 1.74, "learning_rate": 1.8826345981351044e-05, "loss": 0.8048, "step": 1780 }, { "epoch": 1.75, "learning_rate": 1.8578927760583463e-05, "loss": 0.8534, "step": 1790 }, { "epoch": 1.76, "learning_rate": 1.8332181063127545e-05, "loss": 0.8392, "step": 1800 }, { "epoch": 1.77, "learning_rate": 1.8086131694044077e-05, "loss": 0.8926, "step": 1810 }, { "epoch": 1.78, "learning_rate": 1.784080538546642e-05, "loss": 0.923, "step": 1820 }, { "epoch": 1.79, "learning_rate": 1.7596227793909402e-05, "loss": 0.8209, "step": 1830 }, { "epoch": 1.8, "learning_rate": 1.7352424497586163e-05, "loss": 0.813, "step": 1840 }, { "epoch": 1.81, "learning_rate": 1.7109420993733115e-05, "loss": 0.8375, "step": 1850 }, { "epoch": 1.82, "learning_rate": 1.686724269594343e-05, "loss": 0.8477, "step": 1860 }, { "epoch": 1.83, "learning_rate": 1.6625914931509262e-05, "loss": 0.8263, "step": 1870 }, { "epoch": 1.84, "learning_rate": 1.6385462938772992e-05, "loss": 0.8816, "step": 1880 }, { "epoch": 1.84, "learning_rate": 1.614591186448774e-05, "loss": 0.8677, "step": 1890 }, { "epoch": 1.85, "learning_rate": 1.5907286761187535e-05, "loss": 0.9021, "step": 1900 }, { "epoch": 1.86, "learning_rate": 1.5669612584567265e-05, "loss": 0.8449, "step": 1910 }, { "epoch": 1.87, "learning_rate": 1.5432914190872757e-05, "loss": 0.8328, "step": 1920 }, { "epoch": 1.88, "learning_rate": 1.5197216334301339e-05, "loss": 0.8591, "step": 1930 }, { "epoch": 1.89, "learning_rate": 1.4962543664412986e-05, "loss": 0.9411, "step": 1940 }, { "epoch": 1.9, "learning_rate": 1.4728920723552407e-05, "loss": 0.9106, "step": 1950 }, { "epoch": 1.91, "learning_rate": 1.4496371944282441e-05, "loss": 0.8332, "step": 1960 }, { "epoch": 1.92, "learning_rate": 1.4264921646828838e-05, "loss": 0.783, "step": 1970 }, { "epoch": 1.93, "learning_rate": 1.4034594036536817e-05, "loss": 0.8703, "step": 1980 }, { "epoch": 1.94, "learning_rate": 1.3805413201339632e-05, "loss": 0.8918, "step": 1990 }, { "epoch": 1.95, "learning_rate": 1.3577403109239484e-05, "loss": 0.8795, "step": 2000 }, { "epoch": 1.96, "learning_rate": 1.3350587605800852e-05, "loss": 0.8222, "step": 2010 }, { "epoch": 1.97, "learning_rate": 1.3124990411656706e-05, "loss": 0.8187, "step": 2020 }, { "epoch": 1.98, "learning_rate": 1.2900635120027859e-05, "loss": 0.8343, "step": 2030 }, { "epoch": 1.99, "learning_rate": 1.2677545194255402e-05, "loss": 0.9145, "step": 2040 }, { "epoch": 2.0, "learning_rate": 1.2455743965347047e-05, "loss": 0.913, "step": 2050 }, { "epoch": 2.01, "learning_rate": 1.223525462953703e-05, "loss": 0.9304, "step": 2060 }, { "epoch": 2.02, "learning_rate": 1.201610024586026e-05, "loss": 0.82, "step": 2070 }, { "epoch": 2.03, "learning_rate": 1.1798303733740802e-05, "loss": 0.8232, "step": 2080 }, { "epoch": 2.04, "learning_rate": 1.1581887870594884e-05, "loss": 0.8997, "step": 2090 }, { "epoch": 2.05, "learning_rate": 1.1366875289448845e-05, "loss": 0.9386, "step": 2100 }, { "epoch": 2.06, "learning_rate": 1.1153288476572126e-05, "loss": 0.8515, "step": 2110 }, { "epoch": 2.07, "learning_rate": 1.0941149769125674e-05, "loss": 0.8724, "step": 2120 }, { "epoch": 2.08, "learning_rate": 1.0730481352825823e-05, "loss": 0.8616, "step": 2130 }, { "epoch": 2.09, "learning_rate": 1.0521305259624129e-05, "loss": 0.82, "step": 2140 }, { "epoch": 2.1, "learning_rate": 1.0313643365403274e-05, "loss": 0.866, "step": 2150 }, { "epoch": 2.11, "learning_rate": 1.0107517387689166e-05, "loss": 0.7712, "step": 2160 }, { "epoch": 2.12, "learning_rate": 9.90294888337981e-06, "loss": 0.8451, "step": 2170 }, { "epoch": 2.13, "learning_rate": 9.699959246490762e-06, "loss": 0.7923, "step": 2180 }, { "epoch": 2.14, "learning_rate": 9.498569705917771e-06, "loss": 0.8547, "step": 2190 }, { "epoch": 2.15, "learning_rate": 9.298801323216647e-06, "loss": 0.8624, "step": 2200 }, { "epoch": 2.16, "learning_rate": 9.100674990400571e-06, "loss": 0.8079, "step": 2210 }, { "epoch": 2.17, "learning_rate": 8.904211427755218e-06, "loss": 0.8211, "step": 2220 }, { "epoch": 2.18, "learning_rate": 8.70943118167179e-06, "loss": 0.9058, "step": 2230 }, { "epoch": 2.19, "learning_rate": 8.51635462249828e-06, "loss": 0.7715, "step": 2240 }, { "epoch": 2.2, "learning_rate": 8.325001942409066e-06, "loss": 0.8093, "step": 2250 }, { "epoch": 2.21, "learning_rate": 8.13539315329322e-06, "loss": 0.8812, "step": 2260 }, { "epoch": 2.22, "learning_rate": 7.947548084661674e-06, "loss": 0.8675, "step": 2270 }, { "epoch": 2.23, "learning_rate": 7.761486381573327e-06, "loss": 0.7757, "step": 2280 }, { "epoch": 2.24, "learning_rate": 7.577227502580667e-06, "loss": 0.9026, "step": 2290 }, { "epoch": 2.24, "learning_rate": 7.394790717694677e-06, "loss": 0.8779, "step": 2300 }, { "epoch": 2.25, "learning_rate": 7.214195106369587e-06, "loss": 0.8702, "step": 2310 }, { "epoch": 2.26, "learning_rate": 7.035459555507548e-06, "loss": 0.8602, "step": 2320 }, { "epoch": 2.27, "learning_rate": 6.858602757483371e-06, "loss": 0.8647, "step": 2330 }, { "epoch": 2.28, "learning_rate": 6.683643208189683e-06, "loss": 0.8216, "step": 2340 }, { "epoch": 2.29, "learning_rate": 6.510599205102594e-06, "loss": 0.8674, "step": 2350 }, { "epoch": 2.3, "learning_rate": 6.339488845368155e-06, "loss": 0.8232, "step": 2360 }, { "epoch": 2.31, "learning_rate": 6.17033002390969e-06, "loss": 0.8058, "step": 2370 }, { "epoch": 2.32, "learning_rate": 6.003140431556345e-06, "loss": 0.8183, "step": 2380 }, { "epoch": 2.33, "learning_rate": 5.837937553192999e-06, "loss": 0.8745, "step": 2390 }, { "epoch": 2.34, "learning_rate": 5.674738665931575e-06, "loss": 0.7398, "step": 2400 }, { "epoch": 2.35, "learning_rate": 5.513560837304304e-06, "loss": 0.8321, "step": 2410 }, { "epoch": 2.36, "learning_rate": 5.354420923478687e-06, "loss": 0.8657, "step": 2420 }, { "epoch": 2.37, "learning_rate": 5.197335567494696e-06, "loss": 0.8899, "step": 2430 }, { "epoch": 2.38, "learning_rate": 5.042321197524213e-06, "loss": 0.8185, "step": 2440 }, { "epoch": 2.39, "learning_rate": 4.889394025152979e-06, "loss": 0.9032, "step": 2450 }, { "epoch": 2.4, "learning_rate": 4.738570043685134e-06, "loss": 0.8692, "step": 2460 }, { "epoch": 2.41, "learning_rate": 4.589865026470619e-06, "loss": 0.8571, "step": 2470 }, { "epoch": 2.42, "learning_rate": 4.4432945252556284e-06, "loss": 0.8563, "step": 2480 }, { "epoch": 2.43, "learning_rate": 4.298873868556108e-06, "loss": 0.8423, "step": 2490 }, { "epoch": 2.44, "learning_rate": 4.156618160054771e-06, "loss": 0.8008, "step": 2500 }, { "epoch": 2.45, "learning_rate": 4.016542277021465e-06, "loss": 0.9192, "step": 2510 }, { "epoch": 2.46, "learning_rate": 3.878660868757323e-06, "loss": 0.9414, "step": 2520 }, { "epoch": 2.47, "learning_rate": 3.742988355062735e-06, "loss": 0.8645, "step": 2530 }, { "epoch": 2.48, "learning_rate": 3.609538924729269e-06, "loss": 0.7851, "step": 2540 }, { "epoch": 2.49, "learning_rate": 3.478326534055823e-06, "loss": 0.8207, "step": 2550 }, { "epoch": 2.5, "learning_rate": 3.3493649053890326e-06, "loss": 0.8443, "step": 2560 }, { "epoch": 2.51, "learning_rate": 3.2226675256882167e-06, "loss": 0.841, "step": 2570 }, { "epoch": 2.52, "learning_rate": 3.098247645114838e-06, "loss": 0.8591, "step": 2580 }, { "epoch": 2.53, "learning_rate": 2.9761182756468165e-06, "loss": 0.8146, "step": 2590 }, { "epoch": 2.54, "learning_rate": 2.856292189717741e-06, "loss": 0.8441, "step": 2600 }, { "epoch": 2.55, "learning_rate": 2.7387819188810534e-06, "loss": 0.878, "step": 2610 }, { "epoch": 2.56, "learning_rate": 2.623599752499553e-06, "loss": 0.8543, "step": 2620 }, { "epoch": 2.57, "learning_rate": 2.510757736460112e-06, "loss": 0.8435, "step": 2630 }, { "epoch": 2.58, "learning_rate": 2.4002676719139166e-06, "loss": 0.8424, "step": 2640 }, { "epoch": 2.59, "learning_rate": 2.2921411140423037e-06, "loss": 0.8488, "step": 2650 }, { "epoch": 2.6, "learning_rate": 2.1863893708482795e-06, "loss": 0.8021, "step": 2660 }, { "epoch": 2.61, "learning_rate": 2.0830235019739352e-06, "loss": 0.8198, "step": 2670 }, { "epoch": 2.62, "learning_rate": 1.9820543175438077e-06, "loss": 0.7836, "step": 2680 }, { "epoch": 2.63, "learning_rate": 1.8834923770343544e-06, "loss": 0.7949, "step": 2690 }, { "epoch": 2.64, "learning_rate": 1.787347988169613e-06, "loss": 0.8243, "step": 2700 }, { "epoch": 2.65, "learning_rate": 1.6936312058432158e-06, "loss": 0.8798, "step": 2710 }, { "epoch": 2.65, "learning_rate": 1.6023518310668618e-06, "loss": 0.9497, "step": 2720 }, { "epoch": 2.66, "learning_rate": 1.5135194099452521e-06, "loss": 0.8978, "step": 2730 }, { "epoch": 2.67, "learning_rate": 1.4271432326778273e-06, "loss": 0.8524, "step": 2740 }, { "epoch": 2.68, "learning_rate": 1.3432323325871216e-06, "loss": 0.7888, "step": 2750 }, { "epoch": 2.69, "learning_rate": 1.2617954851740832e-06, "loss": 0.8624, "step": 2760 }, { "epoch": 2.7, "learning_rate": 1.1828412072003108e-06, "loss": 0.8722, "step": 2770 }, { "epoch": 2.71, "learning_rate": 1.1063777557973638e-06, "loss": 0.8501, "step": 2780 }, { "epoch": 2.72, "learning_rate": 1.0324131276032127e-06, "loss": 0.8302, "step": 2790 }, { "epoch": 2.73, "learning_rate": 9.609550579259496e-07, "loss": 0.8424, "step": 2800 }, { "epoch": 2.74, "learning_rate": 8.920110199348325e-07, "loss": 0.8208, "step": 2810 }, { "epoch": 2.75, "learning_rate": 8.255882238786983e-07, "loss": 0.8773, "step": 2820 }, { "epoch": 2.76, "learning_rate": 7.616936163319404e-07, "loss": 0.7867, "step": 2830 }, { "epoch": 2.77, "learning_rate": 7.003338794680153e-07, "loss": 0.847, "step": 2840 }, { "epoch": 2.78, "learning_rate": 6.415154303606036e-07, "loss": 0.8479, "step": 2850 }, { "epoch": 2.79, "learning_rate": 5.852444203125223e-07, "loss": 0.8197, "step": 2860 }, { "epoch": 2.8, "learning_rate": 5.315267342124008e-07, "loss": 0.8719, "step": 2870 }, { "epoch": 2.81, "learning_rate": 4.803679899192392e-07, "loss": 0.922, "step": 2880 }, { "epoch": 2.82, "learning_rate": 4.3177353767488163e-07, "loss": 0.8622, "step": 2890 }, { "epoch": 2.83, "learning_rate": 3.8574845954448823e-07, "loss": 0.8361, "step": 2900 }, { "epoch": 2.84, "learning_rate": 3.4229756888503604e-07, "loss": 0.9201, "step": 2910 }, { "epoch": 2.85, "learning_rate": 3.014254098419406e-07, "loss": 0.834, "step": 2920 }, { "epoch": 2.86, "learning_rate": 2.6313625687383046e-07, "loss": 0.8569, "step": 2930 }, { "epoch": 2.87, "learning_rate": 2.2743411430549976e-07, "loss": 0.8411, "step": 2940 }, { "epoch": 2.88, "learning_rate": 1.943227159091482e-07, "loss": 0.8648, "step": 2950 }, { "epoch": 2.89, "learning_rate": 1.6380552451389086e-07, "loss": 0.8477, "step": 2960 }, { "epoch": 2.9, "learning_rate": 1.3588573164361463e-07, "loss": 0.8449, "step": 2970 }, { "epoch": 2.91, "learning_rate": 1.1056625718320334e-07, "loss": 0.8185, "step": 2980 }, { "epoch": 2.92, "learning_rate": 8.784974907317111e-08, "loss": 0.8326, "step": 2990 }, { "epoch": 2.93, "learning_rate": 6.773858303274483e-08, "loss": 0.84, "step": 3000 }, { "epoch": 2.94, "learning_rate": 5.0234862311399065e-08, "loss": 0.7393, "step": 3010 }, { "epoch": 2.95, "learning_rate": 3.5340417468898714e-08, "loss": 0.8723, "step": 3020 }, { "epoch": 2.96, "learning_rate": 2.3056806183865965e-08, "loss": 0.8481, "step": 3030 }, { "epoch": 2.97, "learning_rate": 1.3385313090857887e-08, "loss": 0.8273, "step": 3040 }, { "epoch": 2.98, "learning_rate": 6.326949646040525e-09, "loss": 0.899, "step": 3050 }, { "epoch": 2.99, "learning_rate": 1.8824540213874255e-09, "loss": 0.8056, "step": 3060 }, { "epoch": 3.0, "learning_rate": 5.229102749415571e-11, "loss": 0.8446, "step": 3070 }, { "epoch": 3.0, "step": 3072, "total_flos": 1.125146154656809e+18, "train_loss": 0.862230001638333, "train_runtime": 23418.3393, "train_samples_per_second": 2.1, "train_steps_per_second": 0.131 } ], "logging_steps": 10, "max_steps": 3072, "num_train_epochs": 3, "save_steps": 1000, "total_flos": 1.125146154656809e+18, "trial_name": null, "trial_params": null }