{ "best_metric": 0.00012181053898530081, "best_model_checkpoint": "autotrain-l9v4l-l3gs1/checkpoint-5750", "epoch": 2.0, "eval_steps": 500, "global_step": 5750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008695652173913044, "grad_norm": 8.19902229309082, "learning_rate": 1.3904982618771727e-06, "loss": 3.1857, "step": 25 }, { "epoch": 0.017391304347826087, "grad_norm": 9.214441299438477, "learning_rate": 2.838933951332561e-06, "loss": 3.1368, "step": 50 }, { "epoch": 0.02608695652173913, "grad_norm": 9.295684814453125, "learning_rate": 4.229432213209733e-06, "loss": 3.0863, "step": 75 }, { "epoch": 0.034782608695652174, "grad_norm": 15.308918952941895, "learning_rate": 5.677867902665122e-06, "loss": 2.9409, "step": 100 }, { "epoch": 0.043478260869565216, "grad_norm": 14.55487060546875, "learning_rate": 7.12630359212051e-06, "loss": 2.8279, "step": 125 }, { "epoch": 0.05217391304347826, "grad_norm": 9.613283157348633, "learning_rate": 8.574739281575898e-06, "loss": 2.6909, "step": 150 }, { "epoch": 0.06086956521739131, "grad_norm": 11.267277717590332, "learning_rate": 1.0023174971031286e-05, "loss": 2.5096, "step": 175 }, { "epoch": 0.06956521739130435, "grad_norm": 8.382554054260254, "learning_rate": 1.1471610660486674e-05, "loss": 2.2595, "step": 200 }, { "epoch": 0.0782608695652174, "grad_norm": 10.061994552612305, "learning_rate": 1.2920046349942064e-05, "loss": 2.003, "step": 225 }, { "epoch": 0.08695652173913043, "grad_norm": 10.652369499206543, "learning_rate": 1.4368482039397451e-05, "loss": 1.8495, "step": 250 }, { "epoch": 0.09565217391304348, "grad_norm": 7.267949104309082, "learning_rate": 1.581691772885284e-05, "loss": 1.5879, "step": 275 }, { "epoch": 0.10434782608695652, "grad_norm": 6.28985595703125, "learning_rate": 1.7265353418308228e-05, "loss": 1.3021, "step": 300 }, { "epoch": 0.11304347826086956, "grad_norm": 6.115077018737793, "learning_rate": 1.8713789107763614e-05, "loss": 1.0811, "step": 325 }, { "epoch": 0.12173913043478261, "grad_norm": 9.993290901184082, "learning_rate": 2.0162224797219005e-05, "loss": 0.9644, "step": 350 }, { "epoch": 0.13043478260869565, "grad_norm": 4.629942417144775, "learning_rate": 2.161066048667439e-05, "loss": 0.7733, "step": 375 }, { "epoch": 0.1391304347826087, "grad_norm": 4.087889194488525, "learning_rate": 2.305909617612978e-05, "loss": 0.6025, "step": 400 }, { "epoch": 0.14782608695652175, "grad_norm": 6.934291362762451, "learning_rate": 2.450753186558517e-05, "loss": 0.4823, "step": 425 }, { "epoch": 0.1565217391304348, "grad_norm": 8.899859428405762, "learning_rate": 2.5955967555040555e-05, "loss": 0.3408, "step": 450 }, { "epoch": 0.16521739130434782, "grad_norm": 5.459203243255615, "learning_rate": 2.7404403244495948e-05, "loss": 0.2558, "step": 475 }, { "epoch": 0.17391304347826086, "grad_norm": 2.3503053188323975, "learning_rate": 2.885283893395133e-05, "loss": 0.2116, "step": 500 }, { "epoch": 0.1826086956521739, "grad_norm": 10.073762893676758, "learning_rate": 3.030127462340672e-05, "loss": 0.1117, "step": 525 }, { "epoch": 0.19130434782608696, "grad_norm": 0.8817861080169678, "learning_rate": 3.1749710312862115e-05, "loss": 0.0779, "step": 550 }, { "epoch": 0.2, "grad_norm": 0.28817218542099, "learning_rate": 3.31981460023175e-05, "loss": 0.0559, "step": 575 }, { "epoch": 0.20869565217391303, "grad_norm": 0.3490276038646698, "learning_rate": 3.464658169177289e-05, "loss": 0.0372, "step": 600 }, { "epoch": 0.21739130434782608, "grad_norm": 0.18767504394054413, "learning_rate": 3.609501738122827e-05, "loss": 0.0273, "step": 625 }, { "epoch": 0.22608695652173913, "grad_norm": 0.16744671761989594, "learning_rate": 3.754345307068367e-05, "loss": 0.0242, "step": 650 }, { "epoch": 0.23478260869565218, "grad_norm": 0.18482975661754608, "learning_rate": 3.899188876013905e-05, "loss": 0.0186, "step": 675 }, { "epoch": 0.24347826086956523, "grad_norm": 0.10977461189031601, "learning_rate": 4.044032444959444e-05, "loss": 0.0153, "step": 700 }, { "epoch": 0.25217391304347825, "grad_norm": 0.09746221452951431, "learning_rate": 4.1888760139049825e-05, "loss": 0.0133, "step": 725 }, { "epoch": 0.2608695652173913, "grad_norm": 0.14220073819160461, "learning_rate": 4.3337195828505215e-05, "loss": 0.0127, "step": 750 }, { "epoch": 0.26956521739130435, "grad_norm": 0.08566069602966309, "learning_rate": 4.4785631517960605e-05, "loss": 0.0191, "step": 775 }, { "epoch": 0.2782608695652174, "grad_norm": 0.09003018587827682, "learning_rate": 4.6234067207415995e-05, "loss": 0.0114, "step": 800 }, { "epoch": 0.28695652173913044, "grad_norm": 0.07937142997980118, "learning_rate": 4.7682502896871385e-05, "loss": 0.0473, "step": 825 }, { "epoch": 0.2956521739130435, "grad_norm": 0.06389954686164856, "learning_rate": 4.913093858632677e-05, "loss": 0.0077, "step": 850 }, { "epoch": 0.30434782608695654, "grad_norm": 0.05928570777177811, "learning_rate": 4.9935583612471015e-05, "loss": 0.0073, "step": 875 }, { "epoch": 0.3130434782608696, "grad_norm": 0.06266701221466064, "learning_rate": 4.977454264364855e-05, "loss": 0.0376, "step": 900 }, { "epoch": 0.3217391304347826, "grad_norm": 0.04980575665831566, "learning_rate": 4.961350167482608e-05, "loss": 0.0125, "step": 925 }, { "epoch": 0.33043478260869563, "grad_norm": 0.3670000731945038, "learning_rate": 4.9452460706003613e-05, "loss": 0.0062, "step": 950 }, { "epoch": 0.3391304347826087, "grad_norm": 0.052209340035915375, "learning_rate": 4.929141973718114e-05, "loss": 0.0499, "step": 975 }, { "epoch": 0.34782608695652173, "grad_norm": 0.04546426981687546, "learning_rate": 4.9136820407111575e-05, "loss": 0.0663, "step": 1000 }, { "epoch": 0.3565217391304348, "grad_norm": 51.16396713256836, "learning_rate": 4.89757794382891e-05, "loss": 0.0721, "step": 1025 }, { "epoch": 0.3652173913043478, "grad_norm": 0.06547929346561432, "learning_rate": 4.881473846946664e-05, "loss": 0.0303, "step": 1050 }, { "epoch": 0.3739130434782609, "grad_norm": 0.03635074943304062, "learning_rate": 4.8653697500644166e-05, "loss": 0.0405, "step": 1075 }, { "epoch": 0.3826086956521739, "grad_norm": 0.05633755028247833, "learning_rate": 4.8492656531821695e-05, "loss": 0.0113, "step": 1100 }, { "epoch": 0.391304347826087, "grad_norm": 0.038711484521627426, "learning_rate": 4.833161556299923e-05, "loss": 0.0446, "step": 1125 }, { "epoch": 0.4, "grad_norm": 0.03419716656208038, "learning_rate": 4.817057459417676e-05, "loss": 0.0065, "step": 1150 }, { "epoch": 0.40869565217391307, "grad_norm": 0.03439817205071449, "learning_rate": 4.800953362535429e-05, "loss": 0.045, "step": 1175 }, { "epoch": 0.41739130434782606, "grad_norm": 0.02928638830780983, "learning_rate": 4.784849265653182e-05, "loss": 0.0039, "step": 1200 }, { "epoch": 0.4260869565217391, "grad_norm": 0.03179425746202469, "learning_rate": 4.7687451687709356e-05, "loss": 0.003, "step": 1225 }, { "epoch": 0.43478260869565216, "grad_norm": 0.022025400772690773, "learning_rate": 4.7526410718886885e-05, "loss": 0.0028, "step": 1250 }, { "epoch": 0.4434782608695652, "grad_norm": 0.027211090549826622, "learning_rate": 4.736536975006442e-05, "loss": 0.0025, "step": 1275 }, { "epoch": 0.45217391304347826, "grad_norm": 0.019419824704527855, "learning_rate": 4.7204328781241955e-05, "loss": 0.0025, "step": 1300 }, { "epoch": 0.4608695652173913, "grad_norm": 0.019226014614105225, "learning_rate": 4.704328781241948e-05, "loss": 0.0024, "step": 1325 }, { "epoch": 0.46956521739130436, "grad_norm": 0.01781400479376316, "learning_rate": 4.688224684359701e-05, "loss": 0.0022, "step": 1350 }, { "epoch": 0.4782608695652174, "grad_norm": 0.0204134713858366, "learning_rate": 4.672120587477454e-05, "loss": 0.0025, "step": 1375 }, { "epoch": 0.48695652173913045, "grad_norm": 0.022227726876735687, "learning_rate": 4.6560164905952075e-05, "loss": 0.0032, "step": 1400 }, { "epoch": 0.4956521739130435, "grad_norm": 0.1570238471031189, "learning_rate": 4.639912393712961e-05, "loss": 0.0506, "step": 1425 }, { "epoch": 0.5043478260869565, "grad_norm": 0.017615189775824547, "learning_rate": 4.623808296830714e-05, "loss": 0.0042, "step": 1450 }, { "epoch": 0.5130434782608696, "grad_norm": 0.017343297600746155, "learning_rate": 4.607704199948467e-05, "loss": 0.0021, "step": 1475 }, { "epoch": 0.5217391304347826, "grad_norm": 0.018280992284417152, "learning_rate": 4.59160010306622e-05, "loss": 0.0019, "step": 1500 }, { "epoch": 0.5304347826086957, "grad_norm": 0.017326297238469124, "learning_rate": 4.5754960061839736e-05, "loss": 0.0164, "step": 1525 }, { "epoch": 0.5391304347826087, "grad_norm": 0.014924581162631512, "learning_rate": 4.5593919093017265e-05, "loss": 0.0587, "step": 1550 }, { "epoch": 0.5478260869565217, "grad_norm": 0.023350143805146217, "learning_rate": 4.54328781241948e-05, "loss": 0.0207, "step": 1575 }, { "epoch": 0.5565217391304348, "grad_norm": 0.014112471602857113, "learning_rate": 4.527183715537233e-05, "loss": 0.0038, "step": 1600 }, { "epoch": 0.5652173913043478, "grad_norm": 0.016805477440357208, "learning_rate": 4.5110796186549856e-05, "loss": 0.0457, "step": 1625 }, { "epoch": 0.5739130434782609, "grad_norm": 0.018589412793517113, "learning_rate": 4.494975521772739e-05, "loss": 0.1005, "step": 1650 }, { "epoch": 0.5826086956521739, "grad_norm": 0.021146375685930252, "learning_rate": 4.478871424890492e-05, "loss": 0.0019, "step": 1675 }, { "epoch": 0.591304347826087, "grad_norm": 0.014253458008170128, "learning_rate": 4.4627673280082455e-05, "loss": 0.0019, "step": 1700 }, { "epoch": 0.6, "grad_norm": 0.02895612083375454, "learning_rate": 4.446663231125999e-05, "loss": 0.0373, "step": 1725 }, { "epoch": 0.6086956521739131, "grad_norm": 0.012195841409265995, "learning_rate": 4.430559134243752e-05, "loss": 0.0018, "step": 1750 }, { "epoch": 0.6173913043478261, "grad_norm": 0.04872201383113861, "learning_rate": 4.414455037361505e-05, "loss": 0.0344, "step": 1775 }, { "epoch": 0.6260869565217392, "grad_norm": 0.4130329489707947, "learning_rate": 4.398350940479258e-05, "loss": 0.0258, "step": 1800 }, { "epoch": 0.6347826086956522, "grad_norm": 0.4404933750629425, "learning_rate": 4.3822468435970116e-05, "loss": 0.0089, "step": 1825 }, { "epoch": 0.6434782608695652, "grad_norm": 0.012819499708712101, "learning_rate": 4.3661427467147645e-05, "loss": 0.0178, "step": 1850 }, { "epoch": 0.6521739130434783, "grad_norm": 0.020058365538716316, "learning_rate": 4.350038649832517e-05, "loss": 0.0015, "step": 1875 }, { "epoch": 0.6608695652173913, "grad_norm": 0.05424540862441063, "learning_rate": 4.333934552950271e-05, "loss": 0.0084, "step": 1900 }, { "epoch": 0.6695652173913044, "grad_norm": 0.011423332616686821, "learning_rate": 4.3178304560680236e-05, "loss": 0.0014, "step": 1925 }, { "epoch": 0.6782608695652174, "grad_norm": 0.03126579150557518, "learning_rate": 4.301726359185777e-05, "loss": 0.0012, "step": 1950 }, { "epoch": 0.6869565217391305, "grad_norm": 0.010747378692030907, "learning_rate": 4.28562226230353e-05, "loss": 0.0011, "step": 1975 }, { "epoch": 0.6956521739130435, "grad_norm": 0.010900999419391155, "learning_rate": 4.2695181654212834e-05, "loss": 0.001, "step": 2000 }, { "epoch": 0.7043478260869566, "grad_norm": 0.10920006781816483, "learning_rate": 4.253414068539037e-05, "loss": 0.0371, "step": 2025 }, { "epoch": 0.7130434782608696, "grad_norm": 0.008641124702990055, "learning_rate": 4.2379541355320796e-05, "loss": 0.009, "step": 2050 }, { "epoch": 0.7217391304347827, "grad_norm": 0.00958116352558136, "learning_rate": 4.2218500386498324e-05, "loss": 0.0011, "step": 2075 }, { "epoch": 0.7304347826086957, "grad_norm": 0.007281237281858921, "learning_rate": 4.205745941767586e-05, "loss": 0.001, "step": 2100 }, { "epoch": 0.7391304347826086, "grad_norm": 0.00872531346976757, "learning_rate": 4.1896418448853394e-05, "loss": 0.0285, "step": 2125 }, { "epoch": 0.7478260869565218, "grad_norm": 0.008862826973199844, "learning_rate": 4.173537748003092e-05, "loss": 0.0229, "step": 2150 }, { "epoch": 0.7565217391304347, "grad_norm": 0.012311381287872791, "learning_rate": 4.157433651120846e-05, "loss": 0.0682, "step": 2175 }, { "epoch": 0.7652173913043478, "grad_norm": 0.009278792887926102, "learning_rate": 4.1413295542385986e-05, "loss": 0.0517, "step": 2200 }, { "epoch": 0.7739130434782608, "grad_norm": 0.034189265221357346, "learning_rate": 4.1252254573563514e-05, "loss": 0.0803, "step": 2225 }, { "epoch": 0.782608695652174, "grad_norm": 0.043665286153554916, "learning_rate": 4.109121360474105e-05, "loss": 0.1145, "step": 2250 }, { "epoch": 0.7913043478260869, "grad_norm": 0.08209193497896194, "learning_rate": 4.093017263591858e-05, "loss": 0.0125, "step": 2275 }, { "epoch": 0.8, "grad_norm": 0.01089281216263771, "learning_rate": 4.076913166709611e-05, "loss": 0.0011, "step": 2300 }, { "epoch": 0.808695652173913, "grad_norm": 0.007122454699128866, "learning_rate": 4.060809069827364e-05, "loss": 0.0045, "step": 2325 }, { "epoch": 0.8173913043478261, "grad_norm": 0.009066189639270306, "learning_rate": 4.0447049729451176e-05, "loss": 0.0025, "step": 2350 }, { "epoch": 0.8260869565217391, "grad_norm": 0.00853909645229578, "learning_rate": 4.0286008760628704e-05, "loss": 0.0009, "step": 2375 }, { "epoch": 0.8347826086956521, "grad_norm": 0.007053891196846962, "learning_rate": 4.012496779180624e-05, "loss": 0.0008, "step": 2400 }, { "epoch": 0.8434782608695652, "grad_norm": 0.007313120178878307, "learning_rate": 3.9963926822983774e-05, "loss": 0.0013, "step": 2425 }, { "epoch": 0.8521739130434782, "grad_norm": 0.009188150055706501, "learning_rate": 3.9802885854161296e-05, "loss": 0.0685, "step": 2450 }, { "epoch": 0.8608695652173913, "grad_norm": 0.010708467103540897, "learning_rate": 3.964184488533883e-05, "loss": 0.0845, "step": 2475 }, { "epoch": 0.8695652173913043, "grad_norm": 0.009248602204024792, "learning_rate": 3.948080391651636e-05, "loss": 0.0621, "step": 2500 }, { "epoch": 0.8782608695652174, "grad_norm": 0.008605259470641613, "learning_rate": 3.9319762947693894e-05, "loss": 0.001, "step": 2525 }, { "epoch": 0.8869565217391304, "grad_norm": 0.00624378165230155, "learning_rate": 3.915872197887143e-05, "loss": 0.026, "step": 2550 }, { "epoch": 0.8956521739130435, "grad_norm": 0.008726169355213642, "learning_rate": 3.899768101004896e-05, "loss": 0.0344, "step": 2575 }, { "epoch": 0.9043478260869565, "grad_norm": 0.007860297337174416, "learning_rate": 3.883664004122649e-05, "loss": 0.0321, "step": 2600 }, { "epoch": 0.9130434782608695, "grad_norm": 0.006469408981502056, "learning_rate": 3.867559907240402e-05, "loss": 0.0456, "step": 2625 }, { "epoch": 0.9217391304347826, "grad_norm": 0.025073576718568802, "learning_rate": 3.8514558103581556e-05, "loss": 0.0607, "step": 2650 }, { "epoch": 0.9304347826086956, "grad_norm": 0.005454166326671839, "learning_rate": 3.8353517134759084e-05, "loss": 0.0007, "step": 2675 }, { "epoch": 0.9391304347826087, "grad_norm": 0.005138472653925419, "learning_rate": 3.819247616593661e-05, "loss": 0.0008, "step": 2700 }, { "epoch": 0.9478260869565217, "grad_norm": 0.04282465949654579, "learning_rate": 3.803143519711415e-05, "loss": 0.0007, "step": 2725 }, { "epoch": 0.9565217391304348, "grad_norm": 0.00715728010982275, "learning_rate": 3.7870394228291676e-05, "loss": 0.0387, "step": 2750 }, { "epoch": 0.9652173913043478, "grad_norm": 0.005337136331945658, "learning_rate": 3.770935325946921e-05, "loss": 0.0007, "step": 2775 }, { "epoch": 0.9739130434782609, "grad_norm": 0.005603776779025793, "learning_rate": 3.754831229064674e-05, "loss": 0.0006, "step": 2800 }, { "epoch": 0.9826086956521739, "grad_norm": 0.005324610508978367, "learning_rate": 3.7387271321824274e-05, "loss": 0.0392, "step": 2825 }, { "epoch": 0.991304347826087, "grad_norm": 0.004979921039193869, "learning_rate": 3.722623035300181e-05, "loss": 0.0204, "step": 2850 }, { "epoch": 1.0, "grad_norm": 0.0061447713524103165, "learning_rate": 3.706518938417934e-05, "loss": 0.0007, "step": 2875 }, { "epoch": 1.0, "eval_accuracy": 1.0, "eval_f1_macro": 1.0, "eval_f1_micro": 1.0, "eval_f1_weighted": 1.0, "eval_loss": 0.0005410231533460319, "eval_precision_macro": 1.0, "eval_precision_micro": 1.0, "eval_precision_weighted": 1.0, "eval_recall_macro": 1.0, "eval_recall_micro": 1.0, "eval_recall_weighted": 1.0, "eval_runtime": 5.0042, "eval_samples_per_second": 459.614, "eval_steps_per_second": 28.776, "step": 2875 }, { "epoch": 1.008695652173913, "grad_norm": 0.005072788801044226, "learning_rate": 3.690414841535687e-05, "loss": 0.0014, "step": 2900 }, { "epoch": 1.017391304347826, "grad_norm": 0.004767073784023523, "learning_rate": 3.67431074465344e-05, "loss": 0.0039, "step": 2925 }, { "epoch": 1.0260869565217392, "grad_norm": 0.019580593332648277, "learning_rate": 3.658206647771193e-05, "loss": 0.0129, "step": 2950 }, { "epoch": 1.0347826086956522, "grad_norm": 0.004164039622992277, "learning_rate": 3.6421025508889464e-05, "loss": 0.0005, "step": 2975 }, { "epoch": 1.0434782608695652, "grad_norm": 0.006551030091941357, "learning_rate": 3.625998454006699e-05, "loss": 0.0005, "step": 3000 }, { "epoch": 1.0521739130434782, "grad_norm": 0.004048625007271767, "learning_rate": 3.609894357124453e-05, "loss": 0.0005, "step": 3025 }, { "epoch": 1.0608695652173914, "grad_norm": 0.003683075774461031, "learning_rate": 3.5937902602422056e-05, "loss": 0.0005, "step": 3050 }, { "epoch": 1.0695652173913044, "grad_norm": 0.003764552529901266, "learning_rate": 3.577686163359959e-05, "loss": 0.0005, "step": 3075 }, { "epoch": 1.0782608695652174, "grad_norm": 0.014071044512093067, "learning_rate": 3.561582066477712e-05, "loss": 0.0005, "step": 3100 }, { "epoch": 1.0869565217391304, "grad_norm": 0.004224707838147879, "learning_rate": 3.5454779695954654e-05, "loss": 0.0004, "step": 3125 }, { "epoch": 1.0956521739130434, "grad_norm": 0.007790651638060808, "learning_rate": 3.529373872713219e-05, "loss": 0.0004, "step": 3150 }, { "epoch": 1.1043478260869566, "grad_norm": 0.0033269149716943502, "learning_rate": 3.513269775830972e-05, "loss": 0.0004, "step": 3175 }, { "epoch": 1.1130434782608696, "grad_norm": 0.003678582375869155, "learning_rate": 3.4971656789487245e-05, "loss": 0.0004, "step": 3200 }, { "epoch": 1.1217391304347826, "grad_norm": 0.415933758020401, "learning_rate": 3.4810615820664774e-05, "loss": 0.0471, "step": 3225 }, { "epoch": 1.1304347826086956, "grad_norm": 0.003935901448130608, "learning_rate": 3.464957485184231e-05, "loss": 0.0008, "step": 3250 }, { "epoch": 1.1391304347826088, "grad_norm": 0.005461950786411762, "learning_rate": 3.4488533883019844e-05, "loss": 0.0868, "step": 3275 }, { "epoch": 1.1478260869565218, "grad_norm": 0.00403949897736311, "learning_rate": 3.432749291419737e-05, "loss": 0.0314, "step": 3300 }, { "epoch": 1.1565217391304348, "grad_norm": 0.0038034759927541018, "learning_rate": 3.416645194537491e-05, "loss": 0.0006, "step": 3325 }, { "epoch": 1.1652173913043478, "grad_norm": 0.003490498522296548, "learning_rate": 3.4005410976552435e-05, "loss": 0.0004, "step": 3350 }, { "epoch": 1.1739130434782608, "grad_norm": 0.002800930989906192, "learning_rate": 3.384437000772997e-05, "loss": 0.0004, "step": 3375 }, { "epoch": 1.182608695652174, "grad_norm": 0.0032401601783931255, "learning_rate": 3.36833290389075e-05, "loss": 0.0004, "step": 3400 }, { "epoch": 1.191304347826087, "grad_norm": 0.008039023727178574, "learning_rate": 3.3522288070085034e-05, "loss": 0.1032, "step": 3425 }, { "epoch": 1.2, "grad_norm": 0.0048836818896234035, "learning_rate": 3.336124710126256e-05, "loss": 0.0007, "step": 3450 }, { "epoch": 1.208695652173913, "grad_norm": 0.01012449711561203, "learning_rate": 3.320020613244009e-05, "loss": 0.0223, "step": 3475 }, { "epoch": 1.2173913043478262, "grad_norm": 0.005898616276681423, "learning_rate": 3.3039165163617625e-05, "loss": 0.0143, "step": 3500 }, { "epoch": 1.2260869565217392, "grad_norm": 7.748152256011963, "learning_rate": 3.2878124194795154e-05, "loss": 0.0928, "step": 3525 }, { "epoch": 1.2347826086956522, "grad_norm": 0.0053124018013477325, "learning_rate": 3.271708322597269e-05, "loss": 0.0045, "step": 3550 }, { "epoch": 1.2434782608695651, "grad_norm": 0.009419775567948818, "learning_rate": 3.2556042257150224e-05, "loss": 0.0008, "step": 3575 }, { "epoch": 1.2521739130434781, "grad_norm": 0.0048626321367919445, "learning_rate": 3.239500128832775e-05, "loss": 0.0005, "step": 3600 }, { "epoch": 1.2608695652173914, "grad_norm": 0.004047353286296129, "learning_rate": 3.223396031950529e-05, "loss": 0.0004, "step": 3625 }, { "epoch": 1.2695652173913043, "grad_norm": 0.0036859684623777866, "learning_rate": 3.2072919350682815e-05, "loss": 0.0484, "step": 3650 }, { "epoch": 1.2782608695652173, "grad_norm": 0.007856795564293861, "learning_rate": 3.191187838186035e-05, "loss": 0.022, "step": 3675 }, { "epoch": 1.2869565217391306, "grad_norm": 0.0035763189662247896, "learning_rate": 3.175083741303788e-05, "loss": 0.0004, "step": 3700 }, { "epoch": 1.2956521739130435, "grad_norm": 0.003156292950734496, "learning_rate": 3.158979644421541e-05, "loss": 0.0004, "step": 3725 }, { "epoch": 1.3043478260869565, "grad_norm": 0.003629323560744524, "learning_rate": 3.142875547539294e-05, "loss": 0.0004, "step": 3750 }, { "epoch": 1.3130434782608695, "grad_norm": 68.89725494384766, "learning_rate": 3.126771450657047e-05, "loss": 0.0249, "step": 3775 }, { "epoch": 1.3217391304347825, "grad_norm": 0.004872568417340517, "learning_rate": 3.1106673537748005e-05, "loss": 0.106, "step": 3800 }, { "epoch": 1.3304347826086955, "grad_norm": 0.0038311562966555357, "learning_rate": 3.0945632568925534e-05, "loss": 0.0008, "step": 3825 }, { "epoch": 1.3391304347826087, "grad_norm": 0.01143663190305233, "learning_rate": 3.078459160010307e-05, "loss": 0.0433, "step": 3850 }, { "epoch": 1.3478260869565217, "grad_norm": 0.00519527355208993, "learning_rate": 3.0623550631280604e-05, "loss": 0.0004, "step": 3875 }, { "epoch": 1.3565217391304347, "grad_norm": 0.0031176016200333834, "learning_rate": 3.0462509662458132e-05, "loss": 0.0005, "step": 3900 }, { "epoch": 1.365217391304348, "grad_norm": 0.0033473202493041754, "learning_rate": 3.0301468693635664e-05, "loss": 0.0003, "step": 3925 }, { "epoch": 1.373913043478261, "grad_norm": 0.0031863965559750795, "learning_rate": 3.0140427724813192e-05, "loss": 0.0003, "step": 3950 }, { "epoch": 1.382608695652174, "grad_norm": 0.002523756120353937, "learning_rate": 2.9979386755990724e-05, "loss": 0.0318, "step": 3975 }, { "epoch": 1.391304347826087, "grad_norm": 0.0034575534518808126, "learning_rate": 2.9818345787168255e-05, "loss": 0.0003, "step": 4000 }, { "epoch": 1.4, "grad_norm": 0.004556054249405861, "learning_rate": 2.9657304818345787e-05, "loss": 0.0003, "step": 4025 }, { "epoch": 1.4086956521739131, "grad_norm": 0.009268323890864849, "learning_rate": 2.949626384952332e-05, "loss": 0.0296, "step": 4050 }, { "epoch": 1.4173913043478261, "grad_norm": 0.01629672758281231, "learning_rate": 2.9335222880700854e-05, "loss": 0.0021, "step": 4075 }, { "epoch": 1.4260869565217391, "grad_norm": 0.0037628798745572567, "learning_rate": 2.9174181911878385e-05, "loss": 0.0015, "step": 4100 }, { "epoch": 1.434782608695652, "grad_norm": 0.004670215770602226, "learning_rate": 2.9013140943055917e-05, "loss": 0.0329, "step": 4125 }, { "epoch": 1.4434782608695653, "grad_norm": 0.003128861775621772, "learning_rate": 2.885209997423345e-05, "loss": 0.0361, "step": 4150 }, { "epoch": 1.4521739130434783, "grad_norm": 0.0030446811579167843, "learning_rate": 2.869105900541098e-05, "loss": 0.0003, "step": 4175 }, { "epoch": 1.4608695652173913, "grad_norm": 0.0032489860896021128, "learning_rate": 2.853001803658851e-05, "loss": 0.0355, "step": 4200 }, { "epoch": 1.4695652173913043, "grad_norm": 0.002284318907186389, "learning_rate": 2.836897706776604e-05, "loss": 0.0276, "step": 4225 }, { "epoch": 1.4782608695652173, "grad_norm": 0.005029418971389532, "learning_rate": 2.8207936098943572e-05, "loss": 0.0617, "step": 4250 }, { "epoch": 1.4869565217391305, "grad_norm": 0.0027885879389941692, "learning_rate": 2.8046895130121104e-05, "loss": 0.0004, "step": 4275 }, { "epoch": 1.4956521739130435, "grad_norm": 0.0034100012853741646, "learning_rate": 2.7885854161298635e-05, "loss": 0.0003, "step": 4300 }, { "epoch": 1.5043478260869565, "grad_norm": 0.0024735534097999334, "learning_rate": 2.7724813192476167e-05, "loss": 0.0003, "step": 4325 }, { "epoch": 1.5130434782608697, "grad_norm": 0.0018064508913084865, "learning_rate": 2.75637722236537e-05, "loss": 0.0003, "step": 4350 }, { "epoch": 1.5217391304347827, "grad_norm": 0.001821321900933981, "learning_rate": 2.7402731254831234e-05, "loss": 0.0003, "step": 4375 }, { "epoch": 1.5304347826086957, "grad_norm": 0.002236420288681984, "learning_rate": 2.7241690286008765e-05, "loss": 0.0003, "step": 4400 }, { "epoch": 1.5391304347826087, "grad_norm": 0.0018212420400232077, "learning_rate": 2.708064931718629e-05, "loss": 0.0717, "step": 4425 }, { "epoch": 1.5478260869565217, "grad_norm": 0.0029810129199177027, "learning_rate": 2.6919608348363822e-05, "loss": 0.0003, "step": 4450 }, { "epoch": 1.5565217391304347, "grad_norm": 0.002055574208498001, "learning_rate": 2.6758567379541353e-05, "loss": 0.0003, "step": 4475 }, { "epoch": 1.5652173913043477, "grad_norm": 0.002401245990768075, "learning_rate": 2.659752641071889e-05, "loss": 0.0471, "step": 4500 }, { "epoch": 1.5739130434782609, "grad_norm": 0.0019228613236919045, "learning_rate": 2.643648544189642e-05, "loss": 0.004, "step": 4525 }, { "epoch": 1.5826086956521739, "grad_norm": 0.01082328986376524, "learning_rate": 2.6275444473073952e-05, "loss": 0.0004, "step": 4550 }, { "epoch": 1.591304347826087, "grad_norm": 0.004519768990576267, "learning_rate": 2.6114403504251483e-05, "loss": 0.0003, "step": 4575 }, { "epoch": 1.6, "grad_norm": 0.004080561455339193, "learning_rate": 2.5953362535429015e-05, "loss": 0.0003, "step": 4600 }, { "epoch": 1.608695652173913, "grad_norm": 0.0020184165332466364, "learning_rate": 2.5792321566606547e-05, "loss": 0.0003, "step": 4625 }, { "epoch": 1.617391304347826, "grad_norm": 0.00274313660338521, "learning_rate": 2.563128059778408e-05, "loss": 0.0002, "step": 4650 }, { "epoch": 1.626086956521739, "grad_norm": 0.0025938046164810658, "learning_rate": 2.5470239628961607e-05, "loss": 0.0002, "step": 4675 }, { "epoch": 1.634782608695652, "grad_norm": 0.002224980155006051, "learning_rate": 2.530919866013914e-05, "loss": 0.0002, "step": 4700 }, { "epoch": 1.643478260869565, "grad_norm": 0.0018848755862563848, "learning_rate": 2.514815769131667e-05, "loss": 0.0002, "step": 4725 }, { "epoch": 1.6521739130434783, "grad_norm": 0.0021186743397265673, "learning_rate": 2.4987116722494202e-05, "loss": 0.012, "step": 4750 }, { "epoch": 1.6608695652173913, "grad_norm": 0.002839816967025399, "learning_rate": 2.4826075753671733e-05, "loss": 0.0732, "step": 4775 }, { "epoch": 1.6695652173913045, "grad_norm": 0.0023581942077726126, "learning_rate": 2.466503478484927e-05, "loss": 0.0441, "step": 4800 }, { "epoch": 1.6782608695652175, "grad_norm": 0.0025644400157034397, "learning_rate": 2.45039938160268e-05, "loss": 0.0002, "step": 4825 }, { "epoch": 1.6869565217391305, "grad_norm": 0.0018391634803265333, "learning_rate": 2.4342952847204332e-05, "loss": 0.0003, "step": 4850 }, { "epoch": 1.6956521739130435, "grad_norm": 0.002724371151998639, "learning_rate": 2.418191187838186e-05, "loss": 0.0002, "step": 4875 }, { "epoch": 1.7043478260869565, "grad_norm": 0.002339495113119483, "learning_rate": 2.402087090955939e-05, "loss": 0.0002, "step": 4900 }, { "epoch": 1.7130434782608694, "grad_norm": 0.0015120344469323754, "learning_rate": 2.3859829940736923e-05, "loss": 0.0059, "step": 4925 }, { "epoch": 1.7217391304347827, "grad_norm": 0.0019244271097704768, "learning_rate": 2.369878897191446e-05, "loss": 0.0488, "step": 4950 }, { "epoch": 1.7304347826086957, "grad_norm": 0.006782891228795052, "learning_rate": 2.353774800309199e-05, "loss": 0.0002, "step": 4975 }, { "epoch": 1.7391304347826086, "grad_norm": 0.006890356075018644, "learning_rate": 2.337670703426952e-05, "loss": 0.0002, "step": 5000 }, { "epoch": 1.7478260869565219, "grad_norm": 0.0016472332645207644, "learning_rate": 2.321566606544705e-05, "loss": 0.0002, "step": 5025 }, { "epoch": 1.7565217391304349, "grad_norm": 0.002031004289165139, "learning_rate": 2.305462509662458e-05, "loss": 0.0002, "step": 5050 }, { "epoch": 1.7652173913043478, "grad_norm": 0.020191390067338943, "learning_rate": 2.2893584127802113e-05, "loss": 0.0006, "step": 5075 }, { "epoch": 1.7739130434782608, "grad_norm": 0.0014919223031029105, "learning_rate": 2.273254315897965e-05, "loss": 0.0002, "step": 5100 }, { "epoch": 1.7826086956521738, "grad_norm": 0.0019313708180561662, "learning_rate": 2.2571502190157177e-05, "loss": 0.0002, "step": 5125 }, { "epoch": 1.7913043478260868, "grad_norm": 0.0022281836718320847, "learning_rate": 2.2410461221334708e-05, "loss": 0.0002, "step": 5150 }, { "epoch": 1.8, "grad_norm": 0.0013652993366122246, "learning_rate": 2.224942025251224e-05, "loss": 0.0002, "step": 5175 }, { "epoch": 1.808695652173913, "grad_norm": 0.00228705327026546, "learning_rate": 2.208837928368977e-05, "loss": 0.0002, "step": 5200 }, { "epoch": 1.8173913043478263, "grad_norm": 0.001371930236928165, "learning_rate": 2.1927338314867303e-05, "loss": 0.0002, "step": 5225 }, { "epoch": 1.8260869565217392, "grad_norm": 0.0013612714828923345, "learning_rate": 2.1766297346044835e-05, "loss": 0.0002, "step": 5250 }, { "epoch": 1.8347826086956522, "grad_norm": 0.0018417349783703685, "learning_rate": 2.1605256377222367e-05, "loss": 0.0002, "step": 5275 }, { "epoch": 1.8434782608695652, "grad_norm": 0.0018068786012008786, "learning_rate": 2.1444215408399898e-05, "loss": 0.0002, "step": 5300 }, { "epoch": 1.8521739130434782, "grad_norm": 0.0039210072718560696, "learning_rate": 2.128317443957743e-05, "loss": 0.0002, "step": 5325 }, { "epoch": 1.8608695652173912, "grad_norm": 0.002227018354460597, "learning_rate": 2.112213347075496e-05, "loss": 0.0156, "step": 5350 }, { "epoch": 1.8695652173913042, "grad_norm": 0.001460268278606236, "learning_rate": 2.0961092501932493e-05, "loss": 0.0202, "step": 5375 }, { "epoch": 1.8782608695652174, "grad_norm": 0.001318955677561462, "learning_rate": 2.0800051533110025e-05, "loss": 0.0101, "step": 5400 }, { "epoch": 1.8869565217391304, "grad_norm": 0.0019615204073488712, "learning_rate": 2.0639010564287557e-05, "loss": 0.0002, "step": 5425 }, { "epoch": 1.8956521739130436, "grad_norm": 0.0014419632498174906, "learning_rate": 2.0477969595465088e-05, "loss": 0.0002, "step": 5450 }, { "epoch": 1.9043478260869566, "grad_norm": 0.00159283762332052, "learning_rate": 2.031692862664262e-05, "loss": 0.0002, "step": 5475 }, { "epoch": 1.9130434782608696, "grad_norm": 0.0014433007454499602, "learning_rate": 2.0155887657820148e-05, "loss": 0.0002, "step": 5500 }, { "epoch": 1.9217391304347826, "grad_norm": 0.0011873416369780898, "learning_rate": 1.9994846688997683e-05, "loss": 0.0002, "step": 5525 }, { "epoch": 1.9304347826086956, "grad_norm": 0.0015813339268788695, "learning_rate": 1.9833805720175215e-05, "loss": 0.0002, "step": 5550 }, { "epoch": 1.9391304347826086, "grad_norm": 0.001116643426939845, "learning_rate": 1.9672764751352747e-05, "loss": 0.0003, "step": 5575 }, { "epoch": 1.9478260869565216, "grad_norm": 0.0013602287508547306, "learning_rate": 1.9511723782530278e-05, "loss": 0.0001, "step": 5600 }, { "epoch": 1.9565217391304348, "grad_norm": 0.001626876532100141, "learning_rate": 1.9350682813707806e-05, "loss": 0.0002, "step": 5625 }, { "epoch": 1.9652173913043478, "grad_norm": 0.0015673220623284578, "learning_rate": 1.9189641844885338e-05, "loss": 0.0012, "step": 5650 }, { "epoch": 1.973913043478261, "grad_norm": 0.0014821627410128713, "learning_rate": 1.902860087606287e-05, "loss": 0.0001, "step": 5675 }, { "epoch": 1.982608695652174, "grad_norm": 0.02127527818083763, "learning_rate": 1.8867559907240405e-05, "loss": 0.0491, "step": 5700 }, { "epoch": 1.991304347826087, "grad_norm": 0.0015324688283726573, "learning_rate": 1.8706518938417936e-05, "loss": 0.0002, "step": 5725 }, { "epoch": 2.0, "grad_norm": 0.0018975608982145786, "learning_rate": 1.8545477969595465e-05, "loss": 0.0002, "step": 5750 }, { "epoch": 2.0, "eval_accuracy": 1.0, "eval_f1_macro": 1.0, "eval_f1_micro": 1.0, "eval_f1_weighted": 1.0, "eval_loss": 0.00012181053898530081, "eval_precision_macro": 1.0, "eval_precision_micro": 1.0, "eval_precision_weighted": 1.0, "eval_recall_macro": 1.0, "eval_recall_micro": 1.0, "eval_recall_weighted": 1.0, "eval_runtime": 5.0295, "eval_samples_per_second": 457.3, "eval_steps_per_second": 28.631, "step": 5750 } ], "logging_steps": 25, "max_steps": 8625, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3026347648512000.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }