{
  "best_metric": 0.00012181053898530081,
  "best_model_checkpoint": "autotrain-l9v4l-l3gs1/checkpoint-5750",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 5750,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008695652173913044,
      "grad_norm": 8.19902229309082,
      "learning_rate": 1.3904982618771727e-06,
      "loss": 3.1857,
      "step": 25
    },
    {
      "epoch": 0.017391304347826087,
      "grad_norm": 9.214441299438477,
      "learning_rate": 2.838933951332561e-06,
      "loss": 3.1368,
      "step": 50
    },
    {
      "epoch": 0.02608695652173913,
      "grad_norm": 9.295684814453125,
      "learning_rate": 4.229432213209733e-06,
      "loss": 3.0863,
      "step": 75
    },
    {
      "epoch": 0.034782608695652174,
      "grad_norm": 15.308918952941895,
      "learning_rate": 5.677867902665122e-06,
      "loss": 2.9409,
      "step": 100
    },
    {
      "epoch": 0.043478260869565216,
      "grad_norm": 14.55487060546875,
      "learning_rate": 7.12630359212051e-06,
      "loss": 2.8279,
      "step": 125
    },
    {
      "epoch": 0.05217391304347826,
      "grad_norm": 9.613283157348633,
      "learning_rate": 8.574739281575898e-06,
      "loss": 2.6909,
      "step": 150
    },
    {
      "epoch": 0.06086956521739131,
      "grad_norm": 11.267277717590332,
      "learning_rate": 1.0023174971031286e-05,
      "loss": 2.5096,
      "step": 175
    },
    {
      "epoch": 0.06956521739130435,
      "grad_norm": 8.382554054260254,
      "learning_rate": 1.1471610660486674e-05,
      "loss": 2.2595,
      "step": 200
    },
    {
      "epoch": 0.0782608695652174,
      "grad_norm": 10.061994552612305,
      "learning_rate": 1.2920046349942064e-05,
      "loss": 2.003,
      "step": 225
    },
    {
      "epoch": 0.08695652173913043,
      "grad_norm": 10.652369499206543,
      "learning_rate": 1.4368482039397451e-05,
      "loss": 1.8495,
      "step": 250
    },
    {
      "epoch": 0.09565217391304348,
      "grad_norm": 7.267949104309082,
      "learning_rate": 1.581691772885284e-05,
      "loss": 1.5879,
      "step": 275
    },
    {
      "epoch": 0.10434782608695652,
      "grad_norm": 6.28985595703125,
      "learning_rate": 1.7265353418308228e-05,
      "loss": 1.3021,
      "step": 300
    },
    {
      "epoch": 0.11304347826086956,
      "grad_norm": 6.115077018737793,
      "learning_rate": 1.8713789107763614e-05,
      "loss": 1.0811,
      "step": 325
    },
    {
      "epoch": 0.12173913043478261,
      "grad_norm": 9.993290901184082,
      "learning_rate": 2.0162224797219005e-05,
      "loss": 0.9644,
      "step": 350
    },
    {
      "epoch": 0.13043478260869565,
      "grad_norm": 4.629942417144775,
      "learning_rate": 2.161066048667439e-05,
      "loss": 0.7733,
      "step": 375
    },
    {
      "epoch": 0.1391304347826087,
      "grad_norm": 4.087889194488525,
      "learning_rate": 2.305909617612978e-05,
      "loss": 0.6025,
      "step": 400
    },
    {
      "epoch": 0.14782608695652175,
      "grad_norm": 6.934291362762451,
      "learning_rate": 2.450753186558517e-05,
      "loss": 0.4823,
      "step": 425
    },
    {
      "epoch": 0.1565217391304348,
      "grad_norm": 8.899859428405762,
      "learning_rate": 2.5955967555040555e-05,
      "loss": 0.3408,
      "step": 450
    },
    {
      "epoch": 0.16521739130434782,
      "grad_norm": 5.459203243255615,
      "learning_rate": 2.7404403244495948e-05,
      "loss": 0.2558,
      "step": 475
    },
    {
      "epoch": 0.17391304347826086,
      "grad_norm": 2.3503053188323975,
      "learning_rate": 2.885283893395133e-05,
      "loss": 0.2116,
      "step": 500
    },
    {
      "epoch": 0.1826086956521739,
      "grad_norm": 10.073762893676758,
      "learning_rate": 3.030127462340672e-05,
      "loss": 0.1117,
      "step": 525
    },
    {
      "epoch": 0.19130434782608696,
      "grad_norm": 0.8817861080169678,
      "learning_rate": 3.1749710312862115e-05,
      "loss": 0.0779,
      "step": 550
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.28817218542099,
      "learning_rate": 3.31981460023175e-05,
      "loss": 0.0559,
      "step": 575
    },
    {
      "epoch": 0.20869565217391303,
      "grad_norm": 0.3490276038646698,
      "learning_rate": 3.464658169177289e-05,
      "loss": 0.0372,
      "step": 600
    },
    {
      "epoch": 0.21739130434782608,
      "grad_norm": 0.18767504394054413,
      "learning_rate": 3.609501738122827e-05,
      "loss": 0.0273,
      "step": 625
    },
    {
      "epoch": 0.22608695652173913,
      "grad_norm": 0.16744671761989594,
      "learning_rate": 3.754345307068367e-05,
      "loss": 0.0242,
      "step": 650
    },
    {
      "epoch": 0.23478260869565218,
      "grad_norm": 0.18482975661754608,
      "learning_rate": 3.899188876013905e-05,
      "loss": 0.0186,
      "step": 675
    },
    {
      "epoch": 0.24347826086956523,
      "grad_norm": 0.10977461189031601,
      "learning_rate": 4.044032444959444e-05,
      "loss": 0.0153,
      "step": 700
    },
    {
      "epoch": 0.25217391304347825,
      "grad_norm": 0.09746221452951431,
      "learning_rate": 4.1888760139049825e-05,
      "loss": 0.0133,
      "step": 725
    },
    {
      "epoch": 0.2608695652173913,
      "grad_norm": 0.14220073819160461,
      "learning_rate": 4.3337195828505215e-05,
      "loss": 0.0127,
      "step": 750
    },
    {
      "epoch": 0.26956521739130435,
      "grad_norm": 0.08566069602966309,
      "learning_rate": 4.4785631517960605e-05,
      "loss": 0.0191,
      "step": 775
    },
    {
      "epoch": 0.2782608695652174,
      "grad_norm": 0.09003018587827682,
      "learning_rate": 4.6234067207415995e-05,
      "loss": 0.0114,
      "step": 800
    },
    {
      "epoch": 0.28695652173913044,
      "grad_norm": 0.07937142997980118,
      "learning_rate": 4.7682502896871385e-05,
      "loss": 0.0473,
      "step": 825
    },
    {
      "epoch": 0.2956521739130435,
      "grad_norm": 0.06389954686164856,
      "learning_rate": 4.913093858632677e-05,
      "loss": 0.0077,
      "step": 850
    },
    {
      "epoch": 0.30434782608695654,
      "grad_norm": 0.05928570777177811,
      "learning_rate": 4.9935583612471015e-05,
      "loss": 0.0073,
      "step": 875
    },
    {
      "epoch": 0.3130434782608696,
      "grad_norm": 0.06266701221466064,
      "learning_rate": 4.977454264364855e-05,
      "loss": 0.0376,
      "step": 900
    },
    {
      "epoch": 0.3217391304347826,
      "grad_norm": 0.04980575665831566,
      "learning_rate": 4.961350167482608e-05,
      "loss": 0.0125,
      "step": 925
    },
    {
      "epoch": 0.33043478260869563,
      "grad_norm": 0.3670000731945038,
      "learning_rate": 4.9452460706003613e-05,
      "loss": 0.0062,
      "step": 950
    },
    {
      "epoch": 0.3391304347826087,
      "grad_norm": 0.052209340035915375,
      "learning_rate": 4.929141973718114e-05,
      "loss": 0.0499,
      "step": 975
    },
    {
      "epoch": 0.34782608695652173,
      "grad_norm": 0.04546426981687546,
      "learning_rate": 4.9136820407111575e-05,
      "loss": 0.0663,
      "step": 1000
    },
    {
      "epoch": 0.3565217391304348,
      "grad_norm": 51.16396713256836,
      "learning_rate": 4.89757794382891e-05,
      "loss": 0.0721,
      "step": 1025
    },
    {
      "epoch": 0.3652173913043478,
      "grad_norm": 0.06547929346561432,
      "learning_rate": 4.881473846946664e-05,
      "loss": 0.0303,
      "step": 1050
    },
    {
      "epoch": 0.3739130434782609,
      "grad_norm": 0.03635074943304062,
      "learning_rate": 4.8653697500644166e-05,
      "loss": 0.0405,
      "step": 1075
    },
    {
      "epoch": 0.3826086956521739,
      "grad_norm": 0.05633755028247833,
      "learning_rate": 4.8492656531821695e-05,
      "loss": 0.0113,
      "step": 1100
    },
    {
      "epoch": 0.391304347826087,
      "grad_norm": 0.038711484521627426,
      "learning_rate": 4.833161556299923e-05,
      "loss": 0.0446,
      "step": 1125
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.03419716656208038,
      "learning_rate": 4.817057459417676e-05,
      "loss": 0.0065,
      "step": 1150
    },
    {
      "epoch": 0.40869565217391307,
      "grad_norm": 0.03439817205071449,
      "learning_rate": 4.800953362535429e-05,
      "loss": 0.045,
      "step": 1175
    },
    {
      "epoch": 0.41739130434782606,
      "grad_norm": 0.02928638830780983,
      "learning_rate": 4.784849265653182e-05,
      "loss": 0.0039,
      "step": 1200
    },
    {
      "epoch": 0.4260869565217391,
      "grad_norm": 0.03179425746202469,
      "learning_rate": 4.7687451687709356e-05,
      "loss": 0.003,
      "step": 1225
    },
    {
      "epoch": 0.43478260869565216,
      "grad_norm": 0.022025400772690773,
      "learning_rate": 4.7526410718886885e-05,
      "loss": 0.0028,
      "step": 1250
    },
    {
      "epoch": 0.4434782608695652,
      "grad_norm": 0.027211090549826622,
      "learning_rate": 4.736536975006442e-05,
      "loss": 0.0025,
      "step": 1275
    },
    {
      "epoch": 0.45217391304347826,
      "grad_norm": 0.019419824704527855,
      "learning_rate": 4.7204328781241955e-05,
      "loss": 0.0025,
      "step": 1300
    },
    {
      "epoch": 0.4608695652173913,
      "grad_norm": 0.019226014614105225,
      "learning_rate": 4.704328781241948e-05,
      "loss": 0.0024,
      "step": 1325
    },
    {
      "epoch": 0.46956521739130436,
      "grad_norm": 0.01781400479376316,
      "learning_rate": 4.688224684359701e-05,
      "loss": 0.0022,
      "step": 1350
    },
    {
      "epoch": 0.4782608695652174,
      "grad_norm": 0.0204134713858366,
      "learning_rate": 4.672120587477454e-05,
      "loss": 0.0025,
      "step": 1375
    },
    {
      "epoch": 0.48695652173913045,
      "grad_norm": 0.022227726876735687,
      "learning_rate": 4.6560164905952075e-05,
      "loss": 0.0032,
      "step": 1400
    },
    {
      "epoch": 0.4956521739130435,
      "grad_norm": 0.1570238471031189,
      "learning_rate": 4.639912393712961e-05,
      "loss": 0.0506,
      "step": 1425
    },
    {
      "epoch": 0.5043478260869565,
      "grad_norm": 0.017615189775824547,
      "learning_rate": 4.623808296830714e-05,
      "loss": 0.0042,
      "step": 1450
    },
    {
      "epoch": 0.5130434782608696,
      "grad_norm": 0.017343297600746155,
      "learning_rate": 4.607704199948467e-05,
      "loss": 0.0021,
      "step": 1475
    },
    {
      "epoch": 0.5217391304347826,
      "grad_norm": 0.018280992284417152,
      "learning_rate": 4.59160010306622e-05,
      "loss": 0.0019,
      "step": 1500
    },
    {
      "epoch": 0.5304347826086957,
      "grad_norm": 0.017326297238469124,
      "learning_rate": 4.5754960061839736e-05,
      "loss": 0.0164,
      "step": 1525
    },
    {
      "epoch": 0.5391304347826087,
      "grad_norm": 0.014924581162631512,
      "learning_rate": 4.5593919093017265e-05,
      "loss": 0.0587,
      "step": 1550
    },
    {
      "epoch": 0.5478260869565217,
      "grad_norm": 0.023350143805146217,
      "learning_rate": 4.54328781241948e-05,
      "loss": 0.0207,
      "step": 1575
    },
    {
      "epoch": 0.5565217391304348,
      "grad_norm": 0.014112471602857113,
      "learning_rate": 4.527183715537233e-05,
      "loss": 0.0038,
      "step": 1600
    },
    {
      "epoch": 0.5652173913043478,
      "grad_norm": 0.016805477440357208,
      "learning_rate": 4.5110796186549856e-05,
      "loss": 0.0457,
      "step": 1625
    },
    {
      "epoch": 0.5739130434782609,
      "grad_norm": 0.018589412793517113,
      "learning_rate": 4.494975521772739e-05,
      "loss": 0.1005,
      "step": 1650
    },
    {
      "epoch": 0.5826086956521739,
      "grad_norm": 0.021146375685930252,
      "learning_rate": 4.478871424890492e-05,
      "loss": 0.0019,
      "step": 1675
    },
    {
      "epoch": 0.591304347826087,
      "grad_norm": 0.014253458008170128,
      "learning_rate": 4.4627673280082455e-05,
      "loss": 0.0019,
      "step": 1700
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.02895612083375454,
      "learning_rate": 4.446663231125999e-05,
      "loss": 0.0373,
      "step": 1725
    },
    {
      "epoch": 0.6086956521739131,
      "grad_norm": 0.012195841409265995,
      "learning_rate": 4.430559134243752e-05,
      "loss": 0.0018,
      "step": 1750
    },
    {
      "epoch": 0.6173913043478261,
      "grad_norm": 0.04872201383113861,
      "learning_rate": 4.414455037361505e-05,
      "loss": 0.0344,
      "step": 1775
    },
    {
      "epoch": 0.6260869565217392,
      "grad_norm": 0.4130329489707947,
      "learning_rate": 4.398350940479258e-05,
      "loss": 0.0258,
      "step": 1800
    },
    {
      "epoch": 0.6347826086956522,
      "grad_norm": 0.4404933750629425,
      "learning_rate": 4.3822468435970116e-05,
      "loss": 0.0089,
      "step": 1825
    },
    {
      "epoch": 0.6434782608695652,
      "grad_norm": 0.012819499708712101,
      "learning_rate": 4.3661427467147645e-05,
      "loss": 0.0178,
      "step": 1850
    },
    {
      "epoch": 0.6521739130434783,
      "grad_norm": 0.020058365538716316,
      "learning_rate": 4.350038649832517e-05,
      "loss": 0.0015,
      "step": 1875
    },
    {
      "epoch": 0.6608695652173913,
      "grad_norm": 0.05424540862441063,
      "learning_rate": 4.333934552950271e-05,
      "loss": 0.0084,
      "step": 1900
    },
    {
      "epoch": 0.6695652173913044,
      "grad_norm": 0.011423332616686821,
      "learning_rate": 4.3178304560680236e-05,
      "loss": 0.0014,
      "step": 1925
    },
    {
      "epoch": 0.6782608695652174,
      "grad_norm": 0.03126579150557518,
      "learning_rate": 4.301726359185777e-05,
      "loss": 0.0012,
      "step": 1950
    },
    {
      "epoch": 0.6869565217391305,
      "grad_norm": 0.010747378692030907,
      "learning_rate": 4.28562226230353e-05,
      "loss": 0.0011,
      "step": 1975
    },
    {
      "epoch": 0.6956521739130435,
      "grad_norm": 0.010900999419391155,
      "learning_rate": 4.2695181654212834e-05,
      "loss": 0.001,
      "step": 2000
    },
    {
      "epoch": 0.7043478260869566,
      "grad_norm": 0.10920006781816483,
      "learning_rate": 4.253414068539037e-05,
      "loss": 0.0371,
      "step": 2025
    },
    {
      "epoch": 0.7130434782608696,
      "grad_norm": 0.008641124702990055,
      "learning_rate": 4.2379541355320796e-05,
      "loss": 0.009,
      "step": 2050
    },
    {
      "epoch": 0.7217391304347827,
      "grad_norm": 0.00958116352558136,
      "learning_rate": 4.2218500386498324e-05,
      "loss": 0.0011,
      "step": 2075
    },
    {
      "epoch": 0.7304347826086957,
      "grad_norm": 0.007281237281858921,
      "learning_rate": 4.205745941767586e-05,
      "loss": 0.001,
      "step": 2100
    },
    {
      "epoch": 0.7391304347826086,
      "grad_norm": 0.00872531346976757,
      "learning_rate": 4.1896418448853394e-05,
      "loss": 0.0285,
      "step": 2125
    },
    {
      "epoch": 0.7478260869565218,
      "grad_norm": 0.008862826973199844,
      "learning_rate": 4.173537748003092e-05,
      "loss": 0.0229,
      "step": 2150
    },
    {
      "epoch": 0.7565217391304347,
      "grad_norm": 0.012311381287872791,
      "learning_rate": 4.157433651120846e-05,
      "loss": 0.0682,
      "step": 2175
    },
    {
      "epoch": 0.7652173913043478,
      "grad_norm": 0.009278792887926102,
      "learning_rate": 4.1413295542385986e-05,
      "loss": 0.0517,
      "step": 2200
    },
    {
      "epoch": 0.7739130434782608,
      "grad_norm": 0.034189265221357346,
      "learning_rate": 4.1252254573563514e-05,
      "loss": 0.0803,
      "step": 2225
    },
    {
      "epoch": 0.782608695652174,
      "grad_norm": 0.043665286153554916,
      "learning_rate": 4.109121360474105e-05,
      "loss": 0.1145,
      "step": 2250
    },
    {
      "epoch": 0.7913043478260869,
      "grad_norm": 0.08209193497896194,
      "learning_rate": 4.093017263591858e-05,
      "loss": 0.0125,
      "step": 2275
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.01089281216263771,
      "learning_rate": 4.076913166709611e-05,
      "loss": 0.0011,
      "step": 2300
    },
    {
      "epoch": 0.808695652173913,
      "grad_norm": 0.007122454699128866,
      "learning_rate": 4.060809069827364e-05,
      "loss": 0.0045,
      "step": 2325
    },
    {
      "epoch": 0.8173913043478261,
      "grad_norm": 0.009066189639270306,
      "learning_rate": 4.0447049729451176e-05,
      "loss": 0.0025,
      "step": 2350
    },
    {
      "epoch": 0.8260869565217391,
      "grad_norm": 0.00853909645229578,
      "learning_rate": 4.0286008760628704e-05,
      "loss": 0.0009,
      "step": 2375
    },
    {
      "epoch": 0.8347826086956521,
      "grad_norm": 0.007053891196846962,
      "learning_rate": 4.012496779180624e-05,
      "loss": 0.0008,
      "step": 2400
    },
    {
      "epoch": 0.8434782608695652,
      "grad_norm": 0.007313120178878307,
      "learning_rate": 3.9963926822983774e-05,
      "loss": 0.0013,
      "step": 2425
    },
    {
      "epoch": 0.8521739130434782,
      "grad_norm": 0.009188150055706501,
      "learning_rate": 3.9802885854161296e-05,
      "loss": 0.0685,
      "step": 2450
    },
    {
      "epoch": 0.8608695652173913,
      "grad_norm": 0.010708467103540897,
      "learning_rate": 3.964184488533883e-05,
      "loss": 0.0845,
      "step": 2475
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 0.009248602204024792,
      "learning_rate": 3.948080391651636e-05,
      "loss": 0.0621,
      "step": 2500
    },
    {
      "epoch": 0.8782608695652174,
      "grad_norm": 0.008605259470641613,
      "learning_rate": 3.9319762947693894e-05,
      "loss": 0.001,
      "step": 2525
    },
    {
      "epoch": 0.8869565217391304,
      "grad_norm": 0.00624378165230155,
      "learning_rate": 3.915872197887143e-05,
      "loss": 0.026,
      "step": 2550
    },
    {
      "epoch": 0.8956521739130435,
      "grad_norm": 0.008726169355213642,
      "learning_rate": 3.899768101004896e-05,
      "loss": 0.0344,
      "step": 2575
    },
    {
      "epoch": 0.9043478260869565,
      "grad_norm": 0.007860297337174416,
      "learning_rate": 3.883664004122649e-05,
      "loss": 0.0321,
      "step": 2600
    },
    {
      "epoch": 0.9130434782608695,
      "grad_norm": 0.006469408981502056,
      "learning_rate": 3.867559907240402e-05,
      "loss": 0.0456,
      "step": 2625
    },
    {
      "epoch": 0.9217391304347826,
      "grad_norm": 0.025073576718568802,
      "learning_rate": 3.8514558103581556e-05,
      "loss": 0.0607,
      "step": 2650
    },
    {
      "epoch": 0.9304347826086956,
      "grad_norm": 0.005454166326671839,
      "learning_rate": 3.8353517134759084e-05,
      "loss": 0.0007,
      "step": 2675
    },
    {
      "epoch": 0.9391304347826087,
      "grad_norm": 0.005138472653925419,
      "learning_rate": 3.819247616593661e-05,
      "loss": 0.0008,
      "step": 2700
    },
    {
      "epoch": 0.9478260869565217,
      "grad_norm": 0.04282465949654579,
      "learning_rate": 3.803143519711415e-05,
      "loss": 0.0007,
      "step": 2725
    },
    {
      "epoch": 0.9565217391304348,
      "grad_norm": 0.00715728010982275,
      "learning_rate": 3.7870394228291676e-05,
      "loss": 0.0387,
      "step": 2750
    },
    {
      "epoch": 0.9652173913043478,
      "grad_norm": 0.005337136331945658,
      "learning_rate": 3.770935325946921e-05,
      "loss": 0.0007,
      "step": 2775
    },
    {
      "epoch": 0.9739130434782609,
      "grad_norm": 0.005603776779025793,
      "learning_rate": 3.754831229064674e-05,
      "loss": 0.0006,
      "step": 2800
    },
    {
      "epoch": 0.9826086956521739,
      "grad_norm": 0.005324610508978367,
      "learning_rate": 3.7387271321824274e-05,
      "loss": 0.0392,
      "step": 2825
    },
    {
      "epoch": 0.991304347826087,
      "grad_norm": 0.004979921039193869,
      "learning_rate": 3.722623035300181e-05,
      "loss": 0.0204,
      "step": 2850
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.0061447713524103165,
      "learning_rate": 3.706518938417934e-05,
      "loss": 0.0007,
      "step": 2875
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 1.0,
      "eval_f1_macro": 1.0,
      "eval_f1_micro": 1.0,
      "eval_f1_weighted": 1.0,
      "eval_loss": 0.0005410231533460319,
      "eval_precision_macro": 1.0,
      "eval_precision_micro": 1.0,
      "eval_precision_weighted": 1.0,
      "eval_recall_macro": 1.0,
      "eval_recall_micro": 1.0,
      "eval_recall_weighted": 1.0,
      "eval_runtime": 5.0042,
      "eval_samples_per_second": 459.614,
      "eval_steps_per_second": 28.776,
      "step": 2875
    },
    {
      "epoch": 1.008695652173913,
      "grad_norm": 0.005072788801044226,
      "learning_rate": 3.690414841535687e-05,
      "loss": 0.0014,
      "step": 2900
    },
    {
      "epoch": 1.017391304347826,
      "grad_norm": 0.004767073784023523,
      "learning_rate": 3.67431074465344e-05,
      "loss": 0.0039,
      "step": 2925
    },
    {
      "epoch": 1.0260869565217392,
      "grad_norm": 0.019580593332648277,
      "learning_rate": 3.658206647771193e-05,
      "loss": 0.0129,
      "step": 2950
    },
    {
      "epoch": 1.0347826086956522,
      "grad_norm": 0.004164039622992277,
      "learning_rate": 3.6421025508889464e-05,
      "loss": 0.0005,
      "step": 2975
    },
    {
      "epoch": 1.0434782608695652,
      "grad_norm": 0.006551030091941357,
      "learning_rate": 3.625998454006699e-05,
      "loss": 0.0005,
      "step": 3000
    },
    {
      "epoch": 1.0521739130434782,
      "grad_norm": 0.004048625007271767,
      "learning_rate": 3.609894357124453e-05,
      "loss": 0.0005,
      "step": 3025
    },
    {
      "epoch": 1.0608695652173914,
      "grad_norm": 0.003683075774461031,
      "learning_rate": 3.5937902602422056e-05,
      "loss": 0.0005,
      "step": 3050
    },
    {
      "epoch": 1.0695652173913044,
      "grad_norm": 0.003764552529901266,
      "learning_rate": 3.577686163359959e-05,
      "loss": 0.0005,
      "step": 3075
    },
    {
      "epoch": 1.0782608695652174,
      "grad_norm": 0.014071044512093067,
      "learning_rate": 3.561582066477712e-05,
      "loss": 0.0005,
      "step": 3100
    },
    {
      "epoch": 1.0869565217391304,
      "grad_norm": 0.004224707838147879,
      "learning_rate": 3.5454779695954654e-05,
      "loss": 0.0004,
      "step": 3125
    },
    {
      "epoch": 1.0956521739130434,
      "grad_norm": 0.007790651638060808,
      "learning_rate": 3.529373872713219e-05,
      "loss": 0.0004,
      "step": 3150
    },
    {
      "epoch": 1.1043478260869566,
      "grad_norm": 0.0033269149716943502,
      "learning_rate": 3.513269775830972e-05,
      "loss": 0.0004,
      "step": 3175
    },
    {
      "epoch": 1.1130434782608696,
      "grad_norm": 0.003678582375869155,
      "learning_rate": 3.4971656789487245e-05,
      "loss": 0.0004,
      "step": 3200
    },
    {
      "epoch": 1.1217391304347826,
      "grad_norm": 0.415933758020401,
      "learning_rate": 3.4810615820664774e-05,
      "loss": 0.0471,
      "step": 3225
    },
    {
      "epoch": 1.1304347826086956,
      "grad_norm": 0.003935901448130608,
      "learning_rate": 3.464957485184231e-05,
      "loss": 0.0008,
      "step": 3250
    },
    {
      "epoch": 1.1391304347826088,
      "grad_norm": 0.005461950786411762,
      "learning_rate": 3.4488533883019844e-05,
      "loss": 0.0868,
      "step": 3275
    },
    {
      "epoch": 1.1478260869565218,
      "grad_norm": 0.00403949897736311,
      "learning_rate": 3.432749291419737e-05,
      "loss": 0.0314,
      "step": 3300
    },
    {
      "epoch": 1.1565217391304348,
      "grad_norm": 0.0038034759927541018,
      "learning_rate": 3.416645194537491e-05,
      "loss": 0.0006,
      "step": 3325
    },
    {
      "epoch": 1.1652173913043478,
      "grad_norm": 0.003490498522296548,
      "learning_rate": 3.4005410976552435e-05,
      "loss": 0.0004,
      "step": 3350
    },
    {
      "epoch": 1.1739130434782608,
      "grad_norm": 0.002800930989906192,
      "learning_rate": 3.384437000772997e-05,
      "loss": 0.0004,
      "step": 3375
    },
    {
      "epoch": 1.182608695652174,
      "grad_norm": 0.0032401601783931255,
      "learning_rate": 3.36833290389075e-05,
      "loss": 0.0004,
      "step": 3400
    },
    {
      "epoch": 1.191304347826087,
      "grad_norm": 0.008039023727178574,
      "learning_rate": 3.3522288070085034e-05,
      "loss": 0.1032,
      "step": 3425
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.0048836818896234035,
      "learning_rate": 3.336124710126256e-05,
      "loss": 0.0007,
      "step": 3450
    },
    {
      "epoch": 1.208695652173913,
      "grad_norm": 0.01012449711561203,
      "learning_rate": 3.320020613244009e-05,
      "loss": 0.0223,
      "step": 3475
    },
    {
      "epoch": 1.2173913043478262,
      "grad_norm": 0.005898616276681423,
      "learning_rate": 3.3039165163617625e-05,
      "loss": 0.0143,
      "step": 3500
    },
    {
      "epoch": 1.2260869565217392,
      "grad_norm": 7.748152256011963,
      "learning_rate": 3.2878124194795154e-05,
      "loss": 0.0928,
      "step": 3525
    },
    {
      "epoch": 1.2347826086956522,
      "grad_norm": 0.0053124018013477325,
      "learning_rate": 3.271708322597269e-05,
      "loss": 0.0045,
      "step": 3550
    },
    {
      "epoch": 1.2434782608695651,
      "grad_norm": 0.009419775567948818,
      "learning_rate": 3.2556042257150224e-05,
      "loss": 0.0008,
      "step": 3575
    },
    {
      "epoch": 1.2521739130434781,
      "grad_norm": 0.0048626321367919445,
      "learning_rate": 3.239500128832775e-05,
      "loss": 0.0005,
      "step": 3600
    },
    {
      "epoch": 1.2608695652173914,
      "grad_norm": 0.004047353286296129,
      "learning_rate": 3.223396031950529e-05,
      "loss": 0.0004,
      "step": 3625
    },
    {
      "epoch": 1.2695652173913043,
      "grad_norm": 0.0036859684623777866,
      "learning_rate": 3.2072919350682815e-05,
      "loss": 0.0484,
      "step": 3650
    },
    {
      "epoch": 1.2782608695652173,
      "grad_norm": 0.007856795564293861,
      "learning_rate": 3.191187838186035e-05,
      "loss": 0.022,
      "step": 3675
    },
    {
      "epoch": 1.2869565217391306,
      "grad_norm": 0.0035763189662247896,
      "learning_rate": 3.175083741303788e-05,
      "loss": 0.0004,
      "step": 3700
    },
    {
      "epoch": 1.2956521739130435,
      "grad_norm": 0.003156292950734496,
      "learning_rate": 3.158979644421541e-05,
      "loss": 0.0004,
      "step": 3725
    },
    {
      "epoch": 1.3043478260869565,
      "grad_norm": 0.003629323560744524,
      "learning_rate": 3.142875547539294e-05,
      "loss": 0.0004,
      "step": 3750
    },
    {
      "epoch": 1.3130434782608695,
      "grad_norm": 68.89725494384766,
      "learning_rate": 3.126771450657047e-05,
      "loss": 0.0249,
      "step": 3775
    },
    {
      "epoch": 1.3217391304347825,
      "grad_norm": 0.004872568417340517,
      "learning_rate": 3.1106673537748005e-05,
      "loss": 0.106,
      "step": 3800
    },
    {
      "epoch": 1.3304347826086955,
      "grad_norm": 0.0038311562966555357,
      "learning_rate": 3.0945632568925534e-05,
      "loss": 0.0008,
      "step": 3825
    },
    {
      "epoch": 1.3391304347826087,
      "grad_norm": 0.01143663190305233,
      "learning_rate": 3.078459160010307e-05,
      "loss": 0.0433,
      "step": 3850
    },
    {
      "epoch": 1.3478260869565217,
      "grad_norm": 0.00519527355208993,
      "learning_rate": 3.0623550631280604e-05,
      "loss": 0.0004,
      "step": 3875
    },
    {
      "epoch": 1.3565217391304347,
      "grad_norm": 0.0031176016200333834,
      "learning_rate": 3.0462509662458132e-05,
      "loss": 0.0005,
      "step": 3900
    },
    {
      "epoch": 1.365217391304348,
      "grad_norm": 0.0033473202493041754,
      "learning_rate": 3.0301468693635664e-05,
      "loss": 0.0003,
      "step": 3925
    },
    {
      "epoch": 1.373913043478261,
      "grad_norm": 0.0031863965559750795,
      "learning_rate": 3.0140427724813192e-05,
      "loss": 0.0003,
      "step": 3950
    },
    {
      "epoch": 1.382608695652174,
      "grad_norm": 0.002523756120353937,
      "learning_rate": 2.9979386755990724e-05,
      "loss": 0.0318,
      "step": 3975
    },
    {
      "epoch": 1.391304347826087,
      "grad_norm": 0.0034575534518808126,
      "learning_rate": 2.9818345787168255e-05,
      "loss": 0.0003,
      "step": 4000
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.004556054249405861,
      "learning_rate": 2.9657304818345787e-05,
      "loss": 0.0003,
      "step": 4025
    },
    {
      "epoch": 1.4086956521739131,
      "grad_norm": 0.009268323890864849,
      "learning_rate": 2.949626384952332e-05,
      "loss": 0.0296,
      "step": 4050
    },
    {
      "epoch": 1.4173913043478261,
      "grad_norm": 0.01629672758281231,
      "learning_rate": 2.9335222880700854e-05,
      "loss": 0.0021,
      "step": 4075
    },
    {
      "epoch": 1.4260869565217391,
      "grad_norm": 0.0037628798745572567,
      "learning_rate": 2.9174181911878385e-05,
      "loss": 0.0015,
      "step": 4100
    },
    {
      "epoch": 1.434782608695652,
      "grad_norm": 0.004670215770602226,
      "learning_rate": 2.9013140943055917e-05,
      "loss": 0.0329,
      "step": 4125
    },
    {
      "epoch": 1.4434782608695653,
      "grad_norm": 0.003128861775621772,
      "learning_rate": 2.885209997423345e-05,
      "loss": 0.0361,
      "step": 4150
    },
    {
      "epoch": 1.4521739130434783,
      "grad_norm": 0.0030446811579167843,
      "learning_rate": 2.869105900541098e-05,
      "loss": 0.0003,
      "step": 4175
    },
    {
      "epoch": 1.4608695652173913,
      "grad_norm": 0.0032489860896021128,
      "learning_rate": 2.853001803658851e-05,
      "loss": 0.0355,
      "step": 4200
    },
    {
      "epoch": 1.4695652173913043,
      "grad_norm": 0.002284318907186389,
      "learning_rate": 2.836897706776604e-05,
      "loss": 0.0276,
      "step": 4225
    },
    {
      "epoch": 1.4782608695652173,
      "grad_norm": 0.005029418971389532,
      "learning_rate": 2.8207936098943572e-05,
      "loss": 0.0617,
      "step": 4250
    },
    {
      "epoch": 1.4869565217391305,
      "grad_norm": 0.0027885879389941692,
      "learning_rate": 2.8046895130121104e-05,
      "loss": 0.0004,
      "step": 4275
    },
    {
      "epoch": 1.4956521739130435,
      "grad_norm": 0.0034100012853741646,
      "learning_rate": 2.7885854161298635e-05,
      "loss": 0.0003,
      "step": 4300
    },
    {
      "epoch": 1.5043478260869565,
      "grad_norm": 0.0024735534097999334,
      "learning_rate": 2.7724813192476167e-05,
      "loss": 0.0003,
      "step": 4325
    },
    {
      "epoch": 1.5130434782608697,
      "grad_norm": 0.0018064508913084865,
      "learning_rate": 2.75637722236537e-05,
      "loss": 0.0003,
      "step": 4350
    },
    {
      "epoch": 1.5217391304347827,
      "grad_norm": 0.001821321900933981,
      "learning_rate": 2.7402731254831234e-05,
      "loss": 0.0003,
      "step": 4375
    },
    {
      "epoch": 1.5304347826086957,
      "grad_norm": 0.002236420288681984,
      "learning_rate": 2.7241690286008765e-05,
      "loss": 0.0003,
      "step": 4400
    },
    {
      "epoch": 1.5391304347826087,
      "grad_norm": 0.0018212420400232077,
      "learning_rate": 2.708064931718629e-05,
      "loss": 0.0717,
      "step": 4425
    },
    {
      "epoch": 1.5478260869565217,
      "grad_norm": 0.0029810129199177027,
      "learning_rate": 2.6919608348363822e-05,
      "loss": 0.0003,
      "step": 4450
    },
    {
      "epoch": 1.5565217391304347,
      "grad_norm": 0.002055574208498001,
      "learning_rate": 2.6758567379541353e-05,
      "loss": 0.0003,
      "step": 4475
    },
    {
      "epoch": 1.5652173913043477,
      "grad_norm": 0.002401245990768075,
      "learning_rate": 2.659752641071889e-05,
      "loss": 0.0471,
      "step": 4500
    },
    {
      "epoch": 1.5739130434782609,
      "grad_norm": 0.0019228613236919045,
      "learning_rate": 2.643648544189642e-05,
      "loss": 0.004,
      "step": 4525
    },
    {
      "epoch": 1.5826086956521739,
      "grad_norm": 0.01082328986376524,
      "learning_rate": 2.6275444473073952e-05,
      "loss": 0.0004,
      "step": 4550
    },
    {
      "epoch": 1.591304347826087,
      "grad_norm": 0.004519768990576267,
      "learning_rate": 2.6114403504251483e-05,
      "loss": 0.0003,
      "step": 4575
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.004080561455339193,
      "learning_rate": 2.5953362535429015e-05,
      "loss": 0.0003,
      "step": 4600
    },
    {
      "epoch": 1.608695652173913,
      "grad_norm": 0.0020184165332466364,
      "learning_rate": 2.5792321566606547e-05,
      "loss": 0.0003,
      "step": 4625
    },
    {
      "epoch": 1.617391304347826,
      "grad_norm": 0.00274313660338521,
      "learning_rate": 2.563128059778408e-05,
      "loss": 0.0002,
      "step": 4650
    },
    {
      "epoch": 1.626086956521739,
      "grad_norm": 0.0025938046164810658,
      "learning_rate": 2.5470239628961607e-05,
      "loss": 0.0002,
      "step": 4675
    },
    {
      "epoch": 1.634782608695652,
      "grad_norm": 0.002224980155006051,
      "learning_rate": 2.530919866013914e-05,
      "loss": 0.0002,
      "step": 4700
    },
    {
      "epoch": 1.643478260869565,
      "grad_norm": 0.0018848755862563848,
      "learning_rate": 2.514815769131667e-05,
      "loss": 0.0002,
      "step": 4725
    },
    {
      "epoch": 1.6521739130434783,
      "grad_norm": 0.0021186743397265673,
      "learning_rate": 2.4987116722494202e-05,
      "loss": 0.012,
      "step": 4750
    },
    {
      "epoch": 1.6608695652173913,
      "grad_norm": 0.002839816967025399,
      "learning_rate": 2.4826075753671733e-05,
      "loss": 0.0732,
      "step": 4775
    },
    {
      "epoch": 1.6695652173913045,
      "grad_norm": 0.0023581942077726126,
      "learning_rate": 2.466503478484927e-05,
      "loss": 0.0441,
      "step": 4800
    },
    {
      "epoch": 1.6782608695652175,
      "grad_norm": 0.0025644400157034397,
      "learning_rate": 2.45039938160268e-05,
      "loss": 0.0002,
      "step": 4825
    },
    {
      "epoch": 1.6869565217391305,
      "grad_norm": 0.0018391634803265333,
      "learning_rate": 2.4342952847204332e-05,
      "loss": 0.0003,
      "step": 4850
    },
    {
      "epoch": 1.6956521739130435,
      "grad_norm": 0.002724371151998639,
      "learning_rate": 2.418191187838186e-05,
      "loss": 0.0002,
      "step": 4875
    },
    {
      "epoch": 1.7043478260869565,
      "grad_norm": 0.002339495113119483,
      "learning_rate": 2.402087090955939e-05,
      "loss": 0.0002,
      "step": 4900
    },
    {
      "epoch": 1.7130434782608694,
      "grad_norm": 0.0015120344469323754,
      "learning_rate": 2.3859829940736923e-05,
      "loss": 0.0059,
      "step": 4925
    },
    {
      "epoch": 1.7217391304347827,
      "grad_norm": 0.0019244271097704768,
      "learning_rate": 2.369878897191446e-05,
      "loss": 0.0488,
      "step": 4950
    },
    {
      "epoch": 1.7304347826086957,
      "grad_norm": 0.006782891228795052,
      "learning_rate": 2.353774800309199e-05,
      "loss": 0.0002,
      "step": 4975
    },
    {
      "epoch": 1.7391304347826086,
      "grad_norm": 0.006890356075018644,
      "learning_rate": 2.337670703426952e-05,
      "loss": 0.0002,
      "step": 5000
    },
    {
      "epoch": 1.7478260869565219,
      "grad_norm": 0.0016472332645207644,
      "learning_rate": 2.321566606544705e-05,
      "loss": 0.0002,
      "step": 5025
    },
    {
      "epoch": 1.7565217391304349,
      "grad_norm": 0.002031004289165139,
      "learning_rate": 2.305462509662458e-05,
      "loss": 0.0002,
      "step": 5050
    },
    {
      "epoch": 1.7652173913043478,
      "grad_norm": 0.020191390067338943,
      "learning_rate": 2.2893584127802113e-05,
      "loss": 0.0006,
      "step": 5075
    },
    {
      "epoch": 1.7739130434782608,
      "grad_norm": 0.0014919223031029105,
      "learning_rate": 2.273254315897965e-05,
      "loss": 0.0002,
      "step": 5100
    },
    {
      "epoch": 1.7826086956521738,
      "grad_norm": 0.0019313708180561662,
      "learning_rate": 2.2571502190157177e-05,
      "loss": 0.0002,
      "step": 5125
    },
    {
      "epoch": 1.7913043478260868,
      "grad_norm": 0.0022281836718320847,
      "learning_rate": 2.2410461221334708e-05,
      "loss": 0.0002,
      "step": 5150
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.0013652993366122246,
      "learning_rate": 2.224942025251224e-05,
      "loss": 0.0002,
      "step": 5175
    },
    {
      "epoch": 1.808695652173913,
      "grad_norm": 0.00228705327026546,
      "learning_rate": 2.208837928368977e-05,
      "loss": 0.0002,
      "step": 5200
    },
    {
      "epoch": 1.8173913043478263,
      "grad_norm": 0.001371930236928165,
      "learning_rate": 2.1927338314867303e-05,
      "loss": 0.0002,
      "step": 5225
    },
    {
      "epoch": 1.8260869565217392,
      "grad_norm": 0.0013612714828923345,
      "learning_rate": 2.1766297346044835e-05,
      "loss": 0.0002,
      "step": 5250
    },
    {
      "epoch": 1.8347826086956522,
      "grad_norm": 0.0018417349783703685,
      "learning_rate": 2.1605256377222367e-05,
      "loss": 0.0002,
      "step": 5275
    },
    {
      "epoch": 1.8434782608695652,
      "grad_norm": 0.0018068786012008786,
      "learning_rate": 2.1444215408399898e-05,
      "loss": 0.0002,
      "step": 5300
    },
    {
      "epoch": 1.8521739130434782,
      "grad_norm": 0.0039210072718560696,
      "learning_rate": 2.128317443957743e-05,
      "loss": 0.0002,
      "step": 5325
    },
    {
      "epoch": 1.8608695652173912,
      "grad_norm": 0.002227018354460597,
      "learning_rate": 2.112213347075496e-05,
      "loss": 0.0156,
      "step": 5350
    },
    {
      "epoch": 1.8695652173913042,
      "grad_norm": 0.001460268278606236,
      "learning_rate": 2.0961092501932493e-05,
      "loss": 0.0202,
      "step": 5375
    },
    {
      "epoch": 1.8782608695652174,
      "grad_norm": 0.001318955677561462,
      "learning_rate": 2.0800051533110025e-05,
      "loss": 0.0101,
      "step": 5400
    },
    {
      "epoch": 1.8869565217391304,
      "grad_norm": 0.0019615204073488712,
      "learning_rate": 2.0639010564287557e-05,
      "loss": 0.0002,
      "step": 5425
    },
    {
      "epoch": 1.8956521739130436,
      "grad_norm": 0.0014419632498174906,
      "learning_rate": 2.0477969595465088e-05,
      "loss": 0.0002,
      "step": 5450
    },
    {
      "epoch": 1.9043478260869566,
      "grad_norm": 0.00159283762332052,
      "learning_rate": 2.031692862664262e-05,
      "loss": 0.0002,
      "step": 5475
    },
    {
      "epoch": 1.9130434782608696,
      "grad_norm": 0.0014433007454499602,
      "learning_rate": 2.0155887657820148e-05,
      "loss": 0.0002,
      "step": 5500
    },
    {
      "epoch": 1.9217391304347826,
      "grad_norm": 0.0011873416369780898,
      "learning_rate": 1.9994846688997683e-05,
      "loss": 0.0002,
      "step": 5525
    },
    {
      "epoch": 1.9304347826086956,
      "grad_norm": 0.0015813339268788695,
      "learning_rate": 1.9833805720175215e-05,
      "loss": 0.0002,
      "step": 5550
    },
    {
      "epoch": 1.9391304347826086,
      "grad_norm": 0.001116643426939845,
      "learning_rate": 1.9672764751352747e-05,
      "loss": 0.0003,
      "step": 5575
    },
    {
      "epoch": 1.9478260869565216,
      "grad_norm": 0.0013602287508547306,
      "learning_rate": 1.9511723782530278e-05,
      "loss": 0.0001,
      "step": 5600
    },
    {
      "epoch": 1.9565217391304348,
      "grad_norm": 0.001626876532100141,
      "learning_rate": 1.9350682813707806e-05,
      "loss": 0.0002,
      "step": 5625
    },
    {
      "epoch": 1.9652173913043478,
      "grad_norm": 0.0015673220623284578,
      "learning_rate": 1.9189641844885338e-05,
      "loss": 0.0012,
      "step": 5650
    },
    {
      "epoch": 1.973913043478261,
      "grad_norm": 0.0014821627410128713,
      "learning_rate": 1.902860087606287e-05,
      "loss": 0.0001,
      "step": 5675
    },
    {
      "epoch": 1.982608695652174,
      "grad_norm": 0.02127527818083763,
      "learning_rate": 1.8867559907240405e-05,
      "loss": 0.0491,
      "step": 5700
    },
    {
      "epoch": 1.991304347826087,
      "grad_norm": 0.0015324688283726573,
      "learning_rate": 1.8706518938417936e-05,
      "loss": 0.0002,
      "step": 5725
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.0018975608982145786,
      "learning_rate": 1.8545477969595465e-05,
      "loss": 0.0002,
      "step": 5750
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 1.0,
      "eval_f1_macro": 1.0,
      "eval_f1_micro": 1.0,
      "eval_f1_weighted": 1.0,
      "eval_loss": 0.00012181053898530081,
      "eval_precision_macro": 1.0,
      "eval_precision_micro": 1.0,
      "eval_precision_weighted": 1.0,
      "eval_recall_macro": 1.0,
      "eval_recall_micro": 1.0,
      "eval_recall_weighted": 1.0,
      "eval_runtime": 5.0295,
      "eval_samples_per_second": 457.3,
      "eval_steps_per_second": 28.631,
      "step": 5750
    }
  ],
  "logging_steps": 25,
  "max_steps": 8625,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3026347648512000.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}