semantic_tagging / checkpoint-5750 / trainer_state.json
{
"best_metric": 0.00012181053898530081,
"best_model_checkpoint": "autotrain-l9v4l-l3gs1/checkpoint-5750",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 5750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008695652173913044,
"grad_norm": 8.19902229309082,
"learning_rate": 1.3904982618771727e-06,
"loss": 3.1857,
"step": 25
},
{
"epoch": 0.017391304347826087,
"grad_norm": 9.214441299438477,
"learning_rate": 2.838933951332561e-06,
"loss": 3.1368,
"step": 50
},
{
"epoch": 0.02608695652173913,
"grad_norm": 9.295684814453125,
"learning_rate": 4.229432213209733e-06,
"loss": 3.0863,
"step": 75
},
{
"epoch": 0.034782608695652174,
"grad_norm": 15.308918952941895,
"learning_rate": 5.677867902665122e-06,
"loss": 2.9409,
"step": 100
},
{
"epoch": 0.043478260869565216,
"grad_norm": 14.55487060546875,
"learning_rate": 7.12630359212051e-06,
"loss": 2.8279,
"step": 125
},
{
"epoch": 0.05217391304347826,
"grad_norm": 9.613283157348633,
"learning_rate": 8.574739281575898e-06,
"loss": 2.6909,
"step": 150
},
{
"epoch": 0.06086956521739131,
"grad_norm": 11.267277717590332,
"learning_rate": 1.0023174971031286e-05,
"loss": 2.5096,
"step": 175
},
{
"epoch": 0.06956521739130435,
"grad_norm": 8.382554054260254,
"learning_rate": 1.1471610660486674e-05,
"loss": 2.2595,
"step": 200
},
{
"epoch": 0.0782608695652174,
"grad_norm": 10.061994552612305,
"learning_rate": 1.2920046349942064e-05,
"loss": 2.003,
"step": 225
},
{
"epoch": 0.08695652173913043,
"grad_norm": 10.652369499206543,
"learning_rate": 1.4368482039397451e-05,
"loss": 1.8495,
"step": 250
},
{
"epoch": 0.09565217391304348,
"grad_norm": 7.267949104309082,
"learning_rate": 1.581691772885284e-05,
"loss": 1.5879,
"step": 275
},
{
"epoch": 0.10434782608695652,
"grad_norm": 6.28985595703125,
"learning_rate": 1.7265353418308228e-05,
"loss": 1.3021,
"step": 300
},
{
"epoch": 0.11304347826086956,
"grad_norm": 6.115077018737793,
"learning_rate": 1.8713789107763614e-05,
"loss": 1.0811,
"step": 325
},
{
"epoch": 0.12173913043478261,
"grad_norm": 9.993290901184082,
"learning_rate": 2.0162224797219005e-05,
"loss": 0.9644,
"step": 350
},
{
"epoch": 0.13043478260869565,
"grad_norm": 4.629942417144775,
"learning_rate": 2.161066048667439e-05,
"loss": 0.7733,
"step": 375
},
{
"epoch": 0.1391304347826087,
"grad_norm": 4.087889194488525,
"learning_rate": 2.305909617612978e-05,
"loss": 0.6025,
"step": 400
},
{
"epoch": 0.14782608695652175,
"grad_norm": 6.934291362762451,
"learning_rate": 2.450753186558517e-05,
"loss": 0.4823,
"step": 425
},
{
"epoch": 0.1565217391304348,
"grad_norm": 8.899859428405762,
"learning_rate": 2.5955967555040555e-05,
"loss": 0.3408,
"step": 450
},
{
"epoch": 0.16521739130434782,
"grad_norm": 5.459203243255615,
"learning_rate": 2.7404403244495948e-05,
"loss": 0.2558,
"step": 475
},
{
"epoch": 0.17391304347826086,
"grad_norm": 2.3503053188323975,
"learning_rate": 2.885283893395133e-05,
"loss": 0.2116,
"step": 500
},
{
"epoch": 0.1826086956521739,
"grad_norm": 10.073762893676758,
"learning_rate": 3.030127462340672e-05,
"loss": 0.1117,
"step": 525
},
{
"epoch": 0.19130434782608696,
"grad_norm": 0.8817861080169678,
"learning_rate": 3.1749710312862115e-05,
"loss": 0.0779,
"step": 550
},
{
"epoch": 0.2,
"grad_norm": 0.28817218542099,
"learning_rate": 3.31981460023175e-05,
"loss": 0.0559,
"step": 575
},
{
"epoch": 0.20869565217391303,
"grad_norm": 0.3490276038646698,
"learning_rate": 3.464658169177289e-05,
"loss": 0.0372,
"step": 600
},
{
"epoch": 0.21739130434782608,
"grad_norm": 0.18767504394054413,
"learning_rate": 3.609501738122827e-05,
"loss": 0.0273,
"step": 625
},
{
"epoch": 0.22608695652173913,
"grad_norm": 0.16744671761989594,
"learning_rate": 3.754345307068367e-05,
"loss": 0.0242,
"step": 650
},
{
"epoch": 0.23478260869565218,
"grad_norm": 0.18482975661754608,
"learning_rate": 3.899188876013905e-05,
"loss": 0.0186,
"step": 675
},
{
"epoch": 0.24347826086956523,
"grad_norm": 0.10977461189031601,
"learning_rate": 4.044032444959444e-05,
"loss": 0.0153,
"step": 700
},
{
"epoch": 0.25217391304347825,
"grad_norm": 0.09746221452951431,
"learning_rate": 4.1888760139049825e-05,
"loss": 0.0133,
"step": 725
},
{
"epoch": 0.2608695652173913,
"grad_norm": 0.14220073819160461,
"learning_rate": 4.3337195828505215e-05,
"loss": 0.0127,
"step": 750
},
{
"epoch": 0.26956521739130435,
"grad_norm": 0.08566069602966309,
"learning_rate": 4.4785631517960605e-05,
"loss": 0.0191,
"step": 775
},
{
"epoch": 0.2782608695652174,
"grad_norm": 0.09003018587827682,
"learning_rate": 4.6234067207415995e-05,
"loss": 0.0114,
"step": 800
},
{
"epoch": 0.28695652173913044,
"grad_norm": 0.07937142997980118,
"learning_rate": 4.7682502896871385e-05,
"loss": 0.0473,
"step": 825
},
{
"epoch": 0.2956521739130435,
"grad_norm": 0.06389954686164856,
"learning_rate": 4.913093858632677e-05,
"loss": 0.0077,
"step": 850
},
{
"epoch": 0.30434782608695654,
"grad_norm": 0.05928570777177811,
"learning_rate": 4.9935583612471015e-05,
"loss": 0.0073,
"step": 875
},
{
"epoch": 0.3130434782608696,
"grad_norm": 0.06266701221466064,
"learning_rate": 4.977454264364855e-05,
"loss": 0.0376,
"step": 900
},
{
"epoch": 0.3217391304347826,
"grad_norm": 0.04980575665831566,
"learning_rate": 4.961350167482608e-05,
"loss": 0.0125,
"step": 925
},
{
"epoch": 0.33043478260869563,
"grad_norm": 0.3670000731945038,
"learning_rate": 4.9452460706003613e-05,
"loss": 0.0062,
"step": 950
},
{
"epoch": 0.3391304347826087,
"grad_norm": 0.052209340035915375,
"learning_rate": 4.929141973718114e-05,
"loss": 0.0499,
"step": 975
},
{
"epoch": 0.34782608695652173,
"grad_norm": 0.04546426981687546,
"learning_rate": 4.9136820407111575e-05,
"loss": 0.0663,
"step": 1000
},
{
"epoch": 0.3565217391304348,
"grad_norm": 51.16396713256836,
"learning_rate": 4.89757794382891e-05,
"loss": 0.0721,
"step": 1025
},
{
"epoch": 0.3652173913043478,
"grad_norm": 0.06547929346561432,
"learning_rate": 4.881473846946664e-05,
"loss": 0.0303,
"step": 1050
},
{
"epoch": 0.3739130434782609,
"grad_norm": 0.03635074943304062,
"learning_rate": 4.8653697500644166e-05,
"loss": 0.0405,
"step": 1075
},
{
"epoch": 0.3826086956521739,
"grad_norm": 0.05633755028247833,
"learning_rate": 4.8492656531821695e-05,
"loss": 0.0113,
"step": 1100
},
{
"epoch": 0.391304347826087,
"grad_norm": 0.038711484521627426,
"learning_rate": 4.833161556299923e-05,
"loss": 0.0446,
"step": 1125
},
{
"epoch": 0.4,
"grad_norm": 0.03419716656208038,
"learning_rate": 4.817057459417676e-05,
"loss": 0.0065,
"step": 1150
},
{
"epoch": 0.40869565217391307,
"grad_norm": 0.03439817205071449,
"learning_rate": 4.800953362535429e-05,
"loss": 0.045,
"step": 1175
},
{
"epoch": 0.41739130434782606,
"grad_norm": 0.02928638830780983,
"learning_rate": 4.784849265653182e-05,
"loss": 0.0039,
"step": 1200
},
{
"epoch": 0.4260869565217391,
"grad_norm": 0.03179425746202469,
"learning_rate": 4.7687451687709356e-05,
"loss": 0.003,
"step": 1225
},
{
"epoch": 0.43478260869565216,
"grad_norm": 0.022025400772690773,
"learning_rate": 4.7526410718886885e-05,
"loss": 0.0028,
"step": 1250
},
{
"epoch": 0.4434782608695652,
"grad_norm": 0.027211090549826622,
"learning_rate": 4.736536975006442e-05,
"loss": 0.0025,
"step": 1275
},
{
"epoch": 0.45217391304347826,
"grad_norm": 0.019419824704527855,
"learning_rate": 4.7204328781241955e-05,
"loss": 0.0025,
"step": 1300
},
{
"epoch": 0.4608695652173913,
"grad_norm": 0.019226014614105225,
"learning_rate": 4.704328781241948e-05,
"loss": 0.0024,
"step": 1325
},
{
"epoch": 0.46956521739130436,
"grad_norm": 0.01781400479376316,
"learning_rate": 4.688224684359701e-05,
"loss": 0.0022,
"step": 1350
},
{
"epoch": 0.4782608695652174,
"grad_norm": 0.0204134713858366,
"learning_rate": 4.672120587477454e-05,
"loss": 0.0025,
"step": 1375
},
{
"epoch": 0.48695652173913045,
"grad_norm": 0.022227726876735687,
"learning_rate": 4.6560164905952075e-05,
"loss": 0.0032,
"step": 1400
},
{
"epoch": 0.4956521739130435,
"grad_norm": 0.1570238471031189,
"learning_rate": 4.639912393712961e-05,
"loss": 0.0506,
"step": 1425
},
{
"epoch": 0.5043478260869565,
"grad_norm": 0.017615189775824547,
"learning_rate": 4.623808296830714e-05,
"loss": 0.0042,
"step": 1450
},
{
"epoch": 0.5130434782608696,
"grad_norm": 0.017343297600746155,
"learning_rate": 4.607704199948467e-05,
"loss": 0.0021,
"step": 1475
},
{
"epoch": 0.5217391304347826,
"grad_norm": 0.018280992284417152,
"learning_rate": 4.59160010306622e-05,
"loss": 0.0019,
"step": 1500
},
{
"epoch": 0.5304347826086957,
"grad_norm": 0.017326297238469124,
"learning_rate": 4.5754960061839736e-05,
"loss": 0.0164,
"step": 1525
},
{
"epoch": 0.5391304347826087,
"grad_norm": 0.014924581162631512,
"learning_rate": 4.5593919093017265e-05,
"loss": 0.0587,
"step": 1550
},
{
"epoch": 0.5478260869565217,
"grad_norm": 0.023350143805146217,
"learning_rate": 4.54328781241948e-05,
"loss": 0.0207,
"step": 1575
},
{
"epoch": 0.5565217391304348,
"grad_norm": 0.014112471602857113,
"learning_rate": 4.527183715537233e-05,
"loss": 0.0038,
"step": 1600
},
{
"epoch": 0.5652173913043478,
"grad_norm": 0.016805477440357208,
"learning_rate": 4.5110796186549856e-05,
"loss": 0.0457,
"step": 1625
},
{
"epoch": 0.5739130434782609,
"grad_norm": 0.018589412793517113,
"learning_rate": 4.494975521772739e-05,
"loss": 0.1005,
"step": 1650
},
{
"epoch": 0.5826086956521739,
"grad_norm": 0.021146375685930252,
"learning_rate": 4.478871424890492e-05,
"loss": 0.0019,
"step": 1675
},
{
"epoch": 0.591304347826087,
"grad_norm": 0.014253458008170128,
"learning_rate": 4.4627673280082455e-05,
"loss": 0.0019,
"step": 1700
},
{
"epoch": 0.6,
"grad_norm": 0.02895612083375454,
"learning_rate": 4.446663231125999e-05,
"loss": 0.0373,
"step": 1725
},
{
"epoch": 0.6086956521739131,
"grad_norm": 0.012195841409265995,
"learning_rate": 4.430559134243752e-05,
"loss": 0.0018,
"step": 1750
},
{
"epoch": 0.6173913043478261,
"grad_norm": 0.04872201383113861,
"learning_rate": 4.414455037361505e-05,
"loss": 0.0344,
"step": 1775
},
{
"epoch": 0.6260869565217392,
"grad_norm": 0.4130329489707947,
"learning_rate": 4.398350940479258e-05,
"loss": 0.0258,
"step": 1800
},
{
"epoch": 0.6347826086956522,
"grad_norm": 0.4404933750629425,
"learning_rate": 4.3822468435970116e-05,
"loss": 0.0089,
"step": 1825
},
{
"epoch": 0.6434782608695652,
"grad_norm": 0.012819499708712101,
"learning_rate": 4.3661427467147645e-05,
"loss": 0.0178,
"step": 1850
},
{
"epoch": 0.6521739130434783,
"grad_norm": 0.020058365538716316,
"learning_rate": 4.350038649832517e-05,
"loss": 0.0015,
"step": 1875
},
{
"epoch": 0.6608695652173913,
"grad_norm": 0.05424540862441063,
"learning_rate": 4.333934552950271e-05,
"loss": 0.0084,
"step": 1900
},
{
"epoch": 0.6695652173913044,
"grad_norm": 0.011423332616686821,
"learning_rate": 4.3178304560680236e-05,
"loss": 0.0014,
"step": 1925
},
{
"epoch": 0.6782608695652174,
"grad_norm": 0.03126579150557518,
"learning_rate": 4.301726359185777e-05,
"loss": 0.0012,
"step": 1950
},
{
"epoch": 0.6869565217391305,
"grad_norm": 0.010747378692030907,
"learning_rate": 4.28562226230353e-05,
"loss": 0.0011,
"step": 1975
},
{
"epoch": 0.6956521739130435,
"grad_norm": 0.010900999419391155,
"learning_rate": 4.2695181654212834e-05,
"loss": 0.001,
"step": 2000
},
{
"epoch": 0.7043478260869566,
"grad_norm": 0.10920006781816483,
"learning_rate": 4.253414068539037e-05,
"loss": 0.0371,
"step": 2025
},
{
"epoch": 0.7130434782608696,
"grad_norm": 0.008641124702990055,
"learning_rate": 4.2379541355320796e-05,
"loss": 0.009,
"step": 2050
},
{
"epoch": 0.7217391304347827,
"grad_norm": 0.00958116352558136,
"learning_rate": 4.2218500386498324e-05,
"loss": 0.0011,
"step": 2075
},
{
"epoch": 0.7304347826086957,
"grad_norm": 0.007281237281858921,
"learning_rate": 4.205745941767586e-05,
"loss": 0.001,
"step": 2100
},
{
"epoch": 0.7391304347826086,
"grad_norm": 0.00872531346976757,
"learning_rate": 4.1896418448853394e-05,
"loss": 0.0285,
"step": 2125
},
{
"epoch": 0.7478260869565218,
"grad_norm": 0.008862826973199844,
"learning_rate": 4.173537748003092e-05,
"loss": 0.0229,
"step": 2150
},
{
"epoch": 0.7565217391304347,
"grad_norm": 0.012311381287872791,
"learning_rate": 4.157433651120846e-05,
"loss": 0.0682,
"step": 2175
},
{
"epoch": 0.7652173913043478,
"grad_norm": 0.009278792887926102,
"learning_rate": 4.1413295542385986e-05,
"loss": 0.0517,
"step": 2200
},
{
"epoch": 0.7739130434782608,
"grad_norm": 0.034189265221357346,
"learning_rate": 4.1252254573563514e-05,
"loss": 0.0803,
"step": 2225
},
{
"epoch": 0.782608695652174,
"grad_norm": 0.043665286153554916,
"learning_rate": 4.109121360474105e-05,
"loss": 0.1145,
"step": 2250
},
{
"epoch": 0.7913043478260869,
"grad_norm": 0.08209193497896194,
"learning_rate": 4.093017263591858e-05,
"loss": 0.0125,
"step": 2275
},
{
"epoch": 0.8,
"grad_norm": 0.01089281216263771,
"learning_rate": 4.076913166709611e-05,
"loss": 0.0011,
"step": 2300
},
{
"epoch": 0.808695652173913,
"grad_norm": 0.007122454699128866,
"learning_rate": 4.060809069827364e-05,
"loss": 0.0045,
"step": 2325
},
{
"epoch": 0.8173913043478261,
"grad_norm": 0.009066189639270306,
"learning_rate": 4.0447049729451176e-05,
"loss": 0.0025,
"step": 2350
},
{
"epoch": 0.8260869565217391,
"grad_norm": 0.00853909645229578,
"learning_rate": 4.0286008760628704e-05,
"loss": 0.0009,
"step": 2375
},
{
"epoch": 0.8347826086956521,
"grad_norm": 0.007053891196846962,
"learning_rate": 4.012496779180624e-05,
"loss": 0.0008,
"step": 2400
},
{
"epoch": 0.8434782608695652,
"grad_norm": 0.007313120178878307,
"learning_rate": 3.9963926822983774e-05,
"loss": 0.0013,
"step": 2425
},
{
"epoch": 0.8521739130434782,
"grad_norm": 0.009188150055706501,
"learning_rate": 3.9802885854161296e-05,
"loss": 0.0685,
"step": 2450
},
{
"epoch": 0.8608695652173913,
"grad_norm": 0.010708467103540897,
"learning_rate": 3.964184488533883e-05,
"loss": 0.0845,
"step": 2475
},
{
"epoch": 0.8695652173913043,
"grad_norm": 0.009248602204024792,
"learning_rate": 3.948080391651636e-05,
"loss": 0.0621,
"step": 2500
},
{
"epoch": 0.8782608695652174,
"grad_norm": 0.008605259470641613,
"learning_rate": 3.9319762947693894e-05,
"loss": 0.001,
"step": 2525
},
{
"epoch": 0.8869565217391304,
"grad_norm": 0.00624378165230155,
"learning_rate": 3.915872197887143e-05,
"loss": 0.026,
"step": 2550
},
{
"epoch": 0.8956521739130435,
"grad_norm": 0.008726169355213642,
"learning_rate": 3.899768101004896e-05,
"loss": 0.0344,
"step": 2575
},
{
"epoch": 0.9043478260869565,
"grad_norm": 0.007860297337174416,
"learning_rate": 3.883664004122649e-05,
"loss": 0.0321,
"step": 2600
},
{
"epoch": 0.9130434782608695,
"grad_norm": 0.006469408981502056,
"learning_rate": 3.867559907240402e-05,
"loss": 0.0456,
"step": 2625
},
{
"epoch": 0.9217391304347826,
"grad_norm": 0.025073576718568802,
"learning_rate": 3.8514558103581556e-05,
"loss": 0.0607,
"step": 2650
},
{
"epoch": 0.9304347826086956,
"grad_norm": 0.005454166326671839,
"learning_rate": 3.8353517134759084e-05,
"loss": 0.0007,
"step": 2675
},
{
"epoch": 0.9391304347826087,
"grad_norm": 0.005138472653925419,
"learning_rate": 3.819247616593661e-05,
"loss": 0.0008,
"step": 2700
},
{
"epoch": 0.9478260869565217,
"grad_norm": 0.04282465949654579,
"learning_rate": 3.803143519711415e-05,
"loss": 0.0007,
"step": 2725
},
{
"epoch": 0.9565217391304348,
"grad_norm": 0.00715728010982275,
"learning_rate": 3.7870394228291676e-05,
"loss": 0.0387,
"step": 2750
},
{
"epoch": 0.9652173913043478,
"grad_norm": 0.005337136331945658,
"learning_rate": 3.770935325946921e-05,
"loss": 0.0007,
"step": 2775
},
{
"epoch": 0.9739130434782609,
"grad_norm": 0.005603776779025793,
"learning_rate": 3.754831229064674e-05,
"loss": 0.0006,
"step": 2800
},
{
"epoch": 0.9826086956521739,
"grad_norm": 0.005324610508978367,
"learning_rate": 3.7387271321824274e-05,
"loss": 0.0392,
"step": 2825
},
{
"epoch": 0.991304347826087,
"grad_norm": 0.004979921039193869,
"learning_rate": 3.722623035300181e-05,
"loss": 0.0204,
"step": 2850
},
{
"epoch": 1.0,
"grad_norm": 0.0061447713524103165,
"learning_rate": 3.706518938417934e-05,
"loss": 0.0007,
"step": 2875
},
{
"epoch": 1.0,
"eval_accuracy": 1.0,
"eval_f1_macro": 1.0,
"eval_f1_micro": 1.0,
"eval_f1_weighted": 1.0,
"eval_loss": 0.0005410231533460319,
"eval_precision_macro": 1.0,
"eval_precision_micro": 1.0,
"eval_precision_weighted": 1.0,
"eval_recall_macro": 1.0,
"eval_recall_micro": 1.0,
"eval_recall_weighted": 1.0,
"eval_runtime": 5.0042,
"eval_samples_per_second": 459.614,
"eval_steps_per_second": 28.776,
"step": 2875
},
{
"epoch": 1.008695652173913,
"grad_norm": 0.005072788801044226,
"learning_rate": 3.690414841535687e-05,
"loss": 0.0014,
"step": 2900
},
{
"epoch": 1.017391304347826,
"grad_norm": 0.004767073784023523,
"learning_rate": 3.67431074465344e-05,
"loss": 0.0039,
"step": 2925
},
{
"epoch": 1.0260869565217392,
"grad_norm": 0.019580593332648277,
"learning_rate": 3.658206647771193e-05,
"loss": 0.0129,
"step": 2950
},
{
"epoch": 1.0347826086956522,
"grad_norm": 0.004164039622992277,
"learning_rate": 3.6421025508889464e-05,
"loss": 0.0005,
"step": 2975
},
{
"epoch": 1.0434782608695652,
"grad_norm": 0.006551030091941357,
"learning_rate": 3.625998454006699e-05,
"loss": 0.0005,
"step": 3000
},
{
"epoch": 1.0521739130434782,
"grad_norm": 0.004048625007271767,
"learning_rate": 3.609894357124453e-05,
"loss": 0.0005,
"step": 3025
},
{
"epoch": 1.0608695652173914,
"grad_norm": 0.003683075774461031,
"learning_rate": 3.5937902602422056e-05,
"loss": 0.0005,
"step": 3050
},
{
"epoch": 1.0695652173913044,
"grad_norm": 0.003764552529901266,
"learning_rate": 3.577686163359959e-05,
"loss": 0.0005,
"step": 3075
},
{
"epoch": 1.0782608695652174,
"grad_norm": 0.014071044512093067,
"learning_rate": 3.561582066477712e-05,
"loss": 0.0005,
"step": 3100
},
{
"epoch": 1.0869565217391304,
"grad_norm": 0.004224707838147879,
"learning_rate": 3.5454779695954654e-05,
"loss": 0.0004,
"step": 3125
},
{
"epoch": 1.0956521739130434,
"grad_norm": 0.007790651638060808,
"learning_rate": 3.529373872713219e-05,
"loss": 0.0004,
"step": 3150
},
{
"epoch": 1.1043478260869566,
"grad_norm": 0.0033269149716943502,
"learning_rate": 3.513269775830972e-05,
"loss": 0.0004,
"step": 3175
},
{
"epoch": 1.1130434782608696,
"grad_norm": 0.003678582375869155,
"learning_rate": 3.4971656789487245e-05,
"loss": 0.0004,
"step": 3200
},
{
"epoch": 1.1217391304347826,
"grad_norm": 0.415933758020401,
"learning_rate": 3.4810615820664774e-05,
"loss": 0.0471,
"step": 3225
},
{
"epoch": 1.1304347826086956,
"grad_norm": 0.003935901448130608,
"learning_rate": 3.464957485184231e-05,
"loss": 0.0008,
"step": 3250
},
{
"epoch": 1.1391304347826088,
"grad_norm": 0.005461950786411762,
"learning_rate": 3.4488533883019844e-05,
"loss": 0.0868,
"step": 3275
},
{
"epoch": 1.1478260869565218,
"grad_norm": 0.00403949897736311,
"learning_rate": 3.432749291419737e-05,
"loss": 0.0314,
"step": 3300
},
{
"epoch": 1.1565217391304348,
"grad_norm": 0.0038034759927541018,
"learning_rate": 3.416645194537491e-05,
"loss": 0.0006,
"step": 3325
},
{
"epoch": 1.1652173913043478,
"grad_norm": 0.003490498522296548,
"learning_rate": 3.4005410976552435e-05,
"loss": 0.0004,
"step": 3350
},
{
"epoch": 1.1739130434782608,
"grad_norm": 0.002800930989906192,
"learning_rate": 3.384437000772997e-05,
"loss": 0.0004,
"step": 3375
},
{
"epoch": 1.182608695652174,
"grad_norm": 0.0032401601783931255,
"learning_rate": 3.36833290389075e-05,
"loss": 0.0004,
"step": 3400
},
{
"epoch": 1.191304347826087,
"grad_norm": 0.008039023727178574,
"learning_rate": 3.3522288070085034e-05,
"loss": 0.1032,
"step": 3425
},
{
"epoch": 1.2,
"grad_norm": 0.0048836818896234035,
"learning_rate": 3.336124710126256e-05,
"loss": 0.0007,
"step": 3450
},
{
"epoch": 1.208695652173913,
"grad_norm": 0.01012449711561203,
"learning_rate": 3.320020613244009e-05,
"loss": 0.0223,
"step": 3475
},
{
"epoch": 1.2173913043478262,
"grad_norm": 0.005898616276681423,
"learning_rate": 3.3039165163617625e-05,
"loss": 0.0143,
"step": 3500
},
{
"epoch": 1.2260869565217392,
"grad_norm": 7.748152256011963,
"learning_rate": 3.2878124194795154e-05,
"loss": 0.0928,
"step": 3525
},
{
"epoch": 1.2347826086956522,
"grad_norm": 0.0053124018013477325,
"learning_rate": 3.271708322597269e-05,
"loss": 0.0045,
"step": 3550
},
{
"epoch": 1.2434782608695651,
"grad_norm": 0.009419775567948818,
"learning_rate": 3.2556042257150224e-05,
"loss": 0.0008,
"step": 3575
},
{
"epoch": 1.2521739130434781,
"grad_norm": 0.0048626321367919445,
"learning_rate": 3.239500128832775e-05,
"loss": 0.0005,
"step": 3600
},
{
"epoch": 1.2608695652173914,
"grad_norm": 0.004047353286296129,
"learning_rate": 3.223396031950529e-05,
"loss": 0.0004,
"step": 3625
},
{
"epoch": 1.2695652173913043,
"grad_norm": 0.0036859684623777866,
"learning_rate": 3.2072919350682815e-05,
"loss": 0.0484,
"step": 3650
},
{
"epoch": 1.2782608695652173,
"grad_norm": 0.007856795564293861,
"learning_rate": 3.191187838186035e-05,
"loss": 0.022,
"step": 3675
},
{
"epoch": 1.2869565217391306,
"grad_norm": 0.0035763189662247896,
"learning_rate": 3.175083741303788e-05,
"loss": 0.0004,
"step": 3700
},
{
"epoch": 1.2956521739130435,
"grad_norm": 0.003156292950734496,
"learning_rate": 3.158979644421541e-05,
"loss": 0.0004,
"step": 3725
},
{
"epoch": 1.3043478260869565,
"grad_norm": 0.003629323560744524,
"learning_rate": 3.142875547539294e-05,
"loss": 0.0004,
"step": 3750
},
{
"epoch": 1.3130434782608695,
"grad_norm": 68.89725494384766,
"learning_rate": 3.126771450657047e-05,
"loss": 0.0249,
"step": 3775
},
{
"epoch": 1.3217391304347825,
"grad_norm": 0.004872568417340517,
"learning_rate": 3.1106673537748005e-05,
"loss": 0.106,
"step": 3800
},
{
"epoch": 1.3304347826086955,
"grad_norm": 0.0038311562966555357,
"learning_rate": 3.0945632568925534e-05,
"loss": 0.0008,
"step": 3825
},
{
"epoch": 1.3391304347826087,
"grad_norm": 0.01143663190305233,
"learning_rate": 3.078459160010307e-05,
"loss": 0.0433,
"step": 3850
},
{
"epoch": 1.3478260869565217,
"grad_norm": 0.00519527355208993,
"learning_rate": 3.0623550631280604e-05,
"loss": 0.0004,
"step": 3875
},
{
"epoch": 1.3565217391304347,
"grad_norm": 0.0031176016200333834,
"learning_rate": 3.0462509662458132e-05,
"loss": 0.0005,
"step": 3900
},
{
"epoch": 1.365217391304348,
"grad_norm": 0.0033473202493041754,
"learning_rate": 3.0301468693635664e-05,
"loss": 0.0003,
"step": 3925
},
{
"epoch": 1.373913043478261,
"grad_norm": 0.0031863965559750795,
"learning_rate": 3.0140427724813192e-05,
"loss": 0.0003,
"step": 3950
},
{
"epoch": 1.382608695652174,
"grad_norm": 0.002523756120353937,
"learning_rate": 2.9979386755990724e-05,
"loss": 0.0318,
"step": 3975
},
{
"epoch": 1.391304347826087,
"grad_norm": 0.0034575534518808126,
"learning_rate": 2.9818345787168255e-05,
"loss": 0.0003,
"step": 4000
},
{
"epoch": 1.4,
"grad_norm": 0.004556054249405861,
"learning_rate": 2.9657304818345787e-05,
"loss": 0.0003,
"step": 4025
},
{
"epoch": 1.4086956521739131,
"grad_norm": 0.009268323890864849,
"learning_rate": 2.949626384952332e-05,
"loss": 0.0296,
"step": 4050
},
{
"epoch": 1.4173913043478261,
"grad_norm": 0.01629672758281231,
"learning_rate": 2.9335222880700854e-05,
"loss": 0.0021,
"step": 4075
},
{
"epoch": 1.4260869565217391,
"grad_norm": 0.0037628798745572567,
"learning_rate": 2.9174181911878385e-05,
"loss": 0.0015,
"step": 4100
},
{
"epoch": 1.434782608695652,
"grad_norm": 0.004670215770602226,
"learning_rate": 2.9013140943055917e-05,
"loss": 0.0329,
"step": 4125
},
{
"epoch": 1.4434782608695653,
"grad_norm": 0.003128861775621772,
"learning_rate": 2.885209997423345e-05,
"loss": 0.0361,
"step": 4150
},
{
"epoch": 1.4521739130434783,
"grad_norm": 0.0030446811579167843,
"learning_rate": 2.869105900541098e-05,
"loss": 0.0003,
"step": 4175
},
{
"epoch": 1.4608695652173913,
"grad_norm": 0.0032489860896021128,
"learning_rate": 2.853001803658851e-05,
"loss": 0.0355,
"step": 4200
},
{
"epoch": 1.4695652173913043,
"grad_norm": 0.002284318907186389,
"learning_rate": 2.836897706776604e-05,
"loss": 0.0276,
"step": 4225
},
{
"epoch": 1.4782608695652173,
"grad_norm": 0.005029418971389532,
"learning_rate": 2.8207936098943572e-05,
"loss": 0.0617,
"step": 4250
},
{
"epoch": 1.4869565217391305,
"grad_norm": 0.0027885879389941692,
"learning_rate": 2.8046895130121104e-05,
"loss": 0.0004,
"step": 4275
},
{
"epoch": 1.4956521739130435,
"grad_norm": 0.0034100012853741646,
"learning_rate": 2.7885854161298635e-05,
"loss": 0.0003,
"step": 4300
},
{
"epoch": 1.5043478260869565,
"grad_norm": 0.0024735534097999334,
"learning_rate": 2.7724813192476167e-05,
"loss": 0.0003,
"step": 4325
},
{
"epoch": 1.5130434782608697,
"grad_norm": 0.0018064508913084865,
"learning_rate": 2.75637722236537e-05,
"loss": 0.0003,
"step": 4350
},
{
"epoch": 1.5217391304347827,
"grad_norm": 0.001821321900933981,
"learning_rate": 2.7402731254831234e-05,
"loss": 0.0003,
"step": 4375
},
{
"epoch": 1.5304347826086957,
"grad_norm": 0.002236420288681984,
"learning_rate": 2.7241690286008765e-05,
"loss": 0.0003,
"step": 4400
},
{
"epoch": 1.5391304347826087,
"grad_norm": 0.0018212420400232077,
"learning_rate": 2.708064931718629e-05,
"loss": 0.0717,
"step": 4425
},
{
"epoch": 1.5478260869565217,
"grad_norm": 0.0029810129199177027,
"learning_rate": 2.6919608348363822e-05,
"loss": 0.0003,
"step": 4450
},
{
"epoch": 1.5565217391304347,
"grad_norm": 0.002055574208498001,
"learning_rate": 2.6758567379541353e-05,
"loss": 0.0003,
"step": 4475
},
{
"epoch": 1.5652173913043477,
"grad_norm": 0.002401245990768075,
"learning_rate": 2.659752641071889e-05,
"loss": 0.0471,
"step": 4500
},
{
"epoch": 1.5739130434782609,
"grad_norm": 0.0019228613236919045,
"learning_rate": 2.643648544189642e-05,
"loss": 0.004,
"step": 4525
},
{
"epoch": 1.5826086956521739,
"grad_norm": 0.01082328986376524,
"learning_rate": 2.6275444473073952e-05,
"loss": 0.0004,
"step": 4550
},
{
"epoch": 1.591304347826087,
"grad_norm": 0.004519768990576267,
"learning_rate": 2.6114403504251483e-05,
"loss": 0.0003,
"step": 4575
},
{
"epoch": 1.6,
"grad_norm": 0.004080561455339193,
"learning_rate": 2.5953362535429015e-05,
"loss": 0.0003,
"step": 4600
},
{
"epoch": 1.608695652173913,
"grad_norm": 0.0020184165332466364,
"learning_rate": 2.5792321566606547e-05,
"loss": 0.0003,
"step": 4625
},
{
"epoch": 1.617391304347826,
"grad_norm": 0.00274313660338521,
"learning_rate": 2.563128059778408e-05,
"loss": 0.0002,
"step": 4650
},
{
"epoch": 1.626086956521739,
"grad_norm": 0.0025938046164810658,
"learning_rate": 2.5470239628961607e-05,
"loss": 0.0002,
"step": 4675
},
{
"epoch": 1.634782608695652,
"grad_norm": 0.002224980155006051,
"learning_rate": 2.530919866013914e-05,
"loss": 0.0002,
"step": 4700
},
{
"epoch": 1.643478260869565,
"grad_norm": 0.0018848755862563848,
"learning_rate": 2.514815769131667e-05,
"loss": 0.0002,
"step": 4725
},
{
"epoch": 1.6521739130434783,
"grad_norm": 0.0021186743397265673,
"learning_rate": 2.4987116722494202e-05,
"loss": 0.012,
"step": 4750
},
{
"epoch": 1.6608695652173913,
"grad_norm": 0.002839816967025399,
"learning_rate": 2.4826075753671733e-05,
"loss": 0.0732,
"step": 4775
},
{
"epoch": 1.6695652173913045,
"grad_norm": 0.0023581942077726126,
"learning_rate": 2.466503478484927e-05,
"loss": 0.0441,
"step": 4800
},
{
"epoch": 1.6782608695652175,
"grad_norm": 0.0025644400157034397,
"learning_rate": 2.45039938160268e-05,
"loss": 0.0002,
"step": 4825
},
{
"epoch": 1.6869565217391305,
"grad_norm": 0.0018391634803265333,
"learning_rate": 2.4342952847204332e-05,
"loss": 0.0003,
"step": 4850
},
{
"epoch": 1.6956521739130435,
"grad_norm": 0.002724371151998639,
"learning_rate": 2.418191187838186e-05,
"loss": 0.0002,
"step": 4875
},
{
"epoch": 1.7043478260869565,
"grad_norm": 0.002339495113119483,
"learning_rate": 2.402087090955939e-05,
"loss": 0.0002,
"step": 4900
},
{
"epoch": 1.7130434782608694,
"grad_norm": 0.0015120344469323754,
"learning_rate": 2.3859829940736923e-05,
"loss": 0.0059,
"step": 4925
},
{
"epoch": 1.7217391304347827,
"grad_norm": 0.0019244271097704768,
"learning_rate": 2.369878897191446e-05,
"loss": 0.0488,
"step": 4950
},
{
"epoch": 1.7304347826086957,
"grad_norm": 0.006782891228795052,
"learning_rate": 2.353774800309199e-05,
"loss": 0.0002,
"step": 4975
},
{
"epoch": 1.7391304347826086,
"grad_norm": 0.006890356075018644,
"learning_rate": 2.337670703426952e-05,
"loss": 0.0002,
"step": 5000
},
{
"epoch": 1.7478260869565219,
"grad_norm": 0.0016472332645207644,
"learning_rate": 2.321566606544705e-05,
"loss": 0.0002,
"step": 5025
},
{
"epoch": 1.7565217391304349,
"grad_norm": 0.002031004289165139,
"learning_rate": 2.305462509662458e-05,
"loss": 0.0002,
"step": 5050
},
{
"epoch": 1.7652173913043478,
"grad_norm": 0.020191390067338943,
"learning_rate": 2.2893584127802113e-05,
"loss": 0.0006,
"step": 5075
},
{
"epoch": 1.7739130434782608,
"grad_norm": 0.0014919223031029105,
"learning_rate": 2.273254315897965e-05,
"loss": 0.0002,
"step": 5100
},
{
"epoch": 1.7826086956521738,
"grad_norm": 0.0019313708180561662,
"learning_rate": 2.2571502190157177e-05,
"loss": 0.0002,
"step": 5125
},
{
"epoch": 1.7913043478260868,
"grad_norm": 0.0022281836718320847,
"learning_rate": 2.2410461221334708e-05,
"loss": 0.0002,
"step": 5150
},
{
"epoch": 1.8,
"grad_norm": 0.0013652993366122246,
"learning_rate": 2.224942025251224e-05,
"loss": 0.0002,
"step": 5175
},
{
"epoch": 1.808695652173913,
"grad_norm": 0.00228705327026546,
"learning_rate": 2.208837928368977e-05,
"loss": 0.0002,
"step": 5200
},
{
"epoch": 1.8173913043478263,
"grad_norm": 0.001371930236928165,
"learning_rate": 2.1927338314867303e-05,
"loss": 0.0002,
"step": 5225
},
{
"epoch": 1.8260869565217392,
"grad_norm": 0.0013612714828923345,
"learning_rate": 2.1766297346044835e-05,
"loss": 0.0002,
"step": 5250
},
{
"epoch": 1.8347826086956522,
"grad_norm": 0.0018417349783703685,
"learning_rate": 2.1605256377222367e-05,
"loss": 0.0002,
"step": 5275
},
{
"epoch": 1.8434782608695652,
"grad_norm": 0.0018068786012008786,
"learning_rate": 2.1444215408399898e-05,
"loss": 0.0002,
"step": 5300
},
{
"epoch": 1.8521739130434782,
"grad_norm": 0.0039210072718560696,
"learning_rate": 2.128317443957743e-05,
"loss": 0.0002,
"step": 5325
},
{
"epoch": 1.8608695652173912,
"grad_norm": 0.002227018354460597,
"learning_rate": 2.112213347075496e-05,
"loss": 0.0156,
"step": 5350
},
{
"epoch": 1.8695652173913042,
"grad_norm": 0.001460268278606236,
"learning_rate": 2.0961092501932493e-05,
"loss": 0.0202,
"step": 5375
},
{
"epoch": 1.8782608695652174,
"grad_norm": 0.001318955677561462,
"learning_rate": 2.0800051533110025e-05,
"loss": 0.0101,
"step": 5400
},
{
"epoch": 1.8869565217391304,
"grad_norm": 0.0019615204073488712,
"learning_rate": 2.0639010564287557e-05,
"loss": 0.0002,
"step": 5425
},
{
"epoch": 1.8956521739130436,
"grad_norm": 0.0014419632498174906,
"learning_rate": 2.0477969595465088e-05,
"loss": 0.0002,
"step": 5450
},
{
"epoch": 1.9043478260869566,
"grad_norm": 0.00159283762332052,
"learning_rate": 2.031692862664262e-05,
"loss": 0.0002,
"step": 5475
},
{
"epoch": 1.9130434782608696,
"grad_norm": 0.0014433007454499602,
"learning_rate": 2.0155887657820148e-05,
"loss": 0.0002,
"step": 5500
},
{
"epoch": 1.9217391304347826,
"grad_norm": 0.0011873416369780898,
"learning_rate": 1.9994846688997683e-05,
"loss": 0.0002,
"step": 5525
},
{
"epoch": 1.9304347826086956,
"grad_norm": 0.0015813339268788695,
"learning_rate": 1.9833805720175215e-05,
"loss": 0.0002,
"step": 5550
},
{
"epoch": 1.9391304347826086,
"grad_norm": 0.001116643426939845,
"learning_rate": 1.9672764751352747e-05,
"loss": 0.0003,
"step": 5575
},
{
"epoch": 1.9478260869565216,
"grad_norm": 0.0013602287508547306,
"learning_rate": 1.9511723782530278e-05,
"loss": 0.0001,
"step": 5600
},
{
"epoch": 1.9565217391304348,
"grad_norm": 0.001626876532100141,
"learning_rate": 1.9350682813707806e-05,
"loss": 0.0002,
"step": 5625
},
{
"epoch": 1.9652173913043478,
"grad_norm": 0.0015673220623284578,
"learning_rate": 1.9189641844885338e-05,
"loss": 0.0012,
"step": 5650
},
{
"epoch": 1.973913043478261,
"grad_norm": 0.0014821627410128713,
"learning_rate": 1.902860087606287e-05,
"loss": 0.0001,
"step": 5675
},
{
"epoch": 1.982608695652174,
"grad_norm": 0.02127527818083763,
"learning_rate": 1.8867559907240405e-05,
"loss": 0.0491,
"step": 5700
},
{
"epoch": 1.991304347826087,
"grad_norm": 0.0015324688283726573,
"learning_rate": 1.8706518938417936e-05,
"loss": 0.0002,
"step": 5725
},
{
"epoch": 2.0,
"grad_norm": 0.0018975608982145786,
"learning_rate": 1.8545477969595465e-05,
"loss": 0.0002,
"step": 5750
},
{
"epoch": 2.0,
"eval_accuracy": 1.0,
"eval_f1_macro": 1.0,
"eval_f1_micro": 1.0,
"eval_f1_weighted": 1.0,
"eval_loss": 0.00012181053898530081,
"eval_precision_macro": 1.0,
"eval_precision_micro": 1.0,
"eval_precision_weighted": 1.0,
"eval_recall_macro": 1.0,
"eval_recall_micro": 1.0,
"eval_recall_weighted": 1.0,
"eval_runtime": 5.0295,
"eval_samples_per_second": 457.3,
"eval_steps_per_second": 28.631,
"step": 5750
}
],
"logging_steps": 25,
"max_steps": 8625,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3026347648512000.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
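
For reference, a minimal sketch of how a trainer_state.json like the one above can be inspected with plain Python. The local file path and the printed summary are illustrative assumptions, not part of the checkpoint; the key names ("log_history", "best_metric", "eval_loss", etc.) follow the structure shown above.

import json

# Load the checkpoint's trainer state (path is illustrative, not from the repo).
with open("checkpoint-5750/trainer_state.json") as f:
    state = json.load(f)

# Split the log into per-step training entries and per-epoch evaluation entries.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"best checkpoint: {state['best_model_checkpoint']} "
      f"(best_metric={state['best_metric']:.2e})")
print(f"{len(train_logs)} training log entries, {len(eval_logs)} evaluations")
for e in eval_logs:
    print(f"epoch {e['epoch']:.0f}: eval_loss={e['eval_loss']:.2e}, "
          f"accuracy={e['eval_accuracy']}")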