[ { "loss": 0.2659, "grad_norm": 3.4286012649536133, "learning_rate": 1.9715261958997724e-05, "epoch": 0.2847380410022779, "step": 500 }, { "loss": 0.1081, "grad_norm": 0.7319045066833496, "learning_rate": 1.9430523917995446e-05, "epoch": 0.5694760820045558, "step": 1000 }, { "loss": 0.0777, "grad_norm": 3.7819042205810547, "learning_rate": 1.9145785876993168e-05, "epoch": 0.8542141230068337, "step": 1500 }, { "eval_loss": 0.07203580439090729, "eval_overall": { "precision": 0.8912693298969072, "recall": 0.931167956916863, "f1": 0.9107818930041152, "accuracy": 0.9814269735680226 }, "eval_per_label": { "LOC": { "precision": 0.9239187076602398, "recall": 0.965160587915079, "f1": 0.9440894568690097, "number": 1837 }, "MISC": { "precision": 0.7727272727272727, "recall": 0.8297180043383948, "f1": 0.8002092050209204, "number": 922 }, "ORG": { "precision": 0.8624823695345557, "recall": 0.9120059656972409, "f1": 0.8865530989488946, "number": 1341 }, "PER": { "precision": 0.9420520999468368, "recall": 0.9619978284473398, "f1": 0.9519204942250873, "number": 1842 } }, "eval_runtime": 7.4533, "eval_samples_per_second": 436.049, "eval_steps_per_second": 54.607, "epoch": 1.0, "step": 1756 }, { "loss": 0.0631, "grad_norm": 0.11871737241744995, "learning_rate": 1.886104783599089e-05, "epoch": 1.1389521640091116, "step": 2000 }, { "loss": 0.0464, "grad_norm": 0.2318667322397232, "learning_rate": 1.8576309794988612e-05, "epoch": 1.4236902050113895, "step": 2500 }, { "loss": 0.0442, "grad_norm": 0.35079020261764526, "learning_rate": 1.8291571753986334e-05, "epoch": 1.7084282460136673, "step": 3000 }, { "loss": 0.0411, "grad_norm": 0.06829982250928879, "learning_rate": 1.8006833712984056e-05, "epoch": 1.9931662870159452, "step": 3500 }, { "eval_loss": 0.07787470519542694, "eval_overall": { "precision": 0.9304723885562209, "recall": 0.9414338606529788, "f1": 0.9359210306173666, "accuracy": 0.9842968152116324 }, "eval_per_label": { "LOC": { "precision": 0.9511015583019882, "recall": 0.9635274904735982, "f1": 0.9572742022714981, "number": 1837 }, "MISC": { "precision": 0.8725910064239829, "recall": 0.8839479392624728, "f1": 0.8782327586206896, "number": 922 }, "ORG": { "precision": 0.9050822122571002, "recall": 0.9030574198359433, "f1": 0.9040686823441583, "number": 1341 }, "PER": { "precision": 0.9568919638105375, "recall": 0.9761129207383279, "f1": 0.9664068798710024, "number": 1842 } }, "eval_runtime": 7.5011, "eval_samples_per_second": 433.267, "eval_steps_per_second": 54.258, "epoch": 2.0, "step": 3512 }, { "loss": 0.0258, "grad_norm": 4.330985069274902, "learning_rate": 1.7722095671981778e-05, "epoch": 2.277904328018223, "step": 4000 }, { "loss": 0.0284, "grad_norm": 0.06637139618396759, "learning_rate": 1.74373576309795e-05, "epoch": 2.562642369020501, "step": 4500 }, { "loss": 0.0269, "grad_norm": 11.263030052185059, "learning_rate": 1.7152619589977222e-05, "epoch": 2.847380410022779, "step": 5000 }, { "eval_loss": 0.07202505320310593, "eval_overall": { "precision": 0.9318106587222774, "recall": 0.9474924267923258, "f1": 0.9395861148197597, "accuracy": 0.9857685288750221 }, "eval_per_label": { "LOC": { "precision": 0.9626218851570965, "recall": 0.9673380511703865, "f1": 0.9649742058104807, "number": 1837 }, "MISC": { "precision": 0.8943355119825708, "recall": 0.8904555314533622, "f1": 0.8923913043478261, "number": 922 }, "ORG": { "precision": 0.8772791023842917, "recall": 0.9328859060402684, "f1": 0.9042284062161186, "number": 1341 }, "PER": { "precision": 0.9616630669546437, "recall": 0.9668838219326819, "f1": 0.9642663779101246, "number": 1842 } }, "eval_runtime": 6.5043, "eval_samples_per_second": 499.669, "eval_steps_per_second": 62.574, "epoch": 3.0, "step": 5268 }, { "loss": 0.0226, "grad_norm": 0.7242124676704407, "learning_rate": 1.6867881548974945e-05, "epoch": 3.132118451025057, "step": 5500 }, { "loss": 0.0165, "grad_norm": 0.006902824155986309, "learning_rate": 1.6583143507972667e-05, "epoch": 3.416856492027335, "step": 6000 }, { "loss": 0.0176, "grad_norm": 0.031127002090215683, "learning_rate": 1.629840546697039e-05, "epoch": 3.7015945330296125, "step": 6500 }, { "loss": 0.0196, "grad_norm": 0.0038718737196177244, "learning_rate": 1.601366742596811e-05, "epoch": 3.9863325740318905, "step": 7000 }, { "eval_loss": 0.08569859713315964, "eval_overall": { "precision": 0.930635838150289, "recall": 0.9483338943116796, "f1": 0.9394015170459282, "accuracy": 0.9850621063165951 }, "eval_per_label": { "LOC": { "precision": 0.9533011272141707, "recall": 0.9667936853565596, "f1": 0.96, "number": 1837 }, "MISC": { "precision": 0.8623188405797102, "recall": 0.903470715835141, "f1": 0.8824152542372882, "number": 922 }, "ORG": { "precision": 0.8995664739884393, "recall": 0.9284116331096197, "f1": 0.9137614678899083, "number": 1341 }, "PER": { "precision": 0.9668838219326819, "recall": 0.9668838219326819, "f1": 0.9668838219326819, "number": 1842 } }, "eval_runtime": 7.5249, "eval_samples_per_second": 431.899, "eval_steps_per_second": 54.087, "epoch": 4.0, "step": 7024 }, { "loss": 0.0138, "grad_norm": 0.17987537384033203, "learning_rate": 1.5728929384965833e-05, "epoch": 4.271070615034168, "step": 7500 }, { "loss": 0.0134, "grad_norm": 1.4385559558868408, "learning_rate": 1.5444191343963555e-05, "epoch": 4.555808656036446, "step": 8000 }, { "loss": 0.0139, "grad_norm": 0.02356315404176712, "learning_rate": 1.5159453302961277e-05, "epoch": 4.840546697038724, "step": 8500 }, { "eval_loss": 0.08417785912752151, "eval_overall": { "precision": 0.9310686015831134, "recall": 0.9501851228542578, "f1": 0.9405297351324339, "accuracy": 0.9851062577264967 }, "eval_per_label": { "LOC": { "precision": 0.9627228525121556, "recall": 0.9700598802395209, "f1": 0.9663774403470715, "number": 1837 }, "MISC": { "precision": 0.8386454183266933, "recall": 0.913232104121475, "f1": 0.8743509865005192, "number": 922 }, "ORG": { "precision": 0.9070110701107011, "recall": 0.9164802386278896, "f1": 0.9117210682492581, "number": 1341 }, "PER": { "precision": 0.9670981661272924, "recall": 0.9733984799131379, "f1": 0.9702380952380952, "number": 1842 } }, "eval_runtime": 7.5038, "eval_samples_per_second": 433.114, "eval_steps_per_second": 54.239, "epoch": 5.0, "step": 8780 }, { "loss": 0.0092, "grad_norm": 0.7477717399597168, "learning_rate": 1.4874715261958999e-05, "epoch": 5.125284738041002, "step": 9000 }, { "loss": 0.006, "grad_norm": 0.04322722181677818, "learning_rate": 1.4589977220956721e-05, "epoch": 5.41002277904328, "step": 9500 }, { "loss": 0.0073, "grad_norm": 0.051729559898376465, "learning_rate": 1.4305239179954442e-05, "epoch": 5.694760820045558, "step": 10000 }, { "loss": 0.0079, "grad_norm": 0.012189100496470928, "learning_rate": 1.4020501138952165e-05, "epoch": 5.979498861047836, "step": 10500 }, { "eval_loss": 0.09725591540336609, "eval_overall": { "precision": 0.9291845493562232, "recall": 0.9473241332884551, "f1": 0.9381666666666667, "accuracy": 0.9844734208512392 }, "eval_per_label": { "LOC": { "precision": 0.9546424759871932, "recall": 0.9738704409363091, "f1": 0.9641606036108865, "number": 1837 }, "MISC": { "precision": 0.858739837398374, "recall": 0.9164859002169198, "f1": 0.8866736621196223, "number": 922 }, "ORG": { "precision": 0.8921852387843705, "recall": 0.9194630872483222, "f1": 0.9056188027910393, "number": 1341 }, "PER": { "precision": 0.9691969196919692, "recall": 0.9565689467969598, "f1": 0.9628415300546448, "number": 1842 } }, "eval_runtime": 7.4822, "eval_samples_per_second": 434.362, "eval_steps_per_second": 54.395, "epoch": 6.0, "step": 10536 }, { "loss": 0.008, "grad_norm": 0.0029869996942579746, "learning_rate": 1.3735763097949887e-05, "epoch": 6.264236902050114, "step": 11000 }, { "loss": 0.0066, "grad_norm": 0.018263721838593483, "learning_rate": 1.3451025056947608e-05, "epoch": 6.548974943052392, "step": 11500 }, { "loss": 0.0057, "grad_norm": 0.020874306559562683, "learning_rate": 1.3166287015945332e-05, "epoch": 6.83371298405467, "step": 12000 }, { "eval_loss": 0.097112737596035, "eval_overall": { "precision": 0.9343584656084656, "recall": 0.9510265903736116, "f1": 0.9426188490408675, "accuracy": 0.9859745687878966 }, "eval_per_label": { "LOC": { "precision": 0.9689373297002725, "recall": 0.9678824169842134, "f1": 0.968409586056645, "number": 1837 }, "MISC": { "precision": 0.8805031446540881, "recall": 0.911062906724512, "f1": 0.8955223880597015, "number": 922 }, "ORG": { "precision": 0.8851063829787233, "recall": 0.930648769574944, "f1": 0.9073064340239914, "number": 1341 }, "PER": { "precision": 0.965386695511087, "recall": 0.9690553745928339, "f1": 0.967217556217827, "number": 1842 } }, "eval_runtime": 7.4789, "eval_samples_per_second": 434.555, "eval_steps_per_second": 54.42, "epoch": 7.0, "step": 12292 }, { "loss": 0.0031, "grad_norm": 0.0007487820694223046, "learning_rate": 1.2881548974943054e-05, "epoch": 7.118451025056948, "step": 12500 }, { "loss": 0.0037, "grad_norm": 15.724783897399902, "learning_rate": 1.2596810933940776e-05, "epoch": 7.403189066059226, "step": 13000 }, { "loss": 0.0061, "grad_norm": 0.614613950252533, "learning_rate": 1.2312072892938498e-05, "epoch": 7.687927107061503, "step": 13500 }, { "loss": 0.0036, "grad_norm": 0.0012391641503199935, "learning_rate": 1.2027334851936218e-05, "epoch": 7.972665148063781, "step": 14000 }, { "eval_loss": 0.10294844955205917, "eval_overall": { "precision": 0.9285831285831286, "recall": 0.9540558734432851, "f1": 0.9411471735701834, "accuracy": 0.9852387119562018 }, "eval_per_label": { "LOC": { "precision": 0.9664864864864865, "recall": 0.9733260751224823, "f1": 0.9698942229454841, "number": 1837 }, "MISC": { "precision": 0.8497983870967742, "recall": 0.9143167028199566, "f1": 0.8808777429467085, "number": 922 }, "ORG": { "precision": 0.9019607843137255, "recall": 0.9261744966442953, "f1": 0.9139072847682119, "number": 1341 }, "PER": { "precision": 0.9522799575821845, "recall": 0.9750271444082519, "f1": 0.9635193133047211, "number": 1842 } }, "eval_runtime": 7.6218, "eval_samples_per_second": 426.41, "eval_steps_per_second": 53.4, "epoch": 8.0, "step": 14048 }, { "loss": 0.0042, "grad_norm": 0.15165293216705322, "learning_rate": 1.1742596810933942e-05, "epoch": 8.257403189066059, "step": 14500 }, { "loss": 0.0038, "grad_norm": 0.01020512543618679, "learning_rate": 1.1457858769931664e-05, "epoch": 8.542141230068337, "step": 15000 }, { "loss": 0.0028, "grad_norm": 0.00046127362293191254, "learning_rate": 1.1173120728929384e-05, "epoch": 8.826879271070615, "step": 15500 }, { "eval_loss": 0.11473368108272552, "eval_overall": { "precision": 0.9368072787427626, "recall": 0.9530461124200605, "f1": 0.944856928339034, "accuracy": 0.9860187201977983 }, "eval_per_label": { "LOC": { "precision": 0.9632034632034632, "recall": 0.9689711486118672, "f1": 0.966078697421981, "number": 1837 }, "MISC": { "precision": 0.8518145161290323, "recall": 0.9164859002169198, "f1": 0.8829676071055382, "number": 922 }, "ORG": { "precision": 0.9313207547169812, "recall": 0.9202087994034303, "f1": 0.9257314328582145, "number": 1341 }, "PER": { "precision": 0.9595744680851064, "recall": 0.9793702497285559, "f1": 0.969371305749597, "number": 1842 } }, "eval_runtime": 7.5249, "eval_samples_per_second": 431.9, "eval_steps_per_second": 54.087, "epoch": 9.0, "step": 15804 }, { "loss": 0.0049, "grad_norm": 0.17741906642913818, "learning_rate": 1.0888382687927108e-05, "epoch": 9.111617312072893, "step": 16000 }, { "loss": 0.003, "grad_norm": 0.0018998866435140371, "learning_rate": 1.060364464692483e-05, "epoch": 9.39635535307517, "step": 16500 }, { "loss": 0.0042, "grad_norm": 0.0021700740326195955, "learning_rate": 1.0318906605922552e-05, "epoch": 9.681093394077449, "step": 17000 }, { "loss": 0.0026, "grad_norm": 0.014039441011846066, "learning_rate": 1.0034168564920275e-05, "epoch": 9.965831435079727, "step": 17500 }, { "eval_loss": 0.1184961125254631, "eval_overall": { "precision": 0.9358889623265036, "recall": 0.9532144059239314, "f1": 0.9444722361180591, "accuracy": 0.9852681462294696 }, "eval_per_label": { "LOC": { "precision": 0.9669197396963124, "recall": 0.9706042460533478, "f1": 0.9687584895408857, "number": 1837 }, "MISC": { "precision": 0.8826638477801269, "recall": 0.9056399132321041, "f1": 0.8940042826552462, "number": 922 }, "ORG": { "precision": 0.8961318051575932, "recall": 0.9328859060402684, "f1": 0.9141395688710267, "number": 1341 }, "PER": { "precision": 0.9619506966773848, "recall": 0.9744842562432139, "f1": 0.9681769147788565, "number": 1842 } }, "eval_runtime": 7.5803, "eval_samples_per_second": 428.744, "eval_steps_per_second": 53.692, "epoch": 10.0, "step": 17560 }, { "loss": 0.0011, "grad_norm": 0.00040622701635584235, "learning_rate": 9.749430523917997e-06, "epoch": 10.250569476082005, "step": 18000 }, { "loss": 0.001, "grad_norm": 0.0004215097869746387, "learning_rate": 9.464692482915719e-06, "epoch": 10.535307517084282, "step": 18500 }, { "loss": 0.002, "grad_norm": 0.0020023963879793882, "learning_rate": 9.17995444191344e-06, "epoch": 10.82004555808656, "step": 19000 }, { "eval_loss": 0.11947210878133774, "eval_overall": { "precision": 0.9384717168375786, "recall": 0.9548973409626389, "f1": 0.9466132799466134, "accuracy": 0.9857685288750221 }, "eval_per_label": { "LOC": { "precision": 0.9658906334596643, "recall": 0.9711486118671747, "f1": 0.9685124864277959, "number": 1837 }, "MISC": { "precision": 0.8954108858057631, "recall": 0.9099783080260304, "f1": 0.9026358257127487, "number": 922 }, "ORG": { "precision": 0.9075812274368231, "recall": 0.9373601789709173, "f1": 0.922230374174615, "number": 1341 }, "PER": { "precision": 0.9557805007991476, "recall": 0.9739413680781759, "f1": 0.9647754772788384, "number": 1842 } }, "eval_runtime": 7.5556, "eval_samples_per_second": 430.142, "eval_steps_per_second": 53.867, "epoch": 11.0, "step": 19316 }, { "loss": 0.0028, "grad_norm": 0.030722877010703087, "learning_rate": 8.895216400911163e-06, "epoch": 11.104783599088838, "step": 19500 }, { "loss": 0.0033, "grad_norm": 0.004747629631310701, "learning_rate": 8.610478359908885e-06, "epoch": 11.389521640091116, "step": 20000 }, { "loss": 0.0018, "grad_norm": 0.0014466517604887486, "learning_rate": 8.325740318906607e-06, "epoch": 11.674259681093394, "step": 20500 }, { "loss": 0.0004, "grad_norm": 0.005812987219542265, "learning_rate": 8.041002277904329e-06, "epoch": 11.958997722095672, "step": 21000 }, { "eval_loss": 0.12146918475627899, "eval_overall": { "precision": 0.9431799302209669, "recall": 0.9554022214742511, "f1": 0.9492517348047823, "accuracy": 0.9866074056631542 }, "eval_per_label": { "LOC": { "precision": 0.971195652173913, "recall": 0.9727817093086554, "f1": 0.9719880337231438, "number": 1837 }, "MISC": { "precision": 0.9096844396082698, "recall": 0.9067245119305857, "f1": 0.9082020640956002, "number": 922 }, "ORG": { "precision": 0.9121522693997072, "recall": 0.9291573452647278, "f1": 0.9205762837089029, "number": 1341 }, "PER": { "precision": 0.9545934530095037, "recall": 0.9815418023887079, "f1": 0.9678800856531049, "number": 1842 } }, "eval_runtime": 7.7732, "eval_samples_per_second": 418.106, "eval_steps_per_second": 52.36, "epoch": 12.0, "step": 21072 }, { "loss": 0.0006, "grad_norm": 0.0022160038352012634, "learning_rate": 7.75626423690205e-06, "epoch": 12.24373576309795, "step": 21500 }, { "loss": 0.0008, "grad_norm": 0.00019432637782301754, "learning_rate": 7.471526195899773e-06, "epoch": 12.528473804100228, "step": 22000 }, { "loss": 0.0011, "grad_norm": 1.6397913694381714, "learning_rate": 7.186788154897495e-06, "epoch": 12.813211845102506, "step": 22500 }, { "eval_loss": 0.12505799531936646, "eval_overall": { "precision": 0.9386808087504143, "recall": 0.9532144059239314, "f1": 0.9458917835671342, "accuracy": 0.9860923058809677 }, "eval_per_label": { "LOC": { "precision": 0.9675148890092041, "recall": 0.9727817093086554, "f1": 0.9701411509229099, "number": 1837 }, "MISC": { "precision": 0.8782791185729276, "recall": 0.9078091106290672, "f1": 0.8927999999999999, "number": 922 }, "ORG": { "precision": 0.9241741741741741, "recall": 0.9179716629381058, "f1": 0.9210624766180322, "number": 1341 }, "PER": { "precision": 0.9511041009463722, "recall": 0.9820846905537459, "f1": 0.9663461538461537, "number": 1842 } }, "eval_runtime": 7.5826, "eval_samples_per_second": 428.613, "eval_steps_per_second": 53.676, "epoch": 13.0, "step": 22828 }, { "loss": 0.0015, "grad_norm": 0.00018138765881303698, "learning_rate": 6.9020501138952166e-06, "epoch": 13.097949886104784, "step": 23000 }, { "loss": 0.0007, "grad_norm": 0.00034025911008939147, "learning_rate": 6.617312072892939e-06, "epoch": 13.382687927107062, "step": 23500 }, { "loss": 0.0006, "grad_norm": 0.00041584973223507404, "learning_rate": 6.3325740318906616e-06, "epoch": 13.66742596810934, "step": 24000 }, { "loss": 0.0008, "grad_norm": 0.0006968477973714471, "learning_rate": 6.047835990888384e-06, "epoch": 13.952164009111618, "step": 24500 }, { "eval_loss": 0.12853111326694489, "eval_overall": { "precision": 0.9430043203722167, "recall": 0.9550656344665096, "f1": 0.9489966555183946, "accuracy": 0.986342497203744 }, "eval_per_label": { "LOC": { "precision": 0.9736986301369863, "recall": 0.9673380511703865, "f1": 0.9705079191698526, "number": 1837 }, "MISC": { "precision": 0.8892438764643238, "recall": 0.9056399132321041, "f1": 0.8973670069854918, "number": 922 }, "ORG": { "precision": 0.9200293470286134, "recall": 0.9351230425055929, "f1": 0.9275147928994084, "number": 1341 }, "PER": { "precision": 0.9566367001586462, "recall": 0.9820846905537459, "f1": 0.9691936780069649, "number": 1842 } }, "eval_runtime": 7.5688, "eval_samples_per_second": 429.396, "eval_steps_per_second": 53.774, "epoch": 14.0, "step": 24584 }, { "loss": 0.0006, "grad_norm": 0.00020477738871704787, "learning_rate": 5.763097949886105e-06, "epoch": 14.236902050113896, "step": 25000 }, { "loss": 0.0007, "grad_norm": 0.00032146400189958513, "learning_rate": 5.478359908883827e-06, "epoch": 14.521640091116174, "step": 25500 }, { "loss": 0.0014, "grad_norm": 0.000293695367872715, "learning_rate": 5.19362186788155e-06, "epoch": 14.806378132118452, "step": 26000 }, { "eval_loss": 0.1253676861524582, "eval_overall": { "precision": 0.938937613767996, "recall": 0.9548973409626389, "f1": 0.9468502294534834, "accuracy": 0.9861364572908695 }, "eval_per_label": { "LOC": { "precision": 0.9607948442534908, "recall": 0.9738704409363091, "f1": 0.9672884563395511, "number": 1837 }, "MISC": { "precision": 0.8742203742203742, "recall": 0.9121475054229935, "f1": 0.8927813163481952, "number": 922 }, "ORG": { "precision": 0.9322289156626506, "recall": 0.9231916480238628, "f1": 0.9276882727613338, "number": 1341 }, "PER": { "precision": 0.9550502379693284, "recall": 0.9804560260586319, "f1": 0.9675863916421109, "number": 1842 } }, "eval_runtime": 8.6163, "eval_samples_per_second": 377.191, "eval_steps_per_second": 47.236, "epoch": 15.0, "step": 26340 }, { "loss": 0.0008, "grad_norm": 0.00019819244334939867, "learning_rate": 4.908883826879272e-06, "epoch": 15.09111617312073, "step": 26500 }, { "loss": 0.0008, "grad_norm": 0.00021079520229250193, "learning_rate": 4.624145785876993e-06, "epoch": 15.375854214123008, "step": 27000 }, { "loss": 0.0003, "grad_norm": 0.00012124140630476177, "learning_rate": 4.339407744874715e-06, "epoch": 15.660592255125284, "step": 27500 }, { "loss": 0.0008, "grad_norm": 0.000182148942258209, "learning_rate": 4.054669703872437e-06, "epoch": 15.945330296127562, "step": 28000 }, { "eval_loss": 0.12416187673807144, "eval_overall": { "precision": 0.9447862963578912, "recall": 0.9560753954897341, "f1": 0.9503973232956922, "accuracy": 0.9866957084829575 }, "eval_per_label": { "LOC": { "precision": 0.9680043383947939, "recall": 0.9716929776810016, "f1": 0.969845150774246, "number": 1837 }, "MISC": { "precision": 0.8954108858057631, "recall": 0.9099783080260304, "f1": 0.9026358257127487, "number": 922 }, "ORG": { "precision": 0.9301634472511144, "recall": 0.9336316181953765, "f1": 0.9318943059173799, "number": 1341 }, "PER": { "precision": 0.957051961823966, "recall": 0.9799131378935939, "f1": 0.9683476394849786, "number": 1842 } }, "eval_runtime": 8.6628, "eval_samples_per_second": 375.168, "eval_steps_per_second": 46.983, "epoch": 16.0, "step": 28096 }, { "loss": 0.001, "grad_norm": 0.019741835072636604, "learning_rate": 3.76993166287016e-06, "epoch": 16.23006833712984, "step": 28500 }, { "loss": 0.0003, "grad_norm": 0.00012148160021752119, "learning_rate": 3.4851936218678815e-06, "epoch": 16.514806378132118, "step": 29000 }, { "loss": 0.0005, "grad_norm": 0.00011341737263137475, "learning_rate": 3.200455580865604e-06, "epoch": 16.799544419134396, "step": 29500 }, { "eval_loss": 0.12274094671010971, "eval_overall": { "precision": 0.9438631456568676, "recall": 0.9564119824974756, "f1": 0.9500961297333445, "accuracy": 0.9868428798492965 }, "eval_per_label": { "LOC": { "precision": 0.971195652173913, "recall": 0.9727817093086554, "f1": 0.9719880337231438, "number": 1837 }, "MISC": { "precision": 0.8761609907120743, "recall": 0.920824295010846, "f1": 0.8979375991538868, "number": 922 }, "ORG": { "precision": 0.9307004470938898, "recall": 0.9313944817300522, "f1": 0.9310473350726798, "number": 1341 }, "PER": { "precision": 0.9614973262032086, "recall": 0.9761129207383279, "f1": 0.9687500000000001, "number": 1842 } }, "eval_runtime": 8.9038, "eval_samples_per_second": 365.012, "eval_steps_per_second": 45.711, "epoch": 17.0, "step": 29852 }, { "loss": 0.0007, "grad_norm": 0.00015434053784701973, "learning_rate": 2.9157175398633257e-06, "epoch": 17.084282460136674, "step": 30000 }, { "loss": 0.0, "grad_norm": 0.00021848917822353542, "learning_rate": 2.6309794988610482e-06, "epoch": 17.36902050113895, "step": 30500 }, { "loss": 0.0002, "grad_norm": 0.0001007779865176417, "learning_rate": 2.34624145785877e-06, "epoch": 17.65375854214123, "step": 31000 }, { "loss": 0.0002, "grad_norm": 0.00010259783448418602, "learning_rate": 2.061503416856492e-06, "epoch": 17.938496583143507, "step": 31500 }, { "eval_loss": 0.12293359637260437, "eval_overall": { "precision": 0.9440199335548173, "recall": 0.9564119824974756, "f1": 0.9501755559271025, "accuracy": 0.9870636368988049 }, "eval_per_label": { "LOC": { "precision": 0.9701249321021184, "recall": 0.9722373434948285, "f1": 0.9711799891245242, "number": 1837 }, "MISC": { "precision": 0.8810020876826722, "recall": 0.9154013015184381, "f1": 0.8978723404255319, "number": 922 }, "ORG": { "precision": 0.926829268292683, "recall": 0.9351230425055929, "f1": 0.9309576837416481, "number": 1341 }, "PER": { "precision": 0.9630620985010707, "recall": 0.9766558089033659, "f1": 0.969811320754717, "number": 1842 } }, "eval_runtime": 8.7313, "eval_samples_per_second": 372.226, "eval_steps_per_second": 46.614, "epoch": 18.0, "step": 31608 }, { "loss": 0.0001, "grad_norm": 0.0010326108895242214, "learning_rate": 1.7767653758542143e-06, "epoch": 18.223234624145785, "step": 32000 }, { "loss": 0.0007, "grad_norm": 0.00016551795124541968, "learning_rate": 1.4920273348519363e-06, "epoch": 18.507972665148063, "step": 32500 }, { "loss": 0.0004, "grad_norm": 0.00024008983746170998, "learning_rate": 1.2072892938496584e-06, "epoch": 18.79271070615034, "step": 33000 }, { "eval_loss": 0.12236841022968292, "eval_overall": { "precision": 0.944813829787234, "recall": 0.9565802760013463, "f1": 0.9506606455929084, "accuracy": 0.9872255254017779 }, "eval_per_label": { "LOC": { "precision": 0.971677559912854, "recall": 0.9711486118671747, "f1": 0.9714130138851075, "number": 1837 }, "MISC": { "precision": 0.8712667353244078, "recall": 0.9175704989154013, "f1": 0.8938193343898574, "number": 922 }, "ORG": { "precision": 0.9383458646616541, "recall": 0.930648769574944, "f1": 0.9344814676151254, "number": 1341 }, "PER": { "precision": 0.961149547631719, "recall": 0.9804560260586319, "f1": 0.9707067992475142, "number": 1842 } }, "eval_runtime": 6.5081, "eval_samples_per_second": 499.381, "eval_steps_per_second": 62.538, "epoch": 19.0, "step": 33364 }, { "loss": 0.0001, "grad_norm": 0.00016071839490905404, "learning_rate": 9.225512528473805e-07, "epoch": 19.07744874715262, "step": 33500 }, { "loss": 0.0001, "grad_norm": 9.665234392741695e-05, "learning_rate": 6.378132118451026e-07, "epoch": 19.362186788154897, "step": 34000 }, { "loss": 0.0001, "grad_norm": 0.004949676804244518, "learning_rate": 3.530751708428246e-07, "epoch": 19.646924829157175, "step": 34500 }, { "loss": 0.0005, "grad_norm": 0.00014002641546539962, "learning_rate": 6.83371298405467e-08, "epoch": 19.931662870159453, "step": 35000 }, { "eval_loss": 0.12258101999759674, "eval_overall": { "precision": 0.9443891102257637, "recall": 0.9574217435207001, "f1": 0.9508607721878657, "accuracy": 0.9872549596750456 }, "eval_per_label": { "LOC": { "precision": 0.9727371864776445, "recall": 0.9711486118671747, "f1": 0.9719422500681013, "number": 1837 }, "MISC": { "precision": 0.8746113989637305, "recall": 0.9154013015184381, "f1": 0.8945416004239534, "number": 922 }, "ORG": { "precision": 0.9336810730253353, "recall": 0.9343773303504848, "f1": 0.9340290719344019, "number": 1341 }, "PER": { "precision": 0.960169941582581, "recall": 0.9815418023887079, "f1": 0.970738255033557, "number": 1842 } }, "eval_runtime": 6.7405, "eval_samples_per_second": 482.16, "eval_steps_per_second": 60.381, "epoch": 20.0, "step": 35120 }, { "train_runtime": 3604.2086, "train_samples_per_second": 77.914, "train_steps_per_second": 9.744, "total_flos": 6145780901050062.0, "train_loss": 0.013757273165407275, "epoch": 20.0, "step": 35120 } ]