{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 1000,
"global_step": 111303,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02695345138945042,
"grad_norm": 1.7572588920593262,
"learning_rate": 4.955077581017583e-05,
"loss": 2.2431,
"step": 1000
},
{
"epoch": 0.05390690277890084,
"grad_norm": 1.843064308166504,
"learning_rate": 4.9101551620351654e-05,
"loss": 1.8256,
"step": 2000
},
{
"epoch": 0.08086035416835126,
"grad_norm": 1.7338684797286987,
"learning_rate": 4.865232743052748e-05,
"loss": 1.5467,
"step": 3000
},
{
"epoch": 0.10781380555780168,
"grad_norm": 1.794568657875061,
"learning_rate": 4.8203103240703305e-05,
"loss": 1.3891,
"step": 4000
},
{
"epoch": 0.13476725694725208,
"grad_norm": 1.6764180660247803,
"learning_rate": 4.7753879050879134e-05,
"loss": 1.3107,
"step": 5000
},
{
"epoch": 0.16172070833670252,
"grad_norm": 1.5753543376922607,
"learning_rate": 4.7304654861054956e-05,
"loss": 1.2522,
"step": 6000
},
{
"epoch": 0.18867415972615292,
"grad_norm": 1.5777322053909302,
"learning_rate": 4.6855430671230785e-05,
"loss": 1.2054,
"step": 7000
},
{
"epoch": 0.21562761111560336,
"grad_norm": 1.6358872652053833,
"learning_rate": 4.640620648140661e-05,
"loss": 1.1639,
"step": 8000
},
{
"epoch": 0.24258106250505376,
"grad_norm": 1.4646263122558594,
"learning_rate": 4.595698229158244e-05,
"loss": 1.1349,
"step": 9000
},
{
"epoch": 0.26953451389450417,
"grad_norm": 1.8237578868865967,
"learning_rate": 4.5507758101758266e-05,
"loss": 1.1017,
"step": 10000
},
{
"epoch": 0.2964879652839546,
"grad_norm": 4.1678619384765625,
"learning_rate": 4.5058533911934095e-05,
"loss": 1.0802,
"step": 11000
},
{
"epoch": 0.32344141667340504,
"grad_norm": 1.6083228588104248,
"learning_rate": 4.4609309722109924e-05,
"loss": 1.0584,
"step": 12000
},
{
"epoch": 0.35039486806285547,
"grad_norm": 1.5944287776947021,
"learning_rate": 4.4160085532285746e-05,
"loss": 1.0376,
"step": 13000
},
{
"epoch": 0.37734831945230585,
"grad_norm": 1.5678300857543945,
"learning_rate": 4.3710861342461575e-05,
"loss": 1.0186,
"step": 14000
},
{
"epoch": 0.4043017708417563,
"grad_norm": 1.5760828256607056,
"learning_rate": 4.32616371526374e-05,
"loss": 1.0006,
"step": 15000
},
{
"epoch": 0.4312552222312067,
"grad_norm": 1.5422290563583374,
"learning_rate": 4.281241296281323e-05,
"loss": 0.9868,
"step": 16000
},
{
"epoch": 0.45820867362065715,
"grad_norm": 1.8263678550720215,
"learning_rate": 4.236318877298905e-05,
"loss": 0.9737,
"step": 17000
},
{
"epoch": 0.4851621250101075,
"grad_norm": 1.4930555820465088,
"learning_rate": 4.191396458316488e-05,
"loss": 0.9595,
"step": 18000
},
{
"epoch": 0.512115576399558,
"grad_norm": 1.548412561416626,
"learning_rate": 4.14647403933407e-05,
"loss": 0.9448,
"step": 19000
},
{
"epoch": 0.5390690277890083,
"grad_norm": 1.478004813194275,
"learning_rate": 4.101551620351653e-05,
"loss": 0.9351,
"step": 20000
},
{
"epoch": 0.5660224791784588,
"grad_norm": 1.5493645668029785,
"learning_rate": 4.056629201369235e-05,
"loss": 0.9235,
"step": 21000
},
{
"epoch": 0.5929759305679092,
"grad_norm": 1.5261894464492798,
"learning_rate": 4.011706782386818e-05,
"loss": 0.9133,
"step": 22000
},
{
"epoch": 0.6199293819573597,
"grad_norm": 1.3742462396621704,
"learning_rate": 3.9667843634044e-05,
"loss": 0.9027,
"step": 23000
},
{
"epoch": 0.6468828333468101,
"grad_norm": 1.451717495918274,
"learning_rate": 3.921861944421983e-05,
"loss": 0.8937,
"step": 24000
},
{
"epoch": 0.6738362847362604,
"grad_norm": 1.5163180828094482,
"learning_rate": 3.876939525439566e-05,
"loss": 0.8837,
"step": 25000
},
{
"epoch": 0.7007897361257109,
"grad_norm": 1.312432885169983,
"learning_rate": 3.832017106457148e-05,
"loss": 0.8735,
"step": 26000
},
{
"epoch": 0.7277431875151613,
"grad_norm": 1.4663609266281128,
"learning_rate": 3.787094687474731e-05,
"loss": 0.8663,
"step": 27000
},
{
"epoch": 0.7546966389046117,
"grad_norm": 1.457512617111206,
"learning_rate": 3.742172268492314e-05,
"loss": 0.8587,
"step": 28000
},
{
"epoch": 0.7816500902940622,
"grad_norm": 1.4610168933868408,
"learning_rate": 3.697249849509897e-05,
"loss": 0.8514,
"step": 29000
},
{
"epoch": 0.8086035416835126,
"grad_norm": 1.6427557468414307,
"learning_rate": 3.652327430527479e-05,
"loss": 0.8457,
"step": 30000
},
{
"epoch": 0.8355569930729629,
"grad_norm": 1.3497254848480225,
"learning_rate": 3.607405011545062e-05,
"loss": 0.8372,
"step": 31000
},
{
"epoch": 0.8625104444624134,
"grad_norm": 1.597374677658081,
"learning_rate": 3.5624825925626444e-05,
"loss": 0.8308,
"step": 32000
},
{
"epoch": 0.8894638958518638,
"grad_norm": 1.5185362100601196,
"learning_rate": 3.517560173580227e-05,
"loss": 0.8262,
"step": 33000
},
{
"epoch": 0.9164173472413143,
"grad_norm": 1.452099323272705,
"learning_rate": 3.4726377545978095e-05,
"loss": 0.8188,
"step": 34000
},
{
"epoch": 0.9433707986307647,
"grad_norm": 1.4626882076263428,
"learning_rate": 3.4277153356153925e-05,
"loss": 0.8128,
"step": 35000
},
{
"epoch": 0.970324250020215,
"grad_norm": 1.329575538635254,
"learning_rate": 3.3827929166329754e-05,
"loss": 0.8057,
"step": 36000
},
{
"epoch": 0.9972777014096655,
"grad_norm": 1.3917378187179565,
"learning_rate": 3.3378704976505576e-05,
"loss": 0.8014,
"step": 37000
},
{
"epoch": 1.024231152799116,
"grad_norm": 1.4865970611572266,
"learning_rate": 3.2929480786681405e-05,
"loss": 0.7947,
"step": 38000
},
{
"epoch": 1.0511846041885664,
"grad_norm": 1.4632256031036377,
"learning_rate": 3.248025659685723e-05,
"loss": 0.7898,
"step": 39000
},
{
"epoch": 1.0781380555780167,
"grad_norm": 1.5164929628372192,
"learning_rate": 3.2031032407033056e-05,
"loss": 0.7843,
"step": 40000
},
{
"epoch": 1.1050915069674672,
"grad_norm": 1.4825857877731323,
"learning_rate": 3.158180821720888e-05,
"loss": 0.7796,
"step": 41000
},
{
"epoch": 1.1320449583569177,
"grad_norm": 1.4526509046554565,
"learning_rate": 3.113258402738471e-05,
"loss": 0.7771,
"step": 42000
},
{
"epoch": 1.158998409746368,
"grad_norm": 1.4606186151504517,
"learning_rate": 3.068335983756053e-05,
"loss": 0.7728,
"step": 43000
},
{
"epoch": 1.1859518611358184,
"grad_norm": 3.3901302814483643,
"learning_rate": 3.0234135647736362e-05,
"loss": 0.7664,
"step": 44000
},
{
"epoch": 1.212905312525269,
"grad_norm": 1.4377284049987793,
"learning_rate": 2.9784911457912185e-05,
"loss": 0.7656,
"step": 45000
},
{
"epoch": 1.2398587639147194,
"grad_norm": 1.432142734527588,
"learning_rate": 2.9335687268088014e-05,
"loss": 0.7592,
"step": 46000
},
{
"epoch": 1.2668122153041697,
"grad_norm": 1.3727272748947144,
"learning_rate": 2.8886463078263843e-05,
"loss": 0.7563,
"step": 47000
},
{
"epoch": 1.2937656666936201,
"grad_norm": 1.5601295232772827,
"learning_rate": 2.8437238888439665e-05,
"loss": 0.7509,
"step": 48000
},
{
"epoch": 1.3207191180830704,
"grad_norm": 1.4392520189285278,
"learning_rate": 2.7988014698615494e-05,
"loss": 0.7461,
"step": 49000
},
{
"epoch": 1.347672569472521,
"grad_norm": 1.365379810333252,
"learning_rate": 2.7538790508791316e-05,
"loss": 0.7416,
"step": 50000
},
{
"epoch": 1.3746260208619714,
"grad_norm": 1.352710247039795,
"learning_rate": 2.7089566318967145e-05,
"loss": 0.7403,
"step": 51000
},
{
"epoch": 1.4015794722514219,
"grad_norm": 1.3562721014022827,
"learning_rate": 2.664034212914297e-05,
"loss": 0.7359,
"step": 52000
},
{
"epoch": 1.4285329236408721,
"grad_norm": 1.627213954925537,
"learning_rate": 2.61911179393188e-05,
"loss": 0.7329,
"step": 53000
},
{
"epoch": 1.4554863750303226,
"grad_norm": 1.4729622602462769,
"learning_rate": 2.5741893749494622e-05,
"loss": 0.7302,
"step": 54000
},
{
"epoch": 1.4824398264197731,
"grad_norm": 1.4408637285232544,
"learning_rate": 2.529266955967045e-05,
"loss": 0.7283,
"step": 55000
},
{
"epoch": 1.5093932778092234,
"grad_norm": 1.504920482635498,
"learning_rate": 2.4843445369846277e-05,
"loss": 0.7233,
"step": 56000
},
{
"epoch": 1.5363467291986739,
"grad_norm": 1.4795109033584595,
"learning_rate": 2.4394221180022103e-05,
"loss": 0.7211,
"step": 57000
},
{
"epoch": 1.5633001805881244,
"grad_norm": 1.4444007873535156,
"learning_rate": 2.394499699019793e-05,
"loss": 0.7162,
"step": 58000
},
{
"epoch": 1.5902536319775749,
"grad_norm": 1.4556983709335327,
"learning_rate": 2.3495772800373754e-05,
"loss": 0.7156,
"step": 59000
},
{
"epoch": 1.6172070833670251,
"grad_norm": 1.5000152587890625,
"learning_rate": 2.304654861054958e-05,
"loss": 0.7118,
"step": 60000
},
{
"epoch": 1.6441605347564756,
"grad_norm": 1.385746717453003,
"learning_rate": 2.259732442072541e-05,
"loss": 0.7091,
"step": 61000
},
{
"epoch": 1.6711139861459259,
"grad_norm": 1.5274490118026733,
"learning_rate": 2.2148100230901235e-05,
"loss": 0.7073,
"step": 62000
},
{
"epoch": 1.6980674375353764,
"grad_norm": 1.411332607269287,
"learning_rate": 2.169887604107706e-05,
"loss": 0.703,
"step": 63000
},
{
"epoch": 1.7250208889248269,
"grad_norm": 1.6316828727722168,
"learning_rate": 2.124965185125289e-05,
"loss": 0.702,
"step": 64000
},
{
"epoch": 1.7519743403142773,
"grad_norm": 1.4710853099822998,
"learning_rate": 2.0800427661428715e-05,
"loss": 0.6995,
"step": 65000
},
{
"epoch": 1.7789277917037276,
"grad_norm": 1.984465479850769,
"learning_rate": 2.035120347160454e-05,
"loss": 0.6976,
"step": 66000
},
{
"epoch": 1.805881243093178,
"grad_norm": 1.5006201267242432,
"learning_rate": 1.9901979281780366e-05,
"loss": 0.6917,
"step": 67000
},
{
"epoch": 1.8328346944826284,
"grad_norm": 1.4539721012115479,
"learning_rate": 1.9452755091956192e-05,
"loss": 0.6906,
"step": 68000
},
{
"epoch": 1.8597881458720789,
"grad_norm": 1.4637404680252075,
"learning_rate": 1.9003530902132018e-05,
"loss": 0.6866,
"step": 69000
},
{
"epoch": 1.8867415972615293,
"grad_norm": 1.4094914197921753,
"learning_rate": 1.8554306712307847e-05,
"loss": 0.6856,
"step": 70000
},
{
"epoch": 1.9136950486509798,
"grad_norm": 1.828584909439087,
"learning_rate": 1.8105082522483672e-05,
"loss": 0.6838,
"step": 71000
},
{
"epoch": 1.9406485000404303,
"grad_norm": 1.3690617084503174,
"learning_rate": 1.7655858332659498e-05,
"loss": 0.6815,
"step": 72000
},
{
"epoch": 1.9676019514298806,
"grad_norm": 1.438297986984253,
"learning_rate": 1.7206634142835324e-05,
"loss": 0.678,
"step": 73000
},
{
"epoch": 1.9945554028193309,
"grad_norm": 1.482967495918274,
"learning_rate": 1.675740995301115e-05,
"loss": 0.6761,
"step": 74000
},
{
"epoch": 2.0215088542087813,
"grad_norm": 1.5024503469467163,
"learning_rate": 1.6308185763186975e-05,
"loss": 0.6719,
"step": 75000
},
{
"epoch": 2.048462305598232,
"grad_norm": 1.444847583770752,
"learning_rate": 1.5858961573362804e-05,
"loss": 0.6715,
"step": 76000
},
{
"epoch": 2.0754157569876823,
"grad_norm": 1.4710804224014282,
"learning_rate": 1.540973738353863e-05,
"loss": 0.6697,
"step": 77000
},
{
"epoch": 2.102369208377133,
"grad_norm": 1.4638535976409912,
"learning_rate": 1.4960513193714457e-05,
"loss": 0.6676,
"step": 78000
},
{
"epoch": 2.1293226597665833,
"grad_norm": 1.4491604566574097,
"learning_rate": 1.4511289003890283e-05,
"loss": 0.666,
"step": 79000
},
{
"epoch": 2.1562761111560333,
"grad_norm": 1.5296227931976318,
"learning_rate": 1.4062064814066108e-05,
"loss": 0.6638,
"step": 80000
},
{
"epoch": 2.183229562545484,
"grad_norm": 1.4576656818389893,
"learning_rate": 1.3612840624241934e-05,
"loss": 0.6603,
"step": 81000
},
{
"epoch": 2.2101830139349343,
"grad_norm": 1.3864960670471191,
"learning_rate": 1.3163616434417761e-05,
"loss": 0.6593,
"step": 82000
},
{
"epoch": 2.237136465324385,
"grad_norm": 1.7719892263412476,
"learning_rate": 1.2714392244593587e-05,
"loss": 0.6602,
"step": 83000
},
{
"epoch": 2.2640899167138353,
"grad_norm": 1.3673619031906128,
"learning_rate": 1.2265168054769415e-05,
"loss": 0.656,
"step": 84000
},
{
"epoch": 2.291043368103286,
"grad_norm": 1.4584944248199463,
"learning_rate": 1.181594386494524e-05,
"loss": 0.6536,
"step": 85000
},
{
"epoch": 2.317996819492736,
"grad_norm": 1.3770402669906616,
"learning_rate": 1.1366719675121066e-05,
"loss": 0.6519,
"step": 86000
},
{
"epoch": 2.3449502708821863,
"grad_norm": 1.3435842990875244,
"learning_rate": 1.0917495485296893e-05,
"loss": 0.6498,
"step": 87000
},
{
"epoch": 2.371903722271637,
"grad_norm": 1.4926843643188477,
"learning_rate": 1.0468271295472719e-05,
"loss": 0.6497,
"step": 88000
},
{
"epoch": 2.3988571736610873,
"grad_norm": 1.4428088665008545,
"learning_rate": 1.0019047105648545e-05,
"loss": 0.648,
"step": 89000
},
{
"epoch": 2.425810625050538,
"grad_norm": 1.3555138111114502,
"learning_rate": 9.569822915824372e-06,
"loss": 0.6463,
"step": 90000
},
{
"epoch": 2.4527640764399883,
"grad_norm": 1.8231887817382812,
"learning_rate": 9.1205987260002e-06,
"loss": 0.6441,
"step": 91000
},
{
"epoch": 2.4797175278294388,
"grad_norm": 1.4294542074203491,
"learning_rate": 8.671374536176025e-06,
"loss": 0.6443,
"step": 92000
},
{
"epoch": 2.506670979218889,
"grad_norm": 1.4158700704574585,
"learning_rate": 8.22215034635185e-06,
"loss": 0.6408,
"step": 93000
},
{
"epoch": 2.5336244306083393,
"grad_norm": 1.3847190141677856,
"learning_rate": 7.772926156527676e-06,
"loss": 0.6407,
"step": 94000
},
{
"epoch": 2.56057788199779,
"grad_norm": 1.4408886432647705,
"learning_rate": 7.323701966703503e-06,
"loss": 0.6408,
"step": 95000
},
{
"epoch": 2.5875313333872403,
"grad_norm": 1.5943797826766968,
"learning_rate": 6.87447777687933e-06,
"loss": 0.6379,
"step": 96000
},
{
"epoch": 2.6144847847766908,
"grad_norm": 1.4184848070144653,
"learning_rate": 6.425253587055157e-06,
"loss": 0.6367,
"step": 97000
},
{
"epoch": 2.641438236166141,
"grad_norm": 1.4443740844726562,
"learning_rate": 5.976029397230982e-06,
"loss": 0.635,
"step": 98000
},
{
"epoch": 2.6683916875555918,
"grad_norm": 1.5342421531677246,
"learning_rate": 5.526805207406809e-06,
"loss": 0.6349,
"step": 99000
},
{
"epoch": 2.695345138945042,
"grad_norm": 1.4228054285049438,
"learning_rate": 5.077581017582635e-06,
"loss": 0.6349,
"step": 100000
},
{
"epoch": 2.7222985903344923,
"grad_norm": 1.5616410970687866,
"learning_rate": 4.628356827758461e-06,
"loss": 0.6345,
"step": 101000
},
{
"epoch": 2.7492520417239428,
"grad_norm": 1.4892140626907349,
"learning_rate": 4.1791326379342876e-06,
"loss": 0.6322,
"step": 102000
},
{
"epoch": 2.7762054931133933,
"grad_norm": 1.48268723487854,
"learning_rate": 3.7299084481101145e-06,
"loss": 0.6318,
"step": 103000
},
{
"epoch": 2.8031589445028438,
"grad_norm": 1.6489795446395874,
"learning_rate": 3.2806842582859406e-06,
"loss": 0.6295,
"step": 104000
},
{
"epoch": 2.830112395892294,
"grad_norm": 1.458978295326233,
"learning_rate": 2.8314600684617667e-06,
"loss": 0.6291,
"step": 105000
},
{
"epoch": 2.8570658472817443,
"grad_norm": 1.338670015335083,
"learning_rate": 2.3822358786375928e-06,
"loss": 0.6265,
"step": 106000
},
{
"epoch": 2.8840192986711948,
"grad_norm": 1.3654705286026,
"learning_rate": 1.9330116888134193e-06,
"loss": 0.6259,
"step": 107000
},
{
"epoch": 2.9109727500606453,
"grad_norm": 1.3295788764953613,
"learning_rate": 1.4837874989892456e-06,
"loss": 0.6274,
"step": 108000
},
{
"epoch": 2.9379262014500958,
"grad_norm": 1.3103362321853638,
"learning_rate": 1.0345633091650721e-06,
"loss": 0.6261,
"step": 109000
},
{
"epoch": 2.9648796528395462,
"grad_norm": 1.3937286138534546,
"learning_rate": 5.853391193408983e-07,
"loss": 0.6253,
"step": 110000
},
{
"epoch": 2.9918331042289967,
"grad_norm": 1.3265410661697388,
"learning_rate": 1.3611492951672462e-07,
"loss": 0.6257,
"step": 111000
},
{
"epoch": 3.0,
"step": 111303,
"total_flos": 2.6556045146219667e+19,
"train_loss": 0.8057731239829722,
"train_runtime": 323766.3615,
"train_samples_per_second": 88.005,
"train_steps_per_second": 0.344
}
],
"logging_steps": 1000,
"max_steps": 111303,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.6556045146219667e+19,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}