esm2_t12_35M_ptm_lora_2100K / trainer_state.json
AmelieSchreiber's picture
Upload 11 files
d1bcf9c
raw
history blame
109 kB
{
"best_metric": 0.24899413187145658,
"best_model_checkpoint": "esm2_t12_35M_lora_ptm_sites_2023-10-10_00-58-43/checkpoint-176106",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 176106,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0003701556393528675,
"loss": 0.4195,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 0.0003701521172172271,
"loss": 0.1652,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 0.00037014627443809477,
"loss": 0.1381,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 0.0003701380523685105,
"loss": 0.0871,
"step": 800
},
{
"epoch": 0.01,
"learning_rate": 0.000370127474584151,
"loss": 0.0919,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 0.000370114541219666,
"loss": 0.0678,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 0.0003700992524396906,
"loss": 0.0678,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 0.00037008160843884317,
"loss": 0.0565,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 0.0003700616094417228,
"loss": 0.0712,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 0.00037003925570290656,
"loss": 0.0604,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 0.00037001454750694614,
"loss": 0.0632,
"step": 2200
},
{
"epoch": 0.01,
"learning_rate": 0.0003699874851683642,
"loss": 0.0697,
"step": 2400
},
{
"epoch": 0.01,
"learning_rate": 0.00036995806903165044,
"loss": 0.046,
"step": 2600
},
{
"epoch": 0.02,
"learning_rate": 0.0003699264641725442,
"loss": 0.0479,
"step": 2800
},
{
"epoch": 0.02,
"learning_rate": 0.00036989235335692274,
"loss": 0.042,
"step": 3000
},
{
"epoch": 0.02,
"learning_rate": 0.0003698558899541489,
"loss": 0.0533,
"step": 3200
},
{
"epoch": 0.02,
"learning_rate": 0.00036981707442838315,
"loss": 0.0508,
"step": 3400
},
{
"epoch": 0.02,
"learning_rate": 0.0003697759072737272,
"loss": 0.0492,
"step": 3600
},
{
"epoch": 0.02,
"learning_rate": 0.00036973238901421803,
"loss": 0.0496,
"step": 3800
},
{
"epoch": 0.02,
"learning_rate": 0.0003696865202038208,
"loss": 0.0729,
"step": 4000
},
{
"epoch": 0.02,
"learning_rate": 0.00036963830142642195,
"loss": 0.0556,
"step": 4200
},
{
"epoch": 0.02,
"learning_rate": 0.0003695877332958219,
"loss": 0.0393,
"step": 4400
},
{
"epoch": 0.03,
"learning_rate": 0.0003695348164557272,
"loss": 0.042,
"step": 4600
},
{
"epoch": 0.03,
"learning_rate": 0.00036947955157974214,
"loss": 0.0388,
"step": 4800
},
{
"epoch": 0.03,
"learning_rate": 0.0003694222332701908,
"loss": 0.0632,
"step": 5000
},
{
"epoch": 0.03,
"learning_rate": 0.00036936228619390763,
"loss": 0.0372,
"step": 5200
},
{
"epoch": 0.03,
"learning_rate": 0.0003692999932779562,
"loss": 0.0309,
"step": 5400
},
{
"epoch": 0.03,
"learning_rate": 0.00036923535531529345,
"loss": 0.0336,
"step": 5600
},
{
"epoch": 0.03,
"learning_rate": 0.0003691683731287276,
"loss": 0.0553,
"step": 5800
},
{
"epoch": 0.03,
"learning_rate": 0.0003690990475709077,
"loss": 0.0466,
"step": 6000
},
{
"epoch": 0.04,
"learning_rate": 0.0003690277436899897,
"loss": 0.0248,
"step": 6200
},
{
"epoch": 0.04,
"learning_rate": 0.00036895374577248087,
"loss": 0.0422,
"step": 6400
},
{
"epoch": 0.04,
"learning_rate": 0.00036887740721581455,
"loss": 0.0472,
"step": 6600
},
{
"epoch": 0.04,
"learning_rate": 0.0003687987289917415,
"loss": 0.041,
"step": 6800
},
{
"epoch": 0.04,
"learning_rate": 0.00036871771210179546,
"loss": 0.0321,
"step": 7000
},
{
"epoch": 0.04,
"learning_rate": 0.00036863435757727986,
"loss": 0.036,
"step": 7200
},
{
"epoch": 0.04,
"learning_rate": 0.00036854866647925537,
"loss": 0.0355,
"step": 7400
},
{
"epoch": 0.04,
"learning_rate": 0.0003684606398985257,
"loss": 0.0423,
"step": 7600
},
{
"epoch": 0.04,
"learning_rate": 0.00036837027895562436,
"loss": 0.0391,
"step": 7800
},
{
"epoch": 0.05,
"learning_rate": 0.0003682775848008,
"loss": 0.0473,
"step": 8000
},
{
"epoch": 0.05,
"learning_rate": 0.00036818255861400173,
"loss": 0.0326,
"step": 8200
},
{
"epoch": 0.05,
"learning_rate": 0.00036808520160486446,
"loss": 0.0295,
"step": 8400
},
{
"epoch": 0.05,
"learning_rate": 0.0003679855150126932,
"loss": 0.0365,
"step": 8600
},
{
"epoch": 0.05,
"learning_rate": 0.00036788350010644714,
"loss": 0.0244,
"step": 8800
},
{
"epoch": 0.05,
"learning_rate": 0.0003677791581847241,
"loss": 0.0484,
"step": 9000
},
{
"epoch": 0.05,
"learning_rate": 0.00036767249057574337,
"loss": 0.0285,
"step": 9200
},
{
"epoch": 0.05,
"learning_rate": 0.0003675634986373291,
"loss": 0.0393,
"step": 9400
},
{
"epoch": 0.05,
"learning_rate": 0.0003674521837568929,
"loss": 0.0204,
"step": 9600
},
{
"epoch": 0.06,
"learning_rate": 0.0003673385473514164,
"loss": 0.0448,
"step": 9800
},
{
"epoch": 0.06,
"learning_rate": 0.00036722259086743295,
"loss": 0.0451,
"step": 10000
},
{
"epoch": 0.06,
"learning_rate": 0.00036710431578100935,
"loss": 0.0295,
"step": 10200
},
{
"epoch": 0.06,
"learning_rate": 0.00036698372359772696,
"loss": 0.0361,
"step": 10400
},
{
"epoch": 0.06,
"learning_rate": 0.00036686081585266277,
"loss": 0.0308,
"step": 10600
},
{
"epoch": 0.06,
"learning_rate": 0.0003667355941103695,
"loss": 0.0293,
"step": 10800
},
{
"epoch": 0.06,
"learning_rate": 0.0003666087033850132,
"loss": 0.0319,
"step": 11000
},
{
"epoch": 0.06,
"learning_rate": 0.0003664788700095357,
"loss": 0.0267,
"step": 11200
},
{
"epoch": 0.06,
"learning_rate": 0.0003663467274988045,
"loss": 0.0195,
"step": 11400
},
{
"epoch": 0.07,
"learning_rate": 0.00036621227753492634,
"loss": 0.0425,
"step": 11600
},
{
"epoch": 0.07,
"learning_rate": 0.00036607552182938043,
"loss": 0.0211,
"step": 11800
},
{
"epoch": 0.07,
"learning_rate": 0.0003659364621229971,
"loss": 0.0352,
"step": 12000
},
{
"epoch": 0.07,
"learning_rate": 0.0003657951001859353,
"loss": 0.0266,
"step": 12200
},
{
"epoch": 0.07,
"learning_rate": 0.00036565216184881143,
"loss": 0.0332,
"step": 12400
},
{
"epoch": 0.07,
"learning_rate": 0.0003655062123664866,
"loss": 0.0346,
"step": 12600
},
{
"epoch": 0.07,
"learning_rate": 0.00036535796613034296,
"loss": 0.042,
"step": 12800
},
{
"epoch": 0.07,
"learning_rate": 0.00036520742502747924,
"loss": 0.0372,
"step": 13000
},
{
"epoch": 0.07,
"learning_rate": 0.0003650545909742067,
"loss": 0.0246,
"step": 13200
},
{
"epoch": 0.08,
"learning_rate": 0.0003648994659160249,
"loss": 0.0283,
"step": 13400
},
{
"epoch": 0.08,
"learning_rate": 0.00036474205182759645,
"loss": 0.0299,
"step": 13600
},
{
"epoch": 0.08,
"learning_rate": 0.0003645823507127223,
"loss": 0.0292,
"step": 13800
},
{
"epoch": 0.08,
"learning_rate": 0.000364420364604316,
"loss": 0.0354,
"step": 14000
},
{
"epoch": 0.08,
"learning_rate": 0.000364256095564378,
"loss": 0.0215,
"step": 14200
},
{
"epoch": 0.08,
"learning_rate": 0.00036408954568396915,
"loss": 0.0288,
"step": 14400
},
{
"epoch": 0.08,
"learning_rate": 0.0003639207170831844,
"loss": 0.0214,
"step": 14600
},
{
"epoch": 0.08,
"learning_rate": 0.0003637496119111255,
"loss": 0.0336,
"step": 14800
},
{
"epoch": 0.09,
"learning_rate": 0.0003635762323458739,
"loss": 0.0193,
"step": 15000
},
{
"epoch": 0.09,
"learning_rate": 0.0003634005805944629,
"loss": 0.0507,
"step": 15200
},
{
"epoch": 0.09,
"learning_rate": 0.0003632226588928494,
"loss": 0.032,
"step": 15400
},
{
"epoch": 0.09,
"learning_rate": 0.000363042469505886,
"loss": 0.0303,
"step": 15600
},
{
"epoch": 0.09,
"learning_rate": 0.0003628600147272912,
"loss": 0.0271,
"step": 15800
},
{
"epoch": 0.09,
"learning_rate": 0.00036267529687962144,
"loss": 0.0275,
"step": 16000
},
{
"epoch": 0.09,
"learning_rate": 0.00036248831831424026,
"loss": 0.0352,
"step": 16200
},
{
"epoch": 0.09,
"learning_rate": 0.0003622990814112894,
"loss": 0.0302,
"step": 16400
},
{
"epoch": 0.09,
"learning_rate": 0.00036210758857965785,
"loss": 0.0175,
"step": 16600
},
{
"epoch": 0.1,
"learning_rate": 0.00036191481659005633,
"loss": 0.0525,
"step": 16800
},
{
"epoch": 0.1,
"learning_rate": 0.0003617188304915086,
"loss": 0.0259,
"step": 17000
},
{
"epoch": 0.1,
"learning_rate": 0.0003615205958505779,
"loss": 0.0265,
"step": 17200
},
{
"epoch": 0.1,
"learning_rate": 0.00036132011519068993,
"loss": 0.0304,
"step": 17400
},
{
"epoch": 0.1,
"learning_rate": 0.00036111739106386103,
"loss": 0.0351,
"step": 17600
},
{
"epoch": 0.1,
"learning_rate": 0.0003609124260506658,
"loss": 0.0564,
"step": 17800
},
{
"epoch": 0.1,
"learning_rate": 0.0003607052227602041,
"loss": 0.0235,
"step": 18000
},
{
"epoch": 0.1,
"learning_rate": 0.0003604957838300679,
"loss": 0.0427,
"step": 18200
},
{
"epoch": 0.1,
"learning_rate": 0.00036028411192630784,
"loss": 0.0259,
"step": 18400
},
{
"epoch": 0.11,
"learning_rate": 0.00036007020974339896,
"loss": 0.0226,
"step": 18600
},
{
"epoch": 0.11,
"learning_rate": 0.00035985408000420693,
"loss": 0.0361,
"step": 18800
},
{
"epoch": 0.11,
"learning_rate": 0.00035963682276229046,
"loss": 0.0396,
"step": 19000
},
{
"epoch": 0.11,
"learning_rate": 0.0003594162572956826,
"loss": 0.038,
"step": 19200
},
{
"epoch": 0.11,
"learning_rate": 0.000359193472597272,
"loss": 0.0263,
"step": 19400
},
{
"epoch": 0.11,
"learning_rate": 0.00035896847150299397,
"loss": 0.0275,
"step": 19600
},
{
"epoch": 0.11,
"learning_rate": 0.0003587412568769976,
"loss": 0.0368,
"step": 19800
},
{
"epoch": 0.11,
"learning_rate": 0.0003585118316116088,
"loss": 0.0267,
"step": 20000
},
{
"epoch": 0.11,
"learning_rate": 0.000358280198627294,
"loss": 0.0252,
"step": 20200
},
{
"epoch": 0.12,
"learning_rate": 0.0003580463608726229,
"loss": 0.0396,
"step": 20400
},
{
"epoch": 0.12,
"learning_rate": 0.0003578103213242304,
"loss": 0.0297,
"step": 20600
},
{
"epoch": 0.12,
"learning_rate": 0.0003575720829867795,
"loss": 0.0214,
"step": 20800
},
{
"epoch": 0.12,
"learning_rate": 0.0003573316488929225,
"loss": 0.0252,
"step": 21000
},
{
"epoch": 0.12,
"learning_rate": 0.00035708902210326236,
"loss": 0.025,
"step": 21200
},
{
"epoch": 0.12,
"learning_rate": 0.0003568442057063144,
"loss": 0.0262,
"step": 21400
},
{
"epoch": 0.12,
"learning_rate": 0.000356597202818466,
"loss": 0.022,
"step": 21600
},
{
"epoch": 0.12,
"learning_rate": 0.0003563492679409498,
"loss": 0.0458,
"step": 21800
},
{
"epoch": 0.12,
"learning_rate": 0.0003560979124246924,
"loss": 0.0289,
"step": 22000
},
{
"epoch": 0.13,
"learning_rate": 0.00035584437991746675,
"loss": 0.0247,
"step": 22200
},
{
"epoch": 0.13,
"learning_rate": 0.0003555886736466121,
"loss": 0.0198,
"step": 22400
},
{
"epoch": 0.13,
"learning_rate": 0.0003553307968671388,
"loss": 0.0259,
"step": 22600
},
{
"epoch": 0.13,
"learning_rate": 0.0003550707528616864,
"loss": 0.0134,
"step": 22800
},
{
"epoch": 0.13,
"learning_rate": 0.0003548085449404821,
"loss": 0.0344,
"step": 23000
},
{
"epoch": 0.13,
"learning_rate": 0.0003545441764412988,
"loss": 0.0308,
"step": 23200
},
{
"epoch": 0.13,
"learning_rate": 0.0003542776507294125,
"loss": 0.0295,
"step": 23400
},
{
"epoch": 0.13,
"learning_rate": 0.0003540089711975591,
"loss": 0.0237,
"step": 23600
},
{
"epoch": 0.14,
"learning_rate": 0.00035373814126589203,
"loss": 0.0229,
"step": 23800
},
{
"epoch": 0.14,
"learning_rate": 0.0003534651643819378,
"loss": 0.0203,
"step": 24000
},
{
"epoch": 0.14,
"learning_rate": 0.00035319004402055264,
"loss": 0.0297,
"step": 24200
},
{
"epoch": 0.14,
"learning_rate": 0.000352914175302923,
"loss": 0.0285,
"step": 24400
},
{
"epoch": 0.14,
"learning_rate": 0.00035263478919374686,
"loss": 0.0224,
"step": 24600
},
{
"epoch": 0.14,
"learning_rate": 0.00035235327017739155,
"loss": 0.0333,
"step": 24800
},
{
"epoch": 0.14,
"learning_rate": 0.0003520696218374504,
"loss": 0.0564,
"step": 25000
},
{
"epoch": 0.14,
"learning_rate": 0.00035178384778462185,
"loss": 0.0341,
"step": 25200
},
{
"epoch": 0.14,
"learning_rate": 0.0003514959516566635,
"loss": 0.0278,
"step": 25400
},
{
"epoch": 0.15,
"learning_rate": 0.0003512059371183459,
"loss": 0.0358,
"step": 25600
},
{
"epoch": 0.15,
"learning_rate": 0.0003509138078614061,
"loss": 0.0298,
"step": 25800
},
{
"epoch": 0.15,
"learning_rate": 0.00035061956760450006,
"loss": 0.0314,
"step": 26000
},
{
"epoch": 0.15,
"learning_rate": 0.0003503232200931561,
"loss": 0.0264,
"step": 26200
},
{
"epoch": 0.15,
"learning_rate": 0.00035002476909972645,
"loss": 0.0259,
"step": 26400
},
{
"epoch": 0.15,
"learning_rate": 0.00034972421842333984,
"loss": 0.0274,
"step": 26600
},
{
"epoch": 0.15,
"learning_rate": 0.00034942309032960303,
"loss": 0.0193,
"step": 26800
},
{
"epoch": 0.15,
"learning_rate": 0.0003491183622419467,
"loss": 0.0223,
"step": 27000
},
{
"epoch": 0.15,
"learning_rate": 0.0003488115460094296,
"loss": 0.0213,
"step": 27200
},
{
"epoch": 0.16,
"learning_rate": 0.0003485026455376655,
"loss": 0.0268,
"step": 27400
},
{
"epoch": 0.16,
"learning_rate": 0.0003481916647587996,
"loss": 0.0232,
"step": 27600
},
{
"epoch": 0.16,
"learning_rate": 0.00034788017807543066,
"loss": 0.0325,
"step": 27800
},
{
"epoch": 0.16,
"learning_rate": 0.00034756505893653175,
"loss": 0.0242,
"step": 28000
},
{
"epoch": 0.16,
"learning_rate": 0.0003472478714255293,
"loss": 0.0221,
"step": 28200
},
{
"epoch": 0.16,
"learning_rate": 0.0003469286195800583,
"loss": 0.0276,
"step": 28400
},
{
"epoch": 0.16,
"learning_rate": 0.0003466073074640316,
"loss": 0.0225,
"step": 28600
},
{
"epoch": 0.16,
"learning_rate": 0.0003462839391675882,
"loss": 0.0219,
"step": 28800
},
{
"epoch": 0.16,
"learning_rate": 0.0003459585188070413,
"loss": 0.0355,
"step": 29000
},
{
"epoch": 0.17,
"learning_rate": 0.00034563105052482586,
"loss": 0.0211,
"step": 29200
},
{
"epoch": 0.17,
"learning_rate": 0.0003453015384894458,
"loss": 0.0353,
"step": 29400
},
{
"epoch": 0.17,
"learning_rate": 0.0003449699868954208,
"loss": 0.0253,
"step": 29600
},
{
"epoch": 0.17,
"learning_rate": 0.0003446363999632333,
"loss": 0.0406,
"step": 29800
},
{
"epoch": 0.17,
"learning_rate": 0.0003443007819392746,
"loss": 0.0201,
"step": 30000
},
{
"epoch": 0.17,
"learning_rate": 0.0003439631370957905,
"loss": 0.0261,
"step": 30200
},
{
"epoch": 0.17,
"learning_rate": 0.0003436234697308274,
"loss": 0.0426,
"step": 30400
},
{
"epoch": 0.17,
"learning_rate": 0.0003432817841681772,
"loss": 0.0184,
"step": 30600
},
{
"epoch": 0.17,
"learning_rate": 0.0003429380847573226,
"loss": 0.0201,
"step": 30800
},
{
"epoch": 0.18,
"learning_rate": 0.00034259237587338153,
"loss": 0.0331,
"step": 31000
},
{
"epoch": 0.18,
"learning_rate": 0.00034224466191705135,
"loss": 0.0208,
"step": 31200
},
{
"epoch": 0.18,
"learning_rate": 0.000341894947314553,
"loss": 0.0297,
"step": 31400
},
{
"epoch": 0.18,
"learning_rate": 0.0003415432365175747,
"loss": 0.0248,
"step": 31600
},
{
"epoch": 0.18,
"learning_rate": 0.0003411895340032153,
"loss": 0.0207,
"step": 31800
},
{
"epoch": 0.18,
"learning_rate": 0.00034083562765829117,
"loss": 0.0334,
"step": 32000
},
{
"epoch": 0.18,
"learning_rate": 0.0003404779651439537,
"loss": 0.0153,
"step": 32200
},
{
"epoch": 0.18,
"learning_rate": 0.0003401183244725952,
"loss": 0.0315,
"step": 32400
},
{
"epoch": 0.19,
"learning_rate": 0.0003397567102222577,
"loss": 0.0234,
"step": 32600
},
{
"epoch": 0.19,
"learning_rate": 0.00033939312699610597,
"loss": 0.026,
"step": 32800
},
{
"epoch": 0.19,
"learning_rate": 0.00033902757942236837,
"loss": 0.018,
"step": 33000
},
{
"epoch": 0.19,
"learning_rate": 0.00033866007215427904,
"loss": 0.0318,
"step": 33200
},
{
"epoch": 0.19,
"learning_rate": 0.00033829060987001754,
"loss": 0.0249,
"step": 33400
},
{
"epoch": 0.19,
"learning_rate": 0.00033791919727265,
"loss": 0.0266,
"step": 33600
},
{
"epoch": 0.19,
"learning_rate": 0.00033754583909006893,
"loss": 0.0259,
"step": 33800
},
{
"epoch": 0.19,
"learning_rate": 0.0003371705400749333,
"loss": 0.03,
"step": 34000
},
{
"epoch": 0.19,
"learning_rate": 0.0003367951959879635,
"loss": 0.0283,
"step": 34200
},
{
"epoch": 0.2,
"learning_rate": 0.000336416039308738,
"loss": 0.0269,
"step": 34400
},
{
"epoch": 0.2,
"learning_rate": 0.00033603495617873176,
"loss": 0.034,
"step": 34600
},
{
"epoch": 0.2,
"learning_rate": 0.00033565195144893837,
"loss": 0.0319,
"step": 34800
},
{
"epoch": 0.2,
"learning_rate": 0.0003352670299948123,
"loss": 0.018,
"step": 35000
},
{
"epoch": 0.2,
"learning_rate": 0.00033488019671620693,
"loss": 0.0419,
"step": 35200
},
{
"epoch": 0.2,
"learning_rate": 0.0003344914565373123,
"loss": 0.0371,
"step": 35400
},
{
"epoch": 0.2,
"learning_rate": 0.0003341008144065922,
"loss": 0.0253,
"step": 35600
},
{
"epoch": 0.2,
"learning_rate": 0.0003337082752967214,
"loss": 0.0254,
"step": 35800
},
{
"epoch": 0.2,
"learning_rate": 0.0003333138442045221,
"loss": 0.0287,
"step": 36000
},
{
"epoch": 0.21,
"learning_rate": 0.0003329175261509006,
"loss": 0.0203,
"step": 36200
},
{
"epoch": 0.21,
"learning_rate": 0.00033251932618078315,
"loss": 0.0234,
"step": 36400
},
{
"epoch": 0.21,
"learning_rate": 0.00033211924936305204,
"loss": 0.0162,
"step": 36600
},
{
"epoch": 0.21,
"learning_rate": 0.0003317193151808796,
"loss": 0.0215,
"step": 36800
},
{
"epoch": 0.21,
"learning_rate": 0.00033131550929049215,
"loss": 0.0274,
"step": 37000
},
{
"epoch": 0.21,
"learning_rate": 0.0003309098418764647,
"loss": 0.0337,
"step": 37200
},
{
"epoch": 0.21,
"learning_rate": 0.0003305023181027363,
"loss": 0.0214,
"step": 37400
},
{
"epoch": 0.21,
"learning_rate": 0.0003300929431568763,
"loss": 0.0192,
"step": 37600
},
{
"epoch": 0.21,
"learning_rate": 0.0003296817222500186,
"loss": 0.0373,
"step": 37800
},
{
"epoch": 0.22,
"learning_rate": 0.0003292686606167952,
"loss": 0.0341,
"step": 38000
},
{
"epoch": 0.22,
"learning_rate": 0.00032885376351526955,
"loss": 0.0267,
"step": 38200
},
{
"epoch": 0.22,
"learning_rate": 0.00032843703622686987,
"loss": 0.0257,
"step": 38400
},
{
"epoch": 0.22,
"learning_rate": 0.00032801848405632146,
"loss": 0.0352,
"step": 38600
},
{
"epoch": 0.22,
"learning_rate": 0.00032759811233157966,
"loss": 0.0291,
"step": 38800
},
{
"epoch": 0.22,
"learning_rate": 0.0003271780418373506,
"loss": 0.0236,
"step": 39000
},
{
"epoch": 0.22,
"learning_rate": 0.0003267540561114055,
"loss": 0.02,
"step": 39200
},
{
"epoch": 0.22,
"learning_rate": 0.00032632826692678864,
"loss": 0.0162,
"step": 39400
},
{
"epoch": 0.22,
"learning_rate": 0.00032590067970357875,
"loss": 0.0248,
"step": 39600
},
{
"epoch": 0.23,
"learning_rate": 0.0003254712998847426,
"loss": 0.0229,
"step": 39800
},
{
"epoch": 0.23,
"learning_rate": 0.00032504013293606604,
"loss": 0.0239,
"step": 40000
},
{
"epoch": 0.23,
"learning_rate": 0.0003246071843460839,
"loss": 0.0208,
"step": 40200
},
{
"epoch": 0.23,
"learning_rate": 0.0003241724596260105,
"loss": 0.0248,
"step": 40400
},
{
"epoch": 0.23,
"learning_rate": 0.00032373596430966946,
"loss": 0.0132,
"step": 40600
},
{
"epoch": 0.23,
"learning_rate": 0.000323297703953423,
"loss": 0.0176,
"step": 40800
},
{
"epoch": 0.23,
"learning_rate": 0.0003228598886025915,
"loss": 0.0327,
"step": 41000
},
{
"epoch": 0.23,
"learning_rate": 0.0003224203368588642,
"loss": 0.045,
"step": 41200
},
{
"epoch": 0.24,
"learning_rate": 0.0003219768323998541,
"loss": 0.0251,
"step": 41400
},
{
"epoch": 0.24,
"learning_rate": 0.0003215315852937889,
"loss": 0.0296,
"step": 41600
},
{
"epoch": 0.24,
"learning_rate": 0.0003210846012084369,
"loss": 0.0327,
"step": 41800
},
{
"epoch": 0.24,
"learning_rate": 0.0003206358858336769,
"loss": 0.0261,
"step": 42000
},
{
"epoch": 0.24,
"learning_rate": 0.0003201854448814265,
"loss": 0.0321,
"step": 42200
},
{
"epoch": 0.24,
"learning_rate": 0.00031973328408556876,
"loss": 0.0228,
"step": 42400
},
{
"epoch": 0.24,
"learning_rate": 0.0003192794092018796,
"loss": 0.0234,
"step": 42600
},
{
"epoch": 0.24,
"learning_rate": 0.0003188238260079543,
"loss": 0.0227,
"step": 42800
},
{
"epoch": 0.24,
"learning_rate": 0.00031836654030313415,
"loss": 0.0187,
"step": 43000
},
{
"epoch": 0.25,
"learning_rate": 0.00031790985703126633,
"loss": 0.0381,
"step": 43200
},
{
"epoch": 0.25,
"learning_rate": 0.0003174491922289618,
"loss": 0.0221,
"step": 43400
},
{
"epoch": 0.25,
"learning_rate": 0.000316986842414148,
"loss": 0.016,
"step": 43600
},
{
"epoch": 0.25,
"learning_rate": 0.0003165228134723018,
"loss": 0.0217,
"step": 43800
},
{
"epoch": 0.25,
"learning_rate": 0.0003160571113102746,
"loss": 0.0356,
"step": 44000
},
{
"epoch": 0.25,
"learning_rate": 0.00031558974185621694,
"loss": 0.0281,
"step": 44200
},
{
"epoch": 0.25,
"learning_rate": 0.000315120711059503,
"loss": 0.0231,
"step": 44400
},
{
"epoch": 0.25,
"learning_rate": 0.0003146500248906554,
"loss": 0.0322,
"step": 44600
},
{
"epoch": 0.25,
"learning_rate": 0.0003141776893412682,
"loss": 0.0247,
"step": 44800
},
{
"epoch": 0.26,
"learning_rate": 0.00031370371042393195,
"loss": 0.0271,
"step": 45000
},
{
"epoch": 0.26,
"learning_rate": 0.00031322809417215584,
"loss": 0.0184,
"step": 45200
},
{
"epoch": 0.26,
"learning_rate": 0.0003127508466402918,
"loss": 0.0232,
"step": 45400
},
{
"epoch": 0.26,
"learning_rate": 0.0003122719739034571,
"loss": 0.0475,
"step": 45600
},
{
"epoch": 0.26,
"learning_rate": 0.0003117914820574569,
"loss": 0.0303,
"step": 45800
},
{
"epoch": 0.26,
"learning_rate": 0.0003113093772187068,
"loss": 0.0394,
"step": 46000
},
{
"epoch": 0.26,
"learning_rate": 0.00031082566552415524,
"loss": 0.0267,
"step": 46200
},
{
"epoch": 0.26,
"learning_rate": 0.0003103403531312047,
"loss": 0.0316,
"step": 46400
},
{
"epoch": 0.26,
"learning_rate": 0.00030985344621763415,
"loss": 0.0216,
"step": 46600
},
{
"epoch": 0.27,
"learning_rate": 0.0003093649509815197,
"loss": 0.0199,
"step": 46800
},
{
"epoch": 0.27,
"learning_rate": 0.00030887487364115605,
"loss": 0.0231,
"step": 47000
},
{
"epoch": 0.27,
"learning_rate": 0.00030838322043497736,
"loss": 0.0202,
"step": 47200
},
{
"epoch": 0.27,
"learning_rate": 0.00030789246762956597,
"loss": 0.0283,
"step": 47400
},
{
"epoch": 0.27,
"learning_rate": 0.00030739768928821336,
"loss": 0.0394,
"step": 47600
},
{
"epoch": 0.27,
"learning_rate": 0.0003069013538848477,
"loss": 0.0264,
"step": 47800
},
{
"epoch": 0.27,
"learning_rate": 0.00030640346773756506,
"loss": 0.0411,
"step": 48000
},
{
"epoch": 0.27,
"learning_rate": 0.00030590403718420164,
"loss": 0.036,
"step": 48200
},
{
"epoch": 0.27,
"learning_rate": 0.00030540306858225326,
"loss": 0.0421,
"step": 48400
},
{
"epoch": 0.28,
"learning_rate": 0.0003049005683087941,
"loss": 0.0398,
"step": 48600
},
{
"epoch": 0.28,
"learning_rate": 0.000304396542760396,
"loss": 0.0173,
"step": 48800
},
{
"epoch": 0.28,
"learning_rate": 0.0003038909983530467,
"loss": 0.0317,
"step": 49000
},
{
"epoch": 0.28,
"learning_rate": 0.0003033839415220679,
"loss": 0.048,
"step": 49200
},
{
"epoch": 0.28,
"learning_rate": 0.00030287537872203423,
"loss": 0.0345,
"step": 49400
},
{
"epoch": 0.28,
"learning_rate": 0.0003023678704574289,
"loss": 0.0316,
"step": 49600
},
{
"epoch": 0.28,
"learning_rate": 0.00030185632260843674,
"loss": 0.0247,
"step": 49800
},
{
"epoch": 0.28,
"learning_rate": 0.0003013432882361978,
"loss": 0.0165,
"step": 50000
},
{
"epoch": 0.29,
"learning_rate": 0.0003008287738713774,
"loss": 0.0267,
"step": 50200
},
{
"epoch": 0.29,
"learning_rate": 0.0003003127860634806,
"loss": 0.0236,
"step": 50400
},
{
"epoch": 0.29,
"learning_rate": 0.00029979792229214487,
"loss": 0.0526,
"step": 50600
},
{
"epoch": 0.29,
"learning_rate": 0.00029927901460657474,
"loss": 0.0176,
"step": 50800
},
{
"epoch": 0.29,
"learning_rate": 0.0002987612586177442,
"loss": 0.0237,
"step": 51000
},
{
"epoch": 0.29,
"learning_rate": 0.000298239457344193,
"loss": 0.0169,
"step": 51200
},
{
"epoch": 0.29,
"learning_rate": 0.00029771621558824144,
"loss": 0.0421,
"step": 51400
},
{
"epoch": 0.29,
"learning_rate": 0.00029719154001048997,
"loss": 0.0271,
"step": 51600
},
{
"epoch": 0.29,
"learning_rate": 0.0002966654372897905,
"loss": 0.0225,
"step": 51800
},
{
"epoch": 0.3,
"learning_rate": 0.00029613791412316185,
"loss": 0.0283,
"step": 52000
},
{
"epoch": 0.3,
"learning_rate": 0.00029560897722570427,
"loss": 0.017,
"step": 52200
},
{
"epoch": 0.3,
"learning_rate": 0.00029507863333051433,
"loss": 0.0216,
"step": 52400
},
{
"epoch": 0.3,
"learning_rate": 0.00029454688918859875,
"loss": 0.0212,
"step": 52600
},
{
"epoch": 0.3,
"learning_rate": 0.00029401375156878874,
"loss": 0.0356,
"step": 52800
},
{
"epoch": 0.3,
"learning_rate": 0.00029347922725765375,
"loss": 0.0223,
"step": 53000
},
{
"epoch": 0.3,
"learning_rate": 0.0002929433230594152,
"loss": 0.0231,
"step": 53200
},
{
"epoch": 0.3,
"learning_rate": 0.0002924060457958596,
"loss": 0.0385,
"step": 53400
},
{
"epoch": 0.3,
"learning_rate": 0.000291867402306252,
"loss": 0.0194,
"step": 53600
},
{
"epoch": 0.31,
"learning_rate": 0.00029132739944724874,
"loss": 0.021,
"step": 53800
},
{
"epoch": 0.31,
"learning_rate": 0.0002907860440928105,
"loss": 0.0384,
"step": 54000
},
{
"epoch": 0.31,
"learning_rate": 0.00029024334313411393,
"loss": 0.0326,
"step": 54200
},
{
"epoch": 0.31,
"learning_rate": 0.00028969930347946533,
"loss": 0.0309,
"step": 54400
},
{
"epoch": 0.31,
"learning_rate": 0.00028915393205421116,
"loss": 0.0228,
"step": 54600
},
{
"epoch": 0.31,
"learning_rate": 0.00028860723580065116,
"loss": 0.0164,
"step": 54800
},
{
"epoch": 0.31,
"learning_rate": 0.0002880592216779493,
"loss": 0.0292,
"step": 55000
},
{
"epoch": 0.31,
"learning_rate": 0.0002875098966620452,
"loss": 0.0184,
"step": 55200
},
{
"epoch": 0.31,
"learning_rate": 0.0002869592677455658,
"loss": 0.0171,
"step": 55400
},
{
"epoch": 0.32,
"learning_rate": 0.00028640734193773564,
"loss": 0.0272,
"step": 55600
},
{
"epoch": 0.32,
"learning_rate": 0.0002858541262642884,
"loss": 0.0202,
"step": 55800
},
{
"epoch": 0.32,
"learning_rate": 0.00028529962776737674,
"loss": 0.025,
"step": 56000
},
{
"epoch": 0.32,
"learning_rate": 0.00028474385350548337,
"loss": 0.0232,
"step": 56200
},
{
"epoch": 0.32,
"learning_rate": 0.0002841868105533304,
"loss": 0.0286,
"step": 56400
},
{
"epoch": 0.32,
"learning_rate": 0.00028362850600179034,
"loss": 0.0246,
"step": 56600
},
{
"epoch": 0.32,
"learning_rate": 0.0002830689469577944,
"loss": 0.0271,
"step": 56800
},
{
"epoch": 0.32,
"learning_rate": 0.00028250814054424367,
"loss": 0.0216,
"step": 57000
},
{
"epoch": 0.32,
"learning_rate": 0.00028194890720638425,
"loss": 0.0261,
"step": 57200
},
{
"epoch": 0.33,
"learning_rate": 0.0002813856336334063,
"loss": 0.0328,
"step": 57400
},
{
"epoch": 0.33,
"learning_rate": 0.00028082113411859194,
"loss": 0.0209,
"step": 57600
},
{
"epoch": 0.33,
"learning_rate": 0.0002802554158477314,
"loss": 0.0266,
"step": 57800
},
{
"epoch": 0.33,
"learning_rate": 0.0002796884860221292,
"loss": 0.0163,
"step": 58000
},
{
"epoch": 0.33,
"learning_rate": 0.0002791203518585125,
"loss": 0.0302,
"step": 58200
},
{
"epoch": 0.33,
"learning_rate": 0.00027855102058893863,
"loss": 0.0205,
"step": 58400
},
{
"epoch": 0.33,
"learning_rate": 0.0002779804994607039,
"loss": 0.0227,
"step": 58600
},
{
"epoch": 0.33,
"learning_rate": 0.00027740879573625075,
"loss": 0.035,
"step": 58800
},
{
"epoch": 0.34,
"learning_rate": 0.0002768359166930753,
"loss": 0.0255,
"step": 59000
},
{
"epoch": 0.34,
"learning_rate": 0.00027626186962363523,
"loss": 0.0261,
"step": 59200
},
{
"epoch": 0.34,
"learning_rate": 0.0002756866618352563,
"loss": 0.0306,
"step": 59400
},
{
"epoch": 0.34,
"learning_rate": 0.0002751103006500397,
"loss": 0.0106,
"step": 59600
},
{
"epoch": 0.34,
"learning_rate": 0.00027453279340476877,
"loss": 0.0199,
"step": 59800
},
{
"epoch": 0.34,
"learning_rate": 0.000273957043500945,
"loss": 0.0366,
"step": 60000
},
{
"epoch": 0.34,
"learning_rate": 0.0002733772718425448,
"loss": 0.0181,
"step": 60200
},
{
"epoch": 0.34,
"learning_rate": 0.0002727992834466737,
"loss": 0.0211,
"step": 60400
},
{
"epoch": 0.34,
"learning_rate": 0.0002722172767484197,
"loss": 0.0309,
"step": 60600
},
{
"epoch": 0.35,
"learning_rate": 0.00027163416081682745,
"loss": 0.0205,
"step": 60800
},
{
"epoch": 0.35,
"learning_rate": 0.00027104994307466473,
"loss": 0.0422,
"step": 61000
},
{
"epoch": 0.35,
"learning_rate": 0.0002704646309587249,
"loss": 0.0197,
"step": 61200
},
{
"epoch": 0.35,
"learning_rate": 0.0002698782319197321,
"loss": 0.02,
"step": 61400
},
{
"epoch": 0.35,
"learning_rate": 0.00026929075342224635,
"loss": 0.0292,
"step": 61600
},
{
"epoch": 0.35,
"learning_rate": 0.00026870220294456887,
"loss": 0.0174,
"step": 61800
},
{
"epoch": 0.35,
"learning_rate": 0.00026811258797864644,
"loss": 0.0211,
"step": 62000
},
{
"epoch": 0.35,
"learning_rate": 0.00026752191602997627,
"loss": 0.0179,
"step": 62200
},
{
"epoch": 0.35,
"learning_rate": 0.0002669331558226597,
"loss": 0.0243,
"step": 62400
},
{
"epoch": 0.36,
"learning_rate": 0.00026634039766960824,
"loss": 0.0305,
"step": 62600
},
{
"epoch": 0.36,
"learning_rate": 0.0002657466050928862,
"loss": 0.0271,
"step": 62800
},
{
"epoch": 0.36,
"learning_rate": 0.0002651517856511696,
"loss": 0.045,
"step": 63000
},
{
"epoch": 0.36,
"learning_rate": 0.0002645559469162059,
"loss": 0.0267,
"step": 63200
},
{
"epoch": 0.36,
"learning_rate": 0.0002639590964727178,
"loss": 0.03,
"step": 63400
},
{
"epoch": 0.36,
"learning_rate": 0.00026336124191830645,
"loss": 0.018,
"step": 63600
},
{
"epoch": 0.36,
"learning_rate": 0.00026276239086335485,
"loss": 0.0297,
"step": 63800
},
{
"epoch": 0.36,
"learning_rate": 0.00026216255093093095,
"loss": 0.0159,
"step": 64000
},
{
"epoch": 0.36,
"learning_rate": 0.00026156172975669046,
"loss": 0.0184,
"step": 64200
},
{
"epoch": 0.37,
"learning_rate": 0.00026095993498878,
"loss": 0.0272,
"step": 64400
},
{
"epoch": 0.37,
"learning_rate": 0.0002603571742877395,
"loss": 0.0142,
"step": 64600
},
{
"epoch": 0.37,
"learning_rate": 0.00025975345532640456,
"loss": 0.0188,
"step": 64800
},
{
"epoch": 0.37,
"learning_rate": 0.00025915181148932056,
"loss": 0.0468,
"step": 65000
},
{
"epoch": 0.37,
"learning_rate": 0.00025854620376982297,
"loss": 0.0152,
"step": 65200
},
{
"epoch": 0.37,
"learning_rate": 0.00025793966084276023,
"loss": 0.0351,
"step": 65400
},
{
"epoch": 0.37,
"learning_rate": 0.00025733219042911403,
"loss": 0.0259,
"step": 65600
},
{
"epoch": 0.37,
"learning_rate": 0.0002567238002616722,
"loss": 0.0349,
"step": 65800
},
{
"epoch": 0.37,
"learning_rate": 0.00025611449808493066,
"loss": 0.0122,
"step": 66000
},
{
"epoch": 0.38,
"learning_rate": 0.0002555042916549949,
"loss": 0.0207,
"step": 66200
},
{
"epoch": 0.38,
"learning_rate": 0.00025489318873948087,
"loss": 0.0338,
"step": 66400
},
{
"epoch": 0.38,
"learning_rate": 0.00025428119711741644,
"loss": 0.0264,
"step": 66600
},
{
"epoch": 0.38,
"learning_rate": 0.00025366832457914223,
"loss": 0.0211,
"step": 66800
},
{
"epoch": 0.38,
"learning_rate": 0.0002530545789262125,
"loss": 0.026,
"step": 67000
},
{
"epoch": 0.38,
"learning_rate": 0.00025243996797129576,
"loss": 0.0384,
"step": 67200
},
{
"epoch": 0.38,
"learning_rate": 0.0002518244995380754,
"loss": 0.0242,
"step": 67400
},
{
"epoch": 0.38,
"learning_rate": 0.00025120818146115014,
"loss": 0.0185,
"step": 67600
},
{
"epoch": 0.38,
"learning_rate": 0.00025059102158593404,
"loss": 0.0151,
"step": 67800
},
{
"epoch": 0.39,
"learning_rate": 0.00024997302776855716,
"loss": 0.016,
"step": 68000
},
{
"epoch": 0.39,
"learning_rate": 0.0002493542078757648,
"loss": 0.0322,
"step": 68200
},
{
"epoch": 0.39,
"learning_rate": 0.00024873456978481814,
"loss": 0.0201,
"step": 68400
},
{
"epoch": 0.39,
"learning_rate": 0.00024811412138339326,
"loss": 0.0296,
"step": 68600
},
{
"epoch": 0.39,
"learning_rate": 0.00024749287056948145,
"loss": 0.0329,
"step": 68800
},
{
"epoch": 0.39,
"learning_rate": 0.000246870825251288,
"loss": 0.0136,
"step": 69000
},
{
"epoch": 0.39,
"learning_rate": 0.00024624799334713204,
"loss": 0.0267,
"step": 69200
},
{
"epoch": 0.39,
"learning_rate": 0.00024562438278534536,
"loss": 0.02,
"step": 69400
},
{
"epoch": 0.4,
"learning_rate": 0.00024500000150417183,
"loss": 0.027,
"step": 69600
},
{
"epoch": 0.4,
"learning_rate": 0.00024437485745166604,
"loss": 0.0287,
"step": 69800
},
{
"epoch": 0.4,
"learning_rate": 0.0002437489585855924,
"loss": 0.0213,
"step": 70000
},
{
"epoch": 0.4,
"learning_rate": 0.0002431223128733236,
"loss": 0.027,
"step": 70200
},
{
"epoch": 0.4,
"learning_rate": 0.00024249492829173943,
"loss": 0.0204,
"step": 70400
},
{
"epoch": 0.4,
"learning_rate": 0.00024186681282712484,
"loss": 0.0226,
"step": 70600
},
{
"epoch": 0.4,
"learning_rate": 0.00024123797447506894,
"loss": 0.0235,
"step": 70800
},
{
"epoch": 0.4,
"learning_rate": 0.00024060842124036243,
"loss": 0.022,
"step": 71000
},
{
"epoch": 0.4,
"learning_rate": 0.0002399781611368965,
"loss": 0.015,
"step": 71200
},
{
"epoch": 0.41,
"learning_rate": 0.0002393503587074034,
"loss": 0.02,
"step": 71400
},
{
"epoch": 0.41,
"learning_rate": 0.0002387187123780583,
"loss": 0.0172,
"step": 71600
},
{
"epoch": 0.41,
"learning_rate": 0.00023808638323498182,
"loss": 0.0184,
"step": 71800
},
{
"epoch": 0.41,
"learning_rate": 0.0002374533793274009,
"loss": 0.022,
"step": 72000
},
{
"epoch": 0.41,
"learning_rate": 0.00023681970871313178,
"loss": 0.0163,
"step": 72200
},
{
"epoch": 0.41,
"learning_rate": 0.00023618537945847764,
"loss": 0.042,
"step": 72400
},
{
"epoch": 0.41,
"learning_rate": 0.0002355503996381257,
"loss": 0.0249,
"step": 72600
},
{
"epoch": 0.41,
"learning_rate": 0.00023491477733504463,
"loss": 0.0392,
"step": 72800
},
{
"epoch": 0.41,
"learning_rate": 0.00023427852064038156,
"loss": 0.0206,
"step": 73000
},
{
"epoch": 0.42,
"learning_rate": 0.00023364482361278884,
"loss": 0.0196,
"step": 73200
},
{
"epoch": 0.42,
"learning_rate": 0.00023300732551134807,
"loss": 0.0273,
"step": 73400
},
{
"epoch": 0.42,
"learning_rate": 0.00023236921729921222,
"loss": 0.0197,
"step": 73600
},
{
"epoch": 0.42,
"learning_rate": 0.00023173370213417187,
"loss": 0.0247,
"step": 73800
},
{
"epoch": 0.42,
"learning_rate": 0.00023109440102573372,
"loss": 0.0279,
"step": 74000
},
{
"epoch": 0.42,
"learning_rate": 0.00023045451415715175,
"loss": 0.0236,
"step": 74200
},
{
"epoch": 0.42,
"learning_rate": 0.00022981404967385886,
"loss": 0.0183,
"step": 74400
},
{
"epoch": 0.42,
"learning_rate": 0.00022917301572864066,
"loss": 0.0285,
"step": 74600
},
{
"epoch": 0.42,
"learning_rate": 0.00022853142048153175,
"loss": 0.0298,
"step": 74800
},
{
"epoch": 0.43,
"learning_rate": 0.00022788927209971169,
"loss": 0.0202,
"step": 75000
},
{
"epoch": 0.43,
"learning_rate": 0.00022724657875740128,
"loss": 0.0198,
"step": 75200
},
{
"epoch": 0.43,
"learning_rate": 0.00022660334863575842,
"loss": 0.0297,
"step": 75400
},
{
"epoch": 0.43,
"learning_rate": 0.00022595958992277377,
"loss": 0.0167,
"step": 75600
},
{
"epoch": 0.43,
"learning_rate": 0.00022531531081316684,
"loss": 0.0184,
"step": 75800
},
{
"epoch": 0.43,
"learning_rate": 0.00022467051950828147,
"loss": 0.0282,
"step": 76000
},
{
"epoch": 0.43,
"learning_rate": 0.00022402845193252637,
"loss": 0.0345,
"step": 76200
},
{
"epoch": 0.43,
"learning_rate": 0.00022338266332551338,
"loss": 0.0173,
"step": 76400
},
{
"epoch": 0.43,
"learning_rate": 0.0002227363871248368,
"loss": 0.0243,
"step": 76600
},
{
"epoch": 0.44,
"learning_rate": 0.0002220896315572626,
"loss": 0.0166,
"step": 76800
},
{
"epoch": 0.44,
"learning_rate": 0.0002214424048556585,
"loss": 0.0257,
"step": 77000
},
{
"epoch": 0.44,
"learning_rate": 0.00022079471525888992,
"loss": 0.0477,
"step": 77200
},
{
"epoch": 0.44,
"learning_rate": 0.0002201465710117142,
"loss": 0.0205,
"step": 77400
},
{
"epoch": 0.44,
"learning_rate": 0.00021949798036467665,
"loss": 0.0275,
"step": 77600
},
{
"epoch": 0.44,
"learning_rate": 0.00021884895157400457,
"loss": 0.0196,
"step": 77800
},
{
"epoch": 0.44,
"learning_rate": 0.000218199492901503,
"loss": 0.0342,
"step": 78000
},
{
"epoch": 0.44,
"learning_rate": 0.00021754961261444885,
"loss": 0.0219,
"step": 78200
},
{
"epoch": 0.45,
"learning_rate": 0.00021689931898548614,
"loss": 0.017,
"step": 78400
},
{
"epoch": 0.45,
"learning_rate": 0.00021624862029252044,
"loss": 0.0263,
"step": 78600
},
{
"epoch": 0.45,
"learning_rate": 0.0002155975248186137,
"loss": 0.025,
"step": 78800
},
{
"epoch": 0.45,
"learning_rate": 0.00021494604085187845,
"loss": 0.0171,
"step": 79000
},
{
"epoch": 0.45,
"learning_rate": 0.0002142941766853728,
"loss": 0.0205,
"step": 79200
},
{
"epoch": 0.45,
"learning_rate": 0.00021364194061699446,
"loss": 0.0162,
"step": 79400
},
{
"epoch": 0.45,
"learning_rate": 0.00021298934094937536,
"loss": 0.0238,
"step": 79600
},
{
"epoch": 0.45,
"learning_rate": 0.0002123363859897756,
"loss": 0.0239,
"step": 79800
},
{
"epoch": 0.45,
"learning_rate": 0.00021168308404997838,
"loss": 0.0139,
"step": 80000
},
{
"epoch": 0.46,
"learning_rate": 0.00021102944344618345,
"loss": 0.0305,
"step": 80200
},
{
"epoch": 0.46,
"learning_rate": 0.0002103754724989017,
"loss": 0.0228,
"step": 80400
},
{
"epoch": 0.46,
"learning_rate": 0.00020972117953284915,
"loss": 0.0379,
"step": 80600
},
{
"epoch": 0.46,
"learning_rate": 0.0002090665728768409,
"loss": 0.0176,
"step": 80800
},
{
"epoch": 0.46,
"learning_rate": 0.0002084116608636852,
"loss": 0.0186,
"step": 81000
},
{
"epoch": 0.46,
"learning_rate": 0.00020775645183007728,
"loss": 0.019,
"step": 81200
},
{
"epoch": 0.46,
"learning_rate": 0.00020710095411649338,
"loss": 0.0251,
"step": 81400
},
{
"epoch": 0.46,
"learning_rate": 0.0002064451760670844,
"loss": 0.0229,
"step": 81600
},
{
"epoch": 0.46,
"learning_rate": 0.00020578912602956987,
"loss": 0.0303,
"step": 81800
},
{
"epoch": 0.47,
"learning_rate": 0.0002051360945654819,
"loss": 0.017,
"step": 82000
},
{
"epoch": 0.47,
"learning_rate": 0.00020447952686428194,
"loss": 0.0196,
"step": 82200
},
{
"epoch": 0.47,
"learning_rate": 0.00020382271219668632,
"loss": 0.0212,
"step": 82400
},
{
"epoch": 0.47,
"learning_rate": 0.00020316565892361012,
"loss": 0.0194,
"step": 82600
},
{
"epoch": 0.47,
"learning_rate": 0.00020250837540900578,
"loss": 0.0299,
"step": 82800
},
{
"epoch": 0.47,
"learning_rate": 0.00020185087001975654,
"loss": 0.0184,
"step": 83000
},
{
"epoch": 0.47,
"learning_rate": 0.00020119315112557005,
"loss": 0.033,
"step": 83200
},
{
"epoch": 0.47,
"learning_rate": 0.00020053522709887175,
"loss": 0.0207,
"step": 83400
},
{
"epoch": 0.47,
"learning_rate": 0.00019987710631469828,
"loss": 0.0141,
"step": 83600
},
{
"epoch": 0.48,
"learning_rate": 0.00019921879715059093,
"loss": 0.0225,
"step": 83800
},
{
"epoch": 0.48,
"learning_rate": 0.000198560307986489,
"loss": 0.0263,
"step": 84000
},
{
"epoch": 0.48,
"learning_rate": 0.00019790164720462304,
"loss": 0.0284,
"step": 84200
},
{
"epoch": 0.48,
"learning_rate": 0.00019724282318940825,
"loss": 0.016,
"step": 84400
},
{
"epoch": 0.48,
"learning_rate": 0.00019658384432733769,
"loss": 0.0394,
"step": 84600
},
{
"epoch": 0.48,
"learning_rate": 0.0001959247190068755,
"loss": 0.0208,
"step": 84800
},
{
"epoch": 0.48,
"learning_rate": 0.00019526545561835023,
"loss": 0.0199,
"step": 85000
},
{
"epoch": 0.48,
"learning_rate": 0.00019460606255384803,
"loss": 0.0188,
"step": 85200
},
{
"epoch": 0.48,
"learning_rate": 0.00019394654820710546,
"loss": 0.0246,
"step": 85400
},
{
"epoch": 0.49,
"learning_rate": 0.0001932869209734034,
"loss": 0.0386,
"step": 85600
},
{
"epoch": 0.49,
"learning_rate": 0.00019262718924945921,
"loss": 0.0336,
"step": 85800
},
{
"epoch": 0.49,
"learning_rate": 0.00019197066079753742,
"loss": 0.0226,
"step": 86000
},
{
"epoch": 0.49,
"learning_rate": 0.0001913107457060452,
"loss": 0.014,
"step": 86200
},
{
"epoch": 0.49,
"learning_rate": 0.00019065075128001235,
"loss": 0.0251,
"step": 86400
},
{
"epoch": 0.49,
"learning_rate": 0.00018999068592083065,
"loss": 0.0188,
"step": 86600
},
{
"epoch": 0.49,
"learning_rate": 0.00018933055803079484,
"loss": 0.0139,
"step": 86800
},
{
"epoch": 0.49,
"learning_rate": 0.0001886703760129956,
"loss": 0.0213,
"step": 87000
},
{
"epoch": 0.5,
"learning_rate": 0.0001880101482712127,
"loss": 0.0322,
"step": 87200
},
{
"epoch": 0.5,
"learning_rate": 0.00018734988320980793,
"loss": 0.0223,
"step": 87400
},
{
"epoch": 0.5,
"learning_rate": 0.00018668958923361806,
"loss": 0.0261,
"step": 87600
},
{
"epoch": 0.5,
"learning_rate": 0.00018602927474784813,
"loss": 0.019,
"step": 87800
},
{
"epoch": 0.5,
"learning_rate": 0.00018536894815796403,
"loss": 0.0222,
"step": 88000
},
{
"epoch": 0.5,
"learning_rate": 0.0001847119195163232,
"loss": 0.0147,
"step": 88200
},
{
"epoch": 0.5,
"learning_rate": 0.0001840515938906732,
"loss": 0.0206,
"step": 88400
},
{
"epoch": 0.5,
"learning_rate": 0.0001833912813357758,
"loss": 0.0189,
"step": 88600
},
{
"epoch": 0.5,
"learning_rate": 0.0001827309902570724,
"loss": 0.0224,
"step": 88800
},
{
"epoch": 0.51,
"learning_rate": 0.00018207072905973099,
"loss": 0.0197,
"step": 89000
},
{
"epoch": 0.51,
"learning_rate": 0.00018141050614853935,
"loss": 0.0323,
"step": 89200
},
{
"epoch": 0.51,
"learning_rate": 0.00018075032992779762,
"loss": 0.0138,
"step": 89400
},
{
"epoch": 0.51,
"learning_rate": 0.0001800902088012118,
"loss": 0.0197,
"step": 89600
},
{
"epoch": 0.51,
"learning_rate": 0.0001794334512880854,
"loss": 0.0307,
"step": 89800
},
{
"epoch": 0.51,
"learning_rate": 0.00017877346517762124,
"loss": 0.026,
"step": 90000
},
{
"epoch": 0.51,
"learning_rate": 0.00017811355932579115,
"loss": 0.0236,
"step": 90200
},
{
"epoch": 0.51,
"learning_rate": 0.00017745374213285934,
"loss": 0.0359,
"step": 90400
},
{
"epoch": 0.51,
"learning_rate": 0.0001767940219979617,
"loss": 0.0245,
"step": 90600
},
{
"epoch": 0.52,
"learning_rate": 0.00017613440731899813,
"loss": 0.0333,
"step": 90800
},
{
"epoch": 0.52,
"learning_rate": 0.00017547490649252667,
"loss": 0.0131,
"step": 91000
},
{
"epoch": 0.52,
"learning_rate": 0.00017481552791365573,
"loss": 0.0221,
"step": 91200
},
{
"epoch": 0.52,
"learning_rate": 0.00017415627997593782,
"loss": 0.0209,
"step": 91400
},
{
"epoch": 0.52,
"learning_rate": 0.0001734971710712621,
"loss": 0.0273,
"step": 91600
},
{
"epoch": 0.52,
"learning_rate": 0.0001728382095897483,
"loss": 0.0262,
"step": 91800
},
{
"epoch": 0.52,
"learning_rate": 0.00017217940391963928,
"loss": 0.0366,
"step": 92000
},
{
"epoch": 0.52,
"learning_rate": 0.00017152076244719467,
"loss": 0.0265,
"step": 92200
},
{
"epoch": 0.52,
"learning_rate": 0.00017086229355658372,
"loss": 0.023,
"step": 92400
},
{
"epoch": 0.53,
"learning_rate": 0.00017020400562977906,
"loss": 0.0304,
"step": 92600
},
{
"epoch": 0.53,
"learning_rate": 0.00016954590704644948,
"loss": 0.017,
"step": 92800
},
{
"epoch": 0.53,
"learning_rate": 0.00016888800618385382,
"loss": 0.0414,
"step": 93000
},
{
"epoch": 0.53,
"learning_rate": 0.00016823031141673374,
"loss": 0.0462,
"step": 93200
},
{
"epoch": 0.53,
"learning_rate": 0.00016757611797137148,
"loss": 0.0284,
"step": 93400
},
{
"epoch": 0.53,
"learning_rate": 0.00016691885937382842,
"loss": 0.0256,
"step": 93600
},
{
"epoch": 0.53,
"learning_rate": 0.0001662618319379937,
"loss": 0.0148,
"step": 93800
},
{
"epoch": 0.53,
"learning_rate": 0.00016560504402749084,
"loss": 0.0144,
"step": 94000
},
{
"epoch": 0.53,
"learning_rate": 0.00016494850400289434,
"loss": 0.0202,
"step": 94200
},
{
"epoch": 0.54,
"learning_rate": 0.00016429222022162316,
"loss": 0.0264,
"step": 94400
},
{
"epoch": 0.54,
"learning_rate": 0.00016363620103783448,
"loss": 0.0175,
"step": 94600
},
{
"epoch": 0.54,
"learning_rate": 0.00016298045480231735,
"loss": 0.0187,
"step": 94800
},
{
"epoch": 0.54,
"learning_rate": 0.0001623249898623863,
"loss": 0.0153,
"step": 95000
},
{
"epoch": 0.54,
"learning_rate": 0.00016166981456177496,
"loss": 0.0145,
"step": 95200
},
{
"epoch": 0.54,
"learning_rate": 0.00016101493724053015,
"loss": 0.0196,
"step": 95400
},
{
"epoch": 0.54,
"learning_rate": 0.00016036036623490562,
"loss": 0.0138,
"step": 95600
},
{
"epoch": 0.54,
"learning_rate": 0.00015970610987725575,
"loss": 0.0189,
"step": 95800
},
{
"epoch": 0.55,
"learning_rate": 0.00015905217649592963,
"loss": 0.0367,
"step": 96000
},
{
"epoch": 0.55,
"learning_rate": 0.00015839857441516498,
"loss": 0.0134,
"step": 96200
},
{
"epoch": 0.55,
"learning_rate": 0.00015774857740871961,
"loss": 0.0236,
"step": 96400
},
{
"epoch": 0.55,
"learning_rate": 0.00015709566112445702,
"loss": 0.0288,
"step": 96600
},
{
"epoch": 0.55,
"learning_rate": 0.0001564431010461969,
"loss": 0.0204,
"step": 96800
},
{
"epoch": 0.55,
"learning_rate": 0.00015579090548069552,
"loss": 0.0108,
"step": 97000
},
{
"epoch": 0.55,
"learning_rate": 0.0001551390827300693,
"loss": 0.0264,
"step": 97200
},
{
"epoch": 0.55,
"learning_rate": 0.0001544876410916887,
"loss": 0.0196,
"step": 97400
},
{
"epoch": 0.55,
"learning_rate": 0.0001538365888580729,
"loss": 0.0221,
"step": 97600
},
{
"epoch": 0.56,
"learning_rate": 0.00015318593431678411,
"loss": 0.0236,
"step": 97800
},
{
"epoch": 0.56,
"learning_rate": 0.0001525356857503223,
"loss": 0.0229,
"step": 98000
},
{
"epoch": 0.56,
"learning_rate": 0.0001518858514360193,
"loss": 0.0231,
"step": 98200
},
{
"epoch": 0.56,
"learning_rate": 0.00015123643964593393,
"loss": 0.0162,
"step": 98400
},
{
"epoch": 0.56,
"learning_rate": 0.00015058745864674644,
"loss": 0.0411,
"step": 98600
},
{
"epoch": 0.56,
"learning_rate": 0.00014993891669965337,
"loss": 0.0197,
"step": 98800
},
{
"epoch": 0.56,
"learning_rate": 0.00014929082206026223,
"loss": 0.0285,
"step": 99000
},
{
"epoch": 0.56,
"learning_rate": 0.0001486431829784866,
"loss": 0.0316,
"step": 99200
},
{
"epoch": 0.56,
"learning_rate": 0.00014799600769844118,
"loss": 0.0223,
"step": 99400
},
{
"epoch": 0.57,
"learning_rate": 0.0001473493044583364,
"loss": 0.0255,
"step": 99600
},
{
"epoch": 0.57,
"learning_rate": 0.00014670308149037416,
"loss": 0.0137,
"step": 99800
},
{
"epoch": 0.57,
"learning_rate": 0.0001460573470206426,
"loss": 0.0174,
"step": 100000
},
{
"epoch": 0.57,
"learning_rate": 0.00014541533420859412,
"loss": 0.0322,
"step": 100200
},
{
"epoch": 0.57,
"learning_rate": 0.0001447705988435316,
"loss": 0.0253,
"step": 100400
},
{
"epoch": 0.57,
"learning_rate": 0.00014412637657621645,
"loss": 0.0251,
"step": 100600
},
{
"epoch": 0.57,
"learning_rate": 0.0001434826756072689,
"loss": 0.0185,
"step": 100800
},
{
"epoch": 0.57,
"learning_rate": 0.00014283950413067326,
"loss": 0.0184,
"step": 101000
},
{
"epoch": 0.57,
"learning_rate": 0.00014219687033367387,
"loss": 0.0137,
"step": 101200
},
{
"epoch": 0.58,
"learning_rate": 0.00014155478239667057,
"loss": 0.0191,
"step": 101400
},
{
"epoch": 0.58,
"learning_rate": 0.00014091324849311473,
"loss": 0.0234,
"step": 101600
},
{
"epoch": 0.58,
"learning_rate": 0.000140272276789405,
"loss": 0.0199,
"step": 101800
},
{
"epoch": 0.58,
"learning_rate": 0.00013963187544478376,
"loss": 0.0141,
"step": 102000
},
{
"epoch": 0.58,
"learning_rate": 0.00013899205261123283,
"loss": 0.0207,
"step": 102200
},
{
"epoch": 0.58,
"learning_rate": 0.00013835281643337,
"loss": 0.0242,
"step": 102400
},
{
"epoch": 0.58,
"learning_rate": 0.00013771417504834503,
"loss": 0.031,
"step": 102600
},
{
"epoch": 0.58,
"learning_rate": 0.00013707613658573656,
"loss": 0.0239,
"step": 102800
},
{
"epoch": 0.58,
"learning_rate": 0.00013643870916744814,
"loss": 0.0238,
"step": 103000
},
{
"epoch": 0.59,
"learning_rate": 0.00013580190090760512,
"loss": 0.0349,
"step": 103200
},
{
"epoch": 0.59,
"learning_rate": 0.0001351657199124511,
"loss": 0.0177,
"step": 103400
},
{
"epoch": 0.59,
"learning_rate": 0.0001345301742802452,
"loss": 0.0139,
"step": 103600
},
{
"epoch": 0.59,
"learning_rate": 0.0001338952721011585,
"loss": 0.018,
"step": 103800
},
{
"epoch": 0.59,
"learning_rate": 0.00013326102145717149,
"loss": 0.0179,
"step": 104000
},
{
"epoch": 0.59,
"learning_rate": 0.00013262743042197046,
"loss": 0.0218,
"step": 104200
},
{
"epoch": 0.59,
"learning_rate": 0.00013199450706084573,
"loss": 0.0117,
"step": 104400
},
{
"epoch": 0.59,
"learning_rate": 0.00013136225943058828,
"loss": 0.0231,
"step": 104600
},
{
"epoch": 0.6,
"learning_rate": 0.00013073069557938726,
"loss": 0.031,
"step": 104800
},
{
"epoch": 0.6,
"learning_rate": 0.0001300998235467278,
"loss": 0.0206,
"step": 105000
},
{
"epoch": 0.6,
"learning_rate": 0.0001294728004700576,
"loss": 0.0213,
"step": 105200
},
{
"epoch": 0.6,
"learning_rate": 0.0001288433325983142,
"loss": 0.0407,
"step": 105400
},
{
"epoch": 0.6,
"learning_rate": 0.00012821458057027873,
"loss": 0.0225,
"step": 105600
},
{
"epoch": 0.6,
"learning_rate": 0.00012758969071674774,
"loss": 0.0235,
"step": 105800
},
{
"epoch": 0.6,
"learning_rate": 0.00012696239069890963,
"loss": 0.0212,
"step": 106000
},
{
"epoch": 0.6,
"learning_rate": 0.00012633583046820873,
"loss": 0.0328,
"step": 106200
},
{
"epoch": 0.6,
"learning_rate": 0.00012571001800043652,
"loss": 0.0278,
"step": 106400
},
{
"epoch": 0.61,
"learning_rate": 0.000125084961261866,
"loss": 0.0158,
"step": 106600
},
{
"epoch": 0.61,
"learning_rate": 0.00012446066820914994,
"loss": 0.0146,
"step": 106800
},
{
"epoch": 0.61,
"learning_rate": 0.00012383714678922,
"loss": 0.0284,
"step": 107000
},
{
"epoch": 0.61,
"learning_rate": 0.00012321440493918523,
"loss": 0.0149,
"step": 107200
},
{
"epoch": 0.61,
"learning_rate": 0.00012259245058623115,
"loss": 0.0138,
"step": 107400
},
{
"epoch": 0.61,
"learning_rate": 0.00012197129164751876,
"loss": 0.02,
"step": 107600
},
{
"epoch": 0.61,
"learning_rate": 0.00012135093603008409,
"loss": 0.0187,
"step": 107800
},
{
"epoch": 0.61,
"learning_rate": 0.00012073139163073704,
"loss": 0.0233,
"step": 108000
},
{
"epoch": 0.61,
"learning_rate": 0.00012011266633596143,
"loss": 0.0244,
"step": 108200
},
{
"epoch": 0.62,
"learning_rate": 0.00011949476802181382,
"loss": 0.0282,
"step": 108400
},
{
"epoch": 0.62,
"learning_rate": 0.0001188777045538242,
"loss": 0.0217,
"step": 108600
},
{
"epoch": 0.62,
"learning_rate": 0.00011826456278153069,
"loss": 0.0214,
"step": 108800
},
{
"epoch": 0.62,
"learning_rate": 0.000117649188287617,
"loss": 0.0245,
"step": 109000
},
{
"epoch": 0.62,
"learning_rate": 0.00011703467213314812,
"loss": 0.018,
"step": 109200
},
{
"epoch": 0.62,
"learning_rate": 0.00011642102214060081,
"loss": 0.0136,
"step": 109400
},
{
"epoch": 0.62,
"learning_rate": 0.00011580824612142588,
"loss": 0.0274,
"step": 109600
},
{
"epoch": 0.62,
"learning_rate": 0.00011519635187594886,
"loss": 0.0316,
"step": 109800
},
{
"epoch": 0.62,
"learning_rate": 0.00011458534719327081,
"loss": 0.0195,
"step": 110000
},
{
"epoch": 0.63,
"learning_rate": 0.00011397523985116925,
"loss": 0.0194,
"step": 110200
},
{
"epoch": 0.63,
"learning_rate": 0.0001133660376159988,
"loss": 0.0417,
"step": 110400
},
{
"epoch": 0.63,
"learning_rate": 0.00011275774824259256,
"loss": 0.0226,
"step": 110600
},
{
"epoch": 0.63,
"learning_rate": 0.00011215037947416353,
"loss": 0.0247,
"step": 110800
},
{
"epoch": 0.63,
"learning_rate": 0.00011154393904220578,
"loss": 0.0123,
"step": 111000
},
{
"epoch": 0.63,
"learning_rate": 0.00011093843466639602,
"loss": 0.0215,
"step": 111200
},
{
"epoch": 0.63,
"learning_rate": 0.00011033387405449557,
"loss": 0.0169,
"step": 111400
},
{
"epoch": 0.63,
"learning_rate": 0.00010973026490225217,
"loss": 0.0335,
"step": 111600
},
{
"epoch": 0.63,
"learning_rate": 0.00010912761489330187,
"loss": 0.021,
"step": 111800
},
{
"epoch": 0.64,
"learning_rate": 0.00010852593169907127,
"loss": 0.0212,
"step": 112000
},
{
"epoch": 0.64,
"learning_rate": 0.00010792522297867997,
"loss": 0.0251,
"step": 112200
},
{
"epoch": 0.64,
"learning_rate": 0.00010732549637884315,
"loss": 0.0195,
"step": 112400
},
{
"epoch": 0.64,
"learning_rate": 0.00010672975074337141,
"loss": 0.0154,
"step": 112600
},
{
"epoch": 0.64,
"learning_rate": 0.00010613200626886399,
"loss": 0.0161,
"step": 112800
},
{
"epoch": 0.64,
"learning_rate": 0.00010553526674164345,
"loss": 0.022,
"step": 113000
},
{
"epoch": 0.64,
"learning_rate": 0.00010493953975789901,
"loss": 0.0183,
"step": 113200
},
{
"epoch": 0.64,
"learning_rate": 0.00010434483290093065,
"loss": 0.0301,
"step": 113400
},
{
"epoch": 0.65,
"learning_rate": 0.00010375115374105277,
"loss": 0.0272,
"step": 113600
},
{
"epoch": 0.65,
"learning_rate": 0.00010315850983549783,
"loss": 0.0169,
"step": 113800
},
{
"epoch": 0.65,
"learning_rate": 0.00010256690872831991,
"loss": 0.0258,
"step": 114000
},
{
"epoch": 0.65,
"learning_rate": 0.00010197635795029873,
"loss": 0.0339,
"step": 114200
},
{
"epoch": 0.65,
"learning_rate": 0.00010138686501884381,
"loss": 0.0213,
"step": 114400
},
{
"epoch": 0.65,
"learning_rate": 0.00010079843743789918,
"loss": 0.0287,
"step": 114600
},
{
"epoch": 0.65,
"learning_rate": 0.00010021401679048213,
"loss": 0.0281,
"step": 114800
},
{
"epoch": 0.65,
"learning_rate": 9.962773694788469e-05,
"loss": 0.029,
"step": 115000
},
{
"epoch": 0.65,
"learning_rate": 9.904254484859887e-05,
"loss": 0.028,
"step": 115200
},
{
"epoch": 0.66,
"learning_rate": 9.845844794182107e-05,
"loss": 0.0131,
"step": 115400
},
{
"epoch": 0.66,
"learning_rate": 9.787545366280647e-05,
"loss": 0.0328,
"step": 115600
},
{
"epoch": 0.66,
"learning_rate": 9.729356943277424e-05,
"loss": 0.0271,
"step": 115800
},
{
"epoch": 0.66,
"learning_rate": 9.67128026588135e-05,
"loss": 0.0318,
"step": 116000
},
{
"epoch": 0.66,
"learning_rate": 9.613316073378832e-05,
"loss": 0.0234,
"step": 116200
},
{
"epoch": 0.66,
"learning_rate": 9.555465103624428e-05,
"loss": 0.025,
"step": 116400
},
{
"epoch": 0.66,
"learning_rate": 9.497728093031412e-05,
"loss": 0.0344,
"step": 116600
},
{
"epoch": 0.66,
"learning_rate": 9.440105776562451e-05,
"loss": 0.0164,
"step": 116800
},
{
"epoch": 0.66,
"learning_rate": 9.382598887720169e-05,
"loss": 0.0298,
"step": 117000
},
{
"epoch": 0.67,
"learning_rate": 9.325208158537876e-05,
"loss": 0.024,
"step": 117200
},
{
"epoch": 0.67,
"learning_rate": 9.267934319570226e-05,
"loss": 0.0153,
"step": 117400
},
{
"epoch": 0.67,
"learning_rate": 9.210778099883943e-05,
"loss": 0.0135,
"step": 117600
},
{
"epoch": 0.67,
"learning_rate": 9.153740227048476e-05,
"loss": 0.0168,
"step": 117800
},
{
"epoch": 0.67,
"learning_rate": 9.097105723733374e-05,
"loss": 0.0126,
"step": 118000
},
{
"epoch": 0.67,
"learning_rate": 9.040589819318035e-05,
"loss": 0.0195,
"step": 118200
},
{
"epoch": 0.67,
"learning_rate": 8.983910128561952e-05,
"loss": 0.0322,
"step": 118400
},
{
"epoch": 0.67,
"learning_rate": 8.92735167257004e-05,
"loss": 0.0205,
"step": 118600
},
{
"epoch": 0.67,
"learning_rate": 8.870915171302544e-05,
"loss": 0.0218,
"step": 118800
},
{
"epoch": 0.68,
"learning_rate": 8.814601343167284e-05,
"loss": 0.0164,
"step": 119000
},
{
"epoch": 0.68,
"learning_rate": 8.758410905010516e-05,
"loss": 0.0244,
"step": 119200
},
{
"epoch": 0.68,
"learning_rate": 8.702344572107807e-05,
"loss": 0.0099,
"step": 119400
},
{
"epoch": 0.68,
"learning_rate": 8.646403058154925e-05,
"loss": 0.0237,
"step": 119600
},
{
"epoch": 0.68,
"learning_rate": 8.590587075258757e-05,
"loss": 0.0274,
"step": 119800
},
{
"epoch": 0.68,
"learning_rate": 8.534897333928242e-05,
"loss": 0.0172,
"step": 120000
},
{
"epoch": 0.68,
"learning_rate": 8.479334543065332e-05,
"loss": 0.0254,
"step": 120200
},
{
"epoch": 0.68,
"learning_rate": 8.423899409955962e-05,
"loss": 0.0226,
"step": 120400
},
{
"epoch": 0.68,
"learning_rate": 8.368592640261049e-05,
"loss": 0.0171,
"step": 120600
},
{
"epoch": 0.69,
"learning_rate": 8.313414938007512e-05,
"loss": 0.0276,
"step": 120800
},
{
"epoch": 0.69,
"learning_rate": 8.258367005579311e-05,
"loss": 0.0146,
"step": 121000
},
{
"epoch": 0.69,
"learning_rate": 8.203449543708476e-05,
"loss": 0.0269,
"step": 121200
},
{
"epoch": 0.69,
"learning_rate": 8.148663251466253e-05,
"loss": 0.0174,
"step": 121400
},
{
"epoch": 0.69,
"learning_rate": 8.094008826254145e-05,
"loss": 0.0158,
"step": 121600
},
{
"epoch": 0.69,
"learning_rate": 8.039486963795052e-05,
"loss": 0.0353,
"step": 121800
},
{
"epoch": 0.69,
"learning_rate": 7.985098358124426e-05,
"loss": 0.0342,
"step": 122000
},
{
"epoch": 0.69,
"learning_rate": 7.930843701581424e-05,
"loss": 0.0251,
"step": 122200
},
{
"epoch": 0.7,
"learning_rate": 7.8767236848001e-05,
"loss": 0.0154,
"step": 122400
},
{
"epoch": 0.7,
"learning_rate": 7.822738996700614e-05,
"loss": 0.031,
"step": 122600
},
{
"epoch": 0.7,
"learning_rate": 7.768890324480457e-05,
"loss": 0.0205,
"step": 122800
},
{
"epoch": 0.7,
"learning_rate": 7.715178353605712e-05,
"loss": 0.012,
"step": 123000
},
{
"epoch": 0.7,
"learning_rate": 7.661603767802323e-05,
"loss": 0.012,
"step": 123200
},
{
"epoch": 0.7,
"learning_rate": 7.60816724904739e-05,
"loss": 0.0198,
"step": 123400
},
{
"epoch": 0.7,
"learning_rate": 7.55486947756049e-05,
"loss": 0.0189,
"step": 123600
},
{
"epoch": 0.7,
"learning_rate": 7.501711131795021e-05,
"loss": 0.0162,
"step": 123800
},
{
"epoch": 0.7,
"learning_rate": 7.448692888429562e-05,
"loss": 0.0146,
"step": 124000
},
{
"epoch": 0.71,
"learning_rate": 7.395815422359255e-05,
"loss": 0.016,
"step": 124200
},
{
"epoch": 0.71,
"learning_rate": 7.343342733796512e-05,
"loss": 0.0168,
"step": 124400
},
{
"epoch": 0.71,
"learning_rate": 7.290748127549915e-05,
"loss": 0.0294,
"step": 124600
},
{
"epoch": 0.71,
"learning_rate": 7.238296309154575e-05,
"loss": 0.0112,
"step": 124800
},
{
"epoch": 0.71,
"learning_rate": 7.185987946295322e-05,
"loss": 0.013,
"step": 125000
},
{
"epoch": 0.71,
"learning_rate": 7.133823704830904e-05,
"loss": 0.0237,
"step": 125200
},
{
"epoch": 0.71,
"learning_rate": 7.081804248785451e-05,
"loss": 0.0247,
"step": 125400
},
{
"epoch": 0.71,
"learning_rate": 7.029930240340067e-05,
"loss": 0.0225,
"step": 125600
},
{
"epoch": 0.71,
"learning_rate": 6.978202339824351e-05,
"loss": 0.028,
"step": 125800
},
{
"epoch": 0.72,
"learning_rate": 6.926621205708063e-05,
"loss": 0.0234,
"step": 126000
},
{
"epoch": 0.72,
"learning_rate": 6.875187494592678e-05,
"loss": 0.0175,
"step": 126200
},
{
"epoch": 0.72,
"learning_rate": 6.82390186120306e-05,
"loss": 0.0198,
"step": 126400
},
{
"epoch": 0.72,
"learning_rate": 6.773020271848358e-05,
"loss": 0.0233,
"step": 126600
},
{
"epoch": 0.72,
"learning_rate": 6.722032002013071e-05,
"loss": 0.0213,
"step": 126800
},
{
"epoch": 0.72,
"learning_rate": 6.6711937594947e-05,
"loss": 0.0209,
"step": 127000
},
{
"epoch": 0.72,
"learning_rate": 6.620506191438099e-05,
"loss": 0.0329,
"step": 127200
},
{
"epoch": 0.72,
"learning_rate": 6.569969943070103e-05,
"loss": 0.0224,
"step": 127400
},
{
"epoch": 0.72,
"learning_rate": 6.51958565769135e-05,
"loss": 0.0301,
"step": 127600
},
{
"epoch": 0.73,
"learning_rate": 6.469604754411272e-05,
"loss": 0.0303,
"step": 127800
},
{
"epoch": 0.73,
"learning_rate": 6.41952554936078e-05,
"loss": 0.0163,
"step": 128000
},
{
"epoch": 0.73,
"learning_rate": 6.369600222379772e-05,
"loss": 0.0162,
"step": 128200
},
{
"epoch": 0.73,
"learning_rate": 6.319829408992151e-05,
"loss": 0.0233,
"step": 128400
},
{
"epoch": 0.73,
"learning_rate": 6.27021374275494e-05,
"loss": 0.0273,
"step": 128600
},
{
"epoch": 0.73,
"learning_rate": 6.220753855250208e-05,
"loss": 0.0312,
"step": 128800
},
{
"epoch": 0.73,
"learning_rate": 6.171450376077071e-05,
"loss": 0.0216,
"step": 129000
},
{
"epoch": 0.73,
"learning_rate": 6.122303932843605e-05,
"loss": 0.0358,
"step": 129200
},
{
"epoch": 0.73,
"learning_rate": 6.073315151158924e-05,
"loss": 0.0106,
"step": 129400
},
{
"epoch": 0.74,
"learning_rate": 6.0244846546251834e-05,
"loss": 0.0101,
"step": 129600
},
{
"epoch": 0.74,
"learning_rate": 5.9758130648296665e-05,
"loss": 0.0247,
"step": 129800
},
{
"epoch": 0.74,
"learning_rate": 5.927301001336826e-05,
"loss": 0.0157,
"step": 130000
},
{
"epoch": 0.74,
"learning_rate": 5.878949081680443e-05,
"loss": 0.0219,
"step": 130200
},
{
"epoch": 0.74,
"learning_rate": 5.830998476252924e-05,
"loss": 0.0217,
"step": 130400
},
{
"epoch": 0.74,
"learning_rate": 5.7829678803223054e-05,
"loss": 0.0145,
"step": 130600
},
{
"epoch": 0.74,
"learning_rate": 5.735099265515025e-05,
"loss": 0.0225,
"step": 130800
},
{
"epoch": 0.74,
"learning_rate": 5.687393241174086e-05,
"loss": 0.0211,
"step": 131000
},
{
"epoch": 0.75,
"learning_rate": 5.639850414572804e-05,
"loss": 0.0275,
"step": 131200
},
{
"epoch": 0.75,
"learning_rate": 5.5924713909070656e-05,
"loss": 0.0171,
"step": 131400
},
{
"epoch": 0.75,
"learning_rate": 5.545256773287633e-05,
"loss": 0.0211,
"step": 131600
},
{
"epoch": 0.75,
"learning_rate": 5.498207162732463e-05,
"loss": 0.0194,
"step": 131800
},
{
"epoch": 0.75,
"learning_rate": 5.451323158159054e-05,
"loss": 0.0235,
"step": 132000
},
{
"epoch": 0.75,
"learning_rate": 5.4046053563768266e-05,
"loss": 0.0229,
"step": 132200
},
{
"epoch": 0.75,
"learning_rate": 5.358054352079529e-05,
"loss": 0.0073,
"step": 132400
},
{
"epoch": 0.75,
"learning_rate": 5.311670737837655e-05,
"loss": 0.0174,
"step": 132600
},
{
"epoch": 0.75,
"learning_rate": 5.265455104090913e-05,
"loss": 0.0173,
"step": 132800
},
{
"epoch": 0.76,
"learning_rate": 5.2194080391407055e-05,
"loss": 0.0173,
"step": 133000
},
{
"epoch": 0.76,
"learning_rate": 5.173530129142639e-05,
"loss": 0.0188,
"step": 133200
},
{
"epoch": 0.76,
"learning_rate": 5.127821958099065e-05,
"loss": 0.0264,
"step": 133400
},
{
"epoch": 0.76,
"learning_rate": 5.082284107851646e-05,
"loss": 0.0263,
"step": 133600
},
{
"epoch": 0.76,
"learning_rate": 5.036917158073942e-05,
"loss": 0.0139,
"step": 133800
},
{
"epoch": 0.76,
"learning_rate": 4.991721686264047e-05,
"loss": 0.0184,
"step": 134000
},
{
"epoch": 0.76,
"learning_rate": 4.946698267737218e-05,
"loss": 0.0207,
"step": 134200
},
{
"epoch": 0.76,
"learning_rate": 4.901847475618568e-05,
"loss": 0.0242,
"step": 134400
},
{
"epoch": 0.76,
"learning_rate": 4.857169880835763e-05,
"loss": 0.0192,
"step": 134600
},
{
"epoch": 0.77,
"learning_rate": 4.812666052111755e-05,
"loss": 0.0148,
"step": 134800
},
{
"epoch": 0.77,
"learning_rate": 4.7687789861181634e-05,
"loss": 0.0365,
"step": 135000
},
{
"epoch": 0.77,
"learning_rate": 4.724622635071022e-05,
"loss": 0.0186,
"step": 135200
},
{
"epoch": 0.77,
"learning_rate": 4.6806417373413885e-05,
"loss": 0.0225,
"step": 135400
},
{
"epoch": 0.77,
"learning_rate": 4.6368368527836036e-05,
"loss": 0.0176,
"step": 135600
},
{
"epoch": 0.77,
"learning_rate": 4.5932085390114806e-05,
"loss": 0.0263,
"step": 135800
},
{
"epoch": 0.77,
"learning_rate": 4.549757351391151e-05,
"loss": 0.0364,
"step": 136000
},
{
"epoch": 0.77,
"learning_rate": 4.506483843034039e-05,
"loss": 0.033,
"step": 136200
},
{
"epoch": 0.77,
"learning_rate": 4.463388564789776e-05,
"loss": 0.0167,
"step": 136400
},
{
"epoch": 0.78,
"learning_rate": 4.420472065239248e-05,
"loss": 0.0108,
"step": 136600
},
{
"epoch": 0.78,
"learning_rate": 4.377734890687561e-05,
"loss": 0.022,
"step": 136800
},
{
"epoch": 0.78,
"learning_rate": 4.335177585157113e-05,
"loss": 0.0164,
"step": 137000
},
{
"epoch": 0.78,
"learning_rate": 4.2928006903806404e-05,
"loss": 0.0185,
"step": 137200
},
{
"epoch": 0.78,
"learning_rate": 4.2508152745142374e-05,
"loss": 0.0273,
"step": 137400
},
{
"epoch": 0.78,
"learning_rate": 4.208799908482074e-05,
"loss": 0.0141,
"step": 137600
},
{
"epoch": 0.78,
"learning_rate": 4.166966561927144e-05,
"loss": 0.0173,
"step": 137800
},
{
"epoch": 0.78,
"learning_rate": 4.1253157673665675e-05,
"loss": 0.02,
"step": 138000
},
{
"epoch": 0.78,
"learning_rate": 4.083848054993692e-05,
"loss": 0.0276,
"step": 138200
},
{
"epoch": 0.79,
"learning_rate": 4.042563952671287e-05,
"loss": 0.0162,
"step": 138400
},
{
"epoch": 0.79,
"learning_rate": 4.0014639859248885e-05,
"loss": 0.0213,
"step": 138600
},
{
"epoch": 0.79,
"learning_rate": 3.960548677936065e-05,
"loss": 0.0228,
"step": 138800
},
{
"epoch": 0.79,
"learning_rate": 3.9198185495357965e-05,
"loss": 0.0224,
"step": 139000
},
{
"epoch": 0.79,
"learning_rate": 3.879274119197787e-05,
"loss": 0.0195,
"step": 139200
},
{
"epoch": 0.79,
"learning_rate": 3.8389159030319236e-05,
"loss": 0.0163,
"step": 139400
},
{
"epoch": 0.79,
"learning_rate": 3.79874441477767e-05,
"loss": 0.0194,
"step": 139600
},
{
"epoch": 0.79,
"learning_rate": 3.758760165797558e-05,
"loss": 0.0177,
"step": 139800
},
{
"epoch": 0.79,
"learning_rate": 3.718963665070633e-05,
"loss": 0.0197,
"step": 140000
},
{
"epoch": 0.8,
"learning_rate": 3.6793554191860186e-05,
"loss": 0.0202,
"step": 140200
},
{
"epoch": 0.8,
"learning_rate": 3.639935932336438e-05,
"loss": 0.0127,
"step": 140400
},
{
"epoch": 0.8,
"learning_rate": 3.6007057063118326e-05,
"loss": 0.0247,
"step": 140600
},
{
"epoch": 0.8,
"learning_rate": 3.561665240492917e-05,
"loss": 0.022,
"step": 140800
},
{
"epoch": 0.8,
"learning_rate": 3.522815031844875e-05,
"loss": 0.0198,
"step": 141000
},
{
"epoch": 0.8,
"learning_rate": 3.4841555749110164e-05,
"loss": 0.0164,
"step": 141200
},
{
"epoch": 0.8,
"learning_rate": 3.4458792263419346e-05,
"loss": 0.0249,
"step": 141400
},
{
"epoch": 0.8,
"learning_rate": 3.4076017868658e-05,
"loss": 0.0087,
"step": 141600
},
{
"epoch": 0.81,
"learning_rate": 3.369706512869315e-05,
"loss": 0.0293,
"step": 141800
},
{
"epoch": 0.81,
"learning_rate": 3.3318130301208905e-05,
"loss": 0.0273,
"step": 142000
},
{
"epoch": 0.81,
"learning_rate": 3.2941127304445294e-05,
"loss": 0.0147,
"step": 142200
},
{
"epoch": 0.81,
"learning_rate": 3.256606093745782e-05,
"loss": 0.0262,
"step": 142400
},
{
"epoch": 0.81,
"learning_rate": 3.219293597464966e-05,
"loss": 0.0278,
"step": 142600
},
{
"epoch": 0.81,
"learning_rate": 3.182175716571092e-05,
"loss": 0.025,
"step": 142800
},
{
"epoch": 0.81,
"learning_rate": 3.1452529235558165e-05,
"loss": 0.0212,
"step": 143000
},
{
"epoch": 0.81,
"learning_rate": 3.108525688427432e-05,
"loss": 0.0297,
"step": 143200
},
{
"epoch": 0.81,
"learning_rate": 3.071994478704871e-05,
"loss": 0.0141,
"step": 143400
},
{
"epoch": 0.82,
"learning_rate": 3.035659759411763e-05,
"loss": 0.0298,
"step": 143600
},
{
"epoch": 0.82,
"learning_rate": 2.9995219930705253e-05,
"loss": 0.0212,
"step": 143800
},
{
"epoch": 0.82,
"learning_rate": 2.9637608496407227e-05,
"loss": 0.0261,
"step": 144000
},
{
"epoch": 0.82,
"learning_rate": 2.928017376249928e-05,
"loss": 0.0436,
"step": 144200
},
{
"epoch": 0.82,
"learning_rate": 2.8924722260435328e-05,
"loss": 0.019,
"step": 144400
},
{
"epoch": 0.82,
"learning_rate": 2.8571258514931404e-05,
"loss": 0.0157,
"step": 144600
},
{
"epoch": 0.82,
"learning_rate": 2.8219787025400236e-05,
"loss": 0.0216,
"step": 144800
},
{
"epoch": 0.82,
"learning_rate": 2.787031226589443e-05,
"loss": 0.0246,
"step": 145000
},
{
"epoch": 0.82,
"learning_rate": 2.752283868504904e-05,
"loss": 0.0252,
"step": 145200
},
{
"epoch": 0.83,
"learning_rate": 2.7177370706025224e-05,
"loss": 0.0182,
"step": 145400
},
{
"epoch": 0.83,
"learning_rate": 2.6833912726453738e-05,
"loss": 0.0143,
"step": 145600
},
{
"epoch": 0.83,
"learning_rate": 2.649246911837925e-05,
"loss": 0.0198,
"step": 145800
},
{
"epoch": 0.83,
"learning_rate": 2.6153044228204397e-05,
"loss": 0.0123,
"step": 146000
},
{
"epoch": 0.83,
"learning_rate": 2.5815642376634615e-05,
"loss": 0.0385,
"step": 146200
},
{
"epoch": 0.83,
"learning_rate": 2.5480267858622927e-05,
"loss": 0.0177,
"step": 146400
},
{
"epoch": 0.83,
"learning_rate": 2.5148586597250578e-05,
"loss": 0.0278,
"step": 146600
},
{
"epoch": 0.83,
"learning_rate": 2.4817269338190568e-05,
"loss": 0.0252,
"step": 146800
},
{
"epoch": 0.83,
"learning_rate": 2.448799212146731e-05,
"loss": 0.0135,
"step": 147000
},
{
"epoch": 0.84,
"learning_rate": 2.4162390211592713e-05,
"loss": 0.0327,
"step": 147200
},
{
"epoch": 0.84,
"learning_rate": 2.3837195375797726e-05,
"loss": 0.0292,
"step": 147400
},
{
"epoch": 0.84,
"learning_rate": 2.351405305818026e-05,
"loss": 0.0288,
"step": 147600
},
{
"epoch": 0.84,
"learning_rate": 2.319296737217692e-05,
"loss": 0.0272,
"step": 147800
},
{
"epoch": 0.84,
"learning_rate": 2.2873942405044402e-05,
"loss": 0.0238,
"step": 148000
},
{
"epoch": 0.84,
"learning_rate": 2.2556982217807548e-05,
"loss": 0.0142,
"step": 148200
},
{
"epoch": 0.84,
"learning_rate": 2.2242090845207555e-05,
"loss": 0.0175,
"step": 148400
},
{
"epoch": 0.84,
"learning_rate": 2.19292722956507e-05,
"loss": 0.0151,
"step": 148600
},
{
"epoch": 0.84,
"learning_rate": 2.1618530551157263e-05,
"loss": 0.028,
"step": 148800
},
{
"epoch": 0.85,
"learning_rate": 2.1309869567310876e-05,
"loss": 0.0214,
"step": 149000
},
{
"epoch": 0.85,
"learning_rate": 2.100329327320813e-05,
"loss": 0.0218,
"step": 149200
},
{
"epoch": 0.85,
"learning_rate": 2.0698805571408578e-05,
"loss": 0.0188,
"step": 149400
},
{
"epoch": 0.85,
"learning_rate": 2.039641033788514e-05,
"loss": 0.0071,
"step": 149600
},
{
"epoch": 0.85,
"learning_rate": 2.0096111421974547e-05,
"loss": 0.0138,
"step": 149800
},
{
"epoch": 0.85,
"learning_rate": 1.979791264632855e-05,
"loss": 0.0284,
"step": 150000
},
{
"epoch": 0.85,
"learning_rate": 1.9501817806865195e-05,
"loss": 0.0154,
"step": 150200
},
{
"epoch": 0.85,
"learning_rate": 1.9207830672720558e-05,
"loss": 0.0229,
"step": 150400
},
{
"epoch": 0.86,
"learning_rate": 1.8915954986200532e-05,
"loss": 0.0259,
"step": 150600
},
{
"epoch": 0.86,
"learning_rate": 1.8626194462733508e-05,
"loss": 0.0187,
"step": 150800
},
{
"epoch": 0.86,
"learning_rate": 1.8338552790822838e-05,
"loss": 0.0171,
"step": 151000
},
{
"epoch": 0.86,
"learning_rate": 1.8053033632000137e-05,
"loss": 0.0266,
"step": 151200
},
{
"epoch": 0.86,
"learning_rate": 1.7771052291063565e-05,
"loss": 0.0356,
"step": 151400
},
{
"epoch": 0.86,
"learning_rate": 1.7489778377186878e-05,
"loss": 0.014,
"step": 151600
},
{
"epoch": 0.86,
"learning_rate": 1.7210637780862658e-05,
"loss": 0.0141,
"step": 151800
},
{
"epoch": 0.86,
"learning_rate": 1.693363405540805e-05,
"loss": 0.0244,
"step": 152000
},
{
"epoch": 0.86,
"learning_rate": 1.665877072693892e-05,
"loss": 0.0172,
"step": 152200
},
{
"epoch": 0.87,
"learning_rate": 1.638605129432503e-05,
"loss": 0.0271,
"step": 152400
},
{
"epoch": 0.87,
"learning_rate": 1.611547922914535e-05,
"loss": 0.0096,
"step": 152600
},
{
"epoch": 0.87,
"learning_rate": 1.584705797564406e-05,
"loss": 0.0308,
"step": 152800
},
{
"epoch": 0.87,
"learning_rate": 1.5580790950686504e-05,
"loss": 0.027,
"step": 153000
},
{
"epoch": 0.87,
"learning_rate": 1.531668154371589e-05,
"loss": 0.021,
"step": 153200
},
{
"epoch": 0.87,
"learning_rate": 1.5054733116709978e-05,
"loss": 0.0108,
"step": 153400
},
{
"epoch": 0.87,
"learning_rate": 1.4794949004138424e-05,
"loss": 0.0152,
"step": 153600
},
{
"epoch": 0.87,
"learning_rate": 1.4537332512920213e-05,
"loss": 0.0208,
"step": 153800
},
{
"epoch": 0.87,
"learning_rate": 1.4281886922381655e-05,
"loss": 0.0189,
"step": 154000
},
{
"epoch": 0.88,
"learning_rate": 1.4028615484214573e-05,
"loss": 0.0262,
"step": 154200
},
{
"epoch": 0.88,
"learning_rate": 1.3778771471221268e-05,
"loss": 0.0305,
"step": 154400
},
{
"epoch": 0.88,
"learning_rate": 1.35298470713599e-05,
"loss": 0.0378,
"step": 154600
},
{
"epoch": 0.88,
"learning_rate": 1.3283106396952985e-05,
"loss": 0.0212,
"step": 154800
},
{
"epoch": 0.88,
"learning_rate": 1.3038552588883296e-05,
"loss": 0.0139,
"step": 155000
},
{
"epoch": 0.88,
"learning_rate": 1.2796188760195822e-05,
"loss": 0.0268,
"step": 155200
},
{
"epoch": 0.88,
"learning_rate": 1.2556017996058265e-05,
"loss": 0.0164,
"step": 155400
},
{
"epoch": 0.88,
"learning_rate": 1.2318043353721693e-05,
"loss": 0.0118,
"step": 155600
},
{
"epoch": 0.88,
"learning_rate": 1.2082267862481735e-05,
"loss": 0.0203,
"step": 155800
},
{
"epoch": 0.89,
"learning_rate": 1.1848694523639894e-05,
"loss": 0.0221,
"step": 156000
},
{
"epoch": 0.89,
"learning_rate": 1.1617326310465425e-05,
"loss": 0.0155,
"step": 156200
},
{
"epoch": 0.89,
"learning_rate": 1.1388166168157457e-05,
"loss": 0.0217,
"step": 156400
},
{
"epoch": 0.89,
"learning_rate": 1.1161217013807514e-05,
"loss": 0.0277,
"step": 156600
},
{
"epoch": 0.89,
"learning_rate": 1.0937599900986833e-05,
"loss": 0.0159,
"step": 156800
},
{
"epoch": 0.89,
"learning_rate": 1.0715070270453825e-05,
"loss": 0.0238,
"step": 157000
},
{
"epoch": 0.89,
"learning_rate": 1.0494760196045681e-05,
"loss": 0.0184,
"step": 157200
},
{
"epoch": 0.89,
"learning_rate": 1.0276672482197057e-05,
"loss": 0.0125,
"step": 157400
},
{
"epoch": 0.89,
"learning_rate": 1.0060809905053135e-05,
"loss": 0.0379,
"step": 157600
},
{
"epoch": 0.9,
"learning_rate": 9.84717521243414e-06,
"loss": 0.0121,
"step": 157800
},
{
"epoch": 0.9,
"learning_rate": 9.63577112380061e-06,
"loss": 0.0301,
"step": 158000
},
{
"epoch": 0.9,
"learning_rate": 9.426600330218556e-06,
"loss": 0.0153,
"step": 158200
},
{
"epoch": 0.9,
"learning_rate": 9.21966549432532e-06,
"loss": 0.0139,
"step": 158400
},
{
"epoch": 0.9,
"learning_rate": 9.014969250295535e-06,
"loss": 0.0172,
"step": 158600
},
{
"epoch": 0.9,
"learning_rate": 8.812514203807878e-06,
"loss": 0.0237,
"step": 158800
},
{
"epoch": 0.9,
"learning_rate": 8.612302932011596e-06,
"loss": 0.041,
"step": 159000
},
{
"epoch": 0.9,
"learning_rate": 8.414337983493915e-06,
"loss": 0.0103,
"step": 159200
},
{
"epoch": 0.91,
"learning_rate": 8.218621878247375e-06,
"loss": 0.0257,
"step": 159400
},
{
"epoch": 0.91,
"learning_rate": 8.025157107638079e-06,
"loss": 0.0164,
"step": 159600
},
{
"epoch": 0.91,
"learning_rate": 7.833946134373719e-06,
"loss": 0.0135,
"step": 159800
},
{
"epoch": 0.91,
"learning_rate": 7.644991392472185e-06,
"loss": 0.0143,
"step": 160000
},
{
"epoch": 0.91,
"learning_rate": 7.4582952872307985e-06,
"loss": 0.0145,
"step": 160200
},
{
"epoch": 0.91,
"learning_rate": 7.2738601951956405e-06,
"loss": 0.0213,
"step": 160400
},
{
"epoch": 0.91,
"learning_rate": 7.091688464131197e-06,
"loss": 0.0215,
"step": 160600
},
{
"epoch": 0.91,
"learning_rate": 6.911782412990403e-06,
"loss": 0.0205,
"step": 160800
},
{
"epoch": 0.91,
"learning_rate": 6.734144331885486e-06,
"loss": 0.0221,
"step": 161000
},
{
"epoch": 0.92,
"learning_rate": 6.558776482058375e-06,
"loss": 0.0125,
"step": 161200
},
{
"epoch": 0.92,
"learning_rate": 6.3856810958522425e-06,
"loss": 0.009,
"step": 161400
},
{
"epoch": 0.92,
"learning_rate": 6.215708818435388e-06,
"loss": 0.0135,
"step": 161600
},
{
"epoch": 0.92,
"learning_rate": 6.047153551194691e-06,
"loss": 0.0271,
"step": 161800
},
{
"epoch": 0.92,
"learning_rate": 5.8808772602734375e-06,
"loss": 0.0242,
"step": 162000
},
{
"epoch": 0.92,
"learning_rate": 5.7168820622837906e-06,
"loss": 0.0153,
"step": 162200
},
{
"epoch": 0.92,
"learning_rate": 5.555170044800826e-06,
"loss": 0.0159,
"step": 162400
},
{
"epoch": 0.92,
"learning_rate": 5.396534712327065e-06,
"loss": 0.0279,
"step": 162600
},
{
"epoch": 0.92,
"learning_rate": 5.239383760959439e-06,
"loss": 0.0265,
"step": 162800
},
{
"epoch": 0.93,
"learning_rate": 5.084522068407875e-06,
"loss": 0.0251,
"step": 163000
},
{
"epoch": 0.93,
"learning_rate": 4.931951605982607e-06,
"loss": 0.0141,
"step": 163200
},
{
"epoch": 0.93,
"learning_rate": 4.781674315827682e-06,
"loss": 0.0143,
"step": 163400
},
{
"epoch": 0.93,
"learning_rate": 4.633692110896181e-06,
"loss": 0.0162,
"step": 163600
},
{
"epoch": 0.93,
"learning_rate": 4.48800687492597e-06,
"loss": 0.0108,
"step": 163800
},
{
"epoch": 0.93,
"learning_rate": 4.3446204624156405e-06,
"loss": 0.0173,
"step": 164000
},
{
"epoch": 0.93,
"learning_rate": 4.203534698601004e-06,
"loss": 0.014,
"step": 164200
},
{
"epoch": 0.93,
"learning_rate": 4.064751379431683e-06,
"loss": 0.0243,
"step": 164400
},
{
"epoch": 0.93,
"learning_rate": 3.9282722715484335e-06,
"loss": 0.0189,
"step": 164600
},
{
"epoch": 0.94,
"learning_rate": 3.7940991122605757e-06,
"loss": 0.016,
"step": 164800
},
{
"epoch": 0.94,
"learning_rate": 3.662233609523829e-06,
"loss": 0.0279,
"step": 165000
},
{
"epoch": 0.94,
"learning_rate": 3.5326774419187126e-06,
"loss": 0.019,
"step": 165200
},
{
"epoch": 0.94,
"learning_rate": 3.405432258628993e-06,
"loss": 0.0136,
"step": 165400
},
{
"epoch": 0.94,
"learning_rate": 3.2804996794208474e-06,
"loss": 0.0276,
"step": 165600
},
{
"epoch": 0.94,
"learning_rate": 3.1578812946221724e-06,
"loss": 0.024,
"step": 165800
},
{
"epoch": 0.94,
"learning_rate": 3.038174415263218e-06,
"loss": 0.0373,
"step": 166000
},
{
"epoch": 0.94,
"learning_rate": 2.920177482219368e-06,
"loss": 0.0204,
"step": 166200
},
{
"epoch": 0.94,
"learning_rate": 2.804499330302412e-06,
"loss": 0.016,
"step": 166400
},
{
"epoch": 0.95,
"learning_rate": 2.691141432036144e-06,
"loss": 0.023,
"step": 166600
},
{
"epoch": 0.95,
"learning_rate": 2.5801052304086817e-06,
"loss": 0.018,
"step": 166800
},
{
"epoch": 0.95,
"learning_rate": 2.4713921388541356e-06,
"loss": 0.0156,
"step": 167000
},
{
"epoch": 0.95,
"learning_rate": 2.365003541234589e-06,
"loss": 0.0249,
"step": 167200
},
{
"epoch": 0.95,
"learning_rate": 2.2609407918225517e-06,
"loss": 0.0186,
"step": 167400
},
{
"epoch": 0.95,
"learning_rate": 2.1592052152836374e-06,
"loss": 0.0104,
"step": 167600
},
{
"epoch": 0.95,
"learning_rate": 2.059798106659755e-06,
"loss": 0.0189,
"step": 167800
},
{
"epoch": 0.95,
"learning_rate": 1.9627207313525887e-06,
"loss": 0.028,
"step": 168000
},
{
"epoch": 0.96,
"learning_rate": 1.8679743251075292e-06,
"loss": 0.0186,
"step": 168200
},
{
"epoch": 0.96,
"learning_rate": 1.7755600939978937e-06,
"loss": 0.0279,
"step": 168400
},
{
"epoch": 0.96,
"learning_rate": 1.6854792144096584e-06,
"loss": 0.0143,
"step": 168600
},
{
"epoch": 0.96,
"learning_rate": 1.5977328330263757e-06,
"loss": 0.044,
"step": 168800
},
{
"epoch": 0.96,
"learning_rate": 1.5123220668146281e-06,
"loss": 0.0219,
"step": 169000
},
{
"epoch": 0.96,
"learning_rate": 1.429248003009848e-06,
"loss": 0.0186,
"step": 169200
},
{
"epoch": 0.96,
"learning_rate": 1.3485116991023881e-06,
"loss": 0.0167,
"step": 169400
},
{
"epoch": 0.96,
"learning_rate": 1.2701141828241649e-06,
"loss": 0.0204,
"step": 169600
},
{
"epoch": 0.96,
"learning_rate": 1.1940564521355066e-06,
"loss": 0.0139,
"step": 169800
},
{
"epoch": 0.97,
"learning_rate": 1.1203394752124363e-06,
"loss": 0.028,
"step": 170000
},
{
"epoch": 0.97,
"learning_rate": 1.049315240383742e-06,
"loss": 0.022,
"step": 170200
},
{
"epoch": 0.97,
"learning_rate": 9.806102344292969e-07,
"loss": 0.0216,
"step": 170400
},
{
"epoch": 0.97,
"learning_rate": 9.138975907874411e-07,
"loss": 0.0164,
"step": 170600
},
{
"epoch": 0.97,
"learning_rate": 8.495292671902981e-07,
"loss": 0.0085,
"step": 170800
},
{
"epoch": 0.97,
"learning_rate": 7.875060830137766e-07,
"loss": 0.0185,
"step": 171000
},
{
"epoch": 0.97,
"learning_rate": 7.278288277813219e-07,
"loss": 0.0248,
"step": 171200
},
{
"epoch": 0.97,
"learning_rate": 6.704982611538892e-07,
"loss": 0.0329,
"step": 171400
},
{
"epoch": 0.97,
"learning_rate": 6.15515112920264e-07,
"loss": 0.0138,
"step": 171600
},
{
"epoch": 0.98,
"learning_rate": 5.62880082987879e-07,
"loss": 0.0128,
"step": 171800
},
{
"epoch": 0.98,
"learning_rate": 5.125938413737512e-07,
"loss": 0.0163,
"step": 172000
},
{
"epoch": 0.98,
"learning_rate": 4.646570281960375e-07,
"loss": 0.0347,
"step": 172200
},
{
"epoch": 0.98,
"learning_rate": 4.190702536658564e-07,
"loss": 0.0232,
"step": 172400
},
{
"epoch": 0.98,
"learning_rate": 3.7583409807958233e-07,
"loss": 0.0225,
"step": 172600
},
{
"epoch": 0.98,
"learning_rate": 3.3494911181136674e-07,
"loss": 0.019,
"step": 172800
},
{
"epoch": 0.98,
"learning_rate": 2.964158153061924e-07,
"loss": 0.0155,
"step": 173000
},
{
"epoch": 0.98,
"learning_rate": 2.60234699073237e-07,
"loss": 0.0186,
"step": 173200
},
{
"epoch": 0.98,
"learning_rate": 2.265695131377354e-07,
"loss": 0.0216,
"step": 173400
},
{
"epoch": 0.99,
"learning_rate": 1.9508234282376272e-07,
"loss": 0.0087,
"step": 173600
},
{
"epoch": 0.99,
"learning_rate": 1.6594864270533008e-07,
"loss": 0.0149,
"step": 173800
},
{
"epoch": 0.99,
"learning_rate": 1.3916878363954376e-07,
"loss": 0.0171,
"step": 174000
},
{
"epoch": 0.99,
"learning_rate": 1.1474310652035549e-07,
"loss": 0.025,
"step": 174200
},
{
"epoch": 0.99,
"learning_rate": 9.26719222741447e-08,
"loss": 0.0136,
"step": 174400
},
{
"epoch": 0.99,
"learning_rate": 7.295551185577334e-08,
"loss": 0.0196,
"step": 174600
},
{
"epoch": 0.99,
"learning_rate": 5.559412624511325e-08,
"loss": 0.0167,
"step": 174800
},
{
"epoch": 0.99,
"learning_rate": 4.058798644371748e-08,
"loss": 0.0247,
"step": 175000
},
{
"epoch": 0.99,
"learning_rate": 2.793728347208737e-08,
"loss": 0.0182,
"step": 175200
},
{
"epoch": 1.0,
"learning_rate": 1.7642178367165738e-08,
"loss": 0.0112,
"step": 175400
},
{
"epoch": 1.0,
"learning_rate": 9.702802180446488e-09,
"loss": 0.0229,
"step": 175600
},
{
"epoch": 1.0,
"learning_rate": 4.11925597606365e-09,
"loss": 0.0155,
"step": 175800
},
{
"epoch": 1.0,
"learning_rate": 8.916108297639875e-10,
"loss": 0.0201,
"step": 176000
},
{
"epoch": 1.0,
"eval_accuracy": 0.9876599555715365,
"eval_auc": 0.8673592596422711,
"eval_f1": 0.24899413187145658,
"eval_loss": 0.4661065936088562,
"eval_mcc": 0.3305508498121041,
"eval_precision": 0.14941997670219148,
"eval_recall": 0.7463955099754822,
"eval_runtime": 10361.3775,
"eval_samples_per_second": 43.805,
"eval_steps_per_second": 4.38,
"step": 176106
}
],
"logging_steps": 200,
"max_steps": 176106,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 3.528456960194662e+17,
"trial_name": null,
"trial_params": null
}