|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 14.942528735632184, |
|
"global_step": 46800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.031928480204342274, |
|
"learning_rate": 1.995742869306088e-05, |
|
"loss": 1.0424302673339845, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06385696040868455, |
|
"learning_rate": 1.9914857386121755e-05, |
|
"loss": 0.5240679550170898, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09578544061302682, |
|
"learning_rate": 1.9872286079182633e-05, |
|
"loss": 0.5014432144165039, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1277139208173691, |
|
"learning_rate": 1.9829714772243508e-05, |
|
"loss": 0.48912925720214845, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.15964240102171137, |
|
"learning_rate": 1.9787143465304387e-05, |
|
"loss": 0.48597183227539065, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.19157088122605365, |
|
"learning_rate": 1.974457215836526e-05, |
|
"loss": 0.4988706970214844, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.22349936143039592, |
|
"learning_rate": 1.970200085142614e-05, |
|
"loss": 0.48521129608154295, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2554278416347382, |
|
"learning_rate": 1.9659429544487015e-05, |
|
"loss": 0.4737409210205078, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.28735632183908044, |
|
"learning_rate": 1.9616858237547893e-05, |
|
"loss": 0.478267822265625, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.31928480204342274, |
|
"learning_rate": 1.957428693060877e-05, |
|
"loss": 0.46842235565185547, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.351213282247765, |
|
"learning_rate": 1.953171562366965e-05, |
|
"loss": 0.47340740203857423, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3831417624521073, |
|
"learning_rate": 1.9489144316730525e-05, |
|
"loss": 0.48022716522216796, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.41507024265644954, |
|
"learning_rate": 1.9446573009791403e-05, |
|
"loss": 0.4702701950073242, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.44699872286079184, |
|
"learning_rate": 1.9404001702852278e-05, |
|
"loss": 0.47483158111572266, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.4789272030651341, |
|
"learning_rate": 1.9361430395913157e-05, |
|
"loss": 0.45973930358886717, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5108556832694764, |
|
"learning_rate": 1.9318859088974035e-05, |
|
"loss": 0.43192115783691404, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5427841634738186, |
|
"learning_rate": 1.927628778203491e-05, |
|
"loss": 0.44062965393066406, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5747126436781609, |
|
"learning_rate": 1.923371647509579e-05, |
|
"loss": 0.460767822265625, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6066411238825032, |
|
"learning_rate": 1.9191145168156663e-05, |
|
"loss": 0.4533885192871094, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6385696040868455, |
|
"learning_rate": 1.914857386121754e-05, |
|
"loss": 0.4435346221923828, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6704980842911877, |
|
"learning_rate": 1.9106002554278417e-05, |
|
"loss": 0.4584750747680664, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.70242656449553, |
|
"learning_rate": 1.9063431247339295e-05, |
|
"loss": 0.43961280822753906, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7343550446998723, |
|
"learning_rate": 1.902085994040017e-05, |
|
"loss": 0.43362281799316404, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7662835249042146, |
|
"learning_rate": 1.8978288633461048e-05, |
|
"loss": 0.4443118286132812, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7982120051085568, |
|
"learning_rate": 1.8935717326521927e-05, |
|
"loss": 0.4438581848144531, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8301404853128991, |
|
"learning_rate": 1.88931460195828e-05, |
|
"loss": 0.4310215759277344, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8620689655172413, |
|
"learning_rate": 1.885057471264368e-05, |
|
"loss": 0.4399332809448242, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8939974457215837, |
|
"learning_rate": 1.8808003405704555e-05, |
|
"loss": 0.4511014175415039, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.9259259259259259, |
|
"learning_rate": 1.8765432098765433e-05, |
|
"loss": 0.44327774047851565, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.9578544061302682, |
|
"learning_rate": 1.872286079182631e-05, |
|
"loss": 0.43171138763427735, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.9897828863346104, |
|
"learning_rate": 1.868028948488719e-05, |
|
"loss": 0.4460626983642578, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.3910863995552063, |
|
"step": 3132 |
|
}, |
|
{ |
|
"epoch": 1.0217113665389528, |
|
"learning_rate": 1.8637718177948065e-05, |
|
"loss": 0.43170944213867185, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.053639846743295, |
|
"learning_rate": 1.8595146871008943e-05, |
|
"loss": 0.42756095886230466, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.0855683269476373, |
|
"learning_rate": 1.8552575564069818e-05, |
|
"loss": 0.423099479675293, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.1174968071519795, |
|
"learning_rate": 1.8510004257130697e-05, |
|
"loss": 0.42962390899658204, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.1494252873563218, |
|
"learning_rate": 1.846743295019157e-05, |
|
"loss": 0.4373976898193359, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.181353767560664, |
|
"learning_rate": 1.842486164325245e-05, |
|
"loss": 0.4381977462768555, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.2132822477650063, |
|
"learning_rate": 1.8382290336313325e-05, |
|
"loss": 0.4158343887329102, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.2452107279693487, |
|
"learning_rate": 1.8339719029374203e-05, |
|
"loss": 0.4281564712524414, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.277139208173691, |
|
"learning_rate": 1.829714772243508e-05, |
|
"loss": 0.41806602478027344, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.3090676883780332, |
|
"learning_rate": 1.8254576415495957e-05, |
|
"loss": 0.4173674011230469, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.3409961685823755, |
|
"learning_rate": 1.8212005108556835e-05, |
|
"loss": 0.43225109100341796, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.3729246487867177, |
|
"learning_rate": 1.816943380161771e-05, |
|
"loss": 0.421945686340332, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.40485312899106, |
|
"learning_rate": 1.812686249467859e-05, |
|
"loss": 0.42005214691162107, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.4367816091954024, |
|
"learning_rate": 1.8084291187739463e-05, |
|
"loss": 0.40552581787109376, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.4687100893997447, |
|
"learning_rate": 1.804171988080034e-05, |
|
"loss": 0.41495433807373044, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.500638569604087, |
|
"learning_rate": 1.7999148573861217e-05, |
|
"loss": 0.4268427658081055, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.5325670498084292, |
|
"learning_rate": 1.7956577266922095e-05, |
|
"loss": 0.4178670120239258, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.5644955300127714, |
|
"learning_rate": 1.7914005959982973e-05, |
|
"loss": 0.4088083267211914, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.5964240102171137, |
|
"learning_rate": 1.787143465304385e-05, |
|
"loss": 0.4090264129638672, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.628352490421456, |
|
"learning_rate": 1.7828863346104727e-05, |
|
"loss": 0.39727077484130857, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.6602809706257982, |
|
"learning_rate": 1.7786292039165605e-05, |
|
"loss": 0.40877700805664063, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.6922094508301404, |
|
"learning_rate": 1.774372073222648e-05, |
|
"loss": 0.4055898666381836, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.7241379310344827, |
|
"learning_rate": 1.770114942528736e-05, |
|
"loss": 0.40825370788574217, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.756066411238825, |
|
"learning_rate": 1.7658578118348237e-05, |
|
"loss": 0.39790542602539064, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.7879948914431671, |
|
"learning_rate": 1.761600681140911e-05, |
|
"loss": 0.4129365158081055, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.8199233716475096, |
|
"learning_rate": 1.757343550446999e-05, |
|
"loss": 0.4137036895751953, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.8518518518518519, |
|
"learning_rate": 1.7530864197530865e-05, |
|
"loss": 0.41169502258300783, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.883780332056194, |
|
"learning_rate": 1.7488292890591743e-05, |
|
"loss": 0.3932318115234375, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.9157088122605364, |
|
"learning_rate": 1.7445721583652618e-05, |
|
"loss": 0.40273929595947267, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.9476372924648788, |
|
"learning_rate": 1.7403150276713497e-05, |
|
"loss": 0.4069852066040039, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.979565772669221, |
|
"learning_rate": 1.736057896977437e-05, |
|
"loss": 0.40934764862060546, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.3667986989021301, |
|
"step": 6264 |
|
}, |
|
{ |
|
"epoch": 2.0114942528735633, |
|
"learning_rate": 1.731800766283525e-05, |
|
"loss": 0.40322193145751956, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.0434227330779056, |
|
"learning_rate": 1.727543635589613e-05, |
|
"loss": 0.3913343048095703, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.075351213282248, |
|
"learning_rate": 1.7232865048957003e-05, |
|
"loss": 0.4131240081787109, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.10727969348659, |
|
"learning_rate": 1.719029374201788e-05, |
|
"loss": 0.394369010925293, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.1392081736909323, |
|
"learning_rate": 1.7147722435078757e-05, |
|
"loss": 0.3991780471801758, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.1711366538952745, |
|
"learning_rate": 1.7105151128139635e-05, |
|
"loss": 0.3874116134643555, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.203065134099617, |
|
"learning_rate": 1.706257982120051e-05, |
|
"loss": 0.387044792175293, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.234993614303959, |
|
"learning_rate": 1.702000851426139e-05, |
|
"loss": 0.39865818023681643, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.2669220945083013, |
|
"learning_rate": 1.6977437207322267e-05, |
|
"loss": 0.3999287414550781, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.2988505747126435, |
|
"learning_rate": 1.6934865900383145e-05, |
|
"loss": 0.4086351013183594, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.330779054916986, |
|
"learning_rate": 1.689229459344402e-05, |
|
"loss": 0.3991414642333984, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.362707535121328, |
|
"learning_rate": 1.68497232865049e-05, |
|
"loss": 0.3910430145263672, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.3946360153256707, |
|
"learning_rate": 1.6807151979565773e-05, |
|
"loss": 0.39477340698242186, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.4265644955300125, |
|
"learning_rate": 1.676458067262665e-05, |
|
"loss": 0.38976318359375, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.458492975734355, |
|
"learning_rate": 1.6722009365687527e-05, |
|
"loss": 0.38246253967285154, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.4904214559386975, |
|
"learning_rate": 1.6679438058748405e-05, |
|
"loss": 0.38488063812255857, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.5223499361430397, |
|
"learning_rate": 1.6636866751809283e-05, |
|
"loss": 0.3952900695800781, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.554278416347382, |
|
"learning_rate": 1.6594295444870158e-05, |
|
"loss": 0.3940334701538086, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.586206896551724, |
|
"learning_rate": 1.6551724137931037e-05, |
|
"loss": 0.3789644622802734, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.6181353767560664, |
|
"learning_rate": 1.650915283099191e-05, |
|
"loss": 0.38175716400146487, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.6500638569604087, |
|
"learning_rate": 1.646658152405279e-05, |
|
"loss": 0.3957417678833008, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.681992337164751, |
|
"learning_rate": 1.6424010217113665e-05, |
|
"loss": 0.3931695556640625, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.713920817369093, |
|
"learning_rate": 1.6381438910174543e-05, |
|
"loss": 0.3700098419189453, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.7458492975734354, |
|
"learning_rate": 1.6338867603235418e-05, |
|
"loss": 0.3986296463012695, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"learning_rate": 1.6296296296296297e-05, |
|
"loss": 0.377045783996582, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.80970625798212, |
|
"learning_rate": 1.6253724989357175e-05, |
|
"loss": 0.38455604553222655, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.841634738186462, |
|
"learning_rate": 1.6211153682418053e-05, |
|
"loss": 0.39248775482177733, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.873563218390805, |
|
"learning_rate": 1.6168582375478928e-05, |
|
"loss": 0.3952408599853516, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.9054916985951467, |
|
"learning_rate": 1.6126011068539807e-05, |
|
"loss": 0.38378463745117186, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.9374201787994894, |
|
"learning_rate": 1.608343976160068e-05, |
|
"loss": 0.3779494857788086, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.969348659003831, |
|
"learning_rate": 1.604086845466156e-05, |
|
"loss": 0.39154972076416017, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.3532629609107971, |
|
"step": 9396 |
|
}, |
|
{ |
|
"epoch": 3.001277139208174, |
|
"learning_rate": 1.599829714772244e-05, |
|
"loss": 0.38505748748779295, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 3.033205619412516, |
|
"learning_rate": 1.5955725840783313e-05, |
|
"loss": 0.37334869384765623, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.0651340996168583, |
|
"learning_rate": 1.591315453384419e-05, |
|
"loss": 0.36475982666015627, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 3.0970625798212006, |
|
"learning_rate": 1.5870583226905067e-05, |
|
"loss": 0.3732810592651367, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 3.128991060025543, |
|
"learning_rate": 1.5828011919965945e-05, |
|
"loss": 0.36443687438964845, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 3.160919540229885, |
|
"learning_rate": 1.578544061302682e-05, |
|
"loss": 0.38134773254394533, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 3.1928480204342273, |
|
"learning_rate": 1.57428693060877e-05, |
|
"loss": 0.3660939407348633, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.2247765006385696, |
|
"learning_rate": 1.5700297999148573e-05, |
|
"loss": 0.38626991271972655, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 3.256704980842912, |
|
"learning_rate": 1.565772669220945e-05, |
|
"loss": 0.36326351165771487, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 3.288633461047254, |
|
"learning_rate": 1.561515538527033e-05, |
|
"loss": 0.3856014633178711, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 3.3205619412515963, |
|
"learning_rate": 1.5572584078331205e-05, |
|
"loss": 0.38314430236816405, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 3.3524904214559386, |
|
"learning_rate": 1.5530012771392083e-05, |
|
"loss": 0.3787594223022461, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.384418901660281, |
|
"learning_rate": 1.5487441464452958e-05, |
|
"loss": 0.37935165405273436, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 3.416347381864623, |
|
"learning_rate": 1.5444870157513837e-05, |
|
"loss": 0.3672695541381836, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 3.4482758620689653, |
|
"learning_rate": 1.540229885057471e-05, |
|
"loss": 0.3799928283691406, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 3.480204342273308, |
|
"learning_rate": 1.5359727543635593e-05, |
|
"loss": 0.381710090637207, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 3.51213282247765, |
|
"learning_rate": 1.531715623669647e-05, |
|
"loss": 0.35085960388183596, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.5440613026819925, |
|
"learning_rate": 1.5274584929757347e-05, |
|
"loss": 0.37504680633544923, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 3.5759897828863347, |
|
"learning_rate": 1.5232013622818223e-05, |
|
"loss": 0.36862171173095704, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 3.607918263090677, |
|
"learning_rate": 1.51894423158791e-05, |
|
"loss": 0.3727375793457031, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 3.6398467432950192, |
|
"learning_rate": 1.5146871008939977e-05, |
|
"loss": 0.37135467529296873, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 3.6717752234993615, |
|
"learning_rate": 1.5104299702000853e-05, |
|
"loss": 0.3693832778930664, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.7037037037037037, |
|
"learning_rate": 1.506172839506173e-05, |
|
"loss": 0.36289344787597655, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 3.735632183908046, |
|
"learning_rate": 1.5019157088122607e-05, |
|
"loss": 0.3692543411254883, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 3.767560664112388, |
|
"learning_rate": 1.4976585781183483e-05, |
|
"loss": 0.3623777770996094, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 3.7994891443167305, |
|
"learning_rate": 1.493401447424436e-05, |
|
"loss": 0.3629803848266602, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 3.8314176245210727, |
|
"learning_rate": 1.4891443167305237e-05, |
|
"loss": 0.3619497680664063, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.863346104725415, |
|
"learning_rate": 1.4848871860366115e-05, |
|
"loss": 0.35669734954833987, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 3.895274584929757, |
|
"learning_rate": 1.4806300553426992e-05, |
|
"loss": 0.359210205078125, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 3.9272030651340994, |
|
"learning_rate": 1.4763729246487868e-05, |
|
"loss": 0.3655078887939453, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 3.959131545338442, |
|
"learning_rate": 1.4721157939548745e-05, |
|
"loss": 0.3666878890991211, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 3.991060025542784, |
|
"learning_rate": 1.4678586632609622e-05, |
|
"loss": 0.3504248809814453, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.3464440107345581, |
|
"step": 12528 |
|
}, |
|
{ |
|
"epoch": 4.022988505747127, |
|
"learning_rate": 1.4636015325670498e-05, |
|
"loss": 0.35299652099609374, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 4.054916985951468, |
|
"learning_rate": 1.4593444018731375e-05, |
|
"loss": 0.3569066619873047, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 4.086845466155811, |
|
"learning_rate": 1.4550872711792252e-05, |
|
"loss": 0.3493093490600586, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 4.118773946360153, |
|
"learning_rate": 1.4508301404853132e-05, |
|
"loss": 0.3634659957885742, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 4.150702426564496, |
|
"learning_rate": 1.4465730097914008e-05, |
|
"loss": 0.37093032836914064, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.182630906768837, |
|
"learning_rate": 1.4423158790974885e-05, |
|
"loss": 0.37300064086914064, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 4.21455938697318, |
|
"learning_rate": 1.4380587484035762e-05, |
|
"loss": 0.358408203125, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 4.246487867177523, |
|
"learning_rate": 1.4338016177096638e-05, |
|
"loss": 0.3569377517700195, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 4.278416347381865, |
|
"learning_rate": 1.4295444870157515e-05, |
|
"loss": 0.3640250015258789, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 4.310344827586207, |
|
"learning_rate": 1.4252873563218392e-05, |
|
"loss": 0.3725627517700195, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.342273307790549, |
|
"learning_rate": 1.421030225627927e-05, |
|
"loss": 0.3584425354003906, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 4.374201787994892, |
|
"learning_rate": 1.4167730949340147e-05, |
|
"loss": 0.37500988006591796, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 4.406130268199234, |
|
"learning_rate": 1.4125159642401023e-05, |
|
"loss": 0.35521396636962893, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 4.438058748403576, |
|
"learning_rate": 1.40825883354619e-05, |
|
"loss": 0.35183258056640626, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 4.469987228607918, |
|
"learning_rate": 1.4040017028522777e-05, |
|
"loss": 0.35960380554199217, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.501915708812261, |
|
"learning_rate": 1.3997445721583653e-05, |
|
"loss": 0.3571435546875, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 4.533844189016603, |
|
"learning_rate": 1.395487441464453e-05, |
|
"loss": 0.35867115020751955, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 4.565772669220945, |
|
"learning_rate": 1.3912303107705407e-05, |
|
"loss": 0.34017555236816405, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 4.597701149425287, |
|
"learning_rate": 1.3869731800766283e-05, |
|
"loss": 0.3735758209228516, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 4.62962962962963, |
|
"learning_rate": 1.3827160493827162e-05, |
|
"loss": 0.36705623626708983, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.661558109833972, |
|
"learning_rate": 1.3784589186888038e-05, |
|
"loss": 0.3525060272216797, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 4.693486590038314, |
|
"learning_rate": 1.3742017879948915e-05, |
|
"loss": 0.3783878326416016, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 4.725415070242656, |
|
"learning_rate": 1.3699446573009792e-05, |
|
"loss": 0.3659477615356445, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 4.757343550446999, |
|
"learning_rate": 1.365687526607067e-05, |
|
"loss": 0.3537181091308594, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 4.789272030651341, |
|
"learning_rate": 1.3614303959131547e-05, |
|
"loss": 0.36351833343505857, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.821200510855683, |
|
"learning_rate": 1.3571732652192425e-05, |
|
"loss": 0.3483928298950195, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 4.853128991060025, |
|
"learning_rate": 1.3529161345253302e-05, |
|
"loss": 0.35997581481933594, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 4.885057471264368, |
|
"learning_rate": 1.3486590038314178e-05, |
|
"loss": 0.36364505767822264, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 4.91698595146871, |
|
"learning_rate": 1.3444018731375055e-05, |
|
"loss": 0.3437914276123047, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 4.948914431673052, |
|
"learning_rate": 1.3401447424435932e-05, |
|
"loss": 0.3517444610595703, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.980842911877395, |
|
"learning_rate": 1.3358876117496808e-05, |
|
"loss": 0.3623521041870117, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.33480075001716614, |
|
"step": 15660 |
|
}, |
|
{ |
|
"epoch": 5.012771392081737, |
|
"learning_rate": 1.3316304810557685e-05, |
|
"loss": 0.3527105712890625, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 5.044699872286079, |
|
"learning_rate": 1.3273733503618562e-05, |
|
"loss": 0.3470969009399414, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 5.076628352490421, |
|
"learning_rate": 1.3231162196679438e-05, |
|
"loss": 0.34361572265625, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 5.108556832694764, |
|
"learning_rate": 1.3188590889740317e-05, |
|
"loss": 0.35975650787353514, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 5.140485312899106, |
|
"learning_rate": 1.3146019582801193e-05, |
|
"loss": 0.34139663696289063, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 5.172413793103448, |
|
"learning_rate": 1.310344827586207e-05, |
|
"loss": 0.34681896209716795, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 5.20434227330779, |
|
"learning_rate": 1.3060876968922947e-05, |
|
"loss": 0.34409351348876954, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 5.236270753512133, |
|
"learning_rate": 1.3018305661983823e-05, |
|
"loss": 0.3398979949951172, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 5.268199233716475, |
|
"learning_rate": 1.29757343550447e-05, |
|
"loss": 0.3437363815307617, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 5.300127713920817, |
|
"learning_rate": 1.2933163048105577e-05, |
|
"loss": 0.33456321716308596, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 5.33205619412516, |
|
"learning_rate": 1.2890591741166453e-05, |
|
"loss": 0.3437419128417969, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 5.363984674329502, |
|
"learning_rate": 1.2848020434227333e-05, |
|
"loss": 0.3656898880004883, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 5.395913154533845, |
|
"learning_rate": 1.280544912728821e-05, |
|
"loss": 0.33624221801757814, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 5.427841634738186, |
|
"learning_rate": 1.2762877820349087e-05, |
|
"loss": 0.35640827178955076, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 5.459770114942529, |
|
"learning_rate": 1.2720306513409963e-05, |
|
"loss": 0.34689849853515625, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 5.491698595146871, |
|
"learning_rate": 1.267773520647084e-05, |
|
"loss": 0.3492275238037109, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 5.5236270753512136, |
|
"learning_rate": 1.2635163899531717e-05, |
|
"loss": 0.3490084457397461, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"learning_rate": 1.2592592592592593e-05, |
|
"loss": 0.33231266021728517, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 5.587484035759898, |
|
"learning_rate": 1.2550021285653472e-05, |
|
"loss": 0.3501285552978516, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 5.61941251596424, |
|
"learning_rate": 1.2507449978714348e-05, |
|
"loss": 0.3469379425048828, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 5.6513409961685825, |
|
"learning_rate": 1.2464878671775225e-05, |
|
"loss": 0.3600951766967773, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 5.683269476372924, |
|
"learning_rate": 1.2422307364836102e-05, |
|
"loss": 0.3439628982543945, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 5.715197956577267, |
|
"learning_rate": 1.2379736057896978e-05, |
|
"loss": 0.34368560791015623, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 5.747126436781609, |
|
"learning_rate": 1.2337164750957855e-05, |
|
"loss": 0.3624436950683594, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.7790549169859515, |
|
"learning_rate": 1.2294593444018732e-05, |
|
"loss": 0.3493986129760742, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 5.810983397190293, |
|
"learning_rate": 1.2252022137079608e-05, |
|
"loss": 0.33745758056640623, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 5.842911877394636, |
|
"learning_rate": 1.2209450830140485e-05, |
|
"loss": 0.3514177703857422, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 5.874840357598979, |
|
"learning_rate": 1.2166879523201363e-05, |
|
"loss": 0.3409608459472656, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 5.9067688378033205, |
|
"learning_rate": 1.212430821626224e-05, |
|
"loss": 0.3416262054443359, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 5.938697318007663, |
|
"learning_rate": 1.2081736909323117e-05, |
|
"loss": 0.33906620025634765, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 5.970625798212005, |
|
"learning_rate": 1.2039165602383993e-05, |
|
"loss": 0.3565713119506836, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.32608646154403687, |
|
"step": 18792 |
|
}, |
|
{ |
|
"epoch": 6.002554278416348, |
|
"learning_rate": 1.1996594295444872e-05, |
|
"loss": 0.3423441314697266, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 6.0344827586206895, |
|
"learning_rate": 1.1954022988505748e-05, |
|
"loss": 0.3439017105102539, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 6.066411238825032, |
|
"learning_rate": 1.1911451681566627e-05, |
|
"loss": 0.33277828216552735, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 6.098339719029374, |
|
"learning_rate": 1.1868880374627503e-05, |
|
"loss": 0.3349918365478516, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 6.130268199233717, |
|
"learning_rate": 1.182630906768838e-05, |
|
"loss": 0.3411783981323242, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 6.1621966794380585, |
|
"learning_rate": 1.1783737760749257e-05, |
|
"loss": 0.34938507080078124, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 6.194125159642401, |
|
"learning_rate": 1.1741166453810133e-05, |
|
"loss": 0.3302972412109375, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 6.226053639846743, |
|
"learning_rate": 1.169859514687101e-05, |
|
"loss": 0.3324479293823242, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 6.257982120051086, |
|
"learning_rate": 1.1656023839931887e-05, |
|
"loss": 0.3325730133056641, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 6.2899106002554275, |
|
"learning_rate": 1.1613452532992763e-05, |
|
"loss": 0.32390750885009767, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 6.32183908045977, |
|
"learning_rate": 1.157088122605364e-05, |
|
"loss": 0.3410587692260742, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 6.353767560664112, |
|
"learning_rate": 1.1528309919114518e-05, |
|
"loss": 0.33700084686279297, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 6.385696040868455, |
|
"learning_rate": 1.1485738612175395e-05, |
|
"loss": 0.343173942565918, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 6.417624521072797, |
|
"learning_rate": 1.1443167305236272e-05, |
|
"loss": 0.34580535888671876, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 6.449553001277139, |
|
"learning_rate": 1.1400595998297148e-05, |
|
"loss": 0.3282489395141602, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 6.481481481481482, |
|
"learning_rate": 1.1358024691358025e-05, |
|
"loss": 0.32179367065429687, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 6.513409961685824, |
|
"learning_rate": 1.1315453384418902e-05, |
|
"loss": 0.34018295288085937, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 6.545338441890166, |
|
"learning_rate": 1.1272882077479778e-05, |
|
"loss": 0.3345378494262695, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 6.577266922094508, |
|
"learning_rate": 1.1230310770540655e-05, |
|
"loss": 0.34897972106933595, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 6.609195402298851, |
|
"learning_rate": 1.1187739463601532e-05, |
|
"loss": 0.3494709014892578, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 6.641123882503193, |
|
"learning_rate": 1.1145168156662412e-05, |
|
"loss": 0.3467377090454102, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 6.673052362707535, |
|
"learning_rate": 1.1102596849723288e-05, |
|
"loss": 0.3386357116699219, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 6.704980842911877, |
|
"learning_rate": 1.1060025542784165e-05, |
|
"loss": 0.3346070098876953, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 6.73690932311622, |
|
"learning_rate": 1.1017454235845042e-05, |
|
"loss": 0.3389591598510742, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 6.768837803320562, |
|
"learning_rate": 1.0974882928905918e-05, |
|
"loss": 0.33276222229003904, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 6.800766283524904, |
|
"learning_rate": 1.0932311621966795e-05, |
|
"loss": 0.3417454528808594, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 6.832694763729246, |
|
"learning_rate": 1.0889740315027673e-05, |
|
"loss": 0.3352804183959961, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 6.864623243933589, |
|
"learning_rate": 1.084716900808855e-05, |
|
"loss": 0.341392822265625, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 6.896551724137931, |
|
"learning_rate": 1.0804597701149427e-05, |
|
"loss": 0.3257337188720703, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 6.928480204342273, |
|
"learning_rate": 1.0762026394210303e-05, |
|
"loss": 0.340169792175293, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 6.960408684546616, |
|
"learning_rate": 1.071945508727118e-05, |
|
"loss": 0.3393547821044922, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 6.992337164750958, |
|
"learning_rate": 1.0676883780332057e-05, |
|
"loss": 0.3345566940307617, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.3236748278141022, |
|
"step": 21924 |
|
}, |
|
{ |
|
"epoch": 7.0242656449553005, |
|
"learning_rate": 1.0634312473392933e-05, |
|
"loss": 0.341474609375, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 7.056194125159642, |
|
"learning_rate": 1.059174116645381e-05, |
|
"loss": 0.33665103912353517, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 7.088122605363985, |
|
"learning_rate": 1.0549169859514687e-05, |
|
"loss": 0.33666282653808594, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 7.120051085568327, |
|
"learning_rate": 1.0506598552575565e-05, |
|
"loss": 0.32267608642578127, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 7.1519795657726695, |
|
"learning_rate": 1.0464027245636442e-05, |
|
"loss": 0.32230213165283206, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 7.183908045977011, |
|
"learning_rate": 1.0421455938697318e-05, |
|
"loss": 0.3335090637207031, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 7.215836526181354, |
|
"learning_rate": 1.0378884631758195e-05, |
|
"loss": 0.3402565002441406, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 7.247765006385696, |
|
"learning_rate": 1.0336313324819072e-05, |
|
"loss": 0.33222633361816406, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 7.2796934865900385, |
|
"learning_rate": 1.029374201787995e-05, |
|
"loss": 0.3273036956787109, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 7.31162196679438, |
|
"learning_rate": 1.0251170710940828e-05, |
|
"loss": 0.34040824890136717, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 7.343550446998723, |
|
"learning_rate": 1.0208599404001705e-05, |
|
"loss": 0.33351837158203124, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 7.375478927203065, |
|
"learning_rate": 1.0166028097062582e-05, |
|
"loss": 0.3304040145874023, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 7.407407407407407, |
|
"learning_rate": 1.0123456790123458e-05, |
|
"loss": 0.3389059829711914, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 7.439335887611749, |
|
"learning_rate": 1.0080885483184335e-05, |
|
"loss": 0.35038864135742187, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 7.471264367816092, |
|
"learning_rate": 1.0038314176245212e-05, |
|
"loss": 0.32688159942626954, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 7.503192848020435, |
|
"learning_rate": 9.995742869306088e-06, |
|
"loss": 0.3443561935424805, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 7.535121328224776, |
|
"learning_rate": 9.953171562366965e-06, |
|
"loss": 0.33195884704589845, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 7.567049808429119, |
|
"learning_rate": 9.910600255427842e-06, |
|
"loss": 0.3439883041381836, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 7.598978288633461, |
|
"learning_rate": 9.86802894848872e-06, |
|
"loss": 0.32301868438720704, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 7.630906768837804, |
|
"learning_rate": 9.825457641549597e-06, |
|
"loss": 0.3437799072265625, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 7.662835249042145, |
|
"learning_rate": 9.782886334610473e-06, |
|
"loss": 0.3296714401245117, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 7.694763729246488, |
|
"learning_rate": 9.74031502767135e-06, |
|
"loss": 0.3370498275756836, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 7.72669220945083, |
|
"learning_rate": 9.697743720732228e-06, |
|
"loss": 0.3219729232788086, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 7.758620689655173, |
|
"learning_rate": 9.655172413793105e-06, |
|
"loss": 0.3258438491821289, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 7.790549169859514, |
|
"learning_rate": 9.612601106853982e-06, |
|
"loss": 0.33826839447021484, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 7.822477650063857, |
|
"learning_rate": 9.570029799914858e-06, |
|
"loss": 0.3263700866699219, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 7.854406130268199, |
|
"learning_rate": 9.527458492975735e-06, |
|
"loss": 0.3350722122192383, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 7.886334610472542, |
|
"learning_rate": 9.484887186036612e-06, |
|
"loss": 0.334184455871582, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 7.918263090676884, |
|
"learning_rate": 9.442315879097488e-06, |
|
"loss": 0.3267938232421875, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 7.950191570881226, |
|
"learning_rate": 9.399744572158365e-06, |
|
"loss": 0.3196305465698242, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 7.982120051085568, |
|
"learning_rate": 9.357173265219243e-06, |
|
"loss": 0.3230043029785156, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.32138851284980774, |
|
"step": 25056 |
|
}, |
|
{ |
|
"epoch": 8.01404853128991, |
|
"learning_rate": 9.31460195828012e-06, |
|
"loss": 0.3258121109008789, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 8.045977011494253, |
|
"learning_rate": 9.272030651340997e-06, |
|
"loss": 0.32868377685546873, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 8.077905491698596, |
|
"learning_rate": 9.229459344401875e-06, |
|
"loss": 0.31930324554443357, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 8.109833971902937, |
|
"learning_rate": 9.186888037462752e-06, |
|
"loss": 0.3216040420532227, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 8.14176245210728, |
|
"learning_rate": 9.144316730523628e-06, |
|
"loss": 0.3392459869384766, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 8.173690932311622, |
|
"learning_rate": 9.101745423584505e-06, |
|
"loss": 0.3244002914428711, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 8.205619412515965, |
|
"learning_rate": 9.059174116645382e-06, |
|
"loss": 0.329796142578125, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 8.237547892720306, |
|
"learning_rate": 9.016602809706258e-06, |
|
"loss": 0.310886344909668, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 8.269476372924649, |
|
"learning_rate": 8.974031502767135e-06, |
|
"loss": 0.3182815361022949, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 8.301404853128991, |
|
"learning_rate": 8.931460195828012e-06, |
|
"loss": 0.3267630386352539, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.31576622009277344, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 8.365261813537675, |
|
"learning_rate": 8.846317581949767e-06, |
|
"loss": 0.33119094848632813, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 8.397190293742018, |
|
"learning_rate": 8.803746275010643e-06, |
|
"loss": 0.3301513671875, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 8.42911877394636, |
|
"learning_rate": 8.76117496807152e-06, |
|
"loss": 0.3321194839477539, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 8.461047254150703, |
|
"learning_rate": 8.718603661132398e-06, |
|
"loss": 0.332429313659668, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 8.492975734355046, |
|
"learning_rate": 8.676032354193275e-06, |
|
"loss": 0.3258559036254883, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 8.524904214559387, |
|
"learning_rate": 8.633461047254152e-06, |
|
"loss": 0.32335933685302737, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 8.55683269476373, |
|
"learning_rate": 8.590889740315028e-06, |
|
"loss": 0.3441028594970703, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 8.588761174968072, |
|
"learning_rate": 8.548318433375905e-06, |
|
"loss": 0.3302944564819336, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 8.620689655172415, |
|
"learning_rate": 8.505747126436782e-06, |
|
"loss": 0.3203293228149414, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 8.652618135376756, |
|
"learning_rate": 8.463175819497658e-06, |
|
"loss": 0.3180072593688965, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 8.684546615581098, |
|
"learning_rate": 8.420604512558537e-06, |
|
"loss": 0.3143235015869141, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 8.71647509578544, |
|
"learning_rate": 8.378033205619413e-06, |
|
"loss": 0.31695037841796875, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 8.748403575989784, |
|
"learning_rate": 8.33546189868029e-06, |
|
"loss": 0.31933542251586916, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 8.780332056194124, |
|
"learning_rate": 8.292890591741167e-06, |
|
"loss": 0.3094404983520508, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 8.812260536398467, |
|
"learning_rate": 8.250319284802043e-06, |
|
"loss": 0.3251906204223633, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 8.84418901660281, |
|
"learning_rate": 8.207747977862922e-06, |
|
"loss": 0.31949323654174805, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 8.876117496807153, |
|
"learning_rate": 8.165176670923798e-06, |
|
"loss": 0.32407459259033206, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 8.908045977011493, |
|
"learning_rate": 8.122605363984675e-06, |
|
"loss": 0.34080780029296875, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 8.939974457215836, |
|
"learning_rate": 8.080034057045552e-06, |
|
"loss": 0.3133597183227539, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 8.971902937420179, |
|
"learning_rate": 8.037462750106428e-06, |
|
"loss": 0.31053606033325193, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.3119257688522339, |
|
"step": 28188 |
|
}, |
|
{ |
|
"epoch": 9.003831417624522, |
|
"learning_rate": 7.994891443167307e-06, |
|
"loss": 0.3158924293518066, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 9.035759897828862, |
|
"learning_rate": 7.952320136228183e-06, |
|
"loss": 0.3117160987854004, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 9.067688378033205, |
|
"learning_rate": 7.90974882928906e-06, |
|
"loss": 0.32216869354248046, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 9.099616858237548, |
|
"learning_rate": 7.867177522349937e-06, |
|
"loss": 0.314890079498291, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 9.13154533844189, |
|
"learning_rate": 7.824606215410813e-06, |
|
"loss": 0.3113277626037598, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 9.163473818646233, |
|
"learning_rate": 7.78203490847169e-06, |
|
"loss": 0.324998779296875, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 9.195402298850574, |
|
"learning_rate": 7.739463601532567e-06, |
|
"loss": 0.32509784698486327, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 9.227330779054917, |
|
"learning_rate": 7.696892294593445e-06, |
|
"loss": 0.3238474273681641, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 9.25925925925926, |
|
"learning_rate": 7.654320987654322e-06, |
|
"loss": 0.33168052673339843, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 9.291187739463602, |
|
"learning_rate": 7.611749680715198e-06, |
|
"loss": 0.3050485992431641, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 9.323116219667943, |
|
"learning_rate": 7.569178373776076e-06, |
|
"loss": 0.31826154708862303, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 9.355044699872286, |
|
"learning_rate": 7.5266070668369525e-06, |
|
"loss": 0.31972583770751956, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 9.386973180076629, |
|
"learning_rate": 7.48403575989783e-06, |
|
"loss": 0.3287076568603516, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 9.418901660280971, |
|
"learning_rate": 7.441464452958707e-06, |
|
"loss": 0.30761892318725587, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 9.450830140485312, |
|
"learning_rate": 7.398893146019583e-06, |
|
"loss": 0.3097171401977539, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 9.482758620689655, |
|
"learning_rate": 7.35632183908046e-06, |
|
"loss": 0.30806644439697267, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 9.514687100893997, |
|
"learning_rate": 7.3137505321413375e-06, |
|
"loss": 0.3174296760559082, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 9.54661558109834, |
|
"learning_rate": 7.271179225202214e-06, |
|
"loss": 0.31353973388671874, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 9.578544061302683, |
|
"learning_rate": 7.228607918263091e-06, |
|
"loss": 0.30578601837158204, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 9.610472541507024, |
|
"learning_rate": 7.1860366113239675e-06, |
|
"loss": 0.3045210838317871, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 9.642401021711366, |
|
"learning_rate": 7.143465304384846e-06, |
|
"loss": 0.3149559211730957, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 9.67432950191571, |
|
"learning_rate": 7.1008939974457225e-06, |
|
"loss": 0.3207520294189453, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 9.706257982120052, |
|
"learning_rate": 7.058322690506599e-06, |
|
"loss": 0.32308254241943357, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 9.738186462324393, |
|
"learning_rate": 7.015751383567476e-06, |
|
"loss": 0.3183433723449707, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 9.770114942528735, |
|
"learning_rate": 6.973180076628353e-06, |
|
"loss": 0.3200767135620117, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 9.802043422733078, |
|
"learning_rate": 6.93060876968923e-06, |
|
"loss": 0.3113987922668457, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 9.83397190293742, |
|
"learning_rate": 6.888037462750107e-06, |
|
"loss": 0.33167327880859376, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 9.865900383141762, |
|
"learning_rate": 6.845466155810983e-06, |
|
"loss": 0.3172581100463867, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 9.897828863346104, |
|
"learning_rate": 6.802894848871861e-06, |
|
"loss": 0.3124402046203613, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 9.929757343550447, |
|
"learning_rate": 6.760323541932738e-06, |
|
"loss": 0.30687171936035157, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 9.96168582375479, |
|
"learning_rate": 6.717752234993615e-06, |
|
"loss": 0.304738712310791, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 9.99361430395913, |
|
"learning_rate": 6.6751809280544925e-06, |
|
"loss": 0.3069480514526367, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.31222018599510193, |
|
"step": 31320 |
|
}, |
|
{ |
|
"epoch": 10.025542784163473, |
|
"learning_rate": 6.632609621115369e-06, |
|
"loss": 0.30982017517089844, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 10.057471264367816, |
|
"learning_rate": 6.590038314176246e-06, |
|
"loss": 0.29576118469238283, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 10.089399744572159, |
|
"learning_rate": 6.5474670072371225e-06, |
|
"loss": 0.318297004699707, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 10.121328224776502, |
|
"learning_rate": 6.504895700297999e-06, |
|
"loss": 0.2947650337219238, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 10.153256704980842, |
|
"learning_rate": 6.462324393358877e-06, |
|
"loss": 0.31912431716918943, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 10.185185185185185, |
|
"learning_rate": 6.419753086419753e-06, |
|
"loss": 0.31549993515014646, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 10.217113665389528, |
|
"learning_rate": 6.37718177948063e-06, |
|
"loss": 0.3185459327697754, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 10.24904214559387, |
|
"learning_rate": 6.334610472541508e-06, |
|
"loss": 0.3061997413635254, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 10.280970625798211, |
|
"learning_rate": 6.292039165602385e-06, |
|
"loss": 0.30356922149658205, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 10.312899106002554, |
|
"learning_rate": 6.249467858663262e-06, |
|
"loss": 0.30677152633666993, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 10.344827586206897, |
|
"learning_rate": 6.206896551724138e-06, |
|
"loss": 0.3148806190490723, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 10.37675606641124, |
|
"learning_rate": 6.164325244785016e-06, |
|
"loss": 0.3114926528930664, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 10.40868454661558, |
|
"learning_rate": 6.1217539378458925e-06, |
|
"loss": 0.31096630096435546, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 10.440613026819923, |
|
"learning_rate": 6.079182630906769e-06, |
|
"loss": 0.3131294822692871, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 10.472541507024266, |
|
"learning_rate": 6.036611323967646e-06, |
|
"loss": 0.31513975143432615, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 10.504469987228608, |
|
"learning_rate": 5.9940400170285225e-06, |
|
"loss": 0.3035664939880371, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 10.53639846743295, |
|
"learning_rate": 5.9514687100894e-06, |
|
"loss": 0.30514934539794925, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 10.568326947637292, |
|
"learning_rate": 5.9088974031502775e-06, |
|
"loss": 0.3236639404296875, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 10.600255427841635, |
|
"learning_rate": 5.866326096211154e-06, |
|
"loss": 0.32729095458984375, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 10.632183908045977, |
|
"learning_rate": 5.823754789272032e-06, |
|
"loss": 0.30390745162963867, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 10.66411238825032, |
|
"learning_rate": 5.781183482332908e-06, |
|
"loss": 0.30320255279541014, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 10.696040868454661, |
|
"learning_rate": 5.738612175393785e-06, |
|
"loss": 0.31617319107055664, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 10.727969348659004, |
|
"learning_rate": 5.696040868454662e-06, |
|
"loss": 0.31349088668823244, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 10.759897828863346, |
|
"learning_rate": 5.653469561515539e-06, |
|
"loss": 0.3055162620544434, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 10.79182630906769, |
|
"learning_rate": 5.610898254576416e-06, |
|
"loss": 0.29683116912841795, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 10.82375478927203, |
|
"learning_rate": 5.5683269476372925e-06, |
|
"loss": 0.2972592926025391, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 10.855683269476373, |
|
"learning_rate": 5.525755640698169e-06, |
|
"loss": 0.30813514709472656, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 10.887611749680715, |
|
"learning_rate": 5.4831843337590475e-06, |
|
"loss": 0.311496696472168, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 10.919540229885058, |
|
"learning_rate": 5.440613026819924e-06, |
|
"loss": 0.3147770881652832, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 10.951468710089399, |
|
"learning_rate": 5.398041719880801e-06, |
|
"loss": 0.2977629852294922, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 10.983397190293742, |
|
"learning_rate": 5.3554704129416775e-06, |
|
"loss": 0.30722129821777344, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.30927398800849915, |
|
"step": 34452 |
|
}, |
|
{ |
|
"epoch": 11.015325670498084, |
|
"learning_rate": 5.312899106002555e-06, |
|
"loss": 0.30775304794311525, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 11.047254150702427, |
|
"learning_rate": 5.270327799063432e-06, |
|
"loss": 0.31844793319702147, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 11.079182630906768, |
|
"learning_rate": 5.227756492124308e-06, |
|
"loss": 0.3163930511474609, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 11.11111111111111, |
|
"learning_rate": 5.185185185185185e-06, |
|
"loss": 0.3178179359436035, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 11.143039591315453, |
|
"learning_rate": 5.1426138782460625e-06, |
|
"loss": 0.3107015609741211, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 11.174968071519796, |
|
"learning_rate": 5.100042571306939e-06, |
|
"loss": 0.31198001861572267, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 11.206896551724139, |
|
"learning_rate": 5.057471264367817e-06, |
|
"loss": 0.30287263870239256, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 11.23882503192848, |
|
"learning_rate": 5.014899957428694e-06, |
|
"loss": 0.3046586036682129, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 11.270753512132822, |
|
"learning_rate": 4.972328650489571e-06, |
|
"loss": 0.29926385879516604, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 11.302681992337165, |
|
"learning_rate": 4.9297573435504475e-06, |
|
"loss": 0.31370662689208983, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 11.334610472541508, |
|
"learning_rate": 4.887186036611324e-06, |
|
"loss": 0.3177505874633789, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 11.366538952745849, |
|
"learning_rate": 4.844614729672202e-06, |
|
"loss": 0.3028862190246582, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 11.398467432950191, |
|
"learning_rate": 4.802043422733078e-06, |
|
"loss": 0.2965104293823242, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 11.430395913154534, |
|
"learning_rate": 4.759472115793956e-06, |
|
"loss": 0.31520273208618166, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 11.462324393358877, |
|
"learning_rate": 4.7169008088548325e-06, |
|
"loss": 0.2961687469482422, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 11.494252873563218, |
|
"learning_rate": 4.674329501915709e-06, |
|
"loss": 0.30442037582397463, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 11.52618135376756, |
|
"learning_rate": 4.631758194976586e-06, |
|
"loss": 0.3178094863891602, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 11.558109833971903, |
|
"learning_rate": 4.589186888037463e-06, |
|
"loss": 0.3036604118347168, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 11.590038314176246, |
|
"learning_rate": 4.54661558109834e-06, |
|
"loss": 0.3115557861328125, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 11.621966794380587, |
|
"learning_rate": 4.5040442741592175e-06, |
|
"loss": 0.30345108032226564, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 11.65389527458493, |
|
"learning_rate": 4.461472967220094e-06, |
|
"loss": 0.30634918212890627, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 11.685823754789272, |
|
"learning_rate": 4.418901660280971e-06, |
|
"loss": 0.3051659774780273, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 11.717752234993615, |
|
"learning_rate": 4.3763303533418475e-06, |
|
"loss": 0.3069002342224121, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 11.749680715197957, |
|
"learning_rate": 4.333759046402725e-06, |
|
"loss": 0.30947404861450195, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 11.781609195402298, |
|
"learning_rate": 4.291187739463602e-06, |
|
"loss": 0.3073232650756836, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 11.813537675606641, |
|
"learning_rate": 4.248616432524479e-06, |
|
"loss": 0.3001542472839355, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 11.845466155810984, |
|
"learning_rate": 4.206045125585356e-06, |
|
"loss": 0.31570695877075194, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 11.877394636015326, |
|
"learning_rate": 4.1634738186462325e-06, |
|
"loss": 0.311122932434082, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 11.909323116219667, |
|
"learning_rate": 4.12090251170711e-06, |
|
"loss": 0.301647891998291, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 11.94125159642401, |
|
"learning_rate": 4.078331204767987e-06, |
|
"loss": 0.3205462646484375, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 11.973180076628353, |
|
"learning_rate": 4.035759897828863e-06, |
|
"loss": 0.29651784896850586, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.3090454041957855, |
|
"step": 37584 |
|
}, |
|
{ |
|
"epoch": 12.005108556832695, |
|
"learning_rate": 3.993188590889741e-06, |
|
"loss": 0.31057783126831057, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 12.037037037037036, |
|
"learning_rate": 3.9506172839506175e-06, |
|
"loss": 0.305908145904541, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 12.068965517241379, |
|
"learning_rate": 3.908045977011495e-06, |
|
"loss": 0.3145115280151367, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 12.100893997445722, |
|
"learning_rate": 3.865474670072372e-06, |
|
"loss": 0.3086430168151855, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 12.132822477650064, |
|
"learning_rate": 3.822903363133248e-06, |
|
"loss": 0.3069817733764648, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 12.164750957854405, |
|
"learning_rate": 3.7803320561941254e-06, |
|
"loss": 0.3044874954223633, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 12.196679438058748, |
|
"learning_rate": 3.737760749255002e-06, |
|
"loss": 0.3003558731079102, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 12.22860791826309, |
|
"learning_rate": 3.6951894423158796e-06, |
|
"loss": 0.2989999961853027, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 12.260536398467433, |
|
"learning_rate": 3.6526181353767567e-06, |
|
"loss": 0.30233255386352537, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 12.292464878671776, |
|
"learning_rate": 3.6100468284376333e-06, |
|
"loss": 0.30123531341552734, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 12.324393358876117, |
|
"learning_rate": 3.56747552149851e-06, |
|
"loss": 0.3095419502258301, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 12.35632183908046, |
|
"learning_rate": 3.524904214559387e-06, |
|
"loss": 0.32312957763671873, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 12.388250319284802, |
|
"learning_rate": 3.4823329076202646e-06, |
|
"loss": 0.31157236099243163, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 12.420178799489145, |
|
"learning_rate": 3.4397616006811412e-06, |
|
"loss": 0.3003998374938965, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 12.452107279693486, |
|
"learning_rate": 3.3971902937420183e-06, |
|
"loss": 0.3086379051208496, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 12.484035759897829, |
|
"learning_rate": 3.354618986802895e-06, |
|
"loss": 0.28147794723510744, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 12.515964240102171, |
|
"learning_rate": 3.3120476798637717e-06, |
|
"loss": 0.29010528564453125, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 12.547892720306514, |
|
"learning_rate": 3.269476372924649e-06, |
|
"loss": 0.30009984970092773, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 12.579821200510855, |
|
"learning_rate": 3.2269050659855262e-06, |
|
"loss": 0.3059814834594727, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 12.611749680715198, |
|
"learning_rate": 3.184333759046403e-06, |
|
"loss": 0.2934641456604004, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 12.64367816091954, |
|
"learning_rate": 3.14176245210728e-06, |
|
"loss": 0.30141252517700196, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 12.675606641123883, |
|
"learning_rate": 3.0991911451681567e-06, |
|
"loss": 0.3053057289123535, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 12.707535121328224, |
|
"learning_rate": 3.056619838229034e-06, |
|
"loss": 0.2935024261474609, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 12.739463601532567, |
|
"learning_rate": 3.014048531289911e-06, |
|
"loss": 0.2974138069152832, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 12.77139208173691, |
|
"learning_rate": 2.971477224350788e-06, |
|
"loss": 0.29385158538818357, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 12.803320561941252, |
|
"learning_rate": 2.9289059174116646e-06, |
|
"loss": 0.309177303314209, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 12.835249042145595, |
|
"learning_rate": 2.8863346104725417e-06, |
|
"loss": 0.30561391830444334, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 12.867177522349936, |
|
"learning_rate": 2.8437633035334187e-06, |
|
"loss": 0.30923063278198243, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 12.899106002554278, |
|
"learning_rate": 2.801191996594296e-06, |
|
"loss": 0.30836896896362304, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 12.931034482758621, |
|
"learning_rate": 2.7586206896551725e-06, |
|
"loss": 0.31578615188598635, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 12.962962962962964, |
|
"learning_rate": 2.7160493827160496e-06, |
|
"loss": 0.31291526794433594, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 12.994891443167305, |
|
"learning_rate": 2.6734780757769262e-06, |
|
"loss": 0.3064906311035156, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.30278804898262024, |
|
"step": 40716 |
|
}, |
|
{ |
|
"epoch": 13.026819923371647, |
|
"learning_rate": 2.6309067688378037e-06, |
|
"loss": 0.2991274642944336, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 13.05874840357599, |
|
"learning_rate": 2.5883354618986804e-06, |
|
"loss": 0.3081726837158203, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 13.090676883780333, |
|
"learning_rate": 2.5457641549595575e-06, |
|
"loss": 0.28322860717773435, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 13.122605363984674, |
|
"learning_rate": 2.503192848020434e-06, |
|
"loss": 0.3077671813964844, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 13.154533844189016, |
|
"learning_rate": 2.4606215410813112e-06, |
|
"loss": 0.3038086128234863, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 13.186462324393359, |
|
"learning_rate": 2.4180502341421883e-06, |
|
"loss": 0.3015581703186035, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 13.218390804597702, |
|
"learning_rate": 2.3754789272030654e-06, |
|
"loss": 0.30243860244750975, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 13.250319284802043, |
|
"learning_rate": 2.332907620263942e-06, |
|
"loss": 0.3104331398010254, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 13.282247765006385, |
|
"learning_rate": 2.290336313324819e-06, |
|
"loss": 0.30330204010009765, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 13.314176245210728, |
|
"learning_rate": 2.2477650063856962e-06, |
|
"loss": 0.2972829818725586, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 13.34610472541507, |
|
"learning_rate": 2.205193699446573e-06, |
|
"loss": 0.300672607421875, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 13.378033205619413, |
|
"learning_rate": 2.1626223925074504e-06, |
|
"loss": 0.30183706283569334, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 13.409961685823754, |
|
"learning_rate": 2.120051085568327e-06, |
|
"loss": 0.3096357536315918, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 13.441890166028097, |
|
"learning_rate": 2.077479778629204e-06, |
|
"loss": 0.3032659912109375, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 13.47381864623244, |
|
"learning_rate": 2.0349084716900813e-06, |
|
"loss": 0.2949547386169434, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 13.505747126436782, |
|
"learning_rate": 1.992337164750958e-06, |
|
"loss": 0.2889937973022461, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 13.537675606641123, |
|
"learning_rate": 1.949765857811835e-06, |
|
"loss": 0.30804216384887695, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 13.569604086845466, |
|
"learning_rate": 1.9071945508727119e-06, |
|
"loss": 0.30334890365600586, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 13.601532567049809, |
|
"learning_rate": 1.864623243933589e-06, |
|
"loss": 0.2941057586669922, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 13.633461047254151, |
|
"learning_rate": 1.8220519369944658e-06, |
|
"loss": 0.2950305938720703, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 13.665389527458492, |
|
"learning_rate": 1.7794806300553427e-06, |
|
"loss": 0.3061481285095215, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 13.697318007662835, |
|
"learning_rate": 1.7369093231162198e-06, |
|
"loss": 0.2870841217041016, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 13.729246487867178, |
|
"learning_rate": 1.6943380161770967e-06, |
|
"loss": 0.28978254318237306, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 13.76117496807152, |
|
"learning_rate": 1.6517667092379737e-06, |
|
"loss": 0.30341114044189454, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 13.793103448275861, |
|
"learning_rate": 1.6091954022988506e-06, |
|
"loss": 0.307810001373291, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 13.825031928480204, |
|
"learning_rate": 1.5666240953597275e-06, |
|
"loss": 0.30963399887084964, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 13.856960408684547, |
|
"learning_rate": 1.5240527884206046e-06, |
|
"loss": 0.3031142807006836, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 13.88888888888889, |
|
"learning_rate": 1.4814814814814815e-06, |
|
"loss": 0.3144196891784668, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 13.920817369093232, |
|
"learning_rate": 1.4389101745423588e-06, |
|
"loss": 0.28602792739868166, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 13.952745849297573, |
|
"learning_rate": 1.3963388676032354e-06, |
|
"loss": 0.2984015083312988, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 13.984674329501916, |
|
"learning_rate": 1.3537675606641127e-06, |
|
"loss": 0.29513154983520506, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.3065377175807953, |
|
"step": 43848 |
|
}, |
|
{ |
|
"epoch": 14.016602809706258, |
|
"learning_rate": 1.3111962537249896e-06, |
|
"loss": 0.28543767929077146, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 14.048531289910601, |
|
"learning_rate": 1.2686249467858662e-06, |
|
"loss": 0.2963153839111328, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 14.080459770114942, |
|
"learning_rate": 1.2260536398467433e-06, |
|
"loss": 0.3030729293823242, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 14.112388250319285, |
|
"learning_rate": 1.1834823329076204e-06, |
|
"loss": 0.2889740180969238, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 14.144316730523627, |
|
"learning_rate": 1.1409110259684973e-06, |
|
"loss": 0.2915242385864258, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 14.17624521072797, |
|
"learning_rate": 1.0983397190293744e-06, |
|
"loss": 0.28075174331665037, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 14.20817369093231, |
|
"learning_rate": 1.0557684120902512e-06, |
|
"loss": 0.3046562194824219, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 14.240102171136654, |
|
"learning_rate": 1.0131971051511283e-06, |
|
"loss": 0.28045236587524414, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 14.272030651340996, |
|
"learning_rate": 9.706257982120052e-07, |
|
"loss": 0.2948748970031738, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 14.303959131545339, |
|
"learning_rate": 9.280544912728822e-07, |
|
"loss": 0.2956666946411133, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 14.33588761174968, |
|
"learning_rate": 8.854831843337592e-07, |
|
"loss": 0.2919887924194336, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 14.367816091954023, |
|
"learning_rate": 8.429118773946361e-07, |
|
"loss": 0.29414264678955077, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 14.399744572158365, |
|
"learning_rate": 8.003405704555131e-07, |
|
"loss": 0.2997250938415527, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 14.431673052362708, |
|
"learning_rate": 7.5776926351639e-07, |
|
"loss": 0.2927609634399414, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 14.46360153256705, |
|
"learning_rate": 7.15197956577267e-07, |
|
"loss": 0.30317195892333987, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 14.495530012771392, |
|
"learning_rate": 6.72626649638144e-07, |
|
"loss": 0.29728498458862307, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 14.527458492975734, |
|
"learning_rate": 6.300553426990209e-07, |
|
"loss": 0.2906969451904297, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 14.559386973180077, |
|
"learning_rate": 5.874840357598978e-07, |
|
"loss": 0.3137422752380371, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 14.59131545338442, |
|
"learning_rate": 5.449127288207749e-07, |
|
"loss": 0.3111775016784668, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 14.62324393358876, |
|
"learning_rate": 5.023414218816518e-07, |
|
"loss": 0.30036439895629885, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 14.655172413793103, |
|
"learning_rate": 4.5977011494252875e-07, |
|
"loss": 0.2988995361328125, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 14.687100893997446, |
|
"learning_rate": 4.171988080034058e-07, |
|
"loss": 0.306041259765625, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 14.719029374201789, |
|
"learning_rate": 3.7462750106428265e-07, |
|
"loss": 0.30521770477294924, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 14.75095785440613, |
|
"learning_rate": 3.320561941251597e-07, |
|
"loss": 0.28645307540893555, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 14.782886334610472, |
|
"learning_rate": 2.894848871860366e-07, |
|
"loss": 0.29830142974853513, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 14.814814814814815, |
|
"learning_rate": 2.469135802469136e-07, |
|
"loss": 0.29040414810180665, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 14.846743295019158, |
|
"learning_rate": 2.0434227330779057e-07, |
|
"loss": 0.2852139472961426, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 14.878671775223498, |
|
"learning_rate": 1.6177096636866753e-07, |
|
"loss": 0.291912841796875, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 14.910600255427841, |
|
"learning_rate": 1.1919965942954449e-07, |
|
"loss": 0.2825672149658203, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 14.942528735632184, |
|
"learning_rate": 7.662835249042146e-08, |
|
"loss": 0.3074253273010254, |
|
"step": 46800 |
|
} |
|
], |
|
"max_steps": 46980, |
|
"num_train_epochs": 15, |
|
"total_flos": 76612640706201600, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|