|
{ |
|
"best_metric": 0.3242824375629425, |
|
"best_model_checkpoint": "/workspace/plateer_classifier_v0.1_result/checkpoint-110000", |
|
"epoch": 0.6441270979878347, |
|
"eval_steps": 55000, |
|
"global_step": 110000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0014640195241643742, |
|
"grad_norm": 50.05304718017578, |
|
"learning_rate": 4.880000000000001e-06, |
|
"loss": 4.3958, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0029280390483287485, |
|
"grad_norm": 48.363304138183594, |
|
"learning_rate": 9.88e-06, |
|
"loss": 1.6496, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.004392058572493123, |
|
"grad_norm": 54.546974182128906, |
|
"learning_rate": 1.488e-05, |
|
"loss": 0.8787, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.005856078096657497, |
|
"grad_norm": 50.317874908447266, |
|
"learning_rate": 1.9880000000000003e-05, |
|
"loss": 0.7721, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.007320097620821872, |
|
"grad_norm": 62.48823928833008, |
|
"learning_rate": 2.488e-05, |
|
"loss": 0.7047, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.008784117144986246, |
|
"grad_norm": 44.35001754760742, |
|
"learning_rate": 2.9880000000000002e-05, |
|
"loss": 0.6749, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01024813666915062, |
|
"grad_norm": 36.486793518066406, |
|
"learning_rate": 3.4880000000000005e-05, |
|
"loss": 0.6409, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.011712156193314994, |
|
"grad_norm": 47.03588104248047, |
|
"learning_rate": 3.988e-05, |
|
"loss": 0.6406, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.013176175717479368, |
|
"grad_norm": 31.227832794189453, |
|
"learning_rate": 4.488e-05, |
|
"loss": 0.6149, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.014640195241643743, |
|
"grad_norm": 39.8408317565918, |
|
"learning_rate": 4.9880000000000004e-05, |
|
"loss": 0.5956, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.016104214765808117, |
|
"grad_norm": 41.118736267089844, |
|
"learning_rate": 5.4879999999999996e-05, |
|
"loss": 0.5905, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.017568234289972492, |
|
"grad_norm": 29.624338150024414, |
|
"learning_rate": 5.988e-05, |
|
"loss": 0.5608, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.019032253814136865, |
|
"grad_norm": 22.993818283081055, |
|
"learning_rate": 6.488e-05, |
|
"loss": 0.5614, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.02049627333830124, |
|
"grad_norm": 19.964269638061523, |
|
"learning_rate": 6.988e-05, |
|
"loss": 0.5569, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.021960292862465612, |
|
"grad_norm": 36.538047790527344, |
|
"learning_rate": 7.488e-05, |
|
"loss": 0.5316, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.023424312386629988, |
|
"grad_norm": 37.63505935668945, |
|
"learning_rate": 7.988e-05, |
|
"loss": 0.5364, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.024888331910794363, |
|
"grad_norm": 25.934967041015625, |
|
"learning_rate": 8.486000000000001e-05, |
|
"loss": 0.5234, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.026352351434958735, |
|
"grad_norm": 24.810028076171875, |
|
"learning_rate": 8.986e-05, |
|
"loss": 0.5155, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.02781637095912311, |
|
"grad_norm": 32.76811981201172, |
|
"learning_rate": 9.484e-05, |
|
"loss": 0.5022, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.029280390483287486, |
|
"grad_norm": 27.094772338867188, |
|
"learning_rate": 9.984e-05, |
|
"loss": 0.5023, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.029280390483287486, |
|
"eval_accuracy": 0.8572352668691132, |
|
"eval_loss": 0.5044249296188354, |
|
"eval_runtime": 11541.1431, |
|
"eval_samples_per_second": 210.432, |
|
"eval_steps_per_second": 6.576, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.03074441000745186, |
|
"grad_norm": 24.74563217163086, |
|
"learning_rate": 0.00010484, |
|
"loss": 0.5073, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.032208429531616234, |
|
"grad_norm": 17.229019165039062, |
|
"learning_rate": 0.00010984, |
|
"loss": 0.4932, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.03367244905578061, |
|
"grad_norm": 23.318979263305664, |
|
"learning_rate": 0.00011484000000000002, |
|
"loss": 0.504, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.035136468579944985, |
|
"grad_norm": 22.271846771240234, |
|
"learning_rate": 0.00011983999999999999, |
|
"loss": 0.4817, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.036600488104109354, |
|
"grad_norm": 24.304887771606445, |
|
"learning_rate": 0.00012484, |
|
"loss": 0.4966, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.03806450762827373, |
|
"grad_norm": 23.76158905029297, |
|
"learning_rate": 0.00012984000000000002, |
|
"loss": 0.4899, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.039528527152438105, |
|
"grad_norm": 20.765274047851562, |
|
"learning_rate": 0.00013484, |
|
"loss": 0.4773, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.04099254667660248, |
|
"grad_norm": 12.793950080871582, |
|
"learning_rate": 0.00013982000000000003, |
|
"loss": 0.4781, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.042456566200766856, |
|
"grad_norm": 14.128210067749023, |
|
"learning_rate": 0.00014482, |
|
"loss": 0.4687, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.043920585724931224, |
|
"grad_norm": 22.348928451538086, |
|
"learning_rate": 0.00014982, |
|
"loss": 0.4722, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.0453846052490956, |
|
"grad_norm": 17.29800796508789, |
|
"learning_rate": 0.00015480000000000002, |
|
"loss": 0.4692, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.046848624773259975, |
|
"grad_norm": 11.0147066116333, |
|
"learning_rate": 0.0001598, |
|
"loss": 0.4689, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.04831264429742435, |
|
"grad_norm": 11.713265419006348, |
|
"learning_rate": 0.0001648, |
|
"loss": 0.4788, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.049776663821588726, |
|
"grad_norm": 12.367693901062012, |
|
"learning_rate": 0.0001698, |
|
"loss": 0.4697, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.0512406833457531, |
|
"grad_norm": 8.11889934539795, |
|
"learning_rate": 0.00017480000000000002, |
|
"loss": 0.4696, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.05270470286991747, |
|
"grad_norm": 12.321019172668457, |
|
"learning_rate": 0.0001798, |
|
"loss": 0.461, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.054168722394081846, |
|
"grad_norm": 15.612183570861816, |
|
"learning_rate": 0.00018480000000000002, |
|
"loss": 0.4646, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.05563274191824622, |
|
"grad_norm": 10.72978687286377, |
|
"learning_rate": 0.0001898, |
|
"loss": 0.4673, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.0570967614424106, |
|
"grad_norm": 8.815441131591797, |
|
"learning_rate": 0.0001948, |
|
"loss": 0.4472, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.05856078096657497, |
|
"grad_norm": 8.681705474853516, |
|
"learning_rate": 0.0001998, |
|
"loss": 0.4629, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.05856078096657497, |
|
"eval_accuracy": 0.8688706572649133, |
|
"eval_loss": 0.457188218832016, |
|
"eval_runtime": 11537.8227, |
|
"eval_samples_per_second": 210.492, |
|
"eval_steps_per_second": 6.578, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.06002480049073934, |
|
"grad_norm": 13.643828392028809, |
|
"learning_rate": 0.0001997014219778306, |
|
"loss": 0.456, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.06148882001490372, |
|
"grad_norm": 13.211404800415039, |
|
"learning_rate": 0.00019939040320473745, |
|
"loss": 0.4666, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.06295283953906809, |
|
"grad_norm": 11.1001615524292, |
|
"learning_rate": 0.00019907938443164432, |
|
"loss": 0.4495, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.06441685906323247, |
|
"grad_norm": 8.222249984741211, |
|
"learning_rate": 0.00019876836565855117, |
|
"loss": 0.4483, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.06588087858739684, |
|
"grad_norm": 13.589752197265625, |
|
"learning_rate": 0.0001984585909605504, |
|
"loss": 0.4438, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.06734489811156122, |
|
"grad_norm": 9.988068580627441, |
|
"learning_rate": 0.00019814757218745724, |
|
"loss": 0.447, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.0688089176357256, |
|
"grad_norm": 8.311960220336914, |
|
"learning_rate": 0.0001978365534143641, |
|
"loss": 0.4476, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.07027293715988997, |
|
"grad_norm": 8.099685668945312, |
|
"learning_rate": 0.00019752553464127094, |
|
"loss": 0.4477, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.07173695668405435, |
|
"grad_norm": 8.23130989074707, |
|
"learning_rate": 0.00019721451586817782, |
|
"loss": 0.4385, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.07320097620821871, |
|
"grad_norm": 10.875362396240234, |
|
"learning_rate": 0.00019690349709508467, |
|
"loss": 0.4345, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.07466499573238308, |
|
"grad_norm": 9.479572296142578, |
|
"learning_rate": 0.00019659247832199152, |
|
"loss": 0.4345, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.07612901525654746, |
|
"grad_norm": 11.883151054382324, |
|
"learning_rate": 0.0001962814595488984, |
|
"loss": 0.4241, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.07759303478071183, |
|
"grad_norm": 8.15208911895752, |
|
"learning_rate": 0.00019597044077580524, |
|
"loss": 0.4335, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.07905705430487621, |
|
"grad_norm": 9.323240280151367, |
|
"learning_rate": 0.0001956594220027121, |
|
"loss": 0.4396, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.08052107382904058, |
|
"grad_norm": 7.250824928283691, |
|
"learning_rate": 0.00019534840322961897, |
|
"loss": 0.4376, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 0.08198509335320496, |
|
"grad_norm": 12.220071792602539, |
|
"learning_rate": 0.0001950373844565258, |
|
"loss": 0.4323, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.08344911287736934, |
|
"grad_norm": 8.460916519165039, |
|
"learning_rate": 0.00019472636568343266, |
|
"loss": 0.4271, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 0.08491313240153371, |
|
"grad_norm": 6.110500812530518, |
|
"learning_rate": 0.0001944153469103395, |
|
"loss": 0.4253, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.08637715192569809, |
|
"grad_norm": 10.618386268615723, |
|
"learning_rate": 0.00019410432813724636, |
|
"loss": 0.427, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 0.08784117144986245, |
|
"grad_norm": 9.827556610107422, |
|
"learning_rate": 0.00019379330936415324, |
|
"loss": 0.4254, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.08784117144986245, |
|
"eval_accuracy": 0.877075711565186, |
|
"eval_loss": 0.4201970100402832, |
|
"eval_runtime": 11537.2443, |
|
"eval_samples_per_second": 210.503, |
|
"eval_steps_per_second": 6.578, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.0892994385846771, |
|
"grad_norm": 10.84184455871582, |
|
"learning_rate": 0.00019349020046898423, |
|
"loss": 0.4211, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 0.09076336380737672, |
|
"grad_norm": 7.9568657875061035, |
|
"learning_rate": 0.00019317920297562402, |
|
"loss": 0.4203, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.09222728903007635, |
|
"grad_norm": 12.237702369689941, |
|
"learning_rate": 0.00019286820548226384, |
|
"loss": 0.4181, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 0.09369121425277596, |
|
"grad_norm": 25.739120483398438, |
|
"learning_rate": 0.00019255720798890363, |
|
"loss": 0.4143, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.09515513947547559, |
|
"grad_norm": 8.341870307922363, |
|
"learning_rate": 0.00019224621049554342, |
|
"loss": 0.4171, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 0.09661906469817522, |
|
"grad_norm": 10.707802772521973, |
|
"learning_rate": 0.0001919352130021832, |
|
"loss": 0.4058, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.09808298992087484, |
|
"grad_norm": 7.021149158477783, |
|
"learning_rate": 0.00019162421550882302, |
|
"loss": 0.4211, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 0.09954691514357447, |
|
"grad_norm": 11.840470314025879, |
|
"learning_rate": 0.0001913132180154628, |
|
"loss": 0.4093, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.10101084036627409, |
|
"grad_norm": 7.401727676391602, |
|
"learning_rate": 0.0001910022205221026, |
|
"loss": 0.4281, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 0.10247476558897371, |
|
"grad_norm": 7.601231575012207, |
|
"learning_rate": 0.00019069246701871584, |
|
"loss": 0.4044, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.10393869081167334, |
|
"grad_norm": 6.85632848739624, |
|
"learning_rate": 0.00019038146952535563, |
|
"loss": 0.4244, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 0.10540261603437297, |
|
"grad_norm": 10.810693740844727, |
|
"learning_rate": 0.00019007171602196887, |
|
"loss": 0.4216, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.1068665412570726, |
|
"grad_norm": 9.758743286132812, |
|
"learning_rate": 0.00018976071852860865, |
|
"loss": 0.417, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 0.10833046647977221, |
|
"grad_norm": 10.75692367553711, |
|
"learning_rate": 0.00018944972103524847, |
|
"loss": 0.4143, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.10979439170247184, |
|
"grad_norm": 10.375711441040039, |
|
"learning_rate": 0.00018913872354188826, |
|
"loss": 0.4075, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 0.11125831692517146, |
|
"grad_norm": 8.414403915405273, |
|
"learning_rate": 0.00018882772604852805, |
|
"loss": 0.4148, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.11272224214787109, |
|
"grad_norm": 9.86490249633789, |
|
"learning_rate": 0.00018851672855516786, |
|
"loss": 0.4074, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 0.11418616737057072, |
|
"grad_norm": 7.522060394287109, |
|
"learning_rate": 0.00018820573106180765, |
|
"loss": 0.4106, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.11565009259327033, |
|
"grad_norm": 7.423270225524902, |
|
"learning_rate": 0.00018789473356844744, |
|
"loss": 0.4034, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 0.11711401781596996, |
|
"grad_norm": 8.761688232421875, |
|
"learning_rate": 0.00018758373607508723, |
|
"loss": 0.4025, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.11711401781596996, |
|
"eval_accuracy": 0.8823756104911845, |
|
"eval_loss": 0.4016551673412323, |
|
"eval_runtime": 11547.1595, |
|
"eval_samples_per_second": 210.336, |
|
"eval_steps_per_second": 6.573, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.11857794303866959, |
|
"grad_norm": 9.6015043258667, |
|
"learning_rate": 0.0001872802025215677, |
|
"loss": 0.4087, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 0.12004186826136921, |
|
"grad_norm": 6.658656120300293, |
|
"learning_rate": 0.00018696920502820748, |
|
"loss": 0.408, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.12150579348406883, |
|
"grad_norm": 6.935655117034912, |
|
"learning_rate": 0.00018665820753484727, |
|
"loss": 0.3983, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 0.12296971870676845, |
|
"grad_norm": 7.918155193328857, |
|
"learning_rate": 0.00018634721004148706, |
|
"loss": 0.3994, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.12443364392946808, |
|
"grad_norm": 7.246758937835693, |
|
"learning_rate": 0.00018603621254812688, |
|
"loss": 0.4111, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 0.1258975691521677, |
|
"grad_norm": 8.375380516052246, |
|
"learning_rate": 0.00018572521505476667, |
|
"loss": 0.4006, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.12736149437486732, |
|
"grad_norm": 6.993825435638428, |
|
"learning_rate": 0.0001854154615513799, |
|
"loss": 0.4113, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 0.12882541959756696, |
|
"grad_norm": 8.703255653381348, |
|
"learning_rate": 0.00018510446405801972, |
|
"loss": 0.3977, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.13028934482026658, |
|
"grad_norm": 6.940033912658691, |
|
"learning_rate": 0.0001847934665646595, |
|
"loss": 0.4005, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 0.1317532700429662, |
|
"grad_norm": 6.712055683135986, |
|
"learning_rate": 0.0001844824690712993, |
|
"loss": 0.41, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.13321719526566583, |
|
"grad_norm": 6.171209812164307, |
|
"learning_rate": 0.0001841714715779391, |
|
"loss": 0.3971, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 0.13468112048836545, |
|
"grad_norm": 10.764921188354492, |
|
"learning_rate": 0.0001838604740845789, |
|
"loss": 0.4105, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.1361450457110651, |
|
"grad_norm": 8.0676908493042, |
|
"learning_rate": 0.0001835494765912187, |
|
"loss": 0.3958, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 0.1376089709337647, |
|
"grad_norm": 5.20599365234375, |
|
"learning_rate": 0.00018323847909785848, |
|
"loss": 0.3946, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.13907289615646432, |
|
"grad_norm": 5.9439239501953125, |
|
"learning_rate": 0.0001829274816044983, |
|
"loss": 0.3951, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 0.14053682137916396, |
|
"grad_norm": 9.821541786193848, |
|
"learning_rate": 0.0001826164841111381, |
|
"loss": 0.3906, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.14200074660186357, |
|
"grad_norm": 6.659691333770752, |
|
"learning_rate": 0.00018230673060775133, |
|
"loss": 0.4009, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 0.1434646718245632, |
|
"grad_norm": 6.624240398406982, |
|
"learning_rate": 0.00018199573311439112, |
|
"loss": 0.3975, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.14492859704726282, |
|
"grad_norm": 7.993641376495361, |
|
"learning_rate": 0.0001816847356210309, |
|
"loss": 0.3925, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 0.14639252226996244, |
|
"grad_norm": 6.6386613845825195, |
|
"learning_rate": 0.0001813737381276707, |
|
"loss": 0.3975, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.14785644749266208, |
|
"grad_norm": 9.204560279846191, |
|
"learning_rate": 0.0001810627406343105, |
|
"loss": 0.3997, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 0.1493203727153617, |
|
"grad_norm": 8.072566986083984, |
|
"learning_rate": 0.0001807517431409503, |
|
"loss": 0.4022, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.15078429793806133, |
|
"grad_norm": 10.15225601196289, |
|
"learning_rate": 0.0001804407456475901, |
|
"loss": 0.392, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 0.15224822316076095, |
|
"grad_norm": 7.751401901245117, |
|
"learning_rate": 0.0001801297481542299, |
|
"loss": 0.3946, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.15371214838346056, |
|
"grad_norm": 8.481501579284668, |
|
"learning_rate": 0.0001798187506608697, |
|
"loss": 0.3883, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 0.1551760736061602, |
|
"grad_norm": 9.861278533935547, |
|
"learning_rate": 0.00017950775316750948, |
|
"loss": 0.3824, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.15663999882885982, |
|
"grad_norm": 6.405235290527344, |
|
"learning_rate": 0.0001791967556741493, |
|
"loss": 0.4006, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 0.15810392405155946, |
|
"grad_norm": 9.90355110168457, |
|
"learning_rate": 0.00017888575818078909, |
|
"loss": 0.3881, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.15956784927425907, |
|
"grad_norm": 9.354215621948242, |
|
"learning_rate": 0.00017857476068742887, |
|
"loss": 0.3965, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 0.16103177449695869, |
|
"grad_norm": 9.162219047546387, |
|
"learning_rate": 0.00017826376319406866, |
|
"loss": 0.3933, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.16249569971965833, |
|
"grad_norm": 6.755202770233154, |
|
"learning_rate": 0.00017795276570070848, |
|
"loss": 0.3874, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 0.16395962494235794, |
|
"grad_norm": 8.385200500488281, |
|
"learning_rate": 0.00017764176820734827, |
|
"loss": 0.3873, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.16542355016505758, |
|
"grad_norm": 6.508645057678223, |
|
"learning_rate": 0.00017733077071398806, |
|
"loss": 0.3895, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 0.1668874753877572, |
|
"grad_norm": 8.241129875183105, |
|
"learning_rate": 0.00017702226120057472, |
|
"loss": 0.3912, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.1683514006104568, |
|
"grad_norm": 7.879597187042236, |
|
"learning_rate": 0.00017671126370721454, |
|
"loss": 0.3929, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 0.16981532583315645, |
|
"grad_norm": 12.0702486038208, |
|
"learning_rate": 0.00017640026621385432, |
|
"loss": 0.404, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.17127925105585606, |
|
"grad_norm": 8.789772033691406, |
|
"learning_rate": 0.0001760892687204941, |
|
"loss": 0.3823, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 0.1727431762785557, |
|
"grad_norm": 11.022305488586426, |
|
"learning_rate": 0.00017577827122713393, |
|
"loss": 0.3887, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.17420710150125532, |
|
"grad_norm": 7.665167331695557, |
|
"learning_rate": 0.00017546727373377372, |
|
"loss": 0.394, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 0.17567102672395493, |
|
"grad_norm": 11.05783748626709, |
|
"learning_rate": 0.0001751562762404135, |
|
"loss": 0.3938, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.17713495194665457, |
|
"grad_norm": 8.389631271362305, |
|
"learning_rate": 0.0001748452787470533, |
|
"loss": 0.39, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 0.1785988771693542, |
|
"grad_norm": 8.158947944641113, |
|
"learning_rate": 0.0001745342812536931, |
|
"loss": 0.3818, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.1800628023920538, |
|
"grad_norm": 7.684356689453125, |
|
"learning_rate": 0.0001742232837603329, |
|
"loss": 0.3905, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 0.18152672761475344, |
|
"grad_norm": 10.129668235778809, |
|
"learning_rate": 0.00017391353025694614, |
|
"loss": 0.3886, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.18299065283745305, |
|
"grad_norm": 6.924737453460693, |
|
"learning_rate": 0.00017360253276358593, |
|
"loss": 0.3892, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 0.1844545780601527, |
|
"grad_norm": 5.863354206085205, |
|
"learning_rate": 0.00017329153527022572, |
|
"loss": 0.3822, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.1859185032828523, |
|
"grad_norm": 9.10240650177002, |
|
"learning_rate": 0.00017298053777686553, |
|
"loss": 0.3895, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 0.18738242850555192, |
|
"grad_norm": 9.565494537353516, |
|
"learning_rate": 0.00017266954028350532, |
|
"loss": 0.383, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.18884635372825156, |
|
"grad_norm": 8.238012313842773, |
|
"learning_rate": 0.0001723585427901451, |
|
"loss": 0.3854, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 0.19031027895095118, |
|
"grad_norm": 9.350130081176758, |
|
"learning_rate": 0.0001720475452967849, |
|
"loss": 0.3922, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.19177420417365082, |
|
"grad_norm": 6.337550163269043, |
|
"learning_rate": 0.00017173654780342472, |
|
"loss": 0.3778, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 0.19323812939635043, |
|
"grad_norm": 8.421921730041504, |
|
"learning_rate": 0.00017142679430003793, |
|
"loss": 0.3929, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.19470205461905005, |
|
"grad_norm": 8.888238906860352, |
|
"learning_rate": 0.00017111579680667774, |
|
"loss": 0.3844, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 0.1961659798417497, |
|
"grad_norm": 10.774327278137207, |
|
"learning_rate": 0.00017080479931331753, |
|
"loss": 0.3804, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.1976299050644493, |
|
"grad_norm": 7.07879114151001, |
|
"learning_rate": 0.00017049380181995732, |
|
"loss": 0.3954, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 0.19909383028714894, |
|
"grad_norm": 7.102870941162109, |
|
"learning_rate": 0.00017018280432659714, |
|
"loss": 0.3815, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.20055775550984856, |
|
"grad_norm": 5.815110206604004, |
|
"learning_rate": 0.00016987180683323693, |
|
"loss": 0.3907, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 0.20202168073254817, |
|
"grad_norm": 7.749156475067139, |
|
"learning_rate": 0.00016956080933987672, |
|
"loss": 0.3798, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.2034856059552478, |
|
"grad_norm": 7.0530476570129395, |
|
"learning_rate": 0.0001692498118465165, |
|
"loss": 0.3947, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 0.20494953117794742, |
|
"grad_norm": 6.623088836669922, |
|
"learning_rate": 0.00016893881435315632, |
|
"loss": 0.3816, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.20641345640064707, |
|
"grad_norm": 8.431561470031738, |
|
"learning_rate": 0.0001686278168597961, |
|
"loss": 0.3815, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 0.20787738162334668, |
|
"grad_norm": 11.600255012512207, |
|
"learning_rate": 0.00016831806335640935, |
|
"loss": 0.3782, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.2093413068460463, |
|
"grad_norm": 5.186095237731934, |
|
"learning_rate": 0.00016800706586304914, |
|
"loss": 0.3828, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 0.21080523206874593, |
|
"grad_norm": 12.819711685180664, |
|
"learning_rate": 0.00016769606836968895, |
|
"loss": 0.3902, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.21226915729144555, |
|
"grad_norm": 7.843264579772949, |
|
"learning_rate": 0.00016738507087632874, |
|
"loss": 0.3716, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 0.2137330825141452, |
|
"grad_norm": 8.602349281311035, |
|
"learning_rate": 0.00016707407338296853, |
|
"loss": 0.3791, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.2151970077368448, |
|
"grad_norm": 7.939485549926758, |
|
"learning_rate": 0.00016676307588960832, |
|
"loss": 0.3752, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 0.21666093295954442, |
|
"grad_norm": 6.328729629516602, |
|
"learning_rate": 0.00016645207839624814, |
|
"loss": 0.3761, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.21812485818224406, |
|
"grad_norm": 6.196065902709961, |
|
"learning_rate": 0.00016614108090288793, |
|
"loss": 0.3817, |
|
"step": 37250 |
|
}, |
|
{ |
|
"epoch": 0.21958878340494367, |
|
"grad_norm": 10.096115112304688, |
|
"learning_rate": 0.00016583008340952771, |
|
"loss": 0.3828, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.2210527086276433, |
|
"grad_norm": 6.120075702667236, |
|
"learning_rate": 0.0001655190859161675, |
|
"loss": 0.3774, |
|
"step": 37750 |
|
}, |
|
{ |
|
"epoch": 0.22251663385034293, |
|
"grad_norm": 6.575611114501953, |
|
"learning_rate": 0.00016520808842280732, |
|
"loss": 0.3823, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.22398055907304254, |
|
"grad_norm": 7.636918067932129, |
|
"learning_rate": 0.0001648970909294471, |
|
"loss": 0.3846, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 0.22544448429574218, |
|
"grad_norm": 15.759072303771973, |
|
"learning_rate": 0.00016458733742606037, |
|
"loss": 0.3842, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.2269084095184418, |
|
"grad_norm": 10.398168563842773, |
|
"learning_rate": 0.0001642775839226736, |
|
"loss": 0.3794, |
|
"step": 38750 |
|
}, |
|
{ |
|
"epoch": 0.22837233474114144, |
|
"grad_norm": 6.939914703369141, |
|
"learning_rate": 0.0001639665864293134, |
|
"loss": 0.3763, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.22983625996384105, |
|
"grad_norm": 11.021454811096191, |
|
"learning_rate": 0.0001636555889359532, |
|
"loss": 0.368, |
|
"step": 39250 |
|
}, |
|
{ |
|
"epoch": 0.23130018518654066, |
|
"grad_norm": 7.381429195404053, |
|
"learning_rate": 0.00016334459144259298, |
|
"loss": 0.3783, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.2327641104092403, |
|
"grad_norm": 9.803789138793945, |
|
"learning_rate": 0.0001630335939492328, |
|
"loss": 0.3828, |
|
"step": 39750 |
|
}, |
|
{ |
|
"epoch": 0.23422803563193992, |
|
"grad_norm": 7.722465991973877, |
|
"learning_rate": 0.00016272259645587259, |
|
"loss": 0.3764, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.23569196085463953, |
|
"grad_norm": 8.471487998962402, |
|
"learning_rate": 0.00016241159896251237, |
|
"loss": 0.3879, |
|
"step": 40250 |
|
}, |
|
{ |
|
"epoch": 0.23715588607733917, |
|
"grad_norm": 9.46483039855957, |
|
"learning_rate": 0.00016210060146915216, |
|
"loss": 0.3772, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.2386198113000388, |
|
"grad_norm": 11.850425720214844, |
|
"learning_rate": 0.00016178960397579198, |
|
"loss": 0.3688, |
|
"step": 40750 |
|
}, |
|
{ |
|
"epoch": 0.24008373652273843, |
|
"grad_norm": 7.718139171600342, |
|
"learning_rate": 0.00016147860648243177, |
|
"loss": 0.3728, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.24154766174543804, |
|
"grad_norm": 7.039102077484131, |
|
"learning_rate": 0.00016116760898907156, |
|
"loss": 0.3718, |
|
"step": 41250 |
|
}, |
|
{ |
|
"epoch": 0.24301158696813766, |
|
"grad_norm": 6.891547679901123, |
|
"learning_rate": 0.00016085661149571137, |
|
"loss": 0.3713, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.2444755121908373, |
|
"grad_norm": 8.54554271697998, |
|
"learning_rate": 0.00016054561400235116, |
|
"loss": 0.3818, |
|
"step": 41750 |
|
}, |
|
{ |
|
"epoch": 0.2459394374135369, |
|
"grad_norm": 6.554268836975098, |
|
"learning_rate": 0.00016023461650899095, |
|
"loss": 0.3706, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.24740336263623655, |
|
"grad_norm": 6.389885902404785, |
|
"learning_rate": 0.00015992361901563074, |
|
"loss": 0.3577, |
|
"step": 42250 |
|
}, |
|
{ |
|
"epoch": 0.24886728785893616, |
|
"grad_norm": 6.833805561065674, |
|
"learning_rate": 0.00015961262152227056, |
|
"loss": 0.3722, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.2503312130816358, |
|
"grad_norm": 9.135841369628906, |
|
"learning_rate": 0.00015930162402891034, |
|
"loss": 0.3747, |
|
"step": 42750 |
|
}, |
|
{ |
|
"epoch": 0.2517951383043354, |
|
"grad_norm": 7.466910362243652, |
|
"learning_rate": 0.00015899187052552358, |
|
"loss": 0.378, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.25325906352703503, |
|
"grad_norm": 14.597432136535645, |
|
"learning_rate": 0.00015868087303216337, |
|
"loss": 0.3743, |
|
"step": 43250 |
|
}, |
|
{ |
|
"epoch": 0.25472298874973465, |
|
"grad_norm": 6.523279190063477, |
|
"learning_rate": 0.00015836987553880316, |
|
"loss": 0.3728, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.25618691397243426, |
|
"grad_norm": 5.352029800415039, |
|
"learning_rate": 0.00015805887804544298, |
|
"loss": 0.367, |
|
"step": 43750 |
|
}, |
|
{ |
|
"epoch": 0.25765083919513393, |
|
"grad_norm": 8.408788681030273, |
|
"learning_rate": 0.00015774788055208277, |
|
"loss": 0.3694, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.25911476441783354, |
|
"grad_norm": 7.64408016204834, |
|
"learning_rate": 0.00015743688305872256, |
|
"loss": 0.3664, |
|
"step": 44250 |
|
}, |
|
{ |
|
"epoch": 0.26057868964053316, |
|
"grad_norm": 4.888110637664795, |
|
"learning_rate": 0.00015712588556536234, |
|
"loss": 0.3637, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.26204261486323277, |
|
"grad_norm": 5.068843841552734, |
|
"learning_rate": 0.00015681488807200216, |
|
"loss": 0.369, |
|
"step": 44750 |
|
}, |
|
{ |
|
"epoch": 0.2635065400859324, |
|
"grad_norm": 6.427637577056885, |
|
"learning_rate": 0.00015650389057864195, |
|
"loss": 0.3788, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.26497046530863205, |
|
"grad_norm": 8.00766658782959, |
|
"learning_rate": 0.00015619289308528174, |
|
"loss": 0.3638, |
|
"step": 45250 |
|
}, |
|
{ |
|
"epoch": 0.26643439053133167, |
|
"grad_norm": 8.729680061340332, |
|
"learning_rate": 0.00015588189559192155, |
|
"loss": 0.3736, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.2678983157540313, |
|
"grad_norm": 10.317773818969727, |
|
"learning_rate": 0.00015557089809856134, |
|
"loss": 0.3618, |
|
"step": 45750 |
|
}, |
|
{ |
|
"epoch": 0.2693622409767309, |
|
"grad_norm": 7.715869903564453, |
|
"learning_rate": 0.00015525990060520113, |
|
"loss": 0.3741, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.2708261661994305, |
|
"grad_norm": 5.711330890655518, |
|
"learning_rate": 0.00015494890311184092, |
|
"loss": 0.3745, |
|
"step": 46250 |
|
}, |
|
{ |
|
"epoch": 0.2722900914221302, |
|
"grad_norm": 9.835432052612305, |
|
"learning_rate": 0.00015463790561848074, |
|
"loss": 0.3693, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.2737540166448298, |
|
"grad_norm": 6.019217014312744, |
|
"learning_rate": 0.00015432815211509395, |
|
"loss": 0.3674, |
|
"step": 46750 |
|
}, |
|
{ |
|
"epoch": 0.2752179418675294, |
|
"grad_norm": 7.813283443450928, |
|
"learning_rate": 0.00015401715462173376, |
|
"loss": 0.3674, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.276681867090229, |
|
"grad_norm": 7.319979190826416, |
|
"learning_rate": 0.00015370615712837355, |
|
"loss": 0.3675, |
|
"step": 47250 |
|
}, |
|
{ |
|
"epoch": 0.27814579231292863, |
|
"grad_norm": 8.74886703491211, |
|
"learning_rate": 0.00015339515963501334, |
|
"loss": 0.3633, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.2796097175356283, |
|
"grad_norm": 9.456360816955566, |
|
"learning_rate": 0.00015308416214165316, |
|
"loss": 0.379, |
|
"step": 47750 |
|
}, |
|
{ |
|
"epoch": 0.2810736427583279, |
|
"grad_norm": 10.024221420288086, |
|
"learning_rate": 0.00015277316464829295, |
|
"loss": 0.375, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.2825375679810275, |
|
"grad_norm": 6.477073669433594, |
|
"learning_rate": 0.00015246216715493274, |
|
"loss": 0.3634, |
|
"step": 48250 |
|
}, |
|
{ |
|
"epoch": 0.28400149320372714, |
|
"grad_norm": 8.587589263916016, |
|
"learning_rate": 0.00015215116966157255, |
|
"loss": 0.3693, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.28546541842642675, |
|
"grad_norm": 10.675822257995605, |
|
"learning_rate": 0.00015184017216821234, |
|
"loss": 0.3668, |
|
"step": 48750 |
|
}, |
|
{ |
|
"epoch": 0.2869293436491264, |
|
"grad_norm": 10.77786636352539, |
|
"learning_rate": 0.00015153041866482558, |
|
"loss": 0.3711, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.28839326887182604, |
|
"grad_norm": 7.768797874450684, |
|
"learning_rate": 0.00015121942117146537, |
|
"loss": 0.3692, |
|
"step": 49250 |
|
}, |
|
{ |
|
"epoch": 0.28985719409452565, |
|
"grad_norm": 6.11573600769043, |
|
"learning_rate": 0.00015090842367810516, |
|
"loss": 0.3618, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.29132111931722526, |
|
"grad_norm": 7.369346618652344, |
|
"learning_rate": 0.00015059742618474495, |
|
"loss": 0.365, |
|
"step": 49750 |
|
}, |
|
{ |
|
"epoch": 0.2927850445399249, |
|
"grad_norm": 10.559876441955566, |
|
"learning_rate": 0.00015028642869138476, |
|
"loss": 0.369, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.29424896976262455, |
|
"grad_norm": 6.763681888580322, |
|
"learning_rate": 0.00014997543119802455, |
|
"loss": 0.3723, |
|
"step": 50250 |
|
}, |
|
{ |
|
"epoch": 0.29571289498532416, |
|
"grad_norm": 14.075911521911621, |
|
"learning_rate": 0.00014966443370466434, |
|
"loss": 0.3656, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.2971768202080238, |
|
"grad_norm": 7.817617893218994, |
|
"learning_rate": 0.00014935343621130416, |
|
"loss": 0.3745, |
|
"step": 50750 |
|
}, |
|
{ |
|
"epoch": 0.2986407454307234, |
|
"grad_norm": 5.018287181854248, |
|
"learning_rate": 0.00014904243871794395, |
|
"loss": 0.3664, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.300104670653423, |
|
"grad_norm": 9.846301078796387, |
|
"learning_rate": 0.00014873144122458373, |
|
"loss": 0.3644, |
|
"step": 51250 |
|
}, |
|
{ |
|
"epoch": 0.30156859587612267, |
|
"grad_norm": 8.65786361694336, |
|
"learning_rate": 0.00014842044373122352, |
|
"loss": 0.3698, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.3030325210988223, |
|
"grad_norm": 6.303979873657227, |
|
"learning_rate": 0.00014810944623786334, |
|
"loss": 0.3707, |
|
"step": 51750 |
|
}, |
|
{ |
|
"epoch": 0.3044964463215219, |
|
"grad_norm": 39.32520294189453, |
|
"learning_rate": 0.00014779844874450313, |
|
"loss": 0.3617, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.3059603715442215, |
|
"grad_norm": 6.535865306854248, |
|
"learning_rate": 0.00014748869524111637, |
|
"loss": 0.3642, |
|
"step": 52250 |
|
}, |
|
{ |
|
"epoch": 0.3074242967669211, |
|
"grad_norm": 6.031300067901611, |
|
"learning_rate": 0.00014717769774775616, |
|
"loss": 0.363, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.3088882219896208, |
|
"grad_norm": 7.255093097686768, |
|
"learning_rate": 0.00014686670025439595, |
|
"loss": 0.3594, |
|
"step": 52750 |
|
}, |
|
{ |
|
"epoch": 0.3103521472123204, |
|
"grad_norm": 7.491271018981934, |
|
"learning_rate": 0.00014655570276103576, |
|
"loss": 0.3697, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.31181607243502, |
|
"grad_norm": 8.154767036437988, |
|
"learning_rate": 0.00014624470526767555, |
|
"loss": 0.3667, |
|
"step": 53250 |
|
}, |
|
{ |
|
"epoch": 0.31327999765771963, |
|
"grad_norm": 7.7836384773254395, |
|
"learning_rate": 0.00014593370777431534, |
|
"loss": 0.3756, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.31474392288041925, |
|
"grad_norm": 7.439420223236084, |
|
"learning_rate": 0.00014562271028095513, |
|
"loss": 0.3734, |
|
"step": 53750 |
|
}, |
|
{ |
|
"epoch": 0.3162078481031189, |
|
"grad_norm": 7.654810428619385, |
|
"learning_rate": 0.00014531171278759494, |
|
"loss": 0.3689, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.31767177332581853, |
|
"grad_norm": 4.918389320373535, |
|
"learning_rate": 0.00014500195928420816, |
|
"loss": 0.3688, |
|
"step": 54250 |
|
}, |
|
{ |
|
"epoch": 0.31913569854851814, |
|
"grad_norm": 6.2310895919799805, |
|
"learning_rate": 0.00014469096179084797, |
|
"loss": 0.3711, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.32059962377121776, |
|
"grad_norm": 7.458713054656982, |
|
"learning_rate": 0.00014437996429748776, |
|
"loss": 0.3614, |
|
"step": 54750 |
|
}, |
|
{ |
|
"epoch": 0.32206354899391737, |
|
"grad_norm": 6.790125370025635, |
|
"learning_rate": 0.00014406896680412755, |
|
"loss": 0.3635, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.32206354899391737, |
|
"eval_accuracy": 0.8905084935576763, |
|
"eval_loss": 0.362331748008728, |
|
"eval_runtime": 11551.2138, |
|
"eval_samples_per_second": 210.262, |
|
"eval_steps_per_second": 6.571, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.32352747421661704, |
|
"grad_norm": 7.128218650817871, |
|
"learning_rate": 0.00014375796931076737, |
|
"loss": 0.357, |
|
"step": 55250 |
|
}, |
|
{ |
|
"epoch": 0.32499139943931665, |
|
"grad_norm": 4.943136692047119, |
|
"learning_rate": 0.00014344697181740715, |
|
"loss": 0.3576, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.32645532466201627, |
|
"grad_norm": 7.633016109466553, |
|
"learning_rate": 0.00014313597432404694, |
|
"loss": 0.3655, |
|
"step": 55750 |
|
}, |
|
{ |
|
"epoch": 0.3279192498847159, |
|
"grad_norm": 9.49149227142334, |
|
"learning_rate": 0.00014282497683068673, |
|
"loss": 0.3687, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.3293831751074155, |
|
"grad_norm": 7.4215521812438965, |
|
"learning_rate": 0.00014251397933732655, |
|
"loss": 0.3705, |
|
"step": 56250 |
|
}, |
|
{ |
|
"epoch": 0.33084710033011516, |
|
"grad_norm": 5.638499736785889, |
|
"learning_rate": 0.00014220298184396634, |
|
"loss": 0.3709, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.3323110255528148, |
|
"grad_norm": 9.440450668334961, |
|
"learning_rate": 0.00014189198435060613, |
|
"loss": 0.35, |
|
"step": 56750 |
|
}, |
|
{ |
|
"epoch": 0.3337749507755144, |
|
"grad_norm": 7.706991195678711, |
|
"learning_rate": 0.00014158098685724594, |
|
"loss": 0.3601, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.335238875998214, |
|
"grad_norm": 8.154605865478516, |
|
"learning_rate": 0.00014126998936388573, |
|
"loss": 0.3625, |
|
"step": 57250 |
|
}, |
|
{ |
|
"epoch": 0.3367028012209136, |
|
"grad_norm": 7.608438491821289, |
|
"learning_rate": 0.00014095899187052552, |
|
"loss": 0.3588, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.3381667264436133, |
|
"grad_norm": 5.466573715209961, |
|
"learning_rate": 0.00014064799437716534, |
|
"loss": 0.3528, |
|
"step": 57750 |
|
}, |
|
{ |
|
"epoch": 0.3396306516663129, |
|
"grad_norm": 7.514803409576416, |
|
"learning_rate": 0.00014033699688380512, |
|
"loss": 0.3624, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.3410945768890125, |
|
"grad_norm": 4.846391677856445, |
|
"learning_rate": 0.00014002599939044491, |
|
"loss": 0.3525, |
|
"step": 58250 |
|
}, |
|
{ |
|
"epoch": 0.3425585021117121, |
|
"grad_norm": 6.116271018981934, |
|
"learning_rate": 0.0001397150018970847, |
|
"loss": 0.3556, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.34402242733441174, |
|
"grad_norm": 7.234938621520996, |
|
"learning_rate": 0.00013940400440372452, |
|
"loss": 0.3723, |
|
"step": 58750 |
|
}, |
|
{ |
|
"epoch": 0.3454863525571114, |
|
"grad_norm": 8.690266609191895, |
|
"learning_rate": 0.0001390930069103643, |
|
"loss": 0.3671, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.346950277779811, |
|
"grad_norm": 5.558066368103027, |
|
"learning_rate": 0.0001387820094170041, |
|
"loss": 0.3563, |
|
"step": 59250 |
|
}, |
|
{ |
|
"epoch": 0.34841420300251064, |
|
"grad_norm": 5.277857303619385, |
|
"learning_rate": 0.0001384710119236439, |
|
"loss": 0.3633, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.34987812822521025, |
|
"grad_norm": 4.810859680175781, |
|
"learning_rate": 0.00013816125842025712, |
|
"loss": 0.3615, |
|
"step": 59750 |
|
}, |
|
{ |
|
"epoch": 0.35134205344790986, |
|
"grad_norm": 6.860721111297607, |
|
"learning_rate": 0.00013785026092689694, |
|
"loss": 0.3561, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.35280597867060953, |
|
"grad_norm": 6.673612117767334, |
|
"learning_rate": 0.00013753926343353673, |
|
"loss": 0.3513, |
|
"step": 60250 |
|
}, |
|
{ |
|
"epoch": 0.35426990389330915, |
|
"grad_norm": 6.9296956062316895, |
|
"learning_rate": 0.00013722826594017652, |
|
"loss": 0.3563, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.35573382911600876, |
|
"grad_norm": 6.235531806945801, |
|
"learning_rate": 0.0001369172684468163, |
|
"loss": 0.3586, |
|
"step": 60750 |
|
}, |
|
{ |
|
"epoch": 0.3571977543387084, |
|
"grad_norm": 6.549998760223389, |
|
"learning_rate": 0.00013660627095345612, |
|
"loss": 0.3572, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.358661679561408, |
|
"grad_norm": 6.800797939300537, |
|
"learning_rate": 0.0001362952734600959, |
|
"loss": 0.3687, |
|
"step": 61250 |
|
}, |
|
{ |
|
"epoch": 0.3601256047841076, |
|
"grad_norm": 5.545276641845703, |
|
"learning_rate": 0.0001359842759667357, |
|
"loss": 0.3539, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.36158953000680727, |
|
"grad_norm": 8.63070011138916, |
|
"learning_rate": 0.00013567327847337552, |
|
"loss": 0.3605, |
|
"step": 61750 |
|
}, |
|
{ |
|
"epoch": 0.3630534552295069, |
|
"grad_norm": 5.199543476104736, |
|
"learning_rate": 0.0001353622809800153, |
|
"loss": 0.3559, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.3645173804522065, |
|
"grad_norm": 27.297420501708984, |
|
"learning_rate": 0.0001350512834866551, |
|
"loss": 0.3676, |
|
"step": 62250 |
|
}, |
|
{ |
|
"epoch": 0.3659813056749061, |
|
"grad_norm": 8.235854148864746, |
|
"learning_rate": 0.00013474152998326833, |
|
"loss": 0.3583, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.3674452308976057, |
|
"grad_norm": 6.224372386932373, |
|
"learning_rate": 0.00013443053248990812, |
|
"loss": 0.3623, |
|
"step": 62750 |
|
}, |
|
{ |
|
"epoch": 0.3689091561203054, |
|
"grad_norm": 8.013957977294922, |
|
"learning_rate": 0.0001341195349965479, |
|
"loss": 0.3619, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.370373081343005, |
|
"grad_norm": 6.442314147949219, |
|
"learning_rate": 0.00013380853750318773, |
|
"loss": 0.3586, |
|
"step": 63250 |
|
}, |
|
{ |
|
"epoch": 0.3718370065657046, |
|
"grad_norm": 6.883063793182373, |
|
"learning_rate": 0.00013349754000982752, |
|
"loss": 0.3635, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.37330093178840423, |
|
"grad_norm": 5.502562999725342, |
|
"learning_rate": 0.0001331865425164673, |
|
"loss": 0.3525, |
|
"step": 63750 |
|
}, |
|
{ |
|
"epoch": 0.37476485701110385, |
|
"grad_norm": 6.841543197631836, |
|
"learning_rate": 0.00013287554502310712, |
|
"loss": 0.3564, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.3762287822338035, |
|
"grad_norm": 6.850903034210205, |
|
"learning_rate": 0.0001325645475297469, |
|
"loss": 0.3549, |
|
"step": 64250 |
|
}, |
|
{ |
|
"epoch": 0.37769270745650313, |
|
"grad_norm": 5.823826313018799, |
|
"learning_rate": 0.00013225479402636015, |
|
"loss": 0.3488, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.37915663267920274, |
|
"grad_norm": 9.849250793457031, |
|
"learning_rate": 0.00013194379653299997, |
|
"loss": 0.3526, |
|
"step": 64750 |
|
}, |
|
{ |
|
"epoch": 0.38062055790190236, |
|
"grad_norm": 7.8498992919921875, |
|
"learning_rate": 0.00013163279903963975, |
|
"loss": 0.3596, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.38208448312460197, |
|
"grad_norm": 7.845436096191406, |
|
"learning_rate": 0.00013132180154627954, |
|
"loss": 0.3497, |
|
"step": 65250 |
|
}, |
|
{ |
|
"epoch": 0.38354840834730164, |
|
"grad_norm": 10.533845901489258, |
|
"learning_rate": 0.00013101080405291933, |
|
"loss": 0.3523, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.38501233357000125, |
|
"grad_norm": 9.09399127960205, |
|
"learning_rate": 0.00013069980655955912, |
|
"loss": 0.347, |
|
"step": 65750 |
|
}, |
|
{ |
|
"epoch": 0.38647625879270087, |
|
"grad_norm": 7.205333232879639, |
|
"learning_rate": 0.00013038880906619894, |
|
"loss": 0.355, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.3879401840154005, |
|
"grad_norm": 6.770249843597412, |
|
"learning_rate": 0.00013007781157283873, |
|
"loss": 0.3549, |
|
"step": 66250 |
|
}, |
|
{ |
|
"epoch": 0.3894041092381001, |
|
"grad_norm": 8.14482593536377, |
|
"learning_rate": 0.00012976681407947851, |
|
"loss": 0.3537, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.39086803446079976, |
|
"grad_norm": 5.998184680938721, |
|
"learning_rate": 0.0001294558165861183, |
|
"loss": 0.3562, |
|
"step": 66750 |
|
}, |
|
{ |
|
"epoch": 0.3923319596834994, |
|
"grad_norm": 5.583696365356445, |
|
"learning_rate": 0.00012914481909275812, |
|
"loss": 0.3499, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.393795884906199, |
|
"grad_norm": 6.899207592010498, |
|
"learning_rate": 0.0001288338215993979, |
|
"loss": 0.3506, |
|
"step": 67250 |
|
}, |
|
{ |
|
"epoch": 0.3952598101288986, |
|
"grad_norm": 6.205395221710205, |
|
"learning_rate": 0.0001285228241060377, |
|
"loss": 0.3512, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.3967237353515982, |
|
"grad_norm": 9.125551223754883, |
|
"learning_rate": 0.0001282118266126775, |
|
"loss": 0.3585, |
|
"step": 67750 |
|
}, |
|
{ |
|
"epoch": 0.3981876605742979, |
|
"grad_norm": 6.943772792816162, |
|
"learning_rate": 0.0001279008291193173, |
|
"loss": 0.362, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.3996515857969975, |
|
"grad_norm": 6.106304168701172, |
|
"learning_rate": 0.0001275898316259571, |
|
"loss": 0.3545, |
|
"step": 68250 |
|
}, |
|
{ |
|
"epoch": 0.4011155110196971, |
|
"grad_norm": 6.197811126708984, |
|
"learning_rate": 0.00012728007812257036, |
|
"loss": 0.3524, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.4025794362423967, |
|
"grad_norm": 8.07652759552002, |
|
"learning_rate": 0.00012696908062921015, |
|
"loss": 0.3467, |
|
"step": 68750 |
|
}, |
|
{ |
|
"epoch": 0.40404336146509634, |
|
"grad_norm": 7.444363117218018, |
|
"learning_rate": 0.00012665808313584994, |
|
"loss": 0.3541, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.405507286687796, |
|
"grad_norm": 6.2395782470703125, |
|
"learning_rate": 0.00012634708564248972, |
|
"loss": 0.3488, |
|
"step": 69250 |
|
}, |
|
{ |
|
"epoch": 0.4069712119104956, |
|
"grad_norm": 7.489956378936768, |
|
"learning_rate": 0.00012603608814912954, |
|
"loss": 0.3595, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.40843513713319524, |
|
"grad_norm": 6.762283802032471, |
|
"learning_rate": 0.00012572509065576933, |
|
"loss": 0.3555, |
|
"step": 69750 |
|
}, |
|
{ |
|
"epoch": 0.40989906235589485, |
|
"grad_norm": 10.423229217529297, |
|
"learning_rate": 0.00012541409316240912, |
|
"loss": 0.3474, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.41136298757859446, |
|
"grad_norm": 7.812709331512451, |
|
"learning_rate": 0.0001251030956690489, |
|
"loss": 0.3588, |
|
"step": 70250 |
|
}, |
|
{ |
|
"epoch": 0.41282691280129413, |
|
"grad_norm": 8.506246566772461, |
|
"learning_rate": 0.00012479334216566215, |
|
"loss": 0.3473, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.41429083802399375, |
|
"grad_norm": 6.0005784034729, |
|
"learning_rate": 0.00012448234467230196, |
|
"loss": 0.3423, |
|
"step": 70750 |
|
}, |
|
{ |
|
"epoch": 0.41575476324669336, |
|
"grad_norm": 7.6112494468688965, |
|
"learning_rate": 0.00012417134717894175, |
|
"loss": 0.3469, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.417218688469393, |
|
"grad_norm": 6.460068225860596, |
|
"learning_rate": 0.00012386034968558154, |
|
"loss": 0.3514, |
|
"step": 71250 |
|
}, |
|
{ |
|
"epoch": 0.4186826136920926, |
|
"grad_norm": 25.509037017822266, |
|
"learning_rate": 0.00012354935219222136, |
|
"loss": 0.3538, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.42014653891479226, |
|
"grad_norm": 5.778562068939209, |
|
"learning_rate": 0.00012323835469886114, |
|
"loss": 0.3409, |
|
"step": 71750 |
|
}, |
|
{ |
|
"epoch": 0.42161046413749187, |
|
"grad_norm": 10.19543170928955, |
|
"learning_rate": 0.00012292735720550093, |
|
"loss": 0.3487, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.4230743893601915, |
|
"grad_norm": 7.6341633796691895, |
|
"learning_rate": 0.00012261635971214072, |
|
"loss": 0.3477, |
|
"step": 72250 |
|
}, |
|
{ |
|
"epoch": 0.4245383145828911, |
|
"grad_norm": 5.656210422515869, |
|
"learning_rate": 0.00012230536221878054, |
|
"loss": 0.353, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.4260022398055907, |
|
"grad_norm": 7.81094217300415, |
|
"learning_rate": 0.00012199436472542031, |
|
"loss": 0.3589, |
|
"step": 72750 |
|
}, |
|
{ |
|
"epoch": 0.4274661650282904, |
|
"grad_norm": 5.924116611480713, |
|
"learning_rate": 0.0001216833672320601, |
|
"loss": 0.346, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.42893009025099, |
|
"grad_norm": 6.293444633483887, |
|
"learning_rate": 0.00012137236973869992, |
|
"loss": 0.3496, |
|
"step": 73250 |
|
}, |
|
{ |
|
"epoch": 0.4303940154736896, |
|
"grad_norm": 9.766921997070312, |
|
"learning_rate": 0.00012106137224533971, |
|
"loss": 0.347, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.4318579406963892, |
|
"grad_norm": 5.998900890350342, |
|
"learning_rate": 0.0001207503747519795, |
|
"loss": 0.3465, |
|
"step": 73750 |
|
}, |
|
{ |
|
"epoch": 0.43332186591908883, |
|
"grad_norm": 8.364704132080078, |
|
"learning_rate": 0.00012043937725861929, |
|
"loss": 0.3429, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.4347857911417885, |
|
"grad_norm": 5.508989334106445, |
|
"learning_rate": 0.0001201283797652591, |
|
"loss": 0.355, |
|
"step": 74250 |
|
}, |
|
{ |
|
"epoch": 0.4362497163644881, |
|
"grad_norm": 6.357595443725586, |
|
"learning_rate": 0.00011981738227189889, |
|
"loss": 0.3504, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.43771364158718773, |
|
"grad_norm": 8.691376686096191, |
|
"learning_rate": 0.00011950762876851213, |
|
"loss": 0.3471, |
|
"step": 74750 |
|
}, |
|
{ |
|
"epoch": 0.43917756680988734, |
|
"grad_norm": 11.246256828308105, |
|
"learning_rate": 0.00011919663127515193, |
|
"loss": 0.3487, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.44064149203258696, |
|
"grad_norm": 6.3526811599731445, |
|
"learning_rate": 0.00011888563378179172, |
|
"loss": 0.3414, |
|
"step": 75250 |
|
}, |
|
{ |
|
"epoch": 0.4421054172552866, |
|
"grad_norm": 9.6268310546875, |
|
"learning_rate": 0.00011857463628843152, |
|
"loss": 0.3457, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.44356934247798624, |
|
"grad_norm": 8.093045234680176, |
|
"learning_rate": 0.00011826363879507131, |
|
"loss": 0.3515, |
|
"step": 75750 |
|
}, |
|
{ |
|
"epoch": 0.44503326770068585, |
|
"grad_norm": 7.497385025024414, |
|
"learning_rate": 0.00011795264130171111, |
|
"loss": 0.3361, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.44649719292338547, |
|
"grad_norm": 8.374622344970703, |
|
"learning_rate": 0.00011764164380835092, |
|
"loss": 0.3552, |
|
"step": 76250 |
|
}, |
|
{ |
|
"epoch": 0.4479611181460851, |
|
"grad_norm": 8.583603858947754, |
|
"learning_rate": 0.0001173306463149907, |
|
"loss": 0.3395, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.44942504336878475, |
|
"grad_norm": 5.933279991149902, |
|
"learning_rate": 0.0001170196488216305, |
|
"loss": 0.3539, |
|
"step": 76750 |
|
}, |
|
{ |
|
"epoch": 0.45088896859148436, |
|
"grad_norm": 7.1400556564331055, |
|
"learning_rate": 0.00011670989531824375, |
|
"loss": 0.3556, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.452352893814184, |
|
"grad_norm": 6.4177374839782715, |
|
"learning_rate": 0.00011639889782488354, |
|
"loss": 0.34, |
|
"step": 77250 |
|
}, |
|
{ |
|
"epoch": 0.4538168190368836, |
|
"grad_norm": 8.248872756958008, |
|
"learning_rate": 0.00011608790033152333, |
|
"loss": 0.3454, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.4552807442595832, |
|
"grad_norm": 6.789691925048828, |
|
"learning_rate": 0.00011577690283816314, |
|
"loss": 0.3506, |
|
"step": 77750 |
|
}, |
|
{ |
|
"epoch": 0.4567446694822829, |
|
"grad_norm": 7.519604206085205, |
|
"learning_rate": 0.00011546590534480293, |
|
"loss": 0.3438, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.4582085947049825, |
|
"grad_norm": 11.287620544433594, |
|
"learning_rate": 0.00011515490785144272, |
|
"loss": 0.3536, |
|
"step": 78250 |
|
}, |
|
{ |
|
"epoch": 0.4596725199276821, |
|
"grad_norm": 5.6864914894104, |
|
"learning_rate": 0.00011484391035808254, |
|
"loss": 0.348, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.4611364451503817, |
|
"grad_norm": 7.405890941619873, |
|
"learning_rate": 0.00011453291286472232, |
|
"loss": 0.3395, |
|
"step": 78750 |
|
}, |
|
{ |
|
"epoch": 0.4626003703730813, |
|
"grad_norm": 5.379487991333008, |
|
"learning_rate": 0.00011422315936133556, |
|
"loss": 0.3463, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.46406429559578094, |
|
"grad_norm": 7.769617080688477, |
|
"learning_rate": 0.00011391216186797535, |
|
"loss": 0.3458, |
|
"step": 79250 |
|
}, |
|
{ |
|
"epoch": 0.4655282208184806, |
|
"grad_norm": 9.26171875, |
|
"learning_rate": 0.00011360116437461514, |
|
"loss": 0.3394, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.4669921460411802, |
|
"grad_norm": 9.037941932678223, |
|
"learning_rate": 0.00011329016688125493, |
|
"loss": 0.349, |
|
"step": 79750 |
|
}, |
|
{ |
|
"epoch": 0.46845607126387984, |
|
"grad_norm": 8.776792526245117, |
|
"learning_rate": 0.00011297916938789475, |
|
"loss": 0.3384, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.46991999648657945, |
|
"grad_norm": 6.737313270568848, |
|
"learning_rate": 0.00011266817189453454, |
|
"loss": 0.3472, |
|
"step": 80250 |
|
}, |
|
{ |
|
"epoch": 0.47138392170927906, |
|
"grad_norm": 7.2374114990234375, |
|
"learning_rate": 0.00011235717440117432, |
|
"loss": 0.3434, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.47284784693197873, |
|
"grad_norm": 6.939677715301514, |
|
"learning_rate": 0.00011204617690781414, |
|
"loss": 0.3451, |
|
"step": 80750 |
|
}, |
|
{ |
|
"epoch": 0.47431177215467835, |
|
"grad_norm": 4.702803611755371, |
|
"learning_rate": 0.00011173517941445393, |
|
"loss": 0.3508, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.47577569737737796, |
|
"grad_norm": 7.359582901000977, |
|
"learning_rate": 0.00011142418192109372, |
|
"loss": 0.3415, |
|
"step": 81250 |
|
}, |
|
{ |
|
"epoch": 0.4772396226000776, |
|
"grad_norm": 8.404651641845703, |
|
"learning_rate": 0.00011111442841770696, |
|
"loss": 0.3438, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.4787035478227772, |
|
"grad_norm": 6.176925182342529, |
|
"learning_rate": 0.00011080343092434675, |
|
"loss": 0.3484, |
|
"step": 81750 |
|
}, |
|
{ |
|
"epoch": 0.48016747304547686, |
|
"grad_norm": 8.614276885986328, |
|
"learning_rate": 0.00011049243343098655, |
|
"loss": 0.3525, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.48163139826817647, |
|
"grad_norm": 5.756929874420166, |
|
"learning_rate": 0.00011018143593762635, |
|
"loss": 0.3432, |
|
"step": 82250 |
|
}, |
|
{ |
|
"epoch": 0.4830953234908761, |
|
"grad_norm": 7.686267852783203, |
|
"learning_rate": 0.00010987043844426614, |
|
"loss": 0.3508, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.4845592487135757, |
|
"grad_norm": 6.590146541595459, |
|
"learning_rate": 0.00010955944095090593, |
|
"loss": 0.3357, |
|
"step": 82750 |
|
}, |
|
{ |
|
"epoch": 0.4860231739362753, |
|
"grad_norm": 7.363981246948242, |
|
"learning_rate": 0.00010924968744751918, |
|
"loss": 0.3469, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.487487099158975, |
|
"grad_norm": 5.942411422729492, |
|
"learning_rate": 0.00010893868995415897, |
|
"loss": 0.3464, |
|
"step": 83250 |
|
}, |
|
{ |
|
"epoch": 0.4889510243816746, |
|
"grad_norm": 8.531744003295898, |
|
"learning_rate": 0.00010862769246079879, |
|
"loss": 0.3349, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.4904149496043742, |
|
"grad_norm": 20.821125030517578, |
|
"learning_rate": 0.00010831669496743858, |
|
"loss": 0.3434, |
|
"step": 83750 |
|
}, |
|
{ |
|
"epoch": 0.4918788748270738, |
|
"grad_norm": 9.569067001342773, |
|
"learning_rate": 0.00010800569747407836, |
|
"loss": 0.3421, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.49334280004977343, |
|
"grad_norm": 7.6851725578308105, |
|
"learning_rate": 0.00010769469998071815, |
|
"loss": 0.3407, |
|
"step": 84250 |
|
}, |
|
{ |
|
"epoch": 0.4948067252724731, |
|
"grad_norm": 9.591890335083008, |
|
"learning_rate": 0.00010738370248735797, |
|
"loss": 0.347, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.4962706504951727, |
|
"grad_norm": 5.16259765625, |
|
"learning_rate": 0.00010707270499399776, |
|
"loss": 0.3383, |
|
"step": 84750 |
|
}, |
|
{ |
|
"epoch": 0.49773457571787233, |
|
"grad_norm": 4.6993794441223145, |
|
"learning_rate": 0.00010676170750063755, |
|
"loss": 0.3392, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.49919850094057194, |
|
"grad_norm": 6.331507682800293, |
|
"learning_rate": 0.00010645071000727735, |
|
"loss": 0.351, |
|
"step": 85250 |
|
}, |
|
{ |
|
"epoch": 0.5006624261632716, |
|
"grad_norm": 7.329137325286865, |
|
"learning_rate": 0.00010613971251391714, |
|
"loss": 0.3486, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.5021263513859712, |
|
"grad_norm": 6.907947540283203, |
|
"learning_rate": 0.00010582871502055694, |
|
"loss": 0.3443, |
|
"step": 85750 |
|
}, |
|
{ |
|
"epoch": 0.5035902766086708, |
|
"grad_norm": 4.780885696411133, |
|
"learning_rate": 0.00010551771752719674, |
|
"loss": 0.3401, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.5050542018313705, |
|
"grad_norm": 9.042526245117188, |
|
"learning_rate": 0.00010520672003383653, |
|
"loss": 0.3402, |
|
"step": 86250 |
|
}, |
|
{ |
|
"epoch": 0.5065181270540701, |
|
"grad_norm": 5.397533416748047, |
|
"learning_rate": 0.00010489572254047632, |
|
"loss": 0.3392, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.5079820522767697, |
|
"grad_norm": 7.72251033782959, |
|
"learning_rate": 0.00010458472504711612, |
|
"loss": 0.3337, |
|
"step": 86750 |
|
}, |
|
{ |
|
"epoch": 0.5094459774994693, |
|
"grad_norm": 7.379674434661865, |
|
"learning_rate": 0.00010427497154372936, |
|
"loss": 0.3457, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.510909902722169, |
|
"grad_norm": 7.123027801513672, |
|
"learning_rate": 0.00010396397405036915, |
|
"loss": 0.3311, |
|
"step": 87250 |
|
}, |
|
{ |
|
"epoch": 0.5123738279448685, |
|
"grad_norm": 6.388451099395752, |
|
"learning_rate": 0.00010365297655700897, |
|
"loss": 0.3386, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.5138377531675682, |
|
"grad_norm": 8.933717727661133, |
|
"learning_rate": 0.00010334197906364876, |
|
"loss": 0.3377, |
|
"step": 87750 |
|
}, |
|
{ |
|
"epoch": 0.5153016783902679, |
|
"grad_norm": 5.813757419586182, |
|
"learning_rate": 0.000103032225560262, |
|
"loss": 0.3368, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.5167656036129674, |
|
"grad_norm": 10.707741737365723, |
|
"learning_rate": 0.00010272122806690178, |
|
"loss": 0.3429, |
|
"step": 88250 |
|
}, |
|
{ |
|
"epoch": 0.5182295288356671, |
|
"grad_norm": 7.433245658874512, |
|
"learning_rate": 0.00010241023057354157, |
|
"loss": 0.3457, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.5196934540583666, |
|
"grad_norm": 6.408331394195557, |
|
"learning_rate": 0.00010209923308018139, |
|
"loss": 0.3409, |
|
"step": 88750 |
|
}, |
|
{ |
|
"epoch": 0.5211573792810663, |
|
"grad_norm": 7.5843987464904785, |
|
"learning_rate": 0.00010178823558682118, |
|
"loss": 0.3347, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.522621304503766, |
|
"grad_norm": 9.049858093261719, |
|
"learning_rate": 0.00010147723809346097, |
|
"loss": 0.3392, |
|
"step": 89250 |
|
}, |
|
{ |
|
"epoch": 0.5240852297264655, |
|
"grad_norm": 8.207107543945312, |
|
"learning_rate": 0.00010116624060010076, |
|
"loss": 0.334, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.5255491549491652, |
|
"grad_norm": 6.511790752410889, |
|
"learning_rate": 0.00010085648709671401, |
|
"loss": 0.3462, |
|
"step": 89750 |
|
}, |
|
{ |
|
"epoch": 0.5270130801718648, |
|
"grad_norm": 5.541443824768066, |
|
"learning_rate": 0.0001005454896033538, |
|
"loss": 0.3318, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.5284770053945644, |
|
"grad_norm": 6.216821670532227, |
|
"learning_rate": 0.0001002344921099936, |
|
"loss": 0.338, |
|
"step": 90250 |
|
}, |
|
{ |
|
"epoch": 0.5299409306172641, |
|
"grad_norm": 5.138360977172852, |
|
"learning_rate": 9.992349461663339e-05, |
|
"loss": 0.3457, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.5314048558399637, |
|
"grad_norm": 8.401073455810547, |
|
"learning_rate": 9.961249712327319e-05, |
|
"loss": 0.3523, |
|
"step": 90750 |
|
}, |
|
{ |
|
"epoch": 0.5328687810626633, |
|
"grad_norm": 8.749157905578613, |
|
"learning_rate": 9.930149962991298e-05, |
|
"loss": 0.3391, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.5343327062853629, |
|
"grad_norm": 7.809004783630371, |
|
"learning_rate": 9.899050213655278e-05, |
|
"loss": 0.3422, |
|
"step": 91250 |
|
}, |
|
{ |
|
"epoch": 0.5357966315080626, |
|
"grad_norm": 7.649618148803711, |
|
"learning_rate": 9.867950464319257e-05, |
|
"loss": 0.3512, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.5372605567307622, |
|
"grad_norm": 8.770468711853027, |
|
"learning_rate": 9.836850714983237e-05, |
|
"loss": 0.3367, |
|
"step": 91750 |
|
}, |
|
{ |
|
"epoch": 0.5387244819534618, |
|
"grad_norm": 8.32112979888916, |
|
"learning_rate": 9.805750965647216e-05, |
|
"loss": 0.3384, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.5401884071761615, |
|
"grad_norm": 9.602888107299805, |
|
"learning_rate": 9.774651216311197e-05, |
|
"loss": 0.3344, |
|
"step": 92250 |
|
}, |
|
{ |
|
"epoch": 0.541652332398861, |
|
"grad_norm": 3.2295093536376953, |
|
"learning_rate": 9.743551466975177e-05, |
|
"loss": 0.3314, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.5431162576215607, |
|
"grad_norm": 5.456012725830078, |
|
"learning_rate": 9.712451717639156e-05, |
|
"loss": 0.3313, |
|
"step": 92750 |
|
}, |
|
{ |
|
"epoch": 0.5445801828442604, |
|
"grad_norm": 7.777164936065674, |
|
"learning_rate": 9.681351968303136e-05, |
|
"loss": 0.3417, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.5460441080669599, |
|
"grad_norm": 10.10175895690918, |
|
"learning_rate": 9.650252218967115e-05, |
|
"loss": 0.3357, |
|
"step": 93250 |
|
}, |
|
{ |
|
"epoch": 0.5475080332896596, |
|
"grad_norm": 8.296233177185059, |
|
"learning_rate": 9.619152469631095e-05, |
|
"loss": 0.3368, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.5489719585123591, |
|
"grad_norm": 5.55683708190918, |
|
"learning_rate": 9.588052720295075e-05, |
|
"loss": 0.3338, |
|
"step": 93750 |
|
}, |
|
{ |
|
"epoch": 0.5504358837350588, |
|
"grad_norm": 5.92700719833374, |
|
"learning_rate": 9.556952970959054e-05, |
|
"loss": 0.3431, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.5518998089577585, |
|
"grad_norm": 5.411899089813232, |
|
"learning_rate": 9.525853221623034e-05, |
|
"loss": 0.3393, |
|
"step": 94250 |
|
}, |
|
{ |
|
"epoch": 0.553363734180458, |
|
"grad_norm": 6.517271995544434, |
|
"learning_rate": 9.494753472287013e-05, |
|
"loss": 0.3332, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.5548276594031577, |
|
"grad_norm": 9.099715232849121, |
|
"learning_rate": 9.463653722950994e-05, |
|
"loss": 0.3343, |
|
"step": 94750 |
|
}, |
|
{ |
|
"epoch": 0.5562915846258573, |
|
"grad_norm": 4.845067501068115, |
|
"learning_rate": 9.432553973614972e-05, |
|
"loss": 0.3344, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.5577555098485569, |
|
"grad_norm": 8.56153392791748, |
|
"learning_rate": 9.401454224278953e-05, |
|
"loss": 0.33, |
|
"step": 95250 |
|
}, |
|
{ |
|
"epoch": 0.5592194350712566, |
|
"grad_norm": 7.1542439460754395, |
|
"learning_rate": 9.370354474942933e-05, |
|
"loss": 0.3186, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.5606833602939562, |
|
"grad_norm": 7.00217342376709, |
|
"learning_rate": 9.339254725606912e-05, |
|
"loss": 0.335, |
|
"step": 95750 |
|
}, |
|
{ |
|
"epoch": 0.5621472855166558, |
|
"grad_norm": 7.365664482116699, |
|
"learning_rate": 9.308279375268236e-05, |
|
"loss": 0.3303, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.5636112107393554, |
|
"grad_norm": 8.063042640686035, |
|
"learning_rate": 9.277179625932215e-05, |
|
"loss": 0.3441, |
|
"step": 96250 |
|
}, |
|
{ |
|
"epoch": 0.565075135962055, |
|
"grad_norm": 5.403791904449463, |
|
"learning_rate": 9.246079876596195e-05, |
|
"loss": 0.3318, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.5665390611847547, |
|
"grad_norm": 5.911950588226318, |
|
"learning_rate": 9.215104526257519e-05, |
|
"loss": 0.3327, |
|
"step": 96750 |
|
}, |
|
{ |
|
"epoch": 0.5680029864074543, |
|
"grad_norm": 5.484018802642822, |
|
"learning_rate": 9.184004776921499e-05, |
|
"loss": 0.3384, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.569466911630154, |
|
"grad_norm": 4.785627365112305, |
|
"learning_rate": 9.152905027585478e-05, |
|
"loss": 0.3437, |
|
"step": 97250 |
|
}, |
|
{ |
|
"epoch": 0.5709308368528535, |
|
"grad_norm": 7.17230749130249, |
|
"learning_rate": 9.121805278249458e-05, |
|
"loss": 0.3331, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.5723947620755532, |
|
"grad_norm": 7.777104377746582, |
|
"learning_rate": 9.090705528913437e-05, |
|
"loss": 0.3371, |
|
"step": 97750 |
|
}, |
|
{ |
|
"epoch": 0.5738586872982528, |
|
"grad_norm": 6.8572001457214355, |
|
"learning_rate": 9.059605779577417e-05, |
|
"loss": 0.3397, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.5753226125209524, |
|
"grad_norm": 9.132293701171875, |
|
"learning_rate": 9.028506030241398e-05, |
|
"loss": 0.3421, |
|
"step": 98250 |
|
}, |
|
{ |
|
"epoch": 0.5767865377436521, |
|
"grad_norm": 7.351444244384766, |
|
"learning_rate": 8.997406280905376e-05, |
|
"loss": 0.3315, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.5782504629663516, |
|
"grad_norm": 5.444695949554443, |
|
"learning_rate": 8.966306531569357e-05, |
|
"loss": 0.3313, |
|
"step": 98750 |
|
}, |
|
{ |
|
"epoch": 0.5797143881890513, |
|
"grad_norm": 6.229501724243164, |
|
"learning_rate": 8.935206782233336e-05, |
|
"loss": 0.3321, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.581178313411751, |
|
"grad_norm": 4.431236743927002, |
|
"learning_rate": 8.904107032897316e-05, |
|
"loss": 0.3326, |
|
"step": 99250 |
|
}, |
|
{ |
|
"epoch": 0.5826422386344505, |
|
"grad_norm": 4.78348445892334, |
|
"learning_rate": 8.873007283561296e-05, |
|
"loss": 0.3362, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.5841061638571502, |
|
"grad_norm": 5.964051723480225, |
|
"learning_rate": 8.841907534225275e-05, |
|
"loss": 0.3408, |
|
"step": 99750 |
|
}, |
|
{ |
|
"epoch": 0.5855700890798498, |
|
"grad_norm": 5.310559272766113, |
|
"learning_rate": 8.810807784889255e-05, |
|
"loss": 0.3328, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.5870340143025494, |
|
"grad_norm": 4.985818862915039, |
|
"learning_rate": 8.779708035553234e-05, |
|
"loss": 0.337, |
|
"step": 100250 |
|
}, |
|
{ |
|
"epoch": 0.5884979395252491, |
|
"grad_norm": 4.851356506347656, |
|
"learning_rate": 8.748608286217213e-05, |
|
"loss": 0.3314, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.5899618647479486, |
|
"grad_norm": 6.863201141357422, |
|
"learning_rate": 8.717508536881193e-05, |
|
"loss": 0.3231, |
|
"step": 100750 |
|
}, |
|
{ |
|
"epoch": 0.5914257899706483, |
|
"grad_norm": 6.387337684631348, |
|
"learning_rate": 8.686533186542517e-05, |
|
"loss": 0.322, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.5928897151933479, |
|
"grad_norm": 7.897363662719727, |
|
"learning_rate": 8.655433437206496e-05, |
|
"loss": 0.3361, |
|
"step": 101250 |
|
}, |
|
{ |
|
"epoch": 0.5943536404160475, |
|
"grad_norm": 5.876019477844238, |
|
"learning_rate": 8.624333687870476e-05, |
|
"loss": 0.3211, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.5958175656387472, |
|
"grad_norm": 4.175768852233887, |
|
"learning_rate": 8.593233938534457e-05, |
|
"loss": 0.3317, |
|
"step": 101750 |
|
}, |
|
{ |
|
"epoch": 0.5972814908614468, |
|
"grad_norm": 6.496226787567139, |
|
"learning_rate": 8.562134189198435e-05, |
|
"loss": 0.3289, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.5987454160841464, |
|
"grad_norm": 7.092103004455566, |
|
"learning_rate": 8.531034439862416e-05, |
|
"loss": 0.3329, |
|
"step": 102250 |
|
}, |
|
{ |
|
"epoch": 0.600209341306846, |
|
"grad_norm": 7.335963726043701, |
|
"learning_rate": 8.499934690526395e-05, |
|
"loss": 0.3305, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.6016732665295457, |
|
"grad_norm": 6.620415687561035, |
|
"learning_rate": 8.468834941190375e-05, |
|
"loss": 0.3324, |
|
"step": 102750 |
|
}, |
|
{ |
|
"epoch": 0.6031371917522453, |
|
"grad_norm": 6.866759777069092, |
|
"learning_rate": 8.437735191854355e-05, |
|
"loss": 0.3395, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.6046011169749449, |
|
"grad_norm": 7.7242045402526855, |
|
"learning_rate": 8.406759841515678e-05, |
|
"loss": 0.3368, |
|
"step": 103250 |
|
}, |
|
{ |
|
"epoch": 0.6060650421976446, |
|
"grad_norm": 6.402958869934082, |
|
"learning_rate": 8.375660092179658e-05, |
|
"loss": 0.3366, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.6075289674203441, |
|
"grad_norm": 6.456150531768799, |
|
"learning_rate": 8.344560342843637e-05, |
|
"loss": 0.3372, |
|
"step": 103750 |
|
}, |
|
{ |
|
"epoch": 0.6089928926430438, |
|
"grad_norm": 7.6825971603393555, |
|
"learning_rate": 8.313460593507617e-05, |
|
"loss": 0.3331, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.6104568178657435, |
|
"grad_norm": 11.974824905395508, |
|
"learning_rate": 8.282360844171596e-05, |
|
"loss": 0.3317, |
|
"step": 104250 |
|
}, |
|
{ |
|
"epoch": 0.611920743088443, |
|
"grad_norm": 5.445409774780273, |
|
"learning_rate": 8.251261094835576e-05, |
|
"loss": 0.3303, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.6133846683111427, |
|
"grad_norm": 8.099034309387207, |
|
"learning_rate": 8.220161345499555e-05, |
|
"loss": 0.3317, |
|
"step": 104750 |
|
}, |
|
{ |
|
"epoch": 0.6148485935338422, |
|
"grad_norm": 21.789043426513672, |
|
"learning_rate": 8.189061596163535e-05, |
|
"loss": 0.3146, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.6163125187565419, |
|
"grad_norm": 6.879361152648926, |
|
"learning_rate": 8.158086245824859e-05, |
|
"loss": 0.3346, |
|
"step": 105250 |
|
}, |
|
{ |
|
"epoch": 0.6177764439792416, |
|
"grad_norm": 5.477085113525391, |
|
"learning_rate": 8.126986496488838e-05, |
|
"loss": 0.3274, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.6192403692019411, |
|
"grad_norm": 6.2816667556762695, |
|
"learning_rate": 8.095886747152818e-05, |
|
"loss": 0.3271, |
|
"step": 105750 |
|
}, |
|
{ |
|
"epoch": 0.6207042944246408, |
|
"grad_norm": 9.089285850524902, |
|
"learning_rate": 8.064786997816797e-05, |
|
"loss": 0.3351, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.6221682196473404, |
|
"grad_norm": 6.114886283874512, |
|
"learning_rate": 8.033687248480777e-05, |
|
"loss": 0.3296, |
|
"step": 106250 |
|
}, |
|
{ |
|
"epoch": 0.62363214487004, |
|
"grad_norm": 7.2542548179626465, |
|
"learning_rate": 8.002587499144756e-05, |
|
"loss": 0.3246, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.6250960700927397, |
|
"grad_norm": 5.58528995513916, |
|
"learning_rate": 7.971487749808737e-05, |
|
"loss": 0.3327, |
|
"step": 106750 |
|
}, |
|
{ |
|
"epoch": 0.6265599953154393, |
|
"grad_norm": 3.898178815841675, |
|
"learning_rate": 7.940388000472715e-05, |
|
"loss": 0.3291, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.6280239205381389, |
|
"grad_norm": 5.644820690155029, |
|
"learning_rate": 7.909288251136696e-05, |
|
"loss": 0.3281, |
|
"step": 107250 |
|
}, |
|
{ |
|
"epoch": 0.6294878457608385, |
|
"grad_norm": 6.363776206970215, |
|
"learning_rate": 7.878188501800676e-05, |
|
"loss": 0.3304, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.6309517709835382, |
|
"grad_norm": 5.209687232971191, |
|
"learning_rate": 7.847213151462e-05, |
|
"loss": 0.3224, |
|
"step": 107750 |
|
}, |
|
{ |
|
"epoch": 0.6324156962062378, |
|
"grad_norm": 6.911553382873535, |
|
"learning_rate": 7.81611340212598e-05, |
|
"loss": 0.3246, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.6338796214289374, |
|
"grad_norm": 7.6557111740112305, |
|
"learning_rate": 7.785013652789959e-05, |
|
"loss": 0.322, |
|
"step": 108250 |
|
}, |
|
{ |
|
"epoch": 0.6353435466516371, |
|
"grad_norm": 7.857481002807617, |
|
"learning_rate": 7.753913903453939e-05, |
|
"loss": 0.3318, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.6368074718743366, |
|
"grad_norm": 5.911120891571045, |
|
"learning_rate": 7.722814154117918e-05, |
|
"loss": 0.325, |
|
"step": 108750 |
|
}, |
|
{ |
|
"epoch": 0.6382713970970363, |
|
"grad_norm": 8.592209815979004, |
|
"learning_rate": 7.691714404781898e-05, |
|
"loss": 0.3209, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.639735322319736, |
|
"grad_norm": 6.824602127075195, |
|
"learning_rate": 7.660614655445879e-05, |
|
"loss": 0.3331, |
|
"step": 109250 |
|
}, |
|
{ |
|
"epoch": 0.6411992475424355, |
|
"grad_norm": 6.813981056213379, |
|
"learning_rate": 7.629514906109858e-05, |
|
"loss": 0.3313, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.6426631727651352, |
|
"grad_norm": 5.7169671058654785, |
|
"learning_rate": 7.598539555771181e-05, |
|
"loss": 0.3206, |
|
"step": 109750 |
|
}, |
|
{ |
|
"epoch": 0.6441270979878347, |
|
"grad_norm": 5.429720401763916, |
|
"learning_rate": 7.56743980643516e-05, |
|
"loss": 0.3192, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.6441270979878347, |
|
"eval_accuracy": 0.8997983351325891, |
|
"eval_loss": 0.3242824375629425, |
|
"eval_runtime": 11546.6804, |
|
"eval_samples_per_second": 210.345, |
|
"eval_steps_per_second": 6.573, |
|
"step": 110000 |
|
} |
|
], |
|
"logging_steps": 250, |
|
"max_steps": 170773, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 55000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.8505890873482936e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|