|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
  "eval_steps": 500,
|
"global_step": 72699, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006877673695649183, |
|
"grad_norm": 0.19629216194152832, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9938, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0013755347391298366, |
|
"grad_norm": 0.24478936195373535, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9596, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.002063302108694755, |
|
"grad_norm": 0.22170111536979675, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9526, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0027510694782596733, |
|
"grad_norm": 0.2311343252658844, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9471, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.003438836847824592, |
|
"grad_norm": 0.20621128380298615, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9435, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.00412660421738951, |
|
"grad_norm": 0.22248196601867676, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9396, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.004814371586954428, |
|
"grad_norm": 0.20232965052127838, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9362, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0055021389565193465, |
|
"grad_norm": 0.21155332028865814, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9285, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.006189906326084265, |
|
"grad_norm": 0.25176894664764404, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9319, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.006877673695649184, |
|
"grad_norm": 0.21027377247810364, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9304, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.007565441065214102, |
|
"grad_norm": 0.2434869110584259, |
|
"learning_rate": 0.0001, |
|
"loss": 1.93, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.00825320843477902, |
|
"grad_norm": 0.1908300668001175, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9254, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.008940975804343939, |
|
"grad_norm": 0.2221110612154007, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9226, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.009628743173908856, |
|
"grad_norm": 0.22620266675949097, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9262, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.010316510543473776, |
|
"grad_norm": 0.21463032066822052, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9201, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.011004277913038693, |
|
"grad_norm": 0.19383488595485687, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9236, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.011692045282603612, |
|
"grad_norm": 0.22416023910045624, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9186, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.01237981265216853, |
|
"grad_norm": 0.2285342961549759, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9207, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.013067580021733449, |
|
"grad_norm": 0.20416004955768585, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9171, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.013755347391298368, |
|
"grad_norm": 0.20697274804115295, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9177, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.014443114760863286, |
|
"grad_norm": 0.2317676991224289, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9138, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.015130882130428205, |
|
"grad_norm": 0.21276156604290009, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9111, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.015818649499993124, |
|
"grad_norm": 0.20574018359184265, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9155, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.01650641686955804, |
|
"grad_norm": 0.19410207867622375, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9073, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.01719418423912296, |
|
"grad_norm": 0.19570203125476837, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9085, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.017881951608687878, |
|
"grad_norm": 0.2081640362739563, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9093, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.018569718978252797, |
|
"grad_norm": 0.19721642136573792, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9086, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.019257486347817713, |
|
"grad_norm": 0.202309712767601, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9039, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.019945253717382632, |
|
"grad_norm": 0.22128838300704956, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9067, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.02063302108694755, |
|
"grad_norm": 0.25011196732521057, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9055, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.02132078845651247, |
|
"grad_norm": 0.20523639023303986, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9039, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.022008555826077386, |
|
"grad_norm": 0.2327890396118164, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9059, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.022696323195642305, |
|
"grad_norm": 0.22426384687423706, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9033, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.023384090565207225, |
|
"grad_norm": 0.2116124927997589, |
|
"learning_rate": 0.0001, |
|
"loss": 1.902, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.024071857934772144, |
|
"grad_norm": 0.21172966063022614, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9007, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.02475962530433706, |
|
"grad_norm": 0.19443170726299286, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9003, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.02544739267390198, |
|
"grad_norm": 0.21195723116397858, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9015, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.026135160043466898, |
|
"grad_norm": 0.22141411900520325, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8957, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.026822927413031817, |
|
"grad_norm": 0.22995401918888092, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8979, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.027510694782596736, |
|
"grad_norm": 0.2246379405260086, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8966, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.028198462152161652, |
|
"grad_norm": 0.22695621848106384, |
|
"learning_rate": 0.0001, |
|
"loss": 1.895, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.02888622952172657, |
|
"grad_norm": 0.19988253712654114, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8934, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.02957399689129149, |
|
"grad_norm": 0.21754223108291626, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8972, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.03026176426085641, |
|
"grad_norm": 0.19053423404693604, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8912, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.030949531630421325, |
|
"grad_norm": 0.21589875221252441, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8935, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.03163729899998625, |
|
"grad_norm": 0.2087436020374298, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8923, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.03232506636955116, |
|
"grad_norm": 0.2261374592781067, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8914, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.03301283373911608, |
|
"grad_norm": 0.1949523240327835, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8905, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.033700601108681, |
|
"grad_norm": 0.21544858813285828, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8909, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.03438836847824592, |
|
"grad_norm": 0.20145681500434875, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8876, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.03507613584781084, |
|
"grad_norm": 0.21707232296466827, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8915, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.035763903217375756, |
|
"grad_norm": 0.1982990950345993, |
|
"learning_rate": 0.0001, |
|
"loss": 1.888, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.036451670586940675, |
|
"grad_norm": 0.2223712056875229, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8868, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.037139437956505594, |
|
"grad_norm": 0.19649413228034973, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8869, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.03782720532607051, |
|
"grad_norm": 0.22767962515354156, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8901, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.038514972695635426, |
|
"grad_norm": 0.19138416647911072, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8916, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.039202740065200345, |
|
"grad_norm": 0.19380460679531097, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8889, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.039890507434765264, |
|
"grad_norm": 0.19751518964767456, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8868, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.04057827480433018, |
|
"grad_norm": 0.21071408689022064, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8862, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.0412660421738951, |
|
"grad_norm": 0.19260670244693756, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8827, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.04195380954346002, |
|
"grad_norm": 0.19185714423656464, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8866, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.04264157691302494, |
|
"grad_norm": 0.24877017736434937, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8854, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.04332934428258986, |
|
"grad_norm": 0.1947249323129654, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8842, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.04401711165215477, |
|
"grad_norm": 0.20210722088813782, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8837, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.04470487902171969, |
|
"grad_norm": 0.22242394089698792, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8817, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.04539264639128461, |
|
"grad_norm": 0.2049330472946167, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8845, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.04608041376084953, |
|
"grad_norm": 0.19368599355220795, |
|
"learning_rate": 0.0001, |
|
"loss": 1.884, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.04676818113041445, |
|
"grad_norm": 0.1886671483516693, |
|
"learning_rate": 0.0001, |
|
"loss": 1.883, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.04745594849997937, |
|
"grad_norm": 0.19359445571899414, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8824, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.04814371586954429, |
|
"grad_norm": 0.195325568318367, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8806, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.04883148323910921, |
|
"grad_norm": 0.21584388613700867, |
|
"learning_rate": 0.0001, |
|
"loss": 1.879, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.04951925060867412, |
|
"grad_norm": 0.19085532426834106, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8817, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.05020701797823904, |
|
"grad_norm": 0.2133578211069107, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8797, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.05089478534780396, |
|
"grad_norm": 0.19587628543376923, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8806, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.051582552717368876, |
|
"grad_norm": 0.22608409821987152, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8803, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.052270320086933796, |
|
"grad_norm": 0.20075012743473053, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8773, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.052958087456498715, |
|
"grad_norm": 0.2007540464401245, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8775, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.053645854826063634, |
|
"grad_norm": 0.20465299487113953, |
|
"learning_rate": 0.0001, |
|
"loss": 1.88, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.05433362219562855, |
|
"grad_norm": 0.19921573996543884, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8749, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.05502138956519347, |
|
"grad_norm": 0.19196507334709167, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8808, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.055709156934758385, |
|
"grad_norm": 0.20529140532016754, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8787, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.056396924304323304, |
|
"grad_norm": 0.23082584142684937, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8752, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.05708469167388822, |
|
"grad_norm": 0.18597312271595, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8793, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.05777245904345314, |
|
"grad_norm": 0.23071937263011932, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8782, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.05846022641301806, |
|
"grad_norm": 0.19141189754009247, |
|
"learning_rate": 0.0001, |
|
"loss": 1.875, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.05914799378258298, |
|
"grad_norm": 0.23278222978115082, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8805, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.0598357611521479, |
|
"grad_norm": 0.21169067919254303, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8753, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.06052352852171282, |
|
"grad_norm": 0.2010953575372696, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8758, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.06121129589127773, |
|
"grad_norm": 0.19260814785957336, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8731, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.06189906326084265, |
|
"grad_norm": 0.19751103222370148, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8719, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.06258683063040757, |
|
"grad_norm": 0.21297581493854523, |
|
"learning_rate": 0.0001, |
|
"loss": 1.875, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.0632745979999725, |
|
"grad_norm": 0.2128158062696457, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8711, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.06396236536953741, |
|
"grad_norm": 0.18719784915447235, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8741, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.06465013273910232, |
|
"grad_norm": 0.2352721244096756, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8717, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.06533790010866725, |
|
"grad_norm": 0.22228975594043732, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8707, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.06602566747823216, |
|
"grad_norm": 0.18716222047805786, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8705, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.06671343484779708, |
|
"grad_norm": 0.22167149186134338, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8739, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.067401202217362, |
|
"grad_norm": 0.24794642627239227, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8747, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.06808896958692692, |
|
"grad_norm": 0.18762528896331787, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8702, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.06877673695649184, |
|
"grad_norm": 0.19063113629817963, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8733, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.06946450432605676, |
|
"grad_norm": 0.1940603107213974, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8685, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.07015227169562167, |
|
"grad_norm": 0.19752484560012817, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8762, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.07084003906518659, |
|
"grad_norm": 0.23486199975013733, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8708, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.07152780643475151, |
|
"grad_norm": 0.20315973460674286, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8676, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.07221557380431642, |
|
"grad_norm": 0.1925646960735321, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8634, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.07290334117388135, |
|
"grad_norm": 0.20540663599967957, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8706, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.07359110854344626, |
|
"grad_norm": 0.23649099469184875, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8685, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.07427887591301119, |
|
"grad_norm": 0.23272614181041718, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8724, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.0749666432825761, |
|
"grad_norm": 0.1887608915567398, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8707, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.07565441065214101, |
|
"grad_norm": 0.18964676558971405, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8642, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.07634217802170594, |
|
"grad_norm": 0.20009934902191162, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8657, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.07702994539127085, |
|
"grad_norm": 0.1821998506784439, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8673, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.07771771276083578, |
|
"grad_norm": 0.18905235826969147, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8687, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.07840548013040069, |
|
"grad_norm": 0.19986678659915924, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8627, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.07909324749996562, |
|
"grad_norm": 0.1904374659061432, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8633, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.07978101486953053, |
|
"grad_norm": 0.19536761939525604, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8685, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.08046878223909545, |
|
"grad_norm": 0.18209826946258545, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8599, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.08115654960866037, |
|
"grad_norm": 0.21385939419269562, |
|
"learning_rate": 0.0001, |
|
"loss": 1.866, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.08184431697822528, |
|
"grad_norm": 0.20338542759418488, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8669, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.0825320843477902, |
|
"grad_norm": 0.19536232948303223, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8644, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.08321985171735512, |
|
"grad_norm": 0.18480873107910156, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8668, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.08390761908692004, |
|
"grad_norm": 0.18024863302707672, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8638, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.08459538645648496, |
|
"grad_norm": 0.18774175643920898, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8652, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.08528315382604988, |
|
"grad_norm": 0.2518685460090637, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8649, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.0859709211956148, |
|
"grad_norm": 0.20646634697914124, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8658, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.08665868856517972, |
|
"grad_norm": 0.19222316145896912, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8642, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.08734645593474463, |
|
"grad_norm": 0.19531960785388947, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8641, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.08803422330430954, |
|
"grad_norm": 0.18218673765659332, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8599, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.08872199067387447, |
|
"grad_norm": 0.18686556816101074, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8588, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.08940975804343938, |
|
"grad_norm": 0.20718005299568176, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8595, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.09009752541300431, |
|
"grad_norm": 0.17680206894874573, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8625, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.09078529278256922, |
|
"grad_norm": 0.25429028272628784, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8635, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.09147306015213415, |
|
"grad_norm": 0.19778478145599365, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8618, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.09216082752169906, |
|
"grad_norm": 0.21198226511478424, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8613, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.09284859489126399, |
|
"grad_norm": 0.1819111704826355, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8601, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.0935363622608289, |
|
"grad_norm": 0.2141820788383484, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8598, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.09422412963039381, |
|
"grad_norm": 0.20356012880802155, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8619, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.09491189699995874, |
|
"grad_norm": 0.18998335301876068, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8597, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.09559966436952365, |
|
"grad_norm": 0.19086682796478271, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8622, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.09628743173908857, |
|
"grad_norm": 0.2049364447593689, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8617, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.09697519910865349, |
|
"grad_norm": 0.19833974540233612, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8609, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.09766296647821841, |
|
"grad_norm": 0.19551745057106018, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8581, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.09835073384778333, |
|
"grad_norm": 0.1846143752336502, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8569, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.09903850121734824, |
|
"grad_norm": 0.1906626969575882, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8614, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.09972626858691316, |
|
"grad_norm": 0.19115209579467773, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8633, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.10041403595647808, |
|
"grad_norm": 0.18704906105995178, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8601, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.101101803326043, |
|
"grad_norm": 0.18635210394859314, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8605, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.10178957069560791, |
|
"grad_norm": 0.1947161853313446, |
|
"learning_rate": 0.0001, |
|
"loss": 1.861, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.10247733806517284, |
|
"grad_norm": 0.22087708115577698, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8553, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.10316510543473775, |
|
"grad_norm": 0.1805039346218109, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8591, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.10385287280430268, |
|
"grad_norm": 0.19084776937961578, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8561, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.10454064017386759, |
|
"grad_norm": 0.20166590809822083, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8584, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.1052284075434325, |
|
"grad_norm": 0.1892371028661728, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8526, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.10591617491299743, |
|
"grad_norm": 0.22085241973400116, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8561, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.10660394228256234, |
|
"grad_norm": 0.186112642288208, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8597, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.10729170965212727, |
|
"grad_norm": 0.1959947943687439, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8558, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.10797947702169218, |
|
"grad_norm": 0.21492016315460205, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8608, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.1086672443912571, |
|
"grad_norm": 0.18600517511367798, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8559, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.10935501176082202, |
|
"grad_norm": 0.18841132521629333, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8542, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.11004277913038694, |
|
"grad_norm": 0.20758236944675446, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8565, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.11073054649995186, |
|
"grad_norm": 0.20206254720687866, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8553, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.11141831386951677, |
|
"grad_norm": 0.19620998203754425, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8542, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.1121060812390817, |
|
"grad_norm": 0.19747626781463623, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8545, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.11279384860864661, |
|
"grad_norm": 0.21328890323638916, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8552, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.11348161597821153, |
|
"grad_norm": 0.18296054005622864, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8579, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.11416938334777645, |
|
"grad_norm": 0.21098335087299347, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8526, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.11485715071734137, |
|
"grad_norm": 0.18666841089725494, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8484, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.11554491808690628, |
|
"grad_norm": 0.18522906303405762, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8538, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.1162326854564712, |
|
"grad_norm": 0.1890312135219574, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8519, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.11692045282603612, |
|
"grad_norm": 0.197422057390213, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8513, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.11760822019560103, |
|
"grad_norm": 0.21355442702770233, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8561, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.11829598756516596, |
|
"grad_norm": 0.18543662130832672, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8538, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.11898375493473087, |
|
"grad_norm": 0.20849215984344482, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8527, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.1196715223042958, |
|
"grad_norm": 0.2109488546848297, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8496, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.12035928967386071, |
|
"grad_norm": 0.20195640623569489, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8499, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.12104705704342564, |
|
"grad_norm": 0.1749362200498581, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8559, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.12173482441299055, |
|
"grad_norm": 0.20881310105323792, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8536, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.12242259178255546, |
|
"grad_norm": 0.1801750510931015, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8507, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.12311035915212039, |
|
"grad_norm": 0.1898815929889679, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8493, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.1237981265216853, |
|
"grad_norm": 0.19754734635353088, |
|
"learning_rate": 0.0001, |
|
"loss": 1.853, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.12448589389125023, |
|
"grad_norm": 0.1855219006538391, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8529, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.12517366126081514, |
|
"grad_norm": 0.19341996312141418, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8513, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.12586142863038005, |
|
"grad_norm": 0.19776052236557007, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8507, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.126549195999945, |
|
"grad_norm": 0.185306116938591, |
|
"learning_rate": 0.0001, |
|
"loss": 1.851, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.1272369633695099, |
|
"grad_norm": 0.19926750659942627, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8504, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.12792473073907482, |
|
"grad_norm": 0.21605028212070465, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8502, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.12861249810863973, |
|
"grad_norm": 0.18174859881401062, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8505, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.12930026547820464, |
|
"grad_norm": 0.19654984772205353, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8517, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.12998803284776958, |
|
"grad_norm": 0.1764276772737503, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8483, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.1306758002173345, |
|
"grad_norm": 0.17811571061611176, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8469, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.1313635675868994, |
|
"grad_norm": 0.20159000158309937, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8455, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.13205133495646432, |
|
"grad_norm": 0.1840062290430069, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8511, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.13273910232602926, |
|
"grad_norm": 0.190440833568573, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8474, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.13342686969559417, |
|
"grad_norm": 0.20033535361289978, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8479, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.13411463706515908, |
|
"grad_norm": 0.1811174899339676, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8504, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.134802404434724, |
|
"grad_norm": 0.2073344737291336, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8507, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.1354901718042889, |
|
"grad_norm": 0.21762603521347046, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8499, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.13617793917385385, |
|
"grad_norm": 0.1864607185125351, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8471, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.13686570654341876, |
|
"grad_norm": 0.17837654054164886, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8485, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.13755347391298367, |
|
"grad_norm": 0.20498532056808472, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8497, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.13824124128254858, |
|
"grad_norm": 0.18355566263198853, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8458, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.13892900865211352, |
|
"grad_norm": 0.2033490389585495, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8451, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.13961677602167843, |
|
"grad_norm": 0.1855219006538391, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8475, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.14030454339124335, |
|
"grad_norm": 0.18876652419567108, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8473, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.14099231076080826, |
|
"grad_norm": 0.1731424629688263, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8475, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.14168007813037317, |
|
"grad_norm": 0.186906635761261, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8498, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.1423678454999381, |
|
"grad_norm": 0.18285425007343292, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8451, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.14305561286950302, |
|
"grad_norm": 0.19545456767082214, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8487, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.14374338023906794, |
|
"grad_norm": 0.16256272792816162, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8461, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.14443114760863285, |
|
"grad_norm": 0.19637931883335114, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8462, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.14511891497819776, |
|
"grad_norm": 0.20408660173416138, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8465, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.1458066823477627, |
|
"grad_norm": 0.2140285223722458, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8421, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.1464944497173276, |
|
"grad_norm": 0.18366774916648865, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8454, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.14718221708689253, |
|
"grad_norm": 0.19011645019054413, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8427, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.14786998445645744, |
|
"grad_norm": 0.1923753321170807, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8442, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.14855775182602238, |
|
"grad_norm": 0.19208142161369324, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8413, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.1492455191955873, |
|
"grad_norm": 0.19608841836452484, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8468, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.1499332865651522, |
|
"grad_norm": 0.19484341144561768, |
|
"learning_rate": 0.0001, |
|
"loss": 1.849, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.15062105393471711, |
|
"grad_norm": 0.18584389984607697, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8416, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.15130882130428203, |
|
"grad_norm": 0.1894279420375824, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8454, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.15199658867384697, |
|
"grad_norm": 0.19622810184955597, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8449, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.15268435604341188, |
|
"grad_norm": 0.18603233993053436, |
|
"learning_rate": 0.0001, |
|
"loss": 1.848, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.1533721234129768, |
|
"grad_norm": 0.18146397173404694, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8413, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.1540598907825417, |
|
"grad_norm": 0.20820939540863037, |
|
"learning_rate": 0.0001, |
|
"loss": 1.844, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.15474765815210664, |
|
"grad_norm": 0.18021373450756073, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8434, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.15543542552167156, |
|
"grad_norm": 0.19339635968208313, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8405, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.15612319289123647, |
|
"grad_norm": 0.1994727998971939, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8403, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.15681096026080138, |
|
"grad_norm": 0.1830483376979828, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8415, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.1574987276303663, |
|
"grad_norm": 0.17064842581748962, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8433, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.15818649499993123, |
|
"grad_norm": 0.19161944091320038, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8428, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.15887426236949614, |
|
"grad_norm": 0.21216394007205963, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8432, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.15956202973906106, |
|
"grad_norm": 0.1909138560295105, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8429, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.16024979710862597, |
|
"grad_norm": 0.20326951146125793, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8419, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.1609375644781909, |
|
"grad_norm": 0.19515758752822876, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8448, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.16162533184775582, |
|
"grad_norm": 0.2075706273317337, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8439, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.16231309921732073, |
|
"grad_norm": 0.21147705614566803, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8433, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.16300086658688565, |
|
"grad_norm": 0.18318484723567963, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8383, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.16368863395645056, |
|
"grad_norm": 0.18728312849998474, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8426, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.1643764013260155, |
|
"grad_norm": 0.20905287563800812, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8421, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.1650641686955804, |
|
"grad_norm": 0.18393969535827637, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8408, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.16575193606514532, |
|
"grad_norm": 0.18366305530071259, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8365, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.16643970343471023, |
|
"grad_norm": 0.19170603156089783, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8416, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.16712747080427517, |
|
"grad_norm": 0.172319233417511, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8411, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.1678152381738401, |
|
"grad_norm": 0.2174234390258789, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8416, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.168503005543405, |
|
"grad_norm": 0.20210625231266022, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8422, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.1691907729129699, |
|
"grad_norm": 0.1902657449245453, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8369, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.16987854028253482, |
|
"grad_norm": 0.18901073932647705, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8415, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.17056630765209976, |
|
"grad_norm": 0.17624430358409882, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8373, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.17125407502166468, |
|
"grad_norm": 0.1844191551208496, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8391, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.1719418423912296, |
|
"grad_norm": 0.19392350316047668, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8416, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.1726296097607945, |
|
"grad_norm": 0.18644706904888153, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8409, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.17331737713035944, |
|
"grad_norm": 0.19530895352363586, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8381, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.17400514449992435, |
|
"grad_norm": 0.18004032969474792, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8419, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.17469291186948926, |
|
"grad_norm": 0.20025117695331573, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8379, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.17538067923905418, |
|
"grad_norm": 0.17622490227222443, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8364, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.1760684466086191, |
|
"grad_norm": 0.19657030701637268, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8364, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.17675621397818403, |
|
"grad_norm": 0.19141744077205658, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8388, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.17744398134774894, |
|
"grad_norm": 0.23409488797187805, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8392, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.17813174871731385, |
|
"grad_norm": 0.19104769825935364, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8407, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.17881951608687877, |
|
"grad_norm": 0.1978139728307724, |
|
"learning_rate": 0.0001, |
|
"loss": 1.836, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.1795072834564437, |
|
"grad_norm": 0.1839970201253891, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8406, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.18019505082600862, |
|
"grad_norm": 0.1969710737466812, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8382, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.18088281819557353, |
|
"grad_norm": 0.21036314964294434, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8372, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.18157058556513844, |
|
"grad_norm": 0.18064115941524506, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8387, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.18225835293470335, |
|
"grad_norm": 0.20280593633651733, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8345, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.1829461203042683, |
|
"grad_norm": 0.21196794509887695, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8403, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.1836338876738332, |
|
"grad_norm": 0.18529263138771057, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8395, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.18432165504339812, |
|
"grad_norm": 0.20009498298168182, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8418, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.18500942241296303, |
|
"grad_norm": 0.1844586879014969, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8388, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.18569718978252797, |
|
"grad_norm": 0.17497003078460693, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8374, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.18638495715209288, |
|
"grad_norm": 0.21536414325237274, |
|
"learning_rate": 0.0001, |
|
"loss": 1.834, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 0.1870727245216578, |
|
"grad_norm": 0.20212842524051666, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8361, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.1877604918912227, |
|
"grad_norm": 0.21032044291496277, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8352, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 0.18844825926078762, |
|
"grad_norm": 0.17547431588172913, |
|
"learning_rate": 0.0001, |
|
"loss": 1.839, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.18913602663035256, |
|
"grad_norm": 0.17463110387325287, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8345, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 0.18982379399991747, |
|
"grad_norm": 0.19794687628746033, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8367, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.19051156136948239, |
|
"grad_norm": 0.17595866322517395, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8349, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 0.1911993287390473, |
|
"grad_norm": 0.19087472558021545, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8377, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.1918870961086122, |
|
"grad_norm": 0.1895439624786377, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8392, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.19257486347817715, |
|
"grad_norm": 0.19558320939540863, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8331, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.19326263084774206, |
|
"grad_norm": 0.18495230376720428, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8357, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 0.19395039821730697, |
|
"grad_norm": 0.19197221100330353, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8379, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.1946381655868719, |
|
"grad_norm": 0.17729446291923523, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8336, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 0.19532593295643683, |
|
"grad_norm": 0.20683547854423523, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8344, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.19601370032600174, |
|
"grad_norm": 0.16708314418792725, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8375, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 0.19670146769556665, |
|
"grad_norm": 0.2065526694059372, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8397, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.19738923506513156, |
|
"grad_norm": 0.2007008045911789, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8351, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 0.19807700243469648, |
|
"grad_norm": 0.1773243397474289, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8338, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.19876476980426142, |
|
"grad_norm": 0.1875116229057312, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8379, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 0.19945253717382633, |
|
"grad_norm": 0.19387130439281464, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8343, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.20014030454339124, |
|
"grad_norm": 0.17164736986160278, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8338, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 0.20082807191295615, |
|
"grad_norm": 0.19135966897010803, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8321, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.2015158392825211, |
|
"grad_norm": 0.21152153611183167, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8332, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 0.202203606652086, |
|
"grad_norm": 0.19576500356197357, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8338, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.20289137402165092, |
|
"grad_norm": 0.21700510382652283, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8381, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 0.20357914139121583, |
|
"grad_norm": 0.18183092772960663, |
|
"learning_rate": 0.0001, |
|
"loss": 1.833, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.20426690876078074, |
|
"grad_norm": 0.1678183525800705, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8365, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 0.20495467613034568, |
|
"grad_norm": 0.1790694147348404, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8323, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.2056424434999106, |
|
"grad_norm": 0.17274673283100128, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8357, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 0.2063302108694755, |
|
"grad_norm": 0.1773209273815155, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8338, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.20701797823904042, |
|
"grad_norm": 0.29811668395996094, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8322, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 0.20770574560860536, |
|
"grad_norm": 0.18590272963047028, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8307, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.20839351297817027, |
|
"grad_norm": 0.19656258821487427, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8364, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 0.20908128034773518, |
|
"grad_norm": 0.1760113537311554, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8363, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.2097690477173001, |
|
"grad_norm": 0.17442069947719574, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8346, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 0.210456815086865, |
|
"grad_norm": 0.2154201865196228, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8359, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.21114458245642995, |
|
"grad_norm": 0.18702222406864166, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8333, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 0.21183234982599486, |
|
"grad_norm": 0.222214013338089, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8386, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.21252011719555977, |
|
"grad_norm": 0.18646612763404846, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8336, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 0.21320788456512468, |
|
"grad_norm": 0.19032032787799835, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8359, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.21389565193468962, |
|
"grad_norm": 0.1962030827999115, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8314, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 0.21458341930425454, |
|
"grad_norm": 0.18067054450511932, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8298, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.21527118667381945, |
|
"grad_norm": 0.1977655440568924, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8335, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 0.21595895404338436, |
|
"grad_norm": 0.17689162492752075, |
|
"learning_rate": 0.0001, |
|
"loss": 1.834, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.21664672141294927, |
|
"grad_norm": 0.189301997423172, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8302, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 0.2173344887825142, |
|
"grad_norm": 0.21416552364826202, |
|
"learning_rate": 0.0001, |
|
"loss": 1.833, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.21802225615207912, |
|
"grad_norm": 0.17280973494052887, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8325, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 0.21871002352164404, |
|
"grad_norm": 0.2203332632780075, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8315, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.21939779089120895, |
|
"grad_norm": 0.17942380905151367, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8313, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 0.2200855582607739, |
|
"grad_norm": 0.2053511142730713, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8322, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.2207733256303388, |
|
"grad_norm": 0.18660666048526764, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8315, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 0.2214610929999037, |
|
"grad_norm": 0.20179618895053864, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8309, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.22214886036946863, |
|
"grad_norm": 0.1849927455186844, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8349, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 0.22283662773903354, |
|
"grad_norm": 0.16893066465854645, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8333, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.22352439510859848, |
|
"grad_norm": 0.1815815567970276, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8277, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 0.2242121624781634, |
|
"grad_norm": 0.17478667199611664, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8324, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.2248999298477283, |
|
"grad_norm": 0.20333503186702728, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8299, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 0.22558769721729321, |
|
"grad_norm": 0.19628338515758514, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8322, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.22627546458685815, |
|
"grad_norm": 0.19011887907981873, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8301, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 0.22696323195642307, |
|
"grad_norm": 0.19007809460163116, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8306, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.22765099932598798, |
|
"grad_norm": 0.18108965456485748, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8304, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 0.2283387666955529, |
|
"grad_norm": 0.16927501559257507, |
|
"learning_rate": 0.0001, |
|
"loss": 1.832, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.2290265340651178, |
|
"grad_norm": 0.18328557908535004, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8315, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 0.22971430143468274, |
|
"grad_norm": 0.21978403627872467, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8314, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.23040206880424766, |
|
"grad_norm": 0.1928972601890564, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8281, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 0.23108983617381257, |
|
"grad_norm": 0.19355738162994385, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8289, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.23177760354337748, |
|
"grad_norm": 0.18013496696949005, |
|
"learning_rate": 0.0001, |
|
"loss": 1.831, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 0.2324653709129424, |
|
"grad_norm": 0.1848910003900528, |
|
"learning_rate": 0.0001, |
|
"loss": 1.826, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.23315313828250733, |
|
"grad_norm": 0.20185594260692596, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8274, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 0.23384090565207225, |
|
"grad_norm": 0.1898491382598877, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8292, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.23452867302163716, |
|
"grad_norm": 0.17610591650009155, |
|
"learning_rate": 0.0001, |
|
"loss": 1.831, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 0.23521644039120207, |
|
"grad_norm": 0.2032867968082428, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8306, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.235904207760767, |
|
"grad_norm": 0.1812831312417984, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8331, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 0.23659197513033192, |
|
"grad_norm": 0.17079557478427887, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8266, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.23727974249989683, |
|
"grad_norm": 0.17599579691886902, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8327, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 0.23796750986946175, |
|
"grad_norm": 0.16692423820495605, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8294, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.23865527723902666, |
|
"grad_norm": 0.17235307395458221, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8324, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 0.2393430446085916, |
|
"grad_norm": 0.18419289588928223, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8234, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.2400308119781565, |
|
"grad_norm": 0.16880065202713013, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8315, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 0.24071857934772142, |
|
"grad_norm": 0.18046660721302032, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8288, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.24140634671728634, |
|
"grad_norm": 0.19775420427322388, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8304, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 0.24209411408685128, |
|
"grad_norm": 0.18596383929252625, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8269, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.2427818814564162, |
|
"grad_norm": 0.18525435030460358, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8293, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 0.2434696488259811, |
|
"grad_norm": 0.2105979025363922, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8252, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.244157416195546, |
|
"grad_norm": 0.18099245429039001, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8271, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 0.24484518356511092, |
|
"grad_norm": 0.17330291867256165, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8261, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.24553295093467586, |
|
"grad_norm": 0.17979152500629425, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8304, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 0.24622071830424078, |
|
"grad_norm": 0.19253650307655334, |
|
"learning_rate": 0.0001, |
|
"loss": 1.83, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.2469084856738057, |
|
"grad_norm": 0.20440231263637543, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8251, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 0.2475962530433706, |
|
"grad_norm": 0.18242883682250977, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8286, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.24828402041293554, |
|
"grad_norm": 0.1742672622203827, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8271, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 0.24897178778250045, |
|
"grad_norm": 0.19099250435829163, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8284, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.24965955515206537, |
|
"grad_norm": 0.19839410483837128, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8254, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 0.2503473225216303, |
|
"grad_norm": 0.18187545239925385, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8258, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.2510350898911952, |
|
"grad_norm": 0.16419640183448792, |
|
"learning_rate": 0.0001, |
|
"loss": 1.825, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 0.2517228572607601, |
|
"grad_norm": 0.1788015216588974, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8257, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.25241062463032504, |
|
"grad_norm": 0.2013292908668518, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8345, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 0.25309839199989, |
|
"grad_norm": 0.18886993825435638, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8269, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.25378615936945487, |
|
"grad_norm": 0.18426848948001862, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8291, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 0.2544739267390198, |
|
"grad_norm": 0.1836244910955429, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8228, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.2551616941085847, |
|
"grad_norm": 0.18584777414798737, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8283, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 0.25584946147814963, |
|
"grad_norm": 0.16920630633831024, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8274, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.25653722884771457, |
|
"grad_norm": 0.20111984014511108, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8285, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 0.25722499621727946, |
|
"grad_norm": 0.18769313395023346, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8295, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.2579127635868444, |
|
"grad_norm": 0.18159103393554688, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8236, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 0.2586005309564093, |
|
"grad_norm": 0.1929440200328827, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8279, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.2592882983259742, |
|
"grad_norm": 0.16436657309532166, |
|
"learning_rate": 0.0001, |
|
"loss": 1.823, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 0.25997606569553916, |
|
"grad_norm": 0.1638740748167038, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8251, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.26066383306510404, |
|
"grad_norm": 0.18252821266651154, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8251, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 0.261351600434669, |
|
"grad_norm": 0.18031029403209686, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8243, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.26203936780423387, |
|
"grad_norm": 0.1770683377981186, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8274, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 0.2627271351737988, |
|
"grad_norm": 0.20250555872917175, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8258, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.26341490254336375, |
|
"grad_norm": 0.16491496562957764, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8251, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 0.26410266991292863, |
|
"grad_norm": 0.19582998752593994, |
|
"learning_rate": 0.0001, |
|
"loss": 1.824, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.2647904372824936, |
|
"grad_norm": 0.17773911356925964, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8195, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 0.2654782046520585, |
|
"grad_norm": 0.18118888139724731, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8239, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.2661659720216234, |
|
"grad_norm": 0.15766191482543945, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8232, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 0.26685373939118834, |
|
"grad_norm": 0.17026937007904053, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8223, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.2675415067607532, |
|
"grad_norm": 0.18863512575626373, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8257, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 0.26822927413031816, |
|
"grad_norm": 0.18321500718593597, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8238, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.2689170414998831, |
|
"grad_norm": 0.20935237407684326, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8229, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 0.269604808869448, |
|
"grad_norm": 0.19490981101989746, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8194, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.2702925762390129, |
|
"grad_norm": 0.19290666282176971, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8258, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 0.2709803436085778, |
|
"grad_norm": 0.1819174438714981, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8224, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.27166811097814275, |
|
"grad_norm": 0.18501299619674683, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8297, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 0.2723558783477077, |
|
"grad_norm": 0.19111846387386322, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8226, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.2730436457172726, |
|
"grad_norm": 0.18800359964370728, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8215, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 0.2737314130868375, |
|
"grad_norm": 0.18408334255218506, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8239, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.2744191804564024, |
|
"grad_norm": 0.19500131905078888, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8232, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 0.27510694782596734, |
|
"grad_norm": 0.18263010680675507, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8246, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.2757947151955323, |
|
"grad_norm": 0.1732577383518219, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8241, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 0.27648248256509717, |
|
"grad_norm": 0.1958979219198227, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8215, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.2771702499346621, |
|
"grad_norm": 0.1755562722682953, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8275, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 0.27785801730422705, |
|
"grad_norm": 0.17292717099189758, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8221, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.27854578467379193, |
|
"grad_norm": 0.16997367143630981, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8221, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 0.27923355204335687, |
|
"grad_norm": 0.1903601735830307, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8243, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.27992131941292175, |
|
"grad_norm": 0.17447033524513245, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8229, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 0.2806090867824867, |
|
"grad_norm": 0.18861395120620728, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8222, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.28129685415205163, |
|
"grad_norm": 0.17015644907951355, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8207, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 0.2819846215216165, |
|
"grad_norm": 0.19356681406497955, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8202, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.28267238889118146, |
|
"grad_norm": 0.1988779753446579, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8199, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 0.28336015626074634, |
|
"grad_norm": 0.1967942714691162, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8217, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.2840479236303113, |
|
"grad_norm": 0.18917816877365112, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8229, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 0.2847356909998762, |
|
"grad_norm": 0.16583094000816345, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8219, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.2854234583694411, |
|
"grad_norm": 0.19918115437030792, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8246, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 0.28611122573900605, |
|
"grad_norm": 0.1981818974018097, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8211, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.28679899310857093, |
|
"grad_norm": 0.1838293969631195, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8224, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 0.28748676047813587, |
|
"grad_norm": 0.20068101584911346, |
|
"learning_rate": 0.0001, |
|
"loss": 1.82, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.2881745278477008, |
|
"grad_norm": 0.17375263571739197, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8195, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 0.2888622952172657, |
|
"grad_norm": 0.16706246137619019, |
|
"learning_rate": 0.0001, |
|
"loss": 1.826, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.28955006258683064, |
|
"grad_norm": 0.20021022856235504, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8207, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 0.2902378299563955, |
|
"grad_norm": 0.20570990443229675, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8221, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.29092559732596046, |
|
"grad_norm": 0.2043515294790268, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8239, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 0.2916133646955254, |
|
"grad_norm": 0.17122073471546173, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8203, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.2923011320650903, |
|
"grad_norm": 0.19589883089065552, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8206, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 0.2929888994346552, |
|
"grad_norm": 0.19675767421722412, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8244, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.29367666680422017, |
|
"grad_norm": 0.1788429468870163, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8225, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 0.29436443417378505, |
|
"grad_norm": 0.17564085125923157, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8242, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.29505220154335, |
|
"grad_norm": 0.1807086318731308, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8245, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 0.2957399689129149, |
|
"grad_norm": 0.1772526502609253, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8231, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.2964277362824798, |
|
"grad_norm": 0.1903577297925949, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8209, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 0.29711550365204475, |
|
"grad_norm": 0.17995303869247437, |
|
"learning_rate": 0.0001, |
|
"loss": 1.817, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.29780327102160964, |
|
"grad_norm": 0.1937420666217804, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8241, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 0.2984910383911746, |
|
"grad_norm": 0.1729700267314911, |
|
"learning_rate": 0.0001, |
|
"loss": 1.822, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.29917880576073946, |
|
"grad_norm": 0.16370828449726105, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8217, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 0.2998665731303044, |
|
"grad_norm": 0.17373540997505188, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8191, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.30055434049986934, |
|
"grad_norm": 0.19695748388767242, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8236, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 0.30124210786943423, |
|
"grad_norm": 0.20299525558948517, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8181, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.30192987523899917, |
|
"grad_norm": 0.5943254828453064, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8207, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 0.30261764260856405, |
|
"grad_norm": 0.1915915608406067, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8245, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.303305409978129, |
|
"grad_norm": 0.16212280094623566, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8226, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 0.30399317734769393, |
|
"grad_norm": 0.16871103644371033, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8193, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.3046809447172588, |
|
"grad_norm": 0.1811041682958603, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8187, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 0.30536871208682376, |
|
"grad_norm": 0.1868380606174469, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8219, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.3060564794563887, |
|
"grad_norm": 0.18134795129299164, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8207, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 0.3067442468259536, |
|
"grad_norm": 0.17329555749893188, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8193, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.3074320141955185, |
|
"grad_norm": 0.18371562659740448, |
|
"learning_rate": 0.0001, |
|
"loss": 1.821, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 0.3081197815650834, |
|
"grad_norm": 0.17543677985668182, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8182, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.30880754893464835, |
|
"grad_norm": 0.18362955749034882, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8187, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 0.3094953163042133, |
|
"grad_norm": 0.20341430604457855, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8198, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.31018308367377817, |
|
"grad_norm": 0.1833573579788208, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8167, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 0.3108708510433431, |
|
"grad_norm": 0.1798466444015503, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8204, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.311558618412908, |
|
"grad_norm": 0.18346908688545227, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8197, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 0.31224638578247293, |
|
"grad_norm": 0.1842503696680069, |
|
"learning_rate": 0.0001, |
|
"loss": 1.822, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.3129341531520379, |
|
"grad_norm": 0.1917971521615982, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8205, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 0.31362192052160276, |
|
"grad_norm": 0.18140938878059387, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8187, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.3143096878911677, |
|
"grad_norm": 0.17349034547805786, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8204, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 0.3149974552607326, |
|
"grad_norm": 0.17727358639240265, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8203, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.3156852226302975, |
|
"grad_norm": 0.1764019876718521, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8197, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 0.31637298999986246, |
|
"grad_norm": 0.18336281180381775, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8168, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.31706075736942735, |
|
"grad_norm": 0.15488466620445251, |
|
"learning_rate": 0.0001, |
|
"loss": 1.819, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 0.3177485247389923, |
|
"grad_norm": 0.16988332569599152, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8151, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.31843629210855723, |
|
"grad_norm": 0.16344988346099854, |
|
"learning_rate": 0.0001, |
|
"loss": 1.819, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 0.3191240594781221, |
|
"grad_norm": 0.17984721064567566, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8182, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.31981182684768705, |
|
"grad_norm": 0.19572113454341888, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8158, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 0.32049959421725194, |
|
"grad_norm": 0.21890446543693542, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8158, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.3211873615868169, |
|
"grad_norm": 0.1672099530696869, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8183, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 0.3218751289563818, |
|
"grad_norm": 0.18066146969795227, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8194, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.3225628963259467, |
|
"grad_norm": 0.1749303936958313, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8192, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 0.32325066369551164, |
|
"grad_norm": 0.1646299809217453, |
|
"learning_rate": 0.0001, |
|
"loss": 1.819, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.3239384310650765, |
|
"grad_norm": 0.204520583152771, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8166, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 0.32462619843464147, |
|
"grad_norm": 0.166048064827919, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8163, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.3253139658042064, |
|
"grad_norm": 0.17722272872924805, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8158, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 0.3260017331737713, |
|
"grad_norm": 0.1896638125181198, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8165, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.32668950054333623, |
|
"grad_norm": 0.16389790177345276, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8163, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 0.3273772679129011, |
|
"grad_norm": 0.17973138391971588, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8201, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.32806503528246606, |
|
"grad_norm": 0.20095448195934296, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8174, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 0.328752802652031, |
|
"grad_norm": 0.18039678037166595, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8179, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.3294405700215959, |
|
"grad_norm": 0.1760893315076828, |
|
"learning_rate": 0.0001, |
|
"loss": 1.816, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 0.3301283373911608, |
|
"grad_norm": 0.171057790517807, |
|
"learning_rate": 0.0001, |
|
"loss": 1.816, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.33081610476072576, |
|
"grad_norm": 0.17639483511447906, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8157, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 0.33150387213029064, |
|
"grad_norm": 0.16385740041732788, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8195, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.3321916394998556, |
|
"grad_norm": 0.18215522170066833, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8157, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 0.33287940686942047, |
|
"grad_norm": 0.17613132297992706, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8152, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.3335671742389854, |
|
"grad_norm": 0.16723348200321198, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8141, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 0.33425494160855035, |
|
"grad_norm": 0.16092203557491302, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8173, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.33494270897811523, |
|
"grad_norm": 0.17928454279899597, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8188, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 0.3356304763476802, |
|
"grad_norm": 0.18230123817920685, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8152, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.33631824371724506, |
|
"grad_norm": 0.1699696034193039, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8194, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 0.33700601108681, |
|
"grad_norm": 0.1800839602947235, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8126, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.33769377845637494, |
|
"grad_norm": 0.19913671910762787, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8148, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 0.3383815458259398, |
|
"grad_norm": 0.16596053540706635, |
|
"learning_rate": 0.0001, |
|
"loss": 1.818, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.33906931319550476, |
|
"grad_norm": 0.1894855797290802, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8142, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 0.33975708056506965, |
|
"grad_norm": 0.1800161600112915, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8152, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.3404448479346346, |
|
"grad_norm": 0.17433103919029236, |
|
"learning_rate": 0.0001, |
|
"loss": 1.815, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 0.3411326153041995, |
|
"grad_norm": 0.18210847675800323, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8168, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.3418203826737644, |
|
"grad_norm": 0.17840790748596191, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8159, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 0.34250815004332935, |
|
"grad_norm": 0.18368154764175415, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8171, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.34319591741289424, |
|
"grad_norm": 0.17999804019927979, |
|
"learning_rate": 0.0001, |
|
"loss": 1.817, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 0.3438836847824592, |
|
"grad_norm": 0.19299517571926117, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8161, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.3445714521520241, |
|
"grad_norm": 0.17866362631320953, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8121, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 0.345259219521589, |
|
"grad_norm": 0.16793055832386017, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8137, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.34594698689115394, |
|
"grad_norm": 0.18356679379940033, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8158, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 0.3466347542607189, |
|
"grad_norm": 0.18392959237098694, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8135, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.34732252163028376, |
|
"grad_norm": 0.18158595263957977, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8168, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 0.3480102889998487, |
|
"grad_norm": 0.1956174075603485, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8137, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.3486980563694136, |
|
"grad_norm": 0.17629751563072205, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8161, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 0.34938582373897853, |
|
"grad_norm": 0.1842150092124939, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8112, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.35007359110854347, |
|
"grad_norm": 0.18889479339122772, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8152, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 0.35076135847810835, |
|
"grad_norm": 0.16872894763946533, |
|
"learning_rate": 0.0001, |
|
"loss": 1.818, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.3514491258476733, |
|
"grad_norm": 0.16502858698368073, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8127, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 0.3521368932172382, |
|
"grad_norm": 0.1778111755847931, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8202, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.3528246605868031, |
|
"grad_norm": 0.16866064071655273, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8155, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 0.35351242795636806, |
|
"grad_norm": 0.1845904141664505, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8171, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.35420019532593294, |
|
"grad_norm": 0.19138947129249573, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8164, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 0.3548879626954979, |
|
"grad_norm": 0.18222880363464355, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8131, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.35557573006506277, |
|
"grad_norm": 0.17819440364837646, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8147, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 0.3562634974346277, |
|
"grad_norm": 0.20162558555603027, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8188, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.35695126480419265, |
|
"grad_norm": 0.17715832591056824, |
|
"learning_rate": 0.0001, |
|
"loss": 1.813, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 0.35763903217375753, |
|
"grad_norm": 0.16032275557518005, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8135, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.35832679954332247, |
|
"grad_norm": 0.17023804783821106, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8168, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 0.3590145669128874, |
|
"grad_norm": 0.19815494120121002, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8123, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.3597023342824523, |
|
"grad_norm": 0.19192709028720856, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8164, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 0.36039010165201724, |
|
"grad_norm": 0.18932852149009705, |
|
"learning_rate": 0.0001, |
|
"loss": 1.813, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.3610778690215821, |
|
"grad_norm": 0.16477489471435547, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8147, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 0.36176563639114706, |
|
"grad_norm": 0.19172504544258118, |
|
"learning_rate": 0.0001, |
|
"loss": 1.814, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.362453403760712, |
|
"grad_norm": 0.19087177515029907, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8123, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 0.3631411711302769, |
|
"grad_norm": 0.1714990735054016, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8133, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.3638289384998418, |
|
"grad_norm": 0.16309858858585358, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8168, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 0.3645167058694067, |
|
"grad_norm": 0.1791163831949234, |
|
"learning_rate": 0.0001, |
|
"loss": 1.818, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.36520447323897165, |
|
"grad_norm": 0.17130139470100403, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8182, |
|
"step": 26550 |
|
}, |
|
{ |
|
"epoch": 0.3658922406085366, |
|
"grad_norm": 0.17432111501693726, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8177, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.3665800079781015, |
|
"grad_norm": 0.15398447215557098, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8161, |
|
"step": 26650 |
|
}, |
|
{ |
|
"epoch": 0.3672677753476664, |
|
"grad_norm": 0.2831607162952423, |
|
"learning_rate": 0.0001, |
|
"loss": 1.815, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.3679555427172313, |
|
"grad_norm": 0.17564986646175385, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8129, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 0.36864331008679624, |
|
"grad_norm": 0.18288859724998474, |
|
"learning_rate": 0.0001, |
|
"loss": 1.813, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.3693310774563612, |
|
"grad_norm": 0.1621311753988266, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8069, |
|
"step": 26850 |
|
}, |
|
{ |
|
"epoch": 0.37001884482592606, |
|
"grad_norm": 0.16472625732421875, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8136, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.370706612195491, |
|
"grad_norm": 0.16450871527194977, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8149, |
|
"step": 26950 |
|
}, |
|
{ |
|
"epoch": 0.37139437956505594, |
|
"grad_norm": 0.1769149899482727, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8078, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.3720821469346208, |
|
"grad_norm": 0.1917348951101303, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8121, |
|
"step": 27050 |
|
}, |
|
{ |
|
"epoch": 0.37276991430418577, |
|
"grad_norm": 0.18277530372142792, |
|
"learning_rate": 0.0001, |
|
"loss": 1.812, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.37345768167375065, |
|
"grad_norm": 0.1814720183610916, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8092, |
|
"step": 27150 |
|
}, |
|
{ |
|
"epoch": 0.3741454490433156, |
|
"grad_norm": 0.17358410358428955, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8118, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.37483321641288053, |
|
"grad_norm": 0.18569444119930267, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8115, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 0.3755209837824454, |
|
"grad_norm": 0.15812502801418304, |
|
"learning_rate": 0.0001, |
|
"loss": 1.813, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.37620875115201036, |
|
"grad_norm": 0.19051866233348846, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8162, |
|
"step": 27350 |
|
}, |
|
{ |
|
"epoch": 0.37689651852157524, |
|
"grad_norm": 0.1646508276462555, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8109, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.3775842858911402, |
|
"grad_norm": 0.16069738566875458, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8088, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 0.3782720532607051, |
|
"grad_norm": 0.18708954751491547, |
|
"learning_rate": 0.0001, |
|
"loss": 1.809, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.37895982063027, |
|
"grad_norm": 0.18674279749393463, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8141, |
|
"step": 27550 |
|
}, |
|
{ |
|
"epoch": 0.37964758799983495, |
|
"grad_norm": 0.17408175766468048, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8126, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.38033535536939983, |
|
"grad_norm": 0.15924981236457825, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8122, |
|
"step": 27650 |
|
}, |
|
{ |
|
"epoch": 0.38102312273896477, |
|
"grad_norm": 0.17203688621520996, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8118, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.3817108901085297, |
|
"grad_norm": 0.18587364256381989, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8129, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 0.3823986574780946, |
|
"grad_norm": 0.18941548466682434, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8099, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.38308642484765953, |
|
"grad_norm": 0.14958040416240692, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8141, |
|
"step": 27850 |
|
}, |
|
{ |
|
"epoch": 0.3837741922172244, |
|
"grad_norm": 0.17599830031394958, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8141, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.38446195958678936, |
|
"grad_norm": 0.17611196637153625, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8114, |
|
"step": 27950 |
|
}, |
|
{ |
|
"epoch": 0.3851497269563543, |
|
"grad_norm": 0.1823156625032425, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8116, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.3858374943259192, |
|
"grad_norm": 0.17287470400333405, |
|
"learning_rate": 0.0001, |
|
"loss": 1.812, |
|
"step": 28050 |
|
}, |
|
{ |
|
"epoch": 0.3865252616954841, |
|
"grad_norm": 0.17163801193237305, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8102, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.38721302906504906, |
|
"grad_norm": 0.16863061487674713, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8085, |
|
"step": 28150 |
|
}, |
|
{ |
|
"epoch": 0.38790079643461395, |
|
"grad_norm": 0.1910269409418106, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8128, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.3885885638041789, |
|
"grad_norm": 0.16055557131767273, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8122, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 0.3892763311737438, |
|
"grad_norm": 0.17268548905849457, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8084, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.3899640985433087, |
|
"grad_norm": 0.16962352395057678, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8131, |
|
"step": 28350 |
|
}, |
|
{ |
|
"epoch": 0.39065186591287365, |
|
"grad_norm": 0.1744450330734253, |
|
"learning_rate": 0.0001, |
|
"loss": 1.811, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.39133963328243854, |
|
"grad_norm": 0.17569154500961304, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8165, |
|
"step": 28450 |
|
}, |
|
{ |
|
"epoch": 0.3920274006520035, |
|
"grad_norm": 0.17034880816936493, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8125, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.39271516802156836, |
|
"grad_norm": 0.16873665153980255, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8124, |
|
"step": 28550 |
|
}, |
|
{ |
|
"epoch": 0.3934029353911333, |
|
"grad_norm": 0.1771818846464157, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8132, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.39409070276069824, |
|
"grad_norm": 0.17641928791999817, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8131, |
|
"step": 28650 |
|
}, |
|
{ |
|
"epoch": 0.3947784701302631, |
|
"grad_norm": 0.16521941125392914, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8082, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.39546623749982807, |
|
"grad_norm": 0.17453192174434662, |
|
"learning_rate": 0.0001, |
|
"loss": 1.813, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 0.39615400486939295, |
|
"grad_norm": 0.17454297840595245, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8129, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.3968417722389579, |
|
"grad_norm": 0.155872642993927, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8116, |
|
"step": 28850 |
|
}, |
|
{ |
|
"epoch": 0.39752953960852283, |
|
"grad_norm": 0.17079751193523407, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8097, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.3982173069780877, |
|
"grad_norm": 0.1715528666973114, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8082, |
|
"step": 28950 |
|
}, |
|
{ |
|
"epoch": 0.39890507434765266, |
|
"grad_norm": 0.17352135479450226, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8058, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.3995928417172176, |
|
"grad_norm": 0.17056448757648468, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8093, |
|
"step": 29050 |
|
}, |
|
{ |
|
"epoch": 0.4002806090867825, |
|
"grad_norm": 0.16389931738376617, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8079, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.4009683764563474, |
|
"grad_norm": 0.17660637199878693, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8101, |
|
"step": 29150 |
|
}, |
|
{ |
|
"epoch": 0.4016561438259123, |
|
"grad_norm": 0.1871548742055893, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8124, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.40234391119547724, |
|
"grad_norm": 0.17292185127735138, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8074, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 0.4030316785650422, |
|
"grad_norm": 0.16299203038215637, |
|
"learning_rate": 0.0001, |
|
"loss": 1.81, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.40371944593460707, |
|
"grad_norm": 0.20287854969501495, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8141, |
|
"step": 29350 |
|
}, |
|
{ |
|
"epoch": 0.404407213304172, |
|
"grad_norm": 0.1632193922996521, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8102, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.4050949806737369, |
|
"grad_norm": 0.16991235315799713, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8084, |
|
"step": 29450 |
|
}, |
|
{ |
|
"epoch": 0.40578274804330183, |
|
"grad_norm": 0.17448389530181885, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8108, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.4064705154128668, |
|
"grad_norm": 0.1706276535987854, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8091, |
|
"step": 29550 |
|
}, |
|
{ |
|
"epoch": 0.40715828278243166, |
|
"grad_norm": 0.187569260597229, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8077, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.4078460501519966, |
|
"grad_norm": 0.18289169669151306, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8062, |
|
"step": 29650 |
|
}, |
|
{ |
|
"epoch": 0.4085338175215615, |
|
"grad_norm": 0.17096656560897827, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8117, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.4092215848911264, |
|
"grad_norm": 0.18183813989162445, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8108, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 0.40990935226069136, |
|
"grad_norm": 0.18215380609035492, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8113, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.41059711963025625, |
|
"grad_norm": 0.19367296993732452, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8111, |
|
"step": 29850 |
|
}, |
|
{ |
|
"epoch": 0.4112848869998212, |
|
"grad_norm": 0.18118008971214294, |
|
"learning_rate": 0.0001, |
|
"loss": 1.81, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.4119726543693861, |
|
"grad_norm": 0.16475409269332886, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8095, |
|
"step": 29950 |
|
}, |
|
{ |
|
"epoch": 0.412660421738951, |
|
"grad_norm": 0.19968119263648987, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8078, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.41334818910851595, |
|
"grad_norm": 0.2024579495191574, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8097, |
|
"step": 30050 |
|
}, |
|
{ |
|
"epoch": 0.41403595647808084, |
|
"grad_norm": 0.1678769886493683, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8099, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.4147237238476458, |
|
"grad_norm": 0.19947120547294617, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8124, |
|
"step": 30150 |
|
}, |
|
{ |
|
"epoch": 0.4154114912172107, |
|
"grad_norm": 0.1908283233642578, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8094, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.4160992585867756, |
|
"grad_norm": 0.16802892088890076, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8029, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 0.41678702595634054, |
|
"grad_norm": 0.1601232886314392, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8078, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.4174747933259054, |
|
"grad_norm": 0.16903936862945557, |
|
"learning_rate": 0.0001, |
|
"loss": 1.811, |
|
"step": 30350 |
|
}, |
|
{ |
|
"epoch": 0.41816256069547036, |
|
"grad_norm": 0.17131748795509338, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8057, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.4188503280650353, |
|
"grad_norm": 0.17509245872497559, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8109, |
|
"step": 30450 |
|
}, |
|
{ |
|
"epoch": 0.4195380954346002, |
|
"grad_norm": 0.17135483026504517, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8086, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.42022586280416513, |
|
"grad_norm": 0.1780470460653305, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8054, |
|
"step": 30550 |
|
}, |
|
{ |
|
"epoch": 0.42091363017373, |
|
"grad_norm": 0.16642825305461884, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8101, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.42160139754329495, |
|
"grad_norm": 0.17237281799316406, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8131, |
|
"step": 30650 |
|
}, |
|
{ |
|
"epoch": 0.4222891649128599, |
|
"grad_norm": 0.1773928999900818, |
|
"learning_rate": 0.0001, |
|
"loss": 1.807, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.4229769322824248, |
|
"grad_norm": 0.15655359625816345, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8102, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 0.4236646996519897, |
|
"grad_norm": 0.18366913497447968, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8045, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.4243524670215546, |
|
"grad_norm": 0.15379434823989868, |
|
"learning_rate": 0.0001, |
|
"loss": 1.808, |
|
"step": 30850 |
|
}, |
|
{ |
|
"epoch": 0.42504023439111954, |
|
"grad_norm": 0.17815300822257996, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8043, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.4257280017606845, |
|
"grad_norm": 0.17477139830589294, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8106, |
|
"step": 30950 |
|
}, |
|
{ |
|
"epoch": 0.42641576913024937, |
|
"grad_norm": 0.18266303837299347, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8089, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.4271035364998143, |
|
"grad_norm": 0.17377638816833496, |
|
"learning_rate": 0.0001, |
|
"loss": 1.808, |
|
"step": 31050 |
|
}, |
|
{ |
|
"epoch": 0.42779130386937925, |
|
"grad_norm": 0.16105225682258606, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8058, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.42847907123894413, |
|
"grad_norm": 0.16976149380207062, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8108, |
|
"step": 31150 |
|
}, |
|
{ |
|
"epoch": 0.42916683860850907, |
|
"grad_norm": 0.1994379609823227, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8103, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.42985460597807396, |
|
"grad_norm": 0.1827680766582489, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8044, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 0.4305423733476389, |
|
"grad_norm": 0.17883870005607605, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8067, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.43123014071720384, |
|
"grad_norm": 0.1809430867433548, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8105, |
|
"step": 31350 |
|
}, |
|
{ |
|
"epoch": 0.4319179080867687, |
|
"grad_norm": 0.15287983417510986, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8032, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.43260567545633366, |
|
"grad_norm": 0.1845768690109253, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8044, |
|
"step": 31450 |
|
}, |
|
{ |
|
"epoch": 0.43329344282589854, |
|
"grad_norm": 0.15448009967803955, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8074, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.4339812101954635, |
|
"grad_norm": 0.16838380694389343, |
|
"learning_rate": 0.0001, |
|
"loss": 1.806, |
|
"step": 31550 |
|
}, |
|
{ |
|
"epoch": 0.4346689775650284, |
|
"grad_norm": 0.16129769384860992, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8103, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.4353567449345933, |
|
"grad_norm": 0.16702227294445038, |
|
"learning_rate": 0.0001, |
|
"loss": 1.81, |
|
"step": 31650 |
|
}, |
|
{ |
|
"epoch": 0.43604451230415825, |
|
"grad_norm": 0.1646498441696167, |
|
"learning_rate": 0.0001, |
|
"loss": 1.806, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.43673227967372313, |
|
"grad_norm": 0.1929212510585785, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8073, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 0.4374200470432881, |
|
"grad_norm": 0.1728442758321762, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8062, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.438107814412853, |
|
"grad_norm": 0.15660201013088226, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8025, |
|
"step": 31850 |
|
}, |
|
{ |
|
"epoch": 0.4387955817824179, |
|
"grad_norm": 0.1685377061367035, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8079, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.43948334915198284, |
|
"grad_norm": 0.18124371767044067, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8042, |
|
"step": 31950 |
|
}, |
|
{ |
|
"epoch": 0.4401711165215478, |
|
"grad_norm": 0.18348287045955658, |
|
"learning_rate": 0.0001, |
|
"loss": 1.809, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.44085888389111266, |
|
"grad_norm": 0.17936021089553833, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8052, |
|
"step": 32050 |
|
}, |
|
{ |
|
"epoch": 0.4415466512606776, |
|
"grad_norm": 0.17418572306632996, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8075, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.4422344186302425, |
|
"grad_norm": 0.16956304013729095, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8077, |
|
"step": 32150 |
|
}, |
|
{ |
|
"epoch": 0.4429221859998074, |
|
"grad_norm": 0.18142879009246826, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8053, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.44360995336937237, |
|
"grad_norm": 0.17536590993404388, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8055, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 0.44429772073893725, |
|
"grad_norm": 0.18276521563529968, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8052, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.4449854881085022, |
|
"grad_norm": 0.15810468792915344, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8012, |
|
"step": 32350 |
|
}, |
|
{ |
|
"epoch": 0.4456732554780671, |
|
"grad_norm": 0.17224664986133575, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8077, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.446361022847632, |
|
"grad_norm": 0.17988136410713196, |
|
"learning_rate": 0.0001, |
|
"loss": 1.806, |
|
"step": 32450 |
|
}, |
|
{ |
|
"epoch": 0.44704879021719696, |
|
"grad_norm": 0.16269569098949432, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8072, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.44773655758676184, |
|
"grad_norm": 0.1897774487733841, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8034, |
|
"step": 32550 |
|
}, |
|
{ |
|
"epoch": 0.4484243249563268, |
|
"grad_norm": 0.17675265669822693, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8053, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.44911209232589167, |
|
"grad_norm": 0.1847987174987793, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8065, |
|
"step": 32650 |
|
}, |
|
{ |
|
"epoch": 0.4497998596954566, |
|
"grad_norm": 0.16706308722496033, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8072, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.45048762706502155, |
|
"grad_norm": 0.19702313840389252, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8092, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 0.45117539443458643, |
|
"grad_norm": 0.17378373444080353, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8069, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.45186316180415137, |
|
"grad_norm": 0.15358635783195496, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8042, |
|
"step": 32850 |
|
}, |
|
{ |
|
"epoch": 0.4525509291737163, |
|
"grad_norm": 0.16188420355319977, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8046, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.4532386965432812, |
|
"grad_norm": 0.15988096594810486, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8048, |
|
"step": 32950 |
|
}, |
|
{ |
|
"epoch": 0.45392646391284613, |
|
"grad_norm": 0.17328138649463654, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8031, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.454614231282411, |
|
"grad_norm": 0.18192242085933685, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8015, |
|
"step": 33050 |
|
}, |
|
{ |
|
"epoch": 0.45530199865197596, |
|
"grad_norm": 0.18269090354442596, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8086, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.4559897660215409, |
|
"grad_norm": 0.1573922038078308, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8054, |
|
"step": 33150 |
|
}, |
|
{ |
|
"epoch": 0.4566775333911058, |
|
"grad_norm": 0.20478671789169312, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8047, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.4573653007606707, |
|
"grad_norm": 0.17149974405765533, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8045, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 0.4580530681302356, |
|
"grad_norm": 0.1575038731098175, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8008, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.45874083549980055, |
|
"grad_norm": 0.1684975028038025, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8036, |
|
"step": 33350 |
|
}, |
|
{ |
|
"epoch": 0.4594286028693655, |
|
"grad_norm": 0.17977888882160187, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8058, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.46011637023893037, |
|
"grad_norm": 0.1595628559589386, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8052, |
|
"step": 33450 |
|
}, |
|
{ |
|
"epoch": 0.4608041376084953, |
|
"grad_norm": 0.17325359582901, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8036, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.4614919049780602, |
|
"grad_norm": 0.1705903857946396, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8048, |
|
"step": 33550 |
|
}, |
|
{ |
|
"epoch": 0.46217967234762514, |
|
"grad_norm": 0.1714329570531845, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8042, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.4628674397171901, |
|
"grad_norm": 0.17674137651920319, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8067, |
|
"step": 33650 |
|
}, |
|
{ |
|
"epoch": 0.46355520708675496, |
|
"grad_norm": 0.1605982631444931, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8059, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.4642429744563199, |
|
"grad_norm": 0.17221522331237793, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8025, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 0.4649307418258848, |
|
"grad_norm": 0.17648015916347504, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8065, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.4656185091954497, |
|
"grad_norm": 0.16860069334506989, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8064, |
|
"step": 33850 |
|
}, |
|
{ |
|
"epoch": 0.46630627656501467, |
|
"grad_norm": 0.19352567195892334, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8043, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.46699404393457955, |
|
"grad_norm": 0.1634499430656433, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7994, |
|
"step": 33950 |
|
}, |
|
{ |
|
"epoch": 0.4676818113041445, |
|
"grad_norm": 0.1790640950202942, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8075, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.46836957867370943, |
|
"grad_norm": 0.16731584072113037, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8022, |
|
"step": 34050 |
|
}, |
|
{ |
|
"epoch": 0.4690573460432743, |
|
"grad_norm": 0.17351976037025452, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8066, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.46974511341283925, |
|
"grad_norm": 0.18717612326145172, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8048, |
|
"step": 34150 |
|
}, |
|
{ |
|
"epoch": 0.47043288078240414, |
|
"grad_norm": 0.18829597532749176, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8018, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.4711206481519691, |
|
"grad_norm": 0.16731028258800507, |
|
"learning_rate": 0.0001, |
|
"loss": 1.806, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 0.471808415521534, |
|
"grad_norm": 0.17419900000095367, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8009, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.4724961828910989, |
|
"grad_norm": 0.16232840716838837, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8048, |
|
"step": 34350 |
|
}, |
|
{ |
|
"epoch": 0.47318395026066384, |
|
"grad_norm": 0.1557988077402115, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8021, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.47387171763022873, |
|
"grad_norm": 0.18441712856292725, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8066, |
|
"step": 34450 |
|
}, |
|
{ |
|
"epoch": 0.47455948499979367, |
|
"grad_norm": 0.1681167036294937, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8035, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.4752472523693586, |
|
"grad_norm": 0.1694021373987198, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8056, |
|
"step": 34550 |
|
}, |
|
{ |
|
"epoch": 0.4759350197389235, |
|
"grad_norm": 0.16909408569335938, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8012, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.47662278710848843, |
|
"grad_norm": 0.18573597073554993, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8025, |
|
"step": 34650 |
|
}, |
|
{ |
|
"epoch": 0.4773105544780533, |
|
"grad_norm": 0.1591121107339859, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8022, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.47799832184761826, |
|
"grad_norm": 0.16243012249469757, |
|
"learning_rate": 0.0001, |
|
"loss": 1.809, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 0.4786860892171832, |
|
"grad_norm": 0.1876152753829956, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8043, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.4793738565867481, |
|
"grad_norm": 0.160101518034935, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8041, |
|
"step": 34850 |
|
}, |
|
{ |
|
"epoch": 0.480061623956313, |
|
"grad_norm": 0.17508384585380554, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8025, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.48074939132587796, |
|
"grad_norm": 0.16169220209121704, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8045, |
|
"step": 34950 |
|
}, |
|
{ |
|
"epoch": 0.48143715869544285, |
|
"grad_norm": 0.17065638303756714, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8046, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.4821249260650078, |
|
"grad_norm": 0.16137543320655823, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8006, |
|
"step": 35050 |
|
}, |
|
{ |
|
"epoch": 0.48281269343457267, |
|
"grad_norm": 0.1716589331626892, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8065, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.4835004608041376, |
|
"grad_norm": 0.16750770807266235, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8069, |
|
"step": 35150 |
|
}, |
|
{ |
|
"epoch": 0.48418822817370255, |
|
"grad_norm": 0.1668424755334854, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8045, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.48487599554326744, |
|
"grad_norm": 0.1577017605304718, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8015, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 0.4855637629128324, |
|
"grad_norm": 0.16916392743587494, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8012, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.48625153028239726, |
|
"grad_norm": 0.16878165304660797, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8049, |
|
"step": 35350 |
|
}, |
|
{ |
|
"epoch": 0.4869392976519622, |
|
"grad_norm": 0.1834115982055664, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8024, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.48762706502152714, |
|
"grad_norm": 0.16310469806194305, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8043, |
|
"step": 35450 |
|
}, |
|
{ |
|
"epoch": 0.488314832391092, |
|
"grad_norm": 0.17430266737937927, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8017, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.48900259976065696, |
|
"grad_norm": 0.20293480157852173, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8022, |
|
"step": 35550 |
|
}, |
|
{ |
|
"epoch": 0.48969036713022185, |
|
"grad_norm": 0.16140292584896088, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8012, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.4903781344997868, |
|
"grad_norm": 0.15472573041915894, |
|
"learning_rate": 0.0001, |
|
"loss": 1.804, |
|
"step": 35650 |
|
}, |
|
{ |
|
"epoch": 0.49106590186935173, |
|
"grad_norm": 0.19431902468204498, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8013, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.4917536692389166, |
|
"grad_norm": 0.1693229377269745, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8004, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 0.49244143660848155, |
|
"grad_norm": 0.19499187171459198, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8035, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.4931292039780465, |
|
"grad_norm": 0.16046124696731567, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8062, |
|
"step": 35850 |
|
}, |
|
{ |
|
"epoch": 0.4938169713476114, |
|
"grad_norm": 0.17743340134620667, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8038, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.4945047387171763, |
|
"grad_norm": 0.20568375289440155, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8039, |
|
"step": 35950 |
|
}, |
|
{ |
|
"epoch": 0.4951925060867412, |
|
"grad_norm": 0.1706654578447342, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8022, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.49588027345630614, |
|
"grad_norm": 0.17956335842609406, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8038, |
|
"step": 36050 |
|
}, |
|
{ |
|
"epoch": 0.4965680408258711, |
|
"grad_norm": 0.1683945506811142, |
|
"learning_rate": 0.0001, |
|
"loss": 1.801, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.49725580819543597, |
|
"grad_norm": 0.16132575273513794, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7994, |
|
"step": 36150 |
|
}, |
|
{ |
|
"epoch": 0.4979435755650009, |
|
"grad_norm": 0.15439482033252716, |
|
"learning_rate": 0.0001, |
|
"loss": 1.798, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.4986313429345658, |
|
"grad_norm": 0.17427167296409607, |
|
"learning_rate": 0.0001, |
|
"loss": 1.803, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 0.49931911030413073, |
|
"grad_norm": 0.1826677918434143, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8041, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.5000068776736957, |
|
"grad_norm": 0.1664198338985443, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8007, |
|
"step": 36350 |
|
}, |
|
{ |
|
"epoch": 0.5006946450432606, |
|
"grad_norm": 0.19743186235427856, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8009, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.5013824124128254, |
|
"grad_norm": 0.17416580021381378, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8033, |
|
"step": 36450 |
|
}, |
|
{ |
|
"epoch": 0.5020701797823904, |
|
"grad_norm": 0.16447678208351135, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8027, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.5027579471519553, |
|
"grad_norm": 0.19978569447994232, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8027, |
|
"step": 36550 |
|
}, |
|
{ |
|
"epoch": 0.5034457145215202, |
|
"grad_norm": 0.1768701672554016, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8018, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.5041334818910852, |
|
"grad_norm": 0.17458416521549225, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8031, |
|
"step": 36650 |
|
}, |
|
{ |
|
"epoch": 0.5048212492606501, |
|
"grad_norm": 0.15409618616104126, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8043, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.505509016630215, |
|
"grad_norm": 0.20529916882514954, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8026, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 0.50619678399978, |
|
"grad_norm": 0.1579432338476181, |
|
"learning_rate": 0.0001, |
|
"loss": 1.806, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.5068845513693448, |
|
"grad_norm": 0.16803112626075745, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8027, |
|
"step": 36850 |
|
}, |
|
{ |
|
"epoch": 0.5075723187389097, |
|
"grad_norm": 0.19382716715335846, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8029, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.5082600861084746, |
|
"grad_norm": 0.17823243141174316, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8035, |
|
"step": 36950 |
|
}, |
|
{ |
|
"epoch": 0.5089478534780396, |
|
"grad_norm": 0.1742970496416092, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8033, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.5096356208476045, |
|
"grad_norm": 0.17236186563968658, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8062, |
|
"step": 37050 |
|
}, |
|
{ |
|
"epoch": 0.5103233882171694, |
|
"grad_norm": 0.1705719381570816, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8052, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.5110111555867344, |
|
"grad_norm": 0.19941222667694092, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8011, |
|
"step": 37150 |
|
}, |
|
{ |
|
"epoch": 0.5116989229562993, |
|
"grad_norm": 0.16263477504253387, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8026, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.5123866903258641, |
|
"grad_norm": 0.15199637413024902, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8001, |
|
"step": 37250 |
|
}, |
|
{ |
|
"epoch": 0.5130744576954291, |
|
"grad_norm": 0.16797873377799988, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8016, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.513762225064994, |
|
"grad_norm": 0.16336190700531006, |
|
"learning_rate": 0.0001, |
|
"loss": 1.798, |
|
"step": 37350 |
|
}, |
|
{ |
|
"epoch": 0.5144499924345589, |
|
"grad_norm": 0.16497831046581268, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8001, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.5151377598041239, |
|
"grad_norm": 0.1712917536497116, |
|
"learning_rate": 0.0001, |
|
"loss": 1.804, |
|
"step": 37450 |
|
}, |
|
{ |
|
"epoch": 0.5158255271736888, |
|
"grad_norm": 0.16597513854503632, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8019, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.5165132945432537, |
|
"grad_norm": 0.15661810338497162, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8043, |
|
"step": 37550 |
|
}, |
|
{ |
|
"epoch": 0.5172010619128186, |
|
"grad_norm": 0.17713536322116852, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7973, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.5178888292823836, |
|
"grad_norm": 0.15873874723911285, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8003, |
|
"step": 37650 |
|
}, |
|
{ |
|
"epoch": 0.5185765966519484, |
|
"grad_norm": 0.1784040331840515, |
|
"learning_rate": 0.0001, |
|
"loss": 1.798, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.5192643640215133, |
|
"grad_norm": 0.16135090589523315, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8082, |
|
"step": 37750 |
|
}, |
|
{ |
|
"epoch": 0.5199521313910783, |
|
"grad_norm": 0.15565833449363708, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8006, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.5206398987606432, |
|
"grad_norm": 0.1711311787366867, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7975, |
|
"step": 37850 |
|
}, |
|
{ |
|
"epoch": 0.5213276661302081, |
|
"grad_norm": 0.17314565181732178, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7997, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.5220154334997731, |
|
"grad_norm": 0.1723901331424713, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8006, |
|
"step": 37950 |
|
}, |
|
{ |
|
"epoch": 0.522703200869338, |
|
"grad_norm": 0.15868623554706573, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8013, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.5233909682389029, |
|
"grad_norm": 0.17163942754268646, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7991, |
|
"step": 38050 |
|
}, |
|
{ |
|
"epoch": 0.5240787356084677, |
|
"grad_norm": 0.17622709274291992, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8027, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.5247665029780327, |
|
"grad_norm": 0.1616000235080719, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7993, |
|
"step": 38150 |
|
}, |
|
{ |
|
"epoch": 0.5254542703475976, |
|
"grad_norm": 0.1638936698436737, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7978, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.5261420377171625, |
|
"grad_norm": 0.1706729531288147, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7999, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 0.5268298050867275, |
|
"grad_norm": 0.2048814296722412, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7987, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.5275175724562924, |
|
"grad_norm": 0.15826106071472168, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8022, |
|
"step": 38350 |
|
}, |
|
{ |
|
"epoch": 0.5282053398258573, |
|
"grad_norm": 0.16068226099014282, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8032, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.5288931071954223, |
|
"grad_norm": 0.17855240404605865, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7994, |
|
"step": 38450 |
|
}, |
|
{ |
|
"epoch": 0.5295808745649871, |
|
"grad_norm": 0.16978466510772705, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8022, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.530268641934552, |
|
"grad_norm": 0.1745109260082245, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8008, |
|
"step": 38550 |
|
}, |
|
{ |
|
"epoch": 0.530956409304117, |
|
"grad_norm": 0.1952807605266571, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7977, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.5316441766736819, |
|
"grad_norm": 0.1846735179424286, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8033, |
|
"step": 38650 |
|
}, |
|
{ |
|
"epoch": 0.5323319440432468, |
|
"grad_norm": 0.17474836111068726, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8034, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.5330197114128117, |
|
"grad_norm": 0.1729106903076172, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8043, |
|
"step": 38750 |
|
}, |
|
{ |
|
"epoch": 0.5337074787823767, |
|
"grad_norm": 0.18584811687469482, |
|
"learning_rate": 0.0001, |
|
"loss": 1.805, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.5343952461519416, |
|
"grad_norm": 0.15596157312393188, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8014, |
|
"step": 38850 |
|
}, |
|
{ |
|
"epoch": 0.5350830135215064, |
|
"grad_norm": 0.15528340637683868, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7969, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.5357707808910714, |
|
"grad_norm": 0.1738685965538025, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8003, |
|
"step": 38950 |
|
}, |
|
{ |
|
"epoch": 0.5364585482606363, |
|
"grad_norm": 0.1620347946882248, |
|
"learning_rate": 0.0001, |
|
"loss": 1.796, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.5371463156302012, |
|
"grad_norm": 0.1705981343984604, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8008, |
|
"step": 39050 |
|
}, |
|
{ |
|
"epoch": 0.5378340829997662, |
|
"grad_norm": 0.16167068481445312, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8037, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.5385218503693311, |
|
"grad_norm": 0.15977101027965546, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8043, |
|
"step": 39150 |
|
}, |
|
{ |
|
"epoch": 0.539209617738896, |
|
"grad_norm": 0.1699797809123993, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8025, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.5398973851084609, |
|
"grad_norm": 0.17108047008514404, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7999, |
|
"step": 39250 |
|
}, |
|
{ |
|
"epoch": 0.5405851524780259, |
|
"grad_norm": 0.1756991147994995, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8001, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.5412729198475907, |
|
"grad_norm": 0.1716366708278656, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7987, |
|
"step": 39350 |
|
}, |
|
{ |
|
"epoch": 0.5419606872171556, |
|
"grad_norm": 0.16876575350761414, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8013, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.5426484545867206, |
|
"grad_norm": 0.1650577336549759, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8001, |
|
"step": 39450 |
|
}, |
|
{ |
|
"epoch": 0.5433362219562855, |
|
"grad_norm": 0.17242754995822906, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8006, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.5440239893258504, |
|
"grad_norm": 0.16941705346107483, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7995, |
|
"step": 39550 |
|
}, |
|
{ |
|
"epoch": 0.5447117566954154, |
|
"grad_norm": 0.21036018431186676, |
|
"learning_rate": 0.0001, |
|
"loss": 1.802, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.5453995240649803, |
|
"grad_norm": 0.16824571788311005, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7992, |
|
"step": 39650 |
|
}, |
|
{ |
|
"epoch": 0.5460872914345452, |
|
"grad_norm": 0.162497416138649, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7978, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.5467750588041101, |
|
"grad_norm": 0.18297506868839264, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7968, |
|
"step": 39750 |
|
}, |
|
{ |
|
"epoch": 0.547462826173675, |
|
"grad_norm": 0.15444135665893555, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7942, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.5481505935432399, |
|
"grad_norm": 0.17254306375980377, |
|
"learning_rate": 0.0001, |
|
"loss": 1.797, |
|
"step": 39850 |
|
}, |
|
{ |
|
"epoch": 0.5488383609128048, |
|
"grad_norm": 0.18030798435211182, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8008, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.5495261282823698, |
|
"grad_norm": 0.18069452047348022, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7988, |
|
"step": 39950 |
|
}, |
|
{ |
|
"epoch": 0.5502138956519347, |
|
"grad_norm": 0.16256502270698547, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8019, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.5509016630214996, |
|
"grad_norm": 0.16416381299495697, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7976, |
|
"step": 40050 |
|
}, |
|
{ |
|
"epoch": 0.5515894303910646, |
|
"grad_norm": 0.1743890941143036, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7966, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.5522771977606294, |
|
"grad_norm": 0.1875494122505188, |
|
"learning_rate": 0.0001, |
|
"loss": 1.799, |
|
"step": 40150 |
|
}, |
|
{ |
|
"epoch": 0.5529649651301943, |
|
"grad_norm": 0.18323060870170593, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7968, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.5536527324997593, |
|
"grad_norm": 0.1552455574274063, |
|
"learning_rate": 0.0001, |
|
"loss": 1.799, |
|
"step": 40250 |
|
}, |
|
{ |
|
"epoch": 0.5543404998693242, |
|
"grad_norm": 0.1685846745967865, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7989, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.5550282672388891, |
|
"grad_norm": 0.16371703147888184, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7943, |
|
"step": 40350 |
|
}, |
|
{ |
|
"epoch": 0.5557160346084541, |
|
"grad_norm": 0.17993508279323578, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7972, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.556403801978019, |
|
"grad_norm": 0.17061980068683624, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7954, |
|
"step": 40450 |
|
}, |
|
{ |
|
"epoch": 0.5570915693475839, |
|
"grad_norm": 0.17588096857070923, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7975, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.5577793367171487, |
|
"grad_norm": 0.16484741866588593, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7959, |
|
"step": 40550 |
|
}, |
|
{ |
|
"epoch": 0.5584671040867137, |
|
"grad_norm": 0.1812593787908554, |
|
"learning_rate": 0.0001, |
|
"loss": 1.801, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.5591548714562786, |
|
"grad_norm": 0.17755167186260223, |
|
"learning_rate": 0.0001, |
|
"loss": 1.797, |
|
"step": 40650 |
|
}, |
|
{ |
|
"epoch": 0.5598426388258435, |
|
"grad_norm": 0.16877087950706482, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7975, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.5605304061954085, |
|
"grad_norm": 0.15780018270015717, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7967, |
|
"step": 40750 |
|
}, |
|
{ |
|
"epoch": 0.5612181735649734, |
|
"grad_norm": 0.15145239233970642, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7988, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.5619059409345383, |
|
"grad_norm": 0.18385986983776093, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7965, |
|
"step": 40850 |
|
}, |
|
{ |
|
"epoch": 0.5625937083041033, |
|
"grad_norm": 0.15375161170959473, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7946, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.5632814756736682, |
|
"grad_norm": 0.15694858133792877, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7989, |
|
"step": 40950 |
|
}, |
|
{ |
|
"epoch": 0.563969243043233, |
|
"grad_norm": 0.1538461446762085, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7965, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.5646570104127979, |
|
"grad_norm": 0.16211877763271332, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7931, |
|
"step": 41050 |
|
}, |
|
{ |
|
"epoch": 0.5653447777823629, |
|
"grad_norm": 0.1737697869539261, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7972, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.5660325451519278, |
|
"grad_norm": 0.1610105037689209, |
|
"learning_rate": 0.0001, |
|
"loss": 1.798, |
|
"step": 41150 |
|
}, |
|
{ |
|
"epoch": 0.5667203125214927, |
|
"grad_norm": 0.1762542873620987, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7991, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.5674080798910577, |
|
"grad_norm": 0.16195493936538696, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7972, |
|
"step": 41250 |
|
}, |
|
{ |
|
"epoch": 0.5680958472606226, |
|
"grad_norm": 0.18047676980495453, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7962, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.5687836146301875, |
|
"grad_norm": 0.18760687112808228, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8, |
|
"step": 41350 |
|
}, |
|
{ |
|
"epoch": 0.5694713819997524, |
|
"grad_norm": 0.17012238502502441, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7969, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.5701591493693173, |
|
"grad_norm": 0.1699533313512802, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7953, |
|
"step": 41450 |
|
}, |
|
{ |
|
"epoch": 0.5708469167388822, |
|
"grad_norm": 0.16422894597053528, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7995, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.5715346841084472, |
|
"grad_norm": 0.17526569962501526, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7967, |
|
"step": 41550 |
|
}, |
|
{ |
|
"epoch": 0.5722224514780121, |
|
"grad_norm": 0.158601313829422, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8006, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.572910218847577, |
|
"grad_norm": 0.1562766283750534, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7969, |
|
"step": 41650 |
|
}, |
|
{ |
|
"epoch": 0.5735979862171419, |
|
"grad_norm": 0.15490677952766418, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8017, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.5742857535867069, |
|
"grad_norm": 0.17004509270191193, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7958, |
|
"step": 41750 |
|
}, |
|
{ |
|
"epoch": 0.5749735209562717, |
|
"grad_norm": 0.17213889956474304, |
|
"learning_rate": 0.0001, |
|
"loss": 1.797, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.5756612883258366, |
|
"grad_norm": 0.17541930079460144, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7972, |
|
"step": 41850 |
|
}, |
|
{ |
|
"epoch": 0.5763490556954016, |
|
"grad_norm": 0.18296034634113312, |
|
"learning_rate": 0.0001, |
|
"loss": 1.796, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.5770368230649665, |
|
"grad_norm": 0.1777525097131729, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7959, |
|
"step": 41950 |
|
}, |
|
{ |
|
"epoch": 0.5777245904345314, |
|
"grad_norm": 0.17678572237491608, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7989, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.5784123578040964, |
|
"grad_norm": 0.1763673573732376, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8004, |
|
"step": 42050 |
|
}, |
|
{ |
|
"epoch": 0.5791001251736613, |
|
"grad_norm": 0.18608896434307098, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7997, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.5797878925432262, |
|
"grad_norm": 0.1691625863313675, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7988, |
|
"step": 42150 |
|
}, |
|
{ |
|
"epoch": 0.580475659912791, |
|
"grad_norm": 0.1609441488981247, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7993, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.581163427282356, |
|
"grad_norm": 0.15776963531970978, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7994, |
|
"step": 42250 |
|
}, |
|
{ |
|
"epoch": 0.5818511946519209, |
|
"grad_norm": 0.20214344561100006, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7998, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.5825389620214858, |
|
"grad_norm": 0.18112723529338837, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8, |
|
"step": 42350 |
|
}, |
|
{ |
|
"epoch": 0.5832267293910508, |
|
"grad_norm": 0.1543450802564621, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7982, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.5839144967606157, |
|
"grad_norm": 0.15315985679626465, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7995, |
|
"step": 42450 |
|
}, |
|
{ |
|
"epoch": 0.5846022641301806, |
|
"grad_norm": 0.16166909039020538, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7995, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.5852900314997456, |
|
"grad_norm": 0.15933014452457428, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7968, |
|
"step": 42550 |
|
}, |
|
{ |
|
"epoch": 0.5859777988693105, |
|
"grad_norm": 0.15434689819812775, |
|
"learning_rate": 0.0001, |
|
"loss": 1.797, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.5866655662388753, |
|
"grad_norm": 0.1875755488872528, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7964, |
|
"step": 42650 |
|
}, |
|
{ |
|
"epoch": 0.5873533336084403, |
|
"grad_norm": 0.15559327602386475, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7997, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.5880411009780052, |
|
"grad_norm": 0.16149398684501648, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7956, |
|
"step": 42750 |
|
}, |
|
{ |
|
"epoch": 0.5887288683475701, |
|
"grad_norm": 0.1777992695569992, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7912, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.589416635717135, |
|
"grad_norm": 0.15934714674949646, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7989, |
|
"step": 42850 |
|
}, |
|
{ |
|
"epoch": 0.5901044030867, |
|
"grad_norm": 0.16847145557403564, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7997, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.5907921704562649, |
|
"grad_norm": 0.17410792410373688, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7999, |
|
"step": 42950 |
|
}, |
|
{ |
|
"epoch": 0.5914799378258297, |
|
"grad_norm": 0.18102861940860748, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7983, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.5921677051953947, |
|
"grad_norm": 0.1682325005531311, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7986, |
|
"step": 43050 |
|
}, |
|
{ |
|
"epoch": 0.5928554725649596, |
|
"grad_norm": 0.17732855677604675, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8004, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.5935432399345245, |
|
"grad_norm": 0.16327179968357086, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7969, |
|
"step": 43150 |
|
}, |
|
{ |
|
"epoch": 0.5942310073040895, |
|
"grad_norm": 0.1582539677619934, |
|
"learning_rate": 0.0001, |
|
"loss": 1.798, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.5949187746736544, |
|
"grad_norm": 0.14965754747390747, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7986, |
|
"step": 43250 |
|
}, |
|
{ |
|
"epoch": 0.5956065420432193, |
|
"grad_norm": 0.1617211103439331, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7938, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.5962943094127843, |
|
"grad_norm": 0.17458325624465942, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7978, |
|
"step": 43350 |
|
}, |
|
{ |
|
"epoch": 0.5969820767823492, |
|
"grad_norm": 0.1668146252632141, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7983, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.597669844151914, |
|
"grad_norm": 0.15414200723171234, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7989, |
|
"step": 43450 |
|
}, |
|
{ |
|
"epoch": 0.5983576115214789, |
|
"grad_norm": 0.15912353992462158, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7964, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.5990453788910439, |
|
"grad_norm": 0.15936636924743652, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7944, |
|
"step": 43550 |
|
}, |
|
{ |
|
"epoch": 0.5997331462606088, |
|
"grad_norm": 0.17340709269046783, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7912, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.6004209136301737, |
|
"grad_norm": 0.18960115313529968, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7946, |
|
"step": 43650 |
|
}, |
|
{ |
|
"epoch": 0.6011086809997387, |
|
"grad_norm": 0.17091485857963562, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7998, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.6017964483693036, |
|
"grad_norm": 0.17222945392131805, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8016, |
|
"step": 43750 |
|
}, |
|
{ |
|
"epoch": 0.6024842157388685, |
|
"grad_norm": 0.1608862429857254, |
|
"learning_rate": 0.0001, |
|
"loss": 1.794, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.6031719831084335, |
|
"grad_norm": 0.16626954078674316, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7971, |
|
"step": 43850 |
|
}, |
|
{ |
|
"epoch": 0.6038597504779983, |
|
"grad_norm": 0.1769898533821106, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7992, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.6045475178475632, |
|
"grad_norm": 0.1665075570344925, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7976, |
|
"step": 43950 |
|
}, |
|
{ |
|
"epoch": 0.6052352852171281, |
|
"grad_norm": 0.1957935094833374, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7972, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.6059230525866931, |
|
"grad_norm": 0.20066794753074646, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7976, |
|
"step": 44050 |
|
}, |
|
{ |
|
"epoch": 0.606610819956258, |
|
"grad_norm": 0.16102181375026703, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7942, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.6072985873258229, |
|
"grad_norm": 0.16587640345096588, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7964, |
|
"step": 44150 |
|
}, |
|
{ |
|
"epoch": 0.6079863546953879, |
|
"grad_norm": 0.17338010668754578, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7955, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.6086741220649527, |
|
"grad_norm": 0.1979152411222458, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7964, |
|
"step": 44250 |
|
}, |
|
{ |
|
"epoch": 0.6093618894345176, |
|
"grad_norm": 0.16478174924850464, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8013, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.6100496568040826, |
|
"grad_norm": 0.16508819162845612, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7922, |
|
"step": 44350 |
|
}, |
|
{ |
|
"epoch": 0.6107374241736475, |
|
"grad_norm": 0.15964439511299133, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7975, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.6114251915432124, |
|
"grad_norm": 0.18116386234760284, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7972, |
|
"step": 44450 |
|
}, |
|
{ |
|
"epoch": 0.6121129589127774, |
|
"grad_norm": 0.1808495819568634, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7958, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.6128007262823423, |
|
"grad_norm": 0.1634376347064972, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7931, |
|
"step": 44550 |
|
}, |
|
{ |
|
"epoch": 0.6134884936519072, |
|
"grad_norm": 0.15140944719314575, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7995, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.614176261021472, |
|
"grad_norm": 0.15988072752952576, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7957, |
|
"step": 44650 |
|
}, |
|
{ |
|
"epoch": 0.614864028391037, |
|
"grad_norm": 0.16280120611190796, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7986, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.6155517957606019, |
|
"grad_norm": 0.16643498837947845, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7975, |
|
"step": 44750 |
|
}, |
|
{ |
|
"epoch": 0.6162395631301668, |
|
"grad_norm": 0.151467427611351, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7972, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.6169273304997318, |
|
"grad_norm": 0.1621852070093155, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7948, |
|
"step": 44850 |
|
}, |
|
{ |
|
"epoch": 0.6176150978692967, |
|
"grad_norm": 0.1828535795211792, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7939, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.6183028652388616, |
|
"grad_norm": 0.1630941480398178, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7987, |
|
"step": 44950 |
|
}, |
|
{ |
|
"epoch": 0.6189906326084266, |
|
"grad_norm": 0.1701328009366989, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7955, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.6196783999779915, |
|
"grad_norm": 0.16631458699703217, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7985, |
|
"step": 45050 |
|
}, |
|
{ |
|
"epoch": 0.6203661673475563, |
|
"grad_norm": 0.17133264243602753, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7946, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.6210539347171212, |
|
"grad_norm": 0.19388112425804138, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7944, |
|
"step": 45150 |
|
}, |
|
{ |
|
"epoch": 0.6217417020866862, |
|
"grad_norm": 0.1769258826971054, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7937, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.6224294694562511, |
|
"grad_norm": 0.21986328065395355, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7946, |
|
"step": 45250 |
|
}, |
|
{ |
|
"epoch": 0.623117236825816, |
|
"grad_norm": 0.1711747795343399, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7923, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.623805004195381, |
|
"grad_norm": 0.1730772852897644, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7976, |
|
"step": 45350 |
|
}, |
|
{ |
|
"epoch": 0.6244927715649459, |
|
"grad_norm": 0.16657279431819916, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7958, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.6251805389345108, |
|
"grad_norm": 0.15675725042819977, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7931, |
|
"step": 45450 |
|
}, |
|
{ |
|
"epoch": 0.6258683063040757, |
|
"grad_norm": 0.17763769626617432, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7972, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.6265560736736406, |
|
"grad_norm": 0.1630527824163437, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7948, |
|
"step": 45550 |
|
}, |
|
{ |
|
"epoch": 0.6272438410432055, |
|
"grad_norm": 0.16628991067409515, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7959, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.6279316084127705, |
|
"grad_norm": 0.1589209884405136, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7949, |
|
"step": 45650 |
|
}, |
|
{ |
|
"epoch": 0.6286193757823354, |
|
"grad_norm": 0.17715197801589966, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7971, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.6293071431519003, |
|
"grad_norm": 0.1824561059474945, |
|
"learning_rate": 0.0001, |
|
"loss": 1.795, |
|
"step": 45750 |
|
}, |
|
{ |
|
"epoch": 0.6299949105214652, |
|
"grad_norm": 0.16866008937358856, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7957, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.6306826778910302, |
|
"grad_norm": 0.14337721467018127, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7937, |
|
"step": 45850 |
|
}, |
|
{ |
|
"epoch": 0.631370445260595, |
|
"grad_norm": 0.15916399657726288, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7938, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.6320582126301599, |
|
"grad_norm": 0.1653524488210678, |
|
"learning_rate": 0.0001, |
|
"loss": 1.795, |
|
"step": 45950 |
|
}, |
|
{ |
|
"epoch": 0.6327459799997249, |
|
"grad_norm": 0.1588210016489029, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7963, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.6334337473692898, |
|
"grad_norm": 0.16008345782756805, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7978, |
|
"step": 46050 |
|
}, |
|
{ |
|
"epoch": 0.6341215147388547, |
|
"grad_norm": 0.16054043173789978, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7914, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.6348092821084197, |
|
"grad_norm": 0.19745290279388428, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7938, |
|
"step": 46150 |
|
}, |
|
{ |
|
"epoch": 0.6354970494779846, |
|
"grad_norm": 0.18955908715724945, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7948, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.6361848168475495, |
|
"grad_norm": 0.16962236166000366, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7911, |
|
"step": 46250 |
|
}, |
|
{ |
|
"epoch": 0.6368725842171145, |
|
"grad_norm": 0.17200341820716858, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7935, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.6375603515866793, |
|
"grad_norm": 0.17781908810138702, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7905, |
|
"step": 46350 |
|
}, |
|
{ |
|
"epoch": 0.6382481189562442, |
|
"grad_norm": 0.17602622509002686, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7945, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.6389358863258091, |
|
"grad_norm": 0.1686919629573822, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7892, |
|
"step": 46450 |
|
}, |
|
{ |
|
"epoch": 0.6396236536953741, |
|
"grad_norm": 0.15013763308525085, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7969, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.640311421064939, |
|
"grad_norm": 0.16534103453159332, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7943, |
|
"step": 46550 |
|
}, |
|
{ |
|
"epoch": 0.6409991884345039, |
|
"grad_norm": 0.16527748107910156, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7904, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.6416869558040689, |
|
"grad_norm": 0.15024395287036896, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7944, |
|
"step": 46650 |
|
}, |
|
{ |
|
"epoch": 0.6423747231736338, |
|
"grad_norm": 0.17082852125167847, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7942, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.6430624905431986, |
|
"grad_norm": 0.1649017482995987, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7936, |
|
"step": 46750 |
|
}, |
|
{ |
|
"epoch": 0.6437502579127636, |
|
"grad_norm": 0.16045525670051575, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7913, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.6444380252823285, |
|
"grad_norm": 0.18290746212005615, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7898, |
|
"step": 46850 |
|
}, |
|
{ |
|
"epoch": 0.6451257926518934, |
|
"grad_norm": 0.14731939136981964, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7934, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.6458135600214583, |
|
"grad_norm": 0.16072627902030945, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7933, |
|
"step": 46950 |
|
}, |
|
{ |
|
"epoch": 0.6465013273910233, |
|
"grad_norm": 0.14942970871925354, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7944, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.6471890947605882, |
|
"grad_norm": 0.14922235906124115, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7953, |
|
"step": 47050 |
|
}, |
|
{ |
|
"epoch": 0.647876862130153, |
|
"grad_norm": 0.17120474576950073, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7955, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.648564629499718, |
|
"grad_norm": 0.17423823475837708, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7919, |
|
"step": 47150 |
|
}, |
|
{ |
|
"epoch": 0.6492523968692829, |
|
"grad_norm": 0.1567763239145279, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7934, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.6499401642388478, |
|
"grad_norm": 0.15817411243915558, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7928, |
|
"step": 47250 |
|
}, |
|
{ |
|
"epoch": 0.6506279316084128, |
|
"grad_norm": 0.1748141348361969, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7884, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.6513156989779777, |
|
"grad_norm": 0.2045951634645462, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7978, |
|
"step": 47350 |
|
}, |
|
{ |
|
"epoch": 0.6520034663475426, |
|
"grad_norm": 0.17650052905082703, |
|
"learning_rate": 0.0001, |
|
"loss": 1.792, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.6526912337171076, |
|
"grad_norm": 0.17905278503894806, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7958, |
|
"step": 47450 |
|
}, |
|
{ |
|
"epoch": 0.6533790010866725, |
|
"grad_norm": 0.1599511355161667, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7912, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.6540667684562373, |
|
"grad_norm": 0.1584351658821106, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7949, |
|
"step": 47550 |
|
}, |
|
{ |
|
"epoch": 0.6547545358258022, |
|
"grad_norm": 0.17251476645469666, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7913, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.6554423031953672, |
|
"grad_norm": 0.17718471586704254, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7934, |
|
"step": 47650 |
|
}, |
|
{ |
|
"epoch": 0.6561300705649321, |
|
"grad_norm": 0.15196654200553894, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7894, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.656817837934497, |
|
"grad_norm": 0.17444145679473877, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7907, |
|
"step": 47750 |
|
}, |
|
{ |
|
"epoch": 0.657505605304062, |
|
"grad_norm": 0.15149961411952972, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7959, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.6581933726736269, |
|
"grad_norm": 0.1591227501630783, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7907, |
|
"step": 47850 |
|
}, |
|
{ |
|
"epoch": 0.6588811400431918, |
|
"grad_norm": 0.20135171711444855, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7963, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.6595689074127568, |
|
"grad_norm": 0.16523614525794983, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7968, |
|
"step": 47950 |
|
}, |
|
{ |
|
"epoch": 0.6602566747823216, |
|
"grad_norm": 0.15842151641845703, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7897, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.6609444421518865, |
|
"grad_norm": 0.160832479596138, |
|
"learning_rate": 0.0001, |
|
"loss": 1.796, |
|
"step": 48050 |
|
}, |
|
{ |
|
"epoch": 0.6616322095214515, |
|
"grad_norm": 0.16063477098941803, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7903, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.6623199768910164, |
|
"grad_norm": 0.1595107465982437, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7953, |
|
"step": 48150 |
|
}, |
|
{ |
|
"epoch": 0.6630077442605813, |
|
"grad_norm": 0.18313910067081451, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7957, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.6636955116301462, |
|
"grad_norm": 0.17561380565166473, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7906, |
|
"step": 48250 |
|
}, |
|
{ |
|
"epoch": 0.6643832789997112, |
|
"grad_norm": 0.18327072262763977, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7916, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.665071046369276, |
|
"grad_norm": 0.16745221614837646, |
|
"learning_rate": 0.0001, |
|
"loss": 1.791, |
|
"step": 48350 |
|
}, |
|
{ |
|
"epoch": 0.6657588137388409, |
|
"grad_norm": 0.16286319494247437, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7942, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.6664465811084059, |
|
"grad_norm": 0.15864308178424835, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7953, |
|
"step": 48450 |
|
}, |
|
{ |
|
"epoch": 0.6671343484779708, |
|
"grad_norm": 0.16778843104839325, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7945, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.6678221158475357, |
|
"grad_norm": 0.1448727399110794, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7942, |
|
"step": 48550 |
|
}, |
|
{ |
|
"epoch": 0.6685098832171007, |
|
"grad_norm": 0.16745643317699432, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7903, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.6691976505866656, |
|
"grad_norm": 0.1633836030960083, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7938, |
|
"step": 48650 |
|
}, |
|
{ |
|
"epoch": 0.6698854179562305, |
|
"grad_norm": 0.15037505328655243, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7963, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.6705731853257954, |
|
"grad_norm": 0.1707869917154312, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7895, |
|
"step": 48750 |
|
}, |
|
{ |
|
"epoch": 0.6712609526953603, |
|
"grad_norm": 0.17392534017562866, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7926, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.6719487200649252, |
|
"grad_norm": 0.1588422805070877, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7958, |
|
"step": 48850 |
|
}, |
|
{ |
|
"epoch": 0.6726364874344901, |
|
"grad_norm": 0.1751549243927002, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7931, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.6733242548040551, |
|
"grad_norm": 0.1722249686717987, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7892, |
|
"step": 48950 |
|
}, |
|
{ |
|
"epoch": 0.67401202217362, |
|
"grad_norm": 0.1673288643360138, |
|
"learning_rate": 0.0001, |
|
"loss": 1.793, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.6746997895431849, |
|
"grad_norm": 0.1552770733833313, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7916, |
|
"step": 49050 |
|
}, |
|
{ |
|
"epoch": 0.6753875569127499, |
|
"grad_norm": 0.15788178145885468, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7981, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.6760753242823148, |
|
"grad_norm": 0.17959725856781006, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7949, |
|
"step": 49150 |
|
}, |
|
{ |
|
"epoch": 0.6767630916518796, |
|
"grad_norm": 0.1584416925907135, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7946, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.6774508590214446, |
|
"grad_norm": 0.1645151674747467, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7916, |
|
"step": 49250 |
|
}, |
|
{ |
|
"epoch": 0.6781386263910095, |
|
"grad_norm": 0.1522347778081894, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7891, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.6788263937605744, |
|
"grad_norm": 0.16095298528671265, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7927, |
|
"step": 49350 |
|
}, |
|
{ |
|
"epoch": 0.6795141611301393, |
|
"grad_norm": 0.15317974984645844, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7947, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.6802019284997043, |
|
"grad_norm": 0.16854670643806458, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7929, |
|
"step": 49450 |
|
}, |
|
{ |
|
"epoch": 0.6808896958692692, |
|
"grad_norm": 0.1702488660812378, |
|
"learning_rate": 0.0001, |
|
"loss": 1.791, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.6815774632388341, |
|
"grad_norm": 0.16388344764709473, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7894, |
|
"step": 49550 |
|
}, |
|
{ |
|
"epoch": 0.682265230608399, |
|
"grad_norm": 0.16601653397083282, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7949, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.6829529979779639, |
|
"grad_norm": 0.17910674214363098, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7875, |
|
"step": 49650 |
|
}, |
|
{ |
|
"epoch": 0.6836407653475288, |
|
"grad_norm": 0.15689565241336823, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7904, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.6843285327170938, |
|
"grad_norm": 0.15473750233650208, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7894, |
|
"step": 49750 |
|
}, |
|
{ |
|
"epoch": 0.6850163000866587, |
|
"grad_norm": 0.16794639825820923, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7934, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.6857040674562236, |
|
"grad_norm": 0.15183915197849274, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7887, |
|
"step": 49850 |
|
}, |
|
{ |
|
"epoch": 0.6863918348257885, |
|
"grad_norm": 0.15028232336044312, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7929, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.6870796021953535, |
|
"grad_norm": 0.16230390965938568, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7948, |
|
"step": 49950 |
|
}, |
|
{ |
|
"epoch": 0.6877673695649184, |
|
"grad_norm": 0.16958658397197723, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7932, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.6884551369344832, |
|
"grad_norm": 0.15662765502929688, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7904, |
|
"step": 50050 |
|
}, |
|
{ |
|
"epoch": 0.6891429043040482, |
|
"grad_norm": 0.17507807910442352, |
|
"learning_rate": 0.0001, |
|
"loss": 1.795, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.6898306716736131, |
|
"grad_norm": 0.16449585556983948, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7888, |
|
"step": 50150 |
|
}, |
|
{ |
|
"epoch": 0.690518439043178, |
|
"grad_norm": 0.17615753412246704, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7889, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.691206206412743, |
|
"grad_norm": 0.16010646522045135, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7932, |
|
"step": 50250 |
|
}, |
|
{ |
|
"epoch": 0.6918939737823079, |
|
"grad_norm": 0.14614787697792053, |
|
"learning_rate": 0.0001, |
|
"loss": 1.792, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.6925817411518728, |
|
"grad_norm": 0.19960370659828186, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7907, |
|
"step": 50350 |
|
}, |
|
{ |
|
"epoch": 0.6932695085214378, |
|
"grad_norm": 0.16230808198451996, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7855, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.6939572758910026, |
|
"grad_norm": 0.16344518959522247, |
|
"learning_rate": 0.0001, |
|
"loss": 1.791, |
|
"step": 50450 |
|
}, |
|
{ |
|
"epoch": 0.6946450432605675, |
|
"grad_norm": 0.16584964096546173, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7916, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.6953328106301324, |
|
"grad_norm": 0.15551120042800903, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7948, |
|
"step": 50550 |
|
}, |
|
{ |
|
"epoch": 0.6960205779996974, |
|
"grad_norm": 0.1697503924369812, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7917, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.6967083453692623, |
|
"grad_norm": 0.15577536821365356, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7905, |
|
"step": 50650 |
|
}, |
|
{ |
|
"epoch": 0.6973961127388272, |
|
"grad_norm": 0.17658278346061707, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7884, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.6980838801083922, |
|
"grad_norm": 0.16718824207782745, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7936, |
|
"step": 50750 |
|
}, |
|
{ |
|
"epoch": 0.6987716474779571, |
|
"grad_norm": 0.16996939480304718, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7919, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.6994594148475219, |
|
"grad_norm": 0.15299175679683685, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7919, |
|
"step": 50850 |
|
}, |
|
{ |
|
"epoch": 0.7001471822170869, |
|
"grad_norm": 0.1672915816307068, |
|
"learning_rate": 0.0001, |
|
"loss": 1.795, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.7008349495866518, |
|
"grad_norm": 0.17287658154964447, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7877, |
|
"step": 50950 |
|
}, |
|
{ |
|
"epoch": 0.7015227169562167, |
|
"grad_norm": 0.16447900235652924, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7915, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.7022104843257817, |
|
"grad_norm": 0.16016733646392822, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7911, |
|
"step": 51050 |
|
}, |
|
{ |
|
"epoch": 0.7028982516953466, |
|
"grad_norm": 0.15329506993293762, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7915, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.7035860190649115, |
|
"grad_norm": 0.1695086658000946, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7925, |
|
"step": 51150 |
|
}, |
|
{ |
|
"epoch": 0.7042737864344764, |
|
"grad_norm": 0.15667758882045746, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7908, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.7049615538040414, |
|
"grad_norm": 0.1636906862258911, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7911, |
|
"step": 51250 |
|
}, |
|
{ |
|
"epoch": 0.7056493211736062, |
|
"grad_norm": 0.16701051592826843, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7929, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.7063370885431711, |
|
"grad_norm": 0.17164082825183868, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7922, |
|
"step": 51350 |
|
}, |
|
{ |
|
"epoch": 0.7070248559127361, |
|
"grad_norm": 0.18162649869918823, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7892, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.707712623282301, |
|
"grad_norm": 0.1521824300289154, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7937, |
|
"step": 51450 |
|
}, |
|
{ |
|
"epoch": 0.7084003906518659, |
|
"grad_norm": 0.168669655919075, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7873, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.7090881580214309, |
|
"grad_norm": 0.17441484332084656, |
|
"learning_rate": 0.0001, |
|
"loss": 1.79, |
|
"step": 51550 |
|
}, |
|
{ |
|
"epoch": 0.7097759253909958, |
|
"grad_norm": 0.1877586394548416, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7927, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.7104636927605607, |
|
"grad_norm": 0.16195935010910034, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7962, |
|
"step": 51650 |
|
}, |
|
{ |
|
"epoch": 0.7111514601301255, |
|
"grad_norm": 0.16282670199871063, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7939, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.7118392274996905, |
|
"grad_norm": 0.15550565719604492, |
|
"learning_rate": 0.0001, |
|
"loss": 1.793, |
|
"step": 51750 |
|
}, |
|
{ |
|
"epoch": 0.7125269948692554, |
|
"grad_norm": 0.16963760554790497, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7921, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.7132147622388203, |
|
"grad_norm": 0.1632436364889145, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7943, |
|
"step": 51850 |
|
}, |
|
{ |
|
"epoch": 0.7139025296083853, |
|
"grad_norm": 0.15533354878425598, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7917, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.7145902969779502, |
|
"grad_norm": 0.15280106663703918, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7874, |
|
"step": 51950 |
|
}, |
|
{ |
|
"epoch": 0.7152780643475151, |
|
"grad_norm": 0.1561509668827057, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7918, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.7159658317170801, |
|
"grad_norm": 0.1560848206281662, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7927, |
|
"step": 52050 |
|
}, |
|
{ |
|
"epoch": 0.7166535990866449, |
|
"grad_norm": 0.1706065684556961, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7877, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.7173413664562098, |
|
"grad_norm": 0.16388699412345886, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7859, |
|
"step": 52150 |
|
}, |
|
{ |
|
"epoch": 0.7180291338257748, |
|
"grad_norm": 0.16502410173416138, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7899, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.7187169011953397, |
|
"grad_norm": 0.17022061347961426, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7881, |
|
"step": 52250 |
|
}, |
|
{ |
|
"epoch": 0.7194046685649046, |
|
"grad_norm": 0.17903153598308563, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7892, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.7200924359344695, |
|
"grad_norm": 0.15719935297966003, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7934, |
|
"step": 52350 |
|
}, |
|
{ |
|
"epoch": 0.7207802033040345, |
|
"grad_norm": 0.16321443021297455, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7914, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.7214679706735994, |
|
"grad_norm": 0.1724744439125061, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7905, |
|
"step": 52450 |
|
}, |
|
{ |
|
"epoch": 0.7221557380431642, |
|
"grad_norm": 0.16059927642345428, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7929, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.7228435054127292, |
|
"grad_norm": 0.17748789489269257, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7913, |
|
"step": 52550 |
|
}, |
|
{ |
|
"epoch": 0.7235312727822941, |
|
"grad_norm": 0.16190293431282043, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7956, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.724219040151859, |
|
"grad_norm": 0.1841738224029541, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7899, |
|
"step": 52650 |
|
}, |
|
{ |
|
"epoch": 0.724906807521424, |
|
"grad_norm": 0.15971702337265015, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7891, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.7255945748909889, |
|
"grad_norm": 0.15894858539104462, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7939, |
|
"step": 52750 |
|
}, |
|
{ |
|
"epoch": 0.7262823422605538, |
|
"grad_norm": 0.15041370689868927, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7885, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.7269701096301187, |
|
"grad_norm": 0.15757033228874207, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7881, |
|
"step": 52850 |
|
}, |
|
{ |
|
"epoch": 0.7276578769996837, |
|
"grad_norm": 0.16385579109191895, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7889, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.7283456443692485, |
|
"grad_norm": 0.15629428625106812, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7932, |
|
"step": 52950 |
|
}, |
|
{ |
|
"epoch": 0.7290334117388134, |
|
"grad_norm": 0.1573755145072937, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7926, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.7297211791083784, |
|
"grad_norm": 0.15800927579402924, |
|
"learning_rate": 0.0001, |
|
"loss": 1.789, |
|
"step": 53050 |
|
}, |
|
{ |
|
"epoch": 0.7304089464779433, |
|
"grad_norm": 0.16997511684894562, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7892, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.7310967138475082, |
|
"grad_norm": 0.1457889825105667, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7892, |
|
"step": 53150 |
|
}, |
|
{ |
|
"epoch": 0.7317844812170732, |
|
"grad_norm": 0.15250973403453827, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7892, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.7324722485866381, |
|
"grad_norm": 0.1561204344034195, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7915, |
|
"step": 53250 |
|
}, |
|
{ |
|
"epoch": 0.733160015956203, |
|
"grad_norm": 0.17602892220020294, |
|
"learning_rate": 0.0001, |
|
"loss": 1.789, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.7338477833257679, |
|
"grad_norm": 0.15751750767230988, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7924, |
|
"step": 53350 |
|
}, |
|
{ |
|
"epoch": 0.7345355506953328, |
|
"grad_norm": 0.1686706244945526, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7859, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.7352233180648977, |
|
"grad_norm": 0.15886232256889343, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7891, |
|
"step": 53450 |
|
}, |
|
{ |
|
"epoch": 0.7359110854344626, |
|
"grad_norm": 0.1548243910074234, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7887, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.7365988528040276, |
|
"grad_norm": 0.16160327196121216, |
|
"learning_rate": 0.0001, |
|
"loss": 1.792, |
|
"step": 53550 |
|
}, |
|
{ |
|
"epoch": 0.7372866201735925, |
|
"grad_norm": 0.1588127613067627, |
|
"learning_rate": 0.0001, |
|
"loss": 1.791, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.7379743875431574, |
|
"grad_norm": 0.1562395691871643, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7876, |
|
"step": 53650 |
|
}, |
|
{ |
|
"epoch": 0.7386621549127224, |
|
"grad_norm": 0.1463010013103485, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7903, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.7393499222822872, |
|
"grad_norm": 0.1688784807920456, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7874, |
|
"step": 53750 |
|
}, |
|
{ |
|
"epoch": 0.7400376896518521, |
|
"grad_norm": 0.16111525893211365, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7891, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.7407254570214171, |
|
"grad_norm": 0.15798266232013702, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7901, |
|
"step": 53850 |
|
}, |
|
{ |
|
"epoch": 0.741413224390982, |
|
"grad_norm": 0.1544068306684494, |
|
"learning_rate": 0.0001, |
|
"loss": 1.79, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.7421009917605469, |
|
"grad_norm": 0.16747315227985382, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7923, |
|
"step": 53950 |
|
}, |
|
{ |
|
"epoch": 0.7427887591301119, |
|
"grad_norm": 0.20277969539165497, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7932, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.7434765264996768, |
|
"grad_norm": 0.1490595042705536, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7899, |
|
"step": 54050 |
|
}, |
|
{ |
|
"epoch": 0.7441642938692417, |
|
"grad_norm": 0.15864817798137665, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7838, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.7448520612388065, |
|
"grad_norm": 0.17168639600276947, |
|
"learning_rate": 0.0001, |
|
"loss": 1.79, |
|
"step": 54150 |
|
}, |
|
{ |
|
"epoch": 0.7455398286083715, |
|
"grad_norm": 0.1612584888935089, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7894, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.7462275959779364, |
|
"grad_norm": 0.16638678312301636, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7852, |
|
"step": 54250 |
|
}, |
|
{ |
|
"epoch": 0.7469153633475013, |
|
"grad_norm": 0.16757947206497192, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7899, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.7476031307170663, |
|
"grad_norm": 0.17740657925605774, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7891, |
|
"step": 54350 |
|
}, |
|
{ |
|
"epoch": 0.7482908980866312, |
|
"grad_norm": 0.15608841180801392, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7864, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.7489786654561961, |
|
"grad_norm": 0.1486404538154602, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7895, |
|
"step": 54450 |
|
}, |
|
{ |
|
"epoch": 0.7496664328257611, |
|
"grad_norm": 0.17158234119415283, |
|
"learning_rate": 0.0001, |
|
"loss": 1.789, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.750354200195326, |
|
"grad_norm": 0.1535918265581131, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7858, |
|
"step": 54550 |
|
}, |
|
{ |
|
"epoch": 0.7510419675648908, |
|
"grad_norm": 0.17464052140712738, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7884, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.7517297349344557, |
|
"grad_norm": 0.15320485830307007, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7909, |
|
"step": 54650 |
|
}, |
|
{ |
|
"epoch": 0.7524175023040207, |
|
"grad_norm": 0.16376914083957672, |
|
"learning_rate": 0.0001, |
|
"loss": 1.789, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.7531052696735856, |
|
"grad_norm": 0.17047230899333954, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7886, |
|
"step": 54750 |
|
}, |
|
{ |
|
"epoch": 0.7537930370431505, |
|
"grad_norm": 0.1580251306295395, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7904, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.7544808044127155, |
|
"grad_norm": 0.16085964441299438, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7872, |
|
"step": 54850 |
|
}, |
|
{ |
|
"epoch": 0.7551685717822804, |
|
"grad_norm": 0.1530008316040039, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7909, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.7558563391518452, |
|
"grad_norm": 0.18514500558376312, |
|
"learning_rate": 0.0001, |
|
"loss": 1.789, |
|
"step": 54950 |
|
}, |
|
{ |
|
"epoch": 0.7565441065214102, |
|
"grad_norm": 0.16724203526973724, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7895, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.7572318738909751, |
|
"grad_norm": 0.17008638381958008, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7909, |
|
"step": 55050 |
|
}, |
|
{ |
|
"epoch": 0.75791964126054, |
|
"grad_norm": 0.15402346849441528, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7858, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.758607408630105, |
|
"grad_norm": 0.1750432401895523, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7898, |
|
"step": 55150 |
|
}, |
|
{ |
|
"epoch": 0.7592951759996699, |
|
"grad_norm": 0.18680183589458466, |
|
"learning_rate": 0.0001, |
|
"loss": 1.788, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.7599829433692348, |
|
"grad_norm": 0.16581743955612183, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7902, |
|
"step": 55250 |
|
}, |
|
{ |
|
"epoch": 0.7606707107387997, |
|
"grad_norm": 0.16159740090370178, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7843, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.7613584781083647, |
|
"grad_norm": 0.14381587505340576, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7918, |
|
"step": 55350 |
|
}, |
|
{ |
|
"epoch": 0.7620462454779295, |
|
"grad_norm": 0.15160152316093445, |
|
"learning_rate": 0.0001, |
|
"loss": 1.789, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.7627340128474944, |
|
"grad_norm": 0.16748382151126862, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7865, |
|
"step": 55450 |
|
}, |
|
{ |
|
"epoch": 0.7634217802170594, |
|
"grad_norm": 0.15434932708740234, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7894, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.7641095475866243, |
|
"grad_norm": 0.16281753778457642, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7909, |
|
"step": 55550 |
|
}, |
|
{ |
|
"epoch": 0.7647973149561892, |
|
"grad_norm": 0.1581009328365326, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7872, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.7654850823257542, |
|
"grad_norm": 0.16244924068450928, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7882, |
|
"step": 55650 |
|
}, |
|
{ |
|
"epoch": 0.7661728496953191, |
|
"grad_norm": 0.1727581024169922, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7894, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.766860617064884, |
|
"grad_norm": 0.15804524719715118, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7883, |
|
"step": 55750 |
|
}, |
|
{ |
|
"epoch": 0.7675483844344488, |
|
"grad_norm": 0.16742980480194092, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7883, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.7682361518040138, |
|
"grad_norm": 0.15518859028816223, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7877, |
|
"step": 55850 |
|
}, |
|
{ |
|
"epoch": 0.7689239191735787, |
|
"grad_norm": 0.14549891650676727, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7894, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.7696116865431436, |
|
"grad_norm": 0.15677410364151, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7868, |
|
"step": 55950 |
|
}, |
|
{ |
|
"epoch": 0.7702994539127086, |
|
"grad_norm": 0.1627907007932663, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7861, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.7709872212822735, |
|
"grad_norm": 0.17789112031459808, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7917, |
|
"step": 56050 |
|
}, |
|
{ |
|
"epoch": 0.7716749886518384, |
|
"grad_norm": 0.17732852697372437, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7885, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.7723627560214034, |
|
"grad_norm": 0.16175003349781036, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7847, |
|
"step": 56150 |
|
}, |
|
{ |
|
"epoch": 0.7730505233909682, |
|
"grad_norm": 0.16384829580783844, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7879, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.7737382907605331, |
|
"grad_norm": 0.18334250152111053, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7841, |
|
"step": 56250 |
|
}, |
|
{ |
|
"epoch": 0.7744260581300981, |
|
"grad_norm": 0.16775920987129211, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7844, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.775113825499663, |
|
"grad_norm": 0.15945740044116974, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7867, |
|
"step": 56350 |
|
}, |
|
{ |
|
"epoch": 0.7758015928692279, |
|
"grad_norm": 0.16826015710830688, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7874, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.7764893602387928, |
|
"grad_norm": 0.16733418405056, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7843, |
|
"step": 56450 |
|
}, |
|
{ |
|
"epoch": 0.7771771276083578, |
|
"grad_norm": 0.17716175317764282, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7852, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.7778648949779227, |
|
"grad_norm": 0.15145139396190643, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7864, |
|
"step": 56550 |
|
}, |
|
{ |
|
"epoch": 0.7785526623474875, |
|
"grad_norm": 0.1650010645389557, |
|
"learning_rate": 0.0001, |
|
"loss": 1.788, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.7792404297170525, |
|
"grad_norm": 0.15676827728748322, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7863, |
|
"step": 56650 |
|
}, |
|
{ |
|
"epoch": 0.7799281970866174, |
|
"grad_norm": 0.15251976251602173, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7894, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.7806159644561823, |
|
"grad_norm": 0.16107071936130524, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7872, |
|
"step": 56750 |
|
}, |
|
{ |
|
"epoch": 0.7813037318257473, |
|
"grad_norm": 0.16008871793746948, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7879, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.7819914991953122, |
|
"grad_norm": 0.1748703122138977, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7883, |
|
"step": 56850 |
|
}, |
|
{ |
|
"epoch": 0.7826792665648771, |
|
"grad_norm": 0.1847066432237625, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7878, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.7833670339344421, |
|
"grad_norm": 0.14105017483234406, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7872, |
|
"step": 56950 |
|
}, |
|
{ |
|
"epoch": 0.784054801304007, |
|
"grad_norm": 0.1463741511106491, |
|
"learning_rate": 0.0001, |
|
"loss": 1.784, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.7847425686735718, |
|
"grad_norm": 0.15982814133167267, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7904, |
|
"step": 57050 |
|
}, |
|
{ |
|
"epoch": 0.7854303360431367, |
|
"grad_norm": 0.15282031893730164, |
|
"learning_rate": 0.0001, |
|
"loss": 1.788, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.7861181034127017, |
|
"grad_norm": 0.16466231644153595, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7862, |
|
"step": 57150 |
|
}, |
|
{ |
|
"epoch": 0.7868058707822666, |
|
"grad_norm": 0.16176077723503113, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7883, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.7874936381518315, |
|
"grad_norm": 0.16768991947174072, |
|
"learning_rate": 0.0001, |
|
"loss": 1.791, |
|
"step": 57250 |
|
}, |
|
{ |
|
"epoch": 0.7881814055213965, |
|
"grad_norm": 0.15378397703170776, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7889, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.7888691728909614, |
|
"grad_norm": 0.16845440864562988, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7865, |
|
"step": 57350 |
|
}, |
|
{ |
|
"epoch": 0.7895569402605263, |
|
"grad_norm": 0.16859596967697144, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7893, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.7902447076300912, |
|
"grad_norm": 0.17096339166164398, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7842, |
|
"step": 57450 |
|
}, |
|
{ |
|
"epoch": 0.7909324749996561, |
|
"grad_norm": 0.19546246528625488, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7859, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.791620242369221, |
|
"grad_norm": 0.15690521895885468, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7905, |
|
"step": 57550 |
|
}, |
|
{ |
|
"epoch": 0.7923080097387859, |
|
"grad_norm": 0.15288680791854858, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7871, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.7929957771083509, |
|
"grad_norm": 0.15947267413139343, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7851, |
|
"step": 57650 |
|
}, |
|
{ |
|
"epoch": 0.7936835444779158, |
|
"grad_norm": 0.1813030242919922, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7833, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.7943713118474807, |
|
"grad_norm": 0.16709686815738678, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7908, |
|
"step": 57750 |
|
}, |
|
{ |
|
"epoch": 0.7950590792170457, |
|
"grad_norm": 0.19110731780529022, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7845, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.7957468465866105, |
|
"grad_norm": 0.15795393288135529, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7908, |
|
"step": 57850 |
|
}, |
|
{ |
|
"epoch": 0.7964346139561754, |
|
"grad_norm": 0.14493565261363983, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7893, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.7971223813257404, |
|
"grad_norm": 0.14182139933109283, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7883, |
|
"step": 57950 |
|
}, |
|
{ |
|
"epoch": 0.7978101486953053, |
|
"grad_norm": 0.14074084162712097, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7857, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.7984979160648702, |
|
"grad_norm": 0.1791408807039261, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7889, |
|
"step": 58050 |
|
}, |
|
{ |
|
"epoch": 0.7991856834344352, |
|
"grad_norm": 0.17944924533367157, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7884, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.7998734508040001, |
|
"grad_norm": 0.19336557388305664, |
|
"learning_rate": 0.0001, |
|
"loss": 1.786, |
|
"step": 58150 |
|
}, |
|
{ |
|
"epoch": 0.800561218173565, |
|
"grad_norm": 0.14197582006454468, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7834, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.8012489855431298, |
|
"grad_norm": 0.17862093448638916, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7859, |
|
"step": 58250 |
|
}, |
|
{ |
|
"epoch": 0.8019367529126948, |
|
"grad_norm": 0.15174590051174164, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7883, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.8026245202822597, |
|
"grad_norm": 0.15902046859264374, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7859, |
|
"step": 58350 |
|
}, |
|
{ |
|
"epoch": 0.8033122876518246, |
|
"grad_norm": 0.1593545824289322, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7871, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.8040000550213896, |
|
"grad_norm": 0.16780108213424683, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7892, |
|
"step": 58450 |
|
}, |
|
{ |
|
"epoch": 0.8046878223909545, |
|
"grad_norm": 0.16704651713371277, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7827, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.8053755897605194, |
|
"grad_norm": 0.20908869802951813, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7868, |
|
"step": 58550 |
|
}, |
|
{ |
|
"epoch": 0.8060633571300844, |
|
"grad_norm": 0.1484072208404541, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7905, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.8067511244996493, |
|
"grad_norm": 0.16092757880687714, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7849, |
|
"step": 58650 |
|
}, |
|
{ |
|
"epoch": 0.8074388918692141, |
|
"grad_norm": 0.15798570215702057, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7897, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.808126659238779, |
|
"grad_norm": 0.15388993918895721, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7874, |
|
"step": 58750 |
|
}, |
|
{ |
|
"epoch": 0.808814426608344, |
|
"grad_norm": 0.16136646270751953, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7866, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.8095021939779089, |
|
"grad_norm": 0.20280751585960388, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7868, |
|
"step": 58850 |
|
}, |
|
{ |
|
"epoch": 0.8101899613474738, |
|
"grad_norm": 0.16941416263580322, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7834, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.8108777287170388, |
|
"grad_norm": 0.1597299724817276, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7823, |
|
"step": 58950 |
|
}, |
|
{ |
|
"epoch": 0.8115654960866037, |
|
"grad_norm": 0.1581617146730423, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7902, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.8122532634561686, |
|
"grad_norm": 0.17084243893623352, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7873, |
|
"step": 59050 |
|
}, |
|
{ |
|
"epoch": 0.8129410308257335, |
|
"grad_norm": 0.16124476492404938, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7894, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.8136287981952984, |
|
"grad_norm": 0.15042969584465027, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7873, |
|
"step": 59150 |
|
}, |
|
{ |
|
"epoch": 0.8143165655648633, |
|
"grad_norm": 0.14492358267307281, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7836, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.8150043329344283, |
|
"grad_norm": 0.17020314931869507, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7859, |
|
"step": 59250 |
|
}, |
|
{ |
|
"epoch": 0.8156921003039932, |
|
"grad_norm": 0.1630934178829193, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7841, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.8163798676735581, |
|
"grad_norm": 0.17032647132873535, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7851, |
|
"step": 59350 |
|
}, |
|
{ |
|
"epoch": 0.817067635043123, |
|
"grad_norm": 0.15546603500843048, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7866, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.817755402412688, |
|
"grad_norm": 0.1688961237668991, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7858, |
|
"step": 59450 |
|
}, |
|
{ |
|
"epoch": 0.8184431697822528, |
|
"grad_norm": 0.15222899615764618, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7848, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.8191309371518177, |
|
"grad_norm": 0.15309302508831024, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7847, |
|
"step": 59550 |
|
}, |
|
{ |
|
"epoch": 0.8198187045213827, |
|
"grad_norm": 0.1601337045431137, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7861, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.8205064718909476, |
|
"grad_norm": 0.14973758161067963, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7893, |
|
"step": 59650 |
|
}, |
|
{ |
|
"epoch": 0.8211942392605125, |
|
"grad_norm": 0.17928583920001984, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7841, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.8218820066300775, |
|
"grad_norm": 0.1628539264202118, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7861, |
|
"step": 59750 |
|
}, |
|
{ |
|
"epoch": 0.8225697739996424, |
|
"grad_norm": 0.1617124229669571, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7837, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.8232575413692073, |
|
"grad_norm": 0.16710211336612701, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7843, |
|
"step": 59850 |
|
}, |
|
{ |
|
"epoch": 0.8239453087387723, |
|
"grad_norm": 0.18266211450099945, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7882, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.8246330761083371, |
|
"grad_norm": 0.15460216999053955, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7856, |
|
"step": 59950 |
|
}, |
|
{ |
|
"epoch": 0.825320843477902, |
|
"grad_norm": 0.19238495826721191, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7867, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.8260086108474669, |
|
"grad_norm": 0.17882536351680756, |
|
"learning_rate": 0.0001, |
|
"loss": 1.79, |
|
"step": 60050 |
|
}, |
|
{ |
|
"epoch": 0.8266963782170319, |
|
"grad_norm": 0.17022471129894257, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7843, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.8273841455865968, |
|
"grad_norm": 0.16253788769245148, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7842, |
|
"step": 60150 |
|
}, |
|
{ |
|
"epoch": 0.8280719129561617, |
|
"grad_norm": 0.1684889793395996, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7871, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.8287596803257267, |
|
"grad_norm": 0.1623234748840332, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7812, |
|
"step": 60250 |
|
}, |
|
{ |
|
"epoch": 0.8294474476952916, |
|
"grad_norm": 0.14207519590854645, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7873, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.8301352150648564, |
|
"grad_norm": 0.15550558269023895, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7876, |
|
"step": 60350 |
|
}, |
|
{ |
|
"epoch": 0.8308229824344214, |
|
"grad_norm": 0.16578029096126556, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7804, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.8315107498039863, |
|
"grad_norm": 0.16406333446502686, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7837, |
|
"step": 60450 |
|
}, |
|
{ |
|
"epoch": 0.8321985171735512, |
|
"grad_norm": 0.1568935364484787, |
|
"learning_rate": 0.0001, |
|
"loss": 1.786, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.8328862845431161, |
|
"grad_norm": 0.17918673157691956, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7877, |
|
"step": 60550 |
|
}, |
|
{ |
|
"epoch": 0.8335740519126811, |
|
"grad_norm": 0.14733350276947021, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7821, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.834261819282246, |
|
"grad_norm": 0.14916177093982697, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7862, |
|
"step": 60650 |
|
}, |
|
{ |
|
"epoch": 0.8349495866518108, |
|
"grad_norm": 0.15052981674671173, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7892, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.8356373540213758, |
|
"grad_norm": 0.1831791251897812, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7844, |
|
"step": 60750 |
|
}, |
|
{ |
|
"epoch": 0.8363251213909407, |
|
"grad_norm": 0.16115884482860565, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7827, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.8370128887605056, |
|
"grad_norm": 0.15721943974494934, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7862, |
|
"step": 60850 |
|
}, |
|
{ |
|
"epoch": 0.8377006561300706, |
|
"grad_norm": 0.1528850942850113, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7852, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.8383884234996355, |
|
"grad_norm": 0.16134890913963318, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7875, |
|
"step": 60950 |
|
}, |
|
{ |
|
"epoch": 0.8390761908692004, |
|
"grad_norm": 0.16336651146411896, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7848, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.8397639582387654, |
|
"grad_norm": 0.16578875482082367, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7858, |
|
"step": 61050 |
|
}, |
|
{ |
|
"epoch": 0.8404517256083303, |
|
"grad_norm": 0.16235701739788055, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7869, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.8411394929778951, |
|
"grad_norm": 0.16650299727916718, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7868, |
|
"step": 61150 |
|
}, |
|
{ |
|
"epoch": 0.84182726034746, |
|
"grad_norm": 0.148828387260437, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7827, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.842515027717025, |
|
"grad_norm": 0.1572546660900116, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7846, |
|
"step": 61250 |
|
}, |
|
{ |
|
"epoch": 0.8432027950865899, |
|
"grad_norm": 0.15572214126586914, |
|
"learning_rate": 0.0001, |
|
"loss": 1.788, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.8438905624561548, |
|
"grad_norm": 0.18148384988307953, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7829, |
|
"step": 61350 |
|
}, |
|
{ |
|
"epoch": 0.8445783298257198, |
|
"grad_norm": 0.16225239634513855, |
|
"learning_rate": 0.0001, |
|
"loss": 1.787, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.8452660971952847, |
|
"grad_norm": 0.1546306014060974, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7886, |
|
"step": 61450 |
|
}, |
|
{ |
|
"epoch": 0.8459538645648496, |
|
"grad_norm": 0.1589781790971756, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7876, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.8466416319344146, |
|
"grad_norm": 0.16938839852809906, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7805, |
|
"step": 61550 |
|
}, |
|
{ |
|
"epoch": 0.8473293993039794, |
|
"grad_norm": 0.17635032534599304, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7836, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.8480171666735443, |
|
"grad_norm": 0.16436606645584106, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7829, |
|
"step": 61650 |
|
}, |
|
{ |
|
"epoch": 0.8487049340431092, |
|
"grad_norm": 0.15410180389881134, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7833, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.8493927014126742, |
|
"grad_norm": 0.15711359679698944, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7855, |
|
"step": 61750 |
|
}, |
|
{ |
|
"epoch": 0.8500804687822391, |
|
"grad_norm": 0.14257673919200897, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7846, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.850768236151804, |
|
"grad_norm": 0.1770082414150238, |
|
"learning_rate": 0.0001, |
|
"loss": 1.786, |
|
"step": 61850 |
|
}, |
|
{ |
|
"epoch": 0.851456003521369, |
|
"grad_norm": 0.14938481152057648, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7841, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.8521437708909338, |
|
"grad_norm": 0.16232655942440033, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7872, |
|
"step": 61950 |
|
}, |
|
{ |
|
"epoch": 0.8528315382604987, |
|
"grad_norm": 0.14662796258926392, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7846, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.8535193056300637, |
|
"grad_norm": 0.15960827469825745, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7868, |
|
"step": 62050 |
|
}, |
|
{ |
|
"epoch": 0.8542070729996286, |
|
"grad_norm": 0.1585722714662552, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7833, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.8548948403691935, |
|
"grad_norm": 0.15847063064575195, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7861, |
|
"step": 62150 |
|
}, |
|
{ |
|
"epoch": 0.8555826077387585, |
|
"grad_norm": 0.1581469178199768, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7872, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.8562703751083234, |
|
"grad_norm": 0.18087923526763916, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7837, |
|
"step": 62250 |
|
}, |
|
{ |
|
"epoch": 0.8569581424778883, |
|
"grad_norm": 0.15878331661224365, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7865, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.8576459098474531, |
|
"grad_norm": 0.1652536690235138, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7864, |
|
"step": 62350 |
|
}, |
|
{ |
|
"epoch": 0.8583336772170181, |
|
"grad_norm": 0.16467753052711487, |
|
"learning_rate": 0.0001, |
|
"loss": 1.788, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.859021444586583, |
|
"grad_norm": 0.17342518270015717, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7853, |
|
"step": 62450 |
|
}, |
|
{ |
|
"epoch": 0.8597092119561479, |
|
"grad_norm": 0.15487852692604065, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7861, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.8603969793257129, |
|
"grad_norm": 0.16185085475444794, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7891, |
|
"step": 62550 |
|
}, |
|
{ |
|
"epoch": 0.8610847466952778, |
|
"grad_norm": 0.18629157543182373, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7836, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.8617725140648427, |
|
"grad_norm": 0.20009976625442505, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7849, |
|
"step": 62650 |
|
}, |
|
{ |
|
"epoch": 0.8624602814344077, |
|
"grad_norm": 0.16432398557662964, |
|
"learning_rate": 0.0001, |
|
"loss": 1.786, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 0.8631480488039726, |
|
"grad_norm": 0.16151119768619537, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7838, |
|
"step": 62750 |
|
}, |
|
{ |
|
"epoch": 0.8638358161735374, |
|
"grad_norm": 0.16223236918449402, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7857, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.8645235835431024, |
|
"grad_norm": 0.15118102729320526, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7824, |
|
"step": 62850 |
|
}, |
|
{ |
|
"epoch": 0.8652113509126673, |
|
"grad_norm": 0.15173585712909698, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7858, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 0.8658991182822322, |
|
"grad_norm": 0.1547808051109314, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7806, |
|
"step": 62950 |
|
}, |
|
{ |
|
"epoch": 0.8665868856517971, |
|
"grad_norm": 0.1542670577764511, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7816, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.8672746530213621, |
|
"grad_norm": 0.16760842502117157, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7845, |
|
"step": 63050 |
|
}, |
|
{ |
|
"epoch": 0.867962420390927, |
|
"grad_norm": 0.17703787982463837, |
|
"learning_rate": 0.0001, |
|
"loss": 1.788, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 0.8686501877604919, |
|
"grad_norm": 0.1573743224143982, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7792, |
|
"step": 63150 |
|
}, |
|
{ |
|
"epoch": 0.8693379551300568, |
|
"grad_norm": 0.1451522409915924, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7854, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.8700257224996217, |
|
"grad_norm": 0.17078782618045807, |
|
"learning_rate": 0.0001, |
|
"loss": 1.784, |
|
"step": 63250 |
|
}, |
|
{ |
|
"epoch": 0.8707134898691866, |
|
"grad_norm": 0.15471959114074707, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7832, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 0.8714012572387516, |
|
"grad_norm": 0.16724149882793427, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7783, |
|
"step": 63350 |
|
}, |
|
{ |
|
"epoch": 0.8720890246083165, |
|
"grad_norm": 0.15160906314849854, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7843, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.8727767919778814, |
|
"grad_norm": 0.156820610165596, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7856, |
|
"step": 63450 |
|
}, |
|
{ |
|
"epoch": 0.8734645593474463, |
|
"grad_norm": 0.16410048305988312, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7845, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.8741523267170113, |
|
"grad_norm": 0.16022023558616638, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7801, |
|
"step": 63550 |
|
}, |
|
{ |
|
"epoch": 0.8748400940865761, |
|
"grad_norm": 0.1775195300579071, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7824, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.875527861456141, |
|
"grad_norm": 0.17621392011642456, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7792, |
|
"step": 63650 |
|
}, |
|
{ |
|
"epoch": 0.876215628825706, |
|
"grad_norm": 0.17508172988891602, |
|
"learning_rate": 0.0001, |
|
"loss": 1.785, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 0.8769033961952709, |
|
"grad_norm": 0.167220801115036, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7838, |
|
"step": 63750 |
|
}, |
|
{ |
|
"epoch": 0.8775911635648358, |
|
"grad_norm": 0.22981862723827362, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7885, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.8782789309344008, |
|
"grad_norm": 0.17177161574363708, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7846, |
|
"step": 63850 |
|
}, |
|
{ |
|
"epoch": 0.8789666983039657, |
|
"grad_norm": 0.16599243879318237, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7819, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 0.8796544656735306, |
|
"grad_norm": 0.17125064134597778, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7839, |
|
"step": 63950 |
|
}, |
|
{ |
|
"epoch": 0.8803422330430956, |
|
"grad_norm": 0.17469707131385803, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7797, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.8810300004126604, |
|
"grad_norm": 0.16639864444732666, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7833, |
|
"step": 64050 |
|
}, |
|
{ |
|
"epoch": 0.8817177677822253, |
|
"grad_norm": 0.16656282544136047, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7816, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 0.8824055351517902, |
|
"grad_norm": 0.14526651799678802, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7817, |
|
"step": 64150 |
|
}, |
|
{ |
|
"epoch": 0.8830933025213552, |
|
"grad_norm": 0.1783958077430725, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7828, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.8837810698909201, |
|
"grad_norm": 0.16352634131908417, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7807, |
|
"step": 64250 |
|
}, |
|
{ |
|
"epoch": 0.884468837260485, |
|
"grad_norm": 0.16130295395851135, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7803, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 0.88515660463005, |
|
"grad_norm": 0.16286851465702057, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7866, |
|
"step": 64350 |
|
}, |
|
{ |
|
"epoch": 0.8858443719996149, |
|
"grad_norm": 0.16668406128883362, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7805, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.8865321393691797, |
|
"grad_norm": 0.16575850546360016, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7803, |
|
"step": 64450 |
|
}, |
|
{ |
|
"epoch": 0.8872199067387447, |
|
"grad_norm": 0.16535095870494843, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7795, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.8879076741083096, |
|
"grad_norm": 0.14137853682041168, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7854, |
|
"step": 64550 |
|
}, |
|
{ |
|
"epoch": 0.8885954414778745, |
|
"grad_norm": 0.14880156517028809, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7862, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.8892832088474394, |
|
"grad_norm": 0.17448197305202484, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7847, |
|
"step": 64650 |
|
}, |
|
{ |
|
"epoch": 0.8899709762170044, |
|
"grad_norm": 0.1944260448217392, |
|
"learning_rate": 0.0001, |
|
"loss": 1.786, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 0.8906587435865693, |
|
"grad_norm": 0.1693488508462906, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7857, |
|
"step": 64750 |
|
}, |
|
{ |
|
"epoch": 0.8913465109561342, |
|
"grad_norm": 0.16250942647457123, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7835, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.8920342783256991, |
|
"grad_norm": 0.1573057919740677, |
|
"learning_rate": 0.0001, |
|
"loss": 1.782, |
|
"step": 64850 |
|
}, |
|
{ |
|
"epoch": 0.892722045695264, |
|
"grad_norm": 0.19034920632839203, |
|
"learning_rate": 0.0001, |
|
"loss": 1.782, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 0.8934098130648289, |
|
"grad_norm": 0.13963682949543, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7887, |
|
"step": 64950 |
|
}, |
|
{ |
|
"epoch": 0.8940975804343939, |
|
"grad_norm": 0.25064077973365784, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7873, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.8947853478039588, |
|
"grad_norm": 0.17574715614318848, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7841, |
|
"step": 65050 |
|
}, |
|
{ |
|
"epoch": 0.8954731151735237, |
|
"grad_norm": 0.156754732131958, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7807, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 0.8961608825430887, |
|
"grad_norm": 0.17132636904716492, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7801, |
|
"step": 65150 |
|
}, |
|
{ |
|
"epoch": 0.8968486499126536, |
|
"grad_norm": 0.15248049795627594, |
|
"learning_rate": 0.0001, |
|
"loss": 1.781, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.8975364172822184, |
|
"grad_norm": 0.1603154093027115, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7836, |
|
"step": 65250 |
|
}, |
|
{ |
|
"epoch": 0.8982241846517833, |
|
"grad_norm": 0.14862816035747528, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7823, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 0.8989119520213483, |
|
"grad_norm": 0.17050820589065552, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7856, |
|
"step": 65350 |
|
}, |
|
{ |
|
"epoch": 0.8995997193909132, |
|
"grad_norm": 0.16287332773208618, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7833, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.9002874867604781, |
|
"grad_norm": 0.15486200153827667, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7804, |
|
"step": 65450 |
|
}, |
|
{ |
|
"epoch": 0.9009752541300431, |
|
"grad_norm": 0.16483095288276672, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7845, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.901663021499608, |
|
"grad_norm": 0.15963926911354065, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7865, |
|
"step": 65550 |
|
}, |
|
{ |
|
"epoch": 0.9023507888691729, |
|
"grad_norm": 0.14927932620048523, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7814, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.9030385562387379, |
|
"grad_norm": 0.15622937679290771, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7841, |
|
"step": 65650 |
|
}, |
|
{ |
|
"epoch": 0.9037263236083027, |
|
"grad_norm": 0.14870509505271912, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7865, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 0.9044140909778676, |
|
"grad_norm": 0.16585543751716614, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7803, |
|
"step": 65750 |
|
}, |
|
{ |
|
"epoch": 0.9051018583474326, |
|
"grad_norm": 0.16925722360610962, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7905, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.9057896257169975, |
|
"grad_norm": 0.16086918115615845, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7818, |
|
"step": 65850 |
|
}, |
|
{ |
|
"epoch": 0.9064773930865624, |
|
"grad_norm": 0.17064189910888672, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7829, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 0.9071651604561273, |
|
"grad_norm": 0.1507936716079712, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7826, |
|
"step": 65950 |
|
}, |
|
{ |
|
"epoch": 0.9078529278256923, |
|
"grad_norm": 0.16139142215251923, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7832, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.9085406951952572, |
|
"grad_norm": 0.14373824000358582, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7834, |
|
"step": 66050 |
|
}, |
|
{ |
|
"epoch": 0.909228462564822, |
|
"grad_norm": 0.14268267154693604, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7832, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 0.909916229934387, |
|
"grad_norm": 0.14548690617084503, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7827, |
|
"step": 66150 |
|
}, |
|
{ |
|
"epoch": 0.9106039973039519, |
|
"grad_norm": 0.1726326048374176, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7799, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.9112917646735168, |
|
"grad_norm": 0.1607373058795929, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7809, |
|
"step": 66250 |
|
}, |
|
{ |
|
"epoch": 0.9119795320430818, |
|
"grad_norm": 0.14730975031852722, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7791, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 0.9126672994126467, |
|
"grad_norm": 0.1616540104150772, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7791, |
|
"step": 66350 |
|
}, |
|
{ |
|
"epoch": 0.9133550667822116, |
|
"grad_norm": 0.16029463708400726, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7828, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.9140428341517765, |
|
"grad_norm": 0.15002845227718353, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7812, |
|
"step": 66450 |
|
}, |
|
{ |
|
"epoch": 0.9147306015213414, |
|
"grad_norm": 0.14482907950878143, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7802, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.9154183688909063, |
|
"grad_norm": 0.17749476432800293, |
|
"learning_rate": 0.0001, |
|
"loss": 1.781, |
|
"step": 66550 |
|
}, |
|
{ |
|
"epoch": 0.9161061362604712, |
|
"grad_norm": 0.15776415169239044, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7816, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.9167939036300362, |
|
"grad_norm": 0.149980366230011, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7756, |
|
"step": 66650 |
|
}, |
|
{ |
|
"epoch": 0.9174816709996011, |
|
"grad_norm": 0.16899780929088593, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7814, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 0.918169438369166, |
|
"grad_norm": 0.17424631118774414, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7781, |
|
"step": 66750 |
|
}, |
|
{ |
|
"epoch": 0.918857205738731, |
|
"grad_norm": 0.1580991894006729, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7801, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.9195449731082959, |
|
"grad_norm": 0.16126061975955963, |
|
"learning_rate": 0.0001, |
|
"loss": 1.782, |
|
"step": 66850 |
|
}, |
|
{ |
|
"epoch": 0.9202327404778607, |
|
"grad_norm": 0.15646252036094666, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7828, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 0.9209205078474257, |
|
"grad_norm": 0.17129796743392944, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7844, |
|
"step": 66950 |
|
}, |
|
{ |
|
"epoch": 0.9216082752169906, |
|
"grad_norm": 0.1756673902273178, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7839, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.9222960425865555, |
|
"grad_norm": 0.15259510278701782, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7795, |
|
"step": 67050 |
|
}, |
|
{ |
|
"epoch": 0.9229838099561204, |
|
"grad_norm": 0.1639316827058792, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7843, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 0.9236715773256854, |
|
"grad_norm": 0.17190176248550415, |
|
"learning_rate": 0.0001, |
|
"loss": 1.78, |
|
"step": 67150 |
|
}, |
|
{ |
|
"epoch": 0.9243593446952503, |
|
"grad_norm": 0.16864174604415894, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7852, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.9250471120648152, |
|
"grad_norm": 0.15548075735569, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7828, |
|
"step": 67250 |
|
}, |
|
{ |
|
"epoch": 0.9257348794343802, |
|
"grad_norm": 0.16301994025707245, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7846, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 0.926422646803945, |
|
"grad_norm": 0.1735038459300995, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7798, |
|
"step": 67350 |
|
}, |
|
{ |
|
"epoch": 0.9271104141735099, |
|
"grad_norm": 0.1380920112133026, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7806, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.9277981815430749, |
|
"grad_norm": 0.15920446813106537, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7792, |
|
"step": 67450 |
|
}, |
|
{ |
|
"epoch": 0.9284859489126398, |
|
"grad_norm": 0.17028312385082245, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7888, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.9291737162822047, |
|
"grad_norm": 0.1769266575574875, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7814, |
|
"step": 67550 |
|
}, |
|
{ |
|
"epoch": 0.9298614836517696, |
|
"grad_norm": 0.1450556516647339, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7817, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.9305492510213346, |
|
"grad_norm": 0.16302357614040375, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7813, |
|
"step": 67650 |
|
}, |
|
{ |
|
"epoch": 0.9312370183908995, |
|
"grad_norm": 0.1574389934539795, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7776, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 0.9319247857604643, |
|
"grad_norm": 0.14627063274383545, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7826, |
|
"step": 67750 |
|
}, |
|
{ |
|
"epoch": 0.9326125531300293, |
|
"grad_norm": 0.18861928582191467, |
|
"learning_rate": 0.0001, |
|
"loss": 1.781, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.9333003204995942, |
|
"grad_norm": 0.1549026519060135, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7787, |
|
"step": 67850 |
|
}, |
|
{ |
|
"epoch": 0.9339880878691591, |
|
"grad_norm": 0.1620372235774994, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7826, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 0.9346758552387241, |
|
"grad_norm": 0.15894797444343567, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7818, |
|
"step": 67950 |
|
}, |
|
{ |
|
"epoch": 0.935363622608289, |
|
"grad_norm": 0.19588086009025574, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7835, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.9360513899778539, |
|
"grad_norm": 0.1861431747674942, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7815, |
|
"step": 68050 |
|
}, |
|
{ |
|
"epoch": 0.9367391573474189, |
|
"grad_norm": 0.16720125079154968, |
|
"learning_rate": 0.0001, |
|
"loss": 1.781, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 0.9374269247169837, |
|
"grad_norm": 0.1603463739156723, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7788, |
|
"step": 68150 |
|
}, |
|
{ |
|
"epoch": 0.9381146920865486, |
|
"grad_norm": 0.14092972874641418, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7824, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.9388024594561135, |
|
"grad_norm": 0.1622365266084671, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7779, |
|
"step": 68250 |
|
}, |
|
{ |
|
"epoch": 0.9394902268256785, |
|
"grad_norm": 0.16566450893878937, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7789, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 0.9401779941952434, |
|
"grad_norm": 0.14181503653526306, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7773, |
|
"step": 68350 |
|
}, |
|
{ |
|
"epoch": 0.9408657615648083, |
|
"grad_norm": 0.16675251722335815, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7796, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.9415535289343733, |
|
"grad_norm": 0.15481418371200562, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7797, |
|
"step": 68450 |
|
}, |
|
{ |
|
"epoch": 0.9422412963039382, |
|
"grad_norm": 0.16480682790279388, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7767, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.942929063673503, |
|
"grad_norm": 0.13726095855236053, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7799, |
|
"step": 68550 |
|
}, |
|
{ |
|
"epoch": 0.943616831043068, |
|
"grad_norm": 0.1498117446899414, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7826, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.9443045984126329, |
|
"grad_norm": 0.15102407336235046, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7807, |
|
"step": 68650 |
|
}, |
|
{ |
|
"epoch": 0.9449923657821978, |
|
"grad_norm": 0.1596510410308838, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7773, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 0.9456801331517628, |
|
"grad_norm": 0.15061867237091064, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7781, |
|
"step": 68750 |
|
}, |
|
{ |
|
"epoch": 0.9463679005213277, |
|
"grad_norm": 0.18302445113658905, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7801, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.9470556678908926, |
|
"grad_norm": 0.1563147008419037, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7807, |
|
"step": 68850 |
|
}, |
|
{ |
|
"epoch": 0.9477434352604575, |
|
"grad_norm": 0.1559109389781952, |
|
"learning_rate": 0.0001, |
|
"loss": 1.779, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 0.9484312026300225, |
|
"grad_norm": 0.1892656683921814, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7815, |
|
"step": 68950 |
|
}, |
|
{ |
|
"epoch": 0.9491189699995873, |
|
"grad_norm": 0.16753901541233063, |
|
"learning_rate": 0.0001, |
|
"loss": 1.779, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.9498067373691522, |
|
"grad_norm": 0.16571739315986633, |
|
"learning_rate": 0.0001, |
|
"loss": 1.781, |
|
"step": 69050 |
|
}, |
|
{ |
|
"epoch": 0.9504945047387172, |
|
"grad_norm": 0.15618735551834106, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7801, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 0.9511822721082821, |
|
"grad_norm": 0.15602505207061768, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7782, |
|
"step": 69150 |
|
}, |
|
{ |
|
"epoch": 0.951870039477847, |
|
"grad_norm": 0.1441372036933899, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7808, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.952557806847412, |
|
"grad_norm": 0.16956308484077454, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7805, |
|
"step": 69250 |
|
}, |
|
{ |
|
"epoch": 0.9532455742169769, |
|
"grad_norm": 0.1570560336112976, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7829, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 0.9539333415865417, |
|
"grad_norm": 0.13851186633110046, |
|
"learning_rate": 0.0001, |
|
"loss": 1.779, |
|
"step": 69350 |
|
}, |
|
{ |
|
"epoch": 0.9546211089561066, |
|
"grad_norm": 0.18309037387371063, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7772, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.9553088763256716, |
|
"grad_norm": 1.6850249767303467, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7781, |
|
"step": 69450 |
|
}, |
|
{ |
|
"epoch": 0.9559966436952365, |
|
"grad_norm": 0.1578509509563446, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7843, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.9566844110648014, |
|
"grad_norm": 0.15330944955348969, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7785, |
|
"step": 69550 |
|
}, |
|
{ |
|
"epoch": 0.9573721784343664, |
|
"grad_norm": 0.15504170954227448, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7851, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.9580599458039313, |
|
"grad_norm": 0.17802022397518158, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7794, |
|
"step": 69650 |
|
}, |
|
{ |
|
"epoch": 0.9587477131734962, |
|
"grad_norm": 0.18508057296276093, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7827, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 0.9594354805430612, |
|
"grad_norm": 0.19704073667526245, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7809, |
|
"step": 69750 |
|
}, |
|
{ |
|
"epoch": 0.960123247912626, |
|
"grad_norm": 0.17070503532886505, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7791, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.9608110152821909, |
|
"grad_norm": 0.1832980215549469, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7798, |
|
"step": 69850 |
|
}, |
|
{ |
|
"epoch": 0.9614987826517559, |
|
"grad_norm": 0.15290822088718414, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7819, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 0.9621865500213208, |
|
"grad_norm": 0.1691426783800125, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7792, |
|
"step": 69950 |
|
}, |
|
{ |
|
"epoch": 0.9628743173908857, |
|
"grad_norm": 0.1656666249036789, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7836, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.9635620847604506, |
|
"grad_norm": 0.15653489530086517, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7811, |
|
"step": 70050 |
|
}, |
|
{ |
|
"epoch": 0.9642498521300156, |
|
"grad_norm": 0.15945695340633392, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7789, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 0.9649376194995805, |
|
"grad_norm": 0.173899307847023, |
|
"learning_rate": 0.0001, |
|
"loss": 1.782, |
|
"step": 70150 |
|
}, |
|
{ |
|
"epoch": 0.9656253868691453, |
|
"grad_norm": 0.13982714712619781, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7796, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.9663131542387103, |
|
"grad_norm": 0.16570891439914703, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7814, |
|
"step": 70250 |
|
}, |
|
{ |
|
"epoch": 0.9670009216082752, |
|
"grad_norm": 0.1680910885334015, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7797, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 0.9676886889778401, |
|
"grad_norm": 0.18602094054222107, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7799, |
|
"step": 70350 |
|
}, |
|
{ |
|
"epoch": 0.9683764563474051, |
|
"grad_norm": 0.15171028673648834, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7824, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.96906422371697, |
|
"grad_norm": 0.17273007333278656, |
|
"learning_rate": 0.0001, |
|
"loss": 1.779, |
|
"step": 70450 |
|
}, |
|
{ |
|
"epoch": 0.9697519910865349, |
|
"grad_norm": 0.1841355711221695, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7849, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.9704397584560998, |
|
"grad_norm": 0.14629191160202026, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7822, |
|
"step": 70550 |
|
}, |
|
{ |
|
"epoch": 0.9711275258256648, |
|
"grad_norm": 0.19547376036643982, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7805, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.9718152931952296, |
|
"grad_norm": 0.1695117950439453, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7808, |
|
"step": 70650 |
|
}, |
|
{ |
|
"epoch": 0.9725030605647945, |
|
"grad_norm": 0.15734167397022247, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7826, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 0.9731908279343595, |
|
"grad_norm": 0.15534259378910065, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7784, |
|
"step": 70750 |
|
}, |
|
{ |
|
"epoch": 0.9738785953039244, |
|
"grad_norm": 0.17524221539497375, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7802, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.9745663626734893, |
|
"grad_norm": 0.16551004350185394, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7774, |
|
"step": 70850 |
|
}, |
|
{ |
|
"epoch": 0.9752541300430543, |
|
"grad_norm": 0.18955057859420776, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7771, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 0.9759418974126192, |
|
"grad_norm": 0.1564190834760666, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7836, |
|
"step": 70950 |
|
}, |
|
{ |
|
"epoch": 0.976629664782184, |
|
"grad_norm": 0.18080365657806396, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7809, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.977317432151749, |
|
"grad_norm": 0.17052794992923737, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7785, |
|
"step": 71050 |
|
}, |
|
{ |
|
"epoch": 0.9780051995213139, |
|
"grad_norm": 0.15679985284805298, |
|
"learning_rate": 0.0001, |
|
"loss": 1.777, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 0.9786929668908788, |
|
"grad_norm": 0.14611759781837463, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7831, |
|
"step": 71150 |
|
}, |
|
{ |
|
"epoch": 0.9793807342604437, |
|
"grad_norm": 0.17994888126850128, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7811, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.9800685016300087, |
|
"grad_norm": 0.1523408442735672, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7819, |
|
"step": 71250 |
|
}, |
|
{ |
|
"epoch": 0.9807562689995736, |
|
"grad_norm": 0.14828313887119293, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7766, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 0.9814440363691385, |
|
"grad_norm": 0.1424998790025711, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7788, |
|
"step": 71350 |
|
}, |
|
{ |
|
"epoch": 0.9821318037387035, |
|
"grad_norm": 0.14312104880809784, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7783, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.9828195711082683, |
|
"grad_norm": 0.14697466790676117, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7808, |
|
"step": 71450 |
|
}, |
|
{ |
|
"epoch": 0.9835073384778332, |
|
"grad_norm": 0.16363121569156647, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7783, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.9841951058473982, |
|
"grad_norm": 0.1542508453130722, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7817, |
|
"step": 71550 |
|
}, |
|
{ |
|
"epoch": 0.9848828732169631, |
|
"grad_norm": 0.1389523297548294, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7791, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.985570640586528, |
|
"grad_norm": 0.15856057405471802, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7833, |
|
"step": 71650 |
|
}, |
|
{ |
|
"epoch": 0.986258407956093, |
|
"grad_norm": 0.15098857879638672, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7764, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 0.9869461753256579, |
|
"grad_norm": 0.14318101108074188, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7782, |
|
"step": 71750 |
|
}, |
|
{ |
|
"epoch": 0.9876339426952228, |
|
"grad_norm": 0.16459529101848602, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7774, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.9883217100647876, |
|
"grad_norm": 0.14705689251422882, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7813, |
|
"step": 71850 |
|
}, |
|
{ |
|
"epoch": 0.9890094774343526, |
|
"grad_norm": 0.2091091424226761, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7819, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 0.9896972448039175, |
|
"grad_norm": 0.1711418330669403, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7782, |
|
"step": 71950 |
|
}, |
|
{ |
|
"epoch": 0.9903850121734824, |
|
"grad_norm": 0.15255683660507202, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7851, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.9910727795430474, |
|
"grad_norm": 0.17501915991306305, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7824, |
|
"step": 72050 |
|
}, |
|
{ |
|
"epoch": 0.9917605469126123, |
|
"grad_norm": 0.1605847328901291, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7802, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 0.9924483142821772, |
|
"grad_norm": 0.14898759126663208, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7836, |
|
"step": 72150 |
|
}, |
|
{ |
|
"epoch": 0.9931360816517422, |
|
"grad_norm": 0.15966999530792236, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7773, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 0.993823849021307, |
|
"grad_norm": 0.14977654814720154, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7764, |
|
"step": 72250 |
|
}, |
|
{ |
|
"epoch": 0.9945116163908719, |
|
"grad_norm": 0.16077259182929993, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7789, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 0.9951993837604368, |
|
"grad_norm": 0.1603011190891266, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7756, |
|
"step": 72350 |
|
}, |
|
{ |
|
"epoch": 0.9958871511300018, |
|
"grad_norm": 0.17926956713199615, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7805, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 0.9965749184995667, |
|
"grad_norm": 0.15523836016654968, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7816, |
|
"step": 72450 |
|
}, |
|
{ |
|
"epoch": 0.9972626858691316, |
|
"grad_norm": 0.15533694624900818, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7817, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.9979504532386966, |
|
"grad_norm": 0.17167145013809204, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7793, |
|
"step": 72550 |
|
}, |
|
{ |
|
"epoch": 0.9986382206082615, |
|
"grad_norm": 0.1536383181810379, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7792, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 0.9993259879778263, |
|
"grad_norm": 0.15611621737480164, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7798, |
|
"step": 72650 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 72699, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7804002887190855e+21, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|