|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.9933642609665863, |
|
"eval_steps": 500, |
|
"global_step": 34000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.011744670855599272, |
|
"grad_norm": 0.025151433423161507, |
|
"learning_rate": 0.0001997648165569144, |
|
"loss": 1.2331, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.023489341711198545, |
|
"grad_norm": 0.028844915330410004, |
|
"learning_rate": 0.0001992944496707432, |
|
"loss": 1.1461, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03523401256679781, |
|
"grad_norm": 0.0280243381857872, |
|
"learning_rate": 0.00019882408278457198, |
|
"loss": 1.1523, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.04697868342239709, |
|
"grad_norm": 0.029297035187482834, |
|
"learning_rate": 0.00019835371589840077, |
|
"loss": 1.1273, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.05872335427799636, |
|
"grad_norm": 0.02906208299100399, |
|
"learning_rate": 0.00019788334901222956, |
|
"loss": 1.1149, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07046802513359562, |
|
"grad_norm": 0.027980709448456764, |
|
"learning_rate": 0.00019741298212605835, |
|
"loss": 1.1108, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.08221269598919491, |
|
"grad_norm": 0.03009636141359806, |
|
"learning_rate": 0.00019694261523988714, |
|
"loss": 1.1041, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.09395736684479418, |
|
"grad_norm": 0.02974865213036537, |
|
"learning_rate": 0.00019647224835371593, |
|
"loss": 1.1088, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.10570203770039345, |
|
"grad_norm": 0.03112063929438591, |
|
"learning_rate": 0.0001960018814675447, |
|
"loss": 1.1052, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.11744670855599272, |
|
"grad_norm": 0.03213803470134735, |
|
"learning_rate": 0.00019553151458137348, |
|
"loss": 1.1037, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12919137941159198, |
|
"grad_norm": 0.029907317832112312, |
|
"learning_rate": 0.00019506114769520227, |
|
"loss": 1.0927, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.14093605026719125, |
|
"grad_norm": 0.03029988706111908, |
|
"learning_rate": 0.00019459078080903106, |
|
"loss": 1.0926, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.15268072112279055, |
|
"grad_norm": 0.03094552271068096, |
|
"learning_rate": 0.00019412041392285985, |
|
"loss": 1.0977, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.16442539197838982, |
|
"grad_norm": 0.031248079612851143, |
|
"learning_rate": 0.00019365004703668864, |
|
"loss": 1.0914, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.1761700628339891, |
|
"grad_norm": 0.03235971927642822, |
|
"learning_rate": 0.00019317968015051742, |
|
"loss": 1.1114, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18791473368958836, |
|
"grad_norm": 0.031699199229478836, |
|
"learning_rate": 0.0001927093132643462, |
|
"loss": 1.0883, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.19965940454518763, |
|
"grad_norm": 0.032460007816553116, |
|
"learning_rate": 0.00019223894637817498, |
|
"loss": 1.0925, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.2114040754007869, |
|
"grad_norm": 0.03195160627365112, |
|
"learning_rate": 0.00019176857949200376, |
|
"loss": 1.0849, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.22314874625638617, |
|
"grad_norm": 0.031754713505506516, |
|
"learning_rate": 0.00019129821260583255, |
|
"loss": 1.0872, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.23489341711198544, |
|
"grad_norm": 0.0315755270421505, |
|
"learning_rate": 0.00019082784571966134, |
|
"loss": 1.0899, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2466380879675847, |
|
"grad_norm": 0.03236432373523712, |
|
"learning_rate": 0.00019035747883349013, |
|
"loss": 1.0846, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.25838275882318396, |
|
"grad_norm": 0.034620147198438644, |
|
"learning_rate": 0.00018988711194731892, |
|
"loss": 1.0813, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.2701274296787832, |
|
"grad_norm": 0.03313825652003288, |
|
"learning_rate": 0.0001894167450611477, |
|
"loss": 1.0829, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.2818721005343825, |
|
"grad_norm": 0.033103689551353455, |
|
"learning_rate": 0.0001889463781749765, |
|
"loss": 1.0889, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.2936167713899818, |
|
"grad_norm": 0.0330510288476944, |
|
"learning_rate": 0.0001884760112888053, |
|
"loss": 1.0792, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.3053614422455811, |
|
"grad_norm": 0.03478708490729332, |
|
"learning_rate": 0.00018800564440263405, |
|
"loss": 1.0819, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.31710611310118036, |
|
"grad_norm": 0.033343035727739334, |
|
"learning_rate": 0.00018753527751646284, |
|
"loss": 1.0774, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.32885078395677964, |
|
"grad_norm": 0.034466274082660675, |
|
"learning_rate": 0.00018706491063029163, |
|
"loss": 1.0814, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.3405954548123789, |
|
"grad_norm": 0.034216560423374176, |
|
"learning_rate": 0.00018659454374412042, |
|
"loss": 1.0823, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.3523401256679782, |
|
"grad_norm": 0.033088088035583496, |
|
"learning_rate": 0.0001861241768579492, |
|
"loss": 1.0745, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.36408479652357745, |
|
"grad_norm": 0.03337638080120087, |
|
"learning_rate": 0.000185653809971778, |
|
"loss": 1.0876, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.3758294673791767, |
|
"grad_norm": 0.03440937399864197, |
|
"learning_rate": 0.00018518344308560678, |
|
"loss": 1.0708, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.387574138234776, |
|
"grad_norm": 0.03419345244765282, |
|
"learning_rate": 0.00018471307619943557, |
|
"loss": 1.0849, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.39931880909037526, |
|
"grad_norm": 0.03453630208969116, |
|
"learning_rate": 0.00018424270931326436, |
|
"loss": 1.0716, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.41106347994597453, |
|
"grad_norm": 0.033510930836200714, |
|
"learning_rate": 0.00018377234242709312, |
|
"loss": 1.0708, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.4228081508015738, |
|
"grad_norm": 0.03349142521619797, |
|
"learning_rate": 0.00018330197554092191, |
|
"loss": 1.0813, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.43455282165717307, |
|
"grad_norm": 0.0351400151848793, |
|
"learning_rate": 0.0001828316086547507, |
|
"loss": 1.0835, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.44629749251277234, |
|
"grad_norm": 0.034197255969047546, |
|
"learning_rate": 0.0001823612417685795, |
|
"loss": 1.0762, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.4580421633683716, |
|
"grad_norm": 0.03609395772218704, |
|
"learning_rate": 0.00018189087488240828, |
|
"loss": 1.0665, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.4697868342239709, |
|
"grad_norm": 0.034626953303813934, |
|
"learning_rate": 0.00018142050799623707, |
|
"loss": 1.073, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.48153150507957015, |
|
"grad_norm": 0.03524584695696831, |
|
"learning_rate": 0.00018095014111006586, |
|
"loss": 1.0655, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.4932761759351694, |
|
"grad_norm": 0.0350763201713562, |
|
"learning_rate": 0.00018047977422389465, |
|
"loss": 1.0733, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.5050208467907686, |
|
"grad_norm": 0.0361902192234993, |
|
"learning_rate": 0.0001800094073377234, |
|
"loss": 1.0837, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.5167655176463679, |
|
"grad_norm": 0.03573866933584213, |
|
"learning_rate": 0.0001795390404515522, |
|
"loss": 1.0807, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.5285101885019672, |
|
"grad_norm": 0.03497539460659027, |
|
"learning_rate": 0.000179068673565381, |
|
"loss": 1.0599, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.5402548593575665, |
|
"grad_norm": 0.034839775413274765, |
|
"learning_rate": 0.00017859830667920978, |
|
"loss": 1.0732, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.5519995302131657, |
|
"grad_norm": 0.035876356065273285, |
|
"learning_rate": 0.0001781279397930386, |
|
"loss": 1.0674, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.563744201068765, |
|
"grad_norm": 0.034889355301856995, |
|
"learning_rate": 0.00017765757290686738, |
|
"loss": 1.0701, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.5754888719243643, |
|
"grad_norm": 0.03712477535009384, |
|
"learning_rate": 0.00017718720602069617, |
|
"loss": 1.0532, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.5872335427799636, |
|
"grad_norm": 0.036021534353494644, |
|
"learning_rate": 0.00017671683913452493, |
|
"loss": 1.0734, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5989782136355629, |
|
"grad_norm": 0.03627597168087959, |
|
"learning_rate": 0.00017624647224835372, |
|
"loss": 1.0704, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.6107228844911622, |
|
"grad_norm": 0.03433089703321457, |
|
"learning_rate": 0.0001757761053621825, |
|
"loss": 1.0741, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.6224675553467615, |
|
"grad_norm": 0.03627678006887436, |
|
"learning_rate": 0.0001753057384760113, |
|
"loss": 1.0626, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.6342122262023607, |
|
"grad_norm": 0.035330165177583694, |
|
"learning_rate": 0.0001748353715898401, |
|
"loss": 1.0597, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.64595689705796, |
|
"grad_norm": 0.03662644326686859, |
|
"learning_rate": 0.00017436500470366888, |
|
"loss": 1.0712, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.6577015679135593, |
|
"grad_norm": 0.03487861156463623, |
|
"learning_rate": 0.00017389463781749767, |
|
"loss": 1.0526, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.6694462387691585, |
|
"grad_norm": 0.036091409623622894, |
|
"learning_rate": 0.00017342427093132646, |
|
"loss": 1.0641, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.6811909096247578, |
|
"grad_norm": 0.037575751543045044, |
|
"learning_rate": 0.00017295390404515525, |
|
"loss": 1.0593, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.6929355804803571, |
|
"grad_norm": 0.03606625273823738, |
|
"learning_rate": 0.000172483537158984, |
|
"loss": 1.067, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.7046802513359564, |
|
"grad_norm": 0.03537227585911751, |
|
"learning_rate": 0.0001720131702728128, |
|
"loss": 1.0635, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7164249221915556, |
|
"grad_norm": 0.03722114861011505, |
|
"learning_rate": 0.0001715428033866416, |
|
"loss": 1.0704, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.7281695930471549, |
|
"grad_norm": 0.03697160631418228, |
|
"learning_rate": 0.00017107243650047038, |
|
"loss": 1.0523, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.7399142639027542, |
|
"grad_norm": 0.03575093299150467, |
|
"learning_rate": 0.00017060206961429917, |
|
"loss": 1.0629, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.7516589347583534, |
|
"grad_norm": 0.0371549054980278, |
|
"learning_rate": 0.00017013170272812795, |
|
"loss": 1.068, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.7634036056139527, |
|
"grad_norm": 0.03723159059882164, |
|
"learning_rate": 0.00016966133584195674, |
|
"loss": 1.0515, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.775148276469552, |
|
"grad_norm": 0.03721994906663895, |
|
"learning_rate": 0.00016919096895578553, |
|
"loss": 1.0628, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.7868929473251512, |
|
"grad_norm": 0.03621937334537506, |
|
"learning_rate": 0.00016872060206961432, |
|
"loss": 1.0691, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.7986376181807505, |
|
"grad_norm": 0.03620177507400513, |
|
"learning_rate": 0.00016825023518344308, |
|
"loss": 1.0455, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.8103822890363498, |
|
"grad_norm": 0.037204090505838394, |
|
"learning_rate": 0.00016777986829727187, |
|
"loss": 1.072, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.8221269598919491, |
|
"grad_norm": 0.03673955425620079, |
|
"learning_rate": 0.00016730950141110066, |
|
"loss": 1.0649, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.8338716307475483, |
|
"grad_norm": 0.036923281848430634, |
|
"learning_rate": 0.00016683913452492945, |
|
"loss": 1.0661, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.8456163016031476, |
|
"grad_norm": 0.03734573721885681, |
|
"learning_rate": 0.00016636876763875824, |
|
"loss": 1.0636, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.8573609724587469, |
|
"grad_norm": 0.03644491732120514, |
|
"learning_rate": 0.00016589840075258703, |
|
"loss": 1.0558, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.8691056433143461, |
|
"grad_norm": 0.03660232573747635, |
|
"learning_rate": 0.00016542803386641582, |
|
"loss": 1.0671, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.8808503141699454, |
|
"grad_norm": 0.036776404827833176, |
|
"learning_rate": 0.0001649576669802446, |
|
"loss": 1.0643, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.8925949850255447, |
|
"grad_norm": 0.03714260086417198, |
|
"learning_rate": 0.00016448730009407337, |
|
"loss": 1.0645, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.904339655881144, |
|
"grad_norm": 0.03652457147836685, |
|
"learning_rate": 0.00016401693320790216, |
|
"loss": 1.0541, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.9160843267367432, |
|
"grad_norm": 0.03829098492860794, |
|
"learning_rate": 0.00016354656632173095, |
|
"loss": 1.0585, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.9278289975923425, |
|
"grad_norm": 0.036905597895383835, |
|
"learning_rate": 0.00016307619943555974, |
|
"loss": 1.0563, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.9395736684479418, |
|
"grad_norm": 0.036890897899866104, |
|
"learning_rate": 0.00016260583254938853, |
|
"loss": 1.059, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.951318339303541, |
|
"grad_norm": 0.03704727813601494, |
|
"learning_rate": 0.00016213546566321731, |
|
"loss": 1.0567, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.9630630101591403, |
|
"grad_norm": 0.03992870822548866, |
|
"learning_rate": 0.0001616650987770461, |
|
"loss": 1.0675, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.9748076810147396, |
|
"grad_norm": 0.03689022362232208, |
|
"learning_rate": 0.0001611947318908749, |
|
"loss": 1.0592, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.9865523518703389, |
|
"grad_norm": 0.03615827485918999, |
|
"learning_rate": 0.00016072436500470368, |
|
"loss": 1.0404, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.9982970227259381, |
|
"grad_norm": 0.03881993889808655, |
|
"learning_rate": 0.00016025399811853244, |
|
"loss": 1.0506, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.0100416935815373, |
|
"grad_norm": 0.03911367058753967, |
|
"learning_rate": 0.00015978363123236123, |
|
"loss": 1.0508, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.0217863644371366, |
|
"grad_norm": 0.03765745460987091, |
|
"learning_rate": 0.00015931326434619002, |
|
"loss": 1.0432, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.0335310352927358, |
|
"grad_norm": 0.038481660187244415, |
|
"learning_rate": 0.0001588428974600188, |
|
"loss": 1.0493, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.045275706148335, |
|
"grad_norm": 0.03929019346833229, |
|
"learning_rate": 0.0001583725305738476, |
|
"loss": 1.0584, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.0570203770039344, |
|
"grad_norm": 0.03770457208156586, |
|
"learning_rate": 0.0001579021636876764, |
|
"loss": 1.0497, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.0687650478595336, |
|
"grad_norm": 0.037688400596380234, |
|
"learning_rate": 0.00015743179680150518, |
|
"loss": 1.0432, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.080509718715133, |
|
"grad_norm": 0.0388583168387413, |
|
"learning_rate": 0.00015696142991533397, |
|
"loss": 1.0406, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.0922543895707322, |
|
"grad_norm": 0.0381295308470726, |
|
"learning_rate": 0.00015649106302916276, |
|
"loss": 1.0343, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.1039990604263314, |
|
"grad_norm": 0.03957001864910126, |
|
"learning_rate": 0.00015602069614299155, |
|
"loss": 1.0445, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.1157437312819307, |
|
"grad_norm": 0.03933073207736015, |
|
"learning_rate": 0.00015555032925682034, |
|
"loss": 1.0498, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.12748840213753, |
|
"grad_norm": 0.03816806897521019, |
|
"learning_rate": 0.00015507996237064912, |
|
"loss": 1.0441, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.1392330729931293, |
|
"grad_norm": 0.038057826459407806, |
|
"learning_rate": 0.0001546095954844779, |
|
"loss": 1.0404, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.1509777438487285, |
|
"grad_norm": 0.03927973657846451, |
|
"learning_rate": 0.0001541392285983067, |
|
"loss": 1.0435, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.1627224147043278, |
|
"grad_norm": 0.039503954350948334, |
|
"learning_rate": 0.0001536688617121355, |
|
"loss": 1.0592, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.174467085559927, |
|
"grad_norm": 0.04005419462919235, |
|
"learning_rate": 0.00015319849482596428, |
|
"loss": 1.0445, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.1862117564155263, |
|
"grad_norm": 0.03986848145723343, |
|
"learning_rate": 0.00015272812793979304, |
|
"loss": 1.0569, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.1979564272711256, |
|
"grad_norm": 0.03886035457253456, |
|
"learning_rate": 0.00015225776105362183, |
|
"loss": 1.0473, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.2097010981267249, |
|
"grad_norm": 0.03941928222775459, |
|
"learning_rate": 0.00015178739416745062, |
|
"loss": 1.0399, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.2214457689823242, |
|
"grad_norm": 0.04138774052262306, |
|
"learning_rate": 0.0001513170272812794, |
|
"loss": 1.0387, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.2331904398379234, |
|
"grad_norm": 0.040227312594652176, |
|
"learning_rate": 0.0001508466603951082, |
|
"loss": 1.0592, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.2449351106935227, |
|
"grad_norm": 0.03977705165743828, |
|
"learning_rate": 0.000150376293508937, |
|
"loss": 1.047, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.256679781549122, |
|
"grad_norm": 0.040624938905239105, |
|
"learning_rate": 0.00014990592662276578, |
|
"loss": 1.0528, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.2684244524047212, |
|
"grad_norm": 0.03948456794023514, |
|
"learning_rate": 0.00014943555973659457, |
|
"loss": 1.0413, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.2801691232603205, |
|
"grad_norm": 0.0396355502307415, |
|
"learning_rate": 0.00014896519285042333, |
|
"loss": 1.0403, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.2919137941159198, |
|
"grad_norm": 0.04015343636274338, |
|
"learning_rate": 0.00014849482596425212, |
|
"loss": 1.057, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.303658464971519, |
|
"grad_norm": 0.03963370993733406, |
|
"learning_rate": 0.0001480244590780809, |
|
"loss": 1.0474, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.3154031358271183, |
|
"grad_norm": 0.03991986811161041, |
|
"learning_rate": 0.0001475540921919097, |
|
"loss": 1.0333, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.3271478066827176, |
|
"grad_norm": 0.040591537952423096, |
|
"learning_rate": 0.00014708372530573848, |
|
"loss": 1.0401, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.3388924775383169, |
|
"grad_norm": 0.039190664887428284, |
|
"learning_rate": 0.00014661335841956727, |
|
"loss": 1.0467, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.3506371483939161, |
|
"grad_norm": 0.03903213143348694, |
|
"learning_rate": 0.00014614299153339606, |
|
"loss": 1.0557, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.3623818192495154, |
|
"grad_norm": 0.03976823762059212, |
|
"learning_rate": 0.00014567262464722485, |
|
"loss": 1.055, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.3741264901051147, |
|
"grad_norm": 0.04024571180343628, |
|
"learning_rate": 0.00014520225776105364, |
|
"loss": 1.0375, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.385871160960714, |
|
"grad_norm": 0.040485769510269165, |
|
"learning_rate": 0.0001447318908748824, |
|
"loss": 1.0407, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.3976158318163132, |
|
"grad_norm": 0.04040844738483429, |
|
"learning_rate": 0.0001442615239887112, |
|
"loss": 1.0395, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.4093605026719125, |
|
"grad_norm": 0.039215583354234695, |
|
"learning_rate": 0.00014379115710253998, |
|
"loss": 1.0417, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.4211051735275118, |
|
"grad_norm": 0.040224071592092514, |
|
"learning_rate": 0.00014332079021636877, |
|
"loss": 1.0342, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.432849844383111, |
|
"grad_norm": 0.03919661417603493, |
|
"learning_rate": 0.00014285042333019756, |
|
"loss": 1.0396, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.4445945152387103, |
|
"grad_norm": 0.0384608618915081, |
|
"learning_rate": 0.00014238005644402635, |
|
"loss": 1.0467, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.4563391860943098, |
|
"grad_norm": 0.039908237755298615, |
|
"learning_rate": 0.00014190968955785514, |
|
"loss": 1.0539, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.468083856949909, |
|
"grad_norm": 0.0399697907269001, |
|
"learning_rate": 0.00014143932267168393, |
|
"loss": 1.0445, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.4798285278055083, |
|
"grad_norm": 0.04020760953426361, |
|
"learning_rate": 0.0001409689557855127, |
|
"loss": 1.0456, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.4915731986611076, |
|
"grad_norm": 0.04049207270145416, |
|
"learning_rate": 0.00014049858889934148, |
|
"loss": 1.0402, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.5033178695167067, |
|
"grad_norm": 0.0405813567340374, |
|
"learning_rate": 0.00014002822201317027, |
|
"loss": 1.0327, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.515062540372306, |
|
"grad_norm": 0.04098460450768471, |
|
"learning_rate": 0.00013955785512699906, |
|
"loss": 1.0426, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.5268072112279052, |
|
"grad_norm": 0.03912067785859108, |
|
"learning_rate": 0.00013908748824082785, |
|
"loss": 1.0398, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.5385518820835045, |
|
"grad_norm": 0.040918510407209396, |
|
"learning_rate": 0.00013861712135465663, |
|
"loss": 1.0511, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.5502965529391037, |
|
"grad_norm": 0.039643533527851105, |
|
"learning_rate": 0.00013814675446848542, |
|
"loss": 1.0408, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.562041223794703, |
|
"grad_norm": 0.04073023423552513, |
|
"learning_rate": 0.0001376763875823142, |
|
"loss": 1.0465, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.5737858946503023, |
|
"grad_norm": 0.03993350267410278, |
|
"learning_rate": 0.000137206020696143, |
|
"loss": 1.0481, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.5855305655059015, |
|
"grad_norm": 0.0393822006881237, |
|
"learning_rate": 0.00013673565380997176, |
|
"loss": 1.0378, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.5972752363615008, |
|
"grad_norm": 0.041657913476228714, |
|
"learning_rate": 0.00013626528692380055, |
|
"loss": 1.0305, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.6090199072171, |
|
"grad_norm": 0.0409579873085022, |
|
"learning_rate": 0.00013579492003762934, |
|
"loss": 1.0433, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.6207645780726994, |
|
"grad_norm": 0.039673928171396255, |
|
"learning_rate": 0.00013532455315145813, |
|
"loss": 1.0286, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.6325092489282986, |
|
"grad_norm": 0.04071459546685219, |
|
"learning_rate": 0.00013485418626528692, |
|
"loss": 1.0413, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 1.644253919783898, |
|
"grad_norm": 0.04058365523815155, |
|
"learning_rate": 0.00013438381937911574, |
|
"loss": 1.0465, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.6559985906394972, |
|
"grad_norm": 0.04230272024869919, |
|
"learning_rate": 0.00013391345249294453, |
|
"loss": 1.0304, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 1.6677432614950964, |
|
"grad_norm": 0.04120560362935066, |
|
"learning_rate": 0.0001334430856067733, |
|
"loss": 1.0459, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.6794879323506957, |
|
"grad_norm": 0.0403909832239151, |
|
"learning_rate": 0.00013297271872060208, |
|
"loss": 1.0402, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 1.691232603206295, |
|
"grad_norm": 0.04099250212311745, |
|
"learning_rate": 0.00013250235183443087, |
|
"loss": 1.0477, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.7029772740618943, |
|
"grad_norm": 0.039906516671180725, |
|
"learning_rate": 0.00013203198494825965, |
|
"loss": 1.057, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.7147219449174935, |
|
"grad_norm": 0.04008522629737854, |
|
"learning_rate": 0.00013156161806208844, |
|
"loss": 1.0433, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.7264666157730928, |
|
"grad_norm": 0.0416274331510067, |
|
"learning_rate": 0.00013109125117591723, |
|
"loss": 1.0405, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 1.738211286628692, |
|
"grad_norm": 0.04142718017101288, |
|
"learning_rate": 0.00013062088428974602, |
|
"loss": 1.0349, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.7499559574842916, |
|
"grad_norm": 0.0407978855073452, |
|
"learning_rate": 0.0001301505174035748, |
|
"loss": 1.0415, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 1.7617006283398908, |
|
"grad_norm": 0.03977083042263985, |
|
"learning_rate": 0.0001296801505174036, |
|
"loss": 1.0522, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.77344529919549, |
|
"grad_norm": 0.04186280444264412, |
|
"learning_rate": 0.00012920978363123236, |
|
"loss": 1.0515, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 1.7851899700510894, |
|
"grad_norm": 0.04049232602119446, |
|
"learning_rate": 0.00012873941674506115, |
|
"loss": 1.0517, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.7969346409066886, |
|
"grad_norm": 0.039164550602436066, |
|
"learning_rate": 0.00012826904985888994, |
|
"loss": 1.0403, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 1.808679311762288, |
|
"grad_norm": 0.04166054725646973, |
|
"learning_rate": 0.00012779868297271873, |
|
"loss": 1.0469, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 1.8204239826178872, |
|
"grad_norm": 0.0396597720682621, |
|
"learning_rate": 0.00012732831608654752, |
|
"loss": 1.0433, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.8321686534734865, |
|
"grad_norm": 0.041060902178287506, |
|
"learning_rate": 0.0001268579492003763, |
|
"loss": 1.0298, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 1.8439133243290857, |
|
"grad_norm": 0.04100984334945679, |
|
"learning_rate": 0.0001263875823142051, |
|
"loss": 1.0423, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 1.855657995184685, |
|
"grad_norm": 0.03933743014931679, |
|
"learning_rate": 0.00012591721542803389, |
|
"loss": 1.0459, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 1.8674026660402843, |
|
"grad_norm": 0.04171588271856308, |
|
"learning_rate": 0.00012544684854186265, |
|
"loss": 1.0411, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 1.8791473368958835, |
|
"grad_norm": 0.04075104370713234, |
|
"learning_rate": 0.00012497648165569144, |
|
"loss": 1.0521, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.8908920077514828, |
|
"grad_norm": 0.04037100449204445, |
|
"learning_rate": 0.00012450611476952023, |
|
"loss": 1.0482, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 1.902636678607082, |
|
"grad_norm": 0.04113980382680893, |
|
"learning_rate": 0.00012403574788334901, |
|
"loss": 1.0335, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.9143813494626813, |
|
"grad_norm": 0.04102000594139099, |
|
"learning_rate": 0.0001235653809971778, |
|
"loss": 1.0462, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 1.9261260203182806, |
|
"grad_norm": 0.041277866810560226, |
|
"learning_rate": 0.0001230950141110066, |
|
"loss": 1.0472, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 1.93787069117388, |
|
"grad_norm": 0.040296610444784164, |
|
"learning_rate": 0.00012262464722483538, |
|
"loss": 1.0455, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.9496153620294792, |
|
"grad_norm": 0.04030190408229828, |
|
"learning_rate": 0.00012215428033866417, |
|
"loss": 1.036, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 1.9613600328850784, |
|
"grad_norm": 0.04076563939452171, |
|
"learning_rate": 0.00012168391345249295, |
|
"loss": 1.0355, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 1.9731047037406777, |
|
"grad_norm": 0.042260345071554184, |
|
"learning_rate": 0.00012121354656632174, |
|
"loss": 1.0318, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 1.984849374596277, |
|
"grad_norm": 0.04094604775309563, |
|
"learning_rate": 0.00012074317968015052, |
|
"loss": 1.0272, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 1.9965940454518762, |
|
"grad_norm": 0.04106820747256279, |
|
"learning_rate": 0.0001202728127939793, |
|
"loss": 1.0248, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.0083387163074753, |
|
"grad_norm": 0.041894011199474335, |
|
"learning_rate": 0.00011980244590780809, |
|
"loss": 1.0415, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 2.0200833871630746, |
|
"grad_norm": 0.04177311062812805, |
|
"learning_rate": 0.00011933207902163688, |
|
"loss": 1.034, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.031828058018674, |
|
"grad_norm": 0.04308000206947327, |
|
"learning_rate": 0.00011886171213546567, |
|
"loss": 1.0164, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 2.043572728874273, |
|
"grad_norm": 0.04087553173303604, |
|
"learning_rate": 0.00011839134524929444, |
|
"loss": 1.0271, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.0553173997298724, |
|
"grad_norm": 0.04184258356690407, |
|
"learning_rate": 0.00011792097836312323, |
|
"loss": 1.0214, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.0670620705854716, |
|
"grad_norm": 0.0424019880592823, |
|
"learning_rate": 0.00011745061147695202, |
|
"loss": 1.041, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.078806741441071, |
|
"grad_norm": 0.04137306660413742, |
|
"learning_rate": 0.00011698024459078081, |
|
"loss": 1.0182, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 2.09055141229667, |
|
"grad_norm": 0.04244280233979225, |
|
"learning_rate": 0.0001165098777046096, |
|
"loss": 1.0269, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.1022960831522695, |
|
"grad_norm": 0.04111913591623306, |
|
"learning_rate": 0.00011603951081843838, |
|
"loss": 1.0335, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 2.1140407540078687, |
|
"grad_norm": 0.0424952507019043, |
|
"learning_rate": 0.00011556914393226716, |
|
"loss": 1.0266, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.125785424863468, |
|
"grad_norm": 0.0427490659058094, |
|
"learning_rate": 0.00011509877704609595, |
|
"loss": 1.0274, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 2.1375300957190673, |
|
"grad_norm": 0.04336949810385704, |
|
"learning_rate": 0.00011462841015992474, |
|
"loss": 1.0268, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.1492747665746665, |
|
"grad_norm": 0.04262473061680794, |
|
"learning_rate": 0.00011415804327375352, |
|
"loss": 1.0214, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 2.161019437430266, |
|
"grad_norm": 0.04471023753285408, |
|
"learning_rate": 0.00011368767638758231, |
|
"loss": 1.0295, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.172764108285865, |
|
"grad_norm": 0.044171739369630814, |
|
"learning_rate": 0.0001132173095014111, |
|
"loss": 1.0296, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.1845087791414644, |
|
"grad_norm": 0.04301764816045761, |
|
"learning_rate": 0.0001127469426152399, |
|
"loss": 1.0258, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.196253449997064, |
|
"grad_norm": 0.04325546696782112, |
|
"learning_rate": 0.00011227657572906869, |
|
"loss": 1.0288, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 2.207998120852663, |
|
"grad_norm": 0.04137617349624634, |
|
"learning_rate": 0.00011180620884289748, |
|
"loss": 1.0385, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 2.2197427917082626, |
|
"grad_norm": 0.043259892612695694, |
|
"learning_rate": 0.00011133584195672627, |
|
"loss": 1.0264, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 2.2314874625638614, |
|
"grad_norm": 0.044129449874162674, |
|
"learning_rate": 0.00011086547507055504, |
|
"loss": 1.0313, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.243232133419461, |
|
"grad_norm": 0.04146253690123558, |
|
"learning_rate": 0.00011039510818438383, |
|
"loss": 1.0304, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 2.25497680427506, |
|
"grad_norm": 0.042836517095565796, |
|
"learning_rate": 0.00010992474129821262, |
|
"loss": 1.0281, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 2.2667214751306597, |
|
"grad_norm": 0.04558749496936798, |
|
"learning_rate": 0.00010945437441204141, |
|
"loss": 1.03, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 2.2784661459862585, |
|
"grad_norm": 0.04284907504916191, |
|
"learning_rate": 0.00010898400752587018, |
|
"loss": 1.0469, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 2.2902108168418582, |
|
"grad_norm": 0.04364863410592079, |
|
"learning_rate": 0.00010851364063969897, |
|
"loss": 1.0295, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.301955487697457, |
|
"grad_norm": 0.042074691504240036, |
|
"learning_rate": 0.00010804327375352776, |
|
"loss": 1.0458, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 2.3137001585530568, |
|
"grad_norm": 0.043050698935985565, |
|
"learning_rate": 0.00010757290686735655, |
|
"loss": 1.0438, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 2.3254448294086556, |
|
"grad_norm": 0.04526820033788681, |
|
"learning_rate": 0.00010710253998118534, |
|
"loss": 1.0283, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 2.3371895002642553, |
|
"grad_norm": 0.04481109231710434, |
|
"learning_rate": 0.00010663217309501412, |
|
"loss": 1.0205, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 2.348934171119854, |
|
"grad_norm": 0.04517560824751854, |
|
"learning_rate": 0.0001061618062088429, |
|
"loss": 1.0179, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.3607962886840097, |
|
"grad_norm": 0.045082226395606995, |
|
"learning_rate": 0.0001056914393226717, |
|
"loss": 1.0406, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 2.372540959539609, |
|
"grad_norm": 0.042884670197963715, |
|
"learning_rate": 0.00010522107243650048, |
|
"loss": 1.0221, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 2.3842856303952082, |
|
"grad_norm": 0.04309197515249252, |
|
"learning_rate": 0.00010475070555032926, |
|
"loss": 1.0287, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 2.3960303012508075, |
|
"grad_norm": 0.04434290900826454, |
|
"learning_rate": 0.00010428033866415805, |
|
"loss": 1.0269, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 2.407774972106407, |
|
"grad_norm": 0.044556260108947754, |
|
"learning_rate": 0.00010380997177798684, |
|
"loss": 1.0454, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.419519642962006, |
|
"grad_norm": 0.043353039771318436, |
|
"learning_rate": 0.00010333960489181563, |
|
"loss": 1.0401, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 2.4312643138176053, |
|
"grad_norm": 0.045345306396484375, |
|
"learning_rate": 0.0001028692380056444, |
|
"loss": 1.0172, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 2.4430089846732046, |
|
"grad_norm": 0.043075308203697205, |
|
"learning_rate": 0.00010239887111947319, |
|
"loss": 1.0413, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 2.454753655528804, |
|
"grad_norm": 0.044308003038167953, |
|
"learning_rate": 0.00010192850423330198, |
|
"loss": 1.0292, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 2.466498326384403, |
|
"grad_norm": 0.04506301134824753, |
|
"learning_rate": 0.00010145813734713077, |
|
"loss": 1.0382, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.4782429972400024, |
|
"grad_norm": 0.04396146163344383, |
|
"learning_rate": 0.00010098777046095956, |
|
"loss": 1.0337, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 2.4899876680956017, |
|
"grad_norm": 0.044499751180410385, |
|
"learning_rate": 0.00010051740357478833, |
|
"loss": 1.0407, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 2.501732338951201, |
|
"grad_norm": 0.042769189924001694, |
|
"learning_rate": 0.00010004703668861712, |
|
"loss": 1.031, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 2.5134770098068, |
|
"grad_norm": 0.0427669994533062, |
|
"learning_rate": 9.957666980244591e-05, |
|
"loss": 1.0193, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 2.5252216806623995, |
|
"grad_norm": 0.04454643651843071, |
|
"learning_rate": 9.91063029162747e-05, |
|
"loss": 1.0256, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.5369663515179988, |
|
"grad_norm": 0.042179521173238754, |
|
"learning_rate": 9.863593603010348e-05, |
|
"loss": 1.0293, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 2.548711022373598, |
|
"grad_norm": 0.04245784506201744, |
|
"learning_rate": 9.816556914393227e-05, |
|
"loss": 1.0138, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 2.5604556932291973, |
|
"grad_norm": 0.04282999783754349, |
|
"learning_rate": 9.769520225776106e-05, |
|
"loss": 1.0216, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 2.5722003640847966, |
|
"grad_norm": 0.04309820756316185, |
|
"learning_rate": 9.722483537158984e-05, |
|
"loss": 1.0436, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 2.583945034940396, |
|
"grad_norm": 0.0428336001932621, |
|
"learning_rate": 9.675446848541862e-05, |
|
"loss": 1.0327, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.595689705795995, |
|
"grad_norm": 0.0433744341135025, |
|
"learning_rate": 9.628410159924742e-05, |
|
"loss": 1.0248, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 2.6074343766515944, |
|
"grad_norm": 0.0440264493227005, |
|
"learning_rate": 9.581373471307621e-05, |
|
"loss": 1.0276, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 2.6191790475071937, |
|
"grad_norm": 0.04474351555109024, |
|
"learning_rate": 9.5343367826905e-05, |
|
"loss": 1.0392, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 2.630923718362793, |
|
"grad_norm": 0.04529641568660736, |
|
"learning_rate": 9.487300094073378e-05, |
|
"loss": 1.0436, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 2.642668389218392, |
|
"grad_norm": 0.04383498802781105, |
|
"learning_rate": 9.440263405456257e-05, |
|
"loss": 1.0271, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.6544130600739915, |
|
"grad_norm": 0.04416006803512573, |
|
"learning_rate": 9.393226716839135e-05, |
|
"loss": 1.035, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 2.6661577309295907, |
|
"grad_norm": 0.0439978651702404, |
|
"learning_rate": 9.346190028222014e-05, |
|
"loss": 1.0242, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 2.67790240178519, |
|
"grad_norm": 0.043451737612485886, |
|
"learning_rate": 9.299153339604892e-05, |
|
"loss": 1.0266, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 2.6896470726407893, |
|
"grad_norm": 0.043133124709129333, |
|
"learning_rate": 9.252116650987771e-05, |
|
"loss": 1.0313, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 2.7013917434963886, |
|
"grad_norm": 0.04365682601928711, |
|
"learning_rate": 9.20507996237065e-05, |
|
"loss": 1.0164, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.713136414351988, |
|
"grad_norm": 0.045253172516822815, |
|
"learning_rate": 9.158043273753529e-05, |
|
"loss": 1.0234, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 2.724881085207587, |
|
"grad_norm": 0.04371510446071625, |
|
"learning_rate": 9.111006585136406e-05, |
|
"loss": 1.0262, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 2.7366257560631864, |
|
"grad_norm": 0.04663108289241791, |
|
"learning_rate": 9.063969896519285e-05, |
|
"loss": 1.027, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 2.7483704269187856, |
|
"grad_norm": 0.043591178953647614, |
|
"learning_rate": 9.016933207902164e-05, |
|
"loss": 1.0362, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 2.760115097774385, |
|
"grad_norm": 0.04423443600535393, |
|
"learning_rate": 8.969896519285043e-05, |
|
"loss": 1.0262, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.771859768629984, |
|
"grad_norm": 0.045264832675457, |
|
"learning_rate": 8.922859830667922e-05, |
|
"loss": 1.0227, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 2.7836044394855834, |
|
"grad_norm": 0.04213082045316696, |
|
"learning_rate": 8.8758231420508e-05, |
|
"loss": 1.0161, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 2.7953491103411827, |
|
"grad_norm": 0.04401146247982979, |
|
"learning_rate": 8.828786453433678e-05, |
|
"loss": 1.0269, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 2.807093781196782, |
|
"grad_norm": 0.043770622462034225, |
|
"learning_rate": 8.781749764816557e-05, |
|
"loss": 1.0429, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 2.8188384520523813, |
|
"grad_norm": 0.04466963931918144, |
|
"learning_rate": 8.734713076199436e-05, |
|
"loss": 1.0309, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.8305831229079805, |
|
"grad_norm": 0.042598120868206024, |
|
"learning_rate": 8.687676387582314e-05, |
|
"loss": 1.0321, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 2.84232779376358, |
|
"grad_norm": 0.04534047096967697, |
|
"learning_rate": 8.640639698965193e-05, |
|
"loss": 1.0335, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 2.854072464619179, |
|
"grad_norm": 0.044474124908447266, |
|
"learning_rate": 8.593603010348071e-05, |
|
"loss": 1.0303, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 2.8658171354747783, |
|
"grad_norm": 0.04398440942168236, |
|
"learning_rate": 8.546566321730952e-05, |
|
"loss": 1.0228, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 2.8775618063303776, |
|
"grad_norm": 0.043202903121709824, |
|
"learning_rate": 8.499529633113829e-05, |
|
"loss": 1.0299, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.889306477185977, |
|
"grad_norm": 0.04326211288571358, |
|
"learning_rate": 8.452492944496708e-05, |
|
"loss": 1.0324, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 2.901051148041576, |
|
"grad_norm": 0.044613469392061234, |
|
"learning_rate": 8.405456255879587e-05, |
|
"loss": 1.0269, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 2.9127958188971754, |
|
"grad_norm": 0.04421741142868996, |
|
"learning_rate": 8.358419567262466e-05, |
|
"loss": 1.0193, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 2.9245404897527747, |
|
"grad_norm": 0.044949114322662354, |
|
"learning_rate": 8.311382878645344e-05, |
|
"loss": 1.0296, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 2.936285160608374, |
|
"grad_norm": 0.044719964265823364, |
|
"learning_rate": 8.264346190028222e-05, |
|
"loss": 1.031, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.9480298314639732, |
|
"grad_norm": 0.04360034689307213, |
|
"learning_rate": 8.217309501411101e-05, |
|
"loss": 1.0163, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 2.9597745023195725, |
|
"grad_norm": 0.0441979356110096, |
|
"learning_rate": 8.17027281279398e-05, |
|
"loss": 1.0325, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 2.971519173175172, |
|
"grad_norm": 0.044677652418613434, |
|
"learning_rate": 8.123236124176858e-05, |
|
"loss": 1.0286, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 2.983263844030771, |
|
"grad_norm": 0.042885322123765945, |
|
"learning_rate": 8.076199435559737e-05, |
|
"loss": 1.0365, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 2.9950085148863703, |
|
"grad_norm": 0.042082566767930984, |
|
"learning_rate": 8.029162746942616e-05, |
|
"loss": 1.0351, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 3.0067531857419696, |
|
"grad_norm": 0.04490746557712555, |
|
"learning_rate": 7.982126058325495e-05, |
|
"loss": 1.0112, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 3.018497856597569, |
|
"grad_norm": 0.048318084329366684, |
|
"learning_rate": 7.935089369708372e-05, |
|
"loss": 1.0144, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 3.030242527453168, |
|
"grad_norm": 0.04372231662273407, |
|
"learning_rate": 7.888052681091251e-05, |
|
"loss": 1.0081, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 3.0419871983087674, |
|
"grad_norm": 0.04528006911277771, |
|
"learning_rate": 7.84101599247413e-05, |
|
"loss": 1.0215, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 3.0537318691643667, |
|
"grad_norm": 0.04795797914266586, |
|
"learning_rate": 7.793979303857009e-05, |
|
"loss": 1.0226, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.065476540019966, |
|
"grad_norm": 0.04441961273550987, |
|
"learning_rate": 7.746942615239888e-05, |
|
"loss": 1.0298, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 3.077221210875565, |
|
"grad_norm": 0.044861868023872375, |
|
"learning_rate": 7.699905926622765e-05, |
|
"loss": 1.0158, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 3.0889658817311645, |
|
"grad_norm": 0.04549916088581085, |
|
"learning_rate": 7.652869238005644e-05, |
|
"loss": 1.0107, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 3.1007105525867638, |
|
"grad_norm": 0.04485148563981056, |
|
"learning_rate": 7.605832549388523e-05, |
|
"loss": 1.0295, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 3.112455223442363, |
|
"grad_norm": 0.0463709756731987, |
|
"learning_rate": 7.558795860771402e-05, |
|
"loss": 1.0237, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 3.1241998942979623, |
|
"grad_norm": 0.04507851600646973, |
|
"learning_rate": 7.51175917215428e-05, |
|
"loss": 1.0257, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 3.1359445651535616, |
|
"grad_norm": 0.04443085938692093, |
|
"learning_rate": 7.46472248353716e-05, |
|
"loss": 1.0161, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 3.147689236009161, |
|
"grad_norm": 0.04493951424956322, |
|
"learning_rate": 7.417685794920039e-05, |
|
"loss": 1.0103, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 3.15943390686476, |
|
"grad_norm": 0.04466501250863075, |
|
"learning_rate": 7.370649106302918e-05, |
|
"loss": 1.0184, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 3.1711785777203594, |
|
"grad_norm": 0.04674587398767471, |
|
"learning_rate": 7.323612417685795e-05, |
|
"loss": 1.0234, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.1829232485759587, |
|
"grad_norm": 0.04568205028772354, |
|
"learning_rate": 7.276575729068674e-05, |
|
"loss": 1.0177, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 3.194667919431558, |
|
"grad_norm": 0.04736079275608063, |
|
"learning_rate": 7.229539040451553e-05, |
|
"loss": 1.0238, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 3.206412590287157, |
|
"grad_norm": 0.04510754346847534, |
|
"learning_rate": 7.182502351834432e-05, |
|
"loss": 1.0239, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 3.2181572611427565, |
|
"grad_norm": 0.04676396772265434, |
|
"learning_rate": 7.13546566321731e-05, |
|
"loss": 1.0226, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 3.2299019319983557, |
|
"grad_norm": 0.04639539122581482, |
|
"learning_rate": 7.088428974600188e-05, |
|
"loss": 1.0304, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 3.241646602853955, |
|
"grad_norm": 0.046673484146595, |
|
"learning_rate": 7.041392285983067e-05, |
|
"loss": 1.029, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 3.2533912737095543, |
|
"grad_norm": 0.04434806853532791, |
|
"learning_rate": 6.994355597365946e-05, |
|
"loss": 1.0231, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 3.2651359445651535, |
|
"grad_norm": 0.046948377043008804, |
|
"learning_rate": 6.947318908748824e-05, |
|
"loss": 1.0212, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 3.276880615420753, |
|
"grad_norm": 0.045691922307014465, |
|
"learning_rate": 6.900282220131703e-05, |
|
"loss": 1.0228, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 3.288625286276352, |
|
"grad_norm": 0.04534591734409332, |
|
"learning_rate": 6.853245531514582e-05, |
|
"loss": 1.0311, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.3003699571319514, |
|
"grad_norm": 0.045218247920274734, |
|
"learning_rate": 6.80620884289746e-05, |
|
"loss": 1.0145, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 3.3121146279875506, |
|
"grad_norm": 0.046320728957653046, |
|
"learning_rate": 6.75917215428034e-05, |
|
"loss": 1.0184, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 3.32385929884315, |
|
"grad_norm": 0.04595513269305229, |
|
"learning_rate": 6.712135465663217e-05, |
|
"loss": 1.0181, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 3.335603969698749, |
|
"grad_norm": 0.04726444184780121, |
|
"learning_rate": 6.665098777046096e-05, |
|
"loss": 1.019, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 3.3473486405543484, |
|
"grad_norm": 0.0476396419107914, |
|
"learning_rate": 6.618062088428975e-05, |
|
"loss": 1.0109, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 3.3590933114099477, |
|
"grad_norm": 0.04619845747947693, |
|
"learning_rate": 6.571025399811854e-05, |
|
"loss": 1.0322, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 3.370837982265547, |
|
"grad_norm": 0.04548267647624016, |
|
"learning_rate": 6.523988711194731e-05, |
|
"loss": 1.0088, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 3.3825826531211463, |
|
"grad_norm": 0.04472291097044945, |
|
"learning_rate": 6.47695202257761e-05, |
|
"loss": 1.0371, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 3.3943273239767455, |
|
"grad_norm": 0.04602396488189697, |
|
"learning_rate": 6.429915333960489e-05, |
|
"loss": 1.0246, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 3.406071994832345, |
|
"grad_norm": 0.0454532653093338, |
|
"learning_rate": 6.382878645343368e-05, |
|
"loss": 1.0254, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.417816665687944, |
|
"grad_norm": 0.04494043067097664, |
|
"learning_rate": 6.335841956726247e-05, |
|
"loss": 1.0193, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 3.4295613365435433, |
|
"grad_norm": 0.045226361602544785, |
|
"learning_rate": 6.288805268109126e-05, |
|
"loss": 1.0075, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 3.4413060073991426, |
|
"grad_norm": 0.04578743502497673, |
|
"learning_rate": 6.241768579492005e-05, |
|
"loss": 1.0175, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 3.453050678254742, |
|
"grad_norm": 0.04741055890917778, |
|
"learning_rate": 6.194731890874884e-05, |
|
"loss": 1.025, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 3.464795349110341, |
|
"grad_norm": 0.046121254563331604, |
|
"learning_rate": 6.147695202257761e-05, |
|
"loss": 1.0122, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 3.4765400199659404, |
|
"grad_norm": 0.04572110250592232, |
|
"learning_rate": 6.10065851364064e-05, |
|
"loss": 1.0226, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 3.4882846908215397, |
|
"grad_norm": 0.04542776942253113, |
|
"learning_rate": 6.053621825023519e-05, |
|
"loss": 1.0139, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 3.500029361677139, |
|
"grad_norm": 0.04612453654408455, |
|
"learning_rate": 6.006585136406397e-05, |
|
"loss": 1.0137, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 3.5117740325327382, |
|
"grad_norm": 0.04599248990416527, |
|
"learning_rate": 5.959548447789276e-05, |
|
"loss": 1.0254, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 3.5235187033883375, |
|
"grad_norm": 0.047213006764650345, |
|
"learning_rate": 5.9125117591721544e-05, |
|
"loss": 1.0202, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.5352633742439368, |
|
"grad_norm": 0.04772693291306496, |
|
"learning_rate": 5.865475070555033e-05, |
|
"loss": 1.0158, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 3.547008045099536, |
|
"grad_norm": 0.04780668392777443, |
|
"learning_rate": 5.8184383819379116e-05, |
|
"loss": 1.0122, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 3.5587527159551353, |
|
"grad_norm": 0.04593056067824364, |
|
"learning_rate": 5.7714016933207905e-05, |
|
"loss": 1.0196, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 3.5704973868107346, |
|
"grad_norm": 0.046468161046504974, |
|
"learning_rate": 5.724365004703669e-05, |
|
"loss": 1.016, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 3.582242057666334, |
|
"grad_norm": 0.046613674610853195, |
|
"learning_rate": 5.6773283160865476e-05, |
|
"loss": 1.028, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 3.593986728521933, |
|
"grad_norm": 0.0453767292201519, |
|
"learning_rate": 5.6302916274694265e-05, |
|
"loss": 1.0179, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 3.6057313993775324, |
|
"grad_norm": 0.0448760949075222, |
|
"learning_rate": 5.583254938852305e-05, |
|
"loss": 1.0107, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 3.6174760702331317, |
|
"grad_norm": 0.04624709486961365, |
|
"learning_rate": 5.5362182502351837e-05, |
|
"loss": 1.0191, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 3.629220741088731, |
|
"grad_norm": 0.04776296019554138, |
|
"learning_rate": 5.489181561618062e-05, |
|
"loss": 1.0145, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 3.64096541194433, |
|
"grad_norm": 0.044639695435762405, |
|
"learning_rate": 5.442144873000941e-05, |
|
"loss": 1.0286, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.6527100827999295, |
|
"grad_norm": 0.04474237933754921, |
|
"learning_rate": 5.395108184383819e-05, |
|
"loss": 1.0202, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 3.6644547536555288, |
|
"grad_norm": 0.045259665697813034, |
|
"learning_rate": 5.348071495766698e-05, |
|
"loss": 1.0034, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 3.676199424511128, |
|
"grad_norm": 0.04631993547081947, |
|
"learning_rate": 5.3010348071495775e-05, |
|
"loss": 1.026, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 3.6879440953667273, |
|
"grad_norm": 0.04611456021666527, |
|
"learning_rate": 5.253998118532456e-05, |
|
"loss": 1.0084, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 3.6996887662223266, |
|
"grad_norm": 0.04521900787949562, |
|
"learning_rate": 5.206961429915335e-05, |
|
"loss": 1.019, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 3.711433437077926, |
|
"grad_norm": 0.04682457074522972, |
|
"learning_rate": 5.1599247412982136e-05, |
|
"loss": 1.029, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 3.723178107933525, |
|
"grad_norm": 0.04528072476387024, |
|
"learning_rate": 5.112888052681092e-05, |
|
"loss": 1.0381, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 3.7349227787891244, |
|
"grad_norm": 0.044861361384391785, |
|
"learning_rate": 5.065851364063971e-05, |
|
"loss": 1.0215, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 3.7466674496447236, |
|
"grad_norm": 0.04482056945562363, |
|
"learning_rate": 5.018814675446849e-05, |
|
"loss": 1.0113, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 3.758412120500323, |
|
"grad_norm": 0.046249981969594955, |
|
"learning_rate": 4.971777986829728e-05, |
|
"loss": 1.0162, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.7702155147102, |
|
"grad_norm": 0.04573667049407959, |
|
"learning_rate": 4.924741298212606e-05, |
|
"loss": 1.0207, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 3.7819601855657994, |
|
"grad_norm": 0.045440200716257095, |
|
"learning_rate": 4.877704609595485e-05, |
|
"loss": 1.0204, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 3.7937048564213987, |
|
"grad_norm": 0.043568406254053116, |
|
"learning_rate": 4.830667920978363e-05, |
|
"loss": 1.0218, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 3.805449527276998, |
|
"grad_norm": 0.04695621505379677, |
|
"learning_rate": 4.783631232361242e-05, |
|
"loss": 1.016, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 3.817194198132597, |
|
"grad_norm": 0.04511117562651634, |
|
"learning_rate": 4.7365945437441204e-05, |
|
"loss": 1.018, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 3.8289388689881965, |
|
"grad_norm": 0.0471058115363121, |
|
"learning_rate": 4.689557855126999e-05, |
|
"loss": 1.0265, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 3.8406835398437957, |
|
"grad_norm": 0.04763401299715042, |
|
"learning_rate": 4.6425211665098775e-05, |
|
"loss": 1.0207, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 3.852428210699395, |
|
"grad_norm": 0.04863814637064934, |
|
"learning_rate": 4.5954844778927564e-05, |
|
"loss": 1.0176, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 3.8641728815549943, |
|
"grad_norm": 0.04665295407176018, |
|
"learning_rate": 4.5484477892756347e-05, |
|
"loss": 1.0143, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 3.8759175524105935, |
|
"grad_norm": 0.04519110545516014, |
|
"learning_rate": 4.501411100658514e-05, |
|
"loss": 1.0206, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.887662223266193, |
|
"grad_norm": 0.04543546214699745, |
|
"learning_rate": 4.4543744120413925e-05, |
|
"loss": 1.0204, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 3.899406894121792, |
|
"grad_norm": 0.04484422877430916, |
|
"learning_rate": 4.4073377234242714e-05, |
|
"loss": 1.0166, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 3.9111515649773914, |
|
"grad_norm": 0.04460673779249191, |
|
"learning_rate": 4.3603010348071496e-05, |
|
"loss": 1.0116, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 3.9228962358329906, |
|
"grad_norm": 0.04780727997422218, |
|
"learning_rate": 4.3132643461900285e-05, |
|
"loss": 1.0127, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 3.93464090668859, |
|
"grad_norm": 0.045138854533433914, |
|
"learning_rate": 4.2662276575729074e-05, |
|
"loss": 1.0173, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 3.946385577544189, |
|
"grad_norm": 0.046860042959451675, |
|
"learning_rate": 4.219190968955786e-05, |
|
"loss": 1.0164, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 3.9581302483997884, |
|
"grad_norm": 0.04478363320231438, |
|
"learning_rate": 4.1721542803386646e-05, |
|
"loss": 1.0339, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 3.9698749192553877, |
|
"grad_norm": 0.04575762897729874, |
|
"learning_rate": 4.125117591721543e-05, |
|
"loss": 1.01, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 3.981619590110987, |
|
"grad_norm": 0.04523037001490593, |
|
"learning_rate": 4.078080903104422e-05, |
|
"loss": 1.0145, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 3.9933642609665863, |
|
"grad_norm": 0.04593048244714737, |
|
"learning_rate": 4.0310442144873e-05, |
|
"loss": 1.0131, |
|
"step": 34000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 42570, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.302351602279881e+20, |
|
"train_batch_size": 9, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |