{
  "best_metric": 0.37550708651542664,
  "best_model_checkpoint": "cbb-3b/checkpoint-1098",
  "epoch": 2.9979522184300342,
  "eval_steps": 500,
  "global_step": 1098,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0027303754266211604, "grad_norm": 0.7549694776535034, "learning_rate": 1.360544217687075e-06, "loss": 1.2225, "step": 1 },
    { "epoch": 0.005460750853242321, "grad_norm": 0.7538214325904846, "learning_rate": 2.72108843537415e-06, "loss": 1.2103, "step": 2 },
    { "epoch": 0.008191126279863481, "grad_norm": 0.7328954935073853, "learning_rate": 4.081632653061224e-06, "loss": 1.1858, "step": 3 },
    { "epoch": 0.010921501706484642, "grad_norm": 0.7359272837638855, "learning_rate": 5.4421768707483e-06, "loss": 1.1885, "step": 4 },
    { "epoch": 0.013651877133105802, "grad_norm": 0.740386426448822, "learning_rate": 6.802721088435375e-06, "loss": 1.1781, "step": 5 },
    { "epoch": 0.016382252559726963, "grad_norm": 0.6984951496124268, "learning_rate": 8.163265306122448e-06, "loss": 1.1395, "step": 6 },
    { "epoch": 0.01911262798634812, "grad_norm": 0.6689624786376953, "learning_rate": 9.523809523809523e-06, "loss": 1.137, "step": 7 },
    { "epoch": 0.021843003412969283, "grad_norm": 0.6134174466133118, "learning_rate": 1.08843537414966e-05, "loss": 1.1531, "step": 8 },
    { "epoch": 0.024573378839590442, "grad_norm": 0.5647606253623962, "learning_rate": 1.2244897959183674e-05, "loss": 1.1201, "step": 9 },
    { "epoch": 0.027303754266211604, "grad_norm": 0.541833221912384, "learning_rate": 1.360544217687075e-05, "loss": 1.0989, "step": 10 },
    { "epoch": 0.030034129692832763, "grad_norm": 0.4785626232624054, "learning_rate": 1.4965986394557824e-05, "loss": 1.0664, "step": 11 },
    { "epoch": 0.032764505119453925, "grad_norm": 0.42421552538871765, "learning_rate": 1.6326530612244897e-05, "loss": 1.057, "step": 12 },
    { "epoch": 0.03549488054607509, "grad_norm": 0.384870707988739, "learning_rate": 1.7687074829931973e-05, "loss": 0.9794, "step": 13 },
    { "epoch": 0.03822525597269624, "grad_norm": 0.31449463963508606, "learning_rate": 1.9047619047619046e-05, "loss": 0.9485, "step": 14 },
    { "epoch": 0.040955631399317405, "grad_norm": 0.29094135761260986, "learning_rate": 2.0408163265306123e-05, "loss": 0.9581, "step": 15 },
    { "epoch": 0.04368600682593857, "grad_norm": 0.2500893771648407, "learning_rate": 2.17687074829932e-05, "loss": 0.9363, "step": 16 },
    { "epoch": 0.04641638225255973, "grad_norm": 0.2445881962776184, "learning_rate": 2.3129251700680275e-05, "loss": 0.9186, "step": 17 },
    { "epoch": 0.049146757679180884, "grad_norm": 0.2477860301733017, "learning_rate": 2.448979591836735e-05, "loss": 0.9099, "step": 18 },
    { "epoch": 0.05187713310580205, "grad_norm": 0.24853268265724182, "learning_rate": 2.5850340136054425e-05, "loss": 0.912, "step": 19 },
    { "epoch": 0.05460750853242321, "grad_norm": 0.22501873970031738, "learning_rate": 2.72108843537415e-05, "loss": 0.8836, "step": 20 },
    { "epoch": 0.05733788395904437, "grad_norm": 0.21223071217536926, "learning_rate": 2.857142857142857e-05, "loss": 0.8651, "step": 21 },
    { "epoch": 0.060068259385665526, "grad_norm": 0.20172430574893951, "learning_rate": 2.9931972789115647e-05, "loss": 0.8393, "step": 22 },
    { "epoch": 0.06279863481228669, "grad_norm": 0.17902718484401703, "learning_rate": 3.1292517006802724e-05, "loss": 0.8033, "step": 23 },
    { "epoch": 0.06552901023890785, "grad_norm": 0.1813097447156906, "learning_rate": 3.265306122448979e-05, "loss": 0.8152, "step": 24 },
    { "epoch": 0.06825938566552901, "grad_norm": 0.19280143082141876, "learning_rate": 3.401360544217687e-05, "loss": 0.8051, "step": 25 },
    { "epoch": 0.07098976109215017, "grad_norm": 0.17157189548015594, "learning_rate": 3.5374149659863946e-05, "loss": 0.794, "step": 26 },
    { "epoch": 0.07372013651877134, "grad_norm": 0.1467738002538681, "learning_rate": 3.673469387755102e-05, "loss": 0.7874, "step": 27 },
    { "epoch": 0.07645051194539249, "grad_norm": 0.13913457095623016, "learning_rate": 3.809523809523809e-05, "loss": 0.7519, "step": 28 },
    { "epoch": 0.07918088737201365, "grad_norm": 0.13179022073745728, "learning_rate": 3.945578231292517e-05, "loss": 0.76, "step": 29 },
    { "epoch": 0.08191126279863481, "grad_norm": 0.1376553773880005, "learning_rate": 4.0816326530612245e-05, "loss": 0.7369, "step": 30 },
    { "epoch": 0.08464163822525597, "grad_norm": 0.14040575921535492, "learning_rate": 4.217687074829932e-05, "loss": 0.7463, "step": 31 },
    { "epoch": 0.08737201365187713, "grad_norm": 0.13217338919639587, "learning_rate": 4.35374149659864e-05, "loss": 0.7298, "step": 32 },
    { "epoch": 0.0901023890784983, "grad_norm": 0.11285194754600525, "learning_rate": 4.4897959183673474e-05, "loss": 0.7134, "step": 33 },
    { "epoch": 0.09283276450511946, "grad_norm": 0.10098642110824585, "learning_rate": 4.625850340136055e-05, "loss": 0.7238, "step": 34 },
    { "epoch": 0.09556313993174062, "grad_norm": 0.10341370850801468, "learning_rate": 4.761904761904762e-05, "loss": 0.6908, "step": 35 },
    { "epoch": 0.09829351535836177, "grad_norm": 0.09662918746471405, "learning_rate": 4.89795918367347e-05, "loss": 0.7, "step": 36 },
    { "epoch": 0.10102389078498293, "grad_norm": 0.09548471122980118, "learning_rate": 5.034013605442177e-05, "loss": 0.7207, "step": 37 },
    { "epoch": 0.1037542662116041, "grad_norm": 0.09512269496917725, "learning_rate": 5.170068027210885e-05, "loss": 0.7016, "step": 38 },
    { "epoch": 0.10648464163822526, "grad_norm": 0.0912129282951355, "learning_rate": 5.3061224489795926e-05, "loss": 0.6891, "step": 39 },
    { "epoch": 0.10921501706484642, "grad_norm": 0.08661182224750519, "learning_rate": 5.4421768707483e-05, "loss": 0.6982, "step": 40 },
    { "epoch": 0.11194539249146758, "grad_norm": 0.09124922007322311, "learning_rate": 5.5782312925170065e-05, "loss": 0.7051, "step": 41 },
    { "epoch": 0.11467576791808874, "grad_norm": 0.09174500405788422, "learning_rate": 5.714285714285714e-05, "loss": 0.6978, "step": 42 },
    { "epoch": 0.1174061433447099, "grad_norm": 0.0679943636059761, "learning_rate": 5.850340136054422e-05, "loss": 0.6889, "step": 43 },
    { "epoch": 0.12013651877133105, "grad_norm": 0.07204238325357437, "learning_rate": 5.9863945578231295e-05, "loss": 0.704, "step": 44 },
    { "epoch": 0.12286689419795221, "grad_norm": 0.08089234679937363, "learning_rate": 6.122448979591838e-05, "loss": 0.6838, "step": 45 },
    { "epoch": 0.12559726962457338, "grad_norm": 0.09053023904561996, "learning_rate": 6.258503401360545e-05, "loss": 0.6754, "step": 46 },
    { "epoch": 0.12832764505119454, "grad_norm": 0.07513958215713501, "learning_rate": 6.394557823129253e-05, "loss": 0.6894, "step": 47 },
    { "epoch": 0.1310580204778157, "grad_norm": 0.07480401545763016, "learning_rate": 6.530612244897959e-05, "loss": 0.6809, "step": 48 },
    { "epoch": 0.13378839590443686, "grad_norm": 0.07617643475532532, "learning_rate": 6.666666666666667e-05, "loss": 0.697, "step": 49 },
    { "epoch": 0.13651877133105803, "grad_norm": 0.06744271516799927, "learning_rate": 6.802721088435374e-05, "loss": 0.6921, "step": 50 },
    { "epoch": 0.1392491467576792, "grad_norm": 0.07185206562280655, "learning_rate": 6.938775510204082e-05, "loss": 0.6536, "step": 51 },
    { "epoch": 0.14197952218430035, "grad_norm": 0.07255382090806961, "learning_rate": 7.074829931972789e-05, "loss": 0.653, "step": 52 },
    { "epoch": 0.1447098976109215, "grad_norm": 0.07474930584430695, "learning_rate": 7.210884353741498e-05, "loss": 0.6888, "step": 53 },
    { "epoch": 0.14744027303754267, "grad_norm": 0.0754467323422432, "learning_rate": 7.346938775510205e-05, "loss": 0.6818, "step": 54 },
    { "epoch": 0.15017064846416384, "grad_norm": 0.07726683467626572, "learning_rate": 7.482993197278913e-05, "loss": 0.6835, "step": 55 },
    { "epoch": 0.15290102389078497, "grad_norm": 0.07462974637746811, "learning_rate": 7.619047619047618e-05, "loss": 0.667, "step": 56 },
    { "epoch": 0.15563139931740613, "grad_norm": 0.06939647346735, "learning_rate": 7.755102040816327e-05, "loss": 0.6668, "step": 57 },
    { "epoch": 0.1583617747440273, "grad_norm": 0.08218149840831757, "learning_rate": 7.891156462585034e-05, "loss": 0.6762, "step": 58 },
    { "epoch": 0.16109215017064846, "grad_norm": 0.0838819146156311, "learning_rate": 8.027210884353742e-05, "loss": 0.6685, "step": 59 },
    { "epoch": 0.16382252559726962, "grad_norm": 0.07441603392362595, "learning_rate": 8.163265306122449e-05, "loss": 0.6573, "step": 60 },
    { "epoch": 0.16655290102389078, "grad_norm": 0.0746053010225296, "learning_rate": 8.299319727891157e-05, "loss": 0.6582, "step": 61 },
    { "epoch": 0.16928327645051194, "grad_norm": 0.08602144569158554, "learning_rate": 8.435374149659864e-05, "loss": 0.6547, "step": 62 },
    { "epoch": 0.1720136518771331, "grad_norm": 0.08236663043498993, "learning_rate": 8.571428571428571e-05, "loss": 0.6081, "step": 63 },
    { "epoch": 0.17474402730375427, "grad_norm": 0.08744888752698898, "learning_rate": 8.70748299319728e-05, "loss": 0.6576, "step": 64 },
    { "epoch": 0.17747440273037543, "grad_norm": 0.08321461081504822, "learning_rate": 8.843537414965987e-05, "loss": 0.6137, "step": 65 },
    { "epoch": 0.1802047781569966, "grad_norm": 0.08639347553253174, "learning_rate": 8.979591836734695e-05, "loss": 0.6579, "step": 66 },
    { "epoch": 0.18293515358361775, "grad_norm": 0.09154847264289856, "learning_rate": 9.115646258503402e-05, "loss": 0.6391, "step": 67 },
    { "epoch": 0.18566552901023892, "grad_norm": 0.1094379723072052, "learning_rate": 9.25170068027211e-05, "loss": 0.61, "step": 68 },
    { "epoch": 0.18839590443686008, "grad_norm": 0.11089900881052017, "learning_rate": 9.387755102040817e-05, "loss": 0.6452, "step": 69 },
    { "epoch": 0.19112627986348124, "grad_norm": 0.11615785956382751, "learning_rate": 9.523809523809524e-05, "loss": 0.6463, "step": 70 },
    { "epoch": 0.19385665529010238, "grad_norm": 0.08359086513519287, "learning_rate": 9.659863945578231e-05, "loss": 0.6364, "step": 71 },
    { "epoch": 0.19658703071672354, "grad_norm": 0.0885363295674324, "learning_rate": 9.79591836734694e-05, "loss": 0.6092, "step": 72 },
    { "epoch": 0.1993174061433447, "grad_norm": 0.09258115291595459, "learning_rate": 9.931972789115646e-05, "loss": 0.6229, "step": 73 },
    { "epoch": 0.20204778156996586, "grad_norm": 0.08969170600175858, "learning_rate": 0.00010068027210884355, "loss": 0.6173, "step": 74 },
    { "epoch": 0.20477815699658702, "grad_norm": 0.10124260932207108, "learning_rate": 0.00010204081632653062, "loss": 0.6414, "step": 75 },
    { "epoch": 0.2075085324232082, "grad_norm": 0.08671349287033081, "learning_rate": 0.0001034013605442177, "loss": 0.6145, "step": 76 },
    { "epoch": 0.21023890784982935, "grad_norm": 0.09684890508651733, "learning_rate": 0.00010476190476190477, "loss": 0.6262, "step": 77 },
    { "epoch": 0.2129692832764505, "grad_norm": 0.08690830320119858, "learning_rate": 0.00010612244897959185, "loss": 0.6316, "step": 78 },
    { "epoch": 0.21569965870307167, "grad_norm": 0.10457205027341843, "learning_rate": 0.00010748299319727892, "loss": 0.639, "step": 79 },
    { "epoch": 0.21843003412969283, "grad_norm": 0.10080841183662415, "learning_rate": 0.000108843537414966, "loss": 0.592, "step": 80 },
    { "epoch": 0.221160409556314, "grad_norm": 0.08858262002468109, "learning_rate": 0.00011020408163265306, "loss": 0.6471, "step": 81 },
    { "epoch": 0.22389078498293516, "grad_norm": 0.08708172291517258, "learning_rate": 0.00011156462585034013, "loss": 0.6222, "step": 82 },
    { "epoch": 0.22662116040955632, "grad_norm": 0.1075206995010376, "learning_rate": 0.00011292517006802721, "loss": 0.5961, "step": 83 },
    { "epoch": 0.22935153583617748, "grad_norm": 0.11788732558488846, "learning_rate": 0.00011428571428571428, "loss": 0.609, "step": 84 },
    { "epoch": 0.23208191126279865, "grad_norm": 0.0956830084323883, "learning_rate": 0.00011564625850340137, "loss": 0.6042, "step": 85 },
    { "epoch": 0.2348122866894198, "grad_norm": 0.09799174964427948, "learning_rate": 0.00011700680272108844, "loss": 0.6045, "step": 86 },
    { "epoch": 0.23754266211604094, "grad_norm": 0.09177012741565704, "learning_rate": 0.00011836734693877552, "loss": 0.6068, "step": 87 },
    { "epoch": 0.2402730375426621, "grad_norm": 0.10407502949237823, "learning_rate": 0.00011972789115646259, "loss": 0.5993, "step": 88 },
    { "epoch": 0.24300341296928327, "grad_norm": 0.1047271341085434, "learning_rate": 0.00012108843537414967, "loss": 0.6144, "step": 89 },
    { "epoch": 0.24573378839590443, "grad_norm": 0.0866198018193245, "learning_rate": 0.00012244897959183676, "loss": 0.6203, "step": 90 },
    { "epoch": 0.2484641638225256, "grad_norm": 0.09400323033332825, "learning_rate": 0.0001238095238095238, "loss": 0.6056, "step": 91 },
    { "epoch": 0.25119453924914675, "grad_norm": 0.0817628726363182, "learning_rate": 0.0001251700680272109, "loss": 0.5853, "step": 92 },
    { "epoch": 0.25392491467576794, "grad_norm": 0.09105788916349411, "learning_rate": 0.00012653061224489798, "loss": 0.5952, "step": 93 },
    { "epoch": 0.2566552901023891, "grad_norm": 0.09889201074838638, "learning_rate": 0.00012789115646258506, "loss": 0.5994, "step": 94 },
    { "epoch": 0.2593856655290102, "grad_norm": 0.09481444954872131, "learning_rate": 0.00012925170068027212, "loss": 0.5918, "step": 95 },
    { "epoch": 0.2621160409556314, "grad_norm": 0.11730329692363739, "learning_rate": 0.00013061224489795917, "loss": 0.592, "step": 96 },
    { "epoch": 0.26484641638225254, "grad_norm": 0.15733356773853302, "learning_rate": 0.00013197278911564626, "loss": 0.5636, "step": 97 },
    { "epoch": 0.2675767918088737, "grad_norm": 0.20819880068302155, "learning_rate": 0.00013333333333333334, "loss": 0.6101, "step": 98 },
    { "epoch": 0.27030716723549486, "grad_norm": 0.18305541574954987, "learning_rate": 0.0001346938775510204, "loss": 0.5814, "step": 99 },
    { "epoch": 0.27303754266211605, "grad_norm": 0.10316050797700882, "learning_rate": 0.00013605442176870748, "loss": 0.5871, "step": 100 },
    { "epoch": 0.2757679180887372, "grad_norm": 0.13305549323558807, "learning_rate": 0.00013741496598639456, "loss": 0.5846, "step": 101 },
    { "epoch": 0.2784982935153584, "grad_norm": 0.0950811356306076, "learning_rate": 0.00013877551020408165, "loss": 0.5711, "step": 102 },
    { "epoch": 0.2812286689419795, "grad_norm": 0.1198628693819046, "learning_rate": 0.0001401360544217687, "loss": 0.5914, "step": 103 },
    { "epoch": 0.2839590443686007, "grad_norm": 0.08809541165828705, "learning_rate": 0.00014149659863945578, "loss": 0.5872, "step": 104 },
    { "epoch": 0.28668941979522183, "grad_norm": 0.09801067411899567, "learning_rate": 0.00014285714285714287, "loss": 0.566, "step": 105 },
    { "epoch": 0.289419795221843, "grad_norm": 0.08766568452119827, "learning_rate": 0.00014421768707482995, "loss": 0.5808, "step": 106 },
    { "epoch": 0.29215017064846416, "grad_norm": 0.09133429825305939, "learning_rate": 0.000145578231292517, "loss": 0.6037, "step": 107 },
    { "epoch": 0.29488054607508535, "grad_norm": 0.09074072539806366, "learning_rate": 0.0001469387755102041, "loss": 0.5897, "step": 108 },
    { "epoch": 0.2976109215017065, "grad_norm": 0.08934789896011353, "learning_rate": 0.00014829931972789117, "loss": 0.5998, "step": 109 },
    { "epoch": 0.3003412969283277, "grad_norm": 0.08707176148891449, "learning_rate": 0.00014965986394557826, "loss": 0.5762, "step": 110 },
    { "epoch": 0.3030716723549488, "grad_norm": 0.0948200449347496, "learning_rate": 0.0001510204081632653, "loss": 0.5734, "step": 111 },
    { "epoch": 0.30580204778156994, "grad_norm": 0.08889783173799515, "learning_rate": 0.00015238095238095237, "loss": 0.5867, "step": 112 },
    { "epoch": 0.30853242320819113, "grad_norm": 0.08152323961257935, "learning_rate": 0.00015374149659863945, "loss": 0.5527, "step": 113 },
    { "epoch": 0.31126279863481227, "grad_norm": 0.09019389748573303, "learning_rate": 0.00015510204081632654, "loss": 0.6007, "step": 114 },
    { "epoch": 0.31399317406143346, "grad_norm": 0.08257456868886948, "learning_rate": 0.00015646258503401362, "loss": 0.5569, "step": 115 },
    { "epoch": 0.3167235494880546, "grad_norm": 0.08834348618984222, "learning_rate": 0.00015782312925170067, "loss": 0.6026, "step": 116 },
    { "epoch": 0.3194539249146758, "grad_norm": 0.08634665608406067, "learning_rate": 0.00015918367346938776, "loss": 0.5926, "step": 117 },
    { "epoch": 0.3221843003412969, "grad_norm": 0.07867719978094101, "learning_rate": 0.00016054421768707484, "loss": 0.5707, "step": 118 },
    { "epoch": 0.3249146757679181, "grad_norm": 0.09690061956644058, "learning_rate": 0.00016190476190476192, "loss": 0.5793, "step": 119 },
    { "epoch": 0.32764505119453924, "grad_norm": 0.08276376128196716, "learning_rate": 0.00016326530612244898, "loss": 0.5459, "step": 120 },
    { "epoch": 0.33037542662116043, "grad_norm": 0.09276240319013596, "learning_rate": 0.00016462585034013606, "loss": 0.5732, "step": 121 },
    { "epoch": 0.33310580204778156, "grad_norm": 0.0819844901561737, "learning_rate": 0.00016598639455782315, "loss": 0.5349, "step": 122 },
    { "epoch": 0.33583617747440275, "grad_norm": 0.08146791905164719, "learning_rate": 0.00016734693877551023, "loss": 0.5656, "step": 123 },
    { "epoch": 0.3385665529010239, "grad_norm": 0.0879024788737297, "learning_rate": 0.00016870748299319729, "loss": 0.5758, "step": 124 },
    { "epoch": 0.3412969283276451, "grad_norm": 0.07890356332063675, "learning_rate": 0.00017006802721088434, "loss": 0.5332, "step": 125 },
    { "epoch": 0.3440273037542662, "grad_norm": 0.10049955546855927, "learning_rate": 0.00017142857142857143, "loss": 0.5671, "step": 126 },
    { "epoch": 0.34675767918088735, "grad_norm": 0.09643971920013428, "learning_rate": 0.0001727891156462585, "loss": 0.5812, "step": 127 },
    { "epoch": 0.34948805460750854, "grad_norm": 0.08666185289621353, "learning_rate": 0.0001741496598639456, "loss": 0.5487, "step": 128 },
    { "epoch": 0.35221843003412967, "grad_norm": 0.1031438484787941, "learning_rate": 0.00017551020408163265, "loss": 0.5558, "step": 129 },
    { "epoch": 0.35494880546075086, "grad_norm": 0.09404855966567993, "learning_rate": 0.00017687074829931973, "loss": 0.5615, "step": 130 },
    { "epoch": 0.357679180887372, "grad_norm": 0.09127198159694672, "learning_rate": 0.00017823129251700681, "loss": 0.5656, "step": 131 },
    { "epoch": 0.3604095563139932, "grad_norm": 0.08694130182266235, "learning_rate": 0.0001795918367346939, "loss": 0.5379, "step": 132 },
    { "epoch": 0.3631399317406143, "grad_norm": 0.09511597454547882, "learning_rate": 0.00018095238095238095, "loss": 0.5535, "step": 133 },
    { "epoch": 0.3658703071672355, "grad_norm": 0.09129739552736282, "learning_rate": 0.00018231292517006804, "loss": 0.5678, "step": 134 },
    { "epoch": 0.36860068259385664, "grad_norm": 0.09248334169387817, "learning_rate": 0.00018367346938775512, "loss": 0.5574, "step": 135 },
    { "epoch": 0.37133105802047783, "grad_norm": 0.09906318038702011, "learning_rate": 0.0001850340136054422, "loss": 0.5499, "step": 136 },
    { "epoch": 0.37406143344709897, "grad_norm": 0.09928654134273529, "learning_rate": 0.00018639455782312926, "loss": 0.5413, "step": 137 },
    { "epoch": 0.37679180887372016, "grad_norm": 0.07559472322463989, "learning_rate": 0.00018775510204081634, "loss": 0.5475, "step": 138 },
    { "epoch": 0.3795221843003413, "grad_norm": 0.08408834040164948, "learning_rate": 0.00018911564625850343, "loss": 0.5432, "step": 139 },
    { "epoch": 0.3822525597269625, "grad_norm": 0.08800789713859558, "learning_rate": 0.00019047619047619048, "loss": 0.5587, "step": 140 },
    { "epoch": 0.3849829351535836, "grad_norm": 0.09994784742593765, "learning_rate": 0.00019183673469387756, "loss": 0.555, "step": 141 },
    { "epoch": 0.38771331058020475, "grad_norm": 0.07616768032312393, "learning_rate": 0.00019319727891156462, "loss": 0.5621, "step": 142 },
    { "epoch": 0.39044368600682594, "grad_norm": 0.10337202996015549, "learning_rate": 0.0001945578231292517, "loss": 0.5282, "step": 143 },
    { "epoch": 0.3931740614334471, "grad_norm": 0.08526328206062317, "learning_rate": 0.0001959183673469388, "loss": 0.5439, "step": 144 },
    { "epoch": 0.39590443686006827, "grad_norm": 0.10538353770971298, "learning_rate": 0.00019727891156462587, "loss": 0.5481, "step": 145 },
    { "epoch": 0.3986348122866894, "grad_norm": 0.07550521194934845, "learning_rate": 0.00019863945578231293, "loss": 0.5414, "step": 146 },
    { "epoch": 0.4013651877133106, "grad_norm": 0.10045620799064636, "learning_rate": 0.0002, "loss": 0.5382, "step": 147 },
    { "epoch": 0.4040955631399317, "grad_norm": 0.08987366408109665, "learning_rate": 0.00019999971548969982, "loss": 0.5417, "step": 148 },
    { "epoch": 0.4068259385665529, "grad_norm": 0.0801815390586853, "learning_rate": 0.0001999988619604182, "loss": 0.5275, "step": 149 },
    { "epoch": 0.40955631399317405, "grad_norm": 0.08214934170246124, "learning_rate": 0.00019999743941701188, "loss": 0.543, "step": 150 },
    { "epoch": 0.41228668941979524, "grad_norm": 0.08146006613969803, "learning_rate": 0.00019999544786757545, "loss": 0.5409, "step": 151 },
    { "epoch": 0.4150170648464164, "grad_norm": 0.08081945031881332, "learning_rate": 0.00019999288732344122, "loss": 0.5509, "step": 152 },
    { "epoch": 0.41774744027303756, "grad_norm": 0.09135357290506363, "learning_rate": 0.0001999897577991792, "loss": 0.518, "step": 153 },
    { "epoch": 0.4204778156996587, "grad_norm": 0.09191333502531052, "learning_rate": 0.0001999860593125971, "loss": 0.5276, "step": 154 },
    { "epoch": 0.4232081911262799, "grad_norm": 0.08375995606184006, "learning_rate": 0.00019998179188473997, "loss": 0.5319, "step": 155 },
    { "epoch": 0.425938566552901, "grad_norm": 0.08481922000646591, "learning_rate": 0.00019997695553989042, "loss": 0.5437, "step": 156 },
    { "epoch": 0.4286689419795222, "grad_norm": 0.08768640458583832, "learning_rate": 0.00019997155030556822, "loss": 0.5445, "step": 157 },
    { "epoch": 0.43139931740614335, "grad_norm": 0.08787625283002853, "learning_rate": 0.00019996557621253027, "loss": 0.5479, "step": 158 },
    { "epoch": 0.4341296928327645, "grad_norm": 0.09505843371152878, "learning_rate": 0.0001999590332947704, "loss": 0.5263, "step": 159 },
    { "epoch": 0.43686006825938567, "grad_norm": 0.10003377497196198, "learning_rate": 0.00019995192158951919, "loss": 0.5228, "step": 160 },
    { "epoch": 0.4395904436860068, "grad_norm": 0.0675501748919487, "learning_rate": 0.00019994424113724363, "loss": 0.4977, "step": 161 },
    { "epoch": 0.442320819112628, "grad_norm": 0.09747067093849182, "learning_rate": 0.00019993599198164715, "loss": 0.5161, "step": 162 },
    { "epoch": 0.44505119453924913, "grad_norm": 0.0837995857000351, "learning_rate": 0.0001999271741696691, "loss": 0.5243, "step": 163 },
    { "epoch": 0.4477815699658703, "grad_norm": 0.0793512687087059, "learning_rate": 0.00019991778775148465, "loss": 0.5141, "step": 164 },
    { "epoch": 0.45051194539249145, "grad_norm": 0.07802822440862656, "learning_rate": 0.00019990783278050448, "loss": 0.515, "step": 165 },
    { "epoch": 0.45324232081911264, "grad_norm": 0.08355724066495895, "learning_rate": 0.0001998973093133744, "loss": 0.5176, "step": 166 },
    { "epoch": 0.4559726962457338, "grad_norm": 0.08045308291912079, "learning_rate": 0.00019988621740997512, "loss": 0.5151, "step": 167 },
    { "epoch": 0.45870307167235497, "grad_norm": 0.07589907944202423, "learning_rate": 0.00019987455713342187, "loss": 0.5249, "step": 168 },
    { "epoch": 0.4614334470989761, "grad_norm": 0.08553771674633026, "learning_rate": 0.000199862328550064, "loss": 0.5485, "step": 169 },
    { "epoch": 0.4641638225255973, "grad_norm": 0.08599649369716644, "learning_rate": 0.00019984953172948465, "loss": 0.53, "step": 170 },
    { "epoch": 0.4668941979522184, "grad_norm": 0.06906479597091675, "learning_rate": 0.0001998361667445004, "loss": 0.5336, "step": 171 },
    { "epoch": 0.4696245733788396, "grad_norm": 0.07526392489671707, "learning_rate": 0.00019982223367116076, "loss": 0.5013, "step": 172 },
    { "epoch": 0.47235494880546075, "grad_norm": 0.0722610279917717, "learning_rate": 0.00019980773258874778, "loss": 0.5217, "step": 173 },
    { "epoch": 0.4750853242320819, "grad_norm": 0.0773632749915123, "learning_rate": 0.00019979266357977564, "loss": 0.5184, "step": 174 },
    { "epoch": 0.4778156996587031, "grad_norm": 0.07160216569900513, "learning_rate": 0.00019977702672999007, "loss": 0.5009, "step": 175 },
    { "epoch": 0.4805460750853242, "grad_norm": 0.0764177069067955, "learning_rate": 0.00019976082212836793, "loss": 0.5126, "step": 176 },
    { "epoch": 0.4832764505119454, "grad_norm": 0.07116773724555969, "learning_rate": 0.0001997440498671168, "loss": 0.514, "step": 177 },
    { "epoch": 0.48600682593856653, "grad_norm": 0.08402683585882187, "learning_rate": 0.00019972671004167433, "loss": 0.5133, "step": 178 },
    { "epoch": 0.4887372013651877, "grad_norm": 0.07286666333675385, "learning_rate": 0.00019970880275070762, "loss": 0.5221, "step": 179 },
    { "epoch": 0.49146757679180886, "grad_norm": 0.08641263097524643, "learning_rate": 0.00019969032809611287, "loss": 0.4959, "step": 180 },
    { "epoch": 0.49419795221843005, "grad_norm": 0.08849737048149109, "learning_rate": 0.0001996712861830147, "loss": 0.4952, "step": 181 },
    { "epoch": 0.4969283276450512, "grad_norm": 0.08661802858114243, "learning_rate": 0.00019965167711976552, "loss": 0.5023, "step": 182 },
    { "epoch": 0.49965870307167237, "grad_norm": 0.08355259150266647, "learning_rate": 0.0001996315010179449, "loss": 0.5235, "step": 183 },
    { "epoch": 0.5023890784982935, "grad_norm": 0.07524804770946503, "learning_rate": 0.00019961075799235903, "loss": 0.5143, "step": 184 },
    { "epoch": 0.5051194539249146, "grad_norm": 0.08126044273376465, "learning_rate": 0.00019958944816104, "loss": 0.496, "step": 185 },
    { "epoch": 0.5078498293515359, "grad_norm": 0.08320248872041702, "learning_rate": 0.00019956757164524516, "loss": 0.5106, "step": 186 },
    { "epoch": 0.510580204778157, "grad_norm": 0.07375509291887283, "learning_rate": 0.00019954512856945632, "loss": 0.4811, "step": 187 },
    { "epoch": 0.5133105802047782, "grad_norm": 0.07187776267528534, "learning_rate": 0.00019952211906137932, "loss": 0.5104, "step": 188 },
    { "epoch": 0.5160409556313993, "grad_norm": 0.07441398501396179, "learning_rate": 0.00019949854325194294, "loss": 0.5304, "step": 189 },
    { "epoch": 0.5187713310580204, "grad_norm": 0.07976701855659485, "learning_rate": 0.00019947440127529836, "loss": 0.4945, "step": 190 },
    { "epoch": 0.5215017064846417, "grad_norm": 0.07280328124761581, "learning_rate": 0.00019944969326881845, "loss": 0.4848, "step": 191 },
    { "epoch": 0.5242320819112628, "grad_norm": 0.07618428766727448, "learning_rate": 0.00019942441937309684, "loss": 0.4858, "step": 192 },
    { "epoch": 0.5269624573378839, "grad_norm": 0.0665225088596344, "learning_rate": 0.00019939857973194717, "loss": 0.4955, "step": 193 },
    { "epoch": 0.5296928327645051, "grad_norm": 0.08379194140434265, "learning_rate": 0.0001993721744924024, "loss": 0.5067, "step": 194 },
    { "epoch": 0.5324232081911263, "grad_norm": 0.07564423978328705, "learning_rate": 0.00019934520380471372, "loss": 0.5159, "step": 195 },
    { "epoch": 0.5351535836177475, "grad_norm": 0.07225633412599564, "learning_rate": 0.0001993176678223499, "loss": 0.5144, "step": 196 },
    { "epoch": 0.5378839590443686, "grad_norm": 0.07224252074956894, "learning_rate": 0.0001992895667019964, "loss": 0.4859, "step": 197 },
    { "epoch": 0.5406143344709897, "grad_norm": 0.079926997423172, "learning_rate": 0.0001992609006035543, "loss": 0.4872, "step": 198 },
    { "epoch": 0.543344709897611, "grad_norm": 0.08545151352882385, "learning_rate": 0.0001992316696901397, "loss": 0.5105, "step": 199 },
    { "epoch": 0.5460750853242321, "grad_norm": 0.08008193224668503, "learning_rate": 0.00019920187412808248, "loss": 0.4903, "step": 200 },
    { "epoch": 0.5488054607508532, "grad_norm": 0.06717066466808319, "learning_rate": 0.0001991715140869255, "loss": 0.5037, "step": 201 },
    { "epoch": 0.5515358361774744, "grad_norm": 0.08613338321447372, "learning_rate": 0.00019914058973942368, "loss": 0.4999, "step": 202 },
    { "epoch": 0.5542662116040956, "grad_norm": 0.07288234680891037, "learning_rate": 0.00019910910126154293, "loss": 0.5019, "step": 203 },
    { "epoch": 0.5569965870307167, "grad_norm": 0.07831370085477829, "learning_rate": 0.00019907704883245916, "loss": 0.4595, "step": 204 },
    { "epoch": 0.5597269624573379, "grad_norm": 0.0916525200009346, "learning_rate": 0.00019904443263455728, "loss": 0.4994, "step": 205 },
    { "epoch": 0.562457337883959, "grad_norm": 0.07431495934724808, "learning_rate": 0.00019901125285343022, "loss": 0.5059, "step": 206 },
    { "epoch": 0.5651877133105802, "grad_norm": 0.07864730060100555, "learning_rate": 0.0001989775096778777, "loss": 0.4824, "step": 207 },
    { "epoch": 0.5679180887372014, "grad_norm": 0.06928006559610367, "learning_rate": 0.0001989432032999054, "loss": 0.4887, "step": 208 },
    { "epoch": 0.5706484641638225, "grad_norm": 0.07330948859453201, "learning_rate": 0.0001989083339147237, "loss": 0.4804, "step": 209 },
    { "epoch": 0.5733788395904437, "grad_norm": 0.07905860990285873, "learning_rate": 0.0001988729017207465, "loss": 0.5126, "step": 210 },
    { "epoch": 0.5761092150170648, "grad_norm": 0.07062509655952454, "learning_rate": 0.00019883690691959035, "loss": 0.5063, "step": 211 },
    { "epoch": 0.578839590443686, "grad_norm": 0.071404367685318, "learning_rate": 0.00019880034971607308, "loss": 0.495, "step": 212 },
    { "epoch": 0.5815699658703072, "grad_norm": 0.0727284774184227, "learning_rate": 0.00019876323031821266, "loss": 0.4994, "step": 213 },
    { "epoch": 0.5843003412969283, "grad_norm": 0.07198608666658401, "learning_rate": 0.00019872554893722618, "loss": 0.4903, "step": 214 },
    { "epoch": 0.5870307167235495, "grad_norm": 0.07637451589107513, "learning_rate": 0.0001986873057875284, "loss": 0.5057, "step": 215 },
    { "epoch": 0.5897610921501707, "grad_norm": 0.06596951186656952, "learning_rate": 0.00019864850108673073, "loss": 0.4932, "step": 216 },
    { "epoch": 0.5924914675767918, "grad_norm": 0.06999579071998596, "learning_rate": 0.0001986091350556399, "loss": 0.4887, "step": 217 },
    { "epoch": 0.595221843003413, "grad_norm": 0.06687980890274048, "learning_rate": 0.00019856920791825683, "loss": 0.472, "step": 218 },
    { "epoch": 0.5979522184300341, "grad_norm": 0.07001427561044693, "learning_rate": 0.00019852871990177503, "loss": 0.4692, "step": 219 },
    { "epoch": 0.6006825938566553, "grad_norm": 0.06714101880788803, "learning_rate": 0.00019848767123657976, "loss": 0.4813, "step": 220 },
    { "epoch": 0.6034129692832765, "grad_norm": 0.07292049378156662, "learning_rate": 0.0001984460621562463, "loss": 0.4885, "step": 221 },
    { "epoch": 0.6061433447098976, "grad_norm": 0.06814104318618774, "learning_rate": 0.00019840389289753896, "loss": 0.4938, "step": 222 },
    { "epoch": 0.6088737201365187, "grad_norm": 0.06866355985403061, "learning_rate": 0.00019836116370040944, "loss": 0.4776, "step": 223 },
    { "epoch": 0.6116040955631399, "grad_norm": 0.07145702093839645, "learning_rate": 0.00019831787480799568, "loss": 0.4883, "step": 224 },
    { "epoch": 0.6143344709897611, "grad_norm": 0.06319977343082428, "learning_rate": 0.00019827402646662047, "loss": 0.4882, "step": 225 },
    { "epoch": 0.6170648464163823, "grad_norm": 0.08186688274145126, "learning_rate": 0.0001982296189257898, "loss": 0.4917, "step": 226 },
    { "epoch": 0.6197952218430034, "grad_norm": 0.06892900168895721, "learning_rate": 0.00019818465243819184, "loss": 0.4808, "step": 227 },
    { "epoch": 0.6225255972696245, "grad_norm": 0.0752168744802475, "learning_rate": 0.00019813912725969509, "loss": 0.4858, "step": 228 },
    { "epoch": 0.6252559726962458, "grad_norm": 0.08079662919044495, "learning_rate": 0.0001980930436493472, "loss": 0.5101, "step": 229 },
    { "epoch": 0.6279863481228669, "grad_norm": 0.0717153325676918, "learning_rate": 0.00019804640186937343, "loss": 0.4799, "step": 230 },
    { "epoch": 0.630716723549488, "grad_norm": 0.08962002396583557, "learning_rate": 0.0001979992021851751, "loss": 0.5067, "step": 231 },
    { "epoch": 0.6334470989761092, "grad_norm": 0.08904211223125458, "learning_rate": 0.00019795144486532814, "loss": 0.4725, "step": 232 },
    { "epoch": 0.6361774744027304, "grad_norm": 0.06842932850122452, "learning_rate": 0.00019790313018158156, "loss": 0.4996, "step": 233 },
    { "epoch": 0.6389078498293516, "grad_norm": 0.08361311256885529, "learning_rate": 0.0001978542584088558, "loss": 0.4945, "step": 234 },
    { "epoch": 0.6416382252559727, "grad_norm": 0.07219431549310684, "learning_rate": 0.00019780482982524142, "loss": 0.4488, "step": 235 },
    { "epoch": 0.6443686006825938, "grad_norm": 0.07717226445674896, "learning_rate": 0.00019775484471199715, "loss": 0.4814, "step": 236 },
    { "epoch": 0.647098976109215, "grad_norm": 0.07770105451345444, "learning_rate": 0.0001977043033535486, "loss": 0.4731, "step": 237 },
    { "epoch": 0.6498293515358362, "grad_norm": 0.06878919899463654, "learning_rate": 0.00019765320603748655, "loss": 0.4833, "step": 238 },
    { "epoch": 0.6525597269624573, "grad_norm": 0.07085343450307846, "learning_rate": 0.0001976015530545652, "loss": 0.4907, "step": 239 },
    { "epoch": 0.6552901023890785, "grad_norm": 0.07935165613889694, "learning_rate": 0.0001975493446987007, "loss": 0.4794, "step": 240 },
    { "epoch": 0.6580204778156996, "grad_norm": 0.06543820351362228, "learning_rate": 0.00019749658126696934, "loss": 0.4906, "step": 241 },
    { "epoch": 0.6607508532423209, "grad_norm": 0.07727054506540298, "learning_rate": 0.00019744326305960595, "loss": 0.4868, "step": 242 },
    { "epoch": 0.663481228668942, "grad_norm": 0.06668544560670853, "learning_rate": 0.00019738939038000205, "loss": 0.475, "step": 243 },
    { "epoch": 0.6662116040955631, "grad_norm": 0.07048569619655609, "learning_rate": 0.00019733496353470433, "loss": 0.4878, "step": 244 },
    { "epoch": 0.6689419795221843, "grad_norm": 0.07110477238893509, "learning_rate": 0.00019727998283341274, "loss": 0.4663, "step": 245 },
    { "epoch": 0.6716723549488055, "grad_norm": 0.07245586067438126, "learning_rate": 0.00019722444858897878, "loss": 0.4899, "step": 246 },
    { "epoch": 0.6744027303754266, "grad_norm": 0.07484875619411469, "learning_rate": 0.00019716836111740378, "loss": 0.4831, "step": 247 },
    { "epoch": 0.6771331058020478, "grad_norm": 0.07812648266553879, "learning_rate": 0.00019711172073783696, "loss": 0.4654, "step": 248 },
    { "epoch": 0.6798634812286689, "grad_norm": 0.060632165521383286, "learning_rate": 0.00019705452777257377, "loss": 0.4706, "step": 249 },
    { "epoch": 0.6825938566552902, "grad_norm": 0.07092992216348648, "learning_rate": 0.000196996782547054, "loss": 0.4792, "step": 250 },
    { "epoch": 0.6853242320819113, "grad_norm": 0.06629595905542374, "learning_rate": 0.00019693848538985983, "loss": 0.4791, "step": 251 },
    { "epoch": 0.6880546075085324, "grad_norm": 0.06915664672851562, "learning_rate": 0.00019687963663271409, "loss": 0.4623, "step": 252 },
    { "epoch": 0.6907849829351536, "grad_norm": 0.0694665014743805, "learning_rate": 0.00019682023661047836, "loss": 0.48, "step": 253 },
    { "epoch": 0.6935153583617747, "grad_norm": 0.06899196654558182, "learning_rate": 0.00019676028566115102, "loss": 0.4855, "step": 254 },
    { "epoch": 0.6962457337883959, "grad_norm": 0.0740811675786972, "learning_rate": 0.00019669978412586528, "loss": 0.4833, "step": 255 },
    { "epoch": 0.6989761092150171, "grad_norm": 0.06517481803894043, "learning_rate": 0.00019663873234888733, "loss": 0.4523, "step": 256 },
    { "epoch": 0.7017064846416382, "grad_norm": 0.06481153517961502, "learning_rate": 0.0001965771306776144, "loss": 0.4689, "step": 257 },
    { "epoch": 0.7044368600682593, "grad_norm": 0.06042364612221718, "learning_rate": 0.00019651497946257266, "loss": 0.4757, "step": 258 },
    { "epoch": 0.7071672354948806, "grad_norm": 0.0717868059873581, "learning_rate": 0.00019645227905741534, "loss": 0.4773, "step": 259 },
    { "epoch": 0.7098976109215017, "grad_norm": 0.06427443772554398, "learning_rate": 0.00019638902981892068, "loss": 0.4875, "step": 260 },
    { "epoch": 0.7126279863481229, "grad_norm": 0.07786547392606735, "learning_rate": 0.00019632523210698987, "loss": 0.4758, "step": 261 },
    { "epoch": 0.715358361774744, "grad_norm": 0.07115910202264786, "learning_rate": 0.00019626088628464498, "loss": 0.4651, "step": 262 },
    { "epoch": 0.7180887372013652, "grad_norm": 0.06626811623573303, "learning_rate": 0.00019619599271802706, "loss": 0.4873, "step": 263 },
    { "epoch": 0.7208191126279864, "grad_norm": 0.07854583859443665, "learning_rate": 0.00019613055177639384, "loss": 0.4945, "step": 264 },
    { "epoch": 0.7235494880546075, "grad_norm": 0.0847892239689827, "learning_rate": 0.00019606456383211777, "loss": 0.4671, "step": 265 },
    { "epoch": 0.7262798634812286, "grad_norm": 0.06735772639513016, "learning_rate": 0.00019599802926068384, "loss": 0.4767, "step": 266 },
    { "epoch": 0.7290102389078499, "grad_norm": 0.07502768933773041, "learning_rate": 0.00019593094844068748, "loss": 0.462, "step": 267 },
    { "epoch": 0.731740614334471, "grad_norm": 0.07276903837919235, "learning_rate": 0.00019586332175383238, "loss": 0.4754, "step": 268 },
    { "epoch": 0.7344709897610922, "grad_norm": 0.07755447924137115, "learning_rate": 0.00019579514958492826, "loss": 0.492, "step": 269 },
    { "epoch": 0.7372013651877133, "grad_norm": 0.07876396179199219, "learning_rate": 0.0001957264323218889, "loss": 0.4737, "step": 270 },
    { "epoch": 0.7399317406143344, "grad_norm": 0.07997962832450867, "learning_rate": 0.0001956571703557296, "loss": 0.4592, "step": 271 },
    { "epoch": 0.7426621160409557, "grad_norm": 0.08079583197832108, "learning_rate": 0.00019558736408056525, "loss": 0.473, "step": 272 },
    { "epoch": 0.7453924914675768, "grad_norm": 0.0736604854464531, "learning_rate": 0.00019551701389360795, "loss": 0.4741, "step": 273 },
    { "epoch": 0.7481228668941979, "grad_norm": 0.0741550549864769, "learning_rate": 0.00019544612019516472, "loss": 0.4611, "step": 274 },
    { "epoch": 0.7508532423208191, "grad_norm": 0.06802786141633987, "learning_rate": 0.00019537468338863537, "loss": 0.4621, "step": 275 },
    { "epoch": 0.7535836177474403, "grad_norm": 0.06499720364809036, "learning_rate": 0.00019530270388050998, "loss": 0.4676, "step": 276 },
    { "epoch": 0.7563139931740614, "grad_norm": 0.06809037923812866, "learning_rate": 0.00019523018208036677, "loss": 0.475, "step": 277 },
    { "epoch": 0.7590443686006826, "grad_norm": 0.06455886363983154, "learning_rate": 0.0001951571184008698, "loss": 0.4807, "step": 278 },
    { "epoch": 0.7617747440273037, "grad_norm": 0.06833679229021072, "learning_rate": 0.00019508351325776642, "loss": 0.4751, "step": 279 },
    { "epoch": 0.764505119453925, "grad_norm": 0.07593976706266403, "learning_rate": 0.00019500936706988502, "loss": 0.4714, "step": 280 },
    { "epoch": 0.7672354948805461, "grad_norm": 0.0687364712357521, "learning_rate": 0.00019493468025913276, "loss": 0.4575, "step": 281 },
    { "epoch": 0.7699658703071672, "grad_norm": 0.07183225452899933, "learning_rate": 0.00019485945325049288, "loss": 0.4815, "step": 282 },
    { "epoch": 0.7726962457337884, "grad_norm": 0.06775309145450592, "learning_rate": 0.00019478368647202264, "loss": 0.4543, "step": 283 },
    { "epoch": 0.7754266211604095, "grad_norm": 0.06261654198169708, "learning_rate": 0.00019470738035485058, "loss": 0.4724, "step": 284 },
    { "epoch": 0.7781569965870307, "grad_norm": 0.06674676388502121, "learning_rate": 0.00019463053533317425, "loss": 0.4667, "step": 285 },
    { "epoch": 0.7808873720136519, "grad_norm": 0.06266098469495773, "learning_rate": 0.0001945531518442576, "loss": 0.4614, "step": 286 },
    { "epoch": 0.783617747440273, "grad_norm": 0.06769178062677383, "learning_rate": 0.0001944752303284287, "loss": 0.4609, "step": 287 },
    { "epoch": 0.7863481228668942, "grad_norm": 0.07618339359760284, "learning_rate": 0.00019439677122907697, "loss": 0.4822, "step": 288 },
    { "epoch": 0.7890784982935154, "grad_norm": 0.06216439977288246, "learning_rate": 0.00019431777499265087, "loss": 0.4573, "step": 289 },
    { "epoch": 0.7918088737201365, "grad_norm": 0.06998062878847122, "learning_rate": 0.00019423824206865527, "loss": 0.4683, "step": 290 },
    { "epoch": 0.7945392491467577, "grad_norm": 0.06178448721766472, "learning_rate": 0.00019415817290964883, "loss": 0.4643, "step": 291 },
    { "epoch": 0.7972696245733788, "grad_norm": 0.06611185520887375, "learning_rate": 0.00019407756797124164, "loss": 0.4712, "step": 292 },
    { "epoch": 0.8, "grad_norm": 0.06682468205690384, "learning_rate": 0.00019399642771209238, "loss": 0.474, "step": 293 },
    { "epoch": 0.8027303754266212, "grad_norm": 0.0632803738117218, "learning_rate": 0.00019391475259390584, "loss": 0.4776, "step": 294 },
    { "epoch": 0.8054607508532423, "grad_norm": 0.06498962640762329, "learning_rate": 0.0001938325430814302, "loss": 0.4735, "step": 295 },
    { "epoch": 0.8081911262798634, "grad_norm": 0.06621643900871277, "learning_rate": 0.00019374979964245463, "loss": 0.4785, "step": 296 },
    { "epoch": 0.8109215017064847, "grad_norm": 0.05847141519188881, "learning_rate": 0.00019366652274780628, "loss": 0.4702, "step": 297 },
    { "epoch": 0.8136518771331058, "grad_norm": 0.06962229311466217, "learning_rate": 0.00019358271287134784, "loss": 0.4612, "step": 298 },
    { "epoch": 0.816382252559727, "grad_norm": 0.06132384389638901, "learning_rate": 0.00019349837048997478, "loss": 0.4453, "step": 299 },
    { "epoch": 0.8191126279863481, "grad_norm": 0.06574399024248123, "learning_rate": 0.00019341349608361267, "loss": 0.4545, "step": 300 },
    { "epoch": 0.8218430034129692, "grad_norm": 0.06561442464590073, "learning_rate": 0.00019332809013521428, "loss": 0.4619, "step": 301 },
    { "epoch": 0.8245733788395905, "grad_norm": 0.06309875100851059, "learning_rate": 0.00019324215313075706, "loss": 0.465, "step": 302 },
    { "epoch": 0.8273037542662116, "grad_norm": 0.06544878333806992, "learning_rate": 0.00019315568555924035, "loss": 0.4571, "step": 303 },
    { "epoch": 0.8300341296928327, "grad_norm": 0.07011238485574722, "learning_rate": 0.0001930686879126824, "loss": 0.4579, "step": 304 },
    { "epoch": 0.8327645051194539, "grad_norm": 0.06445574760437012, "learning_rate": 0.0001929811606861177, "loss": 0.4695, "step": 305 },
    { "epoch": 0.8354948805460751, "grad_norm": 0.061930734664201736, "learning_rate": 0.00019289310437759427, "loss": 0.4449, "step": 306 },
    { "epoch": 0.8382252559726963, "grad_norm": 0.0658838227391243, "learning_rate": 0.00019280451948817059, "loss": 0.4726, "step": 307 },
    { "epoch": 0.8409556313993174, "grad_norm": 0.06302706897258759, "learning_rate": 0.00019271540652191296, "loss": 0.447, "step": 308 },
    { "epoch": 0.8436860068259385, "grad_norm": 0.08308806270360947, "learning_rate": 0.0001926257659858925, "loss": 0.4605, "step": 309 },
    { "epoch": 0.8464163822525598, "grad_norm": 0.06508838385343552, "learning_rate": 0.00019253559839018235, "loss": 0.4778, "step": 310 },
    { "epoch": 0.8491467576791809, "grad_norm": 0.07429094612598419, "learning_rate": 0.00019244490424785468, "loss": 0.4659, "step": 311 },
    { "epoch": 0.851877133105802, "grad_norm": 0.07138285785913467, "learning_rate": 0.00019235368407497788, "loss": 0.4564, "step": 312 },
    { "epoch": 0.8546075085324232, "grad_norm": 0.07202211022377014, "learning_rate": 0.00019226193839061347, "loss": 0.4377, "step": 313 },
    { "epoch": 0.8573378839590444, "grad_norm": 0.0779070258140564, "learning_rate": 0.0001921696677168133, "loss": 0.4532, "step": 314 },
    { "epoch": 0.8600682593856656, "grad_norm": 0.07717596739530563, "learning_rate": 0.00019207687257861655, "loss": 0.4654, "step": 315 },
    { "epoch": 0.8627986348122867, "grad_norm": 0.0708346962928772, "learning_rate": 0.00019198355350404667, "loss": 0.4584, "step": 316 },
    { "epoch": 0.8655290102389078, "grad_norm": 0.0656716600060463, "learning_rate": 0.00019188971102410837, "loss": 0.4504, "step": 317 },
    { "epoch": 0.868259385665529, "grad_norm": 0.06869971752166748, "learning_rate": 0.00019179534567278475, "loss": 0.4592, "step": 318 },
    { "epoch": 0.8709897610921502, "grad_norm": 0.06358928978443146, "learning_rate": 0.00019170045798703406, "loss": 0.4376, "step": 319 },
    { "epoch": 0.8737201365187713, "grad_norm": 0.06602993607521057, "learning_rate": 0.0001916050485067868, "loss": 0.4692, "step": 320 },
    { "epoch": 0.8764505119453925, "grad_norm": 0.06115058436989784, "learning_rate": 0.00019150911777494258, "loss": 0.462, "step": 321 },
    { "epoch": 0.8791808873720136, "grad_norm": 0.06374403834342957, "learning_rate": 0.00019141266633736697, "loss": 0.4325, "step": 322 },
    { "epoch": 0.8819112627986349, "grad_norm": 0.06459895521402359, "learning_rate": 0.0001913156947428886, "loss": 0.4605, "step": 323 },
    { "epoch": 0.884641638225256, "grad_norm": 0.06160016357898712, "learning_rate": 0.00019121820354329577, "loss": 0.4604, "step": 324 },
    { "epoch": 0.8873720136518771, "grad_norm": 0.06345291435718536, "learning_rate": 0.00019112019329333346, "loss": 0.4565, "step": 325 },
    { "epoch": 0.8901023890784983, "grad_norm": 0.06534894555807114, "learning_rate": 0.00019102166455070024, "loss": 0.4619, "step": 326 },
    { "epoch": 0.8928327645051195, "grad_norm": 0.06186550110578537, "learning_rate": 0.00019092261787604492, "loss": 0.4477, "step": 327 },
    { "epoch": 0.8955631399317406, "grad_norm": 0.058699868619441986, "learning_rate": 0.00019082305383296352, "loss": 0.4484, "step": 328 },
    { "epoch": 0.8982935153583618, "grad_norm": 0.05798410624265671, "learning_rate": 0.00019072297298799589, "loss": 0.4605, "step": 329 },
    { "epoch": 0.9010238907849829, "grad_norm": 0.06147664040327072, "learning_rate": 0.00019062237591062272, "loss": 0.4489, "step": 330 },
    { "epoch": 0.903754266211604, "grad_norm": 0.06032559648156166, "learning_rate": 0.00019052126317326207, "loss": 0.4412, "step": 331 },
    { "epoch": 0.9064846416382253, "grad_norm": 0.06326504051685333, "learning_rate": 0.00019041963535126625, "loss": 0.4547, "step": 332 },
    { "epoch": 0.9092150170648464, "grad_norm": 0.06808637827634811, "learning_rate": 0.0001903174930229185, "loss": 0.4513, "step": 333 },
    { "epoch": 0.9119453924914676, "grad_norm": 0.06384904682636261, "learning_rate": 0.00019021483676942973, "loss": 0.4542, "step": 334 },
    { "epoch": 0.9146757679180887, "grad_norm": 0.07148803770542145, "learning_rate": 0.00019011166717493517, "loss": 0.4569, "step": 335 },
    { "epoch": 0.9174061433447099, "grad_norm": 0.06942867487668991, "learning_rate": 0.000190007984826491, "loss": 0.4496, "step": 336 },
    { "epoch": 0.9201365187713311, "grad_norm": 0.06153569370508194, "learning_rate": 0.00018990379031407124, "loss": 0.464, "step": 337 },
    { "epoch": 0.9228668941979522, "grad_norm": 0.07417679578065872, "learning_rate": 0.00018979908423056408, "loss": 0.4396, "step": 338 },
    { "epoch": 0.9255972696245733, "grad_norm": 0.06745341420173645, "learning_rate": 0.0001896938671717687, "loss": 0.4584, "step": 339 },
    { "epoch": 0.9283276450511946, "grad_norm": 0.060262780636548996, "learning_rate": 0.00018958813973639184, "loss": 0.4363, "step": 340 },
    { "epoch": 0.9310580204778157, "grad_norm": 0.06427337974309921, "learning_rate": 0.0001894819025260444, "loss": 0.4352, "step": 341 },
    { "epoch": 0.9337883959044369, "grad_norm": 0.06150776520371437, "learning_rate": 0.00018937515614523797, "loss": 0.4644, "step": 342 },
    { "epoch": 0.936518771331058, "grad_norm": 0.06864424049854279, "learning_rate": 0.0001892679012013815, "loss": 0.4608, "step": 343 },
    { "epoch": 0.9392491467576792, "grad_norm": 0.06174071133136749, "learning_rate": 0.00018916013830477766, "loss": 0.4402, "step": 344 },
    { "epoch": 0.9419795221843004, "grad_norm": 0.0684589147567749, "learning_rate": 0.00018905186806861957, "loss": 0.4569, "step": 345 },
    { "epoch": 0.9447098976109215, "grad_norm": 0.05750627443194389, "learning_rate": 0.00018894309110898712, "loss": 0.4522, "step": 346 },
    { "epoch": 0.9474402730375426, "grad_norm": 0.0697883740067482, "learning_rate": 0.00018883380804484367, "loss": 0.4594, "step": 347 },
    { "epoch": 0.9501706484641638, "grad_norm": 0.06613462418317795, "learning_rate": 0.00018872401949803237, "loss": 0.4459, "step": 348 },
    { "epoch": 0.952901023890785, "grad_norm": 0.06346327811479568, "learning_rate": 0.00018861372609327263, "loss": 0.4316, "step": 349 },
    { "epoch": 0.9556313993174061, "grad_norm": 0.06382953375577927, "learning_rate": 0.00018850292845815672, "loss": 0.4358, "step": 350 },
    { "epoch": 0.9583617747440273, "grad_norm": 0.07121171057224274, "learning_rate": 0.0001883916272231459, "loss": 0.465, "step": 351 },
    { "epoch": 0.9610921501706484, "grad_norm": 0.06311832368373871, "learning_rate": 0.0001882798230215672, "loss": 0.4478, "step": 352 },
    { "epoch": 0.9638225255972697, "grad_norm": 0.06858519464731216, "learning_rate": 0.00018816751648960956, "loss": 0.4402, "step": 353 },
    { "epoch": 0.9665529010238908, "grad_norm": 0.06063356623053551, "learning_rate": 0.00018805470826632024, "loss": 0.4373, "step": 354 },
    { "epoch": 0.9692832764505119, "grad_norm": 0.06550437211990356, "learning_rate": 0.0001879413989936013, "loss": 0.4448, "step": 355 },
    { "epoch": 0.9720136518771331, "grad_norm": 0.06248946860432625, "learning_rate": 0.00018782758931620584, "loss": 0.4576, "step": 356 },
    { "epoch": 0.9747440273037543, "grad_norm": 0.07067371159791946, "learning_rate": 0.00018771327988173435, "loss": 0.4644, "step": 357 },
    { "epoch": 0.9774744027303754, "grad_norm": 0.06225898116827011, "learning_rate": 0.00018759847134063108, "loss": 0.4617, "step": 358 },
    { "epoch": 0.9802047781569966, "grad_norm": 0.061437107622623444, "learning_rate": 0.0001874831643461803, "loss": 0.4339, "step": 359 },
    { "epoch": 0.9829351535836177, "grad_norm": 0.059149857610464096, "learning_rate": 0.00018736735955450251, "loss": 0.4238, "step": 360 },
    { "epoch": 0.985665529010239, "grad_norm": 0.06511219590902328, "learning_rate": 0.0001872510576245509, "loss": 0.4394, "step": 361 },
    { "epoch": 0.9883959044368601, "grad_norm": 0.06580841541290283, "learning_rate": 0.00018713425921810733, "loss": 0.4218, "step": 362 },
    { "epoch": 0.9911262798634812, "grad_norm": 0.07789267599582672, "learning_rate": 0.00018701696499977884, "loss": 0.4524, "step": 363 },
    { "epoch": 0.9938566552901024, "grad_norm": 0.06430528312921524, "learning_rate": 0.0001868991756369937, "loss": 0.4503, "step": 364 },
    { "epoch": 0.9965870307167235, "grad_norm": 0.06355779618024826, "learning_rate": 0.00018678089179999762, "loss": 0.4556, "step": 365 },
    { "epoch": 0.9993174061433447, "grad_norm": 0.06800378113985062, "learning_rate": 0.00018666211416184999, "loss": 0.44, "step": 366 },
    { "epoch": 0.9993174061433447, "eval_loss": 0.4462641775608063,
|
"eval_runtime": 311.1378, |
|
"eval_samples_per_second": 8.369, |
|
"eval_steps_per_second": 1.048, |
|
"step": 366 |
|
}, |
|
{
"epoch": 1.0020477815699658,
"grad_norm": 0.14618873596191406,
"learning_rate": 0.00018654284339842013,
"loss": 0.7832,
"step": 367
},
{
"epoch": 1.004778156996587,
"grad_norm": 0.10670002549886703,
"learning_rate": 0.00018642308018838316,
"loss": 0.4482,
"step": 368
},
{
"epoch": 1.0075085324232083,
"grad_norm": 0.07775750756263733,
"learning_rate": 0.00018630282521321645,
"loss": 0.4345,
"step": 369
},
{
"epoch": 1.0102389078498293,
"grad_norm": 0.07130205631256104,
"learning_rate": 0.0001861820791571956,
"loss": 0.4294,
"step": 370
},
{
"epoch": 1.0129692832764505,
"grad_norm": 0.07318615168333054,
"learning_rate": 0.00018606084270739049,
"loss": 0.449,
"step": 371
},
{
"epoch": 1.0156996587030718,
"grad_norm": 0.06613319367170334,
"learning_rate": 0.0001859391165536615,
"loss": 0.4435,
"step": 372
},
{
"epoch": 1.0184300341296928,
"grad_norm": 0.06562095880508423,
"learning_rate": 0.0001858169013886556,
"loss": 0.4288,
"step": 373
},
{
"epoch": 1.021160409556314,
"grad_norm": 0.060670241713523865,
"learning_rate": 0.00018569419790780218,
"loss": 0.4029,
"step": 374
},
{
"epoch": 1.023890784982935,
"grad_norm": 0.06414277106523514,
"learning_rate": 0.00018557100680930937,
"loss": 0.4357,
"step": 375
},
{
"epoch": 1.0266211604095563,
"grad_norm": 0.06078667938709259,
"learning_rate": 0.00018544732879415986,
"loss": 0.4188,
"step": 376
},
{
"epoch": 1.0293515358361776,
"grad_norm": 0.06345190107822418,
"learning_rate": 0.00018532316456610704,
"loss": 0.4501,
"step": 377
},
{
"epoch": 1.0320819112627986,
"grad_norm": 0.06139195337891579,
"learning_rate": 0.00018519851483167097,
"loss": 0.438,
"step": 378
},
{
"epoch": 1.0348122866894198,
"grad_norm": 0.059995777904987335,
"learning_rate": 0.00018507338030013427,
"loss": 0.4505,
"step": 379
},
{
"epoch": 1.0375426621160408,
"grad_norm": 0.06199508160352707,
"learning_rate": 0.00018494776168353827,
"loss": 0.4564,
"step": 380
},
{
"epoch": 1.040273037542662,
"grad_norm": 0.062205228954553604,
"learning_rate": 0.00018482165969667874,
"loss": 0.4519,
"step": 381
},
{
"epoch": 1.0430034129692833,
"grad_norm": 0.06433286517858505,
"learning_rate": 0.00018469507505710194,
"loss": 0.4394,
"step": 382
},
{
"epoch": 1.0457337883959044,
"grad_norm": 0.06373082101345062,
"learning_rate": 0.00018456800848510056,
"loss": 0.4456,
"step": 383
},
{
"epoch": 1.0484641638225256,
"grad_norm": 0.0655735656619072,
"learning_rate": 0.00018444046070370963,
"loss": 0.4527,
"step": 384
},
{
"epoch": 1.0511945392491469,
"grad_norm": 0.059250976890325546,
"learning_rate": 0.00018431243243870223,
"loss": 0.4338,
"step": 385
},
{
"epoch": 1.0539249146757679,
"grad_norm": 0.05919628590345383,
"learning_rate": 0.00018418392441858555,
"loss": 0.4252,
"step": 386
},
{
"epoch": 1.0566552901023891,
"grad_norm": 0.07075149565935135,
"learning_rate": 0.0001840549373745968,
"loss": 0.4478,
"step": 387
},
{
"epoch": 1.0593856655290101,
"grad_norm": 0.06196924299001694,
"learning_rate": 0.0001839254720406987,
"loss": 0.4446,
"step": 388
},
{
"epoch": 1.0621160409556314,
"grad_norm": 0.07002051174640656,
"learning_rate": 0.00018379552915357575,
"loss": 0.4668,
"step": 389
},
{
"epoch": 1.0648464163822526,
"grad_norm": 0.05986930802464485,
"learning_rate": 0.00018366510945262972,
"loss": 0.4361,
"step": 390
},
{
"epoch": 1.0675767918088737,
"grad_norm": 0.06568475067615509,
"learning_rate": 0.00018353421367997563,
"loss": 0.4432,
"step": 391
},
{
"epoch": 1.070307167235495,
"grad_norm": 0.063268281519413,
"learning_rate": 0.00018340284258043732,
"loss": 0.4479,
"step": 392
},
{
"epoch": 1.073037542662116,
"grad_norm": 0.06184746325016022,
"learning_rate": 0.00018327099690154344,
"loss": 0.4392,
"step": 393
},
{
"epoch": 1.0757679180887372,
"grad_norm": 0.06682950258255005,
"learning_rate": 0.00018313867739352304,
"loss": 0.4469,
"step": 394
},
{
"epoch": 1.0784982935153584,
"grad_norm": 0.06049386039376259,
"learning_rate": 0.00018300588480930143,
"loss": 0.4448,
"step": 395
},
{
"epoch": 1.0812286689419794,
"grad_norm": 0.058452919125556946,
"learning_rate": 0.0001828726199044957,
"loss": 0.4387,
"step": 396
},
{
"epoch": 1.0839590443686007,
"grad_norm": 0.06608898937702179,
"learning_rate": 0.0001827388834374107,
"loss": 0.4316,
"step": 397
},
{
"epoch": 1.086689419795222,
"grad_norm": 0.06221776083111763,
"learning_rate": 0.0001826046761690344,
"loss": 0.4362,
"step": 398
},
{
"epoch": 1.089419795221843,
"grad_norm": 0.0670786052942276,
"learning_rate": 0.00018246999886303383,
"loss": 0.4394,
"step": 399
},
{
"epoch": 1.0921501706484642,
"grad_norm": 0.061892326921224594,
"learning_rate": 0.00018233485228575063,
"loss": 0.4565,
"step": 400
},
{
"epoch": 1.0948805460750852,
"grad_norm": 0.06282811611890793,
"learning_rate": 0.00018219923720619663,
"loss": 0.4421,
"step": 401
},
{
"epoch": 1.0976109215017065,
"grad_norm": 0.061520010232925415,
"learning_rate": 0.0001820631543960496,
"loss": 0.4346,
"step": 402
},
{
"epoch": 1.1003412969283277,
"grad_norm": 0.05969773232936859,
"learning_rate": 0.0001819266046296487,
"loss": 0.4472,
"step": 403
},
{
"epoch": 1.1030716723549487,
"grad_norm": 0.060664501041173935,
"learning_rate": 0.00018178958868399033,
"loss": 0.453,
"step": 404
},
{
"epoch": 1.10580204778157,
"grad_norm": 0.0612984299659729,
"learning_rate": 0.00018165210733872336,
"loss": 0.4406,
"step": 405
},
{
"epoch": 1.108532423208191,
"grad_norm": 0.059849295765161514,
"learning_rate": 0.000181514161376145,
"loss": 0.4423,
"step": 406
},
{
"epoch": 1.1112627986348123,
"grad_norm": 0.059180960059165955,
"learning_rate": 0.0001813757515811962,
"loss": 0.4401,
"step": 407
},
{
"epoch": 1.1139931740614335,
"grad_norm": 0.05857124924659729,
"learning_rate": 0.00018123687874145721,
"loss": 0.4159,
"step": 408
},
{
"epoch": 1.1167235494880545,
"grad_norm": 0.06205347552895546,
"learning_rate": 0.00018109754364714305,
"loss": 0.4318,
"step": 409
},
{
"epoch": 1.1194539249146758,
"grad_norm": 0.06382250785827637,
"learning_rate": 0.0001809577470910992,
"loss": 0.4416,
"step": 410
},
{
"epoch": 1.122184300341297,
"grad_norm": 0.05814497917890549,
"learning_rate": 0.00018081748986879679,
"loss": 0.4392,
"step": 411
},
{
"epoch": 1.124914675767918,
"grad_norm": 0.058424465358257294,
"learning_rate": 0.00018067677277832834,
"loss": 0.4266,
"step": 412
},
{
"epoch": 1.1276450511945393,
"grad_norm": 0.05630108341574669,
"learning_rate": 0.00018053559662040302,
"loss": 0.4401,
"step": 413
},
{
"epoch": 1.1303754266211605,
"grad_norm": 0.06453561037778854,
"learning_rate": 0.00018039396219834237,
"loss": 0.4267,
"step": 414
},
{
"epoch": 1.1331058020477816,
"grad_norm": 0.06126587092876434,
"learning_rate": 0.00018025187031807532,
"loss": 0.4346,
"step": 415
},
{
"epoch": 1.1358361774744028,
"grad_norm": 0.057017982006073,
"learning_rate": 0.00018010932178813397,
"loss": 0.4367,
"step": 416
},
{
"epoch": 1.1385665529010238,
"grad_norm": 0.06581621617078781,
"learning_rate": 0.00017996631741964888,
"loss": 0.4157,
"step": 417
},
{
"epoch": 1.141296928327645,
"grad_norm": 0.055874526500701904,
"learning_rate": 0.00017982285802634426,
"loss": 0.4341,
"step": 418
},
{
"epoch": 1.144027303754266,
"grad_norm": 0.059336546808481216,
"learning_rate": 0.0001796789444245337,
"loss": 0.4029,
"step": 419
},
{
"epoch": 1.1467576791808873,
"grad_norm": 0.06833340972661972,
"learning_rate": 0.00017953457743311523,
"loss": 0.4564,
"step": 420
},
{
"epoch": 1.1494880546075086,
"grad_norm": 0.061153508722782135,
"learning_rate": 0.00017938975787356673,
"loss": 0.4496,
"step": 421
},
{
"epoch": 1.1522184300341296,
"grad_norm": 0.0649651363492012,
"learning_rate": 0.00017924448656994133,
"loss": 0.4323,
"step": 422
},
{
"epoch": 1.1549488054607508,
"grad_norm": 0.0639922022819519,
"learning_rate": 0.00017909876434886273,
"loss": 0.4421,
"step": 423
},
{
"epoch": 1.157679180887372,
"grad_norm": 0.06662526726722717,
"learning_rate": 0.00017895259203952032,
"loss": 0.4532,
"step": 424
},
{
"epoch": 1.1604095563139931,
"grad_norm": 0.05699828639626503,
"learning_rate": 0.0001788059704736647,
"loss": 0.4382,
"step": 425
},
{
"epoch": 1.1631399317406144,
"grad_norm": 0.06322555243968964,
"learning_rate": 0.00017865890048560277,
"loss": 0.4423,
"step": 426
},
{
"epoch": 1.1658703071672356,
"grad_norm": 0.05652053654193878,
"learning_rate": 0.00017851138291219301,
"loss": 0.4338,
"step": 427
},
{
"epoch": 1.1686006825938566,
"grad_norm": 0.06619950383901596,
"learning_rate": 0.00017836341859284093,
"loss": 0.4272,
"step": 428
},
{
"epoch": 1.1713310580204779,
"grad_norm": 0.060171984136104584,
"learning_rate": 0.00017821500836949386,
"loss": 0.4371,
"step": 429
},
{
"epoch": 1.174061433447099,
"grad_norm": 0.06065813824534416,
"learning_rate": 0.0001780661530866366,
"loss": 0.4064,
"step": 430
},
{
"epoch": 1.1767918088737201,
"grad_norm": 0.06799128651618958,
"learning_rate": 0.00017791685359128633,
"loss": 0.43,
"step": 431
},
{
"epoch": 1.1795221843003414,
"grad_norm": 0.059587378054857254,
"learning_rate": 0.000177767110732988,
"loss": 0.4366,
"step": 432
},
{
"epoch": 1.1822525597269624,
"grad_norm": 0.06191541254520416,
"learning_rate": 0.00017761692536380928,
"loss": 0.415,
"step": 433
},
{
"epoch": 1.1849829351535837,
"grad_norm": 0.0611693374812603,
"learning_rate": 0.00017746629833833585,
"loss": 0.4396,
"step": 434
},
{
"epoch": 1.1877133105802047,
"grad_norm": 0.06228373572230339,
"learning_rate": 0.00017731523051366658,
"loss": 0.431,
"step": 435
},
{
"epoch": 1.190443686006826,
"grad_norm": 0.06130995601415634,
"learning_rate": 0.00017716372274940843,
"loss": 0.4538,
"step": 436
},
{
"epoch": 1.1931740614334472,
"grad_norm": 0.06163164600729942,
"learning_rate": 0.00017701177590767183,
"loss": 0.4251,
"step": 437
},
{
"epoch": 1.1959044368600682,
"grad_norm": 0.061723340302705765,
"learning_rate": 0.00017685939085306562,
"loss": 0.4274,
"step": 438
},
{
"epoch": 1.1986348122866894,
"grad_norm": 0.06078750640153885,
"learning_rate": 0.00017670656845269214,
"loss": 0.4432,
"step": 439
},
{
"epoch": 1.2013651877133107,
"grad_norm": 0.05991605296730995,
"learning_rate": 0.00017655330957614234,
"loss": 0.4167,
"step": 440
},
{
"epoch": 1.2040955631399317,
"grad_norm": 0.05879712477326393,
"learning_rate": 0.00017639961509549078,
"loss": 0.4232,
"step": 441
},
{
"epoch": 1.206825938566553,
"grad_norm": 0.060264360159635544,
"learning_rate": 0.00017624548588529072,
"loss": 0.4361,
"step": 442
},
{
"epoch": 1.209556313993174,
"grad_norm": 0.06511180847883224,
"learning_rate": 0.00017609092282256912,
"loss": 0.4327,
"step": 443
},
{
"epoch": 1.2122866894197952,
"grad_norm": 0.06026393920183182,
"learning_rate": 0.00017593592678682166,
"loss": 0.4195,
"step": 444
},
{
"epoch": 1.2150170648464165,
"grad_norm": 0.06378287822008133,
"learning_rate": 0.0001757804986600077,
"loss": 0.4404,
"step": 445
},
{
"epoch": 1.2177474402730375,
"grad_norm": 0.0656813457608223,
"learning_rate": 0.0001756246393265453,
"loss": 0.4354,
"step": 446
},
{
"epoch": 1.2204778156996587,
"grad_norm": 0.05804288387298584,
"learning_rate": 0.00017546834967330617,
"loss": 0.4352,
"step": 447
},
{
"epoch": 1.2232081911262798,
"grad_norm": 0.06775437295436859,
"learning_rate": 0.00017531163058961066,
"loss": 0.4393,
"step": 448
},
{
"epoch": 1.225938566552901,
"grad_norm": 0.06272158026695251,
"learning_rate": 0.00017515448296722262,
"loss": 0.4178,
"step": 449
},
{
"epoch": 1.2286689419795223,
"grad_norm": 0.06508231163024902,
"learning_rate": 0.00017499690770034443,
"loss": 0.4322,
"step": 450
},
{
"epoch": 1.2313993174061433,
"grad_norm": 0.05709952861070633,
"learning_rate": 0.00017483890568561173,
"loss": 0.4337,
"step": 451
},
{
"epoch": 1.2341296928327645,
"grad_norm": 0.061706554144620895,
"learning_rate": 0.00017468047782208865,
"loss": 0.4126,
"step": 452
},
{
"epoch": 1.2368600682593858,
"grad_norm": 0.056757740676403046,
"learning_rate": 0.00017452162501126227,
"loss": 0.4287,
"step": 453
},
{
"epoch": 1.2395904436860068,
"grad_norm": 0.05650217831134796,
"learning_rate": 0.00017436234815703788,
"loss": 0.4224,
"step": 454
},
{
"epoch": 1.242320819112628,
"grad_norm": 0.05224541947245598,
"learning_rate": 0.0001742026481657335,
"loss": 0.4166,
"step": 455
},
{
"epoch": 1.245051194539249,
"grad_norm": 0.06731689721345901,
"learning_rate": 0.0001740425259460751,
"loss": 0.4538,
"step": 456
},
{
"epoch": 1.2477815699658703,
"grad_norm": 0.060736652463674545,
"learning_rate": 0.00017388198240919102,
"loss": 0.4329,
"step": 457
},
{
"epoch": 1.2505119453924913,
"grad_norm": 0.05695323646068573,
"learning_rate": 0.00017372101846860707,
"loss": 0.4412,
"step": 458
},
{
"epoch": 1.2532423208191126,
"grad_norm": 0.056898247450590134,
"learning_rate": 0.00017355963504024123,
"loss": 0.4418,
"step": 459
},
{
"epoch": 1.2559726962457338,
"grad_norm": 0.059471502900123596,
"learning_rate": 0.00017339783304239843,
"loss": 0.4136,
"step": 460
},
{
"epoch": 1.2587030716723548,
"grad_norm": 0.05504520982503891,
"learning_rate": 0.00017323561339576543,
"loss": 0.4263,
"step": 461
},
{
"epoch": 1.261433447098976,
"grad_norm": 0.059035494923591614,
"learning_rate": 0.0001730729770234054,
"loss": 0.4362,
"step": 462
},
{
"epoch": 1.2641638225255973,
"grad_norm": 0.05722351744771004,
"learning_rate": 0.00017290992485075282,
"loss": 0.4239,
"step": 463
},
{
"epoch": 1.2668941979522184,
"grad_norm": 0.057449549436569214,
"learning_rate": 0.0001727464578056081,
"loss": 0.4357,
"step": 464
},
{
"epoch": 1.2696245733788396,
"grad_norm": 0.0636393278837204,
"learning_rate": 0.00017258257681813244,
"loss": 0.433,
"step": 465
},
{
"epoch": 1.2723549488054609,
"grad_norm": 0.061772268265485764,
"learning_rate": 0.0001724182828208424,
"loss": 0.4365,
"step": 466
},
{
"epoch": 1.2750853242320819,
"grad_norm": 0.053929511457681656,
"learning_rate": 0.0001722535767486047,
"loss": 0.4346,
"step": 467
},
{
"epoch": 1.2778156996587031,
"grad_norm": 0.05948130041360855,
"learning_rate": 0.00017208845953863076,
"loss": 0.4342,
"step": 468
},
{
"epoch": 1.2805460750853244,
"grad_norm": 0.05833544209599495,
"learning_rate": 0.0001719229321304716,
"loss": 0.4309,
"step": 469
},
{
"epoch": 1.2832764505119454,
"grad_norm": 0.055491410195827484,
"learning_rate": 0.00017175699546601223,
"loss": 0.4279,
"step": 470
},
{
"epoch": 1.2860068259385666,
"grad_norm": 0.05924072489142418,
"learning_rate": 0.00017159065048946644,
"loss": 0.432,
"step": 471
},
{
"epoch": 1.2887372013651877,
"grad_norm": 0.05847487971186638,
"learning_rate": 0.00017142389814737142,
"loss": 0.424,
"step": 472
},
{
"epoch": 1.291467576791809,
"grad_norm": 0.05650070682168007,
"learning_rate": 0.00017125673938858237,
"loss": 0.4134,
"step": 473
},
{
"epoch": 1.29419795221843,
"grad_norm": 0.059648044407367706,
"learning_rate": 0.00017108917516426704,
"loss": 0.4279,
"step": 474
},
{
"epoch": 1.2969283276450512,
"grad_norm": 0.060436248779296875,
"learning_rate": 0.00017092120642790042,
"loss": 0.4091,
"step": 475
},
{
"epoch": 1.2996587030716724,
"grad_norm": 0.06787759065628052,
"learning_rate": 0.00017075283413525916,
"loss": 0.4107,
"step": 476
},
{
"epoch": 1.3023890784982934,
"grad_norm": 0.06723356992006302,
"learning_rate": 0.00017058405924441636,
"loss": 0.4339,
"step": 477
},
{
"epoch": 1.3051194539249147,
"grad_norm": 0.058346495032310486,
"learning_rate": 0.00017041488271573587,
"loss": 0.441,
"step": 478
},
{
"epoch": 1.307849829351536,
"grad_norm": 0.059269823133945465,
"learning_rate": 0.00017024530551186702,
"loss": 0.4338,
"step": 479
},
{
"epoch": 1.310580204778157,
"grad_norm": 0.05570577457547188,
"learning_rate": 0.000170075328597739,
"loss": 0.4176,
"step": 480
},
{
"epoch": 1.3133105802047782,
"grad_norm": 0.05658780783414841,
"learning_rate": 0.00016990495294055548,
"loss": 0.4327,
"step": 481
},
{
"epoch": 1.3160409556313994,
"grad_norm": 0.06438103318214417,
"learning_rate": 0.00016973417950978906,
"loss": 0.4451,
"step": 482
},
{
"epoch": 1.3187713310580205,
"grad_norm": 0.06003286689519882,
"learning_rate": 0.00016956300927717575,
"loss": 0.4245,
"step": 483
},
{
"epoch": 1.3215017064846417,
"grad_norm": 0.06092451140284538,
"learning_rate": 0.0001693914432167094,
"loss": 0.4331,
"step": 484
},
{
"epoch": 1.3242320819112627,
"grad_norm": 0.059084732085466385,
"learning_rate": 0.00016921948230463625,
"loss": 0.4261,
"step": 485
},
{
"epoch": 1.326962457337884,
"grad_norm": 0.059612493962049484,
"learning_rate": 0.00016904712751944931,
"loss": 0.4356,
"step": 486
},
{
"epoch": 1.329692832764505,
"grad_norm": 0.05373890697956085,
"learning_rate": 0.00016887437984188286,
"loss": 0.4221,
"step": 487
},
{
"epoch": 1.3324232081911263,
"grad_norm": 0.06069657579064369,
"learning_rate": 0.00016870124025490673,
"loss": 0.4343,
"step": 488
},
{
"epoch": 1.3351535836177475,
"grad_norm": 0.058680132031440735,
"learning_rate": 0.0001685277097437208,
"loss": 0.4376,
"step": 489
},
{
"epoch": 1.3378839590443685,
"grad_norm": 0.052157819271087646,
"learning_rate": 0.0001683537892957495,
"loss": 0.4194,
"step": 490
},
{
"epoch": 1.3406143344709898,
"grad_norm": 0.05680167302489281,
"learning_rate": 0.00016817947990063598,
"loss": 0.4214,
"step": 491
},
{
"epoch": 1.343344709897611,
"grad_norm": 0.061938587576150894,
"learning_rate": 0.0001680047825502366,
"loss": 0.4413,
"step": 492
},
{
"epoch": 1.346075085324232,
"grad_norm": 0.05423510819673538,
"learning_rate": 0.00016782969823861526,
"loss": 0.4188,
"step": 493
},
{
"epoch": 1.3488054607508533,
"grad_norm": 0.059597909450531006,
"learning_rate": 0.0001676542279620378,
"loss": 0.4188,
"step": 494
},
{
"epoch": 1.3515358361774745,
"grad_norm": 0.05773560330271721,
"learning_rate": 0.00016747837271896622,
"loss": 0.4354,
"step": 495
},
{
"epoch": 1.3542662116040955,
"grad_norm": 0.06316240131855011,
"learning_rate": 0.00016730213351005303,
"loss": 0.4248,
"step": 496
},
{
"epoch": 1.3569965870307168,
"grad_norm": 0.056602396070957184,
"learning_rate": 0.00016712551133813572,
"loss": 0.4227,
"step": 497
},
{
"epoch": 1.3597269624573378,
"grad_norm": 0.06384044885635376,
"learning_rate": 0.0001669485072082308,
"loss": 0.4398,
"step": 498
},
{
"epoch": 1.362457337883959,
"grad_norm": 0.06040973588824272,
"learning_rate": 0.00016677112212752824,
"loss": 0.4168,
"step": 499
},
{
"epoch": 1.36518771331058,
"grad_norm": 0.05779508873820305,
"learning_rate": 0.00016659335710538564,
"loss": 0.4097,
"step": 500
},
{
"epoch": 1.3679180887372013,
"grad_norm": 0.060474693775177,
"learning_rate": 0.00016641521315332265,
"loss": 0.4252,
"step": 501
},
{
"epoch": 1.3706484641638226,
"grad_norm": 0.05790797993540764,
"learning_rate": 0.00016623669128501504,
"loss": 0.4238,
"step": 502
},
{
"epoch": 1.3733788395904436,
"grad_norm": 0.06164141371846199,
"learning_rate": 0.00016605779251628903,
"loss": 0.4336,
"step": 503
},
{
"epoch": 1.3761092150170648,
"grad_norm": 0.055059127509593964,
"learning_rate": 0.00016587851786511543,
"loss": 0.4303,
"step": 504
},
{
"epoch": 1.378839590443686,
"grad_norm": 0.05771743133664131,
"learning_rate": 0.00016569886835160399,
"loss": 0.4352,
"step": 505
},
{
"epoch": 1.3815699658703071,
"grad_norm": 0.056050512939691544,
"learning_rate": 0.0001655188449979974,
"loss": 0.4233,
"step": 506
},
{
"epoch": 1.3843003412969284,
"grad_norm": 0.054744672030210495,
"learning_rate": 0.00016533844882866568,
"loss": 0.415,
"step": 507
},
{
"epoch": 1.3870307167235496,
"grad_norm": 0.060217492282390594,
"learning_rate": 0.00016515768087010013,
"loss": 0.3959,
"step": 508
},
{
"epoch": 1.3897610921501706,
"grad_norm": 0.0636279284954071,
"learning_rate": 0.00016497654215090772,
"loss": 0.4341,
"step": 509
},
{
"epoch": 1.3924914675767919,
"grad_norm": 0.05640679970383644,
"learning_rate": 0.00016479503370180507,
"loss": 0.3917,
"step": 510
},
{
"epoch": 1.395221843003413,
"grad_norm": 0.05939646065235138,
"learning_rate": 0.00016461315655561263,
"loss": 0.4378,
"step": 511
},
{
"epoch": 1.3979522184300341,
"grad_norm": 0.05862488970160484,
"learning_rate": 0.00016443091174724885,
"loss": 0.4017,
"step": 512
},
{
"epoch": 1.4006825938566552,
"grad_norm": 0.060345377773046494,
"learning_rate": 0.00016424830031372425,
"loss": 0.4248,
"step": 513
},
{
"epoch": 1.4034129692832764,
"grad_norm": 0.06127999722957611,
"learning_rate": 0.00016406532329413546,
"loss": 0.4129,
"step": 514
},
{
"epoch": 1.4061433447098977,
"grad_norm": 0.0599684976041317,
"learning_rate": 0.00016388198172965942,
"loss": 0.4223,
"step": 515
},
{
"epoch": 1.4088737201365187,
"grad_norm": 0.056950025260448456,
"learning_rate": 0.00016369827666354745,
"loss": 0.4293,
"step": 516
},
{
"epoch": 1.41160409556314,
"grad_norm": 0.05798695236444473,
"learning_rate": 0.00016351420914111916,
"loss": 0.4163,
"step": 517
},
{
"epoch": 1.4143344709897612,
"grad_norm": 0.056971821933984756,
"learning_rate": 0.0001633297802097567,
"loss": 0.4088,
"step": 518
},
{
"epoch": 1.4170648464163822,
"grad_norm": 0.06520035862922668,
"learning_rate": 0.0001631449909188987,
"loss": 0.4316,
"step": 519
},
{
"epoch": 1.4197952218430034,
"grad_norm": 0.054386623203754425,
"learning_rate": 0.00016295984232003426,
"loss": 0.4276,
"step": 520
},
{
"epoch": 1.4225255972696247,
"grad_norm": 0.06270336359739304,
"learning_rate": 0.00016277433546669703,
"loss": 0.4133,
"step": 521
},
{
"epoch": 1.4252559726962457,
"grad_norm": 0.05896778032183647,
"learning_rate": 0.00016258847141445928,
"loss": 0.4331,
"step": 522
},
{
"epoch": 1.427986348122867,
"grad_norm": 0.06417705118656158,
"learning_rate": 0.00016240225122092573,
"loss": 0.4306,
"step": 523
},
{
"epoch": 1.430716723549488,
"grad_norm": 0.06666136533021927,
"learning_rate": 0.00016221567594572762,
"loss": 0.4369,
"step": 524
},
{
"epoch": 1.4334470989761092,
"grad_norm": 0.06409899890422821,
"learning_rate": 0.00016202874665051674,
"loss": 0.442,
"step": 525
},
{
"epoch": 1.4361774744027302,
"grad_norm": 0.06460480391979218,
"learning_rate": 0.00016184146439895928,
"loss": 0.4114,
"step": 526
},
{
"epoch": 1.4389078498293515,
"grad_norm": 0.06045004725456238,
"learning_rate": 0.00016165383025672981,
"loss": 0.424,
"step": 527
},
{
"epoch": 1.4416382252559727,
"grad_norm": 0.0617341473698616,
"learning_rate": 0.00016146584529150526,
"loss": 0.4201,
"step": 528
},
{
"epoch": 1.4443686006825938,
"grad_norm": 0.06265206634998322,
"learning_rate": 0.0001612775105729588,
"loss": 0.4145,
"step": 529
},
{
"epoch": 1.447098976109215,
"grad_norm": 0.06431074440479279,
"learning_rate": 0.00016108882717275384,
"loss": 0.397,
"step": 530
},
{
"epoch": 1.4498293515358363,
"grad_norm": 0.05702768266201019,
"learning_rate": 0.0001608997961645377,
"loss": 0.4024,
"step": 531
},
{
"epoch": 1.4525597269624573,
"grad_norm": 0.06387649476528168,
"learning_rate": 0.00016071041862393578,
"loss": 0.4369,
"step": 532
},
{
"epoch": 1.4552901023890785,
"grad_norm": 0.06181952729821205,
"learning_rate": 0.0001605206956285454,
"loss": 0.4391,
"step": 533
},
{
"epoch": 1.4580204778156998,
"grad_norm": 0.060091473162174225,
"learning_rate": 0.00016033062825792935,
"loss": 0.4207,
"step": 534
},
{
"epoch": 1.4607508532423208,
"grad_norm": 0.059614650905132294,
"learning_rate": 0.0001601402175936102,
"loss": 0.409,
"step": 535
},
{
"epoch": 1.463481228668942,
"grad_norm": 0.06142239645123482,
"learning_rate": 0.00015994946471906382,
"loss": 0.4236,
"step": 536
},
{
"epoch": 1.466211604095563,
"grad_norm": 0.06790998578071594,
"learning_rate": 0.0001597583707197134,
"loss": 0.4131,
"step": 537
},
{
"epoch": 1.4689419795221843,
"grad_norm": 0.05919467657804489,
"learning_rate": 0.00015956693668292313,
"loss": 0.418,
"step": 538
},
{
"epoch": 1.4716723549488053,
"grad_norm": 0.06804287433624268,
"learning_rate": 0.00015937516369799216,
"loss": 0.4216,
"step": 539
},
{
"epoch": 1.4744027303754266,
"grad_norm": 0.061936333775520325,
"learning_rate": 0.00015918305285614822,
"loss": 0.4239,
"step": 540
},
{
"epoch": 1.4771331058020478,
"grad_norm": 0.06181802973151207,
"learning_rate": 0.00015899060525054157,
"loss": 0.4136,
"step": 541
},
{
"epoch": 1.4798634812286688,
"grad_norm": 0.05767858028411865,
"learning_rate": 0.0001587978219762388,
"loss": 0.4178,
"step": 542
},
{
"epoch": 1.48259385665529,
"grad_norm": 0.06959601491689682,
"learning_rate": 0.00015860470413021642,
"loss": 0.4271,
"step": 543
},
{
"epoch": 1.4853242320819113,
"grad_norm": 0.05592988058924675,
"learning_rate": 0.00015841125281135473,
"loss": 0.4165,
"step": 544
},
{
"epoch": 1.4880546075085324,
"grad_norm": 0.06603039801120758,
"learning_rate": 0.00015821746912043165,
"loss": 0.4359,
"step": 545
},
{
"epoch": 1.4907849829351536,
"grad_norm": 0.05518212914466858,
"learning_rate": 0.00015802335416011625,
"loss": 0.4284,
"step": 546
},
{
"epoch": 1.4935153583617748,
"grad_norm": 0.062445998191833496,
"learning_rate": 0.00015782890903496264,
"loss": 0.4171,
"step": 547
},
{
"epoch": 1.4962457337883959,
"grad_norm": 0.05508886277675629,
"learning_rate": 0.00015763413485140365,
"loss": 0.4001,
"step": 548
},
{
"epoch": 1.4989761092150171,
"grad_norm": 0.0545768216252327,
"learning_rate": 0.00015743903271774455,
"loss": 0.4081,
"step": 549
},
{
"epoch": 1.5017064846416384,
"grad_norm": 0.058887772262096405,
"learning_rate": 0.0001572436037441566,
"loss": 0.4224,
"step": 550
},
{
"epoch": 1.5044368600682594,
"grad_norm": 0.05538494512438774,
"learning_rate": 0.00015704784904267097,
"loss": 0.4254,
"step": 551
},
{
"epoch": 1.5071672354948804,
"grad_norm": 0.05865982919931412,
"learning_rate": 0.00015685176972717223,
"loss": 0.4142,
"step": 552
},
{
"epoch": 1.5098976109215017,
"grad_norm": 0.05798998102545738,
"learning_rate": 0.00015665536691339207,
"loss": 0.4298,
"step": 553
},
{
"epoch": 1.512627986348123,
"grad_norm": 0.05779840052127838,
"learning_rate": 0.00015645864171890295,
"loss": 0.4145,
"step": 554
},
{
"epoch": 1.515358361774744,
"grad_norm": 0.05778159946203232,
"learning_rate": 0.00015626159526311174,
"loss": 0.4249,
"step": 555
},
{
"epoch": 1.5180887372013652,
"grad_norm": 0.0566212497651577,
"learning_rate": 0.00015606422866725343,
"loss": 0.4366,
"step": 556
},
{
"epoch": 1.5208191126279864,
"grad_norm": 0.05623873695731163,
"learning_rate": 0.00015586654305438456,
"loss": 0.4297,
"step": 557
},
{
"epoch": 1.5235494880546074,
"grad_norm": 0.05833446979522705,
"learning_rate": 0.00015566853954937694,
"loss": 0.4361,
"step": 558
},
{
"epoch": 1.5262798634812287,
"grad_norm": 0.05821897089481354,
"learning_rate": 0.00015547021927891144,
"loss": 0.4309,
"step": 559
},
{
"epoch": 1.52901023890785,
"grad_norm": 0.05831674486398697,
"learning_rate": 0.00015527158337147112,
"loss": 0.4228,
"step": 560
},
{
"epoch": 1.531740614334471,
"grad_norm": 0.05716761201620102,
"learning_rate": 0.00015507263295733528,
"loss": 0.4237,
"step": 561
},
{
"epoch": 1.5344709897610922,
"grad_norm": 0.061434000730514526,
"learning_rate": 0.00015487336916857278,
"loss": 0.4307,
"step": 562
},
{
"epoch": 1.5372013651877134,
"grad_norm": 0.055752865970134735,
"learning_rate": 0.00015467379313903557,
"loss": 0.4089,
"step": 563
},
{
"epoch": 1.5399317406143345,
"grad_norm": 0.05673924833536148,
"learning_rate": 0.00015447390600435238,
"loss": 0.3955,
"step": 564
},
{
"epoch": 1.5426621160409555,
"grad_norm": 0.05844118818640709,
"learning_rate": 0.00015427370890192224,
"loss": 0.4266,
"step": 565
},
{
"epoch": 1.545392491467577,
"grad_norm": 0.05962743982672691,
"learning_rate": 0.00015407320297090786,
"loss": 0.4063,
"step": 566
},
{
"epoch": 1.548122866894198,
"grad_norm": 0.05776818096637726,
"learning_rate": 0.00015387238935222927,
"loss": 0.4236,
"step": 567
},
{
"epoch": 1.550853242320819,
"grad_norm": 0.05769157037138939,
"learning_rate": 0.00015367126918855738,
"loss": 0.4183,
"step": 568
},
{
"epoch": 1.5535836177474402,
"grad_norm": 0.05596569553017616,
"learning_rate": 0.0001534698436243073,
"loss": 0.4074,
"step": 569
},
{
"epoch": 1.5563139931740615,
"grad_norm": 0.05986526980996132,
"learning_rate": 0.00015326811380563204,
"loss": 0.4166,
"step": 570
},
{
"epoch": 1.5590443686006825,
"grad_norm": 0.05552714318037033,
"learning_rate": 0.0001530660808804158,
"loss": 0.3986,
"step": 571
},
{
"epoch": 1.5617747440273038,
"grad_norm": 0.05853855237364769,
"learning_rate": 0.00015286374599826754,
"loss": 0.3964,
"step": 572
},
{
"epoch": 1.564505119453925,
"grad_norm": 0.06155244633555412,
"learning_rate": 0.00015266111031051442,
"loss": 0.4041,
"step": 573
},
{
"epoch": 1.567235494880546,
"grad_norm": 0.061913736164569855,
"learning_rate": 0.00015245817497019524,
"loss": 0.4228,
"step": 574
},
{
"epoch": 1.5699658703071673,
"grad_norm": 0.05519396439194679,
"learning_rate": 0.00015225494113205393,
"loss": 0.4124,
"step": 575
},
{
"epoch": 1.5726962457337885,
"grad_norm": 0.05629811808466911,
"learning_rate": 0.00015205140995253283,
"loss": 0.418,
"step": 576
},
{
"epoch": 1.5754266211604095,
"grad_norm": 0.051916785538196564,
"learning_rate": 0.00015184758258976637,
"loss": 0.4327,
"step": 577
},
{
"epoch": 1.5781569965870306,
"grad_norm": 0.05583992972970009,
"learning_rate": 0.00015164346020357417,
"loss": 0.417,
"step": 578
},
{
"epoch": 1.580887372013652,
"grad_norm": 0.05611740052700043,
"learning_rate": 0.00015143904395545466,
"loss": 0.413,
"step": 579
},
{
"epoch": 1.583617747440273,
"grad_norm": 0.05637525022029877,
"learning_rate": 0.0001512343350085784,
"loss": 0.4113,
"step": 580
},
{
"epoch": 1.586348122866894,
"grad_norm": 0.059624236077070236,
"learning_rate": 0.0001510293345277815,
"loss": 0.4321,
"step": 581
},
{
"epoch": 1.5890784982935153,
"grad_norm": 0.05502263084053993,
"learning_rate": 0.0001508240436795589,
"loss": 0.409,
"step": 582
},
{
"epoch": 1.5918088737201366,
"grad_norm": 0.05809929221868515,
"learning_rate": 0.00015061846363205784,
"loss": 0.4129,
"step": 583
},
{
"epoch": 1.5945392491467576,
"grad_norm": 0.05428490787744522,
"learning_rate": 0.00015041259555507108,
"loss": 0.4181,
"step": 584
},
{
"epoch": 1.5972696245733788,
"grad_norm": 0.05276649072766304,
"learning_rate": 0.00015020644062003046,
"loss": 0.3996,
"step": 585
},
{
"epoch": 1.6,
"grad_norm": 0.06145811080932617,
"learning_rate": 0.00015000000000000001,
"loss": 0.4156,
"step": 586
},
{
"epoch": 1.6027303754266211,
"grad_norm": 0.05626256391406059,
"learning_rate": 0.00014979327486966938,
"loss": 0.4184,
"step": 587
},
{
"epoch": 1.6054607508532424,
"grad_norm": 0.06118204817175865,
"learning_rate": 0.0001495862664053471,
"loss": 0.4208,
"step": 588
},
{
"epoch": 1.6081911262798636,
"grad_norm": 0.06345456838607788,
"learning_rate": 0.0001493789757849541,
"loss": 0.4234,
"step": 589
},
{
"epoch": 1.6109215017064846,
"grad_norm": 0.058717817068099976,
"learning_rate": 0.00014917140418801655,
"loss": 0.4176,
"step": 590
},
{
"epoch": 1.6136518771331056,
"grad_norm": 0.05213068425655365,
"learning_rate": 0.00014896355279565976,
"loss": 0.3857,
"step": 591
},
{
"epoch": 1.6163822525597271,
"grad_norm": 0.056677792221307755,
"learning_rate": 0.00014875542279060085,
"loss": 0.4211,
"step": 592
},
{
"epoch": 1.6191126279863481,
"grad_norm": 0.058997780084609985,
"learning_rate": 0.00014854701535714244,
"loss": 0.4174,
"step": 593
},
{
"epoch": 1.6218430034129692,
"grad_norm": 0.0554414838552475,
"learning_rate": 0.00014833833168116582,
"loss": 0.4182,
"step": 594
},
{
"epoch": 1.6245733788395904,
"grad_norm": 0.06074132025241852,
"learning_rate": 0.00014812937295012406,
"loss": 0.4261,
"step": 595
},
{
"epoch": 1.6273037542662117,
"grad_norm": 0.05850062891840935,
"learning_rate": 0.00014792014035303535,
"loss": 0.4085,
"step": 596
},
{
"epoch": 1.6300341296928327,
"grad_norm": 0.06121140718460083,
"learning_rate": 0.00014771063508047636,
"loss": 0.4183,
"step": 597
},
{
"epoch": 1.632764505119454,
"grad_norm": 0.06299193948507309,
"learning_rate": 0.00014750085832457519,
"loss": 0.426,
"step": 598
},
{
"epoch": 1.6354948805460752,
"grad_norm": 0.06619743257761002,
"learning_rate": 0.00014729081127900476,
"loss": 0.4129,
"step": 599
},
{
"epoch": 1.6382252559726962,
"grad_norm": 0.05819617956876755,
"learning_rate": 0.0001470804951389761,
"loss": 0.4129,
"step": 600
},
{
"epoch": 1.6409556313993174,
"grad_norm": 0.06314659863710403,
"learning_rate": 0.00014686991110123135,
"loss": 0.3967,
"step": 601
},
{
"epoch": 1.6436860068259387,
"grad_norm": 0.05983169004321098,
"learning_rate": 0.00014665906036403706,
"loss": 0.4161,
"step": 602
},
{
"epoch": 1.6464163822525597,
"grad_norm": 0.06163496896624565,
"learning_rate": 0.00014644794412717736,
"loss": 0.4103,
"step": 603
},
{
"epoch": 1.6491467576791807,
"grad_norm": 0.06737516075372696,
"learning_rate": 0.00014623656359194712,
"loss": 0.4215,
"step": 604
},
{
"epoch": 1.6518771331058022,
"grad_norm": 0.058461885899305344,
"learning_rate": 0.00014602491996114516,
"loss": 0.4168,
"step": 605
},
{
"epoch": 1.6546075085324232,
"grad_norm": 0.06050106883049011,
"learning_rate": 0.0001458130144390673,
"loss": 0.4184,
"step": 606
},
{
"epoch": 1.6573378839590442,
"grad_norm": 0.059844836592674255,
"learning_rate": 0.00014560084823149965,
"loss": 0.4181,
"step": 607
},
{
"epoch": 1.6600682593856655,
"grad_norm": 0.05483812466263771,
"learning_rate": 0.0001453884225457116,
"loss": 0.3996,
"step": 608
},
{
"epoch": 1.6627986348122867,
"grad_norm": 0.06310712546110153,
"learning_rate": 0.00014517573859044907,
"loss": 0.4266,
"step": 609
},
{
"epoch": 1.6655290102389078,
"grad_norm": 0.06159716099500656,
"learning_rate": 0.00014496279757592766,
"loss": 0.4248,
"step": 610
},
{
"epoch": 1.668259385665529,
"grad_norm": 0.058709222823381424,
"learning_rate": 0.0001447496007138255,
"loss": 0.4067,
"step": 611
},
{
"epoch": 1.6709897610921502,
"grad_norm": 0.05836094543337822,
"learning_rate": 0.00014453614921727668,
"loss": 0.4005,
"step": 612
},
{
"epoch": 1.6737201365187713,
"grad_norm": 0.05980111286044121,
"learning_rate": 0.00014432244430086423,
"loss": 0.4222,
"step": 613
},
{
"epoch": 1.6764505119453925,
"grad_norm": 0.05967998504638672,
"learning_rate": 0.00014410848718061312,
"loss": 0.4075,
"step": 614
},
{
"epoch": 1.6791808873720138,
"grad_norm": 0.05903726816177368,
"learning_rate": 0.00014389427907398342,
"loss": 0.4007,
"step": 615
},
{
"epoch": 1.6819112627986348,
"grad_norm": 0.05877222120761871,
"learning_rate": 0.00014367982119986342,
"loss": 0.4234,
"step": 616
},
{
"epoch": 1.6846416382252558,
"grad_norm": 0.0625043734908104,
"learning_rate": 0.00014346511477856259,
"loss": 0.4165,
"step": 617
},
{
"epoch": 1.6873720136518773,
"grad_norm": 0.05730627477169037,
"learning_rate": 0.0001432501610318047,
"loss": 0.4221,
"step": 618
},
{
"epoch": 1.6901023890784983,
"grad_norm": 0.05606284737586975,
"learning_rate": 0.00014303496118272084,
"loss": 0.4201,
"step": 619
},
{
"epoch": 1.6928327645051193,
"grad_norm": 0.056516390293836594,
"learning_rate": 0.0001428195164558425,
"loss": 0.4241,
"step": 620
},
{
"epoch": 1.6955631399317406,
"grad_norm": 0.0579177550971508,
"learning_rate": 0.00014260382807709457,
"loss": 0.4147,
"step": 621
},
{
"epoch": 1.6982935153583618,
"grad_norm": 0.05802591145038605,
"learning_rate": 0.0001423878972737883,
"loss": 0.409,
"step": 622
},
{
"epoch": 1.7010238907849828,
"grad_norm": 0.05921417847275734,
"learning_rate": 0.0001421717252746145,
"loss": 0.4126,
"step": 623
},
{
"epoch": 1.703754266211604,
"grad_norm": 0.0596776120364666,
"learning_rate": 0.00014195531330963635,
"loss": 0.405,
"step": 624
},
{
"epoch": 1.7064846416382253,
"grad_norm": 0.057035986334085464,
"learning_rate": 0.0001417386626102825,
"loss": 0.4208,
"step": 625
},
{
"epoch": 1.7092150170648464,
"grad_norm": 0.05868854373693466,
"learning_rate": 0.00014152177440934012,
"loss": 0.4186,
"step": 626
},
{
"epoch": 1.7119453924914676,
"grad_norm": 0.058524154126644135,
"learning_rate": 0.0001413046499409477,
"loss": 0.4072,
"step": 627
},
{
"epoch": 1.7146757679180888,
"grad_norm": 0.05203258991241455,
"learning_rate": 0.0001410872904405882,
"loss": 0.3929,
"step": 628
},
{
"epoch": 1.7174061433447099,
"grad_norm": 0.059925347566604614,
"learning_rate": 0.00014086969714508196,
"loss": 0.4211,
"step": 629
},
{
"epoch": 1.7201365187713311,
"grad_norm": 0.0577407106757164,
"learning_rate": 0.00014065187129257964,
"loss": 0.4128,
"step": 630
},
{
"epoch": 1.7228668941979524,
"grad_norm": 0.06548412144184113,
"learning_rate": 0.00014043381412255526,
"loss": 0.4117,
"step": 631
},
{
"epoch": 1.7255972696245734,
"grad_norm": 0.060420285910367966,
"learning_rate": 0.00014021552687579902,
"loss": 0.4176,
"step": 632
},
{
"epoch": 1.7283276450511944,
"grad_norm": 0.05787500739097595,
"learning_rate": 0.00013999701079441028,
"loss": 0.4173,
"step": 633
},
{
"epoch": 1.7310580204778157,
"grad_norm": 0.10321489721536636,
"learning_rate": 0.00013977826712179058,
"loss": 0.4098,
"step": 634
},
{
"epoch": 1.733788395904437,
"grad_norm": 0.05935697257518768,
"learning_rate": 0.00013955929710263653,
"loss": 0.433,
"step": 635
},
{
"epoch": 1.736518771331058,
"grad_norm": 0.05731033533811569,
"learning_rate": 0.00013934010198293257,
"loss": 0.4117,
"step": 636
},
{
"epoch": 1.7392491467576792,
"grad_norm": 0.05932068079710007,
"learning_rate": 0.00013912068300994413,
"loss": 0.4,
"step": 637
},
{
"epoch": 1.7419795221843004,
"grad_norm": 0.06352514028549194,
"learning_rate": 0.0001389010414322104,
"loss": 0.4135,
"step": 638
},
{
"epoch": 1.7447098976109214,
"grad_norm": 0.0548391118645668,
"learning_rate": 0.0001386811784995371,
"loss": 0.3998,
"step": 639
},
{
"epoch": 1.7474402730375427,
"grad_norm": 0.05962222442030907,
"learning_rate": 0.00013846109546298971,
"loss": 0.3982,
"step": 640
},
{
"epoch": 1.750170648464164,
"grad_norm": 0.056578923016786575,
"learning_rate": 0.00013824079357488598,
"loss": 0.4187,
"step": 641
},
{
"epoch": 1.752901023890785,
"grad_norm": 0.05794934183359146,
"learning_rate": 0.0001380202740887891,
"loss": 0.406,
"step": 642
},
{
"epoch": 1.7556313993174062,
"grad_norm": 0.056768182665109634,
"learning_rate": 0.00013779953825950034,
"loss": 0.4129,
"step": 643
},
{
"epoch": 1.7583617747440274,
"grad_norm": 0.06082385033369064,
"learning_rate": 0.00013757858734305203,
"loss": 0.4226,
"step": 644
},
{
"epoch": 1.7610921501706485,
"grad_norm": 0.059198446571826935,
"learning_rate": 0.0001373574225967004,
"loss": 0.405,
"step": 645
},
{
"epoch": 1.7638225255972695,
"grad_norm": 0.06012206897139549,
"learning_rate": 0.00013713604527891844,
"loss": 0.4192,
"step": 646
},
{
"epoch": 1.7665529010238907,
"grad_norm": 0.06151711568236351,
"learning_rate": 0.00013691445664938866,
"loss": 0.4206,
"step": 647
},
{
"epoch": 1.769283276450512,
"grad_norm": 0.06284491717815399,
"learning_rate": 0.00013669265796899607,
"loss": 0.4118,
"step": 648
},
{
"epoch": 1.772013651877133,
"grad_norm": 0.06001686304807663,
"learning_rate": 0.00013647065049982078,
"loss": 0.4293,
"step": 649
},
{
"epoch": 1.7747440273037542,
"grad_norm": 0.05952538549900055,
"learning_rate": 0.0001362484355051311,
"loss": 0.4114,
"step": 650
},
{
"epoch": 1.7774744027303755,
"grad_norm": 0.057195715606212616,
"learning_rate": 0.00013602601424937604,
"loss": 0.4104,
"step": 651
},
{
"epoch": 1.7802047781569965,
"grad_norm": 0.05979065224528313,
"learning_rate": 0.00013580338799817844,
"loss": 0.4321,
"step": 652
},
{
"epoch": 1.7829351535836178,
"grad_norm": 0.06188386306166649,
"learning_rate": 0.00013558055801832748,
"loss": 0.4044,
"step": 653
},
{
"epoch": 1.785665529010239,
"grad_norm": 0.060921113938093185,
"learning_rate": 0.0001353575255777717,
"loss": 0.422,
"step": 654
},
{
"epoch": 1.78839590443686,
"grad_norm": 0.0592602975666523,
"learning_rate": 0.0001351342919456116,
"loss": 0.3936,
"step": 655
},
{
"epoch": 1.7911262798634813,
"grad_norm": 0.06046243757009506,
"learning_rate": 0.0001349108583920925,
"loss": 0.4251,
"step": 656
},
{
"epoch": 1.7938566552901025,
"grad_norm": 0.05771365761756897,
"learning_rate": 0.00013468722618859743,
"loss": 0.4073,
"step": 657
},
{
"epoch": 1.7965870307167235,
"grad_norm": 0.05681789293885231,
"learning_rate": 0.0001344633966076396,
"loss": 0.4074,
"step": 658
},
{
"epoch": 1.7993174061433446,
"grad_norm": 0.05813178792595863,
"learning_rate": 0.00013423937092285555,
"loss": 0.3896,
"step": 659
},
{
"epoch": 1.802047781569966,
"grad_norm": 0.05757216364145279,
"learning_rate": 0.00013401515040899746,
"loss": 0.4178,
"step": 660
},
{
"epoch": 1.804778156996587,
"grad_norm": 0.057594846934080124,
"learning_rate": 0.00013379073634192632,
"loss": 0.3785,
"step": 661
},
{
"epoch": 1.807508532423208,
"grad_norm": 0.06386829912662506,
"learning_rate": 0.00013356612999860436,
"loss": 0.4017,
"step": 662
},
{
"epoch": 1.8102389078498293,
"grad_norm": 0.059352222830057144,
"learning_rate": 0.000133341332657088,
"loss": 0.4053,
"step": 663
},
{
"epoch": 1.8129692832764506,
"grad_norm": 0.058490559458732605,
"learning_rate": 0.00013311634559652036,
"loss": 0.4036,
"step": 664
},
{
"epoch": 1.8156996587030716,
"grad_norm": 0.0580880232155323,
"learning_rate": 0.00013289117009712418,
"loss": 0.4075,
"step": 665
},
{
"epoch": 1.8184300341296928,
"grad_norm": 0.054440416395664215,
"learning_rate": 0.00013266580744019445,
"loss": 0.4139,
"step": 666
},
{
"epoch": 1.821160409556314,
"grad_norm": 0.058102305978536606,
"learning_rate": 0.00013244025890809112,
"loss": 0.4051,
"step": 667
},
{
"epoch": 1.823890784982935,
"grad_norm": 0.06036128103733063,
"learning_rate": 0.00013221452578423176,
"loss": 0.4091,
"step": 668
},
{
"epoch": 1.8266211604095564,
"grad_norm": 0.061323538422584534,
"learning_rate": 0.00013198860935308444,
"loss": 0.4273,
"step": 669
},
{
"epoch": 1.8293515358361776,
"grad_norm": 0.06144220754504204,
"learning_rate": 0.00013176251090016007,
"loss": 0.4228,
"step": 670
},
{
"epoch": 1.8320819112627986,
"grad_norm": 0.05480247363448143,
"learning_rate": 0.0001315362317120055,
"loss": 0.4078,
"step": 671
},
{
"epoch": 1.8348122866894196,
"grad_norm": 0.0559588298201561,
"learning_rate": 0.00013130977307619594,
"loss": 0.4015,
"step": 672
},
{
"epoch": 1.8375426621160411,
"grad_norm": 0.0562249980866909,
"learning_rate": 0.0001310831362813276,
"loss": 0.4216,
"step": 673
},
{
"epoch": 1.8402730375426621,
"grad_norm": 0.05529346689581871,
"learning_rate": 0.00013085632261701063,
"loss": 0.3991,
"step": 674
},
{
"epoch": 1.8430034129692832,
"grad_norm": 0.055582497268915176,
"learning_rate": 0.00013062933337386142,
"loss": 0.3956,
"step": 675
},
{
"epoch": 1.8457337883959044,
"grad_norm": 0.057054124772548676,
"learning_rate": 0.00013040216984349555,
"loss": 0.398,
"step": 676
},
{
"epoch": 1.8484641638225257,
"grad_norm": 0.057355768978595734,
"learning_rate": 0.00013017483331852035,
"loss": 0.4059,
"step": 677
},
{
"epoch": 1.8511945392491467,
"grad_norm": 0.056889165192842484,
"learning_rate": 0.00012994732509252744,
"loss": 0.3806,
"step": 678
},
{
"epoch": 1.853924914675768,
"grad_norm": 0.057586781680583954,
"learning_rate": 0.00012971964646008542,
"loss": 0.4104,
"step": 679
},
{
"epoch": 1.8566552901023892,
"grad_norm": 0.059306979179382324,
"learning_rate": 0.00012949179871673278,
"loss": 0.4033,
"step": 680
},
{
"epoch": 1.8593856655290102,
"grad_norm": 0.057881347835063934,
"learning_rate": 0.00012926378315896998,
"loss": 0.4135,
"step": 681
},
{
"epoch": 1.8621160409556314,
"grad_norm": 0.06169261038303375,
"learning_rate": 0.00012903560108425258,
"loss": 0.412,
"step": 682
},
{
"epoch": 1.8648464163822527,
"grad_norm": 0.05441267788410187,
"learning_rate": 0.00012880725379098352,
"loss": 0.3986,
"step": 683
},
{
"epoch": 1.8675767918088737,
"grad_norm": 0.061068952083587646,
"learning_rate": 0.00012857874257850605,
"loss": 0.418,
"step": 684
},
{
"epoch": 1.8703071672354947,
"grad_norm": 0.058384671807289124,
"learning_rate": 0.00012835006874709594,
"loss": 0.4074,
"step": 685
},
{
"epoch": 1.8730375426621162,
"grad_norm": 0.0570659376680851,
"learning_rate": 0.00012812123359795446,
"loss": 0.4149,
"step": 686
},
{
"epoch": 1.8757679180887372,
"grad_norm": 0.05798759683966637,
"learning_rate": 0.00012789223843320073,
"loss": 0.4022,
"step": 687
},
{
"epoch": 1.8784982935153582,
"grad_norm": 0.059756677597761154,
"learning_rate": 0.0001276630845558644,
"loss": 0.4152,
"step": 688
},
{
"epoch": 1.8812286689419795,
"grad_norm": 0.05982014164328575,
"learning_rate": 0.00012743377326987826,
"loss": 0.4127,
"step": 689
},
{
"epoch": 1.8839590443686007,
"grad_norm": 0.05929556116461754,
"learning_rate": 0.00012720430588007077,
"loss": 0.405,
"step": 690
},
{
"epoch": 1.8866894197952218,
"grad_norm": 0.05722184479236603,
"learning_rate": 0.00012697468369215863,
"loss": 0.3978,
"step": 691
},
{
"epoch": 1.889419795221843,
"grad_norm": 0.05866376683115959,
"learning_rate": 0.00012674490801273938,
"loss": 0.417,
"step": 692
},
{
"epoch": 1.8921501706484642,
"grad_norm": 0.055445022881031036,
"learning_rate": 0.00012651498014928402,
"loss": 0.4161,
"step": 693
},
{
"epoch": 1.8948805460750853,
"grad_norm": 0.06086587905883789,
"learning_rate": 0.00012628490141012937,
"loss": 0.402,
"step": 694
},
{
"epoch": 1.8976109215017065,
"grad_norm": 0.06076718121767044,
"learning_rate": 0.000126054673104471,
"loss": 0.414,
"step": 695
},
{
"epoch": 1.9003412969283278,
"grad_norm": 0.055698879063129425,
"learning_rate": 0.00012582429654235523,
"loss": 0.3926,
"step": 696
},
{
"epoch": 1.9030716723549488,
"grad_norm": 0.056595612317323685,
"learning_rate": 0.00012559377303467226,
"loss": 0.4135,
"step": 697
},
{
"epoch": 1.9058020477815698,
"grad_norm": 0.05591044947504997,
"learning_rate": 0.00012536310389314832,
"loss": 0.4074,
"step": 698
},
{
"epoch": 1.9085324232081913,
"grad_norm": 0.06135864555835724,
"learning_rate": 0.0001251322904303383,
"loss": 0.4203,
"step": 699
},
{
"epoch": 1.9112627986348123,
"grad_norm": 0.058106984943151474,
"learning_rate": 0.00012490133395961844,
"loss": 0.4046,
"step": 700
},
{
"epoch": 1.9139931740614333,
"grad_norm": 0.059473518282175064,
"learning_rate": 0.00012467023579517856,
"loss": 0.4027,
"step": 701
},
{
"epoch": 1.9167235494880546,
"grad_norm": 0.057781342417001724,
"learning_rate": 0.00012443899725201482,
"loss": 0.4163,
"step": 702
},
{
"epoch": 1.9194539249146758,
"grad_norm": 0.0613093338906765,
"learning_rate": 0.00012420761964592223,
"loss": 0.4127,
"step": 703
},
{
"epoch": 1.9221843003412968,
"grad_norm": 0.05781256780028343,
"learning_rate": 0.000123976104293487,
"loss": 0.398,
"step": 704
},
{
"epoch": 1.924914675767918,
"grad_norm": 0.057743050158023834,
"learning_rate": 0.00012374445251207914,
"loss": 0.3969,
"step": 705
},
{
"epoch": 1.9276450511945393,
"grad_norm": 0.0608978345990181,
"learning_rate": 0.00012351266561984507,
"loss": 0.4037,
"step": 706
},
{
"epoch": 1.9303754266211604,
"grad_norm": 0.05937394127249718,
"learning_rate": 0.00012328074493569993,
"loss": 0.3964,
"step": 707
},
{
"epoch": 1.9331058020477816,
"grad_norm": 0.06584876775741577,
"learning_rate": 0.0001230486917793202,
"loss": 0.4186,
"step": 708
},
{
"epoch": 1.9358361774744028,
"grad_norm": 0.06222471594810486,
"learning_rate": 0.00012281650747113612,
"loss": 0.4178,
"step": 709
},
{
"epoch": 1.9385665529010239,
"grad_norm": 0.05962240695953369,
"learning_rate": 0.0001225841933323242,
"loss": 0.3898,
"step": 710
},
{
"epoch": 1.9412969283276449,
"grad_norm": 0.06118809059262276,
"learning_rate": 0.00012235175068479984,
"loss": 0.3926,
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.9440273037542664, |
|
"grad_norm": 0.05581739544868469, |
|
"learning_rate": 0.00012211918085120954, |
|
"loss": 0.3907, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.9467576791808874, |
|
"grad_norm": 0.05898397043347359, |
|
"learning_rate": 0.00012188648515492355, |
|
"loss": 0.3979, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.9494880546075084, |
|
"grad_norm": 0.06158998981118202, |
|
"learning_rate": 0.00012165366492002832, |
|
"loss": 0.4138, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.9522184300341296, |
|
"grad_norm": 0.06278332322835922, |
|
"learning_rate": 0.00012142072147131898, |
|
"loss": 0.4141, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.954948805460751, |
|
"grad_norm": 0.06232950836420059, |
|
"learning_rate": 0.00012118765613429173, |
|
"loss": 0.4058, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.957679180887372, |
|
"grad_norm": 0.05726422742009163, |
|
"learning_rate": 0.0001209544702351363, |
|
"loss": 0.4021, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.9604095563139932, |
|
"grad_norm": 0.05597952753305435, |
|
"learning_rate": 0.00012072116510072858, |
|
"loss": 0.3965, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.9631399317406144, |
|
"grad_norm": 0.0619698166847229, |
|
"learning_rate": 0.00012048774205862279, |
|
"loss": 0.4112, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.9658703071672354, |
|
"grad_norm": 0.05994318053126335, |
|
"learning_rate": 0.0001202542024370441, |
|
"loss": 0.4186, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.9686006825938567, |
|
"grad_norm": 0.06278800964355469, |
|
"learning_rate": 0.00012002054756488115, |
|
"loss": 0.4122, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.971331058020478, |
|
"grad_norm": 0.06267794966697693, |
|
"learning_rate": 0.00011978677877167822, |
|
"loss": 0.4057, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.974061433447099, |
|
"grad_norm": 0.06913238018751144, |
|
"learning_rate": 0.00011955289738762796, |
|
"loss": 0.4069, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.9767918088737202, |
|
"grad_norm": 0.06418196856975555, |
|
"learning_rate": 0.00011931890474356358, |
|
"loss": 0.4078, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.9795221843003414, |
|
"grad_norm": 0.06403093785047531, |
|
"learning_rate": 0.00011908480217095141, |
|
"loss": 0.4062, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.9822525597269625, |
|
"grad_norm": 0.0585256926715374, |
|
"learning_rate": 0.00011885059100188341, |
|
"loss": 0.409, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.9849829351535835, |
|
"grad_norm": 0.06400654464960098, |
|
"learning_rate": 0.00011861627256906929, |
|
"loss": 0.4113, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.9877133105802047, |
|
"grad_norm": 0.06193806603550911, |
|
"learning_rate": 0.00011838184820582923, |
|
"loss": 0.4194, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.990443686006826, |
|
"grad_norm": 0.05865743011236191, |
|
"learning_rate": 0.00011814731924608616, |
|
"loss": 0.4002, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.993174061433447, |
|
"grad_norm": 0.05942784622311592, |
|
"learning_rate": 0.00011791268702435816, |
|
"loss": 0.4047, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.9959044368600682, |
|
"grad_norm": 0.056138355284929276, |
|
"learning_rate": 0.0001176779528757509, |
|
"loss": 0.4084, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.9986348122866895, |
|
"grad_norm": 0.058754485100507736, |
|
"learning_rate": 0.00011744311813595006, |
|
"loss": 0.3986, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.9986348122866895, |
|
"eval_loss": 0.40529951453208923, |
|
"eval_runtime": 310.303, |
|
"eval_samples_per_second": 8.392, |
|
"eval_steps_per_second": 1.051, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 2.0013651877133105, |
|
"grad_norm": 0.12337245792150497, |
|
"learning_rate": 0.00011720818414121368, |
|
"loss": 0.6736, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 2.0040955631399315, |
|
"grad_norm": 0.0677185207605362, |
|
"learning_rate": 0.00011697315222836458, |
|
"loss": 0.3943, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 2.006825938566553, |
|
"grad_norm": 0.06678740680217743, |
|
"learning_rate": 0.0001167380237347828, |
|
"loss": 0.3947, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 2.009556313993174, |
|
"grad_norm": 0.06545857340097427, |
|
"learning_rate": 0.00011650279999839787, |
|
"loss": 0.3893, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 2.012286689419795, |
|
"grad_norm": 0.07168494164943695, |
|
"learning_rate": 0.00011626748235768128, |
|
"loss": 0.3969, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 2.0150170648464165, |
|
"grad_norm": 0.06647184491157532, |
|
"learning_rate": 0.00011603207215163894, |
|
"loss": 0.3705, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 2.0177474402730375, |
|
"grad_norm": 0.06479815393686295, |
|
"learning_rate": 0.0001157965707198034, |
|
"loss": 0.4064, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 2.0204778156996586, |
|
"grad_norm": 0.06511078029870987, |
|
"learning_rate": 0.00011556097940222628, |
|
"loss": 0.408, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.02320819112628, |
|
"grad_norm": 0.06628479063510895, |
|
"learning_rate": 0.00011532529953947075, |
|
"loss": 0.4041, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 2.025938566552901, |
|
"grad_norm": 0.062073782086372375, |
|
"learning_rate": 0.00011508953247260379, |
|
"loss": 0.3935, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 2.028668941979522, |
|
"grad_norm": 0.0709017738699913, |
|
"learning_rate": 0.00011485367954318856, |
|
"loss": 0.3895, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 2.0313993174061435, |
|
"grad_norm": 0.06909438967704773, |
|
"learning_rate": 0.0001146177420932768, |
|
"loss": 0.398, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 2.0341296928327646, |
|
"grad_norm": 0.06830695271492004, |
|
"learning_rate": 0.00011438172146540123, |
|
"loss": 0.3915, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 2.0368600682593856, |
|
"grad_norm": 0.06455153971910477, |
|
"learning_rate": 0.00011414561900256784, |
|
"loss": 0.396, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 2.0395904436860066, |
|
"grad_norm": 0.06828158348798752, |
|
"learning_rate": 0.00011390943604824826, |
|
"loss": 0.4, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 2.042320819112628, |
|
"grad_norm": 0.06488997489213943, |
|
"learning_rate": 0.00011367317394637218, |
|
"loss": 0.3899, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 2.045051194539249, |
|
"grad_norm": 0.0722300335764885, |
|
"learning_rate": 0.00011343683404131964, |
|
"loss": 0.4103, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 2.04778156996587, |
|
"grad_norm": 0.06264421343803406, |
|
"learning_rate": 0.00011320041767791336, |
|
"loss": 0.3909, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.0505119453924916, |
|
"grad_norm": 0.06223292276263237, |
|
"learning_rate": 0.00011296392620141114, |
|
"loss": 0.4038, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 2.0532423208191126, |
|
"grad_norm": 0.06223985552787781, |
|
"learning_rate": 0.00011272736095749823, |
|
"loss": 0.3851, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 2.0559726962457336, |
|
"grad_norm": 0.06464383751153946, |
|
"learning_rate": 0.00011249072329227959, |
|
"loss": 0.4085, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 2.058703071672355, |
|
"grad_norm": 0.06551406532526016, |
|
"learning_rate": 0.0001122540145522723, |
|
"loss": 0.3954, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 2.061433447098976, |
|
"grad_norm": 0.0651601254940033, |
|
"learning_rate": 0.00011201723608439778, |
|
"loss": 0.3903, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 2.064163822525597, |
|
"grad_norm": 0.061696816235780716, |
|
"learning_rate": 0.0001117803892359744, |
|
"loss": 0.3909, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 2.0668941979522186, |
|
"grad_norm": 0.06516902893781662, |
|
"learning_rate": 0.00011154347535470947, |
|
"loss": 0.398, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 2.0696245733788396, |
|
"grad_norm": 0.06373114883899689, |
|
"learning_rate": 0.00011130649578869173, |
|
"loss": 0.4048, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 2.0723549488054607, |
|
"grad_norm": 0.06553075462579727, |
|
"learning_rate": 0.00011106945188638378, |
|
"loss": 0.3939, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 2.0750853242320817, |
|
"grad_norm": 0.0630226582288742, |
|
"learning_rate": 0.00011083234499661426, |
|
"loss": 0.3978, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.077815699658703, |
|
"grad_norm": 0.059685662388801575, |
|
"learning_rate": 0.00011059517646857023, |
|
"loss": 0.3995, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 2.080546075085324, |
|
"grad_norm": 0.06554658710956573, |
|
"learning_rate": 0.00011035794765178941, |
|
"loss": 0.392, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 2.083276450511945, |
|
"grad_norm": 0.06697095930576324, |
|
"learning_rate": 0.0001101206598961527, |
|
"loss": 0.3807, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 2.0860068259385667, |
|
"grad_norm": 0.06333647668361664, |
|
"learning_rate": 0.00010988331455187628, |
|
"loss": 0.3803, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 2.0887372013651877, |
|
"grad_norm": 0.06475751847028732, |
|
"learning_rate": 0.00010964591296950406, |
|
"loss": 0.3872, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.0914675767918087, |
|
"grad_norm": 0.06261032074689865, |
|
"learning_rate": 0.00010940845649989994, |
|
"loss": 0.3971, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 2.09419795221843, |
|
"grad_norm": 0.06801852583885193, |
|
"learning_rate": 0.00010917094649424018, |
|
"loss": 0.4013, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 2.096928327645051, |
|
"grad_norm": 0.06227778270840645, |
|
"learning_rate": 0.00010893338430400562, |
|
"loss": 0.3919, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 2.0996587030716722, |
|
"grad_norm": 0.06618170440196991, |
|
"learning_rate": 0.00010869577128097404, |
|
"loss": 0.3961, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 2.1023890784982937, |
|
"grad_norm": 0.061390649527311325, |
|
"learning_rate": 0.00010845810877721252, |
|
"loss": 0.3823, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.1051194539249147, |
|
"grad_norm": 0.058039017021656036, |
|
"learning_rate": 0.00010822039814506964, |
|
"loss": 0.3834, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 2.1078498293515358, |
|
"grad_norm": 0.061444416642189026, |
|
"learning_rate": 0.00010798264073716791, |
|
"loss": 0.3905, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 2.1105802047781568, |
|
"grad_norm": 0.05958331748843193, |
|
"learning_rate": 0.00010774483790639591, |
|
"loss": 0.376, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 2.1133105802047782, |
|
"grad_norm": 0.06220965459942818, |
|
"learning_rate": 0.00010750699100590076, |
|
"loss": 0.3722, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 2.1160409556313993, |
|
"grad_norm": 0.06390852481126785, |
|
"learning_rate": 0.00010726910138908032, |
|
"loss": 0.3707, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.1187713310580203, |
|
"grad_norm": 0.06177375093102455, |
|
"learning_rate": 0.00010703117040957553, |
|
"loss": 0.3904, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 2.1215017064846418, |
|
"grad_norm": 0.06506139785051346, |
|
"learning_rate": 0.00010679319942126264, |
|
"loss": 0.399, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 2.124232081911263, |
|
"grad_norm": 0.0721697136759758, |
|
"learning_rate": 0.00010655518977824566, |
|
"loss": 0.4076, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 2.126962457337884, |
|
"grad_norm": 0.06590563803911209, |
|
"learning_rate": 0.00010631714283484842, |
|
"loss": 0.3999, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 2.1296928327645053, |
|
"grad_norm": 0.0715552568435669, |
|
"learning_rate": 0.0001060790599456071, |
|
"loss": 0.3885, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.1324232081911263, |
|
"grad_norm": 0.06654267013072968, |
|
"learning_rate": 0.00010584094246526237, |
|
"loss": 0.3991, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 2.1351535836177473, |
|
"grad_norm": 0.06633856892585754, |
|
"learning_rate": 0.00010560279174875179, |
|
"loss": 0.4085, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 2.137883959044369, |
|
"grad_norm": 0.06462158262729645, |
|
"learning_rate": 0.0001053646091512019, |
|
"loss": 0.3848, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 2.14061433447099, |
|
"grad_norm": 0.061437733471393585, |
|
"learning_rate": 0.00010512639602792088, |
|
"loss": 0.3846, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 2.143344709897611, |
|
"grad_norm": 0.06375306099653244, |
|
"learning_rate": 0.00010488815373439036, |
|
"loss": 0.3989, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 2.146075085324232, |
|
"grad_norm": 0.06337495893239975, |
|
"learning_rate": 0.00010464988362625812, |
|
"loss": 0.3967, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 2.1488054607508533, |
|
"grad_norm": 0.07057306170463562, |
|
"learning_rate": 0.00010441158705933016, |
|
"loss": 0.4033, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 2.1515358361774743, |
|
"grad_norm": 0.06043674796819687, |
|
"learning_rate": 0.00010417326538956305, |
|
"loss": 0.3868, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 2.1542662116040954, |
|
"grad_norm": 0.06690146774053574, |
|
"learning_rate": 0.00010393491997305613, |
|
"loss": 0.3869, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 2.156996587030717, |
|
"grad_norm": 0.06832878291606903, |
|
"learning_rate": 0.00010369655216604397, |
|
"loss": 0.4094, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.159726962457338, |
|
"grad_norm": 0.06499913334846497, |
|
"learning_rate": 0.0001034581633248885, |
|
"loss": 0.3954, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 2.162457337883959, |
|
"grad_norm": 0.06597902625799179, |
|
"learning_rate": 0.00010321975480607129, |
|
"loss": 0.3965, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 2.1651877133105804, |
|
"grad_norm": 0.06236860156059265, |
|
"learning_rate": 0.00010298132796618596, |
|
"loss": 0.3925, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 2.1679180887372014, |
|
"grad_norm": 0.06529796868562698, |
|
"learning_rate": 0.00010274288416193034, |
|
"loss": 0.3724, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 2.1706484641638224, |
|
"grad_norm": 0.06897170096635818, |
|
"learning_rate": 0.0001025044247500988, |
|
"loss": 0.3862, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 2.173378839590444, |
|
"grad_norm": 0.06522727757692337, |
|
"learning_rate": 0.00010226595108757451, |
|
"loss": 0.3991, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 2.176109215017065, |
|
"grad_norm": 0.0633029192686081, |
|
"learning_rate": 0.00010202746453132172, |
|
"loss": 0.3797, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 2.178839590443686, |
|
"grad_norm": 0.06274284422397614, |
|
"learning_rate": 0.00010178896643837809, |
|
"loss": 0.4011, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 2.181569965870307, |
|
"grad_norm": 0.0627584382891655, |
|
"learning_rate": 0.00010155045816584691, |
|
"loss": 0.3905, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 2.1843003412969284, |
|
"grad_norm": 0.06605307757854462, |
|
"learning_rate": 0.00010131194107088935, |
|
"loss": 0.3937, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.1870307167235494, |
|
"grad_norm": 0.0742136538028717, |
|
"learning_rate": 0.00010107341651071684, |
|
"loss": 0.4042, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 2.1897610921501705, |
|
"grad_norm": 0.0774683728814125, |
|
"learning_rate": 0.00010083488584258326, |
|
"loss": 0.3971, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 2.192491467576792, |
|
"grad_norm": 0.06698737293481827, |
|
"learning_rate": 0.00010059635042377725, |
|
"loss": 0.4075, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 2.195221843003413, |
|
"grad_norm": 0.0646221786737442, |
|
"learning_rate": 0.00010035781161161446, |
|
"loss": 0.3831, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 2.197952218430034, |
|
"grad_norm": 0.06556056439876556, |
|
"learning_rate": 0.0001001192707634299, |
|
"loss": 0.3796, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 2.2006825938566554, |
|
"grad_norm": 0.061351120471954346, |
|
"learning_rate": 9.988072923657012e-05, |
|
"loss": 0.3657, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 2.2034129692832765, |
|
"grad_norm": 0.06275127828121185, |
|
"learning_rate": 9.964218838838554e-05, |
|
"loss": 0.3689, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 2.2061433447098975, |
|
"grad_norm": 0.07032433152198792, |
|
"learning_rate": 9.940364957622276e-05, |
|
"loss": 0.3963, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 2.208873720136519, |
|
"grad_norm": 0.07062618434429169, |
|
"learning_rate": 9.916511415741676e-05, |
|
"loss": 0.3691, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 2.21160409556314, |
|
"grad_norm": 0.06543855369091034, |
|
"learning_rate": 9.892658348928316e-05, |
|
"loss": 0.4048, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.214334470989761, |
|
"grad_norm": 0.06486646085977554, |
|
"learning_rate": 9.868805892911067e-05, |
|
"loss": 0.4024, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 2.217064846416382, |
|
"grad_norm": 0.06630420684814453, |
|
"learning_rate": 9.84495418341531e-05, |
|
"loss": 0.3809, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 2.2197952218430035, |
|
"grad_norm": 0.06499867141246796, |
|
"learning_rate": 9.821103356162189e-05, |
|
"loss": 0.399, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 2.2225255972696245, |
|
"grad_norm": 0.06988663226366043, |
|
"learning_rate": 9.797253546867831e-05, |
|
"loss": 0.3685, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 2.2252559726962455, |
|
"grad_norm": 0.06742274761199951, |
|
"learning_rate": 9.773404891242551e-05, |
|
"loss": 0.3753, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 2.227986348122867, |
|
"grad_norm": 0.06449928879737854, |
|
"learning_rate": 9.749557524990121e-05, |
|
"loss": 0.3958, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 2.230716723549488, |
|
"grad_norm": 0.06267012655735016, |
|
"learning_rate": 9.72571158380697e-05, |
|
"loss": 0.3969, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 2.233447098976109, |
|
"grad_norm": 0.06422371417284012, |
|
"learning_rate": 9.701867203381405e-05, |
|
"loss": 0.4004, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 2.2361774744027305, |
|
"grad_norm": 0.06123776733875275, |
|
"learning_rate": 9.678024519392871e-05, |
|
"loss": 0.3843, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.2389078498293515, |
|
"grad_norm": 0.06269904226064682, |
|
"learning_rate": 9.654183667511154e-05, |
|
"loss": 0.4116, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.2416382252559726, |
|
"grad_norm": 0.06304040551185608, |
|
"learning_rate": 9.630344783395604e-05, |
|
"loss": 0.394, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 2.244368600682594, |
|
"grad_norm": 0.06638960540294647, |
|
"learning_rate": 9.606508002694386e-05, |
|
"loss": 0.404, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 2.247098976109215, |
|
"grad_norm": 0.06530074775218964, |
|
"learning_rate": 9.5826734610437e-05, |
|
"loss": 0.3945, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 2.249829351535836, |
|
"grad_norm": 0.06665409356355667, |
|
"learning_rate": 9.558841294066985e-05, |
|
"loss": 0.4024, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 2.252559726962457, |
|
"grad_norm": 0.06718889623880386, |
|
"learning_rate": 9.535011637374189e-05, |
|
"loss": 0.383, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.2552901023890786, |
|
"grad_norm": 0.06695706397294998, |
|
"learning_rate": 9.511184626560968e-05, |
|
"loss": 0.3897, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 2.2580204778156996, |
|
"grad_norm": 0.061516858637332916, |
|
"learning_rate": 9.487360397207916e-05, |
|
"loss": 0.3792, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 2.260750853242321, |
|
"grad_norm": 0.06686560809612274, |
|
"learning_rate": 9.463539084879809e-05, |
|
"loss": 0.3872, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 2.263481228668942, |
|
"grad_norm": 0.06617554277181625, |
|
"learning_rate": 9.439720825124827e-05, |
|
"loss": 0.3982, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 2.266211604095563, |
|
"grad_norm": 0.06416121870279312, |
|
"learning_rate": 9.415905753473765e-05, |
|
"loss": 0.39, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.268941979522184, |
|
"grad_norm": 0.06259041279554367, |
|
"learning_rate": 9.392094005439291e-05, |
|
"loss": 0.3851, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 2.2716723549488056, |
|
"grad_norm": 0.06047592684626579, |
|
"learning_rate": 9.368285716515162e-05, |
|
"loss": 0.3709, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 2.2744027303754266, |
|
"grad_norm": 0.0637134537100792, |
|
"learning_rate": 9.344481022175436e-05, |
|
"loss": 0.3711, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 2.2771331058020476, |
|
"grad_norm": 0.06489844620227814, |
|
"learning_rate": 9.320680057873735e-05, |
|
"loss": 0.3717, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 2.279863481228669, |
|
"grad_norm": 0.06694884598255157, |
|
"learning_rate": 9.29688295904245e-05, |
|
"loss": 0.385, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.28259385665529, |
|
"grad_norm": 0.07286783307790756, |
|
"learning_rate": 9.273089861091969e-05, |
|
"loss": 0.3809, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 2.285324232081911, |
|
"grad_norm": 0.06461062282323837, |
|
"learning_rate": 9.249300899409924e-05, |
|
"loss": 0.3961, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 2.288054607508532, |
|
"grad_norm": 0.06771940737962723, |
|
"learning_rate": 9.225516209360413e-05, |
|
"loss": 0.3948, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 2.2907849829351536, |
|
"grad_norm": 0.06662525236606598, |
|
"learning_rate": 9.201735926283213e-05, |
|
"loss": 0.3835, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 2.2935153583617747, |
|
"grad_norm": 0.06741426885128021, |
|
"learning_rate": 9.177960185493036e-05, |
|
"loss": 0.4025, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.296245733788396, |
|
"grad_norm": 0.062717005610466, |
|
"learning_rate": 9.154189122278754e-05, |
|
"loss": 0.3812, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 2.298976109215017, |
|
"grad_norm": 0.06642817705869675, |
|
"learning_rate": 9.1304228719026e-05, |
|
"loss": 0.3783, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 2.301706484641638, |
|
"grad_norm": 0.06607065349817276, |
|
"learning_rate": 9.106661569599442e-05, |
|
"loss": 0.3935, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 2.304436860068259, |
|
"grad_norm": 0.0658462718129158, |
|
"learning_rate": 9.082905350575986e-05, |
|
"loss": 0.3865, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 2.3071672354948807, |
|
"grad_norm": 0.0657251700758934, |
|
"learning_rate": 9.059154350010008e-05, |
|
"loss": 0.3955, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 2.3098976109215017, |
|
"grad_norm": 0.0725063607096672, |
|
"learning_rate": 9.035408703049596e-05, |
|
"loss": 0.3836, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 2.3126279863481227, |
|
"grad_norm": 0.06905627250671387, |
|
"learning_rate": 9.011668544812377e-05, |
|
"loss": 0.3948, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 2.315358361774744, |
|
"grad_norm": 0.05961495637893677, |
|
"learning_rate": 8.987934010384733e-05, |
|
"loss": 0.3543, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 2.318088737201365, |
|
"grad_norm": 0.06361044198274612, |
|
"learning_rate": 8.96420523482106e-05, |
|
"loss": 0.3954, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 2.3208191126279862, |
|
"grad_norm": 0.06655801832675934, |
|
"learning_rate": 8.940482353142983e-05, |
|
"loss": 0.3895, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.3235494880546073, |
|
"grad_norm": 0.06384111195802689, |
|
"learning_rate": 8.916765500338575e-05, |
|
"loss": 0.4027, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 2.3262798634812287, |
|
"grad_norm": 0.06309723109006882, |
|
"learning_rate": 8.893054811361624e-05, |
|
"loss": 0.3768, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 2.3290102389078498, |
|
"grad_norm": 0.06191105768084526, |
|
"learning_rate": 8.869350421130831e-05, |
|
"loss": 0.3896, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 2.331740614334471, |
|
"grad_norm": 0.06459913402795792, |
|
"learning_rate": 8.845652464529057e-05, |
|
"loss": 0.3891, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 2.3344709897610922, |
|
"grad_norm": 0.06782979518175125, |
|
"learning_rate": 8.821961076402563e-05, |
|
"loss": 0.3947, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 2.3372013651877133, |
|
"grad_norm": 0.06412837654352188, |
|
"learning_rate": 8.79827639156022e-05, |
|
"loss": 0.3875, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 2.3399317406143343, |
|
"grad_norm": 0.06624305993318558, |
|
"learning_rate": 8.774598544772774e-05, |
|
"loss": 0.3846, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 2.3426621160409558, |
|
"grad_norm": 0.06252939254045486, |
|
"learning_rate": 8.750927670772044e-05, |
|
"loss": 0.3925, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 2.345392491467577, |
|
"grad_norm": 0.06812991946935654, |
|
"learning_rate": 8.727263904250178e-05, |
|
"loss": 0.3889, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 2.348122866894198, |
|
"grad_norm": 0.06583413481712341, |
|
"learning_rate": 8.703607379858889e-05, |
|
"loss": 0.371, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.3508532423208193, |
|
"grad_norm": 0.07355549931526184, |
|
"learning_rate": 8.679958232208668e-05, |
|
"loss": 0.3821, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 2.3535836177474403, |
|
"grad_norm": 0.06977757811546326, |
|
"learning_rate": 8.656316595868037e-05, |
|
"loss": 0.3866, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 2.3563139931740613, |
|
"grad_norm": 0.06983613967895508, |
|
"learning_rate": 8.632682605362784e-05, |
|
"loss": 0.3973, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 2.359044368600683, |
|
"grad_norm": 0.06457202136516571, |
|
"learning_rate": 8.609056395175175e-05, |
|
"loss": 0.3794, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 2.361774744027304, |
|
"grad_norm": 0.06292392313480377, |
|
"learning_rate": 8.585438099743217e-05, |
|
"loss": 0.3923, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.364505119453925, |
|
"grad_norm": 0.06382179260253906, |
|
"learning_rate": 8.56182785345988e-05, |
|
"loss": 0.3757, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 2.3672354948805463, |
|
"grad_norm": 0.0666775107383728, |
|
"learning_rate": 8.538225790672322e-05, |
|
"loss": 0.3761, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 2.3699658703071673, |
|
"grad_norm": 0.06406944990158081, |
|
"learning_rate": 8.514632045681145e-05, |
|
"loss": 0.3798, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 2.3726962457337883, |
|
"grad_norm": 0.06742346286773682, |
|
"learning_rate": 8.491046752739624e-05, |
|
"loss": 0.3922, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 2.3754266211604094, |
|
"grad_norm": 0.06776360422372818, |
|
"learning_rate": 8.467470046052927e-05, |
|
"loss": 0.399, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.378156996587031, |
|
"grad_norm": 0.07043983042240143, |
|
"learning_rate": 8.443902059777373e-05, |
|
"loss": 0.3854, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 2.380887372013652, |
|
"grad_norm": 0.07512562721967697, |
|
"learning_rate": 8.420342928019666e-05, |
|
"loss": 0.403, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 2.383617747440273, |
|
"grad_norm": 0.06623658537864685, |
|
"learning_rate": 8.396792784836108e-05, |
|
"loss": 0.3929, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 2.3863481228668944, |
|
"grad_norm": 0.06477659940719604, |
|
"learning_rate": 8.373251764231872e-05, |
|
"loss": 0.3873, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 2.3890784982935154, |
|
"grad_norm": 0.06675104796886444, |
|
"learning_rate": 8.349720000160218e-05, |
|
"loss": 0.382, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.3918088737201364, |
|
"grad_norm": 0.06558026373386383, |
|
"learning_rate": 8.326197626521723e-05, |
|
"loss": 0.379, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 2.394539249146758, |
|
"grad_norm": 0.06641051918268204, |
|
"learning_rate": 8.30268477716354e-05, |
|
"loss": 0.3976, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 2.397269624573379, |
|
"grad_norm": 0.061502814292907715, |
|
"learning_rate": 8.279181585878635e-05, |
|
"loss": 0.3762, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.06521192193031311, |
|
"learning_rate": 8.255688186404996e-05, |
|
"loss": 0.3882, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 2.4027303754266214, |
|
"grad_norm": 0.0659627914428711, |
|
"learning_rate": 8.232204712424911e-05, |
|
"loss": 0.3835, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.4054607508532424, |
|
"grad_norm": 0.06876777112483978, |
|
"learning_rate": 8.208731297564189e-05, |
|
"loss": 0.3751, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 2.4081911262798634, |
|
"grad_norm": 0.066194549202919, |
|
"learning_rate": 8.185268075391388e-05, |
|
"loss": 0.376, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 2.4109215017064844, |
|
"grad_norm": 0.0667635127902031, |
|
"learning_rate": 8.161815179417078e-05, |
|
"loss": 0.384, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 2.413651877133106, |
|
"grad_norm": 0.0693088099360466, |
|
"learning_rate": 8.138372743093076e-05, |
|
"loss": 0.3817, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 2.416382252559727, |
|
"grad_norm": 0.07052286714315414, |
|
"learning_rate": 8.114940899811662e-05, |
|
"loss": 0.3792, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 2.419112627986348, |
|
"grad_norm": 0.06873782724142075, |
|
"learning_rate": 8.091519782904857e-05, |
|
"loss": 0.3971, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 2.4218430034129694, |
|
"grad_norm": 0.06847493350505829, |
|
"learning_rate": 8.068109525643647e-05, |
|
"loss": 0.3848, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 2.4245733788395905, |
|
"grad_norm": 0.062080979347229004, |
|
"learning_rate": 8.044710261237207e-05, |
|
"loss": 0.3853, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 2.4273037542662115, |
|
"grad_norm": 0.06559818983078003, |
|
"learning_rate": 8.021322122832178e-05, |
|
"loss": 0.3677, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 2.430034129692833, |
|
"grad_norm": 0.06463391333818436, |
|
"learning_rate": 7.99794524351189e-05, |
|
"loss": 0.372, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.432764505119454, |
|
"grad_norm": 0.06920842081308365, |
|
"learning_rate": 7.974579756295591e-05, |
|
"loss": 0.3831, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 2.435494880546075, |
|
"grad_norm": 0.06424865126609802, |
|
"learning_rate": 7.951225794137724e-05, |
|
"loss": 0.3748, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 2.4382252559726965, |
|
"grad_norm": 0.06715277582406998, |
|
"learning_rate": 7.927883489927147e-05, |
|
"loss": 0.3915, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 2.4409556313993175, |
|
"grad_norm": 0.06705847382545471, |
|
"learning_rate": 7.904552976486372e-05, |
|
"loss": 0.3885, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 2.4436860068259385, |
|
"grad_norm": 0.06581594794988632, |
|
"learning_rate": 7.88123438657083e-05, |
|
"loss": 0.3856, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.4464163822525595, |
|
"grad_norm": 0.06164994090795517, |
|
"learning_rate": 7.857927852868107e-05, |
|
"loss": 0.3748, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 2.449146757679181, |
|
"grad_norm": 0.06487210094928741, |
|
"learning_rate": 7.83463350799717e-05, |
|
"loss": 0.3671, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 2.451877133105802, |
|
"grad_norm": 0.07010706514120102, |
|
"learning_rate": 7.811351484507647e-05, |
|
"loss": 0.3788, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 2.454607508532423, |
|
"grad_norm": 0.0727321207523346, |
|
"learning_rate": 7.788081914879051e-05, |
|
"loss": 0.3875, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 2.4573378839590445, |
|
"grad_norm": 0.07144790142774582, |
|
"learning_rate": 7.764824931520018e-05, |
|
"loss": 0.385, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.4600682593856655, |
|
"grad_norm": 0.06632732599973679, |
|
"learning_rate": 7.741580666767583e-05, |
|
"loss": 0.3876, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 2.4627986348122866, |
|
"grad_norm": 0.06642162799835205, |
|
"learning_rate": 7.718349252886395e-05, |
|
"loss": 0.3767, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 2.465529010238908, |
|
"grad_norm": 0.07363419234752655, |
|
"learning_rate": 7.695130822067984e-05, |
|
"loss": 0.3987, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 2.468259385665529, |
|
"grad_norm": 0.06822016835212708, |
|
"learning_rate": 7.67192550643001e-05, |
|
"loss": 0.3878, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 2.47098976109215, |
|
"grad_norm": 0.07104900479316711, |
|
"learning_rate": 7.648733438015493e-05, |
|
"loss": 0.369, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 2.4737201365187715, |
|
"grad_norm": 0.07460657507181168, |
|
"learning_rate": 7.625554748792085e-05, |
|
"loss": 0.3804, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 2.4764505119453926, |
|
"grad_norm": 0.0632706731557846, |
|
"learning_rate": 7.602389570651303e-05, |
|
"loss": 0.3768, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 2.4791808873720136, |
|
"grad_norm": 0.07213195413351059, |
|
"learning_rate": 7.579238035407776e-05, |
|
"loss": 0.3942, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 2.4819112627986346, |
|
"grad_norm": 0.06379958242177963, |
|
"learning_rate": 7.556100274798519e-05, |
|
"loss": 0.3619, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 2.484641638225256, |
|
"grad_norm": 0.07019772380590439, |
|
"learning_rate": 7.532976420482146e-05, |
|
"loss": 0.3684, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.487372013651877, |
|
"grad_norm": 0.06975077837705612, |
|
"learning_rate": 7.509866604038157e-05, |
|
"loss": 0.3908, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 2.490102389078498, |
|
"grad_norm": 0.07252513617277145, |
|
"learning_rate": 7.486770956966171e-05, |
|
"loss": 0.381, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 2.4928327645051196, |
|
"grad_norm": 0.06543651968240738, |
|
"learning_rate": 7.463689610685171e-05, |
|
"loss": 0.3783, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 2.4955631399317406, |
|
"grad_norm": 0.06449062377214432, |
|
"learning_rate": 7.440622696532775e-05, |
|
"loss": 0.3793, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 2.4982935153583616, |
|
"grad_norm": 0.07260416448116302, |
|
"learning_rate": 7.417570345764481e-05, |
|
"loss": 0.3559, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 2.5010238907849827, |
|
"grad_norm": 0.0706956535577774, |
|
"learning_rate": 7.394532689552905e-05, |
|
"loss": 0.3908, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 2.503754266211604, |
|
"grad_norm": 0.07324767857789993, |
|
"learning_rate": 7.371509858987061e-05, |
|
"loss": 0.3877, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 2.506484641638225, |
|
"grad_norm": 0.07116331160068512, |
|
"learning_rate": 7.348501985071603e-05, |
|
"loss": 0.3918, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 2.5092150170648466, |
|
"grad_norm": 0.06699459999799728, |
|
"learning_rate": 7.325509198726064e-05, |
|
"loss": 0.3903, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 2.5119453924914676, |
|
"grad_norm": 0.06897846609354019, |
|
"learning_rate": 7.302531630784137e-05, |
|
"loss": 0.3785, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.5146757679180887, |
|
"grad_norm": 0.06994078308343887, |
|
"learning_rate": 7.279569411992926e-05, |
|
"loss": 0.4009, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 2.5174061433447097, |
|
"grad_norm": 0.0688600167632103, |
|
"learning_rate": 7.256622673012175e-05, |
|
"loss": 0.3768, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 2.520136518771331, |
|
"grad_norm": 0.07253801077604294, |
|
"learning_rate": 7.233691544413558e-05, |
|
"loss": 0.3816, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 2.522866894197952, |
|
"grad_norm": 0.06901174038648605, |
|
"learning_rate": 7.210776156679931e-05, |
|
"loss": 0.3815, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 2.5255972696245736, |
|
"grad_norm": 0.06941503286361694, |
|
"learning_rate": 7.187876640204556e-05, |
|
"loss": 0.3871, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.5283276450511947, |
|
"grad_norm": 0.06918507814407349, |
|
"learning_rate": 7.164993125290407e-05, |
|
"loss": 0.3842, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 2.5310580204778157, |
|
"grad_norm": 0.06809090077877045, |
|
"learning_rate": 7.1421257421494e-05, |
|
"loss": 0.3894, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 2.5337883959044367, |
|
"grad_norm": 0.0676964670419693, |
|
"learning_rate": 7.119274620901649e-05, |
|
"loss": 0.3659, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 2.536518771331058, |
|
"grad_norm": 0.06432293355464935, |
|
"learning_rate": 7.096439891574745e-05, |
|
"loss": 0.3793, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 2.539249146757679, |
|
"grad_norm": 0.06571602076292038, |
|
"learning_rate": 7.073621684103007e-05, |
|
"loss": 0.3725, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.5419795221843002, |
|
"grad_norm": 0.06849601119756699, |
|
"learning_rate": 7.050820128326724e-05, |
|
"loss": 0.3911, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 2.5447098976109217, |
|
"grad_norm": 0.06598517298698425, |
|
"learning_rate": 7.028035353991456e-05, |
|
"loss": 0.3824, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 2.5474402730375427, |
|
"grad_norm": 0.06540702283382416, |
|
"learning_rate": 7.005267490747263e-05, |
|
"loss": 0.3529, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 2.5501706484641637, |
|
"grad_norm": 0.07165670394897461, |
|
"learning_rate": 6.982516668147967e-05, |
|
"loss": 0.3716, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 2.5529010238907848, |
|
"grad_norm": 0.07307778298854828, |
|
"learning_rate": 6.959783015650446e-05, |
|
"loss": 0.389, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.5556313993174062, |
|
"grad_norm": 0.07411336153745651, |
|
"learning_rate": 6.937066662613863e-05, |
|
"loss": 0.3576, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 2.5583617747440273, |
|
"grad_norm": 0.07051097601652145, |
|
"learning_rate": 6.914367738298941e-05, |
|
"loss": 0.3861, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 2.5610921501706487, |
|
"grad_norm": 0.07393593341112137, |
|
"learning_rate": 6.891686371867239e-05, |
|
"loss": 0.3836, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 2.5638225255972698, |
|
"grad_norm": 0.07074485719203949, |
|
"learning_rate": 6.869022692380411e-05, |
|
"loss": 0.39, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 2.5665529010238908, |
|
"grad_norm": 0.07122661918401718, |
|
"learning_rate": 6.846376828799451e-05, |
|
"loss": 0.3622, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.569283276450512, |
|
"grad_norm": 0.07139509916305542, |
|
"learning_rate": 6.823748909983994e-05, |
|
"loss": 0.3663, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 2.5720136518771333, |
|
"grad_norm": 0.07215254753828049, |
|
"learning_rate": 6.801139064691562e-05, |
|
"loss": 0.3958, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 2.5747440273037543, |
|
"grad_norm": 0.06885367631912231, |
|
"learning_rate": 6.778547421576825e-05, |
|
"loss": 0.3682, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 2.5774744027303753, |
|
"grad_norm": 0.06890679150819778, |
|
"learning_rate": 6.75597410919089e-05, |
|
"loss": 0.3747, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 2.580204778156997, |
|
"grad_norm": 0.06944692134857178, |
|
"learning_rate": 6.733419255980559e-05, |
|
"loss": 0.3696, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 2.582935153583618, |
|
"grad_norm": 0.07257425785064697, |
|
"learning_rate": 6.710882990287585e-05, |
|
"loss": 0.3801, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 2.585665529010239, |
|
"grad_norm": 0.07554417103528976, |
|
"learning_rate": 6.688365440347965e-05, |
|
"loss": 0.3863, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 2.58839590443686, |
|
"grad_norm": 0.06943865120410919, |
|
"learning_rate": 6.665866734291205e-05, |
|
"loss": 0.383, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 2.5911262798634813, |
|
"grad_norm": 0.06850133091211319, |
|
"learning_rate": 6.643387000139565e-05, |
|
"loss": 0.3813, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 2.5938566552901023, |
|
"grad_norm": 0.06953789293766022, |
|
"learning_rate": 6.620926365807372e-05, |
|
"loss": 0.385, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.596587030716724, |
|
"grad_norm": 0.06954308599233627, |
|
"learning_rate": 6.598484959100257e-05, |
|
"loss": 0.3745, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 2.599317406143345, |
|
"grad_norm": 0.06686001271009445, |
|
"learning_rate": 6.576062907714448e-05, |
|
"loss": 0.3511, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 2.602047781569966, |
|
"grad_norm": 0.0746379867196083, |
|
"learning_rate": 6.553660339236041e-05, |
|
"loss": 0.3818, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 2.604778156996587, |
|
"grad_norm": 0.073628731071949, |
|
"learning_rate": 6.53127738114026e-05, |
|
"loss": 0.3825, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 2.6075085324232083, |
|
"grad_norm": 0.07217229157686234, |
|
"learning_rate": 6.508914160790752e-05, |
|
"loss": 0.3864, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 2.6102389078498294, |
|
"grad_norm": 0.06748787313699722, |
|
"learning_rate": 6.486570805438843e-05, |
|
"loss": 0.3853, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 2.6129692832764504, |
|
"grad_norm": 0.0762786790728569, |
|
"learning_rate": 6.46424744222283e-05, |
|
"loss": 0.385, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 2.615699658703072, |
|
"grad_norm": 0.07065171003341675, |
|
"learning_rate": 6.441944198167253e-05, |
|
"loss": 0.3924, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 2.618430034129693, |
|
"grad_norm": 0.07208285480737686, |
|
"learning_rate": 6.419661200182158e-05, |
|
"loss": 0.3943, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 2.621160409556314, |
|
"grad_norm": 0.07120097428560257, |
|
"learning_rate": 6.397398575062396e-05, |
|
"loss": 0.3799, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.623890784982935, |
|
"grad_norm": 0.06891025602817535, |
|
"learning_rate": 6.375156449486895e-05, |
|
"loss": 0.3693, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 2.6266211604095564, |
|
"grad_norm": 0.07594846189022064, |
|
"learning_rate": 6.352934950017921e-05, |
|
"loss": 0.3708, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 2.6293515358361774, |
|
"grad_norm": 0.06709565222263336, |
|
"learning_rate": 6.330734203100394e-05, |
|
"loss": 0.3732, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 2.632081911262799, |
|
"grad_norm": 0.07154542207717896, |
|
"learning_rate": 6.308554335061135e-05, |
|
"loss": 0.3883, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 2.63481228668942, |
|
"grad_norm": 0.06966784596443176, |
|
"learning_rate": 6.286395472108158e-05, |
|
"loss": 0.3842, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 2.637542662116041, |
|
"grad_norm": 0.07383087277412415, |
|
"learning_rate": 6.26425774032996e-05, |
|
"loss": 0.3855, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 2.640273037542662, |
|
"grad_norm": 0.06765507906675339, |
|
"learning_rate": 6.2421412656948e-05, |
|
"loss": 0.3828, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 2.6430034129692834, |
|
"grad_norm": 0.06942715495824814, |
|
"learning_rate": 6.220046174049968e-05, |
|
"loss": 0.3579, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 2.6457337883959045, |
|
"grad_norm": 0.07347889989614487, |
|
"learning_rate": 6.19797259112109e-05, |
|
"loss": 0.3648, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 2.6484641638225255, |
|
"grad_norm": 0.0671333596110344, |
|
"learning_rate": 6.175920642511404e-05, |
|
"loss": 0.3794, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.651194539249147, |
|
"grad_norm": 0.06726156920194626, |
|
"learning_rate": 6.153890453701031e-05, |
|
"loss": 0.3673, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 2.653924914675768, |
|
"grad_norm": 0.06900251656770706, |
|
"learning_rate": 6.131882150046291e-05, |
|
"loss": 0.3818, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 2.656655290102389, |
|
"grad_norm": 0.07286951690912247, |
|
"learning_rate": 6.109895856778967e-05, |
|
"loss": 0.3765, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 2.65938566552901, |
|
"grad_norm": 0.07597585022449493, |
|
"learning_rate": 6.087931699005588e-05, |
|
"loss": 0.3897, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 2.6621160409556315, |
|
"grad_norm": 0.0680689737200737, |
|
"learning_rate": 6.065989801706744e-05, |
|
"loss": 0.3727, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.6648464163822525, |
|
"grad_norm": 0.0685984194278717, |
|
"learning_rate": 6.044070289736352e-05, |
|
"loss": 0.3726, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 2.667576791808874, |
|
"grad_norm": 0.07317140698432922, |
|
"learning_rate": 6.0221732878209425e-05, |
|
"loss": 0.3825, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 2.670307167235495, |
|
"grad_norm": 0.07575836777687073, |
|
"learning_rate": 6.0002989205589734e-05, |
|
"loss": 0.3895, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 2.673037542662116, |
|
"grad_norm": 0.07128394395112991, |
|
"learning_rate": 5.978447312420103e-05, |
|
"loss": 0.3716, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 2.675767918088737, |
|
"grad_norm": 0.0750584676861763, |
|
"learning_rate": 5.9566185877444755e-05, |
|
"loss": 0.3799, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.6784982935153585, |
|
"grad_norm": 0.07472096383571625, |
|
"learning_rate": 5.934812870742036e-05, |
|
"loss": 0.3968, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 2.6812286689419795, |
|
"grad_norm": 0.07020480930805206, |
|
"learning_rate": 5.913030285491808e-05, |
|
"loss": 0.3686, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 2.6839590443686006, |
|
"grad_norm": 0.06642840802669525, |
|
"learning_rate": 5.891270955941184e-05, |
|
"loss": 0.3775, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 2.686689419795222, |
|
"grad_norm": 0.07389365881681442, |
|
"learning_rate": 5.869535005905232e-05, |
|
"loss": 0.3698, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 2.689419795221843, |
|
"grad_norm": 0.07101259380578995, |
|
"learning_rate": 5.847822559065992e-05, |
|
"loss": 0.3801, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.692150170648464, |
|
"grad_norm": 0.07233007252216339, |
|
"learning_rate": 5.8261337389717506e-05, |
|
"loss": 0.3828, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.694880546075085, |
|
"grad_norm": 0.07280164211988449, |
|
"learning_rate": 5.804468669036369e-05, |
|
"loss": 0.3775, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 2.6976109215017066, |
|
"grad_norm": 0.07394791394472122, |
|
"learning_rate": 5.7828274725385544e-05, |
|
"loss": 0.3617, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 2.7003412969283276, |
|
"grad_norm": 0.0712592750787735, |
|
"learning_rate": 5.761210272621175e-05, |
|
"loss": 0.375, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 2.703071672354949, |
|
"grad_norm": 0.07412825524806976, |
|
"learning_rate": 5.739617192290545e-05, |
|
"loss": 0.3899, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.70580204778157, |
|
"grad_norm": 0.06890816986560822, |
|
"learning_rate": 5.7180483544157546e-05, |
|
"loss": 0.3801, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 2.708532423208191, |
|
"grad_norm": 0.0753878653049469, |
|
"learning_rate": 5.696503881727917e-05, |
|
"loss": 0.3764, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 2.711262798634812, |
|
"grad_norm": 0.07268603146076202, |
|
"learning_rate": 5.6749838968195326e-05, |
|
"loss": 0.3823, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 2.7139931740614336, |
|
"grad_norm": 0.07173381745815277, |
|
"learning_rate": 5.653488522143744e-05, |
|
"loss": 0.3812, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 2.7167235494880546, |
|
"grad_norm": 0.07240818440914154, |
|
"learning_rate": 5.6320178800136626e-05, |
|
"loss": 0.3726, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.7194539249146756, |
|
"grad_norm": 0.07028204202651978, |
|
"learning_rate": 5.610572092601659e-05, |
|
"loss": 0.3802, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 2.722184300341297, |
|
"grad_norm": 0.07140224426984787, |
|
"learning_rate": 5.589151281938695e-05, |
|
"loss": 0.3672, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 2.724914675767918, |
|
"grad_norm": 0.07478421926498413, |
|
"learning_rate": 5.56775556991358e-05, |
|
"loss": 0.3821, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 2.727645051194539, |
|
"grad_norm": 0.07046322524547577, |
|
"learning_rate": 5.5463850782723346e-05, |
|
"loss": 0.3876, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 2.73037542662116, |
|
"grad_norm": 0.07505001872777939, |
|
"learning_rate": 5.5250399286174546e-05, |
|
"loss": 0.3903, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.7331058020477816, |
|
"grad_norm": 0.07295075058937073, |
|
"learning_rate": 5.50372024240724e-05, |
|
"loss": 0.3771, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 2.7358361774744027, |
|
"grad_norm": 0.07162769138813019, |
|
"learning_rate": 5.48242614095509e-05, |
|
"loss": 0.3861, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 2.738566552901024, |
|
"grad_norm": 0.06978384405374527, |
|
"learning_rate": 5.461157745428841e-05, |
|
"loss": 0.3842, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.741296928327645, |
|
"grad_norm": 0.0743769034743309, |
|
"learning_rate": 5.439915176850037e-05, |
|
"loss": 0.363, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 2.744027303754266, |
|
"grad_norm": 0.07271228730678558, |
|
"learning_rate": 5.418698556093271e-05, |
|
"loss": 0.3813, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.746757679180887, |
|
"grad_norm": 0.07161740958690643, |
|
"learning_rate": 5.397508003885483e-05, |
|
"loss": 0.3873, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 2.7494880546075087, |
|
"grad_norm": 0.07506071776151657, |
|
"learning_rate": 5.3763436408052904e-05, |
|
"loss": 0.394, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 2.7522184300341297, |
|
"grad_norm": 0.06938126683235168, |
|
"learning_rate": 5.3552055872822636e-05, |
|
"loss": 0.3677, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.7549488054607507, |
|
"grad_norm": 0.07216595858335495, |
|
"learning_rate": 5.334093963596294e-05, |
|
"loss": 0.3794, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 2.757679180887372, |
|
"grad_norm": 0.07905741035938263, |
|
"learning_rate": 5.313008889876865e-05, |
|
"loss": 0.3772, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.760409556313993, |
|
"grad_norm": 0.07164405286312103, |
|
"learning_rate": 5.2919504861023903e-05, |
|
"loss": 0.3774, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 2.7631399317406142, |
|
"grad_norm": 0.07869692891836166, |
|
"learning_rate": 5.270918872099522e-05, |
|
"loss": 0.3872, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.7658703071672353, |
|
"grad_norm": 0.07745044678449631, |
|
"learning_rate": 5.249914167542486e-05, |
|
"loss": 0.3786, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 2.7686006825938567, |
|
"grad_norm": 0.06824437528848648, |
|
"learning_rate": 5.228936491952363e-05, |
|
"loss": 0.3546, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 2.7713310580204777, |
|
"grad_norm": 0.07147728651762009, |
|
"learning_rate": 5.207985964696462e-05, |
|
"loss": 0.3603, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 2.774061433447099, |
|
"grad_norm": 0.06916683167219162, |
|
"learning_rate": 5.1870627049875954e-05, |
|
"loss": 0.3509, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 2.7767918088737202, |
|
"grad_norm": 0.06777457147836685, |
|
"learning_rate": 5.16616683188342e-05, |
|
"loss": 0.377, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 2.7795221843003413, |
|
"grad_norm": 0.06878098845481873, |
|
"learning_rate": 5.145298464285757e-05, |
|
"loss": 0.3787, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 2.7822525597269623, |
|
"grad_norm": 0.07464558631181717, |
|
"learning_rate": 5.12445772093992e-05, |
|
"loss": 0.368, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 2.7849829351535837, |
|
"grad_norm": 0.07435291260480881, |
|
"learning_rate": 5.103644720434027e-05, |
|
"loss": 0.3805, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.7877133105802048, |
|
"grad_norm": 0.07324662059545517, |
|
"learning_rate": 5.082859581198344e-05, |
|
"loss": 0.3744, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 2.790443686006826, |
|
"grad_norm": 0.0732622966170311, |
|
"learning_rate": 5.062102421504593e-05, |
|
"loss": 0.3733, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 2.7931740614334473, |
|
"grad_norm": 0.07599971443414688, |
|
"learning_rate": 5.041373359465289e-05, |
|
"loss": 0.3582, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 2.7959044368600683, |
|
"grad_norm": 0.07411503791809082, |
|
"learning_rate": 5.020672513033066e-05, |
|
"loss": 0.3707, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 2.7986348122866893, |
|
"grad_norm": 0.07376236468553543, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 0.3854, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.8013651877133103, |
|
"grad_norm": 0.07350826263427734, |
|
"learning_rate": 4.9793559379969566e-05, |
|
"loss": 0.3692, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 2.804095563139932, |
|
"grad_norm": 0.07208722829818726, |
|
"learning_rate": 4.958740444492892e-05, |
|
"loss": 0.3694, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 2.806825938566553, |
|
"grad_norm": 0.07334735989570618, |
|
"learning_rate": 4.9381536367942195e-05, |
|
"loss": 0.367, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 2.8095563139931743, |
|
"grad_norm": 0.0769776999950409, |
|
"learning_rate": 4.917595632044113e-05, |
|
"loss": 0.3646, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 2.8122866894197953, |
|
"grad_norm": 0.07780209183692932, |
|
"learning_rate": 4.8970665472218537e-05, |
|
"loss": 0.3918, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.8150170648464163, |
|
"grad_norm": 0.07993920892477036, |
|
"learning_rate": 4.8765664991421634e-05, |
|
"loss": 0.3703, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 2.8177474402730374, |
|
"grad_norm": 0.07131262123584747, |
|
"learning_rate": 4.856095604454539e-05, |
|
"loss": 0.3482, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 2.820477815699659, |
|
"grad_norm": 0.0709707960486412, |
|
"learning_rate": 4.835653979642585e-05, |
|
"loss": 0.3742, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 2.82320819112628, |
|
"grad_norm": 0.07897651195526123, |
|
"learning_rate": 4.815241741023367e-05, |
|
"loss": 0.3647, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 2.825938566552901, |
|
"grad_norm": 0.06946137547492981, |
|
"learning_rate": 4.7948590047467153e-05, |
|
"loss": 0.3688, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 2.8286689419795223, |
|
"grad_norm": 0.07583218812942505, |
|
"learning_rate": 4.774505886794609e-05, |
|
"loss": 0.3841, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 2.8313993174061434, |
|
"grad_norm": 0.07795160263776779, |
|
"learning_rate": 4.754182502980477e-05, |
|
"loss": 0.3697, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 2.8341296928327644, |
|
"grad_norm": 0.07436595857143402, |
|
"learning_rate": 4.7338889689485624e-05, |
|
"loss": 0.3754, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 2.8368600682593854, |
|
"grad_norm": 0.08026924729347229, |
|
"learning_rate": 4.713625400173247e-05, |
|
"loss": 0.3819, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 2.839590443686007, |
|
"grad_norm": 0.07545596361160278, |
|
"learning_rate": 4.693391911958426e-05, |
|
"loss": 0.3669, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.842320819112628, |
|
"grad_norm": 0.0736890509724617, |
|
"learning_rate": 4.673188619436798e-05, |
|
"loss": 0.3822, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 2.8450511945392494, |
|
"grad_norm": 0.07859516888856888, |
|
"learning_rate": 4.6530156375692726e-05, |
|
"loss": 0.3766, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 2.8477815699658704, |
|
"grad_norm": 0.07504123449325562, |
|
"learning_rate": 4.632873081144267e-05, |
|
"loss": 0.3593, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 2.8505119453924914, |
|
"grad_norm": 0.07197951525449753, |
|
"learning_rate": 4.6127610647770767e-05, |
|
"loss": 0.3794, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 2.8532423208191124, |
|
"grad_norm": 0.07802017033100128, |
|
"learning_rate": 4.592679702909216e-05, |
|
"loss": 0.3662, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 2.855972696245734, |
|
"grad_norm": 0.07301507890224457, |
|
"learning_rate": 4.572629109807782e-05, |
|
"loss": 0.3677, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 2.858703071672355, |
|
"grad_norm": 0.07427814602851868, |
|
"learning_rate": 4.552609399564762e-05, |
|
"loss": 0.3539, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 2.861433447098976, |
|
"grad_norm": 0.07539915293455124, |
|
"learning_rate": 4.532620686096446e-05, |
|
"loss": 0.3702, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 2.8641638225255974, |
|
"grad_norm": 0.07405383884906769, |
|
"learning_rate": 4.5126630831427264e-05, |
|
"loss": 0.3735, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 2.8668941979522184, |
|
"grad_norm": 0.07310394942760468, |
|
"learning_rate": 4.492736704266475e-05, |
|
"loss": 0.3739, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.8696245733788395, |
|
"grad_norm": 0.07626067847013474, |
|
"learning_rate": 4.472841662852888e-05, |
|
"loss": 0.3678, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 2.8723549488054605, |
|
"grad_norm": 0.07700545340776443, |
|
"learning_rate": 4.452978072108859e-05, |
|
"loss": 0.3825, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 2.875085324232082, |
|
"grad_norm": 0.070135198533535, |
|
"learning_rate": 4.4331460450623064e-05, |
|
"loss": 0.373, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 2.877815699658703, |
|
"grad_norm": 0.07328429073095322, |
|
"learning_rate": 4.413345694561549e-05, |
|
"loss": 0.3691, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.8805460750853245, |
|
"grad_norm": 0.0815819501876831, |
|
"learning_rate": 4.393577133274658e-05, |
|
"loss": 0.3548, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.8832764505119455, |
|
"grad_norm": 0.08154749870300293, |
|
"learning_rate": 4.373840473688829e-05, |
|
"loss": 0.381, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.8860068259385665, |
|
"grad_norm": 0.07183761149644852, |
|
"learning_rate": 4.354135828109707e-05, |
|
"loss": 0.3609, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.8887372013651875, |
|
"grad_norm": 0.07250452786684036, |
|
"learning_rate": 4.3344633086607955e-05, |
|
"loss": 0.3725, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.891467576791809, |
|
"grad_norm": 0.07028324902057648, |
|
"learning_rate": 4.3148230272827784e-05, |
|
"loss": 0.34, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 2.89419795221843, |
|
"grad_norm": 0.07634340226650238, |
|
"learning_rate": 4.295215095732904e-05, |
|
"loss": 0.3749, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.896928327645051, |
|
"grad_norm": 0.07495231181383133, |
|
"learning_rate": 4.275639625584338e-05, |
|
"loss": 0.3769, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 2.8996587030716725, |
|
"grad_norm": 0.0772211030125618, |
|
"learning_rate": 4.256096728225548e-05, |
|
"loss": 0.3678, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 2.9023890784982935, |
|
"grad_norm": 0.08062466979026794, |
|
"learning_rate": 4.236586514859633e-05, |
|
"loss": 0.3616, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 2.9051194539249146, |
|
"grad_norm": 0.08190742880105972, |
|
"learning_rate": 4.217109096503736e-05, |
|
"loss": 0.3818, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 2.9078498293515356, |
|
"grad_norm": 0.07490533590316772, |
|
"learning_rate": 4.197664583988376e-05, |
|
"loss": 0.373, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 2.910580204778157, |
|
"grad_norm": 0.07723496109247208, |
|
"learning_rate": 4.1782530879568374e-05, |
|
"loss": 0.3831, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 2.913310580204778, |
|
"grad_norm": 0.07193005830049515, |
|
"learning_rate": 4.1588747188645275e-05, |
|
"loss": 0.3855, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 2.9160409556313995, |
|
"grad_norm": 0.07526146620512009, |
|
"learning_rate": 4.1395295869783615e-05, |
|
"loss": 0.3737, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 2.9187713310580206, |
|
"grad_norm": 0.07713403552770615, |
|
"learning_rate": 4.1202178023761195e-05, |
|
"loss": 0.37, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 2.9215017064846416, |
|
"grad_norm": 0.07559187710285187, |
|
"learning_rate": 4.100939474945843e-05, |
|
"loss": 0.3644, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.9242320819112626, |
|
"grad_norm": 0.07406629621982574, |
|
"learning_rate": 4.0816947143851816e-05, |
|
"loss": 0.3647, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 2.926962457337884, |
|
"grad_norm": 0.07426485419273376, |
|
"learning_rate": 4.0624836302007886e-05, |
|
"loss": 0.3722, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 2.929692832764505, |
|
"grad_norm": 0.07818352431058884, |
|
"learning_rate": 4.0433063317076893e-05, |
|
"loss": 0.3656, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 2.932423208191126, |
|
"grad_norm": 0.07689237594604492, |
|
"learning_rate": 4.024162928028663e-05, |
|
"loss": 0.3612, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 2.9351535836177476, |
|
"grad_norm": 0.0766364261507988, |
|
"learning_rate": 4.0050535280936205e-05, |
|
"loss": 0.3612, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.9378839590443686, |
|
"grad_norm": 0.07136540114879608, |
|
"learning_rate": 3.985978240638981e-05, |
|
"loss": 0.3627, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 2.9406143344709896, |
|
"grad_norm": 0.07516707479953766, |
|
"learning_rate": 3.966937174207066e-05, |
|
"loss": 0.3837, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 2.9433447098976107, |
|
"grad_norm": 0.07119621336460114, |
|
"learning_rate": 3.947930437145464e-05, |
|
"loss": 0.3563, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 2.946075085324232, |
|
"grad_norm": 0.07871522009372711, |
|
"learning_rate": 3.928958137606421e-05, |
|
"loss": 0.3715, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 2.948805460750853, |
|
"grad_norm": 0.07658346742391586, |
|
"learning_rate": 3.910020383546233e-05, |
|
"loss": 0.3711, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.9515358361774746, |
|
"grad_norm": 0.07438597828149796, |
|
"learning_rate": 3.8911172827246215e-05, |
|
"loss": 0.3703, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 2.9542662116040956, |
|
"grad_norm": 0.07197795808315277, |
|
"learning_rate": 3.8722489427041185e-05, |
|
"loss": 0.3621, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 2.9569965870307167, |
|
"grad_norm": 0.07549617439508438, |
|
"learning_rate": 3.853415470849479e-05, |
|
"loss": 0.3736, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 2.9597269624573377, |
|
"grad_norm": 0.0718371719121933, |
|
"learning_rate": 3.834616974327021e-05, |
|
"loss": 0.3755, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 2.962457337883959, |
|
"grad_norm": 0.07231539487838745, |
|
"learning_rate": 3.815853560104075e-05, |
|
"loss": 0.3786, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 2.96518771331058, |
|
"grad_norm": 0.07186778634786606, |
|
"learning_rate": 3.7971253349483285e-05, |
|
"loss": 0.3679, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 2.967918088737201, |
|
"grad_norm": 0.07404022663831711, |
|
"learning_rate": 3.7784324054272405e-05, |
|
"loss": 0.3638, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 2.9706484641638227, |
|
"grad_norm": 0.07804732024669647, |
|
"learning_rate": 3.759774877907428e-05, |
|
"loss": 0.3728, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 2.9733788395904437, |
|
"grad_norm": 0.07797016203403473, |
|
"learning_rate": 3.741152858554077e-05, |
|
"loss": 0.3704, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 2.9761092150170647, |
|
"grad_norm": 0.07321101427078247, |
|
"learning_rate": 3.722566453330298e-05, |
|
"loss": 0.3517, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.9788395904436857, |
|
"grad_norm": 0.08448322117328644, |
|
"learning_rate": 3.7040157679965796e-05, |
|
"loss": 0.3875, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 2.981569965870307, |
|
"grad_norm": 0.07892005890607834, |
|
"learning_rate": 3.6855009081101355e-05, |
|
"loss": 0.3686, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 2.9843003412969282, |
|
"grad_norm": 0.07619816064834595, |
|
"learning_rate": 3.6670219790243344e-05, |
|
"loss": 0.356, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 2.9870307167235497, |
|
"grad_norm": 0.07540540397167206, |
|
"learning_rate": 3.648579085888085e-05, |
|
"loss": 0.3564, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 2.9897610921501707, |
|
"grad_norm": 0.07253746688365936, |
|
"learning_rate": 3.630172333645261e-05, |
|
"loss": 0.3771, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 2.9924914675767917, |
|
"grad_norm": 0.07068438827991486, |
|
"learning_rate": 3.611801827034059e-05, |
|
"loss": 0.3645, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 2.9952218430034128, |
|
"grad_norm": 0.07300886511802673, |
|
"learning_rate": 3.593467670586457e-05, |
|
"loss": 0.3619, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 2.9979522184300342, |
|
"grad_norm": 0.071599081158638, |
|
"learning_rate": 3.5751699686275786e-05, |
|
"loss": 0.369, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 2.9979522184300342, |
|
"eval_loss": 0.37550708651542664, |
|
"eval_runtime": 309.4712, |
|
"eval_samples_per_second": 8.414, |
|
"eval_steps_per_second": 1.053, |
|
"step": 1098 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1464, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.555865714861015e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|