{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1035,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.002898550724637681, "grad_norm": 1.7796895708856793, "learning_rate": 3.125e-06, "loss": 1.8514, "step": 1 },
    { "epoch": 0.005797101449275362, "grad_norm": 1.742548277798407, "learning_rate": 6.25e-06, "loss": 1.937, "step": 2 },
    { "epoch": 0.008695652173913044, "grad_norm": 1.5905530955603362, "learning_rate": 9.375000000000001e-06, "loss": 1.8724, "step": 3 },
    { "epoch": 0.011594202898550725, "grad_norm": 1.6592768688949988, "learning_rate": 1.25e-05, "loss": 1.8877, "step": 4 },
    { "epoch": 0.014492753623188406, "grad_norm": 1.4035260613846172, "learning_rate": 1.5625e-05, "loss": 1.8086, "step": 5 },
    { "epoch": 0.017391304347826087, "grad_norm": 0.9555449880629443, "learning_rate": 1.8750000000000002e-05, "loss": 1.7276, "step": 6 },
    { "epoch": 0.020289855072463767, "grad_norm": 0.7915967541673472, "learning_rate": 2.1875e-05, "loss": 1.771, "step": 7 },
    { "epoch": 0.02318840579710145, "grad_norm": 0.7599954441380682, "learning_rate": 2.5e-05, "loss": 1.7122, "step": 8 },
    { "epoch": 0.02608695652173913, "grad_norm": 0.7128173682386719, "learning_rate": 2.8125000000000003e-05, "loss": 1.5764, "step": 9 },
    { "epoch": 0.028985507246376812, "grad_norm": 0.6773249478496584, "learning_rate": 3.125e-05, "loss": 1.5811, "step": 10 },
    { "epoch": 0.03188405797101449, "grad_norm": 0.6571598838212039, "learning_rate": 3.4375e-05, "loss": 1.6191, "step": 11 },
    { "epoch": 0.034782608695652174, "grad_norm": 0.6261792389264198, "learning_rate": 3.7500000000000003e-05, "loss": 1.5684, "step": 12 },
    { "epoch": 0.03768115942028986, "grad_norm": 0.5143810493601375, "learning_rate": 4.0625000000000005e-05, "loss": 1.5375, "step": 13 },
    { "epoch": 0.04057971014492753, "grad_norm": 0.4855788824689092, "learning_rate": 4.375e-05, "loss": 1.5076, "step": 14 },
    { "epoch": 0.043478260869565216, "grad_norm": 0.473950934451779, "learning_rate": 4.6875e-05, "loss": 1.5083, "step": 15 },
    { "epoch": 0.0463768115942029, "grad_norm": 0.48567111749562547, "learning_rate": 5e-05, "loss": 1.6137, "step": 16 },
    { "epoch": 0.04927536231884058, "grad_norm": 0.43610179775052604, "learning_rate": 5.3125000000000004e-05, "loss": 1.5325, "step": 17 },
    { "epoch": 0.05217391304347826, "grad_norm": 0.4412188197378122, "learning_rate": 5.6250000000000005e-05, "loss": 1.555, "step": 18 },
    { "epoch": 0.05507246376811594, "grad_norm": 0.43034730708585867, "learning_rate": 5.9375e-05, "loss": 1.5453, "step": 19 },
    { "epoch": 0.057971014492753624, "grad_norm": 0.41694045848699307, "learning_rate": 6.25e-05, "loss": 1.5362, "step": 20 },
    { "epoch": 0.06086956521739131, "grad_norm": 0.4093648088428465, "learning_rate": 6.562500000000001e-05, "loss": 1.4596, "step": 21 },
    { "epoch": 0.06376811594202898, "grad_norm": 0.42036605295826535, "learning_rate": 6.875e-05, "loss": 1.5156, "step": 22 },
    { "epoch": 0.06666666666666667, "grad_norm": 0.4140215214641256, "learning_rate": 7.1875e-05, "loss": 1.5021, "step": 23 },
    { "epoch": 0.06956521739130435, "grad_norm": 0.41797125446436384, "learning_rate": 7.500000000000001e-05, "loss": 1.5595, "step": 24 },
    { "epoch": 0.07246376811594203, "grad_norm": 0.40448941023881985, "learning_rate": 7.8125e-05, "loss": 1.5284, "step": 25 },
    { "epoch": 0.07536231884057971, "grad_norm": 0.36201429136045177, "learning_rate": 8.125000000000001e-05, "loss": 1.5402, "step": 26 },
    { "epoch": 0.0782608695652174, "grad_norm": 0.38159291388896194, "learning_rate": 8.4375e-05, "loss": 1.4545, "step": 27 },
    { "epoch": 0.08115942028985507, "grad_norm": 0.39563825256543766, "learning_rate": 8.75e-05, "loss": 1.476, "step": 28 },
    { "epoch": 0.08405797101449275, "grad_norm": 0.3853757557962818, "learning_rate": 9.062500000000001e-05, "loss": 1.5553, "step": 29 },
    { "epoch": 0.08695652173913043, "grad_norm": 0.3715963100647608, "learning_rate": 9.375e-05, "loss": 1.4923, "step": 30 },
    { "epoch": 0.08985507246376812, "grad_norm": 0.3972739650610925, "learning_rate": 9.687500000000001e-05, "loss": 1.4166, "step": 31 },
    { "epoch": 0.0927536231884058, "grad_norm": 0.3709663185634906, "learning_rate": 0.0001, "loss": 1.4904, "step": 32 },
    { "epoch": 0.09565217391304348, "grad_norm": 0.37818493311274604, "learning_rate": 9.999975473389572e-05, "loss": 1.4303, "step": 33 },
    { "epoch": 0.09855072463768116, "grad_norm": 0.3727893878233448, "learning_rate": 9.999901893798909e-05, "loss": 1.5126, "step": 34 },
    { "epoch": 0.10144927536231885, "grad_norm": 0.3573590861971531, "learning_rate": 9.999779261949875e-05, "loss": 1.4088, "step": 35 },
    { "epoch": 0.10434782608695652, "grad_norm": 0.3962649324463349, "learning_rate": 9.999607579045565e-05, "loss": 1.4718, "step": 36 },
    { "epoch": 0.1072463768115942, "grad_norm": 0.3629563065883299, "learning_rate": 9.999386846770303e-05, "loss": 1.5376, "step": 37 },
    { "epoch": 0.11014492753623188, "grad_norm": 0.37698476595481845, "learning_rate": 9.99911706728961e-05, "loss": 1.5497, "step": 38 },
    { "epoch": 0.11304347826086956, "grad_norm": 0.36517596222828796, "learning_rate": 9.9987982432502e-05, "loss": 1.3701, "step": 39 },
    { "epoch": 0.11594202898550725, "grad_norm": 0.3754942171540997, "learning_rate": 9.998430377779942e-05, "loss": 1.4751, "step": 40 },
    { "epoch": 0.11884057971014493, "grad_norm": 0.37273876645697823, "learning_rate": 9.998013474487833e-05, "loss": 1.4959, "step": 41 },
    { "epoch": 0.12173913043478261, "grad_norm": 0.36526298295975423, "learning_rate": 9.99754753746396e-05, "loss": 1.477, "step": 42 },
    { "epoch": 0.1246376811594203, "grad_norm": 0.4028151666513751, "learning_rate": 9.99703257127947e-05, "loss": 1.4273, "step": 43 },
    { "epoch": 0.12753623188405797, "grad_norm": 0.3669671633234476, "learning_rate": 9.99646858098651e-05, "loss": 1.3938, "step": 44 },
    { "epoch": 0.13043478260869565, "grad_norm": 0.33083829945323007, "learning_rate": 9.995855572118186e-05, "loss": 1.4102, "step": 45 },
    { "epoch": 0.13333333333333333, "grad_norm": 0.3478285593739705, "learning_rate": 9.995193550688517e-05, "loss": 1.4027, "step": 46 },
    { "epoch": 0.13623188405797101, "grad_norm": 0.37609834638001705, "learning_rate": 9.994482523192352e-05, "loss": 1.4909, "step": 47 },
    { "epoch": 0.1391304347826087, "grad_norm": 0.3544704730906117, "learning_rate": 9.993722496605333e-05, "loss": 1.4603, "step": 48 },
    { "epoch": 0.14202898550724638, "grad_norm": 0.35471120831090747, "learning_rate": 9.99291347838381e-05, "loss": 1.4591, "step": 49 },
    { "epoch": 0.14492753623188406, "grad_norm": 0.3522333621422469, "learning_rate": 9.992055476464772e-05, "loss": 1.4661, "step": 50 },
    { "epoch": 0.14782608695652175, "grad_norm": 0.40369049060969037, "learning_rate": 9.991148499265771e-05, "loss": 1.3549, "step": 51 },
    { "epoch": 0.15072463768115943, "grad_norm": 0.37654258677829533, "learning_rate": 9.990192555684837e-05, "loss": 1.4566, "step": 52 },
    { "epoch": 0.1536231884057971, "grad_norm": 0.35023666520198726, "learning_rate": 9.989187655100394e-05, "loss": 1.4291, "step": 53 },
    { "epoch": 0.1565217391304348, "grad_norm": 0.3713582044260089, "learning_rate": 9.98813380737116e-05, "loss": 1.4899, "step": 54 },
    { "epoch": 0.15942028985507245, "grad_norm": 0.3483542245496034, "learning_rate": 9.987031022836066e-05, "loss": 1.422, "step": 55 },
    { "epoch": 0.16231884057971013, "grad_norm": 0.3428096360294795, "learning_rate": 9.985879312314135e-05, "loss": 1.417, "step": 56 },
    { "epoch": 0.16521739130434782, "grad_norm": 0.3645827259974512, "learning_rate": 9.984678687104389e-05, "loss": 1.4285, "step": 57 },
    { "epoch": 0.1681159420289855, "grad_norm": 0.35685607542080316, "learning_rate": 9.983429158985736e-05, "loss": 1.3918, "step": 58 },
    { "epoch": 0.17101449275362318, "grad_norm": 0.3370796491973602, "learning_rate": 9.982130740216849e-05, "loss": 1.4129, "step": 59 },
    { "epoch": 0.17391304347826086, "grad_norm": 0.3444756598243817, "learning_rate": 9.980783443536057e-05, "loss": 1.4355, "step": 60 },
    { "epoch": 0.17681159420289855, "grad_norm": 0.3436241209978691, "learning_rate": 9.979387282161206e-05, "loss": 1.4583, "step": 61 },
    { "epoch": 0.17971014492753623, "grad_norm": 0.32218525116364366, "learning_rate": 9.977942269789537e-05, "loss": 1.4524, "step": 62 },
    { "epoch": 0.1826086956521739, "grad_norm": 0.385973703132524, "learning_rate": 9.976448420597556e-05, "loss": 1.4419, "step": 63 },
    { "epoch": 0.1855072463768116, "grad_norm": 1.7247641389853836, "learning_rate": 9.974905749240882e-05, "loss": 1.3425, "step": 64 },
    { "epoch": 0.18840579710144928, "grad_norm": 0.3447341772023887, "learning_rate": 9.973314270854115e-05, "loss": 1.528, "step": 65 },
    { "epoch": 0.19130434782608696, "grad_norm": 0.35835098628054646, "learning_rate": 9.971674001050686e-05, "loss": 1.4713, "step": 66 },
    { "epoch": 0.19420289855072465, "grad_norm": 0.365150351821878, "learning_rate": 9.969984955922697e-05, "loss": 1.4537, "step": 67 },
    { "epoch": 0.19710144927536233, "grad_norm": 0.3866963594083402, "learning_rate": 9.968247152040768e-05, "loss": 1.5055, "step": 68 },
    { "epoch": 0.2, "grad_norm": 0.35045697501626877, "learning_rate": 9.966460606453875e-05, "loss": 1.4434, "step": 69 },
    { "epoch": 0.2028985507246377, "grad_norm": 0.36817264001563493, "learning_rate": 9.964625336689181e-05, "loss": 1.4294, "step": 70 },
    { "epoch": 0.20579710144927535, "grad_norm": 0.3654904538276859, "learning_rate": 9.962741360751866e-05, "loss": 1.4308, "step": 71 },
    { "epoch": 0.20869565217391303, "grad_norm": 0.3781497670043016, "learning_rate": 9.960808697124946e-05, "loss": 1.4685, "step": 72 },
    { "epoch": 0.21159420289855072, "grad_norm": 0.36156099913405126, "learning_rate": 9.958827364769097e-05, "loss": 1.4062, "step": 73 },
    { "epoch": 0.2144927536231884, "grad_norm": 0.35552781851256704, "learning_rate": 9.956797383122463e-05, "loss": 1.4428, "step": 74 },
    { "epoch": 0.21739130434782608, "grad_norm": 0.3335062272759448, "learning_rate": 9.954718772100476e-05, "loss": 1.4467, "step": 75 },
    { "epoch": 0.22028985507246376, "grad_norm": 0.3427215995763061, "learning_rate": 9.952591552095646e-05, "loss": 1.5089, "step": 76 },
    { "epoch": 0.22318840579710145, "grad_norm": 0.34794374393691757, "learning_rate": 9.950415743977373e-05, "loss": 1.4051, "step": 77 },
    { "epoch": 0.22608695652173913, "grad_norm": 0.3404770224687481, "learning_rate": 9.948191369091735e-05, "loss": 1.3876, "step": 78 },
    { "epoch": 0.2289855072463768, "grad_norm": 0.34102132992338396, "learning_rate": 9.945918449261282e-05, "loss": 1.4369, "step": 79 },
    { "epoch": 0.2318840579710145, "grad_norm": 0.33638460547428023, "learning_rate": 9.943597006784825e-05, "loss": 1.4164, "step": 80 },
    { "epoch": 0.23478260869565218, "grad_norm": 0.35290031375473546, "learning_rate": 9.941227064437207e-05, "loss": 1.3796, "step": 81 },
    { "epoch": 0.23768115942028986, "grad_norm": 0.3463360857934043, "learning_rate": 9.93880864546909e-05, "loss": 1.4276, "step": 82 },
    { "epoch": 0.24057971014492754, "grad_norm": 0.3566368609252091, "learning_rate": 9.936341773606723e-05, "loss": 1.4967, "step": 83 },
    { "epoch": 0.24347826086956523, "grad_norm": 0.3373773040313267, "learning_rate": 9.933826473051707e-05, "loss": 1.4079, "step": 84 },
    { "epoch": 0.2463768115942029, "grad_norm": 0.3393580838287239, "learning_rate": 9.93126276848076e-05, "loss": 1.4131, "step": 85 },
    { "epoch": 0.2492753623188406, "grad_norm": 0.3520135073078003, "learning_rate": 9.928650685045477e-05, "loss": 1.4729, "step": 86 },
    { "epoch": 0.25217391304347825, "grad_norm": 0.3526725034511152, "learning_rate": 9.925990248372076e-05, "loss": 1.4314, "step": 87 },
    { "epoch": 0.25507246376811593, "grad_norm": 0.3433193515525383, "learning_rate": 9.92328148456116e-05, "loss": 1.4505, "step": 88 },
    { "epoch": 0.2579710144927536, "grad_norm": 0.33837489039921237, "learning_rate": 9.920524420187443e-05, "loss": 1.4481, "step": 89 },
    { "epoch": 0.2608695652173913, "grad_norm": 0.33988682832234424, "learning_rate": 9.917719082299501e-05, "loss": 1.4149, "step": 90 },
    { "epoch": 0.263768115942029, "grad_norm": 0.33940846094652855, "learning_rate": 9.91486549841951e-05, "loss": 1.3847, "step": 91 },
    { "epoch": 0.26666666666666666, "grad_norm": 0.31996832381114065, "learning_rate": 9.911963696542963e-05, "loss": 1.3112, "step": 92 },
    { "epoch": 0.26956521739130435, "grad_norm": 0.31493707135599436, "learning_rate": 9.909013705138406e-05, "loss": 1.4216, "step": 93 },
    { "epoch": 0.27246376811594203, "grad_norm": 0.3204454590090509, "learning_rate": 9.906015553147158e-05, "loss": 1.3755, "step": 94 },
    { "epoch": 0.2753623188405797, "grad_norm": 0.3408318845906397, "learning_rate": 9.902969269983018e-05, "loss": 1.4574, "step": 95 },
    { "epoch": 0.2782608695652174, "grad_norm": 0.3196195350266631, "learning_rate": 9.899874885531987e-05, "loss": 1.4022, "step": 96 },
    { "epoch": 0.2811594202898551, "grad_norm": 0.33440793327421947, "learning_rate": 9.89673243015197e-05, "loss": 1.3766, "step": 97 },
    { "epoch": 0.28405797101449276, "grad_norm": 0.33693013386726023, "learning_rate": 9.893541934672479e-05, "loss": 1.4676, "step": 98 },
    { "epoch": 0.28695652173913044, "grad_norm": 0.3467550636007772, "learning_rate": 9.890303430394328e-05, "loss": 1.365, "step": 99 },
    { "epoch": 0.2898550724637681, "grad_norm": 0.3333645230781809, "learning_rate": 9.887016949089333e-05, "loss": 1.3514, "step": 100 },
    { "epoch": 0.2927536231884058, "grad_norm": 0.34610516226844007, "learning_rate": 9.883682522999992e-05, "loss": 1.4499, "step": 101 },
    { "epoch": 0.2956521739130435, "grad_norm": 0.3268443889818303, "learning_rate": 9.88030018483917e-05, "loss": 1.4303, "step": 102 },
    { "epoch": 0.2985507246376812, "grad_norm": 0.33465469810861087, "learning_rate": 9.876869967789788e-05, "loss": 1.3757, "step": 103 },
    { "epoch": 0.30144927536231886, "grad_norm": 0.33038430224796766, "learning_rate": 9.87339190550448e-05, "loss": 1.3676, "step": 104 },
    { "epoch": 0.30434782608695654, "grad_norm": 0.3404214439604057, "learning_rate": 9.86986603210528e-05, "loss": 1.3974, "step": 105 },
    { "epoch": 0.3072463768115942, "grad_norm": 0.32959296551839845, "learning_rate": 9.866292382183278e-05, "loss": 1.3484, "step": 106 },
    { "epoch": 0.3101449275362319, "grad_norm": 0.381137959130174, "learning_rate": 9.86267099079828e-05, "loss": 1.4149, "step": 107 },
    { "epoch": 0.3130434782608696, "grad_norm": 0.33114126577828235, "learning_rate": 9.859001893478468e-05, "loss": 1.3599, "step": 108 },
    { "epoch": 0.3159420289855073, "grad_norm": 0.36021993638794775, "learning_rate": 9.855285126220053e-05, "loss": 1.413, "step": 109 },
    { "epoch": 0.3188405797101449, "grad_norm": 0.355739607205717, "learning_rate": 9.851520725486914e-05, "loss": 1.4064, "step": 110 },
    { "epoch": 0.3217391304347826, "grad_norm": 0.3263260079885549, "learning_rate": 9.847708728210246e-05, "loss": 1.4048, "step": 111 },
    { "epoch": 0.32463768115942027, "grad_norm": 0.3199488973648368, "learning_rate": 9.8438491717882e-05, "loss": 1.3944, "step": 112 },
    { "epoch": 0.32753623188405795, "grad_norm": 0.3336592320156713, "learning_rate": 9.839942094085511e-05, "loss": 1.3799, "step": 113 },
    { "epoch": 0.33043478260869563, "grad_norm": 0.32960061743745567, "learning_rate": 9.835987533433126e-05, "loss": 1.43, "step": 114 },
    { "epoch": 0.3333333333333333, "grad_norm": 0.35822567336767946, "learning_rate": 9.831985528627834e-05, "loss": 1.4404, "step": 115 },
    { "epoch": 0.336231884057971, "grad_norm": 0.32466006600725356, "learning_rate": 9.82793611893188e-05, "loss": 1.391, "step": 116 },
    { "epoch": 0.3391304347826087, "grad_norm": 0.3452303089687653, "learning_rate": 9.82383934407258e-05, "loss": 1.4571, "step": 117 },
    { "epoch": 0.34202898550724636, "grad_norm": 0.3531330388118067, "learning_rate": 9.819695244241936e-05, "loss": 1.4726, "step": 118 },
    { "epoch": 0.34492753623188405, "grad_norm": 0.3284144929554227, "learning_rate": 9.815503860096238e-05, "loss": 1.4636, "step": 119 },
    { "epoch": 0.34782608695652173, "grad_norm": 0.33589451825622024, "learning_rate": 9.811265232755662e-05, "loss": 1.4076, "step": 120 },
    { "epoch": 0.3507246376811594, "grad_norm": 0.33465490795732467, "learning_rate": 9.806979403803873e-05, "loss": 1.3757, "step": 121 },
    { "epoch": 0.3536231884057971, "grad_norm": 0.35161889623674547, "learning_rate": 9.802646415287615e-05, "loss": 1.4065, "step": 122 },
    { "epoch": 0.3565217391304348, "grad_norm": 0.31894482948146224, "learning_rate": 9.798266309716295e-05, "loss": 1.4455, "step": 123 },
    { "epoch": 0.35942028985507246, "grad_norm": 0.3263915498362111, "learning_rate": 9.793839130061573e-05, "loss": 1.3291, "step": 124 },
    { "epoch": 0.36231884057971014, "grad_norm": 0.3264781414125749, "learning_rate": 9.78936491975693e-05, "loss": 1.3977, "step": 125 },
    { "epoch": 0.3652173913043478, "grad_norm": 0.3322110798968971, "learning_rate": 9.784843722697253e-05, "loss": 1.4516, "step": 126 },
    { "epoch": 0.3681159420289855, "grad_norm": 0.33040915159162, "learning_rate": 9.780275583238397e-05, "loss": 1.4418, "step": 127 },
    { "epoch": 0.3710144927536232, "grad_norm": 0.32982903923865825, "learning_rate": 9.775660546196753e-05, "loss": 1.399, "step": 128 },
    { "epoch": 0.3739130434782609, "grad_norm": 0.3398856478969671, "learning_rate": 9.770998656848806e-05, "loss": 1.4917, "step": 129 },
    { "epoch": 0.37681159420289856, "grad_norm": 0.33812428837562564, "learning_rate": 9.766289960930697e-05, "loss": 1.4136, "step": 130 },
    { "epoch": 0.37971014492753624, "grad_norm": 0.32546513362934915, "learning_rate": 9.761534504637761e-05, "loss": 1.4245, "step": 131 },
    { "epoch": 0.3826086956521739, "grad_norm": 0.3379554295481369, "learning_rate": 9.756732334624093e-05, "loss": 1.3917, "step": 132 },
    { "epoch": 0.3855072463768116, "grad_norm": 0.3196806084479148, "learning_rate": 9.751883498002071e-05, "loss": 1.3608, "step": 133 },
    { "epoch": 0.3884057971014493, "grad_norm": 0.366228317842041, "learning_rate": 9.746988042341906e-05, "loss": 1.3728, "step": 134 },
    { "epoch": 0.391304347826087, "grad_norm": 0.3769852522598798, "learning_rate": 9.742046015671174e-05, "loss": 1.4481, "step": 135 },
    { "epoch": 0.39420289855072466, "grad_norm": 0.34122072082269356, "learning_rate": 9.737057466474336e-05, "loss": 1.4195, "step": 136 },
    { "epoch": 0.39710144927536234, "grad_norm": 0.3322686505315165, "learning_rate": 9.732022443692276e-05, "loss": 1.399, "step": 137 },
    { "epoch": 0.4, "grad_norm": 0.3296309366287408, "learning_rate": 9.726940996721811e-05, "loss": 1.421, "step": 138 },
    { "epoch": 0.4028985507246377, "grad_norm": 0.37435872581479346, "learning_rate": 9.721813175415208e-05, "loss": 1.4244, "step": 139 },
    { "epoch": 0.4057971014492754, "grad_norm": 0.3268496453435604, "learning_rate": 9.716639030079697e-05, "loss": 1.4099, "step": 140 },
    { "epoch": 0.40869565217391307, "grad_norm": 0.3554430337628762, "learning_rate": 9.711418611476977e-05, "loss": 1.4446, "step": 141 },
    { "epoch": 0.4115942028985507, "grad_norm": 0.33834590076214077, "learning_rate": 9.706151970822718e-05, "loss": 1.3205, "step": 142 },
    { "epoch": 0.4144927536231884, "grad_norm": 0.3414240635513846, "learning_rate": 9.700839159786057e-05, "loss": 1.4534, "step": 143 },
    { "epoch": 0.41739130434782606, "grad_norm": 0.32930885329942156, "learning_rate": 9.695480230489093e-05, "loss": 1.3587, "step": 144 },
    { "epoch": 0.42028985507246375, "grad_norm": 0.3390309331331547, "learning_rate": 9.690075235506374e-05, "loss": 1.339, "step": 145 },
    { "epoch": 0.42318840579710143, "grad_norm": 0.33898351347591354, "learning_rate": 9.684624227864383e-05, "loss": 1.3774, "step": 146 },
    { "epoch": 0.4260869565217391, "grad_norm": 0.3229718369377447, "learning_rate": 9.679127261041015e-05, "loss": 1.3538, "step": 147 },
    { "epoch": 0.4289855072463768, "grad_norm": 0.3375751395632948, "learning_rate": 9.673584388965058e-05, "loss": 1.4375, "step": 148 },
    { "epoch": 0.4318840579710145, "grad_norm": 0.3267376187700775, "learning_rate": 9.667995666015654e-05, "loss": 1.4029, "step": 149 },
    { "epoch": 0.43478260869565216, "grad_norm": 0.34796705983800497, "learning_rate": 9.662361147021779e-05, "loss": 1.4493, "step": 150 },
    { "epoch": 0.43768115942028984, "grad_norm": 0.3182925069013053, "learning_rate": 9.656680887261693e-05, "loss": 1.3708, "step": 151 },
    { "epoch": 0.4405797101449275, "grad_norm": 0.3408199380471595, "learning_rate": 9.650954942462401e-05, "loss": 1.4098, "step": 152 },
    { "epoch": 0.4434782608695652, "grad_norm": 0.33412473685571564, "learning_rate": 9.645183368799113e-05, "loss": 1.4252, "step": 153 },
    { "epoch": 0.4463768115942029, "grad_norm": 0.3318159670621602, "learning_rate": 9.639366222894682e-05, "loss": 1.4233, "step": 154 },
    { "epoch": 0.4492753623188406, "grad_norm": 0.34440731389898754, "learning_rate": 9.63350356181906e-05, "loss": 1.3829, "step": 155 },
    { "epoch": 0.45217391304347826, "grad_norm": 0.35692903412852806, "learning_rate": 9.627595443088724e-05, "loss": 1.357, "step": 156 },
    { "epoch": 0.45507246376811594, "grad_norm": 0.33466758251653783, "learning_rate": 9.621641924666127e-05, "loss": 1.406, "step": 157 },
    { "epoch": 0.4579710144927536, "grad_norm": 0.3366286518639209, "learning_rate": 9.615643064959122e-05, "loss": 1.4249, "step": 158 },
    { "epoch": 0.4608695652173913, "grad_norm": 0.32884355157952677, "learning_rate": 9.609598922820382e-05, "loss": 1.4149, "step": 159 },
    { "epoch": 0.463768115942029, "grad_norm": 0.3323077335804954, "learning_rate": 9.60350955754684e-05, "loss": 1.3898, "step": 160 },
    { "epoch": 0.4666666666666667, "grad_norm": 0.3284011884136777, "learning_rate": 9.597375028879088e-05, "loss": 1.3761, "step": 161 },
    { "epoch": 0.46956521739130436, "grad_norm": 0.33628429126159637, "learning_rate": 9.591195397000805e-05, "loss": 1.4473, "step": 162 },
    { "epoch": 0.47246376811594204, "grad_norm": 0.3479467044598075, "learning_rate": 9.584970722538162e-05, "loss": 1.4025, "step": 163 },
    { "epoch": 0.4753623188405797, "grad_norm": 0.34445922830801295, "learning_rate": 9.578701066559225e-05, "loss": 1.397, "step": 164 },
    { "epoch": 0.4782608695652174, "grad_norm": 0.3398702574419618, "learning_rate": 9.572386490573357e-05, "loss": 1.3751, "step": 165 },
    { "epoch": 0.4811594202898551, "grad_norm": 0.31614740777820005, "learning_rate": 9.566027056530615e-05, "loss": 1.3098, "step": 166 },
    { "epoch": 0.48405797101449277, "grad_norm": 0.3444149821598331, "learning_rate": 9.559622826821145e-05, "loss": 1.3685, "step": 167 },
    { "epoch": 0.48695652173913045, "grad_norm": 0.3455185724902944, "learning_rate": 9.553173864274567e-05, "loss": 1.4413, "step": 168 },
    { "epoch": 0.48985507246376814, "grad_norm": 0.32774886376386325, "learning_rate": 9.546680232159355e-05, "loss": 1.4031, "step": 169 },
    { "epoch": 0.4927536231884058, "grad_norm": 0.32560244502643815, "learning_rate": 9.540141994182225e-05, "loss": 1.4364, "step": 170 },
    { "epoch": 0.4956521739130435, "grad_norm": 0.34398546887992665, "learning_rate": 9.533559214487503e-05, "loss": 1.409, "step": 171 },
    { "epoch": 0.4985507246376812, "grad_norm": 0.39583900001909544, "learning_rate": 9.526931957656497e-05, "loss": 1.4527, "step": 172 },
    { "epoch": 0.5014492753623189, "grad_norm": 0.4626708756395286, "learning_rate": 9.520260288706867e-05, "loss": 1.4624, "step": 173 },
    { "epoch": 0.5043478260869565, "grad_norm": 0.3664093495829884, "learning_rate": 9.513544273091983e-05, "loss": 1.4639, "step": 174 },
    { "epoch": 0.5072463768115942, "grad_norm": 0.36499531804230495, "learning_rate": 9.506783976700285e-05, "loss": 1.4065, "step": 175 },
    { "epoch": 0.5101449275362319, "grad_norm": 0.33176315803612266, "learning_rate": 9.499979465854633e-05, "loss": 1.3712, "step": 176 },
    { "epoch": 0.5130434782608696, "grad_norm": 0.31906615813652695, "learning_rate": 9.493130807311663e-05, "loss": 1.4081, "step": 177 },
    { "epoch": 0.5159420289855072, "grad_norm": 0.34052218389638056, "learning_rate": 9.486238068261129e-05, "loss": 1.4268, "step": 178 },
    { "epoch": 0.518840579710145, "grad_norm": 0.3336134893967437, "learning_rate": 9.479301316325237e-05, "loss": 1.4078, "step": 179 },
    { "epoch": 0.5217391304347826, "grad_norm": 0.3360766687427952, "learning_rate": 9.472320619557997e-05, "loss": 1.3766, "step": 180 },
    { "epoch": 0.5246376811594203, "grad_norm": 0.3221253265397745, "learning_rate": 9.465296046444538e-05, "loss": 1.3538, "step": 181 },
    { "epoch": 0.527536231884058, "grad_norm": 0.33953118483885136, "learning_rate": 9.458227665900446e-05, "loss": 1.3964, "step": 182 },
    { "epoch": 0.5304347826086957, "grad_norm": 0.33685849921565403, "learning_rate": 9.45111554727109e-05, "loss": 1.4249, "step": 183 },
    { "epoch": 0.5333333333333333, "grad_norm": 0.35947381917427984, "learning_rate": 9.443959760330934e-05, "loss": 1.4087, "step": 184 },
    { "epoch": 0.5362318840579711, "grad_norm": 0.33994296278210917, "learning_rate": 9.436760375282859e-05, "loss": 1.3951, "step": 185 },
    { "epoch": 0.5391304347826087, "grad_norm": 0.3470448028628382, "learning_rate": 9.429517462757467e-05, "loss": 1.3688, "step": 186 },
    { "epoch": 0.5420289855072464, "grad_norm": 0.33294443162653775, "learning_rate": 9.422231093812398e-05, "loss": 1.3679, "step": 187 },
    { "epoch": 0.5449275362318841, "grad_norm": 0.31454677711788814, "learning_rate": 9.414901339931624e-05, "loss": 1.4419, "step": 188 },
    { "epoch": 0.5478260869565217, "grad_norm": 0.3434839073644547, "learning_rate": 9.407528273024752e-05, "loss": 1.3949, "step": 189 },
    { "epoch": 0.5507246376811594, "grad_norm": 0.3351386886311035, "learning_rate": 9.400111965426319e-05, "loss": 1.4022, "step": 190 },
    { "epoch": 0.553623188405797, "grad_norm": 0.3358706804811382, "learning_rate": 9.39265248989508e-05, "loss": 1.3474, "step": 191 },
    { "epoch": 0.5565217391304348, "grad_norm": 0.3572071382586898, "learning_rate": 9.385149919613292e-05, "loss": 1.3889, "step": 192 },
    { "epoch": 0.5594202898550724, "grad_norm": 0.3287944467382312, "learning_rate": 9.377604328186008e-05, "loss": 1.3805, "step": 193 },
    { "epoch": 0.5623188405797102, "grad_norm": 0.36810650453304095, "learning_rate": 9.370015789640334e-05, "loss": 1.4075, "step": 194 },
    { "epoch": 0.5652173913043478, "grad_norm": 0.3868422779658168, "learning_rate": 9.362384378424726e-05, "loss": 1.4251, "step": 195 },
    { "epoch": 0.5681159420289855, "grad_norm": 0.3295019502277694, "learning_rate": 9.354710169408243e-05, "loss": 1.4139, "step": 196 },
    { "epoch": 0.5710144927536231, "grad_norm": 0.3468700259339786, "learning_rate": 9.346993237879817e-05, "loss": 1.366, "step": 197 },
    { "epoch": 0.5739130434782609, "grad_norm": 0.3397883227300112, "learning_rate": 9.339233659547521e-05, "loss": 1.4216, "step": 198 },
    { "epoch": 0.5768115942028985, "grad_norm": 0.3430862510854982, "learning_rate": 9.331431510537816e-05, "loss": 1.407, "step": 199 },
    { "epoch": 0.5797101449275363, "grad_norm": 0.3463403087156221, "learning_rate": 9.323586867394807e-05, "loss": 1.3894, "step": 200 },
    { "epoch": 0.5826086956521739, "grad_norm": 0.3280253585339611, "learning_rate": 9.315699807079497e-05, "loss": 1.3499, "step": 201 },
    { "epoch": 0.5855072463768116, "grad_norm": 0.3465548223811757, "learning_rate": 9.30777040696903e-05, "loss": 1.3635, "step": 202 },
    { "epoch": 0.5884057971014492, "grad_norm": 0.36685509209544426, "learning_rate": 9.29979874485593e-05, "loss": 1.4247, "step": 203 },
    { "epoch": 0.591304347826087, "grad_norm": 0.3642879429079575, "learning_rate": 9.291784898947336e-05, "loss": 1.4265, "step": 204 },
    { "epoch": 0.5942028985507246, "grad_norm": 0.3369650372143289, "learning_rate": 9.283728947864237e-05, "loss": 1.3543, "step": 205 },
    { "epoch": 0.5971014492753624, "grad_norm": 0.3498733941972242, "learning_rate": 9.275630970640705e-05, "loss": 1.3867, "step": 206 },
    { "epoch": 0.6, "grad_norm": 0.3265518670612826, "learning_rate": 9.267491046723111e-05, "loss": 1.404, "step": 207 },
    { "epoch": 0.6028985507246377, "grad_norm": 0.3318790134308843, "learning_rate": 9.259309255969354e-05, "loss": 1.4059, "step": 208 },
    { "epoch": 0.6057971014492753, "grad_norm": 0.34642031197798473, "learning_rate": 9.251085678648072e-05, "loss": 1.4259, "step": 209 },
    { "epoch": 0.6086956521739131, "grad_norm": 0.3419250092734196, "learning_rate": 9.242820395437854e-05, "loss": 1.3711, "step": 210 },
    { "epoch": 0.6115942028985507, "grad_norm": 0.3461578047587994, "learning_rate": 9.234513487426453e-05, "loss": 1.4579, "step": 211 },
    { "epoch": 0.6144927536231884, "grad_norm": 0.351627952691499, "learning_rate": 9.226165036109988e-05, "loss": 1.4399, "step": 212 },
    { "epoch": 0.6173913043478261, "grad_norm": 0.3307586411986757, "learning_rate": 9.217775123392145e-05, "loss": 1.3946, "step": 213 },
    { "epoch": 0.6202898550724638, "grad_norm": 0.3354295846624239, "learning_rate": 9.209343831583373e-05, "loss": 1.3682, "step": 214 },
    { "epoch": 0.6231884057971014, "grad_norm": 0.3643294550764089, "learning_rate": 9.200871243400073e-05, "loss": 1.4177, "step": 215 },
    { "epoch": 0.6260869565217392, "grad_norm": 0.34428635756537734, "learning_rate": 9.192357441963795e-05, "loss": 1.4487, "step": 216 },
    { "epoch": 0.6289855072463768, "grad_norm": 0.33609027458329577, "learning_rate": 9.183802510800415e-05, "loss": 1.4307, "step": 217 },
    { "epoch": 0.6318840579710145, "grad_norm": 0.3563038361945473, "learning_rate": 9.175206533839318e-05, "loss": 1.4172, "step": 218 },
    { "epoch": 0.6347826086956522, "grad_norm": 0.3288387667207579, "learning_rate": 9.166569595412575e-05, "loss": 1.3713, "step": 219 },
    { "epoch": 0.6376811594202898, "grad_norm": 0.34157440710913767, "learning_rate": 9.157891780254117e-05, "loss": 1.3679, "step": 220 },
    { "epoch": 0.6405797101449275, "grad_norm": 0.3151382251052811, "learning_rate": 9.1491731734989e-05, "loss": 1.3795, "step": 221 },
    { "epoch": 0.6434782608695652, "grad_norm": 0.33817165115588743, "learning_rate": 9.140413860682073e-05, "loss": 1.3586, "step": 222 },
    { "epoch": 0.6463768115942029, "grad_norm": 0.3277750425977871, "learning_rate": 9.131613927738138e-05, "loss": 1.3885, "step": 223 },
    { "epoch": 0.6492753623188405, "grad_norm": 0.31658312922359383, "learning_rate": 9.122773461000103e-05, "loss": 1.4149, "step": 224 },
    { "epoch": 0.6521739130434783, "grad_norm": 0.3193871223544036, "learning_rate": 9.113892547198643e-05, "loss": 1.322, "step": 225 },
    { "epoch": 0.6550724637681159, "grad_norm": 0.3302835747056366, "learning_rate": 9.104971273461243e-05, "loss": 1.3769, "step": 226 },
    { "epoch": 0.6579710144927536, "grad_norm": 0.3186189847015454, "learning_rate": 9.096009727311347e-05, "loss": 1.3406, "step": 227 },
    { "epoch": 0.6608695652173913, "grad_norm": 0.3389034868184038, "learning_rate": 9.087007996667494e-05, "loss": 1.3658, "step": 228 },
    { "epoch": 0.663768115942029, "grad_norm": 0.33474986537379237, "learning_rate": 9.077966169842459e-05, "loss": 1.3651, "step": 229 },
    { "epoch": 0.6666666666666666, "grad_norm": 0.3556022501007949, "learning_rate": 9.068884335542389e-05, "loss": 1.4237, "step": 230 },
    { "epoch": 0.6695652173913044, "grad_norm": 0.3216681338623573, "learning_rate": 9.05976258286593e-05, "loss": 1.3785, "step": 231 },
    { "epoch": 0.672463768115942, "grad_norm": 0.33533701380419384, "learning_rate": 9.05060100130335e-05, "loss": 1.4665, "step": 232 },
    { "epoch": 0.6753623188405797, "grad_norm": 0.3314963078807375, "learning_rate": 9.041399680735664e-05, "loss": 1.4036, "step": 233 },
    { "epoch": 0.6782608695652174, "grad_norm": 0.33542193989045377, "learning_rate": 9.03215871143376e-05, "loss": 1.4348, "step": 234 },
    { "epoch": 0.6811594202898551, "grad_norm": 0.3547005064725891, "learning_rate": 9.022878184057492e-05, "loss": 1.4272, "step": 235 },
    { "epoch": 0.6840579710144927, "grad_norm": 0.33291554897811426, "learning_rate": 9.013558189654819e-05, "loss": 1.4591, "step": 236 },
    { "epoch": 0.6869565217391305, "grad_norm": 0.3379014298685863, "learning_rate": 9.004198819660885e-05, "loss": 1.4567, "step": 237 },
    { "epoch": 0.6898550724637681, "grad_norm": 0.3297563945475019, "learning_rate": 8.99480016589714e-05, "loss": 1.3799, "step": 238 },
    { "epoch": 0.6927536231884058, "grad_norm": 0.34042084947510615, "learning_rate": 8.985362320570432e-05, "loss": 1.3697, "step": 239 },
    { "epoch": 0.6956521739130435, "grad_norm": 0.3374245817202305, "learning_rate": 8.975885376272102e-05, "loss": 1.4046, "step": 240 },
    { "epoch": 0.6985507246376812, "grad_norm": 0.3732847854755435, "learning_rate": 8.966369425977082e-05, "loss": 1.3491, "step": 241 },
    { "epoch": 0.7014492753623188, "grad_norm": 0.35958390600115686, "learning_rate": 8.956814563042968e-05, "loss": 1.3671, "step": 242 },
    { "epoch": 0.7043478260869566, "grad_norm": 0.3572722721866322, "learning_rate": 8.947220881209126e-05, "loss": 1.4003, "step": 243 },
    { "epoch": 0.7072463768115942, "grad_norm": 0.34273191632214844, "learning_rate": 8.937588474595753e-05, "loss": 1.4104, "step": 244 },
    { "epoch": 0.7101449275362319, "grad_norm": 0.34878139471777386, "learning_rate": 8.927917437702962e-05, "loss": 1.3896, "step": 245 },
    { "epoch": 0.7130434782608696, "grad_norm": 0.33111504592475566, "learning_rate": 8.918207865409856e-05, "loss": 1.3313, "step": 246 },
    { "epoch": 0.7159420289855073, "grad_norm": 0.3438939035436239, "learning_rate": 8.908459852973594e-05, "loss": 1.3429, "step": 247 },
    { "epoch": 0.7188405797101449, "grad_norm": 0.3312679125692785, "learning_rate": 8.898673496028456e-05, "loss": 1.4395, "step": 248 },
    { "epoch": 0.7217391304347827, "grad_norm": 0.34484942367124294, "learning_rate": 8.888848890584907e-05, "loss": 1.3712, "step": 249 },
    { "epoch": 0.7246376811594203, "grad_norm": 0.340709492347014, "learning_rate": 8.878986133028657e-05, "loss": 1.37, "step": 250 },
    { "epoch": 0.7275362318840579, "grad_norm": 0.33398944764147226, "learning_rate": 8.86908532011971e-05, "loss": 1.3892, "step": 251 },
    { "epoch": 0.7304347826086957, "grad_norm": 0.35175222311902715, "learning_rate": 8.85914654899142e-05, "loss": 1.4108, "step": 252 },
    { "epoch": 0.7333333333333333, "grad_norm": 0.3484995200225896, "learning_rate": 8.849169917149531e-05, "loss": 1.3833, "step": 253 },
    { "epoch": 0.736231884057971, "grad_norm": 0.3532075346234238, "learning_rate": 8.839155522471232e-05, "loss": 1.313, "step": 254 },
    { "epoch": 0.7391304347826086, "grad_norm": 0.32136667953567727, "learning_rate": 8.829103463204182e-05, "loss": 1.3504, "step": 255 },
    { "epoch": 0.7420289855072464, "grad_norm": 0.3229081190755409, "learning_rate": 8.81901383796556e-05, "loss": 1.3771, "step": 256 },
    { "epoch": 0.744927536231884, "grad_norm": 0.3440518639418747, "learning_rate": 8.808886745741089e-05, "loss": 1.4158, "step": 257 },
    { "epoch": 0.7478260869565218, "grad_norm": 0.3352706545420464, "learning_rate": 8.798722285884066e-05, "loss": 1.4394, "step": 258 },
    { "epoch": 0.7507246376811594, "grad_norm": 0.33559926414830077, "learning_rate": 8.788520558114391e-05, "loss": 1.3911, "step": 259 },
    { "epoch": 0.7536231884057971, "grad_norm": 0.3216071156149776, "learning_rate": 8.778281662517583e-05, "loss": 1.429, "step": 260 },
    { "epoch": 0.7565217391304347, "grad_norm": 0.32211563215549827, "learning_rate": 8.768005699543806e-05, "loss": 1.3127, "step": 261 },
    { "epoch": 0.7594202898550725, "grad_norm": 0.34108464165661373, "learning_rate": 8.757692770006876e-05, "loss": 1.3773, "step": 262 },
    { "epoch": 0.7623188405797101, "grad_norm": 0.32535926486459094, "learning_rate": 8.747342975083272e-05, "loss": 1.3664, "step": 263 },
    { "epoch": 0.7652173913043478, "grad_norm": 0.33852048574771015, "learning_rate": 8.736956416311154e-05, "loss": 1.3663, "step": 264 },
    { "epoch": 0.7681159420289855, "grad_norm": 0.33710327017540265, "learning_rate": 8.72653319558935e-05, "loss": 1.4091, "step": 265 },
    { "epoch": 0.7710144927536232, "grad_norm": 0.3529196648547696, "learning_rate": 8.716073415176374e-05, "loss": 1.442, "step": 266 },
    { "epoch": 0.7739130434782608, "grad_norm": 0.34337677669937877, "learning_rate": 8.705577177689403e-05, "loss": 1.3316, "step": 267 },
    { "epoch": 0.7768115942028986, "grad_norm": 0.3354333510851631, "learning_rate": 8.695044586103296e-05, "loss": 1.3616, "step": 268 },
    { "epoch": 0.7797101449275362, "grad_norm": 0.3479441013536178, "learning_rate": 8.684475743749556e-05, "loss": 1.395, "step": 269 },
    { "epoch": 0.782608695652174, "grad_norm": 0.37463973489254887, "learning_rate": 8.673870754315336e-05, "loss": 1.401, "step": 270 },
    { "epoch": 0.7855072463768116, "grad_norm": 0.31175117798278007, "learning_rate": 8.663229721842415e-05, "loss": 1.3223, "step": 271 },
    { "epoch": 0.7884057971014493, "grad_norm": 0.38303494453595516, "learning_rate": 8.652552750726175e-05, "loss": 1.4301, "step": 272 },
    { "epoch": 0.7913043478260869, "grad_norm": 0.3573014147864106, "learning_rate": 8.64183994571458e-05, "loss": 1.4263, "step": 273 },
    { "epoch": 0.7942028985507247, "grad_norm": 0.3211993716597447, "learning_rate": 8.631091411907149e-05, "loss": 1.3578, "step": 274 },
    { "epoch": 0.7971014492753623, "grad_norm": 0.37834773248299663, "learning_rate": 8.620307254753923e-05, "loss": 1.3745, "step": 275 },
    { "epoch": 0.8, "grad_norm": 0.31593418933802786, "learning_rate": 8.609487580054428e-05, "loss": 1.3654, "step": 276 },
    { "epoch": 0.8028985507246377, "grad_norm": 0.31504634745000243, "learning_rate": 8.598632493956644e-05, "loss": 1.4, "step": 277 },
    { "epoch": 0.8057971014492754, "grad_norm": 0.3384470107062998, "learning_rate": 8.58774210295596e-05, "loss": 1.3941, "step": 278 },
    { "epoch": 0.808695652173913, "grad_norm": 0.3260030165566468, "learning_rate": 8.576816513894125e-05, "loss": 1.348, "step": 279 },
    { "epoch": 0.8115942028985508, "grad_norm": 0.3527150892760629, "learning_rate": 8.565855833958206e-05, "loss": 1.4058, "step": 280 },
    { "epoch": 0.8144927536231884, "grad_norm": 0.3861860908831136, "learning_rate": 8.554860170679534e-05, "loss": 1.4282, "step": 281 },
    { "epoch": 0.8173913043478261, "grad_norm": 0.3137903423216692, "learning_rate": 8.543829631932649e-05, "loss": 1.352, "step": 282 },
    { "epoch": 0.8202898550724638, "grad_norm": 0.34862718728490294, "learning_rate": 8.532764325934239e-05, "loss": 1.4282, "step": 283 },
    { "epoch": 0.8231884057971014, "grad_norm": 0.3150871399912744, "learning_rate": 8.521664361242089e-05, "loss": 1.3802, "step": 284 },
    { "epoch": 0.8260869565217391, "grad_norm": 0.3107741737666529, "learning_rate": 8.510529846753998e-05, "loss": 1.4077, "step": 285 },
    { "epoch": 0.8289855072463768, "grad_norm": 0.33269493424037233, "learning_rate": 8.499360891706729e-05, "loss": 1.3348, "step": 286 },
    { "epoch": 0.8318840579710145, "grad_norm": 0.31493592697757294, "learning_rate": 8.488157605674925e-05, "loss": 1.3418, "step": 287 },
    { "epoch": 0.8347826086956521, "grad_norm": 0.3328720547121984, "learning_rate": 8.476920098570036e-05, "loss": 1.3832, "step": 288 },
    { "epoch": 0.8376811594202899, "grad_norm": 0.3157756166632203, "learning_rate": 8.465648480639248e-05, "loss": 1.3274, "step": 289 },
    { "epoch": 0.8405797101449275, "grad_norm": 0.33662897796614577, "learning_rate": 8.454342862464395e-05, "loss": 1.3086, "step": 290 },
    { "epoch": 0.8434782608695652, "grad_norm": 0.3272252672648793, "learning_rate": 8.443003354960872e-05, "loss": 1.4232, "step": 291 },
    { "epoch": 0.8463768115942029, "grad_norm": 0.35218283346681617, "learning_rate": 8.431630069376552e-05, "loss": 1.4371, "step": 292 },
    { "epoch": 0.8492753623188406, "grad_norm": 0.3436413205889393, "learning_rate": 8.420223117290695e-05, "loss": 1.3696, "step": 293 },
    { "epoch": 0.8521739130434782, "grad_norm": 0.34426616560941314, "learning_rate": 8.408782610612849e-05, "loss": 1.4137, "step": 294 },
    { "epoch": 0.855072463768116, "grad_norm": 0.31419677902933213, "learning_rate": 8.39730866158175e-05, "loss": 1.3294, "step": 295 },
    { "epoch": 0.8579710144927536, "grad_norm": 0.31097415762768543, "learning_rate": 8.385801382764233e-05, "loss": 1.3796, "step": 296 },
    { "epoch": 0.8608695652173913, "grad_norm": 0.3351050938384504, "learning_rate": 8.374260887054116e-05, "loss": 1.4819, "step": 297 },
    { "epoch": 0.863768115942029, "grad_norm": 0.3151109176190777, "learning_rate": 8.362687287671094e-05, "loss": 1.3711, "step": 298 },
    { "epoch": 0.8666666666666667, "grad_norm": 0.337074633378245, "learning_rate": 8.351080698159632e-05, "loss": 1.3923, "step": 299 },
    { "epoch": 0.8695652173913043, "grad_norm": 0.3371311952402845, "learning_rate": 8.339441232387853e-05, "loss": 1.3789, "step": 300 },
    { "epoch": 0.8724637681159421, "grad_norm": 0.3356424382906388, "learning_rate": 8.32776900454641e-05, "loss": 1.4003, "step": 301 },
    { "epoch": 0.8753623188405797, "grad_norm": 0.33796299079575864, "learning_rate": 8.31606412914738e-05, "loss": 1.4341, "step": 302 },
    { "epoch": 0.8782608695652174, "grad_norm": 0.32018941976781934, "learning_rate": 8.30432672102313e-05, "loss": 1.4523, "step": 303 },
    { "epoch": 0.881159420289855, "grad_norm": 0.3368637827820196, "learning_rate": 8.292556895325194e-05, "loss": 1.3903, "step": 304 },
    { "epoch": 0.8840579710144928, "grad_norm": 0.31352167875853487, "learning_rate": 8.280754767523144e-05, "loss": 1.3581, "step": 305 },
    { "epoch": 0.8869565217391304, "grad_norm": 0.31484573995633375, "learning_rate": 8.268920453403457e-05, "loss": 1.3967, "step": 306 },
    { "epoch": 0.8898550724637682, "grad_norm": 0.31504188464216054, "learning_rate": 8.257054069068374e-05, "loss": 1.3985, "step": 307 },
    { "epoch": 0.8927536231884058, "grad_norm": 0.32015281024694753, "learning_rate": 8.245155730934777e-05, "loss": 1.3273, "step": 308 },
    { "epoch": 0.8956521739130435, "grad_norm": 0.3183790437483911, "learning_rate": 8.233225555733022e-05, "loss": 1.2672, "step": 309 },
    { "epoch": 0.8985507246376812, "grad_norm": 0.32150150116629717, "learning_rate": 8.221263660505813e-05, "loss": 1.3995, "step": 310 },
    { "epoch": 0.9014492753623189, "grad_norm": 0.3132580361772673, "learning_rate": 8.20927016260705e-05, "loss": 1.3899, "step": 311 },
    { "epoch": 0.9043478260869565, "grad_norm": 0.3557171808896923, "learning_rate": 8.197245179700673e-05, "loss": 1.3861, "step": 312 },
    { "epoch": 0.9072463768115943, "grad_norm": 0.32080932799331907, "learning_rate": 8.185188829759505e-05, "loss": 1.2657, "step": 313 },
    { "epoch": 0.9101449275362319, "grad_norm": 0.33323239514109537, "learning_rate": 8.173101231064113e-05, "loss": 1.331, "step": 314 },
    { "epoch": 0.9130434782608695, "grad_norm": 0.33932442141864444, "learning_rate": 8.160982502201624e-05, "loss": 1.3583, "step": 315 },
    { "epoch": 0.9159420289855073, "grad_norm": 0.41517663636078217, "learning_rate": 8.148832762064573e-05, "loss": 1.4196, "step": 316 },
    { "epoch": 0.9188405797101449, "grad_norm": 0.3479488422667109, "learning_rate": 8.136652129849738e-05, "loss": 1.3765, "step": 317 },
    { "epoch": 0.9217391304347826, "grad_norm": 0.3250773691234272, "learning_rate": 8.124440725056969e-05, "loss": 1.3998, "step": 318 },
    { "epoch": 0.9246376811594202, "grad_norm": 0.630703005417282, "learning_rate": 8.112198667488012e-05, "loss": 1.2986, "step": 319 },
    { "epoch": 0.927536231884058, "grad_norm": 0.34656213869069796, "learning_rate": 8.099926077245337e-05, "loss": 1.4085, "step": 320 },
    { "epoch": 0.9304347826086956, "grad_norm": 0.3595735041645428, "learning_rate": 8.08762307473096e-05, "loss": 1.3973, "step": 321 },
    { "epoch": 0.9333333333333333, "grad_norm": 0.3492788413407257, "learning_rate": 8.075289780645264e-05, "loss": 1.3912, "step": 322 },
    { "epoch": 0.936231884057971, "grad_norm": 0.3576330587050802, "learning_rate": 8.062926315985803e-05, "loss": 1.4256, "step": 323 },
    { "epoch": 0.9391304347826087, "grad_norm": 0.3410475477414221, "learning_rate": 8.050532802046135e-05, "loss": 1.3586, "step": 324 },
    { "epoch": 0.9420289855072463, "grad_norm": 0.32056313028041444, "learning_rate": 8.038109360414614e-05, "loss": 1.3443, "step": 325 },
    { "epoch": 0.9449275362318841, "grad_norm": 0.32894846650068166, "learning_rate": 8.025656112973202e-05, "loss": 1.3798, "step": 326 },
    { "epoch": 0.9478260869565217, "grad_norm": 0.3255639658134978, "learning_rate": 8.013173181896283e-05, "loss": 1.3383, "step": 327 },
    { "epoch": 0.9507246376811594, "grad_norm": 0.31966797580007494, "learning_rate": 8.000660689649449e-05, "loss": 1.3544, "step": 328 },
    { "epoch": 0.9536231884057971, "grad_norm": 0.32692090968009707, "learning_rate": 7.98811875898831e-05, "loss": 1.4088, "step": 329 },
    { "epoch": 0.9565217391304348, "grad_norm": 0.3372144496418016, "learning_rate": 7.975547512957285e-05, "loss": 1.4309, "step": 330 },
    { "epoch": 0.9594202898550724, "grad_norm": 0.3246412166131606, "learning_rate": 7.962947074888394e-05, "loss": 1.3916, "step": 331 },
    { "epoch": 0.9623188405797102, "grad_norm": 0.34634645274643355, "learning_rate": 7.950317568400054e-05, "loss": 1.4104, "step": 332 },
    { "epoch": 0.9652173913043478, "grad_norm": 0.3256987549913797, "learning_rate": 7.937659117395858e-05, "loss": 1.3544, "step": 333 },
    { "epoch": 0.9681159420289855, "grad_norm": 0.33356722481281487, "learning_rate": 7.924971846063365e-05, "loss": 1.342, "step": 334 },
    { "epoch": 0.9710144927536232, "grad_norm": 0.3260083753687772, "learning_rate": 7.912255878872878e-05, "loss": 1.4006, "step": 335 },
    { "epoch": 0.9739130434782609, "grad_norm": 0.3768462741234547, "learning_rate": 7.899511340576229e-05, "loss": 1.4014, "step": 336 },
    { "epoch": 0.9768115942028985, "grad_norm": 0.33594184989494874, "learning_rate": 7.886738356205546e-05, "loss": 1.3538, "step": 337 },
    { "epoch": 0.9797101449275363, "grad_norm": 0.3538141580905989, "learning_rate": 7.873937051072035e-05, "loss": 1.4112, "step": 338 },
    { "epoch": 0.9826086956521739, "grad_norm": 0.33768085173175694, "learning_rate": 7.861107550764744e-05, "loss": 1.4318, "step": 339 },
    { "epoch": 0.9855072463768116, "grad_norm": 0.3103190809712041, "learning_rate": 7.848249981149338e-05, "loss": 1.3934, "step": 340 },
    { "epoch": 0.9884057971014493, "grad_norm": 0.35049170901785537, "learning_rate": 7.835364468366856e-05, "loss": 1.3604, "step": 341 },
    { "epoch": 0.991304347826087, "grad_norm": 0.32828748932738266, "learning_rate": 7.822451138832478e-05, "loss": 1.3985, "step": 342 },
    { "epoch": 0.9942028985507246, "grad_norm": 0.33349918656348, "learning_rate": 7.809510119234287e-05, "loss": 1.4051, "step": 343 },
    { "epoch": 0.9971014492753624, "grad_norm": 0.31203624586969825, "learning_rate": 7.796541536532019e-05, "loss": 1.4114, "step": 344 },
    { "epoch": 1.0, "grad_norm": 0.3240751813149832, "learning_rate": 7.783545517955826e-05, "loss": 1.3441, "step": 345 },
    { "epoch": 1.0028985507246377, "grad_norm": 0.3039393246768782, "learning_rate": 7.77052219100502e-05, "loss": 1.2368, "step": 346 },
    { "epoch": 1.0057971014492753, "grad_norm": 0.31372425053284514, "learning_rate": 7.757471683446833e-05, "loss": 1.1765, "step": 347 },
    { "epoch": 1.008695652173913, "grad_norm": 0.2985654423691086, "learning_rate": 7.744394123315146e-05, "loss": 1.2387, "step": 348 },
    { "epoch": 1.0115942028985507, "grad_norm": 0.30668006943966447, "learning_rate": 7.731289638909248e-05, "loss": 1.2512, "step": 349 },
    { "epoch": 1.0144927536231885, "grad_norm": 0.3297662794021686, "learning_rate": 7.718158358792574e-05, "loss": 1.2466, "step": 350 },
    { "epoch": 1.017391304347826, "grad_norm": 0.36571397703464864, "learning_rate": 7.705000411791441e-05, "loss": 1.2095, "step": 351 },
    { "epoch": 1.0202898550724637, "grad_norm": 0.36789475981765535, "learning_rate": 7.691815926993785e-05, "loss": 1.2127, "step": 352 },
    { "epoch": 1.0231884057971015, "grad_norm": 0.34691008452093475, "learning_rate": 7.678605033747894e-05, "loss": 1.1754, "step": 353 },
    { "epoch": 1.0260869565217392, "grad_norm": 0.3381901577900874, "learning_rate": 7.665367861661142e-05, "loss": 1.2585, "step": 354 },
    { "epoch": 1.0289855072463767, "grad_norm": 0.3456016883168296, "learning_rate": 7.652104540598712e-05, "loss": 1.2565, "step": 355 },
    { "epoch": 1.0318840579710145, "grad_norm": 0.3340793379287121, "learning_rate": 7.638815200682331e-05, "loss": 1.286, "step": 356 },
    { "epoch": 1.0347826086956522, "grad_norm": 0.3329632889293724, "learning_rate": 7.62549997228898e-05, "loss": 1.2579, "step": 357 },
    { "epoch": 1.03768115942029, "grad_norm": 0.32945204903041203, "learning_rate": 7.612158986049632e-05, "loss": 1.1978, "step": 358 },
    { "epoch": 1.0405797101449274, "grad_norm": 0.3240289810339555, "learning_rate": 7.598792372847952e-05, "loss": 1.1871, "step": 359 },
    { "epoch": 1.0434782608695652, "grad_norm": 0.3497054137706393, "learning_rate": 7.585400263819025e-05, "loss": 1.2407, "step": 360 },
    { "epoch": 1.046376811594203, "grad_norm": 0.3334051709529727, "learning_rate": 7.571982790348071e-05, "loss": 1.2475, "step": 361 },
    { "epoch": 1.0492753623188407, "grad_norm": 0.3216924338385901, "learning_rate": 7.558540084069145e-05, "loss": 1.2178, "step": 362 },
    { "epoch": 1.0521739130434782, "grad_norm": 0.3770387844464867, "learning_rate": 7.545072276863858e-05, "loss": 1.2979, "step": 363 },
    { "epoch": 1.055072463768116, "grad_norm": 0.33349794524452664, "learning_rate": 7.531579500860069e-05, "loss": 1.2679, "step": 364 },
    { "epoch": 1.0579710144927537, "grad_norm": 0.3410677559200434, "learning_rate": 7.518061888430609e-05, "loss": 1.3029, "step": 365 },
    { "epoch": 1.0608695652173914, "grad_norm": 0.32421257826543254, "learning_rate": 7.50451957219196e-05, "loss": 1.2383, "step": 366 },
    { "epoch": 1.063768115942029, "grad_norm": 0.33207438928525995, "learning_rate": 7.490952685002965e-05, "loss": 1.2317, "step": 367 },
    { "epoch": 1.0666666666666667, "grad_norm": 0.32506432414586334, "learning_rate": 7.477361359963533e-05, "loss": 1.1661, "step": 368 },
    { "epoch": 1.0695652173913044, "grad_norm": 0.32495557198051783, "learning_rate": 7.463745730413313e-05, "loss": 1.2343, "step": 369 },
    { "epoch": 1.0724637681159421, "grad_norm": 0.33951747813529576, "learning_rate": 7.450105929930403e-05, "loss": 1.1765, "step": 370 },
    { "epoch": 1.0753623188405796, "grad_norm": 0.3960232594734765, "learning_rate": 7.436442092330033e-05, "loss": 1.1708, "step": 371 },
    {
|
"epoch": 1.0782608695652174, |
|
"grad_norm": 0.34965839265944354, |
|
"learning_rate": 7.422754351663252e-05, |
|
"loss": 1.1557, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.0811594202898551, |
|
"grad_norm": 0.3465625398151273, |
|
"learning_rate": 7.409042842215611e-05, |
|
"loss": 1.2163, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.0840579710144929, |
|
"grad_norm": 0.3441278544713875, |
|
"learning_rate": 7.395307698505851e-05, |
|
"loss": 1.2522, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.0869565217391304, |
|
"grad_norm": 0.34316475519905354, |
|
"learning_rate": 7.381549055284582e-05, |
|
"loss": 1.2401, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.0898550724637681, |
|
"grad_norm": 0.3468405311381756, |
|
"learning_rate": 7.367767047532955e-05, |
|
"loss": 1.2297, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.0927536231884059, |
|
"grad_norm": 0.35424537263860967, |
|
"learning_rate": 7.353961810461343e-05, |
|
"loss": 1.1903, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.0956521739130434, |
|
"grad_norm": 0.35865745036758906, |
|
"learning_rate": 7.340133479508015e-05, |
|
"loss": 1.2238, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.098550724637681, |
|
"grad_norm": 0.33961205561899227, |
|
"learning_rate": 7.326282190337807e-05, |
|
"loss": 1.2353, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.1014492753623188, |
|
"grad_norm": 0.3410877787281011, |
|
"learning_rate": 7.312408078840788e-05, |
|
"loss": 1.1938, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.1043478260869566, |
|
"grad_norm": 0.3261974323058093, |
|
"learning_rate": 7.298511281130928e-05, |
|
"loss": 1.2283, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.107246376811594, |
|
"grad_norm": 0.3375439427532852, |
|
"learning_rate": 7.284591933544764e-05, |
|
"loss": 1.166, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.1101449275362318, |
|
"grad_norm": 0.34226748130902523, |
|
"learning_rate": 7.270650172640065e-05, |
|
"loss": 1.2268, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.1130434782608696, |
|
"grad_norm": 0.34975018354668974, |
|
"learning_rate": 7.256686135194483e-05, |
|
"loss": 1.2753, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.1159420289855073, |
|
"grad_norm": 0.36870818906061614, |
|
"learning_rate": 7.242699958204225e-05, |
|
"loss": 1.2427, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.1188405797101448, |
|
"grad_norm": 0.35097638947331306, |
|
"learning_rate": 7.228691778882693e-05, |
|
"loss": 1.2588, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.1217391304347826, |
|
"grad_norm": 0.35715131379127846, |
|
"learning_rate": 7.21466173465915e-05, |
|
"loss": 1.2349, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.1246376811594203, |
|
"grad_norm": 0.3554441755613845, |
|
"learning_rate": 7.200609963177367e-05, |
|
"loss": 1.2218, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.127536231884058, |
|
"grad_norm": 0.35332606995255955, |
|
"learning_rate": 7.186536602294278e-05, |
|
"loss": 1.233, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.1304347826086956, |
|
"grad_norm": 0.34659479615561295, |
|
"learning_rate": 7.172441790078614e-05, |
|
"loss": 1.2277, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.1333333333333333, |
|
"grad_norm": 0.3634661952802433, |
|
"learning_rate": 7.158325664809566e-05, |
|
"loss": 1.1815, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.136231884057971, |
|
"grad_norm": 0.3483946097126382, |
|
"learning_rate": 7.144188364975415e-05, |
|
"loss": 1.2296, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.1391304347826088, |
|
"grad_norm": 0.3458491663438552, |
|
"learning_rate": 7.130030029272179e-05, |
|
"loss": 1.2762, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.1420289855072463, |
|
"grad_norm": 0.36175639738964943, |
|
"learning_rate": 7.11585079660225e-05, |
|
"loss": 1.1942, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.144927536231884, |
|
"grad_norm": 0.3593818284728034, |
|
"learning_rate": 7.101650806073038e-05, |
|
"loss": 1.2068, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.1478260869565218, |
|
"grad_norm": 0.334166827563346, |
|
"learning_rate": 7.087430196995593e-05, |
|
"loss": 1.1819, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.1507246376811595, |
|
"grad_norm": 0.3636336066976543, |
|
"learning_rate": 7.073189108883255e-05, |
|
"loss": 1.2438, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.153623188405797, |
|
"grad_norm": 0.35550038414146484, |
|
"learning_rate": 7.058927681450269e-05, |
|
"loss": 1.2546, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.1565217391304348, |
|
"grad_norm": 0.3638989954332178, |
|
"learning_rate": 7.044646054610426e-05, |
|
"loss": 1.2817, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.1594202898550725, |
|
"grad_norm": 0.36528513619908154, |
|
"learning_rate": 7.030344368475684e-05, |
|
"loss": 1.2634, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1623188405797102, |
|
"grad_norm": 0.348052355901968, |
|
"learning_rate": 7.016022763354798e-05, |
|
"loss": 1.2002, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.1652173913043478, |
|
"grad_norm": 0.3595684193169886, |
|
"learning_rate": 7.00168137975194e-05, |
|
"loss": 1.1864, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.1681159420289855, |
|
"grad_norm": 0.35070589944718533, |
|
"learning_rate": 6.98732035836532e-05, |
|
"loss": 1.1749, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.1710144927536232, |
|
"grad_norm": 0.3583364136698803, |
|
"learning_rate": 6.972939840085809e-05, |
|
"loss": 1.2362, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.1739130434782608, |
|
"grad_norm": 0.3411795291050965, |
|
"learning_rate": 6.958539965995558e-05, |
|
"loss": 1.2365, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.1768115942028985, |
|
"grad_norm": 0.37126831887596484, |
|
"learning_rate": 6.944120877366604e-05, |
|
"loss": 1.2547, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.1797101449275362, |
|
"grad_norm": 0.3615486523323878, |
|
"learning_rate": 6.929682715659496e-05, |
|
"loss": 1.2008, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.182608695652174, |
|
"grad_norm": 0.3495522144501781, |
|
"learning_rate": 6.915225622521901e-05, |
|
"loss": 1.2137, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.1855072463768117, |
|
"grad_norm": 0.34558559090876845, |
|
"learning_rate": 6.900749739787216e-05, |
|
"loss": 1.1948, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.1884057971014492, |
|
"grad_norm": 0.3534560464350228, |
|
"learning_rate": 6.886255209473174e-05, |
|
"loss": 1.2296, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.191304347826087, |
|
"grad_norm": 0.38654103329628986, |
|
"learning_rate": 6.871742173780458e-05, |
|
"loss": 1.2375, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.1942028985507247, |
|
"grad_norm": 0.4990410023234168, |
|
"learning_rate": 6.857210775091292e-05, |
|
"loss": 1.1972, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.1971014492753622, |
|
"grad_norm": 0.3283618367174733, |
|
"learning_rate": 6.842661155968062e-05, |
|
"loss": 1.2236, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.3501614388462517, |
|
"learning_rate": 6.828093459151902e-05, |
|
"loss": 1.2599, |
|
"step": 414 |
|
}, |
|
{
"epoch": 1.2028985507246377,
"grad_norm": 0.3566983584982769,
"learning_rate": 6.813507827561301e-05,
"loss": 1.2592,
"step": 415
},
{
"epoch": 1.2057971014492754,
"grad_norm": 0.35438824536081337,
"learning_rate": 6.798904404290703e-05,
"loss": 1.219,
"step": 416
},
{
"epoch": 1.208695652173913,
"grad_norm": 0.36738665957897987,
"learning_rate": 6.784283332609096e-05,
"loss": 1.2787,
"step": 417
},
{
"epoch": 1.2115942028985507,
"grad_norm": 0.3618484779747058,
"learning_rate": 6.769644755958614e-05,
"loss": 1.2557,
"step": 418
},
{
"epoch": 1.2144927536231884,
"grad_norm": 0.3475615543784353,
"learning_rate": 6.754988817953121e-05,
"loss": 1.2519,
"step": 419
},
{
"epoch": 1.2173913043478262,
"grad_norm": 0.3498171433494951,
"learning_rate": 6.740315662376808e-05,
"loss": 1.1832,
"step": 420
},
{
"epoch": 1.2202898550724637,
"grad_norm": 0.3485237559097342,
"learning_rate": 6.725625433182788e-05,
"loss": 1.1686,
"step": 421
},
{
"epoch": 1.2231884057971014,
"grad_norm": 0.3365638116771253,
"learning_rate": 6.710918274491668e-05,
"loss": 1.161,
"step": 422
},
{
"epoch": 1.2260869565217392,
"grad_norm": 0.339262847480053,
"learning_rate": 6.696194330590151e-05,
"loss": 1.3032,
"step": 423
},
{
"epoch": 1.228985507246377,
"grad_norm": 0.3695849544204241,
"learning_rate": 6.681453745929613e-05,
"loss": 1.2505,
"step": 424
},
{
"epoch": 1.2318840579710144,
"grad_norm": 0.3810556641153086,
"learning_rate": 6.666696665124682e-05,
"loss": 1.2176,
"step": 425
},
{
"epoch": 1.2347826086956522,
"grad_norm": 0.3794002652671474,
"learning_rate": 6.651923232951829e-05,
"loss": 1.2922,
"step": 426
},
{
"epoch": 1.23768115942029,
"grad_norm": 0.37219002176219357,
"learning_rate": 6.637133594347938e-05,
"loss": 1.2919,
"step": 427
},
{
"epoch": 1.2405797101449276,
"grad_norm": 0.3748146640073023,
"learning_rate": 6.62232789440889e-05,
"loss": 1.2549,
"step": 428
},
{
"epoch": 1.2434782608695651,
"grad_norm": 0.3431018972364436,
"learning_rate": 6.607506278388144e-05,
"loss": 1.1907,
"step": 429
},
{
"epoch": 1.2463768115942029,
"grad_norm": 0.3685201234625515,
"learning_rate": 6.592668891695298e-05,
"loss": 1.2368,
"step": 430
},
{
"epoch": 1.2492753623188406,
"grad_norm": 0.3638027931128809,
"learning_rate": 6.57781587989467e-05,
"loss": 1.2695,
"step": 431
},
{
"epoch": 1.2521739130434781,
"grad_norm": 0.3392431416089568,
"learning_rate": 6.562947388703879e-05,
"loss": 1.2651,
"step": 432
},
{
"epoch": 1.2550724637681159,
"grad_norm": 0.3523863327979242,
"learning_rate": 6.548063563992397e-05,
"loss": 1.2633,
"step": 433
},
{
"epoch": 1.2579710144927536,
"grad_norm": 0.3773185628146933,
"learning_rate": 6.533164551780134e-05,
"loss": 1.2669,
"step": 434
},
{
"epoch": 1.2608695652173914,
"grad_norm": 0.37080955852894376,
"learning_rate": 6.518250498235996e-05,
"loss": 1.2055,
"step": 435
},
{
"epoch": 1.263768115942029,
"grad_norm": 0.3610115012833989,
"learning_rate": 6.50332154967646e-05,
"loss": 1.2558,
"step": 436
},
{
"epoch": 1.2666666666666666,
"grad_norm": 0.36419810462728663,
"learning_rate": 6.488377852564125e-05,
"loss": 1.2273,
"step": 437
},
{
"epoch": 1.2695652173913043,
"grad_norm": 0.36955352159431015,
"learning_rate": 6.473419553506285e-05,
"loss": 1.1592,
"step": 438
},
{
"epoch": 1.272463768115942,
"grad_norm": 0.4000451451417096,
"learning_rate": 6.45844679925349e-05,
"loss": 1.2585,
"step": 439
},
{
"epoch": 1.2753623188405796,
"grad_norm": 0.3674813225161034,
"learning_rate": 6.443459736698105e-05,
"loss": 1.207,
"step": 440
},
{
"epoch": 1.2782608695652173,
"grad_norm": 0.36342273693767024,
"learning_rate": 6.428458512872868e-05,
"loss": 1.207,
"step": 441
},
{
"epoch": 1.281159420289855,
"grad_norm": 0.3772811021851,
"learning_rate": 6.413443274949446e-05,
"loss": 1.249,
"step": 442
},
{
"epoch": 1.2840579710144928,
"grad_norm": 0.3574482885159096,
"learning_rate": 6.398414170237001e-05,
"loss": 1.2111,
"step": 443
},
{
"epoch": 1.2869565217391306,
"grad_norm": 0.34461226274334095,
"learning_rate": 6.383371346180725e-05,
"loss": 1.2042,
"step": 444
},
{
"epoch": 1.289855072463768,
"grad_norm": 0.35375827819704075,
"learning_rate": 6.368314950360415e-05,
"loss": 1.2183,
"step": 445
},
{
"epoch": 1.2927536231884058,
"grad_norm": 0.3494607679069863,
"learning_rate": 6.353245130489012e-05,
"loss": 1.2267,
"step": 446
},
{
"epoch": 1.2956521739130435,
"grad_norm": 0.3376350549359254,
"learning_rate": 6.338162034411158e-05,
"loss": 1.2514,
"step": 447
},
{
"epoch": 1.298550724637681,
"grad_norm": 0.3514507439505588,
"learning_rate": 6.323065810101741e-05,
"loss": 1.2055,
"step": 448
},
{
"epoch": 1.3014492753623188,
"grad_norm": 0.374192088646086,
"learning_rate": 6.307956605664447e-05,
"loss": 1.2149,
"step": 449
},
{
"epoch": 1.3043478260869565,
"grad_norm": 0.36836907141990205,
"learning_rate": 6.292834569330301e-05,
"loss": 1.332,
"step": 450
},
{
"epoch": 1.3072463768115943,
"grad_norm": 0.35436366268435593,
"learning_rate": 6.277699849456224e-05,
"loss": 1.2918,
"step": 451
},
{
"epoch": 1.310144927536232,
"grad_norm": 0.3535565794861321,
"learning_rate": 6.262552594523565e-05,
"loss": 1.2382,
"step": 452
},
{
"epoch": 1.3130434782608695,
"grad_norm": 0.3923107343675531,
"learning_rate": 6.247392953136655e-05,
"loss": 1.2614,
"step": 453
},
{
"epoch": 1.3159420289855073,
"grad_norm": 0.3566047611610826,
"learning_rate": 6.23222107402134e-05,
"loss": 1.2574,
"step": 454
},
{
"epoch": 1.318840579710145,
"grad_norm": 0.3444110335156092,
"learning_rate": 6.217037106023527e-05,
"loss": 1.2158,
"step": 455
},
{
"epoch": 1.3217391304347825,
"grad_norm": 0.34800059904629854,
"learning_rate": 6.201841198107724e-05,
"loss": 1.2691,
"step": 456
},
{
"epoch": 1.3246376811594203,
"grad_norm": 0.3704659760771806,
"learning_rate": 6.186633499355576e-05,
"loss": 1.1669,
"step": 457
},
{
"epoch": 1.327536231884058,
"grad_norm": 0.35589030087499396,
"learning_rate": 6.171414158964402e-05,
"loss": 1.2421,
"step": 458
},
{
"epoch": 1.3304347826086955,
"grad_norm": 0.41000043026343475,
"learning_rate": 6.156183326245738e-05,
"loss": 1.1528,
"step": 459
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.3545298846533197,
"learning_rate": 6.140941150623865e-05,
"loss": 1.3154,
"step": 460
},
{
"epoch": 1.336231884057971,
"grad_norm": 0.3632756192190139,
"learning_rate": 6.12568778163434e-05,
"loss": 1.2769,
"step": 461
},
{
"epoch": 1.3391304347826087,
"grad_norm": 0.3766419178772542,
"learning_rate": 6.110423368922544e-05,
"loss": 1.215,
"step": 462
},
{
"epoch": 1.3420289855072465,
"grad_norm": 0.35769930623122026,
"learning_rate": 6.095148062242196e-05,
"loss": 1.2226,
"step": 463
},
{
"epoch": 1.344927536231884,
"grad_norm": 0.3652620834683046,
"learning_rate": 6.079862011453893e-05,
"loss": 1.2217,
"step": 464
},
{
"epoch": 1.3478260869565217,
"grad_norm": 0.37380916243000584,
"learning_rate": 6.064565366523641e-05,
"loss": 1.2051,
"step": 465
},
{
"epoch": 1.3507246376811595,
"grad_norm": 0.38594446149133127,
"learning_rate": 6.0492582775213825e-05,
"loss": 1.2652,
"step": 466
},
{
"epoch": 1.353623188405797,
"grad_norm": 0.3461990145984557,
"learning_rate": 6.0339408946195185e-05,
"loss": 1.2554,
"step": 467
},
{
"epoch": 1.3565217391304347,
"grad_norm": 0.3748678338524721,
"learning_rate": 6.0186133680914445e-05,
"loss": 1.191,
"step": 468
},
{
"epoch": 1.3594202898550725,
"grad_norm": 0.37370664196717224,
"learning_rate": 6.003275848310067e-05,
"loss": 1.2706,
"step": 469
},
{
"epoch": 1.3623188405797102,
"grad_norm": 0.36194306306178214,
"learning_rate": 5.9879284857463356e-05,
"loss": 1.2187,
"step": 470
},
{
"epoch": 1.365217391304348,
"grad_norm": 0.36087008057820225,
"learning_rate": 5.972571430967764e-05,
"loss": 1.2456,
"step": 471
},
{
"epoch": 1.3681159420289855,
"grad_norm": 0.36273835372082425,
"learning_rate": 5.9572048346369515e-05,
"loss": 1.2277,
"step": 472
},
{
"epoch": 1.3710144927536232,
"grad_norm": 0.37085205673967797,
"learning_rate": 5.941828847510108e-05,
"loss": 1.2768,
"step": 473
},
{
"epoch": 1.373913043478261,
"grad_norm": 0.3755185129215953,
"learning_rate": 5.9264436204355724e-05,
"loss": 1.2031,
"step": 474
},
{
"epoch": 1.3768115942028984,
"grad_norm": 0.37382431917426745,
"learning_rate": 5.911049304352332e-05,
"loss": 1.2843,
"step": 475
},
{
"epoch": 1.3797101449275362,
"grad_norm": 0.37855680727333874,
"learning_rate": 5.895646050288543e-05,
"loss": 1.2912,
"step": 476
},
{
"epoch": 1.382608695652174,
"grad_norm": 0.3654439184708917,
"learning_rate": 5.8802340093600495e-05,
"loss": 1.2292,
"step": 477
},
{
"epoch": 1.3855072463768117,
"grad_norm": 0.3846140132825601,
"learning_rate": 5.8648133327689036e-05,
"loss": 1.2675,
"step": 478
},
{
"epoch": 1.3884057971014494,
"grad_norm": 0.3766180728314526,
"learning_rate": 5.849384171801876e-05,
"loss": 1.205,
"step": 479
},
{
"epoch": 1.391304347826087,
"grad_norm": 0.35496774282385274,
"learning_rate": 5.8339466778289745e-05,
"loss": 1.2035,
"step": 480
},
{
"epoch": 1.3942028985507247,
"grad_norm": 0.35882380091220856,
"learning_rate": 5.818501002301959e-05,
"loss": 1.2047,
"step": 481
},
{
"epoch": 1.3971014492753624,
"grad_norm": 0.36361359874976407,
"learning_rate": 5.803047296752856e-05,
"loss": 1.2068,
"step": 482
},
{
"epoch": 1.4,
"grad_norm": 0.35304052394158203,
"learning_rate": 5.7875857127924704e-05,
"loss": 1.2039,
"step": 483
},
{
"epoch": 1.4028985507246376,
"grad_norm": 0.3767536613499123,
"learning_rate": 5.772116402108903e-05,
"loss": 1.1734,
"step": 484
},
{
"epoch": 1.4057971014492754,
"grad_norm": 0.3673108485371312,
"learning_rate": 5.756639516466056e-05,
"loss": 1.2631,
"step": 485
},
{
"epoch": 1.4086956521739131,
"grad_norm": 0.37033398981771753,
"learning_rate": 5.741155207702146e-05,
"loss": 1.2284,
"step": 486
},
{
"epoch": 1.4115942028985506,
"grad_norm": 0.3803519741849858,
"learning_rate": 5.7256636277282193e-05,
"loss": 1.2512,
"step": 487
},
{
"epoch": 1.4144927536231884,
"grad_norm": 0.3822460303571093,
"learning_rate": 5.7101649285266524e-05,
"loss": 1.2285,
"step": 488
},
{
"epoch": 1.4173913043478261,
"grad_norm": 0.366694568605544,
"learning_rate": 5.694659262149666e-05,
"loss": 1.2652,
"step": 489
},
{
"epoch": 1.4202898550724639,
"grad_norm": 0.3599613129529298,
"learning_rate": 5.679146780717841e-05,
"loss": 1.199,
"step": 490
},
{
"epoch": 1.4231884057971014,
"grad_norm": 0.36225487078774454,
"learning_rate": 5.6636276364186105e-05,
"loss": 1.1848,
"step": 491
},
{
"epoch": 1.4260869565217391,
"grad_norm": 0.3599718189253672,
"learning_rate": 5.648101981504775e-05,
"loss": 1.2082,
"step": 492
},
{
"epoch": 1.4289855072463769,
"grad_norm": 0.37863788166143847,
"learning_rate": 5.6325699682930145e-05,
"loss": 1.2391,
"step": 493
},
{
"epoch": 1.4318840579710144,
"grad_norm": 0.3803432660363016,
"learning_rate": 5.617031749162381e-05,
"loss": 1.161,
"step": 494
},
{
"epoch": 1.434782608695652,
"grad_norm": 0.35786784027090707,
"learning_rate": 5.6014874765528124e-05,
"loss": 1.2861,
"step": 495
},
{
"epoch": 1.4376811594202898,
"grad_norm": 0.3642405560037894,
"learning_rate": 5.58593730296364e-05,
"loss": 1.2349,
"step": 496
},
{
"epoch": 1.4405797101449276,
"grad_norm": 0.369598439136747,
"learning_rate": 5.57038138095208e-05,
"loss": 1.285,
"step": 497
},
{
"epoch": 1.4434782608695653,
"grad_norm": 0.3555670502464068,
"learning_rate": 5.5548198631317494e-05,
"loss": 1.2145,
"step": 498
},
{
"epoch": 1.4463768115942028,
"grad_norm": 0.376327361594081,
"learning_rate": 5.539252902171164e-05,
"loss": 1.2245,
"step": 499
},
{
"epoch": 1.4492753623188406,
"grad_norm": 0.37654715270476347,
"learning_rate": 5.523680650792237e-05,
"loss": 1.2419,
"step": 500
},
{
"epoch": 1.4521739130434783,
"grad_norm": 0.5779377636764227,
"learning_rate": 5.508103261768783e-05,
"loss": 1.239,
"step": 501
},
{
"epoch": 1.4550724637681158,
"grad_norm": 0.37430911277789075,
"learning_rate": 5.492520887925028e-05,
"loss": 1.2577,
"step": 502
},
{
"epoch": 1.4579710144927536,
"grad_norm": 0.36147621449440515,
"learning_rate": 5.4769336821340936e-05,
"loss": 1.2851,
"step": 503
},
{
"epoch": 1.4608695652173913,
"grad_norm": 0.3731800543772072,
"learning_rate": 5.4613417973165106e-05,
"loss": 1.1851,
"step": 504
},
{
"epoch": 1.463768115942029,
"grad_norm": 0.38025435659821,
"learning_rate": 5.445745386438713e-05,
"loss": 1.2853,
"step": 505
},
{
"epoch": 1.4666666666666668,
"grad_norm": 0.3806710140744915,
"learning_rate": 5.430144602511539e-05,
"loss": 1.2698,
"step": 506
},
{
"epoch": 1.4695652173913043,
"grad_norm": 0.40891604532181375,
"learning_rate": 5.4145395985887246e-05,
"loss": 1.2388,
"step": 507
},
{
"epoch": 1.472463768115942,
"grad_norm": 0.3545961610157745,
"learning_rate": 5.3989305277654156e-05,
"loss": 1.19,
"step": 508
},
{
"epoch": 1.4753623188405798,
"grad_norm": 0.3648442660384036,
"learning_rate": 5.383317543176649e-05,
"loss": 1.203,
"step": 509
},
{
"epoch": 1.4782608695652173,
"grad_norm": 0.3850663135269365,
"learning_rate": 5.367700797995863e-05,
"loss": 1.2297,
"step": 510
},
{
"epoch": 1.481159420289855,
"grad_norm": 0.35394244670279573,
"learning_rate": 5.352080445433385e-05,
"loss": 1.2044,
"step": 511
},
{
"epoch": 1.4840579710144928,
"grad_norm": 0.3866450435083724,
"learning_rate": 5.336456638734938e-05,
"loss": 1.2203,
"step": 512
},
{
"epoch": 1.4869565217391305,
"grad_norm": 0.3800225621052723,
"learning_rate": 5.320829531180128e-05,
"loss": 1.2147,
"step": 513
},
{
"epoch": 1.4898550724637682,
"grad_norm": 0.37391354192034965,
"learning_rate": 5.30519927608095e-05,
"loss": 1.2173,
"step": 514
},
{
"epoch": 1.4927536231884058,
"grad_norm": 0.3908730346775049,
"learning_rate": 5.2895660267802714e-05,
"loss": 1.179,
"step": 515
},
{
"epoch": 1.4956521739130435,
"grad_norm": 0.3797397244263353,
"learning_rate": 5.27392993665034e-05,
"loss": 1.2397,
"step": 516
},
{
"epoch": 1.4985507246376812,
"grad_norm": 0.3698351874885442,
"learning_rate": 5.258291159091273e-05,
"loss": 1.292,
"step": 517
},
{
"epoch": 1.5014492753623188,
"grad_norm": 0.3680512756549276,
"learning_rate": 5.242649847529551e-05,
"loss": 1.1788,
"step": 518
},
{
"epoch": 1.5043478260869565,
"grad_norm": 0.3603216123639398,
"learning_rate": 5.227006155416517e-05,
"loss": 1.1539,
"step": 519
},
{
"epoch": 1.5072463768115942,
"grad_norm": 0.3830020055397342,
"learning_rate": 5.2113602362268674e-05,
"loss": 1.1658,
"step": 520
},
{
"epoch": 1.5101449275362318,
"grad_norm": 0.37049306835431794,
"learning_rate": 5.1957122434571485e-05,
"loss": 1.2754,
"step": 521
},
{
"epoch": 1.5130434782608697,
"grad_norm": 0.36878581085745593,
"learning_rate": 5.180062330624248e-05,
"loss": 1.26,
"step": 522
},
{
"epoch": 1.5159420289855072,
"grad_norm": 0.3932729911977662,
"learning_rate": 5.164410651263895e-05,
"loss": 1.2411,
"step": 523
},
{
"epoch": 1.518840579710145,
"grad_norm": 0.37380205081558054,
"learning_rate": 5.1487573589291424e-05,
"loss": 1.2778,
"step": 524
},
{
"epoch": 1.5217391304347827,
"grad_norm": 0.39041353684960733,
"learning_rate": 5.133102607188874e-05,
"loss": 1.1484,
"step": 525
},
{
"epoch": 1.5246376811594202,
"grad_norm": 0.37594098481535654,
"learning_rate": 5.117446549626289e-05,
"loss": 1.2161,
"step": 526
},
{
"epoch": 1.527536231884058,
"grad_norm": 0.38365451143587687,
"learning_rate": 5.101789339837396e-05,
"loss": 1.2256,
"step": 527
},
{
"epoch": 1.5304347826086957,
"grad_norm": 0.3855037750389005,
"learning_rate": 5.086131131429509e-05,
"loss": 1.2209,
"step": 528
},
{
"epoch": 1.5333333333333332,
"grad_norm": 0.3890790766439738,
"learning_rate": 5.07047207801974e-05,
"loss": 1.2338,
"step": 529
},
{
"epoch": 1.5362318840579712,
"grad_norm": 0.3700881037410359,
"learning_rate": 5.0548123332334896e-05,
"loss": 1.2475,
"step": 530
},
{
"epoch": 1.5391304347826087,
"grad_norm": 0.3743561390377829,
"learning_rate": 5.0391520507029424e-05,
"loss": 1.2239,
"step": 531
},
{
"epoch": 1.5420289855072464,
"grad_norm": 0.37802774104497083,
"learning_rate": 5.023491384065555e-05,
"loss": 1.2324,
"step": 532
},
{
"epoch": 1.5449275362318842,
"grad_norm": 0.36820878715854055,
"learning_rate": 5.0078304869625595e-05,
"loss": 1.2404,
"step": 533
},
{
"epoch": 1.5478260869565217,
"grad_norm": 0.3632460544127689,
"learning_rate": 4.992169513037441e-05,
"loss": 1.177,
"step": 534
},
{
"epoch": 1.5507246376811594,
"grad_norm": 0.3683252664871912,
"learning_rate": 4.9765086159344445e-05,
"loss": 1.182,
"step": 535
},
{
"epoch": 1.5536231884057972,
"grad_norm": 0.3831233196950789,
"learning_rate": 4.9608479492970594e-05,
"loss": 1.1991,
"step": 536
},
{
"epoch": 1.5565217391304347,
"grad_norm": 0.37245646640167623,
"learning_rate": 4.9451876667665116e-05,
"loss": 1.2376,
"step": 537
},
{
"epoch": 1.5594202898550724,
"grad_norm": 0.36522555829264214,
"learning_rate": 4.929527921980261e-05,
"loss": 1.2871,
"step": 538
},
{
"epoch": 1.5623188405797102,
"grad_norm": 0.35901097232709117,
"learning_rate": 4.9138688685704916e-05,
"loss": 1.2094,
"step": 539
},
{
"epoch": 1.5652173913043477,
"grad_norm": 0.3520423753812632,
"learning_rate": 4.898210660162605e-05,
"loss": 1.2363,
"step": 540
},
{
"epoch": 1.5681159420289856,
"grad_norm": 0.40852366010005403,
"learning_rate": 4.882553450373712e-05,
"loss": 1.2352,
"step": 541
},
{
"epoch": 1.5710144927536231,
"grad_norm": 0.3651205273751799,
"learning_rate": 4.866897392811126e-05,
"loss": 1.222,
"step": 542
},
{
"epoch": 1.5739130434782609,
"grad_norm": 0.3699594416077427,
"learning_rate": 4.851242641070859e-05,
"loss": 1.2149,
"step": 543
},
{
"epoch": 1.5768115942028986,
"grad_norm": 0.38193530242722756,
"learning_rate": 4.8355893487361084e-05,
"loss": 1.2766,
"step": 544
},
{
"epoch": 1.5797101449275361,
"grad_norm": 0.38568456101700965,
"learning_rate": 4.8199376693757544e-05,
"loss": 1.2844,
"step": 545
},
{
"epoch": 1.5826086956521739,
"grad_norm": 0.36059528632874444,
"learning_rate": 4.804287756542852e-05,
"loss": 1.2726,
"step": 546
},
{
"epoch": 1.5855072463768116,
"grad_norm": 0.36513879678761724,
"learning_rate": 4.788639763773133e-05,
"loss": 1.1763,
"step": 547
},
{
"epoch": 1.5884057971014491,
"grad_norm": 0.387466168821441,
"learning_rate": 4.772993844583483e-05,
"loss": 1.2544,
"step": 548
},
{
"epoch": 1.591304347826087,
"grad_norm": 0.5520887828224808,
"learning_rate": 4.75735015247045e-05,
"loss": 1.2285,
"step": 549
},
{
"epoch": 1.5942028985507246,
"grad_norm": 0.389584382030089,
"learning_rate": 4.7417088409087285e-05,
"loss": 1.2463,
"step": 550
},
{
"epoch": 1.5971014492753624,
"grad_norm": 0.3963144528047638,
"learning_rate": 4.7260700633496605e-05,
"loss": 1.1914,
"step": 551
},
{
"epoch": 1.6,
"grad_norm": 0.36855199490556523,
"learning_rate": 4.71043397321973e-05,
"loss": 1.2395,
"step": 552
},
{
"epoch": 1.6028985507246376,
"grad_norm": 0.3887397654253079,
"learning_rate": 4.6948007239190514e-05,
"loss": 1.2639,
"step": 553
},
{
"epoch": 1.6057971014492753,
"grad_norm": 0.3697755928376452,
"learning_rate": 4.6791704688198724e-05,
"loss": 1.1648,
"step": 554
},
{
"epoch": 1.608695652173913,
"grad_norm": 0.38405410279449403,
"learning_rate": 4.663543361265064e-05,
"loss": 1.2424,
"step": 555
},
{
"epoch": 1.6115942028985506,
"grad_norm": 0.36889274593199667,
"learning_rate": 4.647919554566616e-05,
"loss": 1.2037,
"step": 556
},
{
"epoch": 1.6144927536231886,
"grad_norm": 0.38742028194651634,
"learning_rate": 4.63229920200414e-05,
"loss": 1.144,
"step": 557
},
{
"epoch": 1.617391304347826,
"grad_norm": 0.3771419221596441,
"learning_rate": 4.61668245682335e-05,
"loss": 1.2386,
"step": 558
},
{
"epoch": 1.6202898550724638,
"grad_norm": 0.36745992758167406,
"learning_rate": 4.601069472234584e-05,
"loss": 1.2439,
"step": 559
},
{
"epoch": 1.6231884057971016,
"grad_norm": 0.37299246443958567,
"learning_rate": 4.585460401411275e-05,
"loss": 1.1891,
"step": 560
},
{
"epoch": 1.626086956521739,
"grad_norm": 0.39436742226379295,
"learning_rate": 4.569855397488462e-05,
"loss": 1.2345,
"step": 561
},
{
"epoch": 1.6289855072463768,
"grad_norm": 0.38332200212622664,
"learning_rate": 4.554254613561289e-05,
"loss": 1.221,
"step": 562
},
{
"epoch": 1.6318840579710145,
"grad_norm": 0.3668234731737798,
"learning_rate": 4.5386582026834906e-05,
"loss": 1.1407,
"step": 563
},
{
"epoch": 1.634782608695652,
"grad_norm": 0.3886901538482464,
"learning_rate": 4.5230663178659075e-05,
"loss": 1.2372,
"step": 564
},
{
"epoch": 1.6376811594202898,
"grad_norm": 0.3690709201915018,
"learning_rate": 4.507479112074974e-05,
"loss": 1.2135,
"step": 565
},
{
"epoch": 1.6405797101449275,
"grad_norm": 0.36879231080045594,
"learning_rate": 4.491896738231218e-05,
"loss": 1.1641,
"step": 566
},
{
"epoch": 1.643478260869565,
"grad_norm": 0.36645636944065885,
"learning_rate": 4.476319349207766e-05,
"loss": 1.1852,
"step": 567
},
{
"epoch": 1.646376811594203,
"grad_norm": 0.3431665404786532,
"learning_rate": 4.460747097828838e-05,
"loss": 1.1573,
"step": 568
},
{
"epoch": 1.6492753623188405,
"grad_norm": 0.3758095567042996,
"learning_rate": 4.445180136868252e-05,
"loss": 1.2862,
"step": 569
},
{
"epoch": 1.6521739130434783,
"grad_norm": 0.3747562731763405,
"learning_rate": 4.4296186190479203e-05,
"loss": 1.2232,
"step": 570
},
{
"epoch": 1.655072463768116,
"grad_norm": 0.3680948045233427,
"learning_rate": 4.414062697036361e-05,
"loss": 1.2261,
"step": 571
},
{
"epoch": 1.6579710144927535,
"grad_norm": 0.3951307328237191,
"learning_rate": 4.3985125234471874e-05,
"loss": 1.2456,
"step": 572
},
{
"epoch": 1.6608695652173913,
"grad_norm": 0.39734232299660693,
"learning_rate": 4.3829682508376194e-05,
"loss": 1.1953,
"step": 573
},
{
"epoch": 1.663768115942029,
"grad_norm": 0.3784998636514162,
"learning_rate": 4.367430031706987e-05,
"loss": 1.2367,
"step": 574
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.39715845084791845,
"learning_rate": 4.351898018495225e-05,
"loss": 1.2279,
"step": 575
},
{
"epoch": 1.6695652173913045,
"grad_norm": 0.378181731966129,
"learning_rate": 4.336372363581391e-05,
"loss": 1.2075,
"step": 576
},
{
"epoch": 1.672463768115942,
"grad_norm": 0.3690996052960561,
"learning_rate": 4.32085321928216e-05,
"loss": 1.0945,
"step": 577
},
{
"epoch": 1.6753623188405797,
"grad_norm": 0.3661279761386217,
"learning_rate": 4.305340737850334e-05,
"loss": 1.2039,
"step": 578
},
{
"epoch": 1.6782608695652175,
"grad_norm": 0.3703501070974622,
"learning_rate": 4.28983507147335e-05,
"loss": 1.1634,
"step": 579
},
{
"epoch": 1.681159420289855,
"grad_norm": 0.37705477138544613,
"learning_rate": 4.2743363722717825e-05,
"loss": 1.233,
"step": 580
},
{
"epoch": 1.6840579710144927,
"grad_norm": 0.37944231677619733,
"learning_rate": 4.258844792297855e-05,
"loss": 1.2484,
"step": 581
},
{
"epoch": 1.6869565217391305,
"grad_norm": 0.36121328853497303,
"learning_rate": 4.2433604835339445e-05,
"loss": 1.2517,
"step": 582
},
{
"epoch": 1.689855072463768,
"grad_norm": 0.3658490072297351,
"learning_rate": 4.227883597891098e-05,
"loss": 1.2833,
"step": 583
},
{
"epoch": 1.692753623188406,
"grad_norm": 0.3742426427268219,
"learning_rate": 4.21241428720753e-05,
"loss": 1.2188,
"step": 584
},
{
"epoch": 1.6956521739130435,
"grad_norm": 0.3833395112583662,
"learning_rate": 4.196952703247145e-05,
"loss": 1.265,
"step": 585
},
{
"epoch": 1.6985507246376812,
"grad_norm": 0.36472794357808286,
"learning_rate": 4.181498997698042e-05,
"loss": 1.1679,
"step": 586
},
{
"epoch": 1.701449275362319,
"grad_norm": 0.36498141790011873,
"learning_rate": 4.1660533221710266e-05,
"loss": 1.2138,
"step": 587
},
{
"epoch": 1.7043478260869565,
"grad_norm": 0.37102421652558093,
"learning_rate": 4.150615828198125e-05,
"loss": 1.2176,
"step": 588
},
{
"epoch": 1.7072463768115942,
"grad_norm": 0.36544210520658216,
"learning_rate": 4.135186667231097e-05,
"loss": 1.2098,
"step": 589
},
{
"epoch": 1.710144927536232,
"grad_norm": 0.3612434641690313,
"learning_rate": 4.119765990639952e-05,
"loss": 1.1763,
"step": 590
},
{
"epoch": 1.7130434782608694,
"grad_norm": 0.3620969506592556,
"learning_rate": 4.1043539497114605e-05,
"loss": 1.1872,
"step": 591
},
{
"epoch": 1.7159420289855074,
"grad_norm": 0.39393702299078354,
"learning_rate": 4.088950695647671e-05,
"loss": 1.2687,
"step": 592
},
{
"epoch": 1.718840579710145,
"grad_norm": 0.3817467440217286,
"learning_rate": 4.0735563795644294e-05,
"loss": 1.2771,
"step": 593
},
{
"epoch": 1.7217391304347827,
"grad_norm": 0.3927298023358771,
"learning_rate": 4.058171152489891e-05,
"loss": 1.2733,
"step": 594
},
{
"epoch": 1.7246376811594204,
"grad_norm": 0.3674064366862089,
"learning_rate": 4.042795165363048e-05,
"loss": 1.2438,
"step": 595
},
{
"epoch": 1.727536231884058,
"grad_norm": 0.3719771458126402,
"learning_rate": 4.0274285690322366e-05,
"loss": 1.2539,
"step": 596
},
{
"epoch": 1.7304347826086957,
"grad_norm": 0.37286309136721435,
"learning_rate": 4.012071514253665e-05,
"loss": 1.2219,
"step": 597
},
{
"epoch": 1.7333333333333334,
"grad_norm": 0.37200008726902983,
"learning_rate": 3.996724151689934e-05,
"loss": 1.1937,
"step": 598
},
{
"epoch": 1.736231884057971,
"grad_norm": 0.3769662425580422,
"learning_rate": 3.981386631908557e-05,
"loss": 1.1795,
"step": 599
},
{
"epoch": 1.7391304347826086,
"grad_norm": 0.38896295738805997,
"learning_rate": 3.966059105380483e-05,
"loss": 1.262,
"step": 600
},
{
"epoch": 1.7420289855072464,
"grad_norm": 0.38088532712001094,
"learning_rate": 3.9507417224786193e-05,
"loss": 1.2626,
"step": 601
},
{
"epoch": 1.744927536231884,
"grad_norm": 0.3906788265447541,
"learning_rate": 3.93543463347636e-05,
"loss": 1.1918,
"step": 602
},
{
"epoch": 1.7478260869565219,
"grad_norm": 0.3691860050404467,
"learning_rate": 3.920137988546109e-05,
"loss": 1.1616,
"step": 603
},
{
"epoch": 1.7507246376811594,
"grad_norm": 0.3792592507880301,
"learning_rate": 3.9048519377578064e-05,
"loss": 1.1926,
"step": 604
},
{
"epoch": 1.7536231884057971,
"grad_norm": 0.37902398772592705,
"learning_rate": 3.8895766310774574e-05,
"loss": 1.3234,
"step": 605
},
{
"epoch": 1.7565217391304349,
"grad_norm": 0.3808967277084784,
"learning_rate": 3.87431221836566e-05,
"loss": 1.2678,
"step": 606
},
{
"epoch": 1.7594202898550724,
"grad_norm": 0.3768612203952316,
"learning_rate": 3.859058849376136e-05,
"loss": 1.2442,
"step": 607
},
{
"epoch": 1.76231884057971,
"grad_norm": 0.3661782288025134,
"learning_rate": 3.843816673754262e-05,
"loss": 1.2757,
"step": 608
},
{
"epoch": 1.7652173913043478,
"grad_norm": 0.3746443716611926,
"learning_rate": 3.8285858410355984e-05,
"loss": 1.234,
"step": 609
},
{
"epoch": 1.7681159420289854,
"grad_norm": 0.38619920952815956,
"learning_rate": 3.8133665006444255e-05,
"loss": 1.2229,
"step": 610
},
{
"epoch": 1.7710144927536233,
"grad_norm": 0.37016562757932,
"learning_rate": 3.798158801892277e-05,
"loss": 1.2112,
"step": 611
},
{
"epoch": 1.7739130434782608,
"grad_norm": 0.39144763721074394,
"learning_rate": 3.782962893976475e-05,
"loss": 1.1941,
"step": 612
},
{
"epoch": 1.7768115942028986,
"grad_norm": 0.372157745001237,
"learning_rate": 3.7677789259786615e-05,
"loss": 1.1607,
"step": 613
},
{
"epoch": 1.7797101449275363,
"grad_norm": 0.38017415387323344,
"learning_rate": 3.7526070468633464e-05,
"loss": 1.2251,
"step": 614
},
{
"epoch": 1.7826086956521738,
"grad_norm": 0.3764265620005903,
"learning_rate": 3.737447405476436e-05,
"loss": 1.2389,
"step": 615
},
{
"epoch": 1.7855072463768116,
"grad_norm": 0.36301297876352934,
"learning_rate": 3.7223001505437775e-05,
"loss": 1.1647,
"step": 616
},
{
"epoch": 1.7884057971014493,
"grad_norm": 0.3589005180459851,
"learning_rate": 3.7071654306697003e-05,
"loss": 1.2044,
"step": 617
},
{
"epoch": 1.7913043478260868,
"grad_norm": 0.38118628063662097,
"learning_rate": 3.692043394335556e-05,
"loss": 1.2063,
"step": 618
},
{
"epoch": 1.7942028985507248,
"grad_norm": 0.37713318727543105,
"learning_rate": 3.676934189898259e-05,
"loss": 1.3151,
"step": 619
},
{
"epoch": 1.7971014492753623,
"grad_norm": 0.38497109120391243,
"learning_rate": 3.661837965588842e-05,
"loss": 1.1582,
"step": 620
},
{
"epoch": 1.8,
"grad_norm": 0.3958884224922945,
"learning_rate": 3.646754869510988e-05,
"loss": 1.2598,
"step": 621
},
{
"epoch": 1.8028985507246378,
"grad_norm": 0.370532843067504,
"learning_rate": 3.631685049639586e-05,
"loss": 1.2128,
"step": 622
},
{
"epoch": 1.8057971014492753,
"grad_norm": 0.40047093677653156,
"learning_rate": 3.616628653819276e-05,
"loss": 1.2316,
"step": 623
},
{
"epoch": 1.808695652173913,
"grad_norm": 0.37643906872365784,
"learning_rate": 3.6015858297630004e-05,
"loss": 1.2171,
"step": 624
},
{
"epoch": 1.8115942028985508,
"grad_norm": 0.39490427844818465,
"learning_rate": 3.5865567250505536e-05,
"loss": 1.2416,
"step": 625
},
{
"epoch": 1.8144927536231883,
"grad_norm": 0.3631993323865769,
"learning_rate": 3.5715414871271336e-05,
"loss": 1.2147,
"step": 626
},
{
"epoch": 1.8173913043478263,
"grad_norm": 0.35840772617807537,
"learning_rate": 3.556540263301896e-05,
"loss": 1.2015,
"step": 627
},
{
"epoch": 1.8202898550724638,
"grad_norm": 0.3791997912963071,
"learning_rate": 3.541553200746511e-05,
"loss": 1.1583,
"step": 628
},
{
"epoch": 1.8231884057971013,
"grad_norm": 0.37805560040982356,
"learning_rate": 3.526580446493717e-05,
"loss": 1.2238,
"step": 629
},
{
"epoch": 1.8260869565217392,
"grad_norm": 0.382383828357578,
"learning_rate": 3.511622147435877e-05,
"loss": 1.2201,
"step": 630
},
{
"epoch": 1.8289855072463768,
"grad_norm": 0.38874429445479597,
"learning_rate": 3.4966784503235394e-05,
"loss": 1.2319,
"step": 631
},
{
"epoch": 1.8318840579710145,
"grad_norm": 0.38625077800174934,
"learning_rate": 3.481749501764002e-05,
"loss": 1.2326,
"step": 632
},
{
"epoch": 1.8347826086956522,
"grad_norm": 0.37805590288266955,
"learning_rate": 3.466835448219867e-05,
"loss": 1.2072,
"step": 633
},
{
"epoch": 1.8376811594202898,
"grad_norm": 0.3876007771372343,
"learning_rate": 3.4519364360076045e-05,
"loss": 1.2188,
"step": 634
},
{
"epoch": 1.8405797101449275,
"grad_norm": 0.36997413690862124,
"learning_rate": 3.437052611296123e-05,
"loss": 1.2974,
"step": 635
},
{
"epoch": 1.8434782608695652,
"grad_norm": 0.38893326272743267,
"learning_rate": 3.422184120105331e-05,
"loss": 1.2325,
"step": 636
},
{
"epoch": 1.8463768115942027,
"grad_norm": 0.38534863103441785,
"learning_rate": 3.407331108304704e-05,
"loss": 1.2881,
"step": 637
},
{
"epoch": 1.8492753623188407,
"grad_norm": 0.35237887662066153,
"learning_rate": 3.392493721611857e-05,
"loss": 1.1636,
"step": 638
},
{
"epoch": 1.8521739130434782,
"grad_norm": 0.3522129349688945,
"learning_rate": 3.37767210559111e-05,
"loss": 1.2069,
"step": 639
},
{
"epoch": 1.855072463768116,
"grad_norm": 0.3828825108660318,
"learning_rate": 3.3628664056520645e-05,
"loss": 1.1511,
"step": 640
},
{
"epoch": 1.8579710144927537,
"grad_norm": 0.38984016931652277,
"learning_rate": 3.348076767048174e-05,
"loss": 1.2204,
"step": 641
},
{
"epoch": 1.8608695652173912,
"grad_norm": 0.36523507158461577,
"learning_rate": 3.3333033348753196e-05,
"loss": 1.262,
"step": 642
},
{
"epoch": 1.863768115942029,
"grad_norm": 0.37220367890890976,
"learning_rate": 3.3185462540703874e-05,
"loss": 1.2262,
"step": 643
},
{
"epoch": 1.8666666666666667,
"grad_norm": 0.3694812470086758,
"learning_rate": 3.303805669409848e-05,
"loss": 1.2474,
"step": 644
},
{
"epoch": 1.8695652173913042,
"grad_norm": 0.36698538082460586,
"learning_rate": 3.289081725508333e-05,
"loss": 1.2088,
"step": 645
},
{
"epoch": 1.8724637681159422,
"grad_norm": 0.3778477738916828,
"learning_rate": 3.2743745668172135e-05,
"loss": 1.1314,
"step": 646
},
{
"epoch": 1.8753623188405797,
"grad_norm": 0.35885473738105417,
"learning_rate": 3.259684337623192e-05,
"loss": 1.1323,
"step": 647
},
{
"epoch": 1.8782608695652174,
"grad_norm": 0.3865523562816111,
"learning_rate": 3.245011182046881e-05,
"loss": 1.2147,
"step": 648
},
{
"epoch": 1.8811594202898552,
"grad_norm": 0.530703476143991,
"learning_rate": 3.230355244041387e-05,
"loss": 1.294,
"step": 649
},
{
"epoch": 1.8840579710144927,
"grad_norm": 0.37902082343553395,
"learning_rate": 3.215716667390905e-05,
"loss": 1.2446,
"step": 650
},
{
"epoch": 1.8869565217391304,
"grad_norm": 0.3635449013765209,
"learning_rate": 3.201095595709298e-05,
"loss": 1.1876,
"step": 651
},
{
"epoch": 1.8898550724637682,
"grad_norm": 0.38375684981250285,
"learning_rate": 3.1864921724387e-05,
"loss": 1.2511,
"step": 652
},
{
"epoch": 1.8927536231884057,
"grad_norm": 0.374887470810997,
"learning_rate": 3.1719065408481005e-05,
"loss": 1.2076,
"step": 653
},
{
"epoch": 1.8956521739130436,
"grad_norm": 0.3788733526902221,
"learning_rate": 3.1573388440319404e-05,
"loss": 1.1485,
"step": 654
},
{
"epoch": 1.8985507246376812,
"grad_norm": 0.37343821294935253,
"learning_rate": 3.142789224908709e-05,
"loss": 1.2417,
"step": 655
},
{
"epoch": 1.901449275362319,
"grad_norm": 0.36972719766904644,
"learning_rate": 3.128257826219544e-05,
"loss": 1.1924,
"step": 656
},
{
"epoch": 1.9043478260869566,
"grad_norm": 0.39152027197251665,
"learning_rate": 3.1137447905268264e-05,
"loss": 1.2334,
"step": 657
},
{
"epoch": 1.9072463768115941,
"grad_norm": 0.3793593937622258,
"learning_rate": 3.099250260212785e-05,
"loss": 1.2044,
"step": 658
},
{
"epoch": 1.9101449275362319,
"grad_norm": 0.37274932277970574,
"learning_rate": 3.0847743774781e-05,
"loss": 1.2396,
"step": 659
},
{
"epoch": 1.9130434782608696,
"grad_norm": 0.3917130499161079,
"learning_rate": 3.070317284340505e-05,
"loss": 1.2224,
"step": 660
},
{
"epoch": 1.9159420289855071,
"grad_norm": 0.3730432872342999,
"learning_rate": 3.055879122633397e-05,
"loss": 1.1523,
"step": 661
},
{
"epoch": 1.9188405797101449,
"grad_norm": 0.38603243505310325,
"learning_rate": 3.041460034004443e-05,
"loss": 1.2139,
"step": 662
},
{
"epoch": 1.9217391304347826,
"grad_norm": 0.3705238103870671,
"learning_rate": 3.0270601599141912e-05,
"loss": 1.2359,
"step": 663
},
{
"epoch": 1.9246376811594201,
"grad_norm": 0.37597496158367705,
"learning_rate": 3.0126796416346814e-05,
"loss": 1.2185,
"step": 664
},
{
"epoch": 1.927536231884058,
"grad_norm": 0.3685212983823541,
"learning_rate": 2.9983186202480623e-05,
"loss": 1.1696,
"step": 665
},
{
"epoch": 1.9304347826086956,
"grad_norm": 0.369031802362704,
"learning_rate": 2.9839772366452035e-05,
"loss": 1.1996,
"step": 666
},
{
"epoch": 1.9333333333333333,
"grad_norm": 0.37822154642489714,
"learning_rate": 2.969655631524316e-05,
"loss": 1.2732,
"step": 667
},
{
"epoch": 1.936231884057971,
"grad_norm": 0.37245983427478613,
"learning_rate": 2.9553539453895755e-05,
"loss": 1.2615,
"step": 668
},
{
"epoch": 1.9391304347826086,
"grad_norm": 0.3778250952875639,
"learning_rate": 2.9410723185497324e-05,
"loss": 1.2146,
"step": 669
},
{
"epoch": 1.9420289855072463,
"grad_norm": 0.3745452473168881,
"learning_rate": 2.9268108911167457e-05,
"loss": 1.2042,
"step": 670
},
{
"epoch": 1.944927536231884,
"grad_norm": 0.37312413882240314,
"learning_rate": 2.9125698030044068e-05,
"loss": 1.1911,
"step": 671
},
{
"epoch": 1.9478260869565216,
"grad_norm": 0.4061345062579341,
"learning_rate": 2.8983491939269634e-05,
"loss": 1.2611,
"step": 672
},
{
"epoch": 1.9507246376811596,
"grad_norm": 0.3849328956575118,
"learning_rate": 2.8841492033977503e-05,
"loss": 1.2108,
"step": 673
},
{
"epoch": 1.953623188405797,
"grad_norm": 0.38053458611756497,
"learning_rate": 2.8699699707278223e-05,
"loss": 1.2144,
"step": 674
},
{
"epoch": 1.9565217391304348,
"grad_norm": 0.39621473951535024,
"learning_rate": 2.8558116350245854e-05,
"loss": 1.2493,
"step": 675
},
{
"epoch": 1.9594202898550726,
"grad_norm": 0.3695671513205437,
"learning_rate": 2.841674335190434e-05,
"loss": 1.2519,
"step": 676
},
{
"epoch": 1.96231884057971,
"grad_norm": 0.3830315846006876,
"learning_rate": 2.827558209921386e-05,
"loss": 1.2074,
"step": 677
},
{
"epoch": 1.9652173913043478,
"grad_norm": 0.3877343629077828,
"learning_rate": 2.8134633977057235e-05,
"loss": 1.2333,
"step": 678
},
{
"epoch": 1.9681159420289855,
"grad_norm": 0.39689935141233373,
"learning_rate": 2.7993900368226333e-05,
"loss": 1.2128,
"step": 679
},
{
"epoch": 1.971014492753623,
"grad_norm": 0.37755832002907747,
"learning_rate": 2.785338265340852e-05,
"loss": 1.1728,
"step": 680
},
{
"epoch": 1.973913043478261,
"grad_norm": 0.38446867990310063,
"learning_rate": 2.771308221117309e-05,
"loss": 1.1602,
"step": 681
},
{
"epoch": 1.9768115942028985,
"grad_norm": 0.3785335064750929,
"learning_rate": 2.757300041795776e-05,
"loss": 1.2085,
"step": 682
},
{
"epoch": 1.9797101449275363,
"grad_norm": 0.3879694395220702,
"learning_rate": 2.7433138648055168e-05,
"loss": 1.2096,
"step": 683
},
{
"epoch": 1.982608695652174,
"grad_norm": 0.38604305997893856,
"learning_rate": 2.729349827359936e-05,
"loss": 1.2739,
"step": 684
},
{
"epoch": 1.9855072463768115,
"grad_norm": 0.3795112440774168,
"learning_rate": 2.715408066455236e-05,
"loss": 1.2666,
"step": 685
},
{
"epoch": 1.9884057971014493,
"grad_norm": 0.3625119163490855,
"learning_rate": 2.701488718869073e-05,
"loss": 1.2317,
"step": 686
},
{
"epoch": 1.991304347826087,
"grad_norm": 0.3680979908316257,
"learning_rate": 2.6875919211592137e-05,
"loss": 1.2673,
"step": 687
},
{
"epoch": 1.9942028985507245,
"grad_norm": 0.39366314079628106,
"learning_rate": 2.673717809662194e-05,
"loss": 1.215,
"step": 688
},
{
"epoch": 1.9971014492753625,
"grad_norm": 0.3711217421698582,
"learning_rate": 2.659866520491986e-05,
"loss": 1.2061,
"step": 689
},
{
"epoch": 2.0,
"grad_norm": 0.3619509926469052,
"learning_rate": 2.646038189538659e-05,
"loss": 1.0882,
"step": 690
},
{
"epoch": 2.0028985507246375,
"grad_norm": 0.36298590926269914,
"learning_rate": 2.632232952467047e-05,
"loss": 1.0538,
"step": 691
},
{
"epoch": 2.0057971014492755,
"grad_norm": 0.36532280808197115,
"learning_rate": 2.6184509447154193e-05,
"loss": 1.1357,
"step": 692
},
{
"epoch": 2.008695652173913,
"grad_norm": 0.39561521212011347,
"learning_rate": 2.6046923014941494e-05,
"loss": 0.9882,
"step": 693
},
{
"epoch": 2.0115942028985505,
"grad_norm": 0.3663184321766037,
"learning_rate": 2.5909571577843905e-05,
"loss": 1.0739,
"step": 694
},
{
"epoch": 2.0144927536231885,
"grad_norm": 0.3719396287060232,
"learning_rate": 2.5772456483367497e-05,
"loss": 1.0861,
"step": 695
},
{
"epoch": 2.017391304347826,
"grad_norm": 0.39175032329764664,
"learning_rate": 2.563557907669968e-05,
"loss": 1.0997,
"step": 696
},
{
"epoch": 2.020289855072464,
"grad_norm": 0.3842127505386081,
"learning_rate": 2.5498940700695978e-05,
"loss": 1.0833,
"step": 697
},
{
"epoch": 2.0231884057971015,
"grad_norm": 0.41296235407870646,
"learning_rate": 2.5362542695866885e-05,
"loss": 1.0784,
"step": 698
},
{
"epoch": 2.026086956521739,
"grad_norm": 0.40929280219103825,
"learning_rate": 2.5226386400364686e-05,
"loss": 1.0951,
"step": 699
},
{
"epoch": 2.028985507246377,
"grad_norm": 0.39727740475543244,
"learning_rate": 2.5090473149970357e-05,
"loss": 0.9986,
"step": 700
},
{
"epoch": 2.0318840579710145,
"grad_norm": 0.39777015075034217,
"learning_rate": 2.4954804278080423e-05,
"loss": 1.0739,
"step": 701
},
{
"epoch": 2.034782608695652,
"grad_norm": 0.40515813767942754,
"learning_rate": 2.4819381115693923e-05,
"loss": 1.1273,
"step": 702
},
{
"epoch": 2.03768115942029,
"grad_norm": 0.3928754252415712,
"learning_rate": 2.4684204991399312e-05,
"loss": 1.0047,
"step": 703
},
{
"epoch": 2.0405797101449274,
"grad_norm": 0.39235743857450184,
"learning_rate": 2.4549277231361438e-05,
"loss": 1.0452,
"step": 704
},
{
"epoch": 2.0434782608695654,
"grad_norm": 0.41751282512992466,
"learning_rate": 2.4414599159308553e-05,
"loss": 1.0451,
"step": 705
},
{
"epoch": 2.046376811594203,
"grad_norm": 0.40629312672049445,
"learning_rate": 2.4280172096519298e-05,
"loss": 1.1042,
"step": 706
},
{
"epoch": 2.0492753623188404,
"grad_norm": 0.4057666557957047,
"learning_rate": 2.4145997361809758e-05,
"loss": 1.0483,
"step": 707
},
{
"epoch": 2.0521739130434784,
"grad_norm": 0.4116946242019697,
"learning_rate": 2.4012076271520495e-05,
"loss": 1.1184,
"step": 708
},
{
"epoch": 2.055072463768116,
"grad_norm": 0.4127782071588422,
"learning_rate": 2.3878410139503693e-05,
"loss": 1.1238,
"step": 709
},
{
"epoch": 2.0579710144927534,
"grad_norm": 0.3964820416953686,
"learning_rate": 2.3745000277110197e-05,
"loss": 1.0499,
"step": 710
},
{
"epoch": 2.0608695652173914,
"grad_norm": 0.43556452448044664,
"learning_rate": 2.36118479931767e-05,
"loss": 1.0943,
"step": 711
},
{
"epoch": 2.063768115942029,
"grad_norm": 0.3995865010547347,
"learning_rate": 2.347895459401288e-05,
"loss": 1.04,
"step": 712
},
{
"epoch": 2.066666666666667,
"grad_norm": 0.4221661952062326,
"learning_rate": 2.334632138338859e-05,
"loss": 0.9803,
"step": 713
},
{
"epoch": 2.0695652173913044,
"grad_norm": 0.41950916776520863,
"learning_rate": 2.3213949662521066e-05,
"loss": 1.0886,
"step": 714
},
{
"epoch": 2.072463768115942,
"grad_norm": 0.4173493785071151,
"learning_rate": 2.308184073006216e-05,
"loss": 1.0596,
"step": 715
},
{
"epoch": 2.07536231884058,
"grad_norm": 0.39623286465989827,
"learning_rate": 2.2949995882085595e-05,
"loss": 1.0871,
"step": 716
},
{
"epoch": 2.0782608695652174,
"grad_norm": 0.39259310137723663,
"learning_rate": 2.2818416412074267e-05,
"loss": 1.0324,
"step": 717
},
{
"epoch": 2.081159420289855,
"grad_norm": 0.3822283284054439,
"learning_rate": 2.2687103610907534e-05,
"loss": 1.1117,
"step": 718
},
{
"epoch": 2.084057971014493,
"grad_norm": 0.407037401843374,
"learning_rate": 2.255605876684856e-05,
"loss": 1.0225,
"step": 719
},
{
"epoch": 2.0869565217391304,
"grad_norm": 0.4184329997154531,
"learning_rate": 2.2425283165531685e-05,
"loss": 1.0084,
"step": 720
},
|
{ |
|
"epoch": 2.0898550724637683, |
|
"grad_norm": 0.4131172741343908, |
|
"learning_rate": 2.22947780899498e-05, |
|
"loss": 1.0207, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 2.092753623188406, |
|
"grad_norm": 0.4143196275192534, |
|
"learning_rate": 2.216454482044176e-05, |
|
"loss": 1.0337, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 2.0956521739130434, |
|
"grad_norm": 0.40754060408579984, |
|
"learning_rate": 2.203458463467983e-05, |
|
"loss": 1.1537, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 2.0985507246376813, |
|
"grad_norm": 0.42013725925992734, |
|
"learning_rate": 2.1904898807657152e-05, |
|
"loss": 0.9899, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 2.101449275362319, |
|
"grad_norm": 0.41687669776278075, |
|
"learning_rate": 2.1775488611675233e-05, |
|
"loss": 1.0832, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.1043478260869564, |
|
"grad_norm": 0.4286213604830879, |
|
"learning_rate": 2.1646355316331458e-05, |
|
"loss": 1.0802, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 2.1072463768115943, |
|
"grad_norm": 0.4042262579626966, |
|
"learning_rate": 2.151750018850663e-05, |
|
"loss": 1.0538, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 2.110144927536232, |
|
"grad_norm": 0.4010423956906586, |
|
"learning_rate": 2.1388924492352565e-05, |
|
"loss": 1.0897, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 2.1130434782608694, |
|
"grad_norm": 0.4120035283147293, |
|
"learning_rate": 2.126062948927966e-05, |
|
"loss": 1.1104, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 2.1159420289855073, |
|
"grad_norm": 0.4300470148265316, |
|
"learning_rate": 2.1132616437944547e-05, |
|
"loss": 1.0457, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.118840579710145, |
|
"grad_norm": 0.4153085209481317, |
|
"learning_rate": 2.100488659423772e-05, |
|
"loss": 1.0856, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 2.121739130434783, |
|
"grad_norm": 0.4060830438581685, |
|
"learning_rate": 2.087744121127122e-05, |
|
"loss": 1.0801, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 2.1246376811594203, |
|
"grad_norm": 0.4267224449360045, |
|
"learning_rate": 2.075028153936636e-05, |
|
"loss": 1.0158, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 2.127536231884058, |
|
"grad_norm": 0.4092513929978087, |
|
"learning_rate": 2.062340882604143e-05, |
|
"loss": 1.0211, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 2.130434782608696, |
|
"grad_norm": 0.4297526463869587, |
|
"learning_rate": 2.049682431599947e-05, |
|
"loss": 1.1129, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 0.4636326790218994, |
|
"learning_rate": 2.0370529251116067e-05, |
|
"loss": 1.1291, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 2.136231884057971, |
|
"grad_norm": 0.3974548122667625, |
|
"learning_rate": 2.0244524870427172e-05, |
|
"loss": 0.9923, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 2.139130434782609, |
|
"grad_norm": 0.4038721913341886, |
|
"learning_rate": 2.0118812410116915e-05, |
|
"loss": 1.0817, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 2.1420289855072463, |
|
"grad_norm": 0.41807115165201914, |
|
"learning_rate": 1.999339310350551e-05, |
|
"loss": 1.09, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 2.1449275362318843, |
|
"grad_norm": 0.40763130794004726, |
|
"learning_rate": 1.9868268181037185e-05, |
|
"loss": 1.0475, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.1478260869565218, |
|
"grad_norm": 0.4099162086697869, |
|
"learning_rate": 1.9743438870267988e-05, |
|
"loss": 1.0527, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 2.1507246376811593, |
|
"grad_norm": 0.4046969215163759, |
|
"learning_rate": 1.961890639585388e-05, |
|
"loss": 1.0224, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 2.1536231884057973, |
|
"grad_norm": 0.40495982818104165, |
|
"learning_rate": 1.949467197953866e-05, |
|
"loss": 0.9912, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 2.1565217391304348, |
|
"grad_norm": 0.4115616809855344, |
|
"learning_rate": 1.9370736840141978e-05, |
|
"loss": 1.0773, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 2.1594202898550723, |
|
"grad_norm": 0.42477438614499907, |
|
"learning_rate": 1.9247102193547384e-05, |
|
"loss": 1.0183, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 2.1623188405797102, |
|
"grad_norm": 0.39454596479550186, |
|
"learning_rate": 1.912376925269041e-05, |
|
"loss": 1.0548, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 2.1652173913043478, |
|
"grad_norm": 0.4324946159925722, |
|
"learning_rate": 1.900073922754665e-05, |
|
"loss": 1.0532, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 2.1681159420289857, |
|
"grad_norm": 0.40496616232865795, |
|
"learning_rate": 1.8878013325119902e-05, |
|
"loss": 1.1552, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 2.1710144927536232, |
|
"grad_norm": 0.41915807837518143, |
|
"learning_rate": 1.8755592749430322e-05, |
|
"loss": 1.0243, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 2.1739130434782608, |
|
"grad_norm": 0.4186007202451323, |
|
"learning_rate": 1.8633478701502628e-05, |
|
"loss": 1.0744, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.1768115942028987, |
|
"grad_norm": 0.42045626939886377, |
|
"learning_rate": 1.8511672379354284e-05, |
|
"loss": 1.068, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 2.1797101449275362, |
|
"grad_norm": 0.4045186001077355, |
|
"learning_rate": 1.8390174977983778e-05, |
|
"loss": 1.0957, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 2.1826086956521737, |
|
"grad_norm": 0.4478832702569865, |
|
"learning_rate": 1.8268987689358874e-05, |
|
"loss": 1.0909, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 2.1855072463768117, |
|
"grad_norm": 0.4164615953299648, |
|
"learning_rate": 1.814811170240495e-05, |
|
"loss": 1.0386, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 2.1884057971014492, |
|
"grad_norm": 0.41902328103819775, |
|
"learning_rate": 1.80275482029933e-05, |
|
"loss": 1.0344, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 2.1913043478260867, |
|
"grad_norm": 0.41670788409755355, |
|
"learning_rate": 1.7907298373929517e-05, |
|
"loss": 0.9878, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 2.1942028985507247, |
|
"grad_norm": 0.4294226441948201, |
|
"learning_rate": 1.7787363394941875e-05, |
|
"loss": 1.0175, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 2.197101449275362, |
|
"grad_norm": 0.4254645454494433, |
|
"learning_rate": 1.7667744442669793e-05, |
|
"loss": 1.0615, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.4099964946904337, |
|
"learning_rate": 1.7548442690652238e-05, |
|
"loss": 0.9919, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 2.2028985507246377, |
|
"grad_norm": 0.42880536140401987, |
|
"learning_rate": 1.7429459309316254e-05, |
|
"loss": 1.0661, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.205797101449275, |
|
"grad_norm": 0.4173497311104388, |
|
"learning_rate": 1.7310795465965452e-05, |
|
"loss": 1.0304, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 2.208695652173913, |
|
"grad_norm": 0.4181309528124866, |
|
"learning_rate": 1.7192452324768577e-05, |
|
"loss": 1.1069, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 2.2115942028985507, |
|
"grad_norm": 0.4253296723606123, |
|
"learning_rate": 1.7074431046748075e-05, |
|
"loss": 1.1159, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 2.214492753623188, |
|
"grad_norm": 0.4140966246574362, |
|
"learning_rate": 1.69567327897687e-05, |
|
"loss": 1.035, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 2.217391304347826, |
|
"grad_norm": 0.4360262256456945, |
|
"learning_rate": 1.683935870852621e-05, |
|
"loss": 1.0341, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.2202898550724637, |
|
"grad_norm": 0.4129314987978601, |
|
"learning_rate": 1.6722309954535915e-05, |
|
"loss": 1.0361, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 2.2231884057971016, |
|
"grad_norm": 0.44728638008426197, |
|
"learning_rate": 1.6605587676121492e-05, |
|
"loss": 0.982, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 2.226086956521739, |
|
"grad_norm": 0.4142277894364414, |
|
"learning_rate": 1.6489193018403694e-05, |
|
"loss": 1.0186, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 2.2289855072463767, |
|
"grad_norm": 0.42466461089685326, |
|
"learning_rate": 1.6373127123289082e-05, |
|
"loss": 1.0878, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 2.2318840579710146, |
|
"grad_norm": 0.4255999017930268, |
|
"learning_rate": 1.6257391129458866e-05, |
|
"loss": 0.9795, |
|
"step": 770 |
|
}, |
|
    {
      "epoch": 2.234782608695652,
      "grad_norm": 0.4214111455741252,
      "learning_rate": 1.614198617235768e-05,
      "loss": 1.0523,
      "step": 771
    },
    {
      "epoch": 2.2376811594202897,
      "grad_norm": 0.40833801140318804,
      "learning_rate": 1.6026913384182513e-05,
      "loss": 1.0665,
      "step": 772
    },
    {
      "epoch": 2.2405797101449276,
      "grad_norm": 0.4060043083014689,
      "learning_rate": 1.5912173893871534e-05,
      "loss": 1.0294,
      "step": 773
    },
    {
      "epoch": 2.243478260869565,
      "grad_norm": 0.441842102392729,
      "learning_rate": 1.5797768827093055e-05,
      "loss": 1.0781,
      "step": 774
    },
    {
      "epoch": 2.246376811594203,
      "grad_norm": 0.42451158383299736,
      "learning_rate": 1.5683699306234483e-05,
      "loss": 1.03,
      "step": 775
    },
    {
      "epoch": 2.2492753623188406,
      "grad_norm": 0.43280564540973687,
      "learning_rate": 1.5569966450391273e-05,
      "loss": 1.0932,
      "step": 776
    },
    {
      "epoch": 2.252173913043478,
      "grad_norm": 0.4260799476878949,
      "learning_rate": 1.5456571375356045e-05,
      "loss": 0.9906,
      "step": 777
    },
    {
      "epoch": 2.255072463768116,
      "grad_norm": 0.4289868937899867,
      "learning_rate": 1.534351519360752e-05,
      "loss": 1.1224,
      "step": 778
    },
    {
      "epoch": 2.2579710144927536,
      "grad_norm": 0.4184482349129135,
      "learning_rate": 1.5230799014299651e-05,
      "loss": 1.0492,
      "step": 779
    },
    {
      "epoch": 2.260869565217391,
      "grad_norm": 0.4169287607356858,
      "learning_rate": 1.5118423943250771e-05,
      "loss": 1.0076,
      "step": 780
    },
    {
      "epoch": 2.263768115942029,
      "grad_norm": 0.4437723000239763,
      "learning_rate": 1.500639108293272e-05,
      "loss": 1.0756,
      "step": 781
    },
    {
      "epoch": 2.2666666666666666,
      "grad_norm": 2.438737443529068,
      "learning_rate": 1.4894701532460026e-05,
      "loss": 1.0372,
      "step": 782
    },
    {
      "epoch": 2.269565217391304,
      "grad_norm": 0.4259694730355945,
      "learning_rate": 1.4783356387579123e-05,
      "loss": 1.0914,
      "step": 783
    },
    {
      "epoch": 2.272463768115942,
      "grad_norm": 0.42609879566763975,
      "learning_rate": 1.4672356740657612e-05,
      "loss": 1.1024,
      "step": 784
    },
    {
      "epoch": 2.2753623188405796,
      "grad_norm": 0.41473766458960193,
      "learning_rate": 1.4561703680673528e-05,
      "loss": 1.0437,
      "step": 785
    },
    {
      "epoch": 2.2782608695652176,
      "grad_norm": 0.41138794322562033,
      "learning_rate": 1.4451398293204671e-05,
      "loss": 0.9883,
      "step": 786
    },
    {
      "epoch": 2.281159420289855,
      "grad_norm": 0.4345116661977155,
      "learning_rate": 1.4341441660417948e-05,
      "loss": 1.0405,
      "step": 787
    },
    {
      "epoch": 2.2840579710144926,
      "grad_norm": 0.43156004240612655,
      "learning_rate": 1.423183486105874e-05,
      "loss": 1.0858,
      "step": 788
    },
    {
      "epoch": 2.2869565217391306,
      "grad_norm": 0.43394375495039533,
      "learning_rate": 1.4122578970440392e-05,
      "loss": 1.013,
      "step": 789
    },
    {
      "epoch": 2.289855072463768,
      "grad_norm": 0.42318889929148634,
      "learning_rate": 1.4013675060433562e-05,
      "loss": 1.0667,
      "step": 790
    },
    {
      "epoch": 2.292753623188406,
      "grad_norm": 0.4338786349395585,
      "learning_rate": 1.3905124199455733e-05,
      "loss": 0.9574,
      "step": 791
    },
    {
      "epoch": 2.2956521739130435,
      "grad_norm": 0.4263774516063788,
      "learning_rate": 1.379692745246079e-05,
      "loss": 1.0388,
      "step": 792
    },
    {
      "epoch": 2.298550724637681,
      "grad_norm": 0.4578203586741276,
      "learning_rate": 1.368908588092852e-05,
      "loss": 1.0852,
      "step": 793
    },
    {
      "epoch": 2.301449275362319,
      "grad_norm": 0.4223544444704819,
      "learning_rate": 1.3581600542854211e-05,
      "loss": 1.0764,
      "step": 794
    },
    {
      "epoch": 2.3043478260869565,
      "grad_norm": 0.42040297195621995,
      "learning_rate": 1.3474472492738266e-05,
      "loss": 1.0818,
      "step": 795
    },
    {
      "epoch": 2.307246376811594,
      "grad_norm": 0.42233699920038903,
      "learning_rate": 1.3367702781575858e-05,
      "loss": 1.0144,
      "step": 796
    },
    {
      "epoch": 2.310144927536232,
      "grad_norm": 0.42739886636894053,
      "learning_rate": 1.3261292456846647e-05,
      "loss": 1.011,
      "step": 797
    },
    {
      "epoch": 2.3130434782608695,
      "grad_norm": 0.4319353955954341,
      "learning_rate": 1.315524256250445e-05,
      "loss": 0.9984,
      "step": 798
    },
    {
      "epoch": 2.315942028985507,
      "grad_norm": 0.4240304031792234,
      "learning_rate": 1.3049554138967051e-05,
      "loss": 1.0865,
      "step": 799
    },
    {
      "epoch": 2.318840579710145,
      "grad_norm": 0.44946527738642017,
      "learning_rate": 1.2944228223105953e-05,
      "loss": 1.0496,
      "step": 800
    },
    {
      "epoch": 2.3217391304347825,
      "grad_norm": 0.42198617091436585,
      "learning_rate": 1.2839265848236271e-05,
      "loss": 1.0357,
      "step": 801
    },
    {
      "epoch": 2.3246376811594205,
      "grad_norm": 0.42787604239445254,
      "learning_rate": 1.273466804410649e-05,
      "loss": 1.0624,
      "step": 802
    },
    {
      "epoch": 2.327536231884058,
      "grad_norm": 0.4259453527555043,
      "learning_rate": 1.2630435836888477e-05,
      "loss": 1.0371,
      "step": 803
    },
    {
      "epoch": 2.3304347826086955,
      "grad_norm": 0.4405744784698457,
      "learning_rate": 1.2526570249167285e-05,
      "loss": 1.0722,
      "step": 804
    },
    {
      "epoch": 2.3333333333333335,
      "grad_norm": 0.44433415788871033,
      "learning_rate": 1.242307229993126e-05,
      "loss": 1.1003,
      "step": 805
    },
    {
      "epoch": 2.336231884057971,
      "grad_norm": 0.44002850613090233,
      "learning_rate": 1.2319943004561951e-05,
      "loss": 1.0334,
      "step": 806
    },
    {
      "epoch": 2.3391304347826085,
      "grad_norm": 0.4327626792123435,
      "learning_rate": 1.2217183374824182e-05,
      "loss": 1.0841,
      "step": 807
    },
    {
      "epoch": 2.3420289855072465,
      "grad_norm": 0.44177237553294435,
      "learning_rate": 1.2114794418856112e-05,
      "loss": 1.1006,
      "step": 808
    },
    {
      "epoch": 2.344927536231884,
      "grad_norm": 0.4252814673055529,
      "learning_rate": 1.2012777141159359e-05,
      "loss": 1.0902,
      "step": 809
    },
    {
      "epoch": 2.3478260869565215,
      "grad_norm": 0.44481606310880256,
      "learning_rate": 1.1911132542589126e-05,
      "loss": 1.0663,
      "step": 810
    },
    {
      "epoch": 2.3507246376811595,
      "grad_norm": 0.44531350592923585,
      "learning_rate": 1.180986162034441e-05,
      "loss": 1.0395,
      "step": 811
    },
    {
      "epoch": 2.353623188405797,
      "grad_norm": 0.4403754842576467,
      "learning_rate": 1.1708965367958175e-05,
      "loss": 1.0367,
      "step": 812
    },
    {
      "epoch": 2.356521739130435,
      "grad_norm": 0.44504741014172594,
      "learning_rate": 1.160844477528768e-05,
      "loss": 1.0668,
      "step": 813
    },
    {
      "epoch": 2.3594202898550725,
      "grad_norm": 0.45218366246573805,
      "learning_rate": 1.150830082850468e-05,
      "loss": 1.0078,
      "step": 814
    },
    {
      "epoch": 2.36231884057971,
      "grad_norm": 0.4400472472708365,
      "learning_rate": 1.1408534510085805e-05,
      "loss": 1.0535,
      "step": 815
    },
    {
      "epoch": 2.365217391304348,
      "grad_norm": 0.429340428309833,
      "learning_rate": 1.130914679880291e-05,
      "loss": 1.0736,
      "step": 816
    },
    {
      "epoch": 2.3681159420289855,
      "grad_norm": 0.41976853039844914,
      "learning_rate": 1.1210138669713444e-05,
      "loss": 0.9793,
      "step": 817
    },
    {
      "epoch": 2.3710144927536234,
      "grad_norm": 0.430344411304319,
      "learning_rate": 1.1111511094150945e-05,
      "loss": 0.9848,
      "step": 818
    },
    {
      "epoch": 2.373913043478261,
      "grad_norm": 0.431007787368086,
      "learning_rate": 1.1013265039715465e-05,
      "loss": 0.9797,
      "step": 819
    },
    {
      "epoch": 2.3768115942028984,
      "grad_norm": 0.43768154374858875,
      "learning_rate": 1.0915401470264081e-05,
      "loss": 1.0339,
      "step": 820
    },
    {
      "epoch": 2.3797101449275364,
      "grad_norm": 0.4153960922316617,
      "learning_rate": 1.081792134590145e-05,
      "loss": 1.0726,
      "step": 821
    },
    {
      "epoch": 2.382608695652174,
      "grad_norm": 0.4261661560061093,
      "learning_rate": 1.0720825622970387e-05,
      "loss": 1.0732,
      "step": 822
    },
    {
      "epoch": 2.3855072463768114,
      "grad_norm": 0.46272436711753084,
      "learning_rate": 1.0624115254042482e-05,
      "loss": 1.0509,
      "step": 823
    },
    {
      "epoch": 2.3884057971014494,
      "grad_norm": 0.4159332663897536,
      "learning_rate": 1.0527791187908736e-05,
      "loss": 1.0301,
      "step": 824
    },
    {
      "epoch": 2.391304347826087,
      "grad_norm": 0.41855139337790126,
      "learning_rate": 1.0431854369570316e-05,
      "loss": 0.98,
      "step": 825
    },
    {
      "epoch": 2.3942028985507244,
      "grad_norm": 0.4407049676844984,
      "learning_rate": 1.0336305740229196e-05,
      "loss": 1.0198,
      "step": 826
    },
    {
      "epoch": 2.3971014492753624,
      "grad_norm": 0.44469510783381666,
      "learning_rate": 1.0241146237278975e-05,
      "loss": 1.0142,
      "step": 827
    },
    {
      "epoch": 2.4,
      "grad_norm": 0.4204751833047234,
      "learning_rate": 1.0146376794295698e-05,
      "loss": 1.0435,
      "step": 828
    },
    {
      "epoch": 2.402898550724638,
      "grad_norm": 0.43076006527935645,
      "learning_rate": 1.0051998341028618e-05,
      "loss": 1.0329,
      "step": 829
    },
    {
      "epoch": 2.4057971014492754,
      "grad_norm": 0.4212241503239106,
      "learning_rate": 9.958011803391166e-06,
      "loss": 1.0517,
      "step": 830
    },
    {
      "epoch": 2.408695652173913,
      "grad_norm": 0.43752577070512094,
      "learning_rate": 9.864418103451828e-06,
      "loss": 1.05,
      "step": 831
    },
    {
      "epoch": 2.411594202898551,
      "grad_norm": 0.4539932456655938,
      "learning_rate": 9.771218159425084e-06,
      "loss": 1.0501,
      "step": 832
    },
    {
      "epoch": 2.4144927536231884,
      "grad_norm": 0.44298901817857494,
      "learning_rate": 9.678412885662418e-06,
      "loss": 1.0399,
      "step": 833
    },
    {
      "epoch": 2.417391304347826,
      "grad_norm": 0.44330383234774,
      "learning_rate": 9.586003192643362e-06,
      "loss": 1.0242,
      "step": 834
    },
    {
      "epoch": 2.420289855072464,
      "grad_norm": 0.42235580319917715,
      "learning_rate": 9.493989986966518e-06,
      "loss": 1.0961,
      "step": 835
    },
    {
      "epoch": 2.4231884057971014,
      "grad_norm": 0.42412654756876644,
      "learning_rate": 9.402374171340705e-06,
      "loss": 1.0747,
      "step": 836
    },
    {
      "epoch": 2.426086956521739,
      "grad_norm": 0.4604003701876417,
      "learning_rate": 9.311156644576108e-06,
      "loss": 0.9956,
      "step": 837
    },
    {
      "epoch": 2.428985507246377,
      "grad_norm": 0.4355065867115315,
      "learning_rate": 9.220338301575414e-06,
      "loss": 1.0515,
      "step": 838
    },
    {
      "epoch": 2.4318840579710144,
      "grad_norm": 0.41606575435043913,
      "learning_rate": 9.129920033325068e-06,
      "loss": 1.0834,
      "step": 839
    },
    {
      "epoch": 2.4347826086956523,
      "grad_norm": 0.41400057706555543,
      "learning_rate": 9.039902726886535e-06,
      "loss": 1.025,
      "step": 840
    },
    {
      "epoch": 2.43768115942029,
      "grad_norm": 0.4212465286811161,
      "learning_rate": 8.95028726538758e-06,
      "loss": 1.0888,
      "step": 841
    },
    {
      "epoch": 2.4405797101449274,
      "grad_norm": 0.44292414437801153,
      "learning_rate": 8.861074528013586e-06,
      "loss": 1.1063,
      "step": 842
    },
    {
      "epoch": 2.4434782608695653,
      "grad_norm": 0.4618762426767351,
      "learning_rate": 8.77226538999899e-06,
      "loss": 1.0861,
      "step": 843
    },
    {
      "epoch": 2.446376811594203,
      "grad_norm": 0.42934378228075604,
      "learning_rate": 8.683860722618641e-06,
      "loss": 1.0674,
      "step": 844
    },
    {
      "epoch": 2.449275362318841,
      "grad_norm": 0.44137968841741865,
      "learning_rate": 8.595861393179277e-06,
      "loss": 1.0248,
      "step": 845
    },
    {
      "epoch": 2.4521739130434783,
      "grad_norm": 0.45115385912472034,
      "learning_rate": 8.508268265011005e-06,
      "loss": 1.0471,
      "step": 846
    },
    {
      "epoch": 2.455072463768116,
      "grad_norm": 0.44160775586291273,
      "learning_rate": 8.42108219745884e-06,
      "loss": 1.0375,
      "step": 847
    },
    {
      "epoch": 2.457971014492754,
      "grad_norm": 0.44498128589628316,
      "learning_rate": 8.334304045874247e-06,
      "loss": 1.0928,
      "step": 848
    },
    {
      "epoch": 2.4608695652173913,
      "grad_norm": 0.42944613569509194,
      "learning_rate": 8.247934661606826e-06,
      "loss": 1.0611,
      "step": 849
    },
    {
      "epoch": 2.463768115942029,
      "grad_norm": 0.4293984310812336,
      "learning_rate": 8.161974891995855e-06,
      "loss": 1.0425,
      "step": 850
    },
    {
      "epoch": 2.466666666666667,
      "grad_norm": 0.43223021088950386,
      "learning_rate": 8.076425580362052e-06,
      "loss": 1.0966,
      "step": 851
    },
    {
      "epoch": 2.4695652173913043,
      "grad_norm": 0.4511615485513439,
      "learning_rate": 7.991287565999272e-06,
      "loss": 0.9823,
      "step": 852
    },
    {
      "epoch": 2.472463768115942,
      "grad_norm": 0.43175751442143545,
      "learning_rate": 7.906561684166275e-06,
      "loss": 1.046,
      "step": 853
    },
    {
      "epoch": 2.47536231884058,
      "grad_norm": 0.4398354654162565,
      "learning_rate": 7.822248766078555e-06,
      "loss": 1.1159,
      "step": 854
    },
    {
      "epoch": 2.4782608695652173,
      "grad_norm": 0.4217658734022817,
      "learning_rate": 7.738349638900127e-06,
      "loss": 1.0605,
      "step": 855
    },
    {
      "epoch": 2.4811594202898553,
      "grad_norm": 0.4463848438795895,
      "learning_rate": 7.654865125735483e-06,
      "loss": 0.987,
      "step": 856
    },
    {
      "epoch": 2.4840579710144928,
      "grad_norm": 0.4553067045132744,
      "learning_rate": 7.571796045621482e-06,
      "loss": 1.049,
      "step": 857
    },
    {
      "epoch": 2.4869565217391303,
      "grad_norm": 0.4470257852745124,
      "learning_rate": 7.489143213519301e-06,
      "loss": 1.0841,
      "step": 858
    },
    {
      "epoch": 2.4898550724637682,
      "grad_norm": 0.42594930418564064,
      "learning_rate": 7.406907440306471e-06,
      "loss": 1.0877,
      "step": 859
    },
    {
      "epoch": 2.4927536231884058,
      "grad_norm": 0.4284878480179994,
      "learning_rate": 7.325089532768892e-06,
      "loss": 1.0765,
      "step": 860
    },
    {
      "epoch": 2.4956521739130437,
      "grad_norm": 0.44182270672000895,
      "learning_rate": 7.243690293592959e-06,
      "loss": 1.0233,
      "step": 861
    },
    {
      "epoch": 2.4985507246376812,
      "grad_norm": 0.43871383223404364,
      "learning_rate": 7.1627105213576355e-06,
      "loss": 1.0702,
      "step": 862
    },
    {
      "epoch": 2.5014492753623188,
      "grad_norm": 0.4277793635895529,
      "learning_rate": 7.08215101052665e-06,
      "loss": 1.0573,
      "step": 863
    },
    {
      "epoch": 2.5043478260869563,
      "grad_norm": 0.4406001751473407,
      "learning_rate": 7.002012551440701e-06,
      "loss": 1.0316,
      "step": 864
    },
    {
      "epoch": 2.5072463768115942,
      "grad_norm": 0.5413472127354161,
      "learning_rate": 6.922295930309691e-06,
      "loss": 1.0798,
      "step": 865
    },
    {
      "epoch": 2.5101449275362318,
      "grad_norm": 0.4301282293831735,
      "learning_rate": 6.84300192920504e-06,
      "loss": 1.0723,
      "step": 866
    },
    {
      "epoch": 2.5130434782608697,
      "grad_norm": 0.43181259980748293,
      "learning_rate": 6.764131326051953e-06,
      "loss": 1.0395,
      "step": 867
    },
    {
      "epoch": 2.5159420289855072,
      "grad_norm": 0.4357413758485379,
      "learning_rate": 6.6856848946218635e-06,
      "loss": 1.04,
      "step": 868
    },
    {
      "epoch": 2.5188405797101447,
      "grad_norm": 0.4441512604958444,
      "learning_rate": 6.607663404524795e-06,
      "loss": 1.02,
      "step": 869
    },
    {
      "epoch": 2.5217391304347827,
      "grad_norm": 0.4403400361786895,
      "learning_rate": 6.53006762120183e-06,
      "loss": 0.9813,
      "step": 870
    },
    {
      "epoch": 2.52463768115942,
      "grad_norm": 0.4295706766182875,
      "learning_rate": 6.452898305917587e-06,
      "loss": 1.0977,
      "step": 871
    },
    {
      "epoch": 2.527536231884058,
      "grad_norm": 0.4500164864119338,
      "learning_rate": 6.376156215752743e-06,
      "loss": 1.046,
      "step": 872
    },
    {
      "epoch": 2.5304347826086957,
      "grad_norm": 0.4295283517592817,
      "learning_rate": 6.299842103596665e-06,
      "loss": 0.9962,
      "step": 873
    },
    {
      "epoch": 2.533333333333333,
      "grad_norm": 0.4298591342734868,
      "learning_rate": 6.223956718139939e-06,
      "loss": 1.0351,
      "step": 874
    },
    {
      "epoch": 2.536231884057971,
      "grad_norm": 0.41916133011716233,
      "learning_rate": 6.14850080386708e-06,
      "loss": 0.9795,
      "step": 875
    },
    {
      "epoch": 2.5391304347826087,
      "grad_norm": 0.450757056089375,
      "learning_rate": 6.073475101049209e-06,
      "loss": 1.0287,
      "step": 876
    },
    {
      "epoch": 2.5420289855072467,
      "grad_norm": 0.4428910375540849,
      "learning_rate": 5.998880345736812e-06,
      "loss": 1.0841,
      "step": 877
    },
    {
      "epoch": 2.544927536231884,
      "grad_norm": 0.4370122339112871,
      "learning_rate": 5.924717269752478e-06,
      "loss": 1.0355,
      "step": 878
    },
    {
      "epoch": 2.5478260869565217,
      "grad_norm": 0.4328546688643461,
      "learning_rate": 5.8509866006837725e-06,
      "loss": 1.0458,
      "step": 879
    },
    {
      "epoch": 2.550724637681159,
      "grad_norm": 0.45457918016504273,
      "learning_rate": 5.777689061876035e-06,
      "loss": 1.0407,
      "step": 880
    },
    {
      "epoch": 2.553623188405797,
      "grad_norm": 0.41666707799866615,
      "learning_rate": 5.704825372425343e-06,
      "loss": 1.0336,
      "step": 881
    },
    {
      "epoch": 2.5565217391304347,
      "grad_norm": 0.4500898444777061,
      "learning_rate": 5.6323962471714286e-06,
      "loss": 1.0082,
      "step": 882
    },
    {
      "epoch": 2.5594202898550726,
      "grad_norm": 0.43189682364915644,
      "learning_rate": 5.560402396690667e-06,
      "loss": 1.0732,
      "step": 883
    },
    {
      "epoch": 2.56231884057971,
      "grad_norm": 0.4517991164758783,
      "learning_rate": 5.4888445272891e-06,
      "loss": 1.0565,
      "step": 884
    },
    {
      "epoch": 2.5652173913043477,
      "grad_norm": 0.43585727349975845,
      "learning_rate": 5.417723340995545e-06,
      "loss": 1.0569,
      "step": 885
    },
    {
      "epoch": 2.5681159420289856,
      "grad_norm": 0.4451555207263539,
      "learning_rate": 5.347039535554632e-06,
      "loss": 1.0934,
      "step": 886
    },
    {
      "epoch": 2.571014492753623,
      "grad_norm": 0.44753595012523295,
      "learning_rate": 5.276793804420033e-06,
      "loss": 1.0129,
      "step": 887
    },
    {
      "epoch": 2.573913043478261,
      "grad_norm": 0.43340171540500966,
      "learning_rate": 5.206986836747624e-06,
      "loss": 1.057,
      "step": 888
    },
    {
      "epoch": 2.5768115942028986,
      "grad_norm": 0.41103056048092484,
      "learning_rate": 5.13761931738872e-06,
      "loss": 1.0629,
      "step": 889
    },
    {
      "epoch": 2.579710144927536,
      "grad_norm": 0.4379217485808061,
      "learning_rate": 5.068691926883367e-06,
      "loss": 1.1122,
      "step": 890
    },
    {
      "epoch": 2.5826086956521737,
      "grad_norm": 0.4367395495858654,
      "learning_rate": 5.000205341453679e-06,
      "loss": 1.0641,
      "step": 891
    },
    {
      "epoch": 2.5855072463768116,
      "grad_norm": 0.4346646618624072,
      "learning_rate": 4.9321602329971735e-06,
      "loss": 1.0247,
      "step": 892
    },
    {
      "epoch": 2.588405797101449,
      "grad_norm": 0.4266332511623276,
      "learning_rate": 4.864557269080183e-06,
      "loss": 1.1,
      "step": 893
    },
    {
      "epoch": 2.591304347826087,
      "grad_norm": 0.4280568908138626,
      "learning_rate": 4.7973971129313455e-06,
      "loss": 0.9916,
      "step": 894
    },
    {
      "epoch": 2.5942028985507246,
      "grad_norm": 0.4157220462493493,
      "learning_rate": 4.730680423435046e-06,
      "loss": 1.0384,
      "step": 895
    },
    {
      "epoch": 2.597101449275362,
      "grad_norm": 0.4657661567334127,
      "learning_rate": 4.6644078551249916e-06,
      "loss": 1.0206,
      "step": 896
    },
    {
      "epoch": 2.6,
      "grad_norm": 0.4402043390402084,
      "learning_rate": 4.59858005817776e-06,
      "loss": 1.0051,
      "step": 897
    },
    {
      "epoch": 2.6028985507246376,
      "grad_norm": 0.47342746863944507,
      "learning_rate": 4.533197678406459e-06,
      "loss": 0.9908,
      "step": 898
    },
    {
      "epoch": 2.6057971014492756,
      "grad_norm": 0.44686945552614565,
      "learning_rate": 4.468261357254339e-06,
      "loss": 1.0194,
      "step": 899
    },
    {
      "epoch": 2.608695652173913,
      "grad_norm": 0.45848518372098457,
      "learning_rate": 4.403771731788547e-06,
      "loss": 1.0751,
      "step": 900
    },
    {
      "epoch": 2.6115942028985506,
      "grad_norm": 0.41833931514497974,
      "learning_rate": 4.339729434693851e-06,
      "loss": 1.0486,
      "step": 901
    },
    {
      "epoch": 2.6144927536231886,
      "grad_norm": 0.4154891635226541,
      "learning_rate": 4.276135094266437e-06,
      "loss": 1.0246,
      "step": 902
    },
    {
      "epoch": 2.617391304347826,
      "grad_norm": 0.42902378243746886,
      "learning_rate": 4.212989334407752e-06,
      "loss": 1.0367,
      "step": 903
    },
    {
      "epoch": 2.620289855072464,
      "grad_norm": 0.4413147059304679,
      "learning_rate": 4.150292774618386e-06,
      "loss": 1.0377,
      "step": 904
    },
    {
      "epoch": 2.6231884057971016,
      "grad_norm": 0.4326053305994359,
      "learning_rate": 4.088046029991954e-06,
      "loss": 1.0321,
      "step": 905
    },
    {
      "epoch": 2.626086956521739,
      "grad_norm": 0.43297947767772066,
      "learning_rate": 4.026249711209134e-06,
      "loss": 1.0814,
      "step": 906
    },
    {
      "epoch": 2.6289855072463766,
      "grad_norm": 0.42391791250689304,
      "learning_rate": 3.964904424531623e-06,
      "loss": 1.1435,
      "step": 907
    },
    {
      "epoch": 2.6318840579710145,
      "grad_norm": 0.44465042718334696,
      "learning_rate": 3.90401077179619e-06,
      "loss": 1.0755,
      "step": 908
    },
    {
      "epoch": 2.634782608695652,
      "grad_norm": 0.4379840802629311,
      "learning_rate": 3.843569350408799e-06,
      "loss": 1.0326,
      "step": 909
    },
    {
      "epoch": 2.63768115942029,
      "grad_norm": 0.4380256503816688,
      "learning_rate": 3.7835807533387336e-06,
      "loss": 0.9959,
      "step": 910
    },
    {
      "epoch": 2.6405797101449275,
      "grad_norm": 0.4250114900172059,
      "learning_rate": 3.724045569112766e-06,
      "loss": 1.0413,
      "step": 911
    },
    {
      "epoch": 2.643478260869565,
      "grad_norm": 0.43495634484636064,
      "learning_rate": 3.664964381809416e-06,
      "loss": 1.0502,
      "step": 912
    },
    {
      "epoch": 2.646376811594203,
      "grad_norm": 0.41338659373447945,
      "learning_rate": 3.606337771053181e-06,
      "loss": 1.0322,
      "step": 913
    },
    {
      "epoch": 2.6492753623188405,
      "grad_norm": 0.4607899596362807,
      "learning_rate": 3.548166312008877e-06,
      "loss": 1.062,
      "step": 914
    },
    {
      "epoch": 2.6521739130434785,
      "grad_norm": 0.4456807876825619,
      "learning_rate": 3.4904505753759863e-06,
      "loss": 1.049,
      "step": 915
    },
    {
      "epoch": 2.655072463768116,
      "grad_norm": 0.45066296980753234,
      "learning_rate": 3.4331911273830784e-06,
      "loss": 1.1202,
      "step": 916
    },
    {
      "epoch": 2.6579710144927535,
      "grad_norm": 0.42887756180559006,
      "learning_rate": 3.376388529782215e-06,
      "loss": 1.0579,
      "step": 917
    },
    {
      "epoch": 2.660869565217391,
      "grad_norm": 0.4242946529545818,
      "learning_rate": 3.320043339843465e-06,
      "loss": 1.0094,
      "step": 918
    },
    {
      "epoch": 2.663768115942029,
      "grad_norm": 0.4509087953831623,
      "learning_rate": 3.2641561103494424e-06,
      "loss": 1.126,
      "step": 919
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 0.4582297576992613,
      "learning_rate": 3.2087273895898606e-06,
      "loss": 1.0978,
      "step": 920
    },
    {
      "epoch": 2.6695652173913045,
      "grad_norm": 0.41892321793577525,
      "learning_rate": 3.153757721356182e-06,
      "loss": 1.0188,
      "step": 921
    },
    {
      "epoch": 2.672463768115942,
      "grad_norm": 0.43091493659712077,
      "learning_rate": 3.0992476449362653e-06,
      "loss": 1.0657,
      "step": 922
    },
    {
      "epoch": 2.6753623188405795,
      "grad_norm": 0.4484469573992589,
      "learning_rate": 3.0451976951090757e-06,
      "loss": 1.0578,
      "step": 923
    },
    {
      "epoch": 2.6782608695652175,
      "grad_norm": 0.45221935250795153,
      "learning_rate": 2.991608402139434e-06,
      "loss": 1.0728,
      "step": 924
    },
    {
      "epoch": 2.681159420289855,
      "grad_norm": 0.42748137661848884,
      "learning_rate": 2.938480291772827e-06,
      "loss": 1.0517,
      "step": 925
    },
    {
      "epoch": 2.684057971014493,
      "grad_norm": 0.4338746720819457,
      "learning_rate": 2.8858138852302374e-06,
      "loss": 1.0192,
      "step": 926
    },
    {
      "epoch": 2.6869565217391305,
      "grad_norm": 0.44271385780896827,
      "learning_rate": 2.833609699203038e-06,
      "loss": 1.0409,
      "step": 927
    },
    {
      "epoch": 2.689855072463768,
      "grad_norm": 0.44168360737350637,
      "learning_rate": 2.7818682458479294e-06,
      "loss": 1.0353,
      "step": 928
    },
    {
      "epoch": 2.692753623188406,
      "grad_norm": 0.44662829054916564,
      "learning_rate": 2.7305900327818936e-06,
      "loss": 1.0321,
      "step": 929
    },
    {
      "epoch": 2.6956521739130435,
      "grad_norm": 0.4372789501470448,
      "learning_rate": 2.679775563077247e-06,
      "loss": 1.0469,
      "step": 930
    },
    {
      "epoch": 2.6985507246376814,
      "grad_norm": 0.4170715080873589,
      "learning_rate": 2.6294253352566466e-06,
      "loss": 1.0717,
      "step": 931
    },
    {
      "epoch": 2.701449275362319,
      "grad_norm": 0.44425061018773043,
      "learning_rate": 2.5795398432882756e-06,
      "loss": 1.0892,
      "step": 932
    },
    {
      "epoch": 2.7043478260869565,
      "grad_norm": 0.43077942102243316,
      "learning_rate": 2.530119576580936e-06,
      "loss": 1.0542,
      "step": 933
    },
    {
      "epoch": 2.707246376811594,
      "grad_norm": 0.4370359842657613,
      "learning_rate": 2.4811650199792924e-06,
      "loss": 1.0096,
      "step": 934
    },
    {
      "epoch": 2.710144927536232,
      "grad_norm": 0.43626145737902144,
      "learning_rate": 2.4326766537590693e-06,
      "loss": 1.081,
      "step": 935
    },
    {
      "epoch": 2.7130434782608694,
      "grad_norm": 0.47685901764854666,
      "learning_rate": 2.384654953622384e-06,
      "loss": 1.1176,
      "step": 936
    },
    {
      "epoch": 2.7159420289855117,
      "grad_norm": 0.45228260777925117,
      "learning_rate": 2.3371003906930423e-06,
      "loss": 1.0481,
      "step": 937
    },
    {
      "epoch": 2.718840579710145,
      "grad_norm": 0.44256756961973887,
      "learning_rate": 2.290013431511945e-06,
      "loss": 1.0347,
      "step": 938
    },
    {
      "epoch": 2.7217391304347824,
      "grad_norm": 0.4402726419838423,
      "learning_rate": 2.243394538032484e-06,
      "loss": 1.0369,
      "step": 939
    },
    {
      "epoch": 2.7246376811594204,
      "grad_norm": 0.45365804923951414,
      "learning_rate": 2.197244167616047e-06,
      "loss": 1.0973,
      "step": 940
    },
    {
      "epoch": 2.727536231884058,
      "grad_norm": 0.4525083377542681,
      "learning_rate": 2.1515627730274822e-06,
      "loss": 1.0616,
      "step": 941
    },
    {
      "epoch": 2.730434782608696,
      "grad_norm": 0.41867968643258735,
      "learning_rate": 2.106350802430718e-06,
      "loss": 1.0361,
      "step": 942
    },
    {
      "epoch": 2.7333333333333334,
      "grad_norm": 0.44410487106485796,
      "learning_rate": 2.0616086993842876e-06,
      "loss": 1.0262,
      "step": 943
    },
    {
      "epoch": 2.736231884057971,
      "grad_norm": 0.42533114796177457,
      "learning_rate": 2.0173369028370583e-06,
      "loss": 1.0324,
      "step": 944
    },
    {
      "epoch": 2.7391304347826084,
      "grad_norm": 0.41967355790971034,
      "learning_rate": 1.9735358471238586e-06,
      "loss": 1.0439,
      "step": 945
    },
    {
      "epoch": 2.7420289855072464,
      "grad_norm": 0.4313810499422798,
      "learning_rate": 1.9302059619612787e-06,
      "loss": 1.0067,
      "step": 946
    },
    {
      "epoch": 2.744927536231884,
      "grad_norm": 0.4457644564670882,
      "learning_rate": 1.8873476724433902e-06,
      "loss": 1.0433,
      "step": 947
    },
    {
      "epoch": 2.747826086956522,
      "grad_norm": 0.44140575476367844,
      "learning_rate": 1.8449613990376313e-06,
      "loss": 1.0281,
      "step": 948
    },
    {
      "epoch": 2.7507246376811594,
      "grad_norm": 0.41388990569707274,
      "learning_rate": 1.8030475575806394e-06,
      "loss": 1.0779,
      "step": 949
    },
    {
      "epoch": 2.753623188405797,
      "grad_norm": 0.44319022004684594,
      "learning_rate": 1.7616065592742038e-06,
      "loss": 1.0709,
      "step": 950
    },
    {
      "epoch": 2.756521739130435,
      "grad_norm": 0.42280831552653275,
      "learning_rate": 1.7206388106812077e-06,
      "loss": 1.0602,
      "step": 951
    },
    {
      "epoch": 2.7594202898550724,
      "grad_norm": 0.41831113949584664,
      "learning_rate": 1.6801447137216652e-06,
      "loss": 1.0519,
      "step": 952
    },
    {
      "epoch": 2.7623188405797103,
      "grad_norm": 0.42149777436767877,
      "learning_rate": 1.6401246656687463e-06,
      "loss": 1.0568,
      "step": 953
    },
    {
      "epoch": 2.765217391304348,
      "grad_norm": 0.429110137697547,
      "learning_rate": 1.6005790591448966e-06,
      "loss": 1.1177,
      "step": 954
    },
    {
      "epoch": 2.7681159420289854,
      "grad_norm": 0.46048857323106746,
      "learning_rate": 1.5615082821180071e-06,
      "loss": 1.0583,
      "step": 955
    },
    {
      "epoch": 2.7710144927536233,
      "grad_norm": 0.4299763555661624,
      "learning_rate": 1.522912717897551e-06,
      "loss": 1.1047,
      "step": 956
    },
    {
      "epoch": 2.773913043478261,
      "grad_norm": 0.47595502230009035,
      "learning_rate": 1.4847927451308753e-06,
      "loss": 1.0598,
      "step": 957
    },
    {
      "epoch": 2.776811594202899,
      "grad_norm": 0.44472688488854684,
      "learning_rate": 1.447148737799481e-06,
      "loss": 1.0717,
      "step": 958
    },
    {
      "epoch": 2.7797101449275363,
      "grad_norm": 0.446411341344231,
      "learning_rate": 1.4099810652153212e-06,
      "loss": 1.0873,
      "step": 959
    },
    {
      "epoch": 2.782608695652174,
      "grad_norm": 0.4395447440323806,
      "learning_rate": 1.3732900920172154e-06,
      "loss": 1.0097,
      "step": 960
    },
    {
      "epoch": 2.7855072463768114,
      "grad_norm": 0.4374552230480354,
      "learning_rate": 1.3370761781672346e-06,
      "loss": 1.0025,
      "step": 961
    },
    {
      "epoch": 2.7884057971014493,
      "grad_norm": 0.4585611691245378,
      "learning_rate": 1.3013396789472055e-06,
      "loss": 0.9921,
      "step": 962
    },
    {
      "epoch": 2.791304347826087,
      "grad_norm": 0.4367319010484946,
      "learning_rate": 1.2660809449552058e-06,
      "loss": 1.005,
      "step": 963
    },
    {
      "epoch": 2.794202898550725,
      "grad_norm": 0.41818614449882124,
      "learning_rate": 1.2313003221021302e-06,
      "loss": 1.0392,
      "step": 964
    },
    {
      "epoch": 2.7971014492753623,
      "grad_norm": 0.43712018288101745,
      "learning_rate": 1.1969981516082972e-06,
      "loss": 1.0703,
      "step": 965
    },
    {
      "epoch": 2.8,
      "grad_norm": 0.4330052924141849,
      "learning_rate": 1.163174770000086e-06,
      "loss": 1.0149,
      "step": 966
    },
    {
      "epoch": 2.802898550724638,
      "grad_norm": 0.4637514588180937,
      "learning_rate": 1.1298305091066664e-06,
      "loss": 1.054,
      "step": 967
    },
    {
      "epoch": 2.8057971014492753,
      "grad_norm": 0.4328211094942756,
      "learning_rate": 1.0969656960567177e-06,
      "loss": 1.1024,
      "step": 968
    },
    {
      "epoch": 2.8086956521739133,
      "grad_norm": 0.49114261638602824,
      "learning_rate": 1.0645806532752156e-06,
      "loss": 1.0506,
      "step": 969
    },
    {
      "epoch": 2.8115942028985508,
      "grad_norm": 0.43504595478449676,
      "learning_rate": 1.0326756984803065e-06,
      "loss": 1.0711,
      "step": 970
    },
    {
      "epoch": 2.8144927536231883,
      "grad_norm": 0.4348937962062495,
      "learning_rate": 1.0012511446801377e-06,
      "loss": 1.1078,
      "step": 971
    },
    {
      "epoch": 2.8173913043478263,
      "grad_norm": 0.44058656927819256,
      "learning_rate": 9.70307300169826e-07,
      "loss": 1.0991,
      "step": 972
    },
    {
      "epoch": 2.8202898550724638,
      "grad_norm": 0.4295244566694527,
      "learning_rate": 9.39844468528428e-07,
      "loss": 0.9995,
      "step": 973
    },
    {
      "epoch": 2.8231884057971013,
      "grad_norm": 0.4367203092602682,
      "learning_rate": 9.09862948615936e-07,
      "loss": 1.0519,
      "step": 974
    },
    {
      "epoch": 2.8260869565217392,
      "grad_norm": 0.4449664564834592,
      "learning_rate": 8.803630345703751e-07,
      "loss": 1.0474,
      "step": 975
    },
    {
      "epoch": 2.8289855072463768,
      "grad_norm": 0.4297347658970927,
      "learning_rate": 8.513450158049108e-07,
      "loss": 1.0695,
      "step": 976
    },
    {
      "epoch": 2.8318840579710143,
      "grad_norm": 0.4486135418859604,
      "learning_rate": 8.228091770049961e-07,
      "loss": 1.0164,
      "step": 977
    },
    {
      "epoch": 2.8347826086956522,
      "grad_norm": 0.43980229550927924,
      "learning_rate": 7.947557981255904e-07,
      "loss": 1.0317,
      "step": 978
    },
    {
      "epoch": 2.8376811594202898,
      "grad_norm": 0.44553738280573807,
      "learning_rate": 7.671851543884112e-07,
      "loss": 1.0946,
      "step": 979
    },
    {
      "epoch": 2.8405797101449277,
      "grad_norm": 0.4363004911544926,
      "learning_rate": 7.400975162792367e-07,
      "loss": 1.003,
      "step": 980
    },
    {
      "epoch": 2.8434782608695652,
      "grad_norm": 0.4413405166653603,
      "learning_rate": 7.134931495452413e-07,
      "loss": 1.0882,
      "step": 981
    },
    {
      "epoch": 2.8463768115942027,
      "grad_norm": 0.44085985363028396,
      "learning_rate": 6.873723151924027e-07,
      "loss": 0.9974,
      "step": 982
    },
    {
      "epoch": 2.8492753623188407,
      "grad_norm": 0.44891911764344156,
      "learning_rate": 6.617352694829381e-07,
      "loss": 0.9997,
      "step": 983
    },
    {
      "epoch": 2.8521739130434782,
      "grad_norm": 0.4297742893819775,
      "learning_rate": 6.365822639327723e-07,
      "loss": 1.0248,
      "step": 984
    },
    {
      "epoch": 2.855072463768116,
      "grad_norm": 0.44307938049828505,
      "learning_rate": 6.119135453090952e-07,
      "loss": 1.0523,
      "step": 985
    },
    {
      "epoch": 2.8579710144927537,
      "grad_norm": 0.4219491261370554,
      "learning_rate": 5.877293556279306e-07,
      "loss": 1.0316,
      "step": 986
    },
    {
      "epoch": 2.860869565217391,
      "grad_norm": 0.4441565730933646,
      "learning_rate": 5.64029932151755e-07,
      "loss": 1.0601,
      "step": 987
    },
    {
      "epoch": 2.8637681159420287,
      "grad_norm": 0.43904823016047406,
      "learning_rate": 5.408155073871768e-07,
      "loss": 1.0962,
      "step": 988
    },
    {
      "epoch": 2.8666666666666667,
      "grad_norm": 0.4380193819651974,
      "learning_rate": 5.180863090826604e-07,
      "loss": 1.0828,
      "step": 989
    },
    {
      "epoch": 2.869565217391304,
      "grad_norm": 0.46490668660417783,
      "learning_rate": 4.95842560226284e-07,
      "loss": 0.9954,
      "step": 990
    },
    {
      "epoch": 2.872463768115942,
      "grad_norm": 0.44779443933129964,
      "learning_rate": 4.7408447904354614e-07,
      "loss": 0.9894,
      "step": 991
    },
    {
      "epoch": 2.8753623188405797,
      "grad_norm": 0.44039118698865287,
      "learning_rate": 4.52812278995246e-07,
      "loss": 0.9391,
      "step": 992
    },
    {
      "epoch": 2.878260869565217,
      "grad_norm": 0.44888017878839825,
      "learning_rate": 4.3202616877536793e-07,
      "loss": 1.044,
      "step": 993
    },
    {
      "epoch": 2.881159420289855,
      "grad_norm": 0.4412322695340127,
      "learning_rate": 4.117263523090442e-07,
      "loss": 1.1098,
      "step": 994
    },
    {
      "epoch": 2.8840579710144927,
      "grad_norm": 0.42595193117492713,
      "learning_rate": 3.919130287505457e-07,
      "loss": 1.0755,
      "step": 995
    },
    {
      "epoch": 2.8869565217391306,
      "grad_norm": 0.44081324693289337,
      "learning_rate": 3.725863924813389e-07,
      "loss": 1.0776,
      "step": 996
    },
    {
      "epoch": 2.889855072463768,
      "grad_norm": 0.45676229278822633,
      "learning_rate": 3.5374663310818735e-07,
      "loss": 1.121,
      "step": 997
    },
    {
      "epoch": 2.8927536231884057,
      "grad_norm": 0.42858508933481326,
      "learning_rate": 3.3539393546124784e-07,
      "loss": 1.0342,
      "step": 998
    },
    {
      "epoch": 2.8956521739130436,
      "grad_norm": 0.4554639141142107,
      "learning_rate": 3.1752847959232167e-07,
      "loss": 1.0403,
      "step": 999
    },
    {
      "epoch": 2.898550724637681,
      "grad_norm": 0.4443160110274387,
      "learning_rate": 3.0015044077303933e-07,
      "loss": 0.9923,
      "step": 1000
    },
    {
      "epoch": 2.901449275362319,
      "grad_norm": 0.45114283690245177,
      "learning_rate": 2.8325998949314536e-07,
      "loss": 1.0137,
      "step": 1001
    },
    {
      "epoch": 2.9043478260869566,
      "grad_norm": 0.440281019286359,
      "learning_rate": 2.668572914588496e-07,
      "loss": 1.0009,
      "step": 1002
    },
    {
      "epoch": 2.907246376811594,
      "grad_norm": 0.42131395328506477,
      "learning_rate": 2.509425075911953e-07,
      "loss": 1.0864,
      "step": 1003
    },
    {
      "epoch": 2.9101449275362317,
      "grad_norm": 0.4431327301308889,
      "learning_rate": 2.3551579402445455e-07,
      "loss": 1.0369,
      "step": 1004
    },
    {
      "epoch": 2.9130434782608696,
      "grad_norm": 0.437441254967641,
      "learning_rate": 2.2057730210462979e-07,
      "loss": 1.0946,
      "step": 1005
    },
    {
      "epoch": 2.915942028985507,
      "grad_norm": 0.44460142080563914,
      "learning_rate": 2.0612717838794926e-07,
      "loss": 1.0682,
      "step": 1006
    },
    {
      "epoch": 2.918840579710145,
      "grad_norm": 0.46357594598759,
      "learning_rate": 1.9216556463943492e-07,
      "loss": 1.0347,
      "step": 1007
    },
    {
      "epoch": 2.9217391304347826,
      "grad_norm": 0.4280959868112658,
      "learning_rate": 1.7869259783150905e-07,
      "loss": 1.0446,
      "step": 1008
    },
    {
      "epoch": 2.92463768115942,
      "grad_norm": 0.4391861785357275,
      "learning_rate": 1.657084101426565e-07,
      "loss": 1.0055,
      "step": 1009
    },
    {
      "epoch": 2.927536231884058,
      "grad_norm": 0.43467829714626893,
      "learning_rate": 1.5321312895612007e-07,
      "loss": 1.0468,
      "step": 1010
    },
    {
      "epoch": 2.9304347826086956,
      "grad_norm": 0.436157471233564,
      "learning_rate": 1.4120687685866274e-07,
      "loss": 1.003,
      "step": 1011
    },
    {
      "epoch": 2.9333333333333336,
      "grad_norm": 0.4387651565287021,
      "learning_rate": 1.2968977163934638e-07,
      "loss": 1.0961,
      "step": 1012
    },
    {
      "epoch": 2.936231884057971,
      "grad_norm": 0.4544754835767558,
      "learning_rate": 1.1866192628839368e-07,
      "loss": 1.1016,
      "step": 1013
    },
    {
      "epoch": 2.9391304347826086,
      "grad_norm": 0.4558428482092103,
      "learning_rate": 1.0812344899607252e-07,
      "loss": 1.0319,
      "step": 1014
    },
    {
      "epoch": 2.942028985507246,
      "grad_norm": 0.4298065423481269,
      "learning_rate": 9.807444315163006e-08,
      "loss": 1.0564,
      "step": 1015
    },
    {
      "epoch": 2.944927536231884,
      "grad_norm": 0.45987333857679424,
      "learning_rate": 8.851500734229357e-08,
      "loss": 1.0879,
      "step": 1016
    },
    {
      "epoch": 2.9478260869565216,
      "grad_norm": 0.42633685574770663,
      "learning_rate": 7.944523535228233e-08,
      "loss": 1.02,
      "step": 1017
    },
    {
      "epoch": 2.9507246376811596,
      "grad_norm": 0.42941746517921314,
      "learning_rate": 7.086521616190279e-08,
      "loss": 1.0368,
      "step": 1018
    },
    {
      "epoch": 2.953623188405797,
      "grad_norm": 0.4500990712597483,
      "learning_rate": 6.27750339466715e-08,
      "loss": 1.0091,
      "step": 1019
    },
    {
      "epoch": 2.9565217391304346,
      "grad_norm": 0.43281524715248404,
      "learning_rate": 5.517476807648248e-08,
      "loss": 1.0871,
      "step": 1020
    },
    {
      "epoch": 2.9594202898550726,
      "grad_norm": 0.4404835864216849,
      "learning_rate": 4.806449311484107e-08,
      "loss": 1.1031,
      "step": 1021
    },
    {
      "epoch": 2.96231884057971,
      "grad_norm": 0.4292383359696952,
      "learning_rate": 4.144427881813129e-08,
      "loss": 0.9651,
      "step": 1022
    },
    {
      "epoch": 2.965217391304348,
      "grad_norm": 0.43976585710369,
      "learning_rate": 3.531419013491632e-08,
      "loss": 1.0691,
      "step": 1023
    },
    {
      "epoch": 2.9681159420289855,
      "grad_norm": 0.43252864631461296,
      "learning_rate": 2.967428720531129e-08,
      "loss": 0.9949,
      "step": 1024
    },
    {
      "epoch": 2.971014492753623,
      "grad_norm": 0.4477919897543057,
      "learning_rate": 2.4524625360400345e-08,
      "loss": 1.0986,
      "step": 1025
    },
    {
      "epoch": 2.973913043478261,
      "grad_norm": 0.4289179803109601,
      "learning_rate": 1.986525512168158e-08,
      "loss": 1.0116,
      "step": 1026
    },
    {
      "epoch": 2.9768115942028985,
      "grad_norm": 0.45865303578317895,
      "learning_rate": 1.5696222200578535e-08,
      "loss": 1.0639,
      "step": 1027
    },
    {
      "epoch": 2.9797101449275365,
      "grad_norm": 0.43468926771375377,
      "learning_rate": 1.2017567497996097e-08,
      "loss": 0.9828,
      "step": 1028
    },
    {
      "epoch": 2.982608695652174,
      "grad_norm": 0.4353013480109291,
      "learning_rate": 8.82932710389861e-09,
      "loss": 1.0111,
      "step": 1029
    },
    {
      "epoch": 2.9855072463768115,
      "grad_norm": 0.435625700326904,
      "learning_rate": 6.131532296982379e-09,
      "loss": 1.0963,
      "step": 1030
    },
    {
      "epoch": 2.988405797101449,
      "grad_norm": 0.4393642554858853,
      "learning_rate": 3.9242095443481345e-09,
      "loss": 1.1145,
      "step": 1031
    },
    {
      "epoch": 2.991304347826087,
      "grad_norm": 0.43072368766038216,
      "learning_rate": 2.207380501262346e-09,
      "loss": 1.0647,
      "step": 1032
    },
    {
      "epoch": 2.9942028985507245,
      "grad_norm": 0.45588828392520236,
      "learning_rate": 9.810620109129698e-10,
      "loss": 1.0432,
      "step": 1033
    },
    {
      "epoch": 2.9971014492753625,
      "grad_norm": 0.4459564292216382,
      "learning_rate": 2.452661042817717e-10,
      "loss": 1.1399,
      "step": 1034
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.4383463614226025,
      "learning_rate": 0.0,
      "loss": 0.9416,
      "step": 1035
    },
    {
      "epoch": 3.0,
      "step": 1035,
      "total_flos": 238917794807808.0,
      "train_loss": 1.2324695289422924,
      "train_runtime": 15380.0554,
      "train_samples_per_second": 2.148,
      "train_steps_per_second": 0.067
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 1035,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 300,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 238917794807808.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}