{
  "best_metric": 1.0085912942886353,
  "best_model_checkpoint": "/root/finetuning_executions/dynamtests_01_codet5p_src_fm_fc_dctx/checkpoint-39484",
  "epoch": 3.999797396545611,
  "eval_steps": 500,
  "global_step": 39484,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.025325431798612165,
      "grad_norm": 1.3758184909820557,
      "learning_rate": 2.462006079027356e-06,
      "loss": 2.5021,
      "step": 250
    },
    {
      "epoch": 0.05065086359722433,
      "grad_norm": 1.0281180143356323,
      "learning_rate": 4.994934143870315e-06,
      "loss": 1.2333,
      "step": 500
    },
    {
      "epoch": 0.0759762953958365,
      "grad_norm": 1.0643247365951538,
      "learning_rate": 7.527862208713273e-06,
      "loss": 1.1968,
      "step": 750
    },
    {
      "epoch": 0.10130172719444866,
      "grad_norm": 1.0396573543548584,
      "learning_rate": 1.006079027355623e-05,
      "loss": 1.1765,
      "step": 1000
    },
    {
      "epoch": 0.12662715899306085,
      "grad_norm": 1.0461889505386353,
      "learning_rate": 1.259371833839919e-05,
      "loss": 1.154,
      "step": 1250
    },
    {
      "epoch": 0.151952590791673,
      "grad_norm": 0.9765501022338867,
      "learning_rate": 1.5126646403242148e-05,
      "loss": 1.1434,
      "step": 1500
    },
    {
      "epoch": 0.17727802259028516,
      "grad_norm": 1.5702638626098633,
      "learning_rate": 1.765957446808511e-05,
      "loss": 1.1293,
      "step": 1750
    },
    {
      "epoch": 0.20260345438889732,
      "grad_norm": 0.9935035705566406,
      "learning_rate": 1.998986936816849e-05,
      "loss": 1.1071,
      "step": 2000
    },
    {
      "epoch": 0.2279288861875095,
      "grad_norm": 0.9715584516525269,
      "learning_rate": 1.985657158091176e-05,
      "loss": 1.0968,
      "step": 2250
    },
    {
      "epoch": 0.2532543179861217,
      "grad_norm": 0.9306278228759766,
      "learning_rate": 1.9723273793655026e-05,
      "loss": 1.0923,
      "step": 2500
    },
    {
      "epoch": 0.2785797497847338,
      "grad_norm": 1.0911701917648315,
      "learning_rate": 1.9589976006398295e-05,
      "loss": 1.0749,
      "step": 2750
    },
    {
      "epoch": 0.303905181583346,
      "grad_norm": 1.024587869644165,
      "learning_rate": 1.9456678219141565e-05,
      "loss": 1.0695,
      "step": 3000
    },
    {
      "epoch": 0.32923061338195814,
      "grad_norm": 0.9198426008224487,
      "learning_rate": 1.932338043188483e-05,
      "loss": 1.0677,
      "step": 3250
    },
    {
      "epoch": 0.3545560451805703,
      "grad_norm": 0.97704017162323,
      "learning_rate": 1.91900826446281e-05,
      "loss": 1.0551,
      "step": 3500
    },
    {
      "epoch": 0.3798814769791825,
      "grad_norm": 0.8315013647079468,
      "learning_rate": 1.905678485737137e-05,
      "loss": 1.0388,
      "step": 3750
    },
    {
      "epoch": 0.40520690877779464,
      "grad_norm": 0.8992215991020203,
      "learning_rate": 1.8923487070114637e-05,
      "loss": 1.0451,
      "step": 4000
    },
    {
      "epoch": 0.4305323405764068,
      "grad_norm": 1.0379681587219238,
      "learning_rate": 1.8790189282857906e-05,
      "loss": 1.0355,
      "step": 4250
    },
    {
      "epoch": 0.455857772375019,
      "grad_norm": 0.9491779804229736,
      "learning_rate": 1.8656891495601176e-05,
      "loss": 1.0374,
      "step": 4500
    },
    {
      "epoch": 0.48118320417363114,
      "grad_norm": 0.8982920050621033,
      "learning_rate": 1.8523593708344442e-05,
      "loss": 1.0267,
      "step": 4750
    },
    {
      "epoch": 0.5065086359722434,
      "grad_norm": 0.8307796716690063,
      "learning_rate": 1.8390295921087712e-05,
      "loss": 1.0161,
      "step": 5000
    },
    {
      "epoch": 0.5318340677708555,
      "grad_norm": 0.9225348830223083,
      "learning_rate": 1.825699813383098e-05,
      "loss": 1.0142,
      "step": 5250
    },
    {
      "epoch": 0.5571594995694676,
      "grad_norm": 0.8506412506103516,
      "learning_rate": 1.8123700346574248e-05,
      "loss": 1.0091,
      "step": 5500
    },
    {
      "epoch": 0.5824849313680798,
      "grad_norm": 0.9221129417419434,
      "learning_rate": 1.7990402559317517e-05,
      "loss": 1.0004,
      "step": 5750
    },
    {
      "epoch": 0.607810363166692,
      "grad_norm": 0.9478843212127686,
      "learning_rate": 1.7857104772060784e-05,
      "loss": 1.0046,
      "step": 6000
    },
    {
      "epoch": 0.6331357949653041,
      "grad_norm": 2.4678380489349365,
      "learning_rate": 1.7723806984804053e-05,
      "loss": 1.0008,
      "step": 6250
    },
    {
      "epoch": 0.6584612267639163,
      "grad_norm": 0.8661640286445618,
      "learning_rate": 1.7590509197547323e-05,
      "loss": 1.0027,
      "step": 6500
    },
    {
      "epoch": 0.6837866585625285,
      "grad_norm": 0.8918209671974182,
      "learning_rate": 1.7457211410290592e-05,
      "loss": 0.9995,
      "step": 6750
    },
    {
      "epoch": 0.7091120903611406,
      "grad_norm": 0.8873420357704163,
      "learning_rate": 1.732391362303386e-05,
      "loss": 0.9932,
      "step": 7000
    },
    {
      "epoch": 0.7344375221597528,
      "grad_norm": 0.8874693512916565,
      "learning_rate": 1.719061583577713e-05,
      "loss": 0.9785,
      "step": 7250
    },
    {
      "epoch": 0.759762953958365,
      "grad_norm": 0.8625825643539429,
      "learning_rate": 1.7057318048520395e-05,
      "loss": 0.9817,
      "step": 7500
    },
    {
      "epoch": 0.7850883857569771,
      "grad_norm": 0.9080167412757874,
      "learning_rate": 1.6924020261263664e-05,
      "loss": 0.9783,
      "step": 7750
    },
    {
      "epoch": 0.8104138175555893,
      "grad_norm": 0.9515653848648071,
      "learning_rate": 1.6790722474006934e-05,
      "loss": 0.984,
      "step": 8000
    },
    {
      "epoch": 0.8357392493542015,
      "grad_norm": 0.8340937495231628,
      "learning_rate": 1.66574246867502e-05,
      "loss": 0.9819,
      "step": 8250
    },
    {
      "epoch": 0.8610646811528136,
      "grad_norm": 0.9471110701560974,
      "learning_rate": 1.652412689949347e-05,
      "loss": 0.974,
      "step": 8500
    },
    {
      "epoch": 0.8863901129514258,
      "grad_norm": 0.8862300515174866,
      "learning_rate": 1.639082911223674e-05,
      "loss": 0.9653,
      "step": 8750
    },
    {
      "epoch": 0.911715544750038,
      "grad_norm": 0.8293271064758301,
      "learning_rate": 1.6257531324980006e-05,
      "loss": 0.9615,
      "step": 9000
    },
    {
      "epoch": 0.9370409765486502,
      "grad_norm": 0.9127717614173889,
      "learning_rate": 1.6124233537723275e-05,
      "loss": 0.9671,
      "step": 9250
    },
    {
      "epoch": 0.9623664083472623,
      "grad_norm": 0.8955530524253845,
      "learning_rate": 1.599093575046654e-05,
      "loss": 0.9606,
      "step": 9500
    },
    {
      "epoch": 0.9876918401458745,
      "grad_norm": 0.9058429002761841,
      "learning_rate": 1.585763796320981e-05,
      "loss": 0.9586,
      "step": 9750
    },
    {
      "epoch": 0.9999493491364028,
      "eval_loss": 1.0199180841445923,
      "eval_runtime": 1639.424,
      "eval_samples_per_second": 40.384,
      "eval_steps_per_second": 1.262,
      "step": 9871
    },
    {
      "epoch": 1.0130172719444868,
      "grad_norm": 0.8560991287231445,
      "learning_rate": 1.572434017595308e-05,
      "loss": 0.939,
      "step": 10000
    },
    {
      "epoch": 1.0383427037430988,
      "grad_norm": 0.8284989595413208,
      "learning_rate": 1.559104238869635e-05,
      "loss": 0.9317,
      "step": 10250
    },
    {
      "epoch": 1.063668135541711,
      "grad_norm": 0.8807125091552734,
      "learning_rate": 1.5457744601439617e-05,
      "loss": 0.9363,
      "step": 10500
    },
    {
      "epoch": 1.088993567340323,
      "grad_norm": 0.8796108961105347,
      "learning_rate": 1.5324446814182886e-05,
      "loss": 0.9236,
      "step": 10750
    },
    {
      "epoch": 1.1143189991389353,
      "grad_norm": 0.8540758490562439,
      "learning_rate": 1.5191149026926156e-05,
      "loss": 0.9233,
      "step": 11000
    },
    {
      "epoch": 1.1396444309375475,
      "grad_norm": 0.8657658696174622,
      "learning_rate": 1.5057851239669424e-05,
      "loss": 0.9346,
      "step": 11250
    },
    {
      "epoch": 1.1649698627361595,
      "grad_norm": 0.8995893001556396,
      "learning_rate": 1.4924553452412692e-05,
      "loss": 0.917,
      "step": 11500
    },
    {
      "epoch": 1.1902952945347718,
      "grad_norm": 0.9290043115615845,
      "learning_rate": 1.479125566515596e-05,
      "loss": 0.9338,
      "step": 11750
    },
    {
      "epoch": 1.215620726333384,
      "grad_norm": 0.8952407836914062,
      "learning_rate": 1.4657957877899228e-05,
      "loss": 0.9257,
      "step": 12000
    },
    {
      "epoch": 1.240946158131996,
      "grad_norm": 0.8839919567108154,
      "learning_rate": 1.4524660090642497e-05,
      "loss": 0.9104,
      "step": 12250
    },
    {
      "epoch": 1.2662715899306083,
      "grad_norm": 0.9677265286445618,
      "learning_rate": 1.4391362303385765e-05,
      "loss": 0.9259,
      "step": 12500
    },
    {
      "epoch": 1.2915970217292205,
      "grad_norm": 0.9325098395347595,
      "learning_rate": 1.4258064516129033e-05,
      "loss": 0.9203,
      "step": 12750
    },
    {
      "epoch": 1.3169224535278325,
      "grad_norm": 0.778640866279602,
      "learning_rate": 1.4124766728872301e-05,
      "loss": 0.912,
      "step": 13000
    },
    {
      "epoch": 1.3422478853264448,
      "grad_norm": 0.8638414144515991,
      "learning_rate": 1.399146894161557e-05,
      "loss": 0.9181,
      "step": 13250
    },
    {
      "epoch": 1.367573317125057,
      "grad_norm": 1.0181560516357422,
      "learning_rate": 1.3858171154358839e-05,
      "loss": 0.9146,
      "step": 13500
    },
    {
      "epoch": 1.392898748923669,
      "grad_norm": 0.9884174466133118,
      "learning_rate": 1.3724873367102107e-05,
      "loss": 0.9037,
      "step": 13750
    },
    {
      "epoch": 1.4182241807222813,
      "grad_norm": 1.1058709621429443,
      "learning_rate": 1.3591575579845375e-05,
      "loss": 0.9159,
      "step": 14000
    },
    {
      "epoch": 1.4435496125208935,
      "grad_norm": 1.0129822492599487,
      "learning_rate": 1.3458277792588642e-05,
      "loss": 0.8993,
      "step": 14250
    },
    {
      "epoch": 1.4688750443195056,
      "grad_norm": 0.8782840967178345,
      "learning_rate": 1.3324980005331914e-05,
      "loss": 0.9061,
      "step": 14500
    },
    {
      "epoch": 1.4942004761181178,
      "grad_norm": 0.9015256762504578,
      "learning_rate": 1.3191682218075182e-05,
      "loss": 0.9122,
      "step": 14750
    },
    {
      "epoch": 1.51952590791673,
      "grad_norm": 0.9482327103614807,
      "learning_rate": 1.305838443081845e-05,
      "loss": 0.9091,
      "step": 15000
    },
    {
      "epoch": 1.544851339715342,
      "grad_norm": 0.8400648236274719,
      "learning_rate": 1.2925086643561718e-05,
      "loss": 0.9064,
      "step": 15250
    },
    {
      "epoch": 1.5701767715139543,
      "grad_norm": 0.9606112837791443,
      "learning_rate": 1.2791788856304987e-05,
      "loss": 0.8983,
      "step": 15500
    },
    {
      "epoch": 1.5955022033125665,
      "grad_norm": 0.944854736328125,
      "learning_rate": 1.2658491069048255e-05,
      "loss": 0.9055,
      "step": 15750
    },
    {
      "epoch": 1.6208276351111786,
      "grad_norm": 0.8674355745315552,
      "learning_rate": 1.2525193281791523e-05,
      "loss": 0.8895,
      "step": 16000
    },
    {
      "epoch": 1.6461530669097908,
      "grad_norm": 0.8632267713546753,
      "learning_rate": 1.2391895494534791e-05,
      "loss": 0.8876,
      "step": 16250
    },
    {
      "epoch": 1.671478498708403,
      "grad_norm": 0.903851330280304,
      "learning_rate": 1.2258597707278059e-05,
      "loss": 0.9007,
      "step": 16500
    },
    {
      "epoch": 1.696803930507015,
      "grad_norm": 0.9242746829986572,
      "learning_rate": 1.2125833111170356e-05,
      "loss": 0.8953,
      "step": 16750
    },
    {
      "epoch": 1.7221293623056273,
      "grad_norm": 0.9627535343170166,
      "learning_rate": 1.1992535323913624e-05,
      "loss": 0.8881,
      "step": 17000
    },
    {
      "epoch": 1.7474547941042395,
      "grad_norm": 0.8524439334869385,
      "learning_rate": 1.1859237536656893e-05,
      "loss": 0.8913,
      "step": 17250
    },
    {
      "epoch": 1.7727802259028516,
      "grad_norm": 0.9666581749916077,
      "learning_rate": 1.1726472940549186e-05,
      "loss": 0.8972,
      "step": 17500
    },
    {
      "epoch": 1.7981056577014638,
      "grad_norm": 0.8809413909912109,
      "learning_rate": 1.1593175153292454e-05,
      "loss": 0.8858,
      "step": 17750
    },
    {
      "epoch": 1.823431089500076,
      "grad_norm": 0.903626024723053,
      "learning_rate": 1.1459877366035726e-05,
      "loss": 0.8907,
      "step": 18000
    },
    {
      "epoch": 1.848756521298688,
      "grad_norm": 0.8203657865524292,
      "learning_rate": 1.1326579578778994e-05,
      "loss": 0.8849,
      "step": 18250
    },
    {
      "epoch": 1.8740819530973003,
      "grad_norm": 0.8978894948959351,
      "learning_rate": 1.1193281791522262e-05,
      "loss": 0.8831,
      "step": 18500
    },
    {
      "epoch": 1.8994073848959125,
      "grad_norm": 0.9283676743507385,
      "learning_rate": 1.105998400426553e-05,
      "loss": 0.8828,
      "step": 18750
    },
    {
      "epoch": 1.9247328166945246,
      "grad_norm": 0.9514400959014893,
      "learning_rate": 1.09266862170088e-05,
      "loss": 0.8761,
      "step": 19000
    },
    {
      "epoch": 1.9500582484931368,
      "grad_norm": 0.8809083104133606,
      "learning_rate": 1.0793388429752067e-05,
      "loss": 0.8861,
      "step": 19250
    },
    {
      "epoch": 1.975383680291749,
      "grad_norm": 0.8767380118370056,
      "learning_rate": 1.0660090642495335e-05,
      "loss": 0.8811,
      "step": 19500
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.0132982730865479,
      "eval_runtime": 1638.7739,
      "eval_samples_per_second": 40.4,
      "eval_steps_per_second": 1.263,
      "step": 19743
    },
    {
      "epoch": 2.000709112090361,
      "grad_norm": 0.9499657154083252,
      "learning_rate": 1.0526792855238603e-05,
      "loss": 0.8821,
      "step": 19750
    },
    {
      "epoch": 2.0260345438889735,
      "grad_norm": 0.7973400950431824,
      "learning_rate": 1.0393495067981871e-05,
      "loss": 0.8608,
      "step": 20000
    },
    {
      "epoch": 2.0513599756875855,
      "grad_norm": 0.9120876789093018,
      "learning_rate": 1.0260197280725142e-05,
      "loss": 0.8574,
      "step": 20250
    },
    {
      "epoch": 2.0766854074861976,
      "grad_norm": 0.8534213304519653,
      "learning_rate": 1.012689949346841e-05,
      "loss": 0.8604,
      "step": 20500
    },
    {
      "epoch": 2.1020108392848096,
      "grad_norm": 0.8855152726173401,
      "learning_rate": 9.993601706211678e-06,
      "loss": 0.8696,
      "step": 20750
    },
    {
      "epoch": 2.127336271083422,
      "grad_norm": 0.9473037123680115,
      "learning_rate": 9.860303918954946e-06,
      "loss": 0.859,
      "step": 21000
    },
    {
      "epoch": 2.152661702882034,
      "grad_norm": 0.9424493908882141,
      "learning_rate": 9.727006131698214e-06,
      "loss": 0.8577,
      "step": 21250
    },
    {
      "epoch": 2.177987134680646,
      "grad_norm": 0.8066829442977905,
      "learning_rate": 9.593708344441484e-06,
      "loss": 0.8611,
      "step": 21500
    },
    {
      "epoch": 2.2033125664792585,
      "grad_norm": 0.9358799457550049,
      "learning_rate": 9.460410557184752e-06,
      "loss": 0.8572,
      "step": 21750
    },
    {
      "epoch": 2.2286379982778706,
      "grad_norm": 0.9982028007507324,
      "learning_rate": 9.32711276992802e-06,
      "loss": 0.8492,
      "step": 22000
    },
    {
      "epoch": 2.2539634300764826,
      "grad_norm": 0.8830463290214539,
      "learning_rate": 9.19381498267129e-06,
      "loss": 0.8539,
      "step": 22250
    },
    {
      "epoch": 2.279288861875095,
      "grad_norm": 0.9708883762359619,
      "learning_rate": 9.061050386563584e-06,
      "loss": 0.8495,
      "step": 22500
    },
    {
      "epoch": 2.304614293673707,
      "grad_norm": 0.8464154005050659,
      "learning_rate": 8.927752599306852e-06,
      "loss": 0.8519,
      "step": 22750
    },
    {
      "epoch": 2.329939725472319,
      "grad_norm": 0.9446752667427063,
      "learning_rate": 8.79445481205012e-06,
      "loss": 0.8545,
      "step": 23000
    },
    {
      "epoch": 2.3552651572709316,
      "grad_norm": 0.9621785283088684,
      "learning_rate": 8.66115702479339e-06,
      "loss": 0.8482,
      "step": 23250
    },
    {
      "epoch": 2.3805905890695436,
      "grad_norm": 0.9067039489746094,
      "learning_rate": 8.527859237536658e-06,
      "loss": 0.8582,
      "step": 23500
    },
    {
      "epoch": 2.4059160208681556,
      "grad_norm": 0.9858378767967224,
      "learning_rate": 8.394561450279926e-06,
      "loss": 0.8565,
      "step": 23750
    },
    {
      "epoch": 2.431241452666768,
      "grad_norm": 0.9362533092498779,
      "learning_rate": 8.261263663023195e-06,
      "loss": 0.8531,
      "step": 24000
    },
    {
      "epoch": 2.45656688446538,
      "grad_norm": 0.9225192666053772,
      "learning_rate": 8.127965875766463e-06,
      "loss": 0.8555,
      "step": 24250
    },
    {
      "epoch": 2.481892316263992,
      "grad_norm": 0.9358901381492615,
      "learning_rate": 7.994668088509731e-06,
      "loss": 0.8523,
      "step": 24500
    },
    {
      "epoch": 2.5072177480626046,
      "grad_norm": 0.9531691670417786,
      "learning_rate": 7.861370301252999e-06,
      "loss": 0.8443,
      "step": 24750
    },
    {
      "epoch": 2.5325431798612166,
      "grad_norm": 0.9370359182357788,
      "learning_rate": 7.728072513996269e-06,
      "loss": 0.8451,
      "step": 25000
    },
    {
      "epoch": 2.5578686116598286,
      "grad_norm": 0.8840625882148743,
      "learning_rate": 7.5947747267395365e-06,
      "loss": 0.8533,
      "step": 25250
    },
    {
      "epoch": 2.583194043458441,
      "grad_norm": 0.9283475875854492,
      "learning_rate": 7.461476939482805e-06,
      "loss": 0.8425,
      "step": 25500
    },
    {
      "epoch": 2.608519475257053,
      "grad_norm": 0.908301055431366,
      "learning_rate": 7.328179152226073e-06,
      "loss": 0.8433,
      "step": 25750
    },
    {
      "epoch": 2.633844907055665,
      "grad_norm": 0.9126138091087341,
      "learning_rate": 7.194881364969342e-06,
      "loss": 0.8401,
      "step": 26000
    },
    {
      "epoch": 2.6591703388542776,
      "grad_norm": 0.8935621976852417,
      "learning_rate": 7.061583577712611e-06,
      "loss": 0.8418,
      "step": 26250
    },
    {
      "epoch": 2.6844957706528896,
      "grad_norm": 0.8745056986808777,
      "learning_rate": 6.928285790455879e-06,
      "loss": 0.837,
      "step": 26500
    },
    {
      "epoch": 2.7098212024515016,
      "grad_norm": 0.948512077331543,
      "learning_rate": 6.795521194348175e-06,
      "loss": 0.8411,
      "step": 26750
    },
    {
      "epoch": 2.735146634250114,
      "grad_norm": 1.008754014968872,
      "learning_rate": 6.6622234070914425e-06,
      "loss": 0.8355,
      "step": 27000
    },
    {
      "epoch": 2.760472066048726,
      "grad_norm": 1.0162386894226074,
      "learning_rate": 6.528925619834712e-06,
      "loss": 0.8405,
      "step": 27250
    },
    {
      "epoch": 2.785797497847338,
      "grad_norm": 0.9260863661766052,
      "learning_rate": 6.39562783257798e-06,
      "loss": 0.8354,
      "step": 27500
    },
    {
      "epoch": 2.8111229296459506,
      "grad_norm": 0.9513674378395081,
      "learning_rate": 6.262330045321248e-06,
      "loss": 0.8392,
      "step": 27750
    },
    {
      "epoch": 2.8364483614445626,
      "grad_norm": 1.0211256742477417,
      "learning_rate": 6.129032258064517e-06,
      "loss": 0.8324,
      "step": 28000
    },
    {
      "epoch": 2.8617737932431746,
      "grad_norm": 0.9345864057540894,
      "learning_rate": 5.995734470807785e-06,
      "loss": 0.8412,
      "step": 28250
    },
    {
      "epoch": 2.887099225041787,
      "grad_norm": 0.8973652124404907,
      "learning_rate": 5.8624366835510535e-06,
      "loss": 0.8368,
      "step": 28500
    },
    {
      "epoch": 2.912424656840399,
      "grad_norm": 0.8682575225830078,
      "learning_rate": 5.7291388962943215e-06,
      "loss": 0.8316,
      "step": 28750
    },
    {
      "epoch": 2.937750088639011,
      "grad_norm": 0.9307655096054077,
      "learning_rate": 5.595841109037591e-06,
      "loss": 0.8365,
      "step": 29000
    },
    {
      "epoch": 2.9630755204376236,
      "grad_norm": 1.0200505256652832,
      "learning_rate": 5.462543321780859e-06,
      "loss": 0.8395,
      "step": 29250
    },
    {
      "epoch": 2.9884009522362356,
      "grad_norm": 1.0397480726242065,
      "learning_rate": 5.329245534524128e-06,
      "loss": 0.834,
      "step": 29500
    },
    {
      "epoch": 2.999949349136403,
      "eval_loss": 1.0098419189453125,
      "eval_runtime": 1638.8769,
      "eval_samples_per_second": 40.398,
      "eval_steps_per_second": 1.262,
      "step": 29614
    },
    {
      "epoch": 3.0137263840348476,
      "grad_norm": 1.0044381618499756,
      "learning_rate": 5.195947747267396e-06,
      "loss": 0.8305,
      "step": 29750
    },
    {
      "epoch": 3.03905181583346,
      "grad_norm": 1.0036746263504028,
      "learning_rate": 5.062649960010664e-06,
      "loss": 0.8284,
      "step": 30000
    },
    {
      "epoch": 3.064377247632072,
      "grad_norm": 0.8896681070327759,
      "learning_rate": 4.9293521727539325e-06,
      "loss": 0.8296,
      "step": 30250
    },
    {
      "epoch": 3.089702679430684,
      "grad_norm": 0.9313392639160156,
      "learning_rate": 4.796587576646228e-06,
      "loss": 0.8249,
      "step": 30500
    },
    {
      "epoch": 3.1150281112292966,
      "grad_norm": 0.9054111838340759,
      "learning_rate": 4.663822980538523e-06,
      "loss": 0.8217,
      "step": 30750
    },
    {
      "epoch": 3.1403535430279086,
      "grad_norm": 0.9691897630691528,
      "learning_rate": 4.530525193281792e-06,
      "loss": 0.8182,
      "step": 31000
    },
    {
      "epoch": 3.1656789748265206,
      "grad_norm": 1.0348809957504272,
      "learning_rate": 4.39722740602506e-06,
      "loss": 0.8205,
      "step": 31250
    },
    {
      "epoch": 3.191004406625133,
      "grad_norm": 0.8919842839241028,
      "learning_rate": 4.263929618768329e-06,
      "loss": 0.8142,
      "step": 31500
    },
    {
      "epoch": 3.216329838423745,
      "grad_norm": 0.8951621651649475,
      "learning_rate": 4.130631831511598e-06,
      "loss": 0.8264,
      "step": 31750
    },
    {
      "epoch": 3.241655270222357,
      "grad_norm": 0.9754297733306885,
      "learning_rate": 3.9973340442548655e-06,
      "loss": 0.8176,
      "step": 32000
    },
    {
      "epoch": 3.2669807020209696,
      "grad_norm": 1.0148935317993164,
      "learning_rate": 3.864036256998134e-06,
      "loss": 0.8252,
      "step": 32250
    },
    {
      "epoch": 3.2923061338195816,
      "grad_norm": 0.9025924801826477,
      "learning_rate": 3.7307384697414027e-06,
      "loss": 0.816,
      "step": 32500
    },
    {
      "epoch": 3.3176315656181936,
      "grad_norm": 0.9092098474502563,
      "learning_rate": 3.597440682484671e-06,
      "loss": 0.8201,
      "step": 32750
    },
    {
      "epoch": 3.342956997416806,
      "grad_norm": 0.9289584755897522,
      "learning_rate": 3.4641428952279394e-06,
      "loss": 0.8161,
      "step": 33000
    },
    {
      "epoch": 3.368282429215418,
      "grad_norm": 0.9961521029472351,
      "learning_rate": 3.3308451079712077e-06,
      "loss": 0.8188,
      "step": 33250
    },
    {
      "epoch": 3.39360786101403,
      "grad_norm": 1.0093194246292114,
      "learning_rate": 3.1975473207144765e-06,
      "loss": 0.8188,
      "step": 33500
    },
    {
      "epoch": 3.4189332928126426,
      "grad_norm": 1.016876220703125,
      "learning_rate": 3.064249533457745e-06,
      "loss": 0.8119,
      "step": 33750
    },
    {
      "epoch": 3.4442587246112546,
      "grad_norm": 1.0821869373321533,
      "learning_rate": 2.9309517462010133e-06,
      "loss": 0.8221,
      "step": 34000
    },
    {
      "epoch": 3.4695841564098666,
      "grad_norm": 0.9766045808792114,
      "learning_rate": 2.797653958944282e-06,
      "loss": 0.8132,
      "step": 34250
    },
    {
      "epoch": 3.494909588208479,
      "grad_norm": 0.8767127990722656,
      "learning_rate": 2.6643561716875504e-06,
      "loss": 0.8166,
      "step": 34500
    },
    {
      "epoch": 3.520235020007091,
      "grad_norm": 0.9653995633125305,
      "learning_rate": 2.5310583844308183e-06,
      "loss": 0.8074,
      "step": 34750
    },
    {
      "epoch": 3.545560451805703,
      "grad_norm": 0.8945389986038208,
      "learning_rate": 2.397760597174087e-06,
      "loss": 0.8176,
      "step": 35000
    },
    {
      "epoch": 3.5708858836043156,
      "grad_norm": 0.9447450637817383,
      "learning_rate": 2.2649960010663825e-06,
      "loss": 0.8235,
      "step": 35250
    },
    {
      "epoch": 3.5962113154029276,
      "grad_norm": 1.0400015115737915,
      "learning_rate": 2.131698213809651e-06,
      "loss": 0.8136,
      "step": 35500
    },
    {
      "epoch": 3.6215367472015396,
      "grad_norm": 0.9300839900970459,
      "learning_rate": 1.9984004265529192e-06,
      "loss": 0.8197,
      "step": 35750
    },
    {
      "epoch": 3.646862179000152,
      "grad_norm": 0.9101824164390564,
      "learning_rate": 1.865102639296188e-06,
      "loss": 0.8078,
      "step": 36000
    },
    {
      "epoch": 3.672187610798764,
      "grad_norm": 0.9514500498771667,
      "learning_rate": 1.7318048520394562e-06,
      "loss": 0.8084,
      "step": 36250
    },
    {
      "epoch": 3.697513042597376,
      "grad_norm": 0.9441540241241455,
      "learning_rate": 1.5985070647827248e-06,
      "loss": 0.8213,
      "step": 36500
    },
    {
      "epoch": 3.7228384743959886,
      "grad_norm": 1.0184293985366821,
      "learning_rate": 1.4652092775259933e-06,
      "loss": 0.8094,
      "step": 36750
    },
    {
      "epoch": 3.7481639061946006,
      "grad_norm": 0.991316556930542,
      "learning_rate": 1.3319114902692617e-06,
      "loss": 0.8106,
      "step": 37000
    },
    {
      "epoch": 3.7734893379932126,
      "grad_norm": 0.9887702465057373,
      "learning_rate": 1.19861370301253e-06,
      "loss": 0.8185,
      "step": 37250
    },
    {
      "epoch": 3.798814769791825,
      "grad_norm": 0.9897658228874207,
      "learning_rate": 1.0653159157557984e-06,
      "loss": 0.8069,
      "step": 37500
    },
    {
      "epoch": 3.824140201590437,
      "grad_norm": 0.9137114882469177,
      "learning_rate": 9.32018128499067e-07,
      "loss": 0.8114,
      "step": 37750
    },
    {
      "epoch": 3.849465633389049,
      "grad_norm": 0.9579175710678101,
      "learning_rate": 7.987203412423355e-07,
      "loss": 0.819,
      "step": 38000
    },
    {
      "epoch": 3.8747910651876616,
      "grad_norm": 0.9389879107475281,
      "learning_rate": 6.654225539856039e-07,
      "loss": 0.8165,
      "step": 38250
    },
    {
      "epoch": 3.9001164969862736,
      "grad_norm": 0.9765516519546509,
      "learning_rate": 5.321247667288723e-07,
      "loss": 0.8191,
      "step": 38500
    },
    {
      "epoch": 3.9254419287848856,
      "grad_norm": 1.0299735069274902,
      "learning_rate": 3.9882697947214085e-07,
      "loss": 0.8182,
      "step": 38750
    },
    {
      "epoch": 3.950767360583498,
      "grad_norm": 0.9844255447387695,
      "learning_rate": 2.6552919221540927e-07,
      "loss": 0.8175,
      "step": 39000
    },
    {
      "epoch": 3.97609279238211,
      "grad_norm": 0.9425697922706604,
      "learning_rate": 1.3223140495867768e-07,
      "loss": 0.813,
      "step": 39250
    },
    {
      "epoch": 3.999797396545611,
      "eval_loss": 1.0085912942886353,
      "eval_runtime": 1638.7267,
      "eval_samples_per_second": 40.401,
      "eval_steps_per_second": 1.263,
      "step": 39484
    },
    {
      "epoch": 3.999797396545611,
      "step": 39484,
      "total_flos": 3.077643948911493e+18,
      "train_loss": 0.9114745237365729,
      "train_runtime": 159688.734,
      "train_samples_per_second": 15.825,
      "train_steps_per_second": 0.247
    }
  ],
  "logging_steps": 250,
  "max_steps": 39484,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "total_flos": 3.077643948911493e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}