diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.8505146128020254, + "epoch": 1.0006054268259124, "eval_steps": 500, - "global_step": 15453, + "global_step": 18180, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -108178,6 +108178,19095 @@ "learning_rate": 6.18751132978102e-06, "loss": 0.7619, "step": 15453 + }, + { + "epoch": 0.8505696516043811, + "grad_norm": 0.7532587647438049, + "learning_rate": 6.1870902614890384e-06, + "loss": 0.6929, + "step": 15454 + }, + { + "epoch": 0.8506246904067367, + "grad_norm": 0.5978666543960571, + "learning_rate": 6.186669184275204e-06, + "loss": 0.6246, + "step": 15455 + }, + { + "epoch": 0.8506797292090924, + "grad_norm": 0.8646023869514465, + "learning_rate": 6.186248098142681e-06, + "loss": 0.8215, + "step": 15456 + }, + { + "epoch": 0.8507347680114481, + "grad_norm": 0.635597825050354, + "learning_rate": 6.1858270030946355e-06, + "loss": 0.7683, + "step": 15457 + }, + { + "epoch": 0.8507898068138038, + "grad_norm": 0.7014510631561279, + "learning_rate": 6.185405899134231e-06, + "loss": 0.708, + "step": 15458 + }, + { + "epoch": 0.8508448456161594, + "grad_norm": 1.6040544509887695, + "learning_rate": 6.184984786264633e-06, + "loss": 0.893, + "step": 15459 + }, + { + "epoch": 0.850899884418515, + "grad_norm": 0.8098211288452148, + "learning_rate": 6.184563664489007e-06, + "loss": 0.7704, + "step": 15460 + }, + { + "epoch": 0.8509549232208707, + "grad_norm": 0.8390217423439026, + "learning_rate": 6.184142533810518e-06, + "loss": 0.7161, + "step": 15461 + }, + { + "epoch": 0.8510099620232264, + "grad_norm": 0.792433500289917, + "learning_rate": 6.183721394232329e-06, + "loss": 0.7247, + "step": 15462 + }, + { + "epoch": 0.851065000825582, + "grad_norm": 0.6644124984741211, + "learning_rate": 6.183300245757609e-06, + "loss": 0.7996, + "step": 15463 + }, + { + "epoch": 0.8511200396279377, + "grad_norm": 0.7543407082557678, + "learning_rate": 6.182879088389521e-06, + "loss": 0.7912, + "step": 15464 + }, + { + "epoch": 0.8511750784302934, + "grad_norm": 0.7752966284751892, + "learning_rate": 6.18245792213123e-06, + "loss": 0.7222, + "step": 15465 + }, + { + "epoch": 0.851230117232649, + "grad_norm": 0.6863895654678345, + "learning_rate": 6.182036746985901e-06, + "loss": 0.7375, + "step": 15466 + }, + { + "epoch": 0.8512851560350047, + "grad_norm": 0.6404759287834167, + "learning_rate": 6.1816155629567006e-06, + "loss": 0.7666, + "step": 15467 + }, + { + "epoch": 0.8513401948373603, + "grad_norm": 0.6879389882087708, + "learning_rate": 6.181194370046795e-06, + "loss": 0.697, + "step": 15468 + }, + { + "epoch": 0.851395233639716, + "grad_norm": 0.6798561215400696, + "learning_rate": 6.180773168259347e-06, + "loss": 0.7401, + "step": 15469 + }, + { + "epoch": 0.8514502724420716, + "grad_norm": 0.6846516728401184, + "learning_rate": 6.180351957597524e-06, + "loss": 0.7512, + "step": 15470 + }, + { + "epoch": 0.8515053112444273, + "grad_norm": 0.6618537902832031, + "learning_rate": 6.1799307380644925e-06, + "loss": 0.7577, + "step": 15471 + }, + { + "epoch": 0.851560350046783, + "grad_norm": 0.7780229449272156, + "learning_rate": 6.179509509663417e-06, + "loss": 0.7145, + "step": 15472 + }, + { + "epoch": 0.8516153888491387, + "grad_norm": 0.8041914701461792, + "learning_rate": 6.179088272397464e-06, + "loss": 0.7662, + "step": 15473 + }, + { + "epoch": 0.8516704276514943, + "grad_norm": 0.6719852685928345, + "learning_rate": 6.178667026269799e-06, + "loss": 0.7269, + "step": 15474 + }, + { + "epoch": 0.8517254664538499, + "grad_norm": 0.6677427291870117, + "learning_rate": 6.178245771283589e-06, + "loss": 0.6949, + "step": 15475 + }, + { + "epoch": 0.8517805052562056, + "grad_norm": 0.7873098850250244, + "learning_rate": 6.177824507441998e-06, + "loss": 0.7435, + "step": 15476 + }, + { + "epoch": 0.8518355440585613, + "grad_norm": 0.726902186870575, + "learning_rate": 6.1774032347481935e-06, + "loss": 0.8712, + "step": 15477 + }, + { + "epoch": 0.8518905828609169, + "grad_norm": 0.6984882950782776, + "learning_rate": 6.176981953205342e-06, + "loss": 0.8705, + "step": 15478 + }, + { + "epoch": 0.8519456216632726, + "grad_norm": 0.9757348895072937, + "learning_rate": 6.176560662816609e-06, + "loss": 0.8172, + "step": 15479 + }, + { + "epoch": 0.8520006604656283, + "grad_norm": 0.8358891606330872, + "learning_rate": 6.1761393635851615e-06, + "loss": 0.7576, + "step": 15480 + }, + { + "epoch": 0.852055699267984, + "grad_norm": 0.7324516177177429, + "learning_rate": 6.175718055514165e-06, + "loss": 0.6785, + "step": 15481 + }, + { + "epoch": 0.8521107380703395, + "grad_norm": 0.7352122664451599, + "learning_rate": 6.175296738606785e-06, + "loss": 0.6998, + "step": 15482 + }, + { + "epoch": 0.8521657768726952, + "grad_norm": 0.7331148982048035, + "learning_rate": 6.17487541286619e-06, + "loss": 0.7318, + "step": 15483 + }, + { + "epoch": 0.8522208156750509, + "grad_norm": 0.7264272570610046, + "learning_rate": 6.174454078295547e-06, + "loss": 0.8328, + "step": 15484 + }, + { + "epoch": 0.8522758544774066, + "grad_norm": 0.6304524540901184, + "learning_rate": 6.174032734898021e-06, + "loss": 0.6982, + "step": 15485 + }, + { + "epoch": 0.8523308932797622, + "grad_norm": 0.709864616394043, + "learning_rate": 6.173611382676778e-06, + "loss": 0.784, + "step": 15486 + }, + { + "epoch": 0.8523859320821179, + "grad_norm": 0.689153254032135, + "learning_rate": 6.173190021634987e-06, + "loss": 0.596, + "step": 15487 + }, + { + "epoch": 0.8524409708844736, + "grad_norm": 0.6987396478652954, + "learning_rate": 6.172768651775815e-06, + "loss": 0.776, + "step": 15488 + }, + { + "epoch": 0.8524960096868293, + "grad_norm": 0.6473208665847778, + "learning_rate": 6.172347273102427e-06, + "loss": 0.7084, + "step": 15489 + }, + { + "epoch": 0.8525510484891848, + "grad_norm": 0.7659436464309692, + "learning_rate": 6.17192588561799e-06, + "loss": 0.6916, + "step": 15490 + }, + { + "epoch": 0.8526060872915405, + "grad_norm": 0.6404485702514648, + "learning_rate": 6.171504489325673e-06, + "loss": 0.6761, + "step": 15491 + }, + { + "epoch": 0.8526611260938962, + "grad_norm": 0.7309281826019287, + "learning_rate": 6.171083084228641e-06, + "loss": 0.7993, + "step": 15492 + }, + { + "epoch": 0.8527161648962519, + "grad_norm": 0.6718485355377197, + "learning_rate": 6.170661670330062e-06, + "loss": 0.719, + "step": 15493 + }, + { + "epoch": 0.8527712036986075, + "grad_norm": 0.7705026865005493, + "learning_rate": 6.170240247633101e-06, + "loss": 0.7148, + "step": 15494 + }, + { + "epoch": 0.8528262425009632, + "grad_norm": 0.9131157398223877, + "learning_rate": 6.169818816140931e-06, + "loss": 0.7439, + "step": 15495 + }, + { + "epoch": 0.8528812813033189, + "grad_norm": 0.7396848201751709, + "learning_rate": 6.169397375856715e-06, + "loss": 0.7745, + "step": 15496 + }, + { + "epoch": 0.8529363201056746, + "grad_norm": 0.613767147064209, + "learning_rate": 6.168975926783621e-06, + "loss": 0.6724, + "step": 15497 + }, + { + "epoch": 0.8529913589080301, + "grad_norm": 0.8665019869804382, + "learning_rate": 6.168554468924815e-06, + "loss": 0.6896, + "step": 15498 + }, + { + "epoch": 0.8530463977103858, + "grad_norm": 0.7870811223983765, + "learning_rate": 6.168133002283469e-06, + "loss": 0.7177, + "step": 15499 + }, + { + "epoch": 0.8531014365127415, + "grad_norm": 0.5830097794532776, + "learning_rate": 6.167711526862747e-06, + "loss": 0.6465, + "step": 15500 + }, + { + "epoch": 0.8531564753150972, + "grad_norm": 0.6497567892074585, + "learning_rate": 6.167290042665819e-06, + "loss": 0.6842, + "step": 15501 + }, + { + "epoch": 0.8532115141174528, + "grad_norm": 0.6574105620384216, + "learning_rate": 6.1668685496958515e-06, + "loss": 0.7197, + "step": 15502 + }, + { + "epoch": 0.8532665529198085, + "grad_norm": 0.7069656252861023, + "learning_rate": 6.166447047956011e-06, + "loss": 0.8031, + "step": 15503 + }, + { + "epoch": 0.8533215917221642, + "grad_norm": 0.700334370136261, + "learning_rate": 6.166025537449467e-06, + "loss": 0.7731, + "step": 15504 + }, + { + "epoch": 0.8533766305245198, + "grad_norm": 0.7227431535720825, + "learning_rate": 6.165604018179388e-06, + "loss": 0.7741, + "step": 15505 + }, + { + "epoch": 0.8534316693268754, + "grad_norm": 0.6752750277519226, + "learning_rate": 6.16518249014894e-06, + "loss": 0.7176, + "step": 15506 + }, + { + "epoch": 0.8534867081292311, + "grad_norm": 0.6595750451087952, + "learning_rate": 6.1647609533612925e-06, + "loss": 0.7758, + "step": 15507 + }, + { + "epoch": 0.8535417469315868, + "grad_norm": 0.7232886552810669, + "learning_rate": 6.1643394078196136e-06, + "loss": 0.667, + "step": 15508 + }, + { + "epoch": 0.8535967857339424, + "grad_norm": 0.7297642827033997, + "learning_rate": 6.163917853527072e-06, + "loss": 0.6951, + "step": 15509 + }, + { + "epoch": 0.8536518245362981, + "grad_norm": 0.6766324043273926, + "learning_rate": 6.163496290486834e-06, + "loss": 0.7429, + "step": 15510 + }, + { + "epoch": 0.8537068633386538, + "grad_norm": 0.6861003041267395, + "learning_rate": 6.16307471870207e-06, + "loss": 0.8159, + "step": 15511 + }, + { + "epoch": 0.8537619021410094, + "grad_norm": 0.687471330165863, + "learning_rate": 6.1626531381759494e-06, + "loss": 0.7001, + "step": 15512 + }, + { + "epoch": 0.853816940943365, + "grad_norm": 0.8295855522155762, + "learning_rate": 6.162231548911637e-06, + "loss": 0.824, + "step": 15513 + }, + { + "epoch": 0.8538719797457207, + "grad_norm": 0.7513163685798645, + "learning_rate": 6.161809950912304e-06, + "loss": 0.7563, + "step": 15514 + }, + { + "epoch": 0.8539270185480764, + "grad_norm": 0.824586033821106, + "learning_rate": 6.161388344181119e-06, + "loss": 0.9607, + "step": 15515 + }, + { + "epoch": 0.8539820573504321, + "grad_norm": 0.9423270225524902, + "learning_rate": 6.160966728721249e-06, + "loss": 0.7527, + "step": 15516 + }, + { + "epoch": 0.8540370961527877, + "grad_norm": 0.6734938621520996, + "learning_rate": 6.160545104535866e-06, + "loss": 0.7741, + "step": 15517 + }, + { + "epoch": 0.8540921349551434, + "grad_norm": 0.6953195929527283, + "learning_rate": 6.160123471628133e-06, + "loss": 0.7844, + "step": 15518 + }, + { + "epoch": 0.854147173757499, + "grad_norm": 0.8023058772087097, + "learning_rate": 6.1597018300012245e-06, + "loss": 0.815, + "step": 15519 + }, + { + "epoch": 0.8542022125598547, + "grad_norm": 1.1232868432998657, + "learning_rate": 6.159280179658308e-06, + "loss": 0.7702, + "step": 15520 + }, + { + "epoch": 0.8542572513622103, + "grad_norm": 0.6074864268302917, + "learning_rate": 6.158858520602552e-06, + "loss": 0.6025, + "step": 15521 + }, + { + "epoch": 0.854312290164566, + "grad_norm": 0.6659427285194397, + "learning_rate": 6.158436852837124e-06, + "loss": 0.7019, + "step": 15522 + }, + { + "epoch": 0.8543673289669217, + "grad_norm": 0.6226561665534973, + "learning_rate": 6.158015176365197e-06, + "loss": 0.6748, + "step": 15523 + }, + { + "epoch": 0.8544223677692774, + "grad_norm": 0.6821898818016052, + "learning_rate": 6.157593491189936e-06, + "loss": 0.7617, + "step": 15524 + }, + { + "epoch": 0.854477406571633, + "grad_norm": 0.6755489110946655, + "learning_rate": 6.157171797314513e-06, + "loss": 0.6664, + "step": 15525 + }, + { + "epoch": 0.8545324453739886, + "grad_norm": 0.5896545052528381, + "learning_rate": 6.156750094742096e-06, + "loss": 0.7147, + "step": 15526 + }, + { + "epoch": 0.8545874841763443, + "grad_norm": 0.7024844288825989, + "learning_rate": 6.1563283834758555e-06, + "loss": 0.7884, + "step": 15527 + }, + { + "epoch": 0.8546425229787, + "grad_norm": 0.8020251393318176, + "learning_rate": 6.15590666351896e-06, + "loss": 0.8014, + "step": 15528 + }, + { + "epoch": 0.8546975617810556, + "grad_norm": 0.7213570475578308, + "learning_rate": 6.1554849348745805e-06, + "loss": 0.7289, + "step": 15529 + }, + { + "epoch": 0.8547526005834113, + "grad_norm": 0.618235170841217, + "learning_rate": 6.155063197545884e-06, + "loss": 0.7241, + "step": 15530 + }, + { + "epoch": 0.854807639385767, + "grad_norm": 0.7157233953475952, + "learning_rate": 6.154641451536042e-06, + "loss": 0.7591, + "step": 15531 + }, + { + "epoch": 0.8548626781881227, + "grad_norm": 0.7156147360801697, + "learning_rate": 6.1542196968482245e-06, + "loss": 0.7337, + "step": 15532 + }, + { + "epoch": 0.8549177169904782, + "grad_norm": 0.7722104787826538, + "learning_rate": 6.153797933485601e-06, + "loss": 0.7287, + "step": 15533 + }, + { + "epoch": 0.8549727557928339, + "grad_norm": 0.8379881381988525, + "learning_rate": 6.1533761614513394e-06, + "loss": 0.7898, + "step": 15534 + }, + { + "epoch": 0.8550277945951896, + "grad_norm": 0.6861830949783325, + "learning_rate": 6.152954380748614e-06, + "loss": 0.7838, + "step": 15535 + }, + { + "epoch": 0.8550828333975453, + "grad_norm": 0.6730731725692749, + "learning_rate": 6.15253259138059e-06, + "loss": 0.7606, + "step": 15536 + }, + { + "epoch": 0.8551378721999009, + "grad_norm": 0.6933832168579102, + "learning_rate": 6.152110793350441e-06, + "loss": 0.7637, + "step": 15537 + }, + { + "epoch": 0.8551929110022566, + "grad_norm": 0.9518343806266785, + "learning_rate": 6.151688986661335e-06, + "loss": 0.7715, + "step": 15538 + }, + { + "epoch": 0.8552479498046123, + "grad_norm": 0.7800498008728027, + "learning_rate": 6.151267171316442e-06, + "loss": 0.7933, + "step": 15539 + }, + { + "epoch": 0.855302988606968, + "grad_norm": 0.8873908519744873, + "learning_rate": 6.150845347318934e-06, + "loss": 0.8349, + "step": 15540 + }, + { + "epoch": 0.8553580274093235, + "grad_norm": 0.6778621673583984, + "learning_rate": 6.15042351467198e-06, + "loss": 0.7855, + "step": 15541 + }, + { + "epoch": 0.8554130662116792, + "grad_norm": 0.6535203456878662, + "learning_rate": 6.150001673378751e-06, + "loss": 0.7288, + "step": 15542 + }, + { + "epoch": 0.8554681050140349, + "grad_norm": 0.7087036967277527, + "learning_rate": 6.149579823442418e-06, + "loss": 0.8542, + "step": 15543 + }, + { + "epoch": 0.8555231438163906, + "grad_norm": 0.8136983513832092, + "learning_rate": 6.1491579648661495e-06, + "loss": 0.8618, + "step": 15544 + }, + { + "epoch": 0.8555781826187462, + "grad_norm": 0.7439128756523132, + "learning_rate": 6.148736097653118e-06, + "loss": 0.7257, + "step": 15545 + }, + { + "epoch": 0.8556332214211019, + "grad_norm": 0.853769838809967, + "learning_rate": 6.148314221806493e-06, + "loss": 0.8759, + "step": 15546 + }, + { + "epoch": 0.8556882602234576, + "grad_norm": 0.6681458950042725, + "learning_rate": 6.147892337329446e-06, + "loss": 0.6993, + "step": 15547 + }, + { + "epoch": 0.8557432990258133, + "grad_norm": 0.6452274918556213, + "learning_rate": 6.147470444225147e-06, + "loss": 0.6838, + "step": 15548 + }, + { + "epoch": 0.8557983378281688, + "grad_norm": 0.7074391841888428, + "learning_rate": 6.147048542496769e-06, + "loss": 0.7978, + "step": 15549 + }, + { + "epoch": 0.8558533766305245, + "grad_norm": 0.634824275970459, + "learning_rate": 6.14662663214748e-06, + "loss": 0.6602, + "step": 15550 + }, + { + "epoch": 0.8559084154328802, + "grad_norm": 0.7253528237342834, + "learning_rate": 6.146204713180453e-06, + "loss": 0.8662, + "step": 15551 + }, + { + "epoch": 0.8559634542352358, + "grad_norm": 0.6583418846130371, + "learning_rate": 6.145782785598858e-06, + "loss": 0.7502, + "step": 15552 + }, + { + "epoch": 0.8560184930375915, + "grad_norm": 0.7276360392570496, + "learning_rate": 6.1453608494058645e-06, + "loss": 0.7066, + "step": 15553 + }, + { + "epoch": 0.8560735318399472, + "grad_norm": 0.6554223299026489, + "learning_rate": 6.144938904604646e-06, + "loss": 0.7468, + "step": 15554 + }, + { + "epoch": 0.8561285706423029, + "grad_norm": 0.6767130494117737, + "learning_rate": 6.144516951198374e-06, + "loss": 0.724, + "step": 15555 + }, + { + "epoch": 0.8561836094446584, + "grad_norm": 0.7025824785232544, + "learning_rate": 6.144094989190219e-06, + "loss": 0.7658, + "step": 15556 + }, + { + "epoch": 0.8562386482470141, + "grad_norm": 0.6780791282653809, + "learning_rate": 6.143673018583353e-06, + "loss": 0.7273, + "step": 15557 + }, + { + "epoch": 0.8562936870493698, + "grad_norm": 0.6552621722221375, + "learning_rate": 6.143251039380944e-06, + "loss": 0.7374, + "step": 15558 + }, + { + "epoch": 0.8563487258517255, + "grad_norm": 0.7451765537261963, + "learning_rate": 6.142829051586169e-06, + "loss": 0.7918, + "step": 15559 + }, + { + "epoch": 0.8564037646540811, + "grad_norm": 0.7365521788597107, + "learning_rate": 6.142407055202195e-06, + "loss": 0.7142, + "step": 15560 + }, + { + "epoch": 0.8564588034564368, + "grad_norm": 0.6708245277404785, + "learning_rate": 6.1419850502321976e-06, + "loss": 0.773, + "step": 15561 + }, + { + "epoch": 0.8565138422587925, + "grad_norm": 0.8878017067909241, + "learning_rate": 6.141563036679344e-06, + "loss": 0.7704, + "step": 15562 + }, + { + "epoch": 0.8565688810611481, + "grad_norm": 0.8903444409370422, + "learning_rate": 6.14114101454681e-06, + "loss": 0.8422, + "step": 15563 + }, + { + "epoch": 0.8566239198635037, + "grad_norm": 0.7124255895614624, + "learning_rate": 6.140718983837764e-06, + "loss": 0.74, + "step": 15564 + }, + { + "epoch": 0.8566789586658594, + "grad_norm": 0.7107509970664978, + "learning_rate": 6.14029694455538e-06, + "loss": 0.7052, + "step": 15565 + }, + { + "epoch": 0.8567339974682151, + "grad_norm": 0.6864815950393677, + "learning_rate": 6.13987489670283e-06, + "loss": 0.7541, + "step": 15566 + }, + { + "epoch": 0.8567890362705708, + "grad_norm": 0.696169912815094, + "learning_rate": 6.1394528402832845e-06, + "loss": 0.7829, + "step": 15567 + }, + { + "epoch": 0.8568440750729264, + "grad_norm": 0.5650855302810669, + "learning_rate": 6.139030775299917e-06, + "loss": 0.6268, + "step": 15568 + }, + { + "epoch": 0.8568991138752821, + "grad_norm": 0.8753485083580017, + "learning_rate": 6.138608701755899e-06, + "loss": 0.7658, + "step": 15569 + }, + { + "epoch": 0.8569541526776377, + "grad_norm": 0.6950936317443848, + "learning_rate": 6.138186619654401e-06, + "loss": 0.8013, + "step": 15570 + }, + { + "epoch": 0.8570091914799934, + "grad_norm": 0.6608526110649109, + "learning_rate": 6.1377645289986e-06, + "loss": 0.8095, + "step": 15571 + }, + { + "epoch": 0.857064230282349, + "grad_norm": 0.6566348075866699, + "learning_rate": 6.137342429791664e-06, + "loss": 0.787, + "step": 15572 + }, + { + "epoch": 0.8571192690847047, + "grad_norm": 0.6536870002746582, + "learning_rate": 6.136920322036768e-06, + "loss": 0.8754, + "step": 15573 + }, + { + "epoch": 0.8571743078870604, + "grad_norm": 0.6461239457130432, + "learning_rate": 6.136498205737081e-06, + "loss": 0.6948, + "step": 15574 + }, + { + "epoch": 0.8572293466894161, + "grad_norm": 0.6441778540611267, + "learning_rate": 6.13607608089578e-06, + "loss": 0.7097, + "step": 15575 + }, + { + "epoch": 0.8572843854917717, + "grad_norm": 0.6770008206367493, + "learning_rate": 6.135653947516034e-06, + "loss": 0.7497, + "step": 15576 + }, + { + "epoch": 0.8573394242941274, + "grad_norm": 0.6479504704475403, + "learning_rate": 6.1352318056010175e-06, + "loss": 0.6975, + "step": 15577 + }, + { + "epoch": 0.857394463096483, + "grad_norm": 0.6586747765541077, + "learning_rate": 6.134809655153901e-06, + "loss": 0.7977, + "step": 15578 + }, + { + "epoch": 0.8574495018988387, + "grad_norm": 0.6888973116874695, + "learning_rate": 6.1343874961778604e-06, + "loss": 0.7399, + "step": 15579 + }, + { + "epoch": 0.8575045407011943, + "grad_norm": 0.6897402405738831, + "learning_rate": 6.133965328676066e-06, + "loss": 0.7507, + "step": 15580 + }, + { + "epoch": 0.85755957950355, + "grad_norm": 0.6857936382293701, + "learning_rate": 6.133543152651693e-06, + "loss": 0.76, + "step": 15581 + }, + { + "epoch": 0.8576146183059057, + "grad_norm": 0.8104296922683716, + "learning_rate": 6.133120968107912e-06, + "loss": 0.711, + "step": 15582 + }, + { + "epoch": 0.8576696571082614, + "grad_norm": 0.786551296710968, + "learning_rate": 6.132698775047897e-06, + "loss": 0.7603, + "step": 15583 + }, + { + "epoch": 0.857724695910617, + "grad_norm": 0.6685918569564819, + "learning_rate": 6.132276573474822e-06, + "loss": 0.6986, + "step": 15584 + }, + { + "epoch": 0.8577797347129726, + "grad_norm": 0.8557218909263611, + "learning_rate": 6.131854363391859e-06, + "loss": 0.795, + "step": 15585 + }, + { + "epoch": 0.8578347735153283, + "grad_norm": 0.6823254823684692, + "learning_rate": 6.1314321448021825e-06, + "loss": 0.7349, + "step": 15586 + }, + { + "epoch": 0.857889812317684, + "grad_norm": 0.772792637348175, + "learning_rate": 6.131009917708965e-06, + "loss": 0.7547, + "step": 15587 + }, + { + "epoch": 0.8579448511200396, + "grad_norm": 0.7231488227844238, + "learning_rate": 6.130587682115379e-06, + "loss": 0.7997, + "step": 15588 + }, + { + "epoch": 0.8579998899223953, + "grad_norm": 0.6683667898178101, + "learning_rate": 6.130165438024598e-06, + "loss": 0.7863, + "step": 15589 + }, + { + "epoch": 0.858054928724751, + "grad_norm": 0.6588496565818787, + "learning_rate": 6.129743185439796e-06, + "loss": 0.7859, + "step": 15590 + }, + { + "epoch": 0.8581099675271067, + "grad_norm": 0.6130164861679077, + "learning_rate": 6.129320924364147e-06, + "loss": 0.7042, + "step": 15591 + }, + { + "epoch": 0.8581650063294622, + "grad_norm": 0.610054612159729, + "learning_rate": 6.128898654800824e-06, + "loss": 0.6645, + "step": 15592 + }, + { + "epoch": 0.8582200451318179, + "grad_norm": 0.6974982023239136, + "learning_rate": 6.128476376753002e-06, + "loss": 0.8146, + "step": 15593 + }, + { + "epoch": 0.8582750839341736, + "grad_norm": 0.7313922047615051, + "learning_rate": 6.128054090223853e-06, + "loss": 0.7055, + "step": 15594 + }, + { + "epoch": 0.8583301227365292, + "grad_norm": 0.7004476189613342, + "learning_rate": 6.12763179521655e-06, + "loss": 0.7848, + "step": 15595 + }, + { + "epoch": 0.8583851615388849, + "grad_norm": 0.6916295289993286, + "learning_rate": 6.127209491734269e-06, + "loss": 0.7711, + "step": 15596 + }, + { + "epoch": 0.8584402003412406, + "grad_norm": 0.648551881313324, + "learning_rate": 6.126787179780185e-06, + "loss": 0.7098, + "step": 15597 + }, + { + "epoch": 0.8584952391435963, + "grad_norm": 0.6482384204864502, + "learning_rate": 6.126364859357469e-06, + "loss": 0.7596, + "step": 15598 + }, + { + "epoch": 0.8585502779459518, + "grad_norm": 0.7109531164169312, + "learning_rate": 6.125942530469297e-06, + "loss": 0.7539, + "step": 15599 + }, + { + "epoch": 0.8586053167483075, + "grad_norm": 0.6109207272529602, + "learning_rate": 6.125520193118841e-06, + "loss": 0.6764, + "step": 15600 + }, + { + "epoch": 0.8586603555506632, + "grad_norm": 0.7050053477287292, + "learning_rate": 6.125097847309277e-06, + "loss": 0.7304, + "step": 15601 + }, + { + "epoch": 0.8587153943530189, + "grad_norm": 0.653078019618988, + "learning_rate": 6.124675493043779e-06, + "loss": 0.6985, + "step": 15602 + }, + { + "epoch": 0.8587704331553745, + "grad_norm": 0.8391665816307068, + "learning_rate": 6.124253130325521e-06, + "loss": 0.8011, + "step": 15603 + }, + { + "epoch": 0.8588254719577302, + "grad_norm": 0.6978835463523865, + "learning_rate": 6.123830759157676e-06, + "loss": 0.6783, + "step": 15604 + }, + { + "epoch": 0.8588805107600859, + "grad_norm": 0.7796862125396729, + "learning_rate": 6.123408379543422e-06, + "loss": 0.7237, + "step": 15605 + }, + { + "epoch": 0.8589355495624416, + "grad_norm": 0.7162224054336548, + "learning_rate": 6.12298599148593e-06, + "loss": 0.8095, + "step": 15606 + }, + { + "epoch": 0.8589905883647971, + "grad_norm": 0.7654495239257812, + "learning_rate": 6.122563594988375e-06, + "loss": 0.7149, + "step": 15607 + }, + { + "epoch": 0.8590456271671528, + "grad_norm": 0.6186618804931641, + "learning_rate": 6.122141190053935e-06, + "loss": 0.6687, + "step": 15608 + }, + { + "epoch": 0.8591006659695085, + "grad_norm": 0.6669701337814331, + "learning_rate": 6.121718776685781e-06, + "loss": 0.7281, + "step": 15609 + }, + { + "epoch": 0.8591557047718642, + "grad_norm": 0.6581971645355225, + "learning_rate": 6.121296354887089e-06, + "loss": 0.7158, + "step": 15610 + }, + { + "epoch": 0.8592107435742198, + "grad_norm": 0.698243260383606, + "learning_rate": 6.120873924661034e-06, + "loss": 0.7894, + "step": 15611 + }, + { + "epoch": 0.8592657823765755, + "grad_norm": 0.6746723651885986, + "learning_rate": 6.120451486010791e-06, + "loss": 0.7993, + "step": 15612 + }, + { + "epoch": 0.8593208211789312, + "grad_norm": 0.727219820022583, + "learning_rate": 6.1200290389395335e-06, + "loss": 0.8446, + "step": 15613 + }, + { + "epoch": 0.8593758599812868, + "grad_norm": 0.7818809151649475, + "learning_rate": 6.119606583450438e-06, + "loss": 0.7167, + "step": 15614 + }, + { + "epoch": 0.8594308987836424, + "grad_norm": 0.692720890045166, + "learning_rate": 6.119184119546679e-06, + "loss": 0.705, + "step": 15615 + }, + { + "epoch": 0.8594859375859981, + "grad_norm": 0.6671997308731079, + "learning_rate": 6.1187616472314315e-06, + "loss": 0.8383, + "step": 15616 + }, + { + "epoch": 0.8595409763883538, + "grad_norm": 0.8043667674064636, + "learning_rate": 6.118339166507872e-06, + "loss": 0.6775, + "step": 15617 + }, + { + "epoch": 0.8595960151907095, + "grad_norm": 0.6313692927360535, + "learning_rate": 6.117916677379173e-06, + "loss": 0.6327, + "step": 15618 + }, + { + "epoch": 0.8596510539930651, + "grad_norm": 0.6770568490028381, + "learning_rate": 6.117494179848512e-06, + "loss": 0.741, + "step": 15619 + }, + { + "epoch": 0.8597060927954208, + "grad_norm": 0.6715630292892456, + "learning_rate": 6.117071673919064e-06, + "loss": 0.7105, + "step": 15620 + }, + { + "epoch": 0.8597611315977765, + "grad_norm": 0.618145763874054, + "learning_rate": 6.116649159594006e-06, + "loss": 0.7316, + "step": 15621 + }, + { + "epoch": 0.8598161704001321, + "grad_norm": 0.7127259969711304, + "learning_rate": 6.11622663687651e-06, + "loss": 0.7736, + "step": 15622 + }, + { + "epoch": 0.8598712092024877, + "grad_norm": 0.6675243377685547, + "learning_rate": 6.115804105769754e-06, + "loss": 0.6747, + "step": 15623 + }, + { + "epoch": 0.8599262480048434, + "grad_norm": 0.7965354323387146, + "learning_rate": 6.115381566276912e-06, + "loss": 0.7524, + "step": 15624 + }, + { + "epoch": 0.8599812868071991, + "grad_norm": 0.5921181440353394, + "learning_rate": 6.114959018401163e-06, + "loss": 0.679, + "step": 15625 + }, + { + "epoch": 0.8600363256095548, + "grad_norm": 0.635802149772644, + "learning_rate": 6.1145364621456795e-06, + "loss": 0.699, + "step": 15626 + }, + { + "epoch": 0.8600913644119104, + "grad_norm": 0.7159842252731323, + "learning_rate": 6.114113897513636e-06, + "loss": 0.7112, + "step": 15627 + }, + { + "epoch": 0.860146403214266, + "grad_norm": 0.7100176215171814, + "learning_rate": 6.113691324508213e-06, + "loss": 0.7459, + "step": 15628 + }, + { + "epoch": 0.8602014420166217, + "grad_norm": 0.6484093070030212, + "learning_rate": 6.113268743132583e-06, + "loss": 0.6779, + "step": 15629 + }, + { + "epoch": 0.8602564808189774, + "grad_norm": 0.6825945377349854, + "learning_rate": 6.112846153389924e-06, + "loss": 0.7607, + "step": 15630 + }, + { + "epoch": 0.860311519621333, + "grad_norm": 0.7553657293319702, + "learning_rate": 6.112423555283411e-06, + "loss": 0.6945, + "step": 15631 + }, + { + "epoch": 0.8603665584236887, + "grad_norm": 0.7892605662345886, + "learning_rate": 6.11200094881622e-06, + "loss": 0.767, + "step": 15632 + }, + { + "epoch": 0.8604215972260444, + "grad_norm": 0.6485433578491211, + "learning_rate": 6.111578333991528e-06, + "loss": 0.7302, + "step": 15633 + }, + { + "epoch": 0.8604766360284001, + "grad_norm": 0.6713895201683044, + "learning_rate": 6.111155710812511e-06, + "loss": 0.774, + "step": 15634 + }, + { + "epoch": 0.8605316748307557, + "grad_norm": 0.9890132546424866, + "learning_rate": 6.110733079282345e-06, + "loss": 0.7549, + "step": 15635 + }, + { + "epoch": 0.8605867136331113, + "grad_norm": 0.6421818137168884, + "learning_rate": 6.110310439404206e-06, + "loss": 0.7004, + "step": 15636 + }, + { + "epoch": 0.860641752435467, + "grad_norm": 0.6384093165397644, + "learning_rate": 6.109887791181272e-06, + "loss": 0.7465, + "step": 15637 + }, + { + "epoch": 0.8606967912378226, + "grad_norm": 0.7991462349891663, + "learning_rate": 6.109465134616717e-06, + "loss": 0.8041, + "step": 15638 + }, + { + "epoch": 0.8607518300401783, + "grad_norm": 0.661189615726471, + "learning_rate": 6.1090424697137185e-06, + "loss": 0.7717, + "step": 15639 + }, + { + "epoch": 0.860806868842534, + "grad_norm": 0.6952805519104004, + "learning_rate": 6.108619796475455e-06, + "loss": 0.7149, + "step": 15640 + }, + { + "epoch": 0.8608619076448897, + "grad_norm": 0.7330671548843384, + "learning_rate": 6.108197114905102e-06, + "loss": 0.7229, + "step": 15641 + }, + { + "epoch": 0.8609169464472453, + "grad_norm": 0.6831181049346924, + "learning_rate": 6.107774425005836e-06, + "loss": 0.6937, + "step": 15642 + }, + { + "epoch": 0.860971985249601, + "grad_norm": 0.7261425852775574, + "learning_rate": 6.107351726780833e-06, + "loss": 0.7963, + "step": 15643 + }, + { + "epoch": 0.8610270240519566, + "grad_norm": 0.6796271800994873, + "learning_rate": 6.106929020233272e-06, + "loss": 0.7785, + "step": 15644 + }, + { + "epoch": 0.8610820628543123, + "grad_norm": 0.6772015690803528, + "learning_rate": 6.106506305366328e-06, + "loss": 0.7732, + "step": 15645 + }, + { + "epoch": 0.8611371016566679, + "grad_norm": 0.6153992414474487, + "learning_rate": 6.10608358218318e-06, + "loss": 0.6361, + "step": 15646 + }, + { + "epoch": 0.8611921404590236, + "grad_norm": 0.9580141305923462, + "learning_rate": 6.105660850687003e-06, + "loss": 0.7178, + "step": 15647 + }, + { + "epoch": 0.8612471792613793, + "grad_norm": 0.8536281585693359, + "learning_rate": 6.105238110880975e-06, + "loss": 0.6689, + "step": 15648 + }, + { + "epoch": 0.861302218063735, + "grad_norm": 0.6578275561332703, + "learning_rate": 6.104815362768274e-06, + "loss": 0.7474, + "step": 15649 + }, + { + "epoch": 0.8613572568660905, + "grad_norm": 0.7298864126205444, + "learning_rate": 6.104392606352075e-06, + "loss": 0.7134, + "step": 15650 + }, + { + "epoch": 0.8614122956684462, + "grad_norm": 1.0637938976287842, + "learning_rate": 6.103969841635557e-06, + "loss": 0.8614, + "step": 15651 + }, + { + "epoch": 0.8614673344708019, + "grad_norm": 0.6678902506828308, + "learning_rate": 6.103547068621898e-06, + "loss": 0.6736, + "step": 15652 + }, + { + "epoch": 0.8615223732731576, + "grad_norm": 0.9442873001098633, + "learning_rate": 6.103124287314275e-06, + "loss": 0.8002, + "step": 15653 + }, + { + "epoch": 0.8615774120755132, + "grad_norm": 0.7156786322593689, + "learning_rate": 6.102701497715864e-06, + "loss": 0.8764, + "step": 15654 + }, + { + "epoch": 0.8616324508778689, + "grad_norm": 0.7954290509223938, + "learning_rate": 6.102278699829843e-06, + "loss": 0.7308, + "step": 15655 + }, + { + "epoch": 0.8616874896802246, + "grad_norm": 0.7544524073600769, + "learning_rate": 6.101855893659392e-06, + "loss": 0.7064, + "step": 15656 + }, + { + "epoch": 0.8617425284825803, + "grad_norm": 0.652656078338623, + "learning_rate": 6.101433079207687e-06, + "loss": 0.7264, + "step": 15657 + }, + { + "epoch": 0.8617975672849358, + "grad_norm": 0.6478716135025024, + "learning_rate": 6.101010256477906e-06, + "loss": 0.6265, + "step": 15658 + }, + { + "epoch": 0.8618526060872915, + "grad_norm": 0.5916007161140442, + "learning_rate": 6.1005874254732256e-06, + "loss": 0.6485, + "step": 15659 + }, + { + "epoch": 0.8619076448896472, + "grad_norm": 0.7353591322898865, + "learning_rate": 6.1001645861968264e-06, + "loss": 0.759, + "step": 15660 + }, + { + "epoch": 0.8619626836920029, + "grad_norm": 0.7352280020713806, + "learning_rate": 6.099741738651883e-06, + "loss": 0.7392, + "step": 15661 + }, + { + "epoch": 0.8620177224943585, + "grad_norm": 0.6027048230171204, + "learning_rate": 6.099318882841576e-06, + "loss": 0.7039, + "step": 15662 + }, + { + "epoch": 0.8620727612967142, + "grad_norm": 0.6907329559326172, + "learning_rate": 6.09889601876908e-06, + "loss": 0.8164, + "step": 15663 + }, + { + "epoch": 0.8621278000990699, + "grad_norm": 0.7133687138557434, + "learning_rate": 6.098473146437579e-06, + "loss": 0.791, + "step": 15664 + }, + { + "epoch": 0.8621828389014256, + "grad_norm": 0.7229570746421814, + "learning_rate": 6.098050265850246e-06, + "loss": 0.7839, + "step": 15665 + }, + { + "epoch": 0.8622378777037811, + "grad_norm": 0.7066009640693665, + "learning_rate": 6.097627377010262e-06, + "loss": 0.6816, + "step": 15666 + }, + { + "epoch": 0.8622929165061368, + "grad_norm": 0.7801152467727661, + "learning_rate": 6.097204479920804e-06, + "loss": 0.7402, + "step": 15667 + }, + { + "epoch": 0.8623479553084925, + "grad_norm": 0.6149227023124695, + "learning_rate": 6.096781574585051e-06, + "loss": 0.6964, + "step": 15668 + }, + { + "epoch": 0.8624029941108482, + "grad_norm": 0.6978023648262024, + "learning_rate": 6.096358661006181e-06, + "loss": 0.6982, + "step": 15669 + }, + { + "epoch": 0.8624580329132038, + "grad_norm": 0.6561335325241089, + "learning_rate": 6.095935739187373e-06, + "loss": 0.74, + "step": 15670 + }, + { + "epoch": 0.8625130717155595, + "grad_norm": 0.7627743482589722, + "learning_rate": 6.0955128091318065e-06, + "loss": 0.6886, + "step": 15671 + }, + { + "epoch": 0.8625681105179152, + "grad_norm": 0.919551432132721, + "learning_rate": 6.095089870842657e-06, + "loss": 0.7453, + "step": 15672 + }, + { + "epoch": 0.8626231493202708, + "grad_norm": 0.732641875743866, + "learning_rate": 6.094666924323107e-06, + "loss": 0.7502, + "step": 15673 + }, + { + "epoch": 0.8626781881226264, + "grad_norm": 0.7035035490989685, + "learning_rate": 6.094243969576332e-06, + "loss": 0.7825, + "step": 15674 + }, + { + "epoch": 0.8627332269249821, + "grad_norm": 0.648766279220581, + "learning_rate": 6.093821006605513e-06, + "loss": 0.6771, + "step": 15675 + }, + { + "epoch": 0.8627882657273378, + "grad_norm": 0.6031193137168884, + "learning_rate": 6.093398035413828e-06, + "loss": 0.6246, + "step": 15676 + }, + { + "epoch": 0.8628433045296935, + "grad_norm": 0.76170414686203, + "learning_rate": 6.0929750560044555e-06, + "loss": 0.713, + "step": 15677 + }, + { + "epoch": 0.8628983433320491, + "grad_norm": 1.023805022239685, + "learning_rate": 6.092552068380575e-06, + "loss": 0.6824, + "step": 15678 + }, + { + "epoch": 0.8629533821344048, + "grad_norm": 0.7333651185035706, + "learning_rate": 6.092129072545366e-06, + "loss": 0.7213, + "step": 15679 + }, + { + "epoch": 0.8630084209367604, + "grad_norm": 0.6620833873748779, + "learning_rate": 6.091706068502007e-06, + "loss": 0.7436, + "step": 15680 + }, + { + "epoch": 0.863063459739116, + "grad_norm": 0.5971367359161377, + "learning_rate": 6.091283056253679e-06, + "loss": 0.6774, + "step": 15681 + }, + { + "epoch": 0.8631184985414717, + "grad_norm": 0.6435208320617676, + "learning_rate": 6.090860035803558e-06, + "loss": 0.7556, + "step": 15682 + }, + { + "epoch": 0.8631735373438274, + "grad_norm": 0.6666582822799683, + "learning_rate": 6.090437007154824e-06, + "loss": 0.7533, + "step": 15683 + }, + { + "epoch": 0.8632285761461831, + "grad_norm": 0.665928840637207, + "learning_rate": 6.09001397031066e-06, + "loss": 0.7325, + "step": 15684 + }, + { + "epoch": 0.8632836149485387, + "grad_norm": 0.6638591885566711, + "learning_rate": 6.0895909252742414e-06, + "loss": 0.8256, + "step": 15685 + }, + { + "epoch": 0.8633386537508944, + "grad_norm": 0.6556721925735474, + "learning_rate": 6.089167872048749e-06, + "loss": 0.728, + "step": 15686 + }, + { + "epoch": 0.86339369255325, + "grad_norm": 0.6327305436134338, + "learning_rate": 6.088744810637361e-06, + "loss": 0.7584, + "step": 15687 + }, + { + "epoch": 0.8634487313556057, + "grad_norm": 0.676216185092926, + "learning_rate": 6.088321741043262e-06, + "loss": 0.7868, + "step": 15688 + }, + { + "epoch": 0.8635037701579613, + "grad_norm": 0.646700918674469, + "learning_rate": 6.0878986632696255e-06, + "loss": 0.7248, + "step": 15689 + }, + { + "epoch": 0.863558808960317, + "grad_norm": 0.6748735308647156, + "learning_rate": 6.087475577319635e-06, + "loss": 0.7657, + "step": 15690 + }, + { + "epoch": 0.8636138477626727, + "grad_norm": 0.6363335251808167, + "learning_rate": 6.087052483196467e-06, + "loss": 0.7273, + "step": 15691 + }, + { + "epoch": 0.8636688865650284, + "grad_norm": 0.6166467666625977, + "learning_rate": 6.086629380903305e-06, + "loss": 0.6642, + "step": 15692 + }, + { + "epoch": 0.863723925367384, + "grad_norm": 1.3258485794067383, + "learning_rate": 6.086206270443328e-06, + "loss": 0.7227, + "step": 15693 + }, + { + "epoch": 0.8637789641697396, + "grad_norm": 0.8923795223236084, + "learning_rate": 6.085783151819716e-06, + "loss": 0.7513, + "step": 15694 + }, + { + "epoch": 0.8638340029720953, + "grad_norm": 0.7227154970169067, + "learning_rate": 6.085360025035647e-06, + "loss": 0.7078, + "step": 15695 + }, + { + "epoch": 0.863889041774451, + "grad_norm": 0.6465400457382202, + "learning_rate": 6.084936890094303e-06, + "loss": 0.7541, + "step": 15696 + }, + { + "epoch": 0.8639440805768066, + "grad_norm": 0.6628104448318481, + "learning_rate": 6.084513746998865e-06, + "loss": 0.7121, + "step": 15697 + }, + { + "epoch": 0.8639991193791623, + "grad_norm": 0.6723392605781555, + "learning_rate": 6.08409059575251e-06, + "loss": 0.7086, + "step": 15698 + }, + { + "epoch": 0.864054158181518, + "grad_norm": 0.7443264126777649, + "learning_rate": 6.08366743635842e-06, + "loss": 0.7098, + "step": 15699 + }, + { + "epoch": 0.8641091969838737, + "grad_norm": 0.7792028188705444, + "learning_rate": 6.083244268819777e-06, + "loss": 0.8472, + "step": 15700 + }, + { + "epoch": 0.8641642357862293, + "grad_norm": 0.7211549878120422, + "learning_rate": 6.08282109313976e-06, + "loss": 0.7917, + "step": 15701 + }, + { + "epoch": 0.8642192745885849, + "grad_norm": 0.6670592427253723, + "learning_rate": 6.082397909321549e-06, + "loss": 0.7758, + "step": 15702 + }, + { + "epoch": 0.8642743133909406, + "grad_norm": 0.8279144167900085, + "learning_rate": 6.0819747173683255e-06, + "loss": 0.7355, + "step": 15703 + }, + { + "epoch": 0.8643293521932963, + "grad_norm": 0.7409362196922302, + "learning_rate": 6.081551517283269e-06, + "loss": 0.7283, + "step": 15704 + }, + { + "epoch": 0.8643843909956519, + "grad_norm": 0.6700742840766907, + "learning_rate": 6.081128309069562e-06, + "loss": 0.6555, + "step": 15705 + }, + { + "epoch": 0.8644394297980076, + "grad_norm": 0.7364388108253479, + "learning_rate": 6.080705092730383e-06, + "loss": 0.6652, + "step": 15706 + }, + { + "epoch": 0.8644944686003633, + "grad_norm": 0.778404176235199, + "learning_rate": 6.080281868268913e-06, + "loss": 0.7774, + "step": 15707 + }, + { + "epoch": 0.864549507402719, + "grad_norm": 0.6663825511932373, + "learning_rate": 6.079858635688336e-06, + "loss": 0.7665, + "step": 15708 + }, + { + "epoch": 0.8646045462050745, + "grad_norm": 0.7061408758163452, + "learning_rate": 6.079435394991829e-06, + "loss": 0.7824, + "step": 15709 + }, + { + "epoch": 0.8646595850074302, + "grad_norm": 0.6537507176399231, + "learning_rate": 6.079012146182576e-06, + "loss": 0.7313, + "step": 15710 + }, + { + "epoch": 0.8647146238097859, + "grad_norm": 0.6154575943946838, + "learning_rate": 6.078588889263754e-06, + "loss": 0.7066, + "step": 15711 + }, + { + "epoch": 0.8647696626121416, + "grad_norm": 0.659093976020813, + "learning_rate": 6.078165624238548e-06, + "loss": 0.7495, + "step": 15712 + }, + { + "epoch": 0.8648247014144972, + "grad_norm": 0.677669107913971, + "learning_rate": 6.077742351110138e-06, + "loss": 0.7072, + "step": 15713 + }, + { + "epoch": 0.8648797402168529, + "grad_norm": 0.7204097509384155, + "learning_rate": 6.077319069881705e-06, + "loss": 0.7181, + "step": 15714 + }, + { + "epoch": 0.8649347790192086, + "grad_norm": 0.6903330683708191, + "learning_rate": 6.076895780556429e-06, + "loss": 0.8565, + "step": 15715 + }, + { + "epoch": 0.8649898178215643, + "grad_norm": 0.8147342205047607, + "learning_rate": 6.076472483137493e-06, + "loss": 0.6916, + "step": 15716 + }, + { + "epoch": 0.8650448566239198, + "grad_norm": 0.7021569013595581, + "learning_rate": 6.076049177628079e-06, + "loss": 0.6893, + "step": 15717 + }, + { + "epoch": 0.8650998954262755, + "grad_norm": 0.6534682512283325, + "learning_rate": 6.075625864031368e-06, + "loss": 0.6313, + "step": 15718 + }, + { + "epoch": 0.8651549342286312, + "grad_norm": 0.7883698344230652, + "learning_rate": 6.07520254235054e-06, + "loss": 0.7204, + "step": 15719 + }, + { + "epoch": 0.8652099730309869, + "grad_norm": 0.6255857944488525, + "learning_rate": 6.074779212588777e-06, + "loss": 0.7137, + "step": 15720 + }, + { + "epoch": 0.8652650118333425, + "grad_norm": 0.7278919816017151, + "learning_rate": 6.074355874749261e-06, + "loss": 0.8003, + "step": 15721 + }, + { + "epoch": 0.8653200506356982, + "grad_norm": 0.7809221744537354, + "learning_rate": 6.073932528835176e-06, + "loss": 0.8652, + "step": 15722 + }, + { + "epoch": 0.8653750894380539, + "grad_norm": 0.6781452894210815, + "learning_rate": 6.0735091748496985e-06, + "loss": 0.6111, + "step": 15723 + }, + { + "epoch": 0.8654301282404094, + "grad_norm": 0.6400741934776306, + "learning_rate": 6.073085812796015e-06, + "loss": 0.7126, + "step": 15724 + }, + { + "epoch": 0.8654851670427651, + "grad_norm": 0.6753132343292236, + "learning_rate": 6.072662442677305e-06, + "loss": 0.7389, + "step": 15725 + }, + { + "epoch": 0.8655402058451208, + "grad_norm": 0.6688135266304016, + "learning_rate": 6.072239064496752e-06, + "loss": 0.6356, + "step": 15726 + }, + { + "epoch": 0.8655952446474765, + "grad_norm": 0.664271354675293, + "learning_rate": 6.0718156782575365e-06, + "loss": 0.6949, + "step": 15727 + }, + { + "epoch": 0.8656502834498321, + "grad_norm": 0.6760862469673157, + "learning_rate": 6.071392283962843e-06, + "loss": 0.7279, + "step": 15728 + }, + { + "epoch": 0.8657053222521878, + "grad_norm": 0.6911706924438477, + "learning_rate": 6.07096888161585e-06, + "loss": 0.8132, + "step": 15729 + }, + { + "epoch": 0.8657603610545435, + "grad_norm": 0.7274359464645386, + "learning_rate": 6.070545471219743e-06, + "loss": 0.7894, + "step": 15730 + }, + { + "epoch": 0.8658153998568991, + "grad_norm": 0.7742472290992737, + "learning_rate": 6.070122052777703e-06, + "loss": 0.8057, + "step": 15731 + }, + { + "epoch": 0.8658704386592547, + "grad_norm": 0.8446773290634155, + "learning_rate": 6.06969862629291e-06, + "loss": 0.7816, + "step": 15732 + }, + { + "epoch": 0.8659254774616104, + "grad_norm": 0.669518232345581, + "learning_rate": 6.06927519176855e-06, + "loss": 0.696, + "step": 15733 + }, + { + "epoch": 0.8659805162639661, + "grad_norm": 0.6845564842224121, + "learning_rate": 6.068851749207803e-06, + "loss": 0.7486, + "step": 15734 + }, + { + "epoch": 0.8660355550663218, + "grad_norm": 0.6650436520576477, + "learning_rate": 6.068428298613853e-06, + "loss": 0.7215, + "step": 15735 + }, + { + "epoch": 0.8660905938686774, + "grad_norm": 0.67397540807724, + "learning_rate": 6.068004839989881e-06, + "loss": 0.7458, + "step": 15736 + }, + { + "epoch": 0.8661456326710331, + "grad_norm": 0.7140672206878662, + "learning_rate": 6.067581373339072e-06, + "loss": 0.8213, + "step": 15737 + }, + { + "epoch": 0.8662006714733888, + "grad_norm": 0.8632931113243103, + "learning_rate": 6.067157898664606e-06, + "loss": 0.8109, + "step": 15738 + }, + { + "epoch": 0.8662557102757444, + "grad_norm": 0.6106804013252258, + "learning_rate": 6.066734415969669e-06, + "loss": 0.7183, + "step": 15739 + }, + { + "epoch": 0.8663107490781, + "grad_norm": 0.8055095672607422, + "learning_rate": 6.066310925257438e-06, + "loss": 0.7871, + "step": 15740 + }, + { + "epoch": 0.8663657878804557, + "grad_norm": 0.6310189366340637, + "learning_rate": 6.065887426531102e-06, + "loss": 0.5873, + "step": 15741 + }, + { + "epoch": 0.8664208266828114, + "grad_norm": 0.6704412698745728, + "learning_rate": 6.065463919793842e-06, + "loss": 0.6838, + "step": 15742 + }, + { + "epoch": 0.8664758654851671, + "grad_norm": 0.6292148232460022, + "learning_rate": 6.06504040504884e-06, + "loss": 0.6886, + "step": 15743 + }, + { + "epoch": 0.8665309042875227, + "grad_norm": 0.8556584715843201, + "learning_rate": 6.064616882299277e-06, + "loss": 0.8967, + "step": 15744 + }, + { + "epoch": 0.8665859430898784, + "grad_norm": 0.6956119537353516, + "learning_rate": 6.064193351548341e-06, + "loss": 0.7444, + "step": 15745 + }, + { + "epoch": 0.866640981892234, + "grad_norm": 1.01414954662323, + "learning_rate": 6.063769812799212e-06, + "loss": 0.9216, + "step": 15746 + }, + { + "epoch": 0.8666960206945897, + "grad_norm": 0.6685424447059631, + "learning_rate": 6.063346266055073e-06, + "loss": 0.6795, + "step": 15747 + }, + { + "epoch": 0.8667510594969453, + "grad_norm": 0.6735886335372925, + "learning_rate": 6.062922711319108e-06, + "loss": 0.6805, + "step": 15748 + }, + { + "epoch": 0.866806098299301, + "grad_norm": 0.6536576747894287, + "learning_rate": 6.062499148594502e-06, + "loss": 0.6575, + "step": 15749 + }, + { + "epoch": 0.8668611371016567, + "grad_norm": 0.6739212870597839, + "learning_rate": 6.062075577884437e-06, + "loss": 0.6704, + "step": 15750 + }, + { + "epoch": 0.8669161759040124, + "grad_norm": 0.73397296667099, + "learning_rate": 6.061651999192094e-06, + "loss": 0.7892, + "step": 15751 + }, + { + "epoch": 0.866971214706368, + "grad_norm": 0.7974724769592285, + "learning_rate": 6.06122841252066e-06, + "loss": 0.7133, + "step": 15752 + }, + { + "epoch": 0.8670262535087236, + "grad_norm": 0.6199150681495667, + "learning_rate": 6.060804817873317e-06, + "loss": 0.765, + "step": 15753 + }, + { + "epoch": 0.8670812923110793, + "grad_norm": 0.709783673286438, + "learning_rate": 6.060381215253251e-06, + "loss": 0.7332, + "step": 15754 + }, + { + "epoch": 0.867136331113435, + "grad_norm": 0.6947084069252014, + "learning_rate": 6.059957604663642e-06, + "loss": 0.8224, + "step": 15755 + }, + { + "epoch": 0.8671913699157906, + "grad_norm": 0.9439684152603149, + "learning_rate": 6.059533986107674e-06, + "loss": 0.8347, + "step": 15756 + }, + { + "epoch": 0.8672464087181463, + "grad_norm": 0.806992769241333, + "learning_rate": 6.059110359588534e-06, + "loss": 0.8055, + "step": 15757 + }, + { + "epoch": 0.867301447520502, + "grad_norm": 0.659092128276825, + "learning_rate": 6.058686725109404e-06, + "loss": 0.6972, + "step": 15758 + }, + { + "epoch": 0.8673564863228577, + "grad_norm": 0.7345813512802124, + "learning_rate": 6.058263082673468e-06, + "loss": 0.8044, + "step": 15759 + }, + { + "epoch": 0.8674115251252132, + "grad_norm": 0.7216777205467224, + "learning_rate": 6.057839432283908e-06, + "loss": 0.7816, + "step": 15760 + }, + { + "epoch": 0.8674665639275689, + "grad_norm": 0.6828186511993408, + "learning_rate": 6.0574157739439125e-06, + "loss": 0.7534, + "step": 15761 + }, + { + "epoch": 0.8675216027299246, + "grad_norm": 0.7324418425559998, + "learning_rate": 6.0569921076566615e-06, + "loss": 0.7476, + "step": 15762 + }, + { + "epoch": 0.8675766415322803, + "grad_norm": 0.5894229412078857, + "learning_rate": 6.056568433425342e-06, + "loss": 0.6667, + "step": 15763 + }, + { + "epoch": 0.8676316803346359, + "grad_norm": 0.6743035912513733, + "learning_rate": 6.056144751253135e-06, + "loss": 0.6765, + "step": 15764 + }, + { + "epoch": 0.8676867191369916, + "grad_norm": 0.6885803937911987, + "learning_rate": 6.055721061143229e-06, + "loss": 0.6954, + "step": 15765 + }, + { + "epoch": 0.8677417579393473, + "grad_norm": 0.6543543338775635, + "learning_rate": 6.055297363098806e-06, + "loss": 0.6277, + "step": 15766 + }, + { + "epoch": 0.8677967967417028, + "grad_norm": 0.7671917080879211, + "learning_rate": 6.054873657123049e-06, + "loss": 0.7575, + "step": 15767 + }, + { + "epoch": 0.8678518355440585, + "grad_norm": 0.7491669654846191, + "learning_rate": 6.054449943219144e-06, + "loss": 0.727, + "step": 15768 + }, + { + "epoch": 0.8679068743464142, + "grad_norm": 0.7161419987678528, + "learning_rate": 6.0540262213902765e-06, + "loss": 0.7381, + "step": 15769 + }, + { + "epoch": 0.8679619131487699, + "grad_norm": 0.7061475515365601, + "learning_rate": 6.05360249163963e-06, + "loss": 0.7831, + "step": 15770 + }, + { + "epoch": 0.8680169519511255, + "grad_norm": 0.7481213212013245, + "learning_rate": 6.053178753970389e-06, + "loss": 0.7235, + "step": 15771 + }, + { + "epoch": 0.8680719907534812, + "grad_norm": 0.6475214958190918, + "learning_rate": 6.052755008385736e-06, + "loss": 0.6864, + "step": 15772 + }, + { + "epoch": 0.8681270295558369, + "grad_norm": 0.7365770936012268, + "learning_rate": 6.052331254888862e-06, + "loss": 0.7746, + "step": 15773 + }, + { + "epoch": 0.8681820683581926, + "grad_norm": 0.6339132785797119, + "learning_rate": 6.0519074934829456e-06, + "loss": 0.7102, + "step": 15774 + }, + { + "epoch": 0.8682371071605481, + "grad_norm": 0.691531240940094, + "learning_rate": 6.0514837241711754e-06, + "loss": 0.7896, + "step": 15775 + }, + { + "epoch": 0.8682921459629038, + "grad_norm": 0.6793948411941528, + "learning_rate": 6.051059946956734e-06, + "loss": 0.6514, + "step": 15776 + }, + { + "epoch": 0.8683471847652595, + "grad_norm": 0.6301077008247375, + "learning_rate": 6.050636161842809e-06, + "loss": 0.6831, + "step": 15777 + }, + { + "epoch": 0.8684022235676152, + "grad_norm": 0.7680420875549316, + "learning_rate": 6.0502123688325835e-06, + "loss": 0.7504, + "step": 15778 + }, + { + "epoch": 0.8684572623699708, + "grad_norm": 0.6260972619056702, + "learning_rate": 6.0497885679292415e-06, + "loss": 0.7066, + "step": 15779 + }, + { + "epoch": 0.8685123011723265, + "grad_norm": 0.663060188293457, + "learning_rate": 6.04936475913597e-06, + "loss": 0.6634, + "step": 15780 + }, + { + "epoch": 0.8685673399746822, + "grad_norm": 0.6798335313796997, + "learning_rate": 6.048940942455954e-06, + "loss": 0.7055, + "step": 15781 + }, + { + "epoch": 0.8686223787770379, + "grad_norm": 0.7080284953117371, + "learning_rate": 6.048517117892379e-06, + "loss": 0.7606, + "step": 15782 + }, + { + "epoch": 0.8686774175793934, + "grad_norm": 0.67658931016922, + "learning_rate": 6.04809328544843e-06, + "loss": 0.7656, + "step": 15783 + }, + { + "epoch": 0.8687324563817491, + "grad_norm": 0.6667472720146179, + "learning_rate": 6.047669445127291e-06, + "loss": 0.7275, + "step": 15784 + }, + { + "epoch": 0.8687874951841048, + "grad_norm": 0.782096266746521, + "learning_rate": 6.04724559693215e-06, + "loss": 0.7524, + "step": 15785 + }, + { + "epoch": 0.8688425339864605, + "grad_norm": 0.7733443379402161, + "learning_rate": 6.046821740866192e-06, + "loss": 0.7022, + "step": 15786 + }, + { + "epoch": 0.8688975727888161, + "grad_norm": 0.6487871408462524, + "learning_rate": 6.046397876932602e-06, + "loss": 0.7077, + "step": 15787 + }, + { + "epoch": 0.8689526115911718, + "grad_norm": 0.6294482350349426, + "learning_rate": 6.045974005134564e-06, + "loss": 0.6974, + "step": 15788 + }, + { + "epoch": 0.8690076503935275, + "grad_norm": 0.6573933362960815, + "learning_rate": 6.045550125475268e-06, + "loss": 0.735, + "step": 15789 + }, + { + "epoch": 0.8690626891958831, + "grad_norm": 0.6794875264167786, + "learning_rate": 6.045126237957895e-06, + "loss": 0.7511, + "step": 15790 + }, + { + "epoch": 0.8691177279982387, + "grad_norm": 0.687599778175354, + "learning_rate": 6.0447023425856345e-06, + "loss": 0.7164, + "step": 15791 + }, + { + "epoch": 0.8691727668005944, + "grad_norm": 0.6593008637428284, + "learning_rate": 6.04427843936167e-06, + "loss": 0.688, + "step": 15792 + }, + { + "epoch": 0.8692278056029501, + "grad_norm": 0.7226807475090027, + "learning_rate": 6.043854528289188e-06, + "loss": 0.7364, + "step": 15793 + }, + { + "epoch": 0.8692828444053058, + "grad_norm": 0.603318452835083, + "learning_rate": 6.043430609371375e-06, + "loss": 0.6933, + "step": 15794 + }, + { + "epoch": 0.8693378832076614, + "grad_norm": 0.8227141499519348, + "learning_rate": 6.043006682611416e-06, + "loss": 0.7039, + "step": 15795 + }, + { + "epoch": 0.869392922010017, + "grad_norm": 0.729284405708313, + "learning_rate": 6.042582748012499e-06, + "loss": 0.7288, + "step": 15796 + }, + { + "epoch": 0.8694479608123727, + "grad_norm": 0.8269371390342712, + "learning_rate": 6.042158805577809e-06, + "loss": 0.7419, + "step": 15797 + }, + { + "epoch": 0.8695029996147284, + "grad_norm": 0.6699450016021729, + "learning_rate": 6.0417348553105325e-06, + "loss": 0.7893, + "step": 15798 + }, + { + "epoch": 0.869558038417084, + "grad_norm": 0.7747042775154114, + "learning_rate": 6.041310897213856e-06, + "loss": 0.791, + "step": 15799 + }, + { + "epoch": 0.8696130772194397, + "grad_norm": 0.7503781318664551, + "learning_rate": 6.0408869312909645e-06, + "loss": 0.7204, + "step": 15800 + }, + { + "epoch": 0.8696681160217954, + "grad_norm": 0.6733731627464294, + "learning_rate": 6.0404629575450464e-06, + "loss": 0.815, + "step": 15801 + }, + { + "epoch": 0.8697231548241511, + "grad_norm": 0.6925041079521179, + "learning_rate": 6.040038975979288e-06, + "loss": 0.8096, + "step": 15802 + }, + { + "epoch": 0.8697781936265067, + "grad_norm": 0.7510724067687988, + "learning_rate": 6.039614986596873e-06, + "loss": 0.7957, + "step": 15803 + }, + { + "epoch": 0.8698332324288623, + "grad_norm": 0.9631650447845459, + "learning_rate": 6.039190989400991e-06, + "loss": 0.7574, + "step": 15804 + }, + { + "epoch": 0.869888271231218, + "grad_norm": 0.7080852389335632, + "learning_rate": 6.0387669843948285e-06, + "loss": 0.7037, + "step": 15805 + }, + { + "epoch": 0.8699433100335737, + "grad_norm": 0.723419725894928, + "learning_rate": 6.03834297158157e-06, + "loss": 0.7424, + "step": 15806 + }, + { + "epoch": 0.8699983488359293, + "grad_norm": 0.6093000173568726, + "learning_rate": 6.037918950964404e-06, + "loss": 0.6754, + "step": 15807 + }, + { + "epoch": 0.870053387638285, + "grad_norm": 0.7614741921424866, + "learning_rate": 6.037494922546518e-06, + "loss": 0.6856, + "step": 15808 + }, + { + "epoch": 0.8701084264406407, + "grad_norm": 0.6535844802856445, + "learning_rate": 6.0370708863310965e-06, + "loss": 0.8201, + "step": 15809 + }, + { + "epoch": 0.8701634652429963, + "grad_norm": 0.724897027015686, + "learning_rate": 6.036646842321329e-06, + "loss": 0.7399, + "step": 15810 + }, + { + "epoch": 0.870218504045352, + "grad_norm": 0.7602331638336182, + "learning_rate": 6.036222790520401e-06, + "loss": 0.8233, + "step": 15811 + }, + { + "epoch": 0.8702735428477076, + "grad_norm": 0.7890536189079285, + "learning_rate": 6.035798730931498e-06, + "loss": 0.8473, + "step": 15812 + }, + { + "epoch": 0.8703285816500633, + "grad_norm": 0.7241165637969971, + "learning_rate": 6.035374663557813e-06, + "loss": 0.7298, + "step": 15813 + }, + { + "epoch": 0.8703836204524189, + "grad_norm": 0.6661847829818726, + "learning_rate": 6.034950588402526e-06, + "loss": 0.7461, + "step": 15814 + }, + { + "epoch": 0.8704386592547746, + "grad_norm": 0.6431320309638977, + "learning_rate": 6.034526505468829e-06, + "loss": 0.7436, + "step": 15815 + }, + { + "epoch": 0.8704936980571303, + "grad_norm": 1.122704267501831, + "learning_rate": 6.0341024147599055e-06, + "loss": 0.7378, + "step": 15816 + }, + { + "epoch": 0.870548736859486, + "grad_norm": 0.6391544938087463, + "learning_rate": 6.033678316278947e-06, + "loss": 0.7517, + "step": 15817 + }, + { + "epoch": 0.8706037756618416, + "grad_norm": 0.6522098183631897, + "learning_rate": 6.033254210029139e-06, + "loss": 0.7188, + "step": 15818 + }, + { + "epoch": 0.8706588144641972, + "grad_norm": 0.7638733386993408, + "learning_rate": 6.0328300960136686e-06, + "loss": 0.8032, + "step": 15819 + }, + { + "epoch": 0.8707138532665529, + "grad_norm": 0.6374132633209229, + "learning_rate": 6.032405974235722e-06, + "loss": 0.7292, + "step": 15820 + }, + { + "epoch": 0.8707688920689086, + "grad_norm": 0.7061800360679626, + "learning_rate": 6.03198184469849e-06, + "loss": 0.7304, + "step": 15821 + }, + { + "epoch": 0.8708239308712642, + "grad_norm": 0.646089494228363, + "learning_rate": 6.031557707405159e-06, + "loss": 0.6762, + "step": 15822 + }, + { + "epoch": 0.8708789696736199, + "grad_norm": 0.8142202496528625, + "learning_rate": 6.031133562358916e-06, + "loss": 0.7789, + "step": 15823 + }, + { + "epoch": 0.8709340084759756, + "grad_norm": 0.6444084644317627, + "learning_rate": 6.030709409562949e-06, + "loss": 0.7383, + "step": 15824 + }, + { + "epoch": 0.8709890472783313, + "grad_norm": 0.8917344808578491, + "learning_rate": 6.030285249020448e-06, + "loss": 0.7527, + "step": 15825 + }, + { + "epoch": 0.8710440860806868, + "grad_norm": 0.6395692825317383, + "learning_rate": 6.029861080734597e-06, + "loss": 0.6923, + "step": 15826 + }, + { + "epoch": 0.8710991248830425, + "grad_norm": 0.6475933790206909, + "learning_rate": 6.029436904708586e-06, + "loss": 0.7495, + "step": 15827 + }, + { + "epoch": 0.8711541636853982, + "grad_norm": 0.7310789823532104, + "learning_rate": 6.029012720945602e-06, + "loss": 0.7541, + "step": 15828 + }, + { + "epoch": 0.8712092024877539, + "grad_norm": 0.8475071787834167, + "learning_rate": 6.028588529448835e-06, + "loss": 0.7397, + "step": 15829 + }, + { + "epoch": 0.8712642412901095, + "grad_norm": 0.6214048266410828, + "learning_rate": 6.028164330221471e-06, + "loss": 0.7365, + "step": 15830 + }, + { + "epoch": 0.8713192800924652, + "grad_norm": 0.6558026671409607, + "learning_rate": 6.0277401232667e-06, + "loss": 0.79, + "step": 15831 + }, + { + "epoch": 0.8713743188948209, + "grad_norm": 0.6652923226356506, + "learning_rate": 6.0273159085877074e-06, + "loss": 0.7539, + "step": 15832 + }, + { + "epoch": 0.8714293576971766, + "grad_norm": 0.7908313870429993, + "learning_rate": 6.026891686187686e-06, + "loss": 0.6776, + "step": 15833 + }, + { + "epoch": 0.8714843964995321, + "grad_norm": 0.6947218775749207, + "learning_rate": 6.02646745606982e-06, + "loss": 0.7375, + "step": 15834 + }, + { + "epoch": 0.8715394353018878, + "grad_norm": 0.7137001156806946, + "learning_rate": 6.0260432182373e-06, + "loss": 0.7213, + "step": 15835 + }, + { + "epoch": 0.8715944741042435, + "grad_norm": 0.6175974011421204, + "learning_rate": 6.025618972693314e-06, + "loss": 0.6468, + "step": 15836 + }, + { + "epoch": 0.8716495129065992, + "grad_norm": 0.6631742119789124, + "learning_rate": 6.0251947194410496e-06, + "loss": 0.7116, + "step": 15837 + }, + { + "epoch": 0.8717045517089548, + "grad_norm": 0.7667781710624695, + "learning_rate": 6.024770458483698e-06, + "loss": 0.8836, + "step": 15838 + }, + { + "epoch": 0.8717595905113105, + "grad_norm": 0.664364218711853, + "learning_rate": 6.024346189824444e-06, + "loss": 0.7719, + "step": 15839 + }, + { + "epoch": 0.8718146293136662, + "grad_norm": 0.7073011994361877, + "learning_rate": 6.023921913466477e-06, + "loss": 0.7117, + "step": 15840 + }, + { + "epoch": 0.8718696681160218, + "grad_norm": 0.7126373052597046, + "learning_rate": 6.02349762941299e-06, + "loss": 0.7693, + "step": 15841 + }, + { + "epoch": 0.8719247069183774, + "grad_norm": 0.7864155173301697, + "learning_rate": 6.0230733376671665e-06, + "loss": 0.8195, + "step": 15842 + }, + { + "epoch": 0.8719797457207331, + "grad_norm": 0.7260663509368896, + "learning_rate": 6.0226490382322e-06, + "loss": 0.7739, + "step": 15843 + }, + { + "epoch": 0.8720347845230888, + "grad_norm": 0.7656667232513428, + "learning_rate": 6.0222247311112745e-06, + "loss": 0.6552, + "step": 15844 + }, + { + "epoch": 0.8720898233254445, + "grad_norm": 0.7063844799995422, + "learning_rate": 6.0218004163075826e-06, + "loss": 0.7506, + "step": 15845 + }, + { + "epoch": 0.8721448621278001, + "grad_norm": 0.6452813744544983, + "learning_rate": 6.021376093824313e-06, + "loss": 0.6854, + "step": 15846 + }, + { + "epoch": 0.8721999009301558, + "grad_norm": 0.6507169008255005, + "learning_rate": 6.020951763664653e-06, + "loss": 0.7289, + "step": 15847 + }, + { + "epoch": 0.8722549397325114, + "grad_norm": 0.6529967784881592, + "learning_rate": 6.020527425831793e-06, + "loss": 0.7196, + "step": 15848 + }, + { + "epoch": 0.8723099785348671, + "grad_norm": 0.8070194125175476, + "learning_rate": 6.020103080328924e-06, + "loss": 0.7848, + "step": 15849 + }, + { + "epoch": 0.8723650173372227, + "grad_norm": 0.7091495394706726, + "learning_rate": 6.019678727159232e-06, + "loss": 0.7948, + "step": 15850 + }, + { + "epoch": 0.8724200561395784, + "grad_norm": 0.8268260955810547, + "learning_rate": 6.019254366325907e-06, + "loss": 0.7446, + "step": 15851 + }, + { + "epoch": 0.8724750949419341, + "grad_norm": 0.7777679562568665, + "learning_rate": 6.018829997832139e-06, + "loss": 0.8307, + "step": 15852 + }, + { + "epoch": 0.8725301337442897, + "grad_norm": 0.6404305696487427, + "learning_rate": 6.018405621681117e-06, + "loss": 0.6952, + "step": 15853 + }, + { + "epoch": 0.8725851725466454, + "grad_norm": 0.8895840644836426, + "learning_rate": 6.017981237876033e-06, + "loss": 0.7554, + "step": 15854 + }, + { + "epoch": 0.872640211349001, + "grad_norm": 0.6717105507850647, + "learning_rate": 6.017556846420073e-06, + "loss": 0.674, + "step": 15855 + }, + { + "epoch": 0.8726952501513567, + "grad_norm": 0.6096089482307434, + "learning_rate": 6.017132447316427e-06, + "loss": 0.7508, + "step": 15856 + }, + { + "epoch": 0.8727502889537123, + "grad_norm": 0.7513056397438049, + "learning_rate": 6.016708040568288e-06, + "loss": 0.717, + "step": 15857 + }, + { + "epoch": 0.872805327756068, + "grad_norm": 0.6977408528327942, + "learning_rate": 6.0162836261788425e-06, + "loss": 0.7002, + "step": 15858 + }, + { + "epoch": 0.8728603665584237, + "grad_norm": 0.6753636598587036, + "learning_rate": 6.015859204151282e-06, + "loss": 0.7414, + "step": 15859 + }, + { + "epoch": 0.8729154053607794, + "grad_norm": 0.7120729684829712, + "learning_rate": 6.015434774488795e-06, + "loss": 0.6774, + "step": 15860 + }, + { + "epoch": 0.872970444163135, + "grad_norm": 0.7560111880302429, + "learning_rate": 6.015010337194573e-06, + "loss": 0.6887, + "step": 15861 + }, + { + "epoch": 0.8730254829654907, + "grad_norm": 0.652497410774231, + "learning_rate": 6.0145858922718044e-06, + "loss": 0.76, + "step": 15862 + }, + { + "epoch": 0.8730805217678463, + "grad_norm": 0.7120025753974915, + "learning_rate": 6.01416143972368e-06, + "loss": 0.7008, + "step": 15863 + }, + { + "epoch": 0.873135560570202, + "grad_norm": 0.7517643570899963, + "learning_rate": 6.013736979553389e-06, + "loss": 0.8944, + "step": 15864 + }, + { + "epoch": 0.8731905993725576, + "grad_norm": 0.6225923299789429, + "learning_rate": 6.013312511764122e-06, + "loss": 0.6217, + "step": 15865 + }, + { + "epoch": 0.8732456381749133, + "grad_norm": 0.8815253376960754, + "learning_rate": 6.012888036359071e-06, + "loss": 0.8121, + "step": 15866 + }, + { + "epoch": 0.873300676977269, + "grad_norm": 0.676211953163147, + "learning_rate": 6.012463553341424e-06, + "loss": 0.7233, + "step": 15867 + }, + { + "epoch": 0.8733557157796247, + "grad_norm": 0.6566252708435059, + "learning_rate": 6.012039062714371e-06, + "loss": 0.8099, + "step": 15868 + }, + { + "epoch": 0.8734107545819803, + "grad_norm": 0.7964142560958862, + "learning_rate": 6.011614564481103e-06, + "loss": 0.758, + "step": 15869 + }, + { + "epoch": 0.8734657933843359, + "grad_norm": 0.6923096776008606, + "learning_rate": 6.011190058644811e-06, + "loss": 0.6997, + "step": 15870 + }, + { + "epoch": 0.8735208321866916, + "grad_norm": 0.6507520079612732, + "learning_rate": 6.010765545208687e-06, + "loss": 0.7046, + "step": 15871 + }, + { + "epoch": 0.8735758709890473, + "grad_norm": 0.8206372857093811, + "learning_rate": 6.010341024175918e-06, + "loss": 0.8568, + "step": 15872 + }, + { + "epoch": 0.8736309097914029, + "grad_norm": 0.6379685997962952, + "learning_rate": 6.0099164955496965e-06, + "loss": 0.7537, + "step": 15873 + }, + { + "epoch": 0.8736859485937586, + "grad_norm": 0.7258248925209045, + "learning_rate": 6.009491959333214e-06, + "loss": 0.6946, + "step": 15874 + }, + { + "epoch": 0.8737409873961143, + "grad_norm": 0.6882272362709045, + "learning_rate": 6.0090674155296606e-06, + "loss": 0.6508, + "step": 15875 + }, + { + "epoch": 0.87379602619847, + "grad_norm": 0.646864652633667, + "learning_rate": 6.0086428641422245e-06, + "loss": 0.7061, + "step": 15876 + }, + { + "epoch": 0.8738510650008255, + "grad_norm": 0.772055983543396, + "learning_rate": 6.008218305174099e-06, + "loss": 0.7435, + "step": 15877 + }, + { + "epoch": 0.8739061038031812, + "grad_norm": 0.660976767539978, + "learning_rate": 6.007793738628476e-06, + "loss": 0.6834, + "step": 15878 + }, + { + "epoch": 0.8739611426055369, + "grad_norm": 0.6279324293136597, + "learning_rate": 6.007369164508544e-06, + "loss": 0.6903, + "step": 15879 + }, + { + "epoch": 0.8740161814078926, + "grad_norm": 0.7111205458641052, + "learning_rate": 6.006944582817495e-06, + "loss": 0.7338, + "step": 15880 + }, + { + "epoch": 0.8740712202102482, + "grad_norm": 0.6149270534515381, + "learning_rate": 6.006519993558519e-06, + "loss": 0.6639, + "step": 15881 + }, + { + "epoch": 0.8741262590126039, + "grad_norm": 0.7477333545684814, + "learning_rate": 6.00609539673481e-06, + "loss": 0.8032, + "step": 15882 + }, + { + "epoch": 0.8741812978149596, + "grad_norm": 0.8613518476486206, + "learning_rate": 6.005670792349557e-06, + "loss": 0.7508, + "step": 15883 + }, + { + "epoch": 0.8742363366173153, + "grad_norm": 0.6627817153930664, + "learning_rate": 6.0052461804059515e-06, + "loss": 0.7898, + "step": 15884 + }, + { + "epoch": 0.8742913754196708, + "grad_norm": 0.6863798499107361, + "learning_rate": 6.004821560907185e-06, + "loss": 0.7674, + "step": 15885 + }, + { + "epoch": 0.8743464142220265, + "grad_norm": 0.6809577941894531, + "learning_rate": 6.004396933856449e-06, + "loss": 0.8094, + "step": 15886 + }, + { + "epoch": 0.8744014530243822, + "grad_norm": 0.6340956687927246, + "learning_rate": 6.003972299256934e-06, + "loss": 0.6508, + "step": 15887 + }, + { + "epoch": 0.8744564918267379, + "grad_norm": 0.6261658072471619, + "learning_rate": 6.003547657111831e-06, + "loss": 0.7375, + "step": 15888 + }, + { + "epoch": 0.8745115306290935, + "grad_norm": 0.7042009830474854, + "learning_rate": 6.003123007424332e-06, + "loss": 0.6817, + "step": 15889 + }, + { + "epoch": 0.8745665694314492, + "grad_norm": 0.719497561454773, + "learning_rate": 6.002698350197631e-06, + "loss": 0.7689, + "step": 15890 + }, + { + "epoch": 0.8746216082338049, + "grad_norm": 0.7034541964530945, + "learning_rate": 6.002273685434916e-06, + "loss": 0.7956, + "step": 15891 + }, + { + "epoch": 0.8746766470361605, + "grad_norm": 0.6404724717140198, + "learning_rate": 6.0018490131393815e-06, + "loss": 0.672, + "step": 15892 + }, + { + "epoch": 0.8747316858385161, + "grad_norm": 0.6812208294868469, + "learning_rate": 6.001424333314216e-06, + "loss": 0.6911, + "step": 15893 + }, + { + "epoch": 0.8747867246408718, + "grad_norm": 0.5907782912254333, + "learning_rate": 6.000999645962615e-06, + "loss": 0.6476, + "step": 15894 + }, + { + "epoch": 0.8748417634432275, + "grad_norm": 1.2116328477859497, + "learning_rate": 6.000574951087769e-06, + "loss": 0.733, + "step": 15895 + }, + { + "epoch": 0.8748968022455831, + "grad_norm": 0.6581991314888, + "learning_rate": 6.000150248692868e-06, + "loss": 0.7441, + "step": 15896 + }, + { + "epoch": 0.8749518410479388, + "grad_norm": 0.7342226505279541, + "learning_rate": 5.999725538781107e-06, + "loss": 0.6592, + "step": 15897 + }, + { + "epoch": 0.8750068798502945, + "grad_norm": 0.6864113211631775, + "learning_rate": 5.9993008213556766e-06, + "loss": 0.7652, + "step": 15898 + }, + { + "epoch": 0.8750619186526502, + "grad_norm": 0.6845645904541016, + "learning_rate": 5.9988760964197675e-06, + "loss": 0.7471, + "step": 15899 + }, + { + "epoch": 0.8751169574550057, + "grad_norm": 0.663165271282196, + "learning_rate": 5.998451363976574e-06, + "loss": 0.7667, + "step": 15900 + }, + { + "epoch": 0.8751719962573614, + "grad_norm": 0.6032472252845764, + "learning_rate": 5.998026624029286e-06, + "loss": 0.6309, + "step": 15901 + }, + { + "epoch": 0.8752270350597171, + "grad_norm": 0.6466236710548401, + "learning_rate": 5.997601876581098e-06, + "loss": 0.7418, + "step": 15902 + }, + { + "epoch": 0.8752820738620728, + "grad_norm": 0.6456779837608337, + "learning_rate": 5.997177121635201e-06, + "loss": 0.6598, + "step": 15903 + }, + { + "epoch": 0.8753371126644284, + "grad_norm": 0.7854783535003662, + "learning_rate": 5.996752359194788e-06, + "loss": 0.7545, + "step": 15904 + }, + { + "epoch": 0.8753921514667841, + "grad_norm": 0.7146682143211365, + "learning_rate": 5.99632758926305e-06, + "loss": 0.6856, + "step": 15905 + }, + { + "epoch": 0.8754471902691398, + "grad_norm": 0.7379660606384277, + "learning_rate": 5.995902811843181e-06, + "loss": 0.8601, + "step": 15906 + }, + { + "epoch": 0.8755022290714954, + "grad_norm": 0.6879674196243286, + "learning_rate": 5.995478026938375e-06, + "loss": 0.8147, + "step": 15907 + }, + { + "epoch": 0.875557267873851, + "grad_norm": 0.6188415884971619, + "learning_rate": 5.995053234551821e-06, + "loss": 0.6646, + "step": 15908 + }, + { + "epoch": 0.8756123066762067, + "grad_norm": 1.0765966176986694, + "learning_rate": 5.994628434686713e-06, + "loss": 0.6624, + "step": 15909 + }, + { + "epoch": 0.8756673454785624, + "grad_norm": 0.6391757726669312, + "learning_rate": 5.994203627346245e-06, + "loss": 0.7087, + "step": 15910 + }, + { + "epoch": 0.8757223842809181, + "grad_norm": 0.7664490938186646, + "learning_rate": 5.993778812533609e-06, + "loss": 0.6969, + "step": 15911 + }, + { + "epoch": 0.8757774230832737, + "grad_norm": 0.6901882290840149, + "learning_rate": 5.993353990251995e-06, + "loss": 0.7879, + "step": 15912 + }, + { + "epoch": 0.8758324618856294, + "grad_norm": 0.6871299147605896, + "learning_rate": 5.992929160504599e-06, + "loss": 0.6981, + "step": 15913 + }, + { + "epoch": 0.875887500687985, + "grad_norm": 0.754436731338501, + "learning_rate": 5.9925043232946145e-06, + "loss": 0.7549, + "step": 15914 + }, + { + "epoch": 0.8759425394903407, + "grad_norm": 0.7250627875328064, + "learning_rate": 5.992079478625232e-06, + "loss": 0.8134, + "step": 15915 + }, + { + "epoch": 0.8759975782926963, + "grad_norm": 0.7294771671295166, + "learning_rate": 5.991654626499647e-06, + "loss": 0.8551, + "step": 15916 + }, + { + "epoch": 0.876052617095052, + "grad_norm": 0.6388968229293823, + "learning_rate": 5.991229766921049e-06, + "loss": 0.7562, + "step": 15917 + }, + { + "epoch": 0.8761076558974077, + "grad_norm": 0.7206701636314392, + "learning_rate": 5.990804899892636e-06, + "loss": 0.6818, + "step": 15918 + }, + { + "epoch": 0.8761626946997634, + "grad_norm": 0.6607910394668579, + "learning_rate": 5.990380025417597e-06, + "loss": 0.6956, + "step": 15919 + }, + { + "epoch": 0.876217733502119, + "grad_norm": 0.6806843280792236, + "learning_rate": 5.9899551434991276e-06, + "loss": 0.7261, + "step": 15920 + }, + { + "epoch": 0.8762727723044746, + "grad_norm": 0.6994869709014893, + "learning_rate": 5.989530254140421e-06, + "loss": 0.7709, + "step": 15921 + }, + { + "epoch": 0.8763278111068303, + "grad_norm": 0.6707572937011719, + "learning_rate": 5.9891053573446685e-06, + "loss": 0.6962, + "step": 15922 + }, + { + "epoch": 0.876382849909186, + "grad_norm": 0.8244118690490723, + "learning_rate": 5.988680453115065e-06, + "loss": 0.7173, + "step": 15923 + }, + { + "epoch": 0.8764378887115416, + "grad_norm": 0.7745859026908875, + "learning_rate": 5.988255541454806e-06, + "loss": 0.7176, + "step": 15924 + }, + { + "epoch": 0.8764929275138973, + "grad_norm": 0.6572975516319275, + "learning_rate": 5.98783062236708e-06, + "loss": 0.6994, + "step": 15925 + }, + { + "epoch": 0.876547966316253, + "grad_norm": 0.7363128066062927, + "learning_rate": 5.9874056958550845e-06, + "loss": 0.7392, + "step": 15926 + }, + { + "epoch": 0.8766030051186087, + "grad_norm": 0.6638974547386169, + "learning_rate": 5.986980761922012e-06, + "loss": 0.693, + "step": 15927 + }, + { + "epoch": 0.8766580439209642, + "grad_norm": 0.6946521997451782, + "learning_rate": 5.9865558205710576e-06, + "loss": 0.7727, + "step": 15928 + }, + { + "epoch": 0.8767130827233199, + "grad_norm": 0.7787197232246399, + "learning_rate": 5.9861308718054115e-06, + "loss": 0.7203, + "step": 15929 + }, + { + "epoch": 0.8767681215256756, + "grad_norm": 0.6275830864906311, + "learning_rate": 5.985705915628271e-06, + "loss": 0.7613, + "step": 15930 + }, + { + "epoch": 0.8768231603280313, + "grad_norm": 0.5722871422767639, + "learning_rate": 5.985280952042829e-06, + "loss": 0.6635, + "step": 15931 + }, + { + "epoch": 0.8768781991303869, + "grad_norm": 0.6991726160049438, + "learning_rate": 5.984855981052278e-06, + "loss": 0.7965, + "step": 15932 + }, + { + "epoch": 0.8769332379327426, + "grad_norm": 0.6550602912902832, + "learning_rate": 5.984431002659815e-06, + "loss": 0.7591, + "step": 15933 + }, + { + "epoch": 0.8769882767350983, + "grad_norm": 0.703481137752533, + "learning_rate": 5.984006016868631e-06, + "loss": 0.7623, + "step": 15934 + }, + { + "epoch": 0.877043315537454, + "grad_norm": 0.5606309175491333, + "learning_rate": 5.983581023681922e-06, + "loss": 0.5957, + "step": 15935 + }, + { + "epoch": 0.8770983543398095, + "grad_norm": 0.6857683062553406, + "learning_rate": 5.98315602310288e-06, + "loss": 0.7629, + "step": 15936 + }, + { + "epoch": 0.8771533931421652, + "grad_norm": 0.6976568698883057, + "learning_rate": 5.982731015134699e-06, + "loss": 0.7259, + "step": 15937 + }, + { + "epoch": 0.8772084319445209, + "grad_norm": 0.7161057591438293, + "learning_rate": 5.982305999780578e-06, + "loss": 0.8572, + "step": 15938 + }, + { + "epoch": 0.8772634707468765, + "grad_norm": 0.7051637768745422, + "learning_rate": 5.981880977043706e-06, + "loss": 0.8251, + "step": 15939 + }, + { + "epoch": 0.8773185095492322, + "grad_norm": 0.6403392553329468, + "learning_rate": 5.98145594692728e-06, + "loss": 0.7021, + "step": 15940 + }, + { + "epoch": 0.8773735483515879, + "grad_norm": 0.8370338678359985, + "learning_rate": 5.981030909434493e-06, + "loss": 0.6376, + "step": 15941 + }, + { + "epoch": 0.8774285871539436, + "grad_norm": 0.7969813346862793, + "learning_rate": 5.980605864568541e-06, + "loss": 0.7935, + "step": 15942 + }, + { + "epoch": 0.8774836259562991, + "grad_norm": 0.7235258221626282, + "learning_rate": 5.980180812332619e-06, + "loss": 0.8024, + "step": 15943 + }, + { + "epoch": 0.8775386647586548, + "grad_norm": 0.6627998352050781, + "learning_rate": 5.97975575272992e-06, + "loss": 0.8184, + "step": 15944 + }, + { + "epoch": 0.8775937035610105, + "grad_norm": 1.123727798461914, + "learning_rate": 5.979330685763638e-06, + "loss": 0.9015, + "step": 15945 + }, + { + "epoch": 0.8776487423633662, + "grad_norm": 0.8116182088851929, + "learning_rate": 5.97890561143697e-06, + "loss": 0.7327, + "step": 15946 + }, + { + "epoch": 0.8777037811657218, + "grad_norm": 0.6537826657295227, + "learning_rate": 5.978480529753108e-06, + "loss": 0.7215, + "step": 15947 + }, + { + "epoch": 0.8777588199680775, + "grad_norm": 0.746971845626831, + "learning_rate": 5.978055440715249e-06, + "loss": 0.8549, + "step": 15948 + }, + { + "epoch": 0.8778138587704332, + "grad_norm": 0.7417864799499512, + "learning_rate": 5.9776303443265855e-06, + "loss": 0.7806, + "step": 15949 + }, + { + "epoch": 0.8778688975727889, + "grad_norm": 0.6726604700088501, + "learning_rate": 5.977205240590317e-06, + "loss": 0.7745, + "step": 15950 + }, + { + "epoch": 0.8779239363751444, + "grad_norm": 0.9483097791671753, + "learning_rate": 5.976780129509634e-06, + "loss": 0.81, + "step": 15951 + }, + { + "epoch": 0.8779789751775001, + "grad_norm": 0.7212807536125183, + "learning_rate": 5.976355011087734e-06, + "loss": 0.6534, + "step": 15952 + }, + { + "epoch": 0.8780340139798558, + "grad_norm": 0.725513756275177, + "learning_rate": 5.975929885327808e-06, + "loss": 0.6902, + "step": 15953 + }, + { + "epoch": 0.8780890527822115, + "grad_norm": 0.6132134795188904, + "learning_rate": 5.975504752233057e-06, + "loss": 0.7397, + "step": 15954 + }, + { + "epoch": 0.8781440915845671, + "grad_norm": 0.7595381736755371, + "learning_rate": 5.975079611806672e-06, + "loss": 0.5893, + "step": 15955 + }, + { + "epoch": 0.8781991303869228, + "grad_norm": 0.7069967985153198, + "learning_rate": 5.974654464051851e-06, + "loss": 0.7702, + "step": 15956 + }, + { + "epoch": 0.8782541691892785, + "grad_norm": 0.650039553642273, + "learning_rate": 5.974229308971788e-06, + "loss": 0.7708, + "step": 15957 + }, + { + "epoch": 0.8783092079916341, + "grad_norm": 0.6799747347831726, + "learning_rate": 5.973804146569677e-06, + "loss": 0.7094, + "step": 15958 + }, + { + "epoch": 0.8783642467939897, + "grad_norm": 0.653275728225708, + "learning_rate": 5.973378976848716e-06, + "loss": 0.623, + "step": 15959 + }, + { + "epoch": 0.8784192855963454, + "grad_norm": 0.6734615564346313, + "learning_rate": 5.972953799812098e-06, + "loss": 0.7071, + "step": 15960 + }, + { + "epoch": 0.8784743243987011, + "grad_norm": 0.6319865584373474, + "learning_rate": 5.9725286154630205e-06, + "loss": 0.7239, + "step": 15961 + }, + { + "epoch": 0.8785293632010568, + "grad_norm": 0.6933672428131104, + "learning_rate": 5.972103423804677e-06, + "loss": 0.7138, + "step": 15962 + }, + { + "epoch": 0.8785844020034124, + "grad_norm": 0.7323144674301147, + "learning_rate": 5.971678224840266e-06, + "loss": 0.7534, + "step": 15963 + }, + { + "epoch": 0.8786394408057681, + "grad_norm": 0.6736310720443726, + "learning_rate": 5.971253018572981e-06, + "loss": 0.7644, + "step": 15964 + }, + { + "epoch": 0.8786944796081237, + "grad_norm": 0.6524562835693359, + "learning_rate": 5.970827805006016e-06, + "loss": 0.7727, + "step": 15965 + }, + { + "epoch": 0.8787495184104794, + "grad_norm": 0.7736978530883789, + "learning_rate": 5.970402584142573e-06, + "loss": 0.7641, + "step": 15966 + }, + { + "epoch": 0.878804557212835, + "grad_norm": 0.7099476456642151, + "learning_rate": 5.969977355985842e-06, + "loss": 0.7064, + "step": 15967 + }, + { + "epoch": 0.8788595960151907, + "grad_norm": 0.6340349912643433, + "learning_rate": 5.969552120539021e-06, + "loss": 0.7135, + "step": 15968 + }, + { + "epoch": 0.8789146348175464, + "grad_norm": 0.649853527545929, + "learning_rate": 5.969126877805306e-06, + "loss": 0.7068, + "step": 15969 + }, + { + "epoch": 0.8789696736199021, + "grad_norm": 0.589920699596405, + "learning_rate": 5.9687016277878925e-06, + "loss": 0.6891, + "step": 15970 + }, + { + "epoch": 0.8790247124222577, + "grad_norm": 0.7485616207122803, + "learning_rate": 5.968276370489977e-06, + "loss": 0.7305, + "step": 15971 + }, + { + "epoch": 0.8790797512246133, + "grad_norm": 0.6765890121459961, + "learning_rate": 5.967851105914756e-06, + "loss": 0.6862, + "step": 15972 + }, + { + "epoch": 0.879134790026969, + "grad_norm": 0.7127717733383179, + "learning_rate": 5.967425834065423e-06, + "loss": 0.6255, + "step": 15973 + }, + { + "epoch": 0.8791898288293247, + "grad_norm": 1.1564292907714844, + "learning_rate": 5.967000554945179e-06, + "loss": 0.7351, + "step": 15974 + }, + { + "epoch": 0.8792448676316803, + "grad_norm": 0.7343658804893494, + "learning_rate": 5.966575268557217e-06, + "loss": 0.7605, + "step": 15975 + }, + { + "epoch": 0.879299906434036, + "grad_norm": 0.695280134677887, + "learning_rate": 5.966149974904733e-06, + "loss": 0.7425, + "step": 15976 + }, + { + "epoch": 0.8793549452363917, + "grad_norm": 0.7075724005699158, + "learning_rate": 5.965724673990925e-06, + "loss": 0.6501, + "step": 15977 + }, + { + "epoch": 0.8794099840387474, + "grad_norm": 0.6764316558837891, + "learning_rate": 5.96529936581899e-06, + "loss": 0.6871, + "step": 15978 + }, + { + "epoch": 0.879465022841103, + "grad_norm": 0.6782918572425842, + "learning_rate": 5.964874050392122e-06, + "loss": 0.7298, + "step": 15979 + }, + { + "epoch": 0.8795200616434586, + "grad_norm": 0.7106739282608032, + "learning_rate": 5.964448727713519e-06, + "loss": 0.755, + "step": 15980 + }, + { + "epoch": 0.8795751004458143, + "grad_norm": 0.7591151595115662, + "learning_rate": 5.964023397786378e-06, + "loss": 0.7603, + "step": 15981 + }, + { + "epoch": 0.8796301392481699, + "grad_norm": 0.6050585508346558, + "learning_rate": 5.963598060613896e-06, + "loss": 0.6895, + "step": 15982 + }, + { + "epoch": 0.8796851780505256, + "grad_norm": 0.7037108540534973, + "learning_rate": 5.963172716199267e-06, + "loss": 0.7475, + "step": 15983 + }, + { + "epoch": 0.8797402168528813, + "grad_norm": 0.6476989984512329, + "learning_rate": 5.962747364545692e-06, + "loss": 0.7925, + "step": 15984 + }, + { + "epoch": 0.879795255655237, + "grad_norm": 0.6409072875976562, + "learning_rate": 5.962322005656362e-06, + "loss": 0.8134, + "step": 15985 + }, + { + "epoch": 0.8798502944575926, + "grad_norm": 0.8730958700180054, + "learning_rate": 5.96189663953448e-06, + "loss": 0.8283, + "step": 15986 + }, + { + "epoch": 0.8799053332599482, + "grad_norm": 0.73405522108078, + "learning_rate": 5.96147126618324e-06, + "loss": 0.7351, + "step": 15987 + }, + { + "epoch": 0.8799603720623039, + "grad_norm": 0.6587926745414734, + "learning_rate": 5.961045885605839e-06, + "loss": 0.7713, + "step": 15988 + }, + { + "epoch": 0.8800154108646596, + "grad_norm": 0.8479684591293335, + "learning_rate": 5.9606204978054736e-06, + "loss": 0.7537, + "step": 15989 + }, + { + "epoch": 0.8800704496670152, + "grad_norm": 0.731315553188324, + "learning_rate": 5.960195102785343e-06, + "loss": 0.7363, + "step": 15990 + }, + { + "epoch": 0.8801254884693709, + "grad_norm": 0.7163324952125549, + "learning_rate": 5.9597697005486434e-06, + "loss": 0.718, + "step": 15991 + }, + { + "epoch": 0.8801805272717266, + "grad_norm": 0.6090518832206726, + "learning_rate": 5.9593442910985714e-06, + "loss": 0.6002, + "step": 15992 + }, + { + "epoch": 0.8802355660740823, + "grad_norm": 0.633756160736084, + "learning_rate": 5.958918874438324e-06, + "loss": 0.5981, + "step": 15993 + }, + { + "epoch": 0.8802906048764378, + "grad_norm": 0.728961169719696, + "learning_rate": 5.958493450571099e-06, + "loss": 0.7964, + "step": 15994 + }, + { + "epoch": 0.8803456436787935, + "grad_norm": 0.8194692134857178, + "learning_rate": 5.958068019500094e-06, + "loss": 0.748, + "step": 15995 + }, + { + "epoch": 0.8804006824811492, + "grad_norm": 0.9324885010719299, + "learning_rate": 5.957642581228506e-06, + "loss": 0.8263, + "step": 15996 + }, + { + "epoch": 0.8804557212835049, + "grad_norm": 0.7585923075675964, + "learning_rate": 5.957217135759532e-06, + "loss": 0.8208, + "step": 15997 + }, + { + "epoch": 0.8805107600858605, + "grad_norm": 0.726158618927002, + "learning_rate": 5.956791683096371e-06, + "loss": 0.7664, + "step": 15998 + }, + { + "epoch": 0.8805657988882162, + "grad_norm": 0.7648130059242249, + "learning_rate": 5.95636622324222e-06, + "loss": 0.8102, + "step": 15999 + }, + { + "epoch": 0.8806208376905719, + "grad_norm": 0.7053543925285339, + "learning_rate": 5.955940756200277e-06, + "loss": 0.8446, + "step": 16000 + }, + { + "epoch": 0.8806758764929276, + "grad_norm": 1.0602153539657593, + "learning_rate": 5.955515281973737e-06, + "loss": 0.8199, + "step": 16001 + }, + { + "epoch": 0.8807309152952831, + "grad_norm": 0.7527370452880859, + "learning_rate": 5.955089800565802e-06, + "loss": 0.7895, + "step": 16002 + }, + { + "epoch": 0.8807859540976388, + "grad_norm": 0.7191178202629089, + "learning_rate": 5.954664311979666e-06, + "loss": 0.8836, + "step": 16003 + }, + { + "epoch": 0.8808409928999945, + "grad_norm": 0.6993798017501831, + "learning_rate": 5.95423881621853e-06, + "loss": 0.7765, + "step": 16004 + }, + { + "epoch": 0.8808960317023502, + "grad_norm": 0.6863248944282532, + "learning_rate": 5.9538133132855915e-06, + "loss": 0.6294, + "step": 16005 + }, + { + "epoch": 0.8809510705047058, + "grad_norm": 0.7736430168151855, + "learning_rate": 5.953387803184046e-06, + "loss": 0.9003, + "step": 16006 + }, + { + "epoch": 0.8810061093070615, + "grad_norm": 0.6101526618003845, + "learning_rate": 5.9529622859170935e-06, + "loss": 0.7125, + "step": 16007 + }, + { + "epoch": 0.8810611481094172, + "grad_norm": 0.7688401341438293, + "learning_rate": 5.952536761487932e-06, + "loss": 0.7667, + "step": 16008 + }, + { + "epoch": 0.8811161869117728, + "grad_norm": 0.6438688039779663, + "learning_rate": 5.9521112298997575e-06, + "loss": 0.7327, + "step": 16009 + }, + { + "epoch": 0.8811712257141284, + "grad_norm": 0.7732130885124207, + "learning_rate": 5.951685691155769e-06, + "loss": 0.8466, + "step": 16010 + }, + { + "epoch": 0.8812262645164841, + "grad_norm": 0.755892813205719, + "learning_rate": 5.951260145259168e-06, + "loss": 0.8042, + "step": 16011 + }, + { + "epoch": 0.8812813033188398, + "grad_norm": 0.7132954001426697, + "learning_rate": 5.950834592213151e-06, + "loss": 0.7801, + "step": 16012 + }, + { + "epoch": 0.8813363421211955, + "grad_norm": 0.702319324016571, + "learning_rate": 5.950409032020914e-06, + "loss": 0.7487, + "step": 16013 + }, + { + "epoch": 0.8813913809235511, + "grad_norm": 0.6477691531181335, + "learning_rate": 5.949983464685656e-06, + "loss": 0.6942, + "step": 16014 + }, + { + "epoch": 0.8814464197259068, + "grad_norm": 0.6817807555198669, + "learning_rate": 5.949557890210578e-06, + "loss": 0.676, + "step": 16015 + }, + { + "epoch": 0.8815014585282624, + "grad_norm": 0.6980645060539246, + "learning_rate": 5.949132308598877e-06, + "loss": 0.7837, + "step": 16016 + }, + { + "epoch": 0.8815564973306181, + "grad_norm": 0.9056459665298462, + "learning_rate": 5.948706719853753e-06, + "loss": 0.7005, + "step": 16017 + }, + { + "epoch": 0.8816115361329737, + "grad_norm": 0.8172656297683716, + "learning_rate": 5.948281123978402e-06, + "loss": 0.7784, + "step": 16018 + }, + { + "epoch": 0.8816665749353294, + "grad_norm": 0.6387753486633301, + "learning_rate": 5.947855520976025e-06, + "loss": 0.8171, + "step": 16019 + }, + { + "epoch": 0.8817216137376851, + "grad_norm": 0.6150957345962524, + "learning_rate": 5.947429910849818e-06, + "loss": 0.6448, + "step": 16020 + }, + { + "epoch": 0.8817766525400408, + "grad_norm": 0.7051831483840942, + "learning_rate": 5.947004293602982e-06, + "loss": 0.7585, + "step": 16021 + }, + { + "epoch": 0.8818316913423964, + "grad_norm": 0.7967584729194641, + "learning_rate": 5.946578669238714e-06, + "loss": 0.8122, + "step": 16022 + }, + { + "epoch": 0.881886730144752, + "grad_norm": 0.6126663088798523, + "learning_rate": 5.946153037760216e-06, + "loss": 0.6149, + "step": 16023 + }, + { + "epoch": 0.8819417689471077, + "grad_norm": 0.6940233111381531, + "learning_rate": 5.945727399170684e-06, + "loss": 0.7373, + "step": 16024 + }, + { + "epoch": 0.8819968077494633, + "grad_norm": 0.652776837348938, + "learning_rate": 5.945301753473318e-06, + "loss": 0.7029, + "step": 16025 + }, + { + "epoch": 0.882051846551819, + "grad_norm": 0.7182415723800659, + "learning_rate": 5.944876100671317e-06, + "loss": 0.7343, + "step": 16026 + }, + { + "epoch": 0.8821068853541747, + "grad_norm": 0.6714525818824768, + "learning_rate": 5.944450440767881e-06, + "loss": 0.7402, + "step": 16027 + }, + { + "epoch": 0.8821619241565304, + "grad_norm": 0.7144107818603516, + "learning_rate": 5.944024773766208e-06, + "loss": 0.6775, + "step": 16028 + }, + { + "epoch": 0.882216962958886, + "grad_norm": 0.6483643054962158, + "learning_rate": 5.943599099669497e-06, + "loss": 0.698, + "step": 16029 + }, + { + "epoch": 0.8822720017612417, + "grad_norm": 0.6388065218925476, + "learning_rate": 5.943173418480949e-06, + "loss": 0.7814, + "step": 16030 + }, + { + "epoch": 0.8823270405635973, + "grad_norm": 0.6891177892684937, + "learning_rate": 5.942747730203761e-06, + "loss": 0.7423, + "step": 16031 + }, + { + "epoch": 0.882382079365953, + "grad_norm": 0.6425840258598328, + "learning_rate": 5.942322034841133e-06, + "loss": 0.7178, + "step": 16032 + }, + { + "epoch": 0.8824371181683086, + "grad_norm": 0.6308293342590332, + "learning_rate": 5.941896332396266e-06, + "loss": 0.6624, + "step": 16033 + }, + { + "epoch": 0.8824921569706643, + "grad_norm": 0.6310557126998901, + "learning_rate": 5.941470622872358e-06, + "loss": 0.7365, + "step": 16034 + }, + { + "epoch": 0.88254719577302, + "grad_norm": 0.7731123566627502, + "learning_rate": 5.941044906272609e-06, + "loss": 0.6385, + "step": 16035 + }, + { + "epoch": 0.8826022345753757, + "grad_norm": 0.7063789963722229, + "learning_rate": 5.940619182600217e-06, + "loss": 0.7606, + "step": 16036 + }, + { + "epoch": 0.8826572733777313, + "grad_norm": 0.6370593309402466, + "learning_rate": 5.940193451858384e-06, + "loss": 0.7332, + "step": 16037 + }, + { + "epoch": 0.8827123121800869, + "grad_norm": 0.7685242891311646, + "learning_rate": 5.9397677140503085e-06, + "loss": 0.7809, + "step": 16038 + }, + { + "epoch": 0.8827673509824426, + "grad_norm": 0.8439769148826599, + "learning_rate": 5.93934196917919e-06, + "loss": 0.7419, + "step": 16039 + }, + { + "epoch": 0.8828223897847983, + "grad_norm": 0.873909056186676, + "learning_rate": 5.93891621724823e-06, + "loss": 0.6559, + "step": 16040 + }, + { + "epoch": 0.8828774285871539, + "grad_norm": 0.711383581161499, + "learning_rate": 5.938490458260626e-06, + "loss": 0.718, + "step": 16041 + }, + { + "epoch": 0.8829324673895096, + "grad_norm": 0.6775739192962646, + "learning_rate": 5.938064692219579e-06, + "loss": 0.7062, + "step": 16042 + }, + { + "epoch": 0.8829875061918653, + "grad_norm": 0.687095582485199, + "learning_rate": 5.93763891912829e-06, + "loss": 0.7773, + "step": 16043 + }, + { + "epoch": 0.883042544994221, + "grad_norm": 0.6648910641670227, + "learning_rate": 5.937213138989957e-06, + "loss": 0.7267, + "step": 16044 + }, + { + "epoch": 0.8830975837965765, + "grad_norm": 0.6296299695968628, + "learning_rate": 5.9367873518077815e-06, + "loss": 0.7561, + "step": 16045 + }, + { + "epoch": 0.8831526225989322, + "grad_norm": 1.1233999729156494, + "learning_rate": 5.936361557584961e-06, + "loss": 0.7401, + "step": 16046 + }, + { + "epoch": 0.8832076614012879, + "grad_norm": 0.7307866811752319, + "learning_rate": 5.935935756324699e-06, + "loss": 0.7945, + "step": 16047 + }, + { + "epoch": 0.8832627002036436, + "grad_norm": 0.627402663230896, + "learning_rate": 5.9355099480301944e-06, + "loss": 0.7005, + "step": 16048 + }, + { + "epoch": 0.8833177390059992, + "grad_norm": 0.6698537468910217, + "learning_rate": 5.935084132704648e-06, + "loss": 0.7349, + "step": 16049 + }, + { + "epoch": 0.8833727778083549, + "grad_norm": 0.7348290681838989, + "learning_rate": 5.934658310351258e-06, + "loss": 0.8033, + "step": 16050 + }, + { + "epoch": 0.8834278166107106, + "grad_norm": 0.6543971300125122, + "learning_rate": 5.934232480973228e-06, + "loss": 0.706, + "step": 16051 + }, + { + "epoch": 0.8834828554130663, + "grad_norm": 0.7279872894287109, + "learning_rate": 5.933806644573756e-06, + "loss": 0.8142, + "step": 16052 + }, + { + "epoch": 0.8835378942154218, + "grad_norm": 0.6433993577957153, + "learning_rate": 5.933380801156044e-06, + "loss": 0.7092, + "step": 16053 + }, + { + "epoch": 0.8835929330177775, + "grad_norm": 0.7375844717025757, + "learning_rate": 5.932954950723291e-06, + "loss": 0.7632, + "step": 16054 + }, + { + "epoch": 0.8836479718201332, + "grad_norm": 0.6837477087974548, + "learning_rate": 5.932529093278698e-06, + "loss": 0.735, + "step": 16055 + }, + { + "epoch": 0.8837030106224889, + "grad_norm": 0.5978528261184692, + "learning_rate": 5.932103228825467e-06, + "loss": 0.6745, + "step": 16056 + }, + { + "epoch": 0.8837580494248445, + "grad_norm": 0.6475256085395813, + "learning_rate": 5.931677357366798e-06, + "loss": 0.7469, + "step": 16057 + }, + { + "epoch": 0.8838130882272002, + "grad_norm": 0.685108482837677, + "learning_rate": 5.931251478905888e-06, + "loss": 0.7528, + "step": 16058 + }, + { + "epoch": 0.8838681270295559, + "grad_norm": 0.7046063542366028, + "learning_rate": 5.930825593445945e-06, + "loss": 0.8157, + "step": 16059 + }, + { + "epoch": 0.8839231658319116, + "grad_norm": 0.6626511216163635, + "learning_rate": 5.930399700990165e-06, + "loss": 0.791, + "step": 16060 + }, + { + "epoch": 0.8839782046342671, + "grad_norm": 0.7249611020088196, + "learning_rate": 5.929973801541749e-06, + "loss": 0.8428, + "step": 16061 + }, + { + "epoch": 0.8840332434366228, + "grad_norm": 0.5953019261360168, + "learning_rate": 5.929547895103899e-06, + "loss": 0.6589, + "step": 16062 + }, + { + "epoch": 0.8840882822389785, + "grad_norm": 0.6847557425498962, + "learning_rate": 5.9291219816798165e-06, + "loss": 0.7976, + "step": 16063 + }, + { + "epoch": 0.8841433210413342, + "grad_norm": 0.7806814312934875, + "learning_rate": 5.928696061272701e-06, + "loss": 0.7879, + "step": 16064 + }, + { + "epoch": 0.8841983598436898, + "grad_norm": 1.028922200202942, + "learning_rate": 5.9282701338857555e-06, + "loss": 0.732, + "step": 16065 + }, + { + "epoch": 0.8842533986460455, + "grad_norm": 0.6143102645874023, + "learning_rate": 5.927844199522179e-06, + "loss": 0.7216, + "step": 16066 + }, + { + "epoch": 0.8843084374484012, + "grad_norm": 0.6739519834518433, + "learning_rate": 5.927418258185176e-06, + "loss": 0.7134, + "step": 16067 + }, + { + "epoch": 0.8843634762507567, + "grad_norm": 0.7888758778572083, + "learning_rate": 5.926992309877944e-06, + "loss": 0.7396, + "step": 16068 + }, + { + "epoch": 0.8844185150531124, + "grad_norm": 0.6926425695419312, + "learning_rate": 5.926566354603687e-06, + "loss": 0.7629, + "step": 16069 + }, + { + "epoch": 0.8844735538554681, + "grad_norm": 0.7800819277763367, + "learning_rate": 5.926140392365602e-06, + "loss": 0.834, + "step": 16070 + }, + { + "epoch": 0.8845285926578238, + "grad_norm": 0.711067259311676, + "learning_rate": 5.925714423166897e-06, + "loss": 0.7401, + "step": 16071 + }, + { + "epoch": 0.8845836314601794, + "grad_norm": 0.645727276802063, + "learning_rate": 5.92528844701077e-06, + "loss": 0.719, + "step": 16072 + }, + { + "epoch": 0.8846386702625351, + "grad_norm": 0.7098503112792969, + "learning_rate": 5.924862463900421e-06, + "loss": 0.7838, + "step": 16073 + }, + { + "epoch": 0.8846937090648908, + "grad_norm": 1.0021764039993286, + "learning_rate": 5.924436473839055e-06, + "loss": 0.9824, + "step": 16074 + }, + { + "epoch": 0.8847487478672464, + "grad_norm": 0.657049298286438, + "learning_rate": 5.924010476829871e-06, + "loss": 0.6797, + "step": 16075 + }, + { + "epoch": 0.884803786669602, + "grad_norm": 0.6779371500015259, + "learning_rate": 5.923584472876072e-06, + "loss": 0.6697, + "step": 16076 + }, + { + "epoch": 0.8848588254719577, + "grad_norm": 0.6699591279029846, + "learning_rate": 5.923158461980859e-06, + "loss": 0.6779, + "step": 16077 + }, + { + "epoch": 0.8849138642743134, + "grad_norm": 0.6137605905532837, + "learning_rate": 5.922732444147434e-06, + "loss": 0.7195, + "step": 16078 + }, + { + "epoch": 0.8849689030766691, + "grad_norm": 0.6648411750793457, + "learning_rate": 5.922306419379e-06, + "loss": 0.7027, + "step": 16079 + }, + { + "epoch": 0.8850239418790247, + "grad_norm": 0.6827279329299927, + "learning_rate": 5.921880387678758e-06, + "loss": 0.7582, + "step": 16080 + }, + { + "epoch": 0.8850789806813804, + "grad_norm": 0.6747342944145203, + "learning_rate": 5.921454349049909e-06, + "loss": 0.7174, + "step": 16081 + }, + { + "epoch": 0.885134019483736, + "grad_norm": 0.7580771446228027, + "learning_rate": 5.921028303495654e-06, + "loss": 0.7457, + "step": 16082 + }, + { + "epoch": 0.8851890582860917, + "grad_norm": 0.8015843033790588, + "learning_rate": 5.920602251019198e-06, + "loss": 0.7084, + "step": 16083 + }, + { + "epoch": 0.8852440970884473, + "grad_norm": 0.7056819796562195, + "learning_rate": 5.9201761916237434e-06, + "loss": 0.7363, + "step": 16084 + }, + { + "epoch": 0.885299135890803, + "grad_norm": 0.6790309548377991, + "learning_rate": 5.91975012531249e-06, + "loss": 0.7966, + "step": 16085 + }, + { + "epoch": 0.8853541746931587, + "grad_norm": 0.6311555504798889, + "learning_rate": 5.91932405208864e-06, + "loss": 0.7099, + "step": 16086 + }, + { + "epoch": 0.8854092134955144, + "grad_norm": 0.8329381942749023, + "learning_rate": 5.918897971955397e-06, + "loss": 0.8127, + "step": 16087 + }, + { + "epoch": 0.88546425229787, + "grad_norm": 0.6810656785964966, + "learning_rate": 5.918471884915964e-06, + "loss": 0.8052, + "step": 16088 + }, + { + "epoch": 0.8855192911002256, + "grad_norm": 0.7434407472610474, + "learning_rate": 5.918045790973541e-06, + "loss": 0.7936, + "step": 16089 + }, + { + "epoch": 0.8855743299025813, + "grad_norm": 0.6702361106872559, + "learning_rate": 5.917619690131332e-06, + "loss": 0.744, + "step": 16090 + }, + { + "epoch": 0.885629368704937, + "grad_norm": 0.6321039199829102, + "learning_rate": 5.9171935823925384e-06, + "loss": 0.6949, + "step": 16091 + }, + { + "epoch": 0.8856844075072926, + "grad_norm": 0.7133139371871948, + "learning_rate": 5.916767467760365e-06, + "loss": 0.7113, + "step": 16092 + }, + { + "epoch": 0.8857394463096483, + "grad_norm": 0.7414994239807129, + "learning_rate": 5.916341346238011e-06, + "loss": 0.8203, + "step": 16093 + }, + { + "epoch": 0.885794485112004, + "grad_norm": 0.6744404435157776, + "learning_rate": 5.91591521782868e-06, + "loss": 0.7869, + "step": 16094 + }, + { + "epoch": 0.8858495239143597, + "grad_norm": 0.7183727025985718, + "learning_rate": 5.915489082535577e-06, + "loss": 0.7375, + "step": 16095 + }, + { + "epoch": 0.8859045627167152, + "grad_norm": 0.740496814250946, + "learning_rate": 5.9150629403619035e-06, + "loss": 0.8134, + "step": 16096 + }, + { + "epoch": 0.8859596015190709, + "grad_norm": 0.696391224861145, + "learning_rate": 5.9146367913108605e-06, + "loss": 0.7137, + "step": 16097 + }, + { + "epoch": 0.8860146403214266, + "grad_norm": 0.6438629031181335, + "learning_rate": 5.914210635385652e-06, + "loss": 0.698, + "step": 16098 + }, + { + "epoch": 0.8860696791237823, + "grad_norm": 0.7644562125205994, + "learning_rate": 5.913784472589482e-06, + "loss": 0.771, + "step": 16099 + }, + { + "epoch": 0.8861247179261379, + "grad_norm": 0.7281080484390259, + "learning_rate": 5.913358302925553e-06, + "loss": 0.7281, + "step": 16100 + }, + { + "epoch": 0.8861797567284936, + "grad_norm": 0.7768884301185608, + "learning_rate": 5.912932126397067e-06, + "loss": 0.7859, + "step": 16101 + }, + { + "epoch": 0.8862347955308493, + "grad_norm": 0.6960753202438354, + "learning_rate": 5.9125059430072275e-06, + "loss": 0.702, + "step": 16102 + }, + { + "epoch": 0.886289834333205, + "grad_norm": 0.6299503445625305, + "learning_rate": 5.912079752759238e-06, + "loss": 0.6558, + "step": 16103 + }, + { + "epoch": 0.8863448731355605, + "grad_norm": 0.7048517465591431, + "learning_rate": 5.9116535556563005e-06, + "loss": 0.6915, + "step": 16104 + }, + { + "epoch": 0.8863999119379162, + "grad_norm": 1.0701110363006592, + "learning_rate": 5.9112273517016195e-06, + "loss": 0.7721, + "step": 16105 + }, + { + "epoch": 0.8864549507402719, + "grad_norm": 0.6834803223609924, + "learning_rate": 5.910801140898396e-06, + "loss": 0.7474, + "step": 16106 + }, + { + "epoch": 0.8865099895426276, + "grad_norm": 0.6799558401107788, + "learning_rate": 5.9103749232498366e-06, + "loss": 0.655, + "step": 16107 + }, + { + "epoch": 0.8865650283449832, + "grad_norm": 0.9704173803329468, + "learning_rate": 5.9099486987591425e-06, + "loss": 0.911, + "step": 16108 + }, + { + "epoch": 0.8866200671473389, + "grad_norm": 0.7304208278656006, + "learning_rate": 5.909522467429518e-06, + "loss": 0.7315, + "step": 16109 + }, + { + "epoch": 0.8866751059496946, + "grad_norm": 0.6966742277145386, + "learning_rate": 5.909096229264164e-06, + "loss": 0.8658, + "step": 16110 + }, + { + "epoch": 0.8867301447520501, + "grad_norm": 0.667934238910675, + "learning_rate": 5.908669984266289e-06, + "loss": 0.6654, + "step": 16111 + }, + { + "epoch": 0.8867851835544058, + "grad_norm": 0.6689571142196655, + "learning_rate": 5.908243732439092e-06, + "loss": 0.8669, + "step": 16112 + }, + { + "epoch": 0.8868402223567615, + "grad_norm": 0.6054841876029968, + "learning_rate": 5.9078174737857795e-06, + "loss": 0.6063, + "step": 16113 + }, + { + "epoch": 0.8868952611591172, + "grad_norm": 0.6113643050193787, + "learning_rate": 5.907391208309553e-06, + "loss": 0.5609, + "step": 16114 + }, + { + "epoch": 0.8869502999614728, + "grad_norm": 0.6858495473861694, + "learning_rate": 5.906964936013617e-06, + "loss": 0.7651, + "step": 16115 + }, + { + "epoch": 0.8870053387638285, + "grad_norm": 0.6587123870849609, + "learning_rate": 5.906538656901175e-06, + "loss": 0.6834, + "step": 16116 + }, + { + "epoch": 0.8870603775661842, + "grad_norm": 0.7558240294456482, + "learning_rate": 5.906112370975432e-06, + "loss": 0.7911, + "step": 16117 + }, + { + "epoch": 0.8871154163685399, + "grad_norm": 0.7324747443199158, + "learning_rate": 5.90568607823959e-06, + "loss": 0.7132, + "step": 16118 + }, + { + "epoch": 0.8871704551708954, + "grad_norm": 0.6696536540985107, + "learning_rate": 5.9052597786968545e-06, + "loss": 0.6761, + "step": 16119 + }, + { + "epoch": 0.8872254939732511, + "grad_norm": 0.6724241375923157, + "learning_rate": 5.904833472350429e-06, + "loss": 0.7367, + "step": 16120 + }, + { + "epoch": 0.8872805327756068, + "grad_norm": 0.8458320498466492, + "learning_rate": 5.904407159203517e-06, + "loss": 0.8542, + "step": 16121 + }, + { + "epoch": 0.8873355715779625, + "grad_norm": 0.6740517020225525, + "learning_rate": 5.903980839259323e-06, + "loss": 0.711, + "step": 16122 + }, + { + "epoch": 0.8873906103803181, + "grad_norm": 0.7465891242027283, + "learning_rate": 5.9035545125210505e-06, + "loss": 0.8501, + "step": 16123 + }, + { + "epoch": 0.8874456491826738, + "grad_norm": 0.9160736203193665, + "learning_rate": 5.903128178991905e-06, + "loss": 0.8055, + "step": 16124 + }, + { + "epoch": 0.8875006879850295, + "grad_norm": 0.8358868956565857, + "learning_rate": 5.902701838675089e-06, + "loss": 0.7946, + "step": 16125 + }, + { + "epoch": 0.8875557267873851, + "grad_norm": 0.760776162147522, + "learning_rate": 5.902275491573808e-06, + "loss": 0.7944, + "step": 16126 + }, + { + "epoch": 0.8876107655897407, + "grad_norm": 0.605964720249176, + "learning_rate": 5.901849137691267e-06, + "loss": 0.6512, + "step": 16127 + }, + { + "epoch": 0.8876658043920964, + "grad_norm": 1.4000526666641235, + "learning_rate": 5.9014227770306676e-06, + "loss": 0.8047, + "step": 16128 + }, + { + "epoch": 0.8877208431944521, + "grad_norm": 0.7314043045043945, + "learning_rate": 5.900996409595217e-06, + "loss": 0.7674, + "step": 16129 + }, + { + "epoch": 0.8877758819968078, + "grad_norm": 1.3130903244018555, + "learning_rate": 5.900570035388117e-06, + "loss": 0.6922, + "step": 16130 + }, + { + "epoch": 0.8878309207991634, + "grad_norm": 0.6799461841583252, + "learning_rate": 5.900143654412576e-06, + "loss": 0.6921, + "step": 16131 + }, + { + "epoch": 0.8878859596015191, + "grad_norm": 0.6657615900039673, + "learning_rate": 5.899717266671794e-06, + "loss": 0.642, + "step": 16132 + }, + { + "epoch": 0.8879409984038747, + "grad_norm": 0.6838696599006653, + "learning_rate": 5.899290872168979e-06, + "loss": 0.7077, + "step": 16133 + }, + { + "epoch": 0.8879960372062304, + "grad_norm": 0.7769932150840759, + "learning_rate": 5.898864470907334e-06, + "loss": 0.8155, + "step": 16134 + }, + { + "epoch": 0.888051076008586, + "grad_norm": 0.6874750852584839, + "learning_rate": 5.898438062890065e-06, + "loss": 0.7597, + "step": 16135 + }, + { + "epoch": 0.8881061148109417, + "grad_norm": 0.7016799449920654, + "learning_rate": 5.898011648120375e-06, + "loss": 0.7973, + "step": 16136 + }, + { + "epoch": 0.8881611536132974, + "grad_norm": 0.743046760559082, + "learning_rate": 5.897585226601471e-06, + "loss": 0.7217, + "step": 16137 + }, + { + "epoch": 0.8882161924156531, + "grad_norm": 0.6889417767524719, + "learning_rate": 5.8971587983365566e-06, + "loss": 0.7067, + "step": 16138 + }, + { + "epoch": 0.8882712312180087, + "grad_norm": 0.631155788898468, + "learning_rate": 5.896732363328836e-06, + "loss": 0.8113, + "step": 16139 + }, + { + "epoch": 0.8883262700203644, + "grad_norm": 0.64445960521698, + "learning_rate": 5.8963059215815165e-06, + "loss": 0.7801, + "step": 16140 + }, + { + "epoch": 0.88838130882272, + "grad_norm": 1.6496944427490234, + "learning_rate": 5.895879473097801e-06, + "loss": 0.7997, + "step": 16141 + }, + { + "epoch": 0.8884363476250757, + "grad_norm": 0.8304264545440674, + "learning_rate": 5.895453017880893e-06, + "loss": 0.7333, + "step": 16142 + }, + { + "epoch": 0.8884913864274313, + "grad_norm": 0.659909725189209, + "learning_rate": 5.895026555934002e-06, + "loss": 0.7924, + "step": 16143 + }, + { + "epoch": 0.888546425229787, + "grad_norm": 0.7013087272644043, + "learning_rate": 5.894600087260332e-06, + "loss": 0.7704, + "step": 16144 + }, + { + "epoch": 0.8886014640321427, + "grad_norm": 0.6914981603622437, + "learning_rate": 5.894173611863085e-06, + "loss": 0.7377, + "step": 16145 + }, + { + "epoch": 0.8886565028344984, + "grad_norm": 0.8310953378677368, + "learning_rate": 5.89374712974547e-06, + "loss": 0.7893, + "step": 16146 + }, + { + "epoch": 0.888711541636854, + "grad_norm": 0.6522740721702576, + "learning_rate": 5.8933206409106895e-06, + "loss": 0.6915, + "step": 16147 + }, + { + "epoch": 0.8887665804392096, + "grad_norm": 0.6072065234184265, + "learning_rate": 5.89289414536195e-06, + "loss": 0.5744, + "step": 16148 + }, + { + "epoch": 0.8888216192415653, + "grad_norm": 0.5975275635719299, + "learning_rate": 5.892467643102458e-06, + "loss": 0.6281, + "step": 16149 + }, + { + "epoch": 0.888876658043921, + "grad_norm": 0.9194470643997192, + "learning_rate": 5.892041134135418e-06, + "loss": 0.6909, + "step": 16150 + }, + { + "epoch": 0.8889316968462766, + "grad_norm": 0.5815016031265259, + "learning_rate": 5.891614618464037e-06, + "loss": 0.6342, + "step": 16151 + }, + { + "epoch": 0.8889867356486323, + "grad_norm": 0.666912853717804, + "learning_rate": 5.891188096091517e-06, + "loss": 0.8043, + "step": 16152 + }, + { + "epoch": 0.889041774450988, + "grad_norm": 0.7708194851875305, + "learning_rate": 5.890761567021067e-06, + "loss": 0.811, + "step": 16153 + }, + { + "epoch": 0.8890968132533436, + "grad_norm": 0.7158086895942688, + "learning_rate": 5.890335031255892e-06, + "loss": 0.7615, + "step": 16154 + }, + { + "epoch": 0.8891518520556992, + "grad_norm": 0.7432296872138977, + "learning_rate": 5.889908488799194e-06, + "loss": 0.8236, + "step": 16155 + }, + { + "epoch": 0.8892068908580549, + "grad_norm": 0.7223588228225708, + "learning_rate": 5.889481939654185e-06, + "loss": 0.7022, + "step": 16156 + }, + { + "epoch": 0.8892619296604106, + "grad_norm": 0.7680726647377014, + "learning_rate": 5.889055383824067e-06, + "loss": 0.7329, + "step": 16157 + }, + { + "epoch": 0.8893169684627662, + "grad_norm": 0.679315984249115, + "learning_rate": 5.888628821312048e-06, + "loss": 0.7213, + "step": 16158 + }, + { + "epoch": 0.8893720072651219, + "grad_norm": 0.9369942545890808, + "learning_rate": 5.88820225212133e-06, + "loss": 0.7661, + "step": 16159 + }, + { + "epoch": 0.8894270460674776, + "grad_norm": 0.710561990737915, + "learning_rate": 5.887775676255123e-06, + "loss": 0.7869, + "step": 16160 + }, + { + "epoch": 0.8894820848698333, + "grad_norm": 0.6641749143600464, + "learning_rate": 5.887349093716632e-06, + "loss": 0.748, + "step": 16161 + }, + { + "epoch": 0.8895371236721888, + "grad_norm": 0.6491042971611023, + "learning_rate": 5.886922504509062e-06, + "loss": 0.7208, + "step": 16162 + }, + { + "epoch": 0.8895921624745445, + "grad_norm": 0.706950843334198, + "learning_rate": 5.886495908635622e-06, + "loss": 0.7579, + "step": 16163 + }, + { + "epoch": 0.8896472012769002, + "grad_norm": 0.7884653806686401, + "learning_rate": 5.886069306099514e-06, + "loss": 0.7289, + "step": 16164 + }, + { + "epoch": 0.8897022400792559, + "grad_norm": 0.7089719176292419, + "learning_rate": 5.885642696903948e-06, + "loss": 0.7796, + "step": 16165 + }, + { + "epoch": 0.8897572788816115, + "grad_norm": 0.7245141267776489, + "learning_rate": 5.8852160810521275e-06, + "loss": 0.7357, + "step": 16166 + }, + { + "epoch": 0.8898123176839672, + "grad_norm": 0.74881511926651, + "learning_rate": 5.884789458547258e-06, + "loss": 0.707, + "step": 16167 + }, + { + "epoch": 0.8898673564863229, + "grad_norm": 0.623418390750885, + "learning_rate": 5.88436282939255e-06, + "loss": 0.5891, + "step": 16168 + }, + { + "epoch": 0.8899223952886786, + "grad_norm": 0.8884579539299011, + "learning_rate": 5.883936193591208e-06, + "loss": 0.9109, + "step": 16169 + }, + { + "epoch": 0.8899774340910341, + "grad_norm": 0.7089982628822327, + "learning_rate": 5.883509551146437e-06, + "loss": 0.8435, + "step": 16170 + }, + { + "epoch": 0.8900324728933898, + "grad_norm": 0.6861062049865723, + "learning_rate": 5.883082902061444e-06, + "loss": 0.5662, + "step": 16171 + }, + { + "epoch": 0.8900875116957455, + "grad_norm": 0.7688663005828857, + "learning_rate": 5.882656246339438e-06, + "loss": 0.7483, + "step": 16172 + }, + { + "epoch": 0.8901425504981012, + "grad_norm": 0.6451166868209839, + "learning_rate": 5.882229583983623e-06, + "loss": 0.8061, + "step": 16173 + }, + { + "epoch": 0.8901975893004568, + "grad_norm": 0.668999195098877, + "learning_rate": 5.881802914997208e-06, + "loss": 0.698, + "step": 16174 + }, + { + "epoch": 0.8902526281028125, + "grad_norm": 0.5772761702537537, + "learning_rate": 5.881376239383398e-06, + "loss": 0.6718, + "step": 16175 + }, + { + "epoch": 0.8903076669051682, + "grad_norm": 0.6677992343902588, + "learning_rate": 5.880949557145399e-06, + "loss": 0.7835, + "step": 16176 + }, + { + "epoch": 0.8903627057075238, + "grad_norm": 0.7227941751480103, + "learning_rate": 5.880522868286419e-06, + "loss": 0.7166, + "step": 16177 + }, + { + "epoch": 0.8904177445098794, + "grad_norm": 0.7365387082099915, + "learning_rate": 5.880096172809665e-06, + "loss": 0.7743, + "step": 16178 + }, + { + "epoch": 0.8904727833122351, + "grad_norm": 0.7826401591300964, + "learning_rate": 5.8796694707183435e-06, + "loss": 0.7963, + "step": 16179 + }, + { + "epoch": 0.8905278221145908, + "grad_norm": 0.6749493479728699, + "learning_rate": 5.879242762015662e-06, + "loss": 0.7023, + "step": 16180 + }, + { + "epoch": 0.8905828609169465, + "grad_norm": 0.7109015583992004, + "learning_rate": 5.8788160467048275e-06, + "loss": 0.8432, + "step": 16181 + }, + { + "epoch": 0.8906378997193021, + "grad_norm": 0.737983226776123, + "learning_rate": 5.878389324789047e-06, + "loss": 0.807, + "step": 16182 + }, + { + "epoch": 0.8906929385216578, + "grad_norm": 0.676296055316925, + "learning_rate": 5.877962596271526e-06, + "loss": 0.7894, + "step": 16183 + }, + { + "epoch": 0.8907479773240135, + "grad_norm": 0.6367083191871643, + "learning_rate": 5.877535861155474e-06, + "loss": 0.6995, + "step": 16184 + }, + { + "epoch": 0.8908030161263691, + "grad_norm": 0.7221261262893677, + "learning_rate": 5.877109119444099e-06, + "loss": 0.8032, + "step": 16185 + }, + { + "epoch": 0.8908580549287247, + "grad_norm": 0.9108307957649231, + "learning_rate": 5.8766823711406055e-06, + "loss": 0.6949, + "step": 16186 + }, + { + "epoch": 0.8909130937310804, + "grad_norm": 0.5985114574432373, + "learning_rate": 5.876255616248201e-06, + "loss": 0.6981, + "step": 16187 + }, + { + "epoch": 0.8909681325334361, + "grad_norm": 0.6146743297576904, + "learning_rate": 5.875828854770096e-06, + "loss": 0.6869, + "step": 16188 + }, + { + "epoch": 0.8910231713357918, + "grad_norm": 1.2942423820495605, + "learning_rate": 5.875402086709494e-06, + "loss": 0.8142, + "step": 16189 + }, + { + "epoch": 0.8910782101381474, + "grad_norm": 0.6676996350288391, + "learning_rate": 5.874975312069605e-06, + "loss": 0.7476, + "step": 16190 + }, + { + "epoch": 0.891133248940503, + "grad_norm": 0.6210917234420776, + "learning_rate": 5.874548530853635e-06, + "loss": 0.7248, + "step": 16191 + }, + { + "epoch": 0.8911882877428587, + "grad_norm": 0.7242050766944885, + "learning_rate": 5.874121743064792e-06, + "loss": 0.8378, + "step": 16192 + }, + { + "epoch": 0.8912433265452144, + "grad_norm": 0.7029538750648499, + "learning_rate": 5.873694948706286e-06, + "loss": 0.7487, + "step": 16193 + }, + { + "epoch": 0.89129836534757, + "grad_norm": 0.7620413899421692, + "learning_rate": 5.87326814778132e-06, + "loss": 0.7102, + "step": 16194 + }, + { + "epoch": 0.8913534041499257, + "grad_norm": 0.7075870633125305, + "learning_rate": 5.872841340293105e-06, + "loss": 0.7771, + "step": 16195 + }, + { + "epoch": 0.8914084429522814, + "grad_norm": 0.706533670425415, + "learning_rate": 5.8724145262448495e-06, + "loss": 0.8173, + "step": 16196 + }, + { + "epoch": 0.891463481754637, + "grad_norm": 0.6712881326675415, + "learning_rate": 5.871987705639759e-06, + "loss": 0.7933, + "step": 16197 + }, + { + "epoch": 0.8915185205569927, + "grad_norm": 0.6531795859336853, + "learning_rate": 5.871560878481043e-06, + "loss": 0.8013, + "step": 16198 + }, + { + "epoch": 0.8915735593593483, + "grad_norm": 0.7291449308395386, + "learning_rate": 5.8711340447719086e-06, + "loss": 0.7379, + "step": 16199 + }, + { + "epoch": 0.891628598161704, + "grad_norm": 0.7187185883522034, + "learning_rate": 5.870707204515564e-06, + "loss": 0.7627, + "step": 16200 + }, + { + "epoch": 0.8916836369640596, + "grad_norm": 0.6900884509086609, + "learning_rate": 5.870280357715217e-06, + "loss": 0.7779, + "step": 16201 + }, + { + "epoch": 0.8917386757664153, + "grad_norm": 0.647745668888092, + "learning_rate": 5.869853504374075e-06, + "loss": 0.7616, + "step": 16202 + }, + { + "epoch": 0.891793714568771, + "grad_norm": 0.6717308759689331, + "learning_rate": 5.869426644495347e-06, + "loss": 0.6673, + "step": 16203 + }, + { + "epoch": 0.8918487533711267, + "grad_norm": 0.7069498300552368, + "learning_rate": 5.868999778082242e-06, + "loss": 0.7349, + "step": 16204 + }, + { + "epoch": 0.8919037921734823, + "grad_norm": 0.72287917137146, + "learning_rate": 5.868572905137967e-06, + "loss": 0.7272, + "step": 16205 + }, + { + "epoch": 0.891958830975838, + "grad_norm": 0.6938319802284241, + "learning_rate": 5.868146025665731e-06, + "loss": 0.8285, + "step": 16206 + }, + { + "epoch": 0.8920138697781936, + "grad_norm": 1.5806810855865479, + "learning_rate": 5.867719139668739e-06, + "loss": 0.6881, + "step": 16207 + }, + { + "epoch": 0.8920689085805493, + "grad_norm": 0.7156746983528137, + "learning_rate": 5.867292247150206e-06, + "loss": 0.8212, + "step": 16208 + }, + { + "epoch": 0.8921239473829049, + "grad_norm": 0.6833271980285645, + "learning_rate": 5.866865348113335e-06, + "loss": 0.7741, + "step": 16209 + }, + { + "epoch": 0.8921789861852606, + "grad_norm": 0.6972640156745911, + "learning_rate": 5.866438442561336e-06, + "loss": 0.9058, + "step": 16210 + }, + { + "epoch": 0.8922340249876163, + "grad_norm": 0.697632372379303, + "learning_rate": 5.866011530497419e-06, + "loss": 0.8319, + "step": 16211 + }, + { + "epoch": 0.892289063789972, + "grad_norm": 0.7249447703361511, + "learning_rate": 5.865584611924789e-06, + "loss": 0.7491, + "step": 16212 + }, + { + "epoch": 0.8923441025923275, + "grad_norm": 0.7094838619232178, + "learning_rate": 5.865157686846659e-06, + "loss": 0.7371, + "step": 16213 + }, + { + "epoch": 0.8923991413946832, + "grad_norm": 0.7066075205802917, + "learning_rate": 5.864730755266233e-06, + "loss": 0.8273, + "step": 16214 + }, + { + "epoch": 0.8924541801970389, + "grad_norm": 0.7090823650360107, + "learning_rate": 5.864303817186723e-06, + "loss": 0.7642, + "step": 16215 + }, + { + "epoch": 0.8925092189993946, + "grad_norm": 0.7501302361488342, + "learning_rate": 5.863876872611337e-06, + "loss": 0.716, + "step": 16216 + }, + { + "epoch": 0.8925642578017502, + "grad_norm": 0.7354205250740051, + "learning_rate": 5.863449921543284e-06, + "loss": 0.7011, + "step": 16217 + }, + { + "epoch": 0.8926192966041059, + "grad_norm": 0.9364498853683472, + "learning_rate": 5.863022963985773e-06, + "loss": 0.7843, + "step": 16218 + }, + { + "epoch": 0.8926743354064616, + "grad_norm": 0.6501762270927429, + "learning_rate": 5.86259599994201e-06, + "loss": 0.7215, + "step": 16219 + }, + { + "epoch": 0.8927293742088173, + "grad_norm": 0.6883421540260315, + "learning_rate": 5.862169029415208e-06, + "loss": 0.8631, + "step": 16220 + }, + { + "epoch": 0.8927844130111728, + "grad_norm": 0.7614920735359192, + "learning_rate": 5.861742052408575e-06, + "loss": 0.8031, + "step": 16221 + }, + { + "epoch": 0.8928394518135285, + "grad_norm": 0.7668151259422302, + "learning_rate": 5.861315068925319e-06, + "loss": 0.8024, + "step": 16222 + }, + { + "epoch": 0.8928944906158842, + "grad_norm": 0.6772485971450806, + "learning_rate": 5.860888078968649e-06, + "loss": 0.72, + "step": 16223 + }, + { + "epoch": 0.8929495294182399, + "grad_norm": 0.742821216583252, + "learning_rate": 5.860461082541775e-06, + "loss": 0.7432, + "step": 16224 + }, + { + "epoch": 0.8930045682205955, + "grad_norm": 0.7056832909584045, + "learning_rate": 5.860034079647907e-06, + "loss": 0.7089, + "step": 16225 + }, + { + "epoch": 0.8930596070229512, + "grad_norm": 0.6898871660232544, + "learning_rate": 5.859607070290252e-06, + "loss": 0.7505, + "step": 16226 + }, + { + "epoch": 0.8931146458253069, + "grad_norm": 0.6888724565505981, + "learning_rate": 5.859180054472019e-06, + "loss": 0.7638, + "step": 16227 + }, + { + "epoch": 0.8931696846276626, + "grad_norm": 0.9010199308395386, + "learning_rate": 5.858753032196421e-06, + "loss": 0.775, + "step": 16228 + }, + { + "epoch": 0.8932247234300181, + "grad_norm": 0.6443523168563843, + "learning_rate": 5.858326003466663e-06, + "loss": 0.6702, + "step": 16229 + }, + { + "epoch": 0.8932797622323738, + "grad_norm": 0.6245587468147278, + "learning_rate": 5.857898968285957e-06, + "loss": 0.6907, + "step": 16230 + }, + { + "epoch": 0.8933348010347295, + "grad_norm": 0.6724962592124939, + "learning_rate": 5.857471926657512e-06, + "loss": 0.7381, + "step": 16231 + }, + { + "epoch": 0.8933898398370852, + "grad_norm": 1.0391525030136108, + "learning_rate": 5.857044878584539e-06, + "loss": 0.7919, + "step": 16232 + }, + { + "epoch": 0.8934448786394408, + "grad_norm": 0.8852080702781677, + "learning_rate": 5.8566178240702455e-06, + "loss": 0.8572, + "step": 16233 + }, + { + "epoch": 0.8934999174417965, + "grad_norm": 0.7087608575820923, + "learning_rate": 5.856190763117843e-06, + "loss": 0.7739, + "step": 16234 + }, + { + "epoch": 0.8935549562441522, + "grad_norm": 0.6688494086265564, + "learning_rate": 5.855763695730536e-06, + "loss": 0.6978, + "step": 16235 + }, + { + "epoch": 0.8936099950465078, + "grad_norm": 0.6174076795578003, + "learning_rate": 5.8553366219115415e-06, + "loss": 0.7838, + "step": 16236 + }, + { + "epoch": 0.8936650338488634, + "grad_norm": 0.6558929681777954, + "learning_rate": 5.854909541664065e-06, + "loss": 0.6071, + "step": 16237 + }, + { + "epoch": 0.8937200726512191, + "grad_norm": 0.678820013999939, + "learning_rate": 5.854482454991317e-06, + "loss": 0.792, + "step": 16238 + }, + { + "epoch": 0.8937751114535748, + "grad_norm": 0.6893227696418762, + "learning_rate": 5.854055361896507e-06, + "loss": 0.7182, + "step": 16239 + }, + { + "epoch": 0.8938301502559304, + "grad_norm": 0.6799605488777161, + "learning_rate": 5.853628262382847e-06, + "loss": 0.7011, + "step": 16240 + }, + { + "epoch": 0.8938851890582861, + "grad_norm": 0.7625865340232849, + "learning_rate": 5.853201156453544e-06, + "loss": 0.9217, + "step": 16241 + }, + { + "epoch": 0.8939402278606418, + "grad_norm": 0.6532776355743408, + "learning_rate": 5.8527740441118104e-06, + "loss": 0.7527, + "step": 16242 + }, + { + "epoch": 0.8939952666629974, + "grad_norm": 0.7904673218727112, + "learning_rate": 5.852346925360854e-06, + "loss": 0.8359, + "step": 16243 + }, + { + "epoch": 0.894050305465353, + "grad_norm": 0.7274239659309387, + "learning_rate": 5.851919800203888e-06, + "loss": 0.7335, + "step": 16244 + }, + { + "epoch": 0.8941053442677087, + "grad_norm": 0.843180239200592, + "learning_rate": 5.85149266864412e-06, + "loss": 0.7913, + "step": 16245 + }, + { + "epoch": 0.8941603830700644, + "grad_norm": 0.7756116390228271, + "learning_rate": 5.851065530684763e-06, + "loss": 0.7508, + "step": 16246 + }, + { + "epoch": 0.8942154218724201, + "grad_norm": 0.7086586952209473, + "learning_rate": 5.850638386329022e-06, + "loss": 0.7754, + "step": 16247 + }, + { + "epoch": 0.8942704606747757, + "grad_norm": 0.9373844265937805, + "learning_rate": 5.850211235580112e-06, + "loss": 0.7391, + "step": 16248 + }, + { + "epoch": 0.8943254994771314, + "grad_norm": 0.6847782135009766, + "learning_rate": 5.849784078441243e-06, + "loss": 0.6655, + "step": 16249 + }, + { + "epoch": 0.894380538279487, + "grad_norm": 0.6071921586990356, + "learning_rate": 5.849356914915624e-06, + "loss": 0.6933, + "step": 16250 + }, + { + "epoch": 0.8944355770818427, + "grad_norm": 0.712497889995575, + "learning_rate": 5.848929745006464e-06, + "loss": 0.8025, + "step": 16251 + }, + { + "epoch": 0.8944906158841983, + "grad_norm": 0.5942297577857971, + "learning_rate": 5.848502568716976e-06, + "loss": 0.621, + "step": 16252 + }, + { + "epoch": 0.894545654686554, + "grad_norm": 0.6706910729408264, + "learning_rate": 5.84807538605037e-06, + "loss": 0.7664, + "step": 16253 + }, + { + "epoch": 0.8946006934889097, + "grad_norm": 0.7494041919708252, + "learning_rate": 5.847648197009858e-06, + "loss": 0.7418, + "step": 16254 + }, + { + "epoch": 0.8946557322912654, + "grad_norm": 0.7373181581497192, + "learning_rate": 5.847221001598646e-06, + "loss": 0.7133, + "step": 16255 + }, + { + "epoch": 0.894710771093621, + "grad_norm": 0.8178310394287109, + "learning_rate": 5.84679379981995e-06, + "loss": 0.8562, + "step": 16256 + }, + { + "epoch": 0.8947658098959766, + "grad_norm": 0.6232174634933472, + "learning_rate": 5.8463665916769785e-06, + "loss": 0.7723, + "step": 16257 + }, + { + "epoch": 0.8948208486983323, + "grad_norm": 0.6817423701286316, + "learning_rate": 5.845939377172942e-06, + "loss": 0.6706, + "step": 16258 + }, + { + "epoch": 0.894875887500688, + "grad_norm": 0.8091211318969727, + "learning_rate": 5.845512156311051e-06, + "loss": 0.7453, + "step": 16259 + }, + { + "epoch": 0.8949309263030436, + "grad_norm": 0.7013124227523804, + "learning_rate": 5.845084929094518e-06, + "loss": 0.7104, + "step": 16260 + }, + { + "epoch": 0.8949859651053993, + "grad_norm": 0.8373565077781677, + "learning_rate": 5.844657695526552e-06, + "loss": 0.8181, + "step": 16261 + }, + { + "epoch": 0.895041003907755, + "grad_norm": 0.7339033484458923, + "learning_rate": 5.844230455610364e-06, + "loss": 0.7465, + "step": 16262 + }, + { + "epoch": 0.8950960427101107, + "grad_norm": 0.7680185437202454, + "learning_rate": 5.843803209349167e-06, + "loss": 0.7962, + "step": 16263 + }, + { + "epoch": 0.8951510815124663, + "grad_norm": 0.7889038920402527, + "learning_rate": 5.843375956746171e-06, + "loss": 0.7846, + "step": 16264 + }, + { + "epoch": 0.8952061203148219, + "grad_norm": 0.7361034154891968, + "learning_rate": 5.842948697804587e-06, + "loss": 0.7077, + "step": 16265 + }, + { + "epoch": 0.8952611591171776, + "grad_norm": 0.7543736100196838, + "learning_rate": 5.8425214325276255e-06, + "loss": 0.748, + "step": 16266 + }, + { + "epoch": 0.8953161979195333, + "grad_norm": 0.6194653511047363, + "learning_rate": 5.842094160918499e-06, + "loss": 0.8032, + "step": 16267 + }, + { + "epoch": 0.8953712367218889, + "grad_norm": 0.789439857006073, + "learning_rate": 5.841666882980418e-06, + "loss": 0.745, + "step": 16268 + }, + { + "epoch": 0.8954262755242446, + "grad_norm": 0.6813651919364929, + "learning_rate": 5.841239598716595e-06, + "loss": 0.7169, + "step": 16269 + }, + { + "epoch": 0.8954813143266003, + "grad_norm": 0.6128388047218323, + "learning_rate": 5.84081230813024e-06, + "loss": 0.7109, + "step": 16270 + }, + { + "epoch": 0.895536353128956, + "grad_norm": 0.7170562148094177, + "learning_rate": 5.8403850112245645e-06, + "loss": 0.7461, + "step": 16271 + }, + { + "epoch": 0.8955913919313115, + "grad_norm": 0.7415544986724854, + "learning_rate": 5.83995770800278e-06, + "loss": 0.7786, + "step": 16272 + }, + { + "epoch": 0.8956464307336672, + "grad_norm": 0.5996596813201904, + "learning_rate": 5.8395303984680985e-06, + "loss": 0.6926, + "step": 16273 + }, + { + "epoch": 0.8957014695360229, + "grad_norm": 0.7399057149887085, + "learning_rate": 5.839103082623732e-06, + "loss": 0.6953, + "step": 16274 + }, + { + "epoch": 0.8957565083383786, + "grad_norm": 0.7090675830841064, + "learning_rate": 5.838675760472888e-06, + "loss": 0.7638, + "step": 16275 + }, + { + "epoch": 0.8958115471407342, + "grad_norm": 0.6865240931510925, + "learning_rate": 5.838248432018785e-06, + "loss": 0.7023, + "step": 16276 + }, + { + "epoch": 0.8958665859430899, + "grad_norm": 0.604603111743927, + "learning_rate": 5.83782109726463e-06, + "loss": 0.6845, + "step": 16277 + }, + { + "epoch": 0.8959216247454456, + "grad_norm": 0.6722466349601746, + "learning_rate": 5.837393756213636e-06, + "loss": 0.7794, + "step": 16278 + }, + { + "epoch": 0.8959766635478013, + "grad_norm": 0.683106541633606, + "learning_rate": 5.836966408869014e-06, + "loss": 0.7988, + "step": 16279 + }, + { + "epoch": 0.8960317023501568, + "grad_norm": 0.7195246815681458, + "learning_rate": 5.8365390552339774e-06, + "loss": 0.7532, + "step": 16280 + }, + { + "epoch": 0.8960867411525125, + "grad_norm": 0.6945170760154724, + "learning_rate": 5.836111695311737e-06, + "loss": 0.7515, + "step": 16281 + }, + { + "epoch": 0.8961417799548682, + "grad_norm": 0.7424500584602356, + "learning_rate": 5.8356843291055065e-06, + "loss": 0.6813, + "step": 16282 + }, + { + "epoch": 0.8961968187572238, + "grad_norm": 0.673574686050415, + "learning_rate": 5.835256956618495e-06, + "loss": 0.7234, + "step": 16283 + }, + { + "epoch": 0.8962518575595795, + "grad_norm": 0.6816020011901855, + "learning_rate": 5.834829577853913e-06, + "loss": 0.7935, + "step": 16284 + }, + { + "epoch": 0.8963068963619352, + "grad_norm": 0.7598507404327393, + "learning_rate": 5.834402192814979e-06, + "loss": 0.8141, + "step": 16285 + }, + { + "epoch": 0.8963619351642909, + "grad_norm": 0.7720094323158264, + "learning_rate": 5.8339748015049e-06, + "loss": 0.8241, + "step": 16286 + }, + { + "epoch": 0.8964169739666464, + "grad_norm": 0.7409939169883728, + "learning_rate": 5.833547403926891e-06, + "loss": 0.7196, + "step": 16287 + }, + { + "epoch": 0.8964720127690021, + "grad_norm": 0.670557975769043, + "learning_rate": 5.83312000008416e-06, + "loss": 0.7931, + "step": 16288 + }, + { + "epoch": 0.8965270515713578, + "grad_norm": 0.6361322999000549, + "learning_rate": 5.832692589979925e-06, + "loss": 0.7292, + "step": 16289 + }, + { + "epoch": 0.8965820903737135, + "grad_norm": 0.6359429359436035, + "learning_rate": 5.832265173617393e-06, + "loss": 0.7705, + "step": 16290 + }, + { + "epoch": 0.8966371291760691, + "grad_norm": 0.7249873876571655, + "learning_rate": 5.831837750999781e-06, + "loss": 0.7741, + "step": 16291 + }, + { + "epoch": 0.8966921679784248, + "grad_norm": 0.6784750819206238, + "learning_rate": 5.831410322130296e-06, + "loss": 0.8434, + "step": 16292 + }, + { + "epoch": 0.8967472067807805, + "grad_norm": 0.7696726322174072, + "learning_rate": 5.830982887012157e-06, + "loss": 0.8218, + "step": 16293 + }, + { + "epoch": 0.8968022455831361, + "grad_norm": 0.5974952578544617, + "learning_rate": 5.830555445648572e-06, + "loss": 0.5864, + "step": 16294 + }, + { + "epoch": 0.8968572843854917, + "grad_norm": 1.4088029861450195, + "learning_rate": 5.830127998042755e-06, + "loss": 0.705, + "step": 16295 + }, + { + "epoch": 0.8969123231878474, + "grad_norm": 0.621288001537323, + "learning_rate": 5.8297005441979174e-06, + "loss": 0.6799, + "step": 16296 + }, + { + "epoch": 0.8969673619902031, + "grad_norm": 0.7229657173156738, + "learning_rate": 5.829273084117272e-06, + "loss": 0.7222, + "step": 16297 + }, + { + "epoch": 0.8970224007925588, + "grad_norm": 0.7076373100280762, + "learning_rate": 5.828845617804033e-06, + "loss": 0.7813, + "step": 16298 + }, + { + "epoch": 0.8970774395949144, + "grad_norm": 0.6931923627853394, + "learning_rate": 5.828418145261412e-06, + "loss": 0.6936, + "step": 16299 + }, + { + "epoch": 0.8971324783972701, + "grad_norm": 0.6719018220901489, + "learning_rate": 5.827990666492621e-06, + "loss": 0.747, + "step": 16300 + }, + { + "epoch": 0.8971875171996258, + "grad_norm": 0.6288262605667114, + "learning_rate": 5.827563181500875e-06, + "loss": 0.7731, + "step": 16301 + }, + { + "epoch": 0.8972425560019814, + "grad_norm": 0.6359015703201294, + "learning_rate": 5.8271356902893864e-06, + "loss": 0.6408, + "step": 16302 + }, + { + "epoch": 0.897297594804337, + "grad_norm": 0.6485893726348877, + "learning_rate": 5.826708192861365e-06, + "loss": 0.7739, + "step": 16303 + }, + { + "epoch": 0.8973526336066927, + "grad_norm": 0.7622523903846741, + "learning_rate": 5.826280689220027e-06, + "loss": 0.6087, + "step": 16304 + }, + { + "epoch": 0.8974076724090484, + "grad_norm": 0.650451123714447, + "learning_rate": 5.825853179368586e-06, + "loss": 0.6953, + "step": 16305 + }, + { + "epoch": 0.8974627112114041, + "grad_norm": 0.7266152501106262, + "learning_rate": 5.8254256633102535e-06, + "loss": 0.7762, + "step": 16306 + }, + { + "epoch": 0.8975177500137597, + "grad_norm": 0.6428011059761047, + "learning_rate": 5.824998141048241e-06, + "loss": 0.67, + "step": 16307 + }, + { + "epoch": 0.8975727888161154, + "grad_norm": 0.6991005539894104, + "learning_rate": 5.824570612585764e-06, + "loss": 0.79, + "step": 16308 + }, + { + "epoch": 0.897627827618471, + "grad_norm": 0.6385177969932556, + "learning_rate": 5.824143077926034e-06, + "loss": 0.6993, + "step": 16309 + }, + { + "epoch": 0.8976828664208267, + "grad_norm": 0.6891354322433472, + "learning_rate": 5.823715537072268e-06, + "loss": 0.7155, + "step": 16310 + }, + { + "epoch": 0.8977379052231823, + "grad_norm": 0.7448866367340088, + "learning_rate": 5.823287990027674e-06, + "loss": 0.674, + "step": 16311 + }, + { + "epoch": 0.897792944025538, + "grad_norm": 0.6892699599266052, + "learning_rate": 5.822860436795468e-06, + "loss": 0.7719, + "step": 16312 + }, + { + "epoch": 0.8978479828278937, + "grad_norm": 1.2621982097625732, + "learning_rate": 5.822432877378864e-06, + "loss": 0.6985, + "step": 16313 + }, + { + "epoch": 0.8979030216302494, + "grad_norm": 0.635137677192688, + "learning_rate": 5.822005311781075e-06, + "loss": 0.748, + "step": 16314 + }, + { + "epoch": 0.897958060432605, + "grad_norm": 0.8765038847923279, + "learning_rate": 5.821577740005313e-06, + "loss": 0.798, + "step": 16315 + }, + { + "epoch": 0.8980130992349606, + "grad_norm": 0.734259843826294, + "learning_rate": 5.8211501620547926e-06, + "loss": 0.7462, + "step": 16316 + }, + { + "epoch": 0.8980681380373163, + "grad_norm": 0.7057023048400879, + "learning_rate": 5.820722577932729e-06, + "loss": 0.8057, + "step": 16317 + }, + { + "epoch": 0.898123176839672, + "grad_norm": 0.7444988489151001, + "learning_rate": 5.8202949876423344e-06, + "loss": 0.7023, + "step": 16318 + }, + { + "epoch": 0.8981782156420276, + "grad_norm": 0.7205658555030823, + "learning_rate": 5.819867391186821e-06, + "loss": 0.6949, + "step": 16319 + }, + { + "epoch": 0.8982332544443833, + "grad_norm": 0.7285442352294922, + "learning_rate": 5.819439788569403e-06, + "loss": 0.6966, + "step": 16320 + }, + { + "epoch": 0.898288293246739, + "grad_norm": 0.7485014796257019, + "learning_rate": 5.819012179793295e-06, + "loss": 0.7676, + "step": 16321 + }, + { + "epoch": 0.8983433320490947, + "grad_norm": 0.7867493629455566, + "learning_rate": 5.818584564861712e-06, + "loss": 0.7601, + "step": 16322 + }, + { + "epoch": 0.8983983708514502, + "grad_norm": 0.6732510328292847, + "learning_rate": 5.818156943777867e-06, + "loss": 0.7068, + "step": 16323 + }, + { + "epoch": 0.8984534096538059, + "grad_norm": 0.648333728313446, + "learning_rate": 5.817729316544971e-06, + "loss": 0.762, + "step": 16324 + }, + { + "epoch": 0.8985084484561616, + "grad_norm": 0.9556308388710022, + "learning_rate": 5.817301683166241e-06, + "loss": 0.7177, + "step": 16325 + }, + { + "epoch": 0.8985634872585172, + "grad_norm": 0.7043321132659912, + "learning_rate": 5.816874043644891e-06, + "loss": 0.7206, + "step": 16326 + }, + { + "epoch": 0.8986185260608729, + "grad_norm": 0.6318387985229492, + "learning_rate": 5.816446397984136e-06, + "loss": 0.7306, + "step": 16327 + }, + { + "epoch": 0.8986735648632286, + "grad_norm": 0.7083125114440918, + "learning_rate": 5.816018746187186e-06, + "loss": 0.7204, + "step": 16328 + }, + { + "epoch": 0.8987286036655843, + "grad_norm": 0.6810079216957092, + "learning_rate": 5.815591088257259e-06, + "loss": 0.8575, + "step": 16329 + }, + { + "epoch": 0.8987836424679398, + "grad_norm": 0.7081509232521057, + "learning_rate": 5.815163424197567e-06, + "loss": 0.7198, + "step": 16330 + }, + { + "epoch": 0.8988386812702955, + "grad_norm": 1.1525241136550903, + "learning_rate": 5.814735754011325e-06, + "loss": 0.6222, + "step": 16331 + }, + { + "epoch": 0.8988937200726512, + "grad_norm": 0.724651575088501, + "learning_rate": 5.8143080777017456e-06, + "loss": 0.6937, + "step": 16332 + }, + { + "epoch": 0.8989487588750069, + "grad_norm": 0.7607846260070801, + "learning_rate": 5.813880395272047e-06, + "loss": 0.7552, + "step": 16333 + }, + { + "epoch": 0.8990037976773625, + "grad_norm": 0.6370975375175476, + "learning_rate": 5.813452706725441e-06, + "loss": 0.753, + "step": 16334 + }, + { + "epoch": 0.8990588364797182, + "grad_norm": 0.7018759250640869, + "learning_rate": 5.813025012065141e-06, + "loss": 0.8026, + "step": 16335 + }, + { + "epoch": 0.8991138752820739, + "grad_norm": 0.7106475234031677, + "learning_rate": 5.812597311294363e-06, + "loss": 0.6558, + "step": 16336 + }, + { + "epoch": 0.8991689140844296, + "grad_norm": 0.715859591960907, + "learning_rate": 5.812169604416321e-06, + "loss": 0.6581, + "step": 16337 + }, + { + "epoch": 0.8992239528867851, + "grad_norm": 1.1907461881637573, + "learning_rate": 5.811741891434231e-06, + "loss": 0.7618, + "step": 16338 + }, + { + "epoch": 0.8992789916891408, + "grad_norm": 0.6529675722122192, + "learning_rate": 5.811314172351304e-06, + "loss": 0.7317, + "step": 16339 + }, + { + "epoch": 0.8993340304914965, + "grad_norm": 0.6876475811004639, + "learning_rate": 5.810886447170758e-06, + "loss": 0.724, + "step": 16340 + }, + { + "epoch": 0.8993890692938522, + "grad_norm": 0.6798568964004517, + "learning_rate": 5.8104587158958084e-06, + "loss": 0.7173, + "step": 16341 + }, + { + "epoch": 0.8994441080962078, + "grad_norm": 1.0269527435302734, + "learning_rate": 5.8100309785296664e-06, + "loss": 0.737, + "step": 16342 + }, + { + "epoch": 0.8994991468985635, + "grad_norm": 0.7578931450843811, + "learning_rate": 5.809603235075547e-06, + "loss": 0.8119, + "step": 16343 + }, + { + "epoch": 0.8995541857009192, + "grad_norm": 0.7005903124809265, + "learning_rate": 5.8091754855366675e-06, + "loss": 0.7548, + "step": 16344 + }, + { + "epoch": 0.8996092245032749, + "grad_norm": 0.6554011702537537, + "learning_rate": 5.808747729916242e-06, + "loss": 0.6705, + "step": 16345 + }, + { + "epoch": 0.8996642633056304, + "grad_norm": 0.6242305040359497, + "learning_rate": 5.808319968217485e-06, + "loss": 0.7278, + "step": 16346 + }, + { + "epoch": 0.8997193021079861, + "grad_norm": 0.9426488876342773, + "learning_rate": 5.807892200443611e-06, + "loss": 0.7129, + "step": 16347 + }, + { + "epoch": 0.8997743409103418, + "grad_norm": 0.671768307685852, + "learning_rate": 5.807464426597835e-06, + "loss": 0.7688, + "step": 16348 + }, + { + "epoch": 0.8998293797126975, + "grad_norm": 0.672828197479248, + "learning_rate": 5.807036646683374e-06, + "loss": 0.8089, + "step": 16349 + }, + { + "epoch": 0.8998844185150531, + "grad_norm": 0.6689574718475342, + "learning_rate": 5.806608860703441e-06, + "loss": 0.7523, + "step": 16350 + }, + { + "epoch": 0.8999394573174088, + "grad_norm": 0.7440112233161926, + "learning_rate": 5.8061810686612514e-06, + "loss": 0.7802, + "step": 16351 + }, + { + "epoch": 0.8999944961197645, + "grad_norm": 0.6520549654960632, + "learning_rate": 5.8057532705600206e-06, + "loss": 0.6582, + "step": 16352 + }, + { + "epoch": 0.9000495349221201, + "grad_norm": 0.73201984167099, + "learning_rate": 5.805325466402965e-06, + "loss": 0.685, + "step": 16353 + }, + { + "epoch": 0.9001045737244757, + "grad_norm": 0.6377021074295044, + "learning_rate": 5.804897656193298e-06, + "loss": 0.802, + "step": 16354 + }, + { + "epoch": 0.9001596125268314, + "grad_norm": 0.6761305928230286, + "learning_rate": 5.8044698399342355e-06, + "loss": 0.7005, + "step": 16355 + }, + { + "epoch": 0.9002146513291871, + "grad_norm": 0.6993832588195801, + "learning_rate": 5.804042017628992e-06, + "loss": 0.848, + "step": 16356 + }, + { + "epoch": 0.9002696901315428, + "grad_norm": 0.6453230381011963, + "learning_rate": 5.803614189280786e-06, + "loss": 0.7525, + "step": 16357 + }, + { + "epoch": 0.9003247289338984, + "grad_norm": 0.620090663433075, + "learning_rate": 5.80318635489283e-06, + "loss": 0.668, + "step": 16358 + }, + { + "epoch": 0.900379767736254, + "grad_norm": 0.7285178899765015, + "learning_rate": 5.80275851446834e-06, + "loss": 0.7936, + "step": 16359 + }, + { + "epoch": 0.9004348065386097, + "grad_norm": 0.7555778622627258, + "learning_rate": 5.802330668010532e-06, + "loss": 0.7493, + "step": 16360 + }, + { + "epoch": 0.9004898453409654, + "grad_norm": 0.8527930974960327, + "learning_rate": 5.801902815522622e-06, + "loss": 0.7609, + "step": 16361 + }, + { + "epoch": 0.900544884143321, + "grad_norm": 0.9000833034515381, + "learning_rate": 5.801474957007824e-06, + "loss": 0.7719, + "step": 16362 + }, + { + "epoch": 0.9005999229456767, + "grad_norm": 0.7018047571182251, + "learning_rate": 5.801047092469356e-06, + "loss": 0.9116, + "step": 16363 + }, + { + "epoch": 0.9006549617480324, + "grad_norm": 0.7304424047470093, + "learning_rate": 5.800619221910432e-06, + "loss": 0.7339, + "step": 16364 + }, + { + "epoch": 0.900710000550388, + "grad_norm": 0.6775519251823425, + "learning_rate": 5.80019134533427e-06, + "loss": 0.6962, + "step": 16365 + }, + { + "epoch": 0.9007650393527437, + "grad_norm": 0.717262864112854, + "learning_rate": 5.799763462744081e-06, + "loss": 0.7518, + "step": 16366 + }, + { + "epoch": 0.9008200781550993, + "grad_norm": 0.9107728004455566, + "learning_rate": 5.799335574143086e-06, + "loss": 0.7778, + "step": 16367 + }, + { + "epoch": 0.900875116957455, + "grad_norm": 0.760864794254303, + "learning_rate": 5.798907679534498e-06, + "loss": 0.8022, + "step": 16368 + }, + { + "epoch": 0.9009301557598106, + "grad_norm": 0.6746829152107239, + "learning_rate": 5.798479778921533e-06, + "loss": 0.7105, + "step": 16369 + }, + { + "epoch": 0.9009851945621663, + "grad_norm": 0.6654032468795776, + "learning_rate": 5.798051872307409e-06, + "loss": 0.7946, + "step": 16370 + }, + { + "epoch": 0.901040233364522, + "grad_norm": 0.6802057027816772, + "learning_rate": 5.79762395969534e-06, + "loss": 0.7332, + "step": 16371 + }, + { + "epoch": 0.9010952721668777, + "grad_norm": 0.6586911678314209, + "learning_rate": 5.797196041088542e-06, + "loss": 0.7477, + "step": 16372 + }, + { + "epoch": 0.9011503109692333, + "grad_norm": 0.7057276964187622, + "learning_rate": 5.796768116490233e-06, + "loss": 0.7468, + "step": 16373 + }, + { + "epoch": 0.901205349771589, + "grad_norm": 0.6421266794204712, + "learning_rate": 5.796340185903629e-06, + "loss": 0.7073, + "step": 16374 + }, + { + "epoch": 0.9012603885739446, + "grad_norm": 0.752708911895752, + "learning_rate": 5.795912249331944e-06, + "loss": 0.723, + "step": 16375 + }, + { + "epoch": 0.9013154273763003, + "grad_norm": 0.6225395798683167, + "learning_rate": 5.795484306778395e-06, + "loss": 0.6317, + "step": 16376 + }, + { + "epoch": 0.9013704661786559, + "grad_norm": 0.6598436236381531, + "learning_rate": 5.795056358246201e-06, + "loss": 0.7429, + "step": 16377 + }, + { + "epoch": 0.9014255049810116, + "grad_norm": 0.6590927243232727, + "learning_rate": 5.794628403738576e-06, + "loss": 0.8009, + "step": 16378 + }, + { + "epoch": 0.9014805437833673, + "grad_norm": 0.7180964350700378, + "learning_rate": 5.794200443258736e-06, + "loss": 0.7438, + "step": 16379 + }, + { + "epoch": 0.901535582585723, + "grad_norm": 0.7282911539077759, + "learning_rate": 5.7937724768098965e-06, + "loss": 0.8067, + "step": 16380 + }, + { + "epoch": 0.9015906213880786, + "grad_norm": 0.7015960812568665, + "learning_rate": 5.793344504395278e-06, + "loss": 0.7094, + "step": 16381 + }, + { + "epoch": 0.9016456601904342, + "grad_norm": 0.7785707116127014, + "learning_rate": 5.792916526018093e-06, + "loss": 0.6816, + "step": 16382 + }, + { + "epoch": 0.9017006989927899, + "grad_norm": 0.5919010639190674, + "learning_rate": 5.79248854168156e-06, + "loss": 0.6592, + "step": 16383 + }, + { + "epoch": 0.9017557377951456, + "grad_norm": 0.8561486005783081, + "learning_rate": 5.792060551388894e-06, + "loss": 0.8305, + "step": 16384 + }, + { + "epoch": 0.9018107765975012, + "grad_norm": 0.6228078603744507, + "learning_rate": 5.791632555143315e-06, + "loss": 0.6535, + "step": 16385 + }, + { + "epoch": 0.9018658153998569, + "grad_norm": 0.7751365303993225, + "learning_rate": 5.791204552948037e-06, + "loss": 0.7738, + "step": 16386 + }, + { + "epoch": 0.9019208542022126, + "grad_norm": 0.6248418092727661, + "learning_rate": 5.790776544806277e-06, + "loss": 0.7464, + "step": 16387 + }, + { + "epoch": 0.9019758930045683, + "grad_norm": 0.6654400825500488, + "learning_rate": 5.790348530721253e-06, + "loss": 0.7345, + "step": 16388 + }, + { + "epoch": 0.9020309318069238, + "grad_norm": 0.7654403448104858, + "learning_rate": 5.789920510696181e-06, + "loss": 0.8147, + "step": 16389 + }, + { + "epoch": 0.9020859706092795, + "grad_norm": 0.630255937576294, + "learning_rate": 5.789492484734277e-06, + "loss": 0.7007, + "step": 16390 + }, + { + "epoch": 0.9021410094116352, + "grad_norm": 0.7767404317855835, + "learning_rate": 5.7890644528387595e-06, + "loss": 0.7547, + "step": 16391 + }, + { + "epoch": 0.9021960482139909, + "grad_norm": 0.8416337966918945, + "learning_rate": 5.788636415012842e-06, + "loss": 0.8771, + "step": 16392 + }, + { + "epoch": 0.9022510870163465, + "grad_norm": 0.6467620134353638, + "learning_rate": 5.7882083712597485e-06, + "loss": 0.651, + "step": 16393 + }, + { + "epoch": 0.9023061258187022, + "grad_norm": 0.8612825870513916, + "learning_rate": 5.78778032158269e-06, + "loss": 0.8127, + "step": 16394 + }, + { + "epoch": 0.9023611646210579, + "grad_norm": 0.7267431020736694, + "learning_rate": 5.787352265984886e-06, + "loss": 0.7616, + "step": 16395 + }, + { + "epoch": 0.9024162034234136, + "grad_norm": 0.736853837966919, + "learning_rate": 5.786924204469551e-06, + "loss": 0.7907, + "step": 16396 + }, + { + "epoch": 0.9024712422257691, + "grad_norm": 0.6155600547790527, + "learning_rate": 5.7864961370399065e-06, + "loss": 0.6326, + "step": 16397 + }, + { + "epoch": 0.9025262810281248, + "grad_norm": 0.7266953587532043, + "learning_rate": 5.7860680636991675e-06, + "loss": 0.817, + "step": 16398 + }, + { + "epoch": 0.9025813198304805, + "grad_norm": 0.7350000143051147, + "learning_rate": 5.785639984450551e-06, + "loss": 0.6602, + "step": 16399 + }, + { + "epoch": 0.9026363586328362, + "grad_norm": 1.1884604692459106, + "learning_rate": 5.785211899297274e-06, + "loss": 0.8311, + "step": 16400 + }, + { + "epoch": 0.9026913974351918, + "grad_norm": 0.7405989170074463, + "learning_rate": 5.784783808242555e-06, + "loss": 0.8444, + "step": 16401 + }, + { + "epoch": 0.9027464362375475, + "grad_norm": 0.9071193933486938, + "learning_rate": 5.7843557112896124e-06, + "loss": 0.8302, + "step": 16402 + }, + { + "epoch": 0.9028014750399032, + "grad_norm": 0.6949760317802429, + "learning_rate": 5.783927608441661e-06, + "loss": 0.8151, + "step": 16403 + }, + { + "epoch": 0.9028565138422588, + "grad_norm": 0.7762002348899841, + "learning_rate": 5.783499499701919e-06, + "loss": 0.8362, + "step": 16404 + }, + { + "epoch": 0.9029115526446144, + "grad_norm": 0.6971734762191772, + "learning_rate": 5.783071385073604e-06, + "loss": 0.7524, + "step": 16405 + }, + { + "epoch": 0.9029665914469701, + "grad_norm": 0.6840386390686035, + "learning_rate": 5.782643264559936e-06, + "loss": 0.7548, + "step": 16406 + }, + { + "epoch": 0.9030216302493258, + "grad_norm": 0.7312326431274414, + "learning_rate": 5.78221513816413e-06, + "loss": 0.7998, + "step": 16407 + }, + { + "epoch": 0.9030766690516814, + "grad_norm": 0.6572339534759521, + "learning_rate": 5.781787005889403e-06, + "loss": 0.7044, + "step": 16408 + }, + { + "epoch": 0.9031317078540371, + "grad_norm": 0.6981173157691956, + "learning_rate": 5.7813588677389755e-06, + "loss": 0.8207, + "step": 16409 + }, + { + "epoch": 0.9031867466563928, + "grad_norm": 0.6956539750099182, + "learning_rate": 5.780930723716064e-06, + "loss": 0.7379, + "step": 16410 + }, + { + "epoch": 0.9032417854587484, + "grad_norm": 0.6781721115112305, + "learning_rate": 5.780502573823886e-06, + "loss": 0.7051, + "step": 16411 + }, + { + "epoch": 0.903296824261104, + "grad_norm": 0.8202298283576965, + "learning_rate": 5.7800744180656596e-06, + "loss": 0.8002, + "step": 16412 + }, + { + "epoch": 0.9033518630634597, + "grad_norm": 0.6392808556556702, + "learning_rate": 5.779646256444603e-06, + "loss": 0.7204, + "step": 16413 + }, + { + "epoch": 0.9034069018658154, + "grad_norm": 0.6195122003555298, + "learning_rate": 5.779218088963934e-06, + "loss": 0.7297, + "step": 16414 + }, + { + "epoch": 0.9034619406681711, + "grad_norm": 0.6970216631889343, + "learning_rate": 5.77878991562687e-06, + "loss": 0.7572, + "step": 16415 + }, + { + "epoch": 0.9035169794705267, + "grad_norm": 0.6491253972053528, + "learning_rate": 5.778361736436628e-06, + "loss": 0.609, + "step": 16416 + }, + { + "epoch": 0.9035720182728824, + "grad_norm": 0.745171070098877, + "learning_rate": 5.777933551396429e-06, + "loss": 0.7413, + "step": 16417 + }, + { + "epoch": 0.903627057075238, + "grad_norm": 0.7239416241645813, + "learning_rate": 5.77750536050949e-06, + "loss": 0.7539, + "step": 16418 + }, + { + "epoch": 0.9036820958775937, + "grad_norm": 0.7006568312644958, + "learning_rate": 5.777077163779028e-06, + "loss": 0.7071, + "step": 16419 + }, + { + "epoch": 0.9037371346799493, + "grad_norm": 0.6700214147567749, + "learning_rate": 5.7766489612082625e-06, + "loss": 0.7743, + "step": 16420 + }, + { + "epoch": 0.903792173482305, + "grad_norm": 0.6496555805206299, + "learning_rate": 5.776220752800411e-06, + "loss": 0.7058, + "step": 16421 + }, + { + "epoch": 0.9038472122846607, + "grad_norm": 0.7568738460540771, + "learning_rate": 5.775792538558692e-06, + "loss": 0.7937, + "step": 16422 + }, + { + "epoch": 0.9039022510870164, + "grad_norm": 0.8599572777748108, + "learning_rate": 5.775364318486325e-06, + "loss": 0.6478, + "step": 16423 + }, + { + "epoch": 0.903957289889372, + "grad_norm": 0.80698162317276, + "learning_rate": 5.774936092586526e-06, + "loss": 0.7847, + "step": 16424 + }, + { + "epoch": 0.9040123286917277, + "grad_norm": 0.9321784973144531, + "learning_rate": 5.774507860862516e-06, + "loss": 0.8223, + "step": 16425 + }, + { + "epoch": 0.9040673674940833, + "grad_norm": 0.6712853312492371, + "learning_rate": 5.774079623317513e-06, + "loss": 0.6958, + "step": 16426 + }, + { + "epoch": 0.904122406296439, + "grad_norm": 0.6142114400863647, + "learning_rate": 5.7736513799547326e-06, + "loss": 0.6696, + "step": 16427 + }, + { + "epoch": 0.9041774450987946, + "grad_norm": 0.7185747027397156, + "learning_rate": 5.773223130777396e-06, + "loss": 0.7161, + "step": 16428 + }, + { + "epoch": 0.9042324839011503, + "grad_norm": 0.6980574727058411, + "learning_rate": 5.772794875788721e-06, + "loss": 0.8255, + "step": 16429 + }, + { + "epoch": 0.904287522703506, + "grad_norm": 0.6682069301605225, + "learning_rate": 5.772366614991927e-06, + "loss": 0.7574, + "step": 16430 + }, + { + "epoch": 0.9043425615058617, + "grad_norm": 0.6451525688171387, + "learning_rate": 5.771938348390233e-06, + "loss": 0.8147, + "step": 16431 + }, + { + "epoch": 0.9043976003082173, + "grad_norm": 0.7066215872764587, + "learning_rate": 5.771510075986857e-06, + "loss": 0.6447, + "step": 16432 + }, + { + "epoch": 0.9044526391105729, + "grad_norm": 0.591195285320282, + "learning_rate": 5.771081797785015e-06, + "loss": 0.6144, + "step": 16433 + }, + { + "epoch": 0.9045076779129286, + "grad_norm": 0.7004978656768799, + "learning_rate": 5.770653513787932e-06, + "loss": 0.6351, + "step": 16434 + }, + { + "epoch": 0.9045627167152843, + "grad_norm": 0.765402615070343, + "learning_rate": 5.770225223998822e-06, + "loss": 0.8448, + "step": 16435 + }, + { + "epoch": 0.9046177555176399, + "grad_norm": 0.7902443408966064, + "learning_rate": 5.769796928420907e-06, + "loss": 0.736, + "step": 16436 + }, + { + "epoch": 0.9046727943199956, + "grad_norm": 0.7513584494590759, + "learning_rate": 5.7693686270574026e-06, + "loss": 0.7939, + "step": 16437 + }, + { + "epoch": 0.9047278331223513, + "grad_norm": 0.7552775144577026, + "learning_rate": 5.7689403199115305e-06, + "loss": 0.7057, + "step": 16438 + }, + { + "epoch": 0.904782871924707, + "grad_norm": 0.6609054207801819, + "learning_rate": 5.768512006986508e-06, + "loss": 0.7442, + "step": 16439 + }, + { + "epoch": 0.9048379107270625, + "grad_norm": 0.7378336787223816, + "learning_rate": 5.768083688285555e-06, + "loss": 0.8066, + "step": 16440 + }, + { + "epoch": 0.9048929495294182, + "grad_norm": 0.6223191022872925, + "learning_rate": 5.76765536381189e-06, + "loss": 0.6754, + "step": 16441 + }, + { + "epoch": 0.9049479883317739, + "grad_norm": 0.707188069820404, + "learning_rate": 5.767227033568734e-06, + "loss": 0.8307, + "step": 16442 + }, + { + "epoch": 0.9050030271341296, + "grad_norm": 0.680635929107666, + "learning_rate": 5.7667986975593046e-06, + "loss": 0.7659, + "step": 16443 + }, + { + "epoch": 0.9050580659364852, + "grad_norm": 0.7325000166893005, + "learning_rate": 5.766370355786822e-06, + "loss": 0.8449, + "step": 16444 + }, + { + "epoch": 0.9051131047388409, + "grad_norm": 0.6698771715164185, + "learning_rate": 5.765942008254504e-06, + "loss": 0.7689, + "step": 16445 + }, + { + "epoch": 0.9051681435411966, + "grad_norm": 0.6456333994865417, + "learning_rate": 5.765513654965571e-06, + "loss": 0.8054, + "step": 16446 + }, + { + "epoch": 0.9052231823435523, + "grad_norm": 0.6837481260299683, + "learning_rate": 5.765085295923243e-06, + "loss": 0.7327, + "step": 16447 + }, + { + "epoch": 0.9052782211459078, + "grad_norm": 0.6525166034698486, + "learning_rate": 5.7646569311307385e-06, + "loss": 0.6838, + "step": 16448 + }, + { + "epoch": 0.9053332599482635, + "grad_norm": 0.6613319516181946, + "learning_rate": 5.764228560591277e-06, + "loss": 0.765, + "step": 16449 + }, + { + "epoch": 0.9053882987506192, + "grad_norm": 0.732072651386261, + "learning_rate": 5.763800184308078e-06, + "loss": 0.7969, + "step": 16450 + }, + { + "epoch": 0.9054433375529748, + "grad_norm": 0.6654138565063477, + "learning_rate": 5.763371802284362e-06, + "loss": 0.744, + "step": 16451 + }, + { + "epoch": 0.9054983763553305, + "grad_norm": 0.685528576374054, + "learning_rate": 5.762943414523347e-06, + "loss": 0.793, + "step": 16452 + }, + { + "epoch": 0.9055534151576862, + "grad_norm": 0.7957682013511658, + "learning_rate": 5.762515021028254e-06, + "loss": 0.7792, + "step": 16453 + }, + { + "epoch": 0.9056084539600419, + "grad_norm": 0.7029872536659241, + "learning_rate": 5.762086621802302e-06, + "loss": 0.7014, + "step": 16454 + }, + { + "epoch": 0.9056634927623974, + "grad_norm": 0.6274085640907288, + "learning_rate": 5.761658216848711e-06, + "loss": 0.7039, + "step": 16455 + }, + { + "epoch": 0.9057185315647531, + "grad_norm": 0.7196398377418518, + "learning_rate": 5.761229806170702e-06, + "loss": 0.7091, + "step": 16456 + }, + { + "epoch": 0.9057735703671088, + "grad_norm": 0.8225924372673035, + "learning_rate": 5.760801389771491e-06, + "loss": 0.8, + "step": 16457 + }, + { + "epoch": 0.9058286091694645, + "grad_norm": 0.659428596496582, + "learning_rate": 5.760372967654302e-06, + "loss": 0.7364, + "step": 16458 + }, + { + "epoch": 0.9058836479718201, + "grad_norm": 0.667123556137085, + "learning_rate": 5.759944539822353e-06, + "loss": 0.7754, + "step": 16459 + }, + { + "epoch": 0.9059386867741758, + "grad_norm": 0.6794472336769104, + "learning_rate": 5.759516106278864e-06, + "loss": 0.7747, + "step": 16460 + }, + { + "epoch": 0.9059937255765315, + "grad_norm": 0.652911365032196, + "learning_rate": 5.759087667027056e-06, + "loss": 0.7144, + "step": 16461 + }, + { + "epoch": 0.9060487643788872, + "grad_norm": 0.7240120768547058, + "learning_rate": 5.758659222070149e-06, + "loss": 0.7643, + "step": 16462 + }, + { + "epoch": 0.9061038031812427, + "grad_norm": 0.7705693244934082, + "learning_rate": 5.758230771411361e-06, + "loss": 0.6911, + "step": 16463 + }, + { + "epoch": 0.9061588419835984, + "grad_norm": 0.6969810128211975, + "learning_rate": 5.757802315053913e-06, + "loss": 0.7588, + "step": 16464 + }, + { + "epoch": 0.9062138807859541, + "grad_norm": 0.6787244081497192, + "learning_rate": 5.757373853001026e-06, + "loss": 0.7772, + "step": 16465 + }, + { + "epoch": 0.9062689195883098, + "grad_norm": 0.7181483507156372, + "learning_rate": 5.75694538525592e-06, + "loss": 0.7586, + "step": 16466 + }, + { + "epoch": 0.9063239583906654, + "grad_norm": 0.6730976700782776, + "learning_rate": 5.756516911821816e-06, + "loss": 0.758, + "step": 16467 + }, + { + "epoch": 0.9063789971930211, + "grad_norm": 0.752193808555603, + "learning_rate": 5.756088432701933e-06, + "loss": 0.7701, + "step": 16468 + }, + { + "epoch": 0.9064340359953768, + "grad_norm": 0.7424039244651794, + "learning_rate": 5.75565994789949e-06, + "loss": 0.777, + "step": 16469 + }, + { + "epoch": 0.9064890747977324, + "grad_norm": 0.7168293595314026, + "learning_rate": 5.755231457417711e-06, + "loss": 0.752, + "step": 16470 + }, + { + "epoch": 0.906544113600088, + "grad_norm": 0.566977858543396, + "learning_rate": 5.754802961259814e-06, + "loss": 0.6182, + "step": 16471 + }, + { + "epoch": 0.9065991524024437, + "grad_norm": 0.6081774234771729, + "learning_rate": 5.754374459429021e-06, + "loss": 0.7449, + "step": 16472 + }, + { + "epoch": 0.9066541912047994, + "grad_norm": 0.8636649250984192, + "learning_rate": 5.75394595192855e-06, + "loss": 0.7754, + "step": 16473 + }, + { + "epoch": 0.9067092300071551, + "grad_norm": 0.705915629863739, + "learning_rate": 5.753517438761624e-06, + "loss": 0.7838, + "step": 16474 + }, + { + "epoch": 0.9067642688095107, + "grad_norm": 0.6954970955848694, + "learning_rate": 5.753088919931462e-06, + "loss": 0.6729, + "step": 16475 + }, + { + "epoch": 0.9068193076118664, + "grad_norm": 0.6736747622489929, + "learning_rate": 5.752660395441286e-06, + "loss": 0.7344, + "step": 16476 + }, + { + "epoch": 0.906874346414222, + "grad_norm": 0.7642985582351685, + "learning_rate": 5.752231865294314e-06, + "loss": 0.7716, + "step": 16477 + }, + { + "epoch": 0.9069293852165777, + "grad_norm": 0.6003983616828918, + "learning_rate": 5.7518033294937705e-06, + "loss": 0.7094, + "step": 16478 + }, + { + "epoch": 0.9069844240189333, + "grad_norm": 0.6081627607345581, + "learning_rate": 5.751374788042875e-06, + "loss": 0.6534, + "step": 16479 + }, + { + "epoch": 0.907039462821289, + "grad_norm": 0.7221750617027283, + "learning_rate": 5.750946240944847e-06, + "loss": 0.7032, + "step": 16480 + }, + { + "epoch": 0.9070945016236447, + "grad_norm": 0.6791507601737976, + "learning_rate": 5.750517688202907e-06, + "loss": 0.7944, + "step": 16481 + }, + { + "epoch": 0.9071495404260004, + "grad_norm": 0.6808193922042847, + "learning_rate": 5.750089129820278e-06, + "loss": 0.7536, + "step": 16482 + }, + { + "epoch": 0.907204579228356, + "grad_norm": 0.6561774611473083, + "learning_rate": 5.7496605658001795e-06, + "loss": 0.755, + "step": 16483 + }, + { + "epoch": 0.9072596180307116, + "grad_norm": 0.6241487264633179, + "learning_rate": 5.749231996145833e-06, + "loss": 0.7474, + "step": 16484 + }, + { + "epoch": 0.9073146568330673, + "grad_norm": 0.676726758480072, + "learning_rate": 5.748803420860459e-06, + "loss": 0.7416, + "step": 16485 + }, + { + "epoch": 0.907369695635423, + "grad_norm": 0.6571675539016724, + "learning_rate": 5.748374839947279e-06, + "loss": 0.7428, + "step": 16486 + }, + { + "epoch": 0.9074247344377786, + "grad_norm": 0.6442016959190369, + "learning_rate": 5.7479462534095155e-06, + "loss": 0.7265, + "step": 16487 + }, + { + "epoch": 0.9074797732401343, + "grad_norm": 0.6728110909461975, + "learning_rate": 5.747517661250387e-06, + "loss": 0.6492, + "step": 16488 + }, + { + "epoch": 0.90753481204249, + "grad_norm": 1.114125370979309, + "learning_rate": 5.747089063473115e-06, + "loss": 0.7399, + "step": 16489 + }, + { + "epoch": 0.9075898508448457, + "grad_norm": 0.6066282391548157, + "learning_rate": 5.746660460080923e-06, + "loss": 0.5736, + "step": 16490 + }, + { + "epoch": 0.9076448896472012, + "grad_norm": 0.6982638835906982, + "learning_rate": 5.74623185107703e-06, + "loss": 0.765, + "step": 16491 + }, + { + "epoch": 0.9076999284495569, + "grad_norm": 0.6360225677490234, + "learning_rate": 5.745803236464659e-06, + "loss": 0.7082, + "step": 16492 + }, + { + "epoch": 0.9077549672519126, + "grad_norm": 0.6267019510269165, + "learning_rate": 5.745374616247029e-06, + "loss": 0.7498, + "step": 16493 + }, + { + "epoch": 0.9078100060542682, + "grad_norm": 0.6618008613586426, + "learning_rate": 5.744945990427364e-06, + "loss": 0.7073, + "step": 16494 + }, + { + "epoch": 0.9078650448566239, + "grad_norm": 0.7970854043960571, + "learning_rate": 5.744517359008884e-06, + "loss": 0.6968, + "step": 16495 + }, + { + "epoch": 0.9079200836589796, + "grad_norm": 0.6457371711730957, + "learning_rate": 5.744088721994811e-06, + "loss": 0.7102, + "step": 16496 + }, + { + "epoch": 0.9079751224613353, + "grad_norm": 0.6841073036193848, + "learning_rate": 5.743660079388367e-06, + "loss": 0.7448, + "step": 16497 + }, + { + "epoch": 0.9080301612636908, + "grad_norm": 0.6577959656715393, + "learning_rate": 5.7432314311927725e-06, + "loss": 0.6853, + "step": 16498 + }, + { + "epoch": 0.9080852000660465, + "grad_norm": 0.9146704077720642, + "learning_rate": 5.7428027774112494e-06, + "loss": 0.7052, + "step": 16499 + }, + { + "epoch": 0.9081402388684022, + "grad_norm": 0.7161448001861572, + "learning_rate": 5.7423741180470194e-06, + "loss": 0.775, + "step": 16500 + }, + { + "epoch": 0.9081952776707579, + "grad_norm": 0.6443139314651489, + "learning_rate": 5.741945453103304e-06, + "loss": 0.7169, + "step": 16501 + }, + { + "epoch": 0.9082503164731135, + "grad_norm": 0.6783748269081116, + "learning_rate": 5.7415167825833254e-06, + "loss": 0.7082, + "step": 16502 + }, + { + "epoch": 0.9083053552754692, + "grad_norm": 0.7961641550064087, + "learning_rate": 5.741088106490305e-06, + "loss": 0.769, + "step": 16503 + }, + { + "epoch": 0.9083603940778249, + "grad_norm": 0.6952986121177673, + "learning_rate": 5.740659424827465e-06, + "loss": 0.7978, + "step": 16504 + }, + { + "epoch": 0.9084154328801806, + "grad_norm": 0.6124480366706848, + "learning_rate": 5.740230737598026e-06, + "loss": 0.6824, + "step": 16505 + }, + { + "epoch": 0.9084704716825361, + "grad_norm": 0.6469553709030151, + "learning_rate": 5.739802044805211e-06, + "loss": 0.6558, + "step": 16506 + }, + { + "epoch": 0.9085255104848918, + "grad_norm": 0.6296014189720154, + "learning_rate": 5.739373346452244e-06, + "loss": 0.7079, + "step": 16507 + }, + { + "epoch": 0.9085805492872475, + "grad_norm": 0.6862651705741882, + "learning_rate": 5.7389446425423435e-06, + "loss": 0.7787, + "step": 16508 + }, + { + "epoch": 0.9086355880896032, + "grad_norm": 0.7167361974716187, + "learning_rate": 5.738515933078733e-06, + "loss": 0.8556, + "step": 16509 + }, + { + "epoch": 0.9086906268919588, + "grad_norm": 0.6557625532150269, + "learning_rate": 5.7380872180646356e-06, + "loss": 0.6326, + "step": 16510 + }, + { + "epoch": 0.9087456656943145, + "grad_norm": 0.6629947423934937, + "learning_rate": 5.737658497503271e-06, + "loss": 0.6692, + "step": 16511 + }, + { + "epoch": 0.9088007044966702, + "grad_norm": 0.723196804523468, + "learning_rate": 5.737229771397862e-06, + "loss": 0.837, + "step": 16512 + }, + { + "epoch": 0.9088557432990259, + "grad_norm": 0.6491259336471558, + "learning_rate": 5.7368010397516315e-06, + "loss": 0.7818, + "step": 16513 + }, + { + "epoch": 0.9089107821013814, + "grad_norm": 0.6776933073997498, + "learning_rate": 5.736372302567802e-06, + "loss": 0.668, + "step": 16514 + }, + { + "epoch": 0.9089658209037371, + "grad_norm": 0.5955449938774109, + "learning_rate": 5.735943559849596e-06, + "loss": 0.6875, + "step": 16515 + }, + { + "epoch": 0.9090208597060928, + "grad_norm": 0.6763395667076111, + "learning_rate": 5.735514811600235e-06, + "loss": 0.6882, + "step": 16516 + }, + { + "epoch": 0.9090758985084485, + "grad_norm": 0.794796884059906, + "learning_rate": 5.735086057822941e-06, + "loss": 0.8279, + "step": 16517 + }, + { + "epoch": 0.9091309373108041, + "grad_norm": 0.6420493721961975, + "learning_rate": 5.734657298520936e-06, + "loss": 0.6755, + "step": 16518 + }, + { + "epoch": 0.9091859761131598, + "grad_norm": 0.7267552614212036, + "learning_rate": 5.734228533697445e-06, + "loss": 0.7816, + "step": 16519 + }, + { + "epoch": 0.9092410149155155, + "grad_norm": 0.7506794929504395, + "learning_rate": 5.733799763355689e-06, + "loss": 0.6968, + "step": 16520 + }, + { + "epoch": 0.9092960537178711, + "grad_norm": 0.6953657269477844, + "learning_rate": 5.73337098749889e-06, + "loss": 0.7866, + "step": 16521 + }, + { + "epoch": 0.9093510925202267, + "grad_norm": 0.8068533539772034, + "learning_rate": 5.732942206130271e-06, + "loss": 0.7726, + "step": 16522 + }, + { + "epoch": 0.9094061313225824, + "grad_norm": 0.8853289484977722, + "learning_rate": 5.732513419253054e-06, + "loss": 0.7886, + "step": 16523 + }, + { + "epoch": 0.9094611701249381, + "grad_norm": 0.6110638380050659, + "learning_rate": 5.732084626870463e-06, + "loss": 0.6793, + "step": 16524 + }, + { + "epoch": 0.9095162089272938, + "grad_norm": 0.6836052536964417, + "learning_rate": 5.731655828985719e-06, + "loss": 0.7245, + "step": 16525 + }, + { + "epoch": 0.9095712477296494, + "grad_norm": 0.6699627041816711, + "learning_rate": 5.731227025602046e-06, + "loss": 0.7642, + "step": 16526 + }, + { + "epoch": 0.9096262865320051, + "grad_norm": 0.6030815243721008, + "learning_rate": 5.730798216722667e-06, + "loss": 0.7817, + "step": 16527 + }, + { + "epoch": 0.9096813253343607, + "grad_norm": 0.9264968633651733, + "learning_rate": 5.730369402350804e-06, + "loss": 0.8899, + "step": 16528 + }, + { + "epoch": 0.9097363641367164, + "grad_norm": 0.6469599604606628, + "learning_rate": 5.729940582489679e-06, + "loss": 0.7672, + "step": 16529 + }, + { + "epoch": 0.909791402939072, + "grad_norm": 0.6422780752182007, + "learning_rate": 5.729511757142518e-06, + "loss": 0.7308, + "step": 16530 + }, + { + "epoch": 0.9098464417414277, + "grad_norm": 0.7130196690559387, + "learning_rate": 5.72908292631254e-06, + "loss": 0.7283, + "step": 16531 + }, + { + "epoch": 0.9099014805437834, + "grad_norm": 0.6389671564102173, + "learning_rate": 5.7286540900029705e-06, + "loss": 0.6865, + "step": 16532 + }, + { + "epoch": 0.9099565193461391, + "grad_norm": 0.6116911768913269, + "learning_rate": 5.728225248217033e-06, + "loss": 0.7162, + "step": 16533 + }, + { + "epoch": 0.9100115581484947, + "grad_norm": 0.6901746392250061, + "learning_rate": 5.727796400957949e-06, + "loss": 0.7936, + "step": 16534 + }, + { + "epoch": 0.9100665969508503, + "grad_norm": 0.6178259253501892, + "learning_rate": 5.727367548228941e-06, + "loss": 0.7345, + "step": 16535 + }, + { + "epoch": 0.910121635753206, + "grad_norm": 0.6640061140060425, + "learning_rate": 5.726938690033235e-06, + "loss": 0.6847, + "step": 16536 + }, + { + "epoch": 0.9101766745555616, + "grad_norm": 0.5669469833374023, + "learning_rate": 5.726509826374049e-06, + "loss": 0.6054, + "step": 16537 + }, + { + "epoch": 0.9102317133579173, + "grad_norm": 0.6553897857666016, + "learning_rate": 5.7260809572546126e-06, + "loss": 0.7299, + "step": 16538 + }, + { + "epoch": 0.910286752160273, + "grad_norm": 0.6646978259086609, + "learning_rate": 5.725652082678146e-06, + "loss": 0.7952, + "step": 16539 + }, + { + "epoch": 0.9103417909626287, + "grad_norm": 0.7033318281173706, + "learning_rate": 5.725223202647872e-06, + "loss": 0.7477, + "step": 16540 + }, + { + "epoch": 0.9103968297649843, + "grad_norm": 0.804336667060852, + "learning_rate": 5.724794317167013e-06, + "loss": 0.7088, + "step": 16541 + }, + { + "epoch": 0.91045186856734, + "grad_norm": 0.6683815717697144, + "learning_rate": 5.724365426238796e-06, + "loss": 0.7497, + "step": 16542 + }, + { + "epoch": 0.9105069073696956, + "grad_norm": 0.7227790951728821, + "learning_rate": 5.723936529866442e-06, + "loss": 0.7673, + "step": 16543 + }, + { + "epoch": 0.9105619461720513, + "grad_norm": 0.7867640852928162, + "learning_rate": 5.723507628053175e-06, + "loss": 0.8351, + "step": 16544 + }, + { + "epoch": 0.9106169849744069, + "grad_norm": 0.6680448055267334, + "learning_rate": 5.7230787208022175e-06, + "loss": 0.7068, + "step": 16545 + }, + { + "epoch": 0.9106720237767626, + "grad_norm": 0.7167722582817078, + "learning_rate": 5.722649808116794e-06, + "loss": 0.771, + "step": 16546 + }, + { + "epoch": 0.9107270625791183, + "grad_norm": 0.6377316117286682, + "learning_rate": 5.72222089000013e-06, + "loss": 0.7535, + "step": 16547 + }, + { + "epoch": 0.910782101381474, + "grad_norm": 0.6644514799118042, + "learning_rate": 5.721791966455444e-06, + "loss": 0.6642, + "step": 16548 + }, + { + "epoch": 0.9108371401838296, + "grad_norm": 0.6652604341506958, + "learning_rate": 5.7213630374859644e-06, + "loss": 0.7192, + "step": 16549 + }, + { + "epoch": 0.9108921789861852, + "grad_norm": 0.7685999274253845, + "learning_rate": 5.720934103094913e-06, + "loss": 0.7497, + "step": 16550 + }, + { + "epoch": 0.9109472177885409, + "grad_norm": 0.6837451457977295, + "learning_rate": 5.7205051632855135e-06, + "loss": 0.8337, + "step": 16551 + }, + { + "epoch": 0.9110022565908966, + "grad_norm": 0.6328313946723938, + "learning_rate": 5.720076218060991e-06, + "loss": 0.7106, + "step": 16552 + }, + { + "epoch": 0.9110572953932522, + "grad_norm": 0.6779924631118774, + "learning_rate": 5.7196472674245675e-06, + "loss": 0.6518, + "step": 16553 + }, + { + "epoch": 0.9111123341956079, + "grad_norm": 0.7673786282539368, + "learning_rate": 5.719218311379468e-06, + "loss": 0.687, + "step": 16554 + }, + { + "epoch": 0.9111673729979636, + "grad_norm": 0.9567201137542725, + "learning_rate": 5.718789349928917e-06, + "loss": 0.8435, + "step": 16555 + }, + { + "epoch": 0.9112224118003193, + "grad_norm": 0.6433333158493042, + "learning_rate": 5.7183603830761365e-06, + "loss": 0.6911, + "step": 16556 + }, + { + "epoch": 0.9112774506026748, + "grad_norm": 0.7093275189399719, + "learning_rate": 5.717931410824352e-06, + "loss": 0.7928, + "step": 16557 + }, + { + "epoch": 0.9113324894050305, + "grad_norm": 0.6292399764060974, + "learning_rate": 5.717502433176788e-06, + "loss": 0.7971, + "step": 16558 + }, + { + "epoch": 0.9113875282073862, + "grad_norm": 0.7010529637336731, + "learning_rate": 5.717073450136668e-06, + "loss": 0.7363, + "step": 16559 + }, + { + "epoch": 0.9114425670097419, + "grad_norm": 0.6663999557495117, + "learning_rate": 5.716644461707215e-06, + "loss": 0.6988, + "step": 16560 + }, + { + "epoch": 0.9114976058120975, + "grad_norm": 0.6320468187332153, + "learning_rate": 5.716215467891654e-06, + "loss": 0.6654, + "step": 16561 + }, + { + "epoch": 0.9115526446144532, + "grad_norm": 0.6906135678291321, + "learning_rate": 5.715786468693208e-06, + "loss": 0.7749, + "step": 16562 + }, + { + "epoch": 0.9116076834168089, + "grad_norm": 0.7253930568695068, + "learning_rate": 5.715357464115104e-06, + "loss": 0.7325, + "step": 16563 + }, + { + "epoch": 0.9116627222191646, + "grad_norm": 0.6567997336387634, + "learning_rate": 5.7149284541605655e-06, + "loss": 0.7028, + "step": 16564 + }, + { + "epoch": 0.9117177610215201, + "grad_norm": 0.7034043669700623, + "learning_rate": 5.7144994388328155e-06, + "loss": 0.7328, + "step": 16565 + }, + { + "epoch": 0.9117727998238758, + "grad_norm": 0.6639497876167297, + "learning_rate": 5.714070418135078e-06, + "loss": 0.7289, + "step": 16566 + }, + { + "epoch": 0.9118278386262315, + "grad_norm": 0.6601856350898743, + "learning_rate": 5.713641392070579e-06, + "loss": 0.6857, + "step": 16567 + }, + { + "epoch": 0.9118828774285872, + "grad_norm": 1.0736916065216064, + "learning_rate": 5.713212360642543e-06, + "loss": 0.6254, + "step": 16568 + }, + { + "epoch": 0.9119379162309428, + "grad_norm": 0.6571956872940063, + "learning_rate": 5.712783323854193e-06, + "loss": 0.7931, + "step": 16569 + }, + { + "epoch": 0.9119929550332985, + "grad_norm": 0.7038640975952148, + "learning_rate": 5.712354281708754e-06, + "loss": 0.7065, + "step": 16570 + }, + { + "epoch": 0.9120479938356542, + "grad_norm": 0.6827693581581116, + "learning_rate": 5.711925234209452e-06, + "loss": 0.7971, + "step": 16571 + }, + { + "epoch": 0.9121030326380098, + "grad_norm": 0.6412268877029419, + "learning_rate": 5.71149618135951e-06, + "loss": 0.7899, + "step": 16572 + }, + { + "epoch": 0.9121580714403654, + "grad_norm": 0.753730833530426, + "learning_rate": 5.711067123162154e-06, + "loss": 0.6762, + "step": 16573 + }, + { + "epoch": 0.9122131102427211, + "grad_norm": 0.62330162525177, + "learning_rate": 5.710638059620606e-06, + "loss": 0.6661, + "step": 16574 + }, + { + "epoch": 0.9122681490450768, + "grad_norm": 0.7718592882156372, + "learning_rate": 5.710208990738093e-06, + "loss": 0.8146, + "step": 16575 + }, + { + "epoch": 0.9123231878474325, + "grad_norm": 0.7234812378883362, + "learning_rate": 5.709779916517842e-06, + "loss": 0.7633, + "step": 16576 + }, + { + "epoch": 0.9123782266497881, + "grad_norm": 0.7249013781547546, + "learning_rate": 5.709350836963072e-06, + "loss": 0.7339, + "step": 16577 + }, + { + "epoch": 0.9124332654521438, + "grad_norm": 0.643532395362854, + "learning_rate": 5.708921752077011e-06, + "loss": 0.6899, + "step": 16578 + }, + { + "epoch": 0.9124883042544994, + "grad_norm": 0.7240098118782043, + "learning_rate": 5.708492661862885e-06, + "loss": 0.6788, + "step": 16579 + }, + { + "epoch": 0.912543343056855, + "grad_norm": 0.6944229602813721, + "learning_rate": 5.708063566323918e-06, + "loss": 0.8284, + "step": 16580 + }, + { + "epoch": 0.9125983818592107, + "grad_norm": 0.7374451160430908, + "learning_rate": 5.707634465463334e-06, + "loss": 0.7414, + "step": 16581 + }, + { + "epoch": 0.9126534206615664, + "grad_norm": 0.6843305230140686, + "learning_rate": 5.70720535928436e-06, + "loss": 0.7765, + "step": 16582 + }, + { + "epoch": 0.9127084594639221, + "grad_norm": 0.6846521496772766, + "learning_rate": 5.70677624779022e-06, + "loss": 0.7526, + "step": 16583 + }, + { + "epoch": 0.9127634982662777, + "grad_norm": 0.782206654548645, + "learning_rate": 5.7063471309841375e-06, + "loss": 0.8384, + "step": 16584 + }, + { + "epoch": 0.9128185370686334, + "grad_norm": 0.590929388999939, + "learning_rate": 5.70591800886934e-06, + "loss": 0.5523, + "step": 16585 + }, + { + "epoch": 0.912873575870989, + "grad_norm": 0.7062276005744934, + "learning_rate": 5.705488881449051e-06, + "loss": 0.78, + "step": 16586 + }, + { + "epoch": 0.9129286146733447, + "grad_norm": 0.6782402992248535, + "learning_rate": 5.705059748726496e-06, + "loss": 0.7376, + "step": 16587 + }, + { + "epoch": 0.9129836534757003, + "grad_norm": 0.6320528984069824, + "learning_rate": 5.704630610704902e-06, + "loss": 0.6088, + "step": 16588 + }, + { + "epoch": 0.913038692278056, + "grad_norm": 0.62249356508255, + "learning_rate": 5.704201467387492e-06, + "loss": 0.648, + "step": 16589 + }, + { + "epoch": 0.9130937310804117, + "grad_norm": 0.6249415278434753, + "learning_rate": 5.703772318777492e-06, + "loss": 0.6843, + "step": 16590 + }, + { + "epoch": 0.9131487698827674, + "grad_norm": 0.7930389046669006, + "learning_rate": 5.703343164878128e-06, + "loss": 0.8037, + "step": 16591 + }, + { + "epoch": 0.913203808685123, + "grad_norm": 0.6056682467460632, + "learning_rate": 5.7029140056926256e-06, + "loss": 0.726, + "step": 16592 + }, + { + "epoch": 0.9132588474874787, + "grad_norm": 0.7311093807220459, + "learning_rate": 5.702484841224209e-06, + "loss": 0.757, + "step": 16593 + }, + { + "epoch": 0.9133138862898343, + "grad_norm": 0.6924651861190796, + "learning_rate": 5.702055671476105e-06, + "loss": 0.6812, + "step": 16594 + }, + { + "epoch": 0.91336892509219, + "grad_norm": 0.6961154937744141, + "learning_rate": 5.701626496451539e-06, + "loss": 0.7758, + "step": 16595 + }, + { + "epoch": 0.9134239638945456, + "grad_norm": 0.7138234972953796, + "learning_rate": 5.7011973161537345e-06, + "loss": 0.7691, + "step": 16596 + }, + { + "epoch": 0.9134790026969013, + "grad_norm": 0.6445374488830566, + "learning_rate": 5.70076813058592e-06, + "loss": 0.6223, + "step": 16597 + }, + { + "epoch": 0.913534041499257, + "grad_norm": 0.6478644013404846, + "learning_rate": 5.700338939751318e-06, + "loss": 0.6874, + "step": 16598 + }, + { + "epoch": 0.9135890803016127, + "grad_norm": 0.7351845502853394, + "learning_rate": 5.699909743653158e-06, + "loss": 0.751, + "step": 16599 + }, + { + "epoch": 0.9136441191039683, + "grad_norm": 0.675804078578949, + "learning_rate": 5.6994805422946634e-06, + "loss": 0.7637, + "step": 16600 + }, + { + "epoch": 0.9136991579063239, + "grad_norm": 0.7249841690063477, + "learning_rate": 5.69905133567906e-06, + "loss": 0.8253, + "step": 16601 + }, + { + "epoch": 0.9137541967086796, + "grad_norm": 0.7853312492370605, + "learning_rate": 5.698622123809573e-06, + "loss": 0.7469, + "step": 16602 + }, + { + "epoch": 0.9138092355110353, + "grad_norm": 0.719670295715332, + "learning_rate": 5.698192906689429e-06, + "loss": 0.7138, + "step": 16603 + }, + { + "epoch": 0.9138642743133909, + "grad_norm": 0.6555393934249878, + "learning_rate": 5.6977636843218555e-06, + "loss": 0.7134, + "step": 16604 + }, + { + "epoch": 0.9139193131157466, + "grad_norm": 0.60044926404953, + "learning_rate": 5.697334456710076e-06, + "loss": 0.6102, + "step": 16605 + }, + { + "epoch": 0.9139743519181023, + "grad_norm": 0.6154021620750427, + "learning_rate": 5.696905223857319e-06, + "loss": 0.7117, + "step": 16606 + }, + { + "epoch": 0.914029390720458, + "grad_norm": 0.7099533081054688, + "learning_rate": 5.696475985766807e-06, + "loss": 0.7042, + "step": 16607 + }, + { + "epoch": 0.9140844295228135, + "grad_norm": 0.6776680946350098, + "learning_rate": 5.696046742441769e-06, + "loss": 0.785, + "step": 16608 + }, + { + "epoch": 0.9141394683251692, + "grad_norm": 0.6876663565635681, + "learning_rate": 5.695617493885429e-06, + "loss": 0.7235, + "step": 16609 + }, + { + "epoch": 0.9141945071275249, + "grad_norm": 0.6305785775184631, + "learning_rate": 5.6951882401010136e-06, + "loss": 0.7578, + "step": 16610 + }, + { + "epoch": 0.9142495459298806, + "grad_norm": 0.5949695110321045, + "learning_rate": 5.694758981091752e-06, + "loss": 0.6303, + "step": 16611 + }, + { + "epoch": 0.9143045847322362, + "grad_norm": 0.6790008544921875, + "learning_rate": 5.6943297168608655e-06, + "loss": 0.7193, + "step": 16612 + }, + { + "epoch": 0.9143596235345919, + "grad_norm": 0.57261061668396, + "learning_rate": 5.693900447411585e-06, + "loss": 0.6036, + "step": 16613 + }, + { + "epoch": 0.9144146623369476, + "grad_norm": 0.7408223152160645, + "learning_rate": 5.693471172747131e-06, + "loss": 0.7957, + "step": 16614 + }, + { + "epoch": 0.9144697011393033, + "grad_norm": 0.6669179201126099, + "learning_rate": 5.693041892870736e-06, + "loss": 0.7269, + "step": 16615 + }, + { + "epoch": 0.9145247399416588, + "grad_norm": 0.9949795603752136, + "learning_rate": 5.692612607785622e-06, + "loss": 0.8756, + "step": 16616 + }, + { + "epoch": 0.9145797787440145, + "grad_norm": 0.7075115442276001, + "learning_rate": 5.6921833174950194e-06, + "loss": 0.7451, + "step": 16617 + }, + { + "epoch": 0.9146348175463702, + "grad_norm": 0.7605398297309875, + "learning_rate": 5.69175402200215e-06, + "loss": 0.8082, + "step": 16618 + }, + { + "epoch": 0.9146898563487259, + "grad_norm": 0.8290950655937195, + "learning_rate": 5.691324721310244e-06, + "loss": 0.8054, + "step": 16619 + }, + { + "epoch": 0.9147448951510815, + "grad_norm": 0.644469678401947, + "learning_rate": 5.690895415422526e-06, + "loss": 0.7695, + "step": 16620 + }, + { + "epoch": 0.9147999339534372, + "grad_norm": 0.6535455584526062, + "learning_rate": 5.690466104342224e-06, + "loss": 0.7285, + "step": 16621 + }, + { + "epoch": 0.9148549727557929, + "grad_norm": 0.6709266304969788, + "learning_rate": 5.690036788072562e-06, + "loss": 0.7393, + "step": 16622 + }, + { + "epoch": 0.9149100115581484, + "grad_norm": 0.6400435566902161, + "learning_rate": 5.689607466616767e-06, + "loss": 0.6981, + "step": 16623 + }, + { + "epoch": 0.9149650503605041, + "grad_norm": 0.6391820311546326, + "learning_rate": 5.68917813997807e-06, + "loss": 0.8183, + "step": 16624 + }, + { + "epoch": 0.9150200891628598, + "grad_norm": 0.6608666181564331, + "learning_rate": 5.688748808159693e-06, + "loss": 0.768, + "step": 16625 + }, + { + "epoch": 0.9150751279652155, + "grad_norm": 0.6189754605293274, + "learning_rate": 5.688319471164863e-06, + "loss": 0.6962, + "step": 16626 + }, + { + "epoch": 0.9151301667675711, + "grad_norm": 0.6954981684684753, + "learning_rate": 5.68789012899681e-06, + "loss": 0.7455, + "step": 16627 + }, + { + "epoch": 0.9151852055699268, + "grad_norm": 0.7653296589851379, + "learning_rate": 5.687460781658759e-06, + "loss": 0.7123, + "step": 16628 + }, + { + "epoch": 0.9152402443722825, + "grad_norm": 0.6047289371490479, + "learning_rate": 5.687031429153937e-06, + "loss": 0.6735, + "step": 16629 + }, + { + "epoch": 0.9152952831746382, + "grad_norm": 0.6627909541130066, + "learning_rate": 5.68660207148557e-06, + "loss": 0.6597, + "step": 16630 + }, + { + "epoch": 0.9153503219769937, + "grad_norm": 0.8544076085090637, + "learning_rate": 5.6861727086568855e-06, + "loss": 0.8044, + "step": 16631 + }, + { + "epoch": 0.9154053607793494, + "grad_norm": 0.7739614248275757, + "learning_rate": 5.685743340671111e-06, + "loss": 0.7395, + "step": 16632 + }, + { + "epoch": 0.9154603995817051, + "grad_norm": 0.7752935290336609, + "learning_rate": 5.6853139675314725e-06, + "loss": 0.8354, + "step": 16633 + }, + { + "epoch": 0.9155154383840608, + "grad_norm": 0.6166974306106567, + "learning_rate": 5.684884589241197e-06, + "loss": 0.7338, + "step": 16634 + }, + { + "epoch": 0.9155704771864164, + "grad_norm": 0.606935977935791, + "learning_rate": 5.684455205803513e-06, + "loss": 0.7381, + "step": 16635 + }, + { + "epoch": 0.9156255159887721, + "grad_norm": 0.9468479752540588, + "learning_rate": 5.684025817221647e-06, + "loss": 0.7959, + "step": 16636 + }, + { + "epoch": 0.9156805547911278, + "grad_norm": 0.6966477036476135, + "learning_rate": 5.683596423498827e-06, + "loss": 0.7251, + "step": 16637 + }, + { + "epoch": 0.9157355935934834, + "grad_norm": 0.630775511264801, + "learning_rate": 5.683167024638277e-06, + "loss": 0.7181, + "step": 16638 + }, + { + "epoch": 0.915790632395839, + "grad_norm": 0.6859539151191711, + "learning_rate": 5.682737620643229e-06, + "loss": 0.8418, + "step": 16639 + }, + { + "epoch": 0.9158456711981947, + "grad_norm": 0.782072901725769, + "learning_rate": 5.682308211516907e-06, + "loss": 0.687, + "step": 16640 + }, + { + "epoch": 0.9159007100005504, + "grad_norm": 0.6942490339279175, + "learning_rate": 5.6818787972625385e-06, + "loss": 0.771, + "step": 16641 + }, + { + "epoch": 0.9159557488029061, + "grad_norm": 0.7138404846191406, + "learning_rate": 5.681449377883352e-06, + "loss": 0.8126, + "step": 16642 + }, + { + "epoch": 0.9160107876052617, + "grad_norm": 0.7067154049873352, + "learning_rate": 5.681019953382575e-06, + "loss": 0.7199, + "step": 16643 + }, + { + "epoch": 0.9160658264076174, + "grad_norm": 0.7254672646522522, + "learning_rate": 5.680590523763433e-06, + "loss": 0.7619, + "step": 16644 + }, + { + "epoch": 0.916120865209973, + "grad_norm": 0.6963558197021484, + "learning_rate": 5.680161089029156e-06, + "loss": 0.8141, + "step": 16645 + }, + { + "epoch": 0.9161759040123287, + "grad_norm": 0.7634423971176147, + "learning_rate": 5.679731649182969e-06, + "loss": 0.6432, + "step": 16646 + }, + { + "epoch": 0.9162309428146843, + "grad_norm": 0.6261885166168213, + "learning_rate": 5.679302204228101e-06, + "loss": 0.6637, + "step": 16647 + }, + { + "epoch": 0.91628598161704, + "grad_norm": 0.666869580745697, + "learning_rate": 5.67887275416778e-06, + "loss": 0.7428, + "step": 16648 + }, + { + "epoch": 0.9163410204193957, + "grad_norm": 0.698590099811554, + "learning_rate": 5.678443299005234e-06, + "loss": 0.7404, + "step": 16649 + }, + { + "epoch": 0.9163960592217514, + "grad_norm": 0.7000448703765869, + "learning_rate": 5.678013838743687e-06, + "loss": 0.8314, + "step": 16650 + }, + { + "epoch": 0.916451098024107, + "grad_norm": 0.7588967680931091, + "learning_rate": 5.677584373386372e-06, + "loss": 0.795, + "step": 16651 + }, + { + "epoch": 0.9165061368264626, + "grad_norm": 1.0171421766281128, + "learning_rate": 5.677154902936515e-06, + "loss": 0.8069, + "step": 16652 + }, + { + "epoch": 0.9165611756288183, + "grad_norm": 0.699393093585968, + "learning_rate": 5.676725427397342e-06, + "loss": 0.8354, + "step": 16653 + }, + { + "epoch": 0.916616214431174, + "grad_norm": 0.7349410653114319, + "learning_rate": 5.676295946772081e-06, + "loss": 0.8218, + "step": 16654 + }, + { + "epoch": 0.9166712532335296, + "grad_norm": 0.6317818760871887, + "learning_rate": 5.6758664610639615e-06, + "loss": 0.6601, + "step": 16655 + }, + { + "epoch": 0.9167262920358853, + "grad_norm": 0.6990076899528503, + "learning_rate": 5.67543697027621e-06, + "loss": 0.7225, + "step": 16656 + }, + { + "epoch": 0.916781330838241, + "grad_norm": 0.6295570135116577, + "learning_rate": 5.675007474412055e-06, + "loss": 0.6699, + "step": 16657 + }, + { + "epoch": 0.9168363696405967, + "grad_norm": 0.6749002933502197, + "learning_rate": 5.674577973474724e-06, + "loss": 0.7686, + "step": 16658 + }, + { + "epoch": 0.9168914084429522, + "grad_norm": 0.7126505374908447, + "learning_rate": 5.674148467467446e-06, + "loss": 0.7407, + "step": 16659 + }, + { + "epoch": 0.9169464472453079, + "grad_norm": 0.7344494462013245, + "learning_rate": 5.67371895639345e-06, + "loss": 0.8363, + "step": 16660 + }, + { + "epoch": 0.9170014860476636, + "grad_norm": 1.137642741203308, + "learning_rate": 5.673289440255962e-06, + "loss": 0.7756, + "step": 16661 + }, + { + "epoch": 0.9170565248500193, + "grad_norm": 0.7077656388282776, + "learning_rate": 5.672859919058209e-06, + "loss": 0.7276, + "step": 16662 + }, + { + "epoch": 0.9171115636523749, + "grad_norm": 0.6685035824775696, + "learning_rate": 5.672430392803423e-06, + "loss": 0.6223, + "step": 16663 + }, + { + "epoch": 0.9171666024547306, + "grad_norm": 0.7401498556137085, + "learning_rate": 5.6720008614948296e-06, + "loss": 0.7722, + "step": 16664 + }, + { + "epoch": 0.9172216412570863, + "grad_norm": 0.7386918067932129, + "learning_rate": 5.6715713251356575e-06, + "loss": 0.7338, + "step": 16665 + }, + { + "epoch": 0.9172766800594419, + "grad_norm": 0.6631183624267578, + "learning_rate": 5.671141783729135e-06, + "loss": 0.7338, + "step": 16666 + }, + { + "epoch": 0.9173317188617975, + "grad_norm": 0.7793235778808594, + "learning_rate": 5.670712237278491e-06, + "loss": 0.7597, + "step": 16667 + }, + { + "epoch": 0.9173867576641532, + "grad_norm": 0.6626643538475037, + "learning_rate": 5.670282685786953e-06, + "loss": 0.6953, + "step": 16668 + }, + { + "epoch": 0.9174417964665089, + "grad_norm": 0.6923056840896606, + "learning_rate": 5.66985312925775e-06, + "loss": 0.6598, + "step": 16669 + }, + { + "epoch": 0.9174968352688645, + "grad_norm": 0.7240145802497864, + "learning_rate": 5.669423567694109e-06, + "loss": 0.7927, + "step": 16670 + }, + { + "epoch": 0.9175518740712202, + "grad_norm": 0.7715887427330017, + "learning_rate": 5.66899400109926e-06, + "loss": 0.7028, + "step": 16671 + }, + { + "epoch": 0.9176069128735759, + "grad_norm": 0.636465311050415, + "learning_rate": 5.668564429476433e-06, + "loss": 0.6956, + "step": 16672 + }, + { + "epoch": 0.9176619516759316, + "grad_norm": 0.7405944466590881, + "learning_rate": 5.668134852828853e-06, + "loss": 0.8548, + "step": 16673 + }, + { + "epoch": 0.9177169904782871, + "grad_norm": 0.7019765973091125, + "learning_rate": 5.6677052711597505e-06, + "loss": 0.7633, + "step": 16674 + }, + { + "epoch": 0.9177720292806428, + "grad_norm": 0.6769872903823853, + "learning_rate": 5.667275684472353e-06, + "loss": 0.7114, + "step": 16675 + }, + { + "epoch": 0.9178270680829985, + "grad_norm": 0.6730767488479614, + "learning_rate": 5.666846092769892e-06, + "loss": 0.6733, + "step": 16676 + }, + { + "epoch": 0.9178821068853542, + "grad_norm": 0.7447411417961121, + "learning_rate": 5.6664164960555935e-06, + "loss": 0.742, + "step": 16677 + }, + { + "epoch": 0.9179371456877098, + "grad_norm": 0.6733657717704773, + "learning_rate": 5.665986894332687e-06, + "loss": 0.7784, + "step": 16678 + }, + { + "epoch": 0.9179921844900655, + "grad_norm": 0.6672250032424927, + "learning_rate": 5.665557287604402e-06, + "loss": 0.7409, + "step": 16679 + }, + { + "epoch": 0.9180472232924212, + "grad_norm": 0.8029616475105286, + "learning_rate": 5.665127675873966e-06, + "loss": 0.8032, + "step": 16680 + }, + { + "epoch": 0.9181022620947769, + "grad_norm": 0.739986777305603, + "learning_rate": 5.664698059144609e-06, + "loss": 0.7831, + "step": 16681 + }, + { + "epoch": 0.9181573008971324, + "grad_norm": 0.6360197067260742, + "learning_rate": 5.6642684374195565e-06, + "loss": 0.739, + "step": 16682 + }, + { + "epoch": 0.9182123396994881, + "grad_norm": 0.6870883703231812, + "learning_rate": 5.663838810702043e-06, + "loss": 0.8035, + "step": 16683 + }, + { + "epoch": 0.9182673785018438, + "grad_norm": 0.6486376523971558, + "learning_rate": 5.663409178995294e-06, + "loss": 0.7534, + "step": 16684 + }, + { + "epoch": 0.9183224173041995, + "grad_norm": 0.6808338761329651, + "learning_rate": 5.662979542302539e-06, + "loss": 0.7155, + "step": 16685 + }, + { + "epoch": 0.9183774561065551, + "grad_norm": 0.7058351039886475, + "learning_rate": 5.6625499006270066e-06, + "loss": 0.7006, + "step": 16686 + }, + { + "epoch": 0.9184324949089108, + "grad_norm": 0.8551416397094727, + "learning_rate": 5.662120253971928e-06, + "loss": 0.6619, + "step": 16687 + }, + { + "epoch": 0.9184875337112665, + "grad_norm": 0.7475470304489136, + "learning_rate": 5.66169060234053e-06, + "loss": 0.8396, + "step": 16688 + }, + { + "epoch": 0.9185425725136221, + "grad_norm": 0.8063628673553467, + "learning_rate": 5.661260945736043e-06, + "loss": 0.7052, + "step": 16689 + }, + { + "epoch": 0.9185976113159777, + "grad_norm": 0.725739598274231, + "learning_rate": 5.660831284161695e-06, + "loss": 0.7793, + "step": 16690 + }, + { + "epoch": 0.9186526501183334, + "grad_norm": 0.6293089389801025, + "learning_rate": 5.660401617620718e-06, + "loss": 0.7364, + "step": 16691 + }, + { + "epoch": 0.9187076889206891, + "grad_norm": 0.7716445326805115, + "learning_rate": 5.659971946116337e-06, + "loss": 0.7512, + "step": 16692 + }, + { + "epoch": 0.9187627277230448, + "grad_norm": 0.671196460723877, + "learning_rate": 5.659542269651784e-06, + "loss": 0.724, + "step": 16693 + }, + { + "epoch": 0.9188177665254004, + "grad_norm": 0.679114043712616, + "learning_rate": 5.659112588230288e-06, + "loss": 0.8198, + "step": 16694 + }, + { + "epoch": 0.9188728053277561, + "grad_norm": 1.0804789066314697, + "learning_rate": 5.658682901855078e-06, + "loss": 0.7444, + "step": 16695 + }, + { + "epoch": 0.9189278441301117, + "grad_norm": 0.6216247081756592, + "learning_rate": 5.658253210529384e-06, + "loss": 0.7313, + "step": 16696 + }, + { + "epoch": 0.9189828829324674, + "grad_norm": 0.9815022349357605, + "learning_rate": 5.657823514256436e-06, + "loss": 0.7936, + "step": 16697 + }, + { + "epoch": 0.919037921734823, + "grad_norm": 0.6335939168930054, + "learning_rate": 5.6573938130394604e-06, + "loss": 0.6899, + "step": 16698 + }, + { + "epoch": 0.9190929605371787, + "grad_norm": 0.7065087556838989, + "learning_rate": 5.6569641068816895e-06, + "loss": 0.6891, + "step": 16699 + }, + { + "epoch": 0.9191479993395344, + "grad_norm": 0.6492599844932556, + "learning_rate": 5.656534395786353e-06, + "loss": 0.7571, + "step": 16700 + }, + { + "epoch": 0.9192030381418901, + "grad_norm": 0.9063841700553894, + "learning_rate": 5.656104679756679e-06, + "loss": 0.7297, + "step": 16701 + }, + { + "epoch": 0.9192580769442457, + "grad_norm": 0.7018927335739136, + "learning_rate": 5.655674958795898e-06, + "loss": 0.8285, + "step": 16702 + }, + { + "epoch": 0.9193131157466014, + "grad_norm": 0.7087565660476685, + "learning_rate": 5.65524523290724e-06, + "loss": 0.8399, + "step": 16703 + }, + { + "epoch": 0.919368154548957, + "grad_norm": 0.7383968830108643, + "learning_rate": 5.6548155020939344e-06, + "loss": 0.6995, + "step": 16704 + }, + { + "epoch": 0.9194231933513127, + "grad_norm": 0.7523428797721863, + "learning_rate": 5.6543857663592104e-06, + "loss": 0.7332, + "step": 16705 + }, + { + "epoch": 0.9194782321536683, + "grad_norm": 0.761272668838501, + "learning_rate": 5.653956025706299e-06, + "loss": 0.8085, + "step": 16706 + }, + { + "epoch": 0.919533270956024, + "grad_norm": 0.8626916408538818, + "learning_rate": 5.653526280138427e-06, + "loss": 0.8026, + "step": 16707 + }, + { + "epoch": 0.9195883097583797, + "grad_norm": 0.7932292222976685, + "learning_rate": 5.653096529658828e-06, + "loss": 0.7962, + "step": 16708 + }, + { + "epoch": 0.9196433485607353, + "grad_norm": 0.6478299498558044, + "learning_rate": 5.65266677427073e-06, + "loss": 0.7286, + "step": 16709 + }, + { + "epoch": 0.919698387363091, + "grad_norm": 0.5808230042457581, + "learning_rate": 5.652237013977363e-06, + "loss": 0.6787, + "step": 16710 + }, + { + "epoch": 0.9197534261654466, + "grad_norm": 0.6502211689949036, + "learning_rate": 5.651807248781956e-06, + "loss": 0.6574, + "step": 16711 + }, + { + "epoch": 0.9198084649678023, + "grad_norm": 0.7240869998931885, + "learning_rate": 5.651377478687743e-06, + "loss": 0.8532, + "step": 16712 + }, + { + "epoch": 0.9198635037701579, + "grad_norm": 0.8461616635322571, + "learning_rate": 5.6509477036979495e-06, + "loss": 0.8315, + "step": 16713 + }, + { + "epoch": 0.9199185425725136, + "grad_norm": 0.6744678616523743, + "learning_rate": 5.650517923815808e-06, + "loss": 0.7464, + "step": 16714 + }, + { + "epoch": 0.9199735813748693, + "grad_norm": 0.6698602437973022, + "learning_rate": 5.650088139044547e-06, + "loss": 0.7194, + "step": 16715 + }, + { + "epoch": 0.920028620177225, + "grad_norm": 0.7550846934318542, + "learning_rate": 5.649658349387399e-06, + "loss": 0.754, + "step": 16716 + }, + { + "epoch": 0.9200836589795806, + "grad_norm": 0.7450975775718689, + "learning_rate": 5.649228554847592e-06, + "loss": 0.7989, + "step": 16717 + }, + { + "epoch": 0.9201386977819362, + "grad_norm": 0.6790910363197327, + "learning_rate": 5.648798755428357e-06, + "loss": 0.7259, + "step": 16718 + }, + { + "epoch": 0.9201937365842919, + "grad_norm": 0.6760334372520447, + "learning_rate": 5.648368951132923e-06, + "loss": 0.7317, + "step": 16719 + }, + { + "epoch": 0.9202487753866476, + "grad_norm": 0.6798209547996521, + "learning_rate": 5.6479391419645225e-06, + "loss": 0.7226, + "step": 16720 + }, + { + "epoch": 0.9203038141890032, + "grad_norm": 0.6167044639587402, + "learning_rate": 5.647509327926386e-06, + "loss": 0.7325, + "step": 16721 + }, + { + "epoch": 0.9203588529913589, + "grad_norm": 0.7588209509849548, + "learning_rate": 5.6470795090217414e-06, + "loss": 0.8487, + "step": 16722 + }, + { + "epoch": 0.9204138917937146, + "grad_norm": 0.6457192301750183, + "learning_rate": 5.64664968525382e-06, + "loss": 0.7436, + "step": 16723 + }, + { + "epoch": 0.9204689305960703, + "grad_norm": 0.7606278657913208, + "learning_rate": 5.6462198566258534e-06, + "loss": 0.7431, + "step": 16724 + }, + { + "epoch": 0.9205239693984258, + "grad_norm": 0.7064768075942993, + "learning_rate": 5.645790023141071e-06, + "loss": 0.7231, + "step": 16725 + }, + { + "epoch": 0.9205790082007815, + "grad_norm": 0.6990096569061279, + "learning_rate": 5.645360184802704e-06, + "loss": 0.7883, + "step": 16726 + }, + { + "epoch": 0.9206340470031372, + "grad_norm": 0.6305971741676331, + "learning_rate": 5.6449303416139835e-06, + "loss": 0.7124, + "step": 16727 + }, + { + "epoch": 0.9206890858054929, + "grad_norm": 0.6472275853157043, + "learning_rate": 5.644500493578138e-06, + "loss": 0.6776, + "step": 16728 + }, + { + "epoch": 0.9207441246078485, + "grad_norm": 0.6112511157989502, + "learning_rate": 5.644070640698399e-06, + "loss": 0.6532, + "step": 16729 + }, + { + "epoch": 0.9207991634102042, + "grad_norm": 0.6955081224441528, + "learning_rate": 5.643640782977998e-06, + "loss": 0.6718, + "step": 16730 + }, + { + "epoch": 0.9208542022125599, + "grad_norm": 0.6596464514732361, + "learning_rate": 5.643210920420165e-06, + "loss": 0.7317, + "step": 16731 + }, + { + "epoch": 0.9209092410149156, + "grad_norm": 0.8585140705108643, + "learning_rate": 5.642781053028131e-06, + "loss": 0.747, + "step": 16732 + }, + { + "epoch": 0.9209642798172711, + "grad_norm": 0.6630815863609314, + "learning_rate": 5.642351180805127e-06, + "loss": 0.7175, + "step": 16733 + }, + { + "epoch": 0.9210193186196268, + "grad_norm": 0.6956483721733093, + "learning_rate": 5.641921303754383e-06, + "loss": 0.7509, + "step": 16734 + }, + { + "epoch": 0.9210743574219825, + "grad_norm": 0.7407554388046265, + "learning_rate": 5.641491421879131e-06, + "loss": 0.7198, + "step": 16735 + }, + { + "epoch": 0.9211293962243382, + "grad_norm": 0.760361909866333, + "learning_rate": 5.6410615351826e-06, + "loss": 0.8267, + "step": 16736 + }, + { + "epoch": 0.9211844350266938, + "grad_norm": 0.6338782906532288, + "learning_rate": 5.640631643668023e-06, + "loss": 0.7637, + "step": 16737 + }, + { + "epoch": 0.9212394738290495, + "grad_norm": 0.6906964182853699, + "learning_rate": 5.64020174733863e-06, + "loss": 0.7809, + "step": 16738 + }, + { + "epoch": 0.9212945126314052, + "grad_norm": 0.7165959477424622, + "learning_rate": 5.639771846197652e-06, + "loss": 0.7886, + "step": 16739 + }, + { + "epoch": 0.9213495514337608, + "grad_norm": 0.8010871410369873, + "learning_rate": 5.639341940248319e-06, + "loss": 0.9206, + "step": 16740 + }, + { + "epoch": 0.9214045902361164, + "grad_norm": 0.715826153755188, + "learning_rate": 5.638912029493865e-06, + "loss": 0.7848, + "step": 16741 + }, + { + "epoch": 0.9214596290384721, + "grad_norm": 0.7436164617538452, + "learning_rate": 5.638482113937519e-06, + "loss": 0.7267, + "step": 16742 + }, + { + "epoch": 0.9215146678408278, + "grad_norm": 0.6157627105712891, + "learning_rate": 5.6380521935825105e-06, + "loss": 0.6634, + "step": 16743 + }, + { + "epoch": 0.9215697066431835, + "grad_norm": 0.8817352652549744, + "learning_rate": 5.637622268432074e-06, + "loss": 0.8532, + "step": 16744 + }, + { + "epoch": 0.9216247454455391, + "grad_norm": 0.8750252723693848, + "learning_rate": 5.637192338489439e-06, + "loss": 0.732, + "step": 16745 + }, + { + "epoch": 0.9216797842478948, + "grad_norm": 0.787200391292572, + "learning_rate": 5.636762403757836e-06, + "loss": 0.7485, + "step": 16746 + }, + { + "epoch": 0.9217348230502505, + "grad_norm": 0.7141642570495605, + "learning_rate": 5.636332464240498e-06, + "loss": 0.6631, + "step": 16747 + }, + { + "epoch": 0.9217898618526061, + "grad_norm": 0.636040449142456, + "learning_rate": 5.6359025199406555e-06, + "loss": 0.7597, + "step": 16748 + }, + { + "epoch": 0.9218449006549617, + "grad_norm": 0.6707825660705566, + "learning_rate": 5.63547257086154e-06, + "loss": 0.7931, + "step": 16749 + }, + { + "epoch": 0.9218999394573174, + "grad_norm": 0.7076492309570312, + "learning_rate": 5.635042617006383e-06, + "loss": 0.7502, + "step": 16750 + }, + { + "epoch": 0.9219549782596731, + "grad_norm": 0.7007712721824646, + "learning_rate": 5.6346126583784156e-06, + "loss": 0.7147, + "step": 16751 + }, + { + "epoch": 0.9220100170620287, + "grad_norm": 0.6958491802215576, + "learning_rate": 5.634182694980869e-06, + "loss": 0.7522, + "step": 16752 + }, + { + "epoch": 0.9220650558643844, + "grad_norm": 0.7442490458488464, + "learning_rate": 5.633752726816976e-06, + "loss": 0.8033, + "step": 16753 + }, + { + "epoch": 0.92212009466674, + "grad_norm": 0.6746878623962402, + "learning_rate": 5.633322753889966e-06, + "loss": 0.6874, + "step": 16754 + }, + { + "epoch": 0.9221751334690957, + "grad_norm": 0.5542443990707397, + "learning_rate": 5.63289277620307e-06, + "loss": 0.5964, + "step": 16755 + }, + { + "epoch": 0.9222301722714513, + "grad_norm": 0.7200949192047119, + "learning_rate": 5.632462793759524e-06, + "loss": 0.7213, + "step": 16756 + }, + { + "epoch": 0.922285211073807, + "grad_norm": 0.6765961050987244, + "learning_rate": 5.632032806562557e-06, + "loss": 0.7509, + "step": 16757 + }, + { + "epoch": 0.9223402498761627, + "grad_norm": 0.6492508053779602, + "learning_rate": 5.6316028146154e-06, + "loss": 0.7374, + "step": 16758 + }, + { + "epoch": 0.9223952886785184, + "grad_norm": 0.6642841100692749, + "learning_rate": 5.631172817921284e-06, + "loss": 0.701, + "step": 16759 + }, + { + "epoch": 0.922450327480874, + "grad_norm": 0.6529877781867981, + "learning_rate": 5.630742816483443e-06, + "loss": 0.7891, + "step": 16760 + }, + { + "epoch": 0.9225053662832297, + "grad_norm": 0.7389785051345825, + "learning_rate": 5.630312810305109e-06, + "loss": 0.7081, + "step": 16761 + }, + { + "epoch": 0.9225604050855853, + "grad_norm": 0.6740532517433167, + "learning_rate": 5.629882799389511e-06, + "loss": 0.599, + "step": 16762 + }, + { + "epoch": 0.922615443887941, + "grad_norm": 0.6529923677444458, + "learning_rate": 5.629452783739884e-06, + "loss": 0.7476, + "step": 16763 + }, + { + "epoch": 0.9226704826902966, + "grad_norm": 0.7116754651069641, + "learning_rate": 5.629022763359457e-06, + "loss": 0.7355, + "step": 16764 + }, + { + "epoch": 0.9227255214926523, + "grad_norm": 0.6048651337623596, + "learning_rate": 5.628592738251465e-06, + "loss": 0.6945, + "step": 16765 + }, + { + "epoch": 0.922780560295008, + "grad_norm": 0.5997273921966553, + "learning_rate": 5.628162708419137e-06, + "loss": 0.7473, + "step": 16766 + }, + { + "epoch": 0.9228355990973637, + "grad_norm": 0.6367392539978027, + "learning_rate": 5.627732673865705e-06, + "loss": 0.7156, + "step": 16767 + }, + { + "epoch": 0.9228906378997193, + "grad_norm": 0.6315346360206604, + "learning_rate": 5.627302634594404e-06, + "loss": 0.5789, + "step": 16768 + }, + { + "epoch": 0.922945676702075, + "grad_norm": 0.6221422553062439, + "learning_rate": 5.626872590608465e-06, + "loss": 0.742, + "step": 16769 + }, + { + "epoch": 0.9230007155044306, + "grad_norm": 0.6551609039306641, + "learning_rate": 5.626442541911117e-06, + "loss": 0.7986, + "step": 16770 + }, + { + "epoch": 0.9230557543067863, + "grad_norm": 0.6395416259765625, + "learning_rate": 5.626012488505595e-06, + "loss": 0.719, + "step": 16771 + }, + { + "epoch": 0.9231107931091419, + "grad_norm": 0.784221351146698, + "learning_rate": 5.625582430395132e-06, + "loss": 0.8514, + "step": 16772 + }, + { + "epoch": 0.9231658319114976, + "grad_norm": 0.6133732199668884, + "learning_rate": 5.625152367582959e-06, + "loss": 0.6405, + "step": 16773 + }, + { + "epoch": 0.9232208707138533, + "grad_norm": 0.6891661286354065, + "learning_rate": 5.6247223000723075e-06, + "loss": 0.6904, + "step": 16774 + }, + { + "epoch": 0.923275909516209, + "grad_norm": 0.664445161819458, + "learning_rate": 5.62429222786641e-06, + "loss": 0.8226, + "step": 16775 + }, + { + "epoch": 0.9233309483185645, + "grad_norm": 0.8657387495040894, + "learning_rate": 5.6238621509685e-06, + "loss": 0.7789, + "step": 16776 + }, + { + "epoch": 0.9233859871209202, + "grad_norm": 0.6581356525421143, + "learning_rate": 5.623432069381809e-06, + "loss": 0.6324, + "step": 16777 + }, + { + "epoch": 0.9234410259232759, + "grad_norm": 0.6915359497070312, + "learning_rate": 5.623001983109569e-06, + "loss": 0.7759, + "step": 16778 + }, + { + "epoch": 0.9234960647256316, + "grad_norm": 0.6104419827461243, + "learning_rate": 5.622571892155013e-06, + "loss": 0.6507, + "step": 16779 + }, + { + "epoch": 0.9235511035279872, + "grad_norm": 0.7881014943122864, + "learning_rate": 5.622141796521373e-06, + "loss": 0.6996, + "step": 16780 + }, + { + "epoch": 0.9236061423303429, + "grad_norm": 0.6843274831771851, + "learning_rate": 5.621711696211882e-06, + "loss": 0.757, + "step": 16781 + }, + { + "epoch": 0.9236611811326986, + "grad_norm": 0.6329718232154846, + "learning_rate": 5.6212815912297735e-06, + "loss": 0.7618, + "step": 16782 + }, + { + "epoch": 0.9237162199350543, + "grad_norm": 0.7012547254562378, + "learning_rate": 5.6208514815782765e-06, + "loss": 0.8123, + "step": 16783 + }, + { + "epoch": 0.9237712587374098, + "grad_norm": 0.5882138013839722, + "learning_rate": 5.6204213672606275e-06, + "loss": 0.5835, + "step": 16784 + }, + { + "epoch": 0.9238262975397655, + "grad_norm": 0.7064856886863708, + "learning_rate": 5.619991248280058e-06, + "loss": 0.7661, + "step": 16785 + }, + { + "epoch": 0.9238813363421212, + "grad_norm": 0.6935905814170837, + "learning_rate": 5.619561124639801e-06, + "loss": 0.7613, + "step": 16786 + }, + { + "epoch": 0.9239363751444769, + "grad_norm": 0.6331823468208313, + "learning_rate": 5.619130996343086e-06, + "loss": 0.7639, + "step": 16787 + }, + { + "epoch": 0.9239914139468325, + "grad_norm": 0.9952171444892883, + "learning_rate": 5.61870086339315e-06, + "loss": 0.7417, + "step": 16788 + }, + { + "epoch": 0.9240464527491882, + "grad_norm": 0.7930924892425537, + "learning_rate": 5.618270725793223e-06, + "loss": 0.8565, + "step": 16789 + }, + { + "epoch": 0.9241014915515439, + "grad_norm": 0.6192849278450012, + "learning_rate": 5.617840583546539e-06, + "loss": 0.6792, + "step": 16790 + }, + { + "epoch": 0.9241565303538996, + "grad_norm": 0.7150148749351501, + "learning_rate": 5.61741043665633e-06, + "loss": 0.8016, + "step": 16791 + }, + { + "epoch": 0.9242115691562551, + "grad_norm": 0.7186499238014221, + "learning_rate": 5.616980285125829e-06, + "loss": 0.8054, + "step": 16792 + }, + { + "epoch": 0.9242666079586108, + "grad_norm": 0.765373706817627, + "learning_rate": 5.616550128958271e-06, + "loss": 0.7506, + "step": 16793 + }, + { + "epoch": 0.9243216467609665, + "grad_norm": 0.7462981939315796, + "learning_rate": 5.6161199681568865e-06, + "loss": 0.7942, + "step": 16794 + }, + { + "epoch": 0.9243766855633221, + "grad_norm": 0.8685716390609741, + "learning_rate": 5.615689802724908e-06, + "loss": 0.7345, + "step": 16795 + }, + { + "epoch": 0.9244317243656778, + "grad_norm": 0.7249016165733337, + "learning_rate": 5.615259632665572e-06, + "loss": 0.7717, + "step": 16796 + }, + { + "epoch": 0.9244867631680335, + "grad_norm": 0.6986450552940369, + "learning_rate": 5.614829457982108e-06, + "loss": 0.7059, + "step": 16797 + }, + { + "epoch": 0.9245418019703892, + "grad_norm": 0.6127453446388245, + "learning_rate": 5.614399278677753e-06, + "loss": 0.679, + "step": 16798 + }, + { + "epoch": 0.9245968407727447, + "grad_norm": 0.8027729392051697, + "learning_rate": 5.613969094755734e-06, + "loss": 0.8873, + "step": 16799 + }, + { + "epoch": 0.9246518795751004, + "grad_norm": 0.7740058898925781, + "learning_rate": 5.613538906219288e-06, + "loss": 0.7232, + "step": 16800 + }, + { + "epoch": 0.9247069183774561, + "grad_norm": 0.6654518246650696, + "learning_rate": 5.613108713071649e-06, + "loss": 0.6444, + "step": 16801 + }, + { + "epoch": 0.9247619571798118, + "grad_norm": 0.8171139359474182, + "learning_rate": 5.612678515316049e-06, + "loss": 0.7811, + "step": 16802 + }, + { + "epoch": 0.9248169959821674, + "grad_norm": 0.6470557451248169, + "learning_rate": 5.612248312955719e-06, + "loss": 0.724, + "step": 16803 + }, + { + "epoch": 0.9248720347845231, + "grad_norm": 0.6510154604911804, + "learning_rate": 5.611818105993897e-06, + "loss": 0.7413, + "step": 16804 + }, + { + "epoch": 0.9249270735868788, + "grad_norm": 0.6152756810188293, + "learning_rate": 5.611387894433814e-06, + "loss": 0.6736, + "step": 16805 + }, + { + "epoch": 0.9249821123892344, + "grad_norm": 0.7700890898704529, + "learning_rate": 5.610957678278702e-06, + "loss": 0.7899, + "step": 16806 + }, + { + "epoch": 0.92503715119159, + "grad_norm": 0.6250771284103394, + "learning_rate": 5.610527457531796e-06, + "loss": 0.6656, + "step": 16807 + }, + { + "epoch": 0.9250921899939457, + "grad_norm": 0.7004884481430054, + "learning_rate": 5.6100972321963295e-06, + "loss": 0.7679, + "step": 16808 + }, + { + "epoch": 0.9251472287963014, + "grad_norm": 0.7237122058868408, + "learning_rate": 5.609667002275535e-06, + "loss": 0.7014, + "step": 16809 + }, + { + "epoch": 0.9252022675986571, + "grad_norm": 0.7693857550621033, + "learning_rate": 5.609236767772649e-06, + "loss": 0.6886, + "step": 16810 + }, + { + "epoch": 0.9252573064010127, + "grad_norm": 0.738211989402771, + "learning_rate": 5.608806528690899e-06, + "loss": 0.759, + "step": 16811 + }, + { + "epoch": 0.9253123452033684, + "grad_norm": 0.6153057813644409, + "learning_rate": 5.608376285033524e-06, + "loss": 0.6771, + "step": 16812 + }, + { + "epoch": 0.925367384005724, + "grad_norm": 0.6428161263465881, + "learning_rate": 5.607946036803755e-06, + "loss": 0.5967, + "step": 16813 + }, + { + "epoch": 0.9254224228080797, + "grad_norm": 0.659237802028656, + "learning_rate": 5.607515784004827e-06, + "loss": 0.7997, + "step": 16814 + }, + { + "epoch": 0.9254774616104353, + "grad_norm": 0.7318007946014404, + "learning_rate": 5.60708552663997e-06, + "loss": 0.6912, + "step": 16815 + }, + { + "epoch": 0.925532500412791, + "grad_norm": 0.65810227394104, + "learning_rate": 5.606655264712424e-06, + "loss": 0.7702, + "step": 16816 + }, + { + "epoch": 0.9255875392151467, + "grad_norm": 0.6517447233200073, + "learning_rate": 5.606224998225419e-06, + "loss": 0.7757, + "step": 16817 + }, + { + "epoch": 0.9256425780175024, + "grad_norm": 0.8019997477531433, + "learning_rate": 5.605794727182189e-06, + "loss": 0.755, + "step": 16818 + }, + { + "epoch": 0.925697616819858, + "grad_norm": 0.5735001564025879, + "learning_rate": 5.605364451585966e-06, + "loss": 0.6594, + "step": 16819 + }, + { + "epoch": 0.9257526556222136, + "grad_norm": 0.7946392297744751, + "learning_rate": 5.604934171439989e-06, + "loss": 0.8566, + "step": 16820 + }, + { + "epoch": 0.9258076944245693, + "grad_norm": 0.6836885809898376, + "learning_rate": 5.6045038867474865e-06, + "loss": 0.7309, + "step": 16821 + }, + { + "epoch": 0.925862733226925, + "grad_norm": 0.7148752808570862, + "learning_rate": 5.604073597511697e-06, + "loss": 0.7961, + "step": 16822 + }, + { + "epoch": 0.9259177720292806, + "grad_norm": 0.5976021885871887, + "learning_rate": 5.60364330373585e-06, + "loss": 0.702, + "step": 16823 + }, + { + "epoch": 0.9259728108316363, + "grad_norm": 0.7915990948677063, + "learning_rate": 5.6032130054231815e-06, + "loss": 0.7248, + "step": 16824 + }, + { + "epoch": 0.926027849633992, + "grad_norm": 0.8617003560066223, + "learning_rate": 5.602782702576925e-06, + "loss": 0.7586, + "step": 16825 + }, + { + "epoch": 0.9260828884363477, + "grad_norm": 0.707922637462616, + "learning_rate": 5.602352395200317e-06, + "loss": 0.7649, + "step": 16826 + }, + { + "epoch": 0.9261379272387033, + "grad_norm": 0.702974259853363, + "learning_rate": 5.601922083296588e-06, + "loss": 0.7868, + "step": 16827 + }, + { + "epoch": 0.9261929660410589, + "grad_norm": 0.8666204810142517, + "learning_rate": 5.601491766868974e-06, + "loss": 0.6743, + "step": 16828 + }, + { + "epoch": 0.9262480048434146, + "grad_norm": 0.7228319644927979, + "learning_rate": 5.60106144592071e-06, + "loss": 0.9038, + "step": 16829 + }, + { + "epoch": 0.9263030436457703, + "grad_norm": 0.6495303511619568, + "learning_rate": 5.600631120455028e-06, + "loss": 0.675, + "step": 16830 + }, + { + "epoch": 0.9263580824481259, + "grad_norm": 0.909911572933197, + "learning_rate": 5.600200790475163e-06, + "loss": 0.8512, + "step": 16831 + }, + { + "epoch": 0.9264131212504816, + "grad_norm": 0.8628181219100952, + "learning_rate": 5.59977045598435e-06, + "loss": 0.8263, + "step": 16832 + }, + { + "epoch": 0.9264681600528373, + "grad_norm": 0.7378986477851868, + "learning_rate": 5.599340116985823e-06, + "loss": 0.6147, + "step": 16833 + }, + { + "epoch": 0.926523198855193, + "grad_norm": 0.7630714774131775, + "learning_rate": 5.598909773482818e-06, + "loss": 0.7488, + "step": 16834 + }, + { + "epoch": 0.9265782376575485, + "grad_norm": 0.6660358905792236, + "learning_rate": 5.598479425478564e-06, + "loss": 0.7846, + "step": 16835 + }, + { + "epoch": 0.9266332764599042, + "grad_norm": 0.633204996585846, + "learning_rate": 5.5980490729763e-06, + "loss": 0.719, + "step": 16836 + }, + { + "epoch": 0.9266883152622599, + "grad_norm": 0.7990162968635559, + "learning_rate": 5.5976187159792605e-06, + "loss": 0.7774, + "step": 16837 + }, + { + "epoch": 0.9267433540646155, + "grad_norm": 0.7380030155181885, + "learning_rate": 5.597188354490678e-06, + "loss": 0.6434, + "step": 16838 + }, + { + "epoch": 0.9267983928669712, + "grad_norm": 0.7048906683921814, + "learning_rate": 5.596757988513788e-06, + "loss": 0.7559, + "step": 16839 + }, + { + "epoch": 0.9268534316693269, + "grad_norm": 0.8689755201339722, + "learning_rate": 5.596327618051823e-06, + "loss": 0.7003, + "step": 16840 + }, + { + "epoch": 0.9269084704716826, + "grad_norm": 0.6776423454284668, + "learning_rate": 5.595897243108021e-06, + "loss": 0.7535, + "step": 16841 + }, + { + "epoch": 0.9269635092740381, + "grad_norm": 0.6138951778411865, + "learning_rate": 5.595466863685614e-06, + "loss": 0.7471, + "step": 16842 + }, + { + "epoch": 0.9270185480763938, + "grad_norm": 0.6530849933624268, + "learning_rate": 5.595036479787838e-06, + "loss": 0.7595, + "step": 16843 + }, + { + "epoch": 0.9270735868787495, + "grad_norm": 0.672170102596283, + "learning_rate": 5.594606091417926e-06, + "loss": 0.619, + "step": 16844 + }, + { + "epoch": 0.9271286256811052, + "grad_norm": 0.7782242894172668, + "learning_rate": 5.594175698579114e-06, + "loss": 0.7405, + "step": 16845 + }, + { + "epoch": 0.9271836644834608, + "grad_norm": 0.7308391332626343, + "learning_rate": 5.593745301274639e-06, + "loss": 0.766, + "step": 16846 + }, + { + "epoch": 0.9272387032858165, + "grad_norm": 0.8123987913131714, + "learning_rate": 5.593314899507731e-06, + "loss": 0.8137, + "step": 16847 + }, + { + "epoch": 0.9272937420881722, + "grad_norm": 0.6876010894775391, + "learning_rate": 5.592884493281626e-06, + "loss": 0.6828, + "step": 16848 + }, + { + "epoch": 0.9273487808905279, + "grad_norm": 0.7311702966690063, + "learning_rate": 5.5924540825995624e-06, + "loss": 0.8712, + "step": 16849 + }, + { + "epoch": 0.9274038196928834, + "grad_norm": 0.6796336770057678, + "learning_rate": 5.592023667464771e-06, + "loss": 0.7453, + "step": 16850 + }, + { + "epoch": 0.9274588584952391, + "grad_norm": 1.1471986770629883, + "learning_rate": 5.591593247880489e-06, + "loss": 0.7521, + "step": 16851 + }, + { + "epoch": 0.9275138972975948, + "grad_norm": 0.6603145599365234, + "learning_rate": 5.5911628238499496e-06, + "loss": 0.7757, + "step": 16852 + }, + { + "epoch": 0.9275689360999505, + "grad_norm": 0.6421443819999695, + "learning_rate": 5.590732395376388e-06, + "loss": 0.679, + "step": 16853 + }, + { + "epoch": 0.9276239749023061, + "grad_norm": 0.6478089690208435, + "learning_rate": 5.590301962463041e-06, + "loss": 0.6818, + "step": 16854 + }, + { + "epoch": 0.9276790137046618, + "grad_norm": 0.742957353591919, + "learning_rate": 5.589871525113143e-06, + "loss": 0.7611, + "step": 16855 + }, + { + "epoch": 0.9277340525070175, + "grad_norm": 0.760917603969574, + "learning_rate": 5.589441083329927e-06, + "loss": 0.7634, + "step": 16856 + }, + { + "epoch": 0.9277890913093731, + "grad_norm": 0.8060788512229919, + "learning_rate": 5.589010637116632e-06, + "loss": 0.8003, + "step": 16857 + }, + { + "epoch": 0.9278441301117287, + "grad_norm": 0.6400671601295471, + "learning_rate": 5.588580186476489e-06, + "loss": 0.6459, + "step": 16858 + }, + { + "epoch": 0.9278991689140844, + "grad_norm": 0.6653194427490234, + "learning_rate": 5.5881497314127354e-06, + "loss": 0.6536, + "step": 16859 + }, + { + "epoch": 0.9279542077164401, + "grad_norm": 0.7482097148895264, + "learning_rate": 5.587719271928605e-06, + "loss": 0.6866, + "step": 16860 + }, + { + "epoch": 0.9280092465187958, + "grad_norm": 0.6429533958435059, + "learning_rate": 5.5872888080273345e-06, + "loss": 0.7135, + "step": 16861 + }, + { + "epoch": 0.9280642853211514, + "grad_norm": 0.6756870746612549, + "learning_rate": 5.586858339712159e-06, + "loss": 0.6738, + "step": 16862 + }, + { + "epoch": 0.9281193241235071, + "grad_norm": 0.6506288051605225, + "learning_rate": 5.586427866986313e-06, + "loss": 0.6872, + "step": 16863 + }, + { + "epoch": 0.9281743629258628, + "grad_norm": 0.6222309470176697, + "learning_rate": 5.585997389853031e-06, + "loss": 0.6227, + "step": 16864 + }, + { + "epoch": 0.9282294017282184, + "grad_norm": 0.7656785845756531, + "learning_rate": 5.58556690831555e-06, + "loss": 0.7487, + "step": 16865 + }, + { + "epoch": 0.928284440530574, + "grad_norm": 0.8085651993751526, + "learning_rate": 5.585136422377105e-06, + "loss": 0.7495, + "step": 16866 + }, + { + "epoch": 0.9283394793329297, + "grad_norm": 0.7220577597618103, + "learning_rate": 5.584705932040933e-06, + "loss": 0.7616, + "step": 16867 + }, + { + "epoch": 0.9283945181352854, + "grad_norm": 0.6376538872718811, + "learning_rate": 5.5842754373102646e-06, + "loss": 0.6959, + "step": 16868 + }, + { + "epoch": 0.9284495569376411, + "grad_norm": 0.8235114216804504, + "learning_rate": 5.583844938188341e-06, + "loss": 0.8405, + "step": 16869 + }, + { + "epoch": 0.9285045957399967, + "grad_norm": 0.6148403286933899, + "learning_rate": 5.583414434678395e-06, + "loss": 0.6496, + "step": 16870 + }, + { + "epoch": 0.9285596345423524, + "grad_norm": 0.7475830912590027, + "learning_rate": 5.5829839267836606e-06, + "loss": 0.7728, + "step": 16871 + }, + { + "epoch": 0.928614673344708, + "grad_norm": 0.6615459322929382, + "learning_rate": 5.582553414507376e-06, + "loss": 0.7168, + "step": 16872 + }, + { + "epoch": 0.9286697121470637, + "grad_norm": 0.7308976054191589, + "learning_rate": 5.582122897852776e-06, + "loss": 0.8668, + "step": 16873 + }, + { + "epoch": 0.9287247509494193, + "grad_norm": 2.0933802127838135, + "learning_rate": 5.5816923768230955e-06, + "loss": 0.8744, + "step": 16874 + }, + { + "epoch": 0.928779789751775, + "grad_norm": 0.8197827935218811, + "learning_rate": 5.581261851421571e-06, + "loss": 0.965, + "step": 16875 + }, + { + "epoch": 0.9288348285541307, + "grad_norm": 0.7161306738853455, + "learning_rate": 5.580831321651437e-06, + "loss": 0.7199, + "step": 16876 + }, + { + "epoch": 0.9288898673564864, + "grad_norm": 0.6676005721092224, + "learning_rate": 5.580400787515932e-06, + "loss": 0.8178, + "step": 16877 + }, + { + "epoch": 0.928944906158842, + "grad_norm": 0.6874394416809082, + "learning_rate": 5.579970249018291e-06, + "loss": 0.7827, + "step": 16878 + }, + { + "epoch": 0.9289999449611976, + "grad_norm": 0.6925203800201416, + "learning_rate": 5.579539706161746e-06, + "loss": 0.7628, + "step": 16879 + }, + { + "epoch": 0.9290549837635533, + "grad_norm": 0.7088871598243713, + "learning_rate": 5.5791091589495375e-06, + "loss": 0.8104, + "step": 16880 + }, + { + "epoch": 0.9291100225659089, + "grad_norm": 0.640397846698761, + "learning_rate": 5.5786786073849e-06, + "loss": 0.7041, + "step": 16881 + }, + { + "epoch": 0.9291650613682646, + "grad_norm": 0.6750220060348511, + "learning_rate": 5.578248051471068e-06, + "loss": 0.7124, + "step": 16882 + }, + { + "epoch": 0.9292201001706203, + "grad_norm": 0.6361757516860962, + "learning_rate": 5.577817491211279e-06, + "loss": 0.6831, + "step": 16883 + }, + { + "epoch": 0.929275138972976, + "grad_norm": 0.710574746131897, + "learning_rate": 5.577386926608766e-06, + "loss": 0.8622, + "step": 16884 + }, + { + "epoch": 0.9293301777753316, + "grad_norm": 0.6523045301437378, + "learning_rate": 5.576956357666771e-06, + "loss": 0.7014, + "step": 16885 + }, + { + "epoch": 0.9293852165776872, + "grad_norm": 0.7073860764503479, + "learning_rate": 5.576525784388525e-06, + "loss": 0.7735, + "step": 16886 + }, + { + "epoch": 0.9294402553800429, + "grad_norm": 0.6221209764480591, + "learning_rate": 5.576095206777266e-06, + "loss": 0.7262, + "step": 16887 + }, + { + "epoch": 0.9294952941823986, + "grad_norm": 0.7045557498931885, + "learning_rate": 5.575664624836228e-06, + "loss": 0.8519, + "step": 16888 + }, + { + "epoch": 0.9295503329847542, + "grad_norm": 0.8938118815422058, + "learning_rate": 5.575234038568648e-06, + "loss": 0.8044, + "step": 16889 + }, + { + "epoch": 0.9296053717871099, + "grad_norm": 0.6333356499671936, + "learning_rate": 5.574803447977766e-06, + "loss": 0.7061, + "step": 16890 + }, + { + "epoch": 0.9296604105894656, + "grad_norm": 0.7838292717933655, + "learning_rate": 5.574372853066814e-06, + "loss": 0.8472, + "step": 16891 + }, + { + "epoch": 0.9297154493918213, + "grad_norm": 0.6740962862968445, + "learning_rate": 5.5739422538390285e-06, + "loss": 0.8737, + "step": 16892 + }, + { + "epoch": 0.9297704881941768, + "grad_norm": 0.6610066890716553, + "learning_rate": 5.57351165029765e-06, + "loss": 0.7749, + "step": 16893 + }, + { + "epoch": 0.9298255269965325, + "grad_norm": 0.6941941380500793, + "learning_rate": 5.573081042445908e-06, + "loss": 0.7125, + "step": 16894 + }, + { + "epoch": 0.9298805657988882, + "grad_norm": 0.6376040577888489, + "learning_rate": 5.572650430287043e-06, + "loss": 0.7232, + "step": 16895 + }, + { + "epoch": 0.9299356046012439, + "grad_norm": 0.6908338665962219, + "learning_rate": 5.57221981382429e-06, + "loss": 0.7282, + "step": 16896 + }, + { + "epoch": 0.9299906434035995, + "grad_norm": 0.7384136915206909, + "learning_rate": 5.571789193060887e-06, + "loss": 0.7049, + "step": 16897 + }, + { + "epoch": 0.9300456822059552, + "grad_norm": 0.6418812870979309, + "learning_rate": 5.57135856800007e-06, + "loss": 0.6943, + "step": 16898 + }, + { + "epoch": 0.9301007210083109, + "grad_norm": 0.6272100806236267, + "learning_rate": 5.570927938645075e-06, + "loss": 0.6018, + "step": 16899 + }, + { + "epoch": 0.9301557598106666, + "grad_norm": 0.6637504696846008, + "learning_rate": 5.570497304999136e-06, + "loss": 0.7459, + "step": 16900 + }, + { + "epoch": 0.9302107986130221, + "grad_norm": 0.6815645098686218, + "learning_rate": 5.570066667065495e-06, + "loss": 0.8375, + "step": 16901 + }, + { + "epoch": 0.9302658374153778, + "grad_norm": 0.7291944026947021, + "learning_rate": 5.569636024847384e-06, + "loss": 0.7447, + "step": 16902 + }, + { + "epoch": 0.9303208762177335, + "grad_norm": 0.7493074536323547, + "learning_rate": 5.569205378348041e-06, + "loss": 0.7485, + "step": 16903 + }, + { + "epoch": 0.9303759150200892, + "grad_norm": 0.6949830651283264, + "learning_rate": 5.568774727570702e-06, + "loss": 0.6928, + "step": 16904 + }, + { + "epoch": 0.9304309538224448, + "grad_norm": 0.7263016104698181, + "learning_rate": 5.568344072518606e-06, + "loss": 0.7687, + "step": 16905 + }, + { + "epoch": 0.9304859926248005, + "grad_norm": 0.6831687688827515, + "learning_rate": 5.567913413194989e-06, + "loss": 0.8186, + "step": 16906 + }, + { + "epoch": 0.9305410314271562, + "grad_norm": 0.6471529603004456, + "learning_rate": 5.567482749603086e-06, + "loss": 0.809, + "step": 16907 + }, + { + "epoch": 0.9305960702295119, + "grad_norm": 0.716443657875061, + "learning_rate": 5.5670520817461324e-06, + "loss": 0.7556, + "step": 16908 + }, + { + "epoch": 0.9306511090318674, + "grad_norm": 0.7684258222579956, + "learning_rate": 5.566621409627369e-06, + "loss": 0.7163, + "step": 16909 + }, + { + "epoch": 0.9307061478342231, + "grad_norm": 0.6364058256149292, + "learning_rate": 5.566190733250031e-06, + "loss": 0.7008, + "step": 16910 + }, + { + "epoch": 0.9307611866365788, + "grad_norm": 0.7371893525123596, + "learning_rate": 5.565760052617354e-06, + "loss": 0.7377, + "step": 16911 + }, + { + "epoch": 0.9308162254389345, + "grad_norm": 0.6785818934440613, + "learning_rate": 5.565329367732576e-06, + "loss": 0.6958, + "step": 16912 + }, + { + "epoch": 0.9308712642412901, + "grad_norm": 0.6949000954627991, + "learning_rate": 5.564898678598933e-06, + "loss": 0.7061, + "step": 16913 + }, + { + "epoch": 0.9309263030436458, + "grad_norm": 0.834689199924469, + "learning_rate": 5.5644679852196645e-06, + "loss": 0.7584, + "step": 16914 + }, + { + "epoch": 0.9309813418460015, + "grad_norm": 0.6549750566482544, + "learning_rate": 5.564037287598005e-06, + "loss": 0.7846, + "step": 16915 + }, + { + "epoch": 0.9310363806483571, + "grad_norm": 0.660965085029602, + "learning_rate": 5.563606585737192e-06, + "loss": 0.7106, + "step": 16916 + }, + { + "epoch": 0.9310914194507127, + "grad_norm": 0.6218705773353577, + "learning_rate": 5.563175879640464e-06, + "loss": 0.7455, + "step": 16917 + }, + { + "epoch": 0.9311464582530684, + "grad_norm": 0.6786651015281677, + "learning_rate": 5.562745169311057e-06, + "loss": 0.7967, + "step": 16918 + }, + { + "epoch": 0.9312014970554241, + "grad_norm": 0.6778925657272339, + "learning_rate": 5.562314454752207e-06, + "loss": 0.7567, + "step": 16919 + }, + { + "epoch": 0.9312565358577798, + "grad_norm": 0.7895017862319946, + "learning_rate": 5.561883735967151e-06, + "loss": 0.7321, + "step": 16920 + }, + { + "epoch": 0.9313115746601354, + "grad_norm": 0.7806753516197205, + "learning_rate": 5.561453012959128e-06, + "loss": 0.7469, + "step": 16921 + }, + { + "epoch": 0.931366613462491, + "grad_norm": 0.8205820918083191, + "learning_rate": 5.561022285731376e-06, + "loss": 0.7461, + "step": 16922 + }, + { + "epoch": 0.9314216522648467, + "grad_norm": 0.6211751103401184, + "learning_rate": 5.560591554287129e-06, + "loss": 0.668, + "step": 16923 + }, + { + "epoch": 0.9314766910672023, + "grad_norm": 0.7944413423538208, + "learning_rate": 5.5601608186296255e-06, + "loss": 0.7866, + "step": 16924 + }, + { + "epoch": 0.931531729869558, + "grad_norm": 0.7663695812225342, + "learning_rate": 5.559730078762103e-06, + "loss": 0.7402, + "step": 16925 + }, + { + "epoch": 0.9315867686719137, + "grad_norm": 0.6584487557411194, + "learning_rate": 5.559299334687801e-06, + "loss": 0.7251, + "step": 16926 + }, + { + "epoch": 0.9316418074742694, + "grad_norm": 0.740992546081543, + "learning_rate": 5.558868586409954e-06, + "loss": 0.7572, + "step": 16927 + }, + { + "epoch": 0.931696846276625, + "grad_norm": 0.7777884006500244, + "learning_rate": 5.558437833931801e-06, + "loss": 0.7817, + "step": 16928 + }, + { + "epoch": 0.9317518850789807, + "grad_norm": 0.6684641242027283, + "learning_rate": 5.5580070772565774e-06, + "loss": 0.7154, + "step": 16929 + }, + { + "epoch": 0.9318069238813363, + "grad_norm": 0.8053527474403381, + "learning_rate": 5.557576316387523e-06, + "loss": 0.7699, + "step": 16930 + }, + { + "epoch": 0.931861962683692, + "grad_norm": 0.6722555756568909, + "learning_rate": 5.557145551327875e-06, + "loss": 0.6554, + "step": 16931 + }, + { + "epoch": 0.9319170014860476, + "grad_norm": 0.6901096701622009, + "learning_rate": 5.556714782080867e-06, + "loss": 0.7889, + "step": 16932 + }, + { + "epoch": 0.9319720402884033, + "grad_norm": 0.6796066761016846, + "learning_rate": 5.556284008649743e-06, + "loss": 0.6839, + "step": 16933 + }, + { + "epoch": 0.932027079090759, + "grad_norm": 0.673685610294342, + "learning_rate": 5.555853231037735e-06, + "loss": 0.7677, + "step": 16934 + }, + { + "epoch": 0.9320821178931147, + "grad_norm": 0.6612395644187927, + "learning_rate": 5.555422449248085e-06, + "loss": 0.7566, + "step": 16935 + }, + { + "epoch": 0.9321371566954703, + "grad_norm": 0.6591265201568604, + "learning_rate": 5.554991663284026e-06, + "loss": 0.7503, + "step": 16936 + }, + { + "epoch": 0.932192195497826, + "grad_norm": 0.6197401285171509, + "learning_rate": 5.5545608731488e-06, + "loss": 0.7203, + "step": 16937 + }, + { + "epoch": 0.9322472343001816, + "grad_norm": 0.6715179681777954, + "learning_rate": 5.554130078845643e-06, + "loss": 0.7496, + "step": 16938 + }, + { + "epoch": 0.9323022731025373, + "grad_norm": 0.686224639415741, + "learning_rate": 5.553699280377793e-06, + "loss": 0.7466, + "step": 16939 + }, + { + "epoch": 0.9323573119048929, + "grad_norm": 0.7548978328704834, + "learning_rate": 5.553268477748487e-06, + "loss": 0.8279, + "step": 16940 + }, + { + "epoch": 0.9324123507072486, + "grad_norm": 0.5969768166542053, + "learning_rate": 5.552837670960962e-06, + "loss": 0.641, + "step": 16941 + }, + { + "epoch": 0.9324673895096043, + "grad_norm": 0.6729679703712463, + "learning_rate": 5.552406860018459e-06, + "loss": 0.718, + "step": 16942 + }, + { + "epoch": 0.93252242831196, + "grad_norm": 0.6350606679916382, + "learning_rate": 5.551976044924213e-06, + "loss": 0.7033, + "step": 16943 + }, + { + "epoch": 0.9325774671143156, + "grad_norm": 0.6564341187477112, + "learning_rate": 5.551545225681462e-06, + "loss": 0.6638, + "step": 16944 + }, + { + "epoch": 0.9326325059166712, + "grad_norm": 0.7151914238929749, + "learning_rate": 5.551114402293445e-06, + "loss": 0.8052, + "step": 16945 + }, + { + "epoch": 0.9326875447190269, + "grad_norm": 0.647455096244812, + "learning_rate": 5.5506835747634e-06, + "loss": 0.6903, + "step": 16946 + }, + { + "epoch": 0.9327425835213826, + "grad_norm": 0.7067738771438599, + "learning_rate": 5.5502527430945655e-06, + "loss": 0.7173, + "step": 16947 + }, + { + "epoch": 0.9327976223237382, + "grad_norm": 0.6301264762878418, + "learning_rate": 5.549821907290177e-06, + "loss": 0.7138, + "step": 16948 + }, + { + "epoch": 0.9328526611260939, + "grad_norm": 0.6322819590568542, + "learning_rate": 5.549391067353476e-06, + "loss": 0.7379, + "step": 16949 + }, + { + "epoch": 0.9329076999284496, + "grad_norm": 0.6628162860870361, + "learning_rate": 5.548960223287697e-06, + "loss": 0.7422, + "step": 16950 + }, + { + "epoch": 0.9329627387308053, + "grad_norm": 0.8131056427955627, + "learning_rate": 5.548529375096081e-06, + "loss": 0.808, + "step": 16951 + }, + { + "epoch": 0.9330177775331608, + "grad_norm": 0.6634977459907532, + "learning_rate": 5.548098522781866e-06, + "loss": 0.7963, + "step": 16952 + }, + { + "epoch": 0.9330728163355165, + "grad_norm": 0.706670343875885, + "learning_rate": 5.547667666348288e-06, + "loss": 0.7847, + "step": 16953 + }, + { + "epoch": 0.9331278551378722, + "grad_norm": 0.797380268573761, + "learning_rate": 5.547236805798587e-06, + "loss": 0.8007, + "step": 16954 + }, + { + "epoch": 0.9331828939402279, + "grad_norm": 0.6721376776695251, + "learning_rate": 5.546805941136e-06, + "loss": 0.7537, + "step": 16955 + }, + { + "epoch": 0.9332379327425835, + "grad_norm": 0.63679039478302, + "learning_rate": 5.546375072363765e-06, + "loss": 0.7198, + "step": 16956 + }, + { + "epoch": 0.9332929715449392, + "grad_norm": 0.6420717239379883, + "learning_rate": 5.5459441994851225e-06, + "loss": 0.7361, + "step": 16957 + }, + { + "epoch": 0.9333480103472949, + "grad_norm": 0.7549602389335632, + "learning_rate": 5.54551332250331e-06, + "loss": 0.7626, + "step": 16958 + }, + { + "epoch": 0.9334030491496506, + "grad_norm": 0.7319654226303101, + "learning_rate": 5.5450824414215656e-06, + "loss": 0.6942, + "step": 16959 + }, + { + "epoch": 0.9334580879520061, + "grad_norm": 0.7357600331306458, + "learning_rate": 5.544651556243124e-06, + "loss": 0.8573, + "step": 16960 + }, + { + "epoch": 0.9335131267543618, + "grad_norm": 0.6782594323158264, + "learning_rate": 5.5442206669712304e-06, + "loss": 0.7654, + "step": 16961 + }, + { + "epoch": 0.9335681655567175, + "grad_norm": 0.6621338129043579, + "learning_rate": 5.5437897736091194e-06, + "loss": 0.5882, + "step": 16962 + }, + { + "epoch": 0.9336232043590732, + "grad_norm": 0.6803955435752869, + "learning_rate": 5.543358876160031e-06, + "loss": 0.6574, + "step": 16963 + }, + { + "epoch": 0.9336782431614288, + "grad_norm": 0.6741437911987305, + "learning_rate": 5.542927974627202e-06, + "loss": 0.866, + "step": 16964 + }, + { + "epoch": 0.9337332819637845, + "grad_norm": 0.6904686093330383, + "learning_rate": 5.542497069013871e-06, + "loss": 0.7894, + "step": 16965 + }, + { + "epoch": 0.9337883207661402, + "grad_norm": 0.6671770215034485, + "learning_rate": 5.542066159323278e-06, + "loss": 0.7062, + "step": 16966 + }, + { + "epoch": 0.9338433595684957, + "grad_norm": 0.7025963664054871, + "learning_rate": 5.541635245558661e-06, + "loss": 0.7782, + "step": 16967 + }, + { + "epoch": 0.9338983983708514, + "grad_norm": 0.6948807239532471, + "learning_rate": 5.5412043277232566e-06, + "loss": 0.6972, + "step": 16968 + }, + { + "epoch": 0.9339534371732071, + "grad_norm": 0.6727368235588074, + "learning_rate": 5.540773405820307e-06, + "loss": 0.7303, + "step": 16969 + }, + { + "epoch": 0.9340084759755628, + "grad_norm": 0.7015120983123779, + "learning_rate": 5.540342479853048e-06, + "loss": 0.7417, + "step": 16970 + }, + { + "epoch": 0.9340635147779184, + "grad_norm": 0.6439093351364136, + "learning_rate": 5.539911549824722e-06, + "loss": 0.7465, + "step": 16971 + }, + { + "epoch": 0.9341185535802741, + "grad_norm": 0.6767433881759644, + "learning_rate": 5.539480615738562e-06, + "loss": 0.7867, + "step": 16972 + }, + { + "epoch": 0.9341735923826298, + "grad_norm": 0.6318436861038208, + "learning_rate": 5.539049677597812e-06, + "loss": 0.6693, + "step": 16973 + }, + { + "epoch": 0.9342286311849854, + "grad_norm": 0.604857861995697, + "learning_rate": 5.538618735405709e-06, + "loss": 0.6735, + "step": 16974 + }, + { + "epoch": 0.934283669987341, + "grad_norm": 0.7811105847358704, + "learning_rate": 5.538187789165491e-06, + "loss": 0.7997, + "step": 16975 + }, + { + "epoch": 0.9343387087896967, + "grad_norm": 0.644146203994751, + "learning_rate": 5.537756838880399e-06, + "loss": 0.7147, + "step": 16976 + }, + { + "epoch": 0.9343937475920524, + "grad_norm": 0.7074788808822632, + "learning_rate": 5.5373258845536705e-06, + "loss": 0.7487, + "step": 16977 + }, + { + "epoch": 0.9344487863944081, + "grad_norm": 0.597263753414154, + "learning_rate": 5.536894926188543e-06, + "loss": 0.6645, + "step": 16978 + }, + { + "epoch": 0.9345038251967637, + "grad_norm": 0.617344081401825, + "learning_rate": 5.5364639637882574e-06, + "loss": 0.6744, + "step": 16979 + }, + { + "epoch": 0.9345588639991194, + "grad_norm": 0.9057487845420837, + "learning_rate": 5.536032997356053e-06, + "loss": 0.8575, + "step": 16980 + }, + { + "epoch": 0.934613902801475, + "grad_norm": 0.6194222569465637, + "learning_rate": 5.535602026895167e-06, + "loss": 0.6823, + "step": 16981 + }, + { + "epoch": 0.9346689416038307, + "grad_norm": 0.7065734267234802, + "learning_rate": 5.535171052408841e-06, + "loss": 0.7394, + "step": 16982 + }, + { + "epoch": 0.9347239804061863, + "grad_norm": 0.6653978228569031, + "learning_rate": 5.534740073900311e-06, + "loss": 0.7709, + "step": 16983 + }, + { + "epoch": 0.934779019208542, + "grad_norm": 0.6902819275856018, + "learning_rate": 5.53430909137282e-06, + "loss": 0.7993, + "step": 16984 + }, + { + "epoch": 0.9348340580108977, + "grad_norm": 0.8288851976394653, + "learning_rate": 5.533878104829601e-06, + "loss": 0.6989, + "step": 16985 + }, + { + "epoch": 0.9348890968132534, + "grad_norm": 0.6423464417457581, + "learning_rate": 5.5334471142739e-06, + "loss": 0.6609, + "step": 16986 + }, + { + "epoch": 0.934944135615609, + "grad_norm": 0.7889821529388428, + "learning_rate": 5.533016119708953e-06, + "loss": 0.7466, + "step": 16987 + }, + { + "epoch": 0.9349991744179647, + "grad_norm": 0.6482957601547241, + "learning_rate": 5.5325851211379985e-06, + "loss": 0.7382, + "step": 16988 + }, + { + "epoch": 0.9350542132203203, + "grad_norm": 0.7158783078193665, + "learning_rate": 5.532154118564279e-06, + "loss": 0.7066, + "step": 16989 + }, + { + "epoch": 0.935109252022676, + "grad_norm": 0.708812952041626, + "learning_rate": 5.53172311199103e-06, + "loss": 0.7094, + "step": 16990 + }, + { + "epoch": 0.9351642908250316, + "grad_norm": 0.7140359282493591, + "learning_rate": 5.531292101421493e-06, + "loss": 0.7292, + "step": 16991 + }, + { + "epoch": 0.9352193296273873, + "grad_norm": 0.6114800572395325, + "learning_rate": 5.530861086858906e-06, + "loss": 0.6856, + "step": 16992 + }, + { + "epoch": 0.935274368429743, + "grad_norm": 0.6589435338973999, + "learning_rate": 5.53043006830651e-06, + "loss": 0.6854, + "step": 16993 + }, + { + "epoch": 0.9353294072320987, + "grad_norm": 0.6713253259658813, + "learning_rate": 5.5299990457675425e-06, + "loss": 0.7712, + "step": 16994 + }, + { + "epoch": 0.9353844460344543, + "grad_norm": 0.6345391273498535, + "learning_rate": 5.529568019245244e-06, + "loss": 0.6893, + "step": 16995 + }, + { + "epoch": 0.9354394848368099, + "grad_norm": 0.8601719737052917, + "learning_rate": 5.529136988742855e-06, + "loss": 0.7676, + "step": 16996 + }, + { + "epoch": 0.9354945236391656, + "grad_norm": 0.7791833281517029, + "learning_rate": 5.528705954263613e-06, + "loss": 0.7257, + "step": 16997 + }, + { + "epoch": 0.9355495624415213, + "grad_norm": 0.7072179317474365, + "learning_rate": 5.5282749158107594e-06, + "loss": 0.7292, + "step": 16998 + }, + { + "epoch": 0.9356046012438769, + "grad_norm": 0.762479841709137, + "learning_rate": 5.527843873387533e-06, + "loss": 0.8424, + "step": 16999 + }, + { + "epoch": 0.9356596400462326, + "grad_norm": 0.6964772939682007, + "learning_rate": 5.527412826997174e-06, + "loss": 0.6888, + "step": 17000 + }, + { + "epoch": 0.9357146788485883, + "grad_norm": 0.7196127772331238, + "learning_rate": 5.52698177664292e-06, + "loss": 0.6836, + "step": 17001 + }, + { + "epoch": 0.935769717650944, + "grad_norm": 0.6779214143753052, + "learning_rate": 5.526550722328012e-06, + "loss": 0.6476, + "step": 17002 + }, + { + "epoch": 0.9358247564532995, + "grad_norm": 0.7174015045166016, + "learning_rate": 5.526119664055691e-06, + "loss": 0.7115, + "step": 17003 + }, + { + "epoch": 0.9358797952556552, + "grad_norm": 0.6855511665344238, + "learning_rate": 5.525688601829195e-06, + "loss": 0.6881, + "step": 17004 + }, + { + "epoch": 0.9359348340580109, + "grad_norm": 0.716136634349823, + "learning_rate": 5.525257535651763e-06, + "loss": 0.7429, + "step": 17005 + }, + { + "epoch": 0.9359898728603666, + "grad_norm": 0.8645130395889282, + "learning_rate": 5.5248264655266374e-06, + "loss": 0.7413, + "step": 17006 + }, + { + "epoch": 0.9360449116627222, + "grad_norm": 0.6923744082450867, + "learning_rate": 5.5243953914570555e-06, + "loss": 0.6794, + "step": 17007 + }, + { + "epoch": 0.9360999504650779, + "grad_norm": 0.6838575601577759, + "learning_rate": 5.52396431344626e-06, + "loss": 0.8012, + "step": 17008 + }, + { + "epoch": 0.9361549892674336, + "grad_norm": 0.8331907987594604, + "learning_rate": 5.523533231497487e-06, + "loss": 0.8244, + "step": 17009 + }, + { + "epoch": 0.9362100280697891, + "grad_norm": 0.6853833794593811, + "learning_rate": 5.523102145613979e-06, + "loss": 0.7783, + "step": 17010 + }, + { + "epoch": 0.9362650668721448, + "grad_norm": 0.6622136831283569, + "learning_rate": 5.522671055798975e-06, + "loss": 0.7168, + "step": 17011 + }, + { + "epoch": 0.9363201056745005, + "grad_norm": 0.7320037484169006, + "learning_rate": 5.522239962055716e-06, + "loss": 0.7635, + "step": 17012 + }, + { + "epoch": 0.9363751444768562, + "grad_norm": 0.7859386801719666, + "learning_rate": 5.5218088643874415e-06, + "loss": 0.8936, + "step": 17013 + }, + { + "epoch": 0.9364301832792118, + "grad_norm": 0.7455195188522339, + "learning_rate": 5.521377762797391e-06, + "loss": 0.6984, + "step": 17014 + }, + { + "epoch": 0.9364852220815675, + "grad_norm": 0.6779624223709106, + "learning_rate": 5.520946657288804e-06, + "loss": 0.7338, + "step": 17015 + }, + { + "epoch": 0.9365402608839232, + "grad_norm": 0.6418783068656921, + "learning_rate": 5.520515547864922e-06, + "loss": 0.6768, + "step": 17016 + }, + { + "epoch": 0.9365952996862789, + "grad_norm": 0.8031061291694641, + "learning_rate": 5.520084434528984e-06, + "loss": 0.8285, + "step": 17017 + }, + { + "epoch": 0.9366503384886344, + "grad_norm": 0.638831377029419, + "learning_rate": 5.51965331728423e-06, + "loss": 0.6655, + "step": 17018 + }, + { + "epoch": 0.9367053772909901, + "grad_norm": 0.6730309724807739, + "learning_rate": 5.5192221961339024e-06, + "loss": 0.7191, + "step": 17019 + }, + { + "epoch": 0.9367604160933458, + "grad_norm": 0.7811747193336487, + "learning_rate": 5.51879107108124e-06, + "loss": 0.7116, + "step": 17020 + }, + { + "epoch": 0.9368154548957015, + "grad_norm": 0.6636793613433838, + "learning_rate": 5.51835994212948e-06, + "loss": 0.7178, + "step": 17021 + }, + { + "epoch": 0.9368704936980571, + "grad_norm": 0.6776058673858643, + "learning_rate": 5.517928809281867e-06, + "loss": 0.7542, + "step": 17022 + }, + { + "epoch": 0.9369255325004128, + "grad_norm": 0.6820049285888672, + "learning_rate": 5.5174976725416404e-06, + "loss": 0.7052, + "step": 17023 + }, + { + "epoch": 0.9369805713027685, + "grad_norm": 0.6562666296958923, + "learning_rate": 5.51706653191204e-06, + "loss": 0.7362, + "step": 17024 + }, + { + "epoch": 0.9370356101051242, + "grad_norm": 0.6575721502304077, + "learning_rate": 5.516635387396305e-06, + "loss": 0.7171, + "step": 17025 + }, + { + "epoch": 0.9370906489074797, + "grad_norm": 0.7353058457374573, + "learning_rate": 5.516204238997678e-06, + "loss": 0.7371, + "step": 17026 + }, + { + "epoch": 0.9371456877098354, + "grad_norm": 0.6880764961242676, + "learning_rate": 5.515773086719397e-06, + "loss": 0.7069, + "step": 17027 + }, + { + "epoch": 0.9372007265121911, + "grad_norm": 0.6665347814559937, + "learning_rate": 5.515341930564704e-06, + "loss": 0.7313, + "step": 17028 + }, + { + "epoch": 0.9372557653145468, + "grad_norm": 0.6450551152229309, + "learning_rate": 5.514910770536838e-06, + "loss": 0.6579, + "step": 17029 + }, + { + "epoch": 0.9373108041169024, + "grad_norm": 0.6448618769645691, + "learning_rate": 5.514479606639041e-06, + "loss": 0.6725, + "step": 17030 + }, + { + "epoch": 0.9373658429192581, + "grad_norm": 0.6275889873504639, + "learning_rate": 5.514048438874553e-06, + "loss": 0.7157, + "step": 17031 + }, + { + "epoch": 0.9374208817216138, + "grad_norm": 0.7542771100997925, + "learning_rate": 5.513617267246615e-06, + "loss": 0.8409, + "step": 17032 + }, + { + "epoch": 0.9374759205239694, + "grad_norm": 0.6698876619338989, + "learning_rate": 5.513186091758466e-06, + "loss": 0.7883, + "step": 17033 + }, + { + "epoch": 0.937530959326325, + "grad_norm": 0.6699510216712952, + "learning_rate": 5.512754912413349e-06, + "loss": 0.6719, + "step": 17034 + }, + { + "epoch": 0.9375859981286807, + "grad_norm": 0.6606268882751465, + "learning_rate": 5.512323729214502e-06, + "loss": 0.7626, + "step": 17035 + }, + { + "epoch": 0.9376410369310364, + "grad_norm": 0.6428219676017761, + "learning_rate": 5.511892542165169e-06, + "loss": 0.7135, + "step": 17036 + }, + { + "epoch": 0.9376960757333921, + "grad_norm": 0.6446773409843445, + "learning_rate": 5.511461351268588e-06, + "loss": 0.7064, + "step": 17037 + }, + { + "epoch": 0.9377511145357477, + "grad_norm": 0.730904757976532, + "learning_rate": 5.511030156528e-06, + "loss": 0.7543, + "step": 17038 + }, + { + "epoch": 0.9378061533381034, + "grad_norm": 0.6265544891357422, + "learning_rate": 5.5105989579466454e-06, + "loss": 0.648, + "step": 17039 + }, + { + "epoch": 0.937861192140459, + "grad_norm": 0.5883828401565552, + "learning_rate": 5.510167755527767e-06, + "loss": 0.6948, + "step": 17040 + }, + { + "epoch": 0.9379162309428147, + "grad_norm": 0.7379502654075623, + "learning_rate": 5.5097365492746024e-06, + "loss": 0.7034, + "step": 17041 + }, + { + "epoch": 0.9379712697451703, + "grad_norm": 0.7929210066795349, + "learning_rate": 5.509305339190396e-06, + "loss": 0.7117, + "step": 17042 + }, + { + "epoch": 0.938026308547526, + "grad_norm": 0.696765661239624, + "learning_rate": 5.5088741252783875e-06, + "loss": 0.7152, + "step": 17043 + }, + { + "epoch": 0.9380813473498817, + "grad_norm": 0.6686639785766602, + "learning_rate": 5.508442907541817e-06, + "loss": 0.7658, + "step": 17044 + }, + { + "epoch": 0.9381363861522374, + "grad_norm": 0.618567168712616, + "learning_rate": 5.508011685983923e-06, + "loss": 0.7133, + "step": 17045 + }, + { + "epoch": 0.938191424954593, + "grad_norm": 0.6887465119361877, + "learning_rate": 5.507580460607952e-06, + "loss": 0.7333, + "step": 17046 + }, + { + "epoch": 0.9382464637569486, + "grad_norm": 0.642488956451416, + "learning_rate": 5.5071492314171414e-06, + "loss": 0.7204, + "step": 17047 + }, + { + "epoch": 0.9383015025593043, + "grad_norm": 0.6381024718284607, + "learning_rate": 5.506717998414733e-06, + "loss": 0.7191, + "step": 17048 + }, + { + "epoch": 0.93835654136166, + "grad_norm": 0.7331018447875977, + "learning_rate": 5.506286761603968e-06, + "loss": 0.8042, + "step": 17049 + }, + { + "epoch": 0.9384115801640156, + "grad_norm": 0.636921763420105, + "learning_rate": 5.505855520988087e-06, + "loss": 0.6913, + "step": 17050 + }, + { + "epoch": 0.9384666189663713, + "grad_norm": 0.6855827569961548, + "learning_rate": 5.505424276570331e-06, + "loss": 0.8026, + "step": 17051 + }, + { + "epoch": 0.938521657768727, + "grad_norm": 0.6682537198066711, + "learning_rate": 5.504993028353942e-06, + "loss": 0.7942, + "step": 17052 + }, + { + "epoch": 0.9385766965710826, + "grad_norm": 0.6845211982727051, + "learning_rate": 5.50456177634216e-06, + "loss": 0.6827, + "step": 17053 + }, + { + "epoch": 0.9386317353734382, + "grad_norm": 0.7216134071350098, + "learning_rate": 5.504130520538227e-06, + "loss": 0.7557, + "step": 17054 + }, + { + "epoch": 0.9386867741757939, + "grad_norm": 0.6481857299804688, + "learning_rate": 5.503699260945384e-06, + "loss": 0.742, + "step": 17055 + }, + { + "epoch": 0.9387418129781496, + "grad_norm": 0.64216548204422, + "learning_rate": 5.503267997566873e-06, + "loss": 0.7219, + "step": 17056 + }, + { + "epoch": 0.9387968517805052, + "grad_norm": 0.7181951403617859, + "learning_rate": 5.502836730405931e-06, + "loss": 0.8311, + "step": 17057 + }, + { + "epoch": 0.9388518905828609, + "grad_norm": 0.841126024723053, + "learning_rate": 5.502405459465806e-06, + "loss": 0.7644, + "step": 17058 + }, + { + "epoch": 0.9389069293852166, + "grad_norm": 0.712687611579895, + "learning_rate": 5.5019741847497355e-06, + "loss": 0.5891, + "step": 17059 + }, + { + "epoch": 0.9389619681875723, + "grad_norm": 0.5507431030273438, + "learning_rate": 5.501542906260962e-06, + "loss": 0.6119, + "step": 17060 + }, + { + "epoch": 0.9390170069899278, + "grad_norm": 0.7180385589599609, + "learning_rate": 5.501111624002725e-06, + "loss": 0.7465, + "step": 17061 + }, + { + "epoch": 0.9390720457922835, + "grad_norm": 0.8892782330513, + "learning_rate": 5.500680337978268e-06, + "loss": 0.7639, + "step": 17062 + }, + { + "epoch": 0.9391270845946392, + "grad_norm": 0.7344280481338501, + "learning_rate": 5.500249048190832e-06, + "loss": 0.7659, + "step": 17063 + }, + { + "epoch": 0.9391821233969949, + "grad_norm": 0.6756631731987, + "learning_rate": 5.499817754643657e-06, + "loss": 0.6477, + "step": 17064 + }, + { + "epoch": 0.9392371621993505, + "grad_norm": 0.7020494341850281, + "learning_rate": 5.499386457339986e-06, + "loss": 0.6786, + "step": 17065 + }, + { + "epoch": 0.9392922010017062, + "grad_norm": 0.7595643997192383, + "learning_rate": 5.49895515628306e-06, + "loss": 0.7532, + "step": 17066 + }, + { + "epoch": 0.9393472398040619, + "grad_norm": 0.6780978441238403, + "learning_rate": 5.498523851476121e-06, + "loss": 0.6844, + "step": 17067 + }, + { + "epoch": 0.9394022786064176, + "grad_norm": 0.6908407807350159, + "learning_rate": 5.49809254292241e-06, + "loss": 0.6804, + "step": 17068 + }, + { + "epoch": 0.9394573174087731, + "grad_norm": 0.8891296982765198, + "learning_rate": 5.497661230625166e-06, + "loss": 0.7669, + "step": 17069 + }, + { + "epoch": 0.9395123562111288, + "grad_norm": 0.6702411770820618, + "learning_rate": 5.497229914587637e-06, + "loss": 0.7438, + "step": 17070 + }, + { + "epoch": 0.9395673950134845, + "grad_norm": 0.653181254863739, + "learning_rate": 5.4967985948130595e-06, + "loss": 0.7306, + "step": 17071 + }, + { + "epoch": 0.9396224338158402, + "grad_norm": 0.6638855934143066, + "learning_rate": 5.496367271304678e-06, + "loss": 0.7292, + "step": 17072 + }, + { + "epoch": 0.9396774726181958, + "grad_norm": 0.7425099015235901, + "learning_rate": 5.4959359440657324e-06, + "loss": 0.6555, + "step": 17073 + }, + { + "epoch": 0.9397325114205515, + "grad_norm": 0.6774802803993225, + "learning_rate": 5.495504613099465e-06, + "loss": 0.6969, + "step": 17074 + }, + { + "epoch": 0.9397875502229072, + "grad_norm": 0.7952350378036499, + "learning_rate": 5.495073278409118e-06, + "loss": 0.7323, + "step": 17075 + }, + { + "epoch": 0.9398425890252629, + "grad_norm": 0.7487592101097107, + "learning_rate": 5.494641939997932e-06, + "loss": 0.659, + "step": 17076 + }, + { + "epoch": 0.9398976278276184, + "grad_norm": 0.6290369033813477, + "learning_rate": 5.494210597869148e-06, + "loss": 0.5732, + "step": 17077 + }, + { + "epoch": 0.9399526666299741, + "grad_norm": 0.6333632469177246, + "learning_rate": 5.493779252026013e-06, + "loss": 0.7434, + "step": 17078 + }, + { + "epoch": 0.9400077054323298, + "grad_norm": 0.6158192157745361, + "learning_rate": 5.4933479024717625e-06, + "loss": 0.6443, + "step": 17079 + }, + { + "epoch": 0.9400627442346855, + "grad_norm": 0.639827311038971, + "learning_rate": 5.4929165492096425e-06, + "loss": 0.7668, + "step": 17080 + }, + { + "epoch": 0.9401177830370411, + "grad_norm": 0.6548694968223572, + "learning_rate": 5.492485192242893e-06, + "loss": 0.7439, + "step": 17081 + }, + { + "epoch": 0.9401728218393968, + "grad_norm": 0.7560970783233643, + "learning_rate": 5.492053831574756e-06, + "loss": 0.8069, + "step": 17082 + }, + { + "epoch": 0.9402278606417525, + "grad_norm": 0.792142391204834, + "learning_rate": 5.491622467208476e-06, + "loss": 0.7916, + "step": 17083 + }, + { + "epoch": 0.9402828994441081, + "grad_norm": 0.8506911993026733, + "learning_rate": 5.491191099147293e-06, + "loss": 0.6172, + "step": 17084 + }, + { + "epoch": 0.9403379382464637, + "grad_norm": 0.682349443435669, + "learning_rate": 5.490759727394448e-06, + "loss": 0.788, + "step": 17085 + }, + { + "epoch": 0.9403929770488194, + "grad_norm": 0.8406410813331604, + "learning_rate": 5.490328351953185e-06, + "loss": 0.7693, + "step": 17086 + }, + { + "epoch": 0.9404480158511751, + "grad_norm": 0.7017484903335571, + "learning_rate": 5.489896972826745e-06, + "loss": 0.8121, + "step": 17087 + }, + { + "epoch": 0.9405030546535308, + "grad_norm": 0.5880059599876404, + "learning_rate": 5.48946559001837e-06, + "loss": 0.6335, + "step": 17088 + }, + { + "epoch": 0.9405580934558864, + "grad_norm": 0.6772021651268005, + "learning_rate": 5.489034203531303e-06, + "loss": 0.7218, + "step": 17089 + }, + { + "epoch": 0.9406131322582421, + "grad_norm": 0.7308957576751709, + "learning_rate": 5.4886028133687865e-06, + "loss": 0.7566, + "step": 17090 + }, + { + "epoch": 0.9406681710605977, + "grad_norm": 0.5989949703216553, + "learning_rate": 5.488171419534061e-06, + "loss": 0.668, + "step": 17091 + }, + { + "epoch": 0.9407232098629534, + "grad_norm": 0.7060475945472717, + "learning_rate": 5.487740022030371e-06, + "loss": 0.7811, + "step": 17092 + }, + { + "epoch": 0.940778248665309, + "grad_norm": 0.7582124471664429, + "learning_rate": 5.487308620860956e-06, + "loss": 0.8345, + "step": 17093 + }, + { + "epoch": 0.9408332874676647, + "grad_norm": 0.67522794008255, + "learning_rate": 5.48687721602906e-06, + "loss": 0.6474, + "step": 17094 + }, + { + "epoch": 0.9408883262700204, + "grad_norm": 0.6196714639663696, + "learning_rate": 5.486445807537926e-06, + "loss": 0.6508, + "step": 17095 + }, + { + "epoch": 0.940943365072376, + "grad_norm": 0.7253809571266174, + "learning_rate": 5.486014395390796e-06, + "loss": 0.7439, + "step": 17096 + }, + { + "epoch": 0.9409984038747317, + "grad_norm": 0.7475833296775818, + "learning_rate": 5.485582979590912e-06, + "loss": 0.6686, + "step": 17097 + }, + { + "epoch": 0.9410534426770873, + "grad_norm": 0.598025918006897, + "learning_rate": 5.485151560141515e-06, + "loss": 0.6862, + "step": 17098 + }, + { + "epoch": 0.941108481479443, + "grad_norm": 0.644935131072998, + "learning_rate": 5.484720137045851e-06, + "loss": 0.6571, + "step": 17099 + }, + { + "epoch": 0.9411635202817986, + "grad_norm": 1.0013047456741333, + "learning_rate": 5.484288710307157e-06, + "loss": 0.7726, + "step": 17100 + }, + { + "epoch": 0.9412185590841543, + "grad_norm": 0.8886626958847046, + "learning_rate": 5.483857279928681e-06, + "loss": 0.8842, + "step": 17101 + }, + { + "epoch": 0.94127359788651, + "grad_norm": 0.6320830583572388, + "learning_rate": 5.483425845913662e-06, + "loss": 0.7534, + "step": 17102 + }, + { + "epoch": 0.9413286366888657, + "grad_norm": 0.684315025806427, + "learning_rate": 5.482994408265344e-06, + "loss": 0.777, + "step": 17103 + }, + { + "epoch": 0.9413836754912213, + "grad_norm": 0.6782847046852112, + "learning_rate": 5.48256296698697e-06, + "loss": 0.7018, + "step": 17104 + }, + { + "epoch": 0.941438714293577, + "grad_norm": 0.8408985137939453, + "learning_rate": 5.48213152208178e-06, + "loss": 0.8425, + "step": 17105 + }, + { + "epoch": 0.9414937530959326, + "grad_norm": 0.7615265846252441, + "learning_rate": 5.48170007355302e-06, + "loss": 0.7091, + "step": 17106 + }, + { + "epoch": 0.9415487918982883, + "grad_norm": 0.6782113313674927, + "learning_rate": 5.481268621403932e-06, + "loss": 0.7273, + "step": 17107 + }, + { + "epoch": 0.9416038307006439, + "grad_norm": 0.6413006782531738, + "learning_rate": 5.480837165637757e-06, + "loss": 0.6691, + "step": 17108 + }, + { + "epoch": 0.9416588695029996, + "grad_norm": 0.689999520778656, + "learning_rate": 5.480405706257739e-06, + "loss": 0.7615, + "step": 17109 + }, + { + "epoch": 0.9417139083053553, + "grad_norm": 0.7439923286437988, + "learning_rate": 5.47997424326712e-06, + "loss": 0.7569, + "step": 17110 + }, + { + "epoch": 0.941768947107711, + "grad_norm": 0.6936306953430176, + "learning_rate": 5.479542776669143e-06, + "loss": 0.713, + "step": 17111 + }, + { + "epoch": 0.9418239859100666, + "grad_norm": 1.0158600807189941, + "learning_rate": 5.479111306467051e-06, + "loss": 0.7874, + "step": 17112 + }, + { + "epoch": 0.9418790247124222, + "grad_norm": 0.7467468976974487, + "learning_rate": 5.478679832664087e-06, + "loss": 0.5757, + "step": 17113 + }, + { + "epoch": 0.9419340635147779, + "grad_norm": 0.6923582553863525, + "learning_rate": 5.478248355263492e-06, + "loss": 0.6983, + "step": 17114 + }, + { + "epoch": 0.9419891023171336, + "grad_norm": 0.6621877551078796, + "learning_rate": 5.4778168742685115e-06, + "loss": 0.7642, + "step": 17115 + }, + { + "epoch": 0.9420441411194892, + "grad_norm": 0.7422150373458862, + "learning_rate": 5.477385389682388e-06, + "loss": 0.8372, + "step": 17116 + }, + { + "epoch": 0.9420991799218449, + "grad_norm": 0.6306797862052917, + "learning_rate": 5.476953901508363e-06, + "loss": 0.698, + "step": 17117 + }, + { + "epoch": 0.9421542187242006, + "grad_norm": 0.6767764091491699, + "learning_rate": 5.476522409749679e-06, + "loss": 0.7251, + "step": 17118 + }, + { + "epoch": 0.9422092575265563, + "grad_norm": 0.86384117603302, + "learning_rate": 5.476090914409581e-06, + "loss": 0.7531, + "step": 17119 + }, + { + "epoch": 0.9422642963289118, + "grad_norm": 0.6221064329147339, + "learning_rate": 5.475659415491311e-06, + "loss": 0.735, + "step": 17120 + }, + { + "epoch": 0.9423193351312675, + "grad_norm": 0.6867205500602722, + "learning_rate": 5.475227912998113e-06, + "loss": 0.8036, + "step": 17121 + }, + { + "epoch": 0.9423743739336232, + "grad_norm": 0.6117410659790039, + "learning_rate": 5.474796406933229e-06, + "loss": 0.7011, + "step": 17122 + }, + { + "epoch": 0.9424294127359789, + "grad_norm": 0.7704444527626038, + "learning_rate": 5.474364897299902e-06, + "loss": 0.7836, + "step": 17123 + }, + { + "epoch": 0.9424844515383345, + "grad_norm": 0.7088763117790222, + "learning_rate": 5.473933384101376e-06, + "loss": 0.7613, + "step": 17124 + }, + { + "epoch": 0.9425394903406902, + "grad_norm": 0.7116881608963013, + "learning_rate": 5.473501867340892e-06, + "loss": 0.5805, + "step": 17125 + }, + { + "epoch": 0.9425945291430459, + "grad_norm": 0.7216660380363464, + "learning_rate": 5.473070347021695e-06, + "loss": 0.8896, + "step": 17126 + }, + { + "epoch": 0.9426495679454016, + "grad_norm": 0.6709206700325012, + "learning_rate": 5.4726388231470295e-06, + "loss": 0.7507, + "step": 17127 + }, + { + "epoch": 0.9427046067477571, + "grad_norm": 0.6911896467208862, + "learning_rate": 5.472207295720135e-06, + "loss": 0.6555, + "step": 17128 + }, + { + "epoch": 0.9427596455501128, + "grad_norm": 0.7614631652832031, + "learning_rate": 5.4717757647442595e-06, + "loss": 0.7568, + "step": 17129 + }, + { + "epoch": 0.9428146843524685, + "grad_norm": 0.6653177738189697, + "learning_rate": 5.47134423022264e-06, + "loss": 0.7626, + "step": 17130 + }, + { + "epoch": 0.9428697231548242, + "grad_norm": 0.7247204780578613, + "learning_rate": 5.470912692158527e-06, + "loss": 0.7038, + "step": 17131 + }, + { + "epoch": 0.9429247619571798, + "grad_norm": 0.7067399621009827, + "learning_rate": 5.470481150555158e-06, + "loss": 0.8066, + "step": 17132 + }, + { + "epoch": 0.9429798007595355, + "grad_norm": 0.6550384163856506, + "learning_rate": 5.47004960541578e-06, + "loss": 0.6447, + "step": 17133 + }, + { + "epoch": 0.9430348395618912, + "grad_norm": 0.6620935797691345, + "learning_rate": 5.469618056743635e-06, + "loss": 0.7816, + "step": 17134 + }, + { + "epoch": 0.9430898783642468, + "grad_norm": 0.6929067373275757, + "learning_rate": 5.469186504541966e-06, + "loss": 0.743, + "step": 17135 + }, + { + "epoch": 0.9431449171666024, + "grad_norm": 0.7585862278938293, + "learning_rate": 5.468754948814017e-06, + "loss": 0.7276, + "step": 17136 + }, + { + "epoch": 0.9431999559689581, + "grad_norm": 0.7555626034736633, + "learning_rate": 5.46832338956303e-06, + "loss": 0.7551, + "step": 17137 + }, + { + "epoch": 0.9432549947713138, + "grad_norm": 0.6756212115287781, + "learning_rate": 5.467891826792251e-06, + "loss": 0.6925, + "step": 17138 + }, + { + "epoch": 0.9433100335736694, + "grad_norm": 0.7939002513885498, + "learning_rate": 5.467460260504922e-06, + "loss": 0.7021, + "step": 17139 + }, + { + "epoch": 0.9433650723760251, + "grad_norm": 0.6600267887115479, + "learning_rate": 5.467028690704287e-06, + "loss": 0.7502, + "step": 17140 + }, + { + "epoch": 0.9434201111783808, + "grad_norm": 0.6323744058609009, + "learning_rate": 5.46659711739359e-06, + "loss": 0.6778, + "step": 17141 + }, + { + "epoch": 0.9434751499807364, + "grad_norm": 0.7724072933197021, + "learning_rate": 5.4661655405760716e-06, + "loss": 0.7722, + "step": 17142 + }, + { + "epoch": 0.943530188783092, + "grad_norm": 0.784764289855957, + "learning_rate": 5.465733960254979e-06, + "loss": 0.6578, + "step": 17143 + }, + { + "epoch": 0.9435852275854477, + "grad_norm": 0.6818285584449768, + "learning_rate": 5.465302376433556e-06, + "loss": 0.7913, + "step": 17144 + }, + { + "epoch": 0.9436402663878034, + "grad_norm": 0.6029468774795532, + "learning_rate": 5.464870789115043e-06, + "loss": 0.5936, + "step": 17145 + }, + { + "epoch": 0.9436953051901591, + "grad_norm": 0.7085309028625488, + "learning_rate": 5.464439198302687e-06, + "loss": 0.8497, + "step": 17146 + }, + { + "epoch": 0.9437503439925147, + "grad_norm": 0.6141021251678467, + "learning_rate": 5.46400760399973e-06, + "loss": 0.6056, + "step": 17147 + }, + { + "epoch": 0.9438053827948704, + "grad_norm": 0.6860122680664062, + "learning_rate": 5.4635760062094154e-06, + "loss": 0.8339, + "step": 17148 + }, + { + "epoch": 0.943860421597226, + "grad_norm": 0.8153133392333984, + "learning_rate": 5.463144404934989e-06, + "loss": 0.7799, + "step": 17149 + }, + { + "epoch": 0.9439154603995817, + "grad_norm": 0.7422170639038086, + "learning_rate": 5.462712800179691e-06, + "loss": 0.7581, + "step": 17150 + }, + { + "epoch": 0.9439704992019373, + "grad_norm": 0.6634333729743958, + "learning_rate": 5.462281191946769e-06, + "loss": 0.7292, + "step": 17151 + }, + { + "epoch": 0.944025538004293, + "grad_norm": 0.6939749717712402, + "learning_rate": 5.4618495802394645e-06, + "loss": 0.7568, + "step": 17152 + }, + { + "epoch": 0.9440805768066487, + "grad_norm": 0.6564925909042358, + "learning_rate": 5.461417965061023e-06, + "loss": 0.6955, + "step": 17153 + }, + { + "epoch": 0.9441356156090044, + "grad_norm": 0.7528273463249207, + "learning_rate": 5.460986346414686e-06, + "loss": 0.8456, + "step": 17154 + }, + { + "epoch": 0.94419065441136, + "grad_norm": 0.6330366730690002, + "learning_rate": 5.4605547243037015e-06, + "loss": 0.6921, + "step": 17155 + }, + { + "epoch": 0.9442456932137157, + "grad_norm": 0.8115606307983398, + "learning_rate": 5.460123098731309e-06, + "loss": 0.8116, + "step": 17156 + }, + { + "epoch": 0.9443007320160713, + "grad_norm": 0.6605154275894165, + "learning_rate": 5.459691469700756e-06, + "loss": 0.7205, + "step": 17157 + }, + { + "epoch": 0.944355770818427, + "grad_norm": 0.6622677445411682, + "learning_rate": 5.459259837215284e-06, + "loss": 0.7324, + "step": 17158 + }, + { + "epoch": 0.9444108096207826, + "grad_norm": 0.6821356415748596, + "learning_rate": 5.458828201278139e-06, + "loss": 0.7984, + "step": 17159 + }, + { + "epoch": 0.9444658484231383, + "grad_norm": 0.6941211223602295, + "learning_rate": 5.4583965618925625e-06, + "loss": 0.7305, + "step": 17160 + }, + { + "epoch": 0.944520887225494, + "grad_norm": 0.7761551141738892, + "learning_rate": 5.4579649190618025e-06, + "loss": 0.7898, + "step": 17161 + }, + { + "epoch": 0.9445759260278497, + "grad_norm": 0.6097804307937622, + "learning_rate": 5.457533272789097e-06, + "loss": 0.7151, + "step": 17162 + }, + { + "epoch": 0.9446309648302053, + "grad_norm": 0.6361019611358643, + "learning_rate": 5.457101623077696e-06, + "loss": 0.7037, + "step": 17163 + }, + { + "epoch": 0.9446860036325609, + "grad_norm": 0.638758659362793, + "learning_rate": 5.456669969930842e-06, + "loss": 0.6714, + "step": 17164 + }, + { + "epoch": 0.9447410424349166, + "grad_norm": 0.6280048489570618, + "learning_rate": 5.4562383133517795e-06, + "loss": 0.6839, + "step": 17165 + }, + { + "epoch": 0.9447960812372723, + "grad_norm": 0.6959577798843384, + "learning_rate": 5.4558066533437495e-06, + "loss": 0.7493, + "step": 17166 + }, + { + "epoch": 0.9448511200396279, + "grad_norm": 0.7218848466873169, + "learning_rate": 5.455374989910001e-06, + "loss": 0.8123, + "step": 17167 + }, + { + "epoch": 0.9449061588419836, + "grad_norm": 0.6597707271575928, + "learning_rate": 5.454943323053774e-06, + "loss": 0.7575, + "step": 17168 + }, + { + "epoch": 0.9449611976443393, + "grad_norm": 0.6173719763755798, + "learning_rate": 5.454511652778316e-06, + "loss": 0.6715, + "step": 17169 + }, + { + "epoch": 0.945016236446695, + "grad_norm": 0.7458024024963379, + "learning_rate": 5.45407997908687e-06, + "loss": 0.7526, + "step": 17170 + }, + { + "epoch": 0.9450712752490505, + "grad_norm": 0.7513238787651062, + "learning_rate": 5.4536483019826806e-06, + "loss": 0.7001, + "step": 17171 + }, + { + "epoch": 0.9451263140514062, + "grad_norm": 0.6999634504318237, + "learning_rate": 5.453216621468992e-06, + "loss": 0.7665, + "step": 17172 + }, + { + "epoch": 0.9451813528537619, + "grad_norm": 0.6689110994338989, + "learning_rate": 5.452784937549048e-06, + "loss": 0.7758, + "step": 17173 + }, + { + "epoch": 0.9452363916561176, + "grad_norm": 0.61881422996521, + "learning_rate": 5.4523532502260935e-06, + "loss": 0.6624, + "step": 17174 + }, + { + "epoch": 0.9452914304584732, + "grad_norm": 0.748910665512085, + "learning_rate": 5.451921559503373e-06, + "loss": 0.8298, + "step": 17175 + }, + { + "epoch": 0.9453464692608289, + "grad_norm": 0.7247006893157959, + "learning_rate": 5.451489865384132e-06, + "loss": 0.7738, + "step": 17176 + }, + { + "epoch": 0.9454015080631846, + "grad_norm": 0.6910108923912048, + "learning_rate": 5.451058167871614e-06, + "loss": 0.6645, + "step": 17177 + }, + { + "epoch": 0.9454565468655403, + "grad_norm": 0.6405494213104248, + "learning_rate": 5.450626466969061e-06, + "loss": 0.779, + "step": 17178 + }, + { + "epoch": 0.9455115856678958, + "grad_norm": 0.6681149005889893, + "learning_rate": 5.450194762679722e-06, + "loss": 0.6453, + "step": 17179 + }, + { + "epoch": 0.9455666244702515, + "grad_norm": 0.7538934350013733, + "learning_rate": 5.44976305500684e-06, + "loss": 0.8169, + "step": 17180 + }, + { + "epoch": 0.9456216632726072, + "grad_norm": 0.7700850367546082, + "learning_rate": 5.4493313439536595e-06, + "loss": 0.7508, + "step": 17181 + }, + { + "epoch": 0.9456767020749628, + "grad_norm": 0.7019801735877991, + "learning_rate": 5.4488996295234256e-06, + "loss": 0.7687, + "step": 17182 + }, + { + "epoch": 0.9457317408773185, + "grad_norm": 0.6191794276237488, + "learning_rate": 5.448467911719381e-06, + "loss": 0.6303, + "step": 17183 + }, + { + "epoch": 0.9457867796796742, + "grad_norm": 0.738051176071167, + "learning_rate": 5.448036190544772e-06, + "loss": 0.7625, + "step": 17184 + }, + { + "epoch": 0.9458418184820299, + "grad_norm": 0.8286527991294861, + "learning_rate": 5.447604466002843e-06, + "loss": 0.7328, + "step": 17185 + }, + { + "epoch": 0.9458968572843854, + "grad_norm": 0.74991375207901, + "learning_rate": 5.447172738096837e-06, + "loss": 0.7674, + "step": 17186 + }, + { + "epoch": 0.9459518960867411, + "grad_norm": 1.20223069190979, + "learning_rate": 5.446741006830002e-06, + "loss": 0.8024, + "step": 17187 + }, + { + "epoch": 0.9460069348890968, + "grad_norm": 0.6366617679595947, + "learning_rate": 5.446309272205581e-06, + "loss": 0.702, + "step": 17188 + }, + { + "epoch": 0.9460619736914525, + "grad_norm": 0.8702724575996399, + "learning_rate": 5.445877534226819e-06, + "loss": 0.8237, + "step": 17189 + }, + { + "epoch": 0.9461170124938081, + "grad_norm": 0.6849148869514465, + "learning_rate": 5.445445792896959e-06, + "loss": 0.7883, + "step": 17190 + }, + { + "epoch": 0.9461720512961638, + "grad_norm": 0.7968994379043579, + "learning_rate": 5.445014048219251e-06, + "loss": 0.8019, + "step": 17191 + }, + { + "epoch": 0.9462270900985195, + "grad_norm": 0.6820282936096191, + "learning_rate": 5.444582300196934e-06, + "loss": 0.7058, + "step": 17192 + }, + { + "epoch": 0.9462821289008752, + "grad_norm": 0.8677741885185242, + "learning_rate": 5.444150548833257e-06, + "loss": 0.7753, + "step": 17193 + }, + { + "epoch": 0.9463371677032307, + "grad_norm": 0.6906048059463501, + "learning_rate": 5.443718794131463e-06, + "loss": 0.7275, + "step": 17194 + }, + { + "epoch": 0.9463922065055864, + "grad_norm": 0.6943230032920837, + "learning_rate": 5.4432870360947966e-06, + "loss": 0.7339, + "step": 17195 + }, + { + "epoch": 0.9464472453079421, + "grad_norm": 0.9276923537254333, + "learning_rate": 5.442855274726504e-06, + "loss": 0.8316, + "step": 17196 + }, + { + "epoch": 0.9465022841102978, + "grad_norm": 0.6322672963142395, + "learning_rate": 5.44242351002983e-06, + "loss": 0.7104, + "step": 17197 + }, + { + "epoch": 0.9465573229126534, + "grad_norm": 0.6659188270568848, + "learning_rate": 5.4419917420080185e-06, + "loss": 0.7575, + "step": 17198 + }, + { + "epoch": 0.9466123617150091, + "grad_norm": 0.6726362109184265, + "learning_rate": 5.441559970664315e-06, + "loss": 0.8168, + "step": 17199 + }, + { + "epoch": 0.9466674005173648, + "grad_norm": 0.6305935978889465, + "learning_rate": 5.441128196001966e-06, + "loss": 0.7447, + "step": 17200 + }, + { + "epoch": 0.9467224393197204, + "grad_norm": 0.6968098282814026, + "learning_rate": 5.4406964180242164e-06, + "loss": 0.7592, + "step": 17201 + }, + { + "epoch": 0.946777478122076, + "grad_norm": 0.8273503184318542, + "learning_rate": 5.440264636734308e-06, + "loss": 0.79, + "step": 17202 + }, + { + "epoch": 0.9468325169244317, + "grad_norm": 0.6939367055892944, + "learning_rate": 5.439832852135489e-06, + "loss": 0.7585, + "step": 17203 + }, + { + "epoch": 0.9468875557267874, + "grad_norm": 0.7276638746261597, + "learning_rate": 5.439401064231006e-06, + "loss": 0.8132, + "step": 17204 + }, + { + "epoch": 0.9469425945291431, + "grad_norm": 0.6231852769851685, + "learning_rate": 5.438969273024102e-06, + "loss": 0.7087, + "step": 17205 + }, + { + "epoch": 0.9469976333314987, + "grad_norm": 0.6458756923675537, + "learning_rate": 5.438537478518021e-06, + "loss": 0.6894, + "step": 17206 + }, + { + "epoch": 0.9470526721338544, + "grad_norm": 0.7376695871353149, + "learning_rate": 5.4381056807160104e-06, + "loss": 0.7008, + "step": 17207 + }, + { + "epoch": 0.94710771093621, + "grad_norm": 0.6636282205581665, + "learning_rate": 5.437673879621315e-06, + "loss": 0.7216, + "step": 17208 + }, + { + "epoch": 0.9471627497385657, + "grad_norm": 0.6254339814186096, + "learning_rate": 5.437242075237179e-06, + "loss": 0.7228, + "step": 17209 + }, + { + "epoch": 0.9472177885409213, + "grad_norm": 0.6461647748947144, + "learning_rate": 5.436810267566849e-06, + "loss": 0.6799, + "step": 17210 + }, + { + "epoch": 0.947272827343277, + "grad_norm": 0.6700971722602844, + "learning_rate": 5.4363784566135694e-06, + "loss": 0.8166, + "step": 17211 + }, + { + "epoch": 0.9473278661456327, + "grad_norm": 0.6764943599700928, + "learning_rate": 5.4359466423805875e-06, + "loss": 0.7565, + "step": 17212 + }, + { + "epoch": 0.9473829049479884, + "grad_norm": 0.6449782848358154, + "learning_rate": 5.4355148248711466e-06, + "loss": 0.6943, + "step": 17213 + }, + { + "epoch": 0.947437943750344, + "grad_norm": 0.7249940037727356, + "learning_rate": 5.435083004088492e-06, + "loss": 0.677, + "step": 17214 + }, + { + "epoch": 0.9474929825526996, + "grad_norm": 0.6520692706108093, + "learning_rate": 5.43465118003587e-06, + "loss": 0.8089, + "step": 17215 + }, + { + "epoch": 0.9475480213550553, + "grad_norm": 0.826518714427948, + "learning_rate": 5.4342193527165275e-06, + "loss": 0.6715, + "step": 17216 + }, + { + "epoch": 0.947603060157411, + "grad_norm": 0.6832894682884216, + "learning_rate": 5.433787522133709e-06, + "loss": 0.7615, + "step": 17217 + }, + { + "epoch": 0.9476580989597666, + "grad_norm": 0.6764441132545471, + "learning_rate": 5.433355688290658e-06, + "loss": 0.8082, + "step": 17218 + }, + { + "epoch": 0.9477131377621223, + "grad_norm": 0.8981256484985352, + "learning_rate": 5.4329238511906226e-06, + "loss": 0.7663, + "step": 17219 + }, + { + "epoch": 0.947768176564478, + "grad_norm": 0.6509552001953125, + "learning_rate": 5.432492010836847e-06, + "loss": 0.7375, + "step": 17220 + }, + { + "epoch": 0.9478232153668337, + "grad_norm": 0.7138388156890869, + "learning_rate": 5.4320601672325775e-06, + "loss": 0.8096, + "step": 17221 + }, + { + "epoch": 0.9478782541691892, + "grad_norm": 1.0833384990692139, + "learning_rate": 5.431628320381058e-06, + "loss": 0.7985, + "step": 17222 + }, + { + "epoch": 0.9479332929715449, + "grad_norm": 0.685681164264679, + "learning_rate": 5.431196470285538e-06, + "loss": 0.759, + "step": 17223 + }, + { + "epoch": 0.9479883317739006, + "grad_norm": 0.6390333771705627, + "learning_rate": 5.430764616949259e-06, + "loss": 0.7174, + "step": 17224 + }, + { + "epoch": 0.9480433705762562, + "grad_norm": 0.6927287578582764, + "learning_rate": 5.43033276037547e-06, + "loss": 0.7348, + "step": 17225 + }, + { + "epoch": 0.9480984093786119, + "grad_norm": 0.6661121845245361, + "learning_rate": 5.429900900567413e-06, + "loss": 0.7042, + "step": 17226 + }, + { + "epoch": 0.9481534481809676, + "grad_norm": 0.6327020525932312, + "learning_rate": 5.429469037528337e-06, + "loss": 0.6995, + "step": 17227 + }, + { + "epoch": 0.9482084869833233, + "grad_norm": 0.858727216720581, + "learning_rate": 5.429037171261488e-06, + "loss": 0.7893, + "step": 17228 + }, + { + "epoch": 0.9482635257856789, + "grad_norm": 0.6609689593315125, + "learning_rate": 5.428605301770109e-06, + "loss": 0.7258, + "step": 17229 + }, + { + "epoch": 0.9483185645880345, + "grad_norm": 0.811647355556488, + "learning_rate": 5.4281734290574475e-06, + "loss": 0.7341, + "step": 17230 + }, + { + "epoch": 0.9483736033903902, + "grad_norm": 0.5738475322723389, + "learning_rate": 5.427741553126748e-06, + "loss": 0.6227, + "step": 17231 + }, + { + "epoch": 0.9484286421927459, + "grad_norm": 0.6369803547859192, + "learning_rate": 5.42730967398126e-06, + "loss": 0.7188, + "step": 17232 + }, + { + "epoch": 0.9484836809951015, + "grad_norm": 0.6879351735115051, + "learning_rate": 5.426877791624225e-06, + "loss": 0.6031, + "step": 17233 + }, + { + "epoch": 0.9485387197974572, + "grad_norm": 0.872032880783081, + "learning_rate": 5.4264459060588905e-06, + "loss": 0.7003, + "step": 17234 + }, + { + "epoch": 0.9485937585998129, + "grad_norm": 0.6781120300292969, + "learning_rate": 5.426014017288504e-06, + "loss": 0.7876, + "step": 17235 + }, + { + "epoch": 0.9486487974021686, + "grad_norm": 1.026384711265564, + "learning_rate": 5.42558212531631e-06, + "loss": 0.709, + "step": 17236 + }, + { + "epoch": 0.9487038362045241, + "grad_norm": 0.6840795874595642, + "learning_rate": 5.4251502301455536e-06, + "loss": 0.7513, + "step": 17237 + }, + { + "epoch": 0.9487588750068798, + "grad_norm": 0.7462189793586731, + "learning_rate": 5.4247183317794825e-06, + "loss": 0.6966, + "step": 17238 + }, + { + "epoch": 0.9488139138092355, + "grad_norm": 0.7512027025222778, + "learning_rate": 5.424286430221342e-06, + "loss": 0.8355, + "step": 17239 + }, + { + "epoch": 0.9488689526115912, + "grad_norm": 0.8257582187652588, + "learning_rate": 5.423854525474379e-06, + "loss": 0.7876, + "step": 17240 + }, + { + "epoch": 0.9489239914139468, + "grad_norm": 0.6845347881317139, + "learning_rate": 5.423422617541838e-06, + "loss": 0.6217, + "step": 17241 + }, + { + "epoch": 0.9489790302163025, + "grad_norm": 0.7380620837211609, + "learning_rate": 5.422990706426967e-06, + "loss": 0.7257, + "step": 17242 + }, + { + "epoch": 0.9490340690186582, + "grad_norm": 0.703009307384491, + "learning_rate": 5.42255879213301e-06, + "loss": 0.7212, + "step": 17243 + }, + { + "epoch": 0.9490891078210139, + "grad_norm": 0.5682654976844788, + "learning_rate": 5.422126874663215e-06, + "loss": 0.6095, + "step": 17244 + }, + { + "epoch": 0.9491441466233694, + "grad_norm": 0.7798917293548584, + "learning_rate": 5.421694954020827e-06, + "loss": 0.8202, + "step": 17245 + }, + { + "epoch": 0.9491991854257251, + "grad_norm": 0.9012196063995361, + "learning_rate": 5.421263030209092e-06, + "loss": 0.7277, + "step": 17246 + }, + { + "epoch": 0.9492542242280808, + "grad_norm": 0.7454085350036621, + "learning_rate": 5.4208311032312575e-06, + "loss": 0.709, + "step": 17247 + }, + { + "epoch": 0.9493092630304365, + "grad_norm": 0.6721041798591614, + "learning_rate": 5.420399173090569e-06, + "loss": 0.7932, + "step": 17248 + }, + { + "epoch": 0.9493643018327921, + "grad_norm": 0.6498880386352539, + "learning_rate": 5.419967239790273e-06, + "loss": 0.6983, + "step": 17249 + }, + { + "epoch": 0.9494193406351478, + "grad_norm": 0.6301897764205933, + "learning_rate": 5.419535303333615e-06, + "loss": 0.6722, + "step": 17250 + }, + { + "epoch": 0.9494743794375035, + "grad_norm": 0.7664110660552979, + "learning_rate": 5.419103363723843e-06, + "loss": 0.7823, + "step": 17251 + }, + { + "epoch": 0.9495294182398591, + "grad_norm": 0.6919211149215698, + "learning_rate": 5.4186714209642026e-06, + "loss": 0.7807, + "step": 17252 + }, + { + "epoch": 0.9495844570422147, + "grad_norm": 0.6624665260314941, + "learning_rate": 5.418239475057938e-06, + "loss": 0.7101, + "step": 17253 + }, + { + "epoch": 0.9496394958445704, + "grad_norm": 0.6163734197616577, + "learning_rate": 5.417807526008299e-06, + "loss": 0.6766, + "step": 17254 + }, + { + "epoch": 0.9496945346469261, + "grad_norm": 0.6248084306716919, + "learning_rate": 5.41737557381853e-06, + "loss": 0.7704, + "step": 17255 + }, + { + "epoch": 0.9497495734492818, + "grad_norm": 0.6190524101257324, + "learning_rate": 5.416943618491878e-06, + "loss": 0.7616, + "step": 17256 + }, + { + "epoch": 0.9498046122516374, + "grad_norm": 0.7721768617630005, + "learning_rate": 5.416511660031589e-06, + "loss": 0.7005, + "step": 17257 + }, + { + "epoch": 0.9498596510539931, + "grad_norm": 0.6752305030822754, + "learning_rate": 5.4160796984409115e-06, + "loss": 0.7614, + "step": 17258 + }, + { + "epoch": 0.9499146898563487, + "grad_norm": 0.7479132413864136, + "learning_rate": 5.415647733723087e-06, + "loss": 0.73, + "step": 17259 + }, + { + "epoch": 0.9499697286587044, + "grad_norm": 0.6660789847373962, + "learning_rate": 5.415215765881367e-06, + "loss": 0.6687, + "step": 17260 + }, + { + "epoch": 0.95002476746106, + "grad_norm": 0.6333804130554199, + "learning_rate": 5.414783794918998e-06, + "loss": 0.6641, + "step": 17261 + }, + { + "epoch": 0.9500798062634157, + "grad_norm": 0.7207759618759155, + "learning_rate": 5.4143518208392245e-06, + "loss": 0.7361, + "step": 17262 + }, + { + "epoch": 0.9501348450657714, + "grad_norm": 0.9466403722763062, + "learning_rate": 5.413919843645291e-06, + "loss": 0.7898, + "step": 17263 + }, + { + "epoch": 0.9501898838681271, + "grad_norm": 0.7350644469261169, + "learning_rate": 5.41348786334045e-06, + "loss": 0.7646, + "step": 17264 + }, + { + "epoch": 0.9502449226704827, + "grad_norm": 0.7812394499778748, + "learning_rate": 5.413055879927943e-06, + "loss": 0.807, + "step": 17265 + }, + { + "epoch": 0.9502999614728384, + "grad_norm": 0.6254860758781433, + "learning_rate": 5.41262389341102e-06, + "loss": 0.6722, + "step": 17266 + }, + { + "epoch": 0.950355000275194, + "grad_norm": 0.6452184319496155, + "learning_rate": 5.412191903792925e-06, + "loss": 0.7019, + "step": 17267 + }, + { + "epoch": 0.9504100390775496, + "grad_norm": 0.6686402559280396, + "learning_rate": 5.411759911076907e-06, + "loss": 0.7344, + "step": 17268 + }, + { + "epoch": 0.9504650778799053, + "grad_norm": 0.6803160309791565, + "learning_rate": 5.411327915266211e-06, + "loss": 0.7334, + "step": 17269 + }, + { + "epoch": 0.950520116682261, + "grad_norm": 0.6542620658874512, + "learning_rate": 5.410895916364085e-06, + "loss": 0.7176, + "step": 17270 + }, + { + "epoch": 0.9505751554846167, + "grad_norm": 0.6867213845252991, + "learning_rate": 5.410463914373775e-06, + "loss": 0.6591, + "step": 17271 + }, + { + "epoch": 0.9506301942869723, + "grad_norm": 0.7018343210220337, + "learning_rate": 5.410031909298528e-06, + "loss": 0.8, + "step": 17272 + }, + { + "epoch": 0.950685233089328, + "grad_norm": 0.6558470129966736, + "learning_rate": 5.409599901141591e-06, + "loss": 0.728, + "step": 17273 + }, + { + "epoch": 0.9507402718916836, + "grad_norm": 0.7012515068054199, + "learning_rate": 5.409167889906211e-06, + "loss": 0.7611, + "step": 17274 + }, + { + "epoch": 0.9507953106940393, + "grad_norm": 0.661029040813446, + "learning_rate": 5.408735875595633e-06, + "loss": 0.7216, + "step": 17275 + }, + { + "epoch": 0.9508503494963949, + "grad_norm": 0.6965559124946594, + "learning_rate": 5.408303858213107e-06, + "loss": 0.813, + "step": 17276 + }, + { + "epoch": 0.9509053882987506, + "grad_norm": 0.8969947099685669, + "learning_rate": 5.407871837761879e-06, + "loss": 0.8326, + "step": 17277 + }, + { + "epoch": 0.9509604271011063, + "grad_norm": 0.6230607032775879, + "learning_rate": 5.407439814245196e-06, + "loss": 0.7211, + "step": 17278 + }, + { + "epoch": 0.951015465903462, + "grad_norm": 0.6495433449745178, + "learning_rate": 5.407007787666303e-06, + "loss": 0.6629, + "step": 17279 + }, + { + "epoch": 0.9510705047058176, + "grad_norm": 0.782513439655304, + "learning_rate": 5.406575758028448e-06, + "loss": 0.7655, + "step": 17280 + }, + { + "epoch": 0.9511255435081732, + "grad_norm": 0.7411374449729919, + "learning_rate": 5.40614372533488e-06, + "loss": 0.7893, + "step": 17281 + }, + { + "epoch": 0.9511805823105289, + "grad_norm": 0.7352038621902466, + "learning_rate": 5.405711689588844e-06, + "loss": 0.7761, + "step": 17282 + }, + { + "epoch": 0.9512356211128846, + "grad_norm": 0.6346562504768372, + "learning_rate": 5.405279650793587e-06, + "loss": 0.7511, + "step": 17283 + }, + { + "epoch": 0.9512906599152402, + "grad_norm": 0.6457065343856812, + "learning_rate": 5.404847608952356e-06, + "loss": 0.6964, + "step": 17284 + }, + { + "epoch": 0.9513456987175959, + "grad_norm": 0.7764767408370972, + "learning_rate": 5.4044155640684e-06, + "loss": 0.7979, + "step": 17285 + }, + { + "epoch": 0.9514007375199516, + "grad_norm": 0.7626819610595703, + "learning_rate": 5.403983516144965e-06, + "loss": 0.7799, + "step": 17286 + }, + { + "epoch": 0.9514557763223073, + "grad_norm": 0.8251420855522156, + "learning_rate": 5.403551465185296e-06, + "loss": 0.7631, + "step": 17287 + }, + { + "epoch": 0.9515108151246628, + "grad_norm": 0.7766129970550537, + "learning_rate": 5.403119411192644e-06, + "loss": 0.7879, + "step": 17288 + }, + { + "epoch": 0.9515658539270185, + "grad_norm": 0.7220279574394226, + "learning_rate": 5.402687354170255e-06, + "loss": 0.8042, + "step": 17289 + }, + { + "epoch": 0.9516208927293742, + "grad_norm": 0.6045637726783752, + "learning_rate": 5.4022552941213745e-06, + "loss": 0.6854, + "step": 17290 + }, + { + "epoch": 0.9516759315317299, + "grad_norm": 0.6809129118919373, + "learning_rate": 5.401823231049251e-06, + "loss": 0.6861, + "step": 17291 + }, + { + "epoch": 0.9517309703340855, + "grad_norm": 0.654763400554657, + "learning_rate": 5.401391164957133e-06, + "loss": 0.794, + "step": 17292 + }, + { + "epoch": 0.9517860091364412, + "grad_norm": 0.6468398571014404, + "learning_rate": 5.4009590958482645e-06, + "loss": 0.7368, + "step": 17293 + }, + { + "epoch": 0.9518410479387969, + "grad_norm": 0.657477617263794, + "learning_rate": 5.400527023725896e-06, + "loss": 0.6998, + "step": 17294 + }, + { + "epoch": 0.9518960867411526, + "grad_norm": 0.6647237539291382, + "learning_rate": 5.400094948593272e-06, + "loss": 0.7825, + "step": 17295 + }, + { + "epoch": 0.9519511255435081, + "grad_norm": 0.6871651411056519, + "learning_rate": 5.399662870453643e-06, + "loss": 0.7089, + "step": 17296 + }, + { + "epoch": 0.9520061643458638, + "grad_norm": 0.7344084978103638, + "learning_rate": 5.399230789310254e-06, + "loss": 0.7541, + "step": 17297 + }, + { + "epoch": 0.9520612031482195, + "grad_norm": 0.7122900485992432, + "learning_rate": 5.3987987051663545e-06, + "loss": 0.7669, + "step": 17298 + }, + { + "epoch": 0.9521162419505752, + "grad_norm": 0.628433108329773, + "learning_rate": 5.39836661802519e-06, + "loss": 0.7065, + "step": 17299 + }, + { + "epoch": 0.9521712807529308, + "grad_norm": 0.6206123232841492, + "learning_rate": 5.397934527890007e-06, + "loss": 0.7048, + "step": 17300 + }, + { + "epoch": 0.9522263195552865, + "grad_norm": 0.7941209673881531, + "learning_rate": 5.397502434764057e-06, + "loss": 0.7336, + "step": 17301 + }, + { + "epoch": 0.9522813583576422, + "grad_norm": 0.7397454380989075, + "learning_rate": 5.397070338650584e-06, + "loss": 0.7303, + "step": 17302 + }, + { + "epoch": 0.9523363971599978, + "grad_norm": 0.6538659930229187, + "learning_rate": 5.396638239552837e-06, + "loss": 0.7292, + "step": 17303 + }, + { + "epoch": 0.9523914359623534, + "grad_norm": 0.7217223048210144, + "learning_rate": 5.396206137474064e-06, + "loss": 0.7441, + "step": 17304 + }, + { + "epoch": 0.9524464747647091, + "grad_norm": 0.765256404876709, + "learning_rate": 5.395774032417511e-06, + "loss": 0.7527, + "step": 17305 + }, + { + "epoch": 0.9525015135670648, + "grad_norm": 0.6991282105445862, + "learning_rate": 5.395341924386426e-06, + "loss": 0.5727, + "step": 17306 + }, + { + "epoch": 0.9525565523694205, + "grad_norm": 0.7076554894447327, + "learning_rate": 5.394909813384056e-06, + "loss": 0.7784, + "step": 17307 + }, + { + "epoch": 0.9526115911717761, + "grad_norm": 0.7152629494667053, + "learning_rate": 5.3944776994136505e-06, + "loss": 0.722, + "step": 17308 + }, + { + "epoch": 0.9526666299741318, + "grad_norm": 0.7322426438331604, + "learning_rate": 5.394045582478457e-06, + "loss": 0.7983, + "step": 17309 + }, + { + "epoch": 0.9527216687764875, + "grad_norm": 0.7486420273780823, + "learning_rate": 5.393613462581721e-06, + "loss": 0.7514, + "step": 17310 + }, + { + "epoch": 0.952776707578843, + "grad_norm": 0.8110840320587158, + "learning_rate": 5.393181339726692e-06, + "loss": 0.6736, + "step": 17311 + }, + { + "epoch": 0.9528317463811987, + "grad_norm": 0.6394487023353577, + "learning_rate": 5.392749213916617e-06, + "loss": 0.7039, + "step": 17312 + }, + { + "epoch": 0.9528867851835544, + "grad_norm": 0.6446092128753662, + "learning_rate": 5.3923170851547445e-06, + "loss": 0.6985, + "step": 17313 + }, + { + "epoch": 0.9529418239859101, + "grad_norm": 0.6536337733268738, + "learning_rate": 5.391884953444323e-06, + "loss": 0.7503, + "step": 17314 + }, + { + "epoch": 0.9529968627882657, + "grad_norm": 0.7714772820472717, + "learning_rate": 5.3914528187885975e-06, + "loss": 0.7068, + "step": 17315 + }, + { + "epoch": 0.9530519015906214, + "grad_norm": 0.6758143901824951, + "learning_rate": 5.391020681190818e-06, + "loss": 0.689, + "step": 17316 + }, + { + "epoch": 0.953106940392977, + "grad_norm": 0.730246365070343, + "learning_rate": 5.390588540654232e-06, + "loss": 0.7242, + "step": 17317 + }, + { + "epoch": 0.9531619791953327, + "grad_norm": 0.7205057144165039, + "learning_rate": 5.390156397182087e-06, + "loss": 0.764, + "step": 17318 + }, + { + "epoch": 0.9532170179976883, + "grad_norm": 0.6799334287643433, + "learning_rate": 5.3897242507776295e-06, + "loss": 0.6869, + "step": 17319 + }, + { + "epoch": 0.953272056800044, + "grad_norm": 0.7646334171295166, + "learning_rate": 5.38929210144411e-06, + "loss": 0.7594, + "step": 17320 + }, + { + "epoch": 0.9533270956023997, + "grad_norm": 0.8450561761856079, + "learning_rate": 5.388859949184776e-06, + "loss": 0.841, + "step": 17321 + }, + { + "epoch": 0.9533821344047554, + "grad_norm": 0.6637916564941406, + "learning_rate": 5.388427794002874e-06, + "loss": 0.8445, + "step": 17322 + }, + { + "epoch": 0.953437173207111, + "grad_norm": 0.6471973657608032, + "learning_rate": 5.387995635901652e-06, + "loss": 0.7442, + "step": 17323 + }, + { + "epoch": 0.9534922120094667, + "grad_norm": 0.7502676844596863, + "learning_rate": 5.3875634748843585e-06, + "loss": 0.7112, + "step": 17324 + }, + { + "epoch": 0.9535472508118223, + "grad_norm": 0.6649467349052429, + "learning_rate": 5.3871313109542435e-06, + "loss": 0.7605, + "step": 17325 + }, + { + "epoch": 0.953602289614178, + "grad_norm": 0.6515121459960938, + "learning_rate": 5.3866991441145535e-06, + "loss": 0.7372, + "step": 17326 + }, + { + "epoch": 0.9536573284165336, + "grad_norm": 0.6831742525100708, + "learning_rate": 5.386266974368533e-06, + "loss": 0.762, + "step": 17327 + }, + { + "epoch": 0.9537123672188893, + "grad_norm": 0.7404910922050476, + "learning_rate": 5.3858348017194364e-06, + "loss": 0.7875, + "step": 17328 + }, + { + "epoch": 0.953767406021245, + "grad_norm": 0.7611355185508728, + "learning_rate": 5.385402626170507e-06, + "loss": 0.7679, + "step": 17329 + }, + { + "epoch": 0.9538224448236007, + "grad_norm": 0.8916192054748535, + "learning_rate": 5.384970447724995e-06, + "loss": 0.785, + "step": 17330 + }, + { + "epoch": 0.9538774836259563, + "grad_norm": 0.694205641746521, + "learning_rate": 5.3845382663861475e-06, + "loss": 0.7292, + "step": 17331 + }, + { + "epoch": 0.953932522428312, + "grad_norm": 0.7292189598083496, + "learning_rate": 5.384106082157214e-06, + "loss": 0.8187, + "step": 17332 + }, + { + "epoch": 0.9539875612306676, + "grad_norm": 0.7211722135543823, + "learning_rate": 5.3836738950414424e-06, + "loss": 0.6727, + "step": 17333 + }, + { + "epoch": 0.9540426000330233, + "grad_norm": 0.7116650342941284, + "learning_rate": 5.38324170504208e-06, + "loss": 0.774, + "step": 17334 + }, + { + "epoch": 0.9540976388353789, + "grad_norm": 0.6604741215705872, + "learning_rate": 5.3828095121623755e-06, + "loss": 0.6726, + "step": 17335 + }, + { + "epoch": 0.9541526776377346, + "grad_norm": 0.7605896592140198, + "learning_rate": 5.382377316405578e-06, + "loss": 0.7965, + "step": 17336 + }, + { + "epoch": 0.9542077164400903, + "grad_norm": 0.811404287815094, + "learning_rate": 5.3819451177749345e-06, + "loss": 0.6583, + "step": 17337 + }, + { + "epoch": 0.954262755242446, + "grad_norm": 0.6798723340034485, + "learning_rate": 5.381512916273695e-06, + "loss": 0.6482, + "step": 17338 + }, + { + "epoch": 0.9543177940448015, + "grad_norm": 0.8835861682891846, + "learning_rate": 5.381080711905105e-06, + "loss": 0.8003, + "step": 17339 + }, + { + "epoch": 0.9543728328471572, + "grad_norm": 0.6525741219520569, + "learning_rate": 5.380648504672413e-06, + "loss": 0.7198, + "step": 17340 + }, + { + "epoch": 0.9544278716495129, + "grad_norm": 0.738264262676239, + "learning_rate": 5.380216294578871e-06, + "loss": 0.7832, + "step": 17341 + }, + { + "epoch": 0.9544829104518686, + "grad_norm": 0.7376875877380371, + "learning_rate": 5.379784081627725e-06, + "loss": 0.8544, + "step": 17342 + }, + { + "epoch": 0.9545379492542242, + "grad_norm": 0.6113360524177551, + "learning_rate": 5.379351865822222e-06, + "loss": 0.7242, + "step": 17343 + }, + { + "epoch": 0.9545929880565799, + "grad_norm": 0.7817113399505615, + "learning_rate": 5.378919647165613e-06, + "loss": 0.7488, + "step": 17344 + }, + { + "epoch": 0.9546480268589356, + "grad_norm": 0.6994492411613464, + "learning_rate": 5.3784874256611455e-06, + "loss": 0.7752, + "step": 17345 + }, + { + "epoch": 0.9547030656612913, + "grad_norm": 0.7286459803581238, + "learning_rate": 5.378055201312067e-06, + "loss": 0.6519, + "step": 17346 + }, + { + "epoch": 0.9547581044636468, + "grad_norm": 0.6674445271492004, + "learning_rate": 5.377622974121627e-06, + "loss": 0.8182, + "step": 17347 + }, + { + "epoch": 0.9548131432660025, + "grad_norm": 0.6189805269241333, + "learning_rate": 5.377190744093074e-06, + "loss": 0.6936, + "step": 17348 + }, + { + "epoch": 0.9548681820683582, + "grad_norm": 0.6603719592094421, + "learning_rate": 5.376758511229656e-06, + "loss": 0.6622, + "step": 17349 + }, + { + "epoch": 0.9549232208707139, + "grad_norm": 0.71391761302948, + "learning_rate": 5.3763262755346245e-06, + "loss": 0.7519, + "step": 17350 + }, + { + "epoch": 0.9549782596730695, + "grad_norm": 0.6202741265296936, + "learning_rate": 5.375894037011222e-06, + "loss": 0.6252, + "step": 17351 + }, + { + "epoch": 0.9550332984754252, + "grad_norm": 0.6846199631690979, + "learning_rate": 5.375461795662702e-06, + "loss": 0.7232, + "step": 17352 + }, + { + "epoch": 0.9550883372777809, + "grad_norm": 0.6886401772499084, + "learning_rate": 5.375029551492312e-06, + "loss": 0.6587, + "step": 17353 + }, + { + "epoch": 0.9551433760801364, + "grad_norm": 0.6783032417297363, + "learning_rate": 5.3745973045033e-06, + "loss": 0.761, + "step": 17354 + }, + { + "epoch": 0.9551984148824921, + "grad_norm": 0.947640597820282, + "learning_rate": 5.374165054698913e-06, + "loss": 0.8755, + "step": 17355 + }, + { + "epoch": 0.9552534536848478, + "grad_norm": 0.8471985459327698, + "learning_rate": 5.3737328020824045e-06, + "loss": 0.8288, + "step": 17356 + }, + { + "epoch": 0.9553084924872035, + "grad_norm": 0.6486181616783142, + "learning_rate": 5.373300546657019e-06, + "loss": 0.7299, + "step": 17357 + }, + { + "epoch": 0.9553635312895591, + "grad_norm": 0.7250335216522217, + "learning_rate": 5.372868288426007e-06, + "loss": 0.7624, + "step": 17358 + }, + { + "epoch": 0.9554185700919148, + "grad_norm": 0.7069470286369324, + "learning_rate": 5.372436027392615e-06, + "loss": 0.8266, + "step": 17359 + }, + { + "epoch": 0.9554736088942705, + "grad_norm": 0.7567838430404663, + "learning_rate": 5.372003763560095e-06, + "loss": 0.7348, + "step": 17360 + }, + { + "epoch": 0.9555286476966262, + "grad_norm": 0.707920491695404, + "learning_rate": 5.371571496931694e-06, + "loss": 0.7466, + "step": 17361 + }, + { + "epoch": 0.9555836864989817, + "grad_norm": 0.7111881375312805, + "learning_rate": 5.371139227510662e-06, + "loss": 0.7965, + "step": 17362 + }, + { + "epoch": 0.9556387253013374, + "grad_norm": 0.5888195633888245, + "learning_rate": 5.370706955300245e-06, + "loss": 0.6534, + "step": 17363 + }, + { + "epoch": 0.9556937641036931, + "grad_norm": 0.6748310327529907, + "learning_rate": 5.370274680303697e-06, + "loss": 0.6856, + "step": 17364 + }, + { + "epoch": 0.9557488029060488, + "grad_norm": 0.6798354387283325, + "learning_rate": 5.369842402524261e-06, + "loss": 0.7213, + "step": 17365 + }, + { + "epoch": 0.9558038417084044, + "grad_norm": 0.7532860636711121, + "learning_rate": 5.369410121965189e-06, + "loss": 0.705, + "step": 17366 + }, + { + "epoch": 0.9558588805107601, + "grad_norm": 0.6346868872642517, + "learning_rate": 5.368977838629728e-06, + "loss": 0.6898, + "step": 17367 + }, + { + "epoch": 0.9559139193131158, + "grad_norm": 0.6765021681785583, + "learning_rate": 5.368545552521131e-06, + "loss": 0.7351, + "step": 17368 + }, + { + "epoch": 0.9559689581154714, + "grad_norm": 0.8616527915000916, + "learning_rate": 5.368113263642643e-06, + "loss": 0.8277, + "step": 17369 + }, + { + "epoch": 0.956023996917827, + "grad_norm": 0.6542890667915344, + "learning_rate": 5.367680971997514e-06, + "loss": 0.7375, + "step": 17370 + }, + { + "epoch": 0.9560790357201827, + "grad_norm": 0.6682239174842834, + "learning_rate": 5.367248677588993e-06, + "loss": 0.713, + "step": 17371 + }, + { + "epoch": 0.9561340745225384, + "grad_norm": 0.8059317469596863, + "learning_rate": 5.36681638042033e-06, + "loss": 0.7312, + "step": 17372 + }, + { + "epoch": 0.9561891133248941, + "grad_norm": 0.6048741936683655, + "learning_rate": 5.366384080494773e-06, + "loss": 0.6356, + "step": 17373 + }, + { + "epoch": 0.9562441521272497, + "grad_norm": 0.625925600528717, + "learning_rate": 5.365951777815572e-06, + "loss": 0.7489, + "step": 17374 + }, + { + "epoch": 0.9562991909296054, + "grad_norm": 0.6174320578575134, + "learning_rate": 5.365519472385975e-06, + "loss": 0.674, + "step": 17375 + }, + { + "epoch": 0.956354229731961, + "grad_norm": 0.6741634607315063, + "learning_rate": 5.365087164209231e-06, + "loss": 0.7194, + "step": 17376 + }, + { + "epoch": 0.9564092685343167, + "grad_norm": 0.7016188502311707, + "learning_rate": 5.36465485328859e-06, + "loss": 0.8047, + "step": 17377 + }, + { + "epoch": 0.9564643073366723, + "grad_norm": 0.6745482087135315, + "learning_rate": 5.364222539627301e-06, + "loss": 0.7661, + "step": 17378 + }, + { + "epoch": 0.956519346139028, + "grad_norm": 1.3635177612304688, + "learning_rate": 5.363790223228611e-06, + "loss": 0.7666, + "step": 17379 + }, + { + "epoch": 0.9565743849413837, + "grad_norm": 0.6775792837142944, + "learning_rate": 5.363357904095773e-06, + "loss": 0.7406, + "step": 17380 + }, + { + "epoch": 0.9566294237437394, + "grad_norm": 0.6810624003410339, + "learning_rate": 5.362925582232035e-06, + "loss": 0.6818, + "step": 17381 + }, + { + "epoch": 0.956684462546095, + "grad_norm": 0.6558675765991211, + "learning_rate": 5.362493257640644e-06, + "loss": 0.8256, + "step": 17382 + }, + { + "epoch": 0.9567395013484506, + "grad_norm": 0.8928041458129883, + "learning_rate": 5.362060930324849e-06, + "loss": 0.8213, + "step": 17383 + }, + { + "epoch": 0.9567945401508063, + "grad_norm": 0.705581784248352, + "learning_rate": 5.361628600287904e-06, + "loss": 0.7597, + "step": 17384 + }, + { + "epoch": 0.956849578953162, + "grad_norm": 0.701968789100647, + "learning_rate": 5.3611962675330545e-06, + "loss": 0.7499, + "step": 17385 + }, + { + "epoch": 0.9569046177555176, + "grad_norm": 0.671457052230835, + "learning_rate": 5.360763932063551e-06, + "loss": 0.6984, + "step": 17386 + }, + { + "epoch": 0.9569596565578733, + "grad_norm": 0.6033732891082764, + "learning_rate": 5.36033159388264e-06, + "loss": 0.6182, + "step": 17387 + }, + { + "epoch": 0.957014695360229, + "grad_norm": 0.6885262131690979, + "learning_rate": 5.359899252993576e-06, + "loss": 0.7577, + "step": 17388 + }, + { + "epoch": 0.9570697341625847, + "grad_norm": 0.6726160645484924, + "learning_rate": 5.359466909399604e-06, + "loss": 0.745, + "step": 17389 + }, + { + "epoch": 0.9571247729649403, + "grad_norm": 0.6888847947120667, + "learning_rate": 5.359034563103975e-06, + "loss": 0.8358, + "step": 17390 + }, + { + "epoch": 0.9571798117672959, + "grad_norm": 0.6707327961921692, + "learning_rate": 5.358602214109939e-06, + "loss": 0.7217, + "step": 17391 + }, + { + "epoch": 0.9572348505696516, + "grad_norm": 0.72385174036026, + "learning_rate": 5.358169862420745e-06, + "loss": 0.8016, + "step": 17392 + }, + { + "epoch": 0.9572898893720073, + "grad_norm": 0.6428029537200928, + "learning_rate": 5.357737508039641e-06, + "loss": 0.7241, + "step": 17393 + }, + { + "epoch": 0.9573449281743629, + "grad_norm": 0.6318633556365967, + "learning_rate": 5.357305150969878e-06, + "loss": 0.7515, + "step": 17394 + }, + { + "epoch": 0.9573999669767186, + "grad_norm": 0.7007773518562317, + "learning_rate": 5.3568727912147055e-06, + "loss": 0.7852, + "step": 17395 + }, + { + "epoch": 0.9574550057790743, + "grad_norm": 0.6996271014213562, + "learning_rate": 5.356440428777372e-06, + "loss": 0.8811, + "step": 17396 + }, + { + "epoch": 0.9575100445814299, + "grad_norm": 0.6028952598571777, + "learning_rate": 5.356008063661128e-06, + "loss": 0.7801, + "step": 17397 + }, + { + "epoch": 0.9575650833837855, + "grad_norm": 0.6873906850814819, + "learning_rate": 5.355575695869224e-06, + "loss": 0.6855, + "step": 17398 + }, + { + "epoch": 0.9576201221861412, + "grad_norm": 0.6360284686088562, + "learning_rate": 5.355143325404908e-06, + "loss": 0.7041, + "step": 17399 + }, + { + "epoch": 0.9576751609884969, + "grad_norm": 0.7114169597625732, + "learning_rate": 5.354710952271427e-06, + "loss": 0.7394, + "step": 17400 + }, + { + "epoch": 0.9577301997908525, + "grad_norm": 0.6143094301223755, + "learning_rate": 5.354278576472036e-06, + "loss": 0.7438, + "step": 17401 + }, + { + "epoch": 0.9577852385932082, + "grad_norm": 0.6581745147705078, + "learning_rate": 5.353846198009981e-06, + "loss": 0.7388, + "step": 17402 + }, + { + "epoch": 0.9578402773955639, + "grad_norm": 0.735107958316803, + "learning_rate": 5.353413816888513e-06, + "loss": 0.7853, + "step": 17403 + }, + { + "epoch": 0.9578953161979196, + "grad_norm": 0.7374551892280579, + "learning_rate": 5.352981433110881e-06, + "loss": 0.8393, + "step": 17404 + }, + { + "epoch": 0.9579503550002751, + "grad_norm": 0.7229042053222656, + "learning_rate": 5.352549046680335e-06, + "loss": 0.7052, + "step": 17405 + }, + { + "epoch": 0.9580053938026308, + "grad_norm": 0.6857532262802124, + "learning_rate": 5.352116657600126e-06, + "loss": 0.7484, + "step": 17406 + }, + { + "epoch": 0.9580604326049865, + "grad_norm": 0.6766275763511658, + "learning_rate": 5.351684265873501e-06, + "loss": 0.7739, + "step": 17407 + }, + { + "epoch": 0.9581154714073422, + "grad_norm": 0.7877463698387146, + "learning_rate": 5.351251871503712e-06, + "loss": 0.7681, + "step": 17408 + }, + { + "epoch": 0.9581705102096978, + "grad_norm": 0.8620951771736145, + "learning_rate": 5.350819474494009e-06, + "loss": 0.8691, + "step": 17409 + }, + { + "epoch": 0.9582255490120535, + "grad_norm": 0.6989312171936035, + "learning_rate": 5.350387074847639e-06, + "loss": 0.74, + "step": 17410 + }, + { + "epoch": 0.9582805878144092, + "grad_norm": 0.6328219771385193, + "learning_rate": 5.349954672567854e-06, + "loss": 0.6934, + "step": 17411 + }, + { + "epoch": 0.9583356266167649, + "grad_norm": 0.6802984476089478, + "learning_rate": 5.349522267657903e-06, + "loss": 0.6071, + "step": 17412 + }, + { + "epoch": 0.9583906654191204, + "grad_norm": 0.579208493232727, + "learning_rate": 5.349089860121037e-06, + "loss": 0.6382, + "step": 17413 + }, + { + "epoch": 0.9584457042214761, + "grad_norm": 0.712049663066864, + "learning_rate": 5.348657449960505e-06, + "loss": 0.6865, + "step": 17414 + }, + { + "epoch": 0.9585007430238318, + "grad_norm": 0.759441614151001, + "learning_rate": 5.3482250371795566e-06, + "loss": 0.7735, + "step": 17415 + }, + { + "epoch": 0.9585557818261875, + "grad_norm": 0.7504081130027771, + "learning_rate": 5.347792621781441e-06, + "loss": 0.7704, + "step": 17416 + }, + { + "epoch": 0.9586108206285431, + "grad_norm": 0.6927568912506104, + "learning_rate": 5.34736020376941e-06, + "loss": 0.7529, + "step": 17417 + }, + { + "epoch": 0.9586658594308988, + "grad_norm": 0.6337310075759888, + "learning_rate": 5.346927783146714e-06, + "loss": 0.717, + "step": 17418 + }, + { + "epoch": 0.9587208982332545, + "grad_norm": 0.6488094925880432, + "learning_rate": 5.3464953599166e-06, + "loss": 0.7084, + "step": 17419 + }, + { + "epoch": 0.9587759370356101, + "grad_norm": 0.6433645486831665, + "learning_rate": 5.34606293408232e-06, + "loss": 0.6765, + "step": 17420 + }, + { + "epoch": 0.9588309758379657, + "grad_norm": 0.6713243722915649, + "learning_rate": 5.345630505647125e-06, + "loss": 0.7357, + "step": 17421 + }, + { + "epoch": 0.9588860146403214, + "grad_norm": 0.6575330495834351, + "learning_rate": 5.345198074614262e-06, + "loss": 0.7312, + "step": 17422 + }, + { + "epoch": 0.9589410534426771, + "grad_norm": 0.642666757106781, + "learning_rate": 5.344765640986984e-06, + "loss": 0.7355, + "step": 17423 + }, + { + "epoch": 0.9589960922450328, + "grad_norm": 0.7026614546775818, + "learning_rate": 5.344333204768537e-06, + "loss": 0.7811, + "step": 17424 + }, + { + "epoch": 0.9590511310473884, + "grad_norm": 0.6988122463226318, + "learning_rate": 5.343900765962177e-06, + "loss": 0.6903, + "step": 17425 + }, + { + "epoch": 0.9591061698497441, + "grad_norm": 0.682156503200531, + "learning_rate": 5.34346832457115e-06, + "loss": 0.7504, + "step": 17426 + }, + { + "epoch": 0.9591612086520998, + "grad_norm": 0.6704639196395874, + "learning_rate": 5.343035880598706e-06, + "loss": 0.7432, + "step": 17427 + }, + { + "epoch": 0.9592162474544554, + "grad_norm": 0.6490553617477417, + "learning_rate": 5.342603434048096e-06, + "loss": 0.7347, + "step": 17428 + }, + { + "epoch": 0.959271286256811, + "grad_norm": 0.657006561756134, + "learning_rate": 5.342170984922571e-06, + "loss": 0.772, + "step": 17429 + }, + { + "epoch": 0.9593263250591667, + "grad_norm": 0.7124611735343933, + "learning_rate": 5.34173853322538e-06, + "loss": 0.7326, + "step": 17430 + }, + { + "epoch": 0.9593813638615224, + "grad_norm": 0.6327362060546875, + "learning_rate": 5.341306078959775e-06, + "loss": 0.7248, + "step": 17431 + }, + { + "epoch": 0.9594364026638781, + "grad_norm": 0.6160357594490051, + "learning_rate": 5.3408736221290025e-06, + "loss": 0.6655, + "step": 17432 + }, + { + "epoch": 0.9594914414662337, + "grad_norm": 0.729807436466217, + "learning_rate": 5.340441162736319e-06, + "loss": 0.6996, + "step": 17433 + }, + { + "epoch": 0.9595464802685894, + "grad_norm": 0.6158677339553833, + "learning_rate": 5.340008700784968e-06, + "loss": 0.6189, + "step": 17434 + }, + { + "epoch": 0.959601519070945, + "grad_norm": 0.6435769200325012, + "learning_rate": 5.339576236278203e-06, + "loss": 0.7464, + "step": 17435 + }, + { + "epoch": 0.9596565578733007, + "grad_norm": 0.7240917682647705, + "learning_rate": 5.339143769219272e-06, + "loss": 0.7844, + "step": 17436 + }, + { + "epoch": 0.9597115966756563, + "grad_norm": 0.6149145364761353, + "learning_rate": 5.33871129961143e-06, + "loss": 0.6943, + "step": 17437 + }, + { + "epoch": 0.959766635478012, + "grad_norm": 0.7776970267295837, + "learning_rate": 5.338278827457924e-06, + "loss": 0.7434, + "step": 17438 + }, + { + "epoch": 0.9598216742803677, + "grad_norm": 0.5908457636833191, + "learning_rate": 5.337846352762005e-06, + "loss": 0.6682, + "step": 17439 + }, + { + "epoch": 0.9598767130827233, + "grad_norm": 0.7880268692970276, + "learning_rate": 5.337413875526923e-06, + "loss": 0.7958, + "step": 17440 + }, + { + "epoch": 0.959931751885079, + "grad_norm": 0.8126769065856934, + "learning_rate": 5.336981395755928e-06, + "loss": 0.8199, + "step": 17441 + }, + { + "epoch": 0.9599867906874346, + "grad_norm": 0.7309434413909912, + "learning_rate": 5.336548913452271e-06, + "loss": 0.8179, + "step": 17442 + }, + { + "epoch": 0.9600418294897903, + "grad_norm": 0.7369129657745361, + "learning_rate": 5.3361164286192034e-06, + "loss": 0.7206, + "step": 17443 + }, + { + "epoch": 0.9600968682921459, + "grad_norm": 0.716317892074585, + "learning_rate": 5.3356839412599736e-06, + "loss": 0.6901, + "step": 17444 + }, + { + "epoch": 0.9601519070945016, + "grad_norm": 0.9500069618225098, + "learning_rate": 5.335251451377835e-06, + "loss": 0.7165, + "step": 17445 + }, + { + "epoch": 0.9602069458968573, + "grad_norm": 0.6800429821014404, + "learning_rate": 5.334818958976035e-06, + "loss": 0.684, + "step": 17446 + }, + { + "epoch": 0.960261984699213, + "grad_norm": 0.7164038419723511, + "learning_rate": 5.334386464057825e-06, + "loss": 0.7046, + "step": 17447 + }, + { + "epoch": 0.9603170235015686, + "grad_norm": 0.6348811984062195, + "learning_rate": 5.3339539666264564e-06, + "loss": 0.7413, + "step": 17448 + }, + { + "epoch": 0.9603720623039242, + "grad_norm": 0.6681154370307922, + "learning_rate": 5.333521466685179e-06, + "loss": 0.6845, + "step": 17449 + }, + { + "epoch": 0.9604271011062799, + "grad_norm": 0.6277575492858887, + "learning_rate": 5.333088964237243e-06, + "loss": 0.7471, + "step": 17450 + }, + { + "epoch": 0.9604821399086356, + "grad_norm": 0.6492090225219727, + "learning_rate": 5.332656459285901e-06, + "loss": 0.7499, + "step": 17451 + }, + { + "epoch": 0.9605371787109912, + "grad_norm": 0.6493937373161316, + "learning_rate": 5.3322239518344e-06, + "loss": 0.73, + "step": 17452 + }, + { + "epoch": 0.9605922175133469, + "grad_norm": 0.7362397313117981, + "learning_rate": 5.331791441885994e-06, + "loss": 0.7677, + "step": 17453 + }, + { + "epoch": 0.9606472563157026, + "grad_norm": 0.6337111592292786, + "learning_rate": 5.331358929443932e-06, + "loss": 0.7343, + "step": 17454 + }, + { + "epoch": 0.9607022951180583, + "grad_norm": 0.6027660965919495, + "learning_rate": 5.330926414511466e-06, + "loss": 0.6505, + "step": 17455 + }, + { + "epoch": 0.9607573339204138, + "grad_norm": 0.6437423229217529, + "learning_rate": 5.330493897091844e-06, + "loss": 0.6762, + "step": 17456 + }, + { + "epoch": 0.9608123727227695, + "grad_norm": 0.7299374938011169, + "learning_rate": 5.33006137718832e-06, + "loss": 0.7085, + "step": 17457 + }, + { + "epoch": 0.9608674115251252, + "grad_norm": 0.6796537637710571, + "learning_rate": 5.329628854804142e-06, + "loss": 0.763, + "step": 17458 + }, + { + "epoch": 0.9609224503274809, + "grad_norm": 0.6185942888259888, + "learning_rate": 5.329196329942563e-06, + "loss": 0.672, + "step": 17459 + }, + { + "epoch": 0.9609774891298365, + "grad_norm": 0.695350706577301, + "learning_rate": 5.32876380260683e-06, + "loss": 0.7561, + "step": 17460 + }, + { + "epoch": 0.9610325279321922, + "grad_norm": 0.6831729412078857, + "learning_rate": 5.328331272800198e-06, + "loss": 0.7777, + "step": 17461 + }, + { + "epoch": 0.9610875667345479, + "grad_norm": 0.6294922828674316, + "learning_rate": 5.327898740525916e-06, + "loss": 0.6117, + "step": 17462 + }, + { + "epoch": 0.9611426055369036, + "grad_norm": 0.8314191102981567, + "learning_rate": 5.327466205787235e-06, + "loss": 0.7627, + "step": 17463 + }, + { + "epoch": 0.9611976443392591, + "grad_norm": 0.7389507293701172, + "learning_rate": 5.327033668587404e-06, + "loss": 0.727, + "step": 17464 + }, + { + "epoch": 0.9612526831416148, + "grad_norm": 0.7043427228927612, + "learning_rate": 5.326601128929677e-06, + "loss": 0.7905, + "step": 17465 + }, + { + "epoch": 0.9613077219439705, + "grad_norm": 0.7443289756774902, + "learning_rate": 5.326168586817303e-06, + "loss": 0.7561, + "step": 17466 + }, + { + "epoch": 0.9613627607463262, + "grad_norm": 0.687877357006073, + "learning_rate": 5.3257360422535345e-06, + "loss": 0.7341, + "step": 17467 + }, + { + "epoch": 0.9614177995486818, + "grad_norm": 0.6753028035163879, + "learning_rate": 5.325303495241618e-06, + "loss": 0.7707, + "step": 17468 + }, + { + "epoch": 0.9614728383510375, + "grad_norm": 0.7854021787643433, + "learning_rate": 5.324870945784811e-06, + "loss": 0.7471, + "step": 17469 + }, + { + "epoch": 0.9615278771533932, + "grad_norm": 0.7157363891601562, + "learning_rate": 5.32443839388636e-06, + "loss": 0.8361, + "step": 17470 + }, + { + "epoch": 0.9615829159557489, + "grad_norm": 0.7041062712669373, + "learning_rate": 5.324005839549517e-06, + "loss": 0.7341, + "step": 17471 + }, + { + "epoch": 0.9616379547581044, + "grad_norm": 1.242568016052246, + "learning_rate": 5.32357328277753e-06, + "loss": 0.6966, + "step": 17472 + }, + { + "epoch": 0.9616929935604601, + "grad_norm": 0.773711085319519, + "learning_rate": 5.323140723573655e-06, + "loss": 0.8137, + "step": 17473 + }, + { + "epoch": 0.9617480323628158, + "grad_norm": 0.8130469918251038, + "learning_rate": 5.322708161941141e-06, + "loss": 0.8386, + "step": 17474 + }, + { + "epoch": 0.9618030711651715, + "grad_norm": 0.6662439107894897, + "learning_rate": 5.322275597883239e-06, + "loss": 0.758, + "step": 17475 + }, + { + "epoch": 0.9618581099675271, + "grad_norm": 0.6854259371757507, + "learning_rate": 5.321843031403197e-06, + "loss": 0.7073, + "step": 17476 + }, + { + "epoch": 0.9619131487698828, + "grad_norm": 0.7293241024017334, + "learning_rate": 5.321410462504273e-06, + "loss": 0.7156, + "step": 17477 + }, + { + "epoch": 0.9619681875722385, + "grad_norm": 0.9480264782905579, + "learning_rate": 5.320977891189712e-06, + "loss": 0.7912, + "step": 17478 + }, + { + "epoch": 0.9620232263745941, + "grad_norm": 0.7539064884185791, + "learning_rate": 5.320545317462767e-06, + "loss": 0.7198, + "step": 17479 + }, + { + "epoch": 0.9620782651769497, + "grad_norm": 0.6641668081283569, + "learning_rate": 5.320112741326691e-06, + "loss": 0.6635, + "step": 17480 + }, + { + "epoch": 0.9621333039793054, + "grad_norm": 0.6983955502510071, + "learning_rate": 5.319680162784732e-06, + "loss": 0.7335, + "step": 17481 + }, + { + "epoch": 0.9621883427816611, + "grad_norm": 0.5957023501396179, + "learning_rate": 5.319247581840143e-06, + "loss": 0.6946, + "step": 17482 + }, + { + "epoch": 0.9622433815840167, + "grad_norm": 0.7082545757293701, + "learning_rate": 5.3188149984961735e-06, + "loss": 0.665, + "step": 17483 + }, + { + "epoch": 0.9622984203863724, + "grad_norm": 0.62541264295578, + "learning_rate": 5.318382412756076e-06, + "loss": 0.707, + "step": 17484 + }, + { + "epoch": 0.962353459188728, + "grad_norm": 0.7225793600082397, + "learning_rate": 5.317949824623102e-06, + "loss": 0.7426, + "step": 17485 + }, + { + "epoch": 0.9624084979910837, + "grad_norm": 0.6111204624176025, + "learning_rate": 5.317517234100502e-06, + "loss": 0.6987, + "step": 17486 + }, + { + "epoch": 0.9624635367934393, + "grad_norm": 0.6709753274917603, + "learning_rate": 5.317084641191528e-06, + "loss": 0.7365, + "step": 17487 + }, + { + "epoch": 0.962518575595795, + "grad_norm": 0.7895059585571289, + "learning_rate": 5.3166520458994286e-06, + "loss": 0.8158, + "step": 17488 + }, + { + "epoch": 0.9625736143981507, + "grad_norm": 1.045240879058838, + "learning_rate": 5.31621944822746e-06, + "loss": 0.6528, + "step": 17489 + }, + { + "epoch": 0.9626286532005064, + "grad_norm": 0.7611956000328064, + "learning_rate": 5.3157868481788685e-06, + "loss": 0.8025, + "step": 17490 + }, + { + "epoch": 0.962683692002862, + "grad_norm": 0.6797771453857422, + "learning_rate": 5.315354245756909e-06, + "loss": 0.8072, + "step": 17491 + }, + { + "epoch": 0.9627387308052177, + "grad_norm": 1.1472023725509644, + "learning_rate": 5.314921640964832e-06, + "loss": 0.7167, + "step": 17492 + }, + { + "epoch": 0.9627937696075733, + "grad_norm": 0.5967952013015747, + "learning_rate": 5.3144890338058875e-06, + "loss": 0.6456, + "step": 17493 + }, + { + "epoch": 0.962848808409929, + "grad_norm": 0.731985867023468, + "learning_rate": 5.314056424283327e-06, + "loss": 0.7599, + "step": 17494 + }, + { + "epoch": 0.9629038472122846, + "grad_norm": 0.6571676135063171, + "learning_rate": 5.313623812400403e-06, + "loss": 0.7211, + "step": 17495 + }, + { + "epoch": 0.9629588860146403, + "grad_norm": 0.6877505779266357, + "learning_rate": 5.313191198160366e-06, + "loss": 0.7073, + "step": 17496 + }, + { + "epoch": 0.963013924816996, + "grad_norm": 0.7390667796134949, + "learning_rate": 5.312758581566468e-06, + "loss": 0.7676, + "step": 17497 + }, + { + "epoch": 0.9630689636193517, + "grad_norm": 0.6747639179229736, + "learning_rate": 5.31232596262196e-06, + "loss": 0.7069, + "step": 17498 + }, + { + "epoch": 0.9631240024217073, + "grad_norm": 0.6707483530044556, + "learning_rate": 5.311893341330094e-06, + "loss": 0.7882, + "step": 17499 + }, + { + "epoch": 0.963179041224063, + "grad_norm": 0.678855836391449, + "learning_rate": 5.31146071769412e-06, + "loss": 0.8008, + "step": 17500 + }, + { + "epoch": 0.9632340800264186, + "grad_norm": 0.7658770084381104, + "learning_rate": 5.311028091717292e-06, + "loss": 0.7275, + "step": 17501 + }, + { + "epoch": 0.9632891188287743, + "grad_norm": 0.6684555411338806, + "learning_rate": 5.31059546340286e-06, + "loss": 0.6935, + "step": 17502 + }, + { + "epoch": 0.9633441576311299, + "grad_norm": 0.6478149890899658, + "learning_rate": 5.310162832754076e-06, + "loss": 0.743, + "step": 17503 + }, + { + "epoch": 0.9633991964334856, + "grad_norm": 0.7288548946380615, + "learning_rate": 5.30973019977419e-06, + "loss": 0.6683, + "step": 17504 + }, + { + "epoch": 0.9634542352358413, + "grad_norm": 0.7290934920310974, + "learning_rate": 5.309297564466455e-06, + "loss": 0.6866, + "step": 17505 + }, + { + "epoch": 0.963509274038197, + "grad_norm": 0.6582328081130981, + "learning_rate": 5.308864926834124e-06, + "loss": 0.7615, + "step": 17506 + }, + { + "epoch": 0.9635643128405526, + "grad_norm": 0.688706636428833, + "learning_rate": 5.308432286880446e-06, + "loss": 0.8026, + "step": 17507 + }, + { + "epoch": 0.9636193516429082, + "grad_norm": 0.6733599305152893, + "learning_rate": 5.307999644608673e-06, + "loss": 0.7274, + "step": 17508 + }, + { + "epoch": 0.9636743904452639, + "grad_norm": 0.6621059775352478, + "learning_rate": 5.307567000022055e-06, + "loss": 0.6909, + "step": 17509 + }, + { + "epoch": 0.9637294292476196, + "grad_norm": 0.8380454778671265, + "learning_rate": 5.30713435312385e-06, + "loss": 0.7591, + "step": 17510 + }, + { + "epoch": 0.9637844680499752, + "grad_norm": 0.7065680623054504, + "learning_rate": 5.306701703917303e-06, + "loss": 0.7793, + "step": 17511 + }, + { + "epoch": 0.9638395068523309, + "grad_norm": 0.7197430729866028, + "learning_rate": 5.306269052405667e-06, + "loss": 0.7114, + "step": 17512 + }, + { + "epoch": 0.9638945456546866, + "grad_norm": 0.7427436113357544, + "learning_rate": 5.3058363985921965e-06, + "loss": 0.6783, + "step": 17513 + }, + { + "epoch": 0.9639495844570423, + "grad_norm": 0.6908861994743347, + "learning_rate": 5.305403742480141e-06, + "loss": 0.7839, + "step": 17514 + }, + { + "epoch": 0.9640046232593978, + "grad_norm": 0.6575813293457031, + "learning_rate": 5.304971084072753e-06, + "loss": 0.7608, + "step": 17515 + }, + { + "epoch": 0.9640596620617535, + "grad_norm": 0.7817708253860474, + "learning_rate": 5.304538423373285e-06, + "loss": 0.6945, + "step": 17516 + }, + { + "epoch": 0.9641147008641092, + "grad_norm": 0.7376648187637329, + "learning_rate": 5.304105760384987e-06, + "loss": 0.7437, + "step": 17517 + }, + { + "epoch": 0.9641697396664649, + "grad_norm": 0.8665770292282104, + "learning_rate": 5.3036730951111105e-06, + "loss": 0.6309, + "step": 17518 + }, + { + "epoch": 0.9642247784688205, + "grad_norm": 0.7191612124443054, + "learning_rate": 5.303240427554909e-06, + "loss": 0.7783, + "step": 17519 + }, + { + "epoch": 0.9642798172711762, + "grad_norm": 0.6969373822212219, + "learning_rate": 5.302807757719633e-06, + "loss": 0.7916, + "step": 17520 + }, + { + "epoch": 0.9643348560735319, + "grad_norm": 0.7042273283004761, + "learning_rate": 5.302375085608535e-06, + "loss": 0.7401, + "step": 17521 + }, + { + "epoch": 0.9643898948758876, + "grad_norm": 0.6587864756584167, + "learning_rate": 5.3019424112248676e-06, + "loss": 0.6923, + "step": 17522 + }, + { + "epoch": 0.9644449336782431, + "grad_norm": 0.6853402256965637, + "learning_rate": 5.301509734571881e-06, + "loss": 0.7811, + "step": 17523 + }, + { + "epoch": 0.9644999724805988, + "grad_norm": 0.8109264969825745, + "learning_rate": 5.301077055652828e-06, + "loss": 0.7365, + "step": 17524 + }, + { + "epoch": 0.9645550112829545, + "grad_norm": 0.6986764073371887, + "learning_rate": 5.300644374470961e-06, + "loss": 0.7721, + "step": 17525 + }, + { + "epoch": 0.9646100500853101, + "grad_norm": 0.6346173286437988, + "learning_rate": 5.300211691029532e-06, + "loss": 0.6719, + "step": 17526 + }, + { + "epoch": 0.9646650888876658, + "grad_norm": 0.7523975372314453, + "learning_rate": 5.299779005331792e-06, + "loss": 0.718, + "step": 17527 + }, + { + "epoch": 0.9647201276900215, + "grad_norm": 0.7034041881561279, + "learning_rate": 5.2993463173809925e-06, + "loss": 0.7858, + "step": 17528 + }, + { + "epoch": 0.9647751664923772, + "grad_norm": 0.7195404767990112, + "learning_rate": 5.2989136271803864e-06, + "loss": 0.7105, + "step": 17529 + }, + { + "epoch": 0.9648302052947327, + "grad_norm": 0.6533499360084534, + "learning_rate": 5.298480934733226e-06, + "loss": 0.7639, + "step": 17530 + }, + { + "epoch": 0.9648852440970884, + "grad_norm": 0.7042137980461121, + "learning_rate": 5.298048240042762e-06, + "loss": 0.8254, + "step": 17531 + }, + { + "epoch": 0.9649402828994441, + "grad_norm": 0.7466374635696411, + "learning_rate": 5.297615543112249e-06, + "loss": 0.7034, + "step": 17532 + }, + { + "epoch": 0.9649953217017998, + "grad_norm": 0.7622665166854858, + "learning_rate": 5.297182843944934e-06, + "loss": 0.6669, + "step": 17533 + }, + { + "epoch": 0.9650503605041554, + "grad_norm": 0.6359787583351135, + "learning_rate": 5.296750142544075e-06, + "loss": 0.701, + "step": 17534 + }, + { + "epoch": 0.9651053993065111, + "grad_norm": 0.6798627376556396, + "learning_rate": 5.29631743891292e-06, + "loss": 0.7014, + "step": 17535 + }, + { + "epoch": 0.9651604381088668, + "grad_norm": 0.7485163807868958, + "learning_rate": 5.295884733054723e-06, + "loss": 0.758, + "step": 17536 + }, + { + "epoch": 0.9652154769112224, + "grad_norm": 0.6358698606491089, + "learning_rate": 5.295452024972735e-06, + "loss": 0.7043, + "step": 17537 + }, + { + "epoch": 0.965270515713578, + "grad_norm": 0.7724503874778748, + "learning_rate": 5.29501931467021e-06, + "loss": 0.8167, + "step": 17538 + }, + { + "epoch": 0.9653255545159337, + "grad_norm": 0.695549726486206, + "learning_rate": 5.294586602150398e-06, + "loss": 0.7455, + "step": 17539 + }, + { + "epoch": 0.9653805933182894, + "grad_norm": 0.6918785572052002, + "learning_rate": 5.294153887416552e-06, + "loss": 0.7833, + "step": 17540 + }, + { + "epoch": 0.9654356321206451, + "grad_norm": 0.6794410943984985, + "learning_rate": 5.293721170471924e-06, + "loss": 0.6935, + "step": 17541 + }, + { + "epoch": 0.9654906709230007, + "grad_norm": 0.8318895101547241, + "learning_rate": 5.293288451319767e-06, + "loss": 0.9019, + "step": 17542 + }, + { + "epoch": 0.9655457097253564, + "grad_norm": 0.6449755430221558, + "learning_rate": 5.292855729963332e-06, + "loss": 0.7133, + "step": 17543 + }, + { + "epoch": 0.965600748527712, + "grad_norm": 0.6092707514762878, + "learning_rate": 5.292423006405871e-06, + "loss": 0.654, + "step": 17544 + }, + { + "epoch": 0.9656557873300677, + "grad_norm": 0.6690982580184937, + "learning_rate": 5.291990280650637e-06, + "loss": 0.7909, + "step": 17545 + }, + { + "epoch": 0.9657108261324233, + "grad_norm": 0.7012231945991516, + "learning_rate": 5.291557552700883e-06, + "loss": 0.755, + "step": 17546 + }, + { + "epoch": 0.965765864934779, + "grad_norm": 0.6244442462921143, + "learning_rate": 5.29112482255986e-06, + "loss": 0.6907, + "step": 17547 + }, + { + "epoch": 0.9658209037371347, + "grad_norm": 0.6664257049560547, + "learning_rate": 5.2906920902308215e-06, + "loss": 0.8044, + "step": 17548 + }, + { + "epoch": 0.9658759425394904, + "grad_norm": 0.7767570614814758, + "learning_rate": 5.290259355717018e-06, + "loss": 0.8169, + "step": 17549 + }, + { + "epoch": 0.965930981341846, + "grad_norm": 0.8205220699310303, + "learning_rate": 5.2898266190217025e-06, + "loss": 0.8301, + "step": 17550 + }, + { + "epoch": 0.9659860201442017, + "grad_norm": 0.5859615206718445, + "learning_rate": 5.2893938801481295e-06, + "loss": 0.6929, + "step": 17551 + }, + { + "epoch": 0.9660410589465573, + "grad_norm": 0.6787375211715698, + "learning_rate": 5.288961139099549e-06, + "loss": 0.76, + "step": 17552 + }, + { + "epoch": 0.966096097748913, + "grad_norm": 0.6915340423583984, + "learning_rate": 5.288528395879214e-06, + "loss": 0.6894, + "step": 17553 + }, + { + "epoch": 0.9661511365512686, + "grad_norm": 0.7098847031593323, + "learning_rate": 5.2880956504903765e-06, + "loss": 0.6713, + "step": 17554 + }, + { + "epoch": 0.9662061753536243, + "grad_norm": 0.6361868977546692, + "learning_rate": 5.287662902936289e-06, + "loss": 0.6729, + "step": 17555 + }, + { + "epoch": 0.96626121415598, + "grad_norm": 0.6745426654815674, + "learning_rate": 5.287230153220205e-06, + "loss": 0.7441, + "step": 17556 + }, + { + "epoch": 0.9663162529583357, + "grad_norm": 0.6941108703613281, + "learning_rate": 5.286797401345374e-06, + "loss": 0.7468, + "step": 17557 + }, + { + "epoch": 0.9663712917606913, + "grad_norm": 0.6621807217597961, + "learning_rate": 5.286364647315052e-06, + "loss": 0.8005, + "step": 17558 + }, + { + "epoch": 0.9664263305630469, + "grad_norm": 0.5958573222160339, + "learning_rate": 5.28593189113249e-06, + "loss": 0.7685, + "step": 17559 + }, + { + "epoch": 0.9664813693654026, + "grad_norm": 0.6775822043418884, + "learning_rate": 5.285499132800941e-06, + "loss": 0.6828, + "step": 17560 + }, + { + "epoch": 0.9665364081677583, + "grad_norm": 0.7009457945823669, + "learning_rate": 5.285066372323655e-06, + "loss": 0.8319, + "step": 17561 + }, + { + "epoch": 0.9665914469701139, + "grad_norm": 0.6704908609390259, + "learning_rate": 5.284633609703887e-06, + "loss": 0.8294, + "step": 17562 + }, + { + "epoch": 0.9666464857724696, + "grad_norm": 0.6544950604438782, + "learning_rate": 5.284200844944891e-06, + "loss": 0.7558, + "step": 17563 + }, + { + "epoch": 0.9667015245748253, + "grad_norm": 0.6781743168830872, + "learning_rate": 5.283768078049915e-06, + "loss": 0.7093, + "step": 17564 + }, + { + "epoch": 0.966756563377181, + "grad_norm": 0.6010674238204956, + "learning_rate": 5.283335309022216e-06, + "loss": 0.6998, + "step": 17565 + }, + { + "epoch": 0.9668116021795365, + "grad_norm": 0.6528247594833374, + "learning_rate": 5.282902537865044e-06, + "loss": 0.7307, + "step": 17566 + }, + { + "epoch": 0.9668666409818922, + "grad_norm": 0.6252716779708862, + "learning_rate": 5.282469764581652e-06, + "loss": 0.6509, + "step": 17567 + }, + { + "epoch": 0.9669216797842479, + "grad_norm": 0.6801924705505371, + "learning_rate": 5.282036989175294e-06, + "loss": 0.7988, + "step": 17568 + }, + { + "epoch": 0.9669767185866035, + "grad_norm": 0.6112244129180908, + "learning_rate": 5.281604211649219e-06, + "loss": 0.6524, + "step": 17569 + }, + { + "epoch": 0.9670317573889592, + "grad_norm": 0.7375578284263611, + "learning_rate": 5.281171432006684e-06, + "loss": 0.7292, + "step": 17570 + }, + { + "epoch": 0.9670867961913149, + "grad_norm": 0.60760897397995, + "learning_rate": 5.28073865025094e-06, + "loss": 0.6621, + "step": 17571 + }, + { + "epoch": 0.9671418349936706, + "grad_norm": 0.6792896389961243, + "learning_rate": 5.280305866385239e-06, + "loss": 0.7818, + "step": 17572 + }, + { + "epoch": 0.9671968737960261, + "grad_norm": 0.7662883400917053, + "learning_rate": 5.279873080412833e-06, + "loss": 0.8246, + "step": 17573 + }, + { + "epoch": 0.9672519125983818, + "grad_norm": 0.7479282021522522, + "learning_rate": 5.279440292336977e-06, + "loss": 0.7974, + "step": 17574 + }, + { + "epoch": 0.9673069514007375, + "grad_norm": 0.8267967104911804, + "learning_rate": 5.279007502160922e-06, + "loss": 0.6963, + "step": 17575 + }, + { + "epoch": 0.9673619902030932, + "grad_norm": 0.7001194357872009, + "learning_rate": 5.278574709887923e-06, + "loss": 0.6533, + "step": 17576 + }, + { + "epoch": 0.9674170290054488, + "grad_norm": 0.6630476117134094, + "learning_rate": 5.27814191552123e-06, + "loss": 0.7644, + "step": 17577 + }, + { + "epoch": 0.9674720678078045, + "grad_norm": 0.6703379154205322, + "learning_rate": 5.277709119064096e-06, + "loss": 0.7285, + "step": 17578 + }, + { + "epoch": 0.9675271066101602, + "grad_norm": 0.7047588229179382, + "learning_rate": 5.277276320519775e-06, + "loss": 0.7993, + "step": 17579 + }, + { + "epoch": 0.9675821454125159, + "grad_norm": 0.7320923209190369, + "learning_rate": 5.27684351989152e-06, + "loss": 0.8014, + "step": 17580 + }, + { + "epoch": 0.9676371842148714, + "grad_norm": 0.6852779388427734, + "learning_rate": 5.276410717182582e-06, + "loss": 0.7805, + "step": 17581 + }, + { + "epoch": 0.9676922230172271, + "grad_norm": 0.7098143696784973, + "learning_rate": 5.275977912396216e-06, + "loss": 0.7196, + "step": 17582 + }, + { + "epoch": 0.9677472618195828, + "grad_norm": 0.6526379585266113, + "learning_rate": 5.275545105535674e-06, + "loss": 0.6948, + "step": 17583 + }, + { + "epoch": 0.9678023006219385, + "grad_norm": 0.6695026159286499, + "learning_rate": 5.275112296604209e-06, + "loss": 0.7167, + "step": 17584 + }, + { + "epoch": 0.9678573394242941, + "grad_norm": 0.825526773929596, + "learning_rate": 5.274679485605072e-06, + "loss": 0.6848, + "step": 17585 + }, + { + "epoch": 0.9679123782266498, + "grad_norm": 0.7333941459655762, + "learning_rate": 5.2742466725415185e-06, + "loss": 0.7888, + "step": 17586 + }, + { + "epoch": 0.9679674170290055, + "grad_norm": 0.6386739611625671, + "learning_rate": 5.2738138574168e-06, + "loss": 0.6829, + "step": 17587 + }, + { + "epoch": 0.9680224558313612, + "grad_norm": 0.7038518190383911, + "learning_rate": 5.273381040234172e-06, + "loss": 0.8408, + "step": 17588 + }, + { + "epoch": 0.9680774946337167, + "grad_norm": 0.6579585075378418, + "learning_rate": 5.272948220996883e-06, + "loss": 0.7605, + "step": 17589 + }, + { + "epoch": 0.9681325334360724, + "grad_norm": 0.614625096321106, + "learning_rate": 5.272515399708189e-06, + "loss": 0.6965, + "step": 17590 + }, + { + "epoch": 0.9681875722384281, + "grad_norm": 0.6300467848777771, + "learning_rate": 5.272082576371342e-06, + "loss": 0.7148, + "step": 17591 + }, + { + "epoch": 0.9682426110407838, + "grad_norm": 0.634800136089325, + "learning_rate": 5.271649750989596e-06, + "loss": 0.7119, + "step": 17592 + }, + { + "epoch": 0.9682976498431394, + "grad_norm": 0.7334730625152588, + "learning_rate": 5.271216923566201e-06, + "loss": 0.7616, + "step": 17593 + }, + { + "epoch": 0.9683526886454951, + "grad_norm": 0.649853527545929, + "learning_rate": 5.270784094104414e-06, + "loss": 0.718, + "step": 17594 + }, + { + "epoch": 0.9684077274478508, + "grad_norm": 0.7468090653419495, + "learning_rate": 5.270351262607486e-06, + "loss": 0.723, + "step": 17595 + }, + { + "epoch": 0.9684627662502064, + "grad_norm": 0.8167023658752441, + "learning_rate": 5.269918429078671e-06, + "loss": 0.7052, + "step": 17596 + }, + { + "epoch": 0.968517805052562, + "grad_norm": 0.6515017747879028, + "learning_rate": 5.26948559352122e-06, + "loss": 0.7679, + "step": 17597 + }, + { + "epoch": 0.9685728438549177, + "grad_norm": 0.6585719585418701, + "learning_rate": 5.269052755938388e-06, + "loss": 0.7396, + "step": 17598 + }, + { + "epoch": 0.9686278826572734, + "grad_norm": 0.6206550598144531, + "learning_rate": 5.268619916333427e-06, + "loss": 0.7786, + "step": 17599 + }, + { + "epoch": 0.9686829214596291, + "grad_norm": 0.6914154291152954, + "learning_rate": 5.268187074709591e-06, + "loss": 0.6792, + "step": 17600 + }, + { + "epoch": 0.9687379602619847, + "grad_norm": 0.619126558303833, + "learning_rate": 5.267754231070134e-06, + "loss": 0.7567, + "step": 17601 + }, + { + "epoch": 0.9687929990643404, + "grad_norm": 0.6083783507347107, + "learning_rate": 5.267321385418306e-06, + "loss": 0.6357, + "step": 17602 + }, + { + "epoch": 0.968848037866696, + "grad_norm": 0.6827127933502197, + "learning_rate": 5.266888537757363e-06, + "loss": 0.7282, + "step": 17603 + }, + { + "epoch": 0.9689030766690517, + "grad_norm": 0.7914609909057617, + "learning_rate": 5.266455688090557e-06, + "loss": 0.8448, + "step": 17604 + }, + { + "epoch": 0.9689581154714073, + "grad_norm": 0.6890830397605896, + "learning_rate": 5.26602283642114e-06, + "loss": 0.7391, + "step": 17605 + }, + { + "epoch": 0.969013154273763, + "grad_norm": 0.7047440409660339, + "learning_rate": 5.265589982752368e-06, + "loss": 0.6348, + "step": 17606 + }, + { + "epoch": 0.9690681930761187, + "grad_norm": 0.7744457125663757, + "learning_rate": 5.265157127087493e-06, + "loss": 0.7639, + "step": 17607 + }, + { + "epoch": 0.9691232318784744, + "grad_norm": 0.6565386056900024, + "learning_rate": 5.264724269429767e-06, + "loss": 0.7674, + "step": 17608 + }, + { + "epoch": 0.96917827068083, + "grad_norm": 0.7823582887649536, + "learning_rate": 5.264291409782445e-06, + "loss": 0.7053, + "step": 17609 + }, + { + "epoch": 0.9692333094831856, + "grad_norm": 0.6871463656425476, + "learning_rate": 5.263858548148779e-06, + "loss": 0.6983, + "step": 17610 + }, + { + "epoch": 0.9692883482855413, + "grad_norm": 0.6161954402923584, + "learning_rate": 5.263425684532023e-06, + "loss": 0.7442, + "step": 17611 + }, + { + "epoch": 0.9693433870878969, + "grad_norm": 0.6763638854026794, + "learning_rate": 5.26299281893543e-06, + "loss": 0.7545, + "step": 17612 + }, + { + "epoch": 0.9693984258902526, + "grad_norm": 0.6834864020347595, + "learning_rate": 5.2625599513622534e-06, + "loss": 0.6359, + "step": 17613 + }, + { + "epoch": 0.9694534646926083, + "grad_norm": 0.8389854431152344, + "learning_rate": 5.262127081815748e-06, + "loss": 0.7517, + "step": 17614 + }, + { + "epoch": 0.969508503494964, + "grad_norm": 0.6721356511116028, + "learning_rate": 5.261694210299164e-06, + "loss": 0.6466, + "step": 17615 + }, + { + "epoch": 0.9695635422973196, + "grad_norm": 0.6551144123077393, + "learning_rate": 5.261261336815757e-06, + "loss": 0.7571, + "step": 17616 + }, + { + "epoch": 0.9696185810996752, + "grad_norm": 0.6491858959197998, + "learning_rate": 5.260828461368779e-06, + "loss": 0.7274, + "step": 17617 + }, + { + "epoch": 0.9696736199020309, + "grad_norm": 0.6955786347389221, + "learning_rate": 5.260395583961484e-06, + "loss": 0.7589, + "step": 17618 + }, + { + "epoch": 0.9697286587043866, + "grad_norm": 0.6789944171905518, + "learning_rate": 5.259962704597127e-06, + "loss": 0.7439, + "step": 17619 + }, + { + "epoch": 0.9697836975067422, + "grad_norm": 0.6398529410362244, + "learning_rate": 5.259529823278958e-06, + "loss": 0.6743, + "step": 17620 + }, + { + "epoch": 0.9698387363090979, + "grad_norm": 0.7643429040908813, + "learning_rate": 5.2590969400102335e-06, + "loss": 0.7176, + "step": 17621 + }, + { + "epoch": 0.9698937751114536, + "grad_norm": 0.6158431768417358, + "learning_rate": 5.258664054794205e-06, + "loss": 0.6963, + "step": 17622 + }, + { + "epoch": 0.9699488139138093, + "grad_norm": 0.644513726234436, + "learning_rate": 5.258231167634127e-06, + "loss": 0.7126, + "step": 17623 + }, + { + "epoch": 0.9700038527161648, + "grad_norm": 0.6312246918678284, + "learning_rate": 5.257798278533253e-06, + "loss": 0.7946, + "step": 17624 + }, + { + "epoch": 0.9700588915185205, + "grad_norm": 0.8247089385986328, + "learning_rate": 5.257365387494836e-06, + "loss": 0.8014, + "step": 17625 + }, + { + "epoch": 0.9701139303208762, + "grad_norm": 0.6787695288658142, + "learning_rate": 5.25693249452213e-06, + "loss": 0.775, + "step": 17626 + }, + { + "epoch": 0.9701689691232319, + "grad_norm": 0.675240695476532, + "learning_rate": 5.256499599618388e-06, + "loss": 0.7655, + "step": 17627 + }, + { + "epoch": 0.9702240079255875, + "grad_norm": 0.604271411895752, + "learning_rate": 5.256066702786864e-06, + "loss": 0.7278, + "step": 17628 + }, + { + "epoch": 0.9702790467279432, + "grad_norm": 0.7096530795097351, + "learning_rate": 5.2556338040308095e-06, + "loss": 0.8267, + "step": 17629 + }, + { + "epoch": 0.9703340855302989, + "grad_norm": 0.7375852465629578, + "learning_rate": 5.255200903353481e-06, + "loss": 0.7844, + "step": 17630 + }, + { + "epoch": 0.9703891243326546, + "grad_norm": 0.7275679707527161, + "learning_rate": 5.25476800075813e-06, + "loss": 0.7814, + "step": 17631 + }, + { + "epoch": 0.9704441631350101, + "grad_norm": 0.655462384223938, + "learning_rate": 5.254335096248012e-06, + "loss": 0.7401, + "step": 17632 + }, + { + "epoch": 0.9704992019373658, + "grad_norm": 0.6601149439811707, + "learning_rate": 5.253902189826379e-06, + "loss": 0.6787, + "step": 17633 + }, + { + "epoch": 0.9705542407397215, + "grad_norm": 0.7337445020675659, + "learning_rate": 5.253469281496485e-06, + "loss": 0.744, + "step": 17634 + }, + { + "epoch": 0.9706092795420772, + "grad_norm": 0.7127711772918701, + "learning_rate": 5.253036371261584e-06, + "loss": 0.6862, + "step": 17635 + }, + { + "epoch": 0.9706643183444328, + "grad_norm": 0.6720691919326782, + "learning_rate": 5.25260345912493e-06, + "loss": 0.6828, + "step": 17636 + }, + { + "epoch": 0.9707193571467885, + "grad_norm": 0.707231879234314, + "learning_rate": 5.252170545089775e-06, + "loss": 0.7162, + "step": 17637 + }, + { + "epoch": 0.9707743959491442, + "grad_norm": 0.561342179775238, + "learning_rate": 5.251737629159374e-06, + "loss": 0.558, + "step": 17638 + }, + { + "epoch": 0.9708294347514999, + "grad_norm": 0.6514197587966919, + "learning_rate": 5.251304711336981e-06, + "loss": 0.699, + "step": 17639 + }, + { + "epoch": 0.9708844735538554, + "grad_norm": 0.5802120566368103, + "learning_rate": 5.250871791625849e-06, + "loss": 0.5976, + "step": 17640 + }, + { + "epoch": 0.9709395123562111, + "grad_norm": 0.659600019454956, + "learning_rate": 5.25043887002923e-06, + "loss": 0.6831, + "step": 17641 + }, + { + "epoch": 0.9709945511585668, + "grad_norm": 0.7798045873641968, + "learning_rate": 5.250005946550381e-06, + "loss": 0.7274, + "step": 17642 + }, + { + "epoch": 0.9710495899609225, + "grad_norm": 0.6244889497756958, + "learning_rate": 5.249573021192553e-06, + "loss": 0.6973, + "step": 17643 + }, + { + "epoch": 0.9711046287632781, + "grad_norm": 0.7149257659912109, + "learning_rate": 5.249140093959003e-06, + "loss": 0.7197, + "step": 17644 + }, + { + "epoch": 0.9711596675656338, + "grad_norm": 0.7044658660888672, + "learning_rate": 5.248707164852981e-06, + "loss": 0.7254, + "step": 17645 + }, + { + "epoch": 0.9712147063679895, + "grad_norm": 0.8093852400779724, + "learning_rate": 5.248274233877742e-06, + "loss": 0.7622, + "step": 17646 + }, + { + "epoch": 0.9712697451703451, + "grad_norm": 0.6021914482116699, + "learning_rate": 5.2478413010365425e-06, + "loss": 0.6356, + "step": 17647 + }, + { + "epoch": 0.9713247839727007, + "grad_norm": 0.7819526791572571, + "learning_rate": 5.247408366332633e-06, + "loss": 0.6843, + "step": 17648 + }, + { + "epoch": 0.9713798227750564, + "grad_norm": 0.7436261773109436, + "learning_rate": 5.246975429769269e-06, + "loss": 0.7383, + "step": 17649 + }, + { + "epoch": 0.9714348615774121, + "grad_norm": 0.6402625441551208, + "learning_rate": 5.246542491349703e-06, + "loss": 0.7297, + "step": 17650 + }, + { + "epoch": 0.9714899003797678, + "grad_norm": 0.6183507442474365, + "learning_rate": 5.246109551077191e-06, + "loss": 0.6971, + "step": 17651 + }, + { + "epoch": 0.9715449391821234, + "grad_norm": 0.693608820438385, + "learning_rate": 5.245676608954985e-06, + "loss": 0.7626, + "step": 17652 + }, + { + "epoch": 0.9715999779844791, + "grad_norm": 0.6693928837776184, + "learning_rate": 5.245243664986337e-06, + "loss": 0.7379, + "step": 17653 + }, + { + "epoch": 0.9716550167868347, + "grad_norm": 0.6615895628929138, + "learning_rate": 5.2448107191745055e-06, + "loss": 0.6611, + "step": 17654 + }, + { + "epoch": 0.9717100555891903, + "grad_norm": 0.6616193056106567, + "learning_rate": 5.244377771522741e-06, + "loss": 0.8005, + "step": 17655 + }, + { + "epoch": 0.971765094391546, + "grad_norm": 0.7025266289710999, + "learning_rate": 5.243944822034301e-06, + "loss": 0.756, + "step": 17656 + }, + { + "epoch": 0.9718201331939017, + "grad_norm": 0.6651642322540283, + "learning_rate": 5.243511870712434e-06, + "loss": 0.7113, + "step": 17657 + }, + { + "epoch": 0.9718751719962574, + "grad_norm": 0.6264922618865967, + "learning_rate": 5.243078917560398e-06, + "loss": 0.7355, + "step": 17658 + }, + { + "epoch": 0.971930210798613, + "grad_norm": 0.6707214117050171, + "learning_rate": 5.2426459625814475e-06, + "loss": 0.7703, + "step": 17659 + }, + { + "epoch": 0.9719852496009687, + "grad_norm": 0.7076506614685059, + "learning_rate": 5.242213005778833e-06, + "loss": 0.7704, + "step": 17660 + }, + { + "epoch": 0.9720402884033243, + "grad_norm": 0.7259695529937744, + "learning_rate": 5.241780047155811e-06, + "loss": 0.6814, + "step": 17661 + }, + { + "epoch": 0.97209532720568, + "grad_norm": 0.6943030953407288, + "learning_rate": 5.241347086715636e-06, + "loss": 0.696, + "step": 17662 + }, + { + "epoch": 0.9721503660080356, + "grad_norm": 1.216528296470642, + "learning_rate": 5.240914124461559e-06, + "loss": 0.7686, + "step": 17663 + }, + { + "epoch": 0.9722054048103913, + "grad_norm": 0.6558219790458679, + "learning_rate": 5.240481160396838e-06, + "loss": 0.7056, + "step": 17664 + }, + { + "epoch": 0.972260443612747, + "grad_norm": 0.7323386669158936, + "learning_rate": 5.2400481945247224e-06, + "loss": 0.7147, + "step": 17665 + }, + { + "epoch": 0.9723154824151027, + "grad_norm": 0.7845921516418457, + "learning_rate": 5.23961522684847e-06, + "loss": 0.7745, + "step": 17666 + }, + { + "epoch": 0.9723705212174583, + "grad_norm": 0.6414793729782104, + "learning_rate": 5.239182257371335e-06, + "loss": 0.7127, + "step": 17667 + }, + { + "epoch": 0.972425560019814, + "grad_norm": 0.7045019268989563, + "learning_rate": 5.238749286096568e-06, + "loss": 0.7783, + "step": 17668 + }, + { + "epoch": 0.9724805988221696, + "grad_norm": 0.6710691452026367, + "learning_rate": 5.2383163130274274e-06, + "loss": 0.8069, + "step": 17669 + }, + { + "epoch": 0.9725356376245253, + "grad_norm": 0.7799990773200989, + "learning_rate": 5.2378833381671626e-06, + "loss": 0.7133, + "step": 17670 + }, + { + "epoch": 0.9725906764268809, + "grad_norm": 0.6748518943786621, + "learning_rate": 5.237450361519032e-06, + "loss": 0.7588, + "step": 17671 + }, + { + "epoch": 0.9726457152292366, + "grad_norm": 0.6174660325050354, + "learning_rate": 5.237017383086288e-06, + "loss": 0.6363, + "step": 17672 + }, + { + "epoch": 0.9727007540315923, + "grad_norm": 0.7074185013771057, + "learning_rate": 5.236584402872185e-06, + "loss": 0.7307, + "step": 17673 + }, + { + "epoch": 0.972755792833948, + "grad_norm": 0.6636162996292114, + "learning_rate": 5.236151420879977e-06, + "loss": 0.7476, + "step": 17674 + }, + { + "epoch": 0.9728108316363036, + "grad_norm": 0.8502483367919922, + "learning_rate": 5.2357184371129175e-06, + "loss": 0.8123, + "step": 17675 + }, + { + "epoch": 0.9728658704386592, + "grad_norm": 0.7215339541435242, + "learning_rate": 5.235285451574262e-06, + "loss": 0.6557, + "step": 17676 + }, + { + "epoch": 0.9729209092410149, + "grad_norm": 0.6733965277671814, + "learning_rate": 5.2348524642672635e-06, + "loss": 0.7316, + "step": 17677 + }, + { + "epoch": 0.9729759480433706, + "grad_norm": 0.6186164617538452, + "learning_rate": 5.234419475195176e-06, + "loss": 0.741, + "step": 17678 + }, + { + "epoch": 0.9730309868457262, + "grad_norm": 0.6414602994918823, + "learning_rate": 5.233986484361255e-06, + "loss": 0.7266, + "step": 17679 + }, + { + "epoch": 0.9730860256480819, + "grad_norm": 0.8314804434776306, + "learning_rate": 5.2335534917687545e-06, + "loss": 0.7368, + "step": 17680 + }, + { + "epoch": 0.9731410644504376, + "grad_norm": 0.6001927256584167, + "learning_rate": 5.23312049742093e-06, + "loss": 0.6717, + "step": 17681 + }, + { + "epoch": 0.9731961032527933, + "grad_norm": 0.7511568069458008, + "learning_rate": 5.232687501321031e-06, + "loss": 0.8037, + "step": 17682 + }, + { + "epoch": 0.9732511420551488, + "grad_norm": 0.6779069900512695, + "learning_rate": 5.232254503472317e-06, + "loss": 0.7341, + "step": 17683 + }, + { + "epoch": 0.9733061808575045, + "grad_norm": 1.6375025510787964, + "learning_rate": 5.23182150387804e-06, + "loss": 1.0429, + "step": 17684 + }, + { + "epoch": 0.9733612196598602, + "grad_norm": 0.7395290732383728, + "learning_rate": 5.231388502541454e-06, + "loss": 0.7704, + "step": 17685 + }, + { + "epoch": 0.9734162584622159, + "grad_norm": 0.6683037281036377, + "learning_rate": 5.230955499465815e-06, + "loss": 0.755, + "step": 17686 + }, + { + "epoch": 0.9734712972645715, + "grad_norm": 1.0014123916625977, + "learning_rate": 5.230522494654377e-06, + "loss": 0.902, + "step": 17687 + }, + { + "epoch": 0.9735263360669272, + "grad_norm": 0.8084937334060669, + "learning_rate": 5.230089488110392e-06, + "loss": 0.7252, + "step": 17688 + }, + { + "epoch": 0.9735813748692829, + "grad_norm": 0.6769001483917236, + "learning_rate": 5.229656479837117e-06, + "loss": 0.7287, + "step": 17689 + }, + { + "epoch": 0.9736364136716386, + "grad_norm": 0.6600124835968018, + "learning_rate": 5.229223469837804e-06, + "loss": 0.7659, + "step": 17690 + }, + { + "epoch": 0.9736914524739941, + "grad_norm": 0.7093310952186584, + "learning_rate": 5.22879045811571e-06, + "loss": 0.83, + "step": 17691 + }, + { + "epoch": 0.9737464912763498, + "grad_norm": 0.6043664216995239, + "learning_rate": 5.228357444674088e-06, + "loss": 0.6321, + "step": 17692 + }, + { + "epoch": 0.9738015300787055, + "grad_norm": 0.7609572410583496, + "learning_rate": 5.227924429516192e-06, + "loss": 0.7391, + "step": 17693 + }, + { + "epoch": 0.9738565688810612, + "grad_norm": 0.6878551244735718, + "learning_rate": 5.227491412645277e-06, + "loss": 0.7947, + "step": 17694 + }, + { + "epoch": 0.9739116076834168, + "grad_norm": 0.6588627099990845, + "learning_rate": 5.227058394064598e-06, + "loss": 0.6916, + "step": 17695 + }, + { + "epoch": 0.9739666464857725, + "grad_norm": 0.6774247288703918, + "learning_rate": 5.22662537377741e-06, + "loss": 0.6332, + "step": 17696 + }, + { + "epoch": 0.9740216852881282, + "grad_norm": 0.9965986013412476, + "learning_rate": 5.226192351786965e-06, + "loss": 0.7915, + "step": 17697 + }, + { + "epoch": 0.9740767240904837, + "grad_norm": 0.6406721472740173, + "learning_rate": 5.225759328096519e-06, + "loss": 0.7314, + "step": 17698 + }, + { + "epoch": 0.9741317628928394, + "grad_norm": 0.8169941306114197, + "learning_rate": 5.225326302709326e-06, + "loss": 0.8155, + "step": 17699 + }, + { + "epoch": 0.9741868016951951, + "grad_norm": 0.6238623857498169, + "learning_rate": 5.224893275628642e-06, + "loss": 0.7004, + "step": 17700 + }, + { + "epoch": 0.9742418404975508, + "grad_norm": 0.7599614858627319, + "learning_rate": 5.224460246857719e-06, + "loss": 0.7761, + "step": 17701 + }, + { + "epoch": 0.9742968792999064, + "grad_norm": 0.5887364745140076, + "learning_rate": 5.224027216399812e-06, + "loss": 0.6816, + "step": 17702 + }, + { + "epoch": 0.9743519181022621, + "grad_norm": 0.8065565228462219, + "learning_rate": 5.2235941842581785e-06, + "loss": 0.8206, + "step": 17703 + }, + { + "epoch": 0.9744069569046178, + "grad_norm": 0.7422066926956177, + "learning_rate": 5.2231611504360705e-06, + "loss": 0.8321, + "step": 17704 + }, + { + "epoch": 0.9744619957069734, + "grad_norm": 0.6249799132347107, + "learning_rate": 5.222728114936742e-06, + "loss": 0.7404, + "step": 17705 + }, + { + "epoch": 0.974517034509329, + "grad_norm": 0.5743978023529053, + "learning_rate": 5.222295077763448e-06, + "loss": 0.663, + "step": 17706 + }, + { + "epoch": 0.9745720733116847, + "grad_norm": 0.6484057307243347, + "learning_rate": 5.221862038919446e-06, + "loss": 0.7183, + "step": 17707 + }, + { + "epoch": 0.9746271121140404, + "grad_norm": 0.7640897631645203, + "learning_rate": 5.221428998407988e-06, + "loss": 0.7929, + "step": 17708 + }, + { + "epoch": 0.9746821509163961, + "grad_norm": 0.9045720100402832, + "learning_rate": 5.220995956232327e-06, + "loss": 0.8641, + "step": 17709 + }, + { + "epoch": 0.9747371897187517, + "grad_norm": 0.6404321789741516, + "learning_rate": 5.2205629123957225e-06, + "loss": 0.7055, + "step": 17710 + }, + { + "epoch": 0.9747922285211074, + "grad_norm": 0.5746439099311829, + "learning_rate": 5.220129866901424e-06, + "loss": 0.6194, + "step": 17711 + }, + { + "epoch": 0.974847267323463, + "grad_norm": 0.7163496017456055, + "learning_rate": 5.2196968197526885e-06, + "loss": 0.7532, + "step": 17712 + }, + { + "epoch": 0.9749023061258187, + "grad_norm": 0.6287344098091125, + "learning_rate": 5.219263770952771e-06, + "loss": 0.6701, + "step": 17713 + }, + { + "epoch": 0.9749573449281743, + "grad_norm": 0.6174989938735962, + "learning_rate": 5.218830720504925e-06, + "loss": 0.7362, + "step": 17714 + }, + { + "epoch": 0.97501238373053, + "grad_norm": 0.6240310668945312, + "learning_rate": 5.218397668412407e-06, + "loss": 0.6776, + "step": 17715 + }, + { + "epoch": 0.9750674225328857, + "grad_norm": 0.699640154838562, + "learning_rate": 5.2179646146784704e-06, + "loss": 0.7044, + "step": 17716 + }, + { + "epoch": 0.9751224613352414, + "grad_norm": 0.9073681235313416, + "learning_rate": 5.2175315593063695e-06, + "loss": 0.7662, + "step": 17717 + }, + { + "epoch": 0.975177500137597, + "grad_norm": 0.7450598478317261, + "learning_rate": 5.2170985022993595e-06, + "loss": 0.7926, + "step": 17718 + }, + { + "epoch": 0.9752325389399527, + "grad_norm": 0.8336604833602905, + "learning_rate": 5.2166654436606955e-06, + "loss": 0.7843, + "step": 17719 + }, + { + "epoch": 0.9752875777423083, + "grad_norm": 0.7027361392974854, + "learning_rate": 5.216232383393633e-06, + "loss": 0.7175, + "step": 17720 + }, + { + "epoch": 0.975342616544664, + "grad_norm": 1.23917818069458, + "learning_rate": 5.215799321501424e-06, + "loss": 0.8413, + "step": 17721 + }, + { + "epoch": 0.9753976553470196, + "grad_norm": 0.6831318736076355, + "learning_rate": 5.215366257987326e-06, + "loss": 0.735, + "step": 17722 + }, + { + "epoch": 0.9754526941493753, + "grad_norm": 0.6283305883407593, + "learning_rate": 5.2149331928545935e-06, + "loss": 0.6441, + "step": 17723 + }, + { + "epoch": 0.975507732951731, + "grad_norm": 0.7063291072845459, + "learning_rate": 5.21450012610648e-06, + "loss": 0.6778, + "step": 17724 + }, + { + "epoch": 0.9755627717540867, + "grad_norm": 0.6691693067550659, + "learning_rate": 5.21406705774624e-06, + "loss": 0.7197, + "step": 17725 + }, + { + "epoch": 0.9756178105564423, + "grad_norm": 0.7046432495117188, + "learning_rate": 5.21363398777713e-06, + "loss": 0.7688, + "step": 17726 + }, + { + "epoch": 0.975672849358798, + "grad_norm": 0.836552619934082, + "learning_rate": 5.213200916202404e-06, + "loss": 0.6908, + "step": 17727 + }, + { + "epoch": 0.9757278881611536, + "grad_norm": 0.6683027744293213, + "learning_rate": 5.2127678430253185e-06, + "loss": 0.7254, + "step": 17728 + }, + { + "epoch": 0.9757829269635093, + "grad_norm": 0.7955693006515503, + "learning_rate": 5.212334768249125e-06, + "loss": 0.7707, + "step": 17729 + }, + { + "epoch": 0.9758379657658649, + "grad_norm": 0.7328525185585022, + "learning_rate": 5.21190169187708e-06, + "loss": 0.742, + "step": 17730 + }, + { + "epoch": 0.9758930045682206, + "grad_norm": 0.7649689316749573, + "learning_rate": 5.21146861391244e-06, + "loss": 0.8682, + "step": 17731 + }, + { + "epoch": 0.9759480433705763, + "grad_norm": 0.697281002998352, + "learning_rate": 5.2110355343584585e-06, + "loss": 0.7577, + "step": 17732 + }, + { + "epoch": 0.976003082172932, + "grad_norm": 0.736832320690155, + "learning_rate": 5.21060245321839e-06, + "loss": 0.6747, + "step": 17733 + }, + { + "epoch": 0.9760581209752875, + "grad_norm": 0.7473930716514587, + "learning_rate": 5.210169370495491e-06, + "loss": 0.781, + "step": 17734 + }, + { + "epoch": 0.9761131597776432, + "grad_norm": 0.6661664247512817, + "learning_rate": 5.209736286193013e-06, + "loss": 0.7436, + "step": 17735 + }, + { + "epoch": 0.9761681985799989, + "grad_norm": 0.6759940385818481, + "learning_rate": 5.209303200314215e-06, + "loss": 0.831, + "step": 17736 + }, + { + "epoch": 0.9762232373823546, + "grad_norm": 0.7469375133514404, + "learning_rate": 5.20887011286235e-06, + "loss": 0.7741, + "step": 17737 + }, + { + "epoch": 0.9762782761847102, + "grad_norm": 0.7340208888053894, + "learning_rate": 5.208437023840671e-06, + "loss": 0.8867, + "step": 17738 + }, + { + "epoch": 0.9763333149870659, + "grad_norm": 0.8390399217605591, + "learning_rate": 5.208003933252437e-06, + "loss": 0.7465, + "step": 17739 + }, + { + "epoch": 0.9763883537894216, + "grad_norm": 0.6884977221488953, + "learning_rate": 5.207570841100901e-06, + "loss": 0.8155, + "step": 17740 + }, + { + "epoch": 0.9764433925917771, + "grad_norm": 0.7563072443008423, + "learning_rate": 5.207137747389318e-06, + "loss": 0.6696, + "step": 17741 + }, + { + "epoch": 0.9764984313941328, + "grad_norm": 0.5912995934486389, + "learning_rate": 5.206704652120942e-06, + "loss": 0.6378, + "step": 17742 + }, + { + "epoch": 0.9765534701964885, + "grad_norm": 0.6421001553535461, + "learning_rate": 5.2062715552990304e-06, + "loss": 0.7501, + "step": 17743 + }, + { + "epoch": 0.9766085089988442, + "grad_norm": 0.7305091023445129, + "learning_rate": 5.205838456926837e-06, + "loss": 0.7331, + "step": 17744 + }, + { + "epoch": 0.9766635478011998, + "grad_norm": 0.7133939862251282, + "learning_rate": 5.2054053570076165e-06, + "loss": 0.7701, + "step": 17745 + }, + { + "epoch": 0.9767185866035555, + "grad_norm": 0.7652276754379272, + "learning_rate": 5.204972255544626e-06, + "loss": 0.72, + "step": 17746 + }, + { + "epoch": 0.9767736254059112, + "grad_norm": 0.6148221492767334, + "learning_rate": 5.204539152541117e-06, + "loss": 0.6939, + "step": 17747 + }, + { + "epoch": 0.9768286642082669, + "grad_norm": 0.6623834371566772, + "learning_rate": 5.204106048000347e-06, + "loss": 0.7687, + "step": 17748 + }, + { + "epoch": 0.9768837030106224, + "grad_norm": 0.68714439868927, + "learning_rate": 5.2036729419255705e-06, + "loss": 0.8574, + "step": 17749 + }, + { + "epoch": 0.9769387418129781, + "grad_norm": 0.7681630849838257, + "learning_rate": 5.203239834320042e-06, + "loss": 0.8091, + "step": 17750 + }, + { + "epoch": 0.9769937806153338, + "grad_norm": 0.7655820250511169, + "learning_rate": 5.202806725187018e-06, + "loss": 0.7951, + "step": 17751 + }, + { + "epoch": 0.9770488194176895, + "grad_norm": 0.6182354092597961, + "learning_rate": 5.202373614529754e-06, + "loss": 0.6923, + "step": 17752 + }, + { + "epoch": 0.9771038582200451, + "grad_norm": 0.6287481188774109, + "learning_rate": 5.2019405023515024e-06, + "loss": 0.7334, + "step": 17753 + }, + { + "epoch": 0.9771588970224008, + "grad_norm": 0.7055238485336304, + "learning_rate": 5.20150738865552e-06, + "loss": 0.8105, + "step": 17754 + }, + { + "epoch": 0.9772139358247565, + "grad_norm": 0.8190330862998962, + "learning_rate": 5.201074273445063e-06, + "loss": 0.8578, + "step": 17755 + }, + { + "epoch": 0.9772689746271122, + "grad_norm": 0.6595628261566162, + "learning_rate": 5.200641156723385e-06, + "loss": 0.6637, + "step": 17756 + }, + { + "epoch": 0.9773240134294677, + "grad_norm": 0.742751955986023, + "learning_rate": 5.200208038493743e-06, + "loss": 0.7245, + "step": 17757 + }, + { + "epoch": 0.9773790522318234, + "grad_norm": 0.6058782935142517, + "learning_rate": 5.199774918759389e-06, + "loss": 0.6248, + "step": 17758 + }, + { + "epoch": 0.9774340910341791, + "grad_norm": 0.9763246178627014, + "learning_rate": 5.199341797523582e-06, + "loss": 0.7447, + "step": 17759 + }, + { + "epoch": 0.9774891298365348, + "grad_norm": 0.6947417259216309, + "learning_rate": 5.198908674789575e-06, + "loss": 0.7376, + "step": 17760 + }, + { + "epoch": 0.9775441686388904, + "grad_norm": 0.7201378345489502, + "learning_rate": 5.198475550560623e-06, + "loss": 0.7177, + "step": 17761 + }, + { + "epoch": 0.9775992074412461, + "grad_norm": 0.6808243989944458, + "learning_rate": 5.198042424839981e-06, + "loss": 0.7674, + "step": 17762 + }, + { + "epoch": 0.9776542462436018, + "grad_norm": 0.7210538983345032, + "learning_rate": 5.197609297630907e-06, + "loss": 0.744, + "step": 17763 + }, + { + "epoch": 0.9777092850459574, + "grad_norm": 0.6032211780548096, + "learning_rate": 5.197176168936653e-06, + "loss": 0.7356, + "step": 17764 + }, + { + "epoch": 0.977764323848313, + "grad_norm": 0.6586793661117554, + "learning_rate": 5.1967430387604766e-06, + "loss": 0.8117, + "step": 17765 + }, + { + "epoch": 0.9778193626506687, + "grad_norm": 0.6356725692749023, + "learning_rate": 5.196309907105631e-06, + "loss": 0.6829, + "step": 17766 + }, + { + "epoch": 0.9778744014530244, + "grad_norm": 0.6270668506622314, + "learning_rate": 5.1958767739753745e-06, + "loss": 0.6983, + "step": 17767 + }, + { + "epoch": 0.9779294402553801, + "grad_norm": 0.7375897169113159, + "learning_rate": 5.19544363937296e-06, + "loss": 0.7163, + "step": 17768 + }, + { + "epoch": 0.9779844790577357, + "grad_norm": 0.8138049244880676, + "learning_rate": 5.195010503301643e-06, + "loss": 0.7936, + "step": 17769 + }, + { + "epoch": 0.9780395178600914, + "grad_norm": 0.7183161377906799, + "learning_rate": 5.1945773657646795e-06, + "loss": 0.7003, + "step": 17770 + }, + { + "epoch": 0.978094556662447, + "grad_norm": 0.6597039699554443, + "learning_rate": 5.194144226765325e-06, + "loss": 0.6303, + "step": 17771 + }, + { + "epoch": 0.9781495954648027, + "grad_norm": 0.6552296876907349, + "learning_rate": 5.193711086306834e-06, + "loss": 0.7564, + "step": 17772 + }, + { + "epoch": 0.9782046342671583, + "grad_norm": 0.7361578941345215, + "learning_rate": 5.193277944392463e-06, + "loss": 0.8041, + "step": 17773 + }, + { + "epoch": 0.978259673069514, + "grad_norm": 0.6351678967475891, + "learning_rate": 5.192844801025465e-06, + "loss": 0.6201, + "step": 17774 + }, + { + "epoch": 0.9783147118718697, + "grad_norm": 0.6355697512626648, + "learning_rate": 5.192411656209097e-06, + "loss": 0.7349, + "step": 17775 + }, + { + "epoch": 0.9783697506742254, + "grad_norm": 0.6302974820137024, + "learning_rate": 5.191978509946617e-06, + "loss": 0.6817, + "step": 17776 + }, + { + "epoch": 0.978424789476581, + "grad_norm": 0.6403247117996216, + "learning_rate": 5.191545362241276e-06, + "loss": 0.7492, + "step": 17777 + }, + { + "epoch": 0.9784798282789366, + "grad_norm": 0.6228166818618774, + "learning_rate": 5.191112213096331e-06, + "loss": 0.7064, + "step": 17778 + }, + { + "epoch": 0.9785348670812923, + "grad_norm": 0.6551523804664612, + "learning_rate": 5.190679062515039e-06, + "loss": 0.8182, + "step": 17779 + }, + { + "epoch": 0.978589905883648, + "grad_norm": 0.6780593991279602, + "learning_rate": 5.190245910500654e-06, + "loss": 0.6726, + "step": 17780 + }, + { + "epoch": 0.9786449446860036, + "grad_norm": 0.7101402282714844, + "learning_rate": 5.189812757056431e-06, + "loss": 0.7933, + "step": 17781 + }, + { + "epoch": 0.9786999834883593, + "grad_norm": 0.9489981532096863, + "learning_rate": 5.189379602185628e-06, + "loss": 0.7285, + "step": 17782 + }, + { + "epoch": 0.978755022290715, + "grad_norm": 0.7283034324645996, + "learning_rate": 5.188946445891497e-06, + "loss": 0.7448, + "step": 17783 + }, + { + "epoch": 0.9788100610930706, + "grad_norm": 0.7265629768371582, + "learning_rate": 5.188513288177296e-06, + "loss": 0.7444, + "step": 17784 + }, + { + "epoch": 0.9788650998954262, + "grad_norm": 0.7622523903846741, + "learning_rate": 5.188080129046279e-06, + "loss": 0.7651, + "step": 17785 + }, + { + "epoch": 0.9789201386977819, + "grad_norm": 0.6390808820724487, + "learning_rate": 5.1876469685017e-06, + "loss": 0.7189, + "step": 17786 + }, + { + "epoch": 0.9789751775001376, + "grad_norm": 0.6932275295257568, + "learning_rate": 5.1872138065468195e-06, + "loss": 0.7565, + "step": 17787 + }, + { + "epoch": 0.9790302163024932, + "grad_norm": 0.5994375348091125, + "learning_rate": 5.18678064318489e-06, + "loss": 0.6513, + "step": 17788 + }, + { + "epoch": 0.9790852551048489, + "grad_norm": 0.687844455242157, + "learning_rate": 5.1863474784191655e-06, + "loss": 0.7315, + "step": 17789 + }, + { + "epoch": 0.9791402939072046, + "grad_norm": 0.6444111466407776, + "learning_rate": 5.1859143122529046e-06, + "loss": 0.8145, + "step": 17790 + }, + { + "epoch": 0.9791953327095603, + "grad_norm": 0.6670806407928467, + "learning_rate": 5.18548114468936e-06, + "loss": 0.7711, + "step": 17791 + }, + { + "epoch": 0.9792503715119159, + "grad_norm": 0.7144771814346313, + "learning_rate": 5.18504797573179e-06, + "loss": 0.7265, + "step": 17792 + }, + { + "epoch": 0.9793054103142715, + "grad_norm": 0.7024721503257751, + "learning_rate": 5.18461480538345e-06, + "loss": 0.7601, + "step": 17793 + }, + { + "epoch": 0.9793604491166272, + "grad_norm": 0.7342267632484436, + "learning_rate": 5.184181633647592e-06, + "loss": 0.8719, + "step": 17794 + }, + { + "epoch": 0.9794154879189829, + "grad_norm": 0.6706385016441345, + "learning_rate": 5.183748460527477e-06, + "loss": 0.7708, + "step": 17795 + }, + { + "epoch": 0.9794705267213385, + "grad_norm": 0.6513368487358093, + "learning_rate": 5.183315286026356e-06, + "loss": 0.7379, + "step": 17796 + }, + { + "epoch": 0.9795255655236942, + "grad_norm": 0.698735773563385, + "learning_rate": 5.182882110147487e-06, + "loss": 0.8, + "step": 17797 + }, + { + "epoch": 0.9795806043260499, + "grad_norm": 0.6671596765518188, + "learning_rate": 5.182448932894123e-06, + "loss": 0.7867, + "step": 17798 + }, + { + "epoch": 0.9796356431284056, + "grad_norm": 0.6650397181510925, + "learning_rate": 5.182015754269524e-06, + "loss": 0.6933, + "step": 17799 + }, + { + "epoch": 0.9796906819307611, + "grad_norm": 0.6519697904586792, + "learning_rate": 5.181582574276942e-06, + "loss": 0.6558, + "step": 17800 + }, + { + "epoch": 0.9797457207331168, + "grad_norm": 0.7513225078582764, + "learning_rate": 5.181149392919635e-06, + "loss": 0.7471, + "step": 17801 + }, + { + "epoch": 0.9798007595354725, + "grad_norm": 0.8155935406684875, + "learning_rate": 5.1807162102008564e-06, + "loss": 0.7971, + "step": 17802 + }, + { + "epoch": 0.9798557983378282, + "grad_norm": 0.6526888012886047, + "learning_rate": 5.180283026123864e-06, + "loss": 0.7107, + "step": 17803 + }, + { + "epoch": 0.9799108371401838, + "grad_norm": 0.699776291847229, + "learning_rate": 5.179849840691913e-06, + "loss": 0.7673, + "step": 17804 + }, + { + "epoch": 0.9799658759425395, + "grad_norm": 0.6678556799888611, + "learning_rate": 5.179416653908259e-06, + "loss": 0.7455, + "step": 17805 + }, + { + "epoch": 0.9800209147448952, + "grad_norm": 0.7483236789703369, + "learning_rate": 5.178983465776156e-06, + "loss": 0.7547, + "step": 17806 + }, + { + "epoch": 0.9800759535472509, + "grad_norm": 0.6862025260925293, + "learning_rate": 5.178550276298862e-06, + "loss": 0.725, + "step": 17807 + }, + { + "epoch": 0.9801309923496064, + "grad_norm": 0.7763676643371582, + "learning_rate": 5.178117085479633e-06, + "loss": 0.8345, + "step": 17808 + }, + { + "epoch": 0.9801860311519621, + "grad_norm": 0.670272707939148, + "learning_rate": 5.177683893321722e-06, + "loss": 0.7093, + "step": 17809 + }, + { + "epoch": 0.9802410699543178, + "grad_norm": 0.651607871055603, + "learning_rate": 5.177250699828388e-06, + "loss": 0.6746, + "step": 17810 + }, + { + "epoch": 0.9802961087566735, + "grad_norm": 0.6883462071418762, + "learning_rate": 5.176817505002884e-06, + "loss": 0.7514, + "step": 17811 + }, + { + "epoch": 0.9803511475590291, + "grad_norm": 0.6640946269035339, + "learning_rate": 5.1763843088484665e-06, + "loss": 0.7558, + "step": 17812 + }, + { + "epoch": 0.9804061863613848, + "grad_norm": 0.7128181457519531, + "learning_rate": 5.175951111368393e-06, + "loss": 0.7759, + "step": 17813 + }, + { + "epoch": 0.9804612251637405, + "grad_norm": 0.6555806398391724, + "learning_rate": 5.1755179125659175e-06, + "loss": 0.7697, + "step": 17814 + }, + { + "epoch": 0.9805162639660961, + "grad_norm": 0.7027010917663574, + "learning_rate": 5.175084712444295e-06, + "loss": 0.7325, + "step": 17815 + }, + { + "epoch": 0.9805713027684517, + "grad_norm": 0.6129525303840637, + "learning_rate": 5.1746515110067845e-06, + "loss": 0.7128, + "step": 17816 + }, + { + "epoch": 0.9806263415708074, + "grad_norm": 0.719196081161499, + "learning_rate": 5.1742183082566386e-06, + "loss": 0.8094, + "step": 17817 + }, + { + "epoch": 0.9806813803731631, + "grad_norm": 0.6782718896865845, + "learning_rate": 5.173785104197115e-06, + "loss": 0.7794, + "step": 17818 + }, + { + "epoch": 0.9807364191755188, + "grad_norm": 0.6790881752967834, + "learning_rate": 5.1733518988314705e-06, + "loss": 0.7603, + "step": 17819 + }, + { + "epoch": 0.9807914579778744, + "grad_norm": 0.691223680973053, + "learning_rate": 5.1729186921629595e-06, + "loss": 0.8255, + "step": 17820 + }, + { + "epoch": 0.9808464967802301, + "grad_norm": 0.6151034235954285, + "learning_rate": 5.172485484194836e-06, + "loss": 0.7015, + "step": 17821 + }, + { + "epoch": 0.9809015355825857, + "grad_norm": 0.6329746246337891, + "learning_rate": 5.172052274930359e-06, + "loss": 0.6421, + "step": 17822 + }, + { + "epoch": 0.9809565743849414, + "grad_norm": 0.6794832348823547, + "learning_rate": 5.171619064372781e-06, + "loss": 0.786, + "step": 17823 + }, + { + "epoch": 0.981011613187297, + "grad_norm": 0.7153908014297485, + "learning_rate": 5.171185852525363e-06, + "loss": 0.639, + "step": 17824 + }, + { + "epoch": 0.9810666519896527, + "grad_norm": 0.6464501619338989, + "learning_rate": 5.170752639391357e-06, + "loss": 0.7714, + "step": 17825 + }, + { + "epoch": 0.9811216907920084, + "grad_norm": 0.8912076950073242, + "learning_rate": 5.17031942497402e-06, + "loss": 0.8305, + "step": 17826 + }, + { + "epoch": 0.981176729594364, + "grad_norm": 0.6318126320838928, + "learning_rate": 5.169886209276607e-06, + "loss": 0.704, + "step": 17827 + }, + { + "epoch": 0.9812317683967197, + "grad_norm": 0.6664106249809265, + "learning_rate": 5.1694529923023755e-06, + "loss": 0.7478, + "step": 17828 + }, + { + "epoch": 0.9812868071990754, + "grad_norm": 0.6642717123031616, + "learning_rate": 5.16901977405458e-06, + "loss": 0.6419, + "step": 17829 + }, + { + "epoch": 0.981341846001431, + "grad_norm": 0.6837498545646667, + "learning_rate": 5.168586554536478e-06, + "loss": 0.709, + "step": 17830 + }, + { + "epoch": 0.9813968848037866, + "grad_norm": 0.7012450695037842, + "learning_rate": 5.168153333751323e-06, + "loss": 0.7386, + "step": 17831 + }, + { + "epoch": 0.9814519236061423, + "grad_norm": 0.7230706214904785, + "learning_rate": 5.1677201117023744e-06, + "loss": 0.6954, + "step": 17832 + }, + { + "epoch": 0.981506962408498, + "grad_norm": 0.9957376718521118, + "learning_rate": 5.167286888392886e-06, + "loss": 0.9409, + "step": 17833 + }, + { + "epoch": 0.9815620012108537, + "grad_norm": 0.6738237142562866, + "learning_rate": 5.1668536638261135e-06, + "loss": 0.7469, + "step": 17834 + }, + { + "epoch": 0.9816170400132093, + "grad_norm": 0.689182460308075, + "learning_rate": 5.166420438005313e-06, + "loss": 0.8144, + "step": 17835 + }, + { + "epoch": 0.981672078815565, + "grad_norm": 0.7391964793205261, + "learning_rate": 5.165987210933741e-06, + "loss": 0.8118, + "step": 17836 + }, + { + "epoch": 0.9817271176179206, + "grad_norm": 0.6194493770599365, + "learning_rate": 5.165553982614655e-06, + "loss": 0.6802, + "step": 17837 + }, + { + "epoch": 0.9817821564202763, + "grad_norm": 0.7108197808265686, + "learning_rate": 5.165120753051309e-06, + "loss": 0.812, + "step": 17838 + }, + { + "epoch": 0.9818371952226319, + "grad_norm": 0.599499523639679, + "learning_rate": 5.164687522246959e-06, + "loss": 0.6162, + "step": 17839 + }, + { + "epoch": 0.9818922340249876, + "grad_norm": 0.636405885219574, + "learning_rate": 5.164254290204862e-06, + "loss": 0.741, + "step": 17840 + }, + { + "epoch": 0.9819472728273433, + "grad_norm": 0.6845248937606812, + "learning_rate": 5.163821056928274e-06, + "loss": 0.723, + "step": 17841 + }, + { + "epoch": 0.982002311629699, + "grad_norm": 0.6328433156013489, + "learning_rate": 5.1633878224204515e-06, + "loss": 0.6717, + "step": 17842 + }, + { + "epoch": 0.9820573504320546, + "grad_norm": 0.6898068189620972, + "learning_rate": 5.162954586684649e-06, + "loss": 0.7641, + "step": 17843 + }, + { + "epoch": 0.9821123892344102, + "grad_norm": 0.563658595085144, + "learning_rate": 5.162521349724123e-06, + "loss": 0.6147, + "step": 17844 + }, + { + "epoch": 0.9821674280367659, + "grad_norm": 0.6722010970115662, + "learning_rate": 5.1620881115421314e-06, + "loss": 0.7132, + "step": 17845 + }, + { + "epoch": 0.9822224668391216, + "grad_norm": 0.6915282607078552, + "learning_rate": 5.161654872141928e-06, + "loss": 0.6803, + "step": 17846 + }, + { + "epoch": 0.9822775056414772, + "grad_norm": 0.6965909004211426, + "learning_rate": 5.161221631526769e-06, + "loss": 0.7267, + "step": 17847 + }, + { + "epoch": 0.9823325444438329, + "grad_norm": 0.704317569732666, + "learning_rate": 5.1607883896999126e-06, + "loss": 0.732, + "step": 17848 + }, + { + "epoch": 0.9823875832461886, + "grad_norm": 0.6609948873519897, + "learning_rate": 5.160355146664614e-06, + "loss": 0.7353, + "step": 17849 + }, + { + "epoch": 0.9824426220485443, + "grad_norm": 0.6089136600494385, + "learning_rate": 5.159921902424128e-06, + "loss": 0.6752, + "step": 17850 + }, + { + "epoch": 0.9824976608508998, + "grad_norm": 0.7590814828872681, + "learning_rate": 5.159488656981712e-06, + "loss": 0.7075, + "step": 17851 + }, + { + "epoch": 0.9825526996532555, + "grad_norm": 0.7506054639816284, + "learning_rate": 5.159055410340622e-06, + "loss": 0.6706, + "step": 17852 + }, + { + "epoch": 0.9826077384556112, + "grad_norm": 0.726391613483429, + "learning_rate": 5.158622162504115e-06, + "loss": 0.7227, + "step": 17853 + }, + { + "epoch": 0.9826627772579669, + "grad_norm": 0.7026383280754089, + "learning_rate": 5.158188913475445e-06, + "loss": 0.7934, + "step": 17854 + }, + { + "epoch": 0.9827178160603225, + "grad_norm": 0.6941219568252563, + "learning_rate": 5.1577556632578705e-06, + "loss": 0.7664, + "step": 17855 + }, + { + "epoch": 0.9827728548626782, + "grad_norm": 0.6923757195472717, + "learning_rate": 5.157322411854646e-06, + "loss": 0.8274, + "step": 17856 + }, + { + "epoch": 0.9828278936650339, + "grad_norm": 0.6614245772361755, + "learning_rate": 5.156889159269029e-06, + "loss": 0.7289, + "step": 17857 + }, + { + "epoch": 0.9828829324673896, + "grad_norm": 0.7060726284980774, + "learning_rate": 5.156455905504275e-06, + "loss": 0.6819, + "step": 17858 + }, + { + "epoch": 0.9829379712697451, + "grad_norm": 0.6427240967750549, + "learning_rate": 5.156022650563639e-06, + "loss": 0.7237, + "step": 17859 + }, + { + "epoch": 0.9829930100721008, + "grad_norm": 0.6128491163253784, + "learning_rate": 5.15558939445038e-06, + "loss": 0.7319, + "step": 17860 + }, + { + "epoch": 0.9830480488744565, + "grad_norm": 0.6768588423728943, + "learning_rate": 5.155156137167751e-06, + "loss": 0.673, + "step": 17861 + }, + { + "epoch": 0.9831030876768122, + "grad_norm": 0.7171456217765808, + "learning_rate": 5.1547228787190125e-06, + "loss": 0.7823, + "step": 17862 + }, + { + "epoch": 0.9831581264791678, + "grad_norm": 0.6802380681037903, + "learning_rate": 5.154289619107416e-06, + "loss": 0.7811, + "step": 17863 + }, + { + "epoch": 0.9832131652815235, + "grad_norm": 0.7872791886329651, + "learning_rate": 5.1538563583362215e-06, + "loss": 0.8189, + "step": 17864 + }, + { + "epoch": 0.9832682040838792, + "grad_norm": 0.7060483694076538, + "learning_rate": 5.153423096408683e-06, + "loss": 0.7343, + "step": 17865 + }, + { + "epoch": 0.9833232428862348, + "grad_norm": 0.710662305355072, + "learning_rate": 5.152989833328058e-06, + "loss": 0.7252, + "step": 17866 + }, + { + "epoch": 0.9833782816885904, + "grad_norm": 0.6512863039970398, + "learning_rate": 5.1525565690976035e-06, + "loss": 0.73, + "step": 17867 + }, + { + "epoch": 0.9834333204909461, + "grad_norm": 0.7715367078781128, + "learning_rate": 5.152123303720573e-06, + "loss": 0.736, + "step": 17868 + }, + { + "epoch": 0.9834883592933018, + "grad_norm": 0.6657119393348694, + "learning_rate": 5.151690037200225e-06, + "loss": 0.7094, + "step": 17869 + }, + { + "epoch": 0.9835433980956574, + "grad_norm": 0.6604039669036865, + "learning_rate": 5.151256769539815e-06, + "loss": 0.7337, + "step": 17870 + }, + { + "epoch": 0.9835984368980131, + "grad_norm": 0.6652948260307312, + "learning_rate": 5.150823500742599e-06, + "loss": 0.7307, + "step": 17871 + }, + { + "epoch": 0.9836534757003688, + "grad_norm": 0.7212419509887695, + "learning_rate": 5.150390230811835e-06, + "loss": 0.728, + "step": 17872 + }, + { + "epoch": 0.9837085145027245, + "grad_norm": 0.7573513984680176, + "learning_rate": 5.149956959750778e-06, + "loss": 0.7901, + "step": 17873 + }, + { + "epoch": 0.98376355330508, + "grad_norm": 0.6553870439529419, + "learning_rate": 5.149523687562685e-06, + "loss": 0.7, + "step": 17874 + }, + { + "epoch": 0.9838185921074357, + "grad_norm": 0.6913946270942688, + "learning_rate": 5.14909041425081e-06, + "loss": 0.7547, + "step": 17875 + }, + { + "epoch": 0.9838736309097914, + "grad_norm": 0.6566945910453796, + "learning_rate": 5.148657139818413e-06, + "loss": 0.7256, + "step": 17876 + }, + { + "epoch": 0.9839286697121471, + "grad_norm": 0.6469670534133911, + "learning_rate": 5.14822386426875e-06, + "loss": 0.6803, + "step": 17877 + }, + { + "epoch": 0.9839837085145027, + "grad_norm": 0.6760450005531311, + "learning_rate": 5.147790587605076e-06, + "loss": 0.7567, + "step": 17878 + }, + { + "epoch": 0.9840387473168584, + "grad_norm": 0.7145360708236694, + "learning_rate": 5.1473573098306455e-06, + "loss": 0.7758, + "step": 17879 + }, + { + "epoch": 0.984093786119214, + "grad_norm": 0.8869039416313171, + "learning_rate": 5.1469240309487175e-06, + "loss": 0.708, + "step": 17880 + }, + { + "epoch": 0.9841488249215697, + "grad_norm": 0.7354567050933838, + "learning_rate": 5.146490750962548e-06, + "loss": 0.7731, + "step": 17881 + }, + { + "epoch": 0.9842038637239253, + "grad_norm": 0.6989348530769348, + "learning_rate": 5.146057469875394e-06, + "loss": 0.7419, + "step": 17882 + }, + { + "epoch": 0.984258902526281, + "grad_norm": 0.6871246695518494, + "learning_rate": 5.145624187690509e-06, + "loss": 0.7305, + "step": 17883 + }, + { + "epoch": 0.9843139413286367, + "grad_norm": 1.8921427726745605, + "learning_rate": 5.1451909044111535e-06, + "loss": 0.918, + "step": 17884 + }, + { + "epoch": 0.9843689801309924, + "grad_norm": 0.7058244347572327, + "learning_rate": 5.144757620040582e-06, + "loss": 0.7247, + "step": 17885 + }, + { + "epoch": 0.984424018933348, + "grad_norm": 0.811503529548645, + "learning_rate": 5.14432433458205e-06, + "loss": 0.8942, + "step": 17886 + }, + { + "epoch": 0.9844790577357037, + "grad_norm": 0.7346917390823364, + "learning_rate": 5.143891048038815e-06, + "loss": 0.847, + "step": 17887 + }, + { + "epoch": 0.9845340965380593, + "grad_norm": 0.6721924543380737, + "learning_rate": 5.143457760414135e-06, + "loss": 0.708, + "step": 17888 + }, + { + "epoch": 0.984589135340415, + "grad_norm": 0.6473612785339355, + "learning_rate": 5.143024471711263e-06, + "loss": 0.6688, + "step": 17889 + }, + { + "epoch": 0.9846441741427706, + "grad_norm": 0.7375944256782532, + "learning_rate": 5.14259118193346e-06, + "loss": 0.8455, + "step": 17890 + }, + { + "epoch": 0.9846992129451263, + "grad_norm": 0.7279950976371765, + "learning_rate": 5.142157891083977e-06, + "loss": 0.7632, + "step": 17891 + }, + { + "epoch": 0.984754251747482, + "grad_norm": 0.7861246466636658, + "learning_rate": 5.141724599166074e-06, + "loss": 0.7698, + "step": 17892 + }, + { + "epoch": 0.9848092905498377, + "grad_norm": 0.6509001851081848, + "learning_rate": 5.141291306183007e-06, + "loss": 0.7519, + "step": 17893 + }, + { + "epoch": 0.9848643293521933, + "grad_norm": 0.6098498702049255, + "learning_rate": 5.140858012138032e-06, + "loss": 0.6977, + "step": 17894 + }, + { + "epoch": 0.984919368154549, + "grad_norm": 0.7857867479324341, + "learning_rate": 5.140424717034405e-06, + "loss": 0.6591, + "step": 17895 + }, + { + "epoch": 0.9849744069569046, + "grad_norm": 0.7155032157897949, + "learning_rate": 5.139991420875386e-06, + "loss": 0.821, + "step": 17896 + }, + { + "epoch": 0.9850294457592603, + "grad_norm": 0.6748823523521423, + "learning_rate": 5.139558123664228e-06, + "loss": 0.7852, + "step": 17897 + }, + { + "epoch": 0.9850844845616159, + "grad_norm": 0.7474187016487122, + "learning_rate": 5.139124825404188e-06, + "loss": 0.735, + "step": 17898 + }, + { + "epoch": 0.9851395233639716, + "grad_norm": 0.6562691926956177, + "learning_rate": 5.138691526098522e-06, + "loss": 0.775, + "step": 17899 + }, + { + "epoch": 0.9851945621663273, + "grad_norm": 0.6765697002410889, + "learning_rate": 5.13825822575049e-06, + "loss": 0.6383, + "step": 17900 + }, + { + "epoch": 0.985249600968683, + "grad_norm": 0.6247798800468445, + "learning_rate": 5.137824924363345e-06, + "loss": 0.7331, + "step": 17901 + }, + { + "epoch": 0.9853046397710385, + "grad_norm": 0.6596871018409729, + "learning_rate": 5.1373916219403465e-06, + "loss": 0.7698, + "step": 17902 + }, + { + "epoch": 0.9853596785733942, + "grad_norm": 0.7124335765838623, + "learning_rate": 5.136958318484746e-06, + "loss": 0.8027, + "step": 17903 + }, + { + "epoch": 0.9854147173757499, + "grad_norm": 0.6025542616844177, + "learning_rate": 5.136525013999805e-06, + "loss": 0.6862, + "step": 17904 + }, + { + "epoch": 0.9854697561781056, + "grad_norm": 0.8168695569038391, + "learning_rate": 5.136091708488779e-06, + "loss": 0.7608, + "step": 17905 + }, + { + "epoch": 0.9855247949804612, + "grad_norm": 0.660522997379303, + "learning_rate": 5.135658401954925e-06, + "loss": 0.7814, + "step": 17906 + }, + { + "epoch": 0.9855798337828169, + "grad_norm": 0.6121677160263062, + "learning_rate": 5.1352250944014955e-06, + "loss": 0.6691, + "step": 17907 + }, + { + "epoch": 0.9856348725851726, + "grad_norm": 0.7214170694351196, + "learning_rate": 5.134791785831753e-06, + "loss": 0.8601, + "step": 17908 + }, + { + "epoch": 0.9856899113875283, + "grad_norm": 0.7069474458694458, + "learning_rate": 5.1343584762489515e-06, + "loss": 0.6515, + "step": 17909 + }, + { + "epoch": 0.9857449501898838, + "grad_norm": 0.6847478747367859, + "learning_rate": 5.133925165656347e-06, + "loss": 0.8301, + "step": 17910 + }, + { + "epoch": 0.9857999889922395, + "grad_norm": 0.6535064578056335, + "learning_rate": 5.133491854057196e-06, + "loss": 0.7305, + "step": 17911 + }, + { + "epoch": 0.9858550277945952, + "grad_norm": 0.6921623349189758, + "learning_rate": 5.133058541454757e-06, + "loss": 0.7876, + "step": 17912 + }, + { + "epoch": 0.9859100665969508, + "grad_norm": 0.6774876117706299, + "learning_rate": 5.132625227852286e-06, + "loss": 0.7518, + "step": 17913 + }, + { + "epoch": 0.9859651053993065, + "grad_norm": 0.7022778987884521, + "learning_rate": 5.1321919132530405e-06, + "loss": 0.733, + "step": 17914 + }, + { + "epoch": 0.9860201442016622, + "grad_norm": 0.6287358403205872, + "learning_rate": 5.131758597660273e-06, + "loss": 0.7287, + "step": 17915 + }, + { + "epoch": 0.9860751830040179, + "grad_norm": 0.6294651031494141, + "learning_rate": 5.131325281077245e-06, + "loss": 0.6955, + "step": 17916 + }, + { + "epoch": 0.9861302218063734, + "grad_norm": 0.6313046813011169, + "learning_rate": 5.130891963507211e-06, + "loss": 0.6264, + "step": 17917 + }, + { + "epoch": 0.9861852606087291, + "grad_norm": 0.6451496481895447, + "learning_rate": 5.130458644953428e-06, + "loss": 0.6956, + "step": 17918 + }, + { + "epoch": 0.9862402994110848, + "grad_norm": 0.8261640071868896, + "learning_rate": 5.130025325419151e-06, + "loss": 0.7542, + "step": 17919 + }, + { + "epoch": 0.9862953382134405, + "grad_norm": 0.7422053813934326, + "learning_rate": 5.129592004907641e-06, + "loss": 0.8106, + "step": 17920 + }, + { + "epoch": 0.9863503770157961, + "grad_norm": 0.6633789539337158, + "learning_rate": 5.12915868342215e-06, + "loss": 0.7814, + "step": 17921 + }, + { + "epoch": 0.9864054158181518, + "grad_norm": 0.716140627861023, + "learning_rate": 5.12872536096594e-06, + "loss": 0.7172, + "step": 17922 + }, + { + "epoch": 0.9864604546205075, + "grad_norm": 0.8944414854049683, + "learning_rate": 5.128292037542261e-06, + "loss": 0.8734, + "step": 17923 + }, + { + "epoch": 0.9865154934228632, + "grad_norm": 0.6309584379196167, + "learning_rate": 5.127858713154375e-06, + "loss": 0.722, + "step": 17924 + }, + { + "epoch": 0.9865705322252187, + "grad_norm": 0.6496456861495972, + "learning_rate": 5.127425387805538e-06, + "loss": 0.6808, + "step": 17925 + }, + { + "epoch": 0.9866255710275744, + "grad_norm": 0.7771233320236206, + "learning_rate": 5.126992061499007e-06, + "loss": 0.8073, + "step": 17926 + }, + { + "epoch": 0.9866806098299301, + "grad_norm": 0.6993723511695862, + "learning_rate": 5.126558734238034e-06, + "loss": 0.689, + "step": 17927 + }, + { + "epoch": 0.9867356486322858, + "grad_norm": 0.7060848474502563, + "learning_rate": 5.126125406025881e-06, + "loss": 0.7604, + "step": 17928 + }, + { + "epoch": 0.9867906874346414, + "grad_norm": 0.7408806681632996, + "learning_rate": 5.125692076865804e-06, + "loss": 0.787, + "step": 17929 + }, + { + "epoch": 0.9868457262369971, + "grad_norm": 0.6351509094238281, + "learning_rate": 5.125258746761059e-06, + "loss": 0.7295, + "step": 17930 + }, + { + "epoch": 0.9869007650393528, + "grad_norm": 0.6362680196762085, + "learning_rate": 5.124825415714902e-06, + "loss": 0.7115, + "step": 17931 + }, + { + "epoch": 0.9869558038417084, + "grad_norm": 0.651877224445343, + "learning_rate": 5.12439208373059e-06, + "loss": 0.7006, + "step": 17932 + }, + { + "epoch": 0.987010842644064, + "grad_norm": 0.6457502245903015, + "learning_rate": 5.123958750811382e-06, + "loss": 0.6986, + "step": 17933 + }, + { + "epoch": 0.9870658814464197, + "grad_norm": 0.6840323805809021, + "learning_rate": 5.123525416960533e-06, + "loss": 0.8027, + "step": 17934 + }, + { + "epoch": 0.9871209202487754, + "grad_norm": 0.6861988306045532, + "learning_rate": 5.123092082181298e-06, + "loss": 0.7044, + "step": 17935 + }, + { + "epoch": 0.9871759590511311, + "grad_norm": 0.6497409343719482, + "learning_rate": 5.122658746476937e-06, + "loss": 0.6721, + "step": 17936 + }, + { + "epoch": 0.9872309978534867, + "grad_norm": 0.7505937814712524, + "learning_rate": 5.122225409850707e-06, + "loss": 0.7436, + "step": 17937 + }, + { + "epoch": 0.9872860366558424, + "grad_norm": 0.7507444024085999, + "learning_rate": 5.1217920723058635e-06, + "loss": 0.8291, + "step": 17938 + }, + { + "epoch": 0.987341075458198, + "grad_norm": 0.793416440486908, + "learning_rate": 5.1213587338456615e-06, + "loss": 0.7431, + "step": 17939 + }, + { + "epoch": 0.9873961142605537, + "grad_norm": 0.6958576440811157, + "learning_rate": 5.120925394473362e-06, + "loss": 0.7345, + "step": 17940 + }, + { + "epoch": 0.9874511530629093, + "grad_norm": 0.6618613600730896, + "learning_rate": 5.120492054192218e-06, + "loss": 0.7458, + "step": 17941 + }, + { + "epoch": 0.987506191865265, + "grad_norm": 0.6849207282066345, + "learning_rate": 5.120058713005488e-06, + "loss": 0.7965, + "step": 17942 + }, + { + "epoch": 0.9875612306676207, + "grad_norm": 0.717399001121521, + "learning_rate": 5.119625370916429e-06, + "loss": 0.7462, + "step": 17943 + }, + { + "epoch": 0.9876162694699764, + "grad_norm": 0.6913535594940186, + "learning_rate": 5.119192027928297e-06, + "loss": 0.6916, + "step": 17944 + }, + { + "epoch": 0.987671308272332, + "grad_norm": 0.6635895371437073, + "learning_rate": 5.118758684044351e-06, + "loss": 0.7063, + "step": 17945 + }, + { + "epoch": 0.9877263470746876, + "grad_norm": 0.6347431540489197, + "learning_rate": 5.1183253392678465e-06, + "loss": 0.7639, + "step": 17946 + }, + { + "epoch": 0.9877813858770433, + "grad_norm": 0.7334713935852051, + "learning_rate": 5.11789199360204e-06, + "loss": 0.7947, + "step": 17947 + }, + { + "epoch": 0.987836424679399, + "grad_norm": 0.6637980937957764, + "learning_rate": 5.117458647050188e-06, + "loss": 0.7521, + "step": 17948 + }, + { + "epoch": 0.9878914634817546, + "grad_norm": 0.656301736831665, + "learning_rate": 5.11702529961555e-06, + "loss": 0.5714, + "step": 17949 + }, + { + "epoch": 0.9879465022841103, + "grad_norm": 0.7977020144462585, + "learning_rate": 5.116591951301381e-06, + "loss": 0.7905, + "step": 17950 + }, + { + "epoch": 0.988001541086466, + "grad_norm": 0.6231863498687744, + "learning_rate": 5.1161586021109365e-06, + "loss": 0.6689, + "step": 17951 + }, + { + "epoch": 0.9880565798888217, + "grad_norm": 0.6863840818405151, + "learning_rate": 5.115725252047475e-06, + "loss": 0.8173, + "step": 17952 + }, + { + "epoch": 0.9881116186911773, + "grad_norm": 0.6148532032966614, + "learning_rate": 5.115291901114255e-06, + "loss": 0.734, + "step": 17953 + }, + { + "epoch": 0.9881666574935329, + "grad_norm": 0.6900758743286133, + "learning_rate": 5.114858549314531e-06, + "loss": 0.7317, + "step": 17954 + }, + { + "epoch": 0.9882216962958886, + "grad_norm": 0.6875699758529663, + "learning_rate": 5.1144251966515615e-06, + "loss": 0.6788, + "step": 17955 + }, + { + "epoch": 0.9882767350982442, + "grad_norm": 0.6588704586029053, + "learning_rate": 5.1139918431286014e-06, + "loss": 0.7728, + "step": 17956 + }, + { + "epoch": 0.9883317739005999, + "grad_norm": 0.6850310564041138, + "learning_rate": 5.113558488748911e-06, + "loss": 0.6942, + "step": 17957 + }, + { + "epoch": 0.9883868127029556, + "grad_norm": 0.7054475545883179, + "learning_rate": 5.113125133515744e-06, + "loss": 0.7564, + "step": 17958 + }, + { + "epoch": 0.9884418515053113, + "grad_norm": 0.9986534118652344, + "learning_rate": 5.112691777432361e-06, + "loss": 0.9043, + "step": 17959 + }, + { + "epoch": 0.9884968903076669, + "grad_norm": 0.6844736933708191, + "learning_rate": 5.112258420502013e-06, + "loss": 0.7999, + "step": 17960 + }, + { + "epoch": 0.9885519291100225, + "grad_norm": 0.5971188545227051, + "learning_rate": 5.111825062727964e-06, + "loss": 0.625, + "step": 17961 + }, + { + "epoch": 0.9886069679123782, + "grad_norm": 0.9815204739570618, + "learning_rate": 5.111391704113467e-06, + "loss": 0.6988, + "step": 17962 + }, + { + "epoch": 0.9886620067147339, + "grad_norm": 0.867161750793457, + "learning_rate": 5.11095834466178e-06, + "loss": 0.7014, + "step": 17963 + }, + { + "epoch": 0.9887170455170895, + "grad_norm": 0.8310801982879639, + "learning_rate": 5.1105249843761585e-06, + "loss": 0.7411, + "step": 17964 + }, + { + "epoch": 0.9887720843194452, + "grad_norm": 0.7921817898750305, + "learning_rate": 5.1100916232598614e-06, + "loss": 0.718, + "step": 17965 + }, + { + "epoch": 0.9888271231218009, + "grad_norm": 0.6440842747688293, + "learning_rate": 5.109658261316146e-06, + "loss": 0.6906, + "step": 17966 + }, + { + "epoch": 0.9888821619241566, + "grad_norm": 0.6158735156059265, + "learning_rate": 5.109224898548267e-06, + "loss": 0.6686, + "step": 17967 + }, + { + "epoch": 0.9889372007265121, + "grad_norm": 0.6632601618766785, + "learning_rate": 5.108791534959483e-06, + "loss": 0.7527, + "step": 17968 + }, + { + "epoch": 0.9889922395288678, + "grad_norm": 0.6995930671691895, + "learning_rate": 5.108358170553051e-06, + "loss": 0.8132, + "step": 17969 + }, + { + "epoch": 0.9890472783312235, + "grad_norm": 0.6986364126205444, + "learning_rate": 5.107924805332229e-06, + "loss": 0.7744, + "step": 17970 + }, + { + "epoch": 0.9891023171335792, + "grad_norm": 0.6733060479164124, + "learning_rate": 5.107491439300274e-06, + "loss": 0.6433, + "step": 17971 + }, + { + "epoch": 0.9891573559359348, + "grad_norm": 0.6814376711845398, + "learning_rate": 5.107058072460439e-06, + "loss": 0.6767, + "step": 17972 + }, + { + "epoch": 0.9892123947382905, + "grad_norm": 0.7519477009773254, + "learning_rate": 5.106624704815988e-06, + "loss": 0.7345, + "step": 17973 + }, + { + "epoch": 0.9892674335406462, + "grad_norm": 0.8123548030853271, + "learning_rate": 5.106191336370172e-06, + "loss": 0.7438, + "step": 17974 + }, + { + "epoch": 0.9893224723430019, + "grad_norm": 0.7073908448219299, + "learning_rate": 5.10575796712625e-06, + "loss": 0.8435, + "step": 17975 + }, + { + "epoch": 0.9893775111453574, + "grad_norm": 0.8263893723487854, + "learning_rate": 5.10532459708748e-06, + "loss": 0.9237, + "step": 17976 + }, + { + "epoch": 0.9894325499477131, + "grad_norm": 0.7631207704544067, + "learning_rate": 5.104891226257118e-06, + "loss": 0.7481, + "step": 17977 + }, + { + "epoch": 0.9894875887500688, + "grad_norm": 0.7013441324234009, + "learning_rate": 5.104457854638423e-06, + "loss": 0.6627, + "step": 17978 + }, + { + "epoch": 0.9895426275524245, + "grad_norm": 0.6746618151664734, + "learning_rate": 5.1040244822346496e-06, + "loss": 0.7585, + "step": 17979 + }, + { + "epoch": 0.9895976663547801, + "grad_norm": 0.707109808921814, + "learning_rate": 5.1035911090490555e-06, + "loss": 0.7628, + "step": 17980 + }, + { + "epoch": 0.9896527051571358, + "grad_norm": 0.7457565665245056, + "learning_rate": 5.103157735084899e-06, + "loss": 0.7375, + "step": 17981 + }, + { + "epoch": 0.9897077439594915, + "grad_norm": 0.8174850344657898, + "learning_rate": 5.102724360345437e-06, + "loss": 0.732, + "step": 17982 + }, + { + "epoch": 0.9897627827618471, + "grad_norm": 0.7620667219161987, + "learning_rate": 5.1022909848339265e-06, + "loss": 0.6926, + "step": 17983 + }, + { + "epoch": 0.9898178215642027, + "grad_norm": 0.6727546453475952, + "learning_rate": 5.101857608553622e-06, + "loss": 0.6986, + "step": 17984 + }, + { + "epoch": 0.9898728603665584, + "grad_norm": 0.6382732391357422, + "learning_rate": 5.101424231507786e-06, + "loss": 0.7489, + "step": 17985 + }, + { + "epoch": 0.9899278991689141, + "grad_norm": 0.678092360496521, + "learning_rate": 5.100990853699672e-06, + "loss": 0.7242, + "step": 17986 + }, + { + "epoch": 0.9899829379712698, + "grad_norm": 0.7694140672683716, + "learning_rate": 5.100557475132537e-06, + "loss": 0.8451, + "step": 17987 + }, + { + "epoch": 0.9900379767736254, + "grad_norm": 0.672201931476593, + "learning_rate": 5.100124095809638e-06, + "loss": 0.7942, + "step": 17988 + }, + { + "epoch": 0.9900930155759811, + "grad_norm": 0.7260831594467163, + "learning_rate": 5.099690715734234e-06, + "loss": 0.7073, + "step": 17989 + }, + { + "epoch": 0.9901480543783368, + "grad_norm": 0.6417271494865417, + "learning_rate": 5.099257334909582e-06, + "loss": 0.7272, + "step": 17990 + }, + { + "epoch": 0.9902030931806924, + "grad_norm": 0.6794359683990479, + "learning_rate": 5.098823953338939e-06, + "loss": 0.7286, + "step": 17991 + }, + { + "epoch": 0.990258131983048, + "grad_norm": 0.7701132297515869, + "learning_rate": 5.098390571025559e-06, + "loss": 0.8191, + "step": 17992 + }, + { + "epoch": 0.9903131707854037, + "grad_norm": 0.7108728289604187, + "learning_rate": 5.097957187972703e-06, + "loss": 0.6644, + "step": 17993 + }, + { + "epoch": 0.9903682095877594, + "grad_norm": 0.6622050404548645, + "learning_rate": 5.097523804183627e-06, + "loss": 0.713, + "step": 17994 + }, + { + "epoch": 0.9904232483901151, + "grad_norm": 0.664191722869873, + "learning_rate": 5.097090419661589e-06, + "loss": 0.7217, + "step": 17995 + }, + { + "epoch": 0.9904782871924707, + "grad_norm": 0.6588128209114075, + "learning_rate": 5.096657034409844e-06, + "loss": 0.8139, + "step": 17996 + }, + { + "epoch": 0.9905333259948264, + "grad_norm": 0.6252101063728333, + "learning_rate": 5.096223648431653e-06, + "loss": 0.6455, + "step": 17997 + }, + { + "epoch": 0.990588364797182, + "grad_norm": 0.6657758951187134, + "learning_rate": 5.095790261730269e-06, + "loss": 0.7416, + "step": 17998 + }, + { + "epoch": 0.9906434035995376, + "grad_norm": 0.8094693422317505, + "learning_rate": 5.095356874308952e-06, + "loss": 0.8645, + "step": 17999 + }, + { + "epoch": 0.9906984424018933, + "grad_norm": 0.7278556823730469, + "learning_rate": 5.094923486170957e-06, + "loss": 0.7261, + "step": 18000 + }, + { + "epoch": 0.990753481204249, + "grad_norm": 0.6950508952140808, + "learning_rate": 5.094490097319542e-06, + "loss": 0.7876, + "step": 18001 + }, + { + "epoch": 0.9908085200066047, + "grad_norm": 0.6594102382659912, + "learning_rate": 5.094056707757966e-06, + "loss": 0.8644, + "step": 18002 + }, + { + "epoch": 0.9908635588089603, + "grad_norm": 0.7983774542808533, + "learning_rate": 5.0936233174894855e-06, + "loss": 0.7356, + "step": 18003 + }, + { + "epoch": 0.990918597611316, + "grad_norm": 0.6786965727806091, + "learning_rate": 5.093189926517356e-06, + "loss": 0.6377, + "step": 18004 + }, + { + "epoch": 0.9909736364136716, + "grad_norm": 0.7368496656417847, + "learning_rate": 5.092756534844836e-06, + "loss": 0.7468, + "step": 18005 + }, + { + "epoch": 0.9910286752160273, + "grad_norm": 0.8692933917045593, + "learning_rate": 5.092323142475183e-06, + "loss": 0.7972, + "step": 18006 + }, + { + "epoch": 0.9910837140183829, + "grad_norm": 0.7743621468544006, + "learning_rate": 5.091889749411655e-06, + "loss": 0.643, + "step": 18007 + }, + { + "epoch": 0.9911387528207386, + "grad_norm": 0.6345173716545105, + "learning_rate": 5.0914563556575056e-06, + "loss": 0.7626, + "step": 18008 + }, + { + "epoch": 0.9911937916230943, + "grad_norm": 0.7068833112716675, + "learning_rate": 5.0910229612159974e-06, + "loss": 0.7439, + "step": 18009 + }, + { + "epoch": 0.99124883042545, + "grad_norm": 0.7917591333389282, + "learning_rate": 5.090589566090385e-06, + "loss": 0.8218, + "step": 18010 + }, + { + "epoch": 0.9913038692278056, + "grad_norm": 0.609967827796936, + "learning_rate": 5.0901561702839255e-06, + "loss": 0.6452, + "step": 18011 + }, + { + "epoch": 0.9913589080301612, + "grad_norm": 0.7628803253173828, + "learning_rate": 5.0897227737998745e-06, + "loss": 0.7487, + "step": 18012 + }, + { + "epoch": 0.9914139468325169, + "grad_norm": 0.6240081787109375, + "learning_rate": 5.089289376641492e-06, + "loss": 0.6527, + "step": 18013 + }, + { + "epoch": 0.9914689856348726, + "grad_norm": 0.789800763130188, + "learning_rate": 5.088855978812035e-06, + "loss": 0.7043, + "step": 18014 + }, + { + "epoch": 0.9915240244372282, + "grad_norm": 0.9605777263641357, + "learning_rate": 5.0884225803147604e-06, + "loss": 0.8486, + "step": 18015 + }, + { + "epoch": 0.9915790632395839, + "grad_norm": 0.9741759300231934, + "learning_rate": 5.087989181152924e-06, + "loss": 0.737, + "step": 18016 + }, + { + "epoch": 0.9916341020419396, + "grad_norm": 0.7788803577423096, + "learning_rate": 5.087555781329785e-06, + "loss": 0.8205, + "step": 18017 + }, + { + "epoch": 0.9916891408442953, + "grad_norm": 0.782712459564209, + "learning_rate": 5.0871223808486e-06, + "loss": 0.6982, + "step": 18018 + }, + { + "epoch": 0.9917441796466508, + "grad_norm": 0.6824339628219604, + "learning_rate": 5.086688979712627e-06, + "loss": 0.7448, + "step": 18019 + }, + { + "epoch": 0.9917992184490065, + "grad_norm": 0.6738336682319641, + "learning_rate": 5.0862555779251235e-06, + "loss": 0.6756, + "step": 18020 + }, + { + "epoch": 0.9918542572513622, + "grad_norm": 0.6139102578163147, + "learning_rate": 5.0858221754893455e-06, + "loss": 0.669, + "step": 18021 + }, + { + "epoch": 0.9919092960537179, + "grad_norm": 0.7581914067268372, + "learning_rate": 5.085388772408552e-06, + "loss": 0.7653, + "step": 18022 + }, + { + "epoch": 0.9919643348560735, + "grad_norm": 0.6985335946083069, + "learning_rate": 5.084955368685999e-06, + "loss": 0.7317, + "step": 18023 + }, + { + "epoch": 0.9920193736584292, + "grad_norm": 0.7850139737129211, + "learning_rate": 5.084521964324942e-06, + "loss": 0.8323, + "step": 18024 + }, + { + "epoch": 0.9920744124607849, + "grad_norm": 0.6971372365951538, + "learning_rate": 5.084088559328643e-06, + "loss": 0.8073, + "step": 18025 + }, + { + "epoch": 0.9921294512631406, + "grad_norm": 0.7196096181869507, + "learning_rate": 5.083655153700355e-06, + "loss": 0.793, + "step": 18026 + }, + { + "epoch": 0.9921844900654961, + "grad_norm": 0.6821334362030029, + "learning_rate": 5.083221747443339e-06, + "loss": 0.7266, + "step": 18027 + }, + { + "epoch": 0.9922395288678518, + "grad_norm": 0.6544287204742432, + "learning_rate": 5.082788340560849e-06, + "loss": 0.7812, + "step": 18028 + }, + { + "epoch": 0.9922945676702075, + "grad_norm": 0.6568353176116943, + "learning_rate": 5.082354933056145e-06, + "loss": 0.5682, + "step": 18029 + }, + { + "epoch": 0.9923496064725632, + "grad_norm": 0.7107832431793213, + "learning_rate": 5.081921524932483e-06, + "loss": 0.7068, + "step": 18030 + }, + { + "epoch": 0.9924046452749188, + "grad_norm": 0.6856156587600708, + "learning_rate": 5.081488116193122e-06, + "loss": 0.774, + "step": 18031 + }, + { + "epoch": 0.9924596840772745, + "grad_norm": 0.7203328013420105, + "learning_rate": 5.0810547068413165e-06, + "loss": 0.736, + "step": 18032 + }, + { + "epoch": 0.9925147228796302, + "grad_norm": 0.6304858326911926, + "learning_rate": 5.080621296880327e-06, + "loss": 0.6814, + "step": 18033 + }, + { + "epoch": 0.9925697616819859, + "grad_norm": 0.645539402961731, + "learning_rate": 5.080187886313408e-06, + "loss": 0.7984, + "step": 18034 + }, + { + "epoch": 0.9926248004843414, + "grad_norm": 0.655221700668335, + "learning_rate": 5.079754475143819e-06, + "loss": 0.7022, + "step": 18035 + }, + { + "epoch": 0.9926798392866971, + "grad_norm": 0.6705572605133057, + "learning_rate": 5.079321063374816e-06, + "loss": 0.82, + "step": 18036 + }, + { + "epoch": 0.9927348780890528, + "grad_norm": 0.6417540907859802, + "learning_rate": 5.078887651009657e-06, + "loss": 0.6941, + "step": 18037 + }, + { + "epoch": 0.9927899168914085, + "grad_norm": 0.6728167533874512, + "learning_rate": 5.078454238051601e-06, + "loss": 0.655, + "step": 18038 + }, + { + "epoch": 0.9928449556937641, + "grad_norm": 0.6670491099357605, + "learning_rate": 5.078020824503903e-06, + "loss": 0.7286, + "step": 18039 + }, + { + "epoch": 0.9928999944961198, + "grad_norm": 0.7217230200767517, + "learning_rate": 5.077587410369821e-06, + "loss": 0.7635, + "step": 18040 + }, + { + "epoch": 0.9929550332984755, + "grad_norm": 0.6922209858894348, + "learning_rate": 5.077153995652614e-06, + "loss": 0.6762, + "step": 18041 + }, + { + "epoch": 0.993010072100831, + "grad_norm": 0.8064020872116089, + "learning_rate": 5.0767205803555376e-06, + "loss": 0.6568, + "step": 18042 + }, + { + "epoch": 0.9930651109031867, + "grad_norm": 0.6940247416496277, + "learning_rate": 5.07628716448185e-06, + "loss": 0.7986, + "step": 18043 + }, + { + "epoch": 0.9931201497055424, + "grad_norm": 0.6729159951210022, + "learning_rate": 5.075853748034808e-06, + "loss": 0.7476, + "step": 18044 + }, + { + "epoch": 0.9931751885078981, + "grad_norm": 0.7298180460929871, + "learning_rate": 5.07542033101767e-06, + "loss": 0.7146, + "step": 18045 + }, + { + "epoch": 0.9932302273102537, + "grad_norm": 0.6649488806724548, + "learning_rate": 5.0749869134336935e-06, + "loss": 0.7668, + "step": 18046 + }, + { + "epoch": 0.9932852661126094, + "grad_norm": 0.6678916811943054, + "learning_rate": 5.0745534952861346e-06, + "loss": 0.6365, + "step": 18047 + }, + { + "epoch": 0.993340304914965, + "grad_norm": 0.8051722049713135, + "learning_rate": 5.0741200765782506e-06, + "loss": 0.7426, + "step": 18048 + }, + { + "epoch": 0.9933953437173207, + "grad_norm": 0.6319746375083923, + "learning_rate": 5.073686657313302e-06, + "loss": 0.6855, + "step": 18049 + }, + { + "epoch": 0.9934503825196763, + "grad_norm": 0.6837119460105896, + "learning_rate": 5.0732532374945434e-06, + "loss": 0.7657, + "step": 18050 + }, + { + "epoch": 0.993505421322032, + "grad_norm": 0.6625218391418457, + "learning_rate": 5.072819817125233e-06, + "loss": 0.7836, + "step": 18051 + }, + { + "epoch": 0.9935604601243877, + "grad_norm": 0.6353194117546082, + "learning_rate": 5.072386396208627e-06, + "loss": 0.7298, + "step": 18052 + }, + { + "epoch": 0.9936154989267434, + "grad_norm": 0.702965259552002, + "learning_rate": 5.071952974747986e-06, + "loss": 0.8423, + "step": 18053 + }, + { + "epoch": 0.993670537729099, + "grad_norm": 0.7337152361869812, + "learning_rate": 5.071519552746566e-06, + "loss": 0.734, + "step": 18054 + }, + { + "epoch": 0.9937255765314547, + "grad_norm": 0.6457077264785767, + "learning_rate": 5.071086130207623e-06, + "loss": 0.8179, + "step": 18055 + }, + { + "epoch": 0.9937806153338103, + "grad_norm": 0.6137452125549316, + "learning_rate": 5.070652707134416e-06, + "loss": 0.6268, + "step": 18056 + }, + { + "epoch": 0.993835654136166, + "grad_norm": 0.6552117466926575, + "learning_rate": 5.070219283530202e-06, + "loss": 0.7754, + "step": 18057 + }, + { + "epoch": 0.9938906929385216, + "grad_norm": 0.7026165127754211, + "learning_rate": 5.06978585939824e-06, + "loss": 0.7684, + "step": 18058 + }, + { + "epoch": 0.9939457317408773, + "grad_norm": 0.5770609974861145, + "learning_rate": 5.069352434741786e-06, + "loss": 0.6603, + "step": 18059 + }, + { + "epoch": 0.994000770543233, + "grad_norm": 0.6504804491996765, + "learning_rate": 5.068919009564095e-06, + "loss": 0.7602, + "step": 18060 + }, + { + "epoch": 0.9940558093455887, + "grad_norm": 0.7996575832366943, + "learning_rate": 5.068485583868429e-06, + "loss": 0.8378, + "step": 18061 + }, + { + "epoch": 0.9941108481479443, + "grad_norm": 0.6521992087364197, + "learning_rate": 5.068052157658044e-06, + "loss": 0.7303, + "step": 18062 + }, + { + "epoch": 0.9941658869503, + "grad_norm": 0.7057589292526245, + "learning_rate": 5.067618730936197e-06, + "loss": 0.7986, + "step": 18063 + }, + { + "epoch": 0.9942209257526556, + "grad_norm": 0.7683775424957275, + "learning_rate": 5.067185303706144e-06, + "loss": 0.7051, + "step": 18064 + }, + { + "epoch": 0.9942759645550113, + "grad_norm": 0.6067343950271606, + "learning_rate": 5.0667518759711465e-06, + "loss": 0.6399, + "step": 18065 + }, + { + "epoch": 0.9943310033573669, + "grad_norm": 0.7575204372406006, + "learning_rate": 5.066318447734459e-06, + "loss": 0.7124, + "step": 18066 + }, + { + "epoch": 0.9943860421597226, + "grad_norm": 0.7211230397224426, + "learning_rate": 5.06588501899934e-06, + "loss": 0.7719, + "step": 18067 + }, + { + "epoch": 0.9944410809620783, + "grad_norm": 0.8543131351470947, + "learning_rate": 5.065451589769047e-06, + "loss": 0.7033, + "step": 18068 + }, + { + "epoch": 0.994496119764434, + "grad_norm": 0.6685993075370789, + "learning_rate": 5.065018160046837e-06, + "loss": 0.7331, + "step": 18069 + }, + { + "epoch": 0.9945511585667896, + "grad_norm": 0.7711308002471924, + "learning_rate": 5.064584729835969e-06, + "loss": 0.7389, + "step": 18070 + }, + { + "epoch": 0.9946061973691452, + "grad_norm": 0.8691509366035461, + "learning_rate": 5.064151299139698e-06, + "loss": 0.7457, + "step": 18071 + }, + { + "epoch": 0.9946612361715009, + "grad_norm": 0.7130528688430786, + "learning_rate": 5.063717867961282e-06, + "loss": 0.7442, + "step": 18072 + }, + { + "epoch": 0.9947162749738566, + "grad_norm": 0.6554980278015137, + "learning_rate": 5.063284436303982e-06, + "loss": 0.6587, + "step": 18073 + }, + { + "epoch": 0.9947713137762122, + "grad_norm": 0.7127482295036316, + "learning_rate": 5.062851004171052e-06, + "loss": 0.7282, + "step": 18074 + }, + { + "epoch": 0.9948263525785679, + "grad_norm": 0.7662289142608643, + "learning_rate": 5.062417571565751e-06, + "loss": 0.7576, + "step": 18075 + }, + { + "epoch": 0.9948813913809236, + "grad_norm": 0.9702945351600647, + "learning_rate": 5.061984138491335e-06, + "loss": 0.8, + "step": 18076 + }, + { + "epoch": 0.9949364301832793, + "grad_norm": 0.6914392709732056, + "learning_rate": 5.061550704951064e-06, + "loss": 0.6912, + "step": 18077 + }, + { + "epoch": 0.9949914689856348, + "grad_norm": 0.5724602341651917, + "learning_rate": 5.061117270948194e-06, + "loss": 0.636, + "step": 18078 + }, + { + "epoch": 0.9950465077879905, + "grad_norm": 0.8456641435623169, + "learning_rate": 5.060683836485983e-06, + "loss": 0.7298, + "step": 18079 + }, + { + "epoch": 0.9951015465903462, + "grad_norm": 0.7085068821907043, + "learning_rate": 5.060250401567689e-06, + "loss": 0.7567, + "step": 18080 + }, + { + "epoch": 0.9951565853927019, + "grad_norm": 0.7004247307777405, + "learning_rate": 5.059816966196569e-06, + "loss": 0.7666, + "step": 18081 + }, + { + "epoch": 0.9952116241950575, + "grad_norm": 0.6310703158378601, + "learning_rate": 5.059383530375881e-06, + "loss": 0.712, + "step": 18082 + }, + { + "epoch": 0.9952666629974132, + "grad_norm": 0.6922436356544495, + "learning_rate": 5.058950094108882e-06, + "loss": 0.775, + "step": 18083 + }, + { + "epoch": 0.9953217017997689, + "grad_norm": 0.7536594867706299, + "learning_rate": 5.058516657398829e-06, + "loss": 0.7872, + "step": 18084 + }, + { + "epoch": 0.9953767406021244, + "grad_norm": 0.6446521282196045, + "learning_rate": 5.058083220248981e-06, + "loss": 0.6646, + "step": 18085 + }, + { + "epoch": 0.9954317794044801, + "grad_norm": 0.6904979348182678, + "learning_rate": 5.057649782662595e-06, + "loss": 0.7559, + "step": 18086 + }, + { + "epoch": 0.9954868182068358, + "grad_norm": 0.7692407965660095, + "learning_rate": 5.057216344642929e-06, + "loss": 0.7622, + "step": 18087 + }, + { + "epoch": 0.9955418570091915, + "grad_norm": 0.660089910030365, + "learning_rate": 5.0567829061932405e-06, + "loss": 0.7823, + "step": 18088 + }, + { + "epoch": 0.9955968958115471, + "grad_norm": 0.6194438934326172, + "learning_rate": 5.056349467316785e-06, + "loss": 0.6406, + "step": 18089 + }, + { + "epoch": 0.9956519346139028, + "grad_norm": 0.7209805250167847, + "learning_rate": 5.055916028016824e-06, + "loss": 0.7373, + "step": 18090 + }, + { + "epoch": 0.9957069734162585, + "grad_norm": 0.8307274580001831, + "learning_rate": 5.055482588296613e-06, + "loss": 0.7486, + "step": 18091 + }, + { + "epoch": 0.9957620122186142, + "grad_norm": 0.6076368093490601, + "learning_rate": 5.055049148159409e-06, + "loss": 0.7009, + "step": 18092 + }, + { + "epoch": 0.9958170510209697, + "grad_norm": 0.6484249830245972, + "learning_rate": 5.054615707608471e-06, + "loss": 0.7158, + "step": 18093 + }, + { + "epoch": 0.9958720898233254, + "grad_norm": 0.6629005670547485, + "learning_rate": 5.054182266647055e-06, + "loss": 0.7392, + "step": 18094 + }, + { + "epoch": 0.9959271286256811, + "grad_norm": 0.8084813952445984, + "learning_rate": 5.0537488252784205e-06, + "loss": 0.7418, + "step": 18095 + }, + { + "epoch": 0.9959821674280368, + "grad_norm": 0.9468874931335449, + "learning_rate": 5.053315383505824e-06, + "loss": 0.8699, + "step": 18096 + }, + { + "epoch": 0.9960372062303924, + "grad_norm": 0.7731055617332458, + "learning_rate": 5.052881941332522e-06, + "loss": 0.9144, + "step": 18097 + }, + { + "epoch": 0.9960922450327481, + "grad_norm": 0.7422712445259094, + "learning_rate": 5.052448498761776e-06, + "loss": 0.7075, + "step": 18098 + }, + { + "epoch": 0.9961472838351038, + "grad_norm": 0.7030091881752014, + "learning_rate": 5.05201505579684e-06, + "loss": 0.782, + "step": 18099 + }, + { + "epoch": 0.9962023226374594, + "grad_norm": 0.7698554396629333, + "learning_rate": 5.051581612440972e-06, + "loss": 0.7743, + "step": 18100 + }, + { + "epoch": 0.996257361439815, + "grad_norm": 0.6205971837043762, + "learning_rate": 5.051148168697431e-06, + "loss": 0.6862, + "step": 18101 + }, + { + "epoch": 0.9963124002421707, + "grad_norm": 0.7322680354118347, + "learning_rate": 5.050714724569474e-06, + "loss": 0.7658, + "step": 18102 + }, + { + "epoch": 0.9963674390445264, + "grad_norm": 1.250335454940796, + "learning_rate": 5.050281280060358e-06, + "loss": 0.7925, + "step": 18103 + }, + { + "epoch": 0.9964224778468821, + "grad_norm": 1.0011868476867676, + "learning_rate": 5.049847835173344e-06, + "loss": 0.6378, + "step": 18104 + }, + { + "epoch": 0.9964775166492377, + "grad_norm": 0.6508066058158875, + "learning_rate": 5.049414389911684e-06, + "loss": 0.7329, + "step": 18105 + }, + { + "epoch": 0.9965325554515934, + "grad_norm": 0.6632688641548157, + "learning_rate": 5.0489809442786406e-06, + "loss": 0.7599, + "step": 18106 + }, + { + "epoch": 0.996587594253949, + "grad_norm": 0.7015318274497986, + "learning_rate": 5.048547498277469e-06, + "loss": 0.7626, + "step": 18107 + }, + { + "epoch": 0.9966426330563047, + "grad_norm": 0.7433881759643555, + "learning_rate": 5.048114051911427e-06, + "loss": 0.7612, + "step": 18108 + }, + { + "epoch": 0.9966976718586603, + "grad_norm": 0.7714030742645264, + "learning_rate": 5.047680605183772e-06, + "loss": 0.875, + "step": 18109 + }, + { + "epoch": 0.996752710661016, + "grad_norm": 0.7467747330665588, + "learning_rate": 5.047247158097764e-06, + "loss": 0.8541, + "step": 18110 + }, + { + "epoch": 0.9968077494633717, + "grad_norm": 0.6674730181694031, + "learning_rate": 5.0468137106566586e-06, + "loss": 0.7139, + "step": 18111 + }, + { + "epoch": 0.9968627882657274, + "grad_norm": 0.6534250974655151, + "learning_rate": 5.046380262863713e-06, + "loss": 0.782, + "step": 18112 + }, + { + "epoch": 0.996917827068083, + "grad_norm": 0.6498619318008423, + "learning_rate": 5.0459468147221846e-06, + "loss": 0.7743, + "step": 18113 + }, + { + "epoch": 0.9969728658704387, + "grad_norm": 0.8043835163116455, + "learning_rate": 5.045513366235335e-06, + "loss": 0.7288, + "step": 18114 + }, + { + "epoch": 0.9970279046727943, + "grad_norm": 0.7055554389953613, + "learning_rate": 5.045079917406418e-06, + "loss": 0.7245, + "step": 18115 + }, + { + "epoch": 0.99708294347515, + "grad_norm": 0.7295365333557129, + "learning_rate": 5.044646468238692e-06, + "loss": 0.7464, + "step": 18116 + }, + { + "epoch": 0.9971379822775056, + "grad_norm": 0.6557313799858093, + "learning_rate": 5.0442130187354155e-06, + "loss": 0.6831, + "step": 18117 + }, + { + "epoch": 0.9971930210798613, + "grad_norm": 0.7435111403465271, + "learning_rate": 5.043779568899846e-06, + "loss": 0.8412, + "step": 18118 + }, + { + "epoch": 0.997248059882217, + "grad_norm": 0.6655508875846863, + "learning_rate": 5.043346118735242e-06, + "loss": 0.7027, + "step": 18119 + }, + { + "epoch": 0.9973030986845727, + "grad_norm": 0.5845352411270142, + "learning_rate": 5.042912668244858e-06, + "loss": 0.6496, + "step": 18120 + }, + { + "epoch": 0.9973581374869283, + "grad_norm": 0.8248149156570435, + "learning_rate": 5.042479217431954e-06, + "loss": 0.8128, + "step": 18121 + }, + { + "epoch": 0.9974131762892839, + "grad_norm": 0.7040210962295532, + "learning_rate": 5.042045766299789e-06, + "loss": 0.7452, + "step": 18122 + }, + { + "epoch": 0.9974682150916396, + "grad_norm": 0.6569299101829529, + "learning_rate": 5.0416123148516185e-06, + "loss": 0.7664, + "step": 18123 + }, + { + "epoch": 0.9975232538939953, + "grad_norm": 0.6373575329780579, + "learning_rate": 5.041178863090701e-06, + "loss": 0.7861, + "step": 18124 + }, + { + "epoch": 0.9975782926963509, + "grad_norm": 0.793760359287262, + "learning_rate": 5.040745411020294e-06, + "loss": 0.7909, + "step": 18125 + }, + { + "epoch": 0.9976333314987066, + "grad_norm": 0.6696873307228088, + "learning_rate": 5.0403119586436555e-06, + "loss": 0.754, + "step": 18126 + }, + { + "epoch": 0.9976883703010623, + "grad_norm": 0.6404829621315002, + "learning_rate": 5.039878505964043e-06, + "loss": 0.7401, + "step": 18127 + }, + { + "epoch": 0.9977434091034179, + "grad_norm": 0.7170735001564026, + "learning_rate": 5.039445052984715e-06, + "loss": 0.7789, + "step": 18128 + }, + { + "epoch": 0.9977984479057735, + "grad_norm": 0.7306370735168457, + "learning_rate": 5.039011599708928e-06, + "loss": 0.7786, + "step": 18129 + }, + { + "epoch": 0.9978534867081292, + "grad_norm": 0.5817254781723022, + "learning_rate": 5.0385781461399405e-06, + "loss": 0.6853, + "step": 18130 + }, + { + "epoch": 0.9979085255104849, + "grad_norm": 0.6285548210144043, + "learning_rate": 5.038144692281011e-06, + "loss": 0.7597, + "step": 18131 + }, + { + "epoch": 0.9979635643128405, + "grad_norm": 0.6597462892532349, + "learning_rate": 5.037711238135395e-06, + "loss": 0.7448, + "step": 18132 + }, + { + "epoch": 0.9980186031151962, + "grad_norm": 0.6609601378440857, + "learning_rate": 5.0372777837063505e-06, + "loss": 0.7324, + "step": 18133 + }, + { + "epoch": 0.9980736419175519, + "grad_norm": 0.6518424153327942, + "learning_rate": 5.036844328997138e-06, + "loss": 0.8015, + "step": 18134 + }, + { + "epoch": 0.9981286807199076, + "grad_norm": 0.7128679156303406, + "learning_rate": 5.036410874011013e-06, + "loss": 0.7436, + "step": 18135 + }, + { + "epoch": 0.9981837195222631, + "grad_norm": 0.7561799883842468, + "learning_rate": 5.0359774187512324e-06, + "loss": 0.8486, + "step": 18136 + }, + { + "epoch": 0.9982387583246188, + "grad_norm": 0.702702522277832, + "learning_rate": 5.035543963221056e-06, + "loss": 0.7645, + "step": 18137 + }, + { + "epoch": 0.9982937971269745, + "grad_norm": 0.636001467704773, + "learning_rate": 5.0351105074237405e-06, + "loss": 0.6667, + "step": 18138 + }, + { + "epoch": 0.9983488359293302, + "grad_norm": 0.6555122137069702, + "learning_rate": 5.034677051362545e-06, + "loss": 0.6966, + "step": 18139 + }, + { + "epoch": 0.9984038747316858, + "grad_norm": 0.7465553879737854, + "learning_rate": 5.034243595040724e-06, + "loss": 0.8167, + "step": 18140 + }, + { + "epoch": 0.9984589135340415, + "grad_norm": 0.7388782501220703, + "learning_rate": 5.033810138461539e-06, + "loss": 0.7637, + "step": 18141 + }, + { + "epoch": 0.9985139523363972, + "grad_norm": 0.7342116832733154, + "learning_rate": 5.033376681628245e-06, + "loss": 0.6742, + "step": 18142 + }, + { + "epoch": 0.9985689911387529, + "grad_norm": 0.6627763509750366, + "learning_rate": 5.0329432245441016e-06, + "loss": 0.6908, + "step": 18143 + }, + { + "epoch": 0.9986240299411084, + "grad_norm": 0.6392931938171387, + "learning_rate": 5.032509767212366e-06, + "loss": 0.6588, + "step": 18144 + }, + { + "epoch": 0.9986790687434641, + "grad_norm": 0.7912692427635193, + "learning_rate": 5.032076309636294e-06, + "loss": 0.8287, + "step": 18145 + }, + { + "epoch": 0.9987341075458198, + "grad_norm": 0.6602104306221008, + "learning_rate": 5.031642851819146e-06, + "loss": 0.7309, + "step": 18146 + }, + { + "epoch": 0.9987891463481755, + "grad_norm": 0.7000023126602173, + "learning_rate": 5.0312093937641794e-06, + "loss": 0.6837, + "step": 18147 + }, + { + "epoch": 0.9988441851505311, + "grad_norm": 0.6883202791213989, + "learning_rate": 5.03077593547465e-06, + "loss": 0.7644, + "step": 18148 + }, + { + "epoch": 0.9988992239528868, + "grad_norm": 0.6720017194747925, + "learning_rate": 5.030342476953817e-06, + "loss": 0.7135, + "step": 18149 + }, + { + "epoch": 0.9989542627552425, + "grad_norm": 0.7414998412132263, + "learning_rate": 5.029909018204939e-06, + "loss": 0.7674, + "step": 18150 + }, + { + "epoch": 0.9990093015575982, + "grad_norm": 0.7004234790802002, + "learning_rate": 5.029475559231273e-06, + "loss": 0.8119, + "step": 18151 + }, + { + "epoch": 0.9990643403599537, + "grad_norm": 0.5972886681556702, + "learning_rate": 5.029042100036077e-06, + "loss": 0.6716, + "step": 18152 + }, + { + "epoch": 0.9991193791623094, + "grad_norm": 0.6771349906921387, + "learning_rate": 5.028608640622606e-06, + "loss": 0.7743, + "step": 18153 + }, + { + "epoch": 0.9991744179646651, + "grad_norm": 0.6927672028541565, + "learning_rate": 5.0281751809941225e-06, + "loss": 0.7718, + "step": 18154 + }, + { + "epoch": 0.9992294567670208, + "grad_norm": 0.8227388262748718, + "learning_rate": 5.0277417211538815e-06, + "loss": 0.823, + "step": 18155 + }, + { + "epoch": 0.9992844955693764, + "grad_norm": 0.6523411870002747, + "learning_rate": 5.027308261105141e-06, + "loss": 0.6624, + "step": 18156 + }, + { + "epoch": 0.9993395343717321, + "grad_norm": 0.7758201956748962, + "learning_rate": 5.026874800851157e-06, + "loss": 0.8567, + "step": 18157 + }, + { + "epoch": 0.9993945731740878, + "grad_norm": 0.6323235630989075, + "learning_rate": 5.026441340395192e-06, + "loss": 0.7494, + "step": 18158 + }, + { + "epoch": 0.9994496119764434, + "grad_norm": 0.6327700018882751, + "learning_rate": 5.0260078797405e-06, + "loss": 0.67, + "step": 18159 + }, + { + "epoch": 0.999504650778799, + "grad_norm": 0.6610170602798462, + "learning_rate": 5.025574418890339e-06, + "loss": 0.7279, + "step": 18160 + }, + { + "epoch": 0.9995596895811547, + "grad_norm": 0.677026629447937, + "learning_rate": 5.025140957847967e-06, + "loss": 0.7256, + "step": 18161 + }, + { + "epoch": 0.9996147283835104, + "grad_norm": 0.8088963031768799, + "learning_rate": 5.024707496616643e-06, + "loss": 0.7456, + "step": 18162 + }, + { + "epoch": 0.9996697671858661, + "grad_norm": 0.6300736665725708, + "learning_rate": 5.024274035199626e-06, + "loss": 0.758, + "step": 18163 + }, + { + "epoch": 0.9997248059882217, + "grad_norm": 0.6184163093566895, + "learning_rate": 5.023840573600169e-06, + "loss": 0.6011, + "step": 18164 + }, + { + "epoch": 0.9997798447905774, + "grad_norm": 0.7885013222694397, + "learning_rate": 5.023407111821534e-06, + "loss": 0.7626, + "step": 18165 + }, + { + "epoch": 0.999834883592933, + "grad_norm": 0.8335184454917908, + "learning_rate": 5.022973649866978e-06, + "loss": 0.7157, + "step": 18166 + }, + { + "epoch": 0.9998899223952887, + "grad_norm": 0.6559926867485046, + "learning_rate": 5.022540187739757e-06, + "loss": 0.6273, + "step": 18167 + }, + { + "epoch": 0.9999449611976443, + "grad_norm": 0.6130450963973999, + "learning_rate": 5.022106725443131e-06, + "loss": 0.6352, + "step": 18168 + }, + { + "epoch": 1.0, + "grad_norm": 0.7267221212387085, + "learning_rate": 5.0216732629803554e-06, + "loss": 0.6457, + "step": 18169 + }, + { + "epoch": 1.0000550388023557, + "grad_norm": 0.8285889029502869, + "learning_rate": 5.021239800354691e-06, + "loss": 0.7876, + "step": 18170 + }, + { + "epoch": 1.0001100776047114, + "grad_norm": 0.7533310651779175, + "learning_rate": 5.020806337569392e-06, + "loss": 0.7404, + "step": 18171 + }, + { + "epoch": 1.000165116407067, + "grad_norm": 0.7649720311164856, + "learning_rate": 5.02037287462772e-06, + "loss": 0.7648, + "step": 18172 + }, + { + "epoch": 1.0002201552094225, + "grad_norm": 0.687062680721283, + "learning_rate": 5.019939411532929e-06, + "loss": 0.7159, + "step": 18173 + }, + { + "epoch": 1.0002751940117782, + "grad_norm": 0.6438779830932617, + "learning_rate": 5.01950594828828e-06, + "loss": 0.6121, + "step": 18174 + }, + { + "epoch": 1.000330232814134, + "grad_norm": 0.7010402679443359, + "learning_rate": 5.0190724848970296e-06, + "loss": 0.8288, + "step": 18175 + }, + { + "epoch": 1.0003852716164896, + "grad_norm": 0.6656293869018555, + "learning_rate": 5.018639021362436e-06, + "loss": 0.7333, + "step": 18176 + }, + { + "epoch": 1.0004403104188453, + "grad_norm": 0.7373145222663879, + "learning_rate": 5.018205557687756e-06, + "loss": 0.7492, + "step": 18177 + }, + { + "epoch": 1.000495349221201, + "grad_norm": 0.6980772018432617, + "learning_rate": 5.017772093876248e-06, + "loss": 0.7217, + "step": 18178 + }, + { + "epoch": 1.0005503880235567, + "grad_norm": 0.6385942101478577, + "learning_rate": 5.01733862993117e-06, + "loss": 0.6618, + "step": 18179 + }, + { + "epoch": 1.0006054268259124, + "grad_norm": 0.6605677008628845, + "learning_rate": 5.016905165855779e-06, + "loss": 0.7343, + "step": 18180 } ], "logging_steps": 1, @@ -108197,7 +127286,7 @@ "attributes": {} } }, - "total_flos": 4.56027501400739e+19, + "total_flos": 5.3650294282439885e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null