{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9945945945945946,
  "eval_steps": 500,
  "global_step": 115,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008648648648648649,
      "grad_norm": 2.249741315841675,
      "learning_rate": 0.0001,
      "loss": 1.8319,
      "step": 1
    },
    {
      "epoch": 0.017297297297297298,
      "grad_norm": 2.1813502311706543,
      "learning_rate": 0.0002,
      "loss": 1.4027,
      "step": 2
    },
    {
      "epoch": 0.025945945945945945,
      "grad_norm": 0.8601759672164917,
      "learning_rate": 0.00019823008849557524,
      "loss": 1.1102,
      "step": 3
    },
    {
      "epoch": 0.034594594594594595,
      "grad_norm": 1.7297605276107788,
      "learning_rate": 0.00019646017699115044,
      "loss": 1.3774,
      "step": 4
    },
    {
      "epoch": 0.043243243243243246,
      "grad_norm": 1.0936262607574463,
      "learning_rate": 0.00019469026548672567,
      "loss": 0.895,
      "step": 5
    },
    {
      "epoch": 0.05189189189189189,
      "grad_norm": 0.6946480870246887,
      "learning_rate": 0.00019292035398230087,
      "loss": 0.7451,
      "step": 6
    },
    {
      "epoch": 0.06054054054054054,
      "grad_norm": 0.45863592624664307,
      "learning_rate": 0.00019115044247787613,
      "loss": 0.876,
      "step": 7
    },
    {
      "epoch": 0.06918918918918919,
      "grad_norm": 0.5447478890419006,
      "learning_rate": 0.00018938053097345133,
      "loss": 0.7719,
      "step": 8
    },
    {
      "epoch": 0.07783783783783783,
      "grad_norm": 0.45514124631881714,
      "learning_rate": 0.00018761061946902656,
      "loss": 0.5759,
      "step": 9
    },
    {
      "epoch": 0.08648648648648649,
      "grad_norm": 0.4590395987033844,
      "learning_rate": 0.0001858407079646018,
      "loss": 0.5838,
      "step": 10
    },
    {
      "epoch": 0.09513513513513513,
      "grad_norm": 0.5425634384155273,
      "learning_rate": 0.000184070796460177,
      "loss": 0.6641,
      "step": 11
    },
    {
      "epoch": 0.10378378378378378,
      "grad_norm": 1.0379027128219604,
      "learning_rate": 0.00018230088495575222,
      "loss": 0.9623,
      "step": 12
    },
    {
      "epoch": 0.11243243243243244,
      "grad_norm": 0.5286022424697876,
      "learning_rate": 0.00018053097345132742,
      "loss": 0.4761,
      "step": 13
    },
    {
      "epoch": 0.12108108108108108,
      "grad_norm": 0.6451830267906189,
      "learning_rate": 0.00017876106194690265,
      "loss": 0.547,
      "step": 14
    },
    {
      "epoch": 0.12972972972972974,
      "grad_norm": 0.6369953751564026,
      "learning_rate": 0.0001769911504424779,
      "loss": 0.5872,
      "step": 15
    },
    {
      "epoch": 0.13837837837837838,
      "grad_norm": 0.4720052182674408,
      "learning_rate": 0.0001752212389380531,
      "loss": 0.3248,
      "step": 16
    },
    {
      "epoch": 0.14702702702702702,
      "grad_norm": 0.5918360352516174,
      "learning_rate": 0.00017345132743362834,
      "loss": 0.6277,
      "step": 17
    },
    {
      "epoch": 0.15567567567567567,
      "grad_norm": 0.5242601037025452,
      "learning_rate": 0.00017168141592920354,
      "loss": 0.5645,
      "step": 18
    },
    {
      "epoch": 0.1643243243243243,
      "grad_norm": 0.474292129278183,
      "learning_rate": 0.00016991150442477877,
      "loss": 0.2115,
      "step": 19
    },
    {
      "epoch": 0.17297297297297298,
      "grad_norm": 0.6523647904396057,
      "learning_rate": 0.000168141592920354,
      "loss": 0.5803,
      "step": 20
    },
    {
      "epoch": 0.18162162162162163,
      "grad_norm": 0.521297812461853,
      "learning_rate": 0.0001663716814159292,
      "loss": 0.4483,
      "step": 21
    },
    {
      "epoch": 0.19027027027027027,
      "grad_norm": 0.5689568519592285,
      "learning_rate": 0.00016460176991150443,
      "loss": 0.6231,
      "step": 22
    },
    {
      "epoch": 0.1989189189189189,
      "grad_norm": 0.4570567011833191,
      "learning_rate": 0.00016283185840707966,
      "loss": 0.2368,
      "step": 23
    },
    {
      "epoch": 0.20756756756756756,
      "grad_norm": 0.414307564496994,
      "learning_rate": 0.0001610619469026549,
      "loss": 0.4674,
      "step": 24
    },
    {
      "epoch": 0.21621621621621623,
      "grad_norm": 0.5027227997779846,
      "learning_rate": 0.0001592920353982301,
      "loss": 0.3558,
      "step": 25
    },
    {
      "epoch": 0.22486486486486487,
      "grad_norm": 0.4441507160663605,
      "learning_rate": 0.00015752212389380532,
      "loss": 0.437,
      "step": 26
    },
    {
      "epoch": 0.23351351351351352,
      "grad_norm": 0.4098701477050781,
      "learning_rate": 0.00015575221238938055,
      "loss": 0.3553,
      "step": 27
    },
    {
      "epoch": 0.24216216216216216,
      "grad_norm": 0.3602244257926941,
      "learning_rate": 0.00015398230088495575,
      "loss": 0.3689,
      "step": 28
    },
    {
      "epoch": 0.2508108108108108,
      "grad_norm": 0.4340718984603882,
      "learning_rate": 0.00015221238938053098,
      "loss": 0.318,
      "step": 29
    },
    {
      "epoch": 0.2594594594594595,
      "grad_norm": 0.44470590353012085,
      "learning_rate": 0.00015044247787610618,
      "loss": 0.4992,
      "step": 30
    },
    {
      "epoch": 0.2681081081081081,
      "grad_norm": 0.43699413537979126,
      "learning_rate": 0.00014867256637168144,
      "loss": 0.3362,
      "step": 31
    },
    {
      "epoch": 0.27675675675675676,
      "grad_norm": 0.4950752258300781,
      "learning_rate": 0.00014690265486725664,
      "loss": 0.4464,
      "step": 32
    },
    {
      "epoch": 0.28540540540540543,
      "grad_norm": 0.4312315881252289,
      "learning_rate": 0.00014513274336283187,
      "loss": 0.4786,
      "step": 33
    },
    {
      "epoch": 0.29405405405405405,
      "grad_norm": 0.45234543085098267,
      "learning_rate": 0.0001433628318584071,
      "loss": 0.5572,
      "step": 34
    },
    {
      "epoch": 0.3027027027027027,
      "grad_norm": 0.4373219311237335,
      "learning_rate": 0.0001415929203539823,
      "loss": 0.3873,
      "step": 35
    },
    {
      "epoch": 0.31135135135135134,
      "grad_norm": 0.35862988233566284,
      "learning_rate": 0.00013982300884955753,
      "loss": 0.2902,
      "step": 36
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.41014787554740906,
      "learning_rate": 0.00013805309734513276,
      "loss": 0.3806,
      "step": 37
    },
    {
      "epoch": 0.3286486486486486,
      "grad_norm": 0.4181463420391083,
      "learning_rate": 0.00013628318584070796,
      "loss": 0.3036,
      "step": 38
    },
    {
      "epoch": 0.3372972972972973,
      "grad_norm": 0.3663095235824585,
      "learning_rate": 0.00013451327433628321,
      "loss": 0.1979,
      "step": 39
    },
    {
      "epoch": 0.34594594594594597,
      "grad_norm": 0.46295005083084106,
      "learning_rate": 0.00013274336283185842,
      "loss": 0.4204,
      "step": 40
    },
    {
      "epoch": 0.3545945945945946,
      "grad_norm": 0.39596325159072876,
      "learning_rate": 0.00013097345132743365,
      "loss": 0.3512,
      "step": 41
    },
    {
      "epoch": 0.36324324324324325,
      "grad_norm": 0.7628335952758789,
      "learning_rate": 0.00012920353982300885,
      "loss": 0.4965,
      "step": 42
    },
    {
      "epoch": 0.37189189189189187,
      "grad_norm": 0.5216770172119141,
      "learning_rate": 0.00012743362831858408,
      "loss": 0.4658,
      "step": 43
    },
    {
      "epoch": 0.38054054054054054,
      "grad_norm": 0.38578447699546814,
      "learning_rate": 0.0001256637168141593,
      "loss": 0.2661,
      "step": 44
    },
    {
      "epoch": 0.3891891891891892,
      "grad_norm": 0.2811882197856903,
      "learning_rate": 0.0001238938053097345,
      "loss": 0.1545,
      "step": 45
    },
    {
      "epoch": 0.3978378378378378,
      "grad_norm": 0.3812131881713867,
      "learning_rate": 0.00012212389380530974,
      "loss": 0.3295,
      "step": 46
    },
    {
      "epoch": 0.4064864864864865,
      "grad_norm": 0.3791070878505707,
      "learning_rate": 0.00012035398230088497,
      "loss": 0.2472,
      "step": 47
    },
    {
      "epoch": 0.4151351351351351,
      "grad_norm": 0.38515138626098633,
      "learning_rate": 0.0001185840707964602,
      "loss": 0.4042,
      "step": 48
    },
    {
      "epoch": 0.4237837837837838,
      "grad_norm": 0.5093116164207458,
      "learning_rate": 0.00011681415929203541,
      "loss": 0.8376,
      "step": 49
    },
    {
      "epoch": 0.43243243243243246,
      "grad_norm": 0.2971178889274597,
      "learning_rate": 0.00011504424778761063,
      "loss": 0.4082,
      "step": 50
    },
    {
      "epoch": 0.4410810810810811,
      "grad_norm": 0.30018818378448486,
      "learning_rate": 0.00011327433628318584,
      "loss": 0.129,
      "step": 51
    },
    {
      "epoch": 0.44972972972972974,
      "grad_norm": 0.4631483256816864,
      "learning_rate": 0.00011150442477876106,
      "loss": 0.3752,
      "step": 52
    },
    {
      "epoch": 0.45837837837837836,
      "grad_norm": 0.3890452980995178,
      "learning_rate": 0.00010973451327433629,
      "loss": 0.4054,
      "step": 53
    },
    {
      "epoch": 0.46702702702702703,
      "grad_norm": 0.3566686511039734,
      "learning_rate": 0.0001079646017699115,
      "loss": 0.2452,
      "step": 54
    },
    {
      "epoch": 0.4756756756756757,
      "grad_norm": 0.4903372526168823,
      "learning_rate": 0.00010619469026548674,
      "loss": 0.4505,
      "step": 55
    },
    {
      "epoch": 0.4843243243243243,
      "grad_norm": 0.3836239278316498,
      "learning_rate": 0.00010442477876106196,
      "loss": 0.3952,
      "step": 56
    },
    {
      "epoch": 0.492972972972973,
      "grad_norm": 0.42047417163848877,
      "learning_rate": 0.00010265486725663717,
      "loss": 0.5074,
      "step": 57
    },
    {
      "epoch": 0.5016216216216216,
      "grad_norm": 0.24409635365009308,
      "learning_rate": 0.00010088495575221239,
      "loss": 0.1389,
      "step": 58
    },
    {
      "epoch": 0.5102702702702703,
      "grad_norm": 0.3819220960140228,
      "learning_rate": 9.911504424778762e-05,
      "loss": 0.3945,
      "step": 59
    },
    {
      "epoch": 0.518918918918919,
      "grad_norm": 0.31148406863212585,
      "learning_rate": 9.734513274336283e-05,
      "loss": 0.5203,
      "step": 60
    },
    {
      "epoch": 0.5275675675675676,
      "grad_norm": 0.3157011866569519,
      "learning_rate": 9.557522123893806e-05,
      "loss": 0.262,
      "step": 61
    },
    {
      "epoch": 0.5362162162162162,
      "grad_norm": 0.40180379152297974,
      "learning_rate": 9.380530973451328e-05,
      "loss": 0.2404,
      "step": 62
    },
    {
      "epoch": 0.5448648648648649,
      "grad_norm": 0.4064180552959442,
      "learning_rate": 9.20353982300885e-05,
      "loss": 0.6118,
      "step": 63
    },
    {
      "epoch": 0.5535135135135135,
      "grad_norm": 0.3912467956542969,
      "learning_rate": 9.026548672566371e-05,
      "loss": 0.271,
      "step": 64
    },
    {
      "epoch": 0.5621621621621622,
      "grad_norm": 0.31059980392456055,
      "learning_rate": 8.849557522123895e-05,
      "loss": 0.2373,
      "step": 65
    },
    {
      "epoch": 0.5708108108108109,
      "grad_norm": 0.30928152799606323,
      "learning_rate": 8.672566371681417e-05,
      "loss": 0.4169,
      "step": 66
    },
    {
      "epoch": 0.5794594594594594,
      "grad_norm": 0.40631791949272156,
      "learning_rate": 8.495575221238938e-05,
      "loss": 0.4175,
      "step": 67
    },
    {
      "epoch": 0.5881081081081081,
      "grad_norm": 0.40440961718559265,
      "learning_rate": 8.31858407079646e-05,
      "loss": 0.3269,
      "step": 68
    },
    {
      "epoch": 0.5967567567567568,
      "grad_norm": 0.4534294009208679,
      "learning_rate": 8.141592920353983e-05,
      "loss": 0.2242,
      "step": 69
    },
    {
      "epoch": 0.6054054054054054,
      "grad_norm": 0.41317978501319885,
      "learning_rate": 7.964601769911504e-05,
      "loss": 0.2633,
      "step": 70
    },
    {
      "epoch": 0.614054054054054,
      "grad_norm": 0.272535115480423,
      "learning_rate": 7.787610619469027e-05,
      "loss": 0.1455,
      "step": 71
    },
    {
      "epoch": 0.6227027027027027,
      "grad_norm": 0.4280416667461395,
      "learning_rate": 7.610619469026549e-05,
      "loss": 0.5289,
      "step": 72
    },
    {
      "epoch": 0.6313513513513513,
      "grad_norm": 0.4870530664920807,
      "learning_rate": 7.433628318584072e-05,
      "loss": 0.5633,
      "step": 73
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.38074707984924316,
      "learning_rate": 7.256637168141593e-05,
      "loss": 0.4738,
      "step": 74
    },
    {
      "epoch": 0.6486486486486487,
      "grad_norm": 0.32775411009788513,
      "learning_rate": 7.079646017699115e-05,
      "loss": 0.2764,
      "step": 75
    },
    {
      "epoch": 0.6572972972972972,
      "grad_norm": 0.3663316071033478,
      "learning_rate": 6.902654867256638e-05,
      "loss": 0.4794,
      "step": 76
    },
    {
      "epoch": 0.6659459459459459,
      "grad_norm": 0.36854031682014465,
      "learning_rate": 6.725663716814161e-05,
      "loss": 0.1809,
      "step": 77
    },
    {
      "epoch": 0.6745945945945946,
      "grad_norm": 0.37296342849731445,
      "learning_rate": 6.548672566371682e-05,
      "loss": 0.4067,
      "step": 78
    },
    {
      "epoch": 0.6832432432432433,
      "grad_norm": 0.4202044606208801,
      "learning_rate": 6.371681415929204e-05,
      "loss": 0.2752,
      "step": 79
    },
    {
      "epoch": 0.6918918918918919,
      "grad_norm": 0.29250282049179077,
      "learning_rate": 6.194690265486725e-05,
      "loss": 0.1461,
      "step": 80
    },
    {
      "epoch": 0.7005405405405405,
      "grad_norm": 0.37763354182243347,
      "learning_rate": 6.017699115044248e-05,
      "loss": 0.2817,
      "step": 81
    },
    {
      "epoch": 0.7091891891891892,
      "grad_norm": 0.30031171441078186,
      "learning_rate": 5.8407079646017705e-05,
      "loss": 0.1572,
      "step": 82
    },
    {
      "epoch": 0.7178378378378378,
      "grad_norm": 0.4519175887107849,
      "learning_rate": 5.663716814159292e-05,
      "loss": 0.3046,
      "step": 83
    },
    {
      "epoch": 0.7264864864864865,
      "grad_norm": 0.3103352189064026,
      "learning_rate": 5.486725663716814e-05,
      "loss": 0.1347,
      "step": 84
    },
    {
      "epoch": 0.7351351351351352,
      "grad_norm": 0.7960600852966309,
      "learning_rate": 5.309734513274337e-05,
      "loss": 0.3168,
      "step": 85
    },
    {
      "epoch": 0.7437837837837837,
      "grad_norm": 0.3281419277191162,
      "learning_rate": 5.132743362831859e-05,
      "loss": 0.2045,
      "step": 86
    },
    {
      "epoch": 0.7524324324324324,
      "grad_norm": 0.35785752534866333,
      "learning_rate": 4.955752212389381e-05,
      "loss": 0.4077,
      "step": 87
    },
    {
      "epoch": 0.7610810810810811,
      "grad_norm": 0.37461650371551514,
      "learning_rate": 4.778761061946903e-05,
      "loss": 0.3227,
      "step": 88
    },
    {
      "epoch": 0.7697297297297298,
      "grad_norm": 0.3365744352340698,
      "learning_rate": 4.601769911504425e-05,
      "loss": 0.2306,
      "step": 89
    },
    {
      "epoch": 0.7783783783783784,
      "grad_norm": 0.29543980956077576,
      "learning_rate": 4.4247787610619477e-05,
      "loss": 0.3661,
      "step": 90
    },
    {
      "epoch": 0.787027027027027,
      "grad_norm": 0.3135324716567993,
      "learning_rate": 4.247787610619469e-05,
      "loss": 0.2503,
      "step": 91
    },
    {
      "epoch": 0.7956756756756757,
      "grad_norm": 0.23556429147720337,
      "learning_rate": 4.0707964601769914e-05,
      "loss": 0.1044,
      "step": 92
    },
    {
      "epoch": 0.8043243243243243,
      "grad_norm": 0.2718769907951355,
      "learning_rate": 3.893805309734514e-05,
      "loss": 0.1471,
      "step": 93
    },
    {
      "epoch": 0.812972972972973,
      "grad_norm": 0.25528448820114136,
      "learning_rate": 3.716814159292036e-05,
      "loss": 0.1126,
      "step": 94
    },
    {
      "epoch": 0.8216216216216217,
      "grad_norm": 0.514164388179779,
      "learning_rate": 3.5398230088495574e-05,
      "loss": 0.3423,
      "step": 95
    },
    {
      "epoch": 0.8302702702702702,
      "grad_norm": 0.33162716031074524,
      "learning_rate": 3.3628318584070804e-05,
      "loss": 0.3637,
      "step": 96
    },
    {
      "epoch": 0.8389189189189189,
      "grad_norm": 0.25161704421043396,
      "learning_rate": 3.185840707964602e-05,
      "loss": 0.1284,
      "step": 97
    },
    {
      "epoch": 0.8475675675675676,
      "grad_norm": 0.32825589179992676,
      "learning_rate": 3.008849557522124e-05,
      "loss": 0.2171,
      "step": 98
    },
    {
      "epoch": 0.8562162162162162,
      "grad_norm": 0.23435255885124207,
      "learning_rate": 2.831858407079646e-05,
      "loss": 0.16,
      "step": 99
    },
    {
      "epoch": 0.8648648648648649,
      "grad_norm": 0.2661581337451935,
      "learning_rate": 2.6548672566371686e-05,
      "loss": 0.2421,
      "step": 100
    },
    {
      "epoch": 0.8735135135135135,
      "grad_norm": 0.2724602222442627,
      "learning_rate": 2.4778761061946905e-05,
      "loss": 0.1246,
      "step": 101
    },
    {
      "epoch": 0.8821621621621621,
      "grad_norm": 0.47894561290740967,
      "learning_rate": 2.3008849557522124e-05,
      "loss": 0.4472,
      "step": 102
    },
    {
      "epoch": 0.8908108108108108,
      "grad_norm": 0.3064163327217102,
      "learning_rate": 2.1238938053097346e-05,
      "loss": 0.2987,
      "step": 103
    },
    {
      "epoch": 0.8994594594594595,
      "grad_norm": 0.4226900637149811,
      "learning_rate": 1.946902654867257e-05,
      "loss": 0.4185,
      "step": 104
    },
    {
      "epoch": 0.9081081081081082,
      "grad_norm": 0.34745219349861145,
      "learning_rate": 1.7699115044247787e-05,
      "loss": 0.2572,
      "step": 105
    },
    {
      "epoch": 0.9167567567567567,
      "grad_norm": 0.35236531496047974,
      "learning_rate": 1.592920353982301e-05,
      "loss": 0.3427,
      "step": 106
    },
    {
      "epoch": 0.9254054054054054,
      "grad_norm": 0.37095391750335693,
      "learning_rate": 1.415929203539823e-05,
      "loss": 0.4018,
      "step": 107
    },
    {
      "epoch": 0.9340540540540541,
      "grad_norm": 0.3331229090690613,
      "learning_rate": 1.2389380530973452e-05,
      "loss": 0.2038,
      "step": 108
    },
    {
      "epoch": 0.9427027027027027,
      "grad_norm": 0.2652183175086975,
      "learning_rate": 1.0619469026548673e-05,
      "loss": 0.1072,
      "step": 109
    },
    {
      "epoch": 0.9513513513513514,
      "grad_norm": 0.29123690724372864,
      "learning_rate": 8.849557522123894e-06,
      "loss": 0.1406,
      "step": 110
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.3317340612411499,
      "learning_rate": 7.079646017699115e-06,
      "loss": 0.2202,
      "step": 111
    },
    {
      "epoch": 0.9686486486486486,
      "grad_norm": 0.47986647486686707,
      "learning_rate": 5.3097345132743365e-06,
      "loss": 0.3464,
      "step": 112
    },
    {
      "epoch": 0.9772972972972973,
      "grad_norm": 0.2612822949886322,
      "learning_rate": 3.5398230088495575e-06,
      "loss": 0.1271,
      "step": 113
    },
    {
      "epoch": 0.985945945945946,
      "grad_norm": 0.26845863461494446,
      "learning_rate": 1.7699115044247788e-06,
      "loss": 0.1044,
      "step": 114
    },
    {
      "epoch": 0.9945945945945946,
      "grad_norm": 0.2526237368583679,
      "learning_rate": 0.0,
      "loss": 0.1158,
      "step": 115
    },
    {
      "epoch": 0.9945945945945946,
      "step": 115,
      "total_flos": 1.3431114641260646e+17,
      "train_loss": 0.4029887131374815,
      "train_runtime": 1125.7865,
      "train_samples_per_second": 0.822,
      "train_steps_per_second": 0.102
    }
  ],
  "logging_steps": 1,
  "max_steps": 115,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3431114641260646e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}