{
  "best_metric": 0.2019248753786087,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 3.0247349823321557,
  "eval_steps": 25,
  "global_step": 107,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.028268551236749116,
      "grad_norm": 0.1496291607618332,
      "learning_rate": 2.5e-05,
      "loss": 0.2719,
      "step": 1
    },
    {
      "epoch": 0.028268551236749116,
      "eval_loss": 0.33758026361465454,
      "eval_runtime": 2.2677,
      "eval_samples_per_second": 22.049,
      "eval_steps_per_second": 5.733,
      "step": 1
    },
    {
      "epoch": 0.05653710247349823,
      "grad_norm": 0.19203834235668182,
      "learning_rate": 5e-05,
      "loss": 0.3043,
      "step": 2
    },
    {
      "epoch": 0.08480565371024736,
      "grad_norm": 0.1976221799850464,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.3131,
      "step": 3
    },
    {
      "epoch": 0.11307420494699646,
      "grad_norm": 0.18888598680496216,
      "learning_rate": 0.0001,
      "loss": 0.3167,
      "step": 4
    },
    {
      "epoch": 0.1413427561837456,
      "grad_norm": 0.16196908056735992,
      "learning_rate": 9.997906976305083e-05,
      "loss": 0.3673,
      "step": 5
    },
    {
      "epoch": 0.1696113074204947,
      "grad_norm": 0.13969330489635468,
      "learning_rate": 9.991629852219523e-05,
      "loss": 0.3187,
      "step": 6
    },
    {
      "epoch": 0.1978798586572438,
      "grad_norm": 0.16544975340366364,
      "learning_rate": 9.981174466929743e-05,
      "loss": 0.3186,
      "step": 7
    },
    {
      "epoch": 0.22614840989399293,
      "grad_norm": 0.1924961507320404,
      "learning_rate": 9.966550546377587e-05,
      "loss": 0.3365,
      "step": 8
    },
    {
      "epoch": 0.254416961130742,
      "grad_norm": 0.12072353810071945,
      "learning_rate": 9.947771694212933e-05,
      "loss": 0.2508,
      "step": 9
    },
    {
      "epoch": 0.2826855123674912,
      "grad_norm": 0.12432608753442764,
      "learning_rate": 9.924855379139136e-05,
      "loss": 0.2776,
      "step": 10
    },
    {
      "epoch": 0.31095406360424027,
      "grad_norm": 0.10128191113471985,
      "learning_rate": 9.897822918663062e-05,
      "loss": 0.2447,
      "step": 11
    },
    {
      "epoch": 0.3392226148409894,
      "grad_norm": 0.08782458305358887,
      "learning_rate": 9.866699459264848e-05,
      "loss": 0.2468,
      "step": 12
    },
    {
      "epoch": 0.3674911660777385,
      "grad_norm": 0.07989340275526047,
      "learning_rate": 9.831513953005823e-05,
      "loss": 0.254,
      "step": 13
    },
    {
      "epoch": 0.3957597173144876,
      "grad_norm": 0.07268672436475754,
      "learning_rate": 9.792299130596348e-05,
      "loss": 0.2531,
      "step": 14
    },
    {
      "epoch": 0.42402826855123676,
      "grad_norm": 0.0746292918920517,
      "learning_rate": 9.749091470948645e-05,
      "loss": 0.2708,
      "step": 15
    },
    {
      "epoch": 0.45229681978798586,
      "grad_norm": 0.07829014211893082,
      "learning_rate": 9.70193116724291e-05,
      "loss": 0.2828,
      "step": 16
    },
    {
      "epoch": 0.48056537102473496,
      "grad_norm": 0.09487626701593399,
      "learning_rate": 9.650862089538307e-05,
      "loss": 0.3013,
      "step": 17
    },
    {
      "epoch": 0.508833922261484,
      "grad_norm": 0.06753429770469666,
      "learning_rate": 9.595931743963597e-05,
      "loss": 0.2171,
      "step": 18
    },
    {
      "epoch": 0.5371024734982333,
      "grad_norm": 0.07974890619516373,
      "learning_rate": 9.537191228525384e-05,
      "loss": 0.2098,
      "step": 19
    },
    {
      "epoch": 0.5653710247349824,
      "grad_norm": 0.07332316040992737,
      "learning_rate": 9.474695185575073e-05,
      "loss": 0.2234,
      "step": 20
    },
    {
      "epoch": 0.5936395759717314,
      "grad_norm": 0.06308283656835556,
      "learning_rate": 9.408501750978769e-05,
      "loss": 0.2219,
      "step": 21
    },
    {
      "epoch": 0.6219081272084805,
      "grad_norm": 0.07921251654624939,
      "learning_rate": 9.338672500037388e-05,
      "loss": 0.237,
      "step": 22
    },
    {
      "epoch": 0.6501766784452296,
      "grad_norm": 0.0856022983789444,
      "learning_rate": 9.26527239020729e-05,
      "loss": 0.2579,
      "step": 23
    },
    {
      "epoch": 0.6784452296819788,
      "grad_norm": 0.0858432948589325,
      "learning_rate": 9.188369700674736e-05,
      "loss": 0.2633,
      "step": 24
    },
    {
      "epoch": 0.7067137809187279,
      "grad_norm": 0.08091321587562561,
      "learning_rate": 9.108035968840348e-05,
      "loss": 0.2571,
      "step": 25
    },
    {
      "epoch": 0.7067137809187279,
      "eval_loss": 0.22624744474887848,
      "eval_runtime": 2.3489,
      "eval_samples_per_second": 21.286,
      "eval_steps_per_second": 5.534,
      "step": 25
    },
    {
      "epoch": 0.734982332155477,
      "grad_norm": 0.09223099052906036,
      "learning_rate": 9.024345923772673e-05,
      "loss": 0.2829,
      "step": 26
    },
    {
      "epoch": 0.7632508833922261,
      "grad_norm": 0.06381526589393616,
      "learning_rate": 8.937377416692753e-05,
      "loss": 0.1954,
      "step": 27
    },
    {
      "epoch": 0.7915194346289752,
      "grad_norm": 0.0784890204668045,
      "learning_rate": 8.847211348554383e-05,
      "loss": 0.2135,
      "step": 28
    },
    {
      "epoch": 0.8197879858657244,
      "grad_norm": 0.08194228261709213,
      "learning_rate": 8.753931594787381e-05,
      "loss": 0.2163,
      "step": 29
    },
    {
      "epoch": 0.8480565371024735,
      "grad_norm": 0.07212778180837631,
      "learning_rate": 8.65762492727392e-05,
      "loss": 0.2276,
      "step": 30
    },
    {
      "epoch": 0.8763250883392226,
      "grad_norm": 0.07101049274206161,
      "learning_rate": 8.558380933630481e-05,
      "loss": 0.2567,
      "step": 31
    },
    {
      "epoch": 0.9045936395759717,
      "grad_norm": 0.06946881860494614,
      "learning_rate": 8.456291933870523e-05,
      "loss": 0.2489,
      "step": 32
    },
    {
      "epoch": 0.9328621908127208,
      "grad_norm": 0.08646480739116669,
      "learning_rate": 8.351452894525369e-05,
      "loss": 0.2449,
      "step": 33
    },
    {
      "epoch": 0.9611307420494699,
      "grad_norm": 0.09382244199514389,
      "learning_rate": 8.243961340303246e-05,
      "loss": 0.2713,
      "step": 34
    },
    {
      "epoch": 0.9893992932862191,
      "grad_norm": 0.10727506130933762,
      "learning_rate": 8.13391726336859e-05,
      "loss": 0.2837,
      "step": 35
    },
    {
      "epoch": 1.017667844522968,
      "grad_norm": 0.11025042831897736,
      "learning_rate": 8.021423030326076e-05,
      "loss": 0.3322,
      "step": 36
    },
    {
      "epoch": 1.0459363957597174,
      "grad_norm": 0.05836005136370659,
      "learning_rate": 7.906583286995835e-05,
      "loss": 0.1968,
      "step": 37
    },
    {
      "epoch": 1.0742049469964665,
      "grad_norm": 0.06407664716243744,
      "learning_rate": 7.789504861068493e-05,
      "loss": 0.1979,
      "step": 38
    },
    {
      "epoch": 1.1024734982332156,
      "grad_norm": 0.07625511288642883,
      "learning_rate": 7.670296662730553e-05,
      "loss": 0.2166,
      "step": 39
    },
    {
      "epoch": 1.1307420494699647,
      "grad_norm": 0.0838479995727539,
      "learning_rate": 7.54906958335257e-05,
      "loss": 0.2416,
      "step": 40
    },
    {
      "epoch": 1.1590106007067138,
      "grad_norm": 0.08144625276327133,
      "learning_rate": 7.425936392334369e-05,
      "loss": 0.2472,
      "step": 41
    },
    {
      "epoch": 1.187279151943463,
      "grad_norm": 0.06474898755550385,
      "learning_rate": 7.301011632203251e-05,
      "loss": 0.214,
      "step": 42
    },
    {
      "epoch": 1.215547703180212,
      "grad_norm": 0.0715680941939354,
      "learning_rate": 7.17441151206279e-05,
      "loss": 0.2439,
      "step": 43
    },
    {
      "epoch": 1.243816254416961,
      "grad_norm": 0.0650620236992836,
      "learning_rate": 7.046253799491311e-05,
      "loss": 0.1902,
      "step": 44
    },
    {
      "epoch": 1.2720848056537102,
      "grad_norm": 0.12278559058904648,
      "learning_rate": 6.916657710990633e-05,
      "loss": 0.2531,
      "step": 45
    },
    {
      "epoch": 1.3003533568904593,
      "grad_norm": 0.07810261845588684,
      "learning_rate": 6.785743801086981e-05,
      "loss": 0.2229,
      "step": 46
    },
    {
      "epoch": 1.3286219081272086,
      "grad_norm": 0.06839544326066971,
      "learning_rate": 6.653633850187212e-05,
      "loss": 0.2014,
      "step": 47
    },
    {
      "epoch": 1.3568904593639575,
      "grad_norm": 0.07193570584058762,
      "learning_rate": 6.520450751294685e-05,
      "loss": 0.2103,
      "step": 48
    },
    {
      "epoch": 1.3851590106007068,
      "grad_norm": 0.09183719009160995,
      "learning_rate": 6.386318395690179e-05,
      "loss": 0.2542,
      "step": 49
    },
    {
      "epoch": 1.4134275618374559,
      "grad_norm": 0.09221762418746948,
      "learning_rate": 6.25136155768415e-05,
      "loss": 0.2299,
      "step": 50
    },
    {
      "epoch": 1.4134275618374559,
      "eval_loss": 0.21029320359230042,
      "eval_runtime": 2.4406,
      "eval_samples_per_second": 20.487,
      "eval_steps_per_second": 5.327,
      "step": 50
    },
    {
      "epoch": 1.441696113074205,
      "grad_norm": 0.10233300924301147,
      "learning_rate": 6.115705778547597e-05,
      "loss": 0.238,
      "step": 51
    },
    {
      "epoch": 1.469964664310954,
      "grad_norm": 0.08884930610656738,
      "learning_rate": 5.979477249729443e-05,
      "loss": 0.2443,
      "step": 52
    },
    {
      "epoch": 1.4982332155477032,
      "grad_norm": 0.06135065108537674,
      "learning_rate": 5.842802695469132e-05,
      "loss": 0.1793,
      "step": 53
    },
    {
      "epoch": 1.5265017667844523,
      "grad_norm": 0.0772595927119255,
      "learning_rate": 5.705809254913577e-05,
      "loss": 0.2317,
      "step": 54
    },
    {
      "epoch": 1.5547703180212014,
      "grad_norm": 0.07231206446886063,
      "learning_rate": 5.568624363848167e-05,
      "loss": 0.1857,
      "step": 55
    },
    {
      "epoch": 1.5830388692579507,
      "grad_norm": 0.07848239690065384,
      "learning_rate": 5.431375636151834e-05,
      "loss": 0.2049,
      "step": 56
    },
    {
      "epoch": 1.6113074204946995,
      "grad_norm": 0.07845490425825119,
      "learning_rate": 5.294190745086426e-05,
      "loss": 0.2081,
      "step": 57
    },
    {
      "epoch": 1.6395759717314489,
      "grad_norm": 0.0769682452082634,
      "learning_rate": 5.15719730453087e-05,
      "loss": 0.2242,
      "step": 58
    },
    {
      "epoch": 1.6678445229681977,
      "grad_norm": 0.07719448208808899,
      "learning_rate": 5.020522750270559e-05,
      "loss": 0.2138,
      "step": 59
    },
    {
      "epoch": 1.696113074204947,
      "grad_norm": 0.08276604861021042,
      "learning_rate": 4.884294221452406e-05,
      "loss": 0.2339,
      "step": 60
    },
    {
      "epoch": 1.7243816254416962,
      "grad_norm": 0.09187748283147812,
      "learning_rate": 4.7486384423158514e-05,
      "loss": 0.2585,
      "step": 61
    },
    {
      "epoch": 1.7526501766784452,
      "grad_norm": 0.06316478550434113,
      "learning_rate": 4.613681604309824e-05,
      "loss": 0.1936,
      "step": 62
    },
    {
      "epoch": 1.7809187279151943,
      "grad_norm": 0.06568682938814163,
      "learning_rate": 4.479549248705316e-05,
      "loss": 0.1895,
      "step": 63
    },
    {
      "epoch": 1.8091872791519434,
      "grad_norm": 0.07526635378599167,
      "learning_rate": 4.346366149812791e-05,
      "loss": 0.1954,
      "step": 64
    },
    {
      "epoch": 1.8374558303886925,
      "grad_norm": 0.07216266542673111,
      "learning_rate": 4.2142561989130204e-05,
      "loss": 0.1972,
      "step": 65
    },
    {
      "epoch": 1.8657243816254416,
      "grad_norm": 0.07032765448093414,
      "learning_rate": 4.0833422890093684e-05,
      "loss": 0.1875,
      "step": 66
    },
    {
      "epoch": 1.893992932862191,
      "grad_norm": 0.0753110945224762,
      "learning_rate": 3.9537462005086936e-05,
      "loss": 0.2326,
      "step": 67
    },
    {
      "epoch": 1.9222614840989398,
      "grad_norm": 0.07524023950099945,
      "learning_rate": 3.825588487937212e-05,
      "loss": 0.235,
      "step": 68
    },
    {
      "epoch": 1.9505300353356891,
      "grad_norm": 0.0842115730047226,
      "learning_rate": 3.6989883677967485e-05,
      "loss": 0.2449,
      "step": 69
    },
    {
      "epoch": 1.978798586572438,
      "grad_norm": 0.10185158252716064,
      "learning_rate": 3.574063607665633e-05,
      "loss": 0.2608,
      "step": 70
    },
    {
      "epoch": 2.0070671378091873,
      "grad_norm": 0.14404942095279694,
      "learning_rate": 3.450930416647429e-05,
      "loss": 0.3676,
      "step": 71
    },
    {
      "epoch": 2.035335689045936,
      "grad_norm": 0.060068968683481216,
      "learning_rate": 3.3297033372694477e-05,
      "loss": 0.1747,
      "step": 72
    },
    {
      "epoch": 2.0636042402826855,
      "grad_norm": 0.06130439415574074,
      "learning_rate": 3.2104951389315077e-05,
      "loss": 0.2236,
      "step": 73
    },
    {
      "epoch": 2.091872791519435,
      "grad_norm": 0.06732094287872314,
      "learning_rate": 3.093416713004167e-05,
      "loss": 0.1928,
      "step": 74
    },
    {
      "epoch": 2.1201413427561837,
      "grad_norm": 0.07202989608049393,
      "learning_rate": 2.9785769696739264e-05,
      "loss": 0.2016,
      "step": 75
    },
    {
      "epoch": 2.1201413427561837,
      "eval_loss": 0.20411866903305054,
      "eval_runtime": 2.3425,
      "eval_samples_per_second": 21.345,
      "eval_steps_per_second": 5.55,
      "step": 75
    },
    {
      "epoch": 2.148409893992933,
      "grad_norm": 0.08088778704404831,
      "learning_rate": 2.86608273663141e-05,
      "loss": 0.2475,
      "step": 76
    },
    {
      "epoch": 2.176678445229682,
      "grad_norm": 0.07822652906179428,
      "learning_rate": 2.7560386596967557e-05,
      "loss": 0.2136,
      "step": 77
    },
    {
      "epoch": 2.204946996466431,
      "grad_norm": 0.080837681889534,
      "learning_rate": 2.6485471054746318e-05,
      "loss": 0.2256,
      "step": 78
    },
    {
      "epoch": 2.23321554770318,
      "grad_norm": 0.07773551344871521,
      "learning_rate": 2.5437080661294786e-05,
      "loss": 0.2158,
      "step": 79
    },
    {
      "epoch": 2.2614840989399294,
      "grad_norm": 0.07530491799116135,
      "learning_rate": 2.4416190663695194e-05,
      "loss": 0.206,
      "step": 80
    },
    {
      "epoch": 2.2897526501766783,
      "grad_norm": 0.07282233983278275,
      "learning_rate": 2.3423750727260816e-05,
      "loss": 0.1726,
      "step": 81
    },
    {
      "epoch": 2.3180212014134276,
      "grad_norm": 0.0700765922665596,
      "learning_rate": 2.2460684052126197e-05,
      "loss": 0.1814,
      "step": 82
    },
    {
      "epoch": 2.3462897526501765,
      "grad_norm": 0.06933876872062683,
      "learning_rate": 2.152788651445618e-05,
      "loss": 0.1945,
      "step": 83
    },
    {
      "epoch": 2.374558303886926,
      "grad_norm": 0.06883740425109863,
      "learning_rate": 2.0626225833072487e-05,
      "loss": 0.2101,
      "step": 84
    },
    {
      "epoch": 2.402826855123675,
      "grad_norm": 0.06986892968416214,
      "learning_rate": 1.97565407622733e-05,
      "loss": 0.1994,
      "step": 85
    },
    {
      "epoch": 2.431095406360424,
      "grad_norm": 0.07742994278669357,
      "learning_rate": 1.891964031159653e-05,
      "loss": 0.2147,
      "step": 86
    },
    {
      "epoch": 2.4593639575971733,
      "grad_norm": 0.08387456834316254,
      "learning_rate": 1.8116302993252637e-05,
      "loss": 0.2197,
      "step": 87
    },
    {
      "epoch": 2.487632508833922,
      "grad_norm": 0.08557935804128647,
      "learning_rate": 1.7347276097927105e-05,
      "loss": 0.2097,
      "step": 88
    },
    {
      "epoch": 2.5159010600706715,
      "grad_norm": 0.06514272093772888,
      "learning_rate": 1.6613274999626137e-05,
      "loss": 0.2136,
      "step": 89
    },
    {
      "epoch": 2.5441696113074204,
      "grad_norm": 0.05643463507294655,
      "learning_rate": 1.5914982490212312e-05,
      "loss": 0.1907,
      "step": 90
    },
    {
      "epoch": 2.5724381625441697,
      "grad_norm": 0.06008900701999664,
      "learning_rate": 1.5253048144249275e-05,
      "loss": 0.1835,
      "step": 91
    },
    {
      "epoch": 2.6007067137809186,
      "grad_norm": 0.06434128433465958,
      "learning_rate": 1.4628087714746172e-05,
      "loss": 0.189,
      "step": 92
    },
    {
      "epoch": 2.628975265017668,
      "grad_norm": 0.07678020000457764,
      "learning_rate": 1.4040682560364033e-05,
      "loss": 0.2275,
      "step": 93
    },
    {
      "epoch": 2.657243816254417,
      "grad_norm": 0.07552898675203323,
      "learning_rate": 1.3491379104616938e-05,
      "loss": 0.2218,
      "step": 94
    },
    {
      "epoch": 2.685512367491166,
      "grad_norm": 0.0758337527513504,
      "learning_rate": 1.2980688327570905e-05,
      "loss": 0.2201,
      "step": 95
    },
    {
      "epoch": 2.713780918727915,
      "grad_norm": 0.08446727693080902,
      "learning_rate": 1.2509085290513564e-05,
      "loss": 0.2429,
      "step": 96
    },
    {
      "epoch": 2.7420494699646643,
      "grad_norm": 0.08306104689836502,
      "learning_rate": 1.2077008694036528e-05,
      "loss": 0.2056,
      "step": 97
    },
    {
      "epoch": 2.7703180212014136,
      "grad_norm": 0.0664680078625679,
      "learning_rate": 1.1684860469941786e-05,
      "loss": 0.1927,
      "step": 98
    },
    {
      "epoch": 2.7985865724381624,
      "grad_norm": 0.06700006872415543,
      "learning_rate": 1.1333005407351517e-05,
      "loss": 0.1953,
      "step": 99
    },
    {
      "epoch": 2.8268551236749118,
      "grad_norm": 0.06202859431505203,
      "learning_rate": 1.1021770813369377e-05,
      "loss": 0.1882,
      "step": 100
    },
    {
      "epoch": 2.8268551236749118,
      "eval_loss": 0.2019248753786087,
      "eval_runtime": 2.346,
      "eval_samples_per_second": 21.313,
      "eval_steps_per_second": 5.541,
      "step": 100
    },
    {
      "epoch": 2.8551236749116606,
      "grad_norm": 0.06783071905374527,
      "learning_rate": 1.0751446208608642e-05,
      "loss": 0.2021,
      "step": 101
    },
    {
      "epoch": 2.88339222614841,
      "grad_norm": 0.06956081092357635,
      "learning_rate": 1.0522283057870676e-05,
      "loss": 0.2095,
      "step": 102
    },
    {
      "epoch": 2.9116607773851593,
      "grad_norm": 0.07598631829023361,
      "learning_rate": 1.0334494536224147e-05,
      "loss": 0.2283,
      "step": 103
    },
    {
      "epoch": 2.939929328621908,
      "grad_norm": 0.07840535044670105,
      "learning_rate": 1.0188255330702585e-05,
      "loss": 0.2234,
      "step": 104
    },
    {
      "epoch": 2.968197879858657,
      "grad_norm": 0.08693535625934601,
      "learning_rate": 1.008370147780478e-05,
      "loss": 0.2282,
      "step": 105
    },
    {
      "epoch": 2.9964664310954063,
      "grad_norm": 0.12355560064315796,
      "learning_rate": 1.0020930236949183e-05,
      "loss": 0.3288,
      "step": 106
    },
    {
      "epoch": 3.0247349823321557,
      "grad_norm": 0.06719771027565002,
      "learning_rate": 1e-05,
      "loss": 0.2029,
      "step": 107
    }
  ],
  "logging_steps": 1,
  "max_steps": 107,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 30,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.4028515042797814e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}