|
{ |
|
"best_metric": 3.5582468509674072, |
|
"best_model_checkpoint": "checkpoints-mistral-300M-FA2/checkpoint-40000", |
|
"epoch": 0.9999985178004752, |
|
"eval_steps": 5000, |
|
"global_step": 42167, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 9.0925, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 7.7547, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 7.3919, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 7.0885, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0003, |
|
"loss": 6.794, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00035999999999999997, |
|
"loss": 6.5749, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00041999999999999996, |
|
"loss": 6.4027, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00047999999999999996, |
|
"loss": 6.2476, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00054, |
|
"loss": 6.0979, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0006, |
|
"loss": 5.9485, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0005999912644458949, |
|
"loss": 5.8031, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0005999650582923124, |
|
"loss": 5.6781, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0005999213830654211, |
|
"loss": 5.5612, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0005998602413087361, |
|
"loss": 5.4602, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000599781636582972, |
|
"loss": 5.3715, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005996855734658339, |
|
"loss": 5.2891, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005995720575517524, |
|
"loss": 5.2142, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005994410954515569, |
|
"loss": 5.1388, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005992926947920907, |
|
"loss": 5.0648, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005991268642157673, |
|
"loss": 4.9956, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005989436133800661, |
|
"loss": 4.937, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005987429529569716, |
|
"loss": 4.8876, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005985248946323499, |
|
"loss": 4.8387, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005982894511052698, |
|
"loss": 4.7943, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005980366360872623, |
|
"loss": 4.7574, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005977664643015227, |
|
"loss": 4.7216, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005974789514820526, |
|
"loss": 4.6875, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0005971741143727439, |
|
"loss": 4.6595, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0005968519707264038, |
|
"loss": 4.6346, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0005965125393037204, |
|
"loss": 4.6029, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0005961558398721711, |
|
"loss": 4.5849, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005957818932048701, |
|
"loss": 4.5592, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00059539072107936, |
|
"loss": 4.537, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005949823462763423, |
|
"loss": 4.5125, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005945567925783518, |
|
"loss": 4.4937, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005941140847683708, |
|
"loss": 4.478, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005936542486283861, |
|
"loss": 4.4609, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005931773109378876, |
|
"loss": 4.4427, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005926832994723086, |
|
"loss": 4.429, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005921722430014085, |
|
"loss": 4.4091, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005916441712875966, |
|
"loss": 4.3971, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005910991150842002, |
|
"loss": 4.3842, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000590537106133672, |
|
"loss": 4.3676, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005899581771657428, |
|
"loss": 4.3585, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005893623618955148, |
|
"loss": 4.3407, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005887496950214981, |
|
"loss": 4.3323, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005881202122235901, |
|
"loss": 4.3157, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000587473950160998, |
|
"loss": 4.3058, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005868109464701029, |
|
"loss": 4.2971, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005861312397622692, |
|
"loss": 4.2911, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 4.291384220123291, |
|
"eval_runtime": 6254.7697, |
|
"eval_samples_per_second": 88.102, |
|
"eval_steps_per_second": 22.026, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005854348696215949, |
|
"loss": 4.28, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000584721876602607, |
|
"loss": 4.2687, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005839923022278993, |
|
"loss": 4.255, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005832461889857147, |
|
"loss": 4.2493, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005824835803274706, |
|
"loss": 4.2397, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005817045206652282, |
|
"loss": 4.2307, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005809090553691065, |
|
"loss": 4.2223, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005800972307646396, |
|
"loss": 4.2181, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005792690941300793, |
|
"loss": 4.206, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005784246936936413, |
|
"loss": 4.1952, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000577564078630697, |
|
"loss": 4.1927, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005766872990609095, |
|
"loss": 4.178, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005757944060453144, |
|
"loss": 4.1725, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005748854515833468, |
|
"loss": 4.1704, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005739604886098125, |
|
"loss": 4.1589, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0005730195709918055, |
|
"loss": 4.1535, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0005720627535255711, |
|
"loss": 4.1452, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000571090091933314, |
|
"loss": 4.1424, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0005701016428599541, |
|
"loss": 4.1345, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0005690974638698271, |
|
"loss": 4.1261, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0005680776134433322, |
|
"loss": 4.1234, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0005670421509735268, |
|
"loss": 4.1154, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000565991136762667, |
|
"loss": 4.1083, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0005649246320186961, |
|
"loss": 4.1002, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0005638426988516804, |
|
"loss": 4.0975, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0005627454002701908, |
|
"loss": 4.0906, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0005616328001776353, |
|
"loss": 4.0872, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0005605049633685356, |
|
"loss": 4.0814, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0005593619555247551, |
|
"loss": 4.0714, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0005582038432116726, |
|
"loss": 4.0643, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0005570306938743069, |
|
"loss": 4.0624, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0005558425758333878, |
|
"loss": 4.054, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0005546395582813782, |
|
"loss": 4.052, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0005534217112784443, |
|
"loss": 4.046, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0005521891057483752, |
|
"loss": 4.0427, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000550941813474453, |
|
"loss": 4.0371, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.000549679907095272, |
|
"loss": 4.0304, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0005484034601005085, |
|
"loss": 4.0262, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0005471125468266411, |
|
"loss": 4.023, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0005458072424526214, |
|
"loss": 4.0215, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.000544487622995496, |
|
"loss": 4.015, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0005431537653059793, |
|
"loss": 4.0085, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.000541805747063978, |
|
"loss": 4.0006, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0005404436467740676, |
|
"loss": 3.9976, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0005390675437609197, |
|
"loss": 3.9953, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0005376775181646833, |
|
"loss": 3.9894, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0005362736509363169, |
|
"loss": 3.9862, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0005348560238328749, |
|
"loss": 3.9821, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0005334247194127456, |
|
"loss": 3.9795, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0005319798210308438, |
|
"loss": 3.9709, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 3.989983320236206, |
|
"eval_runtime": 6257.6022, |
|
"eval_samples_per_second": 88.062, |
|
"eval_steps_per_second": 22.016, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.000530521412833756, |
|
"loss": 3.971, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0005290495797548403, |
|
"loss": 3.9659, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00052756440750928, |
|
"loss": 3.9599, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0005260659825890919, |
|
"loss": 3.958, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0005245543922580891, |
|
"loss": 3.9549, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0005230297245467988, |
|
"loss": 3.9524, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0005214920682473364, |
|
"loss": 3.9487, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.000519941512908234, |
|
"loss": 3.9405, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0005183781488292252, |
|
"loss": 3.9388, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0005168020670559866, |
|
"loss": 3.9395, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0005152133593748358, |
|
"loss": 3.9324, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0005136121183073853, |
|
"loss": 3.9289, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0005119984371051549, |
|
"loss": 3.9234, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0005103724097441411, |
|
"loss": 3.9227, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0005087341309193438, |
|
"loss": 3.9204, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0005070836960392517, |
|
"loss": 3.918, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0005054212012202861, |
|
"loss": 3.9053, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0005037467432812033, |
|
"loss": 3.9075, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0005020604197374561, |
|
"loss": 3.9064, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0005003623287955149, |
|
"loss": 3.9026, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004986697243743568, |
|
"loss": 3.8982, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004969485111851287, |
|
"loss": 3.8938, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004952158283000648, |
|
"loss": 3.8916, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004934717766254659, |
|
"loss": 3.8897, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004917164577297167, |
|
"loss": 3.8904, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.000489949973837372, |
|
"loss": 3.8837, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004881724278232027, |
|
"loss": 3.8825, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004863839232062045, |
|
"loss": 3.877, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004845845641435698, |
|
"loss": 3.8772, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004827744554246214, |
|
"loss": 3.8727, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004809537024647106, |
|
"loss": 3.8677, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00047912241129907716, |
|
"loss": 3.8691, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00047728068857667475, |
|
"loss": 3.8654, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00047542864155396025, |
|
"loss": 3.8623, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00047356637808864646, |
|
"loss": 3.8523, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.000471694006633422, |
|
"loss": 3.8573, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00046981163622963445, |
|
"loss": 3.8565, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0004679193765009406, |
|
"loss": 3.8482, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00046601733764692197, |
|
"loss": 3.8434, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0004641056304366674, |
|
"loss": 3.8503, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.000462184366202322, |
|
"loss": 3.8419, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00046027301031098105, |
|
"loss": 3.8443, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00045833306101326796, |
|
"loss": 3.8355, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0004563838908687476, |
|
"loss": 3.8367, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.000454425613391295, |
|
"loss": 3.8354, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0004524583426251691, |
|
"loss": 3.8335, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0004504821931383715, |
|
"loss": 3.8349, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00044849728001597385, |
|
"loss": 3.8244, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0004465236968920431, |
|
"loss": 3.821, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00044452168853148435, |
|
"loss": 3.8229, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 3.838818311691284, |
|
"eval_runtime": 6259.3563, |
|
"eval_samples_per_second": 88.037, |
|
"eval_steps_per_second": 22.009, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004425112636573954, |
|
"loss": 3.817, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00044049253935094467, |
|
"loss": 3.8165, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004384656331766349, |
|
"loss": 3.8144, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00043643066317545647, |
|
"loss": 3.8139, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.000434387747858013, |
|
"loss": 3.8071, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004323370061976197, |
|
"loss": 3.8034, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004302785576233748, |
|
"loss": 3.8071, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004282125220132043, |
|
"loss": 3.8009, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0004261390196868805, |
|
"loss": 3.7961, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00042405817139901526, |
|
"loss": 3.7929, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00042197009833202696, |
|
"loss": 3.8016, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00041987492208908427, |
|
"loss": 3.7909, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0004177727646870232, |
|
"loss": 3.7895, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00041566374854924194, |
|
"loss": 3.7867, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00041354799649857116, |
|
"loss": 3.7862, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00041142563175012073, |
|
"loss": 3.7839, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004092967779041047, |
|
"loss": 3.7807, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004071615589386428, |
|
"loss": 3.7772, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00040502009920254025, |
|
"loss": 3.7765, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00040287252340804637, |
|
"loss": 3.7742, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0004007189566235915, |
|
"loss": 3.7766, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0003985595242665033, |
|
"loss": 3.7685, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00039639435209570307, |
|
"loss": 3.7715, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0003942235662043819, |
|
"loss": 3.7718, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.000392047293012657, |
|
"loss": 3.7688, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00038986565926021, |
|
"loss": 3.7631, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0003876787919989051, |
|
"loss": 3.7589, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0003854868185853913, |
|
"loss": 3.7614, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0003832898666736839, |
|
"loss": 3.7549, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0003810880642077316, |
|
"loss": 3.7571, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00037888153941396496, |
|
"loss": 3.7534, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0003766704207938287, |
|
"loss": 3.7517, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0003744548371162984, |
|
"loss": 3.7567, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0003722349174103814, |
|
"loss": 3.7486, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00037001079095760225, |
|
"loss": 3.7516, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0003677825872844742, |
|
"loss": 3.7437, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0003655504361549554, |
|
"loss": 3.7457, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00036331446756289226, |
|
"loss": 3.7464, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00036109722610660756, |
|
"loss": 3.741, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0003588540483745179, |
|
"loss": 3.7379, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0003566074431576024, |
|
"loss": 3.738, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00035435754129147054, |
|
"loss": 3.7309, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00035210447380371886, |
|
"loss": 3.7355, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0003498483719063004, |
|
"loss": 3.7344, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.000347589366987883, |
|
"loss": 3.735, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.000345327590606198, |
|
"loss": 3.7291, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00034306317448037834, |
|
"loss": 3.7295, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00034079625048328796, |
|
"loss": 3.7221, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00033852695063384174, |
|
"loss": 3.7301, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00033625540708931705, |
|
"loss": 3.7197, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 3.7453513145446777, |
|
"eval_runtime": 6261.7484, |
|
"eval_samples_per_second": 88.004, |
|
"eval_steps_per_second": 22.001, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0003339817521376575, |
|
"loss": 3.7178, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00033170611818976876, |
|
"loss": 3.7157, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0003294286377718072, |
|
"loss": 3.7184, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00032714944351746255, |
|
"loss": 3.7167, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0003248914833042039, |
|
"loss": 3.7177, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00032260927349466893, |
|
"loss": 3.712, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0003203257469882546, |
|
"loss": 3.7095, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0003180410367707568, |
|
"loss": 3.7036, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0003157552758969068, |
|
"loss": 3.7059, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0003134685974826232, |
|
"loss": 3.7097, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00031118113469725937, |
|
"loss": 3.7021, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00030889302075584824, |
|
"loss": 3.7026, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0003066043889113439, |
|
"loss": 3.7003, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00030431537244686186, |
|
"loss": 3.7008, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00030202610466791653, |
|
"loss": 3.6968, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00029973671889465826, |
|
"loss": 3.6949, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00029744734845410883, |
|
"loss": 3.6992, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00029515812667239735, |
|
"loss": 3.6916, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00029286918686699537, |
|
"loss": 3.6919, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002905806623389529, |
|
"loss": 3.6909, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00028829268636513573, |
|
"loss": 3.6979, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00028600539219046303, |
|
"loss": 3.689, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002837189130201484, |
|
"loss": 3.684, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002814333820119417, |
|
"loss": 3.6825, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00027914893226837486, |
|
"loss": 3.6896, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00027686569682901013, |
|
"loss": 3.6824, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002746066204389395, |
|
"loss": 3.6777, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00027232619697688704, |
|
"loss": 3.6824, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002700473851548586, |
|
"loss": 3.6806, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002677703176840807, |
|
"loss": 3.6795, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002654951271741938, |
|
"loss": 3.6753, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002632219461255299, |
|
"loss": 3.6703, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00026095090692139603, |
|
"loss": 3.6678, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002586821418203645, |
|
"loss": 3.6701, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00025641578294857047, |
|
"loss": 3.6712, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002541519622920176, |
|
"loss": 3.6709, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002518908116888915, |
|
"loss": 3.6688, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00024963246282188163, |
|
"loss": 3.6668, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002473770472105129, |
|
"loss": 3.6671, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00024512469620348586, |
|
"loss": 3.6619, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00024287554097102775, |
|
"loss": 3.66, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00024062971249725343, |
|
"loss": 3.663, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00023838734157253735, |
|
"loss": 3.6586, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00023614855878589612, |
|
"loss": 3.6627, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00023391349451738433, |
|
"loss": 3.6548, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00023168227893050097, |
|
"loss": 3.6541, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00022945504196460908, |
|
"loss": 3.6516, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00022723191332736894, |
|
"loss": 3.6545, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00022501302248718378, |
|
"loss": 3.6536, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002227984986656603, |
|
"loss": 3.652, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 3.6738803386688232, |
|
"eval_runtime": 6261.7124, |
|
"eval_samples_per_second": 88.004, |
|
"eval_steps_per_second": 22.001, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00022061054843048285, |
|
"loss": 3.6444, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.000218405098403175, |
|
"loss": 3.6463, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00021620440022038445, |
|
"loss": 3.6485, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00021400858204423146, |
|
"loss": 3.6457, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00021181777175263927, |
|
"loss": 3.6429, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00020963209693188685, |
|
"loss": 3.6426, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00020745168486917856, |
|
"loss": 3.6436, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00020527666254523122, |
|
"loss": 3.638, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0002031071566268795, |
|
"loss": 3.6347, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00020094329345969906, |
|
"loss": 3.6352, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00019878519906064822, |
|
"loss": 3.6357, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00019663299911072975, |
|
"loss": 3.6363, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00019448681894767086, |
|
"loss": 3.6347, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00019234678355862448, |
|
"loss": 3.6289, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001902130175728901, |
|
"loss": 3.6329, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001880856452546559, |
|
"loss": 3.6347, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00018596479049576175, |
|
"loss": 3.6317, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001838505768084843, |
|
"loss": 3.6218, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00018174312731834396, |
|
"loss": 3.6279, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001796425647569343, |
|
"loss": 3.6248, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00017754901145477467, |
|
"loss": 3.6295, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00017548341785672704, |
|
"loss": 3.6232, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00017340417529776694, |
|
"loss": 3.6214, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00017133230530331462, |
|
"loss": 3.6229, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00016926792853291946, |
|
"loss": 3.6203, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00016721116520974823, |
|
"loss": 3.617, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001651621351135826, |
|
"loss": 3.6154, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00016312095757384451, |
|
"loss": 3.6209, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00016108775146264626, |
|
"loss": 3.6179, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00015906263518786752, |
|
"loss": 3.6132, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00015704572668626048, |
|
"loss": 3.6137, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00015503714341658065, |
|
"loss": 3.6088, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0001530370023527469, |
|
"loss": 3.6135, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00015104541997702905, |
|
"loss": 3.6092, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001490625122732643, |
|
"loss": 3.6125, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00014708839472010312, |
|
"loss": 3.6125, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00014512318228428328, |
|
"loss": 3.6076, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00014316698941393538, |
|
"loss": 3.606, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00014121993003191695, |
|
"loss": 3.6039, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00013928211752917854, |
|
"loss": 3.6058, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00013735366475816006, |
|
"loss": 3.6023, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00013543468402621808, |
|
"loss": 3.5966, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00013352528708908623, |
|
"loss": 3.6002, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0001316255851443661, |
|
"loss": 3.603, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00012975453888853402, |
|
"loss": 3.5971, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00012787445855677994, |
|
"loss": 3.5955, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00012600440230489343, |
|
"loss": 3.5974, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0001241444790393915, |
|
"loss": 3.5965, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00012229479707667653, |
|
"loss": 3.6012, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00012045546413672746, |
|
"loss": 3.597, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 3.617741823196411, |
|
"eval_runtime": 6508.3328, |
|
"eval_samples_per_second": 84.669, |
|
"eval_steps_per_second": 21.167, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00011862658733682693, |
|
"loss": 3.5872, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00011680827318532343, |
|
"loss": 3.5905, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00011500062757542787, |
|
"loss": 3.5966, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00011320375577904705, |
|
"loss": 3.5901, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00011141776244065287, |
|
"loss": 3.5916, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00010964275157118847, |
|
"loss": 3.5895, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00010787882654201032, |
|
"loss": 3.5866, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00010612609007886857, |
|
"loss": 3.5895, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00010438464425592469, |
|
"loss": 3.5874, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00010265459048980658, |
|
"loss": 3.5868, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.000100936029533703, |
|
"loss": 3.5787, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.922906147149525e-05, |
|
"loss": 3.5839, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.753378571192895e-05, |
|
"loss": 3.5852, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.585030098282516e-05, |
|
"loss": 3.5745, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.417870532532991e-05, |
|
"loss": 3.5768, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.251909608820541e-05, |
|
"loss": 3.577, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.087156992216018e-05, |
|
"loss": 3.5845, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.925251564625636e-05, |
|
"loss": 3.5767, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.762931954253596e-05, |
|
"loss": 3.5754, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.60184912759454e-05, |
|
"loss": 3.5723, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.442012465633435e-05, |
|
"loss": 3.5735, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.283431276782354e-05, |
|
"loss": 3.5732, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 8.126114796338322e-05, |
|
"loss": 3.5705, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.971626276492257e-05, |
|
"loss": 3.5694, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.816853749295341e-05, |
|
"loss": 3.5698, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.663373102593709e-05, |
|
"loss": 3.5638, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.51119327464399e-05, |
|
"loss": 3.5674, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.36032312794699e-05, |
|
"loss": 3.5615, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.21077144873156e-05, |
|
"loss": 3.5749, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.062546946442954e-05, |
|
"loss": 3.5659, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.915658253235543e-05, |
|
"loss": 3.5661, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.770113923470201e-05, |
|
"loss": 3.5628, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.625922433216026e-05, |
|
"loss": 3.5597, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.483092179756783e-05, |
|
"loss": 3.5658, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.341631481101857e-05, |
|
"loss": 3.5596, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.20154857550183e-05, |
|
"loss": 3.5628, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.062851620968693e-05, |
|
"loss": 3.5562, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.925548694800801e-05, |
|
"loss": 3.5659, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.789647793112406e-05, |
|
"loss": 3.5578, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.6551568303680585e-05, |
|
"loss": 3.5617, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.5220836389216264e-05, |
|
"loss": 3.5618, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.390435968560195e-05, |
|
"loss": 3.5566, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.260221486052765e-05, |
|
"loss": 3.558, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.131447774703693e-05, |
|
"loss": 3.5553, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.004122333911149e-05, |
|
"loss": 3.5587, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.8782525787302994e-05, |
|
"loss": 3.5585, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.7538458394415367e-05, |
|
"loss": 3.5541, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.630909361123535e-05, |
|
"loss": 3.5486, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.509450303231335e-05, |
|
"loss": 3.5527, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.3894757391794366e-05, |
|
"loss": 3.5554, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 3.5770018100738525, |
|
"eval_runtime": 6272.5699, |
|
"eval_samples_per_second": 87.852, |
|
"eval_steps_per_second": 21.963, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.27099265592979e-05, |
|
"loss": 3.5507, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.154007953584973e-05, |
|
"loss": 3.5502, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.038528444986291e-05, |
|
"loss": 3.5468, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9245608553170395e-05, |
|
"loss": 3.5483, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.812111821710867e-05, |
|
"loss": 3.5482, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.701187892865215e-05, |
|
"loss": 3.5497, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.591795528659971e-05, |
|
"loss": 3.5513, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4839410997812365e-05, |
|
"loss": 3.5471, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.377630887350332e-05, |
|
"loss": 3.5544, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.272871082558024e-05, |
|
"loss": 3.5426, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.169667786303914e-05, |
|
"loss": 3.5429, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.068027008841208e-05, |
|
"loss": 3.5441, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9679546694266342e-05, |
|
"loss": 3.5479, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.869456595975762e-05, |
|
"loss": 3.5448, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.772538524723592e-05, |
|
"loss": 3.5434, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6772060998904855e-05, |
|
"loss": 3.545, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.583464873353487e-05, |
|
"loss": 3.5468, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.4913203043229636e-05, |
|
"loss": 3.5417, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.4007777590247125e-05, |
|
"loss": 3.5426, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.311842510387417e-05, |
|
"loss": 3.5383, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2253849669299984e-05, |
|
"loss": 3.5409, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1396635552045304e-05, |
|
"loss": 3.5476, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0555646466550592e-05, |
|
"loss": 3.5411, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.973093138952013e-05, |
|
"loss": 3.5394, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8922538349908478e-05, |
|
"loss": 3.5395, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.81305144261232e-05, |
|
"loss": 3.5353, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.7354905743283154e-05, |
|
"loss": 3.5405, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6595757470532535e-05, |
|
"loss": 3.5375, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.585311381841e-05, |
|
"loss": 3.5369, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5127018036274286e-05, |
|
"loss": 3.5393, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4417512409785326e-05, |
|
"loss": 3.5358, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3724638258441644e-05, |
|
"loss": 3.5394, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3048435933174273e-05, |
|
"loss": 3.5371, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2388944813996426e-05, |
|
"loss": 3.5387, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1746203307710511e-05, |
|
"loss": 3.5385, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.1120248845671176e-05, |
|
"loss": 3.5403, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0511117881605623e-05, |
|
"loss": 3.5324, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.918845889490445e-06, |
|
"loss": 3.5405, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.3434673614858e-06, |
|
"loss": 3.5369, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.785015805926864e-06, |
|
"loss": 3.5344, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.243523745372149e-06, |
|
"loss": 3.5345, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.71902271470949e-06, |
|
"loss": 3.5374, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.211543259319907e-06, |
|
"loss": 3.538, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.725934718863668e-06, |
|
"loss": 3.5348, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.252415148280509e-06, |
|
"loss": 3.5296, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.796002563835378e-06, |
|
"loss": 3.5329, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.356723545640385e-06, |
|
"loss": 3.5323, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.934603675999771e-06, |
|
"loss": 3.5358, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.529667537919968e-06, |
|
"loss": 3.5388, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.141938713677839e-06, |
|
"loss": 3.536, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 3.5582468509674072, |
|
"eval_runtime": 6284.9941, |
|
"eval_samples_per_second": 87.678, |
|
"eval_steps_per_second": 21.92, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.7714397834476497e-06, |
|
"loss": 3.5315, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.418192323985647e-06, |
|
"loss": 3.5348, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.082216907373836e-06, |
|
"loss": 3.5332, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.7635330998217352e-06, |
|
"loss": 3.5331, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.462159460526991e-06, |
|
"loss": 3.5339, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1781135405944396e-06, |
|
"loss": 3.5277, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.911411882014091e-06, |
|
"loss": 3.5324, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.662070016697803e-06, |
|
"loss": 3.5332, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4301024655745675e-06, |
|
"loss": 3.5379, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2155227377449562e-06, |
|
"loss": 3.53, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0183433296945486e-06, |
|
"loss": 3.5326, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.38575724565882e-07, |
|
"loss": 3.5309, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.762303914898848e-07, |
|
"loss": 3.5324, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.326796054423432e-07, |
|
"loss": 3.5324, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.0503172472939884e-07, |
|
"loss": 3.5328, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.9483136438293033e-07, |
|
"loss": 3.5365, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.0208494214430937e-07, |
|
"loss": 3.528, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.267978592894958e-07, |
|
"loss": 3.5359, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.897450031438933e-08, |
|
"loss": 3.525, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.861823267953367e-08, |
|
"loss": 3.535, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.731406613940226e-09, |
|
"loss": 3.5303, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 42167, |
|
"total_flos": 2.0159394207481463e+19, |
|
"train_loss": 3.89913355111991, |
|
"train_runtime": 393554.9634, |
|
"train_samples_per_second": 27.429, |
|
"train_steps_per_second": 0.107 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 42167, |
|
"num_train_epochs": 1, |
|
"save_steps": 5000, |
|
"total_flos": 2.0159394207481463e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|