|
{ |
|
"best_metric": 88.70164904870342, |
|
"best_model_checkpoint": "output/ft_minilm_roberta_base_tokenizer/checkpoint-55000", |
|
"epoch": 4.9679342426158435, |
|
"global_step": 55000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.972902176858459e-05, |
|
"loss": 3.6092, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_exact_match": 38.34437086092715, |
|
"eval_f1": 53.04195171599385, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9458043537169182e-05, |
|
"loss": 2.3309, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_exact_match": 57.48344370860927, |
|
"eval_f1": 69.86399789823338, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.9187065305753772e-05, |
|
"loss": 1.869, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_exact_match": 60.8325449385052, |
|
"eval_f1": 73.39213373938078, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8916087074338363e-05, |
|
"loss": 1.714, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_exact_match": 67.55912961210974, |
|
"eval_f1": 78.46892865109527, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.864510884292295e-05, |
|
"loss": 1.596, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_exact_match": 69.00662251655629, |
|
"eval_f1": 79.38516098327496, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.8374130611507544e-05, |
|
"loss": 1.4757, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_exact_match": 70.37842951750237, |
|
"eval_f1": 80.6431787888757, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.8103152380092135e-05, |
|
"loss": 1.4583, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_exact_match": 70.35004730368969, |
|
"eval_f1": 80.79000685737387, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.783217414867672e-05, |
|
"loss": 1.409, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_exact_match": 72.25165562913908, |
|
"eval_f1": 81.90341789407236, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.7561195917261312e-05, |
|
"loss": 1.3424, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_exact_match": 72.48817407757805, |
|
"eval_f1": 82.40953492685638, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.7290217685845906e-05, |
|
"loss": 1.35, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_exact_match": 73.56669820245979, |
|
"eval_f1": 83.04642576372542, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7019239454430493e-05, |
|
"loss": 1.3199, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_exact_match": 73.97350993377484, |
|
"eval_f1": 83.20596198058827, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6748261223015084e-05, |
|
"loss": 1.2529, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_exact_match": 74.53169347209082, |
|
"eval_f1": 83.5595619348799, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.6477282991599678e-05, |
|
"loss": 1.2637, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_exact_match": 75.10879848628193, |
|
"eval_f1": 84.03364368528639, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.6206304760184265e-05, |
|
"loss": 1.2265, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_exact_match": 74.9763481551561, |
|
"eval_f1": 84.28762156566786, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.5935326528768855e-05, |
|
"loss": 1.2274, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_exact_match": 74.78713339640493, |
|
"eval_f1": 84.24767651424737, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.5664348297353446e-05, |
|
"loss": 1.2305, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_exact_match": 75.47776726584674, |
|
"eval_f1": 84.74489870588677, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.539337006593804e-05, |
|
"loss": 1.2175, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_exact_match": 76.7833491012299, |
|
"eval_f1": 85.271914926584, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.5122391834522627e-05, |
|
"loss": 1.182, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_exact_match": 76.45222327341533, |
|
"eval_f1": 85.33283456122666, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.4851413603107218e-05, |
|
"loss": 1.1853, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_exact_match": 76.96310312204352, |
|
"eval_f1": 85.2924473992702, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.4580435371691808e-05, |
|
"loss": 1.1664, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_exact_match": 76.04541154210028, |
|
"eval_f1": 85.02171266178281, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.43094571402764e-05, |
|
"loss": 1.1932, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_exact_match": 75.78051087984863, |
|
"eval_f1": 85.14984769166524, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.403847890886099e-05, |
|
"loss": 1.1671, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_exact_match": 77.6158940397351, |
|
"eval_f1": 85.8681189259958, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.376750067744558e-05, |
|
"loss": 1.028, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_exact_match": 77.33207190160833, |
|
"eval_f1": 86.13476093529944, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.349652244603017e-05, |
|
"loss": 1.0269, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_exact_match": 77.31315042573321, |
|
"eval_f1": 85.78642346203148, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.322554421461476e-05, |
|
"loss": 1.0276, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_exact_match": 77.62535477767265, |
|
"eval_f1": 86.04542466345086, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.295456598319935e-05, |
|
"loss": 1.0251, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_exact_match": 77.96594134342479, |
|
"eval_f1": 86.0039567640003, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.268358775178394e-05, |
|
"loss": 1.0153, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_exact_match": 77.918637653737, |
|
"eval_f1": 85.94724245197462, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.2412609520368532e-05, |
|
"loss": 1.0273, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_exact_match": 77.88079470198676, |
|
"eval_f1": 86.43469515459539, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.2141631288953123e-05, |
|
"loss": 1.0184, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_exact_match": 78.30652790917692, |
|
"eval_f1": 86.49960884571237, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.187065305753771e-05, |
|
"loss": 1.0129, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_exact_match": 77.84295175023652, |
|
"eval_f1": 86.34551242938619, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.15996748261223e-05, |
|
"loss": 1.0298, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_exact_match": 78.36329233680227, |
|
"eval_f1": 86.62513916601708, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.1328696594706894e-05, |
|
"loss": 0.9999, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_exact_match": 78.59981078524125, |
|
"eval_f1": 86.75304072732975, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.105771836329148e-05, |
|
"loss": 0.957, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_exact_match": 78.69441816461683, |
|
"eval_f1": 86.9310767775865, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.0786740131876072e-05, |
|
"loss": 1.0023, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_exact_match": 78.80794701986756, |
|
"eval_f1": 87.1131371682942, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.0515761900460666e-05, |
|
"loss": 0.999, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_exact_match": 78.69441816461683, |
|
"eval_f1": 86.85637778000766, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.0244783669045253e-05, |
|
"loss": 1.0011, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_exact_match": 79.120151371807, |
|
"eval_f1": 87.31859279800312, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.9973805437629844e-05, |
|
"loss": 1.0053, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_exact_match": 79.29990539262063, |
|
"eval_f1": 87.20927813197312, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.9702827206214434e-05, |
|
"loss": 0.9997, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_exact_match": 79.0728476821192, |
|
"eval_f1": 87.14281225266565, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.9431848974799025e-05, |
|
"loss": 0.9993, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_exact_match": 78.77010406811732, |
|
"eval_f1": 86.96515368656466, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9160870743383615e-05, |
|
"loss": 0.9712, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_exact_match": 78.86471144749291, |
|
"eval_f1": 87.139815910334, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.8889892511968206e-05, |
|
"loss": 0.9586, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_exact_match": 79.60264900662251, |
|
"eval_f1": 87.67189897254278, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8618914280552793e-05, |
|
"loss": 0.9855, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_exact_match": 79.29044465468307, |
|
"eval_f1": 87.35142110336871, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.8347936049137387e-05, |
|
"loss": 0.9577, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_exact_match": 79.71617786187322, |
|
"eval_f1": 87.58757029471191, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8076957817721978e-05, |
|
"loss": 0.9764, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_exact_match": 79.45127719962157, |
|
"eval_f1": 87.47183032338337, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7805979586306565e-05, |
|
"loss": 0.8665, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_exact_match": 79.66887417218543, |
|
"eval_f1": 87.64021806679038, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.753500135489116e-05, |
|
"loss": 0.8516, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_exact_match": 79.66887417218543, |
|
"eval_f1": 87.7551254398469, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.726402312347575e-05, |
|
"loss": 0.8475, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_exact_match": 79.2336802270577, |
|
"eval_f1": 87.42734800717938, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.699304489206034e-05, |
|
"loss": 0.8682, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_exact_match": 79.85808893093662, |
|
"eval_f1": 87.76673576734072, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.6722066660644927e-05, |
|
"loss": 0.885, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_exact_match": 79.52696310312204, |
|
"eval_f1": 87.60864611417429, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.645108842922952e-05, |
|
"loss": 0.8821, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_exact_match": 79.99053926206244, |
|
"eval_f1": 87.83466920921494, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.618011019781411e-05, |
|
"loss": 0.8379, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_exact_match": 79.68779564806054, |
|
"eval_f1": 87.65155459405366, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.59091319663987e-05, |
|
"loss": 0.8406, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_exact_match": 79.71617786187322, |
|
"eval_f1": 87.80269063903106, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.563815373498329e-05, |
|
"loss": 0.8817, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_exact_match": 79.76348155156103, |
|
"eval_f1": 87.6863825089024, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.5367175503567883e-05, |
|
"loss": 0.8487, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_exact_match": 79.87701040681173, |
|
"eval_f1": 87.85589778459244, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.509619727215247e-05, |
|
"loss": 0.8474, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_exact_match": 80.13245033112582, |
|
"eval_f1": 87.88619033429767, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.482521904073706e-05, |
|
"loss": 0.844, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_exact_match": 80.22705771050141, |
|
"eval_f1": 87.94538843278356, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.4554240809321651e-05, |
|
"loss": 0.8165, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_exact_match": 80.1135288552507, |
|
"eval_f1": 87.94195235666041, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.4283262577906243e-05, |
|
"loss": 0.8857, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_exact_match": 80.3027436140019, |
|
"eval_f1": 88.06711014071337, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.4012284346490832e-05, |
|
"loss": 0.8881, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_exact_match": 79.46073793755913, |
|
"eval_f1": 87.63361243864426, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.3741306115075423e-05, |
|
"loss": 0.8478, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_exact_match": 80.24597918637653, |
|
"eval_f1": 88.16985739641302, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.3470327883660013e-05, |
|
"loss": 0.8322, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_exact_match": 80.13245033112582, |
|
"eval_f1": 87.86660396184342, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.3199349652244604e-05, |
|
"loss": 0.8407, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_exact_match": 80.07568590350047, |
|
"eval_f1": 87.92805047246172, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.2928371420829193e-05, |
|
"loss": 0.8427, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_exact_match": 80.17975402081362, |
|
"eval_f1": 88.0783939864499, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.2657393189413785e-05, |
|
"loss": 0.8334, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_exact_match": 80.19867549668874, |
|
"eval_f1": 88.06310378279342, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.2386414957998374e-05, |
|
"loss": 0.8234, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_exact_match": 80.22705771050141, |
|
"eval_f1": 88.08144574072385, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.2115436726582964e-05, |
|
"loss": 0.8344, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_exact_match": 80.66225165562913, |
|
"eval_f1": 88.25754342761562, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.1844458495167555e-05, |
|
"loss": 0.7908, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_exact_match": 80.3027436140019, |
|
"eval_f1": 88.2107177195419, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.1573480263752145e-05, |
|
"loss": 0.7268, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_exact_match": 80.47303689687796, |
|
"eval_f1": 88.33146167313825, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.1302502032336736e-05, |
|
"loss": 0.7416, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_exact_match": 80.33112582781457, |
|
"eval_f1": 88.20770283656012, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.1031523800921326e-05, |
|
"loss": 0.7289, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"eval_exact_match": 80.27436140018922, |
|
"eval_f1": 88.08675881507963, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.0760545569505917e-05, |
|
"loss": 0.7338, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_exact_match": 80.32166508987702, |
|
"eval_f1": 88.24483276676646, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.0489567338090507e-05, |
|
"loss": 0.7992, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_exact_match": 80.48249763481552, |
|
"eval_f1": 88.24667332064766, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.0218589106675098e-05, |
|
"loss": 0.7497, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_exact_match": 80.0, |
|
"eval_f1": 87.95371777369881, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 9.947610875259687e-06, |
|
"loss": 0.7389, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_exact_match": 80.45411542100284, |
|
"eval_f1": 88.24729821889156, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 9.676632643844279e-06, |
|
"loss": 0.748, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_exact_match": 80.49195837275307, |
|
"eval_f1": 88.27202955999378, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 9.405654412428868e-06, |
|
"loss": 0.7528, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"eval_exact_match": 80.59602649006622, |
|
"eval_f1": 88.27897205791908, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 9.134676181013458e-06, |
|
"loss": 0.7544, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"eval_exact_match": 80.71901608325449, |
|
"eval_f1": 88.29085294791585, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 8.863697949598049e-06, |
|
"loss": 0.7185, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_exact_match": 80.77578051087986, |
|
"eval_f1": 88.45178084091113, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 8.59271971818264e-06, |
|
"loss": 0.711, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_exact_match": 80.54872280037843, |
|
"eval_f1": 88.23006827695103, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 8.32174148676723e-06, |
|
"loss": 0.7767, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_exact_match": 80.51087984862819, |
|
"eval_f1": 88.21518807166053, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 8.05076325535182e-06, |
|
"loss": 0.7299, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"eval_exact_match": 80.42573320719016, |
|
"eval_f1": 88.14657248237756, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 7.779785023936411e-06, |
|
"loss": 0.7443, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_exact_match": 80.45411542100284, |
|
"eval_f1": 88.25882487493044, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 7.508806792521e-06, |
|
"loss": 0.7672, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_exact_match": 80.79470198675497, |
|
"eval_f1": 88.35767687765708, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 7.237828561105591e-06, |
|
"loss": 0.7466, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_exact_match": 80.81362346263009, |
|
"eval_f1": 88.49878983679895, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 6.966850329690182e-06, |
|
"loss": 0.7294, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_exact_match": 80.66225165562913, |
|
"eval_f1": 88.4219719582272, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 6.695872098274772e-06, |
|
"loss": 0.7554, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_exact_match": 80.80416272469253, |
|
"eval_f1": 88.36066468276921, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 6.424893866859363e-06, |
|
"loss": 0.761, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_exact_match": 80.63386944181646, |
|
"eval_f1": 88.41315914502825, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 6.1539156354439526e-06, |
|
"loss": 0.7363, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_exact_match": 80.65279091769158, |
|
"eval_f1": 88.34571302471203, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 5.882937404028543e-06, |
|
"loss": 0.7057, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_exact_match": 80.91769157994324, |
|
"eval_f1": 88.47587754641178, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 5.611959172613134e-06, |
|
"loss": 0.6499, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_exact_match": 80.9271523178808, |
|
"eval_f1": 88.48670524401668, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 5.340980941197724e-06, |
|
"loss": 0.6538, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_exact_match": 80.89877010406812, |
|
"eval_f1": 88.44196466728391, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 5.070002709782314e-06, |
|
"loss": 0.6996, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"eval_exact_match": 80.98391674550615, |
|
"eval_f1": 88.49930789298939, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 4.799024478366904e-06, |
|
"loss": 0.6394, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_exact_match": 80.6244087038789, |
|
"eval_f1": 88.34556116700644, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 4.528046246951495e-06, |
|
"loss": 0.6721, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"eval_exact_match": 80.83254493850521, |
|
"eval_f1": 88.44910098996408, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 4.257068015536085e-06, |
|
"loss": 0.6854, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"eval_exact_match": 80.86092715231788, |
|
"eval_f1": 88.53212448905681, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 3.986089784120676e-06, |
|
"loss": 0.6761, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"eval_exact_match": 80.6717123935667, |
|
"eval_f1": 88.4448096587991, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 3.715111552705266e-06, |
|
"loss": 0.6943, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_exact_match": 80.52980132450331, |
|
"eval_f1": 88.33280893923866, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 3.444133321289856e-06, |
|
"loss": 0.6655, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_exact_match": 80.93661305581836, |
|
"eval_f1": 88.56888817902892, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 3.173155089874447e-06, |
|
"loss": 0.7029, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"eval_exact_match": 80.84200567644277, |
|
"eval_f1": 88.51593698418615, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 2.9021768584590373e-06, |
|
"loss": 0.6648, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"eval_exact_match": 80.83254493850521, |
|
"eval_f1": 88.61534249704374, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 2.6311986270436274e-06, |
|
"loss": 0.6901, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_exact_match": 80.94607379375591, |
|
"eval_f1": 88.66269781300855, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 2.360220395628218e-06, |
|
"loss": 0.6784, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"eval_exact_match": 80.879848628193, |
|
"eval_f1": 88.58225782143238, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 2.089242164212808e-06, |
|
"loss": 0.6543, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_exact_match": 80.99337748344371, |
|
"eval_f1": 88.67206922431193, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 1.8182639327973986e-06, |
|
"loss": 0.6419, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_exact_match": 80.97445600756859, |
|
"eval_f1": 88.6884615459273, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 1.5472857013819889e-06, |
|
"loss": 0.6617, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_exact_match": 80.85146641438033, |
|
"eval_f1": 88.58266438928703, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 1.2763074699665794e-06, |
|
"loss": 0.7022, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"eval_exact_match": 80.89877010406812, |
|
"eval_f1": 88.62217290793005, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.0053292385511697e-06, |
|
"loss": 0.662, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_exact_match": 80.96499526963103, |
|
"eval_f1": 88.5709480325892, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 7.343510071357602e-07, |
|
"loss": 0.6708, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_exact_match": 80.83254493850521, |
|
"eval_f1": 88.61340923000925, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 4.6337277572035044e-07, |
|
"loss": 0.6504, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_exact_match": 81.01229895931883, |
|
"eval_f1": 88.68117331158336, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 1.9239454430494084e-07, |
|
"loss": 0.6681, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"eval_exact_match": 81.03122043519394, |
|
"eval_f1": 88.70164904870342, |
|
"step": 55000 |
|
} |
|
], |
|
"max_steps": 55355, |
|
"num_train_epochs": 5, |
|
"total_flos": 1.079484291072e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|