|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.807017543859649, |
|
"eval_steps": 50, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.478505000877347e-05, |
|
"loss": 2.8559, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.195713520050049, |
|
"eval_runtime": 14.6436, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.4565713283032112e-05, |
|
"loss": 2.2991, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.9645323753356934, |
|
"eval_runtime": 14.6522, |
|
"eval_samples_per_second": 1.433, |
|
"eval_steps_per_second": 0.205, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.4346376557290754e-05, |
|
"loss": 2.1318, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.8834866285324097, |
|
"eval_runtime": 14.646, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.4127039831549393e-05, |
|
"loss": 2.035, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.8372365236282349, |
|
"eval_runtime": 14.6452, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.390770310580804e-05, |
|
"loss": 2.0511, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.8165128231048584, |
|
"eval_runtime": 14.6408, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.3688366380066677e-05, |
|
"loss": 2.0054, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.7788053750991821, |
|
"eval_runtime": 14.6416, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.3469029654325323e-05, |
|
"loss": 1.944, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.7566713094711304, |
|
"eval_runtime": 14.6486, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.324969292858396e-05, |
|
"loss": 1.9487, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.7560102939605713, |
|
"eval_runtime": 14.6501, |
|
"eval_samples_per_second": 1.433, |
|
"eval_steps_per_second": 0.205, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.3030356202842604e-05, |
|
"loss": 1.9183, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.7329343557357788, |
|
"eval_runtime": 14.6419, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.2811019477101246e-05, |
|
"loss": 1.8684, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.7263619899749756, |
|
"eval_runtime": 14.647, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.2591682751359888e-05, |
|
"loss": 1.9194, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.7277635335922241, |
|
"eval_runtime": 14.6488, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.237234602561853e-05, |
|
"loss": 1.8842, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.728346347808838, |
|
"eval_runtime": 14.6409, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.2153009299877172e-05, |
|
"loss": 1.817, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.709412693977356, |
|
"eval_runtime": 14.6427, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.1933672574135815e-05, |
|
"loss": 1.8139, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.7006276845932007, |
|
"eval_runtime": 14.6444, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.1714335848394457e-05, |
|
"loss": 1.8657, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.688984990119934, |
|
"eval_runtime": 14.6453, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.14949991226531e-05, |
|
"loss": 1.8366, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.6756386756896973, |
|
"eval_runtime": 14.6493, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.1275662396911738e-05, |
|
"loss": 1.8521, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.6723501682281494, |
|
"eval_runtime": 14.645, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.1056325671170383e-05, |
|
"loss": 1.8357, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.6671411991119385, |
|
"eval_runtime": 14.646, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.0836988945429022e-05, |
|
"loss": 1.8667, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.6564487218856812, |
|
"eval_runtime": 14.6438, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.0617652219687667e-05, |
|
"loss": 1.8163, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.6533170938491821, |
|
"eval_runtime": 14.6431, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.0398315493946306e-05, |
|
"loss": 1.844, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.6446335315704346, |
|
"eval_runtime": 14.6477, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.017897876820495e-05, |
|
"loss": 1.8403, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.6377147436141968, |
|
"eval_runtime": 14.65, |
|
"eval_samples_per_second": 1.433, |
|
"eval_steps_per_second": 0.205, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.995964204246359e-05, |
|
"loss": 1.8436, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 1.6400964260101318, |
|
"eval_runtime": 14.6512, |
|
"eval_samples_per_second": 1.433, |
|
"eval_steps_per_second": 0.205, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.9740305316722233e-05, |
|
"loss": 1.8255, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.6376707553863525, |
|
"eval_runtime": 14.6441, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.9520968590980875e-05, |
|
"loss": 1.7917, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 1.6302813291549683, |
|
"eval_runtime": 14.6397, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.9301631865239517e-05, |
|
"loss": 1.8289, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 1.6255093812942505, |
|
"eval_runtime": 14.6406, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.9082295139498156e-05, |
|
"loss": 1.8736, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.617380976676941, |
|
"eval_runtime": 14.6455, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.88629584137568e-05, |
|
"loss": 1.748, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 1.6211485862731934, |
|
"eval_runtime": 14.6529, |
|
"eval_samples_per_second": 1.433, |
|
"eval_steps_per_second": 0.205, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.8643621688015444e-05, |
|
"loss": 1.7769, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 1.6201837062835693, |
|
"eval_runtime": 14.6729, |
|
"eval_samples_per_second": 1.431, |
|
"eval_steps_per_second": 0.204, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.8424284962274086e-05, |
|
"loss": 1.7295, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 1.6232948303222656, |
|
"eval_runtime": 14.6843, |
|
"eval_samples_per_second": 1.43, |
|
"eval_steps_per_second": 0.204, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.8204948236532728e-05, |
|
"loss": 1.6845, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 1.626202940940857, |
|
"eval_runtime": 14.6738, |
|
"eval_samples_per_second": 1.431, |
|
"eval_steps_per_second": 0.204, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.7985611510791367e-05, |
|
"loss": 1.6655, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 1.617616057395935, |
|
"eval_runtime": 14.6736, |
|
"eval_samples_per_second": 1.431, |
|
"eval_steps_per_second": 0.204, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.7766274785050012e-05, |
|
"loss": 1.6687, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 1.6166654825210571, |
|
"eval_runtime": 14.7006, |
|
"eval_samples_per_second": 1.429, |
|
"eval_steps_per_second": 0.204, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.754693805930865e-05, |
|
"loss": 1.704, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 1.6115292310714722, |
|
"eval_runtime": 14.6762, |
|
"eval_samples_per_second": 1.431, |
|
"eval_steps_per_second": 0.204, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.7327601333567293e-05, |
|
"loss": 1.6818, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 1.6096872091293335, |
|
"eval_runtime": 14.6731, |
|
"eval_samples_per_second": 1.431, |
|
"eval_steps_per_second": 0.204, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.7108264607825935e-05, |
|
"loss": 1.675, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 1.6069471836090088, |
|
"eval_runtime": 14.6844, |
|
"eval_samples_per_second": 1.43, |
|
"eval_steps_per_second": 0.204, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.6888927882084577e-05, |
|
"loss": 1.6865, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 1.6076393127441406, |
|
"eval_runtime": 14.648, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.666959115634322e-05, |
|
"loss": 1.6966, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 1.603057861328125, |
|
"eval_runtime": 14.6382, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.6450254430601862e-05, |
|
"loss": 1.7239, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 1.6086949110031128, |
|
"eval_runtime": 14.6442, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.62309177048605e-05, |
|
"loss": 1.677, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 1.6053651571273804, |
|
"eval_runtime": 14.6414, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.6011580979119146e-05, |
|
"loss": 1.6925, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 1.5961679220199585, |
|
"eval_runtime": 14.6373, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.5792244253377785e-05, |
|
"loss": 1.7188, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 1.591933250427246, |
|
"eval_runtime": 14.6374, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.557290752763643e-05, |
|
"loss": 1.6994, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 1.5993770360946655, |
|
"eval_runtime": 14.6433, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.535357080189507e-05, |
|
"loss": 1.6817, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 1.5951728820800781, |
|
"eval_runtime": 14.6423, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.5134234076153711e-05, |
|
"loss": 1.6576, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 1.595167636871338, |
|
"eval_runtime": 14.6398, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.4914897350412355e-05, |
|
"loss": 1.7117, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_loss": 1.594815969467163, |
|
"eval_runtime": 14.633, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.4695560624670996e-05, |
|
"loss": 1.6795, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 1.5858888626098633, |
|
"eval_runtime": 14.6449, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.4476223898929636e-05, |
|
"loss": 1.7084, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 1.584323525428772, |
|
"eval_runtime": 14.6394, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.425688717318828e-05, |
|
"loss": 1.6785, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_loss": 1.5876024961471558, |
|
"eval_runtime": 14.6397, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.403755044744692e-05, |
|
"loss": 1.6863, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 1.5827977657318115, |
|
"eval_runtime": 14.6332, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.3818213721705564e-05, |
|
"loss": 1.718, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 1.5843836069107056, |
|
"eval_runtime": 14.6334, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.3598876995964205e-05, |
|
"loss": 1.6811, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 1.5835539102554321, |
|
"eval_runtime": 14.6371, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.3379540270222845e-05, |
|
"loss": 1.6748, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 1.5856655836105347, |
|
"eval_runtime": 14.6358, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.3160203544481489e-05, |
|
"loss": 1.6614, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 1.5807286500930786, |
|
"eval_runtime": 14.6393, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.294086681874013e-05, |
|
"loss": 1.6587, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_loss": 1.5819549560546875, |
|
"eval_runtime": 14.6411, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.2721530092998773e-05, |
|
"loss": 1.7167, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 1.5828579664230347, |
|
"eval_runtime": 14.6359, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.2502193367257414e-05, |
|
"loss": 1.663, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.5743989944458008, |
|
"eval_runtime": 14.6401, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.2282856641516056e-05, |
|
"loss": 1.5641, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 1.5980640649795532, |
|
"eval_runtime": 14.6296, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.2063519915774696e-05, |
|
"loss": 1.6048, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 1.598111629486084, |
|
"eval_runtime": 14.6392, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.1844183190033339e-05, |
|
"loss": 1.542, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 1.5949300527572632, |
|
"eval_runtime": 14.6318, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.162484646429198e-05, |
|
"loss": 1.5717, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 1.5896787643432617, |
|
"eval_runtime": 14.6355, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.1405509738550623e-05, |
|
"loss": 1.5775, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 1.5945594310760498, |
|
"eval_runtime": 14.6318, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.1186173012809265e-05, |
|
"loss": 1.5514, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_loss": 1.596181035041809, |
|
"eval_runtime": 14.6312, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.0966836287067907e-05, |
|
"loss": 1.5479, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 1.5988695621490479, |
|
"eval_runtime": 14.6305, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.074749956132655e-05, |
|
"loss": 1.5567, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_loss": 1.5939878225326538, |
|
"eval_runtime": 14.6282, |
|
"eval_samples_per_second": 1.436, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.0528162835585192e-05, |
|
"loss": 1.5917, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_loss": 1.5928601026535034, |
|
"eval_runtime": 14.6304, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0308826109843834e-05, |
|
"loss": 1.5635, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_loss": 1.5916301012039185, |
|
"eval_runtime": 14.6301, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0089489384102474e-05, |
|
"loss": 1.6097, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 1.5879075527191162, |
|
"eval_runtime": 14.6343, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.870152658361116e-06, |
|
"loss": 1.5256, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_loss": 1.5953552722930908, |
|
"eval_runtime": 14.6382, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.650815932619759e-06, |
|
"loss": 1.5758, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 1.5901867151260376, |
|
"eval_runtime": 14.6575, |
|
"eval_samples_per_second": 1.433, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.4314792068784e-06, |
|
"loss": 1.5924, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 1.592368721961975, |
|
"eval_runtime": 14.6369, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 9.212142481137043e-06, |
|
"loss": 1.5442, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 1.5874643325805664, |
|
"eval_runtime": 14.6605, |
|
"eval_samples_per_second": 1.432, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.992805755395683e-06, |
|
"loss": 1.5633, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 1.5897241830825806, |
|
"eval_runtime": 14.6465, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 8.773469029654325e-06, |
|
"loss": 1.5578, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 1.5877512693405151, |
|
"eval_runtime": 14.6433, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 8.554132303912968e-06, |
|
"loss": 1.5486, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 1.5919996500015259, |
|
"eval_runtime": 14.6424, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 8.33479557817161e-06, |
|
"loss": 1.559, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 1.5883424282073975, |
|
"eval_runtime": 14.6464, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 8.11545885243025e-06, |
|
"loss": 1.5766, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 1.5866857767105103, |
|
"eval_runtime": 14.6428, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 7.896122126688892e-06, |
|
"loss": 1.5489, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_loss": 1.5900993347167969, |
|
"eval_runtime": 14.6566, |
|
"eval_samples_per_second": 1.433, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.676785400947535e-06, |
|
"loss": 1.5606, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 1.5870875120162964, |
|
"eval_runtime": 14.632, |
|
"eval_samples_per_second": 1.435, |
|
"eval_steps_per_second": 0.205, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 7.457448675206178e-06, |
|
"loss": 1.5128, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_loss": 1.586663007736206, |
|
"eval_runtime": 14.6412, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.205, |
|
"step": 4000 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 5700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 400, |
|
"total_flos": 7.98304400105472e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|