{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.8720549976018488,
  "eval_steps": 500,
  "global_step": 30000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 4e-05,
      "loss": 4.4951,
      "step": 500
    },
    {
      "epoch": 0.03,
      "learning_rate": 8e-05,
      "loss": 3.777,
      "step": 1000
    },
    {
      "epoch": 0.04,
      "learning_rate": 7.999831247941866e-05,
      "loss": 3.6246,
      "step": 1500
    },
    {
      "epoch": 0.06,
      "learning_rate": 7.99932500600609e-05,
      "loss": 3.5067,
      "step": 2000
    },
    {
      "epoch": 0.07,
      "learning_rate": 7.998481316907362e-05,
      "loss": 3.4947,
      "step": 2500
    },
    {
      "epoch": 0.09,
      "learning_rate": 7.99730025183281e-05,
      "loss": 3.4452,
      "step": 3000
    },
    {
      "epoch": 0.1,
      "learning_rate": 7.995781910436019e-05,
      "loss": 3.3696,
      "step": 3500
    },
    {
      "epoch": 0.12,
      "learning_rate": 7.993926420828609e-05,
      "loss": 3.4226,
      "step": 4000
    },
    {
      "epoch": 0.13,
      "learning_rate": 7.991733939569422e-05,
      "loss": 3.3765,
      "step": 4500
    },
    {
      "epoch": 0.15,
      "learning_rate": 7.989204651651322e-05,
      "loss": 3.4237,
      "step": 5000
    },
    {
      "epoch": 0.16,
      "learning_rate": 7.986338770485576e-05,
      "loss": 3.3054,
      "step": 5500
    },
    {
      "epoch": 0.17,
      "learning_rate": 7.983136537883862e-05,
      "loss": 3.3544,
      "step": 6000
    },
    {
      "epoch": 0.19,
      "learning_rate": 7.97959822403785e-05,
      "loss": 3.3659,
      "step": 6500
    },
    {
      "epoch": 0.2,
      "learning_rate": 7.97572412749641e-05,
      "loss": 3.3426,
      "step": 7000
    },
    {
      "epoch": 0.22,
      "learning_rate": 7.971514575140424e-05,
      "loss": 3.3332,
      "step": 7500
    },
    {
      "epoch": 0.23,
      "learning_rate": 7.966969922155206e-05,
      "loss": 3.3163,
      "step": 8000
    },
    {
      "epoch": 0.25,
      "learning_rate": 7.962090552000528e-05,
      "loss": 3.3127,
      "step": 8500
    },
    {
      "epoch": 0.26,
      "learning_rate": 7.956876876378266e-05,
      "loss": 3.3187,
      "step": 9000
    },
    {
      "epoch": 0.28,
      "learning_rate": 7.951329335197668e-05,
      "loss": 3.3195,
      "step": 9500
    },
    {
      "epoch": 0.29,
      "learning_rate": 7.94544839653823e-05,
      "loss": 3.2599,
      "step": 10000
    },
    {
      "epoch": 0.31,
      "learning_rate": 7.939234556610201e-05,
      "loss": 3.3353,
      "step": 10500
    },
    {
      "epoch": 0.32,
      "learning_rate": 7.932688339712721e-05,
      "loss": 3.1893,
      "step": 11000
    },
    {
      "epoch": 0.33,
      "learning_rate": 7.925810298189578e-05,
      "loss": 3.2074,
      "step": 11500
    },
    {
      "epoch": 0.35,
      "learning_rate": 7.918601012382602e-05,
      "loss": 3.2427,
      "step": 12000
    },
    {
      "epoch": 0.36,
      "learning_rate": 7.911061090582703e-05,
      "loss": 3.3292,
      "step": 12500
    },
    {
      "epoch": 0.38,
      "learning_rate": 7.90319116897854e-05,
      "loss": 3.2498,
      "step": 13000
    },
    {
      "epoch": 0.39,
      "learning_rate": 7.89499191160285e-05,
      "loss": 3.2745,
      "step": 13500
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.88646401027641e-05,
      "loss": 3.2645,
      "step": 14000
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.87760818454967e-05,
      "loss": 3.2276,
      "step": 14500
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.868425181642037e-05,
      "loss": 3.235,
      "step": 15000
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.858915776378836e-05,
      "loss": 3.1867,
      "step": 15500
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.849080771125918e-05,
      "loss": 3.1661,
      "step": 16000
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.838920995721975e-05,
      "loss": 3.2233,
      "step": 16500
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.828437307408509e-05,
      "loss": 3.1632,
      "step": 17000
    },
    {
      "epoch": 0.51,
      "learning_rate": 7.81763059075751e-05,
      "loss": 3.2981,
      "step": 17500
    },
    {
      "epoch": 0.52,
      "learning_rate": 7.806501757596819e-05,
      "loss": 3.2572,
      "step": 18000
    },
    {
      "epoch": 0.54,
      "learning_rate": 7.795051746933185e-05,
      "loss": 3.1959,
      "step": 18500
    },
    {
      "epoch": 0.55,
      "learning_rate": 7.783281524873039e-05,
      "loss": 3.2433,
      "step": 19000
    },
    {
      "epoch": 0.57,
      "learning_rate": 7.771192084540983e-05,
      "loss": 3.1956,
      "step": 19500
    },
    {
      "epoch": 0.58,
      "learning_rate": 7.75878444599598e-05,
      "loss": 3.2134,
      "step": 20000
    },
    {
      "epoch": 0.6,
      "learning_rate": 7.746059656145306e-05,
      "loss": 3.1633,
      "step": 20500
    },
    {
      "epoch": 0.61,
      "learning_rate": 7.733018788656199e-05,
      "loss": 3.2601,
      "step": 21000
    },
    {
      "epoch": 0.62,
      "learning_rate": 7.71966294386527e-05,
      "loss": 3.173,
      "step": 21500
    },
    {
      "epoch": 0.64,
      "learning_rate": 7.70599324868567e-05,
      "loss": 3.2264,
      "step": 22000
    },
    {
      "epoch": 0.65,
      "learning_rate": 7.692010856511996e-05,
      "loss": 3.1828,
      "step": 22500
    },
    {
      "epoch": 0.67,
      "learning_rate": 7.677716947122976e-05,
      "loss": 3.1522,
      "step": 23000
    },
    {
      "epoch": 0.68,
      "learning_rate": 7.663112726581924e-05,
      "loss": 3.2148,
      "step": 23500
    },
    {
      "epoch": 0.7,
      "learning_rate": 7.648199427134978e-05,
      "loss": 3.1741,
      "step": 24000
    },
    {
      "epoch": 0.71,
      "learning_rate": 7.632978307107125e-05,
      "loss": 3.2386,
      "step": 24500
    },
    {
      "epoch": 0.73,
      "learning_rate": 7.617450650796032e-05,
      "loss": 3.1865,
      "step": 25000
    },
    {
      "epoch": 0.74,
      "learning_rate": 7.601617768363678e-05,
      "loss": 3.2224,
      "step": 25500
    },
    {
      "epoch": 0.76,
      "learning_rate": 7.58548099572581e-05,
      "loss": 3.1192,
      "step": 26000
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.569041694439229e-05,
      "loss": 3.1802,
      "step": 26500
    },
    {
      "epoch": 0.78,
      "learning_rate": 7.552301251586894e-05,
      "loss": 3.1781,
      "step": 27000
    },
    {
      "epoch": 0.8,
      "learning_rate": 7.5352610796609e-05,
      "loss": 3.1921,
      "step": 27500
    },
    {
      "epoch": 0.81,
      "learning_rate": 7.517922616443289e-05,
      "loss": 3.1896,
      "step": 28000
    },
    {
      "epoch": 0.83,
      "learning_rate": 7.500287324884736e-05,
      "loss": 3.1911,
      "step": 28500
    },
    {
      "epoch": 0.84,
      "learning_rate": 7.482356692981116e-05,
      "loss": 3.1367,
      "step": 29000
    },
    {
      "epoch": 0.86,
      "learning_rate": 7.464132233647945e-05,
      "loss": 3.1416,
      "step": 29500
    },
    {
      "epoch": 0.87,
      "learning_rate": 7.445615484592736e-05,
      "loss": 3.1682,
      "step": 30000
    }
  ],
  "logging_steps": 500,
  "max_steps": 172005,
  "num_train_epochs": 5,
  "save_steps": 5000,
  "total_flos": 1.02801736728576e+18,
  "trial_name": null,
  "trial_params": null
}