|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.7565011820330969, |
|
"eval_steps": 10, |
|
"global_step": 30, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0252167060677699, |
|
"grad_norm": 0.1355619877576828, |
|
"learning_rate": 5e-05, |
|
"loss": 11.932, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0252167060677699, |
|
"eval_loss": 11.934293746948242, |
|
"eval_runtime": 3.4042, |
|
"eval_samples_per_second": 78.727, |
|
"eval_steps_per_second": 19.682, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0504334121355398, |
|
"grad_norm": 0.1327255219221115, |
|
"learning_rate": 0.0001, |
|
"loss": 11.9325, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.07565011820330969, |
|
"grad_norm": 0.14952348172664642, |
|
"learning_rate": 9.98292246503335e-05, |
|
"loss": 11.9329, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.1008668242710796, |
|
"grad_norm": 0.1367342323064804, |
|
"learning_rate": 9.931806517013612e-05, |
|
"loss": 11.9322, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.12608353033884948, |
|
"grad_norm": 0.14394749701023102, |
|
"learning_rate": 9.847001329696653e-05, |
|
"loss": 11.9321, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.15130023640661938, |
|
"grad_norm": 0.13902953267097473, |
|
"learning_rate": 9.729086208503174e-05, |
|
"loss": 11.9303, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.1765169424743893, |
|
"grad_norm": 0.16207095980644226, |
|
"learning_rate": 9.578866633275288e-05, |
|
"loss": 11.929, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.2017336485421592, |
|
"grad_norm": 0.17109227180480957, |
|
"learning_rate": 9.397368756032445e-05, |
|
"loss": 11.9289, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.22695035460992907, |
|
"grad_norm": 0.18327724933624268, |
|
"learning_rate": 9.185832391312644e-05, |
|
"loss": 11.9282, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.25216706067769895, |
|
"grad_norm": 0.17470045387744904, |
|
"learning_rate": 8.945702546981969e-05, |
|
"loss": 11.9248, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.25216706067769895, |
|
"eval_loss": 11.928186416625977, |
|
"eval_runtime": 2.9094, |
|
"eval_samples_per_second": 92.116, |
|
"eval_steps_per_second": 23.029, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2773837667454689, |
|
"grad_norm": 0.1625070571899414, |
|
"learning_rate": 8.678619553365659e-05, |
|
"loss": 11.9293, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.30260047281323876, |
|
"grad_norm": 0.18875440955162048, |
|
"learning_rate": 8.386407858128706e-05, |
|
"loss": 11.9261, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.32781717888100864, |
|
"grad_norm": 0.17575034499168396, |
|
"learning_rate": 8.07106356344834e-05, |
|
"loss": 11.9265, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.3530338849487786, |
|
"grad_norm": 0.19419781863689423, |
|
"learning_rate": 7.734740790612136e-05, |
|
"loss": 11.9246, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.37825059101654845, |
|
"grad_norm": 0.2197292596101761, |
|
"learning_rate": 7.379736965185368e-05, |
|
"loss": 11.925, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.4034672970843184, |
|
"grad_norm": 0.21017210185527802, |
|
"learning_rate": 7.008477123264848e-05, |
|
"loss": 11.923, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.42868400315208827, |
|
"grad_norm": 0.2026052176952362, |
|
"learning_rate": 6.623497346023418e-05, |
|
"loss": 11.924, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.45390070921985815, |
|
"grad_norm": 0.23036272823810577, |
|
"learning_rate": 6.227427435703997e-05, |
|
"loss": 11.9226, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.4791174152876281, |
|
"grad_norm": 0.22065575420856476, |
|
"learning_rate": 5.8229729514036705e-05, |
|
"loss": 11.9186, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.5043341213553979, |
|
"grad_norm": 0.24293360114097595, |
|
"learning_rate": 5.4128967273616625e-05, |
|
"loss": 11.92, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5043341213553979, |
|
"eval_loss": 11.920666694641113, |
|
"eval_runtime": 2.907, |
|
"eval_samples_per_second": 92.192, |
|
"eval_steps_per_second": 23.048, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5295508274231678, |
|
"grad_norm": 0.23226885497570038, |
|
"learning_rate": 5e-05, |
|
"loss": 11.9191, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.5547675334909378, |
|
"grad_norm": 0.23677287995815277, |
|
"learning_rate": 4.5871032726383386e-05, |
|
"loss": 11.9191, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.5799842395587076, |
|
"grad_norm": 0.24115414917469025, |
|
"learning_rate": 4.17702704859633e-05, |
|
"loss": 11.9169, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.6052009456264775, |
|
"grad_norm": 0.23872853815555573, |
|
"learning_rate": 3.772572564296005e-05, |
|
"loss": 11.9193, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.6304176516942475, |
|
"grad_norm": 0.24184449017047882, |
|
"learning_rate": 3.3765026539765834e-05, |
|
"loss": 11.9191, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.6556343577620173, |
|
"grad_norm": 0.2603144645690918, |
|
"learning_rate": 2.991522876735154e-05, |
|
"loss": 11.9174, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.6808510638297872, |
|
"grad_norm": 0.26540812849998474, |
|
"learning_rate": 2.6202630348146324e-05, |
|
"loss": 11.916, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.7060677698975572, |
|
"grad_norm": 0.2523845136165619, |
|
"learning_rate": 2.2652592093878666e-05, |
|
"loss": 11.9157, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.731284475965327, |
|
"grad_norm": 0.24367552995681763, |
|
"learning_rate": 1.928936436551661e-05, |
|
"loss": 11.9162, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.7565011820330969, |
|
"grad_norm": 0.25121253728866577, |
|
"learning_rate": 1.6135921418712956e-05, |
|
"loss": 11.9183, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7565011820330969, |
|
"eval_loss": 11.915938377380371, |
|
"eval_runtime": 2.9116, |
|
"eval_samples_per_second": 92.045, |
|
"eval_steps_per_second": 23.011, |
|
"step": 30 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 40, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 487493216501760.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|