{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7565011820330969, "eval_steps": 10, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0252167060677699, "grad_norm": 0.1355619877576828, "learning_rate": 5e-05, "loss": 11.932, "step": 1 }, { "epoch": 0.0252167060677699, "eval_loss": 11.934293746948242, "eval_runtime": 3.4042, "eval_samples_per_second": 78.727, "eval_steps_per_second": 19.682, "step": 1 }, { "epoch": 0.0504334121355398, "grad_norm": 0.1327255219221115, "learning_rate": 0.0001, "loss": 11.9325, "step": 2 }, { "epoch": 0.07565011820330969, "grad_norm": 0.14952348172664642, "learning_rate": 9.98292246503335e-05, "loss": 11.9329, "step": 3 }, { "epoch": 0.1008668242710796, "grad_norm": 0.1367342323064804, "learning_rate": 9.931806517013612e-05, "loss": 11.9322, "step": 4 }, { "epoch": 0.12608353033884948, "grad_norm": 0.14394749701023102, "learning_rate": 9.847001329696653e-05, "loss": 11.9321, "step": 5 }, { "epoch": 0.15130023640661938, "grad_norm": 0.13902953267097473, "learning_rate": 9.729086208503174e-05, "loss": 11.9303, "step": 6 }, { "epoch": 0.1765169424743893, "grad_norm": 0.16207095980644226, "learning_rate": 9.578866633275288e-05, "loss": 11.929, "step": 7 }, { "epoch": 0.2017336485421592, "grad_norm": 0.17109227180480957, "learning_rate": 9.397368756032445e-05, "loss": 11.9289, "step": 8 }, { "epoch": 0.22695035460992907, "grad_norm": 0.18327724933624268, "learning_rate": 9.185832391312644e-05, "loss": 11.9282, "step": 9 }, { "epoch": 0.25216706067769895, "grad_norm": 0.17470045387744904, "learning_rate": 8.945702546981969e-05, "loss": 11.9248, "step": 10 }, { "epoch": 0.25216706067769895, "eval_loss": 11.928186416625977, "eval_runtime": 2.9094, "eval_samples_per_second": 92.116, "eval_steps_per_second": 23.029, "step": 10 }, { "epoch": 0.2773837667454689, "grad_norm": 0.1625070571899414, "learning_rate": 8.678619553365659e-05, "loss": 11.9293, "step": 11 }, { "epoch": 0.30260047281323876, "grad_norm": 0.18875440955162048, "learning_rate": 8.386407858128706e-05, "loss": 11.9261, "step": 12 }, { "epoch": 0.32781717888100864, "grad_norm": 0.17575034499168396, "learning_rate": 8.07106356344834e-05, "loss": 11.9265, "step": 13 }, { "epoch": 0.3530338849487786, "grad_norm": 0.19419781863689423, "learning_rate": 7.734740790612136e-05, "loss": 11.9246, "step": 14 }, { "epoch": 0.37825059101654845, "grad_norm": 0.2197292596101761, "learning_rate": 7.379736965185368e-05, "loss": 11.925, "step": 15 }, { "epoch": 0.4034672970843184, "grad_norm": 0.21017210185527802, "learning_rate": 7.008477123264848e-05, "loss": 11.923, "step": 16 }, { "epoch": 0.42868400315208827, "grad_norm": 0.2026052176952362, "learning_rate": 6.623497346023418e-05, "loss": 11.924, "step": 17 }, { "epoch": 0.45390070921985815, "grad_norm": 0.23036272823810577, "learning_rate": 6.227427435703997e-05, "loss": 11.9226, "step": 18 }, { "epoch": 0.4791174152876281, "grad_norm": 0.22065575420856476, "learning_rate": 5.8229729514036705e-05, "loss": 11.9186, "step": 19 }, { "epoch": 0.5043341213553979, "grad_norm": 0.24293360114097595, "learning_rate": 5.4128967273616625e-05, "loss": 11.92, "step": 20 }, { "epoch": 0.5043341213553979, "eval_loss": 11.920666694641113, "eval_runtime": 2.907, "eval_samples_per_second": 92.192, "eval_steps_per_second": 23.048, "step": 20 }, { "epoch": 0.5295508274231678, "grad_norm": 0.23226885497570038, "learning_rate": 5e-05, "loss": 11.9191, "step": 21 }, { "epoch": 0.5547675334909378, "grad_norm": 0.23677287995815277, "learning_rate": 4.5871032726383386e-05, "loss": 11.9191, "step": 22 }, { "epoch": 0.5799842395587076, "grad_norm": 0.24115414917469025, "learning_rate": 4.17702704859633e-05, "loss": 11.9169, "step": 23 }, { "epoch": 0.6052009456264775, "grad_norm": 0.23872853815555573, "learning_rate": 3.772572564296005e-05, "loss": 11.9193, "step": 24 }, { "epoch": 0.6304176516942475, "grad_norm": 0.24184449017047882, "learning_rate": 3.3765026539765834e-05, "loss": 11.9191, "step": 25 }, { "epoch": 0.6556343577620173, "grad_norm": 0.2603144645690918, "learning_rate": 2.991522876735154e-05, "loss": 11.9174, "step": 26 }, { "epoch": 0.6808510638297872, "grad_norm": 0.26540812849998474, "learning_rate": 2.6202630348146324e-05, "loss": 11.916, "step": 27 }, { "epoch": 0.7060677698975572, "grad_norm": 0.2523845136165619, "learning_rate": 2.2652592093878666e-05, "loss": 11.9157, "step": 28 }, { "epoch": 0.731284475965327, "grad_norm": 0.24367552995681763, "learning_rate": 1.928936436551661e-05, "loss": 11.9162, "step": 29 }, { "epoch": 0.7565011820330969, "grad_norm": 0.25121253728866577, "learning_rate": 1.6135921418712956e-05, "loss": 11.9183, "step": 30 }, { "epoch": 0.7565011820330969, "eval_loss": 11.915938377380371, "eval_runtime": 2.9116, "eval_samples_per_second": 92.045, "eval_steps_per_second": 23.011, "step": 30 } ], "logging_steps": 1, "max_steps": 40, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 487493216501760.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }