{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 179.53636841073939, "learning_rate": 2.0000000000000003e-06, "loss": 2.132, "step": 1 }, { "epoch": 0.04, "grad_norm": 251.9635633879995, "learning_rate": 4.000000000000001e-06, "loss": 2.5698, "step": 2 }, { "epoch": 0.06, "grad_norm": 152.8902496957368, "learning_rate": 6e-06, "loss": 1.768, "step": 3 }, { "epoch": 0.08, "grad_norm": 39.306296179877485, "learning_rate": 8.000000000000001e-06, "loss": 0.9482, "step": 4 }, { "epoch": 0.1, "grad_norm": 15.99491838072262, "learning_rate": 1e-05, "loss": 0.7738, "step": 5 }, { "epoch": 0.12, "grad_norm": 5.838903588486864, "learning_rate": 9.987820251299121e-06, "loss": 0.5098, "step": 6 }, { "epoch": 0.14, "grad_norm": 4.783670137975404, "learning_rate": 9.951340343707852e-06, "loss": 0.3721, "step": 7 }, { "epoch": 0.16, "grad_norm": 3.6109544656639025, "learning_rate": 9.890738003669029e-06, "loss": 0.2693, "step": 8 }, { "epoch": 0.18, "grad_norm": 1.7838453759518462, "learning_rate": 9.806308479691595e-06, "loss": 0.2655, "step": 9 }, { "epoch": 0.2, "grad_norm": 1.0136025622608915, "learning_rate": 9.698463103929542e-06, "loss": 0.2257, "step": 10 }, { "epoch": 0.22, "grad_norm": 1.8942955529683139, "learning_rate": 9.567727288213005e-06, "loss": 0.2505, "step": 11 }, { "epoch": 0.24, "grad_norm": 1.7740057720955722, "learning_rate": 9.414737964294636e-06, "loss": 0.2153, "step": 12 }, { "epoch": 0.26, "grad_norm": 1.299424789487767, "learning_rate": 9.24024048078213e-06, "loss": 0.2015, "step": 13 }, { "epoch": 0.28, "grad_norm": 0.8241827071039669, "learning_rate": 9.045084971874738e-06, "loss": 0.1962, "step": 14 }, { "epoch": 0.3, "grad_norm": 1.2657759943266225, "learning_rate": 8.83022221559489e-06, "loss": 0.2104, "step": 15 }, { "epoch": 0.32, "grad_norm": 0.6199251066045637, "learning_rate": 8.596699001693257e-06, "loss": 0.1858, "step": 16 }, { "epoch": 0.34, "grad_norm": 1.0082588167041746, "learning_rate": 8.345653031794292e-06, "loss": 0.1962, "step": 17 }, { "epoch": 0.36, "grad_norm": 1.3806733105910687, "learning_rate": 8.078307376628292e-06, "loss": 0.1922, "step": 18 }, { "epoch": 0.38, "grad_norm": 0.723149823087172, "learning_rate": 7.795964517353734e-06, "loss": 0.1775, "step": 19 }, { "epoch": 0.4, "grad_norm": 0.9173976871827653, "learning_rate": 7.500000000000001e-06, "loss": 0.1878, "step": 20 }, { "epoch": 0.42, "grad_norm": 1.0453801125015292, "learning_rate": 7.191855733945388e-06, "loss": 0.1737, "step": 21 }, { "epoch": 0.44, "grad_norm": 0.9248133544590185, "learning_rate": 6.873032967079562e-06, "loss": 0.184, "step": 22 }, { "epoch": 0.46, "grad_norm": 1.5551843239370804, "learning_rate": 6.545084971874738e-06, "loss": 0.163, "step": 23 }, { "epoch": 0.48, "grad_norm": 0.8847356518931627, "learning_rate": 6.209609477998339e-06, "loss": 0.1734, "step": 24 }, { "epoch": 0.5, "grad_norm": 0.6812558639710236, "learning_rate": 5.8682408883346535e-06, "loss": 0.1569, "step": 25 }, { "epoch": 0.52, "grad_norm": 1.262922291164637, "learning_rate": 5.522642316338268e-06, "loss": 0.1777, "step": 26 }, { "epoch": 0.54, "grad_norm": 0.6617931583057793, "learning_rate": 5.174497483512506e-06, "loss": 0.1622, "step": 27 }, { "epoch": 0.56, "grad_norm": 0.9677789314295031, "learning_rate": 4.825502516487497e-06, "loss": 0.1604, "step": 28 }, { "epoch": 0.58, "grad_norm": 0.6102597975752135, "learning_rate": 4.477357683661734e-06, "loss": 0.1376, "step": 29 }, { "epoch": 0.6, "grad_norm": 0.8360072005534209, "learning_rate": 4.131759111665349e-06, "loss": 0.1633, "step": 30 }, { "epoch": 0.62, "grad_norm": 0.6629076631146715, "learning_rate": 3.790390522001662e-06, "loss": 0.1354, "step": 31 }, { "epoch": 0.64, "grad_norm": 0.6314863269976039, "learning_rate": 3.4549150281252635e-06, "loss": 0.1548, "step": 32 }, { "epoch": 0.66, "grad_norm": 0.6606730970709336, "learning_rate": 3.12696703292044e-06, "loss": 0.1508, "step": 33 }, { "epoch": 0.68, "grad_norm": 0.7610654446520294, "learning_rate": 2.8081442660546126e-06, "loss": 0.1555, "step": 34 }, { "epoch": 0.7, "grad_norm": 0.5915047659147039, "learning_rate": 2.5000000000000015e-06, "loss": 0.1414, "step": 35 }, { "epoch": 0.72, "grad_norm": 0.5677743309663439, "learning_rate": 2.204035482646267e-06, "loss": 0.1367, "step": 36 }, { "epoch": 0.74, "grad_norm": 0.548800059070961, "learning_rate": 1.9216926233717087e-06, "loss": 0.1164, "step": 37 }, { "epoch": 0.76, "grad_norm": 0.5673284409321975, "learning_rate": 1.6543469682057105e-06, "loss": 0.1337, "step": 38 }, { "epoch": 0.78, "grad_norm": 0.7602183062829035, "learning_rate": 1.4033009983067454e-06, "loss": 0.1501, "step": 39 }, { "epoch": 0.8, "grad_norm": 0.5136603955006809, "learning_rate": 1.1697777844051105e-06, "loss": 0.1333, "step": 40 }, { "epoch": 0.82, "grad_norm": 0.5425021614926135, "learning_rate": 9.549150281252633e-07, "loss": 0.1405, "step": 41 }, { "epoch": 0.84, "grad_norm": 0.6257075274385691, "learning_rate": 7.597595192178702e-07, "loss": 0.1511, "step": 42 }, { "epoch": 0.86, "grad_norm": 0.5431160370524597, "learning_rate": 5.852620357053651e-07, "loss": 0.1515, "step": 43 }, { "epoch": 0.88, "grad_norm": 0.6088309676882525, "learning_rate": 4.322727117869951e-07, "loss": 0.1406, "step": 44 }, { "epoch": 0.9, "grad_norm": 0.5000759583820149, "learning_rate": 3.015368960704584e-07, "loss": 0.1319, "step": 45 }, { "epoch": 0.92, "grad_norm": 0.588422977399883, "learning_rate": 1.9369152030840553e-07, "loss": 0.1416, "step": 46 }, { "epoch": 0.94, "grad_norm": 0.47388093466019365, "learning_rate": 1.0926199633097156e-07, "loss": 0.1414, "step": 47 }, { "epoch": 0.96, "grad_norm": 0.47998421829050647, "learning_rate": 4.865965629214819e-08, "loss": 0.1411, "step": 48 }, { "epoch": 0.98, "grad_norm": 0.4988223499901082, "learning_rate": 1.2179748700879013e-08, "loss": 0.1212, "step": 49 }, { "epoch": 1.0, "grad_norm": 0.502592566297319, "learning_rate": 0.0, "loss": 0.1352, "step": 50 }, { "epoch": 1.0, "eval_loss": 0.12961728870868683, "eval_runtime": 7.4391, "eval_samples_per_second": 11.292, "eval_steps_per_second": 2.823, "step": 50 }, { "epoch": 1.0, "step": 50, "total_flos": 11042438184960.0, "train_loss": 0.32609067142009734, "train_runtime": 919.7114, "train_samples_per_second": 1.718, "train_steps_per_second": 0.054 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 11042438184960.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }