{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9961802902979373, "eval_steps": 500, "global_step": 163, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.061115355233002294, "grad_norm": 9.6875, "learning_rate": 2.9411764705882354e-05, "loss": 1.2797, "step": 10 }, { "epoch": 0.12223071046600459, "grad_norm": 8.8125, "learning_rate": 4.994792902163481e-05, "loss": 1.1373, "step": 20 }, { "epoch": 0.18334606569900688, "grad_norm": 5.375, "learning_rate": 4.902824459680752e-05, "loss": 1.1783, "step": 30 }, { "epoch": 0.24446142093200918, "grad_norm": 3.1875, "learning_rate": 4.7000305099338396e-05, "loss": 1.1725, "step": 40 }, { "epoch": 0.30557677616501144, "grad_norm": 2.5625, "learning_rate": 4.395764521196406e-05, "loss": 1.1683, "step": 50 }, { "epoch": 0.36669213139801377, "grad_norm": 2.6875, "learning_rate": 4.004060158062306e-05, "loss": 1.1519, "step": 60 }, { "epoch": 0.42780748663101603, "grad_norm": 2.53125, "learning_rate": 3.542984006530792e-05, "loss": 1.1442, "step": 70 }, { "epoch": 0.48892284186401835, "grad_norm": 2.171875, "learning_rate": 3.0338022885994904e-05, "loss": 1.1354, "step": 80 }, { "epoch": 0.5500381970970206, "grad_norm": 1.90625, "learning_rate": 2.5e-05, "loss": 1.1259, "step": 90 }, { "epoch": 0.6111535523300229, "grad_norm": 1.8828125, "learning_rate": 1.9661977114005098e-05, "loss": 1.0983, "step": 100 }, { "epoch": 0.6722689075630253, "grad_norm": 1.6953125, "learning_rate": 1.4570159934692085e-05, "loss": 1.0884, "step": 110 }, { "epoch": 0.7333842627960275, "grad_norm": 1.7265625, "learning_rate": 9.959398419376932e-06, "loss": 1.0599, "step": 120 }, { "epoch": 0.7944996180290298, "grad_norm": 1.625, "learning_rate": 6.042354788035942e-06, "loss": 1.0558, "step": 130 }, { "epoch": 0.8556149732620321, "grad_norm": 1.5859375, "learning_rate": 2.9996949006616094e-06, "loss": 1.0508, "step": 140 }, { "epoch": 0.9167303284950343, "grad_norm": 1.6640625, "learning_rate": 9.71755403192484e-07, "loss": 1.0264, "step": 150 }, { "epoch": 0.9778456837280367, "grad_norm": 1.6015625, "learning_rate": 5.20709783651957e-08, "loss": 1.0426, "step": 160 }, { "epoch": 0.9961802902979373, "step": 163, "total_flos": 9.115042887803863e+17, "train_loss": 1.117896075629018, "train_runtime": 6585.7608, "train_samples_per_second": 3.18, "train_steps_per_second": 0.025 } ], "logging_steps": 10, "max_steps": 163, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.115042887803863e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }