{ "best_metric": null, "best_model_checkpoint": null, "epoch": 59.80732177263969, "eval_steps": 1164, "global_step": 11640, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.980732177263969, "grad_norm": 1.6488584280014038, "learning_rate": 9.00171821305842e-06, "loss": 2.4506, "step": 1164 }, { "epoch": 5.980732177263969, "eval_accuracy": 0.07356671740233384, "eval_loss": 2.987065315246582, "eval_runtime": 32.1198, "eval_samples_per_second": 245.456, "eval_steps_per_second": 12.298, "step": 1164 }, { "epoch": 11.961464354527939, "grad_norm": 4.022498607635498, "learning_rate": 8.00257731958763e-06, "loss": 2.3229, "step": 2328 }, { "epoch": 11.961464354527939, "eval_accuracy": 0.07331303906646372, "eval_loss": 3.1384434700012207, "eval_runtime": 28.9812, "eval_samples_per_second": 272.038, "eval_steps_per_second": 13.63, "step": 2328 }, { "epoch": 17.942196531791907, "grad_norm": 3.224060535430908, "learning_rate": 7.002577319587629e-06, "loss": 2.2364, "step": 3492 }, { "epoch": 17.942196531791907, "eval_accuracy": 0.07952815829528158, "eval_loss": 3.1919777393341064, "eval_runtime": 28.8853, "eval_samples_per_second": 272.942, "eval_steps_per_second": 13.675, "step": 3492 }, { "epoch": 23.922928709055878, "grad_norm": 3.2662484645843506, "learning_rate": 6.003436426116839e-06, "loss": 2.1589, "step": 4656 }, { "epoch": 23.922928709055878, "eval_accuracy": 0.0824771689497717, "eval_loss": 3.2666664123535156, "eval_runtime": 28.8936, "eval_samples_per_second": 272.863, "eval_steps_per_second": 13.671, "step": 4656 }, { "epoch": 29.903660886319845, "grad_norm": 4.750115394592285, "learning_rate": 5.003436426116839e-06, "loss": 2.1034, "step": 5820 }, { "epoch": 29.903660886319845, "eval_accuracy": 0.08351090816844242, "eval_loss": 3.2883238792419434, "eval_runtime": 28.8169, "eval_samples_per_second": 273.589, "eval_steps_per_second": 13.707, "step": 5820 }, { "epoch": 35.884393063583815, "grad_norm": 4.091363430023193, "learning_rate": 4.004295532646048e-06, "loss": 2.0555, "step": 6984 }, { "epoch": 35.884393063583815, "eval_accuracy": 0.08365043125317098, "eval_loss": 3.351201295852661, "eval_runtime": 28.9642, "eval_samples_per_second": 272.198, "eval_steps_per_second": 13.638, "step": 6984 }, { "epoch": 41.86512524084778, "grad_norm": 3.1545722484588623, "learning_rate": 3.004295532646048e-06, "loss": 2.0231, "step": 8148 }, { "epoch": 41.86512524084778, "eval_accuracy": 0.08360513155033703, "eval_loss": 3.4304280281066895, "eval_runtime": 28.8426, "eval_samples_per_second": 273.345, "eval_steps_per_second": 13.695, "step": 8148 }, { "epoch": 47.845857418111756, "grad_norm": 3.3551816940307617, "learning_rate": 2.005154639175258e-06, "loss": 2.0056, "step": 9312 }, { "epoch": 47.845857418111756, "eval_accuracy": 0.08306380010147134, "eval_loss": 3.553959369659424, "eval_runtime": 29.0501, "eval_samples_per_second": 271.393, "eval_steps_per_second": 13.597, "step": 9312 }, { "epoch": 53.82658959537572, "grad_norm": 3.364650249481201, "learning_rate": 1.005154639175258e-06, "loss": 1.9964, "step": 10476 }, { "epoch": 53.82658959537572, "eval_accuracy": 0.08203675517221941, "eval_loss": 3.589355230331421, "eval_runtime": 28.9839, "eval_samples_per_second": 272.014, "eval_steps_per_second": 13.628, "step": 10476 }, { "epoch": 59.80732177263969, "grad_norm": 5.201988697052002, "learning_rate": 6.0137457044673545e-09, "loss": 1.9848, "step": 11640 }, { "epoch": 59.80732177263969, "eval_accuracy": 0.08131659056316591, "eval_loss": 3.626406192779541, "eval_runtime": 29.0612, "eval_samples_per_second": 271.29, "eval_steps_per_second": 13.592, "step": 11640 }, { "epoch": 59.80732177263969, "step": 11640, "total_flos": 1.472280663793582e+18, "train_loss": 2.133767731656733, "train_runtime": 32669.9946, "train_samples_per_second": 114.321, "train_steps_per_second": 0.356 } ], "logging_steps": 1164, "max_steps": 11640, "num_input_tokens_seen": 0, "num_train_epochs": 60, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.472280663793582e+18, "train_batch_size": 40, "trial_name": null, "trial_params": null }