{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9673590504451037, "eval_steps": 200, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29673590504451036, "grad_norm": 4.922755241394043, "learning_rate": 9.600000000000001e-06, "loss": 0.6132, "step": 100 }, { "epoch": 0.5934718100890207, "grad_norm": 4.277930736541748, "learning_rate": 8.933333333333333e-06, "loss": 0.303, "step": 200 }, { "epoch": 0.5934718100890207, "eval_loss": 0.24335336685180664, "eval_runtime": 493.5362, "eval_samples_per_second": 4.652, "eval_steps_per_second": 0.146, "eval_wer": 0.4226068014038485, "step": 200 }, { "epoch": 0.8902077151335311, "grad_norm": 4.590033054351807, "learning_rate": 7.822222222222224e-06, "loss": 0.2564, "step": 300 }, { "epoch": 1.1869436201780414, "grad_norm": 3.3569984436035156, "learning_rate": 6.711111111111111e-06, "loss": 0.2, "step": 400 }, { "epoch": 1.1869436201780414, "eval_loss": 0.20352379977703094, "eval_runtime": 482.7378, "eval_samples_per_second": 4.756, "eval_steps_per_second": 0.149, "eval_wer": 0.39138327483964663, "step": 400 }, { "epoch": 1.4836795252225519, "grad_norm": 3.2953569889068604, "learning_rate": 5.600000000000001e-06, "loss": 0.1637, "step": 500 }, { "epoch": 1.7804154302670623, "grad_norm": 3.363754987716675, "learning_rate": 4.488888888888889e-06, "loss": 0.1633, "step": 600 }, { "epoch": 1.7804154302670623, "eval_loss": 0.18764939904212952, "eval_runtime": 484.1747, "eval_samples_per_second": 4.742, "eval_steps_per_second": 0.149, "eval_wer": 0.3469079026987777, "step": 600 }, { "epoch": 2.077151335311573, "grad_norm": 2.2284867763519287, "learning_rate": 3.377777777777778e-06, "loss": 0.1428, "step": 700 }, { "epoch": 2.373887240356083, "grad_norm": 2.0610995292663574, "learning_rate": 2.266666666666667e-06, "loss": 0.106, "step": 800 }, { "epoch": 2.373887240356083, "eval_loss": 0.18498285114765167, "eval_runtime": 480.7097, "eval_samples_per_second": 4.776, "eval_steps_per_second": 0.15, "eval_wer": 0.3487837347210456, "step": 800 }, { "epoch": 2.6706231454005933, "grad_norm": 2.554095983505249, "learning_rate": 1.1555555555555556e-06, "loss": 0.1029, "step": 900 }, { "epoch": 2.9673590504451037, "grad_norm": 2.727163314819336, "learning_rate": 4.444444444444445e-08, "loss": 0.1005, "step": 1000 }, { "epoch": 2.9673590504451037, "eval_loss": 0.18132926523685455, "eval_runtime": 479.4261, "eval_samples_per_second": 4.789, "eval_steps_per_second": 0.15, "eval_wer": 0.3367421033522934, "step": 1000 }, { "epoch": 2.9673590504451037, "step": 1000, "total_flos": 9.234732878187725e+18, "train_loss": 0.21518024158477783, "train_runtime": 7211.1645, "train_samples_per_second": 4.438, "train_steps_per_second": 0.139 } ], "logging_steps": 100, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.234732878187725e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }