{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.289544235924933, "eval_steps": 200, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 5e-05, "loss": 0.4292, "step": 100 }, { "epoch": 0.36, "learning_rate": 4.994863481875841e-05, "loss": 0.2746, "step": 200 }, { "epoch": 0.36, "eval_f1_score": 0.6564825695260478, "eval_label_f1": 0.8343125734430082, "eval_loss": 0.26023727655410767, "eval_runtime": 344.2386, "eval_samples_per_second": 2.905, "eval_steps_per_second": 0.363, "eval_wer": 0.10898676368139949, "step": 200 }, { "epoch": 0.54, "learning_rate": 4.979475034558115e-05, "loss": 0.2568, "step": 300 }, { "epoch": 0.71, "learning_rate": 4.9538978924776634e-05, "loss": 0.2481, "step": 400 }, { "epoch": 0.71, "eval_f1_score": 0.6577916992952232, "eval_label_f1": 0.8347689898198903, "eval_loss": 0.246540829539299, "eval_runtime": 341.9234, "eval_samples_per_second": 2.925, "eval_steps_per_second": 0.366, "eval_wer": 0.10217509095131203, "step": 400 }, { "epoch": 0.89, "learning_rate": 4.9182371575975736e-05, "loss": 0.2412, "step": 500 }, { "epoch": 1.07, "learning_rate": 4.8726393675266716e-05, "loss": 0.2385, "step": 600 }, { "epoch": 1.07, "eval_f1_score": 0.6684952978056427, "eval_label_f1": 0.8322884012539185, "eval_loss": 0.24104812741279602, "eval_runtime": 342.1463, "eval_samples_per_second": 2.923, "eval_steps_per_second": 0.365, "eval_wer": 0.10484557628299404, "step": 600 }, { "epoch": 1.25, "learning_rate": 4.817291893365055e-05, "loss": 0.2325, "step": 700 }, { "epoch": 1.43, "learning_rate": 4.752422169756048e-05, "loss": 0.2316, "step": 800 }, { "epoch": 1.43, "eval_f1_score": 0.6724477729601892, "eval_label_f1": 0.8316909735908553, "eval_loss": 0.23740312457084656, "eval_runtime": 344.4982, "eval_samples_per_second": 2.903, "eval_steps_per_second": 0.363, "eval_wer": 0.10221379363727842, "step": 800 }, { "epoch": 1.61, "learning_rate": 4.678296760308474e-05, "loss": 0.2304, "step": 900 }, { "epoch": 1.79, "learning_rate": 4.595220262229601e-05, "loss": 0.2291, "step": 1000 }, { "epoch": 1.79, "eval_f1_score": 0.6698076168040833, "eval_label_f1": 0.8292108362779742, "eval_loss": 0.2348490208387375, "eval_runtime": 338.1818, "eval_samples_per_second": 2.957, "eval_steps_per_second": 0.37, "eval_wer": 0.09683412028794798, "step": 1000 }, { "epoch": 1.97, "learning_rate": 4.503534054669892e-05, "loss": 0.2275, "step": 1100 }, { "epoch": 2.14, "learning_rate": 4.4036148959228365e-05, "loss": 0.2205, "step": 1200 }, { "epoch": 2.14, "eval_f1_score": 0.6744822196170379, "eval_label_f1": 0.8339194998046112, "eval_loss": 0.2333754003047943, "eval_runtime": 340.8077, "eval_samples_per_second": 2.934, "eval_steps_per_second": 0.367, "eval_wer": 0.09636968805635111, "step": 1200 }, { "epoch": 2.32, "learning_rate": 4.2958733752443195e-05, "loss": 0.2224, "step": 1300 }, { "epoch": 2.5, "learning_rate": 4.180752225653292e-05, "loss": 0.2211, "step": 1400 }, { "epoch": 2.5, "eval_f1_score": 0.672933803368586, "eval_label_f1": 0.8327457892675283, "eval_loss": 0.2319139689207077, "eval_runtime": 341.1059, "eval_samples_per_second": 2.932, "eval_steps_per_second": 0.366, "eval_wer": 0.09606006656861986, "step": 1400 }, { "epoch": 2.68, "learning_rate": 4.058724504646834e-05, "loss": 0.2196, "step": 1500 }, { "epoch": 2.86, "learning_rate": 3.9302916503054246e-05, "loss": 0.2163, "step": 1600 }, { "epoch": 2.86, "eval_f1_score": 0.6731669266770671, "eval_label_f1": 0.829953198127925, "eval_loss": 0.2304619699716568, "eval_runtime": 340.688, "eval_samples_per_second": 2.935, "eval_steps_per_second": 0.367, "eval_wer": 0.09807260623887298, "step": 1600 }, { "epoch": 3.04, "learning_rate": 3.7959814207763135e-05, "loss": 0.219, "step": 1700 }, { "epoch": 3.22, "learning_rate": 3.656345725602089e-05, "loss": 0.2108, "step": 1800 }, { "epoch": 3.22, "eval_f1_score": 0.6734375, "eval_label_f1": 0.8328125000000001, "eval_loss": 0.22986993193626404, "eval_runtime": 338.4413, "eval_samples_per_second": 2.955, "eval_steps_per_second": 0.369, "eval_wer": 0.09544082359315736, "step": 1800 }, { "epoch": 3.4, "learning_rate": 3.5119583578059846e-05, "loss": 0.2132, "step": 1900 }, { "epoch": 3.57, "learning_rate": 3.363412636053269e-05, "loss": 0.2104, "step": 2000 }, { "epoch": 3.57, "eval_f1_score": 0.6792156862745098, "eval_label_f1": 0.8368627450980393, "eval_loss": 0.22974026203155518, "eval_runtime": 343.1743, "eval_samples_per_second": 2.914, "eval_steps_per_second": 0.364, "eval_wer": 0.09919498413189876, "step": 2000 }, { "epoch": 3.75, "learning_rate": 3.211318966577581e-05, "loss": 0.2128, "step": 2100 }, { "epoch": 3.93, "learning_rate": 3.056302334890786e-05, "loss": 0.2124, "step": 2200 }, { "epoch": 3.93, "eval_f1_score": 0.6781925343811395, "eval_label_f1": 0.8345776031434184, "eval_loss": 0.2278670072555542, "eval_runtime": 338.3142, "eval_samples_per_second": 2.956, "eval_steps_per_second": 0.369, "eval_wer": 0.09447325644399722, "step": 2200 }, { "epoch": 4.11, "learning_rate": 2.8989997375834482e-05, "loss": 0.2077, "step": 2300 }, { "epoch": 4.29, "learning_rate": 2.7400575647692046e-05, "loss": 0.2027, "step": 2400 }, { "epoch": 4.29, "eval_f1_score": 0.6789638932496076, "eval_label_f1": 0.8335949764521193, "eval_loss": 0.22793905436992645, "eval_runtime": 338.197, "eval_samples_per_second": 2.957, "eval_steps_per_second": 0.37, "eval_wer": 0.09443455375803081, "step": 2400 } ], "logging_steps": 100, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 200, "total_flos": 9.447870845896129e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }