{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.221626452189454, "eval_steps": 200, "global_step": 4600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 5e-05, "loss": 0.4292, "step": 100 }, { "epoch": 0.36, "learning_rate": 4.994863481875841e-05, "loss": 0.2746, "step": 200 }, { "epoch": 0.36, "eval_f1_score": 0.6564825695260478, "eval_label_f1": 0.8343125734430082, "eval_loss": 0.26023727655410767, "eval_runtime": 344.2386, "eval_samples_per_second": 2.905, "eval_steps_per_second": 0.363, "eval_wer": 0.10898676368139949, "step": 200 }, { "epoch": 0.54, "learning_rate": 4.979475034558115e-05, "loss": 0.2568, "step": 300 }, { "epoch": 0.71, "learning_rate": 4.9538978924776634e-05, "loss": 0.2481, "step": 400 }, { "epoch": 0.71, "eval_f1_score": 0.6577916992952232, "eval_label_f1": 0.8347689898198903, "eval_loss": 0.246540829539299, "eval_runtime": 341.9234, "eval_samples_per_second": 2.925, "eval_steps_per_second": 0.366, "eval_wer": 0.10217509095131203, "step": 400 }, { "epoch": 0.89, "learning_rate": 4.9182371575975736e-05, "loss": 0.2412, "step": 500 }, { "epoch": 1.07, "learning_rate": 4.8726393675266716e-05, "loss": 0.2385, "step": 600 }, { "epoch": 1.07, "eval_f1_score": 0.6684952978056427, "eval_label_f1": 0.8322884012539185, "eval_loss": 0.24104812741279602, "eval_runtime": 342.1463, "eval_samples_per_second": 2.923, "eval_steps_per_second": 0.365, "eval_wer": 0.10484557628299404, "step": 600 }, { "epoch": 1.25, "learning_rate": 4.817291893365055e-05, "loss": 0.2325, "step": 700 }, { "epoch": 1.43, "learning_rate": 4.752422169756048e-05, "loss": 0.2316, "step": 800 }, { "epoch": 1.43, "eval_f1_score": 0.6724477729601892, "eval_label_f1": 0.8316909735908553, "eval_loss": 0.23740312457084656, "eval_runtime": 344.4982, "eval_samples_per_second": 2.903, "eval_steps_per_second": 0.363, "eval_wer": 0.10221379363727842, "step": 800 }, { "epoch": 1.61, "learning_rate": 4.678296760308474e-05, "loss": 0.2304, "step": 900 }, { "epoch": 1.79, "learning_rate": 4.595220262229601e-05, "loss": 0.2291, "step": 1000 }, { "epoch": 1.79, "eval_f1_score": 0.6698076168040833, "eval_label_f1": 0.8292108362779742, "eval_loss": 0.2348490208387375, "eval_runtime": 338.1818, "eval_samples_per_second": 2.957, "eval_steps_per_second": 0.37, "eval_wer": 0.09683412028794798, "step": 1000 }, { "epoch": 1.97, "learning_rate": 4.503534054669892e-05, "loss": 0.2275, "step": 1100 }, { "epoch": 2.14, "learning_rate": 4.4036148959228365e-05, "loss": 0.2205, "step": 1200 }, { "epoch": 2.14, "eval_f1_score": 0.6744822196170379, "eval_label_f1": 0.8339194998046112, "eval_loss": 0.2333754003047943, "eval_runtime": 340.8077, "eval_samples_per_second": 2.934, "eval_steps_per_second": 0.367, "eval_wer": 0.09636968805635111, "step": 1200 }, { "epoch": 2.32, "learning_rate": 4.2958733752443195e-05, "loss": 0.2224, "step": 1300 }, { "epoch": 2.5, "learning_rate": 4.180752225653292e-05, "loss": 0.2211, "step": 1400 }, { "epoch": 2.5, "eval_f1_score": 0.672933803368586, "eval_label_f1": 0.8327457892675283, "eval_loss": 0.2319139689207077, "eval_runtime": 341.1059, "eval_samples_per_second": 2.932, "eval_steps_per_second": 0.366, "eval_wer": 0.09606006656861986, "step": 1400 }, { "epoch": 2.68, "learning_rate": 4.058724504646834e-05, "loss": 0.2196, "step": 1500 }, { "epoch": 2.86, "learning_rate": 3.9302916503054246e-05, "loss": 0.2163, "step": 1600 }, { "epoch": 2.86, "eval_f1_score": 0.6731669266770671, "eval_label_f1": 0.829953198127925, "eval_loss": 0.2304619699716568, "eval_runtime": 340.688, "eval_samples_per_second": 2.935, "eval_steps_per_second": 0.367, "eval_wer": 0.09807260623887298, "step": 1600 }, { "epoch": 3.04, "learning_rate": 3.7959814207763135e-05, "loss": 0.219, "step": 1700 }, { "epoch": 3.22, "learning_rate": 3.656345725602089e-05, "loss": 0.2108, "step": 1800 }, { "epoch": 3.22, "eval_f1_score": 0.6734375, "eval_label_f1": 0.8328125000000001, "eval_loss": 0.22986993193626404, "eval_runtime": 338.4413, "eval_samples_per_second": 2.955, "eval_steps_per_second": 0.369, "eval_wer": 0.09544082359315736, "step": 1800 }, { "epoch": 3.4, "learning_rate": 3.5119583578059846e-05, "loss": 0.2132, "step": 1900 }, { "epoch": 3.57, "learning_rate": 3.363412636053269e-05, "loss": 0.2104, "step": 2000 }, { "epoch": 3.57, "eval_f1_score": 0.6792156862745098, "eval_label_f1": 0.8368627450980393, "eval_loss": 0.22974026203155518, "eval_runtime": 343.1743, "eval_samples_per_second": 2.914, "eval_steps_per_second": 0.364, "eval_wer": 0.09919498413189876, "step": 2000 }, { "epoch": 3.75, "learning_rate": 3.211318966577581e-05, "loss": 0.2128, "step": 2100 }, { "epoch": 3.93, "learning_rate": 3.056302334890786e-05, "loss": 0.2124, "step": 2200 }, { "epoch": 3.93, "eval_f1_score": 0.6781925343811395, "eval_label_f1": 0.8345776031434184, "eval_loss": 0.2278670072555542, "eval_runtime": 338.3142, "eval_samples_per_second": 2.956, "eval_steps_per_second": 0.369, "eval_wer": 0.09447325644399722, "step": 2200 }, { "epoch": 4.11, "learning_rate": 2.8989997375834482e-05, "loss": 0.2077, "step": 2300 }, { "epoch": 4.29, "learning_rate": 2.7400575647692046e-05, "loss": 0.2027, "step": 2400 }, { "epoch": 4.29, "eval_f1_score": 0.6789638932496076, "eval_label_f1": 0.8335949764521193, "eval_loss": 0.22793905436992645, "eval_runtime": 338.197, "eval_samples_per_second": 2.957, "eval_steps_per_second": 0.37, "eval_wer": 0.09443455375803081, "step": 2400 }, { "epoch": 4.47, "learning_rate": 2.5801289439291388e-05, "loss": 0.2077, "step": 2500 }, { "epoch": 4.65, "learning_rate": 2.419871056070862e-05, "loss": 0.2055, "step": 2600 }, { "epoch": 4.65, "eval_f1_score": 0.6832347140039449, "eval_label_f1": 0.8347140039447732, "eval_loss": 0.2275087535381317, "eval_runtime": 337.608, "eval_samples_per_second": 2.962, "eval_steps_per_second": 0.37, "eval_wer": 0.09493768867559409, "step": 2600 }, { "epoch": 4.83, "learning_rate": 2.2599424352307957e-05, "loss": 0.2073, "step": 2700 }, { "epoch": 5.0, "learning_rate": 2.1010002624165527e-05, "loss": 0.209, "step": 2800 }, { "epoch": 5.0, "eval_f1_score": 0.6821766561514195, "eval_label_f1": 0.833596214511041, "eval_loss": 0.22685159742832184, "eval_runtime": 338.9186, "eval_samples_per_second": 2.951, "eval_steps_per_second": 0.369, "eval_wer": 0.09826611966870501, "step": 2800 }, { "epoch": 5.18, "learning_rate": 1.9436976651092144e-05, "loss": 0.2011, "step": 2900 }, { "epoch": 5.36, "learning_rate": 1.7886810334224192e-05, "loss": 0.2017, "step": 3000 }, { "epoch": 5.36, "eval_f1_score": 0.6834645669291338, "eval_label_f1": 0.8346456692913385, "eval_loss": 0.22723452746868134, "eval_runtime": 337.6021, "eval_samples_per_second": 2.962, "eval_steps_per_second": 0.37, "eval_wer": 0.09791779549500736, "step": 3000 }, { "epoch": 5.54, "learning_rate": 1.6365873639467315e-05, "loss": 0.2034, "step": 3100 }, { "epoch": 5.72, "learning_rate": 1.4880416421940155e-05, "loss": 0.2029, "step": 3200 }, { "epoch": 5.72, "eval_f1_score": 0.6818718049547778, "eval_label_f1": 0.8320880849390484, "eval_loss": 0.22658005356788635, "eval_runtime": 338.8852, "eval_samples_per_second": 2.951, "eval_steps_per_second": 0.369, "eval_wer": 0.09656320148618314, "step": 3200 }, { "epoch": 5.9, "learning_rate": 1.3436542743979125e-05, "loss": 0.202, "step": 3300 }, { "epoch": 6.08, "learning_rate": 1.2040185792236874e-05, "loss": 0.201, "step": 3400 }, { "epoch": 6.08, "eval_f1_score": 0.6800472255017709, "eval_label_f1": 0.8327430145611965, "eval_loss": 0.22656936943531036, "eval_runtime": 337.9074, "eval_samples_per_second": 2.959, "eval_steps_per_second": 0.37, "eval_wer": 0.09776298475114173, "step": 3400 }, { "epoch": 6.26, "learning_rate": 1.0697083496945765e-05, "loss": 0.1979, "step": 3500 }, { "epoch": 6.43, "learning_rate": 9.412754953531663e-06, "loss": 0.1985, "step": 3600 }, { "epoch": 6.43, "eval_f1_score": 0.6856465005931198, "eval_label_f1": 0.8335310399367339, "eval_loss": 0.22672241926193237, "eval_runtime": 339.7265, "eval_samples_per_second": 2.944, "eval_steps_per_second": 0.368, "eval_wer": 0.0994659029336636, "step": 3600 }, { "epoch": 6.61, "learning_rate": 8.192477743467078e-06, "loss": 0.2006, "step": 3700 }, { "epoch": 6.79, "learning_rate": 7.041266247556813e-06, "loss": 0.1996, "step": 3800 }, { "epoch": 6.79, "eval_f1_score": 0.686437327006722, "eval_label_f1": 0.8343218663503361, "eval_loss": 0.226467102766037, "eval_runtime": 338.3929, "eval_samples_per_second": 2.955, "eval_steps_per_second": 0.369, "eval_wer": 0.0988079572722347, "step": 3800 }, { "epoch": 6.97, "learning_rate": 5.9638510407716394e-06, "loss": 0.2002, "step": 3900 }, { "epoch": 7.15, "learning_rate": 4.9646594533010875e-06, "loss": 0.197, "step": 4000 }, { "epoch": 7.15, "eval_f1_score": 0.6843354430379748, "eval_label_f1": 0.8346518987341772, "eval_loss": 0.2263847291469574, "eval_runtime": 338.0877, "eval_samples_per_second": 2.958, "eval_steps_per_second": 0.37, "eval_wer": 0.09857574115643626, "step": 4000 }, { "epoch": 7.33, "learning_rate": 4.047797377703985e-06, "loss": 0.1983, "step": 4100 }, { "epoch": 7.51, "learning_rate": 3.217032396915265e-06, "loss": 0.1985, "step": 4200 }, { "epoch": 7.51, "eval_f1_score": 0.6837539432176655, "eval_label_f1": 0.8351735015772871, "eval_loss": 0.22642947733402252, "eval_runtime": 337.2581, "eval_samples_per_second": 2.965, "eval_steps_per_second": 0.371, "eval_wer": 0.09849833578450344, "step": 4200 }, { "epoch": 7.69, "learning_rate": 2.475778302439524e-06, "loss": 0.1967, "step": 4300 }, { "epoch": 7.86, "learning_rate": 1.827081066349459e-06, "loss": 0.1999, "step": 4400 }, { "epoch": 7.86, "eval_f1_score": 0.6861429135412555, "eval_label_f1": 0.8345834978286616, "eval_loss": 0.2263396978378296, "eval_runtime": 339.5995, "eval_samples_per_second": 2.945, "eval_steps_per_second": 0.368, "eval_wer": 0.09776298475114173, "step": 4400 }, { "epoch": 8.04, "learning_rate": 1.273606324733284e-06, "loss": 0.1953, "step": 4500 }, { "epoch": 8.22, "learning_rate": 8.176284240242638e-07, "loss": 0.1963, "step": 4600 }, { "epoch": 8.22, "eval_f1_score": 0.6864139020537124, "eval_label_f1": 0.8317535545023697, "eval_loss": 0.22643861174583435, "eval_runtime": 339.619, "eval_samples_per_second": 2.944, "eval_steps_per_second": 0.368, "eval_wer": 0.09784039012307454, "step": 4600 } ], "logging_steps": 100, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 200, "total_flos": 1.8108408869571455e+20, "train_batch_size": 4, "trial_name": null, "trial_params": null }