{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 25511, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.0001, "loss": 3.8798, "step": 500 }, { "epoch": 0.04, "learning_rate": 9.999617887970705e-05, "loss": 3.3824, "step": 1000 }, { "epoch": 0.06, "learning_rate": 9.998471610286659e-05, "loss": 3.3404, "step": 1500 }, { "epoch": 0.08, "learning_rate": 9.996561342150463e-05, "loss": 3.3427, "step": 2000 }, { "epoch": 0.1, "learning_rate": 9.993887375536685e-05, "loss": 3.2575, "step": 2500 }, { "epoch": 0.12, "learning_rate": 9.990450119147252e-05, "loss": 3.2676, "step": 3000 }, { "epoch": 0.14, "learning_rate": 9.98625009834897e-05, "loss": 3.1225, "step": 3500 }, { "epoch": 0.16, "learning_rate": 9.981287955093226e-05, "loss": 3.1038, "step": 4000 }, { "epoch": 0.18, "learning_rate": 9.97556444781787e-05, "loss": 3.132, "step": 4500 }, { "epoch": 0.2, "learning_rate": 9.969080451331299e-05, "loss": 3.1255, "step": 5000 }, { "epoch": 0.22, "learning_rate": 9.96183695667873e-05, "loss": 3.1358, "step": 5500 }, { "epoch": 0.24, "learning_rate": 9.95383507099074e-05, "loss": 3.1162, "step": 6000 }, { "epoch": 0.25, "learning_rate": 9.945076017314044e-05, "loss": 3.0475, "step": 6500 }, { "epoch": 0.27, "learning_rate": 9.935561134424548e-05, "loss": 3.0587, "step": 7000 }, { "epoch": 0.29, "learning_rate": 9.925291876622738e-05, "loss": 3.0822, "step": 7500 }, { "epoch": 0.31, "learning_rate": 9.914269813511388e-05, "loss": 3.0829, "step": 8000 }, { "epoch": 0.33, "learning_rate": 9.902496629755661e-05, "loss": 3.0594, "step": 8500 }, { "epoch": 0.35, "learning_rate": 9.88997412482561e-05, "loss": 3.0949, "step": 9000 }, { "epoch": 0.37, "learning_rate": 9.876704212721141e-05, "loss": 3.0807, "step": 9500 }, { "epoch": 0.39, "learning_rate": 9.862688921679476e-05, "loss": 2.9289, "step": 10000 }, { "epoch": 0.41, "learning_rate": 9.847930393865132e-05, "loss": 3.0059, "step": 10500 }, { "epoch": 0.43, "learning_rate": 9.832430885042515e-05, "loss": 3.0391, "step": 11000 }, { "epoch": 0.45, "learning_rate": 9.816192764231132e-05, "loss": 3.0225, "step": 11500 }, { "epoch": 0.47, "learning_rate": 9.799218513343504e-05, "loss": 3.0199, "step": 12000 }, { "epoch": 0.49, "learning_rate": 9.781510726805807e-05, "loss": 3.0201, "step": 12500 }, { "epoch": 0.51, "learning_rate": 9.763072111161344e-05, "loss": 3.0321, "step": 13000 }, { "epoch": 0.53, "learning_rate": 9.743905484656852e-05, "loss": 2.8965, "step": 13500 }, { "epoch": 0.55, "learning_rate": 9.724013776811747e-05, "loss": 3.0016, "step": 14000 }, { "epoch": 0.57, "learning_rate": 9.70340002797037e-05, "loss": 3.0327, "step": 14500 }, { "epoch": 0.59, "learning_rate": 9.682067388837286e-05, "loss": 3.036, "step": 15000 }, { "epoch": 0.61, "learning_rate": 9.660019119995702e-05, "loss": 3.0152, "step": 15500 }, { "epoch": 0.63, "learning_rate": 9.63725859140912e-05, "loss": 2.9673, "step": 16000 }, { "epoch": 0.65, "learning_rate": 9.613789281906243e-05, "loss": 3.0032, "step": 16500 }, { "epoch": 0.67, "learning_rate": 9.589614778649267e-05, "loss": 2.9865, "step": 17000 }, { "epoch": 0.69, "learning_rate": 9.564738776585591e-05, "loss": 3.0042, "step": 17500 }, { "epoch": 0.71, "learning_rate": 9.539165077883064e-05, "loss": 2.988, "step": 18000 }, { "epoch": 0.73, "learning_rate": 9.51289759134885e-05, "loss": 2.9729, "step": 18500 }, { "epoch": 0.74, "learning_rate": 9.485940331831984e-05, "loss": 2.9924, "step": 19000 }, { "epoch": 0.76, "learning_rate": 9.45829741960972e-05, "loss": 2.96, "step": 19500 }, { "epoch": 0.78, "learning_rate": 9.429973079757773e-05, "loss": 2.9702, "step": 20000 }, { "epoch": 0.8, "learning_rate": 9.400971641504533e-05, "loss": 2.9362, "step": 20500 }, { "epoch": 0.82, "learning_rate": 9.371297537569369e-05, "loss": 2.9421, "step": 21000 }, { "epoch": 0.84, "learning_rate": 9.340955303485112e-05, "loss": 2.8557, "step": 21500 }, { "epoch": 0.86, "learning_rate": 9.309949576904817e-05, "loss": 2.8443, "step": 22000 }, { "epoch": 0.88, "learning_rate": 9.278285096892927e-05, "loss": 2.8352, "step": 22500 }, { "epoch": 0.9, "learning_rate": 9.245966703200923e-05, "loss": 2.8187, "step": 23000 }, { "epoch": 0.92, "learning_rate": 9.212999335527607e-05, "loss": 2.887, "step": 23500 }, { "epoch": 0.94, "learning_rate": 9.179388032764086e-05, "loss": 2.9268, "step": 24000 }, { "epoch": 0.96, "learning_rate": 9.145137932223598e-05, "loss": 2.9458, "step": 24500 }, { "epoch": 0.98, "learning_rate": 9.110254268856312e-05, "loss": 2.8961, "step": 25000 }, { "epoch": 1.0, "learning_rate": 9.074742374449192e-05, "loss": 2.9421, "step": 25500 }, { "epoch": 1.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.790889024734497, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 5805056, "eval_runtime": 9395.1429, "eval_samples_per_second": 1.207, "eval_steps_per_second": 0.302, "eval_translation_length": 5805056, "step": 25511 } ], "logging_steps": 500, "max_steps": 127555, "num_train_epochs": 5, "save_steps": 500, "total_flos": 2.351831312404316e+17, "trial_name": null, "trial_params": null }