{ "best_metric": 3.0774757862091064, "best_model_checkpoint": "dq158/pingusPongus/checkpoint-80324", "epoch": 2.0, "eval_steps": 500, "global_step": 80324, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0001, "loss": 4.5056, "step": 500 }, { "epoch": 0.02, "learning_rate": 9.9999957472774e-05, "loss": 3.8512, "step": 1000 }, { "epoch": 0.04, "learning_rate": 9.999982989116833e-05, "loss": 3.6498, "step": 1500 }, { "epoch": 0.05, "learning_rate": 9.999961725540003e-05, "loss": 3.6328, "step": 2000 }, { "epoch": 0.06, "learning_rate": 9.999931956583082e-05, "loss": 3.6162, "step": 2500 }, { "epoch": 0.07, "learning_rate": 9.999893682296706e-05, "loss": 3.6415, "step": 3000 }, { "epoch": 0.09, "learning_rate": 9.999846902745986e-05, "loss": 3.6273, "step": 3500 }, { "epoch": 0.1, "learning_rate": 9.999791618010498e-05, "loss": 3.5555, "step": 4000 }, { "epoch": 0.11, "learning_rate": 9.999727828184285e-05, "loss": 3.5176, "step": 4500 }, { "epoch": 0.12, "learning_rate": 9.99965553337586e-05, "loss": 3.4374, "step": 5000 }, { "epoch": 0.14, "learning_rate": 9.999574733708204e-05, "loss": 3.5565, "step": 5500 }, { "epoch": 0.15, "learning_rate": 9.999485429318763e-05, "loss": 3.4639, "step": 6000 }, { "epoch": 0.16, "learning_rate": 9.99938762035945e-05, "loss": 3.3833, "step": 6500 }, { "epoch": 0.17, "learning_rate": 9.999281306996651e-05, "loss": 3.4726, "step": 7000 }, { "epoch": 0.19, "learning_rate": 9.999166489411211e-05, "loss": 3.4132, "step": 7500 }, { "epoch": 0.2, "learning_rate": 9.999043167798448e-05, "loss": 3.3754, "step": 8000 }, { "epoch": 0.21, "learning_rate": 9.99891134236814e-05, "loss": 3.4333, "step": 8500 }, { "epoch": 0.22, "learning_rate": 9.998771013344535e-05, "loss": 3.3313, "step": 9000 }, { "epoch": 0.24, "learning_rate": 9.998622180966344e-05, "loss": 3.3758, "step": 9500 }, { "epoch": 0.25, "learning_rate": 9.998464845486746e-05, "loss": 3.3814, "step": 10000 }, { "epoch": 0.26, "learning_rate": 9.998299007173383e-05, "loss": 3.429, "step": 10500 }, { "epoch": 0.27, "learning_rate": 9.99812466630836e-05, "loss": 3.4547, "step": 11000 }, { "epoch": 0.29, "learning_rate": 9.997941823188243e-05, "loss": 3.4204, "step": 11500 }, { "epoch": 0.3, "learning_rate": 9.99775047812407e-05, "loss": 3.3807, "step": 12000 }, { "epoch": 0.31, "learning_rate": 9.997550631441332e-05, "loss": 3.3749, "step": 12500 }, { "epoch": 0.32, "learning_rate": 9.997342283479989e-05, "loss": 3.326, "step": 13000 }, { "epoch": 0.34, "learning_rate": 9.997125434594458e-05, "loss": 3.3626, "step": 13500 }, { "epoch": 0.35, "learning_rate": 9.996900085153617e-05, "loss": 3.304, "step": 14000 }, { "epoch": 0.36, "learning_rate": 9.996666235540808e-05, "loss": 3.3671, "step": 14500 }, { "epoch": 0.37, "learning_rate": 9.996423886153828e-05, "loss": 3.3667, "step": 15000 }, { "epoch": 0.39, "learning_rate": 9.996173037404934e-05, "loss": 3.321, "step": 15500 }, { "epoch": 0.4, "learning_rate": 9.995913689720844e-05, "loss": 3.2639, "step": 16000 }, { "epoch": 0.41, "learning_rate": 9.995645843542732e-05, "loss": 3.3529, "step": 16500 }, { "epoch": 0.42, "learning_rate": 9.995369499326228e-05, "loss": 3.2607, "step": 17000 }, { "epoch": 0.44, "learning_rate": 9.995084657541416e-05, "loss": 3.2858, "step": 17500 }, { "epoch": 0.45, "learning_rate": 9.994791318672838e-05, "loss": 3.3516, "step": 18000 }, { "epoch": 0.46, "learning_rate": 9.994489483219492e-05, "loss": 3.2944, "step": 18500 }, { "epoch": 0.47, "learning_rate": 9.994179151694824e-05, "loss": 3.278, "step": 19000 }, { "epoch": 0.49, "learning_rate": 9.993860324626737e-05, "loss": 3.2557, "step": 19500 }, { "epoch": 0.5, "learning_rate": 9.993533002557585e-05, "loss": 3.366, "step": 20000 }, { "epoch": 0.51, "learning_rate": 9.99319718604417e-05, "loss": 3.2581, "step": 20500 }, { "epoch": 0.52, "learning_rate": 9.992852875657746e-05, "loss": 3.3302, "step": 21000 }, { "epoch": 0.54, "learning_rate": 9.992500071984017e-05, "loss": 3.334, "step": 21500 }, { "epoch": 0.55, "learning_rate": 9.992138775623132e-05, "loss": 3.3146, "step": 22000 }, { "epoch": 0.56, "learning_rate": 9.991768987189688e-05, "loss": 3.3315, "step": 22500 }, { "epoch": 0.57, "learning_rate": 9.991390707312733e-05, "loss": 3.3853, "step": 23000 }, { "epoch": 0.59, "learning_rate": 9.991003936635747e-05, "loss": 3.3447, "step": 23500 }, { "epoch": 0.6, "learning_rate": 9.990608675816668e-05, "loss": 3.1906, "step": 24000 }, { "epoch": 0.61, "learning_rate": 9.990204925527867e-05, "loss": 3.3639, "step": 24500 }, { "epoch": 0.62, "learning_rate": 9.989792686456158e-05, "loss": 3.2723, "step": 25000 }, { "epoch": 0.63, "learning_rate": 9.989371959302797e-05, "loss": 3.2156, "step": 25500 }, { "epoch": 0.65, "learning_rate": 9.988942744783481e-05, "loss": 3.3264, "step": 26000 }, { "epoch": 0.66, "learning_rate": 9.988505043628337e-05, "loss": 3.2336, "step": 26500 }, { "epoch": 0.67, "learning_rate": 9.98805885658194e-05, "loss": 3.2806, "step": 27000 }, { "epoch": 0.68, "learning_rate": 9.98760418440329e-05, "loss": 3.3251, "step": 27500 }, { "epoch": 0.7, "learning_rate": 9.987141027865825e-05, "loss": 3.2188, "step": 28000 }, { "epoch": 0.71, "learning_rate": 9.986669387757414e-05, "loss": 3.2981, "step": 28500 }, { "epoch": 0.72, "learning_rate": 9.986189264880364e-05, "loss": 3.3023, "step": 29000 }, { "epoch": 0.73, "learning_rate": 9.985700660051403e-05, "loss": 3.3271, "step": 29500 }, { "epoch": 0.75, "learning_rate": 9.985203574101691e-05, "loss": 3.2955, "step": 30000 }, { "epoch": 0.76, "learning_rate": 9.984698007876816e-05, "loss": 3.3756, "step": 30500 }, { "epoch": 0.77, "learning_rate": 9.984183962236792e-05, "loss": 3.2936, "step": 31000 }, { "epoch": 0.78, "learning_rate": 9.983661438056056e-05, "loss": 3.3248, "step": 31500 }, { "epoch": 0.8, "learning_rate": 9.983130436223469e-05, "loss": 3.2569, "step": 32000 }, { "epoch": 0.81, "learning_rate": 9.98259095764231e-05, "loss": 3.3616, "step": 32500 }, { "epoch": 0.82, "learning_rate": 9.982043003230282e-05, "loss": 3.2892, "step": 33000 }, { "epoch": 0.83, "learning_rate": 9.981486573919504e-05, "loss": 3.2331, "step": 33500 }, { "epoch": 0.85, "learning_rate": 9.98092167065651e-05, "loss": 3.3242, "step": 34000 }, { "epoch": 0.86, "learning_rate": 9.980348294402255e-05, "loss": 3.3123, "step": 34500 }, { "epoch": 0.87, "learning_rate": 9.9797664461321e-05, "loss": 3.2869, "step": 35000 }, { "epoch": 0.88, "learning_rate": 9.979176126835821e-05, "loss": 3.2566, "step": 35500 }, { "epoch": 0.9, "learning_rate": 9.978577337517603e-05, "loss": 3.3036, "step": 36000 }, { "epoch": 0.91, "learning_rate": 9.977970079196041e-05, "loss": 3.299, "step": 36500 }, { "epoch": 0.92, "learning_rate": 9.977354352904136e-05, "loss": 3.31, "step": 37000 }, { "epoch": 0.93, "learning_rate": 9.976730159689292e-05, "loss": 3.2459, "step": 37500 }, { "epoch": 0.95, "learning_rate": 9.976097500613318e-05, "loss": 3.3162, "step": 38000 }, { "epoch": 0.96, "learning_rate": 9.975456376752424e-05, "loss": 3.3056, "step": 38500 }, { "epoch": 0.97, "learning_rate": 9.974806789197216e-05, "loss": 3.2404, "step": 39000 }, { "epoch": 0.98, "learning_rate": 9.974148739052703e-05, "loss": 3.2376, "step": 39500 }, { "epoch": 1.0, "learning_rate": 9.973482227438287e-05, "loss": 3.356, "step": 40000 }, { "epoch": 1.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 3.1455512046813965, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 4569600, "eval_runtime": 8741.384, "eval_samples_per_second": 1.021, "eval_steps_per_second": 0.511, "eval_translation_length": 4569600, "step": 40162 }, { "epoch": 1.01, "learning_rate": 9.972807255487761e-05, "loss": 3.2504, "step": 40500 }, { "epoch": 1.02, "learning_rate": 9.972123824349316e-05, "loss": 3.23, "step": 41000 }, { "epoch": 1.03, "learning_rate": 9.971431935185526e-05, "loss": 3.1975, "step": 41500 }, { "epoch": 1.05, "learning_rate": 9.970731589173359e-05, "loss": 3.2147, "step": 42000 }, { "epoch": 1.06, "learning_rate": 9.970022787504163e-05, "loss": 3.1061, "step": 42500 }, { "epoch": 1.07, "learning_rate": 9.969305531383673e-05, "loss": 3.1693, "step": 43000 }, { "epoch": 1.08, "learning_rate": 9.968579822032009e-05, "loss": 3.1406, "step": 43500 }, { "epoch": 1.1, "learning_rate": 9.967845660683664e-05, "loss": 3.28, "step": 44000 }, { "epoch": 1.11, "learning_rate": 9.967103048587511e-05, "loss": 3.1341, "step": 44500 }, { "epoch": 1.12, "learning_rate": 9.966351987006803e-05, "loss": 3.1911, "step": 45000 }, { "epoch": 1.13, "learning_rate": 9.965592477219158e-05, "loss": 3.2183, "step": 45500 }, { "epoch": 1.15, "learning_rate": 9.964824520516576e-05, "loss": 3.1452, "step": 46000 }, { "epoch": 1.16, "learning_rate": 9.964048118205414e-05, "loss": 3.2294, "step": 46500 }, { "epoch": 1.17, "learning_rate": 9.963263271606403e-05, "loss": 3.2148, "step": 47000 }, { "epoch": 1.18, "learning_rate": 9.962469982054638e-05, "loss": 3.2715, "step": 47500 }, { "epoch": 1.2, "learning_rate": 9.961668250899575e-05, "loss": 3.1949, "step": 48000 }, { "epoch": 1.21, "learning_rate": 9.96085807950503e-05, "loss": 3.1926, "step": 48500 }, { "epoch": 1.22, "learning_rate": 9.960039469249177e-05, "loss": 3.2328, "step": 49000 }, { "epoch": 1.23, "learning_rate": 9.959212421524542e-05, "loss": 3.121, "step": 49500 }, { "epoch": 1.24, "learning_rate": 9.95837693773801e-05, "loss": 3.1933, "step": 50000 }, { "epoch": 1.26, "learning_rate": 9.957533019310813e-05, "loss": 3.1807, "step": 50500 }, { "epoch": 1.27, "learning_rate": 9.956680667678531e-05, "loss": 3.2696, "step": 51000 }, { "epoch": 1.28, "learning_rate": 9.955819884291088e-05, "loss": 3.1498, "step": 51500 }, { "epoch": 1.29, "learning_rate": 9.954950670612758e-05, "loss": 3.1895, "step": 52000 }, { "epoch": 1.31, "learning_rate": 9.954073028122147e-05, "loss": 3.2206, "step": 52500 }, { "epoch": 1.32, "learning_rate": 9.953186958312204e-05, "loss": 3.1473, "step": 53000 }, { "epoch": 1.33, "learning_rate": 9.952292462690212e-05, "loss": 3.2062, "step": 53500 }, { "epoch": 1.34, "learning_rate": 9.951389542777789e-05, "loss": 3.2043, "step": 54000 }, { "epoch": 1.36, "learning_rate": 9.95047820011088e-05, "loss": 3.1293, "step": 54500 }, { "epoch": 1.37, "learning_rate": 9.949558436239762e-05, "loss": 3.2194, "step": 55000 }, { "epoch": 1.38, "learning_rate": 9.948630252729036e-05, "loss": 3.2101, "step": 55500 }, { "epoch": 1.39, "learning_rate": 9.947693651157621e-05, "loss": 3.2321, "step": 56000 }, { "epoch": 1.41, "learning_rate": 9.946748633118766e-05, "loss": 3.2132, "step": 56500 }, { "epoch": 1.42, "learning_rate": 9.945795200220022e-05, "loss": 3.2587, "step": 57000 }, { "epoch": 1.43, "learning_rate": 9.944833354083273e-05, "loss": 3.1798, "step": 57500 }, { "epoch": 1.44, "learning_rate": 9.943863096344698e-05, "loss": 3.2519, "step": 58000 }, { "epoch": 1.46, "learning_rate": 9.942884428654794e-05, "loss": 3.1065, "step": 58500 }, { "epoch": 1.47, "learning_rate": 9.941897352678362e-05, "loss": 3.3128, "step": 59000 }, { "epoch": 1.48, "learning_rate": 9.940901870094506e-05, "loss": 3.2352, "step": 59500 }, { "epoch": 1.49, "learning_rate": 9.939897982596631e-05, "loss": 3.1773, "step": 60000 }, { "epoch": 1.51, "learning_rate": 9.938885691892437e-05, "loss": 3.2472, "step": 60500 }, { "epoch": 1.52, "learning_rate": 9.937864999703925e-05, "loss": 3.0983, "step": 61000 }, { "epoch": 1.53, "learning_rate": 9.936835907767378e-05, "loss": 3.2046, "step": 61500 }, { "epoch": 1.54, "learning_rate": 9.935798417833376e-05, "loss": 3.1239, "step": 62000 }, { "epoch": 1.56, "learning_rate": 9.934752531666782e-05, "loss": 3.2518, "step": 62500 }, { "epoch": 1.57, "learning_rate": 9.933698251046739e-05, "loss": 3.2518, "step": 63000 }, { "epoch": 1.58, "learning_rate": 9.932635577766676e-05, "loss": 3.0939, "step": 63500 }, { "epoch": 1.59, "learning_rate": 9.931564513634291e-05, "loss": 3.2243, "step": 64000 }, { "epoch": 1.61, "learning_rate": 9.930485060471562e-05, "loss": 3.196, "step": 64500 }, { "epoch": 1.62, "learning_rate": 9.929397220114736e-05, "loss": 3.2016, "step": 65000 }, { "epoch": 1.63, "learning_rate": 9.928300994414321e-05, "loss": 3.1955, "step": 65500 }, { "epoch": 1.64, "learning_rate": 9.9271963852351e-05, "loss": 3.2093, "step": 66000 }, { "epoch": 1.66, "learning_rate": 9.92608339445611e-05, "loss": 3.2283, "step": 66500 }, { "epoch": 1.67, "learning_rate": 9.924962023970646e-05, "loss": 3.2368, "step": 67000 }, { "epoch": 1.68, "learning_rate": 9.92383227568626e-05, "loss": 3.1782, "step": 67500 }, { "epoch": 1.69, "learning_rate": 9.922694151524756e-05, "loss": 3.1762, "step": 68000 }, { "epoch": 1.71, "learning_rate": 9.921547653422182e-05, "loss": 3.2227, "step": 68500 }, { "epoch": 1.72, "learning_rate": 9.920392783328834e-05, "loss": 3.1744, "step": 69000 }, { "epoch": 1.73, "learning_rate": 9.91922954320925e-05, "loss": 3.1868, "step": 69500 }, { "epoch": 1.74, "learning_rate": 9.918057935042204e-05, "loss": 3.1313, "step": 70000 }, { "epoch": 1.76, "learning_rate": 9.916877960820705e-05, "loss": 3.1623, "step": 70500 }, { "epoch": 1.77, "learning_rate": 9.915689622551996e-05, "loss": 3.0594, "step": 71000 }, { "epoch": 1.78, "learning_rate": 9.914492922257546e-05, "loss": 3.2163, "step": 71500 }, { "epoch": 1.79, "learning_rate": 9.913287861973049e-05, "loss": 3.0811, "step": 72000 }, { "epoch": 1.81, "learning_rate": 9.912074443748416e-05, "loss": 3.2226, "step": 72500 }, { "epoch": 1.82, "learning_rate": 9.910852669647785e-05, "loss": 3.2876, "step": 73000 }, { "epoch": 1.83, "learning_rate": 9.909622541749499e-05, "loss": 3.2285, "step": 73500 }, { "epoch": 1.84, "learning_rate": 9.908384062146118e-05, "loss": 3.1274, "step": 74000 }, { "epoch": 1.85, "learning_rate": 9.907137232944404e-05, "loss": 3.1894, "step": 74500 }, { "epoch": 1.87, "learning_rate": 9.905882056265323e-05, "loss": 3.1468, "step": 75000 }, { "epoch": 1.88, "learning_rate": 9.904618534244044e-05, "loss": 3.2397, "step": 75500 }, { "epoch": 1.89, "learning_rate": 9.903346669029932e-05, "loss": 3.0135, "step": 76000 }, { "epoch": 1.9, "learning_rate": 9.90206646278654e-05, "loss": 3.2206, "step": 76500 }, { "epoch": 1.92, "learning_rate": 9.900777917691615e-05, "loss": 3.1868, "step": 77000 }, { "epoch": 1.93, "learning_rate": 9.899481035937086e-05, "loss": 3.2184, "step": 77500 }, { "epoch": 1.94, "learning_rate": 9.898175819729063e-05, "loss": 3.1739, "step": 78000 }, { "epoch": 1.95, "learning_rate": 9.896862271287839e-05, "loss": 3.2227, "step": 78500 }, { "epoch": 1.97, "learning_rate": 9.895540392847874e-05, "loss": 3.2236, "step": 79000 }, { "epoch": 1.98, "learning_rate": 9.8942101866578e-05, "loss": 3.1321, "step": 79500 }, { "epoch": 1.99, "learning_rate": 9.892871654980418e-05, "loss": 3.1849, "step": 80000 }, { "epoch": 2.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 3.0774757862091064, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 4569600, "eval_runtime": 8605.765, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.519, "eval_translation_length": 4569600, "step": 80324 } ], "logging_steps": 500, "max_steps": 1204860, "num_train_epochs": 30, "save_steps": 1000, "total_flos": 1.3785697922643395e+18, "trial_name": null, "trial_params": null }