yomilimi's picture
Upload folder using huggingface_hub
0a46a49 verified
raw
history blame
14.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.943454686289698,
"eval_steps": 500,
"global_step": 38000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.038729666924864445,
"grad_norm": 5.300307750701904,
"learning_rate": 1.9741802220500904e-05,
"loss": 0.008,
"step": 500
},
{
"epoch": 0.07745933384972889,
"grad_norm": 0.046279702335596085,
"learning_rate": 1.9483604441001807e-05,
"loss": 0.0079,
"step": 1000
},
{
"epoch": 0.11618900077459333,
"grad_norm": 0.16018961369991302,
"learning_rate": 1.9225406661502713e-05,
"loss": 0.0077,
"step": 1500
},
{
"epoch": 0.15491866769945778,
"grad_norm": 0.12051185965538025,
"learning_rate": 1.8967208882003616e-05,
"loss": 0.0078,
"step": 2000
},
{
"epoch": 0.19364833462432224,
"grad_norm": 0.16797564923763275,
"learning_rate": 1.870901110250452e-05,
"loss": 0.007,
"step": 2500
},
{
"epoch": 0.23237800154918667,
"grad_norm": 0.20184361934661865,
"learning_rate": 1.845081332300542e-05,
"loss": 0.0072,
"step": 3000
},
{
"epoch": 0.2711076684740511,
"grad_norm": 0.3552689552307129,
"learning_rate": 1.8192615543506328e-05,
"loss": 0.0069,
"step": 3500
},
{
"epoch": 0.30983733539891556,
"grad_norm": 0.14999011158943176,
"learning_rate": 1.7937516137361218e-05,
"loss": 0.0185,
"step": 4000
},
{
"epoch": 0.34856700232378,
"grad_norm": 0.04176361858844757,
"learning_rate": 1.7679318357862124e-05,
"loss": 0.007,
"step": 4500
},
{
"epoch": 0.3872966692486445,
"grad_norm": 0.11695006489753723,
"learning_rate": 1.7421120578363027e-05,
"loss": 0.0062,
"step": 5000
},
{
"epoch": 0.42602633617350893,
"grad_norm": 0.11298699676990509,
"learning_rate": 1.716292279886393e-05,
"loss": 0.0077,
"step": 5500
},
{
"epoch": 0.46475600309837334,
"grad_norm": 0.7519420385360718,
"learning_rate": 1.6904725019364833e-05,
"loss": 0.0065,
"step": 6000
},
{
"epoch": 0.5034856700232379,
"grad_norm": 0.146876260638237,
"learning_rate": 1.664652723986574e-05,
"loss": 0.0055,
"step": 6500
},
{
"epoch": 0.5422153369481022,
"grad_norm": 0.18907134234905243,
"learning_rate": 1.638832946036664e-05,
"loss": 0.0061,
"step": 7000
},
{
"epoch": 0.5809450038729667,
"grad_norm": 0.3313717842102051,
"learning_rate": 1.6130131680867548e-05,
"loss": 0.0062,
"step": 7500
},
{
"epoch": 0.6196746707978311,
"grad_norm": 0.07646404951810837,
"learning_rate": 1.587193390136845e-05,
"loss": 0.0059,
"step": 8000
},
{
"epoch": 0.6584043377226956,
"grad_norm": 0.09517477452754974,
"learning_rate": 1.5613736121869353e-05,
"loss": 0.0058,
"step": 8500
},
{
"epoch": 0.69713400464756,
"grad_norm": 0.15235310792922974,
"learning_rate": 1.5355538342370256e-05,
"loss": 0.0055,
"step": 9000
},
{
"epoch": 0.7358636715724245,
"grad_norm": 0.07088156044483185,
"learning_rate": 1.509734056287116e-05,
"loss": 0.0058,
"step": 9500
},
{
"epoch": 0.774593338497289,
"grad_norm": 0.19365285336971283,
"learning_rate": 1.4839142783372065e-05,
"loss": 0.0059,
"step": 10000
},
{
"epoch": 0.8133230054221534,
"grad_norm": 0.6815012097358704,
"learning_rate": 1.4580945003872968e-05,
"loss": 0.0058,
"step": 10500
},
{
"epoch": 0.8520526723470179,
"grad_norm": 0.050135429948568344,
"learning_rate": 1.4322747224373872e-05,
"loss": 0.006,
"step": 11000
},
{
"epoch": 0.8907823392718822,
"grad_norm": 0.21455714106559753,
"learning_rate": 1.4064549444874775e-05,
"loss": 0.0061,
"step": 11500
},
{
"epoch": 0.9295120061967467,
"grad_norm": 0.1364729255437851,
"learning_rate": 1.380635166537568e-05,
"loss": 0.0053,
"step": 12000
},
{
"epoch": 0.9682416731216111,
"grad_norm": 0.07110429555177689,
"learning_rate": 1.3548670281435581e-05,
"loss": 0.0054,
"step": 12500
},
{
"epoch": 1.0,
"eval_bleu": 91.1948,
"eval_gen_len": 13.3484,
"eval_loss": 0.007155969273298979,
"eval_runtime": 76.0184,
"eval_samples_per_second": 603.801,
"eval_steps_per_second": 18.877,
"step": 12910
},
{
"epoch": 1.0069713400464757,
"grad_norm": 0.2353477030992508,
"learning_rate": 1.3290988897495483e-05,
"loss": 0.0072,
"step": 13000
},
{
"epoch": 1.04570100697134,
"grad_norm": 0.10523347556591034,
"learning_rate": 1.3032791117996386e-05,
"loss": 0.0059,
"step": 13500
},
{
"epoch": 1.0844306738962044,
"grad_norm": 0.13372991979122162,
"learning_rate": 1.277459333849729e-05,
"loss": 0.0052,
"step": 14000
},
{
"epoch": 1.1231603408210689,
"grad_norm": 0.06887730956077576,
"learning_rate": 1.2516395558998193e-05,
"loss": 0.0059,
"step": 14500
},
{
"epoch": 1.1618900077459333,
"grad_norm": 0.11208628118038177,
"learning_rate": 1.2258197779499098e-05,
"loss": 0.0065,
"step": 15000
},
{
"epoch": 1.2006196746707978,
"grad_norm": 0.057284511625766754,
"learning_rate": 1.2e-05,
"loss": 0.0053,
"step": 15500
},
{
"epoch": 1.2393493415956622,
"grad_norm": 0.30577805638313293,
"learning_rate": 1.1741802220500905e-05,
"loss": 0.006,
"step": 16000
},
{
"epoch": 1.2780790085205267,
"grad_norm": 0.023146886378526688,
"learning_rate": 1.1483604441001808e-05,
"loss": 0.0058,
"step": 16500
},
{
"epoch": 1.3168086754453912,
"grad_norm": 0.06862912327051163,
"learning_rate": 1.1225406661502712e-05,
"loss": 0.0055,
"step": 17000
},
{
"epoch": 1.3555383423702556,
"grad_norm": 0.11294469982385635,
"learning_rate": 1.0967208882003615e-05,
"loss": 0.0055,
"step": 17500
},
{
"epoch": 1.39426800929512,
"grad_norm": 0.0633481964468956,
"learning_rate": 1.070901110250452e-05,
"loss": 0.0056,
"step": 18000
},
{
"epoch": 1.4329976762199845,
"grad_norm": 0.20660839974880219,
"learning_rate": 1.0450813323005422e-05,
"loss": 0.0055,
"step": 18500
},
{
"epoch": 1.471727343144849,
"grad_norm": 0.2304856777191162,
"learning_rate": 1.0192615543506326e-05,
"loss": 0.0056,
"step": 19000
},
{
"epoch": 1.5104570100697134,
"grad_norm": 0.07686398923397064,
"learning_rate": 9.934417764007231e-06,
"loss": 0.0057,
"step": 19500
},
{
"epoch": 1.549186676994578,
"grad_norm": 0.184184268116951,
"learning_rate": 9.676219984508134e-06,
"loss": 0.0057,
"step": 20000
},
{
"epoch": 1.5879163439194421,
"grad_norm": 0.16291634738445282,
"learning_rate": 9.418022205009038e-06,
"loss": 0.0054,
"step": 20500
},
{
"epoch": 1.6266460108443068,
"grad_norm": 0.08518970757722855,
"learning_rate": 9.159824425509941e-06,
"loss": 0.0054,
"step": 21000
},
{
"epoch": 1.665375677769171,
"grad_norm": 0.15436352789402008,
"learning_rate": 8.901626646010845e-06,
"loss": 0.0054,
"step": 21500
},
{
"epoch": 1.7041053446940357,
"grad_norm": 0.036033745855093,
"learning_rate": 8.643428866511748e-06,
"loss": 0.0054,
"step": 22000
},
{
"epoch": 1.7428350116189,
"grad_norm": 0.10140281915664673,
"learning_rate": 8.385231087012653e-06,
"loss": 0.0058,
"step": 22500
},
{
"epoch": 1.7815646785437647,
"grad_norm": 0.10392390936613083,
"learning_rate": 8.127549703072554e-06,
"loss": 0.0052,
"step": 23000
},
{
"epoch": 1.820294345468629,
"grad_norm": 0.4044785499572754,
"learning_rate": 7.869351923573459e-06,
"loss": 0.0052,
"step": 23500
},
{
"epoch": 1.8590240123934936,
"grad_norm": 0.04380480572581291,
"learning_rate": 7.611154144074362e-06,
"loss": 0.0054,
"step": 24000
},
{
"epoch": 1.8977536793183578,
"grad_norm": 0.49817517399787903,
"learning_rate": 7.352956364575265e-06,
"loss": 0.005,
"step": 24500
},
{
"epoch": 1.9364833462432223,
"grad_norm": 0.1552819013595581,
"learning_rate": 7.094758585076169e-06,
"loss": 0.0052,
"step": 25000
},
{
"epoch": 1.9752130131680867,
"grad_norm": 0.3041650056838989,
"learning_rate": 6.8365608055770725e-06,
"loss": 0.0052,
"step": 25500
},
{
"epoch": 2.0,
"eval_bleu": 91.3684,
"eval_gen_len": 13.3464,
"eval_loss": 0.0063232011161744595,
"eval_runtime": 76.6579,
"eval_samples_per_second": 598.764,
"eval_steps_per_second": 18.72,
"step": 25820
},
{
"epoch": 2.0139426800929514,
"grad_norm": 0.07072452455759048,
"learning_rate": 6.578363026077976e-06,
"loss": 0.0053,
"step": 26000
},
{
"epoch": 2.0526723470178156,
"grad_norm": 0.08410033583641052,
"learning_rate": 6.32016524657888e-06,
"loss": 0.0043,
"step": 26500
},
{
"epoch": 2.09140201394268,
"grad_norm": 0.10657070577144623,
"learning_rate": 6.061967467079783e-06,
"loss": 0.0043,
"step": 27000
},
{
"epoch": 2.1301316808675446,
"grad_norm": 0.10941112786531448,
"learning_rate": 5.803769687580687e-06,
"loss": 0.0043,
"step": 27500
},
{
"epoch": 2.168861347792409,
"grad_norm": 0.1383851021528244,
"learning_rate": 5.5455719080815905e-06,
"loss": 0.0044,
"step": 28000
},
{
"epoch": 2.2075910147172735,
"grad_norm": 0.09801546484231949,
"learning_rate": 5.287374128582494e-06,
"loss": 0.0043,
"step": 28500
},
{
"epoch": 2.2463206816421377,
"grad_norm": 0.10826662182807922,
"learning_rate": 5.029176349083398e-06,
"loss": 0.0047,
"step": 29000
},
{
"epoch": 2.2850503485670024,
"grad_norm": 0.1411546766757965,
"learning_rate": 4.770978569584302e-06,
"loss": 0.0046,
"step": 29500
},
{
"epoch": 2.3237800154918666,
"grad_norm": 0.1722804307937622,
"learning_rate": 4.512780790085206e-06,
"loss": 0.0044,
"step": 30000
},
{
"epoch": 2.3625096824167313,
"grad_norm": 0.21791347861289978,
"learning_rate": 4.254583010586109e-06,
"loss": 0.0042,
"step": 30500
},
{
"epoch": 2.4012393493415956,
"grad_norm": 0.05044485256075859,
"learning_rate": 3.996385231087013e-06,
"loss": 0.0044,
"step": 31000
},
{
"epoch": 2.4399690162664602,
"grad_norm": 0.16592107713222504,
"learning_rate": 3.738703847146915e-06,
"loss": 0.0043,
"step": 31500
},
{
"epoch": 2.4786986831913245,
"grad_norm": 0.041500113904476166,
"learning_rate": 3.4805060676478185e-06,
"loss": 0.0045,
"step": 32000
},
{
"epoch": 2.517428350116189,
"grad_norm": 0.06472677737474442,
"learning_rate": 3.222308288148722e-06,
"loss": 0.0042,
"step": 32500
},
{
"epoch": 2.5561580170410534,
"grad_norm": 0.17371675372123718,
"learning_rate": 2.9641105086496257e-06,
"loss": 0.0044,
"step": 33000
},
{
"epoch": 2.5948876839659176,
"grad_norm": 0.17464491724967957,
"learning_rate": 2.7059127291505293e-06,
"loss": 0.0042,
"step": 33500
},
{
"epoch": 2.6336173508907823,
"grad_norm": 0.04553200677037239,
"learning_rate": 2.4477149496514333e-06,
"loss": 0.0043,
"step": 34000
},
{
"epoch": 2.672347017815647,
"grad_norm": 0.07096046209335327,
"learning_rate": 2.190033565711335e-06,
"loss": 0.0041,
"step": 34500
},
{
"epoch": 2.7110766847405112,
"grad_norm": 0.06601293385028839,
"learning_rate": 1.931835786212239e-06,
"loss": 0.0043,
"step": 35000
},
{
"epoch": 2.7498063516653755,
"grad_norm": 0.07469964027404785,
"learning_rate": 1.6736380067131424e-06,
"loss": 0.0042,
"step": 35500
},
{
"epoch": 2.78853601859024,
"grad_norm": 0.14455720782279968,
"learning_rate": 1.415440227214046e-06,
"loss": 0.0044,
"step": 36000
},
{
"epoch": 2.827265685515105,
"grad_norm": 0.04146069288253784,
"learning_rate": 1.1577588432739479e-06,
"loss": 0.0043,
"step": 36500
},
{
"epoch": 2.865995352439969,
"grad_norm": 0.038276560604572296,
"learning_rate": 8.995610637748516e-07,
"loss": 0.0044,
"step": 37000
},
{
"epoch": 2.9047250193648333,
"grad_norm": 0.1604069173336029,
"learning_rate": 6.413632842757553e-07,
"loss": 0.0043,
"step": 37500
},
{
"epoch": 2.943454686289698,
"grad_norm": 0.055847618728876114,
"learning_rate": 3.831655047766589e-07,
"loss": 0.0043,
"step": 38000
}
],
"logging_steps": 500,
"max_steps": 38730,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.633798918275072e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}