{
"best_metric": 2.225323438644409,
"best_model_checkpoint": "autotrain-x906d-mvlef/checkpoint-2460",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 2460,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03048780487804878,
"grad_norm": 135.88742065429688,
"learning_rate": 1.016260162601626e-05,
"loss": 30.3396,
"step": 25
},
{
"epoch": 0.06097560975609756,
"grad_norm": 93.93063354492188,
"learning_rate": 2.032520325203252e-05,
"loss": 24.435,
"step": 50
},
{
"epoch": 0.09146341463414634,
"grad_norm": 104.44407653808594,
"learning_rate": 3.048780487804878e-05,
"loss": 14.4152,
"step": 75
},
{
"epoch": 0.12195121951219512,
"grad_norm": 15.064167976379395,
"learning_rate": 4.065040650406504e-05,
"loss": 8.6678,
"step": 100
},
{
"epoch": 0.1524390243902439,
"grad_norm": 8.59422492980957,
"learning_rate": 5.081300813008131e-05,
"loss": 6.2719,
"step": 125
},
{
"epoch": 0.18292682926829268,
"grad_norm": 6.402270793914795,
"learning_rate": 6.097560975609756e-05,
"loss": 5.391,
"step": 150
},
{
"epoch": 0.21341463414634146,
"grad_norm": 7.405852794647217,
"learning_rate": 7.113821138211383e-05,
"loss": 4.9132,
"step": 175
},
{
"epoch": 0.24390243902439024,
"grad_norm": 6.045359134674072,
"learning_rate": 8.130081300813008e-05,
"loss": 4.4981,
"step": 200
},
{
"epoch": 0.27439024390243905,
"grad_norm": 4.673328876495361,
"learning_rate": 9.146341463414635e-05,
"loss": 4.267,
"step": 225
},
{
"epoch": 0.3048780487804878,
"grad_norm": 5.1413774490356445,
"learning_rate": 9.981933152664861e-05,
"loss": 4.0875,
"step": 250
},
{
"epoch": 0.3353658536585366,
"grad_norm": 4.102631092071533,
"learning_rate": 9.869015356820235e-05,
"loss": 3.9819,
"step": 275
},
{
"epoch": 0.36585365853658536,
"grad_norm": 3.946659803390503,
"learning_rate": 9.75609756097561e-05,
"loss": 4.0086,
"step": 300
},
{
"epoch": 0.39634146341463417,
"grad_norm": 5.377846717834473,
"learning_rate": 9.643179765130986e-05,
"loss": 3.7839,
"step": 325
},
{
"epoch": 0.4268292682926829,
"grad_norm": 3.9124577045440674,
"learning_rate": 9.530261969286361e-05,
"loss": 3.6614,
"step": 350
},
{
"epoch": 0.4573170731707317,
"grad_norm": 3.9939420223236084,
"learning_rate": 9.417344173441735e-05,
"loss": 3.7353,
"step": 375
},
{
"epoch": 0.4878048780487805,
"grad_norm": 4.000846862792969,
"learning_rate": 9.30442637759711e-05,
"loss": 3.6826,
"step": 400
},
{
"epoch": 0.5182926829268293,
"grad_norm": 4.859684944152832,
"learning_rate": 9.191508581752484e-05,
"loss": 3.5672,
"step": 425
},
{
"epoch": 0.5487804878048781,
"grad_norm": 3.8994104862213135,
"learning_rate": 9.07859078590786e-05,
"loss": 3.4385,
"step": 450
},
{
"epoch": 0.5792682926829268,
"grad_norm": 5.588184356689453,
"learning_rate": 8.965672990063234e-05,
"loss": 3.4864,
"step": 475
},
{
"epoch": 0.6097560975609756,
"grad_norm": 4.631311416625977,
"learning_rate": 8.852755194218609e-05,
"loss": 3.4949,
"step": 500
},
{
"epoch": 0.6402439024390244,
"grad_norm": 4.662644386291504,
"learning_rate": 8.739837398373984e-05,
"loss": 3.4993,
"step": 525
},
{
"epoch": 0.6707317073170732,
"grad_norm": 4.6329874992370605,
"learning_rate": 8.626919602529358e-05,
"loss": 3.4153,
"step": 550
},
{
"epoch": 0.7012195121951219,
"grad_norm": 5.302168846130371,
"learning_rate": 8.514001806684734e-05,
"loss": 3.4356,
"step": 575
},
{
"epoch": 0.7317073170731707,
"grad_norm": 4.273636341094971,
"learning_rate": 8.401084010840109e-05,
"loss": 3.479,
"step": 600
},
{
"epoch": 0.7621951219512195,
"grad_norm": 3.5382657051086426,
"learning_rate": 8.288166214995484e-05,
"loss": 3.4343,
"step": 625
},
{
"epoch": 0.7926829268292683,
"grad_norm": 3.272901773452759,
"learning_rate": 8.175248419150858e-05,
"loss": 3.3167,
"step": 650
},
{
"epoch": 0.823170731707317,
"grad_norm": 3.156431198120117,
"learning_rate": 8.062330623306234e-05,
"loss": 3.2939,
"step": 675
},
{
"epoch": 0.8536585365853658,
"grad_norm": 4.288792610168457,
"learning_rate": 7.949412827461609e-05,
"loss": 3.2499,
"step": 700
},
{
"epoch": 0.8841463414634146,
"grad_norm": 3.8503577709198,
"learning_rate": 7.836495031616983e-05,
"loss": 3.3326,
"step": 725
},
{
"epoch": 0.9146341463414634,
"grad_norm": 3.282243251800537,
"learning_rate": 7.723577235772358e-05,
"loss": 3.3124,
"step": 750
},
{
"epoch": 0.9451219512195121,
"grad_norm": 3.938842296600342,
"learning_rate": 7.610659439927734e-05,
"loss": 3.2749,
"step": 775
},
{
"epoch": 0.975609756097561,
"grad_norm": 4.175856590270996,
"learning_rate": 7.497741644083109e-05,
"loss": 3.2124,
"step": 800
},
{
"epoch": 1.0,
"eval_gen_len": 19.0,
"eval_loss": 2.420257329940796,
"eval_rouge1": 25.1635,
"eval_rouge2": 23.2382,
"eval_rougeL": 24.6426,
"eval_rougeLsum": 25.0666,
"eval_runtime": 54.8826,
"eval_samples_per_second": 7.47,
"eval_steps_per_second": 1.877,
"step": 820
},
{
"epoch": 1.0060975609756098,
"grad_norm": 3.5674939155578613,
"learning_rate": 7.384823848238483e-05,
"loss": 3.2418,
"step": 825
},
{
"epoch": 1.0365853658536586,
"grad_norm": 4.070037841796875,
"learning_rate": 7.271906052393858e-05,
"loss": 3.2318,
"step": 850
},
{
"epoch": 1.0670731707317074,
"grad_norm": 4.293022632598877,
"learning_rate": 7.158988256549232e-05,
"loss": 3.1837,
"step": 875
},
{
"epoch": 1.0975609756097562,
"grad_norm": 3.463479995727539,
"learning_rate": 7.046070460704606e-05,
"loss": 3.228,
"step": 900
},
{
"epoch": 1.1280487804878048,
"grad_norm": 7.417110443115234,
"learning_rate": 6.933152664859982e-05,
"loss": 3.1462,
"step": 925
},
{
"epoch": 1.1585365853658536,
"grad_norm": 5.171451091766357,
"learning_rate": 6.820234869015357e-05,
"loss": 3.0788,
"step": 950
},
{
"epoch": 1.1890243902439024,
"grad_norm": 3.9318175315856934,
"learning_rate": 6.707317073170732e-05,
"loss": 3.207,
"step": 975
},
{
"epoch": 1.2195121951219512,
"grad_norm": 5.252470016479492,
"learning_rate": 6.594399277326106e-05,
"loss": 3.0539,
"step": 1000
},
{
"epoch": 1.25,
"grad_norm": 3.6664767265319824,
"learning_rate": 6.481481481481482e-05,
"loss": 3.1699,
"step": 1025
},
{
"epoch": 1.2804878048780488,
"grad_norm": 2.665335178375244,
"learning_rate": 6.368563685636857e-05,
"loss": 3.0474,
"step": 1050
},
{
"epoch": 1.3109756097560976,
"grad_norm": 3.2224156856536865,
"learning_rate": 6.255645889792232e-05,
"loss": 3.1378,
"step": 1075
},
{
"epoch": 1.3414634146341464,
"grad_norm": 3.4332261085510254,
"learning_rate": 6.142728093947606e-05,
"loss": 3.1683,
"step": 1100
},
{
"epoch": 1.3719512195121952,
"grad_norm": 4.648336887359619,
"learning_rate": 6.0298102981029816e-05,
"loss": 3.1224,
"step": 1125
},
{
"epoch": 1.4024390243902438,
"grad_norm": 3.557978391647339,
"learning_rate": 5.916892502258356e-05,
"loss": 3.1106,
"step": 1150
},
{
"epoch": 1.4329268292682926,
"grad_norm": 3.8348541259765625,
"learning_rate": 5.803974706413731e-05,
"loss": 3.1733,
"step": 1175
},
{
"epoch": 1.4634146341463414,
"grad_norm": 4.132476329803467,
"learning_rate": 5.6910569105691056e-05,
"loss": 3.0671,
"step": 1200
},
{
"epoch": 1.4939024390243902,
"grad_norm": 3.8571975231170654,
"learning_rate": 5.578139114724481e-05,
"loss": 3.1065,
"step": 1225
},
{
"epoch": 1.524390243902439,
"grad_norm": 3.408566951751709,
"learning_rate": 5.465221318879856e-05,
"loss": 3.1613,
"step": 1250
},
{
"epoch": 1.5548780487804879,
"grad_norm": 3.7324483394622803,
"learning_rate": 5.35230352303523e-05,
"loss": 2.9743,
"step": 1275
},
{
"epoch": 1.5853658536585367,
"grad_norm": 3.1098973751068115,
"learning_rate": 5.2393857271906056e-05,
"loss": 3.1296,
"step": 1300
},
{
"epoch": 1.6158536585365852,
"grad_norm": 5.050591468811035,
"learning_rate": 5.126467931345981e-05,
"loss": 2.9844,
"step": 1325
},
{
"epoch": 1.6463414634146343,
"grad_norm": 3.30641508102417,
"learning_rate": 5.013550135501355e-05,
"loss": 3.0301,
"step": 1350
},
{
"epoch": 1.6768292682926829,
"grad_norm": 4.2086358070373535,
"learning_rate": 4.90063233965673e-05,
"loss": 3.0881,
"step": 1375
},
{
"epoch": 1.7073170731707317,
"grad_norm": 4.049353122711182,
"learning_rate": 4.787714543812105e-05,
"loss": 3.0382,
"step": 1400
},
{
"epoch": 1.7378048780487805,
"grad_norm": 3.4281935691833496,
"learning_rate": 4.6747967479674795e-05,
"loss": 3.0105,
"step": 1425
},
{
"epoch": 1.7682926829268293,
"grad_norm": 3.7564141750335693,
"learning_rate": 4.561878952122855e-05,
"loss": 3.0319,
"step": 1450
},
{
"epoch": 1.798780487804878,
"grad_norm": 4.241165637969971,
"learning_rate": 4.4489611562782295e-05,
"loss": 3.0275,
"step": 1475
},
{
"epoch": 1.8292682926829267,
"grad_norm": 3.024312734603882,
"learning_rate": 4.336043360433605e-05,
"loss": 3.004,
"step": 1500
},
{
"epoch": 1.8597560975609757,
"grad_norm": 3.0586462020874023,
"learning_rate": 4.2231255645889795e-05,
"loss": 2.8741,
"step": 1525
},
{
"epoch": 1.8902439024390243,
"grad_norm": 3.032233953475952,
"learning_rate": 4.110207768744354e-05,
"loss": 3.0305,
"step": 1550
},
{
"epoch": 1.9207317073170733,
"grad_norm": 3.4631378650665283,
"learning_rate": 3.9972899728997295e-05,
"loss": 2.9829,
"step": 1575
},
{
"epoch": 1.951219512195122,
"grad_norm": 3.462908983230591,
"learning_rate": 3.884372177055104e-05,
"loss": 2.9073,
"step": 1600
},
{
"epoch": 1.9817073170731707,
"grad_norm": 3.844022512435913,
"learning_rate": 3.771454381210479e-05,
"loss": 2.943,
"step": 1625
},
{
"epoch": 2.0,
"eval_gen_len": 17.9268,
"eval_loss": 2.2693333625793457,
"eval_rouge1": 24.8213,
"eval_rouge2": 22.8064,
"eval_rougeL": 24.126,
"eval_rougeLsum": 24.7561,
"eval_runtime": 54.0619,
"eval_samples_per_second": 7.584,
"eval_steps_per_second": 1.905,
"step": 1640
},
{
"epoch": 2.0121951219512195,
"grad_norm": 3.2110657691955566,
"learning_rate": 3.6585365853658535e-05,
"loss": 2.9109,
"step": 1650
},
{
"epoch": 2.042682926829268,
"grad_norm": 5.20732307434082,
"learning_rate": 3.545618789521229e-05,
"loss": 2.907,
"step": 1675
},
{
"epoch": 2.073170731707317,
"grad_norm": 3.6366891860961914,
"learning_rate": 3.4327009936766035e-05,
"loss": 2.7614,
"step": 1700
},
{
"epoch": 2.1036585365853657,
"grad_norm": 3.012146472930908,
"learning_rate": 3.319783197831978e-05,
"loss": 2.8866,
"step": 1725
},
{
"epoch": 2.1341463414634148,
"grad_norm": 5.310023307800293,
"learning_rate": 3.2068654019873535e-05,
"loss": 2.9738,
"step": 1750
},
{
"epoch": 2.1646341463414633,
"grad_norm": 3.908693313598633,
"learning_rate": 3.093947606142728e-05,
"loss": 2.989,
"step": 1775
},
{
"epoch": 2.1951219512195124,
"grad_norm": 3.6812493801116943,
"learning_rate": 2.9810298102981032e-05,
"loss": 2.9844,
"step": 1800
},
{
"epoch": 2.225609756097561,
"grad_norm": 3.1762988567352295,
"learning_rate": 2.868112014453478e-05,
"loss": 2.9518,
"step": 1825
},
{
"epoch": 2.2560975609756095,
"grad_norm": 3.526785373687744,
"learning_rate": 2.7551942186088532e-05,
"loss": 3.0312,
"step": 1850
},
{
"epoch": 2.2865853658536586,
"grad_norm": 3.4673197269439697,
"learning_rate": 2.642276422764228e-05,
"loss": 2.9413,
"step": 1875
},
{
"epoch": 2.317073170731707,
"grad_norm": 3.7603211402893066,
"learning_rate": 2.529358626919603e-05,
"loss": 2.8524,
"step": 1900
},
{
"epoch": 2.347560975609756,
"grad_norm": 3.039940357208252,
"learning_rate": 2.4164408310749775e-05,
"loss": 2.9403,
"step": 1925
},
{
"epoch": 2.3780487804878048,
"grad_norm": 4.610980987548828,
"learning_rate": 2.3035230352303525e-05,
"loss": 2.892,
"step": 1950
},
{
"epoch": 2.408536585365854,
"grad_norm": 3.3454501628875732,
"learning_rate": 2.1906052393857275e-05,
"loss": 2.905,
"step": 1975
},
{
"epoch": 2.4390243902439024,
"grad_norm": 2.9850571155548096,
"learning_rate": 2.077687443541102e-05,
"loss": 2.9325,
"step": 2000
},
{
"epoch": 2.4695121951219514,
"grad_norm": 4.394461631774902,
"learning_rate": 1.9647696476964768e-05,
"loss": 2.9112,
"step": 2025
},
{
"epoch": 2.5,
"grad_norm": 4.401582717895508,
"learning_rate": 1.8518518518518518e-05,
"loss": 2.8956,
"step": 2050
},
{
"epoch": 2.5304878048780486,
"grad_norm": 3.5677547454833984,
"learning_rate": 1.7389340560072268e-05,
"loss": 2.9706,
"step": 2075
},
{
"epoch": 2.5609756097560976,
"grad_norm": 3.1517820358276367,
"learning_rate": 1.6260162601626018e-05,
"loss": 2.896,
"step": 2100
},
{
"epoch": 2.591463414634146,
"grad_norm": 5.237459182739258,
"learning_rate": 1.5130984643179767e-05,
"loss": 2.9137,
"step": 2125
},
{
"epoch": 2.6219512195121952,
"grad_norm": 3.8501670360565186,
"learning_rate": 1.4001806684733515e-05,
"loss": 2.9322,
"step": 2150
},
{
"epoch": 2.652439024390244,
"grad_norm": 6.259314060211182,
"learning_rate": 1.2872628726287265e-05,
"loss": 2.8803,
"step": 2175
},
{
"epoch": 2.682926829268293,
"grad_norm": 3.369128942489624,
"learning_rate": 1.1743450767841012e-05,
"loss": 2.9141,
"step": 2200
},
{
"epoch": 2.7134146341463414,
"grad_norm": 3.4941694736480713,
"learning_rate": 1.0614272809394762e-05,
"loss": 2.9613,
"step": 2225
},
{
"epoch": 2.7439024390243905,
"grad_norm": 4.332786560058594,
"learning_rate": 9.48509485094851e-06,
"loss": 2.9518,
"step": 2250
},
{
"epoch": 2.774390243902439,
"grad_norm": 3.981058359146118,
"learning_rate": 8.35591689250226e-06,
"loss": 2.8848,
"step": 2275
},
{
"epoch": 2.8048780487804876,
"grad_norm": 3.464871644973755,
"learning_rate": 7.226738934056007e-06,
"loss": 2.9663,
"step": 2300
},
{
"epoch": 2.8353658536585367,
"grad_norm": 3.324209690093994,
"learning_rate": 6.0975609756097564e-06,
"loss": 2.8891,
"step": 2325
},
{
"epoch": 2.8658536585365852,
"grad_norm": 3.896878480911255,
"learning_rate": 4.968383017163506e-06,
"loss": 2.952,
"step": 2350
},
{
"epoch": 2.8963414634146343,
"grad_norm": 3.644209384918213,
"learning_rate": 3.839205058717254e-06,
"loss": 2.8857,
"step": 2375
},
{
"epoch": 2.926829268292683,
"grad_norm": 3.039807081222534,
"learning_rate": 2.710027100271003e-06,
"loss": 2.8308,
"step": 2400
},
{
"epoch": 2.9573170731707314,
"grad_norm": 3.2632334232330322,
"learning_rate": 1.5808491418247518e-06,
"loss": 2.9224,
"step": 2425
},
{
"epoch": 2.9878048780487805,
"grad_norm": 4.359528064727783,
"learning_rate": 4.5167118337850045e-07,
"loss": 2.901,
"step": 2450
},
{
"epoch": 3.0,
"eval_gen_len": 18.8,
"eval_loss": 2.225323438644409,
"eval_rouge1": 26.024,
"eval_rouge2": 24.1663,
"eval_rougeL": 25.432,
"eval_rougeLsum": 25.9929,
"eval_runtime": 55.2175,
"eval_samples_per_second": 7.425,
"eval_steps_per_second": 1.865,
"step": 2460
}
],
"logging_steps": 25,
"max_steps": 2460,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 27756032163840.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}