sentiment-pt-pl30-0 / trainer_state.json
apwic's picture
End of training
d907130 verified
raw
history blame
10.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 4.279876708984375,
"learning_rate": 4.75e-05,
"loss": 0.5457,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7192982456140351,
"eval_f1": 0.6013986013986015,
"eval_loss": 0.4752861261367798,
"eval_precision": 0.6464762782128051,
"eval_recall": 0.5963811602109474,
"eval_runtime": 5.2249,
"eval_samples_per_second": 76.365,
"eval_steps_per_second": 9.57,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 5.457512855529785,
"learning_rate": 4.5e-05,
"loss": 0.4518,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7969924812030075,
"eval_f1": 0.7684901970616256,
"eval_loss": 0.4070126414299011,
"eval_precision": 0.7589055735189347,
"eval_recall": 0.7863702491362066,
"eval_runtime": 5.1546,
"eval_samples_per_second": 77.407,
"eval_steps_per_second": 9.7,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 3.400643825531006,
"learning_rate": 4.25e-05,
"loss": 0.3461,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.8421052631578947,
"eval_f1": 0.7970482088214634,
"eval_loss": 0.3412257730960846,
"eval_precision": 0.8231252179979072,
"eval_recall": 0.7807783233315149,
"eval_runtime": 5.1898,
"eval_samples_per_second": 76.882,
"eval_steps_per_second": 9.634,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 2.3028647899627686,
"learning_rate": 4e-05,
"loss": 0.2958,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.8546365914786967,
"eval_f1": 0.8228567054500919,
"eval_loss": 0.3252820372581482,
"eval_precision": 0.8263351692555232,
"eval_recall": 0.8196490270958356,
"eval_runtime": 5.1759,
"eval_samples_per_second": 77.088,
"eval_steps_per_second": 9.66,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 0.7779637575149536,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.2659,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.8822055137844611,
"eval_f1": 0.8560793854229822,
"eval_loss": 0.2941116690635681,
"eval_precision": 0.8609538327526132,
"eval_recall": 0.8516548463356974,
"eval_runtime": 5.1453,
"eval_samples_per_second": 77.546,
"eval_steps_per_second": 9.718,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.210702896118164,
"learning_rate": 3.5e-05,
"loss": 0.2482,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8771929824561403,
"eval_f1": 0.8563451067988272,
"eval_loss": 0.2965351343154907,
"eval_precision": 0.8473119816985988,
"eval_recall": 0.8681123840698308,
"eval_runtime": 5.1607,
"eval_samples_per_second": 77.315,
"eval_steps_per_second": 9.689,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 0.5390880703926086,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.2264,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.87468671679198,
"eval_f1": 0.8530841286673736,
"eval_loss": 0.286903977394104,
"eval_precision": 0.8446597760551249,
"eval_recall": 0.8638388797963266,
"eval_runtime": 5.4309,
"eval_samples_per_second": 73.468,
"eval_steps_per_second": 9.207,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 2.480511426925659,
"learning_rate": 3e-05,
"loss": 0.2218,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.899749373433584,
"eval_f1": 0.8730431462390225,
"eval_loss": 0.2795129120349884,
"eval_precision": 0.8961111111111111,
"eval_recall": 0.8565648299690853,
"eval_runtime": 5.1715,
"eval_samples_per_second": 77.153,
"eval_steps_per_second": 9.668,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 6.716876029968262,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.2106,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8922305764411027,
"eval_f1": 0.8715803025426456,
"eval_loss": 0.27050527930259705,
"eval_precision": 0.8673433153814287,
"eval_recall": 0.8762502273140571,
"eval_runtime": 5.1606,
"eval_samples_per_second": 77.317,
"eval_steps_per_second": 9.689,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 3.200309991836548,
"learning_rate": 2.5e-05,
"loss": 0.1981,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.9072681704260651,
"eval_f1": 0.8867007927797945,
"eval_loss": 0.275096595287323,
"eval_precision": 0.89198606271777,
"eval_recall": 0.8818876159301692,
"eval_runtime": 5.1671,
"eval_samples_per_second": 77.22,
"eval_steps_per_second": 9.677,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 2.14013409614563,
"learning_rate": 2.25e-05,
"loss": 0.1802,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.9047619047619048,
"eval_f1": 0.8862394957983193,
"eval_loss": 0.27452006936073303,
"eval_precision": 0.8826476009275225,
"eval_recall": 0.8901163847972358,
"eval_runtime": 5.1559,
"eval_samples_per_second": 77.387,
"eval_steps_per_second": 9.698,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 6.242837905883789,
"learning_rate": 2e-05,
"loss": 0.1828,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.9072681704260651,
"eval_f1": 0.8854915648632926,
"eval_loss": 0.279909610748291,
"eval_precision": 0.8956662848415425,
"eval_recall": 0.8768867066739408,
"eval_runtime": 5.1353,
"eval_samples_per_second": 77.698,
"eval_steps_per_second": 9.737,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 1.8258763551712036,
"learning_rate": 1.75e-05,
"loss": 0.1707,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.9097744360902256,
"eval_f1": 0.8894736842105263,
"eval_loss": 0.27386558055877686,
"eval_precision": 0.8959740990990992,
"eval_recall": 0.8836606655755592,
"eval_runtime": 5.1775,
"eval_samples_per_second": 77.065,
"eval_steps_per_second": 9.657,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 3.6909942626953125,
"learning_rate": 1.5e-05,
"loss": 0.1606,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.9072681704260651,
"eval_f1": 0.888964101175568,
"eval_loss": 0.28680744767189026,
"eval_precision": 0.8862007168458781,
"eval_recall": 0.8918894344426259,
"eval_runtime": 5.1734,
"eval_samples_per_second": 77.126,
"eval_steps_per_second": 9.665,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 0.49116143584251404,
"learning_rate": 1.25e-05,
"loss": 0.1499,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.9022556390977443,
"eval_f1": 0.8817957385392532,
"eval_loss": 0.2929786145687103,
"eval_precision": 0.8827677592299257,
"eval_recall": 0.8808419712675032,
"eval_runtime": 5.1562,
"eval_samples_per_second": 77.383,
"eval_steps_per_second": 9.697,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 1.3892961740493774,
"learning_rate": 1e-05,
"loss": 0.1555,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8947368421052632,
"eval_f1": 0.8760282890453928,
"eval_loss": 0.3041006028652191,
"eval_precision": 0.8682260305697083,
"eval_recall": 0.8855246408437898,
"eval_runtime": 5.1466,
"eval_samples_per_second": 77.527,
"eval_steps_per_second": 9.715,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 3.9728598594665527,
"learning_rate": 7.5e-06,
"loss": 0.1396,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.9022556390977443,
"eval_f1": 0.8823853973772722,
"eval_loss": 0.28764259815216064,
"eval_precision": 0.8814464081066409,
"eval_recall": 0.8833424258956174,
"eval_runtime": 5.1398,
"eval_samples_per_second": 77.63,
"eval_steps_per_second": 9.728,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 3.8131844997406006,
"learning_rate": 5e-06,
"loss": 0.1477,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.9047619047619048,
"eval_f1": 0.8845345436822225,
"eval_loss": 0.28996890783309937,
"eval_precision": 0.8865278628291489,
"eval_recall": 0.8826150209128933,
"eval_runtime": 5.3979,
"eval_samples_per_second": 73.917,
"eval_steps_per_second": 9.263,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 3.289058208465576,
"learning_rate": 2.5e-06,
"loss": 0.1434,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.9047619047619048,
"eval_f1": 0.8851154755410074,
"eval_loss": 0.2917177975177765,
"eval_precision": 0.8851154755410074,
"eval_recall": 0.8851154755410074,
"eval_runtime": 5.1418,
"eval_samples_per_second": 77.6,
"eval_steps_per_second": 9.724,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 2.46004581451416,
"learning_rate": 0.0,
"loss": 0.1386,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.9047619047619048,
"eval_f1": 0.8851154755410074,
"eval_loss": 0.2913039028644562,
"eval_precision": 0.8851154755410074,
"eval_recall": 0.8851154755410074,
"eval_runtime": 5.1424,
"eval_samples_per_second": 77.591,
"eval_steps_per_second": 9.723,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 8444128359504000.0,
"train_loss": 0.22896970373685244,
"train_runtime": 1968.7629,
"train_samples_per_second": 36.957,
"train_steps_per_second": 1.239
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 8444128359504000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}