sentiment-base-0 / trainer_state.json
apwic's picture
End of training
b77b690 verified
raw
history blame
10.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 3.0131800174713135,
"learning_rate": 4.75e-05,
"loss": 0.5623,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7167919799498746,
"eval_f1": 0.5794790005316321,
"eval_loss": 0.5053456425666809,
"eval_precision": 0.6409822866344606,
"eval_recall": 0.5796053827968721,
"eval_runtime": 5.6071,
"eval_samples_per_second": 71.159,
"eval_steps_per_second": 8.917,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 5.634490966796875,
"learning_rate": 4.5e-05,
"loss": 0.518,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7293233082706767,
"eval_f1": 0.599784530797236,
"eval_loss": 0.4860531687736511,
"eval_precision": 0.6673625792811839,
"eval_recall": 0.5959719949081652,
"eval_runtime": 5.7755,
"eval_samples_per_second": 69.085,
"eval_steps_per_second": 8.657,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 1.959808111190796,
"learning_rate": 4.25e-05,
"loss": 0.4835,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.7694235588972431,
"eval_f1": 0.7145034843205575,
"eval_loss": 0.45518842339515686,
"eval_precision": 0.7210824478299833,
"eval_recall": 0.7093562465902892,
"eval_runtime": 5.2584,
"eval_samples_per_second": 75.878,
"eval_steps_per_second": 9.509,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 4.635169506072998,
"learning_rate": 4e-05,
"loss": 0.4497,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.7944862155388471,
"eval_f1": 0.7520912893253319,
"eval_loss": 0.4223441481590271,
"eval_precision": 0.7520912893253319,
"eval_recall": 0.7520912893253319,
"eval_runtime": 5.0487,
"eval_samples_per_second": 79.03,
"eval_steps_per_second": 9.903,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 8.219679832458496,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.4266,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.8170426065162907,
"eval_f1": 0.7740779522978476,
"eval_loss": 0.399569034576416,
"eval_precision": 0.7814051164566629,
"eval_recall": 0.7680487361338425,
"eval_runtime": 5.0767,
"eval_samples_per_second": 78.595,
"eval_steps_per_second": 9.849,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 4.150725841522217,
"learning_rate": 3.5e-05,
"loss": 0.3907,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8195488721804511,
"eval_f1": 0.784453781512605,
"eval_loss": 0.3830115497112274,
"eval_precision": 0.7818241274748796,
"eval_recall": 0.787324968176032,
"eval_runtime": 5.0718,
"eval_samples_per_second": 78.67,
"eval_steps_per_second": 9.858,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 3.297985076904297,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.3742,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8345864661654135,
"eval_f1": 0.798423147581139,
"eval_loss": 0.3684135675430298,
"eval_precision": 0.8016430472182685,
"eval_recall": 0.7954628114202582,
"eval_runtime": 5.0743,
"eval_samples_per_second": 78.632,
"eval_steps_per_second": 9.854,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 8.395323753356934,
"learning_rate": 3e-05,
"loss": 0.3616,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8270676691729323,
"eval_f1": 0.7967966933608887,
"eval_loss": 0.3719731867313385,
"eval_precision": 0.7902444649446494,
"eval_recall": 0.8051463902527732,
"eval_runtime": 5.0484,
"eval_samples_per_second": 79.035,
"eval_steps_per_second": 9.904,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 3.748974561691284,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.3294,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8370927318295739,
"eval_f1": 0.8076965854743632,
"eval_loss": 0.36888691782951355,
"eval_precision": 0.8018925518925519,
"eval_recall": 0.8147390434624477,
"eval_runtime": 5.0543,
"eval_samples_per_second": 78.943,
"eval_steps_per_second": 9.893,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 9.309541702270508,
"learning_rate": 2.5e-05,
"loss": 0.3207,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8395989974937343,
"eval_f1": 0.8110907261644079,
"eval_loss": 0.36315786838531494,
"eval_precision": 0.8046983557202408,
"eval_recall": 0.819012547735952,
"eval_runtime": 5.0709,
"eval_samples_per_second": 78.684,
"eval_steps_per_second": 9.86,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 1.2568168640136719,
"learning_rate": 2.25e-05,
"loss": 0.3214,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8370927318295739,
"eval_f1": 0.8085765951950401,
"eval_loss": 0.3577338457107544,
"eval_precision": 0.8017470018450185,
"eval_recall": 0.817239498090562,
"eval_runtime": 5.1071,
"eval_samples_per_second": 78.126,
"eval_steps_per_second": 9.79,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 1.915198802947998,
"learning_rate": 2e-05,
"loss": 0.3167,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8395989974937343,
"eval_f1": 0.8119476846942383,
"eval_loss": 0.36069995164871216,
"eval_precision": 0.8045650301464256,
"eval_recall": 0.8215130023640662,
"eval_runtime": 5.0598,
"eval_samples_per_second": 78.857,
"eval_steps_per_second": 9.882,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 1.9545631408691406,
"learning_rate": 1.75e-05,
"loss": 0.289,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8345864661654135,
"eval_f1": 0.8060710498409331,
"eval_loss": 0.3684280812740326,
"eval_precision": 0.7988372093023256,
"eval_recall": 0.8154664484451719,
"eval_runtime": 5.1019,
"eval_samples_per_second": 78.206,
"eval_steps_per_second": 9.8,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 5.748187065124512,
"learning_rate": 1.5e-05,
"loss": 0.2997,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.849624060150376,
"eval_f1": 0.8176861216035092,
"eval_loss": 0.3479882776737213,
"eval_precision": 0.8193355786895284,
"eval_recall": 0.8161029278050556,
"eval_runtime": 5.0557,
"eval_samples_per_second": 78.92,
"eval_steps_per_second": 9.89,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 4.010083198547363,
"learning_rate": 1.25e-05,
"loss": 0.2986,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.849624060150376,
"eval_f1": 0.821236559139785,
"eval_loss": 0.35758015513420105,
"eval_precision": 0.8169406150583245,
"eval_recall": 0.8261047463175123,
"eval_runtime": 5.0955,
"eval_samples_per_second": 78.304,
"eval_steps_per_second": 9.813,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 0.9220337271690369,
"learning_rate": 1e-05,
"loss": 0.2914,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.849624060150376,
"eval_f1": 0.8195005730140539,
"eval_loss": 0.34965991973876953,
"eval_precision": 0.8179621848739496,
"eval_recall": 0.8211038370612839,
"eval_runtime": 5.0617,
"eval_samples_per_second": 78.827,
"eval_steps_per_second": 9.878,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 1.7026562690734863,
"learning_rate": 7.5e-06,
"loss": 0.278,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8521303258145363,
"eval_f1": 0.8229427559286084,
"eval_loss": 0.3539772927761078,
"eval_precision": 0.8206541218637993,
"eval_recall": 0.8253773413347881,
"eval_runtime": 5.1199,
"eval_samples_per_second": 77.931,
"eval_steps_per_second": 9.766,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 5.839470863342285,
"learning_rate": 5e-06,
"loss": 0.2887,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8521303258145363,
"eval_f1": 0.8229427559286084,
"eval_loss": 0.35161107778549194,
"eval_precision": 0.8206541218637993,
"eval_recall": 0.8253773413347881,
"eval_runtime": 5.1154,
"eval_samples_per_second": 77.999,
"eval_steps_per_second": 9.774,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 7.782900810241699,
"learning_rate": 2.5e-06,
"loss": 0.2829,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8521303258145363,
"eval_f1": 0.8229427559286084,
"eval_loss": 0.35369938611984253,
"eval_precision": 0.8206541218637993,
"eval_recall": 0.8253773413347881,
"eval_runtime": 5.0565,
"eval_samples_per_second": 78.908,
"eval_steps_per_second": 9.888,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 5.059621334075928,
"learning_rate": 0.0,
"loss": 0.2771,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8546365914786967,
"eval_f1": 0.8263655462184873,
"eval_loss": 0.35401326417922974,
"eval_precision": 0.8233396753671443,
"eval_recall": 0.8296508456082925,
"eval_runtime": 5.0854,
"eval_samples_per_second": 78.459,
"eval_steps_per_second": 9.832,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 7597037114448000.0,
"train_loss": 0.35800845193081215,
"train_runtime": 2113.1391,
"train_samples_per_second": 34.432,
"train_steps_per_second": 1.155
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7597037114448000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}