llama2-7b_chat_newsqa_PO_5e-5_4 / trainer_state.json
boyiwei's picture
init
2442366
raw
history blame
2.87 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.968,
"eval_steps": 62,
"global_step": 248,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.24,
"grad_norm": 1.5051484779217006,
"learning_rate": 1.2096774193548388e-05,
"loss": 0.0697,
"step": 15
},
{
"epoch": 0.48,
"grad_norm": 3.6557213411275504,
"learning_rate": 2.4193548387096777e-05,
"loss": 0.0453,
"step": 30
},
{
"epoch": 0.72,
"grad_norm": 5.8190678296309155,
"learning_rate": 3.6290322580645165e-05,
"loss": 0.1379,
"step": 45
},
{
"epoch": 0.96,
"grad_norm": 5.082131701290993,
"learning_rate": 4.8387096774193554e-05,
"loss": 0.3262,
"step": 60
},
{
"epoch": 1.2,
"grad_norm": 4.533397429277955,
"learning_rate": 4.74e-05,
"loss": 0.4413,
"step": 75
},
{
"epoch": 1.44,
"grad_norm": 3.9912559897316915,
"learning_rate": 4.44e-05,
"loss": 0.4631,
"step": 90
},
{
"epoch": 1.6800000000000002,
"grad_norm": 3.605033658976913,
"learning_rate": 4.14e-05,
"loss": 0.3972,
"step": 105
},
{
"epoch": 1.92,
"grad_norm": 3.1324872605719025,
"learning_rate": 3.8400000000000005e-05,
"loss": 0.3926,
"step": 120
},
{
"epoch": 2.16,
"grad_norm": 2.7954439766075647,
"learning_rate": 3.54e-05,
"loss": 0.3268,
"step": 135
},
{
"epoch": 2.4,
"grad_norm": 2.4279604222176263,
"learning_rate": 3.24e-05,
"loss": 0.2419,
"step": 150
},
{
"epoch": 2.64,
"grad_norm": 2.2595799598751447,
"learning_rate": 2.94e-05,
"loss": 0.2212,
"step": 165
},
{
"epoch": 2.88,
"grad_norm": 2.573622729528534,
"learning_rate": 2.64e-05,
"loss": 0.1586,
"step": 180
},
{
"epoch": 3.12,
"grad_norm": 2.2604733828967016,
"learning_rate": 2.3400000000000003e-05,
"loss": 0.1431,
"step": 195
},
{
"epoch": 3.36,
"grad_norm": 1.4987841874421017,
"learning_rate": 2.04e-05,
"loss": 0.1047,
"step": 210
},
{
"epoch": 3.6,
"grad_norm": 0.6724445238800598,
"learning_rate": 1.74e-05,
"loss": 0.0896,
"step": 225
},
{
"epoch": 3.84,
"grad_norm": 1.3552689605969652,
"learning_rate": 1.44e-05,
"loss": 0.0871,
"step": 240
}
],
"logging_steps": 15,
"max_steps": 312,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 62,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}