w2s_llama3-boolq / trainer_state.json
skrishna's picture
Upload folder using huggingface_hub
f5acdcd verified
{
"best_metric": 0.9030581039755352,
"best_model_checkpoint": "/n/holyscratch01/hlakkaraju_lab/Lab/aaronli/w2s_models/wts/boolq/llama3-8b-instruct/gpt2-xl/002/checkpoint-1000",
"epoch": 3.903680916516389,
"eval_steps": 100,
"global_step": 2300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.17,
"learning_rate": 3.265306122448979e-07,
"loss": 1.1307,
"step": 100
},
{
"epoch": 0.17,
"eval_accuracy": 0.6275229357798165,
"eval_loss": 0.7803781032562256,
"eval_runtime": 330.8684,
"eval_samples_per_second": 9.883,
"eval_steps_per_second": 9.883,
"step": 100
},
{
"epoch": 0.34,
"learning_rate": 6.666666666666666e-07,
"loss": 0.5918,
"step": 200
},
{
"epoch": 0.34,
"eval_accuracy": 0.8012232415902141,
"eval_loss": 0.48456913232803345,
"eval_runtime": 330.4617,
"eval_samples_per_second": 9.895,
"eval_steps_per_second": 9.895,
"step": 200
},
{
"epoch": 0.51,
"learning_rate": 1e-06,
"loss": 0.3622,
"step": 300
},
{
"epoch": 0.51,
"eval_accuracy": 0.8660550458715597,
"eval_loss": 0.3509620428085327,
"eval_runtime": 330.4751,
"eval_samples_per_second": 9.895,
"eval_steps_per_second": 9.895,
"step": 300
},
{
"epoch": 0.68,
"learning_rate": 9.622783855149e-07,
"loss": 0.3308,
"step": 400
},
{
"epoch": 0.68,
"eval_accuracy": 0.8902140672782874,
"eval_loss": 0.2842748761177063,
"eval_runtime": 330.8369,
"eval_samples_per_second": 9.884,
"eval_steps_per_second": 9.884,
"step": 400
},
{
"epoch": 0.85,
"learning_rate": 9.245567710298e-07,
"loss": 0.3175,
"step": 500
},
{
"epoch": 0.85,
"eval_accuracy": 0.8889908256880734,
"eval_loss": 0.27379733324050903,
"eval_runtime": 330.6584,
"eval_samples_per_second": 9.889,
"eval_steps_per_second": 9.889,
"step": 500
},
{
"epoch": 1.02,
"learning_rate": 8.868351565447001e-07,
"loss": 0.2717,
"step": 600
},
{
"epoch": 1.02,
"eval_accuracy": 0.8902140672782874,
"eval_loss": 0.38391104340553284,
"eval_runtime": 330.8255,
"eval_samples_per_second": 9.884,
"eval_steps_per_second": 9.884,
"step": 600
},
{
"epoch": 1.19,
"learning_rate": 8.491135420596e-07,
"loss": 0.1692,
"step": 700
},
{
"epoch": 1.19,
"eval_accuracy": 0.8902140672782874,
"eval_loss": 0.4322011470794678,
"eval_runtime": 330.9477,
"eval_samples_per_second": 9.881,
"eval_steps_per_second": 9.881,
"step": 700
},
{
"epoch": 1.36,
"learning_rate": 8.113919275745001e-07,
"loss": 0.2002,
"step": 800
},
{
"epoch": 1.36,
"eval_accuracy": 0.8963302752293578,
"eval_loss": 0.5128087997436523,
"eval_runtime": 330.6728,
"eval_samples_per_second": 9.889,
"eval_steps_per_second": 9.889,
"step": 800
},
{
"epoch": 1.53,
"learning_rate": 7.736703130894002e-07,
"loss": 0.2214,
"step": 900
},
{
"epoch": 1.53,
"eval_accuracy": 0.9,
"eval_loss": 0.3137321174144745,
"eval_runtime": 330.5236,
"eval_samples_per_second": 9.893,
"eval_steps_per_second": 9.893,
"step": 900
},
{
"epoch": 1.7,
"learning_rate": 7.359486986043002e-07,
"loss": 0.1993,
"step": 1000
},
{
"epoch": 1.7,
"eval_accuracy": 0.9030581039755352,
"eval_loss": 0.4319615960121155,
"eval_runtime": 330.874,
"eval_samples_per_second": 9.883,
"eval_steps_per_second": 9.883,
"step": 1000
},
{
"epoch": 1.87,
"learning_rate": 6.982270841192002e-07,
"loss": 0.1967,
"step": 1100
},
{
"epoch": 1.87,
"eval_accuracy": 0.901223241590214,
"eval_loss": 0.43224194645881653,
"eval_runtime": 330.5713,
"eval_samples_per_second": 9.892,
"eval_steps_per_second": 9.892,
"step": 1100
},
{
"epoch": 2.04,
"learning_rate": 6.605054696341003e-07,
"loss": 0.1723,
"step": 1200
},
{
"epoch": 2.04,
"eval_accuracy": 0.9030581039755352,
"eval_loss": 0.56269770860672,
"eval_runtime": 330.8803,
"eval_samples_per_second": 9.883,
"eval_steps_per_second": 9.883,
"step": 1200
},
{
"epoch": 2.21,
"learning_rate": 6.227838551490004e-07,
"loss": 0.0888,
"step": 1300
},
{
"epoch": 2.21,
"eval_accuracy": 0.9027522935779817,
"eval_loss": 0.5143199563026428,
"eval_runtime": 331.0535,
"eval_samples_per_second": 9.878,
"eval_steps_per_second": 9.878,
"step": 1300
},
{
"epoch": 2.38,
"learning_rate": 5.850622406639005e-07,
"loss": 0.057,
"step": 1400
},
{
"epoch": 2.38,
"eval_accuracy": 0.9018348623853211,
"eval_loss": 0.6684749722480774,
"eval_runtime": 330.8995,
"eval_samples_per_second": 9.882,
"eval_steps_per_second": 9.882,
"step": 1400
},
{
"epoch": 2.55,
"learning_rate": 5.473406261788004e-07,
"loss": 0.0556,
"step": 1500
},
{
"epoch": 2.55,
"eval_accuracy": 0.9021406727828746,
"eval_loss": 0.690205454826355,
"eval_runtime": 330.6813,
"eval_samples_per_second": 9.889,
"eval_steps_per_second": 9.889,
"step": 1500
},
{
"epoch": 2.72,
"learning_rate": 5.096190116937005e-07,
"loss": 0.036,
"step": 1600
},
{
"epoch": 2.72,
"eval_accuracy": 0.9030581039755352,
"eval_loss": 0.708600640296936,
"eval_runtime": 330.8665,
"eval_samples_per_second": 9.883,
"eval_steps_per_second": 9.883,
"step": 1600
},
{
"epoch": 2.89,
"learning_rate": 4.718973972086005e-07,
"loss": 0.0529,
"step": 1700
},
{
"epoch": 2.89,
"eval_accuracy": 0.901223241590214,
"eval_loss": 0.6835083365440369,
"eval_runtime": 330.509,
"eval_samples_per_second": 9.894,
"eval_steps_per_second": 9.894,
"step": 1700
},
{
"epoch": 3.06,
"learning_rate": 4.3417578272350054e-07,
"loss": 0.0265,
"step": 1800
},
{
"epoch": 3.06,
"eval_accuracy": 0.9024464831804281,
"eval_loss": 0.7818667888641357,
"eval_runtime": 330.6981,
"eval_samples_per_second": 9.888,
"eval_steps_per_second": 9.888,
"step": 1800
},
{
"epoch": 3.22,
"learning_rate": 3.964541682384006e-07,
"loss": 0.0204,
"step": 1900
},
{
"epoch": 3.22,
"eval_accuracy": 0.901223241590214,
"eval_loss": 0.773235023021698,
"eval_runtime": 330.8228,
"eval_samples_per_second": 9.884,
"eval_steps_per_second": 9.884,
"step": 1900
},
{
"epoch": 3.39,
"learning_rate": 3.5873255375330063e-07,
"loss": 0.0085,
"step": 2000
},
{
"epoch": 3.39,
"eval_accuracy": 0.901223241590214,
"eval_loss": 0.7827399373054504,
"eval_runtime": 330.5421,
"eval_samples_per_second": 9.893,
"eval_steps_per_second": 9.893,
"step": 2000
},
{
"epoch": 3.56,
"learning_rate": 3.2101093926820065e-07,
"loss": 0.0098,
"step": 2100
},
{
"epoch": 3.56,
"eval_accuracy": 0.900611620795107,
"eval_loss": 0.8394690752029419,
"eval_runtime": 330.8326,
"eval_samples_per_second": 9.884,
"eval_steps_per_second": 9.884,
"step": 2100
},
{
"epoch": 3.73,
"learning_rate": 2.832893247831007e-07,
"loss": 0.0059,
"step": 2200
},
{
"epoch": 3.73,
"eval_accuracy": 0.9018348623853211,
"eval_loss": 0.8700089454650879,
"eval_runtime": 330.8106,
"eval_samples_per_second": 9.885,
"eval_steps_per_second": 9.885,
"step": 2200
},
{
"epoch": 3.9,
"learning_rate": 2.4556771029800075e-07,
"loss": 0.002,
"step": 2300
},
{
"epoch": 3.9,
"eval_accuracy": 0.9015290519877676,
"eval_loss": 0.9172555208206177,
"eval_runtime": 330.8933,
"eval_samples_per_second": 9.882,
"eval_steps_per_second": 9.882,
"step": 2300
}
],
"logging_steps": 100,
"max_steps": 2945,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 100,
"total_flos": 1.024320316529664e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}