jfranklin-foundry's picture
Upload folder using huggingface_hub
f95b67e verified
{
"best_metric": 1.3626736402511597,
"best_model_checkpoint": "outputs/checkpoint-453",
"epoch": 12.989247311827956,
"eval_steps": 500,
"global_step": 453,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5734767025089605,
"grad_norm": 1.2556052207946777,
"learning_rate": 4.000000000000001e-06,
"loss": 2.9206,
"step": 20
},
{
"epoch": 0.974910394265233,
"eval_loss": 2.7616007328033447,
"eval_runtime": 12.2966,
"eval_samples_per_second": 30.252,
"eval_steps_per_second": 3.822,
"step": 34
},
{
"epoch": 1.146953405017921,
"grad_norm": 1.4213184118270874,
"learning_rate": 8.000000000000001e-06,
"loss": 2.8502,
"step": 40
},
{
"epoch": 1.7204301075268817,
"grad_norm": 1.9485336542129517,
"learning_rate": 1.2e-05,
"loss": 2.6654,
"step": 60
},
{
"epoch": 1.978494623655914,
"eval_loss": 2.177459478378296,
"eval_runtime": 12.2962,
"eval_samples_per_second": 30.253,
"eval_steps_per_second": 3.822,
"step": 69
},
{
"epoch": 2.293906810035842,
"grad_norm": 0.9282850623130798,
"learning_rate": 1.6000000000000003e-05,
"loss": 2.2325,
"step": 80
},
{
"epoch": 2.867383512544803,
"grad_norm": 0.6272071003913879,
"learning_rate": 2e-05,
"loss": 1.9127,
"step": 100
},
{
"epoch": 2.982078853046595,
"eval_loss": 1.8282170295715332,
"eval_runtime": 12.2659,
"eval_samples_per_second": 30.328,
"eval_steps_per_second": 3.832,
"step": 104
},
{
"epoch": 3.4408602150537635,
"grad_norm": 0.6469711065292358,
"learning_rate": 1.9882804237803487e-05,
"loss": 1.833,
"step": 120
},
{
"epoch": 3.985663082437276,
"eval_loss": 1.7144227027893066,
"eval_runtime": 12.2541,
"eval_samples_per_second": 30.357,
"eval_steps_per_second": 3.835,
"step": 139
},
{
"epoch": 4.014336917562724,
"grad_norm": 0.7177844643592834,
"learning_rate": 1.9533963920549307e-05,
"loss": 1.7656,
"step": 140
},
{
"epoch": 4.587813620071684,
"grad_norm": 0.7868255972862244,
"learning_rate": 1.8961655569610557e-05,
"loss": 1.7057,
"step": 160
},
{
"epoch": 4.989247311827957,
"eval_loss": 1.6268185377120972,
"eval_runtime": 12.2554,
"eval_samples_per_second": 30.354,
"eval_steps_per_second": 3.835,
"step": 174
},
{
"epoch": 5.161290322580645,
"grad_norm": 0.7630313038825989,
"learning_rate": 1.8179293607667177e-05,
"loss": 1.6298,
"step": 180
},
{
"epoch": 5.734767025089606,
"grad_norm": 0.8428413271903992,
"learning_rate": 1.720521593600787e-05,
"loss": 1.5832,
"step": 200
},
{
"epoch": 5.992831541218638,
"eval_loss": 1.5533965826034546,
"eval_runtime": 12.2514,
"eval_samples_per_second": 30.364,
"eval_steps_per_second": 3.836,
"step": 209
},
{
"epoch": 6.308243727598566,
"grad_norm": 0.9982613921165466,
"learning_rate": 1.6062254109666383e-05,
"loss": 1.5144,
"step": 220
},
{
"epoch": 6.881720430107527,
"grad_norm": 0.988570511341095,
"learning_rate": 1.477719818512263e-05,
"loss": 1.4884,
"step": 240
},
{
"epoch": 6.996415770609319,
"eval_loss": 1.4935128688812256,
"eval_runtime": 12.2503,
"eval_samples_per_second": 30.367,
"eval_steps_per_second": 3.837,
"step": 244
},
{
"epoch": 7.455197132616488,
"grad_norm": 0.9964269399642944,
"learning_rate": 1.3380168784085028e-05,
"loss": 1.4513,
"step": 260
},
{
"epoch": 8.0,
"eval_loss": 1.4478424787521362,
"eval_runtime": 12.2711,
"eval_samples_per_second": 30.315,
"eval_steps_per_second": 3.83,
"step": 279
},
{
"epoch": 8.028673835125447,
"grad_norm": 1.040726900100708,
"learning_rate": 1.1903911091646684e-05,
"loss": 1.3805,
"step": 280
},
{
"epoch": 8.602150537634408,
"grad_norm": 1.3048664331436157,
"learning_rate": 1.0383027336900356e-05,
"loss": 1.3677,
"step": 300
},
{
"epoch": 8.974910394265233,
"eval_loss": 1.4132319688796997,
"eval_runtime": 12.2651,
"eval_samples_per_second": 30.33,
"eval_steps_per_second": 3.832,
"step": 313
},
{
"epoch": 9.175627240143369,
"grad_norm": 1.1397103071212769,
"learning_rate": 8.853165746015997e-06,
"loss": 1.3506,
"step": 320
},
{
"epoch": 9.74910394265233,
"grad_norm": 1.4186557531356812,
"learning_rate": 7.350184978033386e-06,
"loss": 1.3173,
"step": 340
},
{
"epoch": 9.978494623655914,
"eval_loss": 1.3905054330825806,
"eval_runtime": 12.26,
"eval_samples_per_second": 30.343,
"eval_steps_per_second": 3.834,
"step": 348
},
{
"epoch": 10.32258064516129,
"grad_norm": 1.2632255554199219,
"learning_rate": 5.9093136282866014e-06,
"loss": 1.3071,
"step": 360
},
{
"epoch": 10.89605734767025,
"grad_norm": 1.3758857250213623,
"learning_rate": 4.56432449998779e-06,
"loss": 1.2863,
"step": 380
},
{
"epoch": 10.982078853046595,
"eval_loss": 1.375404953956604,
"eval_runtime": 12.2609,
"eval_samples_per_second": 30.34,
"eval_steps_per_second": 3.833,
"step": 383
},
{
"epoch": 11.469534050179211,
"grad_norm": 1.365923285484314,
"learning_rate": 3.3467429983443477e-06,
"loss": 1.2785,
"step": 400
},
{
"epoch": 11.985663082437275,
"eval_loss": 1.366598129272461,
"eval_runtime": 12.2562,
"eval_samples_per_second": 30.352,
"eval_steps_per_second": 3.835,
"step": 418
},
{
"epoch": 12.043010752688172,
"grad_norm": 1.328456163406372,
"learning_rate": 2.2851082017805704e-06,
"loss": 1.2641,
"step": 420
},
{
"epoch": 12.616487455197133,
"grad_norm": 1.3476920127868652,
"learning_rate": 1.4043039301279904e-06,
"loss": 1.261,
"step": 440
},
{
"epoch": 12.989247311827956,
"eval_loss": 1.3626736402511597,
"eval_runtime": 12.2637,
"eval_samples_per_second": 30.333,
"eval_steps_per_second": 3.832,
"step": 453
}
],
"logging_steps": 20,
"max_steps": 510,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"total_flos": 2.385554361807667e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}