felipeoes's picture
Model save
0890c91 verified
{
"best_metric": 0.4858,
"best_model_checkpoint": "runs/cocoruta2-llama3-1-8b-regex-only-valid/checkpoint-350",
"epoch": 0.9992542878448919,
"eval_steps": 25,
"global_step": 670,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.037285607755406416,
"grad_norm": 0.5538629931679852,
"learning_rate": 7.46268656716418e-05,
"loss": 0.8987,
"step": 25
},
{
"epoch": 0.037285607755406416,
"eval_loss": 0.6527890563011169,
"eval_runtime": 4.7198,
"eval_samples_per_second": 1.059,
"eval_steps_per_second": 0.636,
"step": 25
},
{
"epoch": 0.07457121551081283,
"grad_norm": 0.2341721772038344,
"learning_rate": 0.0001492537313432836,
"loss": 0.6394,
"step": 50
},
{
"epoch": 0.07457121551081283,
"eval_loss": 0.5486425757408142,
"eval_runtime": 4.7295,
"eval_samples_per_second": 1.057,
"eval_steps_per_second": 0.634,
"step": 50
},
{
"epoch": 0.11185682326621924,
"grad_norm": 0.19912920431393782,
"learning_rate": 0.00019991315351855748,
"loss": 0.6007,
"step": 75
},
{
"epoch": 0.11185682326621924,
"eval_loss": 0.5331323146820068,
"eval_runtime": 4.6724,
"eval_samples_per_second": 1.07,
"eval_steps_per_second": 0.642,
"step": 75
},
{
"epoch": 0.14914243102162567,
"grad_norm": 0.22498739924316974,
"learning_rate": 0.0001985256759242359,
"loss": 0.5696,
"step": 100
},
{
"epoch": 0.14914243102162567,
"eval_loss": 0.49856358766555786,
"eval_runtime": 4.7327,
"eval_samples_per_second": 1.056,
"eval_steps_per_second": 0.634,
"step": 100
},
{
"epoch": 0.18642803877703207,
"grad_norm": 0.14018935803156768,
"learning_rate": 0.00019546910545535558,
"loss": 0.5523,
"step": 125
},
{
"epoch": 0.18642803877703207,
"eval_loss": 0.48138752579689026,
"eval_runtime": 4.6757,
"eval_samples_per_second": 1.069,
"eval_steps_per_second": 0.642,
"step": 125
},
{
"epoch": 0.22371364653243847,
"grad_norm": 0.16571925714609734,
"learning_rate": 0.00019079522252288386,
"loss": 0.5779,
"step": 150
},
{
"epoch": 0.22371364653243847,
"eval_loss": 0.472788006067276,
"eval_runtime": 4.7083,
"eval_samples_per_second": 1.062,
"eval_steps_per_second": 0.637,
"step": 150
},
{
"epoch": 0.2609992542878449,
"grad_norm": 0.14926948884435004,
"learning_rate": 0.00018458320592590975,
"loss": 0.5351,
"step": 175
},
{
"epoch": 0.2609992542878449,
"eval_loss": 0.467672735452652,
"eval_runtime": 4.6956,
"eval_samples_per_second": 1.065,
"eval_steps_per_second": 0.639,
"step": 175
},
{
"epoch": 0.29828486204325133,
"grad_norm": 0.13621513300078802,
"learning_rate": 0.00017693829150820068,
"loss": 0.5251,
"step": 200
},
{
"epoch": 0.29828486204325133,
"eval_loss": 0.4527561664581299,
"eval_runtime": 4.7174,
"eval_samples_per_second": 1.06,
"eval_steps_per_second": 0.636,
"step": 200
},
{
"epoch": 0.33557046979865773,
"grad_norm": 0.14343687732505184,
"learning_rate": 0.00016798998939045895,
"loss": 0.5301,
"step": 225
},
{
"epoch": 0.33557046979865773,
"eval_loss": 0.45074066519737244,
"eval_runtime": 4.7651,
"eval_samples_per_second": 1.049,
"eval_steps_per_second": 0.63,
"step": 225
},
{
"epoch": 0.37285607755406414,
"grad_norm": 0.14384455598548593,
"learning_rate": 0.00015788988997959114,
"loss": 0.5554,
"step": 250
},
{
"epoch": 0.37285607755406414,
"eval_loss": 0.45150551199913025,
"eval_runtime": 4.7234,
"eval_samples_per_second": 1.059,
"eval_steps_per_second": 0.635,
"step": 250
},
{
"epoch": 0.41014168530947054,
"grad_norm": 0.11661007763800872,
"learning_rate": 0.0001468090959227082,
"loss": 0.5198,
"step": 275
},
{
"epoch": 0.41014168530947054,
"eval_loss": 0.4460136294364929,
"eval_runtime": 4.7242,
"eval_samples_per_second": 1.058,
"eval_steps_per_second": 0.635,
"step": 275
},
{
"epoch": 0.44742729306487694,
"grad_norm": 0.1321962416990341,
"learning_rate": 0.0001349353235103232,
"loss": 0.5484,
"step": 300
},
{
"epoch": 0.44742729306487694,
"eval_loss": 0.4421899914741516,
"eval_runtime": 4.7511,
"eval_samples_per_second": 1.052,
"eval_steps_per_second": 0.631,
"step": 300
},
{
"epoch": 0.48471290082028334,
"grad_norm": 0.13900994098830932,
"learning_rate": 0.0001224697226329772,
"loss": 0.5223,
"step": 325
},
{
"epoch": 0.48471290082028334,
"eval_loss": 0.4396364092826843,
"eval_runtime": 4.705,
"eval_samples_per_second": 1.063,
"eval_steps_per_second": 0.638,
"step": 325
},
{
"epoch": 0.5219985085756897,
"grad_norm": 0.15825647169367713,
"learning_rate": 0.00010962346916341903,
"loss": 0.4858,
"step": 350
},
{
"epoch": 0.5219985085756897,
"eval_loss": 0.43353357911109924,
"eval_runtime": 4.7849,
"eval_samples_per_second": 1.045,
"eval_steps_per_second": 0.627,
"step": 350
},
{
"epoch": 0.5592841163310962,
"grad_norm": 0.11937908855087626,
"learning_rate": 9.661418749173467e-05,
"loss": 0.5051,
"step": 375
},
{
"epoch": 0.5592841163310962,
"eval_loss": 0.42787012457847595,
"eval_runtime": 4.7427,
"eval_samples_per_second": 1.054,
"eval_steps_per_second": 0.633,
"step": 375
},
{
"epoch": 0.5965697240865027,
"grad_norm": 0.1236665974221879,
"learning_rate": 8.366226381814697e-05,
"loss": 0.489,
"step": 400
},
{
"epoch": 0.5965697240865027,
"eval_loss": 0.4265735149383545,
"eval_runtime": 4.7586,
"eval_samples_per_second": 1.051,
"eval_steps_per_second": 0.63,
"step": 400
},
{
"epoch": 0.633855331841909,
"grad_norm": 0.1340681564316269,
"learning_rate": 7.09871126588481e-05,
"loss": 0.4992,
"step": 425
},
{
"epoch": 0.633855331841909,
"eval_loss": 0.41966643929481506,
"eval_runtime": 4.725,
"eval_samples_per_second": 1.058,
"eval_steps_per_second": 0.635,
"step": 425
},
{
"epoch": 0.6711409395973155,
"grad_norm": 0.14955138964358772,
"learning_rate": 5.880345981282876e-05,
"loss": 0.4985,
"step": 450
},
{
"epoch": 0.6711409395973155,
"eval_loss": 0.41805362701416016,
"eval_runtime": 4.6928,
"eval_samples_per_second": 1.065,
"eval_steps_per_second": 0.639,
"step": 450
},
{
"epoch": 0.7084265473527218,
"grad_norm": 0.13281003649358977,
"learning_rate": 4.7317704758809946e-05,
"loss": 0.511,
"step": 475
},
{
"epoch": 0.7084265473527218,
"eval_loss": 0.4160347878932953,
"eval_runtime": 4.6998,
"eval_samples_per_second": 1.064,
"eval_steps_per_second": 0.638,
"step": 475
},
{
"epoch": 0.7457121551081283,
"grad_norm": 0.13212294340504707,
"learning_rate": 3.672442410577965e-05,
"loss": 0.5103,
"step": 500
},
{
"epoch": 0.7457121551081283,
"eval_loss": 0.41526561975479126,
"eval_runtime": 4.7196,
"eval_samples_per_second": 1.059,
"eval_steps_per_second": 0.636,
"step": 500
},
{
"epoch": 0.7829977628635347,
"grad_norm": 0.13255473695033904,
"learning_rate": 2.7203075331094017e-05,
"loss": 0.4953,
"step": 525
},
{
"epoch": 0.7829977628635347,
"eval_loss": 0.41363996267318726,
"eval_runtime": 4.6994,
"eval_samples_per_second": 1.064,
"eval_steps_per_second": 0.638,
"step": 525
},
{
"epoch": 0.8202833706189411,
"grad_norm": 0.11553089711715571,
"learning_rate": 1.89149566470915e-05,
"loss": 0.5017,
"step": 550
},
{
"epoch": 0.8202833706189411,
"eval_loss": 0.4126836359500885,
"eval_runtime": 4.7366,
"eval_samples_per_second": 1.056,
"eval_steps_per_second": 0.633,
"step": 550
},
{
"epoch": 0.8575689783743475,
"grad_norm": 0.1205660312265152,
"learning_rate": 1.2000474498175552e-05,
"loss": 0.488,
"step": 575
},
{
"epoch": 0.8575689783743475,
"eval_loss": 0.41148170828819275,
"eval_runtime": 4.6986,
"eval_samples_per_second": 1.064,
"eval_steps_per_second": 0.638,
"step": 575
},
{
"epoch": 0.8948545861297539,
"grad_norm": 0.11357983134630366,
"learning_rate": 6.576764978849004e-06,
"loss": 0.4862,
"step": 600
},
{
"epoch": 0.8948545861297539,
"eval_loss": 0.4110669493675232,
"eval_runtime": 4.7813,
"eval_samples_per_second": 1.046,
"eval_steps_per_second": 0.627,
"step": 600
},
{
"epoch": 0.9321401938851603,
"grad_norm": 0.11689127563900782,
"learning_rate": 2.735709467518699e-06,
"loss": 0.4866,
"step": 625
},
{
"epoch": 0.9321401938851603,
"eval_loss": 0.41068965196609497,
"eval_runtime": 4.7375,
"eval_samples_per_second": 1.055,
"eval_steps_per_second": 0.633,
"step": 625
},
{
"epoch": 0.9694258016405667,
"grad_norm": 0.12155258793935453,
"learning_rate": 5.42378092601481e-07,
"loss": 0.4902,
"step": 650
},
{
"epoch": 0.9694258016405667,
"eval_loss": 0.4103812277317047,
"eval_runtime": 4.6763,
"eval_samples_per_second": 1.069,
"eval_steps_per_second": 0.642,
"step": 650
},
{
"epoch": 0.9992542878448919,
"step": 670,
"total_flos": 7.44129634982953e+16,
"train_loss": 0.0,
"train_runtime": 12.3404,
"train_samples_per_second": 434.426,
"train_steps_per_second": 54.293
}
],
"logging_steps": 25,
"max_steps": 670,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.44129634982953e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}