|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 2721, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11025358324145534, |
|
"grad_norm": 0.8042088747024536, |
|
"learning_rate": 4.816244027930908e-05, |
|
"loss": 5.8614, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2205071664829107, |
|
"grad_norm": 0.7716182470321655, |
|
"learning_rate": 4.6324880558618154e-05, |
|
"loss": 4.914, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.33076074972436603, |
|
"grad_norm": 0.7360222935676575, |
|
"learning_rate": 4.448732083792723e-05, |
|
"loss": 4.8208, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4410143329658214, |
|
"grad_norm": 0.6253624558448792, |
|
"learning_rate": 4.264976111723631e-05, |
|
"loss": 4.7569, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5512679162072768, |
|
"grad_norm": 0.8106631636619568, |
|
"learning_rate": 4.081220139654539e-05, |
|
"loss": 4.7186, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6615214994487321, |
|
"grad_norm": 0.7546982765197754, |
|
"learning_rate": 3.897464167585447e-05, |
|
"loss": 4.7019, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7717750826901875, |
|
"grad_norm": 0.6954119205474854, |
|
"learning_rate": 3.713708195516354e-05, |
|
"loss": 4.6666, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8820286659316428, |
|
"grad_norm": 0.8203008770942688, |
|
"learning_rate": 3.529952223447262e-05, |
|
"loss": 4.6249, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9922822491730982, |
|
"grad_norm": 0.6401004791259766, |
|
"learning_rate": 3.34619625137817e-05, |
|
"loss": 4.6244, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.1025358324145536, |
|
"grad_norm": 0.7980089783668518, |
|
"learning_rate": 3.162440279309078e-05, |
|
"loss": 4.5987, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.2127894156560088, |
|
"grad_norm": 0.6513141393661499, |
|
"learning_rate": 2.9786843072399855e-05, |
|
"loss": 4.5563, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.3230429988974641, |
|
"grad_norm": 0.7528166770935059, |
|
"learning_rate": 2.7949283351708934e-05, |
|
"loss": 4.5834, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.4332965821389196, |
|
"grad_norm": 0.8176023960113525, |
|
"learning_rate": 2.611172363101801e-05, |
|
"loss": 4.5594, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.543550165380375, |
|
"grad_norm": 0.6479560732841492, |
|
"learning_rate": 2.4274163910327085e-05, |
|
"loss": 4.5616, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.6538037486218302, |
|
"grad_norm": 0.6746203899383545, |
|
"learning_rate": 2.2436604189636164e-05, |
|
"loss": 4.5177, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.7640573318632855, |
|
"grad_norm": 0.6707162857055664, |
|
"learning_rate": 2.059904446894524e-05, |
|
"loss": 4.5275, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.8743109151047408, |
|
"grad_norm": 0.6458820700645447, |
|
"learning_rate": 1.876148474825432e-05, |
|
"loss": 4.5453, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.9845644983461963, |
|
"grad_norm": 0.6533669829368591, |
|
"learning_rate": 1.6923925027563398e-05, |
|
"loss": 4.5206, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.0948180815876514, |
|
"grad_norm": 0.7627405524253845, |
|
"learning_rate": 1.5086365306872474e-05, |
|
"loss": 4.5118, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.205071664829107, |
|
"grad_norm": 0.7460177540779114, |
|
"learning_rate": 1.3248805586181551e-05, |
|
"loss": 4.4961, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.3153252480705624, |
|
"grad_norm": 0.6778430938720703, |
|
"learning_rate": 1.1411245865490629e-05, |
|
"loss": 4.4962, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.4255788313120177, |
|
"grad_norm": 0.6369034647941589, |
|
"learning_rate": 9.573686144799706e-06, |
|
"loss": 4.4971, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.535832414553473, |
|
"grad_norm": 0.6314408779144287, |
|
"learning_rate": 7.736126424108783e-06, |
|
"loss": 4.5085, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.6460859977949283, |
|
"grad_norm": 0.6529719233512878, |
|
"learning_rate": 5.8985667034178614e-06, |
|
"loss": 4.4746, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.7563395810363835, |
|
"grad_norm": 0.6391235589981079, |
|
"learning_rate": 4.061006982726939e-06, |
|
"loss": 4.4794, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.8665931642778393, |
|
"grad_norm": 0.8441604971885681, |
|
"learning_rate": 2.223447262036016e-06, |
|
"loss": 4.4858, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.9768467475192946, |
|
"grad_norm": 0.644396960735321, |
|
"learning_rate": 3.858875413450937e-07, |
|
"loss": 4.5054, |
|
"step": 2700 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2721, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1472045103710208.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|