|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 50, |
|
"global_step": 32, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.25e-07, |
|
"logits/generated": -2.9780185222625732, |
|
"logits/real": -2.891969919204712, |
|
"logps/generated": -75.28430938720703, |
|
"logps/real": -112.39949035644531, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.928571428571428e-07, |
|
"logits/generated": -3.0109641551971436, |
|
"logits/real": -2.735833168029785, |
|
"logps/generated": -129.67689514160156, |
|
"logps/real": -122.11038970947266, |
|
"loss": 0.1661, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -2.9385464191436768, |
|
"rewards/margins": 3.736133098602295, |
|
"rewards/real": 0.7975864410400391, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.1428571428571426e-07, |
|
"logits/generated": -2.9212605953216553, |
|
"logits/real": -2.739665985107422, |
|
"logps/generated": -155.23023986816406, |
|
"logps/real": -139.42665100097656, |
|
"loss": 0.0224, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.471407890319824, |
|
"rewards/margins": 6.653175354003906, |
|
"rewards/real": 1.1817679405212402, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.571428571428571e-08, |
|
"logits/generated": -2.919872760772705, |
|
"logits/real": -2.64644455909729, |
|
"logps/generated": -149.81588745117188, |
|
"logps/real": -110.94522857666016, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.761648178100586, |
|
"rewards/margins": 7.198714256286621, |
|
"rewards/real": 1.4370663166046143, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 32, |
|
"total_flos": 0.0, |
|
"train_loss": 0.07844111844315194, |
|
"train_runtime": 153.525, |
|
"train_samples_per_second": 6.514, |
|
"train_steps_per_second": 0.208 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 32, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|