|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9931869795609387, |
|
"eval_steps": 100, |
|
"global_step": 82, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.555555555555555e-07, |
|
"logits/chosen": -2.692568302154541, |
|
"logits/rejected": -2.667994260787964, |
|
"logps/chosen": -299.3318176269531, |
|
"logps/rejected": -311.19903564453125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9976852877555755e-06, |
|
"logits/chosen": -2.7045490741729736, |
|
"logits/rejected": -2.6807374954223633, |
|
"logps/chosen": -274.0246276855469, |
|
"logps/rejected": -284.09259033203125, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": 0.007271192967891693, |
|
"rewards/margins": 0.002095710253342986, |
|
"rewards/rejected": 0.005175482481718063, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.725068941086693e-06, |
|
"logits/chosen": -2.6655147075653076, |
|
"logits/rejected": -2.6132850646972656, |
|
"logps/chosen": -288.611328125, |
|
"logps/rejected": -283.16064453125, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.598437488079071, |
|
"rewards/chosen": 0.11068712174892426, |
|
"rewards/margins": 0.026317646726965904, |
|
"rewards/rejected": 0.0843694880604744, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.046678508156259e-06, |
|
"logits/chosen": -2.6170387268066406, |
|
"logits/rejected": -2.5729379653930664, |
|
"logps/chosen": -272.33197021484375, |
|
"logps/rejected": -286.86785888671875, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": 0.1085876002907753, |
|
"rewards/margins": 0.06517539173364639, |
|
"rewards/rejected": 0.04341219738125801, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0862284739260247e-06, |
|
"logits/chosen": -2.555568218231201, |
|
"logits/rejected": -2.5221917629241943, |
|
"logps/chosen": -302.2290954589844, |
|
"logps/rejected": -319.53826904296875, |
|
"loss": 0.6616, |
|
"rewards/accuracies": 0.6234375238418579, |
|
"rewards/chosen": 0.04670686274766922, |
|
"rewards/margins": 0.09154415130615234, |
|
"rewards/rejected": -0.04483727738261223, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0188710450729255e-06, |
|
"logits/chosen": -2.580871105194092, |
|
"logits/rejected": -2.525937557220459, |
|
"logps/chosen": -271.65765380859375, |
|
"logps/rejected": -297.47052001953125, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.07181344926357269, |
|
"rewards/margins": 0.09304286539554596, |
|
"rewards/rejected": -0.021229416131973267, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0392545659290789e-06, |
|
"logits/chosen": -2.575265407562256, |
|
"logits/rejected": -2.492567539215088, |
|
"logps/chosen": -287.5987854003906, |
|
"logps/rejected": -283.6988830566406, |
|
"loss": 0.6442, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": 0.09387712180614471, |
|
"rewards/margins": 0.14719055593013763, |
|
"rewards/rejected": -0.05331344157457352, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.260265266334725e-07, |
|
"logits/chosen": -2.5620856285095215, |
|
"logits/rejected": -2.51887845993042, |
|
"logps/chosen": -289.19384765625, |
|
"logps/rejected": -305.2491760253906, |
|
"loss": 0.6476, |
|
"rewards/accuracies": 0.6078125238418579, |
|
"rewards/chosen": 0.042650334537029266, |
|
"rewards/margins": 0.11376988887786865, |
|
"rewards/rejected": -0.07111954689025879, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.25456266348046e-09, |
|
"logits/chosen": -2.559069871902466, |
|
"logits/rejected": -2.489203929901123, |
|
"logps/chosen": -297.2583923339844, |
|
"logps/rejected": -301.9067687988281, |
|
"loss": 0.6453, |
|
"rewards/accuracies": 0.6734374761581421, |
|
"rewards/chosen": 0.04438754543662071, |
|
"rewards/margins": 0.13041387498378754, |
|
"rewards/rejected": -0.08602632582187653, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"step": 82, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6611525947000922, |
|
"train_runtime": 6866.2393, |
|
"train_samples_per_second": 3.078, |
|
"train_steps_per_second": 0.012 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 82, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|