PEFT
Safetensors
qwen2
alignment-handbook
trl
dpo
Generated from Trainer
khongtrunght's picture
End of training
87bf22a verified
raw
history blame contribute delete
582 Bytes
{
"epoch": 0.9994761655316919,
"eval_logits/chosen": -0.6647697687149048,
"eval_logits/rejected": -0.7098657488822937,
"eval_logps/chosen": -225.6959991455078,
"eval_logps/rejected": -275.41259765625,
"eval_loss": 0.3207552433013916,
"eval_rewards/accuracies": 0.8852941393852234,
"eval_rewards/chosen": 1.798000693321228,
"eval_rewards/margins": 1.842041015625,
"eval_rewards/rejected": -0.044040482491254807,
"eval_runtime": 260.7673,
"eval_samples": 2720,
"eval_samples_per_second": 10.431,
"eval_steps_per_second": 1.304
}