PEFT
Safetensors
qwen2
alignment-handbook
trl
dpo
Generated from Trainer
khongtrunght's picture
End of training
8ece1d4 verified
raw
history blame contribute delete
573 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -0.6185179352760315,
"eval_logits/rejected": -0.6204895973205566,
"eval_logps/chosen": -202.53036499023438,
"eval_logps/rejected": -267.17254638671875,
"eval_loss": 0.3152291178703308,
"eval_rewards/accuracies": 0.8815028667449951,
"eval_rewards/chosen": 1.9960932731628418,
"eval_rewards/margins": 1.7800120115280151,
"eval_rewards/rejected": 0.2160811871290207,
"eval_runtime": 252.3512,
"eval_samples": 2763,
"eval_samples_per_second": 10.949,
"eval_steps_per_second": 1.371
}