PEFT
Safetensors
qwen2
alignment-handbook
trl
dpo
Generated from Trainer
khongtrunght's picture
End of training
8ece1d4 verified
raw
history blame contribute delete
768 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -0.6185179352760315,
"eval_logits/rejected": -0.6204895973205566,
"eval_logps/chosen": -202.53036499023438,
"eval_logps/rejected": -267.17254638671875,
"eval_loss": 0.3152291178703308,
"eval_rewards/accuracies": 0.8815028667449951,
"eval_rewards/chosen": 1.9960932731628418,
"eval_rewards/margins": 1.7800120115280151,
"eval_rewards/rejected": 0.2160811871290207,
"eval_runtime": 252.3512,
"eval_samples": 2763,
"eval_samples_per_second": 10.949,
"eval_steps_per_second": 1.371,
"total_flos": 0.0,
"train_loss": 0.39260031933687173,
"train_runtime": 7916.79,
"train_samples": 31353,
"train_samples_per_second": 3.96,
"train_steps_per_second": 0.124
}