PEFT
Safetensors
qwen2
alignment-handbook
trl
dpo
Generated from Trainer
File size: 573 Bytes
017105e
8ece1d4
 
 
 
 
 
 
 
 
 
 
 
 
 
017105e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
{
    "epoch": 1.0,
    "eval_logits/chosen": -0.6185179352760315,
    "eval_logits/rejected": -0.6204895973205566,
    "eval_logps/chosen": -202.53036499023438,
    "eval_logps/rejected": -267.17254638671875,
    "eval_loss": 0.3152291178703308,
    "eval_rewards/accuracies": 0.8815028667449951,
    "eval_rewards/chosen": 1.9960932731628418,
    "eval_rewards/margins": 1.7800120115280151,
    "eval_rewards/rejected": 0.2160811871290207,
    "eval_runtime": 252.3512,
    "eval_samples": 2763,
    "eval_samples_per_second": 10.949,
    "eval_steps_per_second": 1.371
}