PEFT
Safetensors
qwen2
alignment-handbook
trl
dpo
Generated from Trainer
File size: 582 Bytes
87bf22a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
{
    "epoch": 0.9994761655316919,
    "eval_logits/chosen": -0.6647697687149048,
    "eval_logits/rejected": -0.7098657488822937,
    "eval_logps/chosen": -225.6959991455078,
    "eval_logps/rejected": -275.41259765625,
    "eval_loss": 0.3207552433013916,
    "eval_rewards/accuracies": 0.8852941393852234,
    "eval_rewards/chosen": 1.798000693321228,
    "eval_rewards/margins": 1.842041015625,
    "eval_rewards/rejected": -0.044040482491254807,
    "eval_runtime": 260.7673,
    "eval_samples": 2720,
    "eval_samples_per_second": 10.431,
    "eval_steps_per_second": 1.304
}