PEFT
Safetensors
qwen2
alignment-handbook
trl
dpo
Generated from Trainer
File size: 588 Bytes
017105e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
{
    "epoch": 0.9996824388694824,
    "eval_logits/chosen": -0.7089246511459351,
    "eval_logits/rejected": -0.7224333882331848,
    "eval_logps/chosen": -261.0423889160156,
    "eval_logps/rejected": -333.52276611328125,
    "eval_loss": 0.4344652593135834,
    "eval_rewards/accuracies": 0.8185185194015503,
    "eval_rewards/chosen": 1.3033398389816284,
    "eval_rewards/margins": 1.0257779359817505,
    "eval_rewards/rejected": 0.27756187319755554,
    "eval_runtime": 185.0205,
    "eval_samples": 2155,
    "eval_samples_per_second": 11.647,
    "eval_steps_per_second": 1.459
}