|
{ |
|
"epoch": 1.0, |
|
"eval_dpo_losses": 0.6513049602508545, |
|
"eval_logits/chosen": -2.831768035888672, |
|
"eval_logits/rejected": -2.788257598876953, |
|
"eval_logps/chosen": -274.86163330078125, |
|
"eval_logps/rejected": -259.8882751464844, |
|
"eval_loss": 1.0971728563308716, |
|
"eval_positive_losses": 4.613582134246826, |
|
"eval_rewards/accuracies": 0.6269841194152832, |
|
"eval_rewards/chosen": 0.10359582304954529, |
|
"eval_rewards/margins": 0.11065211147069931, |
|
"eval_rewards/margins_max": 0.4366241991519928, |
|
"eval_rewards/margins_min": -0.18976463377475739, |
|
"eval_rewards/margins_std": 0.27969247102737427, |
|
"eval_rewards/rejected": -0.007056289818137884, |
|
"eval_runtime": 283.0495, |
|
"eval_samples": 2000, |
|
"eval_samples_per_second": 7.066, |
|
"eval_steps_per_second": 0.223 |
|
} |