smollm2-17b-dpo-cai-v1 / all_results.json
moodlep's picture
End of training
9f43716 verified
raw
history blame contribute delete
793 Bytes
{
"epoch": 0.9980582524271845,
"eval_logits/chosen": -1.39054536819458,
"eval_logits/rejected": -1.4322466850280762,
"eval_logps/chosen": -245.04893493652344,
"eval_logps/rejected": -228.5849151611328,
"eval_loss": 0.6930710077285767,
"eval_rewards/accuracies": 0.43386074900627136,
"eval_rewards/chosen": 2.820852751028724e-05,
"eval_rewards/margins": 0.00016564132238272578,
"eval_rewards/rejected": -0.00013743281306233257,
"eval_runtime": 970.4456,
"eval_samples": 3156,
"eval_samples_per_second": 3.252,
"eval_steps_per_second": 0.407,
"total_flos": 0.0,
"train_loss": 0.6931419428220519,
"train_runtime": 4635.9681,
"train_samples": 4119,
"train_samples_per_second": 0.888,
"train_steps_per_second": 0.055
}