|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": 106.62171173095703, |
|
"eval_logits/rejected": 100.2883529663086, |
|
"eval_logps/chosen": -476.6144104003906, |
|
"eval_logps/rejected": -493.8050231933594, |
|
"eval_loss": 0.5058174729347229, |
|
"eval_rewards/accuracies": 0.7083333134651184, |
|
"eval_rewards/chosen": -1.1604968309402466, |
|
"eval_rewards/margins": 0.9522747993469238, |
|
"eval_rewards/rejected": -2.11277174949646, |
|
"eval_runtime": 51.3345, |
|
"eval_samples": 750, |
|
"eval_samples_per_second": 14.61, |
|
"eval_steps_per_second": 0.468, |
|
"train_loss": 0.5840168778712933, |
|
"train_runtime": 572.7421, |
|
"train_samples": 6750, |
|
"train_samples_per_second": 11.785, |
|
"train_steps_per_second": 0.091 |
|
} |