|
{ |
|
"epoch": 1.9936102236421727, |
|
"eval_log_odds_chosen": 1.0634132623672485, |
|
"eval_log_odds_ratio": -0.421150267124176, |
|
"eval_logits/chosen": 35.52544021606445, |
|
"eval_logits/rejected": 34.42332077026367, |
|
"eval_logps/chosen": -0.3376733958721161, |
|
"eval_logps/rejected": -0.8398498296737671, |
|
"eval_loss": 0.6817505359649658, |
|
"eval_nll_loss": 0.6413611173629761, |
|
"eval_rewards/accuracies": 0.8088235259056091, |
|
"eval_rewards/chosen": -0.033767346292734146, |
|
"eval_rewards/margins": 0.0502176471054554, |
|
"eval_rewards/rejected": -0.08398497849702835, |
|
"eval_runtime": 252.7054, |
|
"eval_samples": 5398, |
|
"eval_samples_per_second": 21.361, |
|
"eval_steps_per_second": 0.336, |
|
"total_flos": 0.0, |
|
"train_loss": 0.7846963420892373, |
|
"train_runtime": 5577.3844, |
|
"train_samples": 20000, |
|
"train_samples_per_second": 7.172, |
|
"train_steps_per_second": 0.056 |
|
} |