|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 38, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.6681179725575745, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": -0.3981982469558716, |
|
"logits/rejected": 0.007589429616928101, |
|
"logps/chosen": -253.3463134765625, |
|
"logps/rejected": -671.2213745117188, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 4.768585444261873, |
|
"learning_rate": 4.6255428393240354e-07, |
|
"logits/chosen": -0.5132235884666443, |
|
"logits/rejected": -0.01092798262834549, |
|
"logps/chosen": -188.30332946777344, |
|
"logps/rejected": -627.010986328125, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0005456734797917306, |
|
"rewards/margins": 0.004515086766332388, |
|
"rewards/rejected": -0.003969413228332996, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 4.366842926411694, |
|
"learning_rate": 2.730670898658255e-07, |
|
"logits/chosen": -0.6587502360343933, |
|
"logits/rejected": -0.0813610702753067, |
|
"logps/chosen": -157.48902893066406, |
|
"logps/rejected": -626.0028686523438, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.002243603579699993, |
|
"rewards/margins": 0.03073681890964508, |
|
"rewards/rejected": -0.03298041969537735, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 4.881380621263082, |
|
"learning_rate": 6.524777069483525e-08, |
|
"logits/chosen": -0.49691787362098694, |
|
"logits/rejected": -0.0660545602440834, |
|
"logps/chosen": -177.71261596679688, |
|
"logps/rejected": -657.294677734375, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.006053019780665636, |
|
"rewards/margins": 0.07823501527309418, |
|
"rewards/rejected": -0.08428805321455002, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 38, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6694252616480777, |
|
"train_runtime": 502.7814, |
|
"train_samples_per_second": 4.799, |
|
"train_steps_per_second": 0.076 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 38, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|