{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984,
  "eval_steps": 500,
  "global_step": 468,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10666666666666667,
      "grad_norm": 34.43344966418026,
      "learning_rate": 4.999373573764186e-07,
      "logits/chosen": -1.2502222061157227,
      "logits/rejected": -1.2531837224960327,
      "logps/chosen": -380.0782165527344,
      "logps/rejected": -386.2172546386719,
      "loss": 0.8964,
      "rewards/accuracies": 0.4950000047683716,
      "rewards/chosen": -0.023556923493742943,
      "rewards/margins": 0.005924960598349571,
      "rewards/rejected": -0.029481882229447365,
      "step": 50
    },
    {
      "epoch": 0.21333333333333335,
      "grad_norm": 37.45052221475562,
      "learning_rate": 4.807012604511541e-07,
      "logits/chosen": -1.2312381267547607,
      "logits/rejected": -1.2327096462249756,
      "logps/chosen": -395.3690490722656,
      "logps/rejected": -403.4739990234375,
      "loss": 0.8496,
      "rewards/accuracies": 0.5987499952316284,
      "rewards/chosen": -0.4553294777870178,
      "rewards/margins": 0.1346004605293274,
      "rewards/rejected": -0.5899299383163452,
      "step": 100
    },
    {
      "epoch": 0.32,
      "grad_norm": 35.36746039819262,
      "learning_rate": 4.2971971741276185e-07,
      "logits/chosen": -1.260800838470459,
      "logits/rejected": -1.2635284662246704,
      "logps/chosen": -376.4151306152344,
      "logps/rejected": -384.2753601074219,
      "loss": 0.8243,
      "rewards/accuracies": 0.5987499952316284,
      "rewards/chosen": -0.9694927930831909,
      "rewards/margins": 0.27553480863571167,
      "rewards/rejected": -1.2450276613235474,
      "step": 150
    },
    {
      "epoch": 0.4266666666666667,
      "grad_norm": 36.27189952514476,
      "learning_rate": 3.540079991103235e-07,
      "logits/chosen": -1.26079523563385,
      "logits/rejected": -1.2639491558074951,
      "logps/chosen": -394.5101623535156,
      "logps/rejected": -405.01641845703125,
      "loss": 0.8048,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -1.2865251302719116,
      "rewards/margins": 0.49585381150245667,
      "rewards/rejected": -1.7823787927627563,
      "step": 200
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 32.6541770215758,
      "learning_rate": 2.639843506318899e-07,
      "logits/chosen": -1.252700686454773,
      "logits/rejected": -1.263854742050171,
      "logps/chosen": -384.5255126953125,
      "logps/rejected": -390.5956726074219,
      "loss": 0.7906,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -1.3771264553070068,
      "rewards/margins": 0.43737706542015076,
      "rewards/rejected": -1.8145036697387695,
      "step": 250
    },
    {
      "epoch": 0.64,
      "grad_norm": 36.7055592501965,
      "learning_rate": 1.7203639775848423e-07,
      "logits/chosen": -1.2447404861450195,
      "logits/rejected": -1.2467286586761475,
      "logps/chosen": -384.63165283203125,
      "logps/rejected": -393.9804992675781,
      "loss": 0.7835,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -1.325575590133667,
      "rewards/margins": 0.5131824016571045,
      "rewards/rejected": -1.8387579917907715,
      "step": 300
    },
    {
      "epoch": 0.7466666666666667,
      "grad_norm": 30.904620054592144,
      "learning_rate": 9.081655850224449e-08,
      "logits/chosen": -1.2564585208892822,
      "logits/rejected": -1.256412386894226,
      "logps/chosen": -383.8439025878906,
      "logps/rejected": -396.41839599609375,
      "loss": 0.7776,
      "rewards/accuracies": 0.6775000095367432,
      "rewards/chosen": -1.3743709325790405,
      "rewards/margins": 0.5735958814620972,
      "rewards/rejected": -1.9479665756225586,
      "step": 350
    },
    {
      "epoch": 0.8533333333333334,
      "grad_norm": 34.514970559024796,
      "learning_rate": 3.150101814011136e-08,
      "logits/chosen": -1.2832634449005127,
      "logits/rejected": -1.2844091653823853,
      "logps/chosen": -393.8801574707031,
      "logps/rejected": -403.3578186035156,
      "loss": 0.7851,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -1.4652782678604126,
      "rewards/margins": 0.5471586585044861,
      "rewards/rejected": -2.012436866760254,
      "step": 400
    },
    {
      "epoch": 0.96,
      "grad_norm": 33.17626594386728,
      "learning_rate": 2.251839967945535e-09,
      "logits/chosen": -1.242918610572815,
      "logits/rejected": -1.2440977096557617,
      "logps/chosen": -381.6874084472656,
      "logps/rejected": -393.9809265136719,
      "loss": 0.7781,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -1.4276537895202637,
      "rewards/margins": 0.5591089129447937,
      "rewards/rejected": -1.9867626428604126,
      "step": 450
    },
    {
      "epoch": 0.9984,
      "step": 468,
      "total_flos": 160065019445248.0,
      "train_loss": 0.8088664193438668,
      "train_runtime": 8283.0284,
      "train_samples_per_second": 7.244,
      "train_steps_per_second": 0.057
    }
  ],
  "logging_steps": 50,
  "max_steps": 468,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 160065019445248.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}