|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.992, |
|
"eval_steps": 200, |
|
"global_step": 62, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 107.88710762985947, |
|
"learning_rate": 7.142857142857142e-08, |
|
"logits/generated": -2.752808094024658, |
|
"logits/real": -2.2284693717956543, |
|
"logps/generated": -121.13522338867188, |
|
"logps/real": -153.264892578125, |
|
"loss": 0.8501, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 91.94956537605988, |
|
"learning_rate": 4.727272727272727e-07, |
|
"logits/generated": -2.6014535427093506, |
|
"logits/real": -2.6951539516448975, |
|
"logps/generated": -133.1143035888672, |
|
"logps/real": -140.10543823242188, |
|
"loss": 0.7861, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/generated": 0.36728113889694214, |
|
"rewards/margins": 0.08309322595596313, |
|
"rewards/real": 0.45037439465522766, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 81.49584102213795, |
|
"learning_rate": 3.818181818181818e-07, |
|
"logits/generated": -2.740352153778076, |
|
"logits/real": -2.7934975624084473, |
|
"logps/generated": -124.69525146484375, |
|
"logps/real": -122.46867370605469, |
|
"loss": 0.8205, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/generated": 0.6012061834335327, |
|
"rewards/margins": 0.2280128300189972, |
|
"rewards/real": 0.8292189836502075, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 86.29390865351938, |
|
"learning_rate": 2.909090909090909e-07, |
|
"logits/generated": -2.7653279304504395, |
|
"logits/real": -2.861011028289795, |
|
"logps/generated": -104.97627258300781, |
|
"logps/real": -110.78263854980469, |
|
"loss": 0.7926, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/generated": 1.0582153797149658, |
|
"rewards/margins": 0.4253184199333191, |
|
"rewards/real": 1.4835339784622192, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 73.40988849274181, |
|
"learning_rate": 2e-07, |
|
"logits/generated": -2.899013042449951, |
|
"logits/real": -2.9045419692993164, |
|
"logps/generated": -116.1448745727539, |
|
"logps/real": -126.13529968261719, |
|
"loss": 0.8216, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/generated": 1.988576889038086, |
|
"rewards/margins": 0.31346917152404785, |
|
"rewards/real": 2.302046060562134, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 87.84851470741809, |
|
"learning_rate": 1.0909090909090908e-07, |
|
"logits/generated": -2.8664755821228027, |
|
"logits/real": -2.9287781715393066, |
|
"logps/generated": -113.2905502319336, |
|
"logps/real": -129.95413208007812, |
|
"loss": 0.803, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/generated": 2.1775314807891846, |
|
"rewards/margins": 0.2134767472743988, |
|
"rewards/real": 2.391007900238037, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 73.6437417177821, |
|
"learning_rate": 1.818181818181818e-08, |
|
"logits/generated": -2.788062572479248, |
|
"logits/real": -2.8471646308898926, |
|
"logps/generated": -119.0680923461914, |
|
"logps/real": -124.4272232055664, |
|
"loss": 0.7742, |
|
"rewards/accuracies": 0.625, |
|
"rewards/generated": 2.202199935913086, |
|
"rewards/margins": 0.361619770526886, |
|
"rewards/real": 2.5638198852539062, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"step": 62, |
|
"total_flos": 0.0, |
|
"train_loss": 0.8044227688543258, |
|
"train_runtime": 744.0688, |
|
"train_samples_per_second": 2.688, |
|
"train_steps_per_second": 0.083 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 62, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|