zephyr-7b-lora-64-no-quant-all / trainer_state.json
YYYYYYibo's picture
Model save
b829e2d verified
raw
history blame contribute delete
No virus
4.92 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9931869795609387,
"eval_steps": 100,
"global_step": 82,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 5.555555555555555e-07,
"logits/chosen": -2.692568302154541,
"logits/rejected": -2.667994260787964,
"logps/chosen": -299.3318176269531,
"logps/rejected": -311.19903564453125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.12,
"learning_rate": 4.9976852877555755e-06,
"logits/chosen": -2.7045490741729736,
"logits/rejected": -2.6807374954223633,
"logps/chosen": -274.0246276855469,
"logps/rejected": -284.09259033203125,
"loss": 0.6918,
"rewards/accuracies": 0.484375,
"rewards/chosen": 0.007271192967891693,
"rewards/margins": 0.002095710253342986,
"rewards/rejected": 0.005175482481718063,
"step": 10
},
{
"epoch": 0.24,
"learning_rate": 4.725068941086693e-06,
"logits/chosen": -2.6655147075653076,
"logits/rejected": -2.6132850646972656,
"logps/chosen": -288.611328125,
"logps/rejected": -283.16064453125,
"loss": 0.6796,
"rewards/accuracies": 0.598437488079071,
"rewards/chosen": 0.11068712174892426,
"rewards/margins": 0.026317646726965904,
"rewards/rejected": 0.0843694880604744,
"step": 20
},
{
"epoch": 0.36,
"learning_rate": 4.046678508156259e-06,
"logits/chosen": -2.6170387268066406,
"logits/rejected": -2.5729379653930664,
"logps/chosen": -272.33197021484375,
"logps/rejected": -286.86785888671875,
"loss": 0.664,
"rewards/accuracies": 0.6171875,
"rewards/chosen": 0.1085876002907753,
"rewards/margins": 0.06517539173364639,
"rewards/rejected": 0.04341219738125801,
"step": 30
},
{
"epoch": 0.48,
"learning_rate": 3.0862284739260247e-06,
"logits/chosen": -2.555568218231201,
"logits/rejected": -2.5221917629241943,
"logps/chosen": -302.2290954589844,
"logps/rejected": -319.53826904296875,
"loss": 0.6616,
"rewards/accuracies": 0.6234375238418579,
"rewards/chosen": 0.04670686274766922,
"rewards/margins": 0.09154415130615234,
"rewards/rejected": -0.04483727738261223,
"step": 40
},
{
"epoch": 0.61,
"learning_rate": 2.0188710450729255e-06,
"logits/chosen": -2.580871105194092,
"logits/rejected": -2.525937557220459,
"logps/chosen": -271.65765380859375,
"logps/rejected": -297.47052001953125,
"loss": 0.6567,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.07181344926357269,
"rewards/margins": 0.09304286539554596,
"rewards/rejected": -0.021229416131973267,
"step": 50
},
{
"epoch": 0.73,
"learning_rate": 1.0392545659290789e-06,
"logits/chosen": -2.575265407562256,
"logits/rejected": -2.492567539215088,
"logps/chosen": -287.5987854003906,
"logps/rejected": -283.6988830566406,
"loss": 0.6442,
"rewards/accuracies": 0.6796875,
"rewards/chosen": 0.09387712180614471,
"rewards/margins": 0.14719055593013763,
"rewards/rejected": -0.05331344157457352,
"step": 60
},
{
"epoch": 0.85,
"learning_rate": 3.260265266334725e-07,
"logits/chosen": -2.5620856285095215,
"logits/rejected": -2.51887845993042,
"logps/chosen": -289.19384765625,
"logps/rejected": -305.2491760253906,
"loss": 0.6476,
"rewards/accuracies": 0.6078125238418579,
"rewards/chosen": 0.042650334537029266,
"rewards/margins": 0.11376988887786865,
"rewards/rejected": -0.07111954689025879,
"step": 70
},
{
"epoch": 0.97,
"learning_rate": 9.25456266348046e-09,
"logits/chosen": -2.559069871902466,
"logits/rejected": -2.489203929901123,
"logps/chosen": -297.2583923339844,
"logps/rejected": -301.9067687988281,
"loss": 0.6453,
"rewards/accuracies": 0.6734374761581421,
"rewards/chosen": 0.04438754543662071,
"rewards/margins": 0.13041387498378754,
"rewards/rejected": -0.08602632582187653,
"step": 80
},
{
"epoch": 0.99,
"step": 82,
"total_flos": 0.0,
"train_loss": 0.6611525947000922,
"train_runtime": 6866.2393,
"train_samples_per_second": 3.078,
"train_steps_per_second": 0.012
}
],
"logging_steps": 10,
"max_steps": 82,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}