nash_dpo_doff_no_golden_iter_2 / trainer_state.json
YYYYYYibo's picture
Model save
2517f70 verified
raw
history blame contribute delete
No virus
8.42 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.998324958123953,
"eval_steps": 100,
"global_step": 149,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.3333333333333335e-07,
"logits/chosen": -2.7147891521453857,
"logits/rejected": -2.661033868789673,
"logps/chosen": -319.7568359375,
"logps/rejected": -246.44630432128906,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.07,
"learning_rate": 3.3333333333333333e-06,
"logits/chosen": -2.694335699081421,
"logits/rejected": -2.6309776306152344,
"logps/chosen": -281.6622619628906,
"logps/rejected": -233.69451904296875,
"loss": 0.692,
"rewards/accuracies": 0.5277777910232544,
"rewards/chosen": 0.004336116369813681,
"rewards/margins": 0.00294702360406518,
"rewards/rejected": 0.001389092649333179,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 4.982842942906386e-06,
"logits/chosen": -2.7561354637145996,
"logits/rejected": -2.686753273010254,
"logps/chosen": -293.4931640625,
"logps/rejected": -248.32424926757812,
"loss": 0.6807,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.018854713067412376,
"rewards/margins": 0.025644272565841675,
"rewards/rejected": -0.006789558567106724,
"step": 20
},
{
"epoch": 0.2,
"learning_rate": 4.846996204000967e-06,
"logits/chosen": -2.6753716468811035,
"logits/rejected": -2.619434356689453,
"logps/chosen": -274.0318908691406,
"logps/rejected": -236.3435821533203,
"loss": 0.6562,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.012759427540004253,
"rewards/margins": 0.08607137203216553,
"rewards/rejected": -0.073311947286129,
"step": 30
},
{
"epoch": 0.27,
"learning_rate": 4.582735470385229e-06,
"logits/chosen": -2.6350154876708984,
"logits/rejected": -2.5529685020446777,
"logps/chosen": -262.11944580078125,
"logps/rejected": -247.7893829345703,
"loss": 0.6381,
"rewards/accuracies": 0.71875,
"rewards/chosen": 0.020980656147003174,
"rewards/margins": 0.13384665548801422,
"rewards/rejected": -0.11286599934101105,
"step": 40
},
{
"epoch": 0.34,
"learning_rate": 4.204519553876095e-06,
"logits/chosen": -2.602764129638672,
"logits/rejected": -2.505323648452759,
"logps/chosen": -258.61865234375,
"logps/rejected": -234.4953155517578,
"loss": 0.6301,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.028567293658852577,
"rewards/margins": 0.14363256096839905,
"rewards/rejected": -0.17219987511634827,
"step": 50
},
{
"epoch": 0.4,
"learning_rate": 3.7330422317447686e-06,
"logits/chosen": -2.5826821327209473,
"logits/rejected": -2.4887733459472656,
"logps/chosen": -276.9225158691406,
"logps/rejected": -262.2298583984375,
"loss": 0.613,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.01478421501815319,
"rewards/margins": 0.1955140084028244,
"rewards/rejected": -0.21029825508594513,
"step": 60
},
{
"epoch": 0.47,
"learning_rate": 3.1941000034687516e-06,
"logits/chosen": -2.5672378540039062,
"logits/rejected": -2.505222797393799,
"logps/chosen": -260.82904052734375,
"logps/rejected": -261.2389831542969,
"loss": 0.6081,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.07171599566936493,
"rewards/margins": 0.21727749705314636,
"rewards/rejected": -0.2889935076236725,
"step": 70
},
{
"epoch": 0.54,
"learning_rate": 2.6171806561748503e-06,
"logits/chosen": -2.4923880100250244,
"logits/rejected": -2.4172475337982178,
"logps/chosen": -274.5457458496094,
"logps/rejected": -276.92547607421875,
"loss": 0.5938,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.05124073475599289,
"rewards/margins": 0.25491490960121155,
"rewards/rejected": -0.30615565180778503,
"step": 80
},
{
"epoch": 0.6,
"learning_rate": 2.0338498642707977e-06,
"logits/chosen": -2.5528998374938965,
"logits/rejected": -2.5099101066589355,
"logps/chosen": -260.18048095703125,
"logps/rejected": -289.6683044433594,
"loss": 0.5904,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.03127538785338402,
"rewards/margins": 0.23240149021148682,
"rewards/rejected": -0.26367685198783875,
"step": 90
},
{
"epoch": 0.67,
"learning_rate": 1.4760240991587338e-06,
"logits/chosen": -2.480579137802124,
"logits/rejected": -2.3913416862487793,
"logps/chosen": -252.4193115234375,
"logps/rejected": -251.07431030273438,
"loss": 0.5862,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.05794032663106918,
"rewards/margins": 0.27387815713882446,
"rewards/rejected": -0.33181843161582947,
"step": 100
},
{
"epoch": 0.67,
"eval_logits/chosen": -2.498286247253418,
"eval_logits/rejected": -2.403763771057129,
"eval_logps/chosen": -301.20855712890625,
"eval_logps/rejected": -296.8028869628906,
"eval_loss": 0.6263673305511475,
"eval_rewards/accuracies": 0.6299999952316284,
"eval_rewards/chosen": -0.09310445934534073,
"eval_rewards/margins": 0.17094683647155762,
"eval_rewards/rejected": -0.26405128836631775,
"eval_runtime": 395.581,
"eval_samples_per_second": 5.056,
"eval_steps_per_second": 0.632,
"step": 100
},
{
"epoch": 0.74,
"learning_rate": 9.742243453755202e-07,
"logits/chosen": -2.492846965789795,
"logits/rejected": -2.398972988128662,
"logps/chosen": -284.0019836425781,
"logps/rejected": -291.156982421875,
"loss": 0.5749,
"rewards/accuracies": 0.7593749761581421,
"rewards/chosen": -0.056562770158052444,
"rewards/margins": 0.32814931869506836,
"rewards/rejected": -0.3847121000289917,
"step": 110
},
{
"epoch": 0.8,
"learning_rate": 5.559061696656199e-07,
"logits/chosen": -2.4556164741516113,
"logits/rejected": -2.3743937015533447,
"logps/chosen": -277.1618957519531,
"logps/rejected": -281.0685119628906,
"loss": 0.5891,
"rewards/accuracies": 0.778124988079071,
"rewards/chosen": -0.09333664923906326,
"rewards/margins": 0.2766883969306946,
"rewards/rejected": -0.37002506852149963,
"step": 120
},
{
"epoch": 0.87,
"learning_rate": 2.4395751190352924e-07,
"logits/chosen": -2.5133512020111084,
"logits/rejected": -2.4132015705108643,
"logps/chosen": -293.0803527832031,
"logps/rejected": -298.37786865234375,
"loss": 0.5805,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.07415858656167984,
"rewards/margins": 0.3214946687221527,
"rewards/rejected": -0.39565327763557434,
"step": 130
},
{
"epoch": 0.94,
"learning_rate": 5.544639001763719e-08,
"logits/chosen": -2.5079007148742676,
"logits/rejected": -2.387241840362549,
"logps/chosen": -287.03729248046875,
"logps/rejected": -284.5392761230469,
"loss": 0.5807,
"rewards/accuracies": 0.784375011920929,
"rewards/chosen": -0.09289330244064331,
"rewards/margins": 0.31849053502082825,
"rewards/rejected": -0.41138380765914917,
"step": 140
},
{
"epoch": 1.0,
"step": 149,
"total_flos": 0.0,
"train_loss": 0.61372606626293,
"train_runtime": 6992.9528,
"train_samples_per_second": 2.731,
"train_steps_per_second": 0.021
}
],
"logging_steps": 10,
"max_steps": 149,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}