|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.998324958123953, |
|
"eval_steps": 100, |
|
"global_step": 149, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.3333333333333335e-07, |
|
"logits/chosen": -2.7147891521453857, |
|
"logits/rejected": -2.661033868789673, |
|
"logps/chosen": -319.7568359375, |
|
"logps/rejected": -246.44630432128906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -2.694335699081421, |
|
"logits/rejected": -2.6309776306152344, |
|
"logps/chosen": -281.6622619628906, |
|
"logps/rejected": -233.69451904296875, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.004336116369813681, |
|
"rewards/margins": 0.00294702360406518, |
|
"rewards/rejected": 0.001389092649333179, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.982842942906386e-06, |
|
"logits/chosen": -2.7561354637145996, |
|
"logits/rejected": -2.686753273010254, |
|
"logps/chosen": -293.4931640625, |
|
"logps/rejected": -248.32424926757812, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.018854713067412376, |
|
"rewards/margins": 0.025644272565841675, |
|
"rewards/rejected": -0.006789558567106724, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.846996204000967e-06, |
|
"logits/chosen": -2.6753716468811035, |
|
"logits/rejected": -2.619434356689453, |
|
"logps/chosen": -274.0318908691406, |
|
"logps/rejected": -236.3435821533203, |
|
"loss": 0.6562, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.012759427540004253, |
|
"rewards/margins": 0.08607137203216553, |
|
"rewards/rejected": -0.073311947286129, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.582735470385229e-06, |
|
"logits/chosen": -2.6350154876708984, |
|
"logits/rejected": -2.5529685020446777, |
|
"logps/chosen": -262.11944580078125, |
|
"logps/rejected": -247.7893829345703, |
|
"loss": 0.6381, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.020980656147003174, |
|
"rewards/margins": 0.13384665548801422, |
|
"rewards/rejected": -0.11286599934101105, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.204519553876095e-06, |
|
"logits/chosen": -2.602764129638672, |
|
"logits/rejected": -2.505323648452759, |
|
"logps/chosen": -258.61865234375, |
|
"logps/rejected": -234.4953155517578, |
|
"loss": 0.6301, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.028567293658852577, |
|
"rewards/margins": 0.14363256096839905, |
|
"rewards/rejected": -0.17219987511634827, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7330422317447686e-06, |
|
"logits/chosen": -2.5826821327209473, |
|
"logits/rejected": -2.4887733459472656, |
|
"logps/chosen": -276.9225158691406, |
|
"logps/rejected": -262.2298583984375, |
|
"loss": 0.613, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.01478421501815319, |
|
"rewards/margins": 0.1955140084028244, |
|
"rewards/rejected": -0.21029825508594513, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1941000034687516e-06, |
|
"logits/chosen": -2.5672378540039062, |
|
"logits/rejected": -2.505222797393799, |
|
"logps/chosen": -260.82904052734375, |
|
"logps/rejected": -261.2389831542969, |
|
"loss": 0.6081, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.07171599566936493, |
|
"rewards/margins": 0.21727749705314636, |
|
"rewards/rejected": -0.2889935076236725, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6171806561748503e-06, |
|
"logits/chosen": -2.4923880100250244, |
|
"logits/rejected": -2.4172475337982178, |
|
"logps/chosen": -274.5457458496094, |
|
"logps/rejected": -276.92547607421875, |
|
"loss": 0.5938, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.05124073475599289, |
|
"rewards/margins": 0.25491490960121155, |
|
"rewards/rejected": -0.30615565180778503, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0338498642707977e-06, |
|
"logits/chosen": -2.5528998374938965, |
|
"logits/rejected": -2.5099101066589355, |
|
"logps/chosen": -260.18048095703125, |
|
"logps/rejected": -289.6683044433594, |
|
"loss": 0.5904, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.03127538785338402, |
|
"rewards/margins": 0.23240149021148682, |
|
"rewards/rejected": -0.26367685198783875, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4760240991587338e-06, |
|
"logits/chosen": -2.480579137802124, |
|
"logits/rejected": -2.3913416862487793, |
|
"logps/chosen": -252.4193115234375, |
|
"logps/rejected": -251.07431030273438, |
|
"loss": 0.5862, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.05794032663106918, |
|
"rewards/margins": 0.27387815713882446, |
|
"rewards/rejected": -0.33181843161582947, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_logits/chosen": -2.498286247253418, |
|
"eval_logits/rejected": -2.403763771057129, |
|
"eval_logps/chosen": -301.20855712890625, |
|
"eval_logps/rejected": -296.8028869628906, |
|
"eval_loss": 0.6263673305511475, |
|
"eval_rewards/accuracies": 0.6299999952316284, |
|
"eval_rewards/chosen": -0.09310445934534073, |
|
"eval_rewards/margins": 0.17094683647155762, |
|
"eval_rewards/rejected": -0.26405128836631775, |
|
"eval_runtime": 395.581, |
|
"eval_samples_per_second": 5.056, |
|
"eval_steps_per_second": 0.632, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.742243453755202e-07, |
|
"logits/chosen": -2.492846965789795, |
|
"logits/rejected": -2.398972988128662, |
|
"logps/chosen": -284.0019836425781, |
|
"logps/rejected": -291.156982421875, |
|
"loss": 0.5749, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.056562770158052444, |
|
"rewards/margins": 0.32814931869506836, |
|
"rewards/rejected": -0.3847121000289917, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.559061696656199e-07, |
|
"logits/chosen": -2.4556164741516113, |
|
"logits/rejected": -2.3743937015533447, |
|
"logps/chosen": -277.1618957519531, |
|
"logps/rejected": -281.0685119628906, |
|
"loss": 0.5891, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.09333664923906326, |
|
"rewards/margins": 0.2766883969306946, |
|
"rewards/rejected": -0.37002506852149963, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.4395751190352924e-07, |
|
"logits/chosen": -2.5133512020111084, |
|
"logits/rejected": -2.4132015705108643, |
|
"logps/chosen": -293.0803527832031, |
|
"logps/rejected": -298.37786865234375, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.07415858656167984, |
|
"rewards/margins": 0.3214946687221527, |
|
"rewards/rejected": -0.39565327763557434, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.544639001763719e-08, |
|
"logits/chosen": -2.5079007148742676, |
|
"logits/rejected": -2.387241840362549, |
|
"logps/chosen": -287.03729248046875, |
|
"logps/rejected": -284.5392761230469, |
|
"loss": 0.5807, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -0.09289330244064331, |
|
"rewards/margins": 0.31849053502082825, |
|
"rewards/rejected": -0.41138380765914917, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 149, |
|
"total_flos": 0.0, |
|
"train_loss": 0.61372606626293, |
|
"train_runtime": 6992.9528, |
|
"train_samples_per_second": 2.731, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 149, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|