{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.998324958123953, "eval_steps": 100, "global_step": 149, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.3333333333333335e-07, "logits/chosen": -2.7147891521453857, "logits/rejected": -2.661033868789673, "logps/chosen": -319.7568359375, "logps/rejected": -246.44630432128906, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -2.694335699081421, "logits/rejected": -2.6309776306152344, "logps/chosen": -281.6622619628906, "logps/rejected": -233.69451904296875, "loss": 0.692, "rewards/accuracies": 0.5277777910232544, "rewards/chosen": 0.004336116369813681, "rewards/margins": 0.00294702360406518, "rewards/rejected": 0.001389092649333179, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.982842942906386e-06, "logits/chosen": -2.7561354637145996, "logits/rejected": -2.686753273010254, "logps/chosen": -293.4931640625, "logps/rejected": -248.32424926757812, "loss": 0.6807, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.018854713067412376, "rewards/margins": 0.025644272565841675, "rewards/rejected": -0.006789558567106724, "step": 20 }, { "epoch": 0.2, "learning_rate": 4.846996204000967e-06, "logits/chosen": -2.6753716468811035, "logits/rejected": -2.619434356689453, "logps/chosen": -274.0318908691406, "logps/rejected": -236.3435821533203, "loss": 0.6562, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.012759427540004253, "rewards/margins": 0.08607137203216553, "rewards/rejected": -0.073311947286129, "step": 30 }, { "epoch": 0.27, "learning_rate": 4.582735470385229e-06, "logits/chosen": -2.6350154876708984, "logits/rejected": -2.5529685020446777, "logps/chosen": -262.11944580078125, "logps/rejected": -247.7893829345703, "loss": 0.6381, "rewards/accuracies": 0.71875, "rewards/chosen": 0.020980656147003174, "rewards/margins": 0.13384665548801422, "rewards/rejected": -0.11286599934101105, "step": 40 }, { "epoch": 0.34, "learning_rate": 4.204519553876095e-06, "logits/chosen": -2.602764129638672, "logits/rejected": -2.505323648452759, "logps/chosen": -258.61865234375, "logps/rejected": -234.4953155517578, "loss": 0.6301, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.028567293658852577, "rewards/margins": 0.14363256096839905, "rewards/rejected": -0.17219987511634827, "step": 50 }, { "epoch": 0.4, "learning_rate": 3.7330422317447686e-06, "logits/chosen": -2.5826821327209473, "logits/rejected": -2.4887733459472656, "logps/chosen": -276.9225158691406, "logps/rejected": -262.2298583984375, "loss": 0.613, "rewards/accuracies": 0.765625, "rewards/chosen": -0.01478421501815319, "rewards/margins": 0.1955140084028244, "rewards/rejected": -0.21029825508594513, "step": 60 }, { "epoch": 0.47, "learning_rate": 3.1941000034687516e-06, "logits/chosen": -2.5672378540039062, "logits/rejected": -2.505222797393799, "logps/chosen": -260.82904052734375, "logps/rejected": -261.2389831542969, "loss": 0.6081, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.07171599566936493, "rewards/margins": 0.21727749705314636, "rewards/rejected": -0.2889935076236725, "step": 70 }, { "epoch": 0.54, "learning_rate": 2.6171806561748503e-06, "logits/chosen": -2.4923880100250244, "logits/rejected": -2.4172475337982178, "logps/chosen": -274.5457458496094, "logps/rejected": -276.92547607421875, "loss": 0.5938, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.05124073475599289, "rewards/margins": 0.25491490960121155, "rewards/rejected": -0.30615565180778503, "step": 80 }, { "epoch": 0.6, "learning_rate": 2.0338498642707977e-06, "logits/chosen": -2.5528998374938965, "logits/rejected": -2.5099101066589355, "logps/chosen": -260.18048095703125, "logps/rejected": -289.6683044433594, "loss": 0.5904, "rewards/accuracies": 0.75, "rewards/chosen": -0.03127538785338402, "rewards/margins": 0.23240149021148682, "rewards/rejected": -0.26367685198783875, "step": 90 }, { "epoch": 0.67, "learning_rate": 1.4760240991587338e-06, "logits/chosen": -2.480579137802124, "logits/rejected": -2.3913416862487793, "logps/chosen": -252.4193115234375, "logps/rejected": -251.07431030273438, "loss": 0.5862, "rewards/accuracies": 0.765625, "rewards/chosen": -0.05794032663106918, "rewards/margins": 0.27387815713882446, "rewards/rejected": -0.33181843161582947, "step": 100 }, { "epoch": 0.67, "eval_logits/chosen": -2.498286247253418, "eval_logits/rejected": -2.403763771057129, "eval_logps/chosen": -301.20855712890625, "eval_logps/rejected": -296.8028869628906, "eval_loss": 0.6263673305511475, "eval_rewards/accuracies": 0.6299999952316284, "eval_rewards/chosen": -0.09310445934534073, "eval_rewards/margins": 0.17094683647155762, "eval_rewards/rejected": -0.26405128836631775, "eval_runtime": 395.581, "eval_samples_per_second": 5.056, "eval_steps_per_second": 0.632, "step": 100 }, { "epoch": 0.74, "learning_rate": 9.742243453755202e-07, "logits/chosen": -2.492846965789795, "logits/rejected": -2.398972988128662, "logps/chosen": -284.0019836425781, "logps/rejected": -291.156982421875, "loss": 0.5749, "rewards/accuracies": 0.7593749761581421, "rewards/chosen": -0.056562770158052444, "rewards/margins": 0.32814931869506836, "rewards/rejected": -0.3847121000289917, "step": 110 }, { "epoch": 0.8, "learning_rate": 5.559061696656199e-07, "logits/chosen": -2.4556164741516113, "logits/rejected": -2.3743937015533447, "logps/chosen": -277.1618957519531, "logps/rejected": -281.0685119628906, "loss": 0.5891, "rewards/accuracies": 0.778124988079071, "rewards/chosen": -0.09333664923906326, "rewards/margins": 0.2766883969306946, "rewards/rejected": -0.37002506852149963, "step": 120 }, { "epoch": 0.87, "learning_rate": 2.4395751190352924e-07, "logits/chosen": -2.5133512020111084, "logits/rejected": -2.4132015705108643, "logps/chosen": -293.0803527832031, "logps/rejected": -298.37786865234375, "loss": 0.5805, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07415858656167984, "rewards/margins": 0.3214946687221527, "rewards/rejected": -0.39565327763557434, "step": 130 }, { "epoch": 0.94, "learning_rate": 5.544639001763719e-08, "logits/chosen": -2.5079007148742676, "logits/rejected": -2.387241840362549, "logps/chosen": -287.03729248046875, "logps/rejected": -284.5392761230469, "loss": 0.5807, "rewards/accuracies": 0.784375011920929, "rewards/chosen": -0.09289330244064331, "rewards/margins": 0.31849053502082825, "rewards/rejected": -0.41138380765914917, "step": 140 }, { "epoch": 1.0, "step": 149, "total_flos": 0.0, "train_loss": 0.61372606626293, "train_runtime": 6992.9528, "train_samples_per_second": 2.731, "train_steps_per_second": 0.021 } ], "logging_steps": 10, "max_steps": 149, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }