{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9965977492802931, "eval_steps": 100, "global_step": 238, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -2.852463483810425, "logits/rejected": -2.8067848682403564, "logps/chosen": -307.2070617675781, "logps/rejected": -292.9268493652344, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.04, "learning_rate": 2.0833333333333334e-06, "logits/chosen": -2.7673232555389404, "logits/rejected": -2.714327096939087, "logps/chosen": -278.9463806152344, "logps/rejected": -269.96435546875, "loss": 0.6929, "rewards/accuracies": 0.5086805820465088, "rewards/chosen": 0.0003373799263499677, "rewards/margins": 0.0005398019566200674, "rewards/rejected": -0.0002024220593739301, "step": 10 }, { "epoch": 0.08, "learning_rate": 4.166666666666667e-06, "logits/chosen": -2.789074659347534, "logits/rejected": -2.739534378051758, "logps/chosen": -288.80340576171875, "logps/rejected": -252.3999481201172, "loss": 0.6892, "rewards/accuracies": 0.6546875238418579, "rewards/chosen": 0.003567395731806755, "rewards/margins": 0.00781105924397707, "rewards/rejected": -0.004243663977831602, "step": 20 }, { "epoch": 0.13, "learning_rate": 4.99030821197584e-06, "logits/chosen": -2.7394039630889893, "logits/rejected": -2.6750001907348633, "logps/chosen": -277.71734619140625, "logps/rejected": -260.70751953125, "loss": 0.6798, "rewards/accuracies": 0.6953125, "rewards/chosen": 0.015827985480427742, "rewards/margins": 0.02965703047811985, "rewards/rejected": -0.013829047791659832, "step": 30 }, { "epoch": 0.17, "learning_rate": 4.931352528237398e-06, "logits/chosen": -2.73751163482666, "logits/rejected": -2.684286594390869, "logps/chosen": -285.98248291015625, "logps/rejected": -277.09478759765625, "loss": 0.6678, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.02657519280910492, "rewards/margins": 0.0554216094315052, "rewards/rejected": -0.028846416622400284, "step": 40 }, { "epoch": 0.21, "learning_rate": 4.820092227512736e-06, "logits/chosen": -2.729034900665283, "logits/rejected": -2.6729769706726074, "logps/chosen": -275.21514892578125, "logps/rejected": -280.52362060546875, "loss": 0.6521, "rewards/accuracies": 0.714062511920929, "rewards/chosen": -0.01229151152074337, "rewards/margins": 0.09265317022800446, "rewards/rejected": -0.10494468361139297, "step": 50 }, { "epoch": 0.25, "learning_rate": 4.658920803689553e-06, "logits/chosen": -2.6756510734558105, "logits/rejected": -2.621499538421631, "logps/chosen": -280.72991943359375, "logps/rejected": -274.62103271484375, "loss": 0.6387, "rewards/accuracies": 0.692187488079071, "rewards/chosen": -0.04894893616437912, "rewards/margins": 0.1295410692691803, "rewards/rejected": -0.17849001288414001, "step": 60 }, { "epoch": 0.29, "learning_rate": 4.451305466682615e-06, "logits/chosen": -2.6422786712646484, "logits/rejected": -2.585517644882202, "logps/chosen": -285.8612365722656, "logps/rejected": -291.7132873535156, "loss": 0.6281, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": -0.10629353672266006, "rewards/margins": 0.18995100259780884, "rewards/rejected": -0.2962445318698883, "step": 70 }, { "epoch": 0.33, "learning_rate": 4.2017125538726574e-06, "logits/chosen": -2.660236120223999, "logits/rejected": -2.5804734230041504, "logps/chosen": -292.82135009765625, "logps/rejected": -288.45306396484375, "loss": 0.605, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13801410794258118, "rewards/margins": 0.2476145476102829, "rewards/rejected": -0.3856286406517029, "step": 80 }, { "epoch": 0.38, "learning_rate": 3.915511447755793e-06, "logits/chosen": -2.6195216178894043, "logits/rejected": -2.5578157901763916, "logps/chosen": -293.1012878417969, "logps/rejected": -303.3466796875, "loss": 0.6143, "rewards/accuracies": 0.6734374761581421, "rewards/chosen": -0.19188150763511658, "rewards/margins": 0.25040262937545776, "rewards/rejected": -0.44228416681289673, "step": 90 }, { "epoch": 0.42, "learning_rate": 3.5988590667807542e-06, "logits/chosen": -2.614781618118286, "logits/rejected": -2.5338780879974365, "logps/chosen": -307.21429443359375, "logps/rejected": -317.61480712890625, "loss": 0.5916, "rewards/accuracies": 0.734375, "rewards/chosen": -0.21577294170856476, "rewards/margins": 0.3157234787940979, "rewards/rejected": -0.5314964056015015, "step": 100 }, { "epoch": 0.42, "eval_logits/chosen": -2.6026527881622314, "eval_logits/rejected": -2.5165884494781494, "eval_logps/chosen": -309.4196472167969, "eval_logps/rejected": -308.91461181640625, "eval_loss": 0.6025363802909851, "eval_rewards/accuracies": 0.6940000057220459, "eval_rewards/chosen": -0.25379911065101624, "eval_rewards/margins": 0.26018837094306946, "eval_rewards/rejected": -0.5139874815940857, "eval_runtime": 384.168, "eval_samples_per_second": 5.206, "eval_steps_per_second": 0.651, "step": 100 }, { "epoch": 0.46, "learning_rate": 3.2585674142717483e-06, "logits/chosen": -2.6301052570343018, "logits/rejected": -2.5181527137756348, "logps/chosen": -304.8443603515625, "logps/rejected": -305.76165771484375, "loss": 0.6052, "rewards/accuracies": 0.6953125, "rewards/chosen": -0.2113591432571411, "rewards/margins": 0.2960760295391083, "rewards/rejected": -0.507435142993927, "step": 110 }, { "epoch": 0.5, "learning_rate": 2.901957034798671e-06, "logits/chosen": -2.571101665496826, "logits/rejected": -2.4931883811950684, "logps/chosen": -299.7984313964844, "logps/rejected": -306.89056396484375, "loss": 0.6, "rewards/accuracies": 0.690625011920929, "rewards/chosen": -0.281256765127182, "rewards/margins": 0.25873565673828125, "rewards/rejected": -0.5399924516677856, "step": 120 }, { "epoch": 0.54, "learning_rate": 2.536699530523292e-06, "logits/chosen": -2.564572811126709, "logits/rejected": -2.4486539363861084, "logps/chosen": -308.9500732421875, "logps/rejected": -306.9311828613281, "loss": 0.582, "rewards/accuracies": 0.7421875, "rewards/chosen": -0.2418045997619629, "rewards/margins": 0.36538395285606384, "rewards/rejected": -0.6071885228157043, "step": 130 }, { "epoch": 0.59, "learning_rate": 2.1706525253979533e-06, "logits/chosen": -2.544403553009033, "logits/rejected": -2.417354106903076, "logps/chosen": -327.3999328613281, "logps/rejected": -330.9579162597656, "loss": 0.5807, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.29655271768569946, "rewards/margins": 0.36625009775161743, "rewards/rejected": -0.6628028750419617, "step": 140 }, { "epoch": 0.63, "learning_rate": 1.811690627559351e-06, "logits/chosen": -2.4691693782806396, "logits/rejected": -2.3947396278381348, "logps/chosen": -303.43988037109375, "logps/rejected": -326.5160217285156, "loss": 0.5886, "rewards/accuracies": 0.7046874761581421, "rewards/chosen": -0.25864046812057495, "rewards/margins": 0.34002798795700073, "rewards/rejected": -0.5986684560775757, "step": 150 }, { "epoch": 0.67, "learning_rate": 1.4675360263490296e-06, "logits/chosen": -2.4801511764526367, "logits/rejected": -2.3981406688690186, "logps/chosen": -303.1156005859375, "logps/rejected": -314.0051574707031, "loss": 0.577, "rewards/accuracies": 0.71875, "rewards/chosen": -0.25856736302375793, "rewards/margins": 0.3619126081466675, "rewards/rejected": -0.620479941368103, "step": 160 }, { "epoch": 0.71, "learning_rate": 1.1455923682523476e-06, "logits/chosen": -2.462503671646118, "logits/rejected": -2.3446600437164307, "logps/chosen": -325.72021484375, "logps/rejected": -326.17205810546875, "loss": 0.5832, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": -0.3383588194847107, "rewards/margins": 0.36354926228523254, "rewards/rejected": -0.7019080519676208, "step": 170 }, { "epoch": 0.75, "learning_rate": 8.527854855097226e-07, "logits/chosen": -2.4440627098083496, "logits/rejected": -2.290944814682007, "logps/chosen": -310.25933837890625, "logps/rejected": -306.5115966796875, "loss": 0.5763, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.32932430505752563, "rewards/margins": 0.3963031768798828, "rewards/rejected": -0.7256274223327637, "step": 180 }, { "epoch": 0.8, "learning_rate": 5.954144037354645e-07, "logits/chosen": -2.4063987731933594, "logits/rejected": -2.3209547996520996, "logps/chosen": -316.28802490234375, "logps/rejected": -324.861083984375, "loss": 0.5673, "rewards/accuracies": 0.7421875, "rewards/chosen": -0.3298575282096863, "rewards/margins": 0.40440693497657776, "rewards/rejected": -0.7342644929885864, "step": 190 }, { "epoch": 0.84, "learning_rate": 3.7901583375171277e-07, "logits/chosen": -2.37199068069458, "logits/rejected": -2.2828264236450195, "logps/chosen": -322.8636169433594, "logps/rejected": -347.17742919921875, "loss": 0.5667, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.3475632071495056, "rewards/margins": 0.3942633271217346, "rewards/rejected": -0.7418265342712402, "step": 200 }, { "epoch": 0.84, "eval_logits/chosen": -2.372931718826294, "eval_logits/rejected": -2.2597038745880127, "eval_logps/chosen": -323.44244384765625, "eval_logps/rejected": -332.209228515625, "eval_loss": 0.5787754058837891, "eval_rewards/accuracies": 0.7200000286102295, "eval_rewards/chosen": -0.39402660727500916, "eval_rewards/margins": 0.35290706157684326, "eval_rewards/rejected": -0.74693363904953, "eval_runtime": 384.1102, "eval_samples_per_second": 5.207, "eval_steps_per_second": 0.651, "step": 200 }, { "epoch": 0.88, "learning_rate": 2.0824506276503898e-07, "logits/chosen": -2.346809148788452, "logits/rejected": -2.259718656539917, "logps/chosen": -330.906005859375, "logps/rejected": -344.9519348144531, "loss": 0.5802, "rewards/accuracies": 0.667187511920929, "rewards/chosen": -0.37789514660835266, "rewards/margins": 0.33273065090179443, "rewards/rejected": -0.7106258273124695, "step": 210 }, { "epoch": 0.92, "learning_rate": 8.677580722139673e-08, "logits/chosen": -2.3068695068359375, "logits/rejected": -2.2727885246276855, "logps/chosen": -312.369140625, "logps/rejected": -334.8897399902344, "loss": 0.5761, "rewards/accuracies": 0.7046874761581421, "rewards/chosen": -0.40535077452659607, "rewards/margins": 0.3713846206665039, "rewards/rejected": -0.7767353653907776, "step": 220 }, { "epoch": 0.96, "learning_rate": 1.7221181760899153e-08, "logits/chosen": -2.347468852996826, "logits/rejected": -2.2807364463806152, "logps/chosen": -321.6934814453125, "logps/rejected": -344.81976318359375, "loss": 0.5749, "rewards/accuracies": 0.698437511920929, "rewards/chosen": -0.3814999759197235, "rewards/margins": 0.3564358651638031, "rewards/rejected": -0.7379359006881714, "step": 230 }, { "epoch": 1.0, "step": 238, "total_flos": 0.0, "train_loss": 0.6080600586758942, "train_runtime": 20828.337, "train_samples_per_second": 2.935, "train_steps_per_second": 0.011 } ], "logging_steps": 10, "max_steps": 238, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }