{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 2685, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00037243947858472997, "grad_norm": 362.0, "learning_rate": 6.691449814126393e-09, "logits/chosen": -0.68359375, "logits/rejected": -2.25, "logps/chosen": -2.28125, "logps/rejected": -0.7578125, "loss": 8.375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.375, "rewards/margins": -7.59375, "rewards/rejected": -3.796875, "step": 1 }, { "epoch": 0.0007448789571694599, "grad_norm": 374.0, "learning_rate": 1.3382899628252786e-08, "logits/chosen": -0.64453125, "logits/rejected": -2.3125, "logps/chosen": -2.4375, "logps/rejected": -0.6953125, "loss": 9.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.25, "rewards/margins": -8.75, "rewards/rejected": -3.484375, "step": 2 }, { "epoch": 0.0011173184357541898, "grad_norm": 364.0, "learning_rate": 2.007434944237918e-08, "logits/chosen": -0.796875, "logits/rejected": -2.015625, "logps/chosen": -2.625, "logps/rejected": -0.71875, "loss": 10.25, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.125, "rewards/margins": -9.5, "rewards/rejected": -3.59375, "step": 3 }, { "epoch": 0.0014897579143389199, "grad_norm": 386.0, "learning_rate": 2.6765799256505573e-08, "logits/chosen": -0.5546875, "logits/rejected": -2.421875, "logps/chosen": -2.671875, "logps/rejected": -0.828125, "loss": 10.0, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.375, "rewards/margins": -9.25, "rewards/rejected": -4.125, "step": 4 }, { "epoch": 0.00186219739292365, "grad_norm": 386.0, "learning_rate": 3.345724907063197e-08, "logits/chosen": -0.74609375, "logits/rejected": -2.0625, "logps/chosen": -2.40625, "logps/rejected": -0.58203125, "loss": 9.9375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.0625, "rewards/margins": -9.125, "rewards/rejected": -2.90625, "step": 5 }, { "epoch": 0.0022346368715083797, "grad_norm": 364.0, "learning_rate": 4.014869888475836e-08, "logits/chosen": -0.71875, "logits/rejected": -1.7734375, "logps/chosen": -2.40625, "logps/rejected": -0.6171875, "loss": 9.6875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.0, "rewards/margins": -8.875, "rewards/rejected": -3.09375, "step": 6 }, { "epoch": 0.0026070763500931097, "grad_norm": 366.0, "learning_rate": 4.6840148698884756e-08, "logits/chosen": -0.6484375, "logits/rejected": -2.265625, "logps/chosen": -2.640625, "logps/rejected": -0.71875, "loss": 10.375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.1875, "rewards/margins": -9.625, "rewards/rejected": -3.59375, "step": 7 }, { "epoch": 0.0029795158286778397, "grad_norm": 324.0, "learning_rate": 5.3531598513011146e-08, "logits/chosen": -0.7734375, "logits/rejected": -1.921875, "logps/chosen": -2.4375, "logps/rejected": -0.53515625, "loss": 10.25, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.125, "rewards/margins": -9.5, "rewards/rejected": -2.6875, "step": 8 }, { "epoch": 0.0033519553072625698, "grad_norm": 328.0, "learning_rate": 6.022304832713754e-08, "logits/chosen": -0.8671875, "logits/rejected": -2.046875, "logps/chosen": -2.171875, "logps/rejected": -0.69921875, "loss": 8.125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.875, "rewards/margins": -7.375, "rewards/rejected": -3.5, "step": 9 }, { "epoch": 0.0037243947858473, "grad_norm": 346.0, "learning_rate": 6.691449814126394e-08, "logits/chosen": -0.91015625, "logits/rejected": -1.71875, "logps/chosen": -2.59375, "logps/rejected": -0.625, "loss": 10.625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.9375, "rewards/margins": -9.8125, "rewards/rejected": -3.125, "step": 10 }, { "epoch": 0.00409683426443203, "grad_norm": 324.0, "learning_rate": 7.360594795539034e-08, "logits/chosen": -0.66015625, "logits/rejected": -2.4375, "logps/chosen": -2.1875, "logps/rejected": -0.765625, "loss": 7.875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.875, "rewards/margins": -7.09375, "rewards/rejected": -3.8125, "step": 11 }, { "epoch": 0.004469273743016759, "grad_norm": 334.0, "learning_rate": 8.029739776951672e-08, "logits/chosen": -0.7578125, "logits/rejected": -2.375, "logps/chosen": -2.328125, "logps/rejected": -0.7265625, "loss": 8.75, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.625, "rewards/margins": -8.0, "rewards/rejected": -3.640625, "step": 12 }, { "epoch": 0.00484171322160149, "grad_norm": 474.0, "learning_rate": 8.698884758364313e-08, "logits/chosen": -0.640625, "logits/rejected": -1.84375, "logps/chosen": -2.53125, "logps/rejected": -0.6875, "loss": 10.0625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.75, "rewards/margins": -9.25, "rewards/rejected": -3.4375, "step": 13 }, { "epoch": 0.0052141527001862194, "grad_norm": 286.0, "learning_rate": 9.368029739776951e-08, "logits/chosen": -0.7421875, "logits/rejected": -1.46875, "logps/chosen": -2.15625, "logps/rejected": -0.5390625, "loss": 8.875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.8125, "rewards/margins": -8.125, "rewards/rejected": -2.6875, "step": 14 }, { "epoch": 0.00558659217877095, "grad_norm": 346.0, "learning_rate": 1.0037174721189591e-07, "logits/chosen": -0.734375, "logits/rejected": -2.171875, "logps/chosen": -2.703125, "logps/rejected": -0.78125, "loss": 10.375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.5, "rewards/margins": -9.5625, "rewards/rejected": -3.90625, "step": 15 }, { "epoch": 0.0059590316573556795, "grad_norm": 388.0, "learning_rate": 1.0706319702602229e-07, "logits/chosen": -0.796875, "logits/rejected": -2.265625, "logps/chosen": -2.6875, "logps/rejected": -0.61328125, "loss": 11.125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.375, "rewards/margins": -10.375, "rewards/rejected": -3.078125, "step": 16 }, { "epoch": 0.00633147113594041, "grad_norm": 336.0, "learning_rate": 1.1375464684014869e-07, "logits/chosen": -0.67578125, "logits/rejected": -2.34375, "logps/chosen": -2.5625, "logps/rejected": -0.6640625, "loss": 10.25, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.8125, "rewards/margins": -9.5, "rewards/rejected": -3.3125, "step": 17 }, { "epoch": 0.0067039106145251395, "grad_norm": 374.0, "learning_rate": 1.2044609665427509e-07, "logits/chosen": -0.71875, "logits/rejected": -1.4140625, "logps/chosen": -2.8125, "logps/rejected": -0.65234375, "loss": 11.5625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -14.0625, "rewards/margins": -10.8125, "rewards/rejected": -3.25, "step": 18 }, { "epoch": 0.00707635009310987, "grad_norm": 372.0, "learning_rate": 1.271375464684015e-07, "logits/chosen": -0.68359375, "logits/rejected": -2.390625, "logps/chosen": -2.265625, "logps/rejected": -0.671875, "loss": 8.75, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.375, "rewards/margins": -8.0, "rewards/rejected": -3.34375, "step": 19 }, { "epoch": 0.0074487895716946, "grad_norm": 328.0, "learning_rate": 1.3382899628252788e-07, "logits/chosen": -0.5546875, "logits/rejected": -2.390625, "logps/chosen": -2.40625, "logps/rejected": -0.66015625, "loss": 9.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.0, "rewards/margins": -8.75, "rewards/rejected": -3.3125, "step": 20 }, { "epoch": 0.00782122905027933, "grad_norm": 384.0, "learning_rate": 1.4052044609665426e-07, "logits/chosen": -0.79296875, "logits/rejected": -2.21875, "logps/chosen": -2.5, "logps/rejected": -0.71875, "loss": 9.75, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.5625, "rewards/margins": -9.0, "rewards/rejected": -3.59375, "step": 21 }, { "epoch": 0.00819366852886406, "grad_norm": 422.0, "learning_rate": 1.4721189591078067e-07, "logits/chosen": -0.51953125, "logits/rejected": -2.328125, "logps/chosen": -2.65625, "logps/rejected": -0.734375, "loss": 10.375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.25, "rewards/margins": -9.625, "rewards/rejected": -3.671875, "step": 22 }, { "epoch": 0.00856610800744879, "grad_norm": 360.0, "learning_rate": 1.5390334572490705e-07, "logits/chosen": -0.6796875, "logits/rejected": -2.171875, "logps/chosen": -2.5625, "logps/rejected": -0.71484375, "loss": 10.0, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.875, "rewards/margins": -9.3125, "rewards/rejected": -3.578125, "step": 23 }, { "epoch": 0.008938547486033519, "grad_norm": 332.0, "learning_rate": 1.6059479553903344e-07, "logits/chosen": -0.74609375, "logits/rejected": -1.8984375, "logps/chosen": -2.40625, "logps/rejected": -0.65625, "loss": 9.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.0, "rewards/margins": -8.75, "rewards/rejected": -3.28125, "step": 24 }, { "epoch": 0.00931098696461825, "grad_norm": 382.0, "learning_rate": 1.6728624535315982e-07, "logits/chosen": -0.67578125, "logits/rejected": -2.25, "logps/chosen": -2.578125, "logps/rejected": -0.76171875, "loss": 9.875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.875, "rewards/margins": -9.125, "rewards/rejected": -3.8125, "step": 25 }, { "epoch": 0.00968342644320298, "grad_norm": 366.0, "learning_rate": 1.7397769516728626e-07, "logits/chosen": -0.76171875, "logits/rejected": -1.921875, "logps/chosen": -2.4375, "logps/rejected": -0.6328125, "loss": 9.75, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.125, "rewards/margins": -9.0, "rewards/rejected": -3.171875, "step": 26 }, { "epoch": 0.01005586592178771, "grad_norm": 576.0, "learning_rate": 1.8066914498141264e-07, "logits/chosen": -0.59765625, "logits/rejected": -1.90625, "logps/chosen": -3.4375, "logps/rejected": -0.73828125, "loss": 14.3125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -17.25, "rewards/margins": -13.5, "rewards/rejected": -3.6875, "step": 27 }, { "epoch": 0.010428305400372439, "grad_norm": 312.0, "learning_rate": 1.8736059479553902e-07, "logits/chosen": -0.6640625, "logits/rejected": -2.296875, "logps/chosen": -2.28125, "logps/rejected": -0.75390625, "loss": 8.375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.4375, "rewards/margins": -7.65625, "rewards/rejected": -3.78125, "step": 28 }, { "epoch": 0.01080074487895717, "grad_norm": 400.0, "learning_rate": 1.940520446096654e-07, "logits/chosen": -0.69921875, "logits/rejected": -2.359375, "logps/chosen": -2.71875, "logps/rejected": -0.609375, "loss": 11.25, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.5625, "rewards/margins": -10.5, "rewards/rejected": -3.046875, "step": 29 }, { "epoch": 0.0111731843575419, "grad_norm": 358.0, "learning_rate": 2.0074349442379182e-07, "logits/chosen": -0.63671875, "logits/rejected": -2.359375, "logps/chosen": -2.4375, "logps/rejected": -0.7578125, "loss": 9.125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.125, "rewards/margins": -8.375, "rewards/rejected": -3.78125, "step": 30 }, { "epoch": 0.01154562383612663, "grad_norm": 368.0, "learning_rate": 2.074349442379182e-07, "logits/chosen": -0.81640625, "logits/rejected": -2.203125, "logps/chosen": -2.59375, "logps/rejected": -0.79296875, "loss": 9.75, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.0, "rewards/margins": -9.0, "rewards/rejected": -3.96875, "step": 31 }, { "epoch": 0.011918063314711359, "grad_norm": 322.0, "learning_rate": 2.1412639405204458e-07, "logits/chosen": -0.796875, "logits/rejected": -2.296875, "logps/chosen": -2.453125, "logps/rejected": -0.75, "loss": 9.25, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.25, "rewards/margins": -8.5, "rewards/rejected": -3.75, "step": 32 }, { "epoch": 0.012290502793296089, "grad_norm": 364.0, "learning_rate": 2.20817843866171e-07, "logits/chosen": -0.734375, "logits/rejected": -2.3125, "logps/chosen": -2.4375, "logps/rejected": -0.71484375, "loss": 9.375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.25, "rewards/margins": -8.625, "rewards/rejected": -3.578125, "step": 33 }, { "epoch": 0.01266294227188082, "grad_norm": 398.0, "learning_rate": 2.2750929368029738e-07, "logits/chosen": -0.640625, "logits/rejected": -2.375, "logps/chosen": -2.640625, "logps/rejected": -0.734375, "loss": 10.3125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.25, "rewards/margins": -9.5, "rewards/rejected": -3.65625, "step": 34 }, { "epoch": 0.01303538175046555, "grad_norm": 276.0, "learning_rate": 2.3420074349442379e-07, "logits/chosen": -1.03125, "logits/rejected": -2.390625, "logps/chosen": -1.703125, "logps/rejected": -0.625, "loss": 6.1875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.5, "rewards/margins": -5.375, "rewards/rejected": -3.125, "step": 35 }, { "epoch": 0.013407821229050279, "grad_norm": 342.0, "learning_rate": 2.4089219330855017e-07, "logits/chosen": -0.72265625, "logits/rejected": -2.34375, "logps/chosen": -2.359375, "logps/rejected": -0.8046875, "loss": 8.5625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.8125, "rewards/margins": -7.78125, "rewards/rejected": -4.03125, "step": 36 }, { "epoch": 0.013780260707635009, "grad_norm": 372.0, "learning_rate": 2.4758364312267655e-07, "logits/chosen": -0.671875, "logits/rejected": -2.234375, "logps/chosen": -2.71875, "logps/rejected": -0.7734375, "loss": 10.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.5625, "rewards/margins": -9.6875, "rewards/rejected": -3.875, "step": 37 }, { "epoch": 0.01415270018621974, "grad_norm": 336.0, "learning_rate": 2.54275092936803e-07, "logits/chosen": -0.84375, "logits/rejected": -2.21875, "logps/chosen": -2.375, "logps/rejected": -0.75, "loss": 9.0, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.875, "rewards/margins": -8.125, "rewards/rejected": -3.75, "step": 38 }, { "epoch": 0.01452513966480447, "grad_norm": 344.0, "learning_rate": 2.6096654275092937e-07, "logits/chosen": -0.7890625, "logits/rejected": -1.90625, "logps/chosen": -2.5, "logps/rejected": -0.59375, "loss": 10.3125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.5, "rewards/margins": -9.5, "rewards/rejected": -2.96875, "step": 39 }, { "epoch": 0.0148975791433892, "grad_norm": 356.0, "learning_rate": 2.6765799256505576e-07, "logits/chosen": -0.53125, "logits/rejected": -2.328125, "logps/chosen": -2.40625, "logps/rejected": -0.74609375, "loss": 9.125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.125, "rewards/margins": -8.375, "rewards/rejected": -3.71875, "step": 40 }, { "epoch": 0.015270018621973929, "grad_norm": 404.0, "learning_rate": 2.7434944237918214e-07, "logits/chosen": -0.74609375, "logits/rejected": -2.03125, "logps/chosen": -2.5, "logps/rejected": -0.75, "loss": 9.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.5, "rewards/margins": -8.75, "rewards/rejected": -3.75, "step": 41 }, { "epoch": 0.01564245810055866, "grad_norm": 412.0, "learning_rate": 2.810408921933085e-07, "logits/chosen": -0.81640625, "logits/rejected": -2.125, "logps/chosen": -2.734375, "logps/rejected": -0.74609375, "loss": 10.75, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.6875, "rewards/margins": -10.0, "rewards/rejected": -3.71875, "step": 42 }, { "epoch": 0.01601489757914339, "grad_norm": 414.0, "learning_rate": 2.877323420074349e-07, "logits/chosen": -0.61328125, "logits/rejected": -2.46875, "logps/chosen": -2.625, "logps/rejected": -0.71875, "loss": 10.25, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.125, "rewards/margins": -9.5, "rewards/rejected": -3.59375, "step": 43 }, { "epoch": 0.01638733705772812, "grad_norm": 348.0, "learning_rate": 2.9442379182156134e-07, "logits/chosen": -0.60546875, "logits/rejected": -2.078125, "logps/chosen": -2.53125, "logps/rejected": -0.75, "loss": 9.75, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.6875, "rewards/margins": -8.875, "rewards/rejected": -3.765625, "step": 44 }, { "epoch": 0.01675977653631285, "grad_norm": 304.0, "learning_rate": 3.011152416356877e-07, "logits/chosen": -0.6796875, "logits/rejected": -2.296875, "logps/chosen": -2.4375, "logps/rejected": -0.6796875, "loss": 9.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.125, "rewards/margins": -8.6875, "rewards/rejected": -3.40625, "step": 45 }, { "epoch": 0.01713221601489758, "grad_norm": 496.0, "learning_rate": 3.078066914498141e-07, "logits/chosen": -0.55859375, "logits/rejected": -2.28125, "logps/chosen": -2.984375, "logps/rejected": -0.81640625, "loss": 11.625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -14.9375, "rewards/margins": -10.875, "rewards/rejected": -4.0625, "step": 46 }, { "epoch": 0.017504655493482308, "grad_norm": 358.0, "learning_rate": 3.1449814126394055e-07, "logits/chosen": -0.75, "logits/rejected": -1.671875, "logps/chosen": -2.5625, "logps/rejected": -0.5625, "loss": 10.75, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.75, "rewards/margins": -10.0, "rewards/rejected": -2.8125, "step": 47 }, { "epoch": 0.017877094972067038, "grad_norm": 396.0, "learning_rate": 3.211895910780669e-07, "logits/chosen": -0.7734375, "logits/rejected": -1.609375, "logps/chosen": -2.6875, "logps/rejected": -0.58984375, "loss": 11.25, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.5, "rewards/margins": -10.5, "rewards/rejected": -2.9375, "step": 48 }, { "epoch": 0.01824953445065177, "grad_norm": 312.0, "learning_rate": 3.278810408921933e-07, "logits/chosen": -1.03125, "logits/rejected": -1.6640625, "logps/chosen": -2.5, "logps/rejected": -0.58984375, "loss": 10.375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.5, "rewards/margins": -9.5625, "rewards/rejected": -2.9375, "step": 49 }, { "epoch": 0.0186219739292365, "grad_norm": 302.0, "learning_rate": 3.3457249070631964e-07, "logits/chosen": -0.82421875, "logits/rejected": -2.3125, "logps/chosen": -2.171875, "logps/rejected": -0.7265625, "loss": 8.0, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.875, "rewards/margins": -7.25, "rewards/rejected": -3.625, "step": 50 }, { "epoch": 0.01899441340782123, "grad_norm": 364.0, "learning_rate": 3.412639405204461e-07, "logits/chosen": -0.81640625, "logits/rejected": -2.28125, "logps/chosen": -2.875, "logps/rejected": -0.6953125, "loss": 11.625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -14.3125, "rewards/margins": -10.8125, "rewards/rejected": -3.484375, "step": 51 }, { "epoch": 0.01936685288640596, "grad_norm": 360.0, "learning_rate": 3.479553903345725e-07, "logits/chosen": -0.89453125, "logits/rejected": -2.265625, "logps/chosen": -2.53125, "logps/rejected": -0.6796875, "loss": 10.125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.6875, "rewards/margins": -9.3125, "rewards/rejected": -3.375, "step": 52 }, { "epoch": 0.01973929236499069, "grad_norm": 346.0, "learning_rate": 3.5464684014869885e-07, "logits/chosen": -0.75, "logits/rejected": -1.9609375, "logps/chosen": -2.53125, "logps/rejected": -0.84765625, "loss": 9.125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.625, "rewards/margins": -8.375, "rewards/rejected": -4.25, "step": 53 }, { "epoch": 0.02011173184357542, "grad_norm": 444.0, "learning_rate": 3.613382899628253e-07, "logits/chosen": -0.56640625, "logits/rejected": -2.296875, "logps/chosen": -2.53125, "logps/rejected": -0.8046875, "loss": 9.375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.625, "rewards/margins": -8.5625, "rewards/rejected": -4.0, "step": 54 }, { "epoch": 0.020484171322160148, "grad_norm": 245.0, "learning_rate": 3.6802973977695166e-07, "logits/chosen": -0.94921875, "logits/rejected": -2.375, "logps/chosen": -1.9140625, "logps/rejected": -0.8203125, "loss": 6.25, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.5625, "rewards/margins": -5.4375, "rewards/rejected": -4.125, "step": 55 }, { "epoch": 0.020856610800744878, "grad_norm": 372.0, "learning_rate": 3.7472118959107805e-07, "logits/chosen": -1.0078125, "logits/rejected": -2.171875, "logps/chosen": -2.5, "logps/rejected": -0.69921875, "loss": 9.8125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.5, "rewards/margins": -9.0, "rewards/rejected": -3.5, "step": 56 }, { "epoch": 0.021229050279329607, "grad_norm": 368.0, "learning_rate": 3.8141263940520443e-07, "logits/chosen": -0.6953125, "logits/rejected": -1.7109375, "logps/chosen": -2.59375, "logps/rejected": -0.65625, "loss": 10.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.0, "rewards/margins": -9.75, "rewards/rejected": -3.28125, "step": 57 }, { "epoch": 0.02160148975791434, "grad_norm": 474.0, "learning_rate": 3.881040892193308e-07, "logits/chosen": -0.890625, "logits/rejected": -1.3046875, "logps/chosen": -3.1875, "logps/rejected": -0.43359375, "loss": 14.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -15.9375, "rewards/margins": -13.75, "rewards/rejected": -2.171875, "step": 58 }, { "epoch": 0.02197392923649907, "grad_norm": 240.0, "learning_rate": 3.9479553903345725e-07, "logits/chosen": -0.828125, "logits/rejected": -1.6015625, "logps/chosen": -2.28125, "logps/rejected": -1.6875, "loss": 5.5, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -11.4375, "rewards/margins": -3.0, "rewards/rejected": -8.4375, "step": 59 }, { "epoch": 0.0223463687150838, "grad_norm": 372.0, "learning_rate": 4.0148698884758363e-07, "logits/chosen": -0.76171875, "logits/rejected": -2.296875, "logps/chosen": -2.53125, "logps/rejected": -0.83203125, "loss": 9.3125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.6875, "rewards/margins": -8.5, "rewards/rejected": -4.15625, "step": 60 }, { "epoch": 0.02271880819366853, "grad_norm": 400.0, "learning_rate": 4.0817843866171e-07, "logits/chosen": -0.69921875, "logits/rejected": -1.359375, "logps/chosen": -2.828125, "logps/rejected": -0.6171875, "loss": 11.8125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -14.125, "rewards/margins": -11.0625, "rewards/rejected": -3.09375, "step": 61 }, { "epoch": 0.02309124767225326, "grad_norm": 368.0, "learning_rate": 4.148698884758364e-07, "logits/chosen": -0.6640625, "logits/rejected": -2.34375, "logps/chosen": -2.453125, "logps/rejected": -0.8125, "loss": 9.0, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.25, "rewards/margins": -8.25, "rewards/rejected": -4.0625, "step": 62 }, { "epoch": 0.02346368715083799, "grad_norm": 390.0, "learning_rate": 4.2156133828996284e-07, "logits/chosen": -0.7265625, "logits/rejected": -1.734375, "logps/chosen": -2.3125, "logps/rejected": -0.48828125, "loss": 9.9375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.625, "rewards/margins": -9.1875, "rewards/rejected": -2.453125, "step": 63 }, { "epoch": 0.023836126629422718, "grad_norm": 348.0, "learning_rate": 4.2825278810408917e-07, "logits/chosen": -0.7421875, "logits/rejected": -2.328125, "logps/chosen": -2.1875, "logps/rejected": -0.6953125, "loss": 8.1875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.875, "rewards/margins": -7.4375, "rewards/rejected": -3.46875, "step": 64 }, { "epoch": 0.024208566108007448, "grad_norm": 314.0, "learning_rate": 4.349442379182156e-07, "logits/chosen": -0.53515625, "logits/rejected": -2.265625, "logps/chosen": -2.15625, "logps/rejected": -0.67578125, "loss": 8.125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.75, "rewards/margins": -7.375, "rewards/rejected": -3.375, "step": 65 }, { "epoch": 0.024581005586592177, "grad_norm": 370.0, "learning_rate": 4.41635687732342e-07, "logits/chosen": -0.62890625, "logits/rejected": -2.34375, "logps/chosen": -2.5, "logps/rejected": -0.7734375, "loss": 9.375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.4375, "rewards/margins": -8.5625, "rewards/rejected": -3.875, "step": 66 }, { "epoch": 0.02495344506517691, "grad_norm": 294.0, "learning_rate": 4.4832713754646837e-07, "logits/chosen": -0.828125, "logits/rejected": -2.140625, "logps/chosen": -2.1875, "logps/rejected": -0.828125, "loss": 7.625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.0, "rewards/margins": -6.8125, "rewards/rejected": -4.125, "step": 67 }, { "epoch": 0.02532588454376164, "grad_norm": 340.0, "learning_rate": 4.5501858736059475e-07, "logits/chosen": -0.82421875, "logits/rejected": -2.25, "logps/chosen": -2.59375, "logps/rejected": -0.671875, "loss": 10.375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.9375, "rewards/margins": -9.5625, "rewards/rejected": -3.34375, "step": 68 }, { "epoch": 0.02569832402234637, "grad_norm": 374.0, "learning_rate": 4.6171003717472114e-07, "logits/chosen": -0.859375, "logits/rejected": -2.140625, "logps/chosen": -2.390625, "logps/rejected": -0.6015625, "loss": 9.75, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.9375, "rewards/margins": -8.875, "rewards/rejected": -3.0, "step": 69 }, { "epoch": 0.0260707635009311, "grad_norm": 418.0, "learning_rate": 4.6840148698884757e-07, "logits/chosen": -0.7734375, "logits/rejected": -1.9140625, "logps/chosen": -2.4375, "logps/rejected": -0.6953125, "loss": 9.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.1875, "rewards/margins": -8.75, "rewards/rejected": -3.46875, "step": 70 }, { "epoch": 0.02644320297951583, "grad_norm": 322.0, "learning_rate": 4.75092936802974e-07, "logits/chosen": -1.015625, "logits/rejected": -2.09375, "logps/chosen": -2.15625, "logps/rejected": -0.6171875, "loss": 8.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.8125, "rewards/margins": -7.75, "rewards/rejected": -3.09375, "step": 71 }, { "epoch": 0.026815642458100558, "grad_norm": 396.0, "learning_rate": 4.817843866171003e-07, "logits/chosen": -0.65625, "logits/rejected": -1.6328125, "logps/chosen": -2.40625, "logps/rejected": -0.73828125, "loss": 9.125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.0625, "rewards/margins": -8.375, "rewards/rejected": -3.6875, "step": 72 }, { "epoch": 0.027188081936685288, "grad_norm": 324.0, "learning_rate": 4.884758364312267e-07, "logits/chosen": -0.84375, "logits/rejected": -2.109375, "logps/chosen": -2.3125, "logps/rejected": -0.71875, "loss": 8.6875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.5, "rewards/margins": -7.9375, "rewards/rejected": -3.578125, "step": 73 }, { "epoch": 0.027560521415270017, "grad_norm": 362.0, "learning_rate": 4.951672862453531e-07, "logits/chosen": -0.70703125, "logits/rejected": -2.359375, "logps/chosen": -2.53125, "logps/rejected": -0.8671875, "loss": 9.0, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.625, "rewards/margins": -8.25, "rewards/rejected": -4.34375, "step": 74 }, { "epoch": 0.027932960893854747, "grad_norm": 444.0, "learning_rate": 5.018587360594795e-07, "logits/chosen": -0.703125, "logits/rejected": -1.53125, "logps/chosen": -2.25, "logps/rejected": -0.765625, "loss": 8.25, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.25, "rewards/margins": -7.4375, "rewards/rejected": -3.828125, "step": 75 }, { "epoch": 0.02830540037243948, "grad_norm": 316.0, "learning_rate": 5.08550185873606e-07, "logits/chosen": -0.9921875, "logits/rejected": -2.203125, "logps/chosen": -2.28125, "logps/rejected": -0.625, "loss": 9.0625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.375, "rewards/margins": -8.25, "rewards/rejected": -3.125, "step": 76 }, { "epoch": 0.02867783985102421, "grad_norm": 258.0, "learning_rate": 5.152416356877323e-07, "logits/chosen": -0.8359375, "logits/rejected": -2.140625, "logps/chosen": -1.921875, "logps/rejected": -0.71484375, "loss": 6.8125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.5625, "rewards/margins": -6.0, "rewards/rejected": -3.578125, "step": 77 }, { "epoch": 0.02905027932960894, "grad_norm": 336.0, "learning_rate": 5.219330855018587e-07, "logits/chosen": -0.7421875, "logits/rejected": -2.25, "logps/chosen": -2.21875, "logps/rejected": -0.6640625, "loss": 8.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.0625, "rewards/margins": -7.71875, "rewards/rejected": -3.3125, "step": 78 }, { "epoch": 0.02942271880819367, "grad_norm": 348.0, "learning_rate": 5.286245353159851e-07, "logits/chosen": -0.73046875, "logits/rejected": -2.203125, "logps/chosen": -2.46875, "logps/rejected": -0.7109375, "loss": 9.5625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.375, "rewards/margins": -8.75, "rewards/rejected": -3.5625, "step": 79 }, { "epoch": 0.0297951582867784, "grad_norm": 352.0, "learning_rate": 5.353159851301115e-07, "logits/chosen": -0.7109375, "logits/rejected": -2.34375, "logps/chosen": -2.421875, "logps/rejected": -0.80078125, "loss": 8.875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.125, "rewards/margins": -8.125, "rewards/rejected": -4.0, "step": 80 }, { "epoch": 0.030167597765363128, "grad_norm": 326.0, "learning_rate": 5.42007434944238e-07, "logits/chosen": -0.68359375, "logits/rejected": -2.421875, "logps/chosen": -2.46875, "logps/rejected": -0.7890625, "loss": 9.1875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.375, "rewards/margins": -8.4375, "rewards/rejected": -3.953125, "step": 81 }, { "epoch": 0.030540037243947857, "grad_norm": 354.0, "learning_rate": 5.486988847583643e-07, "logits/chosen": -0.8515625, "logits/rejected": -2.15625, "logps/chosen": -2.359375, "logps/rejected": -0.7109375, "loss": 9.0, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.75, "rewards/margins": -8.25, "rewards/rejected": -3.53125, "step": 82 }, { "epoch": 0.030912476722532587, "grad_norm": 380.0, "learning_rate": 5.553903345724907e-07, "logits/chosen": -0.69140625, "logits/rejected": -1.703125, "logps/chosen": -2.65625, "logps/rejected": -0.7109375, "loss": 10.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.3125, "rewards/margins": -9.75, "rewards/rejected": -3.546875, "step": 83 }, { "epoch": 0.03128491620111732, "grad_norm": 318.0, "learning_rate": 5.62081784386617e-07, "logits/chosen": -0.79296875, "logits/rejected": -2.34375, "logps/chosen": -2.296875, "logps/rejected": -0.76171875, "loss": 8.4375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.5, "rewards/margins": -7.6875, "rewards/rejected": -3.8125, "step": 84 }, { "epoch": 0.03165735567970205, "grad_norm": 408.0, "learning_rate": 5.687732342007435e-07, "logits/chosen": -0.84375, "logits/rejected": -1.8046875, "logps/chosen": -2.609375, "logps/rejected": -0.75390625, "loss": 10.125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.0, "rewards/margins": -9.25, "rewards/rejected": -3.765625, "step": 85 }, { "epoch": 0.03202979515828678, "grad_norm": 336.0, "learning_rate": 5.754646840148698e-07, "logits/chosen": -0.6953125, "logits/rejected": -2.1875, "logps/chosen": -2.28125, "logps/rejected": -0.8125, "loss": 8.125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.375, "rewards/margins": -7.3125, "rewards/rejected": -4.0625, "step": 86 }, { "epoch": 0.03240223463687151, "grad_norm": 342.0, "learning_rate": 5.821561338289962e-07, "logits/chosen": -0.84375, "logits/rejected": -1.640625, "logps/chosen": -2.40625, "logps/rejected": -0.80078125, "loss": 8.8125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.0625, "rewards/margins": -8.0625, "rewards/rejected": -4.0, "step": 87 }, { "epoch": 0.03277467411545624, "grad_norm": 356.0, "learning_rate": 5.888475836431227e-07, "logits/chosen": -0.55078125, "logits/rejected": -2.4375, "logps/chosen": -2.421875, "logps/rejected": -0.7578125, "loss": 9.0625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.125, "rewards/margins": -8.375, "rewards/rejected": -3.796875, "step": 88 }, { "epoch": 0.03314711359404097, "grad_norm": 334.0, "learning_rate": 5.95539033457249e-07, "logits/chosen": -0.6875, "logits/rejected": -2.28125, "logps/chosen": -2.484375, "logps/rejected": -0.80078125, "loss": 9.25, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.375, "rewards/margins": -8.375, "rewards/rejected": -4.0, "step": 89 }, { "epoch": 0.0335195530726257, "grad_norm": 366.0, "learning_rate": 6.022304832713755e-07, "logits/chosen": -0.90234375, "logits/rejected": -2.375, "logps/chosen": -2.15625, "logps/rejected": -0.46484375, "loss": 9.25, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.8125, "rewards/margins": -8.5, "rewards/rejected": -2.328125, "step": 90 }, { "epoch": 0.03389199255121043, "grad_norm": 388.0, "learning_rate": 6.089219330855018e-07, "logits/chosen": -0.91015625, "logits/rejected": -2.203125, "logps/chosen": -2.4375, "logps/rejected": -0.796875, "loss": 9.0, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.25, "rewards/margins": -8.25, "rewards/rejected": -3.96875, "step": 91 }, { "epoch": 0.03426443202979516, "grad_norm": 332.0, "learning_rate": 6.156133828996282e-07, "logits/chosen": -0.81640625, "logits/rejected": -1.46875, "logps/chosen": -2.59375, "logps/rejected": -0.66015625, "loss": 10.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -13.0, "rewards/margins": -9.75, "rewards/rejected": -3.3125, "step": 92 }, { "epoch": 0.034636871508379886, "grad_norm": 266.0, "learning_rate": 6.223048327137547e-07, "logits/chosen": -0.79296875, "logits/rejected": -2.25, "logps/chosen": -2.0625, "logps/rejected": -0.703125, "loss": 7.5625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.375, "rewards/margins": -6.8125, "rewards/rejected": -3.53125, "step": 93 }, { "epoch": 0.035009310986964616, "grad_norm": 392.0, "learning_rate": 6.289962825278811e-07, "logits/chosen": -0.66015625, "logits/rejected": -2.328125, "logps/chosen": -2.46875, "logps/rejected": -0.69140625, "loss": 9.6875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -12.375, "rewards/margins": -8.875, "rewards/rejected": -3.453125, "step": 94 }, { "epoch": 0.035381750465549346, "grad_norm": 342.0, "learning_rate": 6.356877323420074e-07, "logits/chosen": -0.734375, "logits/rejected": -2.453125, "logps/chosen": -2.171875, "logps/rejected": -0.71875, "loss": 8.0625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.875, "rewards/margins": -7.28125, "rewards/rejected": -3.59375, "step": 95 }, { "epoch": 0.035754189944134075, "grad_norm": 296.0, "learning_rate": 6.423791821561338e-07, "logits/chosen": -0.71875, "logits/rejected": -2.3125, "logps/chosen": -1.984375, "logps/rejected": -0.765625, "loss": 6.875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.875, "rewards/margins": -6.0625, "rewards/rejected": -3.828125, "step": 96 }, { "epoch": 0.036126629422718805, "grad_norm": 344.0, "learning_rate": 6.490706319702602e-07, "logits/chosen": -0.734375, "logits/rejected": -1.765625, "logps/chosen": -2.375, "logps/rejected": -0.640625, "loss": 9.375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.875, "rewards/margins": -8.625, "rewards/rejected": -3.203125, "step": 97 }, { "epoch": 0.03649906890130354, "grad_norm": 354.0, "learning_rate": 6.557620817843866e-07, "logits/chosen": -0.58984375, "logits/rejected": -2.296875, "logps/chosen": -2.21875, "logps/rejected": -0.73046875, "loss": 8.125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.0625, "rewards/margins": -7.40625, "rewards/rejected": -3.65625, "step": 98 }, { "epoch": 0.03687150837988827, "grad_norm": 296.0, "learning_rate": 6.624535315985131e-07, "logits/chosen": -0.5859375, "logits/rejected": -2.25, "logps/chosen": -1.875, "logps/rejected": -0.703125, "loss": 6.6875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.375, "rewards/margins": -5.875, "rewards/rejected": -3.515625, "step": 99 }, { "epoch": 0.037243947858473, "grad_norm": 231.0, "learning_rate": 6.691449814126393e-07, "logits/chosen": -1.0546875, "logits/rejected": -2.203125, "logps/chosen": -1.75, "logps/rejected": -0.66015625, "loss": 6.25, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.75, "rewards/margins": -5.4375, "rewards/rejected": -3.3125, "step": 100 }, { "epoch": 0.03761638733705773, "grad_norm": 302.0, "learning_rate": 6.758364312267657e-07, "logits/chosen": -0.828125, "logits/rejected": -2.03125, "logps/chosen": -2.09375, "logps/rejected": -0.546875, "loss": 8.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.5, "rewards/margins": -7.75, "rewards/rejected": -2.734375, "step": 101 }, { "epoch": 0.03798882681564246, "grad_norm": 274.0, "learning_rate": 6.825278810408922e-07, "logits/chosen": -0.9296875, "logits/rejected": -2.421875, "logps/chosen": -1.9375, "logps/rejected": -0.8359375, "loss": 6.2812, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.6875, "rewards/margins": -5.5, "rewards/rejected": -4.1875, "step": 102 }, { "epoch": 0.03836126629422719, "grad_norm": 304.0, "learning_rate": 6.892193308550186e-07, "logits/chosen": -0.54296875, "logits/rejected": -1.921875, "logps/chosen": -2.140625, "logps/rejected": -0.5859375, "loss": 8.5625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.6875, "rewards/margins": -7.75, "rewards/rejected": -2.9375, "step": 103 }, { "epoch": 0.03873370577281192, "grad_norm": 352.0, "learning_rate": 6.95910780669145e-07, "logits/chosen": -0.63671875, "logits/rejected": -2.28125, "logps/chosen": -2.1875, "logps/rejected": -0.796875, "loss": 7.75, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.9375, "rewards/margins": -6.9375, "rewards/rejected": -3.984375, "step": 104 }, { "epoch": 0.03910614525139665, "grad_norm": 352.0, "learning_rate": 7.026022304832714e-07, "logits/chosen": -0.6640625, "logits/rejected": -2.328125, "logps/chosen": -2.21875, "logps/rejected": -0.77734375, "loss": 8.0, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.125, "rewards/margins": -7.1875, "rewards/rejected": -3.890625, "step": 105 }, { "epoch": 0.03947858472998138, "grad_norm": 364.0, "learning_rate": 7.092936802973977e-07, "logits/chosen": -0.80859375, "logits/rejected": -1.6953125, "logps/chosen": -2.140625, "logps/rejected": -0.5859375, "loss": 8.5625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.6875, "rewards/margins": -7.75, "rewards/rejected": -2.9375, "step": 106 }, { "epoch": 0.03985102420856611, "grad_norm": 356.0, "learning_rate": 7.159851301115241e-07, "logits/chosen": -0.74609375, "logits/rejected": -2.21875, "logps/chosen": -2.203125, "logps/rejected": -0.60546875, "loss": 8.75, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.0, "rewards/margins": -8.0, "rewards/rejected": -3.03125, "step": 107 }, { "epoch": 0.04022346368715084, "grad_norm": 324.0, "learning_rate": 7.226765799256506e-07, "logits/chosen": -0.6015625, "logits/rejected": -2.265625, "logps/chosen": -1.890625, "logps/rejected": -0.7421875, "loss": 6.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.4375, "rewards/margins": -5.6875, "rewards/rejected": -3.71875, "step": 108 }, { "epoch": 0.04059590316573557, "grad_norm": 338.0, "learning_rate": 7.293680297397769e-07, "logits/chosen": -0.85546875, "logits/rejected": -2.203125, "logps/chosen": -1.9375, "logps/rejected": -0.6640625, "loss": 7.125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.625, "rewards/margins": -6.3125, "rewards/rejected": -3.3125, "step": 109 }, { "epoch": 0.040968342644320296, "grad_norm": 242.0, "learning_rate": 7.360594795539033e-07, "logits/chosen": -0.86328125, "logits/rejected": -2.28125, "logps/chosen": -1.9375, "logps/rejected": -0.76953125, "loss": 6.625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.6875, "rewards/margins": -5.84375, "rewards/rejected": -3.84375, "step": 110 }, { "epoch": 0.041340782122905026, "grad_norm": 460.0, "learning_rate": 7.427509293680297e-07, "logits/chosen": -0.55859375, "logits/rejected": -2.3125, "logps/chosen": -2.328125, "logps/rejected": -0.77734375, "loss": 8.5625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.625, "rewards/margins": -7.75, "rewards/rejected": -3.890625, "step": 111 }, { "epoch": 0.041713221601489756, "grad_norm": 245.0, "learning_rate": 7.494423791821561e-07, "logits/chosen": -1.015625, "logits/rejected": -2.046875, "logps/chosen": -2.03125, "logps/rejected": -0.69140625, "loss": 7.4688, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.125, "rewards/margins": -6.6875, "rewards/rejected": -3.453125, "step": 112 }, { "epoch": 0.042085661080074485, "grad_norm": 316.0, "learning_rate": 7.561338289962825e-07, "logits/chosen": -0.86328125, "logits/rejected": -2.1875, "logps/chosen": -2.15625, "logps/rejected": -0.734375, "loss": 7.9375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.8125, "rewards/margins": -7.125, "rewards/rejected": -3.671875, "step": 113 }, { "epoch": 0.042458100558659215, "grad_norm": 306.0, "learning_rate": 7.628252788104089e-07, "logits/chosen": -0.78515625, "logits/rejected": -2.25, "logps/chosen": -2.15625, "logps/rejected": -0.82421875, "loss": 7.4062, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.75, "rewards/margins": -6.59375, "rewards/rejected": -4.125, "step": 114 }, { "epoch": 0.04283054003724395, "grad_norm": 322.0, "learning_rate": 7.695167286245353e-07, "logits/chosen": -0.8125, "logits/rejected": -2.15625, "logps/chosen": -1.984375, "logps/rejected": -0.8359375, "loss": 6.5625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.9375, "rewards/margins": -5.75, "rewards/rejected": -4.1875, "step": 115 }, { "epoch": 0.04320297951582868, "grad_norm": 300.0, "learning_rate": 7.762081784386616e-07, "logits/chosen": -0.6171875, "logits/rejected": -1.6796875, "logps/chosen": -2.0, "logps/rejected": -0.6328125, "loss": 7.6562, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.0, "rewards/margins": -6.84375, "rewards/rejected": -3.15625, "step": 116 }, { "epoch": 0.04357541899441341, "grad_norm": 372.0, "learning_rate": 7.828996282527881e-07, "logits/chosen": -0.68359375, "logits/rejected": -2.1875, "logps/chosen": -1.921875, "logps/rejected": -0.625, "loss": 7.25, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.625, "rewards/margins": -6.4375, "rewards/rejected": -3.140625, "step": 117 }, { "epoch": 0.04394785847299814, "grad_norm": 280.0, "learning_rate": 7.895910780669145e-07, "logits/chosen": -0.76953125, "logits/rejected": -2.265625, "logps/chosen": -1.9375, "logps/rejected": -0.8359375, "loss": 6.3125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.75, "rewards/margins": -5.53125, "rewards/rejected": -4.1875, "step": 118 }, { "epoch": 0.04432029795158287, "grad_norm": 310.0, "learning_rate": 7.962825278810408e-07, "logits/chosen": -0.75390625, "logits/rejected": -2.03125, "logps/chosen": -2.0, "logps/rejected": -0.703125, "loss": 7.3125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.0, "rewards/margins": -6.5, "rewards/rejected": -3.5, "step": 119 }, { "epoch": 0.0446927374301676, "grad_norm": 244.0, "learning_rate": 8.029739776951673e-07, "logits/chosen": -0.765625, "logits/rejected": -1.8203125, "logps/chosen": -1.515625, "logps/rejected": -0.640625, "loss": 5.1562, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.5625, "rewards/margins": -4.375, "rewards/rejected": -3.21875, "step": 120 }, { "epoch": 0.04506517690875233, "grad_norm": 362.0, "learning_rate": 8.096654275092937e-07, "logits/chosen": -0.828125, "logits/rejected": -2.28125, "logps/chosen": -2.21875, "logps/rejected": -0.71484375, "loss": 8.3125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.0625, "rewards/margins": -7.5, "rewards/rejected": -3.578125, "step": 121 }, { "epoch": 0.04543761638733706, "grad_norm": 320.0, "learning_rate": 8.1635687732342e-07, "logits/chosen": -0.8046875, "logits/rejected": -2.265625, "logps/chosen": -1.8203125, "logps/rejected": -0.6796875, "loss": 6.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.125, "rewards/margins": -5.71875, "rewards/rejected": -3.390625, "step": 122 }, { "epoch": 0.04581005586592179, "grad_norm": 332.0, "learning_rate": 8.230483271375464e-07, "logits/chosen": -0.61328125, "logits/rejected": -2.4375, "logps/chosen": -2.3125, "logps/rejected": -0.671875, "loss": 9.0, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.5625, "rewards/margins": -8.25, "rewards/rejected": -3.34375, "step": 123 }, { "epoch": 0.04618249534450652, "grad_norm": 434.0, "learning_rate": 8.297397769516728e-07, "logits/chosen": -0.62890625, "logits/rejected": -1.921875, "logps/chosen": -1.953125, "logps/rejected": -0.7734375, "loss": 6.6875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.75, "rewards/margins": -5.90625, "rewards/rejected": -3.875, "step": 124 }, { "epoch": 0.04655493482309125, "grad_norm": 316.0, "learning_rate": 8.364312267657992e-07, "logits/chosen": -0.80859375, "logits/rejected": -2.25, "logps/chosen": -1.875, "logps/rejected": -0.62109375, "loss": 7.0312, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.375, "rewards/margins": -6.25, "rewards/rejected": -3.09375, "step": 125 }, { "epoch": 0.04692737430167598, "grad_norm": 282.0, "learning_rate": 8.431226765799257e-07, "logits/chosen": -0.8125, "logits/rejected": -2.328125, "logps/chosen": -1.71875, "logps/rejected": -0.703125, "loss": 5.875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.625, "rewards/margins": -5.0625, "rewards/rejected": -3.5, "step": 126 }, { "epoch": 0.047299813780260706, "grad_norm": 278.0, "learning_rate": 8.49814126394052e-07, "logits/chosen": -0.82421875, "logits/rejected": -2.140625, "logps/chosen": -1.84375, "logps/rejected": -0.7421875, "loss": 6.3125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.1875, "rewards/margins": -5.5, "rewards/rejected": -3.703125, "step": 127 }, { "epoch": 0.047672253258845436, "grad_norm": 322.0, "learning_rate": 8.565055762081783e-07, "logits/chosen": -0.62109375, "logits/rejected": -2.25, "logps/chosen": -1.984375, "logps/rejected": -0.765625, "loss": 6.9062, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.9375, "rewards/margins": -6.125, "rewards/rejected": -3.8125, "step": 128 }, { "epoch": 0.048044692737430165, "grad_norm": 228.0, "learning_rate": 8.631970260223048e-07, "logits/chosen": -0.8828125, "logits/rejected": -1.65625, "logps/chosen": -1.59375, "logps/rejected": -0.578125, "loss": 5.875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.96875, "rewards/margins": -5.09375, "rewards/rejected": -2.875, "step": 129 }, { "epoch": 0.048417132216014895, "grad_norm": 300.0, "learning_rate": 8.698884758364312e-07, "logits/chosen": -0.7109375, "logits/rejected": -2.40625, "logps/chosen": -1.75, "logps/rejected": -0.73828125, "loss": 5.8438, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.75, "rewards/margins": -5.0625, "rewards/rejected": -3.6875, "step": 130 }, { "epoch": 0.048789571694599625, "grad_norm": 320.0, "learning_rate": 8.765799256505576e-07, "logits/chosen": -0.8046875, "logits/rejected": -2.21875, "logps/chosen": -1.796875, "logps/rejected": -0.7421875, "loss": 6.0625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.9375, "rewards/margins": -5.25, "rewards/rejected": -3.71875, "step": 131 }, { "epoch": 0.049162011173184354, "grad_norm": 274.0, "learning_rate": 8.83271375464684e-07, "logits/chosen": -0.609375, "logits/rejected": -2.296875, "logps/chosen": -1.6875, "logps/rejected": -0.69921875, "loss": 5.7188, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.4375, "rewards/margins": -4.9375, "rewards/rejected": -3.5, "step": 132 }, { "epoch": 0.04953445065176909, "grad_norm": 274.0, "learning_rate": 8.899628252788103e-07, "logits/chosen": -0.8203125, "logits/rejected": -2.28125, "logps/chosen": -1.8203125, "logps/rejected": -0.7578125, "loss": 6.0938, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.125, "rewards/margins": -5.3125, "rewards/rejected": -3.796875, "step": 133 }, { "epoch": 0.04990689013035382, "grad_norm": 348.0, "learning_rate": 8.966542750929367e-07, "logits/chosen": -0.75390625, "logits/rejected": -2.390625, "logps/chosen": -1.890625, "logps/rejected": -0.8359375, "loss": 6.0312, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.4375, "rewards/margins": -5.25, "rewards/rejected": -4.1875, "step": 134 }, { "epoch": 0.05027932960893855, "grad_norm": 300.0, "learning_rate": 9.033457249070631e-07, "logits/chosen": -0.6484375, "logits/rejected": -2.40625, "logps/chosen": -2.15625, "logps/rejected": -0.74609375, "loss": 7.8438, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.8125, "rewards/margins": -7.09375, "rewards/rejected": -3.734375, "step": 135 }, { "epoch": 0.05065176908752328, "grad_norm": 350.0, "learning_rate": 9.100371747211895e-07, "logits/chosen": -0.76171875, "logits/rejected": -2.375, "logps/chosen": -1.90625, "logps/rejected": -0.703125, "loss": 6.8125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.5625, "rewards/margins": -6.0625, "rewards/rejected": -3.5, "step": 136 }, { "epoch": 0.05102420856610801, "grad_norm": 330.0, "learning_rate": 9.167286245353159e-07, "logits/chosen": -0.66796875, "logits/rejected": -2.5, "logps/chosen": -2.0, "logps/rejected": -0.8515625, "loss": 6.5312, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.0, "rewards/margins": -5.75, "rewards/rejected": -4.25, "step": 137 }, { "epoch": 0.05139664804469274, "grad_norm": 320.0, "learning_rate": 9.234200743494423e-07, "logits/chosen": -0.734375, "logits/rejected": -2.390625, "logps/chosen": -1.703125, "logps/rejected": -0.8203125, "loss": 5.1875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.5, "rewards/margins": -4.40625, "rewards/rejected": -4.09375, "step": 138 }, { "epoch": 0.05176908752327747, "grad_norm": 302.0, "learning_rate": 9.301115241635687e-07, "logits/chosen": -0.6640625, "logits/rejected": -2.375, "logps/chosen": -1.90625, "logps/rejected": -0.75, "loss": 6.5625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.5625, "rewards/margins": -5.8125, "rewards/rejected": -3.75, "step": 139 }, { "epoch": 0.0521415270018622, "grad_norm": 350.0, "learning_rate": 9.368029739776951e-07, "logits/chosen": -0.58984375, "logits/rejected": -2.328125, "logps/chosen": -1.9453125, "logps/rejected": -0.7421875, "loss": 6.8125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.75, "rewards/margins": -6.03125, "rewards/rejected": -3.6875, "step": 140 }, { "epoch": 0.05251396648044693, "grad_norm": 276.0, "learning_rate": 9.434944237918216e-07, "logits/chosen": -0.58203125, "logits/rejected": -1.828125, "logps/chosen": -1.6640625, "logps/rejected": -0.66796875, "loss": 5.75, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.3125, "rewards/margins": -5.0, "rewards/rejected": -3.34375, "step": 141 }, { "epoch": 0.05288640595903166, "grad_norm": 338.0, "learning_rate": 9.50185873605948e-07, "logits/chosen": -0.62109375, "logits/rejected": -2.296875, "logps/chosen": -1.71875, "logps/rejected": -0.8046875, "loss": 5.3125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.5625, "rewards/margins": -4.5, "rewards/rejected": -4.03125, "step": 142 }, { "epoch": 0.05325884543761639, "grad_norm": 314.0, "learning_rate": 9.568773234200745e-07, "logits/chosen": -0.6484375, "logits/rejected": -2.21875, "logps/chosen": -1.765625, "logps/rejected": -0.8125, "loss": 5.5625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.875, "rewards/margins": -4.78125, "rewards/rejected": -4.0625, "step": 143 }, { "epoch": 0.053631284916201116, "grad_norm": 221.0, "learning_rate": 9.635687732342007e-07, "logits/chosen": -0.828125, "logits/rejected": -2.234375, "logps/chosen": -1.828125, "logps/rejected": -0.87890625, "loss": 5.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.125, "rewards/margins": -4.75, "rewards/rejected": -4.375, "step": 144 }, { "epoch": 0.054003724394785846, "grad_norm": 332.0, "learning_rate": 9.702602230483271e-07, "logits/chosen": -0.65234375, "logits/rejected": -2.25, "logps/chosen": -2.203125, "logps/rejected": -0.79296875, "loss": 7.8438, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -11.0, "rewards/margins": -7.03125, "rewards/rejected": -3.96875, "step": 145 }, { "epoch": 0.054376163873370575, "grad_norm": 272.0, "learning_rate": 9.769516728624533e-07, "logits/chosen": -0.70703125, "logits/rejected": -2.3125, "logps/chosen": -1.640625, "logps/rejected": -0.7578125, "loss": 5.2188, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.25, "rewards/margins": -4.4375, "rewards/rejected": -3.78125, "step": 146 }, { "epoch": 0.054748603351955305, "grad_norm": 308.0, "learning_rate": 9.836431226765798e-07, "logits/chosen": -0.6953125, "logits/rejected": -1.6953125, "logps/chosen": -1.6796875, "logps/rejected": -0.64453125, "loss": 6.0, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.375, "rewards/margins": -5.1875, "rewards/rejected": -3.21875, "step": 147 }, { "epoch": 0.055121042830540035, "grad_norm": 288.0, "learning_rate": 9.903345724907062e-07, "logits/chosen": -0.54296875, "logits/rejected": -2.25, "logps/chosen": -1.828125, "logps/rejected": -0.74609375, "loss": 6.2188, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.125, "rewards/margins": -5.4375, "rewards/rejected": -3.71875, "step": 148 }, { "epoch": 0.055493482309124764, "grad_norm": 286.0, "learning_rate": 9.970260223048326e-07, "logits/chosen": -0.6796875, "logits/rejected": -2.375, "logps/chosen": -1.734375, "logps/rejected": -0.75, "loss": 5.6875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.6875, "rewards/margins": -4.9375, "rewards/rejected": -3.75, "step": 149 }, { "epoch": 0.055865921787709494, "grad_norm": 236.0, "learning_rate": 1.003717472118959e-06, "logits/chosen": -0.81640625, "logits/rejected": -2.375, "logps/chosen": -1.6875, "logps/rejected": -0.75, "loss": 5.4688, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.4375, "rewards/margins": -4.6875, "rewards/rejected": -3.75, "step": 150 }, { "epoch": 0.05623836126629423, "grad_norm": 262.0, "learning_rate": 1.0104089219330855e-06, "logits/chosen": -0.71484375, "logits/rejected": -2.28125, "logps/chosen": -1.515625, "logps/rejected": -0.78515625, "loss": 4.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.59375, "rewards/margins": -3.671875, "rewards/rejected": -3.921875, "step": 151 }, { "epoch": 0.05661080074487896, "grad_norm": 288.0, "learning_rate": 1.017100371747212e-06, "logits/chosen": -0.8046875, "logits/rejected": -2.125, "logps/chosen": -1.8046875, "logps/rejected": -0.6484375, "loss": 6.5625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.0, "rewards/margins": -5.75, "rewards/rejected": -3.25, "step": 152 }, { "epoch": 0.05698324022346369, "grad_norm": 374.0, "learning_rate": 1.0237918215613384e-06, "logits/chosen": -0.81640625, "logits/rejected": -2.375, "logps/chosen": -2.0625, "logps/rejected": -0.78125, "loss": 7.2188, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -10.375, "rewards/margins": -6.4375, "rewards/rejected": -3.90625, "step": 153 }, { "epoch": 0.05735567970204842, "grad_norm": 288.0, "learning_rate": 1.0304832713754646e-06, "logits/chosen": -0.58984375, "logits/rejected": -2.34375, "logps/chosen": -1.515625, "logps/rejected": -0.73828125, "loss": 4.6562, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.5625, "rewards/margins": -3.875, "rewards/rejected": -3.6875, "step": 154 }, { "epoch": 0.05772811918063315, "grad_norm": 268.0, "learning_rate": 1.037174721189591e-06, "logits/chosen": -0.73046875, "logits/rejected": -2.25, "logps/chosen": -1.6484375, "logps/rejected": -0.796875, "loss": 5.0625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.25, "rewards/margins": -4.28125, "rewards/rejected": -3.96875, "step": 155 }, { "epoch": 0.05810055865921788, "grad_norm": 304.0, "learning_rate": 1.0438661710037175e-06, "logits/chosen": -0.5234375, "logits/rejected": -2.28125, "logps/chosen": -1.375, "logps/rejected": -0.78125, "loss": 3.8125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.875, "rewards/margins": -3.0, "rewards/rejected": -3.90625, "step": 156 }, { "epoch": 0.05847299813780261, "grad_norm": 274.0, "learning_rate": 1.0505576208178437e-06, "logits/chosen": -0.5078125, "logits/rejected": -1.71875, "logps/chosen": -1.609375, "logps/rejected": -0.494140625, "loss": 6.375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.0625, "rewards/margins": -5.59375, "rewards/rejected": -2.46875, "step": 157 }, { "epoch": 0.05884543761638734, "grad_norm": 308.0, "learning_rate": 1.0572490706319702e-06, "logits/chosen": -0.953125, "logits/rejected": -1.5703125, "logps/chosen": -1.953125, "logps/rejected": -0.52734375, "loss": 7.9062, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.75, "rewards/margins": -7.15625, "rewards/rejected": -2.640625, "step": 158 }, { "epoch": 0.05921787709497207, "grad_norm": 286.0, "learning_rate": 1.0639405204460966e-06, "logits/chosen": -0.7109375, "logits/rejected": -2.34375, "logps/chosen": -1.765625, "logps/rejected": -0.61328125, "loss": 6.5625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.875, "rewards/margins": -5.78125, "rewards/rejected": -3.0625, "step": 159 }, { "epoch": 0.0595903165735568, "grad_norm": 251.0, "learning_rate": 1.070631970260223e-06, "logits/chosen": -0.6328125, "logits/rejected": -2.21875, "logps/chosen": -1.53125, "logps/rejected": -0.71875, "loss": 4.875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.6875, "rewards/margins": -4.125, "rewards/rejected": -3.59375, "step": 160 }, { "epoch": 0.059962756052141526, "grad_norm": 284.0, "learning_rate": 1.0773234200743495e-06, "logits/chosen": -0.8046875, "logits/rejected": -2.15625, "logps/chosen": -1.59375, "logps/rejected": -0.63671875, "loss": 5.625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.0, "rewards/margins": -4.8125, "rewards/rejected": -3.1875, "step": 161 }, { "epoch": 0.060335195530726256, "grad_norm": 230.0, "learning_rate": 1.084014869888476e-06, "logits/chosen": -0.796875, "logits/rejected": -2.234375, "logps/chosen": -1.46875, "logps/rejected": -0.625, "loss": 5.0312, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.34375, "rewards/margins": -4.21875, "rewards/rejected": -3.125, "step": 162 }, { "epoch": 0.060707635009310985, "grad_norm": 250.0, "learning_rate": 1.0907063197026021e-06, "logits/chosen": -0.671875, "logits/rejected": -2.328125, "logps/chosen": -1.3359375, "logps/rejected": -0.7265625, "loss": 3.8594, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.6875, "rewards/margins": -3.0625, "rewards/rejected": -3.625, "step": 163 }, { "epoch": 0.061080074487895715, "grad_norm": 245.0, "learning_rate": 1.0973977695167286e-06, "logits/chosen": -0.66796875, "logits/rejected": -2.34375, "logps/chosen": -1.421875, "logps/rejected": -0.6875, "loss": 4.4688, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.0625, "rewards/margins": -3.65625, "rewards/rejected": -3.4375, "step": 164 }, { "epoch": 0.061452513966480445, "grad_norm": 328.0, "learning_rate": 1.104089219330855e-06, "logits/chosen": -0.68359375, "logits/rejected": -1.890625, "logps/chosen": -1.78125, "logps/rejected": -0.7421875, "loss": 5.9375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.875, "rewards/margins": -5.1875, "rewards/rejected": -3.71875, "step": 165 }, { "epoch": 0.061824953445065174, "grad_norm": 264.0, "learning_rate": 1.1107806691449814e-06, "logits/chosen": -0.68359375, "logits/rejected": -1.640625, "logps/chosen": -1.4296875, "logps/rejected": -0.609375, "loss": 4.9375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.125, "rewards/margins": -4.09375, "rewards/rejected": -3.03125, "step": 166 }, { "epoch": 0.062197392923649904, "grad_norm": 360.0, "learning_rate": 1.1174721189591077e-06, "logits/chosen": -0.60546875, "logits/rejected": -2.453125, "logps/chosen": -1.4453125, "logps/rejected": -0.80078125, "loss": 4.0312, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.25, "rewards/margins": -3.21875, "rewards/rejected": -4.0, "step": 167 }, { "epoch": 0.06256983240223464, "grad_norm": 302.0, "learning_rate": 1.124163568773234e-06, "logits/chosen": -0.91015625, "logits/rejected": -1.8984375, "logps/chosen": -1.71875, "logps/rejected": -0.8046875, "loss": 5.4062, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.625, "rewards/margins": -4.625, "rewards/rejected": -4.0, "step": 168 }, { "epoch": 0.06294227188081937, "grad_norm": 245.0, "learning_rate": 1.1308550185873605e-06, "logits/chosen": -0.953125, "logits/rejected": -2.296875, "logps/chosen": -1.3515625, "logps/rejected": -0.609375, "loss": 4.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.75, "rewards/margins": -3.71875, "rewards/rejected": -3.03125, "step": 169 }, { "epoch": 0.0633147113594041, "grad_norm": 243.0, "learning_rate": 1.137546468401487e-06, "logits/chosen": -0.5546875, "logits/rejected": -2.203125, "logps/chosen": -1.4453125, "logps/rejected": -0.8203125, "loss": 3.9531, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.21875, "rewards/margins": -3.125, "rewards/rejected": -4.09375, "step": 170 }, { "epoch": 0.06368715083798883, "grad_norm": 272.0, "learning_rate": 1.1442379182156134e-06, "logits/chosen": -0.7421875, "logits/rejected": -2.328125, "logps/chosen": -1.390625, "logps/rejected": -0.6953125, "loss": 4.3125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.96875, "rewards/margins": -3.5, "rewards/rejected": -3.46875, "step": 171 }, { "epoch": 0.06405959031657356, "grad_norm": 245.0, "learning_rate": 1.1509293680297396e-06, "logits/chosen": -0.61328125, "logits/rejected": -2.328125, "logps/chosen": -1.25, "logps/rejected": -0.7109375, "loss": 3.5, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.25, "rewards/margins": -2.65625, "rewards/rejected": -3.5625, "step": 172 }, { "epoch": 0.06443202979515829, "grad_norm": 316.0, "learning_rate": 1.157620817843866e-06, "logits/chosen": -0.71484375, "logits/rejected": -2.390625, "logps/chosen": -1.734375, "logps/rejected": -0.640625, "loss": 6.1875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -8.625, "rewards/margins": -5.4375, "rewards/rejected": -3.21875, "step": 173 }, { "epoch": 0.06480446927374302, "grad_norm": 249.0, "learning_rate": 1.1643122676579925e-06, "logits/chosen": -0.69140625, "logits/rejected": -2.3125, "logps/chosen": -1.5625, "logps/rejected": -0.77734375, "loss": 4.75, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.8125, "rewards/margins": -3.9375, "rewards/rejected": -3.890625, "step": 174 }, { "epoch": 0.06517690875232775, "grad_norm": 272.0, "learning_rate": 1.171003717472119e-06, "logits/chosen": -0.8203125, "logits/rejected": -1.6328125, "logps/chosen": -1.3046875, "logps/rejected": -0.55859375, "loss": 4.5625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.53125, "rewards/margins": -3.734375, "rewards/rejected": -2.78125, "step": 175 }, { "epoch": 0.06554934823091248, "grad_norm": 240.0, "learning_rate": 1.1776951672862454e-06, "logits/chosen": -0.76171875, "logits/rejected": -2.265625, "logps/chosen": -1.40625, "logps/rejected": -0.66015625, "loss": 4.5312, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.0, "rewards/margins": -3.6875, "rewards/rejected": -3.3125, "step": 176 }, { "epoch": 0.0659217877094972, "grad_norm": 294.0, "learning_rate": 1.1843866171003718e-06, "logits/chosen": -0.6875, "logits/rejected": -2.328125, "logps/chosen": -1.890625, "logps/rejected": -0.828125, "loss": 6.0938, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -9.5, "rewards/margins": -5.3125, "rewards/rejected": -4.15625, "step": 177 }, { "epoch": 0.06629422718808194, "grad_norm": 252.0, "learning_rate": 1.191078066914498e-06, "logits/chosen": -0.427734375, "logits/rejected": -2.265625, "logps/chosen": -1.4609375, "logps/rejected": -0.8515625, "loss": 3.875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.3125, "rewards/margins": -3.046875, "rewards/rejected": -4.25, "step": 178 }, { "epoch": 0.06666666666666667, "grad_norm": 310.0, "learning_rate": 1.1977695167286245e-06, "logits/chosen": -0.7578125, "logits/rejected": -2.25, "logps/chosen": -1.15625, "logps/rejected": -0.69140625, "loss": 3.1562, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -5.75, "rewards/margins": -2.3125, "rewards/rejected": -3.453125, "step": 179 }, { "epoch": 0.0670391061452514, "grad_norm": 255.0, "learning_rate": 1.204460966542751e-06, "logits/chosen": -0.5625, "logits/rejected": -2.265625, "logps/chosen": -1.375, "logps/rejected": -0.69140625, "loss": 4.25, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.875, "rewards/margins": -3.4375, "rewards/rejected": -3.453125, "step": 180 }, { "epoch": 0.06741154562383612, "grad_norm": 254.0, "learning_rate": 1.2111524163568771e-06, "logits/chosen": -0.69140625, "logits/rejected": -2.265625, "logps/chosen": -1.40625, "logps/rejected": -0.7890625, "loss": 3.875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.0, "rewards/margins": -3.03125, "rewards/rejected": -3.96875, "step": 181 }, { "epoch": 0.06778398510242085, "grad_norm": 212.0, "learning_rate": 1.2178438661710036e-06, "logits/chosen": -0.6796875, "logits/rejected": -2.296875, "logps/chosen": -1.2109375, "logps/rejected": -0.8125, "loss": 2.875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.0625, "rewards/margins": -1.984375, "rewards/rejected": -4.0625, "step": 182 }, { "epoch": 0.06815642458100558, "grad_norm": 231.0, "learning_rate": 1.22453531598513e-06, "logits/chosen": -0.6328125, "logits/rejected": -2.453125, "logps/chosen": -1.34375, "logps/rejected": -0.703125, "loss": 4.0312, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.75, "rewards/margins": -3.234375, "rewards/rejected": -3.515625, "step": 183 }, { "epoch": 0.06852886405959031, "grad_norm": 268.0, "learning_rate": 1.2312267657992564e-06, "logits/chosen": -0.65234375, "logits/rejected": -1.8125, "logps/chosen": -1.3125, "logps/rejected": -0.671875, "loss": 4.0625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.59375, "rewards/margins": -3.234375, "rewards/rejected": -3.359375, "step": 184 }, { "epoch": 0.06890130353817504, "grad_norm": 256.0, "learning_rate": 1.2379182156133829e-06, "logits/chosen": -0.8046875, "logits/rejected": -2.25, "logps/chosen": -1.53125, "logps/rejected": -0.8203125, "loss": 4.375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.625, "rewards/margins": -3.546875, "rewards/rejected": -4.09375, "step": 185 }, { "epoch": 0.06927374301675977, "grad_norm": 227.0, "learning_rate": 1.2446096654275093e-06, "logits/chosen": -0.8046875, "logits/rejected": -1.8984375, "logps/chosen": -1.296875, "logps/rejected": -0.7265625, "loss": 3.6719, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.5, "rewards/margins": -2.84375, "rewards/rejected": -3.625, "step": 186 }, { "epoch": 0.0696461824953445, "grad_norm": 158.0, "learning_rate": 1.2513011152416357e-06, "logits/chosen": -0.74609375, "logits/rejected": -1.9921875, "logps/chosen": -0.953125, "logps/rejected": -0.703125, "loss": 2.1562, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -4.75, "rewards/margins": -1.2265625, "rewards/rejected": -3.53125, "step": 187 }, { "epoch": 0.07001862197392923, "grad_norm": 237.0, "learning_rate": 1.2579925650557622e-06, "logits/chosen": -0.609375, "logits/rejected": -2.140625, "logps/chosen": -1.125, "logps/rejected": -0.79296875, "loss": 2.5625, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -5.625, "rewards/margins": -1.6640625, "rewards/rejected": -3.96875, "step": 188 }, { "epoch": 0.07039106145251396, "grad_norm": 292.0, "learning_rate": 1.2646840148698884e-06, "logits/chosen": -0.68359375, "logits/rejected": -2.3125, "logps/chosen": -1.34375, "logps/rejected": -0.7734375, "loss": 3.6875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.75, "rewards/margins": -2.875, "rewards/rejected": -3.859375, "step": 189 }, { "epoch": 0.07076350093109869, "grad_norm": 211.0, "learning_rate": 1.2713754646840148e-06, "logits/chosen": -0.68359375, "logits/rejected": -2.3125, "logps/chosen": -1.203125, "logps/rejected": -0.71484375, "loss": 3.2969, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.03125, "rewards/margins": -2.46875, "rewards/rejected": -3.5625, "step": 190 }, { "epoch": 0.07113594040968342, "grad_norm": 237.0, "learning_rate": 1.278066914498141e-06, "logits/chosen": -0.71875, "logits/rejected": -1.9296875, "logps/chosen": -1.328125, "logps/rejected": -0.75390625, "loss": 3.7188, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.625, "rewards/margins": -2.875, "rewards/rejected": -3.78125, "step": 191 }, { "epoch": 0.07150837988826815, "grad_norm": 246.0, "learning_rate": 1.2847583643122675e-06, "logits/chosen": -0.6875, "logits/rejected": -2.296875, "logps/chosen": -0.953125, "logps/rejected": -0.765625, "loss": 1.9062, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -4.75, "rewards/margins": -0.9375, "rewards/rejected": -3.828125, "step": 192 }, { "epoch": 0.07188081936685288, "grad_norm": 262.0, "learning_rate": 1.291449814126394e-06, "logits/chosen": -0.55078125, "logits/rejected": -2.3125, "logps/chosen": -1.203125, "logps/rejected": -0.8984375, "loss": 2.4375, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.0, "rewards/margins": -1.515625, "rewards/rejected": -4.5, "step": 193 }, { "epoch": 0.07225325884543761, "grad_norm": 230.0, "learning_rate": 1.2981412639405204e-06, "logits/chosen": -0.7578125, "logits/rejected": -2.34375, "logps/chosen": -1.1875, "logps/rejected": -0.76171875, "loss": 3.0, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -5.9375, "rewards/margins": -2.140625, "rewards/rejected": -3.8125, "step": 194 }, { "epoch": 0.07262569832402235, "grad_norm": 213.0, "learning_rate": 1.3048327137546468e-06, "logits/chosen": -0.5703125, "logits/rejected": -2.125, "logps/chosen": -1.1015625, "logps/rejected": -0.734375, "loss": 2.6875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -5.5, "rewards/margins": -1.8359375, "rewards/rejected": -3.6875, "step": 195 }, { "epoch": 0.07299813780260708, "grad_norm": 139.0, "learning_rate": 1.3115241635687732e-06, "logits/chosen": -0.66796875, "logits/rejected": -2.296875, "logps/chosen": -0.7890625, "logps/rejected": -0.765625, "loss": 1.2891, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.953125, "rewards/margins": -0.1171875, "rewards/rejected": -3.84375, "step": 196 }, { "epoch": 0.07337057728119181, "grad_norm": 182.0, "learning_rate": 1.3182156133828997e-06, "logits/chosen": -0.55859375, "logits/rejected": -2.109375, "logps/chosen": -0.96875, "logps/rejected": -0.7421875, "loss": 2.1719, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.84375, "rewards/margins": -1.140625, "rewards/rejected": -3.71875, "step": 197 }, { "epoch": 0.07374301675977654, "grad_norm": 186.0, "learning_rate": 1.3249070631970261e-06, "logits/chosen": -0.50390625, "logits/rejected": -2.34375, "logps/chosen": -0.93359375, "logps/rejected": -0.765625, "loss": 1.875, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.6875, "rewards/margins": -0.84375, "rewards/rejected": -3.828125, "step": 198 }, { "epoch": 0.07411545623836127, "grad_norm": 242.0, "learning_rate": 1.3315985130111523e-06, "logits/chosen": -0.546875, "logits/rejected": -2.28125, "logps/chosen": -1.1875, "logps/rejected": -0.78125, "loss": 2.9531, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -5.9375, "rewards/margins": -2.0625, "rewards/rejected": -3.890625, "step": 199 }, { "epoch": 0.074487895716946, "grad_norm": 306.0, "learning_rate": 1.3382899628252786e-06, "logits/chosen": -0.75390625, "logits/rejected": -2.328125, "logps/chosen": -1.1875, "logps/rejected": -0.6015625, "loss": 3.7812, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -5.96875, "rewards/margins": -2.953125, "rewards/rejected": -3.015625, "step": 200 }, { "epoch": 0.07486033519553073, "grad_norm": 219.0, "learning_rate": 1.344981412639405e-06, "logits/chosen": -0.71484375, "logits/rejected": -2.46875, "logps/chosen": -0.953125, "logps/rejected": -0.68359375, "loss": 2.2656, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -4.75, "rewards/margins": -1.3515625, "rewards/rejected": -3.40625, "step": 201 }, { "epoch": 0.07523277467411546, "grad_norm": 258.0, "learning_rate": 1.3516728624535314e-06, "logits/chosen": -0.61328125, "logits/rejected": -2.375, "logps/chosen": -1.046875, "logps/rejected": -0.8203125, "loss": 2.125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -5.1875, "rewards/margins": -1.109375, "rewards/rejected": -4.09375, "step": 202 }, { "epoch": 0.07560521415270019, "grad_norm": 191.0, "learning_rate": 1.3583643122676579e-06, "logits/chosen": -0.75390625, "logits/rejected": -2.140625, "logps/chosen": -0.9140625, "logps/rejected": -0.65625, "loss": 2.2188, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -4.5625, "rewards/margins": -1.296875, "rewards/rejected": -3.28125, "step": 203 }, { "epoch": 0.07597765363128492, "grad_norm": 151.0, "learning_rate": 1.3650557620817843e-06, "logits/chosen": -0.7578125, "logits/rejected": -2.234375, "logps/chosen": -0.71875, "logps/rejected": -0.7109375, "loss": 1.2812, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.59375, "rewards/margins": -0.046875, "rewards/rejected": -3.546875, "step": 204 }, { "epoch": 0.07635009310986965, "grad_norm": 143.0, "learning_rate": 1.3717472118959108e-06, "logits/chosen": -0.671875, "logits/rejected": -2.28125, "logps/chosen": -0.88671875, "logps/rejected": -0.87109375, "loss": 1.2812, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.4375, "rewards/margins": -0.0703125, "rewards/rejected": -4.375, "step": 205 }, { "epoch": 0.07672253258845438, "grad_norm": 222.0, "learning_rate": 1.3784386617100372e-06, "logits/chosen": -0.77734375, "logits/rejected": -2.09375, "logps/chosen": -1.0234375, "logps/rejected": -0.6875, "loss": 2.5781, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -5.125, "rewards/margins": -1.6796875, "rewards/rejected": -3.4375, "step": 206 }, { "epoch": 0.07709497206703911, "grad_norm": 126.0, "learning_rate": 1.3851301115241636e-06, "logits/chosen": -0.62109375, "logits/rejected": -2.0625, "logps/chosen": -0.75390625, "logps/rejected": -0.8125, "loss": 1.0078, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.78125, "rewards/margins": 0.2890625, "rewards/rejected": -4.0625, "step": 207 }, { "epoch": 0.07746741154562384, "grad_norm": 266.0, "learning_rate": 1.39182156133829e-06, "logits/chosen": -0.6015625, "logits/rejected": -2.15625, "logps/chosen": -0.921875, "logps/rejected": -0.75, "loss": 1.875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -4.625, "rewards/margins": -0.875, "rewards/rejected": -3.734375, "step": 208 }, { "epoch": 0.07783985102420857, "grad_norm": 217.0, "learning_rate": 1.3985130111524163e-06, "logits/chosen": -0.79296875, "logits/rejected": -2.421875, "logps/chosen": -1.0625, "logps/rejected": -0.703125, "loss": 2.6875, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -5.34375, "rewards/margins": -1.828125, "rewards/rejected": -3.515625, "step": 209 }, { "epoch": 0.0782122905027933, "grad_norm": 304.0, "learning_rate": 1.4052044609665427e-06, "logits/chosen": -0.796875, "logits/rejected": -1.734375, "logps/chosen": -1.53125, "logps/rejected": -0.66015625, "loss": 5.125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.65625, "rewards/margins": -4.34375, "rewards/rejected": -3.3125, "step": 210 }, { "epoch": 0.07858472998137803, "grad_norm": 151.0, "learning_rate": 1.411895910780669e-06, "logits/chosen": -0.62890625, "logits/rejected": -2.1875, "logps/chosen": -0.84375, "logps/rejected": -0.8046875, "loss": 1.3125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -4.21875, "rewards/margins": -0.1953125, "rewards/rejected": -4.0, "step": 211 }, { "epoch": 0.07895716945996276, "grad_norm": 216.0, "learning_rate": 1.4185873605947954e-06, "logits/chosen": -0.77734375, "logits/rejected": -2.09375, "logps/chosen": -1.109375, "logps/rejected": -0.65625, "loss": 3.2812, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -5.53125, "rewards/margins": -2.234375, "rewards/rejected": -3.296875, "step": 212 }, { "epoch": 0.07932960893854749, "grad_norm": 177.0, "learning_rate": 1.4252788104089218e-06, "logits/chosen": -0.70703125, "logits/rejected": -2.3125, "logps/chosen": -1.0625, "logps/rejected": -0.8125, "loss": 2.5625, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -5.3125, "rewards/margins": -1.234375, "rewards/rejected": -4.0625, "step": 213 }, { "epoch": 0.07970204841713222, "grad_norm": 292.0, "learning_rate": 1.4319702602230483e-06, "logits/chosen": -0.7265625, "logits/rejected": -2.171875, "logps/chosen": -1.2109375, "logps/rejected": -0.6953125, "loss": 3.5469, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.0625, "rewards/margins": -2.5625, "rewards/rejected": -3.484375, "step": 214 }, { "epoch": 0.08007448789571694, "grad_norm": 196.0, "learning_rate": 1.4386617100371747e-06, "logits/chosen": -0.640625, "logits/rejected": -1.71875, "logps/chosen": -0.7734375, "logps/rejected": -0.6640625, "loss": 1.8047, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.859375, "rewards/margins": -0.53125, "rewards/rejected": -3.328125, "step": 215 }, { "epoch": 0.08044692737430167, "grad_norm": 164.0, "learning_rate": 1.4453531598513011e-06, "logits/chosen": -0.7265625, "logits/rejected": -2.203125, "logps/chosen": -0.8671875, "logps/rejected": -0.84375, "loss": 1.2734, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.34375, "rewards/margins": -0.109375, "rewards/rejected": -4.25, "step": 216 }, { "epoch": 0.0808193668528864, "grad_norm": 133.0, "learning_rate": 1.4520446096654276e-06, "logits/chosen": -0.625, "logits/rejected": -2.296875, "logps/chosen": -0.8046875, "logps/rejected": -0.7890625, "loss": 1.2969, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.0, "rewards/margins": -0.0703125, "rewards/rejected": -3.9375, "step": 217 }, { "epoch": 0.08119180633147113, "grad_norm": 148.0, "learning_rate": 1.4587360594795538e-06, "logits/chosen": -0.65625, "logits/rejected": -2.171875, "logps/chosen": -0.7109375, "logps/rejected": -0.75, "loss": 1.0703, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.5625, "rewards/margins": 0.203125, "rewards/rejected": -3.765625, "step": 218 }, { "epoch": 0.08156424581005586, "grad_norm": 155.0, "learning_rate": 1.4654275092936802e-06, "logits/chosen": -0.66796875, "logits/rejected": -1.625, "logps/chosen": -0.78125, "logps/rejected": -0.6484375, "loss": 2.0469, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.90625, "rewards/margins": -0.6640625, "rewards/rejected": -3.25, "step": 219 }, { "epoch": 0.08193668528864059, "grad_norm": 194.0, "learning_rate": 1.4721189591078067e-06, "logits/chosen": -0.46875, "logits/rejected": -2.40625, "logps/chosen": -0.8359375, "logps/rejected": -0.7890625, "loss": 1.4531, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.15625, "rewards/margins": -0.2265625, "rewards/rejected": -3.9375, "step": 220 }, { "epoch": 0.08230912476722532, "grad_norm": 137.0, "learning_rate": 1.4788104089219329e-06, "logits/chosen": -0.4453125, "logits/rejected": -2.421875, "logps/chosen": -0.75, "logps/rejected": -0.80859375, "loss": 1.0, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.75, "rewards/margins": 0.2890625, "rewards/rejected": -4.03125, "step": 221 }, { "epoch": 0.08268156424581005, "grad_norm": 137.0, "learning_rate": 1.4855018587360593e-06, "logits/chosen": -0.5703125, "logits/rejected": -2.09375, "logps/chosen": -0.765625, "logps/rejected": -0.73046875, "loss": 1.3438, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.84375, "rewards/margins": -0.1875, "rewards/rejected": -3.65625, "step": 222 }, { "epoch": 0.08305400372439478, "grad_norm": 208.0, "learning_rate": 1.4921933085501858e-06, "logits/chosen": -0.67578125, "logits/rejected": -1.6875, "logps/chosen": -0.8125, "logps/rejected": -0.71484375, "loss": 1.5781, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.0625, "rewards/margins": -0.515625, "rewards/rejected": -3.5625, "step": 223 }, { "epoch": 0.08342644320297951, "grad_norm": 105.0, "learning_rate": 1.4988847583643122e-06, "logits/chosen": -0.51171875, "logits/rejected": -2.296875, "logps/chosen": -0.66015625, "logps/rejected": -0.85546875, "loss": 0.6914, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 0.9921875, "rewards/rejected": -4.28125, "step": 224 }, { "epoch": 0.08379888268156424, "grad_norm": 132.0, "learning_rate": 1.5055762081784386e-06, "logits/chosen": -0.6953125, "logits/rejected": -2.1875, "logps/chosen": -0.6484375, "logps/rejected": -0.6796875, "loss": 1.0781, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.25, "rewards/margins": 0.15625, "rewards/rejected": -3.40625, "step": 225 }, { "epoch": 0.08417132216014897, "grad_norm": 121.0, "learning_rate": 1.512267657992565e-06, "logits/chosen": -0.703125, "logits/rejected": -2.171875, "logps/chosen": -0.68359375, "logps/rejected": -0.765625, "loss": 0.918, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.421875, "rewards/margins": 0.390625, "rewards/rejected": -3.8125, "step": 226 }, { "epoch": 0.0845437616387337, "grad_norm": 176.0, "learning_rate": 1.5189591078066915e-06, "logits/chosen": -0.859375, "logits/rejected": -2.21875, "logps/chosen": -1.34375, "logps/rejected": -0.87109375, "loss": 3.3125, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -6.75, "rewards/margins": -2.390625, "rewards/rejected": -4.375, "step": 227 }, { "epoch": 0.08491620111731843, "grad_norm": 113.0, "learning_rate": 1.5256505576208177e-06, "logits/chosen": -0.55859375, "logits/rejected": -2.484375, "logps/chosen": -0.6640625, "logps/rejected": -0.81640625, "loss": 0.7148, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3125, "rewards/margins": 0.765625, "rewards/rejected": -4.09375, "step": 228 }, { "epoch": 0.08528864059590316, "grad_norm": 197.0, "learning_rate": 1.5323420074349442e-06, "logits/chosen": -0.4921875, "logits/rejected": -2.328125, "logps/chosen": -0.75, "logps/rejected": -0.796875, "loss": 1.125, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.75, "rewards/margins": 0.2265625, "rewards/rejected": -3.96875, "step": 229 }, { "epoch": 0.0856610800744879, "grad_norm": 123.0, "learning_rate": 1.5390334572490706e-06, "logits/chosen": -0.671875, "logits/rejected": -2.359375, "logps/chosen": -0.63671875, "logps/rejected": -0.7734375, "loss": 0.8125, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.1875, "rewards/margins": 0.6796875, "rewards/rejected": -3.875, "step": 230 }, { "epoch": 0.08603351955307263, "grad_norm": 206.0, "learning_rate": 1.545724907063197e-06, "logits/chosen": -0.7734375, "logits/rejected": -1.859375, "logps/chosen": -0.640625, "logps/rejected": -0.59375, "loss": 1.6016, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.1875, "rewards/margins": -0.2265625, "rewards/rejected": -2.96875, "step": 231 }, { "epoch": 0.08640595903165736, "grad_norm": 168.0, "learning_rate": 1.5524163568773233e-06, "logits/chosen": -0.7890625, "logits/rejected": -1.8359375, "logps/chosen": -0.609375, "logps/rejected": -0.515625, "loss": 1.5469, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.03125, "rewards/margins": -0.453125, "rewards/rejected": -2.59375, "step": 232 }, { "epoch": 0.08677839851024209, "grad_norm": 251.0, "learning_rate": 1.5591078066914497e-06, "logits/chosen": -0.625, "logits/rejected": -1.9765625, "logps/chosen": -0.61328125, "logps/rejected": -0.65234375, "loss": 1.2891, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.078125, "rewards/margins": 0.1796875, "rewards/rejected": -3.25, "step": 233 }, { "epoch": 0.08715083798882682, "grad_norm": 100.5, "learning_rate": 1.5657992565055761e-06, "logits/chosen": -0.65234375, "logits/rejected": -2.046875, "logps/chosen": -0.62109375, "logps/rejected": -0.8046875, "loss": 0.6641, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.109375, "rewards/margins": 0.921875, "rewards/rejected": -4.03125, "step": 234 }, { "epoch": 0.08752327746741155, "grad_norm": 229.0, "learning_rate": 1.5724907063197026e-06, "logits/chosen": -0.6484375, "logits/rejected": -1.4765625, "logps/chosen": -0.7890625, "logps/rejected": -0.7109375, "loss": 2.0625, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.9375, "rewards/margins": -0.390625, "rewards/rejected": -3.546875, "step": 235 }, { "epoch": 0.08789571694599628, "grad_norm": 119.5, "learning_rate": 1.579182156133829e-06, "logits/chosen": -0.8515625, "logits/rejected": -1.859375, "logps/chosen": -0.58984375, "logps/rejected": -0.7109375, "loss": 0.9062, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.9375, "rewards/margins": 0.6015625, "rewards/rejected": -3.546875, "step": 236 }, { "epoch": 0.08826815642458101, "grad_norm": 97.0, "learning_rate": 1.5858736059479552e-06, "logits/chosen": -0.84375, "logits/rejected": -1.6796875, "logps/chosen": -0.51171875, "logps/rejected": -0.5859375, "loss": 0.9961, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.5625, "rewards/margins": 0.3828125, "rewards/rejected": -2.9375, "step": 237 }, { "epoch": 0.08864059590316574, "grad_norm": 118.5, "learning_rate": 1.5925650557620817e-06, "logits/chosen": -0.69921875, "logits/rejected": -1.515625, "logps/chosen": -0.578125, "logps/rejected": -0.6484375, "loss": 1.0469, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.90625, "rewards/margins": 0.359375, "rewards/rejected": -3.25, "step": 238 }, { "epoch": 0.08901303538175047, "grad_norm": 87.0, "learning_rate": 1.599256505576208e-06, "logits/chosen": -0.77734375, "logits/rejected": -2.046875, "logps/chosen": -0.52734375, "logps/rejected": -0.84375, "loss": 0.4473, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.640625, "rewards/margins": 1.578125, "rewards/rejected": -4.21875, "step": 239 }, { "epoch": 0.0893854748603352, "grad_norm": 147.0, "learning_rate": 1.6059479553903345e-06, "logits/chosen": -0.48828125, "logits/rejected": -2.359375, "logps/chosen": -0.8828125, "logps/rejected": -0.79296875, "loss": 1.6094, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.4375, "rewards/margins": -0.4609375, "rewards/rejected": -3.96875, "step": 240 }, { "epoch": 0.08975791433891993, "grad_norm": 294.0, "learning_rate": 1.612639405204461e-06, "logits/chosen": -0.6484375, "logits/rejected": -2.3125, "logps/chosen": -1.453125, "logps/rejected": -0.8828125, "loss": 3.6562, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -7.25, "rewards/margins": -2.8125, "rewards/rejected": -4.4375, "step": 241 }, { "epoch": 0.09013035381750466, "grad_norm": 193.0, "learning_rate": 1.6193308550185874e-06, "logits/chosen": -0.640625, "logits/rejected": -1.6328125, "logps/chosen": -1.140625, "logps/rejected": -0.70703125, "loss": 3.4062, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -5.6875, "rewards/margins": -2.15625, "rewards/rejected": -3.53125, "step": 242 }, { "epoch": 0.09050279329608939, "grad_norm": 112.5, "learning_rate": 1.6260223048327136e-06, "logits/chosen": -0.53125, "logits/rejected": -2.28125, "logps/chosen": -0.6796875, "logps/rejected": -0.91015625, "loss": 0.6719, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.390625, "rewards/margins": 1.15625, "rewards/rejected": -4.5625, "step": 243 }, { "epoch": 0.09087523277467412, "grad_norm": 143.0, "learning_rate": 1.63271375464684e-06, "logits/chosen": -0.68359375, "logits/rejected": -2.265625, "logps/chosen": -0.984375, "logps/rejected": -0.8203125, "loss": 2.0625, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.9375, "rewards/margins": -0.828125, "rewards/rejected": -4.125, "step": 244 }, { "epoch": 0.09124767225325885, "grad_norm": 162.0, "learning_rate": 1.6394052044609665e-06, "logits/chosen": -0.46875, "logits/rejected": -2.28125, "logps/chosen": -0.84375, "logps/rejected": -0.96875, "loss": 0.8398, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.21875, "rewards/margins": 0.6171875, "rewards/rejected": -4.8125, "step": 245 }, { "epoch": 0.09162011173184358, "grad_norm": 190.0, "learning_rate": 1.6460966542750927e-06, "logits/chosen": -0.86328125, "logits/rejected": -1.8359375, "logps/chosen": -0.8125, "logps/rejected": -0.61328125, "loss": 2.1562, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.0625, "rewards/margins": -0.984375, "rewards/rejected": -3.0625, "step": 246 }, { "epoch": 0.0919925512104283, "grad_norm": 95.0, "learning_rate": 1.6527881040892192e-06, "logits/chosen": -0.87109375, "logits/rejected": -2.0625, "logps/chosen": -0.56640625, "logps/rejected": -0.6953125, "loss": 0.7695, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.828125, "rewards/margins": 0.6640625, "rewards/rejected": -3.5, "step": 247 }, { "epoch": 0.09236499068901304, "grad_norm": 116.0, "learning_rate": 1.6594795539033456e-06, "logits/chosen": -0.609375, "logits/rejected": -2.203125, "logps/chosen": -0.5859375, "logps/rejected": -0.76953125, "loss": 0.668, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.921875, "rewards/margins": 0.9375, "rewards/rejected": -3.859375, "step": 248 }, { "epoch": 0.09273743016759776, "grad_norm": 127.5, "learning_rate": 1.666171003717472e-06, "logits/chosen": -0.69921875, "logits/rejected": -1.8828125, "logps/chosen": -0.765625, "logps/rejected": -0.90625, "loss": 0.75, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.828125, "rewards/margins": 0.703125, "rewards/rejected": -4.53125, "step": 249 }, { "epoch": 0.0931098696461825, "grad_norm": 52.5, "learning_rate": 1.6728624535315985e-06, "logits/chosen": -0.53515625, "logits/rejected": -2.265625, "logps/chosen": -0.5078125, "logps/rejected": -0.8984375, "loss": 0.3086, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.53125, "rewards/margins": 1.9453125, "rewards/rejected": -4.5, "step": 250 }, { "epoch": 0.09348230912476722, "grad_norm": 69.0, "learning_rate": 1.679553903345725e-06, "logits/chosen": -0.6015625, "logits/rejected": -2.3125, "logps/chosen": -0.470703125, "logps/rejected": -0.73828125, "loss": 0.4805, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.34375, "rewards/margins": 1.34375, "rewards/rejected": -3.6875, "step": 251 }, { "epoch": 0.09385474860335195, "grad_norm": 61.25, "learning_rate": 1.6862453531598513e-06, "logits/chosen": -0.5546875, "logits/rejected": -2.125, "logps/chosen": -0.51953125, "logps/rejected": -0.8203125, "loss": 0.4102, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.59375, "rewards/margins": 1.5078125, "rewards/rejected": -4.125, "step": 252 }, { "epoch": 0.09422718808193668, "grad_norm": 26.75, "learning_rate": 1.6929368029739776e-06, "logits/chosen": -0.478515625, "logits/rejected": -2.125, "logps/chosen": -0.326171875, "logps/rejected": -0.82421875, "loss": 0.1924, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.625, "rewards/margins": 2.484375, "rewards/rejected": -4.125, "step": 253 }, { "epoch": 0.09459962756052141, "grad_norm": 89.0, "learning_rate": 1.699628252788104e-06, "logits/chosen": -0.37109375, "logits/rejected": -2.125, "logps/chosen": -0.609375, "logps/rejected": -0.9921875, "loss": 0.3613, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.046875, "rewards/margins": 1.921875, "rewards/rejected": -4.96875, "step": 254 }, { "epoch": 0.09497206703910614, "grad_norm": 231.0, "learning_rate": 1.7063197026022304e-06, "logits/chosen": -0.86328125, "logits/rejected": -2.296875, "logps/chosen": -1.1484375, "logps/rejected": -0.609375, "loss": 3.5312, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -5.75, "rewards/margins": -2.6875, "rewards/rejected": -3.046875, "step": 255 }, { "epoch": 0.09534450651769087, "grad_norm": 251.0, "learning_rate": 1.7130111524163567e-06, "logits/chosen": -0.8125, "logits/rejected": -1.6328125, "logps/chosen": -0.6953125, "logps/rejected": -0.42578125, "loss": 2.375, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.46875, "rewards/margins": -1.34375, "rewards/rejected": -2.125, "step": 256 }, { "epoch": 0.0957169459962756, "grad_norm": 57.5, "learning_rate": 1.7197026022304831e-06, "logits/chosen": -0.4375, "logits/rejected": -2.234375, "logps/chosen": -0.51171875, "logps/rejected": -1.0, "loss": 0.2109, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5625, "rewards/margins": 2.453125, "rewards/rejected": -5.0, "step": 257 }, { "epoch": 0.09608938547486033, "grad_norm": 29.375, "learning_rate": 1.7263940520446095e-06, "logits/chosen": -0.435546875, "logits/rejected": -2.34375, "logps/chosen": -0.35546875, "logps/rejected": -0.90625, "loss": 0.1406, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7734375, "rewards/margins": 2.75, "rewards/rejected": -4.53125, "step": 258 }, { "epoch": 0.09646182495344506, "grad_norm": 198.0, "learning_rate": 1.733085501858736e-06, "logits/chosen": -0.74609375, "logits/rejected": -1.8125, "logps/chosen": -0.82421875, "logps/rejected": -0.6875, "loss": 2.2969, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.125, "rewards/margins": -0.6875, "rewards/rejected": -3.4375, "step": 259 }, { "epoch": 0.09683426443202979, "grad_norm": 29.875, "learning_rate": 1.7397769516728624e-06, "logits/chosen": -0.65234375, "logits/rejected": -2.25, "logps/chosen": -0.44140625, "logps/rejected": -0.8984375, "loss": 0.207, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.203125, "rewards/margins": 2.28125, "rewards/rejected": -4.5, "step": 260 }, { "epoch": 0.09720670391061452, "grad_norm": 105.5, "learning_rate": 1.7464684014869889e-06, "logits/chosen": -0.5234375, "logits/rejected": -2.40625, "logps/chosen": -0.6953125, "logps/rejected": -0.87890625, "loss": 0.6836, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.46875, "rewards/margins": 0.9375, "rewards/rejected": -4.40625, "step": 261 }, { "epoch": 0.09757914338919925, "grad_norm": 171.0, "learning_rate": 1.7531598513011153e-06, "logits/chosen": -0.427734375, "logits/rejected": -2.328125, "logps/chosen": -0.73828125, "logps/rejected": -0.859375, "loss": 0.9219, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.6875, "rewards/margins": 0.6171875, "rewards/rejected": -4.3125, "step": 262 }, { "epoch": 0.09795158286778398, "grad_norm": 64.5, "learning_rate": 1.7598513011152417e-06, "logits/chosen": -0.5703125, "logits/rejected": -2.03125, "logps/chosen": -0.5234375, "logps/rejected": -0.859375, "loss": 0.3477, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.609375, "rewards/margins": 1.703125, "rewards/rejected": -4.3125, "step": 263 }, { "epoch": 0.09832402234636871, "grad_norm": 71.0, "learning_rate": 1.766542750929368e-06, "logits/chosen": -0.640625, "logits/rejected": -2.09375, "logps/chosen": -0.42578125, "logps/rejected": -0.8359375, "loss": 0.4199, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 2.0625, "rewards/rejected": -4.1875, "step": 264 }, { "epoch": 0.09869646182495345, "grad_norm": 142.0, "learning_rate": 1.7732342007434942e-06, "logits/chosen": -0.484375, "logits/rejected": -2.046875, "logps/chosen": -0.61328125, "logps/rejected": -0.7734375, "loss": 0.7734, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.0625, "rewards/margins": 0.8125, "rewards/rejected": -3.875, "step": 265 }, { "epoch": 0.09906890130353818, "grad_norm": 62.25, "learning_rate": 1.7799256505576206e-06, "logits/chosen": -0.455078125, "logits/rejected": -2.15625, "logps/chosen": -0.470703125, "logps/rejected": -0.7578125, "loss": 0.4258, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.34375, "rewards/margins": 1.453125, "rewards/rejected": -3.8125, "step": 266 }, { "epoch": 0.09944134078212291, "grad_norm": 179.0, "learning_rate": 1.786617100371747e-06, "logits/chosen": -0.546875, "logits/rejected": -1.4609375, "logps/chosen": -0.57421875, "logps/rejected": -0.828125, "loss": 0.625, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.875, "rewards/margins": 1.28125, "rewards/rejected": -4.15625, "step": 267 }, { "epoch": 0.09981378026070764, "grad_norm": 176.0, "learning_rate": 1.7933085501858735e-06, "logits/chosen": -0.58203125, "logits/rejected": -0.859375, "logps/chosen": -0.765625, "logps/rejected": -0.77734375, "loss": 1.7031, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.8125, "rewards/margins": 0.0703125, "rewards/rejected": -3.890625, "step": 268 }, { "epoch": 0.10018621973929237, "grad_norm": 146.0, "learning_rate": 1.8e-06, "logits/chosen": -0.59375, "logits/rejected": -1.9140625, "logps/chosen": -0.8046875, "logps/rejected": -0.66015625, "loss": 1.7812, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.03125, "rewards/margins": -0.734375, "rewards/rejected": -3.296875, "step": 269 }, { "epoch": 0.1005586592178771, "grad_norm": 139.0, "learning_rate": 1.7999992391161992e-06, "logits/chosen": -0.357421875, "logits/rejected": -2.234375, "logps/chosen": -0.76171875, "logps/rejected": -0.9140625, "loss": 1.1094, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.8125, "rewards/margins": 0.7421875, "rewards/rejected": -4.5625, "step": 270 }, { "epoch": 0.10093109869646183, "grad_norm": 55.25, "learning_rate": 1.7999969564660834e-06, "logits/chosen": -0.5703125, "logits/rejected": -2.25, "logps/chosen": -0.4453125, "logps/rejected": -0.828125, "loss": 0.2988, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 1.9296875, "rewards/rejected": -4.15625, "step": 271 }, { "epoch": 0.10130353817504656, "grad_norm": 178.0, "learning_rate": 1.799993152053512e-06, "logits/chosen": -0.66015625, "logits/rejected": -2.203125, "logps/chosen": -0.7578125, "logps/rejected": -0.7421875, "loss": 1.7656, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.8125, "rewards/margins": -0.09375, "rewards/rejected": -3.71875, "step": 272 }, { "epoch": 0.10167597765363129, "grad_norm": 107.0, "learning_rate": 1.7999878258849183e-06, "logits/chosen": -0.55859375, "logits/rejected": -1.84375, "logps/chosen": -0.5859375, "logps/rejected": -0.7109375, "loss": 0.832, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.9375, "rewards/margins": 0.6171875, "rewards/rejected": -3.5625, "step": 273 }, { "epoch": 0.10204841713221602, "grad_norm": 168.0, "learning_rate": 1.7999809779693073e-06, "logits/chosen": -0.51171875, "logits/rejected": -2.125, "logps/chosen": -0.8359375, "logps/rejected": -0.890625, "loss": 1.0078, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.1875, "rewards/margins": 0.265625, "rewards/rejected": -4.4375, "step": 274 }, { "epoch": 0.10242085661080075, "grad_norm": 142.0, "learning_rate": 1.7999726083182583e-06, "logits/chosen": -0.6484375, "logits/rejected": -2.25, "logps/chosen": -0.71875, "logps/rejected": -0.9375, "loss": 0.6367, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.59375, "rewards/margins": 1.0859375, "rewards/rejected": -4.6875, "step": 275 }, { "epoch": 0.10279329608938548, "grad_norm": 171.0, "learning_rate": 1.7999627169459232e-06, "logits/chosen": -0.427734375, "logits/rejected": -2.296875, "logps/chosen": -0.8828125, "logps/rejected": -0.96875, "loss": 1.0469, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.40625, "rewards/margins": 0.421875, "rewards/rejected": -4.8125, "step": 276 }, { "epoch": 0.10316573556797021, "grad_norm": 130.0, "learning_rate": 1.7999513038690264e-06, "logits/chosen": -0.609375, "logits/rejected": -2.359375, "logps/chosen": -0.640625, "logps/rejected": -0.8359375, "loss": 1.0, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.203125, "rewards/margins": 0.984375, "rewards/rejected": -4.1875, "step": 277 }, { "epoch": 0.10353817504655494, "grad_norm": 40.25, "learning_rate": 1.7999383691068662e-06, "logits/chosen": -0.51953125, "logits/rejected": -2.296875, "logps/chosen": -0.466796875, "logps/rejected": -0.921875, "loss": 0.2373, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.328125, "rewards/margins": 2.28125, "rewards/rejected": -4.625, "step": 278 }, { "epoch": 0.10391061452513967, "grad_norm": 139.0, "learning_rate": 1.7999239126813131e-06, "logits/chosen": -0.59375, "logits/rejected": -1.7734375, "logps/chosen": -0.734375, "logps/rejected": -0.828125, "loss": 0.875, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.671875, "rewards/margins": 0.484375, "rewards/rejected": -4.15625, "step": 279 }, { "epoch": 0.1042830540037244, "grad_norm": 37.25, "learning_rate": 1.7999079346168108e-06, "logits/chosen": -0.482421875, "logits/rejected": -2.25, "logps/chosen": -0.26953125, "logps/rejected": -0.8046875, "loss": 0.1738, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 2.6875, "rewards/rejected": -4.03125, "step": 280 }, { "epoch": 0.10465549348230913, "grad_norm": 160.0, "learning_rate": 1.7998904349403761e-06, "logits/chosen": -0.671875, "logits/rejected": -0.91796875, "logps/chosen": -0.55859375, "logps/rejected": -0.5078125, "loss": 1.3594, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.78125, "rewards/margins": -0.2421875, "rewards/rejected": -2.546875, "step": 281 }, { "epoch": 0.10502793296089385, "grad_norm": 60.5, "learning_rate": 1.7998714136815982e-06, "logits/chosen": -0.48046875, "logits/rejected": -1.90625, "logps/chosen": -0.52734375, "logps/rejected": -0.96875, "loss": 0.2451, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.625, "rewards/margins": 2.21875, "rewards/rejected": -4.84375, "step": 282 }, { "epoch": 0.10540037243947858, "grad_norm": 94.0, "learning_rate": 1.7998508708726394e-06, "logits/chosen": -0.58203125, "logits/rejected": -1.59375, "logps/chosen": -0.30078125, "logps/rejected": -0.7265625, "loss": 0.5195, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5, "rewards/margins": 2.125, "rewards/rejected": -3.625, "step": 283 }, { "epoch": 0.10577281191806331, "grad_norm": 29.125, "learning_rate": 1.7998288065482342e-06, "logits/chosen": -0.40625, "logits/rejected": -2.234375, "logps/chosen": -0.365234375, "logps/rejected": -0.8359375, "loss": 0.1904, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.828125, "rewards/margins": 2.359375, "rewards/rejected": -4.1875, "step": 284 }, { "epoch": 0.10614525139664804, "grad_norm": 50.5, "learning_rate": 1.7998052207456905e-06, "logits/chosen": -0.5703125, "logits/rejected": -1.8515625, "logps/chosen": -0.4140625, "logps/rejected": -0.85546875, "loss": 0.2402, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 2.21875, "rewards/rejected": -4.28125, "step": 285 }, { "epoch": 0.10651769087523277, "grad_norm": 92.0, "learning_rate": 1.7997801135048885e-06, "logits/chosen": -0.5703125, "logits/rejected": -2.125, "logps/chosen": -0.546875, "logps/rejected": -0.8515625, "loss": 0.5703, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.71875, "rewards/margins": 1.546875, "rewards/rejected": -4.25, "step": 286 }, { "epoch": 0.1068901303538175, "grad_norm": 14.25, "learning_rate": 1.7997534848682803e-06, "logits/chosen": -0.279296875, "logits/rejected": -2.265625, "logps/chosen": -0.275390625, "logps/rejected": -0.93359375, "loss": 0.0796, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.375, "rewards/margins": 3.296875, "rewards/rejected": -4.6875, "step": 287 }, { "epoch": 0.10726256983240223, "grad_norm": 180.0, "learning_rate": 1.7997253348808915e-06, "logits/chosen": -0.53515625, "logits/rejected": -1.9296875, "logps/chosen": -0.85546875, "logps/rejected": -0.84765625, "loss": 1.25, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.28125, "rewards/margins": -0.046875, "rewards/rejected": -4.25, "step": 288 }, { "epoch": 0.10763500931098696, "grad_norm": 53.25, "learning_rate": 1.7996956635903194e-06, "logits/chosen": -0.609375, "logits/rejected": -1.828125, "logps/chosen": -0.466796875, "logps/rejected": -0.9453125, "loss": 0.2617, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.34375, "rewards/margins": 2.375, "rewards/rejected": -4.71875, "step": 289 }, { "epoch": 0.10800744878957169, "grad_norm": 69.0, "learning_rate": 1.7996644710467336e-06, "logits/chosen": -0.48828125, "logits/rejected": -2.109375, "logps/chosen": -0.4921875, "logps/rejected": -0.859375, "loss": 0.4219, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.46875, "rewards/margins": 1.8515625, "rewards/rejected": -4.3125, "step": 290 }, { "epoch": 0.10837988826815642, "grad_norm": 91.5, "learning_rate": 1.7996317573028767e-06, "logits/chosen": -0.62109375, "logits/rejected": -2.0, "logps/chosen": -0.671875, "logps/rejected": -0.8828125, "loss": 0.6797, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.34375, "rewards/margins": 1.046875, "rewards/rejected": -4.40625, "step": 291 }, { "epoch": 0.10875232774674115, "grad_norm": 94.0, "learning_rate": 1.7995975224140623e-06, "logits/chosen": -0.400390625, "logits/rejected": -2.078125, "logps/chosen": -0.57421875, "logps/rejected": -0.96484375, "loss": 0.3008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.875, "rewards/margins": 1.953125, "rewards/rejected": -4.8125, "step": 292 }, { "epoch": 0.10912476722532588, "grad_norm": 48.0, "learning_rate": 1.7995617664381768e-06, "logits/chosen": -0.443359375, "logits/rejected": -2.140625, "logps/chosen": -0.48046875, "logps/rejected": -0.87109375, "loss": 0.2773, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.40625, "rewards/margins": 1.9375, "rewards/rejected": -4.34375, "step": 293 }, { "epoch": 0.10949720670391061, "grad_norm": 36.25, "learning_rate": 1.799524489435678e-06, "logits/chosen": -0.546875, "logits/rejected": -1.953125, "logps/chosen": -0.55078125, "logps/rejected": -1.0546875, "loss": 0.1738, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 2.53125, "rewards/rejected": -5.28125, "step": 294 }, { "epoch": 0.10986964618249534, "grad_norm": 42.75, "learning_rate": 1.799485691469596e-06, "logits/chosen": -0.365234375, "logits/rejected": -2.28125, "logps/chosen": -0.41015625, "logps/rejected": -0.8828125, "loss": 0.2002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 2.375, "rewards/rejected": -4.4375, "step": 295 }, { "epoch": 0.11024208566108007, "grad_norm": 92.0, "learning_rate": 1.799445372605533e-06, "logits/chosen": -0.51953125, "logits/rejected": -1.21875, "logps/chosen": -0.6640625, "logps/rejected": -0.8828125, "loss": 0.5977, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.34375, "rewards/margins": 1.0703125, "rewards/rejected": -4.40625, "step": 296 }, { "epoch": 0.1106145251396648, "grad_norm": 27.75, "learning_rate": 1.7994035329116611e-06, "logits/chosen": -0.5, "logits/rejected": -2.234375, "logps/chosen": -0.39453125, "logps/rejected": -0.953125, "loss": 0.1328, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.96875, "rewards/margins": 2.78125, "rewards/rejected": -4.75, "step": 297 }, { "epoch": 0.11098696461824953, "grad_norm": 203.0, "learning_rate": 1.7993601724587262e-06, "logits/chosen": -0.76171875, "logits/rejected": -2.140625, "logps/chosen": -0.87109375, "logps/rejected": -0.8359375, "loss": 2.0156, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.34375, "rewards/margins": -0.1796875, "rewards/rejected": -4.1875, "step": 298 }, { "epoch": 0.11135940409683426, "grad_norm": 102.5, "learning_rate": 1.799315291320044e-06, "logits/chosen": -0.2890625, "logits/rejected": -2.078125, "logps/chosen": -0.45703125, "logps/rejected": -1.0078125, "loss": 0.4277, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.28125, "rewards/margins": 2.75, "rewards/rejected": -5.03125, "step": 299 }, { "epoch": 0.11173184357541899, "grad_norm": 190.0, "learning_rate": 1.7992688895715016e-06, "logits/chosen": -0.5625, "logits/rejected": -2.15625, "logps/chosen": -0.984375, "logps/rejected": -0.8359375, "loss": 1.7344, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -4.90625, "rewards/margins": -0.734375, "rewards/rejected": -4.1875, "step": 300 }, { "epoch": 0.11210428305400373, "grad_norm": 27.625, "learning_rate": 1.7992209672915578e-06, "logits/chosen": -0.28515625, "logits/rejected": -2.1875, "logps/chosen": -0.34375, "logps/rejected": -0.890625, "loss": 0.1621, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.71875, "rewards/margins": 2.71875, "rewards/rejected": -4.4375, "step": 301 }, { "epoch": 0.11247672253258846, "grad_norm": 26.125, "learning_rate": 1.7991715245612422e-06, "logits/chosen": -0.333984375, "logits/rejected": -1.96875, "logps/chosen": -0.375, "logps/rejected": -1.078125, "loss": 0.1133, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.875, "rewards/margins": 3.5, "rewards/rejected": -5.375, "step": 302 }, { "epoch": 0.11284916201117319, "grad_norm": 217.0, "learning_rate": 1.7991205614641554e-06, "logits/chosen": -0.515625, "logits/rejected": -1.734375, "logps/chosen": -0.8984375, "logps/rejected": -0.76953125, "loss": 1.6797, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -4.5, "rewards/margins": -0.640625, "rewards/rejected": -3.84375, "step": 303 }, { "epoch": 0.11322160148975792, "grad_norm": 100.5, "learning_rate": 1.799068078086468e-06, "logits/chosen": -0.59765625, "logits/rejected": -1.5703125, "logps/chosen": -0.4140625, "logps/rejected": -0.6796875, "loss": 0.6797, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.0625, "rewards/margins": 1.3359375, "rewards/rejected": -3.40625, "step": 304 }, { "epoch": 0.11359404096834265, "grad_norm": 16.125, "learning_rate": 1.7990140745169216e-06, "logits/chosen": -0.40625, "logits/rejected": -2.0625, "logps/chosen": -0.3203125, "logps/rejected": -1.015625, "loss": 0.0698, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6015625, "rewards/margins": 3.46875, "rewards/rejected": -5.0625, "step": 305 }, { "epoch": 0.11396648044692738, "grad_norm": 23.75, "learning_rate": 1.7989585508468289e-06, "logits/chosen": -0.255859375, "logits/rejected": -2.28125, "logps/chosen": -0.3671875, "logps/rejected": -0.984375, "loss": 0.1006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.84375, "rewards/margins": 3.078125, "rewards/rejected": -4.9375, "step": 306 }, { "epoch": 0.11433891992551211, "grad_norm": 16.5, "learning_rate": 1.7989015071700718e-06, "logits/chosen": -0.2060546875, "logits/rejected": -2.21875, "logps/chosen": -0.328125, "logps/rejected": -1.109375, "loss": 0.0613, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6484375, "rewards/margins": 3.875, "rewards/rejected": -5.5, "step": 307 }, { "epoch": 0.11471135940409684, "grad_norm": 227.0, "learning_rate": 1.7988429435831033e-06, "logits/chosen": -0.64453125, "logits/rejected": -2.015625, "logps/chosen": -0.59765625, "logps/rejected": -0.7578125, "loss": 1.3438, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.96875, "rewards/margins": 0.796875, "rewards/rejected": -3.78125, "step": 308 }, { "epoch": 0.11508379888268157, "grad_norm": 11.5, "learning_rate": 1.798782860184945e-06, "logits/chosen": -0.37109375, "logits/rejected": -2.21875, "logps/chosen": -0.2109375, "logps/rejected": -0.93359375, "loss": 0.0659, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0546875, "rewards/margins": 3.59375, "rewards/rejected": -4.65625, "step": 309 }, { "epoch": 0.1154562383612663, "grad_norm": 53.5, "learning_rate": 1.7987212570771894e-06, "logits/chosen": -0.6171875, "logits/rejected": -2.0625, "logps/chosen": -0.392578125, "logps/rejected": -0.9296875, "loss": 0.2656, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.953125, "rewards/margins": 2.6875, "rewards/rejected": -4.625, "step": 310 }, { "epoch": 0.11582867783985103, "grad_norm": 10.5, "learning_rate": 1.7986581343639986e-06, "logits/chosen": -0.302734375, "logits/rejected": -2.140625, "logps/chosen": -0.30078125, "logps/rejected": -1.078125, "loss": 0.0439, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5078125, "rewards/margins": 3.90625, "rewards/rejected": -5.4375, "step": 311 }, { "epoch": 0.11620111731843576, "grad_norm": 11.6875, "learning_rate": 1.7985934921521035e-06, "logits/chosen": -0.328125, "logits/rejected": -2.09375, "logps/chosen": -0.31640625, "logps/rejected": -1.0625, "loss": 0.0547, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5859375, "rewards/margins": 3.75, "rewards/rejected": -5.3125, "step": 312 }, { "epoch": 0.11657355679702049, "grad_norm": 13.875, "learning_rate": 1.7985273305508048e-06, "logits/chosen": -0.53515625, "logits/rejected": -1.8984375, "logps/chosen": -0.28125, "logps/rejected": -0.984375, "loss": 0.0801, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.40625, "rewards/margins": 3.53125, "rewards/rejected": -4.9375, "step": 313 }, { "epoch": 0.11694599627560522, "grad_norm": 140.0, "learning_rate": 1.798459649671972e-06, "logits/chosen": -0.625, "logits/rejected": -1.6796875, "logps/chosen": -0.56640625, "logps/rejected": -0.703125, "loss": 0.8945, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.828125, "rewards/margins": 0.703125, "rewards/rejected": -3.53125, "step": 314 }, { "epoch": 0.11731843575418995, "grad_norm": 106.5, "learning_rate": 1.7983904496300434e-06, "logits/chosen": -0.494140625, "logits/rejected": -2.0625, "logps/chosen": -0.7890625, "logps/rejected": -1.109375, "loss": 0.6758, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.9375, "rewards/margins": 1.6171875, "rewards/rejected": -5.5625, "step": 315 }, { "epoch": 0.11769087523277467, "grad_norm": 55.25, "learning_rate": 1.7983197305420257e-06, "logits/chosen": -0.28125, "logits/rejected": -2.0625, "logps/chosen": -0.6484375, "logps/rejected": -1.0859375, "loss": 0.2383, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.25, "rewards/margins": 2.15625, "rewards/rejected": -5.4375, "step": 316 }, { "epoch": 0.1180633147113594, "grad_norm": 195.0, "learning_rate": 1.7982474925274954e-06, "logits/chosen": -0.359375, "logits/rejected": -1.40625, "logps/chosen": -0.376953125, "logps/rejected": -0.61328125, "loss": 1.1875, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -1.890625, "rewards/margins": 1.15625, "rewards/rejected": -3.046875, "step": 317 }, { "epoch": 0.11843575418994413, "grad_norm": 108.5, "learning_rate": 1.7981737357085955e-06, "logits/chosen": -0.45703125, "logits/rejected": -1.75, "logps/chosen": -0.53515625, "logps/rejected": -0.76171875, "loss": 0.6758, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.671875, "rewards/margins": 1.1328125, "rewards/rejected": -3.8125, "step": 318 }, { "epoch": 0.11880819366852886, "grad_norm": 90.5, "learning_rate": 1.7980984602100387e-06, "logits/chosen": -0.42578125, "logits/rejected": -2.0, "logps/chosen": -0.53515625, "logps/rejected": -0.84765625, "loss": 0.4766, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6875, "rewards/margins": 1.5546875, "rewards/rejected": -4.25, "step": 319 }, { "epoch": 0.1191806331471136, "grad_norm": 18.125, "learning_rate": 1.798021666159104e-06, "logits/chosen": -0.61328125, "logits/rejected": -2.09375, "logps/chosen": -0.345703125, "logps/rejected": -0.9921875, "loss": 0.0869, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7265625, "rewards/margins": 3.21875, "rewards/rejected": -4.9375, "step": 320 }, { "epoch": 0.11955307262569832, "grad_norm": 6.71875, "learning_rate": 1.7979433536856395e-06, "logits/chosen": -0.376953125, "logits/rejected": -2.125, "logps/chosen": -0.224609375, "logps/rejected": -1.171875, "loss": 0.0283, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.125, "rewards/margins": 4.71875, "rewards/rejected": -5.8125, "step": 321 }, { "epoch": 0.11992551210428305, "grad_norm": 32.5, "learning_rate": 1.7978635229220595e-06, "logits/chosen": -0.35546875, "logits/rejected": -2.125, "logps/chosen": -0.4921875, "logps/rejected": -1.1015625, "loss": 0.1289, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.46875, "rewards/margins": 3.03125, "rewards/rejected": -5.5, "step": 322 }, { "epoch": 0.12029795158286778, "grad_norm": 33.5, "learning_rate": 1.7977821740033467e-06, "logits/chosen": -0.34375, "logits/rejected": -1.96875, "logps/chosen": -0.5859375, "logps/rejected": -1.1875, "loss": 0.1191, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9375, "rewards/margins": 3.0, "rewards/rejected": -5.9375, "step": 323 }, { "epoch": 0.12067039106145251, "grad_norm": 14.9375, "learning_rate": 1.7976993070670495e-06, "logits/chosen": -0.486328125, "logits/rejected": -2.25, "logps/chosen": -0.208984375, "logps/rejected": -0.86328125, "loss": 0.082, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 3.265625, "rewards/rejected": -4.3125, "step": 324 }, { "epoch": 0.12104283054003724, "grad_norm": 113.0, "learning_rate": 1.7976149222532843e-06, "logits/chosen": -0.65625, "logits/rejected": -2.0625, "logps/chosen": -0.625, "logps/rejected": -1.0078125, "loss": 0.7734, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.125, "rewards/margins": 1.921875, "rewards/rejected": -5.0625, "step": 325 }, { "epoch": 0.12141527001862197, "grad_norm": 48.0, "learning_rate": 1.7975290197047332e-06, "logits/chosen": -0.82421875, "logits/rejected": -2.078125, "logps/chosen": -0.4921875, "logps/rejected": -1.0, "loss": 0.2236, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.46875, "rewards/margins": 2.53125, "rewards/rejected": -5.0, "step": 326 }, { "epoch": 0.1217877094972067, "grad_norm": 16.0, "learning_rate": 1.7974415995666442e-06, "logits/chosen": -0.478515625, "logits/rejected": -2.046875, "logps/chosen": -0.234375, "logps/rejected": -0.92578125, "loss": 0.0767, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.171875, "rewards/margins": 3.453125, "rewards/rejected": -4.625, "step": 327 }, { "epoch": 0.12216014897579143, "grad_norm": 140.0, "learning_rate": 1.7973526619868326e-06, "logits/chosen": -0.412109375, "logits/rejected": -1.5, "logps/chosen": -0.578125, "logps/rejected": -0.78125, "loss": 1.0, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.890625, "rewards/margins": 1.0234375, "rewards/rejected": -3.90625, "step": 328 }, { "epoch": 0.12253258845437616, "grad_norm": 38.25, "learning_rate": 1.797262207115679e-06, "logits/chosen": -0.4453125, "logits/rejected": -1.8203125, "logps/chosen": -0.4140625, "logps/rejected": -1.015625, "loss": 0.1758, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.078125, "rewards/margins": 3.046875, "rewards/rejected": -5.125, "step": 329 }, { "epoch": 0.12290502793296089, "grad_norm": 28.625, "learning_rate": 1.7971702351061284e-06, "logits/chosen": -0.265625, "logits/rejected": -2.1875, "logps/chosen": -0.41015625, "logps/rejected": -0.96875, "loss": 0.1299, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 2.78125, "rewards/rejected": -4.84375, "step": 330 }, { "epoch": 0.12327746741154562, "grad_norm": 42.0, "learning_rate": 1.7970767461136924e-06, "logits/chosen": -0.376953125, "logits/rejected": -1.40625, "logps/chosen": -0.33203125, "logps/rejected": -0.90234375, "loss": 0.249, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 2.859375, "rewards/rejected": -4.5, "step": 331 }, { "epoch": 0.12364990689013035, "grad_norm": 48.0, "learning_rate": 1.796981740296447e-06, "logits/chosen": -0.46875, "logits/rejected": -2.078125, "logps/chosen": -0.390625, "logps/rejected": -0.9296875, "loss": 0.2559, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9609375, "rewards/margins": 2.6875, "rewards/rejected": -4.65625, "step": 332 }, { "epoch": 0.12402234636871508, "grad_norm": 29.375, "learning_rate": 1.7968852178150335e-06, "logits/chosen": -0.310546875, "logits/rejected": -2.09375, "logps/chosen": -0.376953125, "logps/rejected": -1.0703125, "loss": 0.0996, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8828125, "rewards/margins": 3.46875, "rewards/rejected": -5.375, "step": 333 }, { "epoch": 0.12439478584729981, "grad_norm": 58.75, "learning_rate": 1.7967871788326566e-06, "logits/chosen": -0.3515625, "logits/rejected": -2.0625, "logps/chosen": -0.625, "logps/rejected": -1.21875, "loss": 0.1768, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.125, "rewards/margins": 2.96875, "rewards/rejected": -6.09375, "step": 334 }, { "epoch": 0.12476722532588454, "grad_norm": 60.0, "learning_rate": 1.7966876235150864e-06, "logits/chosen": -0.265625, "logits/rejected": -2.09375, "logps/chosen": -0.48046875, "logps/rejected": -1.234375, "loss": 0.1885, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.40625, "rewards/margins": 3.75, "rewards/rejected": -6.15625, "step": 335 }, { "epoch": 0.12513966480446928, "grad_norm": 27.5, "learning_rate": 1.7965865520306558e-06, "logits/chosen": -0.458984375, "logits/rejected": -1.9296875, "logps/chosen": -0.18359375, "logps/rejected": -0.84375, "loss": 0.1377, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.921875, "rewards/margins": 3.3125, "rewards/rejected": -4.21875, "step": 336 }, { "epoch": 0.125512104283054, "grad_norm": 23.125, "learning_rate": 1.7964839645502622e-06, "logits/chosen": -0.357421875, "logits/rejected": -2.140625, "logps/chosen": -0.28125, "logps/rejected": -0.96875, "loss": 0.0923, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.40625, "rewards/margins": 3.4375, "rewards/rejected": -4.84375, "step": 337 }, { "epoch": 0.12588454376163874, "grad_norm": 121.5, "learning_rate": 1.7963798612473651e-06, "logits/chosen": -0.671875, "logits/rejected": -2.046875, "logps/chosen": -0.65234375, "logps/rejected": -0.921875, "loss": 0.9062, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.25, "rewards/margins": 1.359375, "rewards/rejected": -4.625, "step": 338 }, { "epoch": 0.12625698324022347, "grad_norm": 10.8125, "learning_rate": 1.7962742422979892e-06, "logits/chosen": -0.10205078125, "logits/rejected": -1.9296875, "logps/chosen": -0.1904296875, "logps/rejected": -1.15625, "loss": 0.0425, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.94921875, "rewards/margins": 4.8125, "rewards/rejected": -5.75, "step": 339 }, { "epoch": 0.1266294227188082, "grad_norm": 54.75, "learning_rate": 1.7961671078807195e-06, "logits/chosen": -0.408203125, "logits/rejected": -2.203125, "logps/chosen": -0.65625, "logps/rejected": -1.125, "loss": 0.2188, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 2.34375, "rewards/rejected": -5.625, "step": 340 }, { "epoch": 0.12700186219739293, "grad_norm": 62.75, "learning_rate": 1.796058458176705e-06, "logits/chosen": -0.494140625, "logits/rejected": -1.9765625, "logps/chosen": -0.3515625, "logps/rejected": -0.828125, "loss": 0.3633, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.765625, "rewards/margins": 2.40625, "rewards/rejected": -4.15625, "step": 341 }, { "epoch": 0.12737430167597766, "grad_norm": 264.0, "learning_rate": 1.7959482933696566e-06, "logits/chosen": -0.408203125, "logits/rejected": -1.265625, "logps/chosen": -0.4453125, "logps/rejected": -0.73046875, "loss": 1.0781, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.234375, "rewards/margins": 1.40625, "rewards/rejected": -3.640625, "step": 342 }, { "epoch": 0.1277467411545624, "grad_norm": 113.5, "learning_rate": 1.795836613645846e-06, "logits/chosen": -0.447265625, "logits/rejected": -1.96875, "logps/chosen": -0.640625, "logps/rejected": -1.015625, "loss": 0.5781, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.21875, "rewards/margins": 1.8828125, "rewards/rejected": -5.09375, "step": 343 }, { "epoch": 0.12811918063314712, "grad_norm": 76.5, "learning_rate": 1.7957234191941081e-06, "logits/chosen": -0.46484375, "logits/rejected": -1.8671875, "logps/chosen": -0.6171875, "logps/rejected": -1.15625, "loss": 0.2559, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.09375, "rewards/margins": 2.703125, "rewards/rejected": -5.8125, "step": 344 }, { "epoch": 0.12849162011173185, "grad_norm": 29.0, "learning_rate": 1.7956087102058375e-06, "logits/chosen": -0.2197265625, "logits/rejected": -2.125, "logps/chosen": -0.283203125, "logps/rejected": -1.09375, "loss": 0.0923, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.421875, "rewards/margins": 4.0625, "rewards/rejected": -5.5, "step": 345 }, { "epoch": 0.12886405959031658, "grad_norm": 6.46875, "learning_rate": 1.7954924868749904e-06, "logits/chosen": -0.353515625, "logits/rejected": -2.0, "logps/chosen": -0.212890625, "logps/rejected": -1.109375, "loss": 0.0291, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0625, "rewards/margins": 4.46875, "rewards/rejected": -5.5625, "step": 346 }, { "epoch": 0.1292364990689013, "grad_norm": 3.484375, "learning_rate": 1.7953747493980832e-06, "logits/chosen": -0.197265625, "logits/rejected": -1.96875, "logps/chosen": -0.11376953125, "logps/rejected": -1.28125, "loss": 0.0132, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5703125, "rewards/margins": 5.8125, "rewards/rejected": -6.375, "step": 347 }, { "epoch": 0.12960893854748604, "grad_norm": 23.75, "learning_rate": 1.795255497974193e-06, "logits/chosen": -0.3671875, "logits/rejected": -2.109375, "logps/chosen": -0.42578125, "logps/rejected": -1.1328125, "loss": 0.0947, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 3.53125, "rewards/rejected": -5.65625, "step": 348 }, { "epoch": 0.12998137802607077, "grad_norm": 5.0, "learning_rate": 1.795134732804956e-06, "logits/chosen": -0.29296875, "logits/rejected": -1.90625, "logps/chosen": -0.208984375, "logps/rejected": -1.1640625, "loss": 0.0203, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 4.75, "rewards/rejected": -5.8125, "step": 349 }, { "epoch": 0.1303538175046555, "grad_norm": 1.7890625, "learning_rate": 1.7950124540945683e-06, "logits/chosen": -0.294921875, "logits/rejected": -1.9375, "logps/chosen": -0.240234375, "logps/rejected": -1.3828125, "loss": 0.0073, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.203125, "rewards/margins": 5.6875, "rewards/rejected": -6.90625, "step": 350 }, { "epoch": 0.13072625698324022, "grad_norm": 11.4375, "learning_rate": 1.7948886620497858e-06, "logits/chosen": -0.318359375, "logits/rejected": -2.015625, "logps/chosen": -0.408203125, "logps/rejected": -1.234375, "loss": 0.04, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.046875, "rewards/margins": 4.125, "rewards/rejected": -6.1875, "step": 351 }, { "epoch": 0.13109869646182495, "grad_norm": 96.0, "learning_rate": 1.7947633568799224e-06, "logits/chosen": -0.486328125, "logits/rejected": -2.0, "logps/chosen": -0.55859375, "logps/rejected": -1.1015625, "loss": 0.3555, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.796875, "rewards/margins": 2.71875, "rewards/rejected": -5.5, "step": 352 }, { "epoch": 0.13147113594040968, "grad_norm": 11.875, "learning_rate": 1.7946365387968502e-06, "logits/chosen": -0.578125, "logits/rejected": -1.7109375, "logps/chosen": -0.240234375, "logps/rejected": -0.9765625, "loss": 0.0552, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.203125, "rewards/margins": 3.671875, "rewards/rejected": -4.875, "step": 353 }, { "epoch": 0.1318435754189944, "grad_norm": 4.21875, "learning_rate": 1.7945082080150006e-06, "logits/chosen": -0.2275390625, "logits/rejected": -1.921875, "logps/chosen": -0.271484375, "logps/rejected": -1.265625, "loss": 0.0166, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.359375, "rewards/margins": 4.96875, "rewards/rejected": -6.3125, "step": 354 }, { "epoch": 0.13221601489757914, "grad_norm": 23.875, "learning_rate": 1.7943783647513616e-06, "logits/chosen": -0.322265625, "logits/rejected": -1.40625, "logps/chosen": -0.244140625, "logps/rejected": -1.0234375, "loss": 0.0786, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.21875, "rewards/margins": 3.890625, "rewards/rejected": -5.125, "step": 355 }, { "epoch": 0.13258845437616387, "grad_norm": 50.25, "learning_rate": 1.7942470092254794e-06, "logits/chosen": -0.578125, "logits/rejected": -1.71875, "logps/chosen": -0.498046875, "logps/rejected": -1.046875, "loss": 0.207, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5, "rewards/margins": 2.75, "rewards/rejected": -5.25, "step": 356 }, { "epoch": 0.1329608938547486, "grad_norm": 25.375, "learning_rate": 1.7941141416594563e-06, "logits/chosen": -0.37109375, "logits/rejected": -1.546875, "logps/chosen": -0.1552734375, "logps/rejected": -0.9296875, "loss": 0.104, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7734375, "rewards/margins": 3.859375, "rewards/rejected": -4.625, "step": 357 }, { "epoch": 0.13333333333333333, "grad_norm": 58.75, "learning_rate": 1.7939797622779525e-06, "logits/chosen": -0.19140625, "logits/rejected": -1.6171875, "logps/chosen": -0.3984375, "logps/rejected": -1.203125, "loss": 0.1699, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.984375, "rewards/margins": 4.0625, "rewards/rejected": -6.0625, "step": 358 }, { "epoch": 0.13370577281191806, "grad_norm": 23.5, "learning_rate": 1.7938438713081835e-06, "logits/chosen": -0.291015625, "logits/rejected": -2.109375, "logps/chosen": -0.431640625, "logps/rejected": -1.2265625, "loss": 0.0549, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 3.96875, "rewards/rejected": -6.125, "step": 359 }, { "epoch": 0.1340782122905028, "grad_norm": 11.375, "learning_rate": 1.7937064689799212e-06, "logits/chosen": -0.609375, "logits/rejected": -1.96875, "logps/chosen": -0.3515625, "logps/rejected": -1.1640625, "loss": 0.041, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.765625, "rewards/margins": 4.0625, "rewards/rejected": -5.8125, "step": 360 }, { "epoch": 0.13445065176908752, "grad_norm": 13.3125, "learning_rate": 1.793567555525492e-06, "logits/chosen": -0.21484375, "logits/rejected": -1.7890625, "logps/chosen": -0.369140625, "logps/rejected": -1.3671875, "loss": 0.0303, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.84375, "rewards/margins": 5.0, "rewards/rejected": -6.84375, "step": 361 }, { "epoch": 0.13482309124767225, "grad_norm": 3.875, "learning_rate": 1.7934271311797786e-06, "logits/chosen": -0.2421875, "logits/rejected": -2.109375, "logps/chosen": -0.3203125, "logps/rejected": -1.375, "loss": 0.0134, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 5.25, "rewards/rejected": -6.875, "step": 362 }, { "epoch": 0.13519553072625698, "grad_norm": 4.0625, "learning_rate": 1.793285196180218e-06, "logits/chosen": -0.220703125, "logits/rejected": -1.84375, "logps/chosen": -0.259765625, "logps/rejected": -1.4375, "loss": 0.0127, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3046875, "rewards/margins": 5.875, "rewards/rejected": -7.1875, "step": 363 }, { "epoch": 0.1355679702048417, "grad_norm": 1.1640625, "learning_rate": 1.7931417507668012e-06, "logits/chosen": -0.37109375, "logits/rejected": -1.9296875, "logps/chosen": -0.07958984375, "logps/rejected": -1.3515625, "loss": 0.0044, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3984375, "rewards/margins": 6.375, "rewards/rejected": -6.75, "step": 364 }, { "epoch": 0.13594040968342644, "grad_norm": 24.625, "learning_rate": 1.7929967951820735e-06, "logits/chosen": -0.39453125, "logits/rejected": -1.4453125, "logps/chosen": -0.3984375, "logps/rejected": -0.9765625, "loss": 0.125, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0, "rewards/margins": 2.875, "rewards/rejected": -4.875, "step": 365 }, { "epoch": 0.13631284916201117, "grad_norm": 89.5, "learning_rate": 1.7928503296711332e-06, "logits/chosen": -0.05029296875, "logits/rejected": -1.859375, "logps/chosen": -0.58203125, "logps/rejected": -1.2734375, "loss": 0.2578, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.921875, "rewards/margins": 3.453125, "rewards/rejected": -6.375, "step": 366 }, { "epoch": 0.1366852886405959, "grad_norm": 2.1875, "learning_rate": 1.7927023544816321e-06, "logits/chosen": -0.25390625, "logits/rejected": -1.7578125, "logps/chosen": -0.216796875, "logps/rejected": -1.421875, "loss": 0.0072, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0859375, "rewards/margins": 6.0, "rewards/rejected": -7.09375, "step": 367 }, { "epoch": 0.13705772811918063, "grad_norm": 6.125, "learning_rate": 1.7925528698637746e-06, "logits/chosen": -0.2021484375, "logits/rejected": -1.8046875, "logps/chosen": -0.220703125, "logps/rejected": -1.125, "loss": 0.0262, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1015625, "rewards/margins": 4.53125, "rewards/rejected": -5.625, "step": 368 }, { "epoch": 0.13743016759776536, "grad_norm": 5.84375, "learning_rate": 1.7924018760703169e-06, "logits/chosen": -0.2890625, "logits/rejected": -1.890625, "logps/chosen": -0.5078125, "logps/rejected": -1.625, "loss": 0.0114, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.53125, "rewards/margins": 5.625, "rewards/rejected": -8.125, "step": 369 }, { "epoch": 0.1378026070763501, "grad_norm": 14.1875, "learning_rate": 1.7922493733565674e-06, "logits/chosen": -0.1962890625, "logits/rejected": -2.03125, "logps/chosen": -0.375, "logps/rejected": -1.2421875, "loss": 0.0361, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.875, "rewards/margins": 4.34375, "rewards/rejected": -6.21875, "step": 370 }, { "epoch": 0.13817504655493482, "grad_norm": 116.0, "learning_rate": 1.7920953619803858e-06, "logits/chosen": -0.2119140625, "logits/rejected": -1.8125, "logps/chosen": -0.609375, "logps/rejected": -1.40625, "loss": 0.2793, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.046875, "rewards/margins": 4.0, "rewards/rejected": -7.0625, "step": 371 }, { "epoch": 0.13854748603351955, "grad_norm": 8.0625, "learning_rate": 1.7919398422021826e-06, "logits/chosen": -0.11376953125, "logits/rejected": -1.953125, "logps/chosen": -0.5, "logps/rejected": -1.515625, "loss": 0.0162, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5, "rewards/margins": 5.09375, "rewards/rejected": -7.5625, "step": 372 }, { "epoch": 0.13891992551210428, "grad_norm": 3.6875, "learning_rate": 1.791782814284919e-06, "logits/chosen": -0.2451171875, "logits/rejected": -1.8515625, "logps/chosen": -0.25, "logps/rejected": -1.2734375, "loss": 0.0135, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 5.125, "rewards/rejected": -6.375, "step": 373 }, { "epoch": 0.139292364990689, "grad_norm": 51.75, "learning_rate": 1.7916242784941064e-06, "logits/chosen": -0.478515625, "logits/rejected": -1.7265625, "logps/chosen": -0.421875, "logps/rejected": -1.03125, "loss": 0.2256, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.109375, "rewards/margins": 3.046875, "rewards/rejected": -5.15625, "step": 374 }, { "epoch": 0.13966480446927373, "grad_norm": 5.8125, "learning_rate": 1.7914642350978049e-06, "logits/chosen": -0.390625, "logits/rejected": -1.65625, "logps/chosen": -0.337890625, "logps/rejected": -1.25, "loss": 0.0234, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6875, "rewards/margins": 4.5625, "rewards/rejected": -6.25, "step": 375 }, { "epoch": 0.14003724394785846, "grad_norm": 39.75, "learning_rate": 1.7913026843666243e-06, "logits/chosen": -0.3203125, "logits/rejected": -1.640625, "logps/chosen": -0.4609375, "logps/rejected": -1.40625, "loss": 0.1016, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3125, "rewards/margins": 4.71875, "rewards/rejected": -7.03125, "step": 376 }, { "epoch": 0.1404096834264432, "grad_norm": 3.09375, "learning_rate": 1.7911396265737234e-06, "logits/chosen": -0.26953125, "logits/rejected": -1.6171875, "logps/chosen": -0.208984375, "logps/rejected": -1.265625, "loss": 0.0117, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 5.3125, "rewards/rejected": -6.375, "step": 377 }, { "epoch": 0.14078212290502792, "grad_norm": 36.25, "learning_rate": 1.7909750619948092e-06, "logits/chosen": -0.30078125, "logits/rejected": -1.59375, "logps/chosen": -0.45703125, "logps/rejected": -1.25, "loss": 0.1074, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.296875, "rewards/margins": 3.953125, "rewards/rejected": -6.25, "step": 378 }, { "epoch": 0.14115456238361265, "grad_norm": 17.875, "learning_rate": 1.790808990908136e-06, "logits/chosen": -0.134765625, "logits/rejected": -1.6171875, "logps/chosen": -0.640625, "logps/rejected": -1.390625, "loss": 0.0535, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.21875, "rewards/margins": 3.703125, "rewards/rejected": -6.9375, "step": 379 }, { "epoch": 0.14152700186219738, "grad_norm": 3.28125, "learning_rate": 1.7906414135945056e-06, "logits/chosen": -0.25, "logits/rejected": -1.9921875, "logps/chosen": -0.10546875, "logps/rejected": -1.1015625, "loss": 0.0157, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.52734375, "rewards/margins": 5.0, "rewards/rejected": -5.5, "step": 380 }, { "epoch": 0.1418994413407821, "grad_norm": 4.28125, "learning_rate": 1.7904723303372661e-06, "logits/chosen": -0.1015625, "logits/rejected": -1.875, "logps/chosen": -0.1640625, "logps/rejected": -1.359375, "loss": 0.0126, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8203125, "rewards/margins": 5.96875, "rewards/rejected": -6.8125, "step": 381 }, { "epoch": 0.14227188081936684, "grad_norm": 3.703125, "learning_rate": 1.7903017414223132e-06, "logits/chosen": -0.244140625, "logits/rejected": -1.5546875, "logps/chosen": -0.51953125, "logps/rejected": -1.609375, "loss": 0.0096, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.59375, "rewards/margins": 5.4375, "rewards/rejected": -8.0, "step": 382 }, { "epoch": 0.14264432029795157, "grad_norm": 7.28125, "learning_rate": 1.7901296471380872e-06, "logits/chosen": -0.1630859375, "logits/rejected": -1.71875, "logps/chosen": -0.29296875, "logps/rejected": -1.421875, "loss": 0.0265, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.46875, "rewards/margins": 5.625, "rewards/rejected": -7.09375, "step": 383 }, { "epoch": 0.1430167597765363, "grad_norm": 82.0, "learning_rate": 1.789956047775574e-06, "logits/chosen": -0.19921875, "logits/rejected": -1.7734375, "logps/chosen": -1.046875, "logps/rejected": -1.5703125, "loss": 0.2197, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.25, "rewards/margins": 2.609375, "rewards/rejected": -7.84375, "step": 384 }, { "epoch": 0.14338919925512103, "grad_norm": 0.578125, "learning_rate": 1.7897809436283051e-06, "logits/chosen": -0.08837890625, "logits/rejected": -1.5390625, "logps/chosen": -0.39453125, "logps/rejected": -1.84375, "loss": 0.0016, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.96875, "rewards/margins": 7.25, "rewards/rejected": -9.25, "step": 385 }, { "epoch": 0.14376163873370576, "grad_norm": 1.03125, "learning_rate": 1.7896043349923557e-06, "logits/chosen": -0.0712890625, "logits/rejected": -1.5859375, "logps/chosen": -0.2451171875, "logps/rejected": -1.546875, "loss": 0.0034, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2265625, "rewards/margins": 6.5, "rewards/rejected": -7.6875, "step": 386 }, { "epoch": 0.1441340782122905, "grad_norm": 15.625, "learning_rate": 1.7894262221663448e-06, "logits/chosen": -0.08349609375, "logits/rejected": -1.640625, "logps/chosen": -0.75, "logps/rejected": -1.671875, "loss": 0.0352, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.75, "rewards/margins": 4.625, "rewards/rejected": -8.375, "step": 387 }, { "epoch": 0.14450651769087522, "grad_norm": 156.0, "learning_rate": 1.7892466054514352e-06, "logits/chosen": -0.5078125, "logits/rejected": -1.734375, "logps/chosen": -0.5546875, "logps/rejected": -1.3359375, "loss": 0.6914, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.765625, "rewards/margins": 3.921875, "rewards/rejected": -6.6875, "step": 388 }, { "epoch": 0.14487895716945998, "grad_norm": 204.0, "learning_rate": 1.7890654851513324e-06, "logits/chosen": -0.28125, "logits/rejected": -0.328125, "logps/chosen": -0.6171875, "logps/rejected": -0.5703125, "loss": 1.3594, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.09375, "rewards/margins": -0.2421875, "rewards/rejected": -2.84375, "step": 389 }, { "epoch": 0.1452513966480447, "grad_norm": 4.53125, "learning_rate": 1.7888828615722837e-06, "logits/chosen": -0.14453125, "logits/rejected": -1.7578125, "logps/chosen": -0.4921875, "logps/rejected": -1.703125, "loss": 0.0084, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.46875, "rewards/margins": 6.0625, "rewards/rejected": -8.5, "step": 390 }, { "epoch": 0.14562383612662944, "grad_norm": 82.0, "learning_rate": 1.7886987350230794e-06, "logits/chosen": -0.345703125, "logits/rejected": -1.3984375, "logps/chosen": -0.53125, "logps/rejected": -0.953125, "loss": 0.3867, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.65625, "rewards/margins": 2.109375, "rewards/rejected": -4.75, "step": 391 }, { "epoch": 0.14599627560521417, "grad_norm": 5.625, "learning_rate": 1.7885131058150502e-06, "logits/chosen": -0.373046875, "logits/rejected": -1.2265625, "logps/chosen": -0.173828125, "logps/rejected": -1.203125, "loss": 0.0195, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8671875, "rewards/margins": 5.125, "rewards/rejected": -6.0, "step": 392 }, { "epoch": 0.1463687150837989, "grad_norm": 0.166015625, "learning_rate": 1.7883259742620672e-06, "logits/chosen": -0.09228515625, "logits/rejected": -1.546875, "logps/chosen": -0.197265625, "logps/rejected": -1.9296875, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.984375, "rewards/margins": 8.625, "rewards/rejected": -9.625, "step": 393 }, { "epoch": 0.14674115456238362, "grad_norm": 19.0, "learning_rate": 1.7881373406805431e-06, "logits/chosen": -0.09033203125, "logits/rejected": -1.6171875, "logps/chosen": -0.427734375, "logps/rejected": -1.6171875, "loss": 0.0447, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.140625, "rewards/margins": 5.9375, "rewards/rejected": -8.0625, "step": 394 }, { "epoch": 0.14711359404096835, "grad_norm": 0.3515625, "learning_rate": 1.7879472053894292e-06, "logits/chosen": -0.0625, "logits/rejected": -1.421875, "logps/chosen": -0.25390625, "logps/rejected": -1.8515625, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.265625, "rewards/margins": 8.0, "rewards/rejected": -9.25, "step": 395 }, { "epoch": 0.14748603351955308, "grad_norm": 0.1953125, "learning_rate": 1.7877555687102164e-06, "logits/chosen": -0.06787109375, "logits/rejected": -1.359375, "logps/chosen": -0.24609375, "logps/rejected": -1.921875, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.234375, "rewards/margins": 8.375, "rewards/rejected": -9.625, "step": 396 }, { "epoch": 0.1478584729981378, "grad_norm": 0.2578125, "learning_rate": 1.7875624309669342e-06, "logits/chosen": -0.05322265625, "logits/rejected": -1.671875, "logps/chosen": -0.20703125, "logps/rejected": -1.875, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.03125, "rewards/margins": 8.375, "rewards/rejected": -9.375, "step": 397 }, { "epoch": 0.14823091247672254, "grad_norm": 37.75, "learning_rate": 1.7873677924861494e-06, "logits/chosen": -0.3125, "logits/rejected": -1.046875, "logps/chosen": -0.3828125, "logps/rejected": -1.390625, "loss": 0.1245, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9140625, "rewards/margins": 5.0, "rewards/rejected": -6.9375, "step": 398 }, { "epoch": 0.14860335195530727, "grad_norm": 0.126953125, "learning_rate": 1.787171653596968e-06, "logits/chosen": -0.024169921875, "logits/rejected": -1.4765625, "logps/chosen": -0.16015625, "logps/rejected": -1.953125, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8046875, "rewards/margins": 9.0, "rewards/rejected": -9.75, "step": 399 }, { "epoch": 0.148975791433892, "grad_norm": 4.5, "learning_rate": 1.7869740146310317e-06, "logits/chosen": -0.154296875, "logits/rejected": -1.3046875, "logps/chosen": -0.296875, "logps/rejected": -1.65625, "loss": 0.0127, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.484375, "rewards/margins": 6.8125, "rewards/rejected": -8.3125, "step": 400 }, { "epoch": 0.14934823091247673, "grad_norm": 106.5, "learning_rate": 1.7867748759225186e-06, "logits/chosen": -0.341796875, "logits/rejected": -0.6796875, "logps/chosen": -0.26953125, "logps/rejected": -0.62890625, "loss": 0.6367, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -1.3515625, "rewards/margins": 1.78125, "rewards/rejected": -3.140625, "step": 401 }, { "epoch": 0.14972067039106146, "grad_norm": 11.6875, "learning_rate": 1.7865742378081436e-06, "logits/chosen": -0.39453125, "logits/rejected": -1.6484375, "logps/chosen": -0.1953125, "logps/rejected": -1.0703125, "loss": 0.0437, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.97265625, "rewards/margins": 4.375, "rewards/rejected": -5.34375, "step": 402 }, { "epoch": 0.1500931098696462, "grad_norm": 0.65234375, "learning_rate": 1.7863721006271557e-06, "logits/chosen": 0.0498046875, "logits/rejected": -1.515625, "logps/chosen": -0.421875, "logps/rejected": -1.953125, "loss": 0.0015, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.109375, "rewards/margins": 7.625, "rewards/rejected": -9.75, "step": 403 }, { "epoch": 0.15046554934823092, "grad_norm": 0.193359375, "learning_rate": 1.786168464721339e-06, "logits/chosen": -0.2001953125, "logits/rejected": -1.5546875, "logps/chosen": -0.1005859375, "logps/rejected": -1.8125, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5, "rewards/margins": 8.5625, "rewards/rejected": -9.0625, "step": 404 }, { "epoch": 0.15083798882681565, "grad_norm": 57.5, "learning_rate": 1.7859633304350123e-06, "logits/chosen": 0.0045166015625, "logits/rejected": -1.890625, "logps/chosen": -0.66015625, "logps/rejected": -1.609375, "loss": 0.127, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3125, "rewards/margins": 4.75, "rewards/rejected": -8.0, "step": 405 }, { "epoch": 0.15121042830540038, "grad_norm": 41.5, "learning_rate": 1.785756698115027e-06, "logits/chosen": 0.08154296875, "logits/rejected": -1.2578125, "logps/chosen": -0.66796875, "logps/rejected": -1.515625, "loss": 0.0791, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.34375, "rewards/margins": 4.21875, "rewards/rejected": -7.5625, "step": 406 }, { "epoch": 0.1515828677839851, "grad_norm": 4.96875, "learning_rate": 1.7855485681107687e-06, "logits/chosen": -0.333984375, "logits/rejected": -1.28125, "logps/chosen": -0.388671875, "logps/rejected": -1.703125, "loss": 0.0101, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9453125, "rewards/margins": 6.5625, "rewards/rejected": -8.5, "step": 407 }, { "epoch": 0.15195530726256984, "grad_norm": 7.75, "learning_rate": 1.7853389407741543e-06, "logits/chosen": 0.068359375, "logits/rejected": -1.3671875, "logps/chosen": -0.66015625, "logps/rejected": -2.015625, "loss": 0.009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 6.8125, "rewards/rejected": -10.125, "step": 408 }, { "epoch": 0.15232774674115457, "grad_norm": 6.375, "learning_rate": 1.7851278164596322e-06, "logits/chosen": -0.130859375, "logits/rejected": -1.609375, "logps/chosen": -0.29296875, "logps/rejected": -1.515625, "loss": 0.0205, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.46875, "rewards/margins": 6.0625, "rewards/rejected": -7.5625, "step": 409 }, { "epoch": 0.1527001862197393, "grad_norm": 104.5, "learning_rate": 1.7849151955241836e-06, "logits/chosen": -0.248046875, "logits/rejected": -1.6328125, "logps/chosen": -0.875, "logps/rejected": -1.6015625, "loss": 0.2422, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.375, "rewards/margins": 3.65625, "rewards/rejected": -8.0, "step": 410 }, { "epoch": 0.15307262569832403, "grad_norm": 1.75, "learning_rate": 1.7847010783273184e-06, "logits/chosen": -0.0194091796875, "logits/rejected": -1.4921875, "logps/chosen": -0.51171875, "logps/rejected": -1.96875, "loss": 0.0031, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5625, "rewards/margins": 7.25, "rewards/rejected": -9.8125, "step": 411 }, { "epoch": 0.15344506517690876, "grad_norm": 15.875, "learning_rate": 1.7844854652310777e-06, "logits/chosen": -0.08056640625, "logits/rejected": -1.3671875, "logps/chosen": -0.6796875, "logps/rejected": -1.7109375, "loss": 0.0322, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.40625, "rewards/margins": 5.125, "rewards/rejected": -8.5, "step": 412 }, { "epoch": 0.1538175046554935, "grad_norm": 6.75, "learning_rate": 1.7842683566000314e-06, "logits/chosen": -0.267578125, "logits/rejected": -0.87890625, "logps/chosen": -0.2265625, "logps/rejected": -1.5703125, "loss": 0.0183, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1328125, "rewards/margins": 6.6875, "rewards/rejected": -7.84375, "step": 413 }, { "epoch": 0.15418994413407822, "grad_norm": 0.84375, "learning_rate": 1.7840497528012778e-06, "logits/chosen": -0.0859375, "logits/rejected": -1.2734375, "logps/chosen": -0.5859375, "logps/rejected": -1.96875, "loss": 0.0022, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9375, "rewards/margins": 6.90625, "rewards/rejected": -9.875, "step": 414 }, { "epoch": 0.15456238361266295, "grad_norm": 0.443359375, "learning_rate": 1.7838296542044447e-06, "logits/chosen": -0.1953125, "logits/rejected": -1.484375, "logps/chosen": -0.296875, "logps/rejected": -1.84375, "loss": 0.0012, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.484375, "rewards/margins": 7.75, "rewards/rejected": -9.25, "step": 415 }, { "epoch": 0.15493482309124768, "grad_norm": 0.412109375, "learning_rate": 1.7836080611816857e-06, "logits/chosen": -0.11279296875, "logits/rejected": -1.5078125, "logps/chosen": -0.09814453125, "logps/rejected": -1.75, "loss": 0.0011, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4921875, "rewards/margins": 8.25, "rewards/rejected": -8.75, "step": 416 }, { "epoch": 0.1553072625698324, "grad_norm": 10.5, "learning_rate": 1.7833849741076825e-06, "logits/chosen": -0.27734375, "logits/rejected": -1.328125, "logps/chosen": -0.8984375, "logps/rejected": -1.984375, "loss": 0.0184, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.5, "rewards/margins": 5.4375, "rewards/rejected": -9.9375, "step": 417 }, { "epoch": 0.15567970204841713, "grad_norm": 1.265625, "learning_rate": 1.783160393359642e-06, "logits/chosen": 0.08740234375, "logits/rejected": -1.484375, "logps/chosen": -0.271484375, "logps/rejected": -1.8046875, "loss": 0.0026, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.359375, "rewards/margins": 7.625, "rewards/rejected": -9.0, "step": 418 }, { "epoch": 0.15605214152700186, "grad_norm": 0.5859375, "learning_rate": 1.782934319317298e-06, "logits/chosen": -0.059326171875, "logits/rejected": -1.4609375, "logps/chosen": -0.0791015625, "logps/rejected": -1.578125, "loss": 0.0018, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.39453125, "rewards/margins": 7.5, "rewards/rejected": -7.875, "step": 419 }, { "epoch": 0.1564245810055866, "grad_norm": 8.0625, "learning_rate": 1.7827067523629073e-06, "logits/chosen": -0.1416015625, "logits/rejected": -1.2578125, "logps/chosen": -0.734375, "logps/rejected": -2.09375, "loss": 0.0139, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.671875, "rewards/margins": 6.8125, "rewards/rejected": -10.5, "step": 420 }, { "epoch": 0.15679702048417132, "grad_norm": 224.0, "learning_rate": 1.782477692881253e-06, "logits/chosen": -0.08203125, "logits/rejected": -0.890625, "logps/chosen": -0.5703125, "logps/rejected": -1.3125, "loss": 1.0391, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.859375, "rewards/margins": 3.734375, "rewards/rejected": -6.5625, "step": 421 }, { "epoch": 0.15716945996275605, "grad_norm": 0.435546875, "learning_rate": 1.7822471412596409e-06, "logits/chosen": -0.146484375, "logits/rejected": -0.99609375, "logps/chosen": -0.365234375, "logps/rejected": -1.9375, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.828125, "rewards/margins": 7.875, "rewards/rejected": -9.75, "step": 422 }, { "epoch": 0.15754189944134078, "grad_norm": 2.03125, "learning_rate": 1.7820150978878998e-06, "logits/chosen": 0.02294921875, "logits/rejected": -1.1015625, "logps/chosen": -0.3671875, "logps/rejected": -1.953125, "loss": 0.0035, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.828125, "rewards/margins": 7.96875, "rewards/rejected": -9.8125, "step": 423 }, { "epoch": 0.1579143389199255, "grad_norm": 0.83984375, "learning_rate": 1.7817815631583805e-06, "logits/chosen": -0.1943359375, "logits/rejected": -1.0625, "logps/chosen": -0.3203125, "logps/rejected": -1.8125, "loss": 0.002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6015625, "rewards/margins": 7.46875, "rewards/rejected": -9.0625, "step": 424 }, { "epoch": 0.15828677839851024, "grad_norm": 0.78125, "learning_rate": 1.7815465374659564e-06, "logits/chosen": -0.03173828125, "logits/rejected": -1.328125, "logps/chosen": -0.50390625, "logps/rejected": -2.03125, "loss": 0.0014, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.53125, "rewards/margins": 7.65625, "rewards/rejected": -10.1875, "step": 425 }, { "epoch": 0.15865921787709497, "grad_norm": 1.5546875, "learning_rate": 1.7813100212080214e-06, "logits/chosen": -0.05322265625, "logits/rejected": -1.3125, "logps/chosen": -0.298828125, "logps/rejected": -1.9375, "loss": 0.003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5, "rewards/margins": 8.125, "rewards/rejected": -9.625, "step": 426 }, { "epoch": 0.1590316573556797, "grad_norm": 7.78125, "learning_rate": 1.7810720147844892e-06, "logits/chosen": -0.0050048828125, "logits/rejected": -1.1328125, "logps/chosen": -0.78515625, "logps/rejected": -1.921875, "loss": 0.0114, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9375, "rewards/margins": 5.65625, "rewards/rejected": -9.625, "step": 427 }, { "epoch": 0.15940409683426443, "grad_norm": 0.06396484375, "learning_rate": 1.780832518597794e-06, "logits/chosen": -0.022705078125, "logits/rejected": -0.88671875, "logps/chosen": -0.19140625, "logps/rejected": -2.34375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9609375, "rewards/margins": 10.8125, "rewards/rejected": -11.75, "step": 428 }, { "epoch": 0.15977653631284916, "grad_norm": 288.0, "learning_rate": 1.7805915330528888e-06, "logits/chosen": -0.2275390625, "logits/rejected": -1.3515625, "logps/chosen": -1.8046875, "logps/rejected": -2.015625, "loss": 2.2969, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -9.0, "rewards/margins": 1.03125, "rewards/rejected": -10.0625, "step": 429 }, { "epoch": 0.1601489757914339, "grad_norm": 1.8671875, "learning_rate": 1.7803490585572441e-06, "logits/chosen": -0.076171875, "logits/rejected": -1.0703125, "logps/chosen": -0.302734375, "logps/rejected": -1.734375, "loss": 0.0042, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.515625, "rewards/margins": 7.1875, "rewards/rejected": -8.6875, "step": 430 }, { "epoch": 0.16052141527001862, "grad_norm": 0.44140625, "learning_rate": 1.7801050955208493e-06, "logits/chosen": -0.345703125, "logits/rejected": -1.1640625, "logps/chosen": -0.486328125, "logps/rejected": -2.34375, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 9.25, "rewards/rejected": -11.6875, "step": 431 }, { "epoch": 0.16089385474860335, "grad_norm": 0.1416015625, "learning_rate": 1.7798596443562094e-06, "logits/chosen": -0.287109375, "logits/rejected": -1.03125, "logps/chosen": -0.361328125, "logps/rejected": -2.1875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8046875, "rewards/margins": 9.125, "rewards/rejected": -10.9375, "step": 432 }, { "epoch": 0.16126629422718808, "grad_norm": 0.318359375, "learning_rate": 1.7796127054783467e-06, "logits/chosen": -0.0185546875, "logits/rejected": -1.25, "logps/chosen": -0.31640625, "logps/rejected": -1.96875, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.578125, "rewards/margins": 8.25, "rewards/rejected": -9.8125, "step": 433 }, { "epoch": 0.1616387337057728, "grad_norm": 0.1455078125, "learning_rate": 1.7793642793047982e-06, "logits/chosen": -0.1416015625, "logits/rejected": -1.015625, "logps/chosen": -0.1826171875, "logps/rejected": -2.015625, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9140625, "rewards/margins": 9.125, "rewards/rejected": -10.0625, "step": 434 }, { "epoch": 0.16201117318435754, "grad_norm": 1.0625, "learning_rate": 1.779114366255616e-06, "logits/chosen": -0.125, "logits/rejected": -1.1015625, "logps/chosen": -0.51953125, "logps/rejected": -2.1875, "loss": 0.0015, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.59375, "rewards/margins": 8.375, "rewards/rejected": -11.0, "step": 435 }, { "epoch": 0.16238361266294227, "grad_norm": 0.1650390625, "learning_rate": 1.7788629667533667e-06, "logits/chosen": 0.022216796875, "logits/rejected": -1.0234375, "logps/chosen": -0.279296875, "logps/rejected": -2.296875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.390625, "rewards/margins": 10.0625, "rewards/rejected": -11.5, "step": 436 }, { "epoch": 0.162756052141527, "grad_norm": 18.5, "learning_rate": 1.7786100812231295e-06, "logits/chosen": -0.103515625, "logits/rejected": -1.4296875, "logps/chosen": -0.4375, "logps/rejected": -1.625, "loss": 0.0374, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1875, "rewards/margins": 5.90625, "rewards/rejected": -8.125, "step": 437 }, { "epoch": 0.16312849162011173, "grad_norm": 3.453125, "learning_rate": 1.7783557100924967e-06, "logits/chosen": -0.251953125, "logits/rejected": -1.203125, "logps/chosen": -0.65625, "logps/rejected": -2.0, "loss": 0.0051, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 6.75, "rewards/rejected": -10.0, "step": 438 }, { "epoch": 0.16350093109869646, "grad_norm": 5.1875, "learning_rate": 1.7780998537915722e-06, "logits/chosen": -0.11767578125, "logits/rejected": -0.73828125, "logps/chosen": -0.328125, "logps/rejected": -1.859375, "loss": 0.0106, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.640625, "rewards/margins": 7.6875, "rewards/rejected": -9.3125, "step": 439 }, { "epoch": 0.16387337057728119, "grad_norm": 0.162109375, "learning_rate": 1.7778425127529718e-06, "logits/chosen": -0.1083984375, "logits/rejected": -1.0078125, "logps/chosen": -0.439453125, "logps/rejected": -2.5, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.203125, "rewards/margins": 10.25, "rewards/rejected": -12.5, "step": 440 }, { "epoch": 0.16424581005586592, "grad_norm": 225.0, "learning_rate": 1.7775836874118214e-06, "logits/chosen": -0.158203125, "logits/rejected": -1.0, "logps/chosen": -1.21875, "logps/rejected": -2.3125, "loss": 0.5586, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.09375, "rewards/margins": 5.46875, "rewards/rejected": -11.5625, "step": 441 }, { "epoch": 0.16461824953445064, "grad_norm": 0.0223388671875, "learning_rate": 1.7773233782057564e-06, "logits/chosen": 0.058837890625, "logits/rejected": -0.92578125, "logps/chosen": -0.19140625, "logps/rejected": -2.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9609375, "rewards/margins": 10.9375, "rewards/rejected": -11.875, "step": 442 }, { "epoch": 0.16499068901303537, "grad_norm": 8.9375, "learning_rate": 1.7770615855749208e-06, "logits/chosen": -0.40234375, "logits/rejected": -1.4375, "logps/chosen": -0.35546875, "logps/rejected": -1.515625, "loss": 0.0188, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7734375, "rewards/margins": 5.8125, "rewards/rejected": -7.5625, "step": 443 }, { "epoch": 0.1653631284916201, "grad_norm": 7.59375, "learning_rate": 1.7767983099619687e-06, "logits/chosen": -0.36328125, "logits/rejected": -0.60546875, "logps/chosen": -0.279296875, "logps/rejected": -1.234375, "loss": 0.0194, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3984375, "rewards/margins": 4.78125, "rewards/rejected": -6.1875, "step": 444 }, { "epoch": 0.16573556797020483, "grad_norm": 0.251953125, "learning_rate": 1.7765335518120593e-06, "logits/chosen": -0.059326171875, "logits/rejected": -1.1015625, "logps/chosen": -0.107421875, "logps/rejected": -1.8515625, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.53515625, "rewards/margins": 8.75, "rewards/rejected": -9.25, "step": 445 }, { "epoch": 0.16610800744878956, "grad_norm": 0.5234375, "learning_rate": 1.7762673115728598e-06, "logits/chosen": -0.140625, "logits/rejected": -1.1796875, "logps/chosen": -0.25390625, "logps/rejected": -1.890625, "loss": 0.0012, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.265625, "rewards/margins": 8.1875, "rewards/rejected": -9.5, "step": 446 }, { "epoch": 0.1664804469273743, "grad_norm": 240.0, "learning_rate": 1.775999589694544e-06, "logits/chosen": -0.1025390625, "logits/rejected": -1.0625, "logps/chosen": -0.5546875, "logps/rejected": -1.8125, "loss": 1.0391, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.765625, "rewards/margins": 6.3125, "rewards/rejected": -9.0625, "step": 447 }, { "epoch": 0.16685288640595902, "grad_norm": 0.01153564453125, "learning_rate": 1.7757303866297896e-06, "logits/chosen": -0.2138671875, "logits/rejected": -1.03125, "logps/chosen": -0.09375, "logps/rejected": -2.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.466796875, "rewards/margins": 11.25, "rewards/rejected": -11.75, "step": 448 }, { "epoch": 0.16722532588454375, "grad_norm": 0.50390625, "learning_rate": 1.7754597028337796e-06, "logits/chosen": -0.06591796875, "logits/rejected": -0.921875, "logps/chosen": -0.291015625, "logps/rejected": -2.21875, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.453125, "rewards/margins": 9.625, "rewards/rejected": -11.125, "step": 449 }, { "epoch": 0.16759776536312848, "grad_norm": 0.10595703125, "learning_rate": 1.7751875387642004e-06, "logits/chosen": -0.05419921875, "logits/rejected": -0.390625, "logps/chosen": -0.35546875, "logps/rejected": -2.3125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.78125, "rewards/margins": 9.75, "rewards/rejected": -11.5, "step": 450 }, { "epoch": 0.1679702048417132, "grad_norm": 0.03955078125, "learning_rate": 1.7749138948812414e-06, "logits/chosen": 0.150390625, "logits/rejected": -0.6796875, "logps/chosen": -0.23828125, "logps/rejected": -2.265625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1953125, "rewards/margins": 10.125, "rewards/rejected": -11.375, "step": 451 }, { "epoch": 0.16834264432029794, "grad_norm": 1.7890625, "learning_rate": 1.7746387716475942e-06, "logits/chosen": -0.166015625, "logits/rejected": -1.1015625, "logps/chosen": -0.7265625, "logps/rejected": -2.34375, "loss": 0.0027, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.625, "rewards/margins": 8.0625, "rewards/rejected": -11.6875, "step": 452 }, { "epoch": 0.16871508379888267, "grad_norm": 0.048583984375, "learning_rate": 1.7743621695284518e-06, "logits/chosen": -0.1875, "logits/rejected": -0.8125, "logps/chosen": -0.30078125, "logps/rejected": -2.296875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5, "rewards/margins": 10.0, "rewards/rejected": -11.5, "step": 453 }, { "epoch": 0.1690875232774674, "grad_norm": 5.25, "learning_rate": 1.7740840889915073e-06, "logits/chosen": -0.05712890625, "logits/rejected": -0.87890625, "logps/chosen": -0.7421875, "logps/rejected": -2.09375, "loss": 0.0116, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6875, "rewards/margins": 6.78125, "rewards/rejected": -10.5, "step": 454 }, { "epoch": 0.16945996275605213, "grad_norm": 0.5703125, "learning_rate": 1.7738045305069545e-06, "logits/chosen": -0.1328125, "logits/rejected": -1.2265625, "logps/chosen": -0.0703125, "logps/rejected": -1.890625, "loss": 0.0015, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3515625, "rewards/margins": 9.0625, "rewards/rejected": -9.4375, "step": 455 }, { "epoch": 0.16983240223463686, "grad_norm": 0.0042724609375, "learning_rate": 1.7735234945474854e-06, "logits/chosen": -0.025390625, "logits/rejected": -0.828125, "logps/chosen": -0.109375, "logps/rejected": -2.578125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.546875, "rewards/margins": 12.375, "rewards/rejected": -12.875, "step": 456 }, { "epoch": 0.1702048417132216, "grad_norm": 1.390625, "learning_rate": 1.7732409815882906e-06, "logits/chosen": -0.197265625, "logits/rejected": -0.828125, "logps/chosen": -0.671875, "logps/rejected": -2.4375, "loss": 0.0018, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.34375, "rewards/margins": 8.8125, "rewards/rejected": -12.125, "step": 457 }, { "epoch": 0.17057728119180632, "grad_norm": 13.0625, "learning_rate": 1.7729569921070576e-06, "logits/chosen": -0.08642578125, "logits/rejected": -0.9609375, "logps/chosen": -0.302734375, "logps/rejected": -1.578125, "loss": 0.0184, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.515625, "rewards/margins": 6.375, "rewards/rejected": -7.875, "step": 458 }, { "epoch": 0.17094972067039105, "grad_norm": 0.1826171875, "learning_rate": 1.7726715265839714e-06, "logits/chosen": -0.185546875, "logits/rejected": -1.1796875, "logps/chosen": -0.32421875, "logps/rejected": -2.078125, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.625, "rewards/margins": 8.75, "rewards/rejected": -10.375, "step": 459 }, { "epoch": 0.1713221601489758, "grad_norm": 0.5390625, "learning_rate": 1.7723845855017116e-06, "logits/chosen": -0.423828125, "logits/rejected": -0.1279296875, "logps/chosen": -0.75, "logps/rejected": -2.78125, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.765625, "rewards/margins": 10.25, "rewards/rejected": -14.0, "step": 460 }, { "epoch": 0.17169459962756053, "grad_norm": 338.0, "learning_rate": 1.7720961693454538e-06, "logits/chosen": -0.08935546875, "logits/rejected": -0.5078125, "logps/chosen": -0.953125, "logps/rejected": -1.28125, "loss": 2.7812, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.78125, "rewards/margins": 1.609375, "rewards/rejected": -6.375, "step": 461 }, { "epoch": 0.17206703910614526, "grad_norm": 0.0035247802734375, "learning_rate": 1.7718062786028673e-06, "logits/chosen": 0.04541015625, "logits/rejected": -0.9375, "logps/chosen": -0.1611328125, "logps/rejected": -2.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8046875, "rewards/margins": 12.75, "rewards/rejected": -13.5, "step": 462 }, { "epoch": 0.17243947858473, "grad_norm": 11.0625, "learning_rate": 1.7715149137641142e-06, "logits/chosen": -0.1640625, "logits/rejected": -1.328125, "logps/chosen": -0.423828125, "logps/rejected": -1.3984375, "loss": 0.0254, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 4.875, "rewards/rejected": -7.0, "step": 463 }, { "epoch": 0.17281191806331472, "grad_norm": 2.890625, "learning_rate": 1.7712220753218503e-06, "logits/chosen": -0.00165557861328125, "logits/rejected": -0.84765625, "logps/chosen": -0.3359375, "logps/rejected": -1.828125, "loss": 0.0067, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6796875, "rewards/margins": 7.5, "rewards/rejected": -9.125, "step": 464 }, { "epoch": 0.17318435754189945, "grad_norm": 0.015625, "learning_rate": 1.7709277637712217e-06, "logits/chosen": -0.0167236328125, "logits/rejected": -0.8125, "logps/chosen": -0.2294921875, "logps/rejected": -2.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1484375, "rewards/margins": 11.8125, "rewards/rejected": -12.9375, "step": 465 }, { "epoch": 0.17355679702048418, "grad_norm": 0.05224609375, "learning_rate": 1.7706319796098662e-06, "logits/chosen": 0.11962890625, "logits/rejected": -0.80859375, "logps/chosen": -0.40625, "logps/rejected": -2.5625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 10.8125, "rewards/rejected": -12.8125, "step": 466 }, { "epoch": 0.1739292364990689, "grad_norm": 0.00885009765625, "learning_rate": 1.7703347233379114e-06, "logits/chosen": -0.1044921875, "logits/rejected": -0.58984375, "logps/chosen": -0.3203125, "logps/rejected": -2.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 11.875, "rewards/rejected": -13.5, "step": 467 }, { "epoch": 0.17430167597765364, "grad_norm": 2.234375, "learning_rate": 1.770035995457974e-06, "logits/chosen": -0.06640625, "logits/rejected": -0.4609375, "logps/chosen": -0.34375, "logps/rejected": -1.84375, "loss": 0.0036, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.71875, "rewards/margins": 7.5, "rewards/rejected": -9.25, "step": 468 }, { "epoch": 0.17467411545623837, "grad_norm": 0.0031585693359375, "learning_rate": 1.7697357964751584e-06, "logits/chosen": -0.0703125, "logits/rejected": -0.62109375, "logps/chosen": -0.16796875, "logps/rejected": -2.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.83984375, "rewards/margins": 12.9375, "rewards/rejected": -13.75, "step": 469 }, { "epoch": 0.1750465549348231, "grad_norm": 1.359375, "learning_rate": 1.769434126897057e-06, "logits/chosen": -0.2216796875, "logits/rejected": -1.3046875, "logps/chosen": -0.109375, "logps/rejected": -1.859375, "loss": 0.0042, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.546875, "rewards/margins": 8.75, "rewards/rejected": -9.3125, "step": 470 }, { "epoch": 0.17541899441340783, "grad_norm": 0.310546875, "learning_rate": 1.7691309872337495e-06, "logits/chosen": -0.054443359375, "logits/rejected": -1.0234375, "logps/chosen": -0.21484375, "logps/rejected": -2.421875, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.078125, "rewards/margins": 11.0, "rewards/rejected": -12.125, "step": 471 }, { "epoch": 0.17579143389199256, "grad_norm": 0.04345703125, "learning_rate": 1.7688263779977997e-06, "logits/chosen": -0.431640625, "logits/rejected": -1.1953125, "logps/chosen": -0.32421875, "logps/rejected": -2.359375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.625, "rewards/margins": 10.1875, "rewards/rejected": -11.8125, "step": 472 }, { "epoch": 0.1761638733705773, "grad_norm": 0.031494140625, "learning_rate": 1.7685202997042572e-06, "logits/chosen": -0.058837890625, "logits/rejected": -0.76171875, "logps/chosen": -0.3046875, "logps/rejected": -2.59375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.53125, "rewards/margins": 11.5, "rewards/rejected": -13.0, "step": 473 }, { "epoch": 0.17653631284916202, "grad_norm": 0.119140625, "learning_rate": 1.7682127528706554e-06, "logits/chosen": -0.431640625, "logits/rejected": -0.859375, "logps/chosen": -0.44921875, "logps/rejected": -2.5, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25, "rewards/margins": 10.25, "rewards/rejected": -12.5, "step": 474 }, { "epoch": 0.17690875232774675, "grad_norm": 36.75, "learning_rate": 1.7679037380170106e-06, "logits/chosen": -0.365234375, "logits/rejected": -0.41796875, "logps/chosen": -0.609375, "logps/rejected": -1.46875, "loss": 0.166, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.046875, "rewards/margins": 4.3125, "rewards/rejected": -7.375, "step": 475 }, { "epoch": 0.17728119180633148, "grad_norm": 0.01214599609375, "learning_rate": 1.7675932556658215e-06, "logits/chosen": -0.1396484375, "logits/rejected": -0.8125, "logps/chosen": -0.30859375, "logps/rejected": -2.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.546875, "rewards/margins": 11.375, "rewards/rejected": -12.875, "step": 476 }, { "epoch": 0.1776536312849162, "grad_norm": 2.34375, "learning_rate": 1.7672813063420683e-06, "logits/chosen": -0.37890625, "logits/rejected": -1.046875, "logps/chosen": -0.71875, "logps/rejected": -2.375, "loss": 0.0026, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.609375, "rewards/margins": 8.25, "rewards/rejected": -11.875, "step": 477 }, { "epoch": 0.17802607076350094, "grad_norm": 0.004730224609375, "learning_rate": 1.7669678905732113e-06, "logits/chosen": -0.048095703125, "logits/rejected": -0.7109375, "logps/chosen": -0.1689453125, "logps/rejected": -2.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.84375, "rewards/margins": 12.75, "rewards/rejected": -13.5625, "step": 478 }, { "epoch": 0.17839851024208567, "grad_norm": 0.84765625, "learning_rate": 1.7666530088891902e-06, "logits/chosen": -0.2578125, "logits/rejected": -0.87109375, "logps/chosen": -0.11767578125, "logps/rejected": -1.6875, "loss": 0.0014, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.58984375, "rewards/margins": 7.875, "rewards/rejected": -8.5, "step": 479 }, { "epoch": 0.1787709497206704, "grad_norm": 0.2001953125, "learning_rate": 1.7663366618224241e-06, "logits/chosen": -0.01263427734375, "logits/rejected": -0.80859375, "logps/chosen": -0.48828125, "logps/rejected": -2.34375, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 9.25, "rewards/rejected": -11.75, "step": 480 }, { "epoch": 0.17914338919925513, "grad_norm": 0.0245361328125, "learning_rate": 1.7660188499078088e-06, "logits/chosen": 0.10791015625, "logits/rejected": -0.875, "logps/chosen": -0.1787109375, "logps/rejected": -2.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.89453125, "rewards/margins": 10.875, "rewards/rejected": -11.75, "step": 481 }, { "epoch": 0.17951582867783986, "grad_norm": 8.8125, "learning_rate": 1.7656995736827182e-06, "logits/chosen": -0.28125, "logits/rejected": -0.81640625, "logps/chosen": -0.3984375, "logps/rejected": -2.125, "loss": 0.0187, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0, "rewards/margins": 8.625, "rewards/rejected": -10.625, "step": 482 }, { "epoch": 0.17988826815642459, "grad_norm": 45.0, "learning_rate": 1.7653788336870007e-06, "logits/chosen": -0.2451171875, "logits/rejected": -0.98828125, "logps/chosen": -0.75390625, "logps/rejected": -2.0625, "loss": 0.0723, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.78125, "rewards/margins": 6.5625, "rewards/rejected": -10.375, "step": 483 }, { "epoch": 0.18026070763500932, "grad_norm": 0.5078125, "learning_rate": 1.765056630462981e-06, "logits/chosen": -0.07421875, "logits/rejected": -0.68359375, "logps/chosen": -0.34375, "logps/rejected": -2.28125, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.71875, "rewards/margins": 9.6875, "rewards/rejected": -11.4375, "step": 484 }, { "epoch": 0.18063314711359404, "grad_norm": 0.8046875, "learning_rate": 1.764732964555457e-06, "logits/chosen": 0.1669921875, "logits/rejected": -1.328125, "logps/chosen": -0.380859375, "logps/rejected": -1.953125, "loss": 0.0019, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8984375, "rewards/margins": 7.875, "rewards/rejected": -9.75, "step": 485 }, { "epoch": 0.18100558659217877, "grad_norm": 2.78125, "learning_rate": 1.7644078365117e-06, "logits/chosen": -0.1728515625, "logits/rejected": -0.84765625, "logps/chosen": -0.57421875, "logps/rejected": -2.234375, "loss": 0.0049, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.875, "rewards/margins": 8.3125, "rewards/rejected": -11.125, "step": 486 }, { "epoch": 0.1813780260707635, "grad_norm": 0.271484375, "learning_rate": 1.7640812468814546e-06, "logits/chosen": -0.0067138671875, "logits/rejected": -1.0625, "logps/chosen": -0.12109375, "logps/rejected": -2.171875, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.60546875, "rewards/margins": 10.25, "rewards/rejected": -10.875, "step": 487 }, { "epoch": 0.18175046554934823, "grad_norm": 0.1865234375, "learning_rate": 1.7637531962169347e-06, "logits/chosen": -0.08447265625, "logits/rejected": -0.31640625, "logps/chosen": -0.19140625, "logps/rejected": -2.25, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.95703125, "rewards/margins": 10.3125, "rewards/rejected": -11.25, "step": 488 }, { "epoch": 0.18212290502793296, "grad_norm": 0.007598876953125, "learning_rate": 1.7634236850728262e-06, "logits/chosen": 0.0194091796875, "logits/rejected": -0.6484375, "logps/chosen": -0.427734375, "logps/rejected": -2.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 11.875, "rewards/rejected": -14.0, "step": 489 }, { "epoch": 0.1824953445065177, "grad_norm": 0.263671875, "learning_rate": 1.7630927140062839e-06, "logits/chosen": -0.265625, "logits/rejected": -0.97265625, "logps/chosen": -0.515625, "logps/rejected": -2.375, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.578125, "rewards/margins": 9.25, "rewards/rejected": -11.875, "step": 490 }, { "epoch": 0.18286778398510242, "grad_norm": 0.4296875, "learning_rate": 1.7627602835769312e-06, "logits/chosen": -0.341796875, "logits/rejected": -0.9296875, "logps/chosen": -0.359375, "logps/rejected": -2.0625, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8046875, "rewards/margins": 8.5, "rewards/rejected": -10.3125, "step": 491 }, { "epoch": 0.18324022346368715, "grad_norm": 2.828125, "learning_rate": 1.7624263943468592e-06, "logits/chosen": -0.08154296875, "logits/rejected": -0.9921875, "logps/chosen": -0.244140625, "logps/rejected": -1.90625, "loss": 0.0067, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.21875, "rewards/margins": 8.375, "rewards/rejected": -9.5625, "step": 492 }, { "epoch": 0.18361266294227188, "grad_norm": 0.28125, "learning_rate": 1.7620910468806254e-06, "logits/chosen": -0.072265625, "logits/rejected": -0.90625, "logps/chosen": -0.62890625, "logps/rejected": -2.6875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.15625, "rewards/margins": 10.375, "rewards/rejected": -13.5, "step": 493 }, { "epoch": 0.1839851024208566, "grad_norm": 18.25, "learning_rate": 1.761754241745253e-06, "logits/chosen": -0.1904296875, "logits/rejected": -0.79296875, "logps/chosen": -0.3359375, "logps/rejected": -1.796875, "loss": 0.0452, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6875, "rewards/margins": 7.3125, "rewards/rejected": -9.0, "step": 494 }, { "epoch": 0.18435754189944134, "grad_norm": 30.625, "learning_rate": 1.7614159795102298e-06, "logits/chosen": 0.07080078125, "logits/rejected": -0.7734375, "logps/chosen": -0.73046875, "logps/rejected": -2.25, "loss": 0.0486, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.65625, "rewards/margins": 7.5625, "rewards/rejected": -11.25, "step": 495 }, { "epoch": 0.18472998137802607, "grad_norm": 0.0045166015625, "learning_rate": 1.7610762607475077e-06, "logits/chosen": 0.03125, "logits/rejected": -0.6328125, "logps/chosen": -0.125, "logps/rejected": -2.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.625, "rewards/margins": 12.5, "rewards/rejected": -13.125, "step": 496 }, { "epoch": 0.1851024208566108, "grad_norm": 0.001495361328125, "learning_rate": 1.760735086031501e-06, "logits/chosen": 0.1416015625, "logits/rejected": -0.6640625, "logps/chosen": -0.0927734375, "logps/rejected": -2.859375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.46484375, "rewards/margins": 13.875, "rewards/rejected": -14.3125, "step": 497 }, { "epoch": 0.18547486033519553, "grad_norm": 0.30859375, "learning_rate": 1.760392455939086e-06, "logits/chosen": -0.037841796875, "logits/rejected": -0.2890625, "logps/chosen": -0.37109375, "logps/rejected": -2.234375, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8515625, "rewards/margins": 9.3125, "rewards/rejected": -11.125, "step": 498 }, { "epoch": 0.18584729981378026, "grad_norm": 0.007537841796875, "learning_rate": 1.7600483710495998e-06, "logits/chosen": 0.11083984375, "logits/rejected": -0.640625, "logps/chosen": -0.373046875, "logps/rejected": -2.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.859375, "rewards/margins": 12.375, "rewards/rejected": -14.25, "step": 499 }, { "epoch": 0.186219739292365, "grad_norm": 0.1396484375, "learning_rate": 1.7597028319448395e-06, "logits/chosen": -0.003387451171875, "logits/rejected": -0.71484375, "logps/chosen": -0.392578125, "logps/rejected": -2.65625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9609375, "rewards/margins": 11.375, "rewards/rejected": -13.3125, "step": 500 }, { "epoch": 0.18659217877094972, "grad_norm": 0.005340576171875, "learning_rate": 1.7593558392090605e-06, "logits/chosen": 0.046875, "logits/rejected": -0.671875, "logps/chosen": -0.111328125, "logps/rejected": -2.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5546875, "rewards/margins": 12.875, "rewards/rejected": -13.4375, "step": 501 }, { "epoch": 0.18696461824953445, "grad_norm": 1.515625, "learning_rate": 1.7590073934289765e-06, "logits/chosen": 0.07568359375, "logits/rejected": -1.2109375, "logps/chosen": -0.138671875, "logps/rejected": -2.265625, "loss": 0.0037, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6953125, "rewards/margins": 10.625, "rewards/rejected": -11.375, "step": 502 }, { "epoch": 0.18733705772811918, "grad_norm": 8.875, "learning_rate": 1.7586574951937586e-06, "logits/chosen": -0.1142578125, "logits/rejected": -1.234375, "logps/chosen": -0.68359375, "logps/rejected": -2.25, "loss": 0.0146, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.40625, "rewards/margins": 7.84375, "rewards/rejected": -11.25, "step": 503 }, { "epoch": 0.1877094972067039, "grad_norm": 0.08154296875, "learning_rate": 1.758306145095033e-06, "logits/chosen": 0.0703125, "logits/rejected": -0.671875, "logps/chosen": -0.96875, "logps/rejected": -2.890625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.84375, "rewards/margins": 9.625, "rewards/rejected": -14.4375, "step": 504 }, { "epoch": 0.18808193668528864, "grad_norm": 0.01470947265625, "learning_rate": 1.7579533437268805e-06, "logits/chosen": -0.00131988525390625, "logits/rejected": -0.80859375, "logps/chosen": -0.2109375, "logps/rejected": -2.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0546875, "rewards/margins": 11.3125, "rewards/rejected": -12.375, "step": 505 }, { "epoch": 0.18845437616387337, "grad_norm": 0.4375, "learning_rate": 1.7575990916858368e-06, "logits/chosen": -0.1318359375, "logits/rejected": -1.15625, "logps/chosen": -0.19140625, "logps/rejected": -1.8125, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.95703125, "rewards/margins": 8.125, "rewards/rejected": -9.0625, "step": 506 }, { "epoch": 0.1888268156424581, "grad_norm": 33.0, "learning_rate": 1.75724338957089e-06, "logits/chosen": -0.23828125, "logits/rejected": -0.65234375, "logps/chosen": -0.45703125, "logps/rejected": -1.765625, "loss": 0.0564, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.28125, "rewards/margins": 6.53125, "rewards/rejected": -8.8125, "step": 507 }, { "epoch": 0.18919925512104283, "grad_norm": 0.0068359375, "learning_rate": 1.7568862379834802e-06, "logits/chosen": 0.00421142578125, "logits/rejected": -0.546875, "logps/chosen": -0.197265625, "logps/rejected": -2.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.984375, "rewards/margins": 12.875, "rewards/rejected": -13.875, "step": 508 }, { "epoch": 0.18957169459962755, "grad_norm": 6.09375, "learning_rate": 1.7565276375274976e-06, "logits/chosen": -0.2158203125, "logits/rejected": -0.474609375, "logps/chosen": -0.734375, "logps/rejected": -1.96875, "loss": 0.0139, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.65625, "rewards/margins": 6.15625, "rewards/rejected": -9.8125, "step": 509 }, { "epoch": 0.18994413407821228, "grad_norm": 19.0, "learning_rate": 1.7561675888092834e-06, "logits/chosen": -0.236328125, "logits/rejected": -1.1484375, "logps/chosen": -0.7109375, "logps/rejected": -1.9609375, "loss": 0.0371, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.546875, "rewards/margins": 6.28125, "rewards/rejected": -9.8125, "step": 510 }, { "epoch": 0.19031657355679701, "grad_norm": 0.08935546875, "learning_rate": 1.7558060924376266e-06, "logits/chosen": -0.14453125, "logits/rejected": -0.71484375, "logps/chosen": -0.56640625, "logps/rejected": -2.671875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.828125, "rewards/margins": 10.5625, "rewards/rejected": -13.375, "step": 511 }, { "epoch": 0.19068901303538174, "grad_norm": 0.404296875, "learning_rate": 1.7554431490237647e-06, "logits/chosen": -0.1533203125, "logits/rejected": -1.1328125, "logps/chosen": -0.2421875, "logps/rejected": -2.15625, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.203125, "rewards/margins": 9.5625, "rewards/rejected": -10.75, "step": 512 }, { "epoch": 0.19106145251396647, "grad_norm": 200.0, "learning_rate": 1.7550787591813816e-06, "logits/chosen": -0.50390625, "logits/rejected": -0.71484375, "logps/chosen": -0.5625, "logps/rejected": -1.2421875, "loss": 1.1328, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.8125, "rewards/margins": 3.375, "rewards/rejected": -6.21875, "step": 513 }, { "epoch": 0.1914338919925512, "grad_norm": 0.14453125, "learning_rate": 1.7547129235266066e-06, "logits/chosen": -0.212890625, "logits/rejected": -0.52734375, "logps/chosen": -0.8671875, "logps/rejected": -2.78125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.34375, "rewards/margins": 9.5, "rewards/rejected": -13.875, "step": 514 }, { "epoch": 0.19180633147113593, "grad_norm": 0.047607421875, "learning_rate": 1.7543456426780144e-06, "logits/chosen": 0.00244140625, "logits/rejected": -0.546875, "logps/chosen": -0.31640625, "logps/rejected": -2.71875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.578125, "rewards/margins": 12.0625, "rewards/rejected": -13.625, "step": 515 }, { "epoch": 0.19217877094972066, "grad_norm": 22.875, "learning_rate": 1.7539769172566225e-06, "logits/chosen": -0.06689453125, "logits/rejected": -0.703125, "logps/chosen": -1.296875, "logps/rejected": -2.65625, "loss": 0.0297, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.46875, "rewards/margins": 6.8125, "rewards/rejected": -13.25, "step": 516 }, { "epoch": 0.1925512104283054, "grad_norm": 0.0198974609375, "learning_rate": 1.7536067478858917e-06, "logits/chosen": -0.00946044921875, "logits/rejected": -0.38671875, "logps/chosen": -0.291015625, "logps/rejected": -2.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.453125, "rewards/margins": 10.875, "rewards/rejected": -12.375, "step": 517 }, { "epoch": 0.19292364990689012, "grad_norm": 0.3125, "learning_rate": 1.7532351351917234e-06, "logits/chosen": -0.11474609375, "logits/rejected": -0.3046875, "logps/chosen": -0.34765625, "logps/rejected": -2.078125, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.734375, "rewards/margins": 8.625, "rewards/rejected": -10.375, "step": 518 }, { "epoch": 0.19329608938547485, "grad_norm": 0.012451171875, "learning_rate": 1.7528620798024608e-06, "logits/chosen": 0.06640625, "logits/rejected": -0.48046875, "logps/chosen": -0.248046875, "logps/rejected": -2.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2421875, "rewards/margins": 12.75, "rewards/rejected": -13.9375, "step": 519 }, { "epoch": 0.19366852886405958, "grad_norm": 0.94921875, "learning_rate": 1.752487582348885e-06, "logits/chosen": -0.36328125, "logits/rejected": -1.265625, "logps/chosen": -0.16796875, "logps/rejected": -1.703125, "loss": 0.002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.84375, "rewards/margins": 7.6875, "rewards/rejected": -8.5, "step": 520 }, { "epoch": 0.1940409683426443, "grad_norm": 0.77734375, "learning_rate": 1.7521116434642165e-06, "logits/chosen": -0.08251953125, "logits/rejected": -0.734375, "logps/chosen": -0.9140625, "logps/rejected": -2.71875, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.5625, "rewards/margins": 9.0, "rewards/rejected": -13.5625, "step": 521 }, { "epoch": 0.19441340782122904, "grad_norm": 10.5, "learning_rate": 1.7517342637841124e-06, "logits/chosen": -0.12890625, "logits/rejected": -1.1484375, "logps/chosen": -0.44140625, "logps/rejected": -1.9375, "loss": 0.0222, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.203125, "rewards/margins": 7.5, "rewards/rejected": -9.75, "step": 522 }, { "epoch": 0.19478584729981377, "grad_norm": 0.01007080078125, "learning_rate": 1.7513554439466664e-06, "logits/chosen": -0.032958984375, "logits/rejected": -0.31640625, "logps/chosen": -0.2255859375, "logps/rejected": -2.578125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.125, "rewards/margins": 11.75, "rewards/rejected": -12.875, "step": 523 }, { "epoch": 0.1951582867783985, "grad_norm": 1.109375, "learning_rate": 1.750975184592407e-06, "logits/chosen": -0.138671875, "logits/rejected": -0.7734375, "logps/chosen": -0.6015625, "logps/rejected": -1.96875, "loss": 0.0024, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0, "rewards/margins": 6.84375, "rewards/rejected": -9.875, "step": 524 }, { "epoch": 0.19553072625698323, "grad_norm": 8.625, "learning_rate": 1.750593486364297e-06, "logits/chosen": -0.23828125, "logits/rejected": -0.1943359375, "logps/chosen": -1.0234375, "logps/rejected": -1.9921875, "loss": 0.0173, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.125, "rewards/margins": 4.8125, "rewards/rejected": -9.9375, "step": 525 }, { "epoch": 0.19590316573556796, "grad_norm": 95.5, "learning_rate": 1.7502103499077315e-06, "logits/chosen": -0.1533203125, "logits/rejected": -0.447265625, "logps/chosen": -1.3984375, "logps/rejected": -1.765625, "loss": 0.3535, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0, "rewards/margins": 1.8203125, "rewards/rejected": -8.8125, "step": 526 }, { "epoch": 0.1962756052141527, "grad_norm": 0.001983642578125, "learning_rate": 1.7498257758705383e-06, "logits/chosen": 0.05029296875, "logits/rejected": -0.443359375, "logps/chosen": -0.12451171875, "logps/rejected": -2.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.625, "rewards/margins": 13.625, "rewards/rejected": -14.25, "step": 527 }, { "epoch": 0.19664804469273742, "grad_norm": 5.4375, "learning_rate": 1.7494397649029756e-06, "logits/chosen": -0.10400390625, "logits/rejected": -0.53515625, "logps/chosen": -0.7578125, "logps/rejected": -2.40625, "loss": 0.0088, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.796875, "rewards/margins": 8.25, "rewards/rejected": -12.0, "step": 528 }, { "epoch": 0.19702048417132215, "grad_norm": 0.005859375, "learning_rate": 1.7490523176577309e-06, "logits/chosen": -0.23046875, "logits/rejected": -0.65234375, "logps/chosen": -0.1064453125, "logps/rejected": -2.515625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.53125, "rewards/margins": 12.0, "rewards/rejected": -12.5625, "step": 529 }, { "epoch": 0.1973929236499069, "grad_norm": 11.8125, "learning_rate": 1.7486634347899207e-06, "logits/chosen": -0.158203125, "logits/rejected": -0.640625, "logps/chosen": -0.34765625, "logps/rejected": -1.203125, "loss": 0.0337, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.734375, "rewards/margins": 4.3125, "rewards/rejected": -6.03125, "step": 530 }, { "epoch": 0.19776536312849163, "grad_norm": 0.034912109375, "learning_rate": 1.7482731169570883e-06, "logits/chosen": 0.04248046875, "logits/rejected": -0.65625, "logps/chosen": -0.435546875, "logps/rejected": -2.578125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.171875, "rewards/margins": 10.75, "rewards/rejected": -12.875, "step": 531 }, { "epoch": 0.19813780260707636, "grad_norm": 26.25, "learning_rate": 1.7478813648192043e-06, "logits/chosen": 0.076171875, "logits/rejected": -0.5234375, "logps/chosen": -1.1953125, "logps/rejected": -2.5625, "loss": 0.031, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0, "rewards/margins": 6.875, "rewards/rejected": -12.875, "step": 532 }, { "epoch": 0.1985102420856611, "grad_norm": 0.0081787109375, "learning_rate": 1.7474881790386635e-06, "logits/chosen": 0.060302734375, "logits/rejected": -0.48046875, "logps/chosen": -0.2373046875, "logps/rejected": -2.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1875, "rewards/margins": 12.0625, "rewards/rejected": -13.25, "step": 533 }, { "epoch": 0.19888268156424582, "grad_norm": 6.65625, "learning_rate": 1.7470935602802856e-06, "logits/chosen": -0.03857421875, "logits/rejected": -0.361328125, "logps/chosen": -0.373046875, "logps/rejected": -1.6796875, "loss": 0.0121, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8671875, "rewards/margins": 6.53125, "rewards/rejected": -8.375, "step": 534 }, { "epoch": 0.19925512104283055, "grad_norm": 0.003326416015625, "learning_rate": 1.7466975092113126e-06, "logits/chosen": -0.11865234375, "logits/rejected": -0.59375, "logps/chosen": -0.47265625, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.359375, "rewards/margins": 13.125, "rewards/rejected": -15.5, "step": 535 }, { "epoch": 0.19962756052141528, "grad_norm": 0.023681640625, "learning_rate": 1.7463000265014086e-06, "logits/chosen": -0.00653076171875, "logits/rejected": -0.64453125, "logps/chosen": -0.193359375, "logps/rejected": -2.359375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.96484375, "rewards/margins": 10.875, "rewards/rejected": -11.8125, "step": 536 }, { "epoch": 0.2, "grad_norm": 0.26171875, "learning_rate": 1.7459011128226584e-06, "logits/chosen": -0.1826171875, "logits/rejected": -0.494140625, "logps/chosen": -0.5234375, "logps/rejected": -2.25, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.625, "rewards/margins": 8.625, "rewards/rejected": -11.25, "step": 537 }, { "epoch": 0.20037243947858474, "grad_norm": 12.5625, "learning_rate": 1.7455007688495665e-06, "logits/chosen": -0.15234375, "logits/rejected": -0.9609375, "logps/chosen": -0.7265625, "logps/rejected": -1.921875, "loss": 0.0299, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.640625, "rewards/margins": 6.0, "rewards/rejected": -9.625, "step": 538 }, { "epoch": 0.20074487895716947, "grad_norm": 2.59375, "learning_rate": 1.7450989952590556e-06, "logits/chosen": -0.08984375, "logits/rejected": -0.609375, "logps/chosen": -0.8984375, "logps/rejected": -2.796875, "loss": 0.0036, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.5, "rewards/margins": 9.5, "rewards/rejected": -14.0, "step": 539 }, { "epoch": 0.2011173184357542, "grad_norm": 0.306640625, "learning_rate": 1.7446957927304655e-06, "logits/chosen": -0.11767578125, "logits/rejected": -0.54296875, "logps/chosen": -0.54296875, "logps/rejected": -2.578125, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.703125, "rewards/margins": 10.1875, "rewards/rejected": -12.875, "step": 540 }, { "epoch": 0.20148975791433893, "grad_norm": 11.3125, "learning_rate": 1.7442911619455528e-06, "logits/chosen": -0.1923828125, "logits/rejected": -0.75, "logps/chosen": -0.3671875, "logps/rejected": -1.3828125, "loss": 0.0255, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.828125, "rewards/margins": 5.09375, "rewards/rejected": -6.9375, "step": 541 }, { "epoch": 0.20186219739292366, "grad_norm": 0.166015625, "learning_rate": 1.7438851035884882e-06, "logits/chosen": -0.263671875, "logits/rejected": -0.66796875, "logps/chosen": -0.37890625, "logps/rejected": -2.21875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 9.1875, "rewards/rejected": -11.0625, "step": 542 }, { "epoch": 0.2022346368715084, "grad_norm": 0.024658203125, "learning_rate": 1.7434776183458567e-06, "logits/chosen": 0.00909423828125, "logits/rejected": -0.3984375, "logps/chosen": -0.359375, "logps/rejected": -2.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 11.875, "rewards/rejected": -13.75, "step": 543 }, { "epoch": 0.20260707635009312, "grad_norm": 23.75, "learning_rate": 1.7430687069066563e-06, "logits/chosen": -0.298828125, "logits/rejected": -0.470703125, "logps/chosen": -0.359375, "logps/rejected": -1.90625, "loss": 0.0547, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 7.75, "rewards/rejected": -9.5625, "step": 544 }, { "epoch": 0.20297951582867785, "grad_norm": 0.8046875, "learning_rate": 1.7426583699622956e-06, "logits/chosen": -0.08447265625, "logits/rejected": -1.0078125, "logps/chosen": -0.5703125, "logps/rejected": -2.140625, "loss": 0.0014, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 7.875, "rewards/rejected": -10.75, "step": 545 }, { "epoch": 0.20335195530726258, "grad_norm": 1.28125, "learning_rate": 1.7422466082065944e-06, "logits/chosen": -0.00274658203125, "logits/rejected": -0.84765625, "logps/chosen": -0.373046875, "logps/rejected": -2.21875, "loss": 0.0025, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.859375, "rewards/margins": 9.1875, "rewards/rejected": -11.0625, "step": 546 }, { "epoch": 0.2037243947858473, "grad_norm": 0.06982421875, "learning_rate": 1.7418334223357811e-06, "logits/chosen": -0.035400390625, "logits/rejected": -0.90625, "logps/chosen": -0.212890625, "logps/rejected": -2.5, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0625, "rewards/margins": 11.5, "rewards/rejected": -12.5, "step": 547 }, { "epoch": 0.20409683426443204, "grad_norm": 252.0, "learning_rate": 1.741418813048492e-06, "logits/chosen": -0.2294921875, "logits/rejected": -0.234375, "logps/chosen": -0.625, "logps/rejected": -1.546875, "loss": 0.5938, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.140625, "rewards/margins": 4.5625, "rewards/rejected": -7.6875, "step": 548 }, { "epoch": 0.20446927374301677, "grad_norm": 0.1279296875, "learning_rate": 1.7410027810457707e-06, "logits/chosen": -0.06640625, "logits/rejected": -0.5703125, "logps/chosen": -0.546875, "logps/rejected": -2.46875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.71875, "rewards/margins": 9.625, "rewards/rejected": -12.3125, "step": 549 }, { "epoch": 0.2048417132216015, "grad_norm": 0.01397705078125, "learning_rate": 1.7405853270310661e-06, "logits/chosen": 0.09765625, "logits/rejected": -0.734375, "logps/chosen": -0.60546875, "logps/rejected": -2.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.03125, "rewards/margins": 11.5, "rewards/rejected": -14.5, "step": 550 }, { "epoch": 0.20521415270018623, "grad_norm": 14.75, "learning_rate": 1.7401664517102317e-06, "logits/chosen": -0.00970458984375, "logits/rejected": -1.078125, "logps/chosen": -0.8828125, "logps/rejected": -2.1875, "loss": 0.0276, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.4375, "rewards/margins": 6.46875, "rewards/rejected": -10.875, "step": 551 }, { "epoch": 0.20558659217877095, "grad_norm": 0.003631591796875, "learning_rate": 1.7397461557915234e-06, "logits/chosen": 0.05419921875, "logits/rejected": -0.53515625, "logps/chosen": -0.34765625, "logps/rejected": -2.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7421875, "rewards/margins": 12.625, "rewards/rejected": -14.375, "step": 552 }, { "epoch": 0.20595903165735568, "grad_norm": 0.0240478515625, "learning_rate": 1.7393244399856004e-06, "logits/chosen": -0.0693359375, "logits/rejected": -0.392578125, "logps/chosen": -0.3046875, "logps/rejected": -2.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5234375, "rewards/margins": 11.75, "rewards/rejected": -13.25, "step": 553 }, { "epoch": 0.20633147113594041, "grad_norm": 247.0, "learning_rate": 1.7389013050055218e-06, "logits/chosen": -0.2578125, "logits/rejected": -0.9609375, "logps/chosen": -1.078125, "logps/rejected": -2.15625, "loss": 0.5352, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.375, "rewards/margins": 5.4375, "rewards/rejected": -10.8125, "step": 554 }, { "epoch": 0.20670391061452514, "grad_norm": 0.68359375, "learning_rate": 1.7384767515667466e-06, "logits/chosen": -0.06591796875, "logits/rejected": -0.0101318359375, "logps/chosen": -0.248046875, "logps/rejected": -2.125, "loss": 0.0015, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.234375, "rewards/margins": 9.375, "rewards/rejected": -10.625, "step": 555 }, { "epoch": 0.20707635009310987, "grad_norm": 0.03564453125, "learning_rate": 1.7380507803871325e-06, "logits/chosen": 0.1337890625, "logits/rejected": -0.314453125, "logps/chosen": -0.3828125, "logps/rejected": -2.609375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9140625, "rewards/margins": 11.125, "rewards/rejected": -13.0625, "step": 556 }, { "epoch": 0.2074487895716946, "grad_norm": 0.00150299072265625, "learning_rate": 1.7376233921869336e-06, "logits/chosen": 0.07958984375, "logits/rejected": -0.546875, "logps/chosen": -0.32421875, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 14.5, "rewards/rejected": -16.125, "step": 557 }, { "epoch": 0.20782122905027933, "grad_norm": 21.125, "learning_rate": 1.7371945876888009e-06, "logits/chosen": -0.21484375, "logits/rejected": -0.78125, "logps/chosen": -0.423828125, "logps/rejected": -1.4765625, "loss": 0.0459, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 5.25, "rewards/rejected": -7.375, "step": 558 }, { "epoch": 0.20819366852886406, "grad_norm": 0.3046875, "learning_rate": 1.7367643676177795e-06, "logits/chosen": -0.375, "logits/rejected": -0.51953125, "logps/chosen": -0.67578125, "logps/rejected": -2.6875, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.375, "rewards/margins": 10.125, "rewards/rejected": -13.5, "step": 559 }, { "epoch": 0.2085661080074488, "grad_norm": 0.75, "learning_rate": 1.7363327327013085e-06, "logits/chosen": -0.0458984375, "logits/rejected": -0.2734375, "logps/chosen": -0.455078125, "logps/rejected": -2.578125, "loss": 0.0012, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.28125, "rewards/margins": 10.625, "rewards/rejected": -12.875, "step": 560 }, { "epoch": 0.20893854748603352, "grad_norm": 0.028076171875, "learning_rate": 1.735899683669219e-06, "logits/chosen": -0.0791015625, "logits/rejected": -0.482421875, "logps/chosen": -0.212890625, "logps/rejected": -2.3125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0625, "rewards/margins": 10.5, "rewards/rejected": -11.5625, "step": 561 }, { "epoch": 0.20931098696461825, "grad_norm": 0.263671875, "learning_rate": 1.7354652212537326e-06, "logits/chosen": -0.039794921875, "logits/rejected": -0.88671875, "logps/chosen": -0.1923828125, "logps/rejected": -2.0, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9609375, "rewards/margins": 9.0, "rewards/rejected": -10.0, "step": 562 }, { "epoch": 0.20968342644320298, "grad_norm": 334.0, "learning_rate": 1.7350293461894623e-06, "logits/chosen": -0.197265625, "logits/rejected": -0.62890625, "logps/chosen": -1.109375, "logps/rejected": -0.6640625, "loss": 3.1562, "nll_loss": 0.0, "rewards/accuracies": 0.0, "rewards/chosen": -5.5625, "rewards/margins": -2.21875, "rewards/rejected": -3.34375, "step": 563 }, { "epoch": 0.2100558659217877, "grad_norm": 0.1484375, "learning_rate": 1.7345920592134078e-06, "logits/chosen": 0.0247802734375, "logits/rejected": -0.8125, "logps/chosen": -0.60546875, "logps/rejected": -2.9375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.03125, "rewards/margins": 11.6875, "rewards/rejected": -14.75, "step": 564 }, { "epoch": 0.21042830540037244, "grad_norm": 0.1259765625, "learning_rate": 1.7341533610649576e-06, "logits/chosen": -0.240234375, "logits/rejected": -0.49609375, "logps/chosen": -0.427734375, "logps/rejected": -2.375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.140625, "rewards/margins": 9.75, "rewards/rejected": -11.875, "step": 565 }, { "epoch": 0.21080074487895717, "grad_norm": 0.033935546875, "learning_rate": 1.7337132524858859e-06, "logits/chosen": -0.142578125, "logits/rejected": -0.3359375, "logps/chosen": -0.359375, "logps/rejected": -2.515625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 10.75, "rewards/rejected": -12.5625, "step": 566 }, { "epoch": 0.2111731843575419, "grad_norm": 0.011474609375, "learning_rate": 1.733271734220351e-06, "logits/chosen": -0.050048828125, "logits/rejected": -0.451171875, "logps/chosen": -0.392578125, "logps/rejected": -2.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.96875, "rewards/margins": 11.4375, "rewards/rejected": -13.375, "step": 567 }, { "epoch": 0.21154562383612663, "grad_norm": 0.0037994384765625, "learning_rate": 1.7328288070148957e-06, "logits/chosen": 0.0096435546875, "logits/rejected": -0.421875, "logps/chosen": -0.19921875, "logps/rejected": -2.828125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.99609375, "rewards/margins": 13.125, "rewards/rejected": -14.125, "step": 568 }, { "epoch": 0.21191806331471136, "grad_norm": 0.0037994384765625, "learning_rate": 1.7323844716184444e-06, "logits/chosen": -0.031494140625, "logits/rejected": -0.40234375, "logps/chosen": -0.1884765625, "logps/rejected": -2.828125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9375, "rewards/margins": 13.1875, "rewards/rejected": -14.125, "step": 569 }, { "epoch": 0.2122905027932961, "grad_norm": 2.21875, "learning_rate": 1.7319387287823033e-06, "logits/chosen": -0.033935546875, "logits/rejected": -0.9609375, "logps/chosen": -0.2734375, "logps/rejected": -2.375, "loss": 0.0037, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.359375, "rewards/margins": 10.5625, "rewards/rejected": -11.875, "step": 570 }, { "epoch": 0.21266294227188082, "grad_norm": 0.0115966796875, "learning_rate": 1.731491579260158e-06, "logits/chosen": 0.169921875, "logits/rejected": -0.427734375, "logps/chosen": -0.41015625, "logps/rejected": -2.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.046875, "rewards/margins": 11.75, "rewards/rejected": -13.8125, "step": 571 }, { "epoch": 0.21303538175046555, "grad_norm": 0.01806640625, "learning_rate": 1.7310430238080721e-06, "logits/chosen": -0.05810546875, "logits/rejected": -0.46484375, "logps/chosen": -0.3359375, "logps/rejected": -2.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6875, "rewards/margins": 11.0, "rewards/rejected": -12.625, "step": 572 }, { "epoch": 0.21340782122905028, "grad_norm": 1.25, "learning_rate": 1.7305930631844873e-06, "logits/chosen": -0.051025390625, "logits/rejected": -1.0234375, "logps/chosen": -0.57421875, "logps/rejected": -2.359375, "loss": 0.0019, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.875, "rewards/margins": 8.9375, "rewards/rejected": -11.75, "step": 573 }, { "epoch": 0.213780260707635, "grad_norm": 0.19921875, "learning_rate": 1.7301416981502207e-06, "logits/chosen": 0.1396484375, "logits/rejected": -0.9921875, "logps/chosen": -0.13671875, "logps/rejected": -2.28125, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6796875, "rewards/margins": 10.75, "rewards/rejected": -11.375, "step": 574 }, { "epoch": 0.21415270018621974, "grad_norm": 0.12158203125, "learning_rate": 1.7296889294684642e-06, "logits/chosen": -0.171875, "logits/rejected": -1.0078125, "logps/chosen": -0.306640625, "logps/rejected": -2.15625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5390625, "rewards/margins": 9.25, "rewards/rejected": -10.75, "step": 575 }, { "epoch": 0.21452513966480447, "grad_norm": 32.25, "learning_rate": 1.7292347579047829e-06, "logits/chosen": -0.2080078125, "logits/rejected": -0.375, "logps/chosen": -0.275390625, "logps/rejected": -1.90625, "loss": 0.0486, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3828125, "rewards/margins": 8.125, "rewards/rejected": -9.5, "step": 576 }, { "epoch": 0.2148975791433892, "grad_norm": 0.09619140625, "learning_rate": 1.728779184227114e-06, "logits/chosen": -0.03466796875, "logits/rejected": -0.5234375, "logps/chosen": -0.28125, "logps/rejected": -2.59375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.40625, "rewards/margins": 11.625, "rewards/rejected": -13.0, "step": 577 }, { "epoch": 0.21527001862197392, "grad_norm": 0.00183868408203125, "learning_rate": 1.728322209205766e-06, "logits/chosen": -0.2060546875, "logits/rejected": -0.42578125, "logps/chosen": -0.396484375, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9765625, "rewards/margins": 13.75, "rewards/rejected": -15.6875, "step": 578 }, { "epoch": 0.21564245810055865, "grad_norm": 0.002288818359375, "learning_rate": 1.727863833613416e-06, "logits/chosen": 0.23046875, "logits/rejected": -0.51953125, "logps/chosen": -0.314453125, "logps/rejected": -3.078125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5703125, "rewards/margins": 13.875, "rewards/rejected": -15.375, "step": 579 }, { "epoch": 0.21601489757914338, "grad_norm": 24.875, "learning_rate": 1.7274040582251099e-06, "logits/chosen": -0.0615234375, "logits/rejected": -0.302734375, "logps/chosen": -0.201171875, "logps/rejected": -1.5625, "loss": 0.0898, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0078125, "rewards/margins": 6.8125, "rewards/rejected": -7.8125, "step": 580 }, { "epoch": 0.2163873370577281, "grad_norm": 0.2314453125, "learning_rate": 1.7269428838182603e-06, "logits/chosen": -0.0595703125, "logits/rejected": -0.474609375, "logps/chosen": -0.330078125, "logps/rejected": -2.171875, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 9.25, "rewards/rejected": -10.875, "step": 581 }, { "epoch": 0.21675977653631284, "grad_norm": 0.003082275390625, "learning_rate": 1.7264803111726452e-06, "logits/chosen": -0.0712890625, "logits/rejected": -0.43359375, "logps/chosen": -0.26171875, "logps/rejected": -2.828125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125, "rewards/margins": 12.875, "rewards/rejected": -14.125, "step": 582 }, { "epoch": 0.21713221601489757, "grad_norm": 0.443359375, "learning_rate": 1.726016341070407e-06, "logits/chosen": -0.384765625, "logits/rejected": -0.396484375, "logps/chosen": -0.2060546875, "logps/rejected": -1.71875, "loss": 0.0012, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.03125, "rewards/margins": 7.5625, "rewards/rejected": -8.625, "step": 583 }, { "epoch": 0.2175046554934823, "grad_norm": 1.7734375, "learning_rate": 1.7255509742960504e-06, "logits/chosen": -0.0849609375, "logits/rejected": -0.38671875, "logps/chosen": -0.71875, "logps/rejected": -2.65625, "loss": 0.0024, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.578125, "rewards/margins": 9.6875, "rewards/rejected": -13.25, "step": 584 }, { "epoch": 0.21787709497206703, "grad_norm": 0.005645751953125, "learning_rate": 1.7250842116364432e-06, "logits/chosen": -0.30078125, "logits/rejected": -0.58203125, "logps/chosen": -0.173828125, "logps/rejected": -2.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8671875, "rewards/margins": 12.5, "rewards/rejected": -13.375, "step": 585 }, { "epoch": 0.21824953445065176, "grad_norm": 45.5, "learning_rate": 1.724616053880811e-06, "logits/chosen": -0.1826171875, "logits/rejected": -0.1884765625, "logps/chosen": -0.46484375, "logps/rejected": -2.03125, "loss": 0.0967, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.328125, "rewards/margins": 7.8125, "rewards/rejected": -10.1875, "step": 586 }, { "epoch": 0.2186219739292365, "grad_norm": 0.048828125, "learning_rate": 1.7241465018207408e-06, "logits/chosen": 0.1435546875, "logits/rejected": -0.8359375, "logps/chosen": -0.296875, "logps/rejected": -2.53125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4765625, "rewards/margins": 11.125, "rewards/rejected": -12.625, "step": 587 }, { "epoch": 0.21899441340782122, "grad_norm": 0.1279296875, "learning_rate": 1.7236755562501758e-06, "logits/chosen": -0.05712890625, "logits/rejected": -0.5234375, "logps/chosen": -0.58984375, "logps/rejected": -2.71875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9375, "rewards/margins": 10.625, "rewards/rejected": -13.625, "step": 588 }, { "epoch": 0.21936685288640595, "grad_norm": 0.1630859375, "learning_rate": 1.7232032179654153e-06, "logits/chosen": -0.2734375, "logits/rejected": -0.5703125, "logps/chosen": -0.7421875, "logps/rejected": -2.640625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6875, "rewards/margins": 9.5, "rewards/rejected": -13.25, "step": 589 }, { "epoch": 0.21973929236499068, "grad_norm": 0.00933837890625, "learning_rate": 1.7227294877651144e-06, "logits/chosen": 0.0164794921875, "logits/rejected": -0.5, "logps/chosen": -0.451171875, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25, "rewards/margins": 13.25, "rewards/rejected": -15.5625, "step": 590 }, { "epoch": 0.2201117318435754, "grad_norm": 0.0035858154296875, "learning_rate": 1.7222543664502807e-06, "logits/chosen": -0.2158203125, "logits/rejected": -0.47265625, "logps/chosen": -0.11328125, "logps/rejected": -2.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.56640625, "rewards/margins": 13.0, "rewards/rejected": -13.5625, "step": 591 }, { "epoch": 0.22048417132216014, "grad_norm": 0.001556396484375, "learning_rate": 1.7217778548242748e-06, "logits/chosen": -0.017578125, "logits/rejected": -0.40234375, "logps/chosen": -0.13671875, "logps/rejected": -2.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6875, "rewards/margins": 13.75, "rewards/rejected": -14.4375, "step": 592 }, { "epoch": 0.22085661080074487, "grad_norm": 0.07666015625, "learning_rate": 1.7212999536928074e-06, "logits/chosen": 0.0654296875, "logits/rejected": -0.3984375, "logps/chosen": -0.5703125, "logps/rejected": -2.90625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.859375, "rewards/margins": 11.6875, "rewards/rejected": -14.5625, "step": 593 }, { "epoch": 0.2212290502793296, "grad_norm": 32.75, "learning_rate": 1.7208206638639395e-06, "logits/chosen": -0.0032501220703125, "logits/rejected": -1.015625, "logps/chosen": -0.8515625, "logps/rejected": -2.296875, "loss": 0.064, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.25, "rewards/margins": 7.1875, "rewards/rejected": -11.5, "step": 594 }, { "epoch": 0.22160148975791433, "grad_norm": 0.02685546875, "learning_rate": 1.720339986148079e-06, "logits/chosen": 0.037109375, "logits/rejected": -0.062255859375, "logps/chosen": -0.318359375, "logps/rejected": -2.5, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.59375, "rewards/margins": 11.0, "rewards/rejected": -12.5625, "step": 595 }, { "epoch": 0.22197392923649906, "grad_norm": 0.0103759765625, "learning_rate": 1.719857921357982e-06, "logits/chosen": -0.1142578125, "logits/rejected": -0.2421875, "logps/chosen": -0.353515625, "logps/rejected": -2.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7578125, "rewards/margins": 12.5, "rewards/rejected": -14.25, "step": 596 }, { "epoch": 0.2223463687150838, "grad_norm": 0.036865234375, "learning_rate": 1.7193744703087485e-06, "logits/chosen": -0.10986328125, "logits/rejected": -0.4921875, "logps/chosen": -0.91796875, "logps/rejected": -3.140625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.59375, "rewards/margins": 11.125, "rewards/rejected": -15.75, "step": 597 }, { "epoch": 0.22271880819366852, "grad_norm": 0.0228271484375, "learning_rate": 1.7188896338178236e-06, "logits/chosen": 0.057373046875, "logits/rejected": -0.58984375, "logps/chosen": -0.6640625, "logps/rejected": -2.984375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3125, "rewards/margins": 11.625, "rewards/rejected": -14.875, "step": 598 }, { "epoch": 0.22309124767225325, "grad_norm": 0.000293731689453125, "learning_rate": 1.718403412704994e-06, "logits/chosen": 0.0400390625, "logits/rejected": -0.5859375, "logps/chosen": -0.16015625, "logps/rejected": -3.234375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.80078125, "rewards/margins": 15.375, "rewards/rejected": -16.25, "step": 599 }, { "epoch": 0.22346368715083798, "grad_norm": 5.90625, "learning_rate": 1.7179158077923883e-06, "logits/chosen": -0.283203125, "logits/rejected": -0.1875, "logps/chosen": -0.8125, "logps/rejected": -2.03125, "loss": 0.01, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0625, "rewards/margins": 6.125, "rewards/rejected": -10.125, "step": 600 }, { "epoch": 0.22383612662942273, "grad_norm": 0.52734375, "learning_rate": 1.7174268199044747e-06, "logits/chosen": -0.2021484375, "logits/rejected": -1.0390625, "logps/chosen": -0.546875, "logps/rejected": -2.09375, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.71875, "rewards/margins": 7.78125, "rewards/rejected": -10.5, "step": 601 }, { "epoch": 0.22420856610800746, "grad_norm": 0.490234375, "learning_rate": 1.7169364498680601e-06, "logits/chosen": 0.042724609375, "logits/rejected": -1.171875, "logps/chosen": -0.2890625, "logps/rejected": -1.984375, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4453125, "rewards/margins": 8.5, "rewards/rejected": -9.9375, "step": 602 }, { "epoch": 0.2245810055865922, "grad_norm": 2.203125, "learning_rate": 1.7164446985122878e-06, "logits/chosen": -0.029052734375, "logits/rejected": -0.8359375, "logps/chosen": -0.2275390625, "logps/rejected": -2.3125, "loss": 0.0041, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.140625, "rewards/margins": 10.375, "rewards/rejected": -11.5625, "step": 603 }, { "epoch": 0.22495344506517692, "grad_norm": 0.10595703125, "learning_rate": 1.7159515666686364e-06, "logits/chosen": -0.333984375, "logits/rejected": -0.419921875, "logps/chosen": -0.7265625, "logps/rejected": -2.671875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.625, "rewards/margins": 9.6875, "rewards/rejected": -13.375, "step": 604 }, { "epoch": 0.22532588454376165, "grad_norm": 0.0023345947265625, "learning_rate": 1.7154570551709203e-06, "logits/chosen": 0.12353515625, "logits/rejected": -0.302734375, "logps/chosen": -0.29296875, "logps/rejected": -2.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4609375, "rewards/margins": 13.375, "rewards/rejected": -14.8125, "step": 605 }, { "epoch": 0.22569832402234638, "grad_norm": 11.5625, "learning_rate": 1.7149611648552854e-06, "logits/chosen": -0.271484375, "logits/rejected": -1.0, "logps/chosen": -0.248046875, "logps/rejected": -2.359375, "loss": 0.0259, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2421875, "rewards/margins": 10.5625, "rewards/rejected": -11.75, "step": 606 }, { "epoch": 0.2260707635009311, "grad_norm": 0.25390625, "learning_rate": 1.7144638965602087e-06, "logits/chosen": -0.103515625, "logits/rejected": -0.30859375, "logps/chosen": -0.69921875, "logps/rejected": -2.546875, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.484375, "rewards/margins": 9.25, "rewards/rejected": -12.75, "step": 607 }, { "epoch": 0.22644320297951584, "grad_norm": 0.12255859375, "learning_rate": 1.7139652511264987e-06, "logits/chosen": -0.1416015625, "logits/rejected": -0.478515625, "logps/chosen": -0.38671875, "logps/rejected": -2.40625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9375, "rewards/margins": 10.0, "rewards/rejected": -12.0, "step": 608 }, { "epoch": 0.22681564245810057, "grad_norm": 0.462890625, "learning_rate": 1.7134652293972906e-06, "logits/chosen": -0.0576171875, "logits/rejected": -0.88671875, "logps/chosen": -0.173828125, "logps/rejected": -2.359375, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.87109375, "rewards/margins": 10.9375, "rewards/rejected": -11.75, "step": 609 }, { "epoch": 0.2271880819366853, "grad_norm": 18.125, "learning_rate": 1.712963832218048e-06, "logits/chosen": -0.27734375, "logits/rejected": -0.8125, "logps/chosen": -0.44140625, "logps/rejected": -2.0, "loss": 0.0396, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 7.78125, "rewards/rejected": -10.0, "step": 610 }, { "epoch": 0.22756052141527003, "grad_norm": 12.9375, "learning_rate": 1.7124610604365598e-06, "logits/chosen": 0.000759124755859375, "logits/rejected": -0.51953125, "logps/chosen": -0.84765625, "logps/rejected": -2.75, "loss": 0.0144, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.25, "rewards/margins": 9.5, "rewards/rejected": -13.75, "step": 611 }, { "epoch": 0.22793296089385476, "grad_norm": 0.0169677734375, "learning_rate": 1.7119569149029389e-06, "logits/chosen": -0.0888671875, "logits/rejected": -0.55859375, "logps/chosen": -0.58203125, "logps/rejected": -2.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.90625, "rewards/margins": 11.375, "rewards/rejected": -14.25, "step": 612 }, { "epoch": 0.2283054003724395, "grad_norm": 22.625, "learning_rate": 1.7114513964696212e-06, "logits/chosen": -0.10302734375, "logits/rejected": -0.55078125, "logps/chosen": -0.703125, "logps/rejected": -2.359375, "loss": 0.0254, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5, "rewards/margins": 8.3125, "rewards/rejected": -11.8125, "step": 613 }, { "epoch": 0.22867783985102422, "grad_norm": 10.8125, "learning_rate": 1.7109445059913644e-06, "logits/chosen": -0.1513671875, "logits/rejected": -0.4765625, "logps/chosen": -0.1396484375, "logps/rejected": -1.40625, "loss": 0.0223, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6953125, "rewards/margins": 6.34375, "rewards/rejected": -7.03125, "step": 614 }, { "epoch": 0.22905027932960895, "grad_norm": 0.3046875, "learning_rate": 1.7104362443252456e-06, "logits/chosen": 0.06982421875, "logits/rejected": -0.220703125, "logps/chosen": -0.73828125, "logps/rejected": -2.5, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6875, "rewards/margins": 8.75, "rewards/rejected": -12.4375, "step": 615 }, { "epoch": 0.22942271880819368, "grad_norm": 0.0013580322265625, "learning_rate": 1.7099266123306604e-06, "logits/chosen": 0.060302734375, "logits/rejected": -0.51953125, "logps/chosen": -0.33203125, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 13.625, "rewards/rejected": -15.3125, "step": 616 }, { "epoch": 0.2297951582867784, "grad_norm": 1.96875, "learning_rate": 1.7094156108693213e-06, "logits/chosen": -0.400390625, "logits/rejected": -0.7890625, "logps/chosen": -0.337890625, "logps/rejected": -2.046875, "loss": 0.0047, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6875, "rewards/margins": 8.5625, "rewards/rejected": -10.25, "step": 617 }, { "epoch": 0.23016759776536314, "grad_norm": 1.1484375, "learning_rate": 1.708903240805257e-06, "logits/chosen": 0.056396484375, "logits/rejected": -0.408203125, "logps/chosen": -0.423828125, "logps/rejected": -2.046875, "loss": 0.0027, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 8.125, "rewards/rejected": -10.25, "step": 618 }, { "epoch": 0.23054003724394787, "grad_norm": 0.0034942626953125, "learning_rate": 1.7083895030048096e-06, "logits/chosen": -0.08154296875, "logits/rejected": -0.55859375, "logps/chosen": -0.38671875, "logps/rejected": -3.015625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9375, "rewards/margins": 13.125, "rewards/rejected": -15.0625, "step": 619 }, { "epoch": 0.2309124767225326, "grad_norm": 0.0025482177734375, "learning_rate": 1.7078743983366345e-06, "logits/chosen": 0.06640625, "logits/rejected": -0.1728515625, "logps/chosen": -0.27734375, "logps/rejected": -2.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.390625, "rewards/margins": 13.5, "rewards/rejected": -14.875, "step": 620 }, { "epoch": 0.23128491620111732, "grad_norm": 1.421875, "learning_rate": 1.7073579276716977e-06, "logits/chosen": -0.1376953125, "logits/rejected": -0.9609375, "logps/chosen": -0.69921875, "logps/rejected": -2.46875, "loss": 0.0023, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5, "rewards/margins": 8.875, "rewards/rejected": -12.375, "step": 621 }, { "epoch": 0.23165735567970205, "grad_norm": 0.00025177001953125, "learning_rate": 1.706840091883275e-06, "logits/chosen": 0.0439453125, "logits/rejected": -0.173828125, "logps/chosen": -0.14453125, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.71875, "rewards/margins": 15.3125, "rewards/rejected": -16.0, "step": 622 }, { "epoch": 0.23202979515828678, "grad_norm": 0.03271484375, "learning_rate": 1.7063208918469503e-06, "logits/chosen": -0.11083984375, "logits/rejected": -0.7890625, "logps/chosen": -0.10400390625, "logps/rejected": -2.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.51953125, "rewards/margins": 12.25, "rewards/rejected": -12.8125, "step": 623 }, { "epoch": 0.2324022346368715, "grad_norm": 0.004180908203125, "learning_rate": 1.7058003284406155e-06, "logits/chosen": 0.004791259765625, "logits/rejected": -0.267578125, "logps/chosen": -0.166015625, "logps/rejected": -2.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8359375, "rewards/margins": 12.625, "rewards/rejected": -13.5, "step": 624 }, { "epoch": 0.23277467411545624, "grad_norm": 0.546875, "learning_rate": 1.7052784025444656e-06, "logits/chosen": -0.059326171875, "logits/rejected": -0.57421875, "logps/chosen": -0.30078125, "logps/rejected": -2.15625, "loss": 0.0012, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5078125, "rewards/margins": 9.25, "rewards/rejected": -10.75, "step": 625 }, { "epoch": 0.23314711359404097, "grad_norm": 0.162109375, "learning_rate": 1.7047551150410014e-06, "logits/chosen": -0.06884765625, "logits/rejected": -0.380859375, "logps/chosen": -0.81640625, "logps/rejected": -3.015625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.09375, "rewards/margins": 11.0, "rewards/rejected": -15.125, "step": 626 }, { "epoch": 0.2335195530726257, "grad_norm": 0.103515625, "learning_rate": 1.7042304668150243e-06, "logits/chosen": -0.115234375, "logits/rejected": -0.39453125, "logps/chosen": -0.318359375, "logps/rejected": -2.6875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.59375, "rewards/margins": 11.875, "rewards/rejected": -13.5, "step": 627 }, { "epoch": 0.23389199255121043, "grad_norm": 0.046875, "learning_rate": 1.7037044587536376e-06, "logits/chosen": 0.003021240234375, "logits/rejected": 0.05859375, "logps/chosen": -0.734375, "logps/rejected": -2.953125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.671875, "rewards/margins": 11.125, "rewards/rejected": -14.75, "step": 628 }, { "epoch": 0.23426443202979516, "grad_norm": 1.453125, "learning_rate": 1.703177091746244e-06, "logits/chosen": -0.359375, "logits/rejected": -0.263671875, "logps/chosen": -0.4921875, "logps/rejected": -2.25, "loss": 0.0036, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.453125, "rewards/margins": 8.8125, "rewards/rejected": -11.25, "step": 629 }, { "epoch": 0.2346368715083799, "grad_norm": 330.0, "learning_rate": 1.7026483666845427e-06, "logits/chosen": -0.005401611328125, "logits/rejected": -0.87890625, "logps/chosen": -1.421875, "logps/rejected": -2.25, "loss": 1.9297, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -7.09375, "rewards/margins": 4.15625, "rewards/rejected": -11.25, "step": 630 }, { "epoch": 0.23500931098696462, "grad_norm": 0.00012111663818359375, "learning_rate": 1.7021182844625308e-06, "logits/chosen": -0.025634765625, "logits/rejected": -0.2431640625, "logps/chosen": -0.123046875, "logps/rejected": -3.390625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6171875, "rewards/margins": 16.25, "rewards/rejected": -17.0, "step": 631 }, { "epoch": 0.23538175046554935, "grad_norm": 0.0010833740234375, "learning_rate": 1.7015868459764987e-06, "logits/chosen": -0.1484375, "logits/rejected": -0.1611328125, "logps/chosen": -0.2158203125, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.078125, "rewards/margins": 13.9375, "rewards/rejected": -15.0, "step": 632 }, { "epoch": 0.23575418994413408, "grad_norm": 11.0625, "learning_rate": 1.701054052125031e-06, "logits/chosen": -0.1640625, "logits/rejected": -0.07080078125, "logps/chosen": -0.32421875, "logps/rejected": -2.03125, "loss": 0.027, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.625, "rewards/margins": 8.5, "rewards/rejected": -10.125, "step": 633 }, { "epoch": 0.2361266294227188, "grad_norm": 0.024658203125, "learning_rate": 1.700519903809004e-06, "logits/chosen": -0.058837890625, "logits/rejected": -0.185546875, "logps/chosen": -0.248046875, "logps/rejected": -2.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2421875, "rewards/margins": 11.6875, "rewards/rejected": -12.9375, "step": 634 }, { "epoch": 0.23649906890130354, "grad_norm": 0.134765625, "learning_rate": 1.6999844019315834e-06, "logits/chosen": -0.031982421875, "logits/rejected": -0.310546875, "logps/chosen": -0.671875, "logps/rejected": -2.71875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.359375, "rewards/margins": 10.1875, "rewards/rejected": -13.5625, "step": 635 }, { "epoch": 0.23687150837988827, "grad_norm": 0.00124359130859375, "learning_rate": 1.6994475473982245e-06, "logits/chosen": 0.01104736328125, "logits/rejected": -0.2265625, "logps/chosen": -0.181640625, "logps/rejected": -2.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.90625, "rewards/margins": 13.875, "rewards/rejected": -14.8125, "step": 636 }, { "epoch": 0.237243947858473, "grad_norm": 0.80078125, "learning_rate": 1.6989093411166689e-06, "logits/chosen": 0.0341796875, "logits/rejected": 0.33984375, "logps/chosen": -0.5390625, "logps/rejected": -2.109375, "loss": 0.0013, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.703125, "rewards/margins": 7.875, "rewards/rejected": -10.5625, "step": 637 }, { "epoch": 0.23761638733705773, "grad_norm": 0.048095703125, "learning_rate": 1.6983697839969445e-06, "logits/chosen": -0.1435546875, "logits/rejected": -0.251953125, "logps/chosen": -0.43359375, "logps/rejected": -2.78125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 11.6875, "rewards/rejected": -13.875, "step": 638 }, { "epoch": 0.23798882681564246, "grad_norm": 24.5, "learning_rate": 1.6978288769513633e-06, "logits/chosen": -0.008544921875, "logits/rejected": -0.98828125, "logps/chosen": -0.435546875, "logps/rejected": -2.171875, "loss": 0.0571, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1875, "rewards/margins": 8.6875, "rewards/rejected": -10.875, "step": 639 }, { "epoch": 0.2383612662942272, "grad_norm": 0.04150390625, "learning_rate": 1.697286620894519e-06, "logits/chosen": 0.10546875, "logits/rejected": -0.419921875, "logps/chosen": -0.8515625, "logps/rejected": -3.0625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.25, "rewards/margins": 11.0625, "rewards/rejected": -15.3125, "step": 640 }, { "epoch": 0.23873370577281192, "grad_norm": 0.01226806640625, "learning_rate": 1.6967430167432873e-06, "logits/chosen": -0.01226806640625, "logits/rejected": -0.1591796875, "logps/chosen": -0.51171875, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5625, "rewards/margins": 13.5, "rewards/rejected": -16.0, "step": 641 }, { "epoch": 0.23910614525139665, "grad_norm": 16.875, "learning_rate": 1.6961980654168226e-06, "logits/chosen": -0.19921875, "logits/rejected": -0.375, "logps/chosen": -1.4375, "logps/rejected": -2.6875, "loss": 0.0221, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.1875, "rewards/margins": 6.25, "rewards/rejected": -13.4375, "step": 642 }, { "epoch": 0.23947858472998138, "grad_norm": 0.029296875, "learning_rate": 1.6956517678365577e-06, "logits/chosen": -0.134765625, "logits/rejected": -0.025146484375, "logps/chosen": -0.6640625, "logps/rejected": -2.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3125, "rewards/margins": 11.5625, "rewards/rejected": -14.875, "step": 643 }, { "epoch": 0.2398510242085661, "grad_norm": 0.34765625, "learning_rate": 1.6951041249262007e-06, "logits/chosen": -0.027587890625, "logits/rejected": -0.396484375, "logps/chosen": -0.69921875, "logps/rejected": -2.515625, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5, "rewards/margins": 9.0625, "rewards/rejected": -12.5625, "step": 644 }, { "epoch": 0.24022346368715083, "grad_norm": 2.734375, "learning_rate": 1.6945551376117361e-06, "logits/chosen": -0.023681640625, "logits/rejected": -1.0703125, "logps/chosen": -1.078125, "logps/rejected": -2.328125, "loss": 0.0043, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.375, "rewards/margins": 6.28125, "rewards/rejected": -11.625, "step": 645 }, { "epoch": 0.24059590316573556, "grad_norm": 0.00148773193359375, "learning_rate": 1.6940048068214202e-06, "logits/chosen": -0.12109375, "logits/rejected": -0.296875, "logps/chosen": -0.2578125, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.296875, "rewards/margins": 13.75, "rewards/rejected": -15.0, "step": 646 }, { "epoch": 0.2409683426443203, "grad_norm": 0.0001220703125, "learning_rate": 1.6934531334857813e-06, "logits/chosen": 0.1162109375, "logits/rejected": -0.2490234375, "logps/chosen": -0.087890625, "logps/rejected": -3.359375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4375, "rewards/margins": 16.375, "rewards/rejected": -16.75, "step": 647 }, { "epoch": 0.24134078212290502, "grad_norm": 68.5, "learning_rate": 1.6929001185376183e-06, "logits/chosen": 0.0052490234375, "logits/rejected": -0.51171875, "logps/chosen": -0.31640625, "logps/rejected": -2.0, "loss": 0.1934, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.578125, "rewards/margins": 8.4375, "rewards/rejected": -10.0625, "step": 648 }, { "epoch": 0.24171322160148975, "grad_norm": 0.017333984375, "learning_rate": 1.6923457629119981e-06, "logits/chosen": -0.00592041015625, "logits/rejected": -0.1962890625, "logps/chosen": -0.220703125, "logps/rejected": -2.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1015625, "rewards/margins": 11.5, "rewards/rejected": -12.5625, "step": 649 }, { "epoch": 0.24208566108007448, "grad_norm": 0.0673828125, "learning_rate": 1.6917900675462538e-06, "logits/chosen": -0.01708984375, "logits/rejected": -0.51171875, "logps/chosen": -0.42578125, "logps/rejected": -2.625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.140625, "rewards/margins": 11.0, "rewards/rejected": -13.125, "step": 650 }, { "epoch": 0.2424581005586592, "grad_norm": 0.0091552734375, "learning_rate": 1.6912330333799852e-06, "logits/chosen": -0.05126953125, "logits/rejected": -0.373046875, "logps/chosen": -0.5546875, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.765625, "rewards/margins": 12.5625, "rewards/rejected": -15.375, "step": 651 }, { "epoch": 0.24283054003724394, "grad_norm": 53.75, "learning_rate": 1.690674661355055e-06, "logits/chosen": 0.00970458984375, "logits/rejected": 0.20703125, "logps/chosen": -0.443359375, "logps/rejected": -1.8671875, "loss": 0.1006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 7.0625, "rewards/rejected": -9.3125, "step": 652 }, { "epoch": 0.24320297951582867, "grad_norm": 39.5, "learning_rate": 1.690114952415588e-06, "logits/chosen": -0.0654296875, "logits/rejected": -1.5, "logps/chosen": -0.55078125, "logps/rejected": -1.765625, "loss": 0.0525, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 6.0625, "rewards/rejected": -8.8125, "step": 653 }, { "epoch": 0.2435754189944134, "grad_norm": 0.01434326171875, "learning_rate": 1.68955390750797e-06, "logits/chosen": -0.049072265625, "logits/rejected": -0.08154296875, "logps/chosen": -0.3828125, "logps/rejected": -2.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.90625, "rewards/margins": 11.4375, "rewards/rejected": -13.375, "step": 654 }, { "epoch": 0.24394785847299813, "grad_norm": 0.007415771484375, "learning_rate": 1.688991527580845e-06, "logits/chosen": -0.0625, "logits/rejected": -0.50390625, "logps/chosen": -0.3203125, "logps/rejected": -2.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6015625, "rewards/margins": 12.1875, "rewards/rejected": -13.75, "step": 655 }, { "epoch": 0.24432029795158286, "grad_norm": 0.2265625, "learning_rate": 1.688427813585115e-06, "logits/chosen": -0.0003910064697265625, "logits/rejected": 0.0023651123046875, "logps/chosen": -0.328125, "logps/rejected": -2.484375, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6328125, "rewards/margins": 10.75, "rewards/rejected": -12.4375, "step": 656 }, { "epoch": 0.2446927374301676, "grad_norm": 0.2021484375, "learning_rate": 1.687862766473937e-06, "logits/chosen": 0.083984375, "logits/rejected": -0.88671875, "logps/chosen": -0.16015625, "logps/rejected": -2.484375, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8046875, "rewards/margins": 11.625, "rewards/rejected": -12.5, "step": 657 }, { "epoch": 0.24506517690875232, "grad_norm": 0.00038909912109375, "learning_rate": 1.6872963872027232e-06, "logits/chosen": 0.0279541015625, "logits/rejected": -0.232421875, "logps/chosen": -0.10546875, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.52734375, "rewards/margins": 15.0, "rewards/rejected": -15.5, "step": 658 }, { "epoch": 0.24543761638733705, "grad_norm": 0.00106048583984375, "learning_rate": 1.6867286767291371e-06, "logits/chosen": 0.0020599365234375, "logits/rejected": -0.1943359375, "logps/chosen": -0.435546875, "logps/rejected": -3.296875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.171875, "rewards/margins": 14.3125, "rewards/rejected": -16.5, "step": 659 }, { "epoch": 0.24581005586592178, "grad_norm": 78.0, "learning_rate": 1.686159636013094e-06, "logits/chosen": 0.056884765625, "logits/rejected": -0.5625, "logps/chosen": -0.796875, "logps/rejected": -2.125, "loss": 0.1738, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0, "rewards/margins": 6.625, "rewards/rejected": -10.625, "step": 660 }, { "epoch": 0.2461824953445065, "grad_norm": 0.6015625, "learning_rate": 1.685589266016758e-06, "logits/chosen": -0.15625, "logits/rejected": -0.875, "logps/chosen": -0.3359375, "logps/rejected": -1.96875, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6796875, "rewards/margins": 8.125, "rewards/rejected": -9.875, "step": 661 }, { "epoch": 0.24655493482309124, "grad_norm": 0.43359375, "learning_rate": 1.6850175677045405e-06, "logits/chosen": -0.04150390625, "logits/rejected": -1.0703125, "logps/chosen": -0.48046875, "logps/rejected": -2.515625, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.40625, "rewards/margins": 10.125, "rewards/rejected": -12.5, "step": 662 }, { "epoch": 0.24692737430167597, "grad_norm": 1.5625, "learning_rate": 1.6844445420430993e-06, "logits/chosen": -0.228515625, "logits/rejected": 0.08544921875, "logps/chosen": -0.15625, "logps/rejected": -2.171875, "loss": 0.0028, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.78125, "rewards/margins": 10.0625, "rewards/rejected": -10.8125, "step": 663 }, { "epoch": 0.2472998137802607, "grad_norm": 0.006195068359375, "learning_rate": 1.6838701900013368e-06, "logits/chosen": 0.061767578125, "logits/rejected": -0.404296875, "logps/chosen": -0.318359375, "logps/rejected": -3.015625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5859375, "rewards/margins": 13.5, "rewards/rejected": -15.0625, "step": 664 }, { "epoch": 0.24767225325884543, "grad_norm": 0.004150390625, "learning_rate": 1.6832945125503976e-06, "logits/chosen": -0.1416015625, "logits/rejected": -0.34765625, "logps/chosen": -0.2333984375, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.171875, "rewards/margins": 14.25, "rewards/rejected": -15.5, "step": 665 }, { "epoch": 0.24804469273743016, "grad_norm": 30.75, "learning_rate": 1.6827175106636675e-06, "logits/chosen": -0.23046875, "logits/rejected": -0.62890625, "logps/chosen": -0.59375, "logps/rejected": -1.390625, "loss": 0.0762, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.984375, "rewards/margins": 3.96875, "rewards/rejected": -6.9375, "step": 666 }, { "epoch": 0.24841713221601489, "grad_norm": 0.031494140625, "learning_rate": 1.6821391853167722e-06, "logits/chosen": -0.205078125, "logits/rejected": -0.2060546875, "logps/chosen": -0.328125, "logps/rejected": -2.53125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.640625, "rewards/margins": 11.0, "rewards/rejected": -12.625, "step": 667 }, { "epoch": 0.24878957169459961, "grad_norm": 0.0150146484375, "learning_rate": 1.6815595374875746e-06, "logits/chosen": 0.1083984375, "logits/rejected": -0.287109375, "logps/chosen": -0.400390625, "logps/rejected": -3.046875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0, "rewards/margins": 13.1875, "rewards/rejected": -15.25, "step": 668 }, { "epoch": 0.24916201117318434, "grad_norm": 0.9140625, "learning_rate": 1.6809785681561734e-06, "logits/chosen": -0.01019287109375, "logits/rejected": -1.25, "logps/chosen": -0.4296875, "logps/rejected": -1.8359375, "loss": 0.002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 7.03125, "rewards/rejected": -9.1875, "step": 669 }, { "epoch": 0.24953445065176907, "grad_norm": 0.0003757476806640625, "learning_rate": 1.6803962783049032e-06, "logits/chosen": -0.00131988525390625, "logits/rejected": -0.1923828125, "logps/chosen": -0.1640625, "logps/rejected": -3.234375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8203125, "rewards/margins": 15.375, "rewards/rejected": -16.25, "step": 670 }, { "epoch": 0.2499068901303538, "grad_norm": 2.171875, "learning_rate": 1.6798126689183296e-06, "logits/chosen": -0.2138671875, "logits/rejected": 0.1435546875, "logps/chosen": -0.37890625, "logps/rejected": -2.234375, "loss": 0.0037, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 9.3125, "rewards/rejected": -11.1875, "step": 671 }, { "epoch": 0.25027932960893856, "grad_norm": 55.75, "learning_rate": 1.6792277409832509e-06, "logits/chosen": -0.06396484375, "logits/rejected": -0.4140625, "logps/chosen": -0.609375, "logps/rejected": -1.59375, "loss": 0.1099, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.046875, "rewards/margins": 4.90625, "rewards/rejected": -7.9375, "step": 672 }, { "epoch": 0.2506517690875233, "grad_norm": 0.08984375, "learning_rate": 1.6786414954886937e-06, "logits/chosen": 0.173828125, "logits/rejected": -0.578125, "logps/chosen": -0.61328125, "logps/rejected": -2.90625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.078125, "rewards/margins": 11.5, "rewards/rejected": -14.5625, "step": 673 }, { "epoch": 0.251024208566108, "grad_norm": 0.4453125, "learning_rate": 1.6780539334259129e-06, "logits/chosen": -0.025390625, "logits/rejected": -0.67578125, "logps/chosen": -0.29296875, "logps/rejected": -2.203125, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4609375, "rewards/margins": 9.5, "rewards/rejected": -11.0, "step": 674 }, { "epoch": 0.25139664804469275, "grad_norm": 0.36328125, "learning_rate": 1.6774650557883901e-06, "logits/chosen": 0.01806640625, "logits/rejected": -0.765625, "logps/chosen": -0.3046875, "logps/rejected": -2.0625, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.53125, "rewards/margins": 8.8125, "rewards/rejected": -10.375, "step": 675 }, { "epoch": 0.2517690875232775, "grad_norm": 0.005828857421875, "learning_rate": 1.6768748635718302e-06, "logits/chosen": 0.041748046875, "logits/rejected": -0.32421875, "logps/chosen": -0.392578125, "logps/rejected": -3.015625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9609375, "rewards/margins": 13.125, "rewards/rejected": -15.0625, "step": 676 }, { "epoch": 0.2521415270018622, "grad_norm": 0.0067138671875, "learning_rate": 1.6762833577741615e-06, "logits/chosen": -0.1376953125, "logits/rejected": -0.2080078125, "logps/chosen": -0.447265625, "logps/rejected": -2.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.234375, "rewards/margins": 12.0, "rewards/rejected": -14.25, "step": 677 }, { "epoch": 0.25251396648044694, "grad_norm": 0.2109375, "learning_rate": 1.6756905393955335e-06, "logits/chosen": -0.2392578125, "logits/rejected": -0.26171875, "logps/chosen": -0.4140625, "logps/rejected": -2.484375, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 10.375, "rewards/rejected": -12.4375, "step": 678 }, { "epoch": 0.25288640595903167, "grad_norm": 0.008544921875, "learning_rate": 1.6750964094383146e-06, "logits/chosen": -0.0205078125, "logits/rejected": -0.404296875, "logps/chosen": -0.470703125, "logps/rejected": -2.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.34375, "rewards/margins": 12.25, "rewards/rejected": -14.5625, "step": 679 }, { "epoch": 0.2532588454376164, "grad_norm": 1.328125, "learning_rate": 1.6745009689070913e-06, "logits/chosen": 0.05322265625, "logits/rejected": -0.828125, "logps/chosen": -0.49609375, "logps/rejected": -1.9609375, "loss": 0.0028, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.484375, "rewards/margins": 7.3125, "rewards/rejected": -9.8125, "step": 680 }, { "epoch": 0.2536312849162011, "grad_norm": 0.162109375, "learning_rate": 1.6739042188086658e-06, "logits/chosen": -0.11962890625, "logits/rejected": -0.296875, "logps/chosen": -0.34375, "logps/rejected": -2.671875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.71875, "rewards/margins": 11.6875, "rewards/rejected": -13.375, "step": 681 }, { "epoch": 0.25400372439478586, "grad_norm": 0.00121307373046875, "learning_rate": 1.673306160152055e-06, "logits/chosen": -0.00457763671875, "logits/rejected": -0.484375, "logps/chosen": -0.361328125, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8125, "rewards/margins": 13.8125, "rewards/rejected": -15.625, "step": 682 }, { "epoch": 0.2543761638733706, "grad_norm": 0.04833984375, "learning_rate": 1.6727067939484879e-06, "logits/chosen": 0.05908203125, "logits/rejected": -0.228515625, "logps/chosen": -0.7109375, "logps/rejected": -2.78125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5625, "rewards/margins": 10.3125, "rewards/rejected": -13.875, "step": 683 }, { "epoch": 0.2547486033519553, "grad_norm": 0.01171875, "learning_rate": 1.6721061212114047e-06, "logits/chosen": 0.0203857421875, "logits/rejected": -0.357421875, "logps/chosen": -0.51171875, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5625, "rewards/margins": 12.4375, "rewards/rejected": -15.0, "step": 684 }, { "epoch": 0.25512104283054005, "grad_norm": 0.0003509521484375, "learning_rate": 1.6715041429564546e-06, "logits/chosen": 0.06884765625, "logits/rejected": -0.052734375, "logps/chosen": -0.17578125, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.87890625, "rewards/margins": 16.125, "rewards/rejected": -17.0, "step": 685 }, { "epoch": 0.2554934823091248, "grad_norm": 6.40625, "learning_rate": 1.670900860201494e-06, "logits/chosen": 0.126953125, "logits/rejected": 0.057373046875, "logps/chosen": -0.177734375, "logps/rejected": -2.15625, "loss": 0.0128, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.88671875, "rewards/margins": 9.875, "rewards/rejected": -10.75, "step": 686 }, { "epoch": 0.2558659217877095, "grad_norm": 0.0003070831298828125, "learning_rate": 1.6702962739665858e-06, "logits/chosen": -0.1796875, "logits/rejected": -0.37109375, "logps/chosen": -0.1123046875, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5625, "rewards/margins": 15.125, "rewards/rejected": -15.625, "step": 687 }, { "epoch": 0.25623836126629423, "grad_norm": 0.0012359619140625, "learning_rate": 1.669690385273996e-06, "logits/chosen": -0.0966796875, "logits/rejected": -0.2275390625, "logps/chosen": -0.220703125, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.109375, "rewards/margins": 14.625, "rewards/rejected": -15.75, "step": 688 }, { "epoch": 0.25661080074487896, "grad_norm": 0.001983642578125, "learning_rate": 1.6690831951481932e-06, "logits/chosen": -0.08642578125, "logits/rejected": -0.3203125, "logps/chosen": -0.1640625, "logps/rejected": -2.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8203125, "rewards/margins": 13.4375, "rewards/rejected": -14.25, "step": 689 }, { "epoch": 0.2569832402234637, "grad_norm": 102.0, "learning_rate": 1.668474704615847e-06, "logits/chosen": 0.08203125, "logits/rejected": 0.0194091796875, "logps/chosen": -0.220703125, "logps/rejected": -1.640625, "loss": 0.4648, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1015625, "rewards/margins": 7.09375, "rewards/rejected": -8.1875, "step": 690 }, { "epoch": 0.2573556797020484, "grad_norm": 67.5, "learning_rate": 1.667864914705825e-06, "logits/chosen": -0.205078125, "logits/rejected": -1.0625, "logps/chosen": -0.6875, "logps/rejected": -1.875, "loss": 0.1621, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4375, "rewards/margins": 5.9375, "rewards/rejected": -9.375, "step": 691 }, { "epoch": 0.25772811918063315, "grad_norm": 1.5390625, "learning_rate": 1.6672538264491925e-06, "logits/chosen": -0.11474609375, "logits/rejected": -1.4453125, "logps/chosen": -0.18359375, "logps/rejected": -1.5, "loss": 0.0042, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.921875, "rewards/margins": 6.625, "rewards/rejected": -7.53125, "step": 692 }, { "epoch": 0.2581005586592179, "grad_norm": 0.0859375, "learning_rate": 1.6666414408792094e-06, "logits/chosen": -0.00201416015625, "logits/rejected": -0.36328125, "logps/chosen": -0.462890625, "logps/rejected": -2.84375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3125, "rewards/margins": 11.875, "rewards/rejected": -14.1875, "step": 693 }, { "epoch": 0.2584729981378026, "grad_norm": 0.027587890625, "learning_rate": 1.66602775903133e-06, "logits/chosen": -0.353515625, "logits/rejected": -0.419921875, "logps/chosen": -0.32421875, "logps/rejected": -2.46875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.625, "rewards/margins": 10.75, "rewards/rejected": -12.375, "step": 694 }, { "epoch": 0.25884543761638734, "grad_norm": 0.00159454345703125, "learning_rate": 1.6654127819432003e-06, "logits/chosen": -0.09326171875, "logits/rejected": -0.2578125, "logps/chosen": -0.48828125, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 13.8125, "rewards/rejected": -16.25, "step": 695 }, { "epoch": 0.25921787709497207, "grad_norm": 0.5234375, "learning_rate": 1.6647965106546554e-06, "logits/chosen": -0.00153350830078125, "logits/rejected": -0.6015625, "logps/chosen": -0.33984375, "logps/rejected": -2.46875, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.703125, "rewards/margins": 10.6875, "rewards/rejected": -12.375, "step": 696 }, { "epoch": 0.2595903165735568, "grad_norm": 2.53125, "learning_rate": 1.6641789462077195e-06, "logits/chosen": -0.0859375, "logits/rejected": -0.8046875, "logps/chosen": -0.421875, "logps/rejected": -2.3125, "loss": 0.0037, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.109375, "rewards/margins": 9.5, "rewards/rejected": -11.625, "step": 697 }, { "epoch": 0.25996275605214153, "grad_norm": 0.0263671875, "learning_rate": 1.663560089646604e-06, "logits/chosen": -0.1484375, "logits/rejected": -0.12890625, "logps/chosen": -0.609375, "logps/rejected": -2.953125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.046875, "rewards/margins": 11.6875, "rewards/rejected": -14.75, "step": 698 }, { "epoch": 0.26033519553072626, "grad_norm": 0.01055908203125, "learning_rate": 1.6629399420177032e-06, "logits/chosen": 0.0546875, "logits/rejected": -0.1787109375, "logps/chosen": -0.267578125, "logps/rejected": -2.703125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3359375, "rewards/margins": 12.1875, "rewards/rejected": -13.5, "step": 699 }, { "epoch": 0.260707635009311, "grad_norm": 54.0, "learning_rate": 1.6623185043695964e-06, "logits/chosen": 0.1611328125, "logits/rejected": -0.58984375, "logps/chosen": -0.7109375, "logps/rejected": -2.296875, "loss": 0.0874, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.546875, "rewards/margins": 7.96875, "rewards/rejected": -11.5, "step": 700 }, { "epoch": 0.2610800744878957, "grad_norm": 0.0024871826171875, "learning_rate": 1.6616957777530427e-06, "logits/chosen": 0.0224609375, "logits/rejected": -0.34765625, "logps/chosen": -0.3046875, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5234375, "rewards/margins": 14.125, "rewards/rejected": -15.625, "step": 701 }, { "epoch": 0.26145251396648045, "grad_norm": 0.005859375, "learning_rate": 1.6610717632209814e-06, "logits/chosen": 0.012939453125, "logits/rejected": -0.2333984375, "logps/chosen": -0.26953125, "logps/rejected": -2.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 12.25, "rewards/rejected": -13.625, "step": 702 }, { "epoch": 0.2618249534450652, "grad_norm": 0.015869140625, "learning_rate": 1.6604464618285296e-06, "logits/chosen": -0.037109375, "logits/rejected": -0.30859375, "logps/chosen": -0.283203125, "logps/rejected": -2.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4140625, "rewards/margins": 11.3125, "rewards/rejected": -12.75, "step": 703 }, { "epoch": 0.2621973929236499, "grad_norm": 0.0140380859375, "learning_rate": 1.6598198746329793e-06, "logits/chosen": 0.150390625, "logits/rejected": -0.0498046875, "logps/chosen": -0.7265625, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.625, "rewards/margins": 12.5625, "rewards/rejected": -16.25, "step": 704 }, { "epoch": 0.26256983240223464, "grad_norm": 0.095703125, "learning_rate": 1.6591920026937978e-06, "logits/chosen": -0.037353515625, "logits/rejected": -0.6796875, "logps/chosen": -0.2578125, "logps/rejected": -2.15625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2890625, "rewards/margins": 9.5, "rewards/rejected": -10.75, "step": 705 }, { "epoch": 0.26294227188081937, "grad_norm": 0.014892578125, "learning_rate": 1.6585628470726239e-06, "logits/chosen": -0.18359375, "logits/rejected": -0.255859375, "logps/chosen": -0.375, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8671875, "rewards/margins": 13.875, "rewards/rejected": -15.75, "step": 706 }, { "epoch": 0.2633147113594041, "grad_norm": 0.00156402587890625, "learning_rate": 1.6579324088332673e-06, "logits/chosen": -0.1904296875, "logits/rejected": -0.3125, "logps/chosen": -0.27734375, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3828125, "rewards/margins": 13.625, "rewards/rejected": -15.0, "step": 707 }, { "epoch": 0.2636871508379888, "grad_norm": 5.90625, "learning_rate": 1.6573006890417065e-06, "logits/chosen": 0.043212890625, "logits/rejected": -0.8046875, "logps/chosen": -0.5390625, "logps/rejected": -2.296875, "loss": 0.0113, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.71875, "rewards/margins": 8.75, "rewards/rejected": -11.5, "step": 708 }, { "epoch": 0.26405959031657356, "grad_norm": 0.30078125, "learning_rate": 1.6566676887660863e-06, "logits/chosen": -0.1201171875, "logits/rejected": -0.419921875, "logps/chosen": -0.703125, "logps/rejected": -2.90625, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.53125, "rewards/margins": 11.0, "rewards/rejected": -14.5, "step": 709 }, { "epoch": 0.2644320297951583, "grad_norm": 0.00142669677734375, "learning_rate": 1.6560334090767173e-06, "logits/chosen": -0.07568359375, "logits/rejected": -0.2236328125, "logps/chosen": -0.234375, "logps/rejected": -3.265625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.171875, "rewards/margins": 15.125, "rewards/rejected": -16.25, "step": 710 }, { "epoch": 0.264804469273743, "grad_norm": 0.00012493133544921875, "learning_rate": 1.6553978510460733e-06, "logits/chosen": -0.0458984375, "logits/rejected": -0.0732421875, "logps/chosen": -0.173828125, "logps/rejected": -3.390625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8671875, "rewards/margins": 16.125, "rewards/rejected": -17.0, "step": 711 }, { "epoch": 0.26517690875232774, "grad_norm": 0.0020904541015625, "learning_rate": 1.6547610157487891e-06, "logits/chosen": -0.07421875, "logits/rejected": -0.03857421875, "logps/chosen": -0.34375, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.71875, "rewards/margins": 13.25, "rewards/rejected": -15.0, "step": 712 }, { "epoch": 0.2655493482309125, "grad_norm": 0.1806640625, "learning_rate": 1.6541229042616597e-06, "logits/chosen": -0.2431640625, "logits/rejected": -0.6328125, "logps/chosen": -0.5703125, "logps/rejected": -2.328125, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.859375, "rewards/margins": 8.75, "rewards/rejected": -11.625, "step": 713 }, { "epoch": 0.2659217877094972, "grad_norm": 0.0009918212890625, "learning_rate": 1.653483517663638e-06, "logits/chosen": 0.1630859375, "logits/rejected": -0.2294921875, "logps/chosen": -0.123046875, "logps/rejected": -2.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.61328125, "rewards/margins": 14.0625, "rewards/rejected": -14.625, "step": 714 }, { "epoch": 0.26629422718808193, "grad_norm": 0.002288818359375, "learning_rate": 1.6528428570358323e-06, "logits/chosen": -0.0162353515625, "logits/rejected": -0.2275390625, "logps/chosen": -0.228515625, "logps/rejected": -2.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.140625, "rewards/margins": 13.25, "rewards/rejected": -14.375, "step": 715 }, { "epoch": 0.26666666666666666, "grad_norm": 0.94921875, "learning_rate": 1.6522009234615054e-06, "logits/chosen": -0.10302734375, "logits/rejected": -0.27734375, "logps/chosen": -0.578125, "logps/rejected": -2.65625, "loss": 0.0016, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.875, "rewards/margins": 10.4375, "rewards/rejected": -13.3125, "step": 716 }, { "epoch": 0.2670391061452514, "grad_norm": 9.25, "learning_rate": 1.6515577180260729e-06, "logits/chosen": -0.06494140625, "logits/rejected": -0.427734375, "logps/chosen": -1.0859375, "logps/rejected": -2.75, "loss": 0.0082, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4375, "rewards/margins": 8.3125, "rewards/rejected": -13.75, "step": 717 }, { "epoch": 0.2674115456238361, "grad_norm": 0.00086212158203125, "learning_rate": 1.6509132418171003e-06, "logits/chosen": -0.119140625, "logits/rejected": -0.1826171875, "logps/chosen": -0.251953125, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 14.6875, "rewards/rejected": -15.9375, "step": 718 }, { "epoch": 0.26778398510242085, "grad_norm": 3.046875, "learning_rate": 1.650267495924302e-06, "logits/chosen": -0.228515625, "logits/rejected": -1.21875, "logps/chosen": -0.52734375, "logps/rejected": -1.875, "loss": 0.0058, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.640625, "rewards/margins": 6.75, "rewards/rejected": -9.375, "step": 719 }, { "epoch": 0.2681564245810056, "grad_norm": 0.57421875, "learning_rate": 1.6496204814395397e-06, "logits/chosen": -0.039306640625, "logits/rejected": -1.0859375, "logps/chosen": -0.25, "logps/rejected": -1.96875, "loss": 0.0011, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 8.625, "rewards/rejected": -9.875, "step": 720 }, { "epoch": 0.2685288640595903, "grad_norm": 0.0181884765625, "learning_rate": 1.6489721994568193e-06, "logits/chosen": 0.058349609375, "logits/rejected": -0.578125, "logps/chosen": -0.08642578125, "logps/rejected": -2.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.431640625, "rewards/margins": 13.75, "rewards/rejected": -14.1875, "step": 721 }, { "epoch": 0.26890130353817504, "grad_norm": 1.046875, "learning_rate": 1.6483226510722907e-06, "logits/chosen": -0.01483154296875, "logits/rejected": -0.234375, "logps/chosen": -1.0, "logps/rejected": -2.6875, "loss": 0.0014, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.0, "rewards/margins": 8.375, "rewards/rejected": -13.375, "step": 722 }, { "epoch": 0.26927374301675977, "grad_norm": 7.03125, "learning_rate": 1.6476718373842443e-06, "logits/chosen": -0.07958984375, "logits/rejected": -0.220703125, "logps/chosen": -1.0234375, "logps/rejected": -2.53125, "loss": 0.0094, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.125, "rewards/margins": 7.59375, "rewards/rejected": -12.6875, "step": 723 }, { "epoch": 0.2696461824953445, "grad_norm": 0.50390625, "learning_rate": 1.6470197594931102e-06, "logits/chosen": -0.0185546875, "logits/rejected": 0.06494140625, "logps/chosen": -0.1767578125, "logps/rejected": -2.546875, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8828125, "rewards/margins": 11.875, "rewards/rejected": -12.75, "step": 724 }, { "epoch": 0.27001862197392923, "grad_norm": 0.07421875, "learning_rate": 1.6463664185014566e-06, "logits/chosen": -0.0091552734375, "logits/rejected": -0.3359375, "logps/chosen": -0.65625, "logps/rejected": -3.140625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.296875, "rewards/margins": 12.375, "rewards/rejected": -15.6875, "step": 725 }, { "epoch": 0.27039106145251396, "grad_norm": 0.0024261474609375, "learning_rate": 1.6457118155139867e-06, "logits/chosen": -0.030029296875, "logits/rejected": 0.015625, "logps/chosen": -0.259765625, "logps/rejected": -2.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.296875, "rewards/margins": 13.125, "rewards/rejected": -14.375, "step": 726 }, { "epoch": 0.2707635009310987, "grad_norm": 0.0003490447998046875, "learning_rate": 1.6450559516375381e-06, "logits/chosen": 0.1455078125, "logits/rejected": -0.232421875, "logps/chosen": -0.181640625, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.90625, "rewards/margins": 15.125, "rewards/rejected": -16.0, "step": 727 }, { "epoch": 0.2711359404096834, "grad_norm": 0.8984375, "learning_rate": 1.64439882798108e-06, "logits/chosen": -0.1494140625, "logits/rejected": -0.50390625, "logps/chosen": -0.5234375, "logps/rejected": -2.34375, "loss": 0.0021, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.625, "rewards/margins": 9.125, "rewards/rejected": -11.75, "step": 728 }, { "epoch": 0.27150837988826815, "grad_norm": 0.000827789306640625, "learning_rate": 1.6437404456557118e-06, "logits/chosen": 0.123046875, "logits/rejected": -0.212890625, "logps/chosen": -0.283203125, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.421875, "rewards/margins": 14.375, "rewards/rejected": -15.8125, "step": 729 }, { "epoch": 0.2718808193668529, "grad_norm": 0.171875, "learning_rate": 1.6430808057746609e-06, "logits/chosen": -0.03759765625, "logits/rejected": -0.75390625, "logps/chosen": -0.3828125, "logps/rejected": -2.46875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.921875, "rewards/margins": 10.4375, "rewards/rejected": -12.375, "step": 730 }, { "epoch": 0.2722532588454376, "grad_norm": 0.00274658203125, "learning_rate": 1.642419909453282e-06, "logits/chosen": -0.0869140625, "logits/rejected": 0.05029296875, "logps/chosen": -0.177734375, "logps/rejected": -2.890625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.890625, "rewards/margins": 13.5, "rewards/rejected": -14.4375, "step": 731 }, { "epoch": 0.27262569832402234, "grad_norm": 0.00057220458984375, "learning_rate": 1.6417577578090527e-06, "logits/chosen": 0.037109375, "logits/rejected": -0.1328125, "logps/chosen": -0.2490234375, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 15.5, "rewards/rejected": -16.75, "step": 732 }, { "epoch": 0.27299813780260707, "grad_norm": 0.0079345703125, "learning_rate": 1.6410943519615747e-06, "logits/chosen": -0.0478515625, "logits/rejected": -0.04638671875, "logps/chosen": -0.40234375, "logps/rejected": -2.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0, "rewards/margins": 12.375, "rewards/rejected": -14.375, "step": 733 }, { "epoch": 0.2733705772811918, "grad_norm": 2.09375, "learning_rate": 1.6404296930325695e-06, "logits/chosen": -0.099609375, "logits/rejected": -0.8828125, "logps/chosen": -0.66796875, "logps/rejected": -1.9453125, "loss": 0.0037, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.34375, "rewards/margins": 6.375, "rewards/rejected": -9.75, "step": 734 }, { "epoch": 0.2737430167597765, "grad_norm": 0.00145721435546875, "learning_rate": 1.6397637821458774e-06, "logits/chosen": -0.0172119140625, "logits/rejected": -0.05029296875, "logps/chosen": -0.42578125, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 14.5, "rewards/rejected": -16.625, "step": 735 }, { "epoch": 0.27411545623836125, "grad_norm": 5.96875, "learning_rate": 1.639096620427456e-06, "logits/chosen": -0.185546875, "logits/rejected": 0.0859375, "logps/chosen": -0.6015625, "logps/rejected": -1.6171875, "loss": 0.0135, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0, "rewards/margins": 5.0625, "rewards/rejected": -8.0625, "step": 736 }, { "epoch": 0.274487895716946, "grad_norm": 0.01116943359375, "learning_rate": 1.6384282090053774e-06, "logits/chosen": -0.0277099609375, "logits/rejected": 0.08251953125, "logps/chosen": -0.171875, "logps/rejected": -2.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.859375, "rewards/margins": 12.5, "rewards/rejected": -13.375, "step": 737 }, { "epoch": 0.2748603351955307, "grad_norm": 12.75, "learning_rate": 1.637758549009827e-06, "logits/chosen": -0.01275634765625, "logits/rejected": -0.66015625, "logps/chosen": -0.57421875, "logps/rejected": -2.515625, "loss": 0.0232, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.875, "rewards/margins": 9.75, "rewards/rejected": -12.625, "step": 738 }, { "epoch": 0.27523277467411544, "grad_norm": 0.1640625, "learning_rate": 1.6370876415731016e-06, "logits/chosen": -0.1728515625, "logits/rejected": -0.71875, "logps/chosen": -0.26171875, "logps/rejected": -2.34375, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125, "rewards/margins": 10.375, "rewards/rejected": -11.6875, "step": 739 }, { "epoch": 0.2756052141527002, "grad_norm": 0.08837890625, "learning_rate": 1.6364154878296066e-06, "logits/chosen": -0.1884765625, "logits/rejected": -0.734375, "logps/chosen": -0.357421875, "logps/rejected": -2.484375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.78125, "rewards/margins": 10.625, "rewards/rejected": -12.375, "step": 740 }, { "epoch": 0.2759776536312849, "grad_norm": 0.01080322265625, "learning_rate": 1.6357420889158553e-06, "logits/chosen": 0.05419921875, "logits/rejected": -0.1279296875, "logps/chosen": -0.31640625, "logps/rejected": -2.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.578125, "rewards/margins": 12.5, "rewards/rejected": -14.0, "step": 741 }, { "epoch": 0.27635009310986963, "grad_norm": 0.020263671875, "learning_rate": 1.6350674459704664e-06, "logits/chosen": 0.0966796875, "logits/rejected": 0.091796875, "logps/chosen": -0.296875, "logps/rejected": -2.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.484375, "rewards/margins": 11.3125, "rewards/rejected": -12.8125, "step": 742 }, { "epoch": 0.27672253258845436, "grad_norm": 1.0078125, "learning_rate": 1.6343915601341616e-06, "logits/chosen": -0.1689453125, "logits/rejected": -0.64453125, "logps/chosen": -0.462890625, "logps/rejected": -2.578125, "loss": 0.0018, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3125, "rewards/margins": 10.5625, "rewards/rejected": -12.875, "step": 743 }, { "epoch": 0.2770949720670391, "grad_norm": 0.154296875, "learning_rate": 1.6337144325497644e-06, "logits/chosen": -0.15234375, "logits/rejected": -0.0654296875, "logps/chosen": -0.83984375, "logps/rejected": -3.078125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.1875, "rewards/margins": 11.125, "rewards/rejected": -15.375, "step": 744 }, { "epoch": 0.2774674115456238, "grad_norm": 0.0002613067626953125, "learning_rate": 1.633036064362198e-06, "logits/chosen": 0.1259765625, "logits/rejected": -0.2294921875, "logps/chosen": -0.25, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 15.4375, "rewards/rejected": -16.75, "step": 745 }, { "epoch": 0.27783985102420855, "grad_norm": 0.00029754638671875, "learning_rate": 1.6323564567184833e-06, "logits/chosen": -0.060546875, "logits/rejected": -0.31640625, "logps/chosen": -0.2109375, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0546875, "rewards/margins": 15.5, "rewards/rejected": -16.5, "step": 746 }, { "epoch": 0.2782122905027933, "grad_norm": 0.0004444122314453125, "learning_rate": 1.6316756107677367e-06, "logits/chosen": -0.04248046875, "logits/rejected": -0.287109375, "logps/chosen": -0.275390625, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.375, "rewards/margins": 14.875, "rewards/rejected": -16.25, "step": 747 }, { "epoch": 0.278584729981378, "grad_norm": 13.5625, "learning_rate": 1.6309935276611686e-06, "logits/chosen": -0.1767578125, "logits/rejected": 0.049560546875, "logps/chosen": -0.498046875, "logps/rejected": -2.109375, "loss": 0.0146, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.484375, "rewards/margins": 8.125, "rewards/rejected": -10.5625, "step": 748 }, { "epoch": 0.27895716945996274, "grad_norm": 0.01544189453125, "learning_rate": 1.6303102085520813e-06, "logits/chosen": 0.087890625, "logits/rejected": -0.1689453125, "logps/chosen": -0.4375, "logps/rejected": -3.046875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1875, "rewards/margins": 13.0625, "rewards/rejected": -15.25, "step": 749 }, { "epoch": 0.27932960893854747, "grad_norm": 195.0, "learning_rate": 1.6296256545958668e-06, "logits/chosen": 0.00982666015625, "logits/rejected": -0.349609375, "logps/chosen": -0.458984375, "logps/rejected": -1.7890625, "loss": 1.6406, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.296875, "rewards/margins": 6.65625, "rewards/rejected": -8.9375, "step": 750 }, { "epoch": 0.2797020484171322, "grad_norm": 0.018798828125, "learning_rate": 1.6289398669500052e-06, "logits/chosen": 0.21484375, "logits/rejected": -0.025634765625, "logps/chosen": -0.30078125, "logps/rejected": -2.828125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.515625, "rewards/margins": 12.625, "rewards/rejected": -14.125, "step": 751 }, { "epoch": 0.28007448789571693, "grad_norm": 0.00250244140625, "learning_rate": 1.6282528467740623e-06, "logits/chosen": 0.123046875, "logits/rejected": -0.13671875, "logps/chosen": -0.275390625, "logps/rejected": -2.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.375, "rewards/margins": 13.25, "rewards/rejected": -14.625, "step": 752 }, { "epoch": 0.28044692737430166, "grad_norm": 11.625, "learning_rate": 1.6275645952296885e-06, "logits/chosen": -0.328125, "logits/rejected": -0.83984375, "logps/chosen": -0.40625, "logps/rejected": -2.25, "loss": 0.0203, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 9.25, "rewards/rejected": -11.25, "step": 753 }, { "epoch": 0.2808193668528864, "grad_norm": 0.0008392333984375, "learning_rate": 1.6268751134806157e-06, "logits/chosen": -0.013916015625, "logits/rejected": -0.05615234375, "logps/chosen": -0.140625, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.703125, "rewards/margins": 15.75, "rewards/rejected": -16.5, "step": 754 }, { "epoch": 0.2811918063314711, "grad_norm": 9.441375732421875e-05, "learning_rate": 1.6261844026926558e-06, "logits/chosen": -0.08203125, "logits/rejected": 0.11181640625, "logps/chosen": -0.25, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 17.0, "rewards/rejected": -18.25, "step": 755 }, { "epoch": 0.28156424581005585, "grad_norm": 0.001312255859375, "learning_rate": 1.6254924640337e-06, "logits/chosen": -0.0771484375, "logits/rejected": -0.080078125, "logps/chosen": -0.33203125, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 13.8125, "rewards/rejected": -15.5, "step": 756 }, { "epoch": 0.2819366852886406, "grad_norm": 0.0517578125, "learning_rate": 1.624799298673714e-06, "logits/chosen": 0.0240478515625, "logits/rejected": -0.234375, "logps/chosen": -0.71875, "logps/rejected": -3.0, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.59375, "rewards/margins": 11.375, "rewards/rejected": -15.0, "step": 757 }, { "epoch": 0.2823091247672253, "grad_norm": 0.34375, "learning_rate": 1.624104907784739e-06, "logits/chosen": -0.00677490234375, "logits/rejected": -0.1748046875, "logps/chosen": -0.7421875, "logps/rejected": -2.921875, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.71875, "rewards/margins": 10.875, "rewards/rejected": -14.625, "step": 758 }, { "epoch": 0.28268156424581004, "grad_norm": 0.0208740234375, "learning_rate": 1.623409292540887e-06, "logits/chosen": 0.031982421875, "logits/rejected": -0.1376953125, "logps/chosen": -0.54296875, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.71875, "rewards/margins": 14.5, "rewards/rejected": -17.25, "step": 759 }, { "epoch": 0.28305400372439476, "grad_norm": 0.65234375, "learning_rate": 1.622712454118342e-06, "logits/chosen": -0.042724609375, "logits/rejected": -0.045654296875, "logps/chosen": -0.232421875, "logps/rejected": -2.453125, "loss": 0.0012, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1640625, "rewards/margins": 11.125, "rewards/rejected": -12.25, "step": 760 }, { "epoch": 0.2834264432029795, "grad_norm": 4.09375, "learning_rate": 1.6220143936953544e-06, "logits/chosen": -0.1201171875, "logits/rejected": -0.85546875, "logps/chosen": -0.3203125, "logps/rejected": -1.90625, "loss": 0.0059, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 7.9375, "rewards/rejected": -9.5625, "step": 761 }, { "epoch": 0.2837988826815642, "grad_norm": 0.2109375, "learning_rate": 1.6213151124522428e-06, "logits/chosen": -0.03662109375, "logits/rejected": 4.267692565917969e-05, "logps/chosen": -0.357421875, "logps/rejected": -2.546875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.78125, "rewards/margins": 11.0, "rewards/rejected": -12.75, "step": 762 }, { "epoch": 0.28417132216014895, "grad_norm": 31.375, "learning_rate": 1.6206146115713876e-06, "logits/chosen": 0.00653076171875, "logits/rejected": -0.8359375, "logps/chosen": -0.193359375, "logps/rejected": -1.3046875, "loss": 0.0344, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.96875, "rewards/margins": 5.5625, "rewards/rejected": -6.5625, "step": 763 }, { "epoch": 0.2845437616387337, "grad_norm": 0.04345703125, "learning_rate": 1.6199128922372337e-06, "logits/chosen": -0.18359375, "logits/rejected": 0.10791015625, "logps/chosen": -0.5390625, "logps/rejected": -2.71875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.703125, "rewards/margins": 10.8125, "rewards/rejected": -13.5, "step": 764 }, { "epoch": 0.2849162011173184, "grad_norm": 0.3984375, "learning_rate": 1.6192099556362851e-06, "logits/chosen": 0.1513671875, "logits/rejected": 0.1943359375, "logps/chosen": -0.296875, "logps/rejected": -2.40625, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.484375, "rewards/margins": 10.5, "rewards/rejected": -12.0, "step": 765 }, { "epoch": 0.28528864059590314, "grad_norm": 0.2431640625, "learning_rate": 1.6185058029571037e-06, "logits/chosen": 0.12255859375, "logits/rejected": -0.55859375, "logps/chosen": -0.193359375, "logps/rejected": -2.40625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.96484375, "rewards/margins": 11.125, "rewards/rejected": -12.0, "step": 766 }, { "epoch": 0.28566108007448787, "grad_norm": 0.01373291015625, "learning_rate": 1.6178004353903087e-06, "logits/chosen": -0.0206298828125, "logits/rejected": -0.1748046875, "logps/chosen": -0.80078125, "logps/rejected": -3.203125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0, "rewards/margins": 12.0, "rewards/rejected": -16.0, "step": 767 }, { "epoch": 0.2860335195530726, "grad_norm": 0.00011396408081054688, "learning_rate": 1.6170938541285725e-06, "logits/chosen": 0.1416015625, "logits/rejected": 0.1513671875, "logps/chosen": -0.22265625, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.109375, "rewards/margins": 17.0, "rewards/rejected": -18.0, "step": 768 }, { "epoch": 0.28640595903165733, "grad_norm": 0.1630859375, "learning_rate": 1.6163860603666201e-06, "logits/chosen": 0.09130859375, "logits/rejected": -0.6875, "logps/chosen": -0.2890625, "logps/rejected": -2.875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4453125, "rewards/margins": 13.0, "rewards/rejected": -14.375, "step": 769 }, { "epoch": 0.28677839851024206, "grad_norm": 0.01446533203125, "learning_rate": 1.615677055301227e-06, "logits/chosen": 0.1728515625, "logits/rejected": -0.07958984375, "logps/chosen": -0.2265625, "logps/rejected": -2.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1328125, "rewards/margins": 13.0, "rewards/rejected": -14.125, "step": 770 }, { "epoch": 0.2871508379888268, "grad_norm": 10.6875, "learning_rate": 1.614966840131216e-06, "logits/chosen": 0.05224609375, "logits/rejected": -0.7109375, "logps/chosen": -0.63671875, "logps/rejected": -2.390625, "loss": 0.0161, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.171875, "rewards/margins": 8.8125, "rewards/rejected": -12.0, "step": 771 }, { "epoch": 0.2875232774674115, "grad_norm": 0.6015625, "learning_rate": 1.6142554160574566e-06, "logits/chosen": -0.09130859375, "logits/rejected": -0.86328125, "logps/chosen": -0.2255859375, "logps/rejected": -2.453125, "loss": 0.0012, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.125, "rewards/margins": 11.125, "rewards/rejected": -12.25, "step": 772 }, { "epoch": 0.28789571694599625, "grad_norm": 0.0038604736328125, "learning_rate": 1.6135427842828625e-06, "logits/chosen": 0.01129150390625, "logits/rejected": -0.345703125, "logps/chosen": -0.423828125, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 14.875, "rewards/rejected": -17.0, "step": 773 }, { "epoch": 0.288268156424581, "grad_norm": 318.0, "learning_rate": 1.6128289460123887e-06, "logits/chosen": -0.016845703125, "logits/rejected": 0.087890625, "logps/chosen": -0.93359375, "logps/rejected": -2.28125, "loss": 0.9492, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.65625, "rewards/margins": 6.78125, "rewards/rejected": -11.4375, "step": 774 }, { "epoch": 0.2886405959031657, "grad_norm": 0.65625, "learning_rate": 1.6121139024530311e-06, "logits/chosen": -0.072265625, "logits/rejected": -0.671875, "logps/chosen": -0.22265625, "logps/rejected": -2.4375, "loss": 0.0012, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.109375, "rewards/margins": 11.125, "rewards/rejected": -12.25, "step": 775 }, { "epoch": 0.28901303538175044, "grad_norm": 0.0004425048828125, "learning_rate": 1.6113976548138232e-06, "logits/chosen": 0.1630859375, "logits/rejected": -0.0986328125, "logps/chosen": -0.189453125, "logps/rejected": -3.203125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.953125, "rewards/margins": 15.0625, "rewards/rejected": -16.0, "step": 776 }, { "epoch": 0.28938547486033517, "grad_norm": 0.0128173828125, "learning_rate": 1.6106802043058342e-06, "logits/chosen": -0.0218505859375, "logits/rejected": -0.55078125, "logps/chosen": -0.244140625, "logps/rejected": -2.765625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.21875, "rewards/margins": 12.625, "rewards/rejected": -13.875, "step": 777 }, { "epoch": 0.28975791433891995, "grad_norm": 0.138671875, "learning_rate": 1.6099615521421676e-06, "logits/chosen": -0.072265625, "logits/rejected": -0.734375, "logps/chosen": -0.1611328125, "logps/rejected": -2.4375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8046875, "rewards/margins": 11.375, "rewards/rejected": -12.1875, "step": 778 }, { "epoch": 0.2901303538175047, "grad_norm": 0.006072998046875, "learning_rate": 1.6092416995379582e-06, "logits/chosen": 0.1591796875, "logits/rejected": -0.1474609375, "logps/chosen": -0.4609375, "logps/rejected": -3.078125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3125, "rewards/margins": 13.0625, "rewards/rejected": -15.375, "step": 779 }, { "epoch": 0.2905027932960894, "grad_norm": 0.000499725341796875, "learning_rate": 1.608520647710371e-06, "logits/chosen": 0.08056640625, "logits/rejected": -0.376953125, "logps/chosen": -0.361328125, "logps/rejected": -3.453125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8046875, "rewards/margins": 15.5, "rewards/rejected": -17.25, "step": 780 }, { "epoch": 0.29087523277467414, "grad_norm": 2.953125, "learning_rate": 1.6077983978785982e-06, "logits/chosen": 0.04296875, "logits/rejected": 0.2060546875, "logps/chosen": -0.796875, "logps/rejected": -2.515625, "loss": 0.0057, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.96875, "rewards/margins": 8.625, "rewards/rejected": -12.5625, "step": 781 }, { "epoch": 0.29124767225325887, "grad_norm": 7.724761962890625e-05, "learning_rate": 1.6070749512638587e-06, "logits/chosen": 0.0240478515625, "logits/rejected": 0.041015625, "logps/chosen": -0.19140625, "logps/rejected": -3.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9609375, "rewards/margins": 16.75, "rewards/rejected": -17.75, "step": 782 }, { "epoch": 0.2916201117318436, "grad_norm": 0.00592041015625, "learning_rate": 1.606350309089394e-06, "logits/chosen": 0.08447265625, "logits/rejected": 0.2236328125, "logps/chosen": -0.22265625, "logps/rejected": -2.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.109375, "rewards/margins": 12.625, "rewards/rejected": -13.75, "step": 783 }, { "epoch": 0.29199255121042833, "grad_norm": 0.390625, "learning_rate": 1.605624472580467e-06, "logits/chosen": -0.0198974609375, "logits/rejected": -0.12255859375, "logps/chosen": -1.234375, "logps/rejected": -3.546875, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.1875, "rewards/margins": 11.625, "rewards/rejected": -17.75, "step": 784 }, { "epoch": 0.29236499068901306, "grad_norm": 1.765625, "learning_rate": 1.6048974429643611e-06, "logits/chosen": -0.03857421875, "logits/rejected": -0.0252685546875, "logps/chosen": -0.9375, "logps/rejected": -2.609375, "loss": 0.0019, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.6875, "rewards/margins": 8.375, "rewards/rejected": -13.0625, "step": 785 }, { "epoch": 0.2927374301675978, "grad_norm": 0.2470703125, "learning_rate": 1.6041692214703763e-06, "logits/chosen": 0.08251953125, "logits/rejected": -0.78125, "logps/chosen": -0.431640625, "logps/rejected": -2.34375, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 9.5625, "rewards/rejected": -11.75, "step": 786 }, { "epoch": 0.2931098696461825, "grad_norm": 0.01416015625, "learning_rate": 1.6034398093298277e-06, "logits/chosen": 0.0927734375, "logits/rejected": 0.1279296875, "logps/chosen": -0.294921875, "logps/rejected": -2.984375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.46875, "rewards/margins": 13.4375, "rewards/rejected": -14.9375, "step": 787 }, { "epoch": 0.29348230912476725, "grad_norm": 0.0003986358642578125, "learning_rate": 1.6027092077760444e-06, "logits/chosen": 0.107421875, "logits/rejected": 0.2177734375, "logps/chosen": -0.220703125, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1015625, "rewards/margins": 15.625, "rewards/rejected": -16.75, "step": 788 }, { "epoch": 0.293854748603352, "grad_norm": 0.00323486328125, "learning_rate": 1.6019774180443656e-06, "logits/chosen": -0.1533203125, "logits/rejected": -0.2109375, "logps/chosen": -0.5625, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8125, "rewards/margins": 12.875, "rewards/rejected": -15.75, "step": 789 }, { "epoch": 0.2942271880819367, "grad_norm": 2.234375, "learning_rate": 1.60124444137214e-06, "logits/chosen": -0.17578125, "logits/rejected": 0.17578125, "logps/chosen": -1.4921875, "logps/rejected": -2.78125, "loss": 0.0037, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.46875, "rewards/margins": 6.4375, "rewards/rejected": -13.875, "step": 790 }, { "epoch": 0.29459962756052144, "grad_norm": 0.095703125, "learning_rate": 1.600510278998724e-06, "logits/chosen": -0.001678466796875, "logits/rejected": -0.3984375, "logps/chosen": -0.2431640625, "logps/rejected": -2.640625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.21875, "rewards/margins": 12.0, "rewards/rejected": -13.25, "step": 791 }, { "epoch": 0.29497206703910617, "grad_norm": 0.0002536773681640625, "learning_rate": 1.5997749321654779e-06, "logits/chosen": 0.1630859375, "logits/rejected": -0.0654296875, "logps/chosen": -0.236328125, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1875, "rewards/margins": 15.5, "rewards/rejected": -16.75, "step": 792 }, { "epoch": 0.2953445065176909, "grad_norm": 0.005340576171875, "learning_rate": 1.599038402115765e-06, "logits/chosen": -0.059326171875, "logits/rejected": -0.1474609375, "logps/chosen": -0.119140625, "logps/rejected": -2.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.59765625, "rewards/margins": 13.375, "rewards/rejected": -14.0, "step": 793 }, { "epoch": 0.2957169459962756, "grad_norm": 0.0003814697265625, "learning_rate": 1.598300690094949e-06, "logits/chosen": -0.08447265625, "logits/rejected": 0.10009765625, "logps/chosen": -0.169921875, "logps/rejected": -3.203125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8515625, "rewards/margins": 15.125, "rewards/rejected": -16.0, "step": 794 }, { "epoch": 0.29608938547486036, "grad_norm": 0.03857421875, "learning_rate": 1.5975617973503927e-06, "logits/chosen": -0.10400390625, "logits/rejected": -0.146484375, "logps/chosen": -0.734375, "logps/rejected": -3.0625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6875, "rewards/margins": 11.625, "rewards/rejected": -15.375, "step": 795 }, { "epoch": 0.2964618249534451, "grad_norm": 0.005950927734375, "learning_rate": 1.596821725131455e-06, "logits/chosen": -0.06298828125, "logits/rejected": -0.203125, "logps/chosen": -0.31640625, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.578125, "rewards/margins": 13.625, "rewards/rejected": -15.25, "step": 796 }, { "epoch": 0.2968342644320298, "grad_norm": 0.435546875, "learning_rate": 1.596080474689489e-06, "logits/chosen": 0.0546875, "logits/rejected": 0.431640625, "logps/chosen": -0.1826171875, "logps/rejected": -2.5, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9140625, "rewards/margins": 11.5625, "rewards/rejected": -12.5, "step": 797 }, { "epoch": 0.29720670391061454, "grad_norm": 0.01434326171875, "learning_rate": 1.5953380472778404e-06, "logits/chosen": -0.029296875, "logits/rejected": -0.064453125, "logps/chosen": -0.53125, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.65625, "rewards/margins": 12.375, "rewards/rejected": -15.0625, "step": 798 }, { "epoch": 0.2975791433891993, "grad_norm": 82.5, "learning_rate": 1.5945944441518445e-06, "logits/chosen": -0.006317138671875, "logits/rejected": 0.09033203125, "logps/chosen": -0.90625, "logps/rejected": -2.71875, "loss": 0.1367, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.53125, "rewards/margins": 9.125, "rewards/rejected": -13.625, "step": 799 }, { "epoch": 0.297951582867784, "grad_norm": 9.8125, "learning_rate": 1.5938496665688248e-06, "logits/chosen": -0.01458740234375, "logits/rejected": -0.40625, "logps/chosen": -0.181640625, "logps/rejected": -1.8125, "loss": 0.0184, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.90625, "rewards/margins": 8.1875, "rewards/rejected": -9.0625, "step": 800 }, { "epoch": 0.29832402234636873, "grad_norm": 0.00023937225341796875, "learning_rate": 1.593103715788091e-06, "logits/chosen": -0.08056640625, "logits/rejected": -0.15625, "logps/chosen": -0.283203125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.421875, "rewards/margins": 15.875, "rewards/rejected": -17.25, "step": 801 }, { "epoch": 0.29869646182495346, "grad_norm": 0.298828125, "learning_rate": 1.5923565930709354e-06, "logits/chosen": 0.0194091796875, "logits/rejected": -0.70703125, "logps/chosen": -0.48046875, "logps/rejected": -2.609375, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.40625, "rewards/margins": 10.625, "rewards/rejected": -13.0625, "step": 802 }, { "epoch": 0.2990689013035382, "grad_norm": 4.887580871582031e-05, "learning_rate": 1.5916082996806336e-06, "logits/chosen": 0.041259765625, "logits/rejected": -0.2373046875, "logps/chosen": -0.119140625, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.59375, "rewards/margins": 17.25, "rewards/rejected": -17.75, "step": 803 }, { "epoch": 0.2994413407821229, "grad_norm": 0.1259765625, "learning_rate": 1.5908588368824386e-06, "logits/chosen": 0.10546875, "logits/rejected": -0.7578125, "logps/chosen": -0.37109375, "logps/rejected": -2.5, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.859375, "rewards/margins": 10.625, "rewards/rejected": -12.5, "step": 804 }, { "epoch": 0.29981378026070765, "grad_norm": 0.001983642578125, "learning_rate": 1.5901082059435825e-06, "logits/chosen": -0.0201416015625, "logits/rejected": -0.0859375, "logps/chosen": -0.36328125, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8203125, "rewards/margins": 15.3125, "rewards/rejected": -17.125, "step": 805 }, { "epoch": 0.3001862197392924, "grad_norm": 0.001983642578125, "learning_rate": 1.5893564081332713e-06, "logits/chosen": 0.01239013671875, "logits/rejected": -0.1650390625, "logps/chosen": -0.5234375, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.625, "rewards/margins": 15.625, "rewards/rejected": -18.25, "step": 806 }, { "epoch": 0.3005586592178771, "grad_norm": 5.0, "learning_rate": 1.588603444722685e-06, "logits/chosen": -0.1435546875, "logits/rejected": -0.68359375, "logps/chosen": -0.427734375, "logps/rejected": -2.515625, "loss": 0.008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.140625, "rewards/margins": 10.4375, "rewards/rejected": -12.5625, "step": 807 }, { "epoch": 0.30093109869646184, "grad_norm": 0.01153564453125, "learning_rate": 1.5878493169849736e-06, "logits/chosen": -0.0478515625, "logits/rejected": 0.00726318359375, "logps/chosen": -0.39453125, "logps/rejected": -2.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.96875, "rewards/margins": 12.0625, "rewards/rejected": -14.0, "step": 808 }, { "epoch": 0.30130353817504657, "grad_norm": 0.9453125, "learning_rate": 1.5870940261952563e-06, "logits/chosen": -0.0230712890625, "logits/rejected": -0.8046875, "logps/chosen": -0.41796875, "logps/rejected": -2.34375, "loss": 0.0017, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.09375, "rewards/margins": 9.625, "rewards/rejected": -11.75, "step": 809 }, { "epoch": 0.3016759776536313, "grad_norm": 0.1572265625, "learning_rate": 1.5863375736306186e-06, "logits/chosen": 0.00045013427734375, "logits/rejected": 0.0693359375, "logps/chosen": -0.65234375, "logps/rejected": -2.8125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.265625, "rewards/margins": 10.75, "rewards/rejected": -14.0, "step": 810 }, { "epoch": 0.30204841713221603, "grad_norm": 0.0002002716064453125, "learning_rate": 1.5855799605701104e-06, "logits/chosen": -0.0517578125, "logits/rejected": 0.08642578125, "logps/chosen": -0.26953125, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3515625, "rewards/margins": 16.125, "rewards/rejected": -17.5, "step": 811 }, { "epoch": 0.30242085661080076, "grad_norm": 0.07177734375, "learning_rate": 1.5848211882947444e-06, "logits/chosen": 0.028564453125, "logits/rejected": 0.0034637451171875, "logps/chosen": -0.65625, "logps/rejected": -3.0625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 12.0, "rewards/rejected": -15.3125, "step": 812 }, { "epoch": 0.3027932960893855, "grad_norm": 0.0155029296875, "learning_rate": 1.5840612580874922e-06, "logits/chosen": -0.06982421875, "logits/rejected": 0.00104522705078125, "logps/chosen": -0.494140625, "logps/rejected": -2.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.46875, "rewards/margins": 11.25, "rewards/rejected": -13.75, "step": 813 }, { "epoch": 0.3031657355679702, "grad_norm": 0.000705718994140625, "learning_rate": 1.583300171233285e-06, "logits/chosen": -0.042724609375, "logits/rejected": 0.05224609375, "logps/chosen": -0.1953125, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9765625, "rewards/margins": 14.75, "rewards/rejected": -15.6875, "step": 814 }, { "epoch": 0.30353817504655495, "grad_norm": 4.84375, "learning_rate": 1.5825379290190076e-06, "logits/chosen": 0.023681640625, "logits/rejected": -0.1591796875, "logps/chosen": -0.435546875, "logps/rejected": -2.21875, "loss": 0.0064, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.171875, "rewards/margins": 8.9375, "rewards/rejected": -11.125, "step": 815 }, { "epoch": 0.3039106145251397, "grad_norm": 0.00147247314453125, "learning_rate": 1.5817745327335e-06, "logits/chosen": 0.0004787445068359375, "logits/rejected": 0.07275390625, "logps/chosen": -0.28515625, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.421875, "rewards/margins": 14.5625, "rewards/rejected": -16.0, "step": 816 }, { "epoch": 0.3042830540037244, "grad_norm": 0.05615234375, "learning_rate": 1.5810099836675531e-06, "logits/chosen": 0.07421875, "logits/rejected": 0.1259765625, "logps/chosen": -0.48828125, "logps/rejected": -2.9375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 12.1875, "rewards/rejected": -14.625, "step": 817 }, { "epoch": 0.30465549348230914, "grad_norm": 0.0004215240478515625, "learning_rate": 1.5802442831139068e-06, "logits/chosen": -0.037353515625, "logits/rejected": -0.22265625, "logps/chosen": -0.41015625, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 15.875, "rewards/rejected": -18.0, "step": 818 }, { "epoch": 0.30502793296089387, "grad_norm": 0.162109375, "learning_rate": 1.579477432367248e-06, "logits/chosen": -0.25390625, "logits/rejected": -0.279296875, "logps/chosen": -0.51171875, "logps/rejected": -2.53125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5625, "rewards/margins": 10.125, "rewards/rejected": -12.75, "step": 819 }, { "epoch": 0.3054003724394786, "grad_norm": 0.038330078125, "learning_rate": 1.5787094327242087e-06, "logits/chosen": -0.09619140625, "logits/rejected": -0.275390625, "logps/chosen": -0.2421875, "logps/rejected": -2.5625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2109375, "rewards/margins": 11.5, "rewards/rejected": -12.75, "step": 820 }, { "epoch": 0.3057728119180633, "grad_norm": 5.90625, "learning_rate": 1.577940285483363e-06, "logits/chosen": -0.037353515625, "logits/rejected": -0.8515625, "logps/chosen": -0.296875, "logps/rejected": -1.421875, "loss": 0.0155, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.484375, "rewards/margins": 5.625, "rewards/rejected": -7.125, "step": 821 }, { "epoch": 0.30614525139664805, "grad_norm": 0.00445556640625, "learning_rate": 1.5771699919452259e-06, "logits/chosen": 0.125, "logits/rejected": -0.04296875, "logps/chosen": -0.443359375, "logps/rejected": -2.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 12.5, "rewards/rejected": -14.6875, "step": 822 }, { "epoch": 0.3065176908752328, "grad_norm": 0.08203125, "learning_rate": 1.5763985534122503e-06, "logits/chosen": -0.169921875, "logits/rejected": -0.53125, "logps/chosen": -0.17578125, "logps/rejected": -2.40625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8828125, "rewards/margins": 11.125, "rewards/rejected": -12.0, "step": 823 }, { "epoch": 0.3068901303538175, "grad_norm": 0.0002727508544921875, "learning_rate": 1.5756259711888258e-06, "logits/chosen": 0.09814453125, "logits/rejected": 0.03369140625, "logps/chosen": -0.2255859375, "logps/rejected": -3.328125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.125, "rewards/margins": 15.5, "rewards/rejected": -16.625, "step": 824 }, { "epoch": 0.30726256983240224, "grad_norm": 0.0012359619140625, "learning_rate": 1.5748522465812748e-06, "logits/chosen": 0.1572265625, "logits/rejected": -0.051025390625, "logps/chosen": -0.408203125, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.046875, "rewards/margins": 14.375, "rewards/rejected": -16.5, "step": 825 }, { "epoch": 0.307635009310987, "grad_norm": 0.10595703125, "learning_rate": 1.574077380897852e-06, "logits/chosen": 0.072265625, "logits/rejected": -0.373046875, "logps/chosen": -0.412109375, "logps/rejected": -2.8125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 12.0, "rewards/rejected": -14.125, "step": 826 }, { "epoch": 0.3080074487895717, "grad_norm": 0.000530242919921875, "learning_rate": 1.573301375448741e-06, "logits/chosen": 0.06103515625, "logits/rejected": -0.263671875, "logps/chosen": -0.5, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5, "rewards/margins": 15.875, "rewards/rejected": -18.375, "step": 827 }, { "epoch": 0.30837988826815643, "grad_norm": 0.00421142578125, "learning_rate": 1.572524231546053e-06, "logits/chosen": 0.1630859375, "logits/rejected": -0.123046875, "logps/chosen": -0.4765625, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.390625, "rewards/margins": 13.9375, "rewards/rejected": -16.25, "step": 828 }, { "epoch": 0.30875232774674116, "grad_norm": 0.130859375, "learning_rate": 1.5717459505038243e-06, "logits/chosen": 0.0751953125, "logits/rejected": -0.345703125, "logps/chosen": -0.36328125, "logps/rejected": -2.375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8125, "rewards/margins": 10.125, "rewards/rejected": -11.875, "step": 829 }, { "epoch": 0.3091247672253259, "grad_norm": 0.004425048828125, "learning_rate": 1.5709665336380133e-06, "logits/chosen": 0.021240234375, "logits/rejected": -0.01190185546875, "logps/chosen": -0.345703125, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.734375, "rewards/margins": 14.125, "rewards/rejected": -15.875, "step": 830 }, { "epoch": 0.3094972067039106, "grad_norm": 0.1865234375, "learning_rate": 1.5701859822664992e-06, "logits/chosen": 0.0400390625, "logits/rejected": -0.28515625, "logps/chosen": -0.5546875, "logps/rejected": -2.90625, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.765625, "rewards/margins": 11.75, "rewards/rejected": -14.5, "step": 831 }, { "epoch": 0.30986964618249535, "grad_norm": 0.7734375, "learning_rate": 1.56940429770908e-06, "logits/chosen": -0.0245361328125, "logits/rejected": -0.26953125, "logps/chosen": -0.1982421875, "logps/rejected": -2.40625, "loss": 0.0015, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9921875, "rewards/margins": 11.125, "rewards/rejected": -12.125, "step": 832 }, { "epoch": 0.3102420856610801, "grad_norm": 0.00799560546875, "learning_rate": 1.5686214812874688e-06, "logits/chosen": 0.0849609375, "logits/rejected": 0.1953125, "logps/chosen": -0.177734375, "logps/rejected": -3.078125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.88671875, "rewards/margins": 14.5, "rewards/rejected": -15.375, "step": 833 }, { "epoch": 0.3106145251396648, "grad_norm": 0.154296875, "learning_rate": 1.5678375343252933e-06, "logits/chosen": -0.1220703125, "logits/rejected": -0.2421875, "logps/chosen": -0.30078125, "logps/rejected": -2.5625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5078125, "rewards/margins": 11.3125, "rewards/rejected": -12.8125, "step": 834 }, { "epoch": 0.31098696461824954, "grad_norm": 0.01531982421875, "learning_rate": 1.5670524581480924e-06, "logits/chosen": 0.061279296875, "logits/rejected": 0.09716796875, "logps/chosen": -0.55078125, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 12.625, "rewards/rejected": -15.375, "step": 835 }, { "epoch": 0.31135940409683427, "grad_norm": 34.75, "learning_rate": 1.5662662540833147e-06, "logits/chosen": -0.030029296875, "logits/rejected": -0.1611328125, "logps/chosen": -0.6875, "logps/rejected": -2.203125, "loss": 0.0581, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4375, "rewards/margins": 7.5625, "rewards/rejected": -11.0, "step": 836 }, { "epoch": 0.311731843575419, "grad_norm": 0.000629425048828125, "learning_rate": 1.5654789234603152e-06, "logits/chosen": 0.00982666015625, "logits/rejected": -0.15625, "logps/chosen": -0.37109375, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.859375, "rewards/margins": 14.5, "rewards/rejected": -16.375, "step": 837 }, { "epoch": 0.31210428305400373, "grad_norm": 0.00014495849609375, "learning_rate": 1.5646904676103542e-06, "logits/chosen": 0.0703125, "logits/rejected": 0.08447265625, "logps/chosen": -0.1513671875, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7578125, "rewards/margins": 16.875, "rewards/rejected": -17.625, "step": 838 }, { "epoch": 0.31247672253258846, "grad_norm": 13.375, "learning_rate": 1.5639008878665947e-06, "logits/chosen": 0.013916015625, "logits/rejected": 0.1640625, "logps/chosen": -0.328125, "logps/rejected": -2.203125, "loss": 0.0275, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.640625, "rewards/margins": 9.375, "rewards/rejected": -11.0, "step": 839 }, { "epoch": 0.3128491620111732, "grad_norm": 1.171875, "learning_rate": 1.5631101855641e-06, "logits/chosen": -0.1259765625, "logits/rejected": -0.875, "logps/chosen": -0.6328125, "logps/rejected": -2.46875, "loss": 0.0016, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.15625, "rewards/margins": 9.1875, "rewards/rejected": -12.3125, "step": 840 }, { "epoch": 0.3132216014897579, "grad_norm": 0.0849609375, "learning_rate": 1.5623183620398308e-06, "logits/chosen": -0.10400390625, "logits/rejected": -0.6640625, "logps/chosen": -0.2080078125, "logps/rejected": -2.6875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0390625, "rewards/margins": 12.4375, "rewards/rejected": -13.5, "step": 841 }, { "epoch": 0.31359404096834265, "grad_norm": 0.03759765625, "learning_rate": 1.561525418632645e-06, "logits/chosen": -0.1171875, "logits/rejected": -0.6875, "logps/chosen": -0.271484375, "logps/rejected": -2.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.359375, "rewards/margins": 12.0, "rewards/rejected": -13.375, "step": 842 }, { "epoch": 0.3139664804469274, "grad_norm": 3.46875, "learning_rate": 1.560731356683293e-06, "logits/chosen": 0.03466796875, "logits/rejected": -0.314453125, "logps/chosen": -0.158203125, "logps/rejected": -2.0, "loss": 0.0091, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.79296875, "rewards/margins": 9.25, "rewards/rejected": -10.0, "step": 843 }, { "epoch": 0.3143389199255121, "grad_norm": 42.0, "learning_rate": 1.559936177534416e-06, "logits/chosen": -0.007720947265625, "logits/rejected": 0.326171875, "logps/chosen": -0.8828125, "logps/rejected": -2.390625, "loss": 0.0452, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.40625, "rewards/margins": 7.5, "rewards/rejected": -11.9375, "step": 844 }, { "epoch": 0.31471135940409684, "grad_norm": 2.171875, "learning_rate": 1.5591398825305461e-06, "logits/chosen": -0.267578125, "logits/rejected": 0.0283203125, "logps/chosen": -0.44921875, "logps/rejected": -2.46875, "loss": 0.0037, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25, "rewards/margins": 10.125, "rewards/rejected": -12.375, "step": 845 }, { "epoch": 0.31508379888268156, "grad_norm": 0.0810546875, "learning_rate": 1.5583424730181004e-06, "logits/chosen": 0.00299072265625, "logits/rejected": 0.1279296875, "logps/chosen": -0.291015625, "logps/rejected": -2.21875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4609375, "rewards/margins": 9.625, "rewards/rejected": -11.0625, "step": 846 }, { "epoch": 0.3154562383612663, "grad_norm": 0.62890625, "learning_rate": 1.557543950345381e-06, "logits/chosen": -0.11474609375, "logits/rejected": 0.2021484375, "logps/chosen": -0.86328125, "logps/rejected": -3.203125, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.3125, "rewards/margins": 11.6875, "rewards/rejected": -16.0, "step": 847 }, { "epoch": 0.315828677839851, "grad_norm": 0.0093994140625, "learning_rate": 1.5567443158625728e-06, "logits/chosen": -0.169921875, "logits/rejected": 0.07666015625, "logps/chosen": -0.43359375, "logps/rejected": -2.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 12.0, "rewards/rejected": -14.1875, "step": 848 }, { "epoch": 0.31620111731843575, "grad_norm": 137.0, "learning_rate": 1.5559435709217392e-06, "logits/chosen": -0.1044921875, "logits/rejected": 0.13671875, "logps/chosen": -0.27734375, "logps/rejected": -1.9375, "loss": 0.7852, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -1.390625, "rewards/margins": 8.3125, "rewards/rejected": -9.6875, "step": 849 }, { "epoch": 0.3165735567970205, "grad_norm": 13.5625, "learning_rate": 1.555141716876823e-06, "logits/chosen": -0.2001953125, "logits/rejected": -0.10546875, "logps/chosen": -0.51953125, "logps/rejected": -2.71875, "loss": 0.019, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.59375, "rewards/margins": 10.9375, "rewards/rejected": -13.5, "step": 850 }, { "epoch": 0.3169459962756052, "grad_norm": 1.5546875, "learning_rate": 1.5543387550836409e-06, "logits/chosen": -0.203125, "logits/rejected": 0.0703125, "logps/chosen": -0.3203125, "logps/rejected": -1.828125, "loss": 0.0029, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.59375, "rewards/margins": 7.5625, "rewards/rejected": -9.125, "step": 851 }, { "epoch": 0.31731843575418994, "grad_norm": 0.000263214111328125, "learning_rate": 1.5535346868998834e-06, "logits/chosen": 0.07763671875, "logits/rejected": -0.10986328125, "logps/chosen": -0.25, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 15.625, "rewards/rejected": -16.875, "step": 852 }, { "epoch": 0.31769087523277467, "grad_norm": 0.00035858154296875, "learning_rate": 1.5527295136851114e-06, "logits/chosen": -0.0299072265625, "logits/rejected": 0.10888671875, "logps/chosen": -0.197265625, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.98828125, "rewards/margins": 17.375, "rewards/rejected": -18.25, "step": 853 }, { "epoch": 0.3180633147113594, "grad_norm": 0.00115966796875, "learning_rate": 1.5519232368007544e-06, "logits/chosen": -0.271484375, "logits/rejected": 0.034423828125, "logps/chosen": -0.259765625, "logps/rejected": -3.046875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.296875, "rewards/margins": 14.0, "rewards/rejected": -15.25, "step": 854 }, { "epoch": 0.31843575418994413, "grad_norm": 0.00014495849609375, "learning_rate": 1.5511158576101078e-06, "logits/chosen": 0.0986328125, "logits/rejected": 0.01025390625, "logps/chosen": -0.39453125, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.96875, "rewards/margins": 16.75, "rewards/rejected": -18.75, "step": 855 }, { "epoch": 0.31880819366852886, "grad_norm": 0.07666015625, "learning_rate": 1.5503073774783313e-06, "logits/chosen": 0.08154296875, "logits/rejected": 0.005126953125, "logps/chosen": -0.515625, "logps/rejected": -2.9375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.578125, "rewards/margins": 12.1875, "rewards/rejected": -14.75, "step": 856 }, { "epoch": 0.3191806331471136, "grad_norm": 0.515625, "learning_rate": 1.549497797772446e-06, "logits/chosen": -0.19140625, "logits/rejected": 0.004638671875, "logps/chosen": -0.765625, "logps/rejected": -2.59375, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.828125, "rewards/margins": 9.125, "rewards/rejected": -13.0, "step": 857 }, { "epoch": 0.3195530726256983, "grad_norm": 0.6171875, "learning_rate": 1.5486871198613316e-06, "logits/chosen": 0.0145263671875, "logits/rejected": 0.1376953125, "logps/chosen": -0.4453125, "logps/rejected": -2.71875, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.234375, "rewards/margins": 11.375, "rewards/rejected": -13.625, "step": 858 }, { "epoch": 0.31992551210428305, "grad_norm": 0.00183868408203125, "learning_rate": 1.5478753451157257e-06, "logits/chosen": 0.1103515625, "logits/rejected": 0.1376953125, "logps/chosen": -0.578125, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.90625, "rewards/margins": 15.25, "rewards/rejected": -18.25, "step": 859 }, { "epoch": 0.3202979515828678, "grad_norm": 71.0, "learning_rate": 1.5470624749082194e-06, "logits/chosen": -0.0341796875, "logits/rejected": -0.421875, "logps/chosen": -0.43359375, "logps/rejected": -1.984375, "loss": 0.3457, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.171875, "rewards/margins": 7.71875, "rewards/rejected": -9.875, "step": 860 }, { "epoch": 0.3206703910614525, "grad_norm": 0.51171875, "learning_rate": 1.5462485106132573e-06, "logits/chosen": 0.0191650390625, "logits/rejected": -0.328125, "logps/chosen": -0.38671875, "logps/rejected": -2.9375, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9375, "rewards/margins": 12.6875, "rewards/rejected": -14.625, "step": 861 }, { "epoch": 0.32104283054003724, "grad_norm": 0.412109375, "learning_rate": 1.5454334536071326e-06, "logits/chosen": 0.26171875, "logits/rejected": -0.6328125, "logps/chosen": -0.392578125, "logps/rejected": -2.75, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.96875, "rewards/margins": 11.75, "rewards/rejected": -13.75, "step": 862 }, { "epoch": 0.32141527001862197, "grad_norm": 0.048095703125, "learning_rate": 1.5446173052679877e-06, "logits/chosen": -0.10205078125, "logits/rejected": 0.17578125, "logps/chosen": -0.44921875, "logps/rejected": -2.71875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25, "rewards/margins": 11.3125, "rewards/rejected": -13.5625, "step": 863 }, { "epoch": 0.3217877094972067, "grad_norm": 31.0, "learning_rate": 1.5438000669758082e-06, "logits/chosen": 0.03173828125, "logits/rejected": -0.80078125, "logps/chosen": -0.7265625, "logps/rejected": -2.46875, "loss": 0.0525, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.625, "rewards/margins": 8.75, "rewards/rejected": -12.375, "step": 864 }, { "epoch": 0.3221601489757914, "grad_norm": 0.08642578125, "learning_rate": 1.542981740112425e-06, "logits/chosen": 0.0869140625, "logits/rejected": -0.54296875, "logps/chosen": -0.2109375, "logps/rejected": -2.78125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0546875, "rewards/margins": 12.9375, "rewards/rejected": -14.0, "step": 865 }, { "epoch": 0.32253258845437616, "grad_norm": 0.19921875, "learning_rate": 1.5421623260615077e-06, "logits/chosen": -0.017822265625, "logits/rejected": 0.034423828125, "logps/chosen": -0.4375, "logps/rejected": -3.1875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1875, "rewards/margins": 13.75, "rewards/rejected": -15.9375, "step": 866 }, { "epoch": 0.3229050279329609, "grad_norm": 0.0003795623779296875, "learning_rate": 1.5413418262085652e-06, "logits/chosen": 0.2041015625, "logits/rejected": 0.279296875, "logps/chosen": -0.51953125, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.59375, "rewards/margins": 15.375, "rewards/rejected": -18.0, "step": 867 }, { "epoch": 0.3232774674115456, "grad_norm": 0.00830078125, "learning_rate": 1.5405202419409418e-06, "logits/chosen": 0.030029296875, "logits/rejected": 0.1083984375, "logps/chosen": -0.4765625, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.390625, "rewards/margins": 13.8125, "rewards/rejected": -16.25, "step": 868 }, { "epoch": 0.32364990689013035, "grad_norm": 0.01263427734375, "learning_rate": 1.5396975746478158e-06, "logits/chosen": 0.0732421875, "logits/rejected": 0.287109375, "logps/chosen": -0.265625, "logps/rejected": -2.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 13.375, "rewards/rejected": -14.75, "step": 869 }, { "epoch": 0.3240223463687151, "grad_norm": 8.875, "learning_rate": 1.5388738257201968e-06, "logits/chosen": -0.0021209716796875, "logits/rejected": 0.125, "logps/chosen": -0.52734375, "logps/rejected": -2.546875, "loss": 0.0126, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.625, "rewards/margins": 10.125, "rewards/rejected": -12.75, "step": 870 }, { "epoch": 0.3243947858472998, "grad_norm": 0.000560760498046875, "learning_rate": 1.5380489965509227e-06, "logits/chosen": 0.2021484375, "logits/rejected": -0.035888671875, "logps/chosen": -0.38671875, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9296875, "rewards/margins": 15.75, "rewards/rejected": -17.75, "step": 871 }, { "epoch": 0.32476722532588453, "grad_norm": 34.5, "learning_rate": 1.5372230885346582e-06, "logits/chosen": -0.2353515625, "logits/rejected": -0.259765625, "logps/chosen": -0.17578125, "logps/rejected": -1.6171875, "loss": 0.124, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8828125, "rewards/margins": 7.21875, "rewards/rejected": -8.125, "step": 872 }, { "epoch": 0.32513966480446926, "grad_norm": 0.00115203857421875, "learning_rate": 1.5363961030678926e-06, "logits/chosen": 0.0242919921875, "logits/rejected": 0.0033721923828125, "logps/chosen": -0.2197265625, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1015625, "rewards/margins": 14.625, "rewards/rejected": -15.75, "step": 873 }, { "epoch": 0.325512104283054, "grad_norm": 5.46875, "learning_rate": 1.5355680415489368e-06, "logits/chosen": -0.0107421875, "logits/rejected": 0.2431640625, "logps/chosen": -0.30078125, "logps/rejected": -2.234375, "loss": 0.0091, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5078125, "rewards/margins": 9.6875, "rewards/rejected": -11.1875, "step": 874 }, { "epoch": 0.3258845437616387, "grad_norm": 13.8125, "learning_rate": 1.5347389053779206e-06, "logits/chosen": 0.0286865234375, "logits/rejected": -0.1318359375, "logps/chosen": -0.2734375, "logps/rejected": -1.828125, "loss": 0.0287, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3671875, "rewards/margins": 7.78125, "rewards/rejected": -9.125, "step": 875 }, { "epoch": 0.32625698324022345, "grad_norm": 0.2890625, "learning_rate": 1.5339086959567912e-06, "logits/chosen": -0.171875, "logits/rejected": -0.69140625, "logps/chosen": -0.26171875, "logps/rejected": -2.59375, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125, "rewards/margins": 11.6875, "rewards/rejected": -13.0, "step": 876 }, { "epoch": 0.3266294227188082, "grad_norm": 1.1640625, "learning_rate": 1.5330774146893112e-06, "logits/chosen": -0.12353515625, "logits/rejected": -0.51171875, "logps/chosen": -0.5, "logps/rejected": -2.609375, "loss": 0.0014, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5, "rewards/margins": 10.5625, "rewards/rejected": -13.0625, "step": 877 }, { "epoch": 0.3270018621973929, "grad_norm": 0.0174560546875, "learning_rate": 1.5322450629810544e-06, "logits/chosen": 0.00830078125, "logits/rejected": -0.03662109375, "logps/chosen": -0.5078125, "logps/rejected": -3.078125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.53125, "rewards/margins": 12.875, "rewards/rejected": -15.375, "step": 878 }, { "epoch": 0.32737430167597764, "grad_norm": 0.0002040863037109375, "learning_rate": 1.5314116422394051e-06, "logits/chosen": 0.1513671875, "logits/rejected": 0.2099609375, "logps/chosen": -0.158203125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7890625, "rewards/margins": 16.625, "rewards/rejected": -17.375, "step": 879 }, { "epoch": 0.32774674115456237, "grad_norm": 0.006500244140625, "learning_rate": 1.5305771538735555e-06, "logits/chosen": 0.166015625, "logits/rejected": -0.0751953125, "logps/chosen": -0.50390625, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.515625, "rewards/margins": 14.3125, "rewards/rejected": -16.875, "step": 880 }, { "epoch": 0.3281191806331471, "grad_norm": 0.6484375, "learning_rate": 1.5297415992945025e-06, "logits/chosen": 0.169921875, "logits/rejected": -0.51171875, "logps/chosen": -0.3828125, "logps/rejected": -2.859375, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9140625, "rewards/margins": 12.375, "rewards/rejected": -14.25, "step": 881 }, { "epoch": 0.32849162011173183, "grad_norm": 7.0, "learning_rate": 1.528904979915046e-06, "logits/chosen": -0.1640625, "logits/rejected": -0.400390625, "logps/chosen": -0.71875, "logps/rejected": -2.75, "loss": 0.012, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.59375, "rewards/margins": 10.1875, "rewards/rejected": -13.75, "step": 882 }, { "epoch": 0.32886405959031656, "grad_norm": 0.10595703125, "learning_rate": 1.5280672971497862e-06, "logits/chosen": -0.000316619873046875, "logits/rejected": -0.0311279296875, "logps/chosen": -0.6015625, "logps/rejected": -2.859375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0, "rewards/margins": 11.3125, "rewards/rejected": -14.3125, "step": 883 }, { "epoch": 0.3292364990689013, "grad_norm": 0.046875, "learning_rate": 1.5272285524151215e-06, "logits/chosen": 0.01611328125, "logits/rejected": -0.64453125, "logps/chosen": -0.265625, "logps/rejected": -2.875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 13.125, "rewards/rejected": -14.4375, "step": 884 }, { "epoch": 0.329608938547486, "grad_norm": 0.001922607421875, "learning_rate": 1.526388747129246e-06, "logits/chosen": -0.1494140625, "logits/rejected": -0.09033203125, "logps/chosen": -0.27734375, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.390625, "rewards/margins": 14.5625, "rewards/rejected": -16.0, "step": 885 }, { "epoch": 0.32998137802607075, "grad_norm": 37.5, "learning_rate": 1.5255478827121466e-06, "logits/chosen": -0.11572265625, "logits/rejected": 0.1396484375, "logps/chosen": -0.93359375, "logps/rejected": -2.640625, "loss": 0.054, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.6875, "rewards/margins": 8.5, "rewards/rejected": -13.1875, "step": 886 }, { "epoch": 0.3303538175046555, "grad_norm": 0.000728607177734375, "learning_rate": 1.5247059605856015e-06, "logits/chosen": 0.24609375, "logits/rejected": -0.04638671875, "logps/chosen": -0.2392578125, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1953125, "rewards/margins": 14.5, "rewards/rejected": -15.625, "step": 887 }, { "epoch": 0.3307262569832402, "grad_norm": 0.0017547607421875, "learning_rate": 1.5238629821731775e-06, "logits/chosen": 0.01611328125, "logits/rejected": 0.0546875, "logps/chosen": -0.3828125, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.90625, "rewards/margins": 13.9375, "rewards/rejected": -15.875, "step": 888 }, { "epoch": 0.33109869646182494, "grad_norm": 0.000308990478515625, "learning_rate": 1.5230189489002264e-06, "logits/chosen": 0.10986328125, "logits/rejected": 0.0771484375, "logps/chosen": -0.375, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.875, "rewards/margins": 15.4375, "rewards/rejected": -17.25, "step": 889 }, { "epoch": 0.33147113594040967, "grad_norm": 0.001312255859375, "learning_rate": 1.5221738621938846e-06, "logits/chosen": 0.07470703125, "logits/rejected": 0.08935546875, "logps/chosen": -0.466796875, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.34375, "rewards/margins": 15.0, "rewards/rejected": -17.25, "step": 890 }, { "epoch": 0.3318435754189944, "grad_norm": 0.1953125, "learning_rate": 1.5213277234830695e-06, "logits/chosen": 0.0098876953125, "logits/rejected": -0.34375, "logps/chosen": -0.8671875, "logps/rejected": -2.703125, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.34375, "rewards/margins": 9.1875, "rewards/rejected": -13.5, "step": 891 }, { "epoch": 0.3322160148975791, "grad_norm": 0.031494140625, "learning_rate": 1.5204805341984772e-06, "logits/chosen": -0.068359375, "logits/rejected": 0.05078125, "logps/chosen": -0.58203125, "logps/rejected": -2.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.90625, "rewards/margins": 11.8125, "rewards/rejected": -14.6875, "step": 892 }, { "epoch": 0.33258845437616386, "grad_norm": 0.87109375, "learning_rate": 1.51963229577258e-06, "logits/chosen": 0.1005859375, "logits/rejected": 0.5078125, "logps/chosen": -0.18359375, "logps/rejected": -2.53125, "loss": 0.0016, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.91796875, "rewards/margins": 11.75, "rewards/rejected": -12.625, "step": 893 }, { "epoch": 0.3329608938547486, "grad_norm": 0.00994873046875, "learning_rate": 1.5187830096396243e-06, "logits/chosen": 0.14453125, "logits/rejected": -0.51171875, "logps/chosen": -0.38671875, "logps/rejected": -2.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9375, "rewards/margins": 12.75, "rewards/rejected": -14.75, "step": 894 }, { "epoch": 0.3333333333333333, "grad_norm": 0.02001953125, "learning_rate": 1.517932677235628e-06, "logits/chosen": 0.08154296875, "logits/rejected": 0.1611328125, "logps/chosen": -0.392578125, "logps/rejected": -2.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9609375, "rewards/margins": 11.75, "rewards/rejected": -13.75, "step": 895 }, { "epoch": 0.33370577281191804, "grad_norm": 1.9375, "learning_rate": 1.5170812999983783e-06, "logits/chosen": -0.1513671875, "logits/rejected": -0.1845703125, "logps/chosen": -0.7265625, "logps/rejected": -2.625, "loss": 0.0026, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.640625, "rewards/margins": 9.4375, "rewards/rejected": -13.125, "step": 896 }, { "epoch": 0.3340782122905028, "grad_norm": 27.125, "learning_rate": 1.5162288793674287e-06, "logits/chosen": 0.0289306640625, "logits/rejected": -0.08837890625, "logps/chosen": -0.302734375, "logps/rejected": -2.1875, "loss": 0.0471, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.515625, "rewards/margins": 9.375, "rewards/rejected": -10.9375, "step": 897 }, { "epoch": 0.3344506517690875, "grad_norm": 0.0002689361572265625, "learning_rate": 1.515375416784097e-06, "logits/chosen": 0.2158203125, "logits/rejected": 0.17578125, "logps/chosen": -0.19921875, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0, "rewards/margins": 15.5625, "rewards/rejected": -16.5, "step": 898 }, { "epoch": 0.33482309124767223, "grad_norm": 0.00537109375, "learning_rate": 1.514520913691463e-06, "logits/chosen": -0.12109375, "logits/rejected": 0.045166015625, "logps/chosen": -0.2890625, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4453125, "rewards/margins": 14.25, "rewards/rejected": -15.75, "step": 899 }, { "epoch": 0.33519553072625696, "grad_norm": 0.00185394287109375, "learning_rate": 1.5136653715343663e-06, "logits/chosen": -0.06591796875, "logits/rejected": -0.01251220703125, "logps/chosen": -0.197265625, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.984375, "rewards/margins": 14.0625, "rewards/rejected": -15.0, "step": 900 }, { "epoch": 0.3355679702048417, "grad_norm": 3.21875, "learning_rate": 1.5128087917594016e-06, "logits/chosen": 0.01483154296875, "logits/rejected": 0.458984375, "logps/chosen": -0.421875, "logps/rejected": -2.5, "loss": 0.0055, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 10.375, "rewards/rejected": -12.5, "step": 901 }, { "epoch": 0.3359404096834264, "grad_norm": 176.0, "learning_rate": 1.5119511758149204e-06, "logits/chosen": -0.119140625, "logits/rejected": 0.09423828125, "logps/chosen": -0.578125, "logps/rejected": -2.328125, "loss": 0.5391, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.90625, "rewards/margins": 8.75, "rewards/rejected": -11.625, "step": 902 }, { "epoch": 0.33631284916201115, "grad_norm": 0.00010251998901367188, "learning_rate": 1.5110925251510244e-06, "logits/chosen": 0.1484375, "logits/rejected": 0.034423828125, "logps/chosen": -0.287109375, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4296875, "rewards/margins": 16.5, "rewards/rejected": -18.0, "step": 903 }, { "epoch": 0.3366852886405959, "grad_norm": 0.1298828125, "learning_rate": 1.5102328412195657e-06, "logits/chosen": -0.041748046875, "logits/rejected": -0.01361083984375, "logps/chosen": -0.443359375, "logps/rejected": -2.734375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 11.5, "rewards/rejected": -13.6875, "step": 904 }, { "epoch": 0.3370577281191806, "grad_norm": 9.441375732421875e-05, "learning_rate": 1.5093721254741435e-06, "logits/chosen": 0.05712890625, "logits/rejected": 0.1220703125, "logps/chosen": -0.3046875, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.53125, "rewards/margins": 17.25, "rewards/rejected": -18.75, "step": 905 }, { "epoch": 0.33743016759776534, "grad_norm": 0.0016326904296875, "learning_rate": 1.5085103793701016e-06, "logits/chosen": 0.0294189453125, "logits/rejected": -0.03857421875, "logps/chosen": -0.435546875, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.171875, "rewards/margins": 14.8125, "rewards/rejected": -17.0, "step": 906 }, { "epoch": 0.33780260707635007, "grad_norm": 0.016357421875, "learning_rate": 1.5076476043645258e-06, "logits/chosen": -0.027587890625, "logits/rejected": -0.07470703125, "logps/chosen": -0.5859375, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.921875, "rewards/margins": 13.8125, "rewards/rejected": -16.75, "step": 907 }, { "epoch": 0.3381750465549348, "grad_norm": 19.625, "learning_rate": 1.5067838019162416e-06, "logits/chosen": -0.0400390625, "logits/rejected": -1.3828125, "logps/chosen": -0.470703125, "logps/rejected": -1.875, "loss": 0.032, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.34375, "rewards/margins": 7.0, "rewards/rejected": -9.375, "step": 908 }, { "epoch": 0.33854748603351953, "grad_norm": 0.59375, "learning_rate": 1.5059189734858117e-06, "logits/chosen": -0.095703125, "logits/rejected": 0.072265625, "logps/chosen": -0.1572265625, "logps/rejected": -2.640625, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.78125, "rewards/margins": 12.5, "rewards/rejected": -13.25, "step": 909 }, { "epoch": 0.33891992551210426, "grad_norm": 11.5, "learning_rate": 1.5050531205355342e-06, "logits/chosen": -0.0634765625, "logits/rejected": 0.46484375, "logps/chosen": -0.3125, "logps/rejected": -2.328125, "loss": 0.0275, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5625, "rewards/margins": 10.125, "rewards/rejected": -11.625, "step": 910 }, { "epoch": 0.339292364990689, "grad_norm": 0.068359375, "learning_rate": 1.504186244529439e-06, "logits/chosen": -0.326171875, "logits/rejected": -0.0498046875, "logps/chosen": -0.6875, "logps/rejected": -3.25, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4375, "rewards/margins": 12.8125, "rewards/rejected": -16.25, "step": 911 }, { "epoch": 0.3396648044692737, "grad_norm": 0.0002231597900390625, "learning_rate": 1.5033183469332856e-06, "logits/chosen": 0.03759765625, "logits/rejected": 0.08984375, "logps/chosen": -0.322265625, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6171875, "rewards/margins": 15.625, "rewards/rejected": -17.25, "step": 912 }, { "epoch": 0.34003724394785845, "grad_norm": 0.024169921875, "learning_rate": 1.5024494292145613e-06, "logits/chosen": -0.08349609375, "logits/rejected": -0.796875, "logps/chosen": -0.31640625, "logps/rejected": -2.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5859375, "rewards/margins": 11.1875, "rewards/rejected": -12.75, "step": 913 }, { "epoch": 0.3404096834264432, "grad_norm": 8.1875, "learning_rate": 1.5015794928424781e-06, "logits/chosen": 0.06640625, "logits/rejected": 0.41015625, "logps/chosen": -0.29296875, "logps/rejected": -2.03125, "loss": 0.0171, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.46875, "rewards/margins": 8.6875, "rewards/rejected": -10.1875, "step": 914 }, { "epoch": 0.3407821229050279, "grad_norm": 0.0947265625, "learning_rate": 1.5007085392879707e-06, "logits/chosen": -0.07373046875, "logits/rejected": -0.58984375, "logps/chosen": -0.16015625, "logps/rejected": -2.5625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.80078125, "rewards/margins": 12.0, "rewards/rejected": -12.8125, "step": 915 }, { "epoch": 0.34115456238361264, "grad_norm": 8.440017700195312e-05, "learning_rate": 1.499836570023693e-06, "logits/chosen": -0.1201171875, "logits/rejected": 0.0302734375, "logps/chosen": -0.158203125, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7890625, "rewards/margins": 16.75, "rewards/rejected": -17.5, "step": 916 }, { "epoch": 0.34152700186219737, "grad_norm": 0.0003566741943359375, "learning_rate": 1.4989635865240172e-06, "logits/chosen": 0.0849609375, "logits/rejected": 0.052490234375, "logps/chosen": -0.40625, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 16.75, "rewards/rejected": -18.75, "step": 917 }, { "epoch": 0.3418994413407821, "grad_norm": 0.08642578125, "learning_rate": 1.4980895902650292e-06, "logits/chosen": 0.06298828125, "logits/rejected": -0.453125, "logps/chosen": -0.8671875, "logps/rejected": -2.90625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.3125, "rewards/margins": 10.1875, "rewards/rejected": -14.5, "step": 918 }, { "epoch": 0.3422718808193669, "grad_norm": 0.216796875, "learning_rate": 1.497214582724529e-06, "logits/chosen": 0.08447265625, "logits/rejected": -0.267578125, "logps/chosen": -0.408203125, "logps/rejected": -2.859375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 12.25, "rewards/rejected": -14.25, "step": 919 }, { "epoch": 0.3426443202979516, "grad_norm": 0.00174713134765625, "learning_rate": 1.4963385653820254e-06, "logits/chosen": 0.11962890625, "logits/rejected": 0.1044921875, "logps/chosen": -0.27734375, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.390625, "rewards/margins": 14.0625, "rewards/rejected": -15.4375, "step": 920 }, { "epoch": 0.34301675977653634, "grad_norm": 0.004486083984375, "learning_rate": 1.4954615397187346e-06, "logits/chosen": 0.05224609375, "logits/rejected": 0.1259765625, "logps/chosen": -0.28125, "logps/rejected": -2.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.40625, "rewards/margins": 13.3125, "rewards/rejected": -14.75, "step": 921 }, { "epoch": 0.34338919925512107, "grad_norm": 0.00113677978515625, "learning_rate": 1.4945835072175782e-06, "logits/chosen": 0.021484375, "logits/rejected": 0.1611328125, "logps/chosen": -0.3515625, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7578125, "rewards/margins": 14.25, "rewards/rejected": -16.0, "step": 922 }, { "epoch": 0.3437616387337058, "grad_norm": 16.375, "learning_rate": 1.4937044693631796e-06, "logits/chosen": 0.08740234375, "logits/rejected": -0.1806640625, "logps/chosen": -0.33984375, "logps/rejected": -1.75, "loss": 0.0444, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.703125, "rewards/margins": 7.09375, "rewards/rejected": -8.75, "step": 923 }, { "epoch": 0.34413407821229053, "grad_norm": 0.091796875, "learning_rate": 1.492824427641863e-06, "logits/chosen": -0.001953125, "logits/rejected": -0.55859375, "logps/chosen": -0.2080078125, "logps/rejected": -2.515625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0390625, "rewards/margins": 11.5625, "rewards/rejected": -12.625, "step": 924 }, { "epoch": 0.34450651769087526, "grad_norm": 0.00010395050048828125, "learning_rate": 1.4919433835416494e-06, "logits/chosen": -0.0101318359375, "logits/rejected": 0.021240234375, "logps/chosen": -0.361328125, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8046875, "rewards/margins": 16.875, "rewards/rejected": -18.625, "step": 925 }, { "epoch": 0.34487895716946, "grad_norm": 0.00012493133544921875, "learning_rate": 1.4910613385522546e-06, "logits/chosen": 0.130859375, "logits/rejected": 0.26953125, "logps/chosen": -0.189453125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9453125, "rewards/margins": 16.25, "rewards/rejected": -17.25, "step": 926 }, { "epoch": 0.3452513966480447, "grad_norm": 0.00103759765625, "learning_rate": 1.490178294165087e-06, "logits/chosen": 0.03466796875, "logits/rejected": 0.2216796875, "logps/chosen": -0.263671875, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3203125, "rewards/margins": 15.375, "rewards/rejected": -16.75, "step": 927 }, { "epoch": 0.34562383612662945, "grad_norm": 0.04296875, "learning_rate": 1.4892942518732443e-06, "logits/chosen": 0.0869140625, "logits/rejected": 0.1357421875, "logps/chosen": -0.58984375, "logps/rejected": -3.171875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.96875, "rewards/margins": 12.875, "rewards/rejected": -15.875, "step": 928 }, { "epoch": 0.3459962756052142, "grad_norm": 0.00118255615234375, "learning_rate": 1.4884092131715126e-06, "logits/chosen": 0.01300048828125, "logits/rejected": 0.28515625, "logps/chosen": -0.2060546875, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.03125, "rewards/margins": 14.375, "rewards/rejected": -15.375, "step": 929 }, { "epoch": 0.3463687150837989, "grad_norm": 0.001617431640625, "learning_rate": 1.4875231795563622e-06, "logits/chosen": 0.10888671875, "logits/rejected": -0.06591796875, "logps/chosen": -0.337890625, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6875, "rewards/margins": 13.75, "rewards/rejected": -15.4375, "step": 930 }, { "epoch": 0.34674115456238364, "grad_norm": 11.5625, "learning_rate": 1.4866361525259454e-06, "logits/chosen": -0.08935546875, "logits/rejected": -0.3046875, "logps/chosen": -0.2216796875, "logps/rejected": -1.796875, "loss": 0.0275, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.109375, "rewards/margins": 7.875, "rewards/rejected": -8.9375, "step": 931 }, { "epoch": 0.34711359404096837, "grad_norm": 0.002105712890625, "learning_rate": 1.4857481335800937e-06, "logits/chosen": -0.029296875, "logits/rejected": 0.047119140625, "logps/chosen": -0.21875, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.09375, "rewards/margins": 14.6875, "rewards/rejected": -15.8125, "step": 932 }, { "epoch": 0.3474860335195531, "grad_norm": 0.98046875, "learning_rate": 1.4848591242203178e-06, "logits/chosen": -0.0198974609375, "logits/rejected": 0.439453125, "logps/chosen": -0.1669921875, "logps/rejected": -2.265625, "loss": 0.002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8359375, "rewards/margins": 10.4375, "rewards/rejected": -11.3125, "step": 933 }, { "epoch": 0.3478584729981378, "grad_norm": 0.02734375, "learning_rate": 1.4839691259498007e-06, "logits/chosen": -0.40234375, "logits/rejected": -0.33984375, "logps/chosen": -0.404296875, "logps/rejected": -2.65625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.015625, "rewards/margins": 11.1875, "rewards/rejected": -13.25, "step": 934 }, { "epoch": 0.34823091247672255, "grad_norm": 5.46875, "learning_rate": 1.4830781402733989e-06, "logits/chosen": -0.0712890625, "logits/rejected": -0.06103515625, "logps/chosen": -1.109375, "logps/rejected": -3.125, "loss": 0.0061, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.53125, "rewards/margins": 10.125, "rewards/rejected": -15.625, "step": 935 }, { "epoch": 0.3486033519553073, "grad_norm": 0.0341796875, "learning_rate": 1.4821861686976386e-06, "logits/chosen": 0.029052734375, "logits/rejected": -0.05224609375, "logps/chosen": -0.3515625, "logps/rejected": -2.65625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7578125, "rewards/margins": 11.5, "rewards/rejected": -13.25, "step": 936 }, { "epoch": 0.348975791433892, "grad_norm": 0.0125732421875, "learning_rate": 1.4812932127307117e-06, "logits/chosen": -0.058349609375, "logits/rejected": 0.10693359375, "logps/chosen": -0.8046875, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.03125, "rewards/margins": 14.5, "rewards/rejected": -18.5, "step": 937 }, { "epoch": 0.34934823091247674, "grad_norm": 0.080078125, "learning_rate": 1.4803992738824758e-06, "logits/chosen": 0.0308837890625, "logits/rejected": -0.765625, "logps/chosen": -0.439453125, "logps/rejected": -2.53125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.203125, "rewards/margins": 10.4375, "rewards/rejected": -12.625, "step": 938 }, { "epoch": 0.34972067039106147, "grad_norm": 0.06494140625, "learning_rate": 1.47950435366445e-06, "logits/chosen": 0.1455078125, "logits/rejected": -0.328125, "logps/chosen": -0.201171875, "logps/rejected": -2.6875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0, "rewards/margins": 12.375, "rewards/rejected": -13.375, "step": 939 }, { "epoch": 0.3500931098696462, "grad_norm": 0.2490234375, "learning_rate": 1.4786084535898127e-06, "logits/chosen": -0.01177978515625, "logits/rejected": -0.515625, "logps/chosen": -0.7890625, "logps/rejected": -3.125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.953125, "rewards/margins": 11.625, "rewards/rejected": -15.625, "step": 940 }, { "epoch": 0.35046554934823093, "grad_norm": 0.0001506805419921875, "learning_rate": 1.4777115751733989e-06, "logits/chosen": 0.036376953125, "logits/rejected": 0.11962890625, "logps/chosen": -0.4609375, "logps/rejected": -3.703125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.296875, "rewards/margins": 16.25, "rewards/rejected": -18.5, "step": 941 }, { "epoch": 0.35083798882681566, "grad_norm": 0.00066375732421875, "learning_rate": 1.4768137199316984e-06, "logits/chosen": -0.049072265625, "logits/rejected": 0.0225830078125, "logps/chosen": -0.2734375, "logps/rejected": -3.328125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3671875, "rewards/margins": 15.3125, "rewards/rejected": -16.625, "step": 942 }, { "epoch": 0.3512104283054004, "grad_norm": 13.3125, "learning_rate": 1.4759148893828524e-06, "logits/chosen": -0.080078125, "logits/rejected": 0.041748046875, "logps/chosen": -0.1201171875, "logps/rejected": -2.28125, "loss": 0.0432, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6015625, "rewards/margins": 10.75, "rewards/rejected": -11.375, "step": 943 }, { "epoch": 0.3515828677839851, "grad_norm": 0.0025482177734375, "learning_rate": 1.4750150850466508e-06, "logits/chosen": 0.07373046875, "logits/rejected": 0.0830078125, "logps/chosen": -0.328125, "logps/rejected": -3.140625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.640625, "rewards/margins": 14.0, "rewards/rejected": -15.6875, "step": 944 }, { "epoch": 0.35195530726256985, "grad_norm": 0.0028533935546875, "learning_rate": 1.4741143084445303e-06, "logits/chosen": -0.002716064453125, "logits/rejected": 0.0693359375, "logps/chosen": -0.5234375, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.625, "rewards/margins": 13.5, "rewards/rejected": -16.125, "step": 945 }, { "epoch": 0.3523277467411546, "grad_norm": 0.00051116943359375, "learning_rate": 1.4732125610995718e-06, "logits/chosen": 0.0164794921875, "logits/rejected": -0.09619140625, "logps/chosen": -0.248046875, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2421875, "rewards/margins": 15.0, "rewards/rejected": -16.25, "step": 946 }, { "epoch": 0.3527001862197393, "grad_norm": 19.0, "learning_rate": 1.4723098445364975e-06, "logits/chosen": 0.04248046875, "logits/rejected": -0.81640625, "logps/chosen": -0.6328125, "logps/rejected": -2.5, "loss": 0.0287, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.15625, "rewards/margins": 9.375, "rewards/rejected": -12.5, "step": 947 }, { "epoch": 0.35307262569832404, "grad_norm": 0.578125, "learning_rate": 1.4714061602816682e-06, "logits/chosen": 0.0108642578125, "logits/rejected": -0.1474609375, "logps/chosen": -0.53125, "logps/rejected": -2.84375, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.65625, "rewards/margins": 11.5, "rewards/rejected": -14.1875, "step": 948 }, { "epoch": 0.35344506517690877, "grad_norm": 0.00701904296875, "learning_rate": 1.4705015098630813e-06, "logits/chosen": 0.048583984375, "logits/rejected": 0.345703125, "logps/chosen": -0.361328125, "logps/rejected": -2.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8125, "rewards/margins": 12.375, "rewards/rejected": -14.25, "step": 949 }, { "epoch": 0.3538175046554935, "grad_norm": 5.28125, "learning_rate": 1.469595894810367e-06, "logits/chosen": -0.0693359375, "logits/rejected": -0.66015625, "logps/chosen": -0.5546875, "logps/rejected": -2.203125, "loss": 0.0075, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.78125, "rewards/margins": 8.25, "rewards/rejected": -11.0, "step": 950 }, { "epoch": 0.3541899441340782, "grad_norm": 0.031494140625, "learning_rate": 1.4686893166547876e-06, "logits/chosen": -0.0286865234375, "logits/rejected": -0.515625, "logps/chosen": -0.3203125, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 13.8125, "rewards/rejected": -15.375, "step": 951 }, { "epoch": 0.35456238361266296, "grad_norm": 11.1875, "learning_rate": 1.4677817769292336e-06, "logits/chosen": 0.07080078125, "logits/rejected": -0.15234375, "logps/chosen": -0.58984375, "logps/rejected": -1.7109375, "loss": 0.0236, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9375, "rewards/margins": 5.59375, "rewards/rejected": -8.5625, "step": 952 }, { "epoch": 0.3549348230912477, "grad_norm": 0.515625, "learning_rate": 1.4668732771682204e-06, "logits/chosen": 0.1630859375, "logits/rejected": 0.5390625, "logps/chosen": -0.09130859375, "logps/rejected": -2.328125, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.45703125, "rewards/margins": 11.1875, "rewards/rejected": -11.625, "step": 953 }, { "epoch": 0.3553072625698324, "grad_norm": 0.000881195068359375, "learning_rate": 1.4659638189078879e-06, "logits/chosen": 0.1416015625, "logits/rejected": -0.0220947265625, "logps/chosen": -0.4765625, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 14.625, "rewards/rejected": -17.0, "step": 954 }, { "epoch": 0.35567970204841715, "grad_norm": 6.389617919921875e-05, "learning_rate": 1.4650534036859965e-06, "logits/chosen": 0.1513671875, "logits/rejected": 0.169921875, "logps/chosen": -0.21484375, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.078125, "rewards/margins": 17.25, "rewards/rejected": -18.25, "step": 955 }, { "epoch": 0.3560521415270019, "grad_norm": 0.0003032684326171875, "learning_rate": 1.4641420330419237e-06, "logits/chosen": 0.024658203125, "logits/rejected": 0.1591796875, "logps/chosen": -0.341796875, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.703125, "rewards/margins": 15.8125, "rewards/rejected": -17.5, "step": 956 }, { "epoch": 0.3564245810055866, "grad_norm": 0.12890625, "learning_rate": 1.4632297085166636e-06, "logits/chosen": 0.0439453125, "logits/rejected": -0.45703125, "logps/chosen": -0.6328125, "logps/rejected": -2.625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.15625, "rewards/margins": 9.9375, "rewards/rejected": -13.125, "step": 957 }, { "epoch": 0.35679702048417133, "grad_norm": 0.06494140625, "learning_rate": 1.4623164316528233e-06, "logits/chosen": -0.05908203125, "logits/rejected": -0.486328125, "logps/chosen": -0.154296875, "logps/rejected": -2.65625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.76953125, "rewards/margins": 12.4375, "rewards/rejected": -13.25, "step": 958 }, { "epoch": 0.35716945996275606, "grad_norm": 0.000457763671875, "learning_rate": 1.4614022039946186e-06, "logits/chosen": 0.03662109375, "logits/rejected": 0.031005859375, "logps/chosen": -0.234375, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.171875, "rewards/margins": 16.25, "rewards/rejected": -17.5, "step": 959 }, { "epoch": 0.3575418994413408, "grad_norm": 9.0625, "learning_rate": 1.4604870270878747e-06, "logits/chosen": -0.06103515625, "logits/rejected": -0.287109375, "logps/chosen": -0.474609375, "logps/rejected": -2.21875, "loss": 0.025, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 8.75, "rewards/rejected": -11.125, "step": 960 }, { "epoch": 0.3579143389199255, "grad_norm": 0.00180816650390625, "learning_rate": 1.4595709024800206e-06, "logits/chosen": 0.03173828125, "logits/rejected": -0.10791015625, "logps/chosen": -0.337890625, "logps/rejected": -3.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6875, "rewards/margins": 13.5, "rewards/rejected": -15.1875, "step": 961 }, { "epoch": 0.35828677839851025, "grad_norm": 0.01483154296875, "learning_rate": 1.4586538317200888e-06, "logits/chosen": 0.0245361328125, "logits/rejected": 0.279296875, "logps/chosen": -0.376953125, "logps/rejected": -3.015625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8828125, "rewards/margins": 13.25, "rewards/rejected": -15.125, "step": 962 }, { "epoch": 0.358659217877095, "grad_norm": 0.07275390625, "learning_rate": 1.457735816358711e-06, "logits/chosen": -0.1650390625, "logits/rejected": -0.380859375, "logps/chosen": -0.337890625, "logps/rejected": -2.75, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6875, "rewards/margins": 12.0, "rewards/rejected": -13.75, "step": 963 }, { "epoch": 0.3590316573556797, "grad_norm": 0.0012664794921875, "learning_rate": 1.4568168579481151e-06, "logits/chosen": -0.138671875, "logits/rejected": 0.2177734375, "logps/chosen": -0.244140625, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.21875, "rewards/margins": 14.6875, "rewards/rejected": -15.9375, "step": 964 }, { "epoch": 0.35940409683426444, "grad_norm": 0.000640869140625, "learning_rate": 1.4558969580421262e-06, "logits/chosen": 0.052978515625, "logits/rejected": 0.130859375, "logps/chosen": -0.220703125, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.109375, "rewards/margins": 14.625, "rewards/rejected": -15.75, "step": 965 }, { "epoch": 0.35977653631284917, "grad_norm": 0.00225830078125, "learning_rate": 1.4549761181961585e-06, "logits/chosen": 0.04150390625, "logits/rejected": 0.042236328125, "logps/chosen": -0.7734375, "logps/rejected": -3.578125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.875, "rewards/margins": 14.0, "rewards/rejected": -17.875, "step": 966 }, { "epoch": 0.3601489757914339, "grad_norm": 0.0005950927734375, "learning_rate": 1.454054339967217e-06, "logits/chosen": 0.146484375, "logits/rejected": -0.08740234375, "logps/chosen": -0.193359375, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.96484375, "rewards/margins": 14.8125, "rewards/rejected": -15.75, "step": 967 }, { "epoch": 0.36052141527001863, "grad_norm": 0.79296875, "learning_rate": 1.4531316249138936e-06, "logits/chosen": -0.00107574462890625, "logits/rejected": -0.232421875, "logps/chosen": -0.2578125, "logps/rejected": -2.453125, "loss": 0.0017, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.296875, "rewards/margins": 10.9375, "rewards/rejected": -12.25, "step": 968 }, { "epoch": 0.36089385474860336, "grad_norm": 0.001007080078125, "learning_rate": 1.452207974596363e-06, "logits/chosen": 0.134765625, "logits/rejected": 0.2265625, "logps/chosen": -0.296875, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.484375, "rewards/margins": 15.625, "rewards/rejected": -17.125, "step": 969 }, { "epoch": 0.3612662942271881, "grad_norm": 0.000675201416015625, "learning_rate": 1.4512833905763825e-06, "logits/chosen": 0.0732421875, "logits/rejected": 0.228515625, "logps/chosen": -0.14453125, "logps/rejected": -3.171875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.71875, "rewards/margins": 15.125, "rewards/rejected": -15.875, "step": 970 }, { "epoch": 0.3616387337057728, "grad_norm": 0.1455078125, "learning_rate": 1.4503578744172874e-06, "logits/chosen": -0.296875, "logits/rejected": -0.79296875, "logps/chosen": -0.5859375, "logps/rejected": -2.5, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9375, "rewards/margins": 9.625, "rewards/rejected": -12.5625, "step": 971 }, { "epoch": 0.36201117318435755, "grad_norm": 1.671875, "learning_rate": 1.4494314276839893e-06, "logits/chosen": -0.0380859375, "logits/rejected": -0.66796875, "logps/chosen": -0.57421875, "logps/rejected": -2.53125, "loss": 0.002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.875, "rewards/margins": 9.875, "rewards/rejected": -12.75, "step": 972 }, { "epoch": 0.3623836126629423, "grad_norm": 0.000263214111328125, "learning_rate": 1.4485040519429738e-06, "logits/chosen": 0.091796875, "logits/rejected": 0.11181640625, "logps/chosen": -0.16796875, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8359375, "rewards/margins": 16.25, "rewards/rejected": -17.0, "step": 973 }, { "epoch": 0.362756052141527, "grad_norm": 0.0078125, "learning_rate": 1.4475757487622962e-06, "logits/chosen": -0.01123046875, "logits/rejected": -0.0147705078125, "logps/chosen": -0.458984375, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.296875, "rewards/margins": 14.875, "rewards/rejected": -17.125, "step": 974 }, { "epoch": 0.36312849162011174, "grad_norm": 38.5, "learning_rate": 1.446646519711581e-06, "logits/chosen": 0.09033203125, "logits/rejected": 0.0810546875, "logps/chosen": -0.37109375, "logps/rejected": -2.1875, "loss": 0.0571, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.859375, "rewards/margins": 9.125, "rewards/rejected": -11.0, "step": 975 }, { "epoch": 0.36350093109869647, "grad_norm": 0.00170135498046875, "learning_rate": 1.4457163663620181e-06, "logits/chosen": 0.01318359375, "logits/rejected": 0.0546875, "logps/chosen": -0.267578125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 16.0, "rewards/rejected": -17.25, "step": 976 }, { "epoch": 0.3638733705772812, "grad_norm": 0.015380859375, "learning_rate": 1.4447852902863595e-06, "logits/chosen": -0.047607421875, "logits/rejected": 0.263671875, "logps/chosen": -0.33984375, "logps/rejected": -2.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.703125, "rewards/margins": 12.625, "rewards/rejected": -14.375, "step": 977 }, { "epoch": 0.3642458100558659, "grad_norm": 0.10302734375, "learning_rate": 1.443853293058918e-06, "logits/chosen": 0.04296875, "logits/rejected": -0.435546875, "logps/chosen": -0.34375, "logps/rejected": -2.78125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.71875, "rewards/margins": 12.1875, "rewards/rejected": -13.875, "step": 978 }, { "epoch": 0.36461824953445066, "grad_norm": 0.004302978515625, "learning_rate": 1.4429203762555638e-06, "logits/chosen": -0.006927490234375, "logits/rejected": -0.34375, "logps/chosen": -0.1572265625, "logps/rejected": -3.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.78515625, "rewards/margins": 14.25, "rewards/rejected": -15.125, "step": 979 }, { "epoch": 0.3649906890130354, "grad_norm": 18.125, "learning_rate": 1.441986541453722e-06, "logits/chosen": -0.00872802734375, "logits/rejected": -0.52734375, "logps/chosen": -0.5703125, "logps/rejected": -2.71875, "loss": 0.0236, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 10.8125, "rewards/rejected": -13.625, "step": 980 }, { "epoch": 0.3653631284916201, "grad_norm": 1.3984375, "learning_rate": 1.4410517902323702e-06, "logits/chosen": -0.01397705078125, "logits/rejected": 0.1845703125, "logps/chosen": -0.7421875, "logps/rejected": -2.65625, "loss": 0.002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.703125, "rewards/margins": 9.5, "rewards/rejected": -13.25, "step": 981 }, { "epoch": 0.36573556797020484, "grad_norm": 0.00201416015625, "learning_rate": 1.4401161241720345e-06, "logits/chosen": -0.016845703125, "logits/rejected": 0.107421875, "logps/chosen": -0.455078125, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.28125, "rewards/margins": 13.5, "rewards/rejected": -15.75, "step": 982 }, { "epoch": 0.3661080074487896, "grad_norm": 0.00186920166015625, "learning_rate": 1.4391795448547895e-06, "logits/chosen": 0.02978515625, "logits/rejected": -0.38671875, "logps/chosen": -0.1337890625, "logps/rejected": -3.078125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.671875, "rewards/margins": 14.75, "rewards/rejected": -15.375, "step": 983 }, { "epoch": 0.3664804469273743, "grad_norm": 3.15625, "learning_rate": 1.4382420538642519e-06, "logits/chosen": 0.11474609375, "logits/rejected": 0.62890625, "logps/chosen": -0.25390625, "logps/rejected": -2.453125, "loss": 0.0069, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.265625, "rewards/margins": 11.0625, "rewards/rejected": -12.3125, "step": 984 }, { "epoch": 0.36685288640595903, "grad_norm": 6.0558319091796875e-05, "learning_rate": 1.437303652785582e-06, "logits/chosen": -0.12451171875, "logits/rejected": 0.2373046875, "logps/chosen": -0.35546875, "logps/rejected": -3.828125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7734375, "rewards/margins": 17.25, "rewards/rejected": -19.125, "step": 985 }, { "epoch": 0.36722532588454376, "grad_norm": 0.0003299713134765625, "learning_rate": 1.4363643432054778e-06, "logits/chosen": 0.11474609375, "logits/rejected": 0.1611328125, "logps/chosen": -0.361328125, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 16.125, "rewards/rejected": -18.0, "step": 986 }, { "epoch": 0.3675977653631285, "grad_norm": 0.23828125, "learning_rate": 1.4354241267121731e-06, "logits/chosen": -0.2119140625, "logits/rejected": 0.035888671875, "logps/chosen": -0.60546875, "logps/rejected": -3.09375, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.015625, "rewards/margins": 12.375, "rewards/rejected": -15.4375, "step": 987 }, { "epoch": 0.3679702048417132, "grad_norm": 0.0007781982421875, "learning_rate": 1.4344830048954364e-06, "logits/chosen": 0.0021820068359375, "logits/rejected": 0.1396484375, "logps/chosen": -0.158203125, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7890625, "rewards/margins": 14.625, "rewards/rejected": -15.4375, "step": 988 }, { "epoch": 0.36834264432029795, "grad_norm": 0.03369140625, "learning_rate": 1.4335409793465655e-06, "logits/chosen": 0.004669189453125, "logits/rejected": -0.271484375, "logps/chosen": -0.4609375, "logps/rejected": -2.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.296875, "rewards/margins": 12.5625, "rewards/rejected": -14.875, "step": 989 }, { "epoch": 0.3687150837988827, "grad_norm": 0.05908203125, "learning_rate": 1.4325980516583874e-06, "logits/chosen": -0.0888671875, "logits/rejected": 0.1845703125, "logps/chosen": -0.28125, "logps/rejected": -3.125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.40625, "rewards/margins": 14.25, "rewards/rejected": -15.625, "step": 990 }, { "epoch": 0.3690875232774674, "grad_norm": 0.412109375, "learning_rate": 1.4316542234252544e-06, "logits/chosen": 0.058837890625, "logits/rejected": 0.1396484375, "logps/chosen": -0.98046875, "logps/rejected": -3.15625, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.90625, "rewards/margins": 10.875, "rewards/rejected": -15.75, "step": 991 }, { "epoch": 0.36945996275605214, "grad_norm": 0.00274658203125, "learning_rate": 1.4307094962430407e-06, "logits/chosen": -0.054931640625, "logits/rejected": -0.04833984375, "logps/chosen": -0.265625, "logps/rejected": -2.953125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 13.5, "rewards/rejected": -14.75, "step": 992 }, { "epoch": 0.36983240223463687, "grad_norm": 0.130859375, "learning_rate": 1.4297638717091415e-06, "logits/chosen": 0.1318359375, "logits/rejected": 0.146484375, "logps/chosen": -0.40625, "logps/rejected": -2.53125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 10.625, "rewards/rejected": -12.625, "step": 993 }, { "epoch": 0.3702048417132216, "grad_norm": 0.000263214111328125, "learning_rate": 1.4288173514224678e-06, "logits/chosen": 0.03173828125, "logits/rejected": 0.212890625, "logps/chosen": -0.2578125, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.28125, "rewards/margins": 16.75, "rewards/rejected": -18.0, "step": 994 }, { "epoch": 0.37057728119180633, "grad_norm": 0.73046875, "learning_rate": 1.4278699369834473e-06, "logits/chosen": -0.053466796875, "logits/rejected": 0.078125, "logps/chosen": -0.40234375, "logps/rejected": -2.46875, "loss": 0.0011, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.015625, "rewards/margins": 10.375, "rewards/rejected": -12.375, "step": 995 }, { "epoch": 0.37094972067039106, "grad_norm": 0.00201416015625, "learning_rate": 1.426921629994018e-06, "logits/chosen": 0.035888671875, "logits/rejected": 0.023681640625, "logps/chosen": -0.65625, "logps/rejected": -3.578125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.296875, "rewards/margins": 14.5625, "rewards/rejected": -17.875, "step": 996 }, { "epoch": 0.3713221601489758, "grad_norm": 0.00086212158203125, "learning_rate": 1.4259724320576274e-06, "logits/chosen": 0.0654296875, "logits/rejected": 0.2470703125, "logps/chosen": -0.197265625, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.984375, "rewards/margins": 14.5, "rewards/rejected": -15.4375, "step": 997 }, { "epoch": 0.3716945996275605, "grad_norm": 0.0010986328125, "learning_rate": 1.4250223447792295e-06, "logits/chosen": 0.11328125, "logits/rejected": 0.1298828125, "logps/chosen": -0.193359375, "logps/rejected": -3.484375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.96875, "rewards/margins": 16.5, "rewards/rejected": -17.375, "step": 998 }, { "epoch": 0.37206703910614525, "grad_norm": 0.0001163482666015625, "learning_rate": 1.4240713697652825e-06, "logits/chosen": -0.1259765625, "logits/rejected": -0.017333984375, "logps/chosen": -0.2470703125, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.234375, "rewards/margins": 16.25, "rewards/rejected": -17.5, "step": 999 }, { "epoch": 0.37243947858473, "grad_norm": 5.078315734863281e-05, "learning_rate": 1.423119508623745e-06, "logits/chosen": 0.1708984375, "logits/rejected": 0.28125, "logps/chosen": -0.099609375, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5, "rewards/margins": 17.25, "rewards/rejected": -17.75, "step": 1000 }, { "epoch": 0.3728119180633147, "grad_norm": 0.049072265625, "learning_rate": 1.422166762964074e-06, "logits/chosen": -0.1826171875, "logits/rejected": -1.1328125, "logps/chosen": -0.3046875, "logps/rejected": -2.34375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5234375, "rewards/margins": 10.1875, "rewards/rejected": -11.75, "step": 1001 }, { "epoch": 0.37318435754189944, "grad_norm": 0.0011444091796875, "learning_rate": 1.4212131343972225e-06, "logits/chosen": 0.15625, "logits/rejected": 0.1455078125, "logps/chosen": -0.5859375, "logps/rejected": -3.765625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9375, "rewards/margins": 15.875, "rewards/rejected": -18.875, "step": 1002 }, { "epoch": 0.37355679702048417, "grad_norm": 0.0023956298828125, "learning_rate": 1.4202586245356361e-06, "logits/chosen": 0.1279296875, "logits/rejected": 0.10302734375, "logps/chosen": -0.2109375, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0546875, "rewards/margins": 15.125, "rewards/rejected": -16.125, "step": 1003 }, { "epoch": 0.3739292364990689, "grad_norm": 0.000598907470703125, "learning_rate": 1.4193032349932507e-06, "logits/chosen": -0.0026397705078125, "logits/rejected": 0.1767578125, "logps/chosen": -0.0810546875, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.40625, "rewards/margins": 14.875, "rewards/rejected": -15.25, "step": 1004 }, { "epoch": 0.3743016759776536, "grad_norm": 0.005218505859375, "learning_rate": 1.4183469673854894e-06, "logits/chosen": -0.0732421875, "logits/rejected": 0.1767578125, "logps/chosen": -0.58984375, "logps/rejected": -3.515625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.953125, "rewards/margins": 14.625, "rewards/rejected": -17.5, "step": 1005 }, { "epoch": 0.37467411545623835, "grad_norm": 25.75, "learning_rate": 1.4173898233292595e-06, "logits/chosen": 0.1142578125, "logits/rejected": -0.0859375, "logps/chosen": -0.9921875, "logps/rejected": -2.15625, "loss": 0.0723, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.96875, "rewards/margins": 5.875, "rewards/rejected": -10.8125, "step": 1006 }, { "epoch": 0.3750465549348231, "grad_norm": 26.25, "learning_rate": 1.4164318044429515e-06, "logits/chosen": -0.2021484375, "logits/rejected": 0.047607421875, "logps/chosen": -0.50390625, "logps/rejected": -2.34375, "loss": 0.033, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.515625, "rewards/margins": 9.25, "rewards/rejected": -11.75, "step": 1007 }, { "epoch": 0.3754189944134078, "grad_norm": 0.007080078125, "learning_rate": 1.415472912346434e-06, "logits/chosen": -0.2216796875, "logits/rejected": 0.208984375, "logps/chosen": -0.16796875, "logps/rejected": -2.984375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.84375, "rewards/margins": 14.0, "rewards/rejected": -14.875, "step": 1008 }, { "epoch": 0.37579143389199254, "grad_norm": 5.0625, "learning_rate": 1.4145131486610527e-06, "logits/chosen": -0.0162353515625, "logits/rejected": -0.4140625, "logps/chosen": -0.2021484375, "logps/rejected": -2.015625, "loss": 0.0126, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.015625, "rewards/margins": 9.0625, "rewards/rejected": -10.0625, "step": 1009 }, { "epoch": 0.3761638733705773, "grad_norm": 0.265625, "learning_rate": 1.4135525150096267e-06, "logits/chosen": -0.1484375, "logits/rejected": 0.19140625, "logps/chosen": -0.328125, "logps/rejected": -2.53125, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.640625, "rewards/margins": 11.0, "rewards/rejected": -12.625, "step": 1010 }, { "epoch": 0.376536312849162, "grad_norm": 9.1552734375e-05, "learning_rate": 1.4125910130164464e-06, "logits/chosen": 0.053466796875, "logits/rejected": 0.234375, "logps/chosen": -0.337890625, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6875, "rewards/margins": 16.75, "rewards/rejected": -18.375, "step": 1011 }, { "epoch": 0.37690875232774673, "grad_norm": 0.1884765625, "learning_rate": 1.41162864430727e-06, "logits/chosen": 0.0225830078125, "logits/rejected": -0.64453125, "logps/chosen": -0.9375, "logps/rejected": -2.78125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.6875, "rewards/margins": 9.1875, "rewards/rejected": -13.875, "step": 1012 }, { "epoch": 0.37728119180633146, "grad_norm": 0.00384521484375, "learning_rate": 1.4106654105093211e-06, "logits/chosen": -0.255859375, "logits/rejected": -0.07958984375, "logps/chosen": -0.6875, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4375, "rewards/margins": 12.875, "rewards/rejected": -16.375, "step": 1013 }, { "epoch": 0.3776536312849162, "grad_norm": 0.0002956390380859375, "learning_rate": 1.409701313251287e-06, "logits/chosen": 0.2060546875, "logits/rejected": 0.302734375, "logps/chosen": -0.6015625, "logps/rejected": -4.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.015625, "rewards/margins": 17.25, "rewards/rejected": -20.25, "step": 1014 }, { "epoch": 0.3780260707635009, "grad_norm": 0.0067138671875, "learning_rate": 1.4087363541633138e-06, "logits/chosen": -0.10009765625, "logits/rejected": 0.068359375, "logps/chosen": -0.98046875, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.90625, "rewards/margins": 14.3125, "rewards/rejected": -19.25, "step": 1015 }, { "epoch": 0.37839851024208565, "grad_norm": 0.000629425048828125, "learning_rate": 1.4077705348770058e-06, "logits/chosen": 0.05859375, "logits/rejected": 0.3359375, "logps/chosen": -0.232421875, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.15625, "rewards/margins": 15.0, "rewards/rejected": -16.125, "step": 1016 }, { "epoch": 0.3787709497206704, "grad_norm": 0.0025787353515625, "learning_rate": 1.406803857025421e-06, "logits/chosen": -0.0167236328125, "logits/rejected": 0.283203125, "logps/chosen": -0.46484375, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.328125, "rewards/margins": 13.4375, "rewards/rejected": -15.75, "step": 1017 }, { "epoch": 0.3791433891992551, "grad_norm": 0.0400390625, "learning_rate": 1.4058363222430694e-06, "logits/chosen": -0.02783203125, "logits/rejected": 0.251953125, "logps/chosen": -0.494140625, "logps/rejected": -2.75, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.46875, "rewards/margins": 11.3125, "rewards/rejected": -13.8125, "step": 1018 }, { "epoch": 0.37951582867783984, "grad_norm": 1.329183578491211e-05, "learning_rate": 1.4048679321659103e-06, "logits/chosen": 0.10595703125, "logits/rejected": 0.14453125, "logps/chosen": -0.1279296875, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.640625, "rewards/margins": 18.625, "rewards/rejected": -19.25, "step": 1019 }, { "epoch": 0.37988826815642457, "grad_norm": 0.0003528594970703125, "learning_rate": 1.403898688431349e-06, "logits/chosen": 0.11572265625, "logits/rejected": 0.1474609375, "logps/chosen": -0.359375, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 15.1875, "rewards/rejected": -17.0, "step": 1020 }, { "epoch": 0.3802607076350093, "grad_norm": 0.0002117156982421875, "learning_rate": 1.4029285926782334e-06, "logits/chosen": -0.06005859375, "logits/rejected": 0.1005859375, "logps/chosen": -0.12109375, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.609375, "rewards/margins": 15.875, "rewards/rejected": -16.5, "step": 1021 }, { "epoch": 0.38063314711359403, "grad_norm": 3.743171691894531e-05, "learning_rate": 1.4019576465468531e-06, "logits/chosen": 0.138671875, "logits/rejected": 0.05859375, "logps/chosen": -0.275390625, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.375, "rewards/margins": 17.5, "rewards/rejected": -18.875, "step": 1022 }, { "epoch": 0.38100558659217876, "grad_norm": 2.625, "learning_rate": 1.4009858516789353e-06, "logits/chosen": 0.12890625, "logits/rejected": 0.171875, "logps/chosen": -0.474609375, "logps/rejected": -2.625, "loss": 0.0057, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 10.75, "rewards/rejected": -13.125, "step": 1023 }, { "epoch": 0.3813780260707635, "grad_norm": 4.696846008300781e-05, "learning_rate": 1.400013209717642e-06, "logits/chosen": 0.10400390625, "logits/rejected": -0.039794921875, "logps/chosen": -0.251953125, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2578125, "rewards/margins": 17.5, "rewards/rejected": -18.75, "step": 1024 }, { "epoch": 0.3817504655493482, "grad_norm": 0.000759124755859375, "learning_rate": 1.3990397223075676e-06, "logits/chosen": -0.029296875, "logits/rejected": -0.1181640625, "logps/chosen": -0.42578125, "logps/rejected": -3.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 15.625, "rewards/rejected": -17.75, "step": 1025 }, { "epoch": 0.38212290502793295, "grad_norm": 0.0020904541015625, "learning_rate": 1.3980653910947365e-06, "logits/chosen": -0.043701171875, "logits/rejected": 0.1552734375, "logps/chosen": -0.578125, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.90625, "rewards/margins": 14.25, "rewards/rejected": -17.25, "step": 1026 }, { "epoch": 0.3824953445065177, "grad_norm": 0.004791259765625, "learning_rate": 1.3970902177265985e-06, "logits/chosen": 0.002716064453125, "logits/rejected": 0.10302734375, "logps/chosen": -0.27734375, "logps/rejected": -3.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3828125, "rewards/margins": 13.75, "rewards/rejected": -15.125, "step": 1027 }, { "epoch": 0.3828677839851024, "grad_norm": 1.9375, "learning_rate": 1.3961142038520296e-06, "logits/chosen": 0.028076171875, "logits/rejected": 0.1630859375, "logps/chosen": -0.8671875, "logps/rejected": -2.96875, "loss": 0.0024, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.34375, "rewards/margins": 10.5, "rewards/rejected": -14.8125, "step": 1028 }, { "epoch": 0.38324022346368714, "grad_norm": 0.00151824951171875, "learning_rate": 1.3951373511213246e-06, "logits/chosen": 0.0615234375, "logits/rejected": 0.12109375, "logps/chosen": -0.291015625, "logps/rejected": -3.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.453125, "rewards/margins": 13.75, "rewards/rejected": -15.1875, "step": 1029 }, { "epoch": 0.38361266294227186, "grad_norm": 8.344650268554688e-05, "learning_rate": 1.3941596611861984e-06, "logits/chosen": 0.0223388671875, "logits/rejected": 0.279296875, "logps/chosen": -0.111328125, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5546875, "rewards/margins": 16.625, "rewards/rejected": -17.25, "step": 1030 }, { "epoch": 0.3839851024208566, "grad_norm": 0.58203125, "learning_rate": 1.3931811356997808e-06, "logits/chosen": -0.080078125, "logits/rejected": -0.39453125, "logps/chosen": -0.0908203125, "logps/rejected": -2.609375, "loss": 0.0011, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.453125, "rewards/margins": 12.625, "rewards/rejected": -13.0625, "step": 1031 }, { "epoch": 0.3843575418994413, "grad_norm": 0.189453125, "learning_rate": 1.392201776316614e-06, "logits/chosen": 0.14453125, "logits/rejected": -1.1171875, "logps/chosen": -0.3984375, "logps/rejected": -2.390625, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9921875, "rewards/margins": 9.9375, "rewards/rejected": -11.9375, "step": 1032 }, { "epoch": 0.38472998137802605, "grad_norm": 0.1669921875, "learning_rate": 1.391221584692651e-06, "logits/chosen": -0.2177734375, "logits/rejected": -0.28125, "logps/chosen": -0.2470703125, "logps/rejected": -2.796875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.234375, "rewards/margins": 12.75, "rewards/rejected": -14.0, "step": 1033 }, { "epoch": 0.3851024208566108, "grad_norm": 0.00144195556640625, "learning_rate": 1.3902405624852518e-06, "logits/chosen": 0.10791015625, "logits/rejected": 0.703125, "logps/chosen": -0.267578125, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 15.375, "rewards/rejected": -16.75, "step": 1034 }, { "epoch": 0.3854748603351955, "grad_norm": 5.91278076171875e-05, "learning_rate": 1.3892587113531802e-06, "logits/chosen": 0.1328125, "logits/rejected": 0.27734375, "logps/chosen": -0.431640625, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 17.25, "rewards/rejected": -19.5, "step": 1035 }, { "epoch": 0.38584729981378024, "grad_norm": 0.00031280517578125, "learning_rate": 1.3882760329566022e-06, "logits/chosen": 0.154296875, "logits/rejected": -0.029296875, "logps/chosen": -0.47265625, "logps/rejected": -3.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.359375, "rewards/margins": 15.375, "rewards/rejected": -17.75, "step": 1036 }, { "epoch": 0.38621973929236497, "grad_norm": 0.000518798828125, "learning_rate": 1.3872925289570825e-06, "logits/chosen": -0.181640625, "logits/rejected": 0.203125, "logps/chosen": -0.171875, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.859375, "rewards/margins": 14.8125, "rewards/rejected": -15.625, "step": 1037 }, { "epoch": 0.3865921787709497, "grad_norm": 0.00147247314453125, "learning_rate": 1.3863082010175818e-06, "logits/chosen": -0.1865234375, "logits/rejected": 0.00787353515625, "logps/chosen": -0.26953125, "logps/rejected": -3.296875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3515625, "rewards/margins": 15.125, "rewards/rejected": -16.5, "step": 1038 }, { "epoch": 0.38696461824953443, "grad_norm": 0.00145721435546875, "learning_rate": 1.3853230508024535e-06, "logits/chosen": 0.130859375, "logits/rejected": 0.03955078125, "logps/chosen": -0.232421875, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.15625, "rewards/margins": 13.875, "rewards/rejected": -15.0625, "step": 1039 }, { "epoch": 0.38733705772811916, "grad_norm": 2.3125, "learning_rate": 1.3843370799774417e-06, "logits/chosen": 0.07177734375, "logits/rejected": 0.0693359375, "logps/chosen": -0.765625, "logps/rejected": -2.96875, "loss": 0.003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.828125, "rewards/margins": 11.0, "rewards/rejected": -14.8125, "step": 1040 }, { "epoch": 0.3877094972067039, "grad_norm": 0.00077056884765625, "learning_rate": 1.3833502902096784e-06, "logits/chosen": 0.0211181640625, "logits/rejected": 0.1396484375, "logps/chosen": -0.26171875, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3046875, "rewards/margins": 14.5, "rewards/rejected": -15.8125, "step": 1041 }, { "epoch": 0.3880819366852886, "grad_norm": 0.73828125, "learning_rate": 1.3823626831676796e-06, "logits/chosen": -0.1875, "logits/rejected": 0.34765625, "logps/chosen": -0.609375, "logps/rejected": -2.796875, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0625, "rewards/margins": 10.9375, "rewards/rejected": -14.0, "step": 1042 }, { "epoch": 0.38845437616387335, "grad_norm": 0.0002593994140625, "learning_rate": 1.3813742605213437e-06, "logits/chosen": 0.0869140625, "logits/rejected": -0.1142578125, "logps/chosen": -0.359375, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 16.25, "rewards/rejected": -18.0, "step": 1043 }, { "epoch": 0.3888268156424581, "grad_norm": 0.002532958984375, "learning_rate": 1.380385023941948e-06, "logits/chosen": 0.09033203125, "logits/rejected": 0.185546875, "logps/chosen": -0.404296875, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 13.375, "rewards/rejected": -15.375, "step": 1044 }, { "epoch": 0.3891992551210428, "grad_norm": 0.000560760498046875, "learning_rate": 1.379394975102146e-06, "logits/chosen": 0.10693359375, "logits/rejected": 0.40625, "logps/chosen": -0.265625, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 15.125, "rewards/rejected": -16.5, "step": 1045 }, { "epoch": 0.38957169459962754, "grad_norm": 0.0002040863037109375, "learning_rate": 1.3784041156759643e-06, "logits/chosen": 0.181640625, "logits/rejected": 0.3203125, "logps/chosen": -0.25, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 16.75, "rewards/rejected": -18.0, "step": 1046 }, { "epoch": 0.38994413407821227, "grad_norm": 0.08251953125, "learning_rate": 1.3774124473388008e-06, "logits/chosen": 0.003173828125, "logits/rejected": -0.56640625, "logps/chosen": -0.275390625, "logps/rejected": -2.8125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.375, "rewards/margins": 12.6875, "rewards/rejected": -14.0625, "step": 1047 }, { "epoch": 0.390316573556797, "grad_norm": 0.00028228759765625, "learning_rate": 1.376419971767421e-06, "logits/chosen": 0.1318359375, "logits/rejected": 0.166015625, "logps/chosen": -0.390625, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9609375, "rewards/margins": 16.0, "rewards/rejected": -18.0, "step": 1048 }, { "epoch": 0.3906890130353817, "grad_norm": 0.08642578125, "learning_rate": 1.3754266906399544e-06, "logits/chosen": 0.1318359375, "logits/rejected": 0.126953125, "logps/chosen": -0.91015625, "logps/rejected": -3.765625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.5625, "rewards/margins": 14.3125, "rewards/rejected": -18.875, "step": 1049 }, { "epoch": 0.39106145251396646, "grad_norm": 6.5625, "learning_rate": 1.3744326056358936e-06, "logits/chosen": 0.01214599609375, "logits/rejected": 0.330078125, "logps/chosen": -0.78515625, "logps/rejected": -2.609375, "loss": 0.0078, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.921875, "rewards/margins": 9.125, "rewards/rejected": -13.0, "step": 1050 }, { "epoch": 0.3914338919925512, "grad_norm": 0.0263671875, "learning_rate": 1.3734377184360903e-06, "logits/chosen": -0.040283203125, "logits/rejected": 0.1357421875, "logps/chosen": -0.9765625, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.875, "rewards/margins": 12.3125, "rewards/rejected": -17.25, "step": 1051 }, { "epoch": 0.3918063314711359, "grad_norm": 0.0013275146484375, "learning_rate": 1.3724420307227524e-06, "logits/chosen": 0.1865234375, "logits/rejected": -0.026611328125, "logps/chosen": -0.37890625, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.90625, "rewards/margins": 14.375, "rewards/rejected": -16.25, "step": 1052 }, { "epoch": 0.39217877094972065, "grad_norm": 0.0064697265625, "learning_rate": 1.3714455441794413e-06, "logits/chosen": -0.146484375, "logits/rejected": 0.126953125, "logps/chosen": -0.33203125, "logps/rejected": -2.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 13.0, "rewards/rejected": -14.75, "step": 1053 }, { "epoch": 0.3925512104283054, "grad_norm": 0.01220703125, "learning_rate": 1.3704482604910688e-06, "logits/chosen": -0.328125, "logits/rejected": -0.13671875, "logps/chosen": -1.03125, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.125, "rewards/margins": 11.625, "rewards/rejected": -16.75, "step": 1054 }, { "epoch": 0.3929236499068901, "grad_norm": 0.00019359588623046875, "learning_rate": 1.369450181343895e-06, "logits/chosen": 0.1640625, "logits/rejected": -0.07275390625, "logps/chosen": -0.064453125, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3203125, "rewards/margins": 15.8125, "rewards/rejected": -16.125, "step": 1055 }, { "epoch": 0.39329608938547483, "grad_norm": 0.017578125, "learning_rate": 1.3684513084255253e-06, "logits/chosen": 0.275390625, "logits/rejected": 0.166015625, "logps/chosen": -0.609375, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.03125, "rewards/margins": 13.625, "rewards/rejected": -16.75, "step": 1056 }, { "epoch": 0.39366852886405956, "grad_norm": 0.0086669921875, "learning_rate": 1.3674516434249065e-06, "logits/chosen": -0.1484375, "logits/rejected": -0.033203125, "logps/chosen": -0.427734375, "logps/rejected": -2.890625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.140625, "rewards/margins": 12.375, "rewards/rejected": -14.5, "step": 1057 }, { "epoch": 0.3940409683426443, "grad_norm": 0.419921875, "learning_rate": 1.366451188032325e-06, "logits/chosen": -0.02099609375, "logits/rejected": -0.041748046875, "logps/chosen": -0.31640625, "logps/rejected": -2.703125, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.578125, "rewards/margins": 11.9375, "rewards/rejected": -13.5, "step": 1058 }, { "epoch": 0.394413407821229, "grad_norm": 115.5, "learning_rate": 1.3654499439394043e-06, "logits/chosen": -0.1484375, "logits/rejected": 0.11328125, "logps/chosen": -1.078125, "logps/rejected": -2.25, "loss": 0.1982, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.40625, "rewards/margins": 5.8125, "rewards/rejected": -11.25, "step": 1059 }, { "epoch": 0.3947858472998138, "grad_norm": 182.0, "learning_rate": 1.3644479128391002e-06, "logits/chosen": 0.00927734375, "logits/rejected": 0.4609375, "logps/chosen": -0.474609375, "logps/rejected": -1.578125, "loss": 1.3203, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.375, "rewards/margins": 5.5, "rewards/rejected": -7.875, "step": 1060 }, { "epoch": 0.39515828677839854, "grad_norm": 0.32421875, "learning_rate": 1.3634450964257002e-06, "logits/chosen": 0.0289306640625, "logits/rejected": -0.58984375, "logps/chosen": -0.2421875, "logps/rejected": -2.0625, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.203125, "rewards/margins": 9.1875, "rewards/rejected": -10.375, "step": 1061 }, { "epoch": 0.39553072625698327, "grad_norm": 0.349609375, "learning_rate": 1.3624414963948193e-06, "logits/chosen": 0.03515625, "logits/rejected": 0.48828125, "logps/chosen": -0.28125, "logps/rejected": -2.625, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.40625, "rewards/margins": 11.75, "rewards/rejected": -13.125, "step": 1062 }, { "epoch": 0.395903165735568, "grad_norm": 11.5, "learning_rate": 1.3614371144433973e-06, "logits/chosen": -0.06494140625, "logits/rejected": 0.158203125, "logps/chosen": -0.404296875, "logps/rejected": -1.765625, "loss": 0.0292, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 6.8125, "rewards/rejected": -8.8125, "step": 1063 }, { "epoch": 0.3962756052141527, "grad_norm": 0.05712890625, "learning_rate": 1.3604319522696966e-06, "logits/chosen": 0.044921875, "logits/rejected": 0.0174560546875, "logps/chosen": -0.31640625, "logps/rejected": -3.03125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.578125, "rewards/margins": 13.625, "rewards/rejected": -15.125, "step": 1064 }, { "epoch": 0.39664804469273746, "grad_norm": 8.5, "learning_rate": 1.3594260115732984e-06, "logits/chosen": -0.1240234375, "logits/rejected": -0.035888671875, "logps/chosen": -0.64453125, "logps/rejected": -2.3125, "loss": 0.0093, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.21875, "rewards/margins": 8.3125, "rewards/rejected": -11.5, "step": 1065 }, { "epoch": 0.3970204841713222, "grad_norm": 0.005950927734375, "learning_rate": 1.3584192940551006e-06, "logits/chosen": 0.2236328125, "logits/rejected": 0.150390625, "logps/chosen": -0.380859375, "logps/rejected": -3.203125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.90625, "rewards/margins": 14.125, "rewards/rejected": -16.0, "step": 1066 }, { "epoch": 0.3973929236499069, "grad_norm": 0.0032196044921875, "learning_rate": 1.3574118014173143e-06, "logits/chosen": -0.1865234375, "logits/rejected": 0.08349609375, "logps/chosen": -0.453125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.265625, "rewards/margins": 15.0, "rewards/rejected": -17.25, "step": 1067 }, { "epoch": 0.39776536312849164, "grad_norm": 0.00032806396484375, "learning_rate": 1.3564035353634615e-06, "logits/chosen": 0.0947265625, "logits/rejected": 0.2412109375, "logps/chosen": -0.287109375, "logps/rejected": -3.390625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4375, "rewards/margins": 15.5, "rewards/rejected": -17.0, "step": 1068 }, { "epoch": 0.3981378026070764, "grad_norm": 120.5, "learning_rate": 1.3553944975983712e-06, "logits/chosen": -0.29296875, "logits/rejected": -0.1796875, "logps/chosen": -0.6640625, "logps/rejected": -2.234375, "loss": 0.3086, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.34375, "rewards/margins": 7.84375, "rewards/rejected": -11.1875, "step": 1069 }, { "epoch": 0.3985102420856611, "grad_norm": 0.00177001953125, "learning_rate": 1.3543846898281786e-06, "logits/chosen": 0.07958984375, "logits/rejected": 0.06591796875, "logps/chosen": -0.44140625, "logps/rejected": -3.265625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 14.125, "rewards/rejected": -16.375, "step": 1070 }, { "epoch": 0.39888268156424583, "grad_norm": 73.0, "learning_rate": 1.3533741137603197e-06, "logits/chosen": -0.1591796875, "logits/rejected": 0.361328125, "logps/chosen": -0.7421875, "logps/rejected": -2.71875, "loss": 0.1367, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.703125, "rewards/margins": 9.875, "rewards/rejected": -13.5, "step": 1071 }, { "epoch": 0.39925512104283056, "grad_norm": 0.040283203125, "learning_rate": 1.35236277110353e-06, "logits/chosen": 0.047119140625, "logits/rejected": 0.1123046875, "logps/chosen": -0.390625, "logps/rejected": -3.328125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9609375, "rewards/margins": 14.6875, "rewards/rejected": -16.625, "step": 1072 }, { "epoch": 0.3996275605214153, "grad_norm": 0.0179443359375, "learning_rate": 1.351350663567841e-06, "logits/chosen": 0.0164794921875, "logits/rejected": 0.24609375, "logps/chosen": -0.51953125, "logps/rejected": -2.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.59375, "rewards/margins": 11.125, "rewards/rejected": -13.75, "step": 1073 }, { "epoch": 0.4, "grad_norm": 0.00048828125, "learning_rate": 1.350337792864578e-06, "logits/chosen": 0.1884765625, "logits/rejected": 0.365234375, "logps/chosen": -0.1435546875, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.71875, "rewards/margins": 15.25, "rewards/rejected": -15.9375, "step": 1074 }, { "epoch": 0.40037243947858475, "grad_norm": 0.173828125, "learning_rate": 1.3493241607063561e-06, "logits/chosen": -0.10205078125, "logits/rejected": 0.4765625, "logps/chosen": -0.40625, "logps/rejected": -3.015625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 13.0, "rewards/rejected": -15.0625, "step": 1075 }, { "epoch": 0.4007448789571695, "grad_norm": 0.00014495849609375, "learning_rate": 1.3483097688070783e-06, "logits/chosen": 0.267578125, "logits/rejected": 0.3203125, "logps/chosen": -0.40234375, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0, "rewards/margins": 16.125, "rewards/rejected": -18.125, "step": 1076 }, { "epoch": 0.4011173184357542, "grad_norm": 0.0002727508544921875, "learning_rate": 1.3472946188819321e-06, "logits/chosen": -0.13671875, "logits/rejected": 0.1435546875, "logps/chosen": -0.38671875, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9296875, "rewards/margins": 16.0, "rewards/rejected": -18.0, "step": 1077 }, { "epoch": 0.40148975791433894, "grad_norm": 0.1005859375, "learning_rate": 1.3462787126473864e-06, "logits/chosen": 0.003631591796875, "logits/rejected": 0.162109375, "logps/chosen": -0.97265625, "logps/rejected": -3.4375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.875, "rewards/margins": 12.375, "rewards/rejected": -17.25, "step": 1078 }, { "epoch": 0.40186219739292367, "grad_norm": 0.07861328125, "learning_rate": 1.3452620518211895e-06, "logits/chosen": -0.002777099609375, "logits/rejected": 0.3828125, "logps/chosen": -0.1962890625, "logps/rejected": -2.90625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.984375, "rewards/margins": 13.625, "rewards/rejected": -14.625, "step": 1079 }, { "epoch": 0.4022346368715084, "grad_norm": 0.00035858154296875, "learning_rate": 1.3442446381223653e-06, "logits/chosen": 0.0072021484375, "logits/rejected": 0.205078125, "logps/chosen": -0.28125, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.40625, "rewards/margins": 15.375, "rewards/rejected": -16.75, "step": 1080 }, { "epoch": 0.40260707635009313, "grad_norm": 1.6640625, "learning_rate": 1.3432264732712103e-06, "logits/chosen": -0.16796875, "logits/rejected": -0.22265625, "logps/chosen": -0.515625, "logps/rejected": -2.265625, "loss": 0.0033, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.578125, "rewards/margins": 8.75, "rewards/rejected": -11.3125, "step": 1081 }, { "epoch": 0.40297951582867786, "grad_norm": 1.296875, "learning_rate": 1.3422075589892918e-06, "logits/chosen": 0.294921875, "logits/rejected": -0.7578125, "logps/chosen": -0.64453125, "logps/rejected": -2.8125, "loss": 0.0017, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.234375, "rewards/margins": 10.875, "rewards/rejected": -14.125, "step": 1082 }, { "epoch": 0.4033519553072626, "grad_norm": 0.000865936279296875, "learning_rate": 1.3411878969994441e-06, "logits/chosen": -0.18359375, "logits/rejected": 0.1259765625, "logps/chosen": -0.25, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 14.375, "rewards/rejected": -15.625, "step": 1083 }, { "epoch": 0.4037243947858473, "grad_norm": 0.1171875, "learning_rate": 1.3401674890257653e-06, "logits/chosen": 0.1708984375, "logits/rejected": 0.7421875, "logps/chosen": -0.33984375, "logps/rejected": -3.015625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.703125, "rewards/margins": 13.375, "rewards/rejected": -15.0, "step": 1084 }, { "epoch": 0.40409683426443205, "grad_norm": 0.0003833770751953125, "learning_rate": 1.3391463367936154e-06, "logits/chosen": 0.2734375, "logits/rejected": 0.240234375, "logps/chosen": -0.25, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 15.1875, "rewards/rejected": -16.5, "step": 1085 }, { "epoch": 0.4044692737430168, "grad_norm": 0.00046539306640625, "learning_rate": 1.3381244420296126e-06, "logits/chosen": -0.006134033203125, "logits/rejected": 0.31640625, "logps/chosen": -0.5625, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8125, "rewards/margins": 15.5, "rewards/rejected": -18.25, "step": 1086 }, { "epoch": 0.4048417132216015, "grad_norm": 0.0003681182861328125, "learning_rate": 1.3371018064616305e-06, "logits/chosen": -0.1845703125, "logits/rejected": 0.251953125, "logps/chosen": -0.2734375, "logps/rejected": -3.296875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3671875, "rewards/margins": 15.125, "rewards/rejected": -16.5, "step": 1087 }, { "epoch": 0.40521415270018624, "grad_norm": 0.8984375, "learning_rate": 1.336078431818795e-06, "logits/chosen": -0.03515625, "logits/rejected": 0.10400390625, "logps/chosen": -0.1884765625, "logps/rejected": -2.890625, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.94140625, "rewards/margins": 13.5625, "rewards/rejected": -14.5, "step": 1088 }, { "epoch": 0.40558659217877097, "grad_norm": 4.887580871582031e-05, "learning_rate": 1.335054319831483e-06, "logits/chosen": 0.09521484375, "logits/rejected": 0.1533203125, "logps/chosen": -0.06640625, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.333984375, "rewards/margins": 18.125, "rewards/rejected": -18.5, "step": 1089 }, { "epoch": 0.4059590316573557, "grad_norm": 0.000637054443359375, "learning_rate": 1.3340294722313167e-06, "logits/chosen": 0.1279296875, "logits/rejected": 0.34375, "logps/chosen": -0.1943359375, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.97265625, "rewards/margins": 17.0, "rewards/rejected": -17.875, "step": 1090 }, { "epoch": 0.4063314711359404, "grad_norm": 0.000179290771484375, "learning_rate": 1.3330038907511625e-06, "logits/chosen": 0.00555419921875, "logits/rejected": 0.255859375, "logps/chosen": -0.30859375, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.546875, "rewards/margins": 16.625, "rewards/rejected": -18.25, "step": 1091 }, { "epoch": 0.40670391061452515, "grad_norm": 0.000873565673828125, "learning_rate": 1.3319775771251281e-06, "logits/chosen": -0.0108642578125, "logits/rejected": -0.126953125, "logps/chosen": -0.25, "logps/rejected": -3.234375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 15.0, "rewards/rejected": -16.25, "step": 1092 }, { "epoch": 0.4070763500931099, "grad_norm": 0.0004215240478515625, "learning_rate": 1.3309505330885586e-06, "logits/chosen": 0.2236328125, "logits/rejected": 0.2021484375, "logps/chosen": -0.1318359375, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.65625, "rewards/margins": 15.625, "rewards/rejected": -16.25, "step": 1093 }, { "epoch": 0.4074487895716946, "grad_norm": 0.030517578125, "learning_rate": 1.3299227603780343e-06, "logits/chosen": 0.036865234375, "logits/rejected": -0.197265625, "logps/chosen": -0.515625, "logps/rejected": -2.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.578125, "rewards/margins": 12.1875, "rewards/rejected": -14.75, "step": 1094 }, { "epoch": 0.40782122905027934, "grad_norm": 0.94921875, "learning_rate": 1.3288942607313683e-06, "logits/chosen": 0.1064453125, "logits/rejected": 0.00122833251953125, "logps/chosen": -0.5, "logps/rejected": -1.9296875, "loss": 0.0018, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5, "rewards/margins": 7.125, "rewards/rejected": -9.625, "step": 1095 }, { "epoch": 0.4081936685288641, "grad_norm": 0.000232696533203125, "learning_rate": 1.3278650358876017e-06, "logits/chosen": -0.033203125, "logits/rejected": 0.1083984375, "logps/chosen": -0.5390625, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6875, "rewards/margins": 15.625, "rewards/rejected": -18.25, "step": 1096 }, { "epoch": 0.4085661080074488, "grad_norm": 22.75, "learning_rate": 1.3268350875870022e-06, "logits/chosen": -0.0166015625, "logits/rejected": -0.09375, "logps/chosen": -0.318359375, "logps/rejected": -2.265625, "loss": 0.04, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5859375, "rewards/margins": 9.75, "rewards/rejected": -11.3125, "step": 1097 }, { "epoch": 0.40893854748603353, "grad_norm": 0.0146484375, "learning_rate": 1.325804417571061e-06, "logits/chosen": -0.04833984375, "logits/rejected": -0.07861328125, "logps/chosen": -1.109375, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.53125, "rewards/margins": 13.25, "rewards/rejected": -18.75, "step": 1098 }, { "epoch": 0.40931098696461826, "grad_norm": 7.009506225585938e-05, "learning_rate": 1.3247730275824896e-06, "logits/chosen": 0.04736328125, "logits/rejected": 0.26171875, "logps/chosen": -0.09228515625, "logps/rejected": -3.859375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4609375, "rewards/margins": 18.75, "rewards/rejected": -19.25, "step": 1099 }, { "epoch": 0.409683426443203, "grad_norm": 0.000499725341796875, "learning_rate": 1.3237409193652167e-06, "logits/chosen": 0.216796875, "logits/rejected": 0.21875, "logps/chosen": -0.205078125, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0234375, "rewards/margins": 15.875, "rewards/rejected": -16.875, "step": 1100 }, { "epoch": 0.4100558659217877, "grad_norm": 11.0, "learning_rate": 1.3227080946643853e-06, "logits/chosen": 0.029296875, "logits/rejected": 0.208984375, "logps/chosen": -0.71875, "logps/rejected": -2.625, "loss": 0.0173, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.59375, "rewards/margins": 9.625, "rewards/rejected": -13.1875, "step": 1101 }, { "epoch": 0.41042830540037245, "grad_norm": 0.00537109375, "learning_rate": 1.3216745552263502e-06, "logits/chosen": -0.0159912109375, "logits/rejected": 0.2734375, "logps/chosen": -0.578125, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.890625, "rewards/margins": 12.625, "rewards/rejected": -15.5, "step": 1102 }, { "epoch": 0.4108007448789572, "grad_norm": 0.0004673004150390625, "learning_rate": 1.3206403027986743e-06, "logits/chosen": 0.0947265625, "logits/rejected": 0.32421875, "logps/chosen": -0.09375, "logps/rejected": -3.078125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.466796875, "rewards/margins": 14.875, "rewards/rejected": -15.375, "step": 1103 }, { "epoch": 0.4111731843575419, "grad_norm": 0.043212890625, "learning_rate": 1.3196053391301267e-06, "logits/chosen": 0.0400390625, "logits/rejected": -0.296875, "logps/chosen": -0.150390625, "logps/rejected": -2.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.75, "rewards/margins": 13.0, "rewards/rejected": -13.8125, "step": 1104 }, { "epoch": 0.41154562383612664, "grad_norm": 0.185546875, "learning_rate": 1.3185696659706787e-06, "logits/chosen": 0.10302734375, "logits/rejected": -0.67578125, "logps/chosen": -0.2421875, "logps/rejected": -2.4375, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2109375, "rewards/margins": 11.0, "rewards/rejected": -12.25, "step": 1105 }, { "epoch": 0.41191806331471137, "grad_norm": 0.037109375, "learning_rate": 1.3175332850715006e-06, "logits/chosen": 0.2158203125, "logits/rejected": 0.0225830078125, "logps/chosen": -0.87890625, "logps/rejected": -2.96875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.375, "rewards/margins": 10.4375, "rewards/rejected": -14.875, "step": 1106 }, { "epoch": 0.4122905027932961, "grad_norm": 0.000232696533203125, "learning_rate": 1.3164961981849606e-06, "logits/chosen": 0.1787109375, "logits/rejected": 0.333984375, "logps/chosen": -0.36328125, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8125, "rewards/margins": 16.25, "rewards/rejected": -18.0, "step": 1107 }, { "epoch": 0.41266294227188083, "grad_norm": 0.0031890869140625, "learning_rate": 1.3154584070646203e-06, "logits/chosen": -0.0625, "logits/rejected": 0.2265625, "logps/chosen": -0.498046875, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.484375, "rewards/margins": 15.375, "rewards/rejected": -17.875, "step": 1108 }, { "epoch": 0.41303538175046556, "grad_norm": 0.00010633468627929688, "learning_rate": 1.3144199134652314e-06, "logits/chosen": 0.0036773681640625, "logits/rejected": 0.314453125, "logps/chosen": -0.3046875, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.515625, "rewards/margins": 16.75, "rewards/rejected": -18.25, "step": 1109 }, { "epoch": 0.4134078212290503, "grad_norm": 0.00021648406982421875, "learning_rate": 1.3133807191427336e-06, "logits/chosen": 0.1953125, "logits/rejected": 0.431640625, "logps/chosen": -0.55859375, "logps/rejected": -3.890625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.796875, "rewards/margins": 16.625, "rewards/rejected": -19.5, "step": 1110 }, { "epoch": 0.413780260707635, "grad_norm": 0.126953125, "learning_rate": 1.3123408258542523e-06, "logits/chosen": 0.1376953125, "logits/rejected": 0.0028839111328125, "logps/chosen": -0.65234375, "logps/rejected": -3.1875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.265625, "rewards/margins": 12.625, "rewards/rejected": -15.9375, "step": 1111 }, { "epoch": 0.41415270018621975, "grad_norm": 0.0010833740234375, "learning_rate": 1.311300235358093e-06, "logits/chosen": -0.0576171875, "logits/rejected": -0.0198974609375, "logps/chosen": -0.388671875, "logps/rejected": -3.203125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9375, "rewards/margins": 14.125, "rewards/rejected": -16.0, "step": 1112 }, { "epoch": 0.4145251396648045, "grad_norm": 0.00020599365234375, "learning_rate": 1.3102589494137424e-06, "logits/chosen": 0.1748046875, "logits/rejected": 0.051025390625, "logps/chosen": -0.37890625, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 15.8125, "rewards/rejected": -17.75, "step": 1113 }, { "epoch": 0.4148975791433892, "grad_norm": 0.1083984375, "learning_rate": 1.309216969781861e-06, "logits/chosen": 0.08203125, "logits/rejected": -0.458984375, "logps/chosen": -0.25390625, "logps/rejected": -2.921875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.265625, "rewards/margins": 13.375, "rewards/rejected": -14.625, "step": 1114 }, { "epoch": 0.41527001862197394, "grad_norm": 0.001708984375, "learning_rate": 1.3081742982242837e-06, "logits/chosen": 0.173828125, "logits/rejected": 0.2138671875, "logps/chosen": -0.60546875, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.03125, "rewards/margins": 14.125, "rewards/rejected": -17.25, "step": 1115 }, { "epoch": 0.41564245810055866, "grad_norm": 0.1845703125, "learning_rate": 1.307130936504014e-06, "logits/chosen": -0.130859375, "logits/rejected": 0.2890625, "logps/chosen": -0.3984375, "logps/rejected": -2.71875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9921875, "rewards/margins": 11.625, "rewards/rejected": -13.625, "step": 1116 }, { "epoch": 0.4160148975791434, "grad_norm": 0.000743865966796875, "learning_rate": 1.3060868863852235e-06, "logits/chosen": 0.0634765625, "logits/rejected": 0.318359375, "logps/chosen": -0.265625, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 15.8125, "rewards/rejected": -17.25, "step": 1117 }, { "epoch": 0.4163873370577281, "grad_norm": 0.169921875, "learning_rate": 1.3050421496332474e-06, "logits/chosen": -0.09033203125, "logits/rejected": 0.11572265625, "logps/chosen": -0.55078125, "logps/rejected": -3.3125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 13.875, "rewards/rejected": -16.625, "step": 1118 }, { "epoch": 0.41675977653631285, "grad_norm": 0.0654296875, "learning_rate": 1.303996728014582e-06, "logits/chosen": -0.06640625, "logits/rejected": 0.412109375, "logps/chosen": -1.0625, "logps/rejected": -3.25, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.3125, "rewards/margins": 10.9375, "rewards/rejected": -16.25, "step": 1119 }, { "epoch": 0.4171322160148976, "grad_norm": 4.71875, "learning_rate": 1.3029506232968808e-06, "logits/chosen": 0.1552734375, "logits/rejected": 0.283203125, "logps/chosen": -0.1865234375, "logps/rejected": -2.59375, "loss": 0.0093, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9296875, "rewards/margins": 12.0625, "rewards/rejected": -13.0, "step": 1120 }, { "epoch": 0.4175046554934823, "grad_norm": 0.000675201416015625, "learning_rate": 1.3019038372489536e-06, "logits/chosen": 0.09130859375, "logits/rejected": 0.294921875, "logps/chosen": -0.1875, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.93359375, "rewards/margins": 16.375, "rewards/rejected": -17.25, "step": 1121 }, { "epoch": 0.41787709497206704, "grad_norm": 0.2158203125, "learning_rate": 1.3008563716407613e-06, "logits/chosen": 0.0302734375, "logits/rejected": -0.09228515625, "logps/chosen": -0.69921875, "logps/rejected": -2.65625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5, "rewards/margins": 9.75, "rewards/rejected": -13.25, "step": 1122 }, { "epoch": 0.41824953445065177, "grad_norm": 0.466796875, "learning_rate": 1.2998082282434146e-06, "logits/chosen": -0.10302734375, "logits/rejected": -0.427734375, "logps/chosen": -0.81640625, "logps/rejected": -2.4375, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.09375, "rewards/margins": 8.0625, "rewards/rejected": -12.125, "step": 1123 }, { "epoch": 0.4186219739292365, "grad_norm": 0.018310546875, "learning_rate": 1.2987594088291691e-06, "logits/chosen": 0.0096435546875, "logits/rejected": -0.5078125, "logps/chosen": -0.365234375, "logps/rejected": -2.921875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.828125, "rewards/margins": 12.75, "rewards/rejected": -14.625, "step": 1124 }, { "epoch": 0.41899441340782123, "grad_norm": 202.0, "learning_rate": 1.2977099151714248e-06, "logits/chosen": 0.0250244140625, "logits/rejected": 0.625, "logps/chosen": -0.51953125, "logps/rejected": -1.9375, "loss": 1.3203, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.609375, "rewards/margins": 7.0625, "rewards/rejected": -9.6875, "step": 1125 }, { "epoch": 0.41936685288640596, "grad_norm": 0.018310546875, "learning_rate": 1.2966597490447204e-06, "logits/chosen": 0.0235595703125, "logits/rejected": 0.205078125, "logps/chosen": -0.74609375, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.734375, "rewards/margins": 13.125, "rewards/rejected": -16.875, "step": 1126 }, { "epoch": 0.4197392923649907, "grad_norm": 3.9577484130859375e-05, "learning_rate": 1.2956089122247332e-06, "logits/chosen": -0.028564453125, "logits/rejected": 0.240234375, "logps/chosen": -0.3515625, "logps/rejected": -3.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.765625, "rewards/margins": 18.0, "rewards/rejected": -19.75, "step": 1127 }, { "epoch": 0.4201117318435754, "grad_norm": 0.01025390625, "learning_rate": 1.2945574064882731e-06, "logits/chosen": 0.19921875, "logits/rejected": 0.35546875, "logps/chosen": -0.3125, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5625, "rewards/margins": 14.4375, "rewards/rejected": -16.0, "step": 1128 }, { "epoch": 0.42048417132216015, "grad_norm": 0.00170135498046875, "learning_rate": 1.2935052336132815e-06, "logits/chosen": 0.1796875, "logits/rejected": 0.30859375, "logps/chosen": -0.150390625, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.75, "rewards/margins": 14.3125, "rewards/rejected": -15.0625, "step": 1129 }, { "epoch": 0.4208566108007449, "grad_norm": 0.451171875, "learning_rate": 1.2924523953788284e-06, "logits/chosen": -0.24609375, "logits/rejected": 0.1494140625, "logps/chosen": -1.421875, "logps/rejected": -3.625, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.09375, "rewards/margins": 11.0, "rewards/rejected": -18.125, "step": 1130 }, { "epoch": 0.4212290502793296, "grad_norm": 0.0018768310546875, "learning_rate": 1.2913988935651077e-06, "logits/chosen": 0.138671875, "logits/rejected": 0.10498046875, "logps/chosen": -0.44921875, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25, "rewards/margins": 16.125, "rewards/rejected": -18.375, "step": 1131 }, { "epoch": 0.42160148975791434, "grad_norm": 0.035888671875, "learning_rate": 1.290344729953437e-06, "logits/chosen": 0.029052734375, "logits/rejected": 0.1962890625, "logps/chosen": -0.58203125, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.90625, "rewards/margins": 12.9375, "rewards/rejected": -15.8125, "step": 1132 }, { "epoch": 0.42197392923649907, "grad_norm": 0.000766754150390625, "learning_rate": 1.289289906326251e-06, "logits/chosen": 0.06396484375, "logits/rejected": -0.216796875, "logps/chosen": -0.1328125, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6640625, "rewards/margins": 15.75, "rewards/rejected": -16.5, "step": 1133 }, { "epoch": 0.4223463687150838, "grad_norm": 0.000888824462890625, "learning_rate": 1.2882344244671016e-06, "logits/chosen": -0.08154296875, "logits/rejected": 0.279296875, "logps/chosen": -0.55078125, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 15.875, "rewards/rejected": -18.625, "step": 1134 }, { "epoch": 0.4227188081936685, "grad_norm": 0.00060272216796875, "learning_rate": 1.2871782861606533e-06, "logits/chosen": -0.040283203125, "logits/rejected": -0.047607421875, "logps/chosen": -0.205078125, "logps/rejected": -3.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.03125, "rewards/margins": 16.75, "rewards/rejected": -17.75, "step": 1135 }, { "epoch": 0.42309124767225326, "grad_norm": 0.054931640625, "learning_rate": 1.2861214931926807e-06, "logits/chosen": -0.1533203125, "logits/rejected": -0.021240234375, "logps/chosen": -0.29296875, "logps/rejected": -3.1875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4609375, "rewards/margins": 14.5, "rewards/rejected": -16.0, "step": 1136 }, { "epoch": 0.423463687150838, "grad_norm": 0.03466796875, "learning_rate": 1.2850640473500653e-06, "logits/chosen": -0.05517578125, "logits/rejected": -0.421875, "logps/chosen": -0.333984375, "logps/rejected": -2.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.671875, "rewards/margins": 13.125, "rewards/rejected": -14.75, "step": 1137 }, { "epoch": 0.4238361266294227, "grad_norm": 0.80078125, "learning_rate": 1.2840059504207925e-06, "logits/chosen": 0.2431640625, "logits/rejected": -1.28125, "logps/chosen": -0.4609375, "logps/rejected": -2.125, "loss": 0.0014, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3125, "rewards/margins": 8.3125, "rewards/rejected": -10.625, "step": 1138 }, { "epoch": 0.42420856610800745, "grad_norm": 0.00020694732666015625, "learning_rate": 1.282947204193948e-06, "logits/chosen": 0.0625, "logits/rejected": 0.08447265625, "logps/chosen": -0.41796875, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.09375, "rewards/margins": 15.875, "rewards/rejected": -18.0, "step": 1139 }, { "epoch": 0.4245810055865922, "grad_norm": 0.1025390625, "learning_rate": 1.2818878104597168e-06, "logits/chosen": -0.07568359375, "logits/rejected": 0.036865234375, "logps/chosen": -0.94921875, "logps/rejected": -3.375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.75, "rewards/margins": 12.125, "rewards/rejected": -16.875, "step": 1140 }, { "epoch": 0.4249534450651769, "grad_norm": 0.001617431640625, "learning_rate": 1.2808277710093768e-06, "logits/chosen": -0.09619140625, "logits/rejected": 0.020751953125, "logps/chosen": -0.388671875, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9375, "rewards/margins": 13.8125, "rewards/rejected": -15.75, "step": 1141 }, { "epoch": 0.42532588454376163, "grad_norm": 0.00677490234375, "learning_rate": 1.2797670876352996e-06, "logits/chosen": 0.006988525390625, "logits/rejected": -0.03857421875, "logps/chosen": -0.890625, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.4375, "rewards/margins": 13.0, "rewards/rejected": -17.5, "step": 1142 }, { "epoch": 0.42569832402234636, "grad_norm": 0.0001773834228515625, "learning_rate": 1.2787057621309448e-06, "logits/chosen": -0.051025390625, "logits/rejected": 0.12255859375, "logps/chosen": -0.30859375, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.546875, "rewards/margins": 17.125, "rewards/rejected": -18.75, "step": 1143 }, { "epoch": 0.4260707635009311, "grad_norm": 0.0002727508544921875, "learning_rate": 1.2776437962908572e-06, "logits/chosen": 0.048583984375, "logits/rejected": 0.279296875, "logps/chosen": -0.12890625, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.640625, "rewards/margins": 17.375, "rewards/rejected": -18.0, "step": 1144 }, { "epoch": 0.4264432029795158, "grad_norm": 0.9296875, "learning_rate": 1.2765811919106646e-06, "logits/chosen": 0.109375, "logits/rejected": -0.7265625, "logps/chosen": -0.38671875, "logps/rejected": -2.6875, "loss": 0.0013, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9296875, "rewards/margins": 11.5, "rewards/rejected": -13.375, "step": 1145 }, { "epoch": 0.42681564245810055, "grad_norm": 0.003204345703125, "learning_rate": 1.2755179507870757e-06, "logits/chosen": 0.0208740234375, "logits/rejected": 0.2158203125, "logps/chosen": -0.3359375, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6796875, "rewards/margins": 14.3125, "rewards/rejected": -16.0, "step": 1146 }, { "epoch": 0.4271880819366853, "grad_norm": 0.003936767578125, "learning_rate": 1.274454074717874e-06, "logits/chosen": 0.2265625, "logits/rejected": 0.263671875, "logps/chosen": -0.45703125, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.28125, "rewards/margins": 14.375, "rewards/rejected": -16.75, "step": 1147 }, { "epoch": 0.42756052141527, "grad_norm": 0.0018157958984375, "learning_rate": 1.2733895655019177e-06, "logits/chosen": 0.1865234375, "logits/rejected": 0.1533203125, "logps/chosen": -0.16015625, "logps/rejected": -2.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.80078125, "rewards/margins": 13.625, "rewards/rejected": -14.375, "step": 1148 }, { "epoch": 0.42793296089385474, "grad_norm": 0.0068359375, "learning_rate": 1.2723244249391352e-06, "logits/chosen": 0.00726318359375, "logits/rejected": 0.236328125, "logps/chosen": -0.3359375, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6796875, "rewards/margins": 13.6875, "rewards/rejected": -15.375, "step": 1149 }, { "epoch": 0.42830540037243947, "grad_norm": 0.0003910064697265625, "learning_rate": 1.2712586548305225e-06, "logits/chosen": 0.0037078857421875, "logits/rejected": 0.193359375, "logps/chosen": -0.33203125, "logps/rejected": -3.421875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 15.5, "rewards/rejected": -17.125, "step": 1150 }, { "epoch": 0.4286778398510242, "grad_norm": 0.000274658203125, "learning_rate": 1.2701922569781403e-06, "logits/chosen": -0.057861328125, "logits/rejected": 0.07568359375, "logps/chosen": -0.48828125, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 15.9375, "rewards/rejected": -18.375, "step": 1151 }, { "epoch": 0.42905027932960893, "grad_norm": 0.0013275146484375, "learning_rate": 1.2691252331851102e-06, "logits/chosen": -0.047607421875, "logits/rejected": 0.042724609375, "logps/chosen": -0.609375, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.046875, "rewards/margins": 14.9375, "rewards/rejected": -18.0, "step": 1152 }, { "epoch": 0.42942271880819366, "grad_norm": 0.98046875, "learning_rate": 1.2680575852556125e-06, "logits/chosen": 0.05126953125, "logits/rejected": 0.59765625, "logps/chosen": -0.2353515625, "logps/rejected": -2.5625, "loss": 0.0017, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1796875, "rewards/margins": 11.6875, "rewards/rejected": -12.875, "step": 1153 }, { "epoch": 0.4297951582867784, "grad_norm": 0.451171875, "learning_rate": 1.2669893149948824e-06, "logits/chosen": 0.173828125, "logits/rejected": -0.62109375, "logps/chosen": -0.34765625, "logps/rejected": -2.625, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7421875, "rewards/margins": 11.375, "rewards/rejected": -13.125, "step": 1154 }, { "epoch": 0.4301675977653631, "grad_norm": 0.07373046875, "learning_rate": 1.2659204242092089e-06, "logits/chosen": -0.0390625, "logits/rejected": 0.345703125, "logps/chosen": -0.24609375, "logps/rejected": -2.71875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.234375, "rewards/margins": 12.375, "rewards/rejected": -13.625, "step": 1155 }, { "epoch": 0.43054003724394785, "grad_norm": 0.109375, "learning_rate": 1.264850914705928e-06, "logits/chosen": -0.072265625, "logits/rejected": -0.6015625, "logps/chosen": -0.279296875, "logps/rejected": -2.34375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3984375, "rewards/margins": 10.375, "rewards/rejected": -11.75, "step": 1156 }, { "epoch": 0.4309124767225326, "grad_norm": 33.25, "learning_rate": 1.2637807882934235e-06, "logits/chosen": 0.10791015625, "logits/rejected": -0.4609375, "logps/chosen": -0.7890625, "logps/rejected": -2.828125, "loss": 0.0452, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.953125, "rewards/margins": 10.1875, "rewards/rejected": -14.1875, "step": 1157 }, { "epoch": 0.4312849162011173, "grad_norm": 0.007293701171875, "learning_rate": 1.2627100467811215e-06, "logits/chosen": 0.0159912109375, "logits/rejected": -0.37890625, "logps/chosen": -0.1728515625, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.86328125, "rewards/margins": 14.875, "rewards/rejected": -15.75, "step": 1158 }, { "epoch": 0.43165735567970204, "grad_norm": 0.0002498626708984375, "learning_rate": 1.2616386919794885e-06, "logits/chosen": 0.109375, "logits/rejected": 0.2490234375, "logps/chosen": -0.1650390625, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.82421875, "rewards/margins": 17.25, "rewards/rejected": -18.125, "step": 1159 }, { "epoch": 0.43202979515828677, "grad_norm": 0.91796875, "learning_rate": 1.2605667257000276e-06, "logits/chosen": -0.1416015625, "logits/rejected": 0.36328125, "logps/chosen": -0.36328125, "logps/rejected": -2.8125, "loss": 0.0014, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8125, "rewards/margins": 12.25, "rewards/rejected": -14.0625, "step": 1160 }, { "epoch": 0.4324022346368715, "grad_norm": 0.16015625, "learning_rate": 1.2594941497552764e-06, "logits/chosen": 0.0272216796875, "logits/rejected": -0.6953125, "logps/chosen": -0.478515625, "logps/rejected": -2.375, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.390625, "rewards/margins": 9.5, "rewards/rejected": -11.875, "step": 1161 }, { "epoch": 0.4327746741154562, "grad_norm": 0.2197265625, "learning_rate": 1.2584209659588032e-06, "logits/chosen": -0.1396484375, "logits/rejected": -0.396484375, "logps/chosen": -0.62890625, "logps/rejected": -2.90625, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.140625, "rewards/margins": 11.375, "rewards/rejected": -14.5, "step": 1162 }, { "epoch": 0.43314711359404096, "grad_norm": 0.00170135498046875, "learning_rate": 1.2573471761252034e-06, "logits/chosen": -0.1279296875, "logits/rejected": 0.369140625, "logps/chosen": -0.283203125, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.421875, "rewards/margins": 15.0, "rewards/rejected": -16.375, "step": 1163 }, { "epoch": 0.4335195530726257, "grad_norm": 0.00173187255859375, "learning_rate": 1.2562727820700978e-06, "logits/chosen": 0.03857421875, "logits/rejected": 0.080078125, "logps/chosen": -0.365234375, "logps/rejected": -3.296875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8203125, "rewards/margins": 14.6875, "rewards/rejected": -16.5, "step": 1164 }, { "epoch": 0.4338919925512104, "grad_norm": 0.22265625, "learning_rate": 1.2551977856101288e-06, "logits/chosen": 0.099609375, "logits/rejected": -0.58984375, "logps/chosen": -0.5703125, "logps/rejected": -2.40625, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.859375, "rewards/margins": 9.125, "rewards/rejected": -12.0, "step": 1165 }, { "epoch": 0.43426443202979514, "grad_norm": 8.4375, "learning_rate": 1.2541221885629574e-06, "logits/chosen": 0.1376953125, "logits/rejected": 0.4140625, "logps/chosen": -0.3359375, "logps/rejected": -2.15625, "loss": 0.0075, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6796875, "rewards/margins": 9.0, "rewards/rejected": -10.75, "step": 1166 }, { "epoch": 0.4346368715083799, "grad_norm": 9.375, "learning_rate": 1.2530459927472598e-06, "logits/chosen": 0.000347137451171875, "logits/rejected": -0.263671875, "logps/chosen": -0.7890625, "logps/rejected": -3.109375, "loss": 0.0098, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.953125, "rewards/margins": 11.5, "rewards/rejected": -15.5, "step": 1167 }, { "epoch": 0.4350093109869646, "grad_norm": 0.0003814697265625, "learning_rate": 1.2519691999827247e-06, "logits/chosen": -0.0291748046875, "logits/rejected": 0.1171875, "logps/chosen": -0.43359375, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.171875, "rewards/margins": 16.0, "rewards/rejected": -18.25, "step": 1168 }, { "epoch": 0.43538175046554933, "grad_norm": 0.875, "learning_rate": 1.2508918120900503e-06, "logits/chosen": 0.1357421875, "logits/rejected": 0.349609375, "logps/chosen": -0.7421875, "logps/rejected": -2.984375, "loss": 0.0011, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.703125, "rewards/margins": 11.25, "rewards/rejected": -14.9375, "step": 1169 }, { "epoch": 0.43575418994413406, "grad_norm": 0.0341796875, "learning_rate": 1.2498138308909415e-06, "logits/chosen": -0.1611328125, "logits/rejected": -0.470703125, "logps/chosen": -0.1533203125, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.76953125, "rewards/margins": 14.125, "rewards/rejected": -14.9375, "step": 1170 }, { "epoch": 0.4361266294227188, "grad_norm": 0.1484375, "learning_rate": 1.2487352582081054e-06, "logits/chosen": -0.11669921875, "logits/rejected": 0.1083984375, "logps/chosen": -0.1943359375, "logps/rejected": -2.671875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.97265625, "rewards/margins": 12.375, "rewards/rejected": -13.375, "step": 1171 }, { "epoch": 0.4364990689013035, "grad_norm": 2.9921531677246094e-05, "learning_rate": 1.2476560958652499e-06, "logits/chosen": 0.1435546875, "logits/rejected": 0.296875, "logps/chosen": -0.220703125, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1015625, "rewards/margins": 18.375, "rewards/rejected": -19.5, "step": 1172 }, { "epoch": 0.43687150837988825, "grad_norm": 0.00013637542724609375, "learning_rate": 1.2465763456870794e-06, "logits/chosen": 0.06689453125, "logits/rejected": 0.232421875, "logps/chosen": -0.33984375, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6953125, "rewards/margins": 16.75, "rewards/rejected": -18.5, "step": 1173 }, { "epoch": 0.437243947858473, "grad_norm": 0.001617431640625, "learning_rate": 1.2454960094992933e-06, "logits/chosen": 0.2373046875, "logits/rejected": 0.087890625, "logps/chosen": -0.34375, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7265625, "rewards/margins": 15.25, "rewards/rejected": -17.0, "step": 1174 }, { "epoch": 0.4376163873370577, "grad_norm": 0.451171875, "learning_rate": 1.2444150891285808e-06, "logits/chosen": -0.026611328125, "logits/rejected": -0.48828125, "logps/chosen": -0.1513671875, "logps/rejected": -2.5625, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.75390625, "rewards/margins": 12.0625, "rewards/rejected": -12.8125, "step": 1175 }, { "epoch": 0.43798882681564244, "grad_norm": 1.4609375, "learning_rate": 1.2433335864026192e-06, "logits/chosen": 0.0098876953125, "logits/rejected": 0.765625, "logps/chosen": -0.328125, "logps/rejected": -2.4375, "loss": 0.0028, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.640625, "rewards/margins": 10.5625, "rewards/rejected": -12.1875, "step": 1176 }, { "epoch": 0.43836126629422717, "grad_norm": 0.00168609619140625, "learning_rate": 1.2422515031500706e-06, "logits/chosen": 0.0947265625, "logits/rejected": -0.033447265625, "logps/chosen": -0.154296875, "logps/rejected": -2.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7734375, "rewards/margins": 13.75, "rewards/rejected": -14.5, "step": 1177 }, { "epoch": 0.4387337057728119, "grad_norm": 0.000255584716796875, "learning_rate": 1.2411688412005785e-06, "logits/chosen": 0.060546875, "logits/rejected": 0.365234375, "logps/chosen": -0.3984375, "logps/rejected": -3.640625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0, "rewards/margins": 16.25, "rewards/rejected": -18.25, "step": 1178 }, { "epoch": 0.43910614525139663, "grad_norm": 0.00628662109375, "learning_rate": 1.2400856023847654e-06, "logits/chosen": -0.07080078125, "logits/rejected": 0.341796875, "logps/chosen": -0.1826171875, "logps/rejected": -2.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9140625, "rewards/margins": 13.875, "rewards/rejected": -14.8125, "step": 1179 }, { "epoch": 0.43947858472998136, "grad_norm": 0.06982421875, "learning_rate": 1.2390017885342282e-06, "logits/chosen": 0.0615234375, "logits/rejected": -0.019775390625, "logps/chosen": -0.5234375, "logps/rejected": -3.46875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.625, "rewards/margins": 14.8125, "rewards/rejected": -17.375, "step": 1180 }, { "epoch": 0.4398510242085661, "grad_norm": 0.000133514404296875, "learning_rate": 1.2379174014815371e-06, "logits/chosen": -0.034912109375, "logits/rejected": 0.275390625, "logps/chosen": -0.28515625, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.421875, "rewards/margins": 16.625, "rewards/rejected": -18.0, "step": 1181 }, { "epoch": 0.4402234636871508, "grad_norm": 0.000743865966796875, "learning_rate": 1.2368324430602304e-06, "logits/chosen": 0.010009765625, "logits/rejected": 0.23046875, "logps/chosen": -0.53125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.65625, "rewards/margins": 14.625, "rewards/rejected": -17.25, "step": 1182 }, { "epoch": 0.44059590316573555, "grad_norm": 0.06396484375, "learning_rate": 1.235746915104814e-06, "logits/chosen": 0.0107421875, "logits/rejected": -0.126953125, "logps/chosen": -0.30859375, "logps/rejected": -2.78125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.546875, "rewards/margins": 12.375, "rewards/rejected": -13.9375, "step": 1183 }, { "epoch": 0.4409683426443203, "grad_norm": 0.0196533203125, "learning_rate": 1.2346608194507559e-06, "logits/chosen": -0.06640625, "logits/rejected": 0.302734375, "logps/chosen": -0.2451171875, "logps/rejected": -2.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2265625, "rewards/margins": 12.0, "rewards/rejected": -13.25, "step": 1184 }, { "epoch": 0.441340782122905, "grad_norm": 0.00830078125, "learning_rate": 1.2335741579344837e-06, "logits/chosen": 0.09423828125, "logits/rejected": 0.26171875, "logps/chosen": -0.134765625, "logps/rejected": -2.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.67578125, "rewards/margins": 12.0625, "rewards/rejected": -12.75, "step": 1185 }, { "epoch": 0.44171322160148974, "grad_norm": 0.0001983642578125, "learning_rate": 1.2324869323933823e-06, "logits/chosen": -0.057373046875, "logits/rejected": 0.11962890625, "logps/chosen": -0.30078125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5078125, "rewards/margins": 15.75, "rewards/rejected": -17.25, "step": 1186 }, { "epoch": 0.44208566108007447, "grad_norm": 324.0, "learning_rate": 1.2313991446657902e-06, "logits/chosen": -0.224609375, "logits/rejected": -0.015869140625, "logps/chosen": -1.34375, "logps/rejected": -1.84375, "loss": 3.875, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -6.6875, "rewards/margins": 2.53125, "rewards/rejected": -9.25, "step": 1187 }, { "epoch": 0.4424581005586592, "grad_norm": 0.1083984375, "learning_rate": 1.2303107965909962e-06, "logits/chosen": 0.004150390625, "logits/rejected": 0.455078125, "logps/chosen": -0.1484375, "logps/rejected": -2.4375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7421875, "rewards/margins": 11.5, "rewards/rejected": -12.1875, "step": 1188 }, { "epoch": 0.4428305400372439, "grad_norm": 0.000202178955078125, "learning_rate": 1.2292218900092371e-06, "logits/chosen": 0.12158203125, "logits/rejected": 0.216796875, "logps/chosen": -0.33984375, "logps/rejected": -3.578125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.703125, "rewards/margins": 16.25, "rewards/rejected": -17.875, "step": 1189 }, { "epoch": 0.44320297951582865, "grad_norm": 0.0004062652587890625, "learning_rate": 1.2281324267616938e-06, "logits/chosen": 0.1259765625, "logits/rejected": 0.439453125, "logps/chosen": -0.1953125, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9765625, "rewards/margins": 16.5, "rewards/rejected": -17.5, "step": 1190 }, { "epoch": 0.4435754189944134, "grad_norm": 0.006072998046875, "learning_rate": 1.2270424086904879e-06, "logits/chosen": -0.0213623046875, "logits/rejected": 0.484375, "logps/chosen": -0.6796875, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.40625, "rewards/margins": 12.75, "rewards/rejected": -16.25, "step": 1191 }, { "epoch": 0.4439478584729981, "grad_norm": 0.006561279296875, "learning_rate": 1.2259518376386799e-06, "logits/chosen": 0.0014190673828125, "logits/rejected": 0.06884765625, "logps/chosen": -0.458984375, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.28125, "rewards/margins": 13.625, "rewards/rejected": -15.9375, "step": 1192 }, { "epoch": 0.44432029795158284, "grad_norm": 33.75, "learning_rate": 1.2248607154502654e-06, "logits/chosen": -0.10498046875, "logits/rejected": -0.30078125, "logps/chosen": -0.38671875, "logps/rejected": -1.546875, "loss": 0.0664, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9296875, "rewards/margins": 5.78125, "rewards/rejected": -7.6875, "step": 1193 }, { "epoch": 0.4446927374301676, "grad_norm": 5.507469177246094e-05, "learning_rate": 1.2237690439701707e-06, "logits/chosen": 0.1943359375, "logits/rejected": 0.205078125, "logps/chosen": -0.08837890625, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.44140625, "rewards/margins": 17.75, "rewards/rejected": -18.125, "step": 1194 }, { "epoch": 0.4450651769087523, "grad_norm": 0.0001277923583984375, "learning_rate": 1.222676825044252e-06, "logits/chosen": 0.061279296875, "logits/rejected": -0.0810546875, "logps/chosen": -0.33203125, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 16.75, "rewards/rejected": -18.5, "step": 1195 }, { "epoch": 0.44543761638733703, "grad_norm": 0.00019168853759765625, "learning_rate": 1.2215840605192916e-06, "logits/chosen": 0.1728515625, "logits/rejected": 0.423828125, "logps/chosen": -0.310546875, "logps/rejected": -3.984375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.546875, "rewards/margins": 18.25, "rewards/rejected": -19.875, "step": 1196 }, { "epoch": 0.44581005586592176, "grad_norm": 0.94140625, "learning_rate": 1.220490752242992e-06, "logits/chosen": 0.181640625, "logits/rejected": -0.67578125, "logps/chosen": -0.236328125, "logps/rejected": -2.359375, "loss": 0.0014, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1796875, "rewards/margins": 10.625, "rewards/rejected": -11.8125, "step": 1197 }, { "epoch": 0.4461824953445065, "grad_norm": 0.000278472900390625, "learning_rate": 1.2193969020639784e-06, "logits/chosen": -0.046142578125, "logits/rejected": 0.30078125, "logps/chosen": -0.2314453125, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.15625, "rewards/margins": 16.75, "rewards/rejected": -17.875, "step": 1198 }, { "epoch": 0.4465549348230912, "grad_norm": 55.25, "learning_rate": 1.2183025118317893e-06, "logits/chosen": 0.042236328125, "logits/rejected": 0.3046875, "logps/chosen": -0.46484375, "logps/rejected": -2.265625, "loss": 0.1016, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.328125, "rewards/margins": 9.0, "rewards/rejected": -11.3125, "step": 1199 }, { "epoch": 0.44692737430167595, "grad_norm": 0.000362396240234375, "learning_rate": 1.2172075833968782e-06, "logits/chosen": -0.040283203125, "logits/rejected": 0.2734375, "logps/chosen": -0.2373046875, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1875, "rewards/margins": 17.375, "rewards/rejected": -18.625, "step": 1200 }, { "epoch": 0.4472998137802607, "grad_norm": 12.0625, "learning_rate": 1.216112118610607e-06, "logits/chosen": 0.050537109375, "logits/rejected": 1.0078125, "logps/chosen": -0.5859375, "logps/rejected": -2.296875, "loss": 0.0222, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9375, "rewards/margins": 8.5625, "rewards/rejected": -11.5, "step": 1201 }, { "epoch": 0.44767225325884547, "grad_norm": 0.000118255615234375, "learning_rate": 1.2150161193252472e-06, "logits/chosen": 0.17578125, "logits/rejected": 0.2578125, "logps/chosen": -0.5859375, "logps/rejected": -4.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.921875, "rewards/margins": 17.75, "rewards/rejected": -20.75, "step": 1202 }, { "epoch": 0.4480446927374302, "grad_norm": 5.030632019042969e-05, "learning_rate": 1.213919587393971e-06, "logits/chosen": -0.038818359375, "logits/rejected": 0.1611328125, "logps/chosen": -0.208984375, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 17.125, "rewards/rejected": -18.125, "step": 1203 }, { "epoch": 0.4484171322160149, "grad_norm": 1.6808509826660156e-05, "learning_rate": 1.2128225246708533e-06, "logits/chosen": 0.08984375, "logits/rejected": 0.0322265625, "logps/chosen": -0.2294921875, "logps/rejected": -3.953125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1484375, "rewards/margins": 18.625, "rewards/rejected": -19.75, "step": 1204 }, { "epoch": 0.44878957169459965, "grad_norm": 0.002655029296875, "learning_rate": 1.2117249330108652e-06, "logits/chosen": -0.0216064453125, "logits/rejected": 0.1953125, "logps/chosen": -0.265625, "logps/rejected": -3.109375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 14.25, "rewards/rejected": -15.5625, "step": 1205 }, { "epoch": 0.4491620111731844, "grad_norm": 0.0001811981201171875, "learning_rate": 1.2106268142698731e-06, "logits/chosen": -0.048828125, "logits/rejected": -0.054443359375, "logps/chosen": -0.27734375, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3828125, "rewards/margins": 15.875, "rewards/rejected": -17.25, "step": 1206 }, { "epoch": 0.4495344506517691, "grad_norm": 0.000904083251953125, "learning_rate": 1.209528170304634e-06, "logits/chosen": 0.08544921875, "logits/rejected": 0.1650390625, "logps/chosen": -0.359375, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 15.125, "rewards/rejected": -17.0, "step": 1207 }, { "epoch": 0.44990689013035384, "grad_norm": 0.23828125, "learning_rate": 1.208429002972794e-06, "logits/chosen": -0.0810546875, "logits/rejected": 0.1611328125, "logps/chosen": -0.470703125, "logps/rejected": -2.96875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.359375, "rewards/margins": 12.5, "rewards/rejected": -14.875, "step": 1208 }, { "epoch": 0.45027932960893857, "grad_norm": 8.440017700195312e-05, "learning_rate": 1.2073293141328822e-06, "logits/chosen": 0.0291748046875, "logits/rejected": 0.166015625, "logps/chosen": -0.298828125, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5, "rewards/margins": 17.0, "rewards/rejected": -18.5, "step": 1209 }, { "epoch": 0.4506517690875233, "grad_norm": 0.01165771484375, "learning_rate": 1.2062291056443115e-06, "logits/chosen": 0.035888671875, "logits/rejected": -0.40234375, "logps/chosen": -0.376953125, "logps/rejected": -2.890625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 12.5, "rewards/rejected": -14.4375, "step": 1210 }, { "epoch": 0.45102420856610803, "grad_norm": 0.07080078125, "learning_rate": 1.2051283793673722e-06, "logits/chosen": 0.0859375, "logits/rejected": 0.35546875, "logps/chosen": -0.138671875, "logps/rejected": -2.359375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.69140625, "rewards/margins": 11.125, "rewards/rejected": -11.8125, "step": 1211 }, { "epoch": 0.45139664804469276, "grad_norm": 73.0, "learning_rate": 1.2040271371632306e-06, "logits/chosen": -0.06298828125, "logits/rejected": -0.39453125, "logps/chosen": -1.0078125, "logps/rejected": -2.890625, "loss": 0.1079, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.0625, "rewards/margins": 9.4375, "rewards/rejected": -14.5, "step": 1212 }, { "epoch": 0.4517690875232775, "grad_norm": 0.005096435546875, "learning_rate": 1.2029253808939257e-06, "logits/chosen": 0.146484375, "logits/rejected": 0.1689453125, "logps/chosen": -0.734375, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.671875, "rewards/margins": 12.875, "rewards/rejected": -16.625, "step": 1213 }, { "epoch": 0.4521415270018622, "grad_norm": 9.393692016601562e-05, "learning_rate": 1.2018231124223644e-06, "logits/chosen": -0.28125, "logits/rejected": -0.162109375, "logps/chosen": -0.3359375, "logps/rejected": -3.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6796875, "rewards/margins": 17.375, "rewards/rejected": -19.0, "step": 1214 }, { "epoch": 0.45251396648044695, "grad_norm": 0.00133514404296875, "learning_rate": 1.2007203336123213e-06, "logits/chosen": 0.1279296875, "logits/rejected": 0.341796875, "logps/chosen": -0.5, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5, "rewards/margins": 14.375, "rewards/rejected": -16.875, "step": 1215 }, { "epoch": 0.4528864059590317, "grad_norm": 9.875, "learning_rate": 1.1996170463284325e-06, "logits/chosen": -0.10986328125, "logits/rejected": 0.53125, "logps/chosen": -0.2421875, "logps/rejected": -1.3046875, "loss": 0.0293, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2109375, "rewards/margins": 5.34375, "rewards/rejected": -6.5625, "step": 1216 }, { "epoch": 0.4532588454376164, "grad_norm": 0.0005035400390625, "learning_rate": 1.1985132524361951e-06, "logits/chosen": -0.010986328125, "logits/rejected": 0.3046875, "logps/chosen": -0.3828125, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9140625, "rewards/margins": 15.0, "rewards/rejected": -16.875, "step": 1217 }, { "epoch": 0.45363128491620114, "grad_norm": 0.404296875, "learning_rate": 1.1974089538019616e-06, "logits/chosen": -0.2119140625, "logits/rejected": -0.86328125, "logps/chosen": -0.359375, "logps/rejected": -2.4375, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 10.375, "rewards/rejected": -12.1875, "step": 1218 }, { "epoch": 0.45400372439478587, "grad_norm": 4.124641418457031e-05, "learning_rate": 1.1963041522929395e-06, "logits/chosen": 0.051025390625, "logits/rejected": 0.1533203125, "logps/chosen": -0.07666015625, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3828125, "rewards/margins": 17.5, "rewards/rejected": -17.75, "step": 1219 }, { "epoch": 0.4543761638733706, "grad_norm": 0.059814453125, "learning_rate": 1.1951988497771845e-06, "logits/chosen": 0.107421875, "logits/rejected": -0.375, "logps/chosen": -0.3046875, "logps/rejected": -2.765625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5234375, "rewards/margins": 12.25, "rewards/rejected": -13.8125, "step": 1220 }, { "epoch": 0.4547486033519553, "grad_norm": 1.921875, "learning_rate": 1.194093048123601e-06, "logits/chosen": -0.01171875, "logits/rejected": -0.3203125, "logps/chosen": -0.2021484375, "logps/rejected": -2.0, "loss": 0.0035, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.015625, "rewards/margins": 9.0, "rewards/rejected": -10.0, "step": 1221 }, { "epoch": 0.45512104283054006, "grad_norm": 0.0224609375, "learning_rate": 1.1929867492019373e-06, "logits/chosen": -0.068359375, "logits/rejected": 0.01416015625, "logps/chosen": -0.390625, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.953125, "rewards/margins": 13.0, "rewards/rejected": -14.9375, "step": 1222 }, { "epoch": 0.4554934823091248, "grad_norm": 0.0002574920654296875, "learning_rate": 1.1918799548827814e-06, "logits/chosen": 0.2119140625, "logits/rejected": 0.2001953125, "logps/chosen": -0.46484375, "logps/rejected": -3.578125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3125, "rewards/margins": 15.5625, "rewards/rejected": -17.875, "step": 1223 }, { "epoch": 0.4558659217877095, "grad_norm": 0.482421875, "learning_rate": 1.1907726670375604e-06, "logits/chosen": -0.021728515625, "logits/rejected": 0.44921875, "logps/chosen": -0.287109375, "logps/rejected": -2.28125, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4375, "rewards/margins": 10.0, "rewards/rejected": -11.375, "step": 1224 }, { "epoch": 0.45623836126629425, "grad_norm": 0.0224609375, "learning_rate": 1.1896648875385347e-06, "logits/chosen": 0.14453125, "logits/rejected": 0.375, "logps/chosen": -0.515625, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.578125, "rewards/margins": 15.4375, "rewards/rejected": -18.0, "step": 1225 }, { "epoch": 0.456610800744879, "grad_norm": 0.00116729736328125, "learning_rate": 1.1885566182587964e-06, "logits/chosen": -0.07470703125, "logits/rejected": 0.228515625, "logps/chosen": -0.5078125, "logps/rejected": -3.515625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.53125, "rewards/margins": 15.0, "rewards/rejected": -17.5, "step": 1226 }, { "epoch": 0.4569832402234637, "grad_norm": 0.0255126953125, "learning_rate": 1.187447861072266e-06, "logits/chosen": -0.3203125, "logits/rejected": 0.2431640625, "logps/chosen": -0.34375, "logps/rejected": -2.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.71875, "rewards/margins": 11.625, "rewards/rejected": -13.375, "step": 1227 }, { "epoch": 0.45735567970204843, "grad_norm": 0.00037384033203125, "learning_rate": 1.1863386178536891e-06, "logits/chosen": 0.02783203125, "logits/rejected": 0.55859375, "logps/chosen": -0.224609375, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.125, "rewards/margins": 15.25, "rewards/rejected": -16.375, "step": 1228 }, { "epoch": 0.45772811918063316, "grad_norm": 3.123283386230469e-05, "learning_rate": 1.1852288904786324e-06, "logits/chosen": 0.130859375, "logits/rejected": 0.2138671875, "logps/chosen": -0.240234375, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.203125, "rewards/margins": 18.25, "rewards/rejected": -19.5, "step": 1229 }, { "epoch": 0.4581005586592179, "grad_norm": 0.400390625, "learning_rate": 1.1841186808234816e-06, "logits/chosen": 0.06298828125, "logits/rejected": 0.24609375, "logps/chosen": -0.40234375, "logps/rejected": -2.3125, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.015625, "rewards/margins": 9.625, "rewards/rejected": -11.625, "step": 1230 }, { "epoch": 0.4584729981378026, "grad_norm": 0.00110626220703125, "learning_rate": 1.1830079907654382e-06, "logits/chosen": 0.25, "logits/rejected": 0.80078125, "logps/chosen": -0.419921875, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.09375, "rewards/margins": 14.8125, "rewards/rejected": -16.875, "step": 1231 }, { "epoch": 0.45884543761638735, "grad_norm": 0.59375, "learning_rate": 1.1818968221825157e-06, "logits/chosen": -0.03564453125, "logits/rejected": -0.1357421875, "logps/chosen": -1.75, "logps/rejected": -3.46875, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.75, "rewards/margins": 8.5625, "rewards/rejected": -17.25, "step": 1232 }, { "epoch": 0.4592178770949721, "grad_norm": 0.00011587142944335938, "learning_rate": 1.1807851769535363e-06, "logits/chosen": 0.1376953125, "logits/rejected": 0.12255859375, "logps/chosen": -0.2333984375, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1640625, "rewards/margins": 16.5, "rewards/rejected": -17.75, "step": 1233 }, { "epoch": 0.4595903165735568, "grad_norm": 0.0113525390625, "learning_rate": 1.1796730569581287e-06, "logits/chosen": -0.026611328125, "logits/rejected": 0.41796875, "logps/chosen": -0.6875, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4375, "rewards/margins": 12.75, "rewards/rejected": -16.25, "step": 1234 }, { "epoch": 0.45996275605214154, "grad_norm": 4.267692565917969e-05, "learning_rate": 1.1785604640767246e-06, "logits/chosen": 0.1650390625, "logits/rejected": 0.431640625, "logps/chosen": -0.216796875, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0859375, "rewards/margins": 17.5, "rewards/rejected": -18.5, "step": 1235 }, { "epoch": 0.46033519553072627, "grad_norm": 0.00341796875, "learning_rate": 1.1774474001905546e-06, "logits/chosen": 0.07470703125, "logits/rejected": -0.037353515625, "logps/chosen": -0.158203125, "logps/rejected": -2.921875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7890625, "rewards/margins": 13.8125, "rewards/rejected": -14.625, "step": 1236 }, { "epoch": 0.460707635009311, "grad_norm": 0.515625, "learning_rate": 1.1763338671816456e-06, "logits/chosen": 0.193359375, "logits/rejected": -0.0021209716796875, "logps/chosen": -0.1025390625, "logps/rejected": -2.375, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.515625, "rewards/margins": 11.3125, "rewards/rejected": -11.875, "step": 1237 }, { "epoch": 0.46108007448789573, "grad_norm": 0.51953125, "learning_rate": 1.175219866932819e-06, "logits/chosen": -0.021728515625, "logits/rejected": 0.056640625, "logps/chosen": -0.484375, "logps/rejected": -2.625, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 10.625, "rewards/rejected": -13.125, "step": 1238 }, { "epoch": 0.46145251396648046, "grad_norm": 0.0002155303955078125, "learning_rate": 1.1741054013276841e-06, "logits/chosen": 0.111328125, "logits/rejected": 0.2197265625, "logps/chosen": -0.2255859375, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.125, "rewards/margins": 15.875, "rewards/rejected": -17.0, "step": 1239 }, { "epoch": 0.4618249534450652, "grad_norm": 0.0023956298828125, "learning_rate": 1.1729904722506398e-06, "logits/chosen": -0.0322265625, "logits/rejected": 0.138671875, "logps/chosen": -0.2451171875, "logps/rejected": -3.328125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2265625, "rewards/margins": 15.4375, "rewards/rejected": -16.625, "step": 1240 }, { "epoch": 0.4621973929236499, "grad_norm": 1.15625, "learning_rate": 1.171875081586866e-06, "logits/chosen": 0.041015625, "logits/rejected": -0.06787109375, "logps/chosen": -0.2890625, "logps/rejected": -2.8125, "loss": 0.0026, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4453125, "rewards/margins": 12.625, "rewards/rejected": -14.0625, "step": 1241 }, { "epoch": 0.46256983240223465, "grad_norm": 0.0033416748046875, "learning_rate": 1.1707592312223247e-06, "logits/chosen": 0.22265625, "logits/rejected": 0.064453125, "logps/chosen": -0.146484375, "logps/rejected": -3.046875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.734375, "rewards/margins": 14.5, "rewards/rejected": -15.25, "step": 1242 }, { "epoch": 0.4629422718808194, "grad_norm": 0.0003490447998046875, "learning_rate": 1.1696429230437547e-06, "logits/chosen": 0.10888671875, "logits/rejected": 0.2255859375, "logps/chosen": -0.3046875, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5234375, "rewards/margins": 15.5, "rewards/rejected": -17.0, "step": 1243 }, { "epoch": 0.4633147113594041, "grad_norm": 0.03125, "learning_rate": 1.1685261589386687e-06, "logits/chosen": -0.1611328125, "logits/rejected": -0.45703125, "logps/chosen": -0.201171875, "logps/rejected": -2.34375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0078125, "rewards/margins": 10.75, "rewards/rejected": -11.75, "step": 1244 }, { "epoch": 0.46368715083798884, "grad_norm": 188.0, "learning_rate": 1.1674089407953507e-06, "logits/chosen": -0.1943359375, "logits/rejected": -0.140625, "logps/chosen": -0.50390625, "logps/rejected": -2.03125, "loss": 0.7812, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.53125, "rewards/margins": 7.625, "rewards/rejected": -10.1875, "step": 1245 }, { "epoch": 0.46405959031657357, "grad_norm": 0.017822265625, "learning_rate": 1.166291270502852e-06, "logits/chosen": 0.01068115234375, "logits/rejected": 0.51953125, "logps/chosen": -0.212890625, "logps/rejected": -2.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0625, "rewards/margins": 13.375, "rewards/rejected": -14.4375, "step": 1246 }, { "epoch": 0.4644320297951583, "grad_norm": 0.0078125, "learning_rate": 1.165173149950989e-06, "logits/chosen": 0.09814453125, "logits/rejected": 0.33984375, "logps/chosen": -0.70703125, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.546875, "rewards/margins": 13.625, "rewards/rejected": -17.25, "step": 1247 }, { "epoch": 0.464804469273743, "grad_norm": 0.0004291534423828125, "learning_rate": 1.164054581030339e-06, "logits/chosen": -0.0120849609375, "logits/rejected": 0.30078125, "logps/chosen": -0.1845703125, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.921875, "rewards/margins": 16.0, "rewards/rejected": -17.0, "step": 1248 }, { "epoch": 0.46517690875232776, "grad_norm": 0.00021648406982421875, "learning_rate": 1.1629355656322371e-06, "logits/chosen": 0.0654296875, "logits/rejected": 0.337890625, "logps/chosen": -0.208984375, "logps/rejected": -3.703125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 17.5, "rewards/rejected": -18.5, "step": 1249 }, { "epoch": 0.4655493482309125, "grad_norm": 0.0216064453125, "learning_rate": 1.161816105648774e-06, "logits/chosen": 0.09765625, "logits/rejected": -0.1455078125, "logps/chosen": -0.1591796875, "logps/rejected": -2.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.796875, "rewards/margins": 14.125, "rewards/rejected": -14.875, "step": 1250 }, { "epoch": 0.4659217877094972, "grad_norm": 0.0294189453125, "learning_rate": 1.160696202972792e-06, "logits/chosen": -0.2470703125, "logits/rejected": -0.34375, "logps/chosen": -0.1943359375, "logps/rejected": -2.421875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.97265625, "rewards/margins": 11.125, "rewards/rejected": -12.125, "step": 1251 }, { "epoch": 0.46629422718808194, "grad_norm": 0.000713348388671875, "learning_rate": 1.1595758594978818e-06, "logits/chosen": 0.224609375, "logits/rejected": 0.302734375, "logps/chosen": -0.322265625, "logps/rejected": -3.359375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 15.125, "rewards/rejected": -16.75, "step": 1252 }, { "epoch": 0.4666666666666667, "grad_norm": 0.1171875, "learning_rate": 1.158455077118379e-06, "logits/chosen": 0.059814453125, "logits/rejected": -0.2578125, "logps/chosen": -0.4140625, "logps/rejected": -2.6875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 11.375, "rewards/rejected": -13.5, "step": 1253 }, { "epoch": 0.4670391061452514, "grad_norm": 7.05718994140625e-05, "learning_rate": 1.1573338577293619e-06, "logits/chosen": -0.111328125, "logits/rejected": 0.05078125, "logps/chosen": -0.2412109375, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.203125, "rewards/margins": 17.25, "rewards/rejected": -18.375, "step": 1254 }, { "epoch": 0.46741154562383613, "grad_norm": 0.412109375, "learning_rate": 1.156212203226648e-06, "logits/chosen": -0.03466796875, "logits/rejected": 0.83203125, "logps/chosen": -0.43359375, "logps/rejected": -3.171875, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 13.75, "rewards/rejected": -15.875, "step": 1255 }, { "epoch": 0.46778398510242086, "grad_norm": 8.487701416015625e-05, "learning_rate": 1.1550901155067891e-06, "logits/chosen": 0.158203125, "logits/rejected": 0.2080078125, "logps/chosen": -0.25390625, "logps/rejected": -3.640625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.265625, "rewards/margins": 17.0, "rewards/rejected": -18.25, "step": 1256 }, { "epoch": 0.4681564245810056, "grad_norm": 0.003265380859375, "learning_rate": 1.1539675964670712e-06, "logits/chosen": -0.265625, "logits/rejected": 0.1982421875, "logps/chosen": -0.64453125, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.21875, "rewards/margins": 13.6875, "rewards/rejected": -16.875, "step": 1257 }, { "epoch": 0.4685288640595903, "grad_norm": 0.0537109375, "learning_rate": 1.1528446480055089e-06, "logits/chosen": 0.0262451171875, "logits/rejected": -0.439453125, "logps/chosen": -0.3359375, "logps/rejected": -2.90625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.671875, "rewards/margins": 12.9375, "rewards/rejected": -14.625, "step": 1258 }, { "epoch": 0.46890130353817505, "grad_norm": 0.1083984375, "learning_rate": 1.1517212720208425e-06, "logits/chosen": 0.007720947265625, "logits/rejected": 0.66796875, "logps/chosen": -0.259765625, "logps/rejected": -2.984375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3046875, "rewards/margins": 13.5625, "rewards/rejected": -14.875, "step": 1259 }, { "epoch": 0.4692737430167598, "grad_norm": 0.11865234375, "learning_rate": 1.1505974704125355e-06, "logits/chosen": 0.0947265625, "logits/rejected": -0.451171875, "logps/chosen": -0.26171875, "logps/rejected": -2.078125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125, "rewards/margins": 9.125, "rewards/rejected": -10.375, "step": 1260 }, { "epoch": 0.4696461824953445, "grad_norm": 0.0084228515625, "learning_rate": 1.1494732450807716e-06, "logits/chosen": 0.0169677734375, "logits/rejected": -0.35546875, "logps/chosen": -0.376953125, "logps/rejected": -2.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8828125, "rewards/margins": 12.6875, "rewards/rejected": -14.5625, "step": 1261 }, { "epoch": 0.47001862197392924, "grad_norm": 0.6875, "learning_rate": 1.14834859792645e-06, "logits/chosen": 0.1611328125, "logits/rejected": -0.0184326171875, "logps/chosen": -0.7734375, "logps/rejected": -3.0, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.859375, "rewards/margins": 11.0625, "rewards/rejected": -14.9375, "step": 1262 }, { "epoch": 0.47039106145251397, "grad_norm": 0.0045166015625, "learning_rate": 1.1472235308511834e-06, "logits/chosen": 0.2373046875, "logits/rejected": 0.365234375, "logps/chosen": -0.34375, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.71875, "rewards/margins": 13.875, "rewards/rejected": -15.625, "step": 1263 }, { "epoch": 0.4707635009310987, "grad_norm": 0.002471923828125, "learning_rate": 1.1460980457572954e-06, "logits/chosen": -0.00958251953125, "logits/rejected": 0.306640625, "logps/chosen": -0.251953125, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 15.375, "rewards/rejected": -16.625, "step": 1264 }, { "epoch": 0.47113594040968343, "grad_norm": 0.00274658203125, "learning_rate": 1.1449721445478152e-06, "logits/chosen": -0.10302734375, "logits/rejected": -0.19921875, "logps/chosen": -0.2236328125, "logps/rejected": -3.421875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.125, "rewards/margins": 16.0, "rewards/rejected": -17.125, "step": 1265 }, { "epoch": 0.47150837988826816, "grad_norm": 0.00010442733764648438, "learning_rate": 1.1438458291264763e-06, "logits/chosen": 0.1591796875, "logits/rejected": 0.1708984375, "logps/chosen": -0.216796875, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0859375, "rewards/margins": 17.125, "rewards/rejected": -18.25, "step": 1266 }, { "epoch": 0.4718808193668529, "grad_norm": 3.24249267578125e-05, "learning_rate": 1.1427191013977123e-06, "logits/chosen": 0.01177978515625, "logits/rejected": 0.26953125, "logps/chosen": -0.1494140625, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.75, "rewards/margins": 18.5, "rewards/rejected": -19.25, "step": 1267 }, { "epoch": 0.4722532588454376, "grad_norm": 4.4375, "learning_rate": 1.1415919632666542e-06, "logits/chosen": 0.010009765625, "logits/rejected": -0.78515625, "logps/chosen": -0.6328125, "logps/rejected": -1.796875, "loss": 0.0093, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.171875, "rewards/margins": 5.8125, "rewards/rejected": -9.0, "step": 1268 }, { "epoch": 0.47262569832402235, "grad_norm": 0.16796875, "learning_rate": 1.1404644166391266e-06, "logits/chosen": 0.046142578125, "logits/rejected": 0.376953125, "logps/chosen": -0.2578125, "logps/rejected": -3.0, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.296875, "rewards/margins": 13.625, "rewards/rejected": -15.0, "step": 1269 }, { "epoch": 0.4729981378026071, "grad_norm": 0.2119140625, "learning_rate": 1.139336463421645e-06, "logits/chosen": 0.060546875, "logits/rejected": 0.37109375, "logps/chosen": -0.3828125, "logps/rejected": -2.875, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9140625, "rewards/margins": 12.375, "rewards/rejected": -14.3125, "step": 1270 }, { "epoch": 0.4733705772811918, "grad_norm": 5.6743621826171875e-05, "learning_rate": 1.1382081055214126e-06, "logits/chosen": 0.1640625, "logits/rejected": 0.3125, "logps/chosen": -0.1552734375, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.77734375, "rewards/margins": 17.25, "rewards/rejected": -18.0, "step": 1271 }, { "epoch": 0.47374301675977654, "grad_norm": 0.00164031982421875, "learning_rate": 1.137079344846316e-06, "logits/chosen": 0.0230712890625, "logits/rejected": -0.197265625, "logps/chosen": -0.296875, "logps/rejected": -3.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.484375, "rewards/margins": 13.75, "rewards/rejected": -15.25, "step": 1272 }, { "epoch": 0.47411545623836127, "grad_norm": 0.00023746490478515625, "learning_rate": 1.135950183304924e-06, "logits/chosen": 0.158203125, "logits/rejected": 0.34375, "logps/chosen": -0.12890625, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6484375, "rewards/margins": 16.75, "rewards/rejected": -17.375, "step": 1273 }, { "epoch": 0.474487895716946, "grad_norm": 0.0026702880859375, "learning_rate": 1.1348206228064827e-06, "logits/chosen": -0.061279296875, "logits/rejected": 0.08935546875, "logps/chosen": -0.330078125, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 15.5, "rewards/rejected": -17.25, "step": 1274 }, { "epoch": 0.4748603351955307, "grad_norm": 0.004058837890625, "learning_rate": 1.1336906652609126e-06, "logits/chosen": 0.11669921875, "logits/rejected": 0.462890625, "logps/chosen": -0.41015625, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 14.6875, "rewards/rejected": -16.75, "step": 1275 }, { "epoch": 0.47523277467411545, "grad_norm": 0.000797271728515625, "learning_rate": 1.1325603125788051e-06, "logits/chosen": -0.04833984375, "logits/rejected": 0.08056640625, "logps/chosen": -0.16796875, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.84375, "rewards/margins": 14.75, "rewards/rejected": -15.625, "step": 1276 }, { "epoch": 0.4756052141527002, "grad_norm": 14.0, "learning_rate": 1.1314295666714214e-06, "logits/chosen": 0.0341796875, "logits/rejected": -0.2236328125, "logps/chosen": -1.0625, "logps/rejected": -3.109375, "loss": 0.0187, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.3125, "rewards/margins": 10.25, "rewards/rejected": -15.5, "step": 1277 }, { "epoch": 0.4759776536312849, "grad_norm": 0.037353515625, "learning_rate": 1.1302984294506854e-06, "logits/chosen": -0.06298828125, "logits/rejected": 0.09912109375, "logps/chosen": -0.421875, "logps/rejected": -2.75, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.109375, "rewards/margins": 11.625, "rewards/rejected": -13.75, "step": 1278 }, { "epoch": 0.47635009310986964, "grad_norm": 0.0004138946533203125, "learning_rate": 1.1291669028291848e-06, "logits/chosen": 0.08935546875, "logits/rejected": -0.0341796875, "logps/chosen": -0.2294921875, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1484375, "rewards/margins": 16.0, "rewards/rejected": -17.25, "step": 1279 }, { "epoch": 0.4767225325884544, "grad_norm": 0.000293731689453125, "learning_rate": 1.128034988720164e-06, "logits/chosen": -0.0301513671875, "logits/rejected": 0.2119140625, "logps/chosen": -0.1328125, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6640625, "rewards/margins": 15.875, "rewards/rejected": -16.5, "step": 1280 }, { "epoch": 0.4770949720670391, "grad_norm": 0.0003681182861328125, "learning_rate": 1.1269026890375232e-06, "logits/chosen": -0.0264892578125, "logits/rejected": 0.291015625, "logps/chosen": -0.24609375, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2265625, "rewards/margins": 15.625, "rewards/rejected": -16.875, "step": 1281 }, { "epoch": 0.47746741154562383, "grad_norm": 0.00043487548828125, "learning_rate": 1.1257700056958147e-06, "logits/chosen": 0.031982421875, "logits/rejected": 0.205078125, "logps/chosen": -0.283203125, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.421875, "rewards/margins": 15.5, "rewards/rejected": -16.875, "step": 1282 }, { "epoch": 0.47783985102420856, "grad_norm": 0.66796875, "learning_rate": 1.1246369406102395e-06, "logits/chosen": -0.0230712890625, "logits/rejected": 0.275390625, "logps/chosen": -0.4453125, "logps/rejected": -2.8125, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 11.875, "rewards/rejected": -14.125, "step": 1283 }, { "epoch": 0.4782122905027933, "grad_norm": 0.00182342529296875, "learning_rate": 1.123503495696644e-06, "logits/chosen": 0.16015625, "logits/rejected": 0.08544921875, "logps/chosen": -0.4765625, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 14.1875, "rewards/rejected": -16.5, "step": 1284 }, { "epoch": 0.478584729981378, "grad_norm": 0.1904296875, "learning_rate": 1.1223696728715168e-06, "logits/chosen": 0.0194091796875, "logits/rejected": -0.50390625, "logps/chosen": -0.16015625, "logps/rejected": -2.875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8046875, "rewards/margins": 13.625, "rewards/rejected": -14.375, "step": 1285 }, { "epoch": 0.47895716945996275, "grad_norm": 1.671875, "learning_rate": 1.1212354740519848e-06, "logits/chosen": 0.08837890625, "logits/rejected": -1.2890625, "logps/chosen": -0.56640625, "logps/rejected": -1.96875, "loss": 0.0029, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 6.96875, "rewards/rejected": -9.8125, "step": 1286 }, { "epoch": 0.4793296089385475, "grad_norm": 0.333984375, "learning_rate": 1.1201009011558128e-06, "logits/chosen": -0.283203125, "logits/rejected": -0.369140625, "logps/chosen": -0.294921875, "logps/rejected": -2.15625, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4765625, "rewards/margins": 9.25, "rewards/rejected": -10.75, "step": 1287 }, { "epoch": 0.4797020484171322, "grad_norm": 0.00011348724365234375, "learning_rate": 1.1189659561013957e-06, "logits/chosen": -0.031982421875, "logits/rejected": 0.314453125, "logps/chosen": -0.26953125, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 16.75, "rewards/rejected": -18.0, "step": 1288 }, { "epoch": 0.48007448789571694, "grad_norm": 0.1142578125, "learning_rate": 1.1178306408077587e-06, "logits/chosen": 0.09619140625, "logits/rejected": 0.61328125, "logps/chosen": -0.2333984375, "logps/rejected": -2.78125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1640625, "rewards/margins": 12.75, "rewards/rejected": -13.875, "step": 1289 }, { "epoch": 0.48044692737430167, "grad_norm": 0.00012683868408203125, "learning_rate": 1.116694957194553e-06, "logits/chosen": 0.12890625, "logits/rejected": 0.25390625, "logps/chosen": -0.46484375, "logps/rejected": -3.765625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.328125, "rewards/margins": 16.5, "rewards/rejected": -18.75, "step": 1290 }, { "epoch": 0.4808193668528864, "grad_norm": 1.578125, "learning_rate": 1.1155589071820522e-06, "logits/chosen": 0.11328125, "logits/rejected": 0.3984375, "logps/chosen": -0.212890625, "logps/rejected": -2.59375, "loss": 0.0013, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0703125, "rewards/margins": 11.875, "rewards/rejected": -12.9375, "step": 1291 }, { "epoch": 0.48119180633147113, "grad_norm": 0.000835418701171875, "learning_rate": 1.1144224926911504e-06, "logits/chosen": 0.036376953125, "logits/rejected": 0.1484375, "logps/chosen": -0.44140625, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.203125, "rewards/margins": 16.625, "rewards/rejected": -18.75, "step": 1292 }, { "epoch": 0.48156424581005586, "grad_norm": 5.221366882324219e-05, "learning_rate": 1.1132857156433568e-06, "logits/chosen": 0.09375, "logits/rejected": 0.1328125, "logps/chosen": -0.185546875, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.92578125, "rewards/margins": 17.25, "rewards/rejected": -18.125, "step": 1293 }, { "epoch": 0.4819366852886406, "grad_norm": 0.00909423828125, "learning_rate": 1.1121485779607948e-06, "logits/chosen": 0.07763671875, "logits/rejected": -0.27734375, "logps/chosen": -0.3203125, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 15.25, "rewards/rejected": -16.875, "step": 1294 }, { "epoch": 0.4823091247672253, "grad_norm": 9.632110595703125e-05, "learning_rate": 1.1110110815661963e-06, "logits/chosen": 0.04833984375, "logits/rejected": 0.33203125, "logps/chosen": -0.44921875, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25, "rewards/margins": 16.75, "rewards/rejected": -19.0, "step": 1295 }, { "epoch": 0.48268156424581005, "grad_norm": 0.0008392333984375, "learning_rate": 1.1098732283829006e-06, "logits/chosen": 0.007232666015625, "logits/rejected": 0.16015625, "logps/chosen": -0.5078125, "logps/rejected": -3.359375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.53125, "rewards/margins": 14.25, "rewards/rejected": -16.75, "step": 1296 }, { "epoch": 0.4830540037243948, "grad_norm": 0.5234375, "learning_rate": 1.1087350203348498e-06, "logits/chosen": 0.0201416015625, "logits/rejected": 0.5078125, "logps/chosen": -0.1396484375, "logps/rejected": -2.71875, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6953125, "rewards/margins": 12.875, "rewards/rejected": -13.5625, "step": 1297 }, { "epoch": 0.4834264432029795, "grad_norm": 0.00823974609375, "learning_rate": 1.1075964593465868e-06, "logits/chosen": 0.04736328125, "logits/rejected": 0.470703125, "logps/chosen": -0.16015625, "logps/rejected": -3.015625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8046875, "rewards/margins": 14.25, "rewards/rejected": -15.0625, "step": 1298 }, { "epoch": 0.48379888268156424, "grad_norm": 0.01190185546875, "learning_rate": 1.1064575473432503e-06, "logits/chosen": 0.0849609375, "logits/rejected": -0.06787109375, "logps/chosen": -0.087890625, "logps/rejected": -2.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.439453125, "rewards/margins": 13.5625, "rewards/rejected": -14.0, "step": 1299 }, { "epoch": 0.48417132216014896, "grad_norm": 0.022216796875, "learning_rate": 1.1053182862505732e-06, "logits/chosen": 0.09765625, "logits/rejected": 0.66015625, "logps/chosen": -0.40625, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 15.75, "rewards/rejected": -17.75, "step": 1300 }, { "epoch": 0.4845437616387337, "grad_norm": 1.8203125, "learning_rate": 1.1041786779948779e-06, "logits/chosen": 0.10302734375, "logits/rejected": -0.0439453125, "logps/chosen": -0.44140625, "logps/rejected": -2.859375, "loss": 0.0019, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.203125, "rewards/margins": 12.125, "rewards/rejected": -14.25, "step": 1301 }, { "epoch": 0.4849162011173184, "grad_norm": 0.027587890625, "learning_rate": 1.103038724503075e-06, "logits/chosen": 0.10205078125, "logits/rejected": -0.189453125, "logps/chosen": -0.29296875, "logps/rejected": -2.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.46875, "rewards/margins": 12.875, "rewards/rejected": -14.375, "step": 1302 }, { "epoch": 0.48528864059590315, "grad_norm": 27.5, "learning_rate": 1.1018984277026578e-06, "logits/chosen": -0.353515625, "logits/rejected": -0.53515625, "logps/chosen": -0.8125, "logps/rejected": -2.09375, "loss": 0.0334, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0625, "rewards/margins": 6.40625, "rewards/rejected": -10.5, "step": 1303 }, { "epoch": 0.4856610800744879, "grad_norm": 0.447265625, "learning_rate": 1.1007577895217007e-06, "logits/chosen": 0.1435546875, "logits/rejected": -0.2470703125, "logps/chosen": -0.228515625, "logps/rejected": -2.625, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.140625, "rewards/margins": 11.9375, "rewards/rejected": -13.125, "step": 1304 }, { "epoch": 0.4860335195530726, "grad_norm": 0.130859375, "learning_rate": 1.0996168118888545e-06, "logits/chosen": -0.0390625, "logits/rejected": -0.2431640625, "logps/chosen": -0.49609375, "logps/rejected": -2.875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.46875, "rewards/margins": 11.875, "rewards/rejected": -14.3125, "step": 1305 }, { "epoch": 0.48640595903165734, "grad_norm": 0.018310546875, "learning_rate": 1.0984754967333452e-06, "logits/chosen": 0.1484375, "logits/rejected": -0.515625, "logps/chosen": -0.1953125, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.98046875, "rewards/margins": 14.875, "rewards/rejected": -15.875, "step": 1306 }, { "epoch": 0.48677839851024207, "grad_norm": 19.5, "learning_rate": 1.0973338459849684e-06, "logits/chosen": -0.1689453125, "logits/rejected": 0.32421875, "logps/chosen": -0.5703125, "logps/rejected": -2.796875, "loss": 0.033, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 11.1875, "rewards/rejected": -14.0, "step": 1307 }, { "epoch": 0.4871508379888268, "grad_norm": 0.1943359375, "learning_rate": 1.0961918615740878e-06, "logits/chosen": -0.0235595703125, "logits/rejected": 0.337890625, "logps/chosen": -0.2890625, "logps/rejected": -2.78125, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4375, "rewards/margins": 12.375, "rewards/rejected": -13.875, "step": 1308 }, { "epoch": 0.48752327746741153, "grad_norm": 0.80078125, "learning_rate": 1.0950495454316308e-06, "logits/chosen": 0.10546875, "logits/rejected": -0.9453125, "logps/chosen": -0.451171875, "logps/rejected": -2.53125, "loss": 0.0011, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25, "rewards/margins": 10.375, "rewards/rejected": -12.625, "step": 1309 }, { "epoch": 0.48789571694599626, "grad_norm": 16.75, "learning_rate": 1.093906899489086e-06, "logits/chosen": -0.02001953125, "logits/rejected": 0.6484375, "logps/chosen": -0.1572265625, "logps/rejected": -2.125, "loss": 0.0598, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.78515625, "rewards/margins": 9.8125, "rewards/rejected": -10.625, "step": 1310 }, { "epoch": 0.488268156424581, "grad_norm": 1.1796875, "learning_rate": 1.0927639256785001e-06, "logits/chosen": 0.1435546875, "logits/rejected": -0.10791015625, "logps/chosen": -0.1904296875, "logps/rejected": -2.578125, "loss": 0.002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.953125, "rewards/margins": 11.875, "rewards/rejected": -12.875, "step": 1311 }, { "epoch": 0.4886405959031657, "grad_norm": 0.06640625, "learning_rate": 1.0916206259324728e-06, "logits/chosen": 0.1767578125, "logits/rejected": 0.26953125, "logps/chosen": -0.361328125, "logps/rejected": -2.640625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 11.4375, "rewards/rejected": -13.25, "step": 1312 }, { "epoch": 0.48901303538175045, "grad_norm": 0.11083984375, "learning_rate": 1.0904770021841564e-06, "logits/chosen": -0.09423828125, "logits/rejected": 0.412109375, "logps/chosen": -0.48046875, "logps/rejected": -3.265625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.40625, "rewards/margins": 13.875, "rewards/rejected": -16.25, "step": 1313 }, { "epoch": 0.4893854748603352, "grad_norm": 0.0274658203125, "learning_rate": 1.0893330563672503e-06, "logits/chosen": -0.03662109375, "logits/rejected": -0.396484375, "logps/chosen": -0.1123046875, "logps/rejected": -2.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5625, "rewards/margins": 13.125, "rewards/rejected": -13.75, "step": 1314 }, { "epoch": 0.4897579143389199, "grad_norm": 0.006683349609375, "learning_rate": 1.0881887904159986e-06, "logits/chosen": 0.06982421875, "logits/rejected": -0.65234375, "logps/chosen": -0.2001953125, "logps/rejected": -2.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0, "rewards/margins": 13.5625, "rewards/rejected": -14.5625, "step": 1315 }, { "epoch": 0.49013035381750464, "grad_norm": 0.1064453125, "learning_rate": 1.0870442062651865e-06, "logits/chosen": 0.169921875, "logits/rejected": -0.6484375, "logps/chosen": -0.62109375, "logps/rejected": -2.515625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.109375, "rewards/margins": 9.4375, "rewards/rejected": -12.5625, "step": 1316 }, { "epoch": 0.49050279329608937, "grad_norm": 0.003173828125, "learning_rate": 1.0858993058501377e-06, "logits/chosen": 0.037353515625, "logits/rejected": 0.03564453125, "logps/chosen": -0.30859375, "logps/rejected": -3.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5390625, "rewards/margins": 13.5625, "rewards/rejected": -15.125, "step": 1317 }, { "epoch": 0.4908752327746741, "grad_norm": 0.000362396240234375, "learning_rate": 1.0847540911067103e-06, "logits/chosen": 0.061279296875, "logits/rejected": 0.392578125, "logps/chosen": -0.369140625, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.84375, "rewards/margins": 16.375, "rewards/rejected": -18.25, "step": 1318 }, { "epoch": 0.4912476722532588, "grad_norm": 0.033935546875, "learning_rate": 1.0836085639712937e-06, "logits/chosen": 0.07666015625, "logits/rejected": 0.6484375, "logps/chosen": -0.345703125, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7265625, "rewards/margins": 13.25, "rewards/rejected": -15.0, "step": 1319 }, { "epoch": 0.49162011173184356, "grad_norm": 0.0003795623779296875, "learning_rate": 1.0824627263808058e-06, "logits/chosen": 0.07177734375, "logits/rejected": 0.384765625, "logps/chosen": -0.375, "logps/rejected": -3.859375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.875, "rewards/margins": 17.375, "rewards/rejected": -19.25, "step": 1320 }, { "epoch": 0.4919925512104283, "grad_norm": 2.796875, "learning_rate": 1.0813165802726901e-06, "logits/chosen": -0.07373046875, "logits/rejected": -0.2392578125, "logps/chosen": -0.380859375, "logps/rejected": -2.375, "loss": 0.0038, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8984375, "rewards/margins": 10.0, "rewards/rejected": -11.875, "step": 1321 }, { "epoch": 0.492364990689013, "grad_norm": 0.00011110305786132812, "learning_rate": 1.0801701275849101e-06, "logits/chosen": -0.007476806640625, "logits/rejected": 0.373046875, "logps/chosen": -0.283203125, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.421875, "rewards/margins": 17.25, "rewards/rejected": -18.5, "step": 1322 }, { "epoch": 0.49273743016759775, "grad_norm": 0.000316619873046875, "learning_rate": 1.0790233702559492e-06, "logits/chosen": -0.0091552734375, "logits/rejected": 0.181640625, "logps/chosen": -0.26953125, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 17.5, "rewards/rejected": -18.75, "step": 1323 }, { "epoch": 0.4931098696461825, "grad_norm": 0.0078125, "learning_rate": 1.077876310224805e-06, "logits/chosen": -0.007293701171875, "logits/rejected": -0.146484375, "logps/chosen": -0.1904296875, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.953125, "rewards/margins": 14.5, "rewards/rejected": -15.375, "step": 1324 }, { "epoch": 0.4934823091247672, "grad_norm": 0.00982666015625, "learning_rate": 1.0767289494309878e-06, "logits/chosen": 0.123046875, "logits/rejected": 0.482421875, "logps/chosen": -0.6171875, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.09375, "rewards/margins": 13.875, "rewards/rejected": -17.0, "step": 1325 }, { "epoch": 0.49385474860335193, "grad_norm": 0.001312255859375, "learning_rate": 1.0755812898145155e-06, "logits/chosen": -0.1533203125, "logits/rejected": 0.11865234375, "logps/chosen": -0.9609375, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.8125, "rewards/margins": 14.1875, "rewards/rejected": -19.0, "step": 1326 }, { "epoch": 0.49422718808193666, "grad_norm": 0.0045166015625, "learning_rate": 1.0744333333159117e-06, "logits/chosen": 0.130859375, "logits/rejected": 0.279296875, "logps/chosen": -0.2451171875, "logps/rejected": -3.515625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2265625, "rewards/margins": 16.375, "rewards/rejected": -17.5, "step": 1327 }, { "epoch": 0.4945996275605214, "grad_norm": 0.0003452301025390625, "learning_rate": 1.0732850818762022e-06, "logits/chosen": -0.08837890625, "logits/rejected": 0.02734375, "logps/chosen": -0.45703125, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.296875, "rewards/margins": 15.5, "rewards/rejected": -17.75, "step": 1328 }, { "epoch": 0.4949720670391061, "grad_norm": 1.5859375, "learning_rate": 1.0721365374369108e-06, "logits/chosen": -0.2490234375, "logits/rejected": 0.1982421875, "logps/chosen": -0.546875, "logps/rejected": -2.421875, "loss": 0.0025, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.734375, "rewards/margins": 9.375, "rewards/rejected": -12.125, "step": 1329 }, { "epoch": 0.49534450651769085, "grad_norm": 0.00139617919921875, "learning_rate": 1.0709877019400577e-06, "logits/chosen": -0.126953125, "logits/rejected": 0.353515625, "logps/chosen": -0.236328125, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1796875, "rewards/margins": 13.875, "rewards/rejected": -15.0, "step": 1330 }, { "epoch": 0.4957169459962756, "grad_norm": 4.125, "learning_rate": 1.0698385773281543e-06, "logits/chosen": -0.04638671875, "logits/rejected": 0.61328125, "logps/chosen": -0.33984375, "logps/rejected": -2.359375, "loss": 0.0082, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6953125, "rewards/margins": 10.125, "rewards/rejected": -11.875, "step": 1331 }, { "epoch": 0.4960893854748603, "grad_norm": 0.1318359375, "learning_rate": 1.068689165544202e-06, "logits/chosen": 0.091796875, "logits/rejected": -0.50390625, "logps/chosen": -0.298828125, "logps/rejected": -2.75, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4921875, "rewards/margins": 12.25, "rewards/rejected": -13.75, "step": 1332 }, { "epoch": 0.49646182495344504, "grad_norm": 0.00140380859375, "learning_rate": 1.0675394685316861e-06, "logits/chosen": 0.0810546875, "logits/rejected": 0.10693359375, "logps/chosen": -0.23046875, "logps/rejected": -3.359375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.15625, "rewards/margins": 15.625, "rewards/rejected": -16.75, "step": 1333 }, { "epoch": 0.49683426443202977, "grad_norm": 0.1513671875, "learning_rate": 1.0663894882345757e-06, "logits/chosen": 0.0272216796875, "logits/rejected": -0.21484375, "logps/chosen": -0.265625, "logps/rejected": -2.796875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 12.625, "rewards/rejected": -14.0, "step": 1334 }, { "epoch": 0.4972067039106145, "grad_norm": 0.0002956390380859375, "learning_rate": 1.0652392265973179e-06, "logits/chosen": -0.05859375, "logits/rejected": 0.10498046875, "logps/chosen": -0.263671875, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125, "rewards/margins": 15.5, "rewards/rejected": -16.75, "step": 1335 }, { "epoch": 0.49757914338919923, "grad_norm": 0.026123046875, "learning_rate": 1.0640886855648366e-06, "logits/chosen": -0.1376953125, "logits/rejected": 0.546875, "logps/chosen": -0.216796875, "logps/rejected": -2.828125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0859375, "rewards/margins": 13.0625, "rewards/rejected": -14.125, "step": 1336 }, { "epoch": 0.49795158286778396, "grad_norm": 0.000659942626953125, "learning_rate": 1.0629378670825267e-06, "logits/chosen": 0.25390625, "logits/rejected": 0.140625, "logps/chosen": -0.4609375, "logps/rejected": -3.703125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.296875, "rewards/margins": 16.25, "rewards/rejected": -18.5, "step": 1337 }, { "epoch": 0.4983240223463687, "grad_norm": 0.00102996826171875, "learning_rate": 1.0617867730962531e-06, "logits/chosen": 0.0869140625, "logits/rejected": 0.150390625, "logps/chosen": -0.609375, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.046875, "rewards/margins": 14.125, "rewards/rejected": -17.25, "step": 1338 }, { "epoch": 0.4986964618249534, "grad_norm": 0.0002918243408203125, "learning_rate": 1.0606354055523466e-06, "logits/chosen": 0.1328125, "logits/rejected": 0.375, "logps/chosen": -0.56640625, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 17.5, "rewards/rejected": -20.375, "step": 1339 }, { "epoch": 0.49906890130353815, "grad_norm": 0.000568389892578125, "learning_rate": 1.0594837663976006e-06, "logits/chosen": 0.01025390625, "logits/rejected": 0.271484375, "logps/chosen": -0.2734375, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.359375, "rewards/margins": 16.125, "rewards/rejected": -17.5, "step": 1340 }, { "epoch": 0.4994413407821229, "grad_norm": 0.002105712890625, "learning_rate": 1.058331857579267e-06, "logits/chosen": 0.07177734375, "logits/rejected": 0.07861328125, "logps/chosen": -0.859375, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.3125, "rewards/margins": 13.75, "rewards/rejected": -18.0, "step": 1341 }, { "epoch": 0.4998137802607076, "grad_norm": 0.000804901123046875, "learning_rate": 1.0571796810450542e-06, "logits/chosen": -0.173828125, "logits/rejected": 0.32421875, "logps/chosen": -0.6796875, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.390625, "rewards/margins": 14.5625, "rewards/rejected": -18.0, "step": 1342 }, { "epoch": 0.5001862197392923, "grad_norm": 0.0019989013671875, "learning_rate": 1.0560272387431234e-06, "logits/chosen": -0.041748046875, "logits/rejected": 0.10302734375, "logps/chosen": -0.25390625, "logps/rejected": -3.171875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.265625, "rewards/margins": 14.625, "rewards/rejected": -15.875, "step": 1343 }, { "epoch": 0.5005586592178771, "grad_norm": 0.0108642578125, "learning_rate": 1.0548745326220851e-06, "logits/chosen": 0.0771484375, "logits/rejected": 0.1318359375, "logps/chosen": -0.59765625, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.984375, "rewards/margins": 14.875, "rewards/rejected": -17.875, "step": 1344 }, { "epoch": 0.5009310986964618, "grad_norm": 0.00640869140625, "learning_rate": 1.0537215646309956e-06, "logits/chosen": 0.07421875, "logits/rejected": 0.470703125, "logps/chosen": -0.4609375, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3125, "rewards/margins": 14.625, "rewards/rejected": -16.875, "step": 1345 }, { "epoch": 0.5013035381750466, "grad_norm": 0.018310546875, "learning_rate": 1.052568336719354e-06, "logits/chosen": 0.01007080078125, "logits/rejected": -0.443359375, "logps/chosen": -0.466796875, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.34375, "rewards/margins": 12.625, "rewards/rejected": -15.0, "step": 1346 }, { "epoch": 0.5016759776536313, "grad_norm": 0.048828125, "learning_rate": 1.0514148508370995e-06, "logits/chosen": -0.12451171875, "logits/rejected": 0.6328125, "logps/chosen": -0.67578125, "logps/rejected": -3.21875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.375, "rewards/margins": 12.6875, "rewards/rejected": -16.0, "step": 1347 }, { "epoch": 0.502048417132216, "grad_norm": 0.000274658203125, "learning_rate": 1.0502611089346065e-06, "logits/chosen": -0.040283203125, "logits/rejected": 0.54296875, "logps/chosen": -0.09375, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.46875, "rewards/margins": 15.5, "rewards/rejected": -16.0, "step": 1348 }, { "epoch": 0.5024208566108007, "grad_norm": 0.0054931640625, "learning_rate": 1.0491071129626834e-06, "logits/chosen": -0.00543212890625, "logits/rejected": 0.251953125, "logps/chosen": -0.33984375, "logps/rejected": -3.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6953125, "rewards/margins": 16.0, "rewards/rejected": -17.75, "step": 1349 }, { "epoch": 0.5027932960893855, "grad_norm": 0.0011749267578125, "learning_rate": 1.0479528648725673e-06, "logits/chosen": 0.130859375, "logits/rejected": 0.470703125, "logps/chosen": -0.326171875, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.625, "rewards/margins": 14.5, "rewards/rejected": -16.25, "step": 1350 }, { "epoch": 0.5031657355679702, "grad_norm": 0.000274658203125, "learning_rate": 1.0467983666159218e-06, "logits/chosen": 0.1640625, "logits/rejected": 0.1279296875, "logps/chosen": -0.2578125, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2890625, "rewards/margins": 16.125, "rewards/rejected": -17.5, "step": 1351 }, { "epoch": 0.503538175046555, "grad_norm": 0.314453125, "learning_rate": 1.0456436201448336e-06, "logits/chosen": 0.142578125, "logits/rejected": -0.007598876953125, "logps/chosen": -0.43359375, "logps/rejected": -2.9375, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.171875, "rewards/margins": 12.5625, "rewards/rejected": -14.75, "step": 1352 }, { "epoch": 0.5039106145251396, "grad_norm": 2.765625, "learning_rate": 1.0444886274118097e-06, "logits/chosen": -0.05517578125, "logits/rejected": 0.076171875, "logps/chosen": -0.462890625, "logps/rejected": -2.703125, "loss": 0.0059, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3125, "rewards/margins": 11.1875, "rewards/rejected": -13.5, "step": 1353 }, { "epoch": 0.5042830540037244, "grad_norm": 0.9609375, "learning_rate": 1.043333390369772e-06, "logits/chosen": -0.07958984375, "logits/rejected": -1.0859375, "logps/chosen": -0.087890625, "logps/rejected": -1.75, "loss": 0.0021, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.44140625, "rewards/margins": 8.3125, "rewards/rejected": -8.75, "step": 1354 }, { "epoch": 0.5046554934823091, "grad_norm": 0.000606536865234375, "learning_rate": 1.0421779109720574e-06, "logits/chosen": -0.051025390625, "logits/rejected": 0.287109375, "logps/chosen": -0.2373046875, "logps/rejected": -3.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1875, "rewards/margins": 16.5, "rewards/rejected": -17.75, "step": 1355 }, { "epoch": 0.5050279329608939, "grad_norm": 0.734375, "learning_rate": 1.0410221911724106e-06, "logits/chosen": -0.000865936279296875, "logits/rejected": 0.37109375, "logps/chosen": -0.291015625, "logps/rejected": -2.8125, "loss": 0.0013, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.453125, "rewards/margins": 12.625, "rewards/rejected": -14.125, "step": 1356 }, { "epoch": 0.5054003724394786, "grad_norm": 0.054931640625, "learning_rate": 1.0398662329249844e-06, "logits/chosen": -0.0091552734375, "logits/rejected": 0.2353515625, "logps/chosen": -0.29296875, "logps/rejected": -2.90625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.46875, "rewards/margins": 13.125, "rewards/rejected": -14.5625, "step": 1357 }, { "epoch": 0.5057728119180633, "grad_norm": 0.0010223388671875, "learning_rate": 1.038710038184334e-06, "logits/chosen": 0.30078125, "logits/rejected": 0.27734375, "logps/chosen": -0.6328125, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.15625, "rewards/margins": 14.5, "rewards/rejected": -17.75, "step": 1358 }, { "epoch": 0.506145251396648, "grad_norm": 0.0001678466796875, "learning_rate": 1.0375536089054148e-06, "logits/chosen": 0.039794921875, "logits/rejected": 0.326171875, "logps/chosen": -0.181640625, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.90625, "rewards/margins": 16.375, "rewards/rejected": -17.25, "step": 1359 }, { "epoch": 0.5065176908752328, "grad_norm": 0.002655029296875, "learning_rate": 1.0363969470435777e-06, "logits/chosen": -0.061279296875, "logits/rejected": -0.1416015625, "logps/chosen": -0.3828125, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.921875, "rewards/margins": 15.0625, "rewards/rejected": -17.0, "step": 1360 }, { "epoch": 0.5068901303538175, "grad_norm": 0.06640625, "learning_rate": 1.0352400545545689e-06, "logits/chosen": -0.1533203125, "logits/rejected": 0.265625, "logps/chosen": -0.5703125, "logps/rejected": -3.109375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 12.75, "rewards/rejected": -15.5625, "step": 1361 }, { "epoch": 0.5072625698324023, "grad_norm": 3.828125, "learning_rate": 1.0340829333945228e-06, "logits/chosen": 0.158203125, "logits/rejected": -0.171875, "logps/chosen": -0.197265625, "logps/rejected": -2.34375, "loss": 0.0088, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.984375, "rewards/margins": 10.6875, "rewards/rejected": -11.6875, "step": 1362 }, { "epoch": 0.5076350093109869, "grad_norm": 0.0003204345703125, "learning_rate": 1.0329255855199612e-06, "logits/chosen": 0.0458984375, "logits/rejected": 0.546875, "logps/chosen": -0.1669921875, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8359375, "rewards/margins": 15.9375, "rewards/rejected": -16.75, "step": 1363 }, { "epoch": 0.5080074487895717, "grad_norm": 3.921875, "learning_rate": 1.031768012887789e-06, "logits/chosen": 0.17578125, "logits/rejected": -0.7421875, "logps/chosen": -0.48046875, "logps/rejected": -2.34375, "loss": 0.0055, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.40625, "rewards/margins": 9.25, "rewards/rejected": -11.625, "step": 1364 }, { "epoch": 0.5083798882681564, "grad_norm": 0.197265625, "learning_rate": 1.0306102174552915e-06, "logits/chosen": 0.04150390625, "logits/rejected": 0.48046875, "logps/chosen": -0.0703125, "logps/rejected": -2.625, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3515625, "rewards/margins": 12.75, "rewards/rejected": -13.0625, "step": 1365 }, { "epoch": 0.5087523277467412, "grad_norm": 0.796875, "learning_rate": 1.0294522011801304e-06, "logits/chosen": 0.0076904296875, "logits/rejected": 0.1298828125, "logps/chosen": -0.484375, "logps/rejected": -2.90625, "loss": 0.0013, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 12.125, "rewards/rejected": -14.5, "step": 1366 }, { "epoch": 0.5091247672253258, "grad_norm": 0.000530242919921875, "learning_rate": 1.0282939660203403e-06, "logits/chosen": 0.06689453125, "logits/rejected": -0.051025390625, "logps/chosen": -0.310546875, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5546875, "rewards/margins": 14.75, "rewards/rejected": -16.25, "step": 1367 }, { "epoch": 0.5094972067039106, "grad_norm": 0.0001506805419921875, "learning_rate": 1.0271355139343272e-06, "logits/chosen": 0.10400390625, "logits/rejected": 0.6171875, "logps/chosen": -0.27734375, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3828125, "rewards/margins": 16.75, "rewards/rejected": -18.0, "step": 1368 }, { "epoch": 0.5098696461824953, "grad_norm": 0.0009918212890625, "learning_rate": 1.0259768468808627e-06, "logits/chosen": 0.018798828125, "logits/rejected": 0.400390625, "logps/chosen": -0.671875, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.375, "rewards/margins": 15.1875, "rewards/rejected": -18.5, "step": 1369 }, { "epoch": 0.5102420856610801, "grad_norm": 0.00014495849609375, "learning_rate": 1.0248179668190826e-06, "logits/chosen": 0.1572265625, "logits/rejected": 0.271484375, "logps/chosen": -0.37890625, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 16.5, "rewards/rejected": -18.5, "step": 1370 }, { "epoch": 0.5106145251396648, "grad_norm": 5.5, "learning_rate": 1.0236588757084825e-06, "logits/chosen": 0.0419921875, "logits/rejected": 0.7890625, "logps/chosen": -0.7421875, "logps/rejected": -2.40625, "loss": 0.0135, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.71875, "rewards/margins": 8.3125, "rewards/rejected": -12.0625, "step": 1371 }, { "epoch": 0.5109869646182496, "grad_norm": 0.0302734375, "learning_rate": 1.022499575508915e-06, "logits/chosen": -0.048583984375, "logits/rejected": -0.1728515625, "logps/chosen": -0.46875, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.34375, "rewards/margins": 13.6875, "rewards/rejected": -16.0, "step": 1372 }, { "epoch": 0.5113594040968342, "grad_norm": 0.0019989013671875, "learning_rate": 1.0213400681805859e-06, "logits/chosen": 0.0322265625, "logits/rejected": 0.369140625, "logps/chosen": -0.35546875, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7734375, "rewards/margins": 16.0, "rewards/rejected": -17.875, "step": 1373 }, { "epoch": 0.511731843575419, "grad_norm": 0.00013828277587890625, "learning_rate": 1.0201803556840521e-06, "logits/chosen": 0.10693359375, "logits/rejected": 0.408203125, "logps/chosen": -0.4453125, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 16.25, "rewards/rejected": -18.5, "step": 1374 }, { "epoch": 0.5121042830540037, "grad_norm": 3.375, "learning_rate": 1.0190204399802164e-06, "logits/chosen": 0.0654296875, "logits/rejected": 0.421875, "logps/chosen": -0.34375, "logps/rejected": -2.6875, "loss": 0.0059, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.71875, "rewards/margins": 11.75, "rewards/rejected": -13.4375, "step": 1375 }, { "epoch": 0.5124767225325885, "grad_norm": 0.271484375, "learning_rate": 1.0178603230303257e-06, "logits/chosen": 0.047607421875, "logits/rejected": -0.890625, "logps/chosen": -0.234375, "logps/rejected": -2.03125, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.171875, "rewards/margins": 9.0, "rewards/rejected": -10.125, "step": 1376 }, { "epoch": 0.5128491620111731, "grad_norm": 0.00054931640625, "learning_rate": 1.016700006795967e-06, "logits/chosen": 0.08935546875, "logits/rejected": 0.251953125, "logps/chosen": -0.32421875, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 16.125, "rewards/rejected": -17.75, "step": 1377 }, { "epoch": 0.5132216014897579, "grad_norm": 0.466796875, "learning_rate": 1.0155394932390645e-06, "logits/chosen": -0.023681640625, "logits/rejected": 0.11962890625, "logps/chosen": -0.20703125, "logps/rejected": -2.75, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.03125, "rewards/margins": 12.75, "rewards/rejected": -13.75, "step": 1378 }, { "epoch": 0.5135940409683426, "grad_norm": 0.000659942626953125, "learning_rate": 1.014378784321876e-06, "logits/chosen": -0.0306396484375, "logits/rejected": 0.248046875, "logps/chosen": -0.287109375, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4375, "rewards/margins": 16.25, "rewards/rejected": -17.75, "step": 1379 }, { "epoch": 0.5139664804469274, "grad_norm": 0.003936767578125, "learning_rate": 1.0132178820069892e-06, "logits/chosen": -0.01202392578125, "logits/rejected": 0.291015625, "logps/chosen": -0.57421875, "logps/rejected": -3.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.875, "rewards/margins": 14.8125, "rewards/rejected": -17.75, "step": 1380 }, { "epoch": 0.5143389199255121, "grad_norm": 0.020751953125, "learning_rate": 1.0120567882573195e-06, "logits/chosen": 0.07861328125, "logits/rejected": -0.33984375, "logps/chosen": -0.12353515625, "logps/rejected": -2.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6171875, "rewards/margins": 13.25, "rewards/rejected": -13.875, "step": 1381 }, { "epoch": 0.5147113594040968, "grad_norm": 0.0002651214599609375, "learning_rate": 1.0108955050361054e-06, "logits/chosen": 0.134765625, "logits/rejected": 0.2470703125, "logps/chosen": -0.16015625, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8046875, "rewards/margins": 16.375, "rewards/rejected": -17.25, "step": 1382 }, { "epoch": 0.5150837988826815, "grad_norm": 4.4375, "learning_rate": 1.0097340343069061e-06, "logits/chosen": 0.2197265625, "logits/rejected": -1.3515625, "logps/chosen": -0.486328125, "logps/rejected": -1.828125, "loss": 0.0062, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 6.6875, "rewards/rejected": -9.125, "step": 1383 }, { "epoch": 0.5154562383612663, "grad_norm": 0.0024566650390625, "learning_rate": 1.0085723780335977e-06, "logits/chosen": 0.07568359375, "logits/rejected": 0.357421875, "logps/chosen": -0.4140625, "logps/rejected": -3.484375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 15.3125, "rewards/rejected": -17.375, "step": 1384 }, { "epoch": 0.515828677839851, "grad_norm": 0.000972747802734375, "learning_rate": 1.00741053818037e-06, "logits/chosen": 0.1650390625, "logits/rejected": 0.216796875, "logps/chosen": -0.353515625, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.765625, "rewards/margins": 15.625, "rewards/rejected": -17.375, "step": 1385 }, { "epoch": 0.5162011173184358, "grad_norm": 0.000873565673828125, "learning_rate": 1.0062485167117233e-06, "logits/chosen": 0.031982421875, "logits/rejected": 0.05615234375, "logps/chosen": -0.28515625, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4296875, "rewards/margins": 15.6875, "rewards/rejected": -17.125, "step": 1386 }, { "epoch": 0.5165735567970204, "grad_norm": 296.0, "learning_rate": 1.0050863155924652e-06, "logits/chosen": 0.08447265625, "logits/rejected": 0.265625, "logps/chosen": -0.5859375, "logps/rejected": -2.140625, "loss": 1.1328, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.9375, "rewards/margins": 7.75, "rewards/rejected": -10.6875, "step": 1387 }, { "epoch": 0.5169459962756052, "grad_norm": 5.0067901611328125e-05, "learning_rate": 1.0039239367877064e-06, "logits/chosen": 0.16796875, "logits/rejected": 0.275390625, "logps/chosen": -0.298828125, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4921875, "rewards/margins": 18.0, "rewards/rejected": -19.5, "step": 1388 }, { "epoch": 0.5173184357541899, "grad_norm": 0.00616455078125, "learning_rate": 1.0027613822628587e-06, "logits/chosen": 0.08837890625, "logits/rejected": -0.1552734375, "logps/chosen": -0.271484375, "logps/rejected": -3.515625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.359375, "rewards/margins": 16.25, "rewards/rejected": -17.5, "step": 1389 }, { "epoch": 0.5176908752327747, "grad_norm": 0.00048065185546875, "learning_rate": 1.0015986539836305e-06, "logits/chosen": 0.09423828125, "logits/rejected": 0.25390625, "logps/chosen": -0.330078125, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 15.4375, "rewards/rejected": -17.0, "step": 1390 }, { "epoch": 0.5180633147113594, "grad_norm": 4.9173831939697266e-06, "learning_rate": 1.000435753916025e-06, "logits/chosen": 0.072265625, "logits/rejected": 0.26953125, "logps/chosen": -0.2265625, "logps/rejected": -4.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1328125, "rewards/margins": 19.75, "rewards/rejected": -21.0, "step": 1391 }, { "epoch": 0.5184357541899441, "grad_norm": 123.5, "learning_rate": 9.992726840263344e-07, "logits/chosen": -0.0252685546875, "logits/rejected": -0.5546875, "logps/chosen": -0.5625, "logps/rejected": -1.359375, "loss": 0.3516, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8125, "rewards/margins": 4.0, "rewards/rejected": -6.8125, "step": 1392 }, { "epoch": 0.5188081936685288, "grad_norm": 0.002532958984375, "learning_rate": 9.981094462811391e-07, "logits/chosen": 0.158203125, "logits/rejected": -0.10302734375, "logps/chosen": -0.62890625, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.15625, "rewards/margins": 14.9375, "rewards/rejected": -18.125, "step": 1393 }, { "epoch": 0.5191806331471136, "grad_norm": 0.0010223388671875, "learning_rate": 9.96946042647303e-07, "logits/chosen": 0.061767578125, "logits/rejected": 0.0194091796875, "logps/chosen": -0.248046875, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.234375, "rewards/margins": 16.625, "rewards/rejected": -17.875, "step": 1394 }, { "epoch": 0.5195530726256983, "grad_norm": 0.07763671875, "learning_rate": 9.957824750919705e-07, "logits/chosen": -0.00872802734375, "logits/rejected": 0.30078125, "logps/chosen": -0.359375, "logps/rejected": -3.28125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7890625, "rewards/margins": 14.625, "rewards/rejected": -16.375, "step": 1395 }, { "epoch": 0.5199255121042831, "grad_norm": 0.0001163482666015625, "learning_rate": 9.94618745582563e-07, "logits/chosen": 0.1171875, "logits/rejected": 0.5546875, "logps/chosen": -0.29296875, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.46875, "rewards/margins": 16.5, "rewards/rejected": -18.0, "step": 1396 }, { "epoch": 0.5202979515828677, "grad_norm": 4.09375, "learning_rate": 9.934548560867762e-07, "logits/chosen": 0.07421875, "logits/rejected": 0.7734375, "logps/chosen": -0.94921875, "logps/rejected": -2.984375, "loss": 0.0041, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.75, "rewards/margins": 10.1875, "rewards/rejected": -14.9375, "step": 1397 }, { "epoch": 0.5206703910614525, "grad_norm": 0.00506591796875, "learning_rate": 9.922908085725758e-07, "logits/chosen": 0.16796875, "logits/rejected": -0.365234375, "logps/chosen": -0.263671875, "logps/rejected": -2.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3203125, "rewards/margins": 12.625, "rewards/rejected": -14.0, "step": 1398 }, { "epoch": 0.5210428305400372, "grad_norm": 6.29425048828125e-05, "learning_rate": 9.911266050081948e-07, "logits/chosen": 0.2197265625, "logits/rejected": 0.3046875, "logps/chosen": -0.2451171875, "logps/rejected": -3.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2265625, "rewards/margins": 17.75, "rewards/rejected": -19.0, "step": 1399 }, { "epoch": 0.521415270018622, "grad_norm": 0.002777099609375, "learning_rate": 9.899622473621303e-07, "logits/chosen": -0.123046875, "logits/rejected": 0.04296875, "logps/chosen": -0.1669921875, "logps/rejected": -2.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8359375, "rewards/margins": 13.75, "rewards/rejected": -14.5625, "step": 1400 }, { "epoch": 0.5217877094972067, "grad_norm": 0.1484375, "learning_rate": 9.887977376031397e-07, "logits/chosen": 0.166015625, "logits/rejected": 0.62890625, "logps/chosen": -0.1455078125, "logps/rejected": -3.0625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7265625, "rewards/margins": 14.625, "rewards/rejected": -15.3125, "step": 1401 }, { "epoch": 0.5221601489757914, "grad_norm": 1.2890625, "learning_rate": 9.876330777002382e-07, "logits/chosen": -0.012451171875, "logits/rejected": 0.7109375, "logps/chosen": -0.43359375, "logps/rejected": -1.9453125, "loss": 0.0021, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.171875, "rewards/margins": 7.5625, "rewards/rejected": -9.75, "step": 1402 }, { "epoch": 0.5225325884543761, "grad_norm": 0.00531005859375, "learning_rate": 9.864682696226937e-07, "logits/chosen": 0.2080078125, "logits/rejected": 0.0284423828125, "logps/chosen": -0.51171875, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5625, "rewards/margins": 13.6875, "rewards/rejected": -16.25, "step": 1403 }, { "epoch": 0.5229050279329609, "grad_norm": 0.000209808349609375, "learning_rate": 9.853033153400255e-07, "logits/chosen": 0.162109375, "logits/rejected": 0.3203125, "logps/chosen": -0.216796875, "logps/rejected": -3.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0859375, "rewards/margins": 16.75, "rewards/rejected": -17.75, "step": 1404 }, { "epoch": 0.5232774674115456, "grad_norm": 0.0009307861328125, "learning_rate": 9.84138216822e-07, "logits/chosen": -0.08935546875, "logits/rejected": 0.302734375, "logps/chosen": -0.484375, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.421875, "rewards/margins": 15.875, "rewards/rejected": -18.25, "step": 1405 }, { "epoch": 0.5236499068901304, "grad_norm": 0.00017833709716796875, "learning_rate": 9.829729760386276e-07, "logits/chosen": 0.15234375, "logits/rejected": 0.23828125, "logps/chosen": -0.41796875, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.09375, "rewards/margins": 17.0, "rewards/rejected": -19.0, "step": 1406 }, { "epoch": 0.524022346368715, "grad_norm": 0.0001544952392578125, "learning_rate": 9.818075949601587e-07, "logits/chosen": 0.050048828125, "logits/rejected": 0.4765625, "logps/chosen": -0.234375, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.171875, "rewards/margins": 16.25, "rewards/rejected": -17.25, "step": 1407 }, { "epoch": 0.5243947858472998, "grad_norm": 6.29425048828125e-05, "learning_rate": 9.806420755570813e-07, "logits/chosen": -0.04736328125, "logits/rejected": 0.25390625, "logps/chosen": -0.12109375, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.60546875, "rewards/margins": 17.25, "rewards/rejected": -18.0, "step": 1408 }, { "epoch": 0.5247672253258845, "grad_norm": 14.625, "learning_rate": 9.794764198001173e-07, "logits/chosen": -0.29296875, "logits/rejected": -0.1533203125, "logps/chosen": -0.498046875, "logps/rejected": -2.40625, "loss": 0.0168, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5, "rewards/margins": 9.5, "rewards/rejected": -12.0, "step": 1409 }, { "epoch": 0.5251396648044693, "grad_norm": 0.00119781494140625, "learning_rate": 9.783106296602193e-07, "logits/chosen": -0.35546875, "logits/rejected": -0.11328125, "logps/chosen": -0.265625, "logps/rejected": -3.421875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 15.75, "rewards/rejected": -17.125, "step": 1410 }, { "epoch": 0.525512104283054, "grad_norm": 0.0004787445068359375, "learning_rate": 9.771447071085665e-07, "logits/chosen": 0.1279296875, "logits/rejected": 0.291015625, "logps/chosen": -0.208984375, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 15.8125, "rewards/rejected": -16.875, "step": 1411 }, { "epoch": 0.5258845437616387, "grad_norm": 0.00015163421630859375, "learning_rate": 9.75978654116563e-07, "logits/chosen": 0.0546875, "logits/rejected": 0.328125, "logps/chosen": -0.228515625, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.140625, "rewards/margins": 17.0, "rewards/rejected": -18.0, "step": 1412 }, { "epoch": 0.5262569832402234, "grad_norm": 0.0084228515625, "learning_rate": 9.748124726558325e-07, "logits/chosen": 0.1982421875, "logits/rejected": 0.10546875, "logps/chosen": -0.59765625, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.984375, "rewards/margins": 13.25, "rewards/rejected": -16.25, "step": 1413 }, { "epoch": 0.5266294227188082, "grad_norm": 21.5, "learning_rate": 9.73646164698216e-07, "logits/chosen": -0.043701171875, "logits/rejected": 0.96484375, "logps/chosen": -0.4765625, "logps/rejected": -1.765625, "loss": 0.0344, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 6.46875, "rewards/rejected": -8.875, "step": 1414 }, { "epoch": 0.527001862197393, "grad_norm": 0.00015926361083984375, "learning_rate": 9.724797322157695e-07, "logits/chosen": 0.11865234375, "logits/rejected": 0.375, "logps/chosen": -0.30859375, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.546875, "rewards/margins": 16.25, "rewards/rejected": -17.75, "step": 1415 }, { "epoch": 0.5273743016759777, "grad_norm": 20.0, "learning_rate": 9.713131771807583e-07, "logits/chosen": 0.057861328125, "logits/rejected": 0.30078125, "logps/chosen": -0.88671875, "logps/rejected": -2.890625, "loss": 0.0247, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.4375, "rewards/margins": 10.0, "rewards/rejected": -14.5, "step": 1416 }, { "epoch": 0.5277467411545624, "grad_norm": 0.000339508056640625, "learning_rate": 9.70146501565655e-07, "logits/chosen": 0.1767578125, "logits/rejected": 0.4453125, "logps/chosen": -0.29296875, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4609375, "rewards/margins": 16.125, "rewards/rejected": -17.625, "step": 1417 }, { "epoch": 0.5281191806331471, "grad_norm": 0.31640625, "learning_rate": 9.689797073431368e-07, "logits/chosen": 0.09033203125, "logits/rejected": -0.287109375, "logps/chosen": -0.26953125, "logps/rejected": -2.875, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.359375, "rewards/margins": 13.0625, "rewards/rejected": -14.4375, "step": 1418 }, { "epoch": 0.5284916201117319, "grad_norm": 3.266334533691406e-05, "learning_rate": 9.678127964860812e-07, "logits/chosen": 0.2060546875, "logits/rejected": 0.33984375, "logps/chosen": -0.140625, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.703125, "rewards/margins": 18.5, "rewards/rejected": -19.125, "step": 1419 }, { "epoch": 0.5288640595903166, "grad_norm": 3.890625, "learning_rate": 9.666457709675626e-07, "logits/chosen": -0.0302734375, "logits/rejected": 0.466796875, "logps/chosen": -0.259765625, "logps/rejected": -2.59375, "loss": 0.0055, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.296875, "rewards/margins": 11.625, "rewards/rejected": -12.9375, "step": 1420 }, { "epoch": 0.5292364990689014, "grad_norm": 7.40625, "learning_rate": 9.654786327608496e-07, "logits/chosen": 0.12060546875, "logits/rejected": -0.052734375, "logps/chosen": -0.45703125, "logps/rejected": -2.515625, "loss": 0.0113, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.296875, "rewards/margins": 10.25, "rewards/rejected": -12.5, "step": 1421 }, { "epoch": 0.529608938547486, "grad_norm": 0.51171875, "learning_rate": 9.643113838394009e-07, "logits/chosen": 0.10205078125, "logits/rejected": -0.51953125, "logps/chosen": -0.6171875, "logps/rejected": -2.546875, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.078125, "rewards/margins": 9.625, "rewards/rejected": -12.6875, "step": 1422 }, { "epoch": 0.5299813780260708, "grad_norm": 0.062255859375, "learning_rate": 9.631440261768628e-07, "logits/chosen": 0.03369140625, "logits/rejected": -0.9921875, "logps/chosen": -0.240234375, "logps/rejected": -2.375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.203125, "rewards/margins": 10.6875, "rewards/rejected": -11.875, "step": 1423 }, { "epoch": 0.5303538175046555, "grad_norm": 0.00029754638671875, "learning_rate": 9.619765617470658e-07, "logits/chosen": 0.083984375, "logits/rejected": 0.453125, "logps/chosen": -0.310546875, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5546875, "rewards/margins": 17.25, "rewards/rejected": -18.875, "step": 1424 }, { "epoch": 0.5307262569832403, "grad_norm": 1.424551010131836e-05, "learning_rate": 9.608089925240205e-07, "logits/chosen": 0.10791015625, "logits/rejected": 0.234375, "logps/chosen": -0.3203125, "logps/rejected": -4.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6015625, "rewards/margins": 19.0, "rewards/rejected": -20.5, "step": 1425 }, { "epoch": 0.531098696461825, "grad_norm": 0.0034332275390625, "learning_rate": 9.596413204819137e-07, "logits/chosen": 0.01116943359375, "logits/rejected": 0.17578125, "logps/chosen": -0.265625, "logps/rejected": -2.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 12.875, "rewards/rejected": -14.25, "step": 1426 }, { "epoch": 0.5314711359404097, "grad_norm": 0.003082275390625, "learning_rate": 9.584735475951083e-07, "logits/chosen": 0.15625, "logits/rejected": 0.45703125, "logps/chosen": -0.375, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8828125, "rewards/margins": 15.9375, "rewards/rejected": -17.75, "step": 1427 }, { "epoch": 0.5318435754189944, "grad_norm": 0.00262451171875, "learning_rate": 9.573056758381356e-07, "logits/chosen": 0.2421875, "logits/rejected": 0.1396484375, "logps/chosen": -0.75, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.75, "rewards/margins": 13.5, "rewards/rejected": -17.25, "step": 1428 }, { "epoch": 0.5322160148975792, "grad_norm": 0.00885009765625, "learning_rate": 9.561377071856954e-07, "logits/chosen": 0.119140625, "logits/rejected": 0.28515625, "logps/chosen": -0.275390625, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.375, "rewards/margins": 14.375, "rewards/rejected": -15.75, "step": 1429 }, { "epoch": 0.5325884543761639, "grad_norm": 0.016845703125, "learning_rate": 9.54969643612651e-07, "logits/chosen": -0.03955078125, "logits/rejected": -0.1953125, "logps/chosen": -0.45703125, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.28125, "rewards/margins": 13.5, "rewards/rejected": -15.8125, "step": 1430 }, { "epoch": 0.5329608938547487, "grad_norm": 0.60546875, "learning_rate": 9.538014870940254e-07, "logits/chosen": 0.23828125, "logits/rejected": -0.55859375, "logps/chosen": -0.37890625, "logps/rejected": -2.84375, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 12.3125, "rewards/rejected": -14.1875, "step": 1431 }, { "epoch": 0.5333333333333333, "grad_norm": 0.005828857421875, "learning_rate": 9.52633239605e-07, "logits/chosen": 0.05615234375, "logits/rejected": 0.318359375, "logps/chosen": -0.8203125, "logps/rejected": -3.890625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.125, "rewards/margins": 15.3125, "rewards/rejected": -19.5, "step": 1432 }, { "epoch": 0.5337057728119181, "grad_norm": 4.76837158203125e-05, "learning_rate": 9.51464903120909e-07, "logits/chosen": 0.142578125, "logits/rejected": 0.478515625, "logps/chosen": -0.1328125, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6640625, "rewards/margins": 18.25, "rewards/rejected": -18.875, "step": 1433 }, { "epoch": 0.5340782122905028, "grad_norm": 0.016357421875, "learning_rate": 9.502964796172383e-07, "logits/chosen": 0.193359375, "logits/rejected": -0.04443359375, "logps/chosen": -0.279296875, "logps/rejected": -2.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.390625, "rewards/margins": 12.8125, "rewards/rejected": -14.25, "step": 1434 }, { "epoch": 0.5344506517690876, "grad_norm": 0.00020313262939453125, "learning_rate": 9.491279710696194e-07, "logits/chosen": -0.1435546875, "logits/rejected": 0.21484375, "logps/chosen": -0.33984375, "logps/rejected": -3.515625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6953125, "rewards/margins": 15.875, "rewards/rejected": -17.5, "step": 1435 }, { "epoch": 0.5348230912476722, "grad_norm": 0.0849609375, "learning_rate": 9.479593794538287e-07, "logits/chosen": 0.006011962890625, "logits/rejected": 0.53125, "logps/chosen": -0.27734375, "logps/rejected": -2.921875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.390625, "rewards/margins": 13.25, "rewards/rejected": -14.625, "step": 1436 }, { "epoch": 0.535195530726257, "grad_norm": 1.0859375, "learning_rate": 9.467907067457823e-07, "logits/chosen": 0.1669921875, "logits/rejected": -0.6171875, "logps/chosen": -0.376953125, "logps/rejected": -2.5, "loss": 0.0017, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 10.625, "rewards/rejected": -12.5625, "step": 1437 }, { "epoch": 0.5355679702048417, "grad_norm": 0.1767578125, "learning_rate": 9.456219549215345e-07, "logits/chosen": -0.032958984375, "logits/rejected": -0.58984375, "logps/chosen": -0.375, "logps/rejected": -2.875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.875, "rewards/margins": 12.5, "rewards/rejected": -14.375, "step": 1438 }, { "epoch": 0.5359404096834265, "grad_norm": 0.03857421875, "learning_rate": 9.44453125957272e-07, "logits/chosen": 0.1689453125, "logits/rejected": -0.78515625, "logps/chosen": -0.66796875, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.34375, "rewards/margins": 11.875, "rewards/rejected": -15.25, "step": 1439 }, { "epoch": 0.5363128491620112, "grad_norm": 0.000331878662109375, "learning_rate": 9.432842218293134e-07, "logits/chosen": 0.0849609375, "logits/rejected": 0.515625, "logps/chosen": -0.451171875, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25, "rewards/margins": 15.625, "rewards/rejected": -17.875, "step": 1440 }, { "epoch": 0.536685288640596, "grad_norm": 0.00012493133544921875, "learning_rate": 9.421152445141028e-07, "logits/chosen": 0.1953125, "logits/rejected": 0.482421875, "logps/chosen": -0.177734375, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8828125, "rewards/margins": 16.75, "rewards/rejected": -17.75, "step": 1441 }, { "epoch": 0.5370577281191806, "grad_norm": 0.0001583099365234375, "learning_rate": 9.409461959882094e-07, "logits/chosen": 0.171875, "logits/rejected": 0.4140625, "logps/chosen": -0.1787109375, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.890625, "rewards/margins": 18.625, "rewards/rejected": -19.5, "step": 1442 }, { "epoch": 0.5374301675977654, "grad_norm": 0.0029296875, "learning_rate": 9.397770782283221e-07, "logits/chosen": 0.06640625, "logits/rejected": -0.365234375, "logps/chosen": -0.16015625, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8046875, "rewards/margins": 16.375, "rewards/rejected": -17.125, "step": 1443 }, { "epoch": 0.5378026070763501, "grad_norm": 0.00042724609375, "learning_rate": 9.386078932112472e-07, "logits/chosen": 0.12451171875, "logits/rejected": 0.1572265625, "logps/chosen": -0.322265625, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 15.75, "rewards/rejected": -17.375, "step": 1444 }, { "epoch": 0.5381750465549349, "grad_norm": 1.7523765563964844e-05, "learning_rate": 9.374386429139047e-07, "logits/chosen": 0.2080078125, "logits/rejected": 0.4375, "logps/chosen": -0.1650390625, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.82421875, "rewards/margins": 18.5, "rewards/rejected": -19.375, "step": 1445 }, { "epoch": 0.5385474860335195, "grad_norm": 2.03125, "learning_rate": 9.362693293133243e-07, "logits/chosen": 0.1689453125, "logits/rejected": -0.359375, "logps/chosen": -0.60546875, "logps/rejected": -3.109375, "loss": 0.0021, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.03125, "rewards/margins": 12.5, "rewards/rejected": -15.5, "step": 1446 }, { "epoch": 0.5389199255121043, "grad_norm": 5.245208740234375e-06, "learning_rate": 9.350999543866436e-07, "logits/chosen": 0.06640625, "logits/rejected": 0.384765625, "logps/chosen": -0.173828125, "logps/rejected": -4.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8671875, "rewards/margins": 19.75, "rewards/rejected": -20.5, "step": 1447 }, { "epoch": 0.539292364990689, "grad_norm": 0.000164031982421875, "learning_rate": 9.339305201111037e-07, "logits/chosen": 0.17578125, "logits/rejected": 0.416015625, "logps/chosen": -0.349609375, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.75, "rewards/margins": 16.375, "rewards/rejected": -18.125, "step": 1448 }, { "epoch": 0.5396648044692738, "grad_norm": 0.000186920166015625, "learning_rate": 9.327610284640459e-07, "logits/chosen": 0.16015625, "logits/rejected": 0.53515625, "logps/chosen": -0.421875, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 17.5, "rewards/rejected": -19.5, "step": 1449 }, { "epoch": 0.5400372439478585, "grad_norm": 0.01025390625, "learning_rate": 9.31591481422908e-07, "logits/chosen": 0.173828125, "logits/rejected": 0.020751953125, "logps/chosen": -0.84765625, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.25, "rewards/margins": 12.625, "rewards/rejected": -16.875, "step": 1450 }, { "epoch": 0.5404096834264432, "grad_norm": 6.246566772460938e-05, "learning_rate": 9.304218809652228e-07, "logits/chosen": -0.05859375, "logits/rejected": 0.21484375, "logps/chosen": -0.2109375, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 17.125, "rewards/rejected": -18.125, "step": 1451 }, { "epoch": 0.5407821229050279, "grad_norm": 0.00762939453125, "learning_rate": 9.292522290686119e-07, "logits/chosen": -0.0289306640625, "logits/rejected": 0.119140625, "logps/chosen": -0.625, "logps/rejected": -3.328125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.125, "rewards/margins": 13.5, "rewards/rejected": -16.625, "step": 1452 }, { "epoch": 0.5411545623836127, "grad_norm": 0.0022735595703125, "learning_rate": 9.280825277107851e-07, "logits/chosen": 0.060546875, "logits/rejected": -0.2578125, "logps/chosen": -0.373046875, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.875, "rewards/margins": 13.875, "rewards/rejected": -15.75, "step": 1453 }, { "epoch": 0.5415270018621974, "grad_norm": 0.00714111328125, "learning_rate": 9.269127788695353e-07, "logits/chosen": 0.0218505859375, "logits/rejected": 0.28515625, "logps/chosen": -0.71484375, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5625, "rewards/margins": 14.5, "rewards/rejected": -18.125, "step": 1454 }, { "epoch": 0.5418994413407822, "grad_norm": 20.0, "learning_rate": 9.257429845227354e-07, "logits/chosen": 0.15625, "logits/rejected": 0.2294921875, "logps/chosen": -0.9140625, "logps/rejected": -2.875, "loss": 0.0247, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.5625, "rewards/margins": 9.8125, "rewards/rejected": -14.375, "step": 1455 }, { "epoch": 0.5422718808193668, "grad_norm": 0.41015625, "learning_rate": 9.245731466483355e-07, "logits/chosen": 0.146484375, "logits/rejected": 0.625, "logps/chosen": -0.443359375, "logps/rejected": -2.78125, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 11.625, "rewards/rejected": -13.875, "step": 1456 }, { "epoch": 0.5426443202979516, "grad_norm": 0.0023193359375, "learning_rate": 9.234032672243601e-07, "logits/chosen": 0.208984375, "logits/rejected": -0.138671875, "logps/chosen": -0.1669921875, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8359375, "rewards/margins": 15.875, "rewards/rejected": -16.75, "step": 1457 }, { "epoch": 0.5430167597765363, "grad_norm": 0.458984375, "learning_rate": 9.222333482289021e-07, "logits/chosen": -0.037353515625, "logits/rejected": -0.515625, "logps/chosen": -0.34765625, "logps/rejected": -2.84375, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7421875, "rewards/margins": 12.5, "rewards/rejected": -14.25, "step": 1458 }, { "epoch": 0.5433891992551211, "grad_norm": 0.01202392578125, "learning_rate": 9.210633916401232e-07, "logits/chosen": 0.10693359375, "logits/rejected": -0.361328125, "logps/chosen": -0.41015625, "logps/rejected": -2.921875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.046875, "rewards/margins": 12.5625, "rewards/rejected": -14.625, "step": 1459 }, { "epoch": 0.5437616387337058, "grad_norm": 0.022216796875, "learning_rate": 9.198933994362479e-07, "logits/chosen": -0.0087890625, "logits/rejected": -0.1494140625, "logps/chosen": -0.5234375, "logps/rejected": -3.171875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.625, "rewards/margins": 13.25, "rewards/rejected": -15.875, "step": 1460 }, { "epoch": 0.5441340782122905, "grad_norm": 0.00567626953125, "learning_rate": 9.187233735955602e-07, "logits/chosen": -0.0361328125, "logits/rejected": 0.361328125, "logps/chosen": -0.439453125, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.203125, "rewards/margins": 13.9375, "rewards/rejected": -16.25, "step": 1461 }, { "epoch": 0.5445065176908752, "grad_norm": 0.0458984375, "learning_rate": 9.175533160964023e-07, "logits/chosen": -0.0732421875, "logits/rejected": -0.236328125, "logps/chosen": -0.392578125, "logps/rejected": -3.28125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9609375, "rewards/margins": 14.4375, "rewards/rejected": -16.375, "step": 1462 }, { "epoch": 0.54487895716946, "grad_norm": 0.01361083984375, "learning_rate": 9.163832289171691e-07, "logits/chosen": 0.16015625, "logits/rejected": 0.462890625, "logps/chosen": -0.6875, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4375, "rewards/margins": 13.0, "rewards/rejected": -16.375, "step": 1463 }, { "epoch": 0.5452513966480447, "grad_norm": 0.059326171875, "learning_rate": 9.152131140363063e-07, "logits/chosen": 0.1533203125, "logits/rejected": -0.59375, "logps/chosen": -0.2177734375, "logps/rejected": -2.6875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.09375, "rewards/margins": 12.375, "rewards/rejected": -13.4375, "step": 1464 }, { "epoch": 0.5456238361266295, "grad_norm": 0.0022430419921875, "learning_rate": 9.140429734323054e-07, "logits/chosen": -0.0751953125, "logits/rejected": 0.322265625, "logps/chosen": -0.59375, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.96875, "rewards/margins": 14.5625, "rewards/rejected": -17.5, "step": 1465 }, { "epoch": 0.5459962756052141, "grad_norm": 8.20159912109375e-05, "learning_rate": 9.128728090837022e-07, "logits/chosen": -0.01092529296875, "logits/rejected": 0.23828125, "logps/chosen": -0.22265625, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.109375, "rewards/margins": 16.75, "rewards/rejected": -17.875, "step": 1466 }, { "epoch": 0.5463687150837989, "grad_norm": 8.392333984375e-05, "learning_rate": 9.117026229690724e-07, "logits/chosen": -0.0255126953125, "logits/rejected": 0.1923828125, "logps/chosen": -0.2294921875, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1484375, "rewards/margins": 17.125, "rewards/rejected": -18.25, "step": 1467 }, { "epoch": 0.5467411545623836, "grad_norm": 0.0002651214599609375, "learning_rate": 9.105324170670291e-07, "logits/chosen": 0.0830078125, "logits/rejected": 0.353515625, "logps/chosen": -0.6171875, "logps/rejected": -3.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.078125, "rewards/margins": 15.9375, "rewards/rejected": -19.0, "step": 1468 }, { "epoch": 0.5471135940409684, "grad_norm": 0.000263214111328125, "learning_rate": 9.093621933562178e-07, "logits/chosen": 0.0634765625, "logits/rejected": -0.1259765625, "logps/chosen": -0.357421875, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7890625, "rewards/margins": 15.6875, "rewards/rejected": -17.5, "step": 1469 }, { "epoch": 0.547486033519553, "grad_norm": 0.000949859619140625, "learning_rate": 9.081919538153148e-07, "logits/chosen": 0.142578125, "logits/rejected": 0.431640625, "logps/chosen": -0.53515625, "logps/rejected": -3.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.671875, "rewards/margins": 15.125, "rewards/rejected": -17.75, "step": 1470 }, { "epoch": 0.5478584729981378, "grad_norm": 1.2636184692382812e-05, "learning_rate": 9.070217004230228e-07, "logits/chosen": 0.1044921875, "logits/rejected": 0.2470703125, "logps/chosen": -0.169921875, "logps/rejected": -3.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8515625, "rewards/margins": 18.875, "rewards/rejected": -19.75, "step": 1471 }, { "epoch": 0.5482309124767225, "grad_norm": 0.00640869140625, "learning_rate": 9.058514351580687e-07, "logits/chosen": 0.205078125, "logits/rejected": 0.3984375, "logps/chosen": -0.6328125, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.15625, "rewards/margins": 13.25, "rewards/rejected": -16.5, "step": 1472 }, { "epoch": 0.5486033519553073, "grad_norm": 0.0001964569091796875, "learning_rate": 9.046811599991983e-07, "logits/chosen": -0.14453125, "logits/rejected": 0.2734375, "logps/chosen": -0.734375, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6875, "rewards/margins": 16.75, "rewards/rejected": -20.5, "step": 1473 }, { "epoch": 0.548975791433892, "grad_norm": 11.1875, "learning_rate": 9.035108769251752e-07, "logits/chosen": 0.1240234375, "logits/rejected": 0.29296875, "logps/chosen": -0.34765625, "logps/rejected": -2.1875, "loss": 0.0275, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7421875, "rewards/margins": 9.125, "rewards/rejected": -10.875, "step": 1474 }, { "epoch": 0.5493482309124768, "grad_norm": 9.489059448242188e-05, "learning_rate": 9.023405879147753e-07, "logits/chosen": 0.12158203125, "logits/rejected": 0.2265625, "logps/chosen": -0.4296875, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 17.0, "rewards/rejected": -19.0, "step": 1475 }, { "epoch": 0.5497206703910614, "grad_norm": 0.205078125, "learning_rate": 9.01170294946786e-07, "logits/chosen": -0.1630859375, "logits/rejected": 0.56640625, "logps/chosen": -0.19921875, "logps/rejected": -2.53125, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.99609375, "rewards/margins": 11.6875, "rewards/rejected": -12.6875, "step": 1476 }, { "epoch": 0.5500931098696462, "grad_norm": 0.007415771484375, "learning_rate": 9e-07, "logits/chosen": 0.1826171875, "logits/rejected": 0.059814453125, "logps/chosen": -0.31640625, "logps/rejected": -2.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5859375, "rewards/margins": 12.1875, "rewards/rejected": -13.75, "step": 1477 }, { "epoch": 0.5504655493482309, "grad_norm": 0.00078582763671875, "learning_rate": 8.988297050532139e-07, "logits/chosen": 0.10791015625, "logits/rejected": 0.07666015625, "logps/chosen": -0.4140625, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 16.75, "rewards/rejected": -18.875, "step": 1478 }, { "epoch": 0.5508379888268157, "grad_norm": 0.002197265625, "learning_rate": 8.976594120852246e-07, "logits/chosen": 0.2138671875, "logits/rejected": 0.3828125, "logps/chosen": -0.453125, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.265625, "rewards/margins": 15.75, "rewards/rejected": -18.0, "step": 1479 }, { "epoch": 0.5512104283054003, "grad_norm": 0.1865234375, "learning_rate": 8.964891230748248e-07, "logits/chosen": 0.1533203125, "logits/rejected": 0.248046875, "logps/chosen": -0.353515625, "logps/rejected": -3.09375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.765625, "rewards/margins": 13.6875, "rewards/rejected": -15.4375, "step": 1480 }, { "epoch": 0.5515828677839851, "grad_norm": 0.5546875, "learning_rate": 8.953188400008019e-07, "logits/chosen": 0.1484375, "logits/rejected": 0.3359375, "logps/chosen": -0.24609375, "logps/rejected": -2.625, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.234375, "rewards/margins": 11.875, "rewards/rejected": -13.125, "step": 1481 }, { "epoch": 0.5519553072625698, "grad_norm": 0.35546875, "learning_rate": 8.941485648419313e-07, "logits/chosen": -0.0228271484375, "logits/rejected": -0.30078125, "logps/chosen": -0.2236328125, "logps/rejected": -1.8125, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1171875, "rewards/margins": 7.9375, "rewards/rejected": -9.0625, "step": 1482 }, { "epoch": 0.5523277467411546, "grad_norm": 0.000812530517578125, "learning_rate": 8.929782995769769e-07, "logits/chosen": 0.166015625, "logits/rejected": -0.032958984375, "logps/chosen": -0.10595703125, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.53125, "rewards/margins": 14.75, "rewards/rejected": -15.3125, "step": 1483 }, { "epoch": 0.5527001862197393, "grad_norm": 0.65234375, "learning_rate": 8.918080461846851e-07, "logits/chosen": 0.126953125, "logits/rejected": -0.39453125, "logps/chosen": -0.337890625, "logps/rejected": -2.5, "loss": 0.0013, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6875, "rewards/margins": 10.8125, "rewards/rejected": -12.5, "step": 1484 }, { "epoch": 0.553072625698324, "grad_norm": 0.032470703125, "learning_rate": 8.906378066437822e-07, "logits/chosen": -0.2275390625, "logits/rejected": 0.059326171875, "logps/chosen": -0.33984375, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6953125, "rewards/margins": 13.625, "rewards/rejected": -15.3125, "step": 1485 }, { "epoch": 0.5534450651769087, "grad_norm": 0.00012493133544921875, "learning_rate": 8.894675829329708e-07, "logits/chosen": -0.054931640625, "logits/rejected": 0.41015625, "logps/chosen": -0.158203125, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.79296875, "rewards/margins": 16.25, "rewards/rejected": -17.125, "step": 1486 }, { "epoch": 0.5538175046554935, "grad_norm": 0.000476837158203125, "learning_rate": 8.882973770309277e-07, "logits/chosen": 0.1455078125, "logits/rejected": 0.45703125, "logps/chosen": -0.1318359375, "logps/rejected": -3.484375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.66015625, "rewards/margins": 16.75, "rewards/rejected": -17.5, "step": 1487 }, { "epoch": 0.5541899441340782, "grad_norm": 0.0198974609375, "learning_rate": 8.871271909162979e-07, "logits/chosen": 0.07373046875, "logits/rejected": 0.1611328125, "logps/chosen": -0.56640625, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 12.5, "rewards/rejected": -15.375, "step": 1488 }, { "epoch": 0.554562383612663, "grad_norm": 0.0036163330078125, "learning_rate": 8.85957026567695e-07, "logits/chosen": -0.11669921875, "logits/rejected": 0.1953125, "logps/chosen": -0.56640625, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 15.25, "rewards/rejected": -18.0, "step": 1489 }, { "epoch": 0.5549348230912476, "grad_norm": 0.08837890625, "learning_rate": 8.847868859636939e-07, "logits/chosen": -0.154296875, "logits/rejected": -0.640625, "logps/chosen": -0.1708984375, "logps/rejected": -2.375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8515625, "rewards/margins": 11.0625, "rewards/rejected": -11.875, "step": 1490 }, { "epoch": 0.5553072625698324, "grad_norm": 0.330078125, "learning_rate": 8.836167710828308e-07, "logits/chosen": 0.158203125, "logits/rejected": -0.19140625, "logps/chosen": -0.3203125, "logps/rejected": -2.59375, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6015625, "rewards/margins": 11.375, "rewards/rejected": -13.0, "step": 1491 }, { "epoch": 0.5556797020484171, "grad_norm": 0.00396728515625, "learning_rate": 8.824466839035974e-07, "logits/chosen": -0.0228271484375, "logits/rejected": 0.3828125, "logps/chosen": -0.30859375, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5390625, "rewards/margins": 14.75, "rewards/rejected": -16.375, "step": 1492 }, { "epoch": 0.5560521415270019, "grad_norm": 0.006072998046875, "learning_rate": 8.812766264044399e-07, "logits/chosen": -0.1435546875, "logits/rejected": 0.298828125, "logps/chosen": -0.349609375, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.75, "rewards/margins": 15.75, "rewards/rejected": -17.5, "step": 1493 }, { "epoch": 0.5564245810055866, "grad_norm": 0.000701904296875, "learning_rate": 8.801066005637523e-07, "logits/chosen": 0.10693359375, "logits/rejected": 0.2392578125, "logps/chosen": -0.208984375, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 15.375, "rewards/rejected": -16.5, "step": 1494 }, { "epoch": 0.5567970204841713, "grad_norm": 0.00604248046875, "learning_rate": 8.789366083598769e-07, "logits/chosen": 0.142578125, "logits/rejected": 0.0791015625, "logps/chosen": -0.5703125, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 14.875, "rewards/rejected": -17.75, "step": 1495 }, { "epoch": 0.557169459962756, "grad_norm": 0.0830078125, "learning_rate": 8.777666517710979e-07, "logits/chosen": -0.0299072265625, "logits/rejected": 0.1572265625, "logps/chosen": -0.380859375, "logps/rejected": -3.046875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.90625, "rewards/margins": 13.3125, "rewards/rejected": -15.25, "step": 1496 }, { "epoch": 0.5575418994413408, "grad_norm": 0.0014190673828125, "learning_rate": 8.7659673277564e-07, "logits/chosen": 0.0025482177734375, "logits/rejected": -0.486328125, "logps/chosen": -0.12890625, "logps/rejected": -3.203125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.64453125, "rewards/margins": 15.375, "rewards/rejected": -16.0, "step": 1497 }, { "epoch": 0.5579143389199255, "grad_norm": 7.724761962890625e-05, "learning_rate": 8.754268533516645e-07, "logits/chosen": 0.078125, "logits/rejected": 0.240234375, "logps/chosen": -0.2265625, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1328125, "rewards/margins": 17.25, "rewards/rejected": -18.375, "step": 1498 }, { "epoch": 0.5582867783985103, "grad_norm": 0.625, "learning_rate": 8.742570154772647e-07, "logits/chosen": -0.0712890625, "logits/rejected": 0.90234375, "logps/chosen": -0.302734375, "logps/rejected": -2.96875, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.515625, "rewards/margins": 13.3125, "rewards/rejected": -14.875, "step": 1499 }, { "epoch": 0.5586592178770949, "grad_norm": 0.001220703125, "learning_rate": 8.73087221130465e-07, "logits/chosen": 0.05126953125, "logits/rejected": 0.4453125, "logps/chosen": -0.71875, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.59375, "rewards/margins": 14.25, "rewards/rejected": -17.875, "step": 1500 }, { "epoch": 0.5590316573556797, "grad_norm": 1.3046875, "learning_rate": 8.719174722892146e-07, "logits/chosen": 0.0172119140625, "logits/rejected": -0.271484375, "logps/chosen": -0.2431640625, "logps/rejected": -2.6875, "loss": 0.0026, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.21875, "rewards/margins": 12.1875, "rewards/rejected": -13.375, "step": 1501 }, { "epoch": 0.5594040968342644, "grad_norm": 12.0625, "learning_rate": 8.707477709313878e-07, "logits/chosen": 0.1318359375, "logits/rejected": -0.2294921875, "logps/chosen": -0.6015625, "logps/rejected": -2.953125, "loss": 0.0182, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.015625, "rewards/margins": 11.8125, "rewards/rejected": -14.8125, "step": 1502 }, { "epoch": 0.5597765363128492, "grad_norm": 0.09619140625, "learning_rate": 8.695781190347771e-07, "logits/chosen": -0.0986328125, "logits/rejected": 0.4375, "logps/chosen": -0.2470703125, "logps/rejected": -2.71875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.234375, "rewards/margins": 12.375, "rewards/rejected": -13.625, "step": 1503 }, { "epoch": 0.5601489757914339, "grad_norm": 0.000789642333984375, "learning_rate": 8.68408518577092e-07, "logits/chosen": 0.123046875, "logits/rejected": 0.423828125, "logps/chosen": -0.40625, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 14.625, "rewards/rejected": -16.75, "step": 1504 }, { "epoch": 0.5605214152700186, "grad_norm": 0.048583984375, "learning_rate": 8.672389715359542e-07, "logits/chosen": 0.048095703125, "logits/rejected": -0.380859375, "logps/chosen": -0.388671875, "logps/rejected": -2.96875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9453125, "rewards/margins": 12.9375, "rewards/rejected": -14.875, "step": 1505 }, { "epoch": 0.5608938547486033, "grad_norm": 9.5625, "learning_rate": 8.660694798888964e-07, "logits/chosen": 0.1162109375, "logits/rejected": 0.064453125, "logps/chosen": -0.453125, "logps/rejected": -2.6875, "loss": 0.0134, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.265625, "rewards/margins": 11.125, "rewards/rejected": -13.4375, "step": 1506 }, { "epoch": 0.5612662942271881, "grad_norm": 0.2578125, "learning_rate": 8.649000456133563e-07, "logits/chosen": 0.0086669921875, "logits/rejected": 0.53515625, "logps/chosen": -0.51171875, "logps/rejected": -2.984375, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5625, "rewards/margins": 12.375, "rewards/rejected": -14.9375, "step": 1507 }, { "epoch": 0.5616387337057728, "grad_norm": 4.28125, "learning_rate": 8.637306706866757e-07, "logits/chosen": 0.1884765625, "logits/rejected": 0.490234375, "logps/chosen": -0.5078125, "logps/rejected": -2.25, "loss": 0.0078, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.53125, "rewards/margins": 8.75, "rewards/rejected": -11.3125, "step": 1508 }, { "epoch": 0.5620111731843576, "grad_norm": 0.00103759765625, "learning_rate": 8.625613570860955e-07, "logits/chosen": 0.130859375, "logits/rejected": 0.1826171875, "logps/chosen": -0.53125, "logps/rejected": -3.390625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.65625, "rewards/margins": 14.3125, "rewards/rejected": -17.0, "step": 1509 }, { "epoch": 0.5623836126629422, "grad_norm": 0.00012969970703125, "learning_rate": 8.613921067887527e-07, "logits/chosen": 0.0067138671875, "logits/rejected": 0.30078125, "logps/chosen": -0.30078125, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5078125, "rewards/margins": 16.5, "rewards/rejected": -18.0, "step": 1510 }, { "epoch": 0.562756052141527, "grad_norm": 1.3671875, "learning_rate": 8.602229217716776e-07, "logits/chosen": 0.0576171875, "logits/rejected": -0.3671875, "logps/chosen": -0.236328125, "logps/rejected": -2.40625, "loss": 0.003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1796875, "rewards/margins": 10.875, "rewards/rejected": -12.0625, "step": 1511 }, { "epoch": 0.5631284916201117, "grad_norm": 0.1787109375, "learning_rate": 8.590538040117906e-07, "logits/chosen": -0.07177734375, "logits/rejected": -1.4375, "logps/chosen": -0.142578125, "logps/rejected": -1.96875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7109375, "rewards/margins": 9.125, "rewards/rejected": -9.875, "step": 1512 }, { "epoch": 0.5635009310986965, "grad_norm": 0.51953125, "learning_rate": 8.578847554858971e-07, "logits/chosen": -0.076171875, "logits/rejected": 0.46484375, "logps/chosen": -0.482421875, "logps/rejected": -2.34375, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.40625, "rewards/margins": 9.3125, "rewards/rejected": -11.6875, "step": 1513 }, { "epoch": 0.5638733705772812, "grad_norm": 0.5546875, "learning_rate": 8.567157781706866e-07, "logits/chosen": 0.1806640625, "logits/rejected": -0.46484375, "logps/chosen": -0.43359375, "logps/rejected": -2.921875, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.171875, "rewards/margins": 12.5, "rewards/rejected": -14.625, "step": 1514 }, { "epoch": 0.5642458100558659, "grad_norm": 0.1513671875, "learning_rate": 8.55546874042728e-07, "logits/chosen": -0.13671875, "logits/rejected": -0.376953125, "logps/chosen": -0.625, "logps/rejected": -3.3125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.125, "rewards/margins": 13.375, "rewards/rejected": -16.5, "step": 1515 }, { "epoch": 0.5646182495344506, "grad_norm": 1.53125, "learning_rate": 8.543780450784656e-07, "logits/chosen": -0.02734375, "logits/rejected": 0.73046875, "logps/chosen": -0.19921875, "logps/rejected": -2.515625, "loss": 0.0028, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0, "rewards/margins": 11.5625, "rewards/rejected": -12.5625, "step": 1516 }, { "epoch": 0.5649906890130354, "grad_norm": 0.059326171875, "learning_rate": 8.532092932542178e-07, "logits/chosen": 0.15234375, "logits/rejected": -0.431640625, "logps/chosen": -0.2490234375, "logps/rejected": -3.5, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 16.25, "rewards/rejected": -17.5, "step": 1517 }, { "epoch": 0.5653631284916201, "grad_norm": 0.013916015625, "learning_rate": 8.520406205461715e-07, "logits/chosen": 0.044921875, "logits/rejected": -0.267578125, "logps/chosen": -0.2734375, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3671875, "rewards/margins": 14.0625, "rewards/rejected": -15.375, "step": 1518 }, { "epoch": 0.5657355679702049, "grad_norm": 1.823902130126953e-05, "learning_rate": 8.508720289303809e-07, "logits/chosen": 0.1669921875, "logits/rejected": 0.390625, "logps/chosen": -0.23046875, "logps/rejected": -3.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.15625, "rewards/margins": 18.5, "rewards/rejected": -19.75, "step": 1519 }, { "epoch": 0.5661080074487895, "grad_norm": 0.0029296875, "learning_rate": 8.497035203827615e-07, "logits/chosen": 0.1171875, "logits/rejected": -0.10693359375, "logps/chosen": -0.4140625, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 13.125, "rewards/rejected": -15.25, "step": 1520 }, { "epoch": 0.5664804469273743, "grad_norm": 0.0002956390380859375, "learning_rate": 8.485350968790907e-07, "logits/chosen": 0.255859375, "logits/rejected": -0.041748046875, "logps/chosen": -0.427734375, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.140625, "rewards/margins": 15.625, "rewards/rejected": -17.75, "step": 1521 }, { "epoch": 0.566852886405959, "grad_norm": 2.4318695068359375e-05, "learning_rate": 8.473667603949999e-07, "logits/chosen": 0.12890625, "logits/rejected": 0.27734375, "logps/chosen": -0.26953125, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3515625, "rewards/margins": 18.0, "rewards/rejected": -19.375, "step": 1522 }, { "epoch": 0.5672253258845438, "grad_norm": 0.345703125, "learning_rate": 8.461985129059746e-07, "logits/chosen": -0.08837890625, "logits/rejected": -0.208984375, "logps/chosen": -0.37890625, "logps/rejected": -2.53125, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 10.8125, "rewards/rejected": -12.6875, "step": 1523 }, { "epoch": 0.5675977653631284, "grad_norm": 0.076171875, "learning_rate": 8.450303563873492e-07, "logits/chosen": 0.1513671875, "logits/rejected": -0.76171875, "logps/chosen": -0.4765625, "logps/rejected": -2.453125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 9.875, "rewards/rejected": -12.25, "step": 1524 }, { "epoch": 0.5679702048417132, "grad_norm": 6.580352783203125e-05, "learning_rate": 8.438622928143046e-07, "logits/chosen": -0.014892578125, "logits/rejected": 0.380859375, "logps/chosen": -0.20703125, "logps/rejected": -4.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0390625, "rewards/margins": 19.5, "rewards/rejected": -20.5, "step": 1525 }, { "epoch": 0.5683426443202979, "grad_norm": 0.00016880035400390625, "learning_rate": 8.426943241618643e-07, "logits/chosen": 0.123046875, "logits/rejected": 0.416015625, "logps/chosen": -0.2890625, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.453125, "rewards/margins": 16.0, "rewards/rejected": -17.5, "step": 1526 }, { "epoch": 0.5687150837988827, "grad_norm": 0.0234375, "learning_rate": 8.415264524048918e-07, "logits/chosen": 0.1171875, "logits/rejected": -0.10009765625, "logps/chosen": -0.49609375, "logps/rejected": -2.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.484375, "rewards/margins": 11.875, "rewards/rejected": -14.375, "step": 1527 }, { "epoch": 0.5690875232774674, "grad_norm": 15.3125, "learning_rate": 8.403586795180864e-07, "logits/chosen": -0.08984375, "logits/rejected": 0.2734375, "logps/chosen": -1.3515625, "logps/rejected": -2.59375, "loss": 0.0176, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.75, "rewards/margins": 6.25, "rewards/rejected": -13.0, "step": 1528 }, { "epoch": 0.5694599627560522, "grad_norm": 0.00020694732666015625, "learning_rate": 8.391910074759797e-07, "logits/chosen": 0.21875, "logits/rejected": 0.1376953125, "logps/chosen": -0.7109375, "logps/rejected": -3.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.53125, "rewards/margins": 16.375, "rewards/rejected": -19.875, "step": 1529 }, { "epoch": 0.5698324022346368, "grad_norm": 4.744529724121094e-05, "learning_rate": 8.380234382529341e-07, "logits/chosen": 0.028564453125, "logits/rejected": 0.41796875, "logps/chosen": -0.193359375, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.96484375, "rewards/margins": 17.625, "rewards/rejected": -18.625, "step": 1530 }, { "epoch": 0.5702048417132216, "grad_norm": 0.033935546875, "learning_rate": 8.368559738231371e-07, "logits/chosen": -0.0302734375, "logits/rejected": -0.09521484375, "logps/chosen": -0.30078125, "logps/rejected": -2.4375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5, "rewards/margins": 10.625, "rewards/rejected": -12.125, "step": 1531 }, { "epoch": 0.5705772811918063, "grad_norm": 0.00077056884765625, "learning_rate": 8.356886161605991e-07, "logits/chosen": -0.1328125, "logits/rejected": 0.150390625, "logps/chosen": -0.49609375, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.46875, "rewards/margins": 15.375, "rewards/rejected": -17.875, "step": 1532 }, { "epoch": 0.5709497206703911, "grad_norm": 12.3125, "learning_rate": 8.345213672391506e-07, "logits/chosen": -0.125, "logits/rejected": 0.1123046875, "logps/chosen": -1.546875, "logps/rejected": -3.59375, "loss": 0.0109, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.75, "rewards/margins": 10.25, "rewards/rejected": -18.0, "step": 1533 }, { "epoch": 0.5713221601489757, "grad_norm": 7.05718994140625e-05, "learning_rate": 8.333542290324375e-07, "logits/chosen": 0.09375, "logits/rejected": 0.34375, "logps/chosen": -0.5390625, "logps/rejected": -4.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6875, "rewards/margins": 17.5, "rewards/rejected": -20.25, "step": 1534 }, { "epoch": 0.5716945996275605, "grad_norm": 0.1572265625, "learning_rate": 8.321872035139188e-07, "logits/chosen": 0.03662109375, "logits/rejected": -0.4140625, "logps/chosen": -0.2021484375, "logps/rejected": -2.765625, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0078125, "rewards/margins": 12.8125, "rewards/rejected": -13.8125, "step": 1535 }, { "epoch": 0.5720670391061452, "grad_norm": 0.005279541015625, "learning_rate": 8.310202926568633e-07, "logits/chosen": 0.1875, "logits/rejected": 0.287109375, "logps/chosen": -0.4375, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1875, "rewards/margins": 15.5, "rewards/rejected": -17.75, "step": 1536 }, { "epoch": 0.57243947858473, "grad_norm": 0.0003452301025390625, "learning_rate": 8.298534984343451e-07, "logits/chosen": 0.1943359375, "logits/rejected": 0.2216796875, "logps/chosen": -0.54296875, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.71875, "rewards/margins": 15.9375, "rewards/rejected": -18.625, "step": 1537 }, { "epoch": 0.5728119180633147, "grad_norm": 0.023681640625, "learning_rate": 8.28686822819242e-07, "logits/chosen": 0.039794921875, "logits/rejected": -0.31640625, "logps/chosen": -0.33984375, "logps/rejected": -2.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6953125, "rewards/margins": 12.6875, "rewards/rejected": -14.375, "step": 1538 }, { "epoch": 0.5731843575418994, "grad_norm": 0.0003814697265625, "learning_rate": 8.275202677842303e-07, "logits/chosen": 0.056640625, "logits/rejected": 0.1591796875, "logps/chosen": -0.33984375, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6953125, "rewards/margins": 16.25, "rewards/rejected": -18.0, "step": 1539 }, { "epoch": 0.5735567970204841, "grad_norm": 9.059906005859375e-05, "learning_rate": 8.263538353017837e-07, "logits/chosen": 0.05615234375, "logits/rejected": 0.384765625, "logps/chosen": -0.1953125, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9765625, "rewards/margins": 17.75, "rewards/rejected": -18.75, "step": 1540 }, { "epoch": 0.5739292364990689, "grad_norm": 0.25, "learning_rate": 8.251875273441675e-07, "logits/chosen": 0.044921875, "logits/rejected": -0.490234375, "logps/chosen": -0.34765625, "logps/rejected": -3.140625, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.734375, "rewards/margins": 13.9375, "rewards/rejected": -15.6875, "step": 1541 }, { "epoch": 0.5743016759776536, "grad_norm": 33.0, "learning_rate": 8.240213458834372e-07, "logits/chosen": -0.00153350830078125, "logits/rejected": 0.06982421875, "logps/chosen": -0.474609375, "logps/rejected": -2.796875, "loss": 0.0713, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 11.5625, "rewards/rejected": -14.0, "step": 1542 }, { "epoch": 0.5746741154562384, "grad_norm": 0.0289306640625, "learning_rate": 8.228552928914334e-07, "logits/chosen": 0.059814453125, "logits/rejected": 0.2255859375, "logps/chosen": -0.62890625, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.15625, "rewards/margins": 13.5625, "rewards/rejected": -16.75, "step": 1543 }, { "epoch": 0.575046554934823, "grad_norm": 0.0189208984375, "learning_rate": 8.216893703397808e-07, "logits/chosen": 0.0096435546875, "logits/rejected": -0.3203125, "logps/chosen": -0.25390625, "logps/rejected": -2.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.265625, "rewards/margins": 12.75, "rewards/rejected": -14.0, "step": 1544 }, { "epoch": 0.5754189944134078, "grad_norm": 0.005340576171875, "learning_rate": 8.205235801998828e-07, "logits/chosen": 0.11328125, "logits/rejected": -0.06591796875, "logps/chosen": -0.578125, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.890625, "rewards/margins": 13.0, "rewards/rejected": -15.875, "step": 1545 }, { "epoch": 0.5757914338919925, "grad_norm": 16.875, "learning_rate": 8.193579244429188e-07, "logits/chosen": -0.158203125, "logits/rejected": -0.65625, "logps/chosen": -0.921875, "logps/rejected": -2.34375, "loss": 0.0219, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.625, "rewards/margins": 7.125, "rewards/rejected": -11.75, "step": 1546 }, { "epoch": 0.5761638733705773, "grad_norm": 2.3245811462402344e-05, "learning_rate": 8.181924050398416e-07, "logits/chosen": 0.19921875, "logits/rejected": 0.55078125, "logps/chosen": -0.1904296875, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.94921875, "rewards/margins": 18.25, "rewards/rejected": -19.125, "step": 1547 }, { "epoch": 0.576536312849162, "grad_norm": 0.0002899169921875, "learning_rate": 8.170270239613724e-07, "logits/chosen": 0.1416015625, "logits/rejected": 0.890625, "logps/chosen": -0.314453125, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5625, "rewards/margins": 16.5, "rewards/rejected": -18.0, "step": 1548 }, { "epoch": 0.5769087523277467, "grad_norm": 0.00762939453125, "learning_rate": 8.158617831779998e-07, "logits/chosen": -0.05419921875, "logits/rejected": 0.2158203125, "logps/chosen": -1.140625, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.71875, "rewards/margins": 12.5625, "rewards/rejected": -18.25, "step": 1549 }, { "epoch": 0.5772811918063314, "grad_norm": 0.000457763671875, "learning_rate": 8.146966846599742e-07, "logits/chosen": -0.1337890625, "logits/rejected": 0.35546875, "logps/chosen": -0.60546875, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.015625, "rewards/margins": 16.0, "rewards/rejected": -19.0, "step": 1550 }, { "epoch": 0.5776536312849162, "grad_norm": 0.0002231597900390625, "learning_rate": 8.135317303773064e-07, "logits/chosen": 0.0712890625, "logits/rejected": 0.498046875, "logps/chosen": -0.326171875, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6328125, "rewards/margins": 17.25, "rewards/rejected": -18.875, "step": 1551 }, { "epoch": 0.5780260707635009, "grad_norm": 0.000865936279296875, "learning_rate": 8.123669222997618e-07, "logits/chosen": -0.0299072265625, "logits/rejected": 0.10302734375, "logps/chosen": -0.1796875, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8984375, "rewards/margins": 15.375, "rewards/rejected": -16.25, "step": 1552 }, { "epoch": 0.5783985102420857, "grad_norm": 0.00148773193359375, "learning_rate": 8.112022623968603e-07, "logits/chosen": -0.11279296875, "logits/rejected": 0.58203125, "logps/chosen": -0.578125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.890625, "rewards/margins": 14.5, "rewards/rejected": -17.375, "step": 1553 }, { "epoch": 0.5787709497206703, "grad_norm": 0.046875, "learning_rate": 8.100377526378697e-07, "logits/chosen": -0.00238037109375, "logits/rejected": 0.451171875, "logps/chosen": -0.27734375, "logps/rejected": -3.15625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.390625, "rewards/margins": 14.375, "rewards/rejected": -15.75, "step": 1554 }, { "epoch": 0.5791433891992551, "grad_norm": 0.00125885009765625, "learning_rate": 8.088733949918054e-07, "logits/chosen": -0.08154296875, "logits/rejected": 0.1103515625, "logps/chosen": -0.7109375, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5625, "rewards/margins": 14.5, "rewards/rejected": -18.0, "step": 1555 }, { "epoch": 0.5795158286778399, "grad_norm": 0.005645751953125, "learning_rate": 8.077091914274245e-07, "logits/chosen": 0.037109375, "logits/rejected": 0.404296875, "logps/chosen": -0.25, "logps/rejected": -3.328125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 15.375, "rewards/rejected": -16.625, "step": 1556 }, { "epoch": 0.5798882681564246, "grad_norm": 0.00012874603271484375, "learning_rate": 8.06545143913224e-07, "logits/chosen": 0.068359375, "logits/rejected": 0.396484375, "logps/chosen": -0.142578125, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7109375, "rewards/margins": 16.375, "rewards/rejected": -17.0, "step": 1557 }, { "epoch": 0.5802607076350094, "grad_norm": 0.423828125, "learning_rate": 8.053812544174368e-07, "logits/chosen": 0.1455078125, "logits/rejected": 0.68359375, "logps/chosen": -0.328125, "logps/rejected": -2.890625, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.640625, "rewards/margins": 12.8125, "rewards/rejected": -14.4375, "step": 1558 }, { "epoch": 0.580633147113594, "grad_norm": 0.0001621246337890625, "learning_rate": 8.042175249080295e-07, "logits/chosen": 0.30859375, "logits/rejected": 0.38671875, "logps/chosen": -0.216796875, "logps/rejected": -3.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.078125, "rewards/margins": 16.625, "rewards/rejected": -17.75, "step": 1559 }, { "epoch": 0.5810055865921788, "grad_norm": 0.431640625, "learning_rate": 8.030539573526968e-07, "logits/chosen": -0.064453125, "logits/rejected": 0.51953125, "logps/chosen": -0.38671875, "logps/rejected": -2.5625, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9375, "rewards/margins": 10.875, "rewards/rejected": -12.875, "step": 1560 }, { "epoch": 0.5813780260707635, "grad_norm": 0.41015625, "learning_rate": 8.018905537188609e-07, "logits/chosen": 0.09423828125, "logits/rejected": -0.259765625, "logps/chosen": -0.5859375, "logps/rejected": -3.0, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.921875, "rewards/margins": 12.0625, "rewards/rejected": -15.0, "step": 1561 }, { "epoch": 0.5817504655493483, "grad_norm": 0.0002422332763671875, "learning_rate": 8.007273159736656e-07, "logits/chosen": 0.09033203125, "logits/rejected": 0.318359375, "logps/chosen": -0.22265625, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.109375, "rewards/margins": 15.875, "rewards/rejected": -17.0, "step": 1562 }, { "epoch": 0.582122905027933, "grad_norm": 0.0016326904296875, "learning_rate": 7.995642460839751e-07, "logits/chosen": 0.12060546875, "logits/rejected": 0.427734375, "logps/chosen": -0.5859375, "logps/rejected": -3.421875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9375, "rewards/margins": 14.125, "rewards/rejected": -17.125, "step": 1563 }, { "epoch": 0.5824953445065177, "grad_norm": 0.0001354217529296875, "learning_rate": 7.984013460163695e-07, "logits/chosen": 0.1318359375, "logits/rejected": 0.291015625, "logps/chosen": -0.32421875, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.625, "rewards/margins": 17.125, "rewards/rejected": -18.75, "step": 1564 }, { "epoch": 0.5828677839851024, "grad_norm": 0.00018787384033203125, "learning_rate": 7.972386177371415e-07, "logits/chosen": 0.09814453125, "logits/rejected": 0.349609375, "logps/chosen": -0.4453125, "logps/rejected": -3.640625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 15.9375, "rewards/rejected": -18.25, "step": 1565 }, { "epoch": 0.5832402234636872, "grad_norm": 0.0283203125, "learning_rate": 7.96076063212294e-07, "logits/chosen": 0.0244140625, "logits/rejected": 0.75390625, "logps/chosen": -0.8203125, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.09375, "rewards/margins": 11.8125, "rewards/rejected": -15.875, "step": 1566 }, { "epoch": 0.5836126629422719, "grad_norm": 0.0203857421875, "learning_rate": 7.949136844075351e-07, "logits/chosen": 0.197265625, "logits/rejected": -0.03173828125, "logps/chosen": -0.71484375, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.578125, "rewards/margins": 14.25, "rewards/rejected": -17.75, "step": 1567 }, { "epoch": 0.5839851024208567, "grad_norm": 0.027587890625, "learning_rate": 7.937514832882767e-07, "logits/chosen": 0.050537109375, "logits/rejected": -0.32421875, "logps/chosen": -0.361328125, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8125, "rewards/margins": 14.25, "rewards/rejected": -16.0, "step": 1568 }, { "epoch": 0.5843575418994413, "grad_norm": 0.005706787109375, "learning_rate": 7.9258946181963e-07, "logits/chosen": 0.09326171875, "logits/rejected": 0.4609375, "logps/chosen": -0.5546875, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.765625, "rewards/margins": 15.5625, "rewards/rejected": -18.25, "step": 1569 }, { "epoch": 0.5847299813780261, "grad_norm": 0.0079345703125, "learning_rate": 7.914276219664024e-07, "logits/chosen": 0.10498046875, "logits/rejected": 0.2158203125, "logps/chosen": -0.1708984375, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.85546875, "rewards/margins": 14.875, "rewards/rejected": -15.6875, "step": 1570 }, { "epoch": 0.5851024208566108, "grad_norm": 128.0, "learning_rate": 7.90265965693094e-07, "logits/chosen": 0.10791015625, "logits/rejected": 0.408203125, "logps/chosen": -0.74609375, "logps/rejected": -2.46875, "loss": 0.1191, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.75, "rewards/margins": 8.5625, "rewards/rejected": -12.3125, "step": 1571 }, { "epoch": 0.5854748603351956, "grad_norm": 5.155801773071289e-06, "learning_rate": 7.891044949638948e-07, "logits/chosen": 0.25390625, "logits/rejected": 0.35546875, "logps/chosen": -0.244140625, "logps/rejected": -4.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.21875, "rewards/margins": 20.25, "rewards/rejected": -21.5, "step": 1572 }, { "epoch": 0.5858472998137803, "grad_norm": 0.0002536773681640625, "learning_rate": 7.879432117426807e-07, "logits/chosen": 0.11279296875, "logits/rejected": 0.54296875, "logps/chosen": -0.2216796875, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.109375, "rewards/margins": 16.75, "rewards/rejected": -17.75, "step": 1573 }, { "epoch": 0.586219739292365, "grad_norm": 0.119140625, "learning_rate": 7.867821179930108e-07, "logits/chosen": -0.1904296875, "logits/rejected": -0.2275390625, "logps/chosen": -0.3671875, "logps/rejected": -2.25, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.828125, "rewards/margins": 9.4375, "rewards/rejected": -11.25, "step": 1574 }, { "epoch": 0.5865921787709497, "grad_norm": 0.0107421875, "learning_rate": 7.856212156781243e-07, "logits/chosen": 0.0869140625, "logits/rejected": 0.69140625, "logps/chosen": -0.63671875, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1875, "rewards/margins": 12.1875, "rewards/rejected": -15.375, "step": 1575 }, { "epoch": 0.5869646182495345, "grad_norm": 0.00274658203125, "learning_rate": 7.844605067609356e-07, "logits/chosen": 0.07177734375, "logits/rejected": 0.298828125, "logps/chosen": -0.46875, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.34375, "rewards/margins": 14.375, "rewards/rejected": -16.75, "step": 1576 }, { "epoch": 0.5873370577281192, "grad_norm": 0.00225830078125, "learning_rate": 7.832999932040329e-07, "logits/chosen": -0.09716796875, "logits/rejected": 0.3203125, "logps/chosen": -0.5546875, "logps/rejected": -3.421875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.765625, "rewards/margins": 14.3125, "rewards/rejected": -17.0, "step": 1577 }, { "epoch": 0.587709497206704, "grad_norm": 0.0009002685546875, "learning_rate": 7.821396769696745e-07, "logits/chosen": -0.0849609375, "logits/rejected": 0.294921875, "logps/chosen": -0.1640625, "logps/rejected": -3.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.81640625, "rewards/margins": 14.375, "rewards/rejected": -15.1875, "step": 1578 }, { "epoch": 0.5880819366852886, "grad_norm": 5.340576171875e-05, "learning_rate": 7.809795600197836e-07, "logits/chosen": 0.09326171875, "logits/rejected": 0.486328125, "logps/chosen": -0.2001953125, "logps/rejected": -3.765625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0, "rewards/margins": 17.75, "rewards/rejected": -18.75, "step": 1579 }, { "epoch": 0.5884543761638734, "grad_norm": 0.0025634765625, "learning_rate": 7.798196443159478e-07, "logits/chosen": 0.051513671875, "logits/rejected": 0.2275390625, "logps/chosen": -0.66796875, "logps/rejected": -3.453125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.34375, "rewards/margins": 13.875, "rewards/rejected": -17.25, "step": 1580 }, { "epoch": 0.5888268156424581, "grad_norm": 0.032958984375, "learning_rate": 7.78659931819414e-07, "logits/chosen": -0.047119140625, "logits/rejected": 0.0556640625, "logps/chosen": -0.2119140625, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0625, "rewards/margins": 14.625, "rewards/rejected": -15.6875, "step": 1581 }, { "epoch": 0.5891992551210429, "grad_norm": 0.00408935546875, "learning_rate": 7.775004244910851e-07, "logits/chosen": 0.18359375, "logits/rejected": 0.486328125, "logps/chosen": -0.185546875, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9296875, "rewards/margins": 14.3125, "rewards/rejected": -15.25, "step": 1582 }, { "epoch": 0.5895716945996275, "grad_norm": 2.300739288330078e-05, "learning_rate": 7.763411242915177e-07, "logits/chosen": -0.0240478515625, "logits/rejected": 0.3125, "logps/chosen": -0.166015625, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.828125, "rewards/margins": 18.375, "rewards/rejected": -19.25, "step": 1583 }, { "epoch": 0.5899441340782123, "grad_norm": 0.0050048828125, "learning_rate": 7.751820331809175e-07, "logits/chosen": -0.006866455078125, "logits/rejected": 0.44140625, "logps/chosen": -0.8515625, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.25, "rewards/margins": 14.8125, "rewards/rejected": -19.0, "step": 1584 }, { "epoch": 0.590316573556797, "grad_norm": 38.0, "learning_rate": 7.740231531191375e-07, "logits/chosen": 0.049560546875, "logits/rejected": -0.306640625, "logps/chosen": -0.77734375, "logps/rejected": -2.59375, "loss": 0.0525, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.875, "rewards/margins": 9.125, "rewards/rejected": -13.0, "step": 1585 }, { "epoch": 0.5906890130353818, "grad_norm": 0.1767578125, "learning_rate": 7.728644860656727e-07, "logits/chosen": 0.072265625, "logits/rejected": -0.71484375, "logps/chosen": -0.2216796875, "logps/rejected": -2.375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.109375, "rewards/margins": 10.75, "rewards/rejected": -11.875, "step": 1586 }, { "epoch": 0.5910614525139665, "grad_norm": 0.0023651123046875, "learning_rate": 7.717060339796596e-07, "logits/chosen": -0.15234375, "logits/rejected": 0.45703125, "logps/chosen": -0.2734375, "logps/rejected": -3.234375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.375, "rewards/margins": 14.75, "rewards/rejected": -16.125, "step": 1587 }, { "epoch": 0.5914338919925513, "grad_norm": 1.9550323486328125e-05, "learning_rate": 7.705477988198697e-07, "logits/chosen": -0.083984375, "logits/rejected": 0.42578125, "logps/chosen": -0.1796875, "logps/rejected": -3.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.89453125, "rewards/margins": 18.875, "rewards/rejected": -19.75, "step": 1588 }, { "epoch": 0.5918063314711359, "grad_norm": 0.0595703125, "learning_rate": 7.693897825447085e-07, "logits/chosen": -0.01422119140625, "logits/rejected": 0.265625, "logps/chosen": -0.30078125, "logps/rejected": -2.84375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5, "rewards/margins": 12.75, "rewards/rejected": -14.25, "step": 1589 }, { "epoch": 0.5921787709497207, "grad_norm": 2.562999725341797e-05, "learning_rate": 7.682319871122108e-07, "logits/chosen": 0.12109375, "logits/rejected": 0.404296875, "logps/chosen": -0.1650390625, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.828125, "rewards/margins": 18.5, "rewards/rejected": -19.375, "step": 1590 }, { "epoch": 0.5925512104283054, "grad_norm": 0.004150390625, "learning_rate": 7.67074414480039e-07, "logits/chosen": 0.044677734375, "logits/rejected": 0.59765625, "logps/chosen": -0.55859375, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.78125, "rewards/margins": 15.5, "rewards/rejected": -18.25, "step": 1591 }, { "epoch": 0.5929236499068902, "grad_norm": 0.0008697509765625, "learning_rate": 7.659170666054773e-07, "logits/chosen": -0.09423828125, "logits/rejected": 0.08154296875, "logps/chosen": -0.61328125, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0625, "rewards/margins": 14.4375, "rewards/rejected": -17.5, "step": 1592 }, { "epoch": 0.5932960893854748, "grad_norm": 0.005645751953125, "learning_rate": 7.647599454454312e-07, "logits/chosen": 0.2412109375, "logits/rejected": 0.37890625, "logps/chosen": -0.21875, "logps/rejected": -3.171875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.09375, "rewards/margins": 14.75, "rewards/rejected": -15.875, "step": 1593 }, { "epoch": 0.5936685288640596, "grad_norm": 0.00335693359375, "learning_rate": 7.636030529564223e-07, "logits/chosen": -0.11767578125, "logits/rejected": 0.30859375, "logps/chosen": -0.65234375, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.265625, "rewards/margins": 15.5, "rewards/rejected": -18.75, "step": 1594 }, { "epoch": 0.5940409683426443, "grad_norm": 0.00046539306640625, "learning_rate": 7.624463910945855e-07, "logits/chosen": 0.1240234375, "logits/rejected": 0.41015625, "logps/chosen": -0.265625, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 15.0, "rewards/rejected": -16.375, "step": 1595 }, { "epoch": 0.5944134078212291, "grad_norm": 0.046875, "learning_rate": 7.61289961815666e-07, "logits/chosen": 0.0458984375, "logits/rejected": -0.51953125, "logps/chosen": -0.671875, "logps/rejected": -2.734375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.359375, "rewards/margins": 10.375, "rewards/rejected": -13.6875, "step": 1596 }, { "epoch": 0.5947858472998138, "grad_norm": 0.400390625, "learning_rate": 7.601337670750156e-07, "logits/chosen": 0.033203125, "logits/rejected": 0.640625, "logps/chosen": -0.18359375, "logps/rejected": -2.703125, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.91796875, "rewards/margins": 12.5625, "rewards/rejected": -13.5, "step": 1597 }, { "epoch": 0.5951582867783985, "grad_norm": 0.03759765625, "learning_rate": 7.589778088275893e-07, "logits/chosen": -0.1279296875, "logits/rejected": 0.29296875, "logps/chosen": -0.828125, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.125, "rewards/margins": 13.5625, "rewards/rejected": -17.75, "step": 1598 }, { "epoch": 0.5955307262569832, "grad_norm": 0.1689453125, "learning_rate": 7.578220890279426e-07, "logits/chosen": -0.05712890625, "logits/rejected": -0.94921875, "logps/chosen": -0.29296875, "logps/rejected": -2.25, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.46875, "rewards/margins": 9.75, "rewards/rejected": -11.1875, "step": 1599 }, { "epoch": 0.595903165735568, "grad_norm": 0.84375, "learning_rate": 7.566666096302279e-07, "logits/chosen": -0.07470703125, "logits/rejected": -0.16796875, "logps/chosen": -0.87109375, "logps/rejected": -2.34375, "loss": 0.0015, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.375, "rewards/margins": 7.375, "rewards/rejected": -11.75, "step": 1600 }, { "epoch": 0.5962756052141527, "grad_norm": 0.0011444091796875, "learning_rate": 7.555113725881904e-07, "logits/chosen": -0.1171875, "logits/rejected": 0.2314453125, "logps/chosen": -0.345703125, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7265625, "rewards/margins": 15.875, "rewards/rejected": -17.625, "step": 1601 }, { "epoch": 0.5966480446927375, "grad_norm": 0.00244140625, "learning_rate": 7.543563798551665e-07, "logits/chosen": 0.1640625, "logits/rejected": 0.462890625, "logps/chosen": -0.58984375, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.953125, "rewards/margins": 14.0, "rewards/rejected": -16.875, "step": 1602 }, { "epoch": 0.5970204841713221, "grad_norm": 0.384765625, "learning_rate": 7.532016333840784e-07, "logits/chosen": 0.1259765625, "logits/rejected": 0.6171875, "logps/chosen": -0.3203125, "logps/rejected": -1.9375, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 8.125, "rewards/rejected": -9.6875, "step": 1603 }, { "epoch": 0.5973929236499069, "grad_norm": 0.0003910064697265625, "learning_rate": 7.520471351274332e-07, "logits/chosen": -0.04736328125, "logits/rejected": 0.158203125, "logps/chosen": -0.2294921875, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.140625, "rewards/margins": 15.5, "rewards/rejected": -16.75, "step": 1604 }, { "epoch": 0.5977653631284916, "grad_norm": 4.100799560546875e-05, "learning_rate": 7.508928870373165e-07, "logits/chosen": 0.091796875, "logits/rejected": 0.31640625, "logps/chosen": -0.23828125, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1875, "rewards/margins": 17.75, "rewards/rejected": -19.0, "step": 1605 }, { "epoch": 0.5981378026070764, "grad_norm": 14.0625, "learning_rate": 7.497388910653933e-07, "logits/chosen": -0.1005859375, "logits/rejected": -0.484375, "logps/chosen": -0.703125, "logps/rejected": -2.71875, "loss": 0.019, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.53125, "rewards/margins": 10.0625, "rewards/rejected": -13.625, "step": 1606 }, { "epoch": 0.5985102420856611, "grad_norm": 13.1875, "learning_rate": 7.485851491629005e-07, "logits/chosen": 0.08740234375, "logits/rejected": 0.216796875, "logps/chosen": -1.4140625, "logps/rejected": -2.734375, "loss": 0.0137, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0625, "rewards/margins": 6.5625, "rewards/rejected": -13.625, "step": 1607 }, { "epoch": 0.5988826815642458, "grad_norm": 11.6875, "learning_rate": 7.47431663280646e-07, "logits/chosen": 0.0966796875, "logits/rejected": 0.291015625, "logps/chosen": -0.734375, "logps/rejected": -2.8125, "loss": 0.0161, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.65625, "rewards/margins": 10.375, "rewards/rejected": -14.0625, "step": 1608 }, { "epoch": 0.5992551210428305, "grad_norm": 9.202957153320312e-05, "learning_rate": 7.462784353690046e-07, "logits/chosen": 0.00421142578125, "logits/rejected": 0.328125, "logps/chosen": -0.138671875, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6953125, "rewards/margins": 16.75, "rewards/rejected": -17.375, "step": 1609 }, { "epoch": 0.5996275605214153, "grad_norm": 0.181640625, "learning_rate": 7.451254673779149e-07, "logits/chosen": 0.1201171875, "logits/rejected": 0.9609375, "logps/chosen": -0.5625, "logps/rejected": -2.890625, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8125, "rewards/margins": 11.625, "rewards/rejected": -14.4375, "step": 1610 }, { "epoch": 0.6, "grad_norm": 0.018310546875, "learning_rate": 7.439727612568766e-07, "logits/chosen": 0.12890625, "logits/rejected": 0.054443359375, "logps/chosen": -0.28515625, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4296875, "rewards/margins": 14.875, "rewards/rejected": -16.25, "step": 1611 }, { "epoch": 0.6003724394785848, "grad_norm": 4.291534423828125e-05, "learning_rate": 7.428203189549459e-07, "logits/chosen": 0.0400390625, "logits/rejected": 0.31640625, "logps/chosen": -0.2734375, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.359375, "rewards/margins": 17.75, "rewards/rejected": -19.0, "step": 1612 }, { "epoch": 0.6007448789571694, "grad_norm": 1.9073486328125e-05, "learning_rate": 7.416681424207333e-07, "logits/chosen": 0.2041015625, "logits/rejected": 0.302734375, "logps/chosen": -0.33203125, "logps/rejected": -4.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.671875, "rewards/margins": 18.875, "rewards/rejected": -20.5, "step": 1613 }, { "epoch": 0.6011173184357542, "grad_norm": 0.1884765625, "learning_rate": 7.405162336023994e-07, "logits/chosen": -0.07421875, "logits/rejected": 0.53515625, "logps/chosen": -0.328125, "logps/rejected": -2.90625, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.640625, "rewards/margins": 12.875, "rewards/rejected": -14.5, "step": 1614 }, { "epoch": 0.6014897579143389, "grad_norm": 0.2060546875, "learning_rate": 7.393645944476532e-07, "logits/chosen": 0.08740234375, "logits/rejected": -0.55078125, "logps/chosen": -0.4296875, "logps/rejected": -2.6875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 11.3125, "rewards/rejected": -13.5, "step": 1615 }, { "epoch": 0.6018621973929237, "grad_norm": 0.01385498046875, "learning_rate": 7.382132269037468e-07, "logits/chosen": 0.09130859375, "logits/rejected": -0.326171875, "logps/chosen": -0.23828125, "logps/rejected": -2.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1875, "rewards/margins": 12.6875, "rewards/rejected": -13.875, "step": 1616 }, { "epoch": 0.6022346368715084, "grad_norm": 2.671875, "learning_rate": 7.370621329174735e-07, "logits/chosen": 0.1953125, "logits/rejected": 0.8359375, "logps/chosen": -0.23046875, "logps/rejected": -2.25, "loss": 0.0043, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.15625, "rewards/margins": 10.125, "rewards/rejected": -11.25, "step": 1617 }, { "epoch": 0.6026070763500931, "grad_norm": 0.0028533935546875, "learning_rate": 7.359113144351636e-07, "logits/chosen": -0.09033203125, "logits/rejected": 0.361328125, "logps/chosen": -0.490234375, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.453125, "rewards/margins": 14.9375, "rewards/rejected": -17.375, "step": 1618 }, { "epoch": 0.6029795158286778, "grad_norm": 0.0203857421875, "learning_rate": 7.34760773402682e-07, "logits/chosen": -0.07421875, "logits/rejected": -0.1962890625, "logps/chosen": -0.2255859375, "logps/rejected": -2.890625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.125, "rewards/margins": 13.375, "rewards/rejected": -14.5, "step": 1619 }, { "epoch": 0.6033519553072626, "grad_norm": 0.040771484375, "learning_rate": 7.336105117654245e-07, "logits/chosen": -0.046875, "logits/rejected": 0.197265625, "logps/chosen": -0.2109375, "logps/rejected": -2.359375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0546875, "rewards/margins": 10.75, "rewards/rejected": -11.75, "step": 1620 }, { "epoch": 0.6037243947858473, "grad_norm": 0.00015735626220703125, "learning_rate": 7.324605314683142e-07, "logits/chosen": 0.09326171875, "logits/rejected": 0.43359375, "logps/chosen": -0.2431640625, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.21875, "rewards/margins": 17.0, "rewards/rejected": -18.25, "step": 1621 }, { "epoch": 0.6040968342644321, "grad_norm": 54.0, "learning_rate": 7.313108344557982e-07, "logits/chosen": 0.0732421875, "logits/rejected": 0.482421875, "logps/chosen": -0.421875, "logps/rejected": -2.34375, "loss": 0.1035, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.109375, "rewards/margins": 9.5625, "rewards/rejected": -11.75, "step": 1622 }, { "epoch": 0.6044692737430167, "grad_norm": 9.441375732421875e-05, "learning_rate": 7.301614226718455e-07, "logits/chosen": 0.1279296875, "logits/rejected": 0.203125, "logps/chosen": -0.126953125, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6328125, "rewards/margins": 18.375, "rewards/rejected": -19.0, "step": 1623 }, { "epoch": 0.6048417132216015, "grad_norm": 0.068359375, "learning_rate": 7.290122980599422e-07, "logits/chosen": -0.1689453125, "logits/rejected": 0.3515625, "logps/chosen": -0.8125, "logps/rejected": -3.46875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0625, "rewards/margins": 13.375, "rewards/rejected": -17.375, "step": 1624 }, { "epoch": 0.6052141527001862, "grad_norm": 0.04443359375, "learning_rate": 7.278634625630892e-07, "logits/chosen": -0.09765625, "logits/rejected": 0.224609375, "logps/chosen": -0.65625, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 14.3125, "rewards/rejected": -17.625, "step": 1625 }, { "epoch": 0.605586592178771, "grad_norm": 0.421875, "learning_rate": 7.267149181237979e-07, "logits/chosen": -0.130859375, "logits/rejected": 0.25, "logps/chosen": -0.5546875, "logps/rejected": -3.0625, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.765625, "rewards/margins": 12.625, "rewards/rejected": -15.375, "step": 1626 }, { "epoch": 0.6059590316573557, "grad_norm": 0.002166748046875, "learning_rate": 7.255666666840885e-07, "logits/chosen": -0.02978515625, "logits/rejected": 0.08203125, "logps/chosen": -0.3671875, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.828125, "rewards/margins": 15.5625, "rewards/rejected": -17.375, "step": 1627 }, { "epoch": 0.6063314711359404, "grad_norm": 0.486328125, "learning_rate": 7.244187101854845e-07, "logits/chosen": -0.0059814453125, "logits/rejected": 0.54296875, "logps/chosen": -0.55859375, "logps/rejected": -2.96875, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.796875, "rewards/margins": 12.0625, "rewards/rejected": -14.875, "step": 1628 }, { "epoch": 0.6067039106145251, "grad_norm": 1.728534698486328e-05, "learning_rate": 7.232710505690122e-07, "logits/chosen": 0.08349609375, "logits/rejected": 0.44140625, "logps/chosen": -0.205078125, "logps/rejected": -3.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.03125, "rewards/margins": 18.75, "rewards/rejected": -19.875, "step": 1629 }, { "epoch": 0.6070763500931099, "grad_norm": 7.96875, "learning_rate": 7.22123689775195e-07, "logits/chosen": -0.166015625, "logits/rejected": 0.73828125, "logps/chosen": -0.6953125, "logps/rejected": -2.8125, "loss": 0.0075, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.46875, "rewards/margins": 10.625, "rewards/rejected": -14.0625, "step": 1630 }, { "epoch": 0.6074487895716946, "grad_norm": 214.0, "learning_rate": 7.209766297440509e-07, "logits/chosen": 0.10009765625, "logits/rejected": 0.51171875, "logps/chosen": -0.9765625, "logps/rejected": -2.84375, "loss": 0.3613, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.90625, "rewards/margins": 9.25, "rewards/rejected": -14.125, "step": 1631 }, { "epoch": 0.6078212290502794, "grad_norm": 8.296966552734375e-05, "learning_rate": 7.198298724150901e-07, "logits/chosen": 0.181640625, "logits/rejected": 0.5859375, "logps/chosen": -0.52734375, "logps/rejected": -4.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.640625, "rewards/margins": 17.375, "rewards/rejected": -20.0, "step": 1632 }, { "epoch": 0.608193668528864, "grad_norm": 5.5625, "learning_rate": 7.186834197273101e-07, "logits/chosen": 0.04248046875, "logits/rejected": 0.369140625, "logps/chosen": -0.3359375, "logps/rejected": -2.640625, "loss": 0.0088, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.671875, "rewards/margins": 11.5, "rewards/rejected": -13.25, "step": 1633 }, { "epoch": 0.6085661080074488, "grad_norm": 0.0002231597900390625, "learning_rate": 7.17537273619194e-07, "logits/chosen": 0.1044921875, "logits/rejected": 0.419921875, "logps/chosen": -0.51953125, "logps/rejected": -3.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.59375, "rewards/margins": 17.25, "rewards/rejected": -19.875, "step": 1634 }, { "epoch": 0.6089385474860335, "grad_norm": 0.0267333984375, "learning_rate": 7.163914360287063e-07, "logits/chosen": -0.0888671875, "logits/rejected": 0.392578125, "logps/chosen": -0.322265625, "logps/rejected": -2.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 13.0, "rewards/rejected": -14.5625, "step": 1635 }, { "epoch": 0.6093109869646183, "grad_norm": 6.6875, "learning_rate": 7.152459088932899e-07, "logits/chosen": -0.142578125, "logits/rejected": 0.2890625, "logps/chosen": -1.3828125, "logps/rejected": -2.78125, "loss": 0.0066, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.90625, "rewards/margins": 6.96875, "rewards/rejected": -13.875, "step": 1636 }, { "epoch": 0.609683426443203, "grad_norm": 5.09375, "learning_rate": 7.141006941498623e-07, "logits/chosen": 0.032958984375, "logits/rejected": -0.52734375, "logps/chosen": -0.9140625, "logps/rejected": -2.265625, "loss": 0.0078, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.5625, "rewards/margins": 6.75, "rewards/rejected": -11.375, "step": 1637 }, { "epoch": 0.6100558659217877, "grad_norm": 0.046875, "learning_rate": 7.129557937348135e-07, "logits/chosen": 0.1376953125, "logits/rejected": 0.443359375, "logps/chosen": -0.73828125, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6875, "rewards/margins": 14.375, "rewards/rejected": -18.0, "step": 1638 }, { "epoch": 0.6104283054003724, "grad_norm": 8.153915405273438e-05, "learning_rate": 7.118112095840015e-07, "logits/chosen": -0.06591796875, "logits/rejected": 0.21484375, "logps/chosen": -0.166015625, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8359375, "rewards/margins": 17.125, "rewards/rejected": -18.0, "step": 1639 }, { "epoch": 0.6108007448789572, "grad_norm": 4.38690185546875e-05, "learning_rate": 7.106669436327499e-07, "logits/chosen": 0.1484375, "logits/rejected": 0.220703125, "logps/chosen": -0.1171875, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5859375, "rewards/margins": 18.0, "rewards/rejected": -18.625, "step": 1640 }, { "epoch": 0.6111731843575419, "grad_norm": 0.00026702880859375, "learning_rate": 7.095229978158436e-07, "logits/chosen": 0.15234375, "logits/rejected": 0.55078125, "logps/chosen": -0.25, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2578125, "rewards/margins": 16.75, "rewards/rejected": -18.0, "step": 1641 }, { "epoch": 0.6115456238361267, "grad_norm": 6.96875, "learning_rate": 7.083793740675273e-07, "logits/chosen": 0.0625, "logits/rejected": 0.734375, "logps/chosen": -0.50390625, "logps/rejected": -1.59375, "loss": 0.0134, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.515625, "rewards/margins": 5.4375, "rewards/rejected": -7.96875, "step": 1642 }, { "epoch": 0.6119180633147113, "grad_norm": 0.412109375, "learning_rate": 7.072360743214999e-07, "logits/chosen": 0.1162109375, "logits/rejected": -0.318359375, "logps/chosen": -0.3671875, "logps/rejected": -2.78125, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8359375, "rewards/margins": 12.0, "rewards/rejected": -13.875, "step": 1643 }, { "epoch": 0.6122905027932961, "grad_norm": 0.014892578125, "learning_rate": 7.060931005109138e-07, "logits/chosen": 0.169921875, "logits/rejected": 0.474609375, "logps/chosen": -0.453125, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25, "rewards/margins": 15.25, "rewards/rejected": -17.5, "step": 1644 }, { "epoch": 0.6126629422718808, "grad_norm": 1.055002212524414e-05, "learning_rate": 7.049504545683691e-07, "logits/chosen": 0.012939453125, "logits/rejected": 0.421875, "logps/chosen": -0.396484375, "logps/rejected": -4.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.984375, "rewards/margins": 19.25, "rewards/rejected": -21.25, "step": 1645 }, { "epoch": 0.6130353817504656, "grad_norm": 0.003204345703125, "learning_rate": 7.038081384259123e-07, "logits/chosen": -0.087890625, "logits/rejected": 0.291015625, "logps/chosen": -0.36328125, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8203125, "rewards/margins": 16.25, "rewards/rejected": -18.0, "step": 1646 }, { "epoch": 0.6134078212290502, "grad_norm": 0.0010833740234375, "learning_rate": 7.026661540150317e-07, "logits/chosen": -0.1435546875, "logits/rejected": 0.28125, "logps/chosen": -0.27734375, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3828125, "rewards/margins": 14.875, "rewards/rejected": -16.25, "step": 1647 }, { "epoch": 0.613780260707635, "grad_norm": 0.1611328125, "learning_rate": 7.015245032666548e-07, "logits/chosen": -0.0294189453125, "logits/rejected": -1.1171875, "logps/chosen": -0.119140625, "logps/rejected": -2.09375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.59375, "rewards/margins": 9.875, "rewards/rejected": -10.5, "step": 1648 }, { "epoch": 0.6141527001862197, "grad_norm": 0.58203125, "learning_rate": 7.003831881111455e-07, "logits/chosen": 0.083984375, "logits/rejected": -0.09912109375, "logps/chosen": -0.640625, "logps/rejected": -2.78125, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.21875, "rewards/margins": 10.75, "rewards/rejected": -13.9375, "step": 1649 }, { "epoch": 0.6145251396648045, "grad_norm": 0.0001354217529296875, "learning_rate": 6.992422104782995e-07, "logits/chosen": -0.193359375, "logits/rejected": 0.30078125, "logps/chosen": -0.63671875, "logps/rejected": -4.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1875, "rewards/margins": 18.25, "rewards/rejected": -21.375, "step": 1650 }, { "epoch": 0.6148975791433892, "grad_norm": 0.000659942626953125, "learning_rate": 6.981015722973424e-07, "logits/chosen": 0.11376953125, "logits/rejected": 0.322265625, "logps/chosen": -0.369140625, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.84375, "rewards/margins": 17.25, "rewards/rejected": -19.0, "step": 1651 }, { "epoch": 0.615270018621974, "grad_norm": 0.00022983551025390625, "learning_rate": 6.969612754969248e-07, "logits/chosen": 0.16796875, "logits/rejected": 0.5234375, "logps/chosen": -0.265625, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 16.0, "rewards/rejected": -17.25, "step": 1652 }, { "epoch": 0.6156424581005586, "grad_norm": 0.0517578125, "learning_rate": 6.958213220051219e-07, "logits/chosen": -0.07763671875, "logits/rejected": 0.359375, "logps/chosen": -0.23046875, "logps/rejected": -3.125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1484375, "rewards/margins": 14.5, "rewards/rejected": -15.625, "step": 1653 }, { "epoch": 0.6160148975791434, "grad_norm": 0.019287109375, "learning_rate": 6.946817137494268e-07, "logits/chosen": 0.013916015625, "logits/rejected": -0.1708984375, "logps/chosen": -0.3046875, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.53125, "rewards/margins": 13.9375, "rewards/rejected": -15.4375, "step": 1654 }, { "epoch": 0.6163873370577281, "grad_norm": 0.03466796875, "learning_rate": 6.935424526567498e-07, "logits/chosen": -0.056640625, "logits/rejected": -0.1865234375, "logps/chosen": -0.3046875, "logps/rejected": -3.265625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.515625, "rewards/margins": 14.8125, "rewards/rejected": -16.375, "step": 1655 }, { "epoch": 0.6167597765363129, "grad_norm": 0.09375, "learning_rate": 6.924035406534132e-07, "logits/chosen": -0.08251953125, "logits/rejected": -0.2119140625, "logps/chosen": -0.36328125, "logps/rejected": -2.5, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8125, "rewards/margins": 10.75, "rewards/rejected": -12.5, "step": 1656 }, { "epoch": 0.6171322160148975, "grad_norm": 0.00017547607421875, "learning_rate": 6.912649796651502e-07, "logits/chosen": 0.185546875, "logits/rejected": 0.349609375, "logps/chosen": -0.251953125, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2578125, "rewards/margins": 16.875, "rewards/rejected": -18.125, "step": 1657 }, { "epoch": 0.6175046554934823, "grad_norm": 0.00019741058349609375, "learning_rate": 6.901267716170996e-07, "logits/chosen": 0.2138671875, "logits/rejected": 0.54296875, "logps/chosen": -0.36328125, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8125, "rewards/margins": 16.75, "rewards/rejected": -18.5, "step": 1658 }, { "epoch": 0.617877094972067, "grad_norm": 0.002593994140625, "learning_rate": 6.889889184338038e-07, "logits/chosen": 0.1435546875, "logits/rejected": 0.408203125, "logps/chosen": -0.73828125, "logps/rejected": -3.734375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.703125, "rewards/margins": 15.0, "rewards/rejected": -18.75, "step": 1659 }, { "epoch": 0.6182495344506518, "grad_norm": 0.000507354736328125, "learning_rate": 6.878514220392054e-07, "logits/chosen": 0.1494140625, "logits/rejected": -0.10205078125, "logps/chosen": -0.1220703125, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.609375, "rewards/margins": 16.375, "rewards/rejected": -17.0, "step": 1660 }, { "epoch": 0.6186219739292365, "grad_norm": 0.34765625, "learning_rate": 6.867142843566431e-07, "logits/chosen": -0.119140625, "logits/rejected": 0.703125, "logps/chosen": -0.2412109375, "logps/rejected": -2.75, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.203125, "rewards/margins": 12.5, "rewards/rejected": -13.75, "step": 1661 }, { "epoch": 0.6189944134078212, "grad_norm": 0.003875732421875, "learning_rate": 6.855775073088495e-07, "logits/chosen": 0.0216064453125, "logits/rejected": 0.1796875, "logps/chosen": -0.6796875, "logps/rejected": -4.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.40625, "rewards/margins": 16.5, "rewards/rejected": -20.0, "step": 1662 }, { "epoch": 0.6193668528864059, "grad_norm": 0.000152587890625, "learning_rate": 6.844410928179478e-07, "logits/chosen": 0.04638671875, "logits/rejected": 0.25, "logps/chosen": -0.41796875, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.09375, "rewards/margins": 16.875, "rewards/rejected": -19.0, "step": 1663 }, { "epoch": 0.6197392923649907, "grad_norm": 0.00012111663818359375, "learning_rate": 6.833050428054472e-07, "logits/chosen": 0.0074462890625, "logits/rejected": 0.314453125, "logps/chosen": -0.2236328125, "logps/rejected": -3.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1171875, "rewards/margins": 17.75, "rewards/rejected": -19.0, "step": 1664 }, { "epoch": 0.6201117318435754, "grad_norm": 8.1875, "learning_rate": 6.821693591922415e-07, "logits/chosen": 0.1611328125, "logits/rejected": 0.400390625, "logps/chosen": -0.4140625, "logps/rejected": -2.65625, "loss": 0.0098, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 11.1875, "rewards/rejected": -13.25, "step": 1665 }, { "epoch": 0.6204841713221602, "grad_norm": 3.1948089599609375e-05, "learning_rate": 6.810340438986045e-07, "logits/chosen": 0.0033111572265625, "logits/rejected": 0.349609375, "logps/chosen": -0.263671875, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3203125, "rewards/margins": 19.0, "rewards/rejected": -20.25, "step": 1666 }, { "epoch": 0.6208566108007448, "grad_norm": 0.369140625, "learning_rate": 6.798990988441871e-07, "logits/chosen": -0.0115966796875, "logits/rejected": -0.07861328125, "logps/chosen": -0.220703125, "logps/rejected": -2.609375, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1015625, "rewards/margins": 11.9375, "rewards/rejected": -13.0, "step": 1667 }, { "epoch": 0.6212290502793296, "grad_norm": 0.333984375, "learning_rate": 6.78764525948015e-07, "logits/chosen": -0.130859375, "logits/rejected": -0.09716796875, "logps/chosen": -0.494140625, "logps/rejected": -2.875, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.46875, "rewards/margins": 11.875, "rewards/rejected": -14.3125, "step": 1668 }, { "epoch": 0.6216014897579143, "grad_norm": 0.455078125, "learning_rate": 6.776303271284836e-07, "logits/chosen": 0.17578125, "logits/rejected": 0.875, "logps/chosen": -0.349609375, "logps/rejected": -3.15625, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.75, "rewards/margins": 14.0625, "rewards/rejected": -15.8125, "step": 1669 }, { "epoch": 0.6219739292364991, "grad_norm": 0.00860595703125, "learning_rate": 6.764965043033563e-07, "logits/chosen": 0.2392578125, "logits/rejected": 0.8984375, "logps/chosen": -0.5546875, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.765625, "rewards/margins": 13.0625, "rewards/rejected": -15.875, "step": 1670 }, { "epoch": 0.6223463687150838, "grad_norm": 0.00677490234375, "learning_rate": 6.753630593897603e-07, "logits/chosen": 0.028076171875, "logits/rejected": 0.388671875, "logps/chosen": -0.69921875, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5, "rewards/margins": 12.625, "rewards/rejected": -16.125, "step": 1671 }, { "epoch": 0.6227188081936685, "grad_norm": 0.000518798828125, "learning_rate": 6.742299943041853e-07, "logits/chosen": 0.197265625, "logits/rejected": 0.287109375, "logps/chosen": -0.09375, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.46875, "rewards/margins": 15.0, "rewards/rejected": -15.4375, "step": 1672 }, { "epoch": 0.6230912476722532, "grad_norm": 294.0, "learning_rate": 6.730973109624768e-07, "logits/chosen": 0.0167236328125, "logits/rejected": -0.2392578125, "logps/chosen": -1.09375, "logps/rejected": -2.53125, "loss": 1.3359, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -5.46875, "rewards/margins": 7.1875, "rewards/rejected": -12.625, "step": 1673 }, { "epoch": 0.623463687150838, "grad_norm": 0.00177001953125, "learning_rate": 6.719650112798361e-07, "logits/chosen": 0.283203125, "logits/rejected": 0.318359375, "logps/chosen": -0.2236328125, "logps/rejected": -2.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1171875, "rewards/margins": 13.5625, "rewards/rejected": -14.6875, "step": 1674 }, { "epoch": 0.6238361266294227, "grad_norm": 9.012222290039062e-05, "learning_rate": 6.708330971708152e-07, "logits/chosen": 0.2470703125, "logits/rejected": 0.5078125, "logps/chosen": -0.1025390625, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.51171875, "rewards/margins": 18.25, "rewards/rejected": -18.75, "step": 1675 }, { "epoch": 0.6242085661080075, "grad_norm": 7.625, "learning_rate": 6.697015705493144e-07, "logits/chosen": -0.0296630859375, "logits/rejected": 0.369140625, "logps/chosen": -0.279296875, "logps/rejected": -2.265625, "loss": 0.0168, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.390625, "rewards/margins": 10.0, "rewards/rejected": -11.375, "step": 1676 }, { "epoch": 0.6245810055865921, "grad_norm": 218.0, "learning_rate": 6.685704333285787e-07, "logits/chosen": 0.05615234375, "logits/rejected": 0.0206298828125, "logps/chosen": -0.515625, "logps/rejected": -2.171875, "loss": 0.6914, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.59375, "rewards/margins": 8.25, "rewards/rejected": -10.8125, "step": 1677 }, { "epoch": 0.6249534450651769, "grad_norm": 0.005401611328125, "learning_rate": 6.674396874211948e-07, "logits/chosen": 0.09033203125, "logits/rejected": 0.33203125, "logps/chosen": -0.68359375, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.40625, "rewards/margins": 15.3125, "rewards/rejected": -18.75, "step": 1678 }, { "epoch": 0.6253258845437616, "grad_norm": 0.015625, "learning_rate": 6.663093347390878e-07, "logits/chosen": 0.185546875, "logits/rejected": -0.1083984375, "logps/chosen": -0.474609375, "logps/rejected": -3.453125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 14.875, "rewards/rejected": -17.25, "step": 1679 }, { "epoch": 0.6256983240223464, "grad_norm": 181.0, "learning_rate": 6.651793771935172e-07, "logits/chosen": 0.044189453125, "logits/rejected": 0.458984375, "logps/chosen": -0.443359375, "logps/rejected": -2.328125, "loss": 1.1406, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -2.21875, "rewards/margins": 9.375, "rewards/rejected": -11.625, "step": 1680 }, { "epoch": 0.626070763500931, "grad_norm": 8.7738037109375e-05, "learning_rate": 6.640498166950758e-07, "logits/chosen": 0.08642578125, "logits/rejected": 0.32421875, "logps/chosen": -0.38671875, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9375, "rewards/margins": 17.625, "rewards/rejected": -19.5, "step": 1681 }, { "epoch": 0.6264432029795158, "grad_norm": 0.0014190673828125, "learning_rate": 6.62920655153684e-07, "logits/chosen": 0.166015625, "logits/rejected": 0.345703125, "logps/chosen": -0.2109375, "logps/rejected": -3.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0546875, "rewards/margins": 16.75, "rewards/rejected": -17.75, "step": 1682 }, { "epoch": 0.6268156424581005, "grad_norm": 0.263671875, "learning_rate": 6.617918944785876e-07, "logits/chosen": -0.07373046875, "logits/rejected": -0.047119140625, "logps/chosen": -0.5625, "logps/rejected": -3.125, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.828125, "rewards/margins": 12.8125, "rewards/rejected": -15.625, "step": 1683 }, { "epoch": 0.6271880819366853, "grad_norm": 0.00021457672119140625, "learning_rate": 6.606635365783549e-07, "logits/chosen": 0.03271484375, "logits/rejected": 0.353515625, "logps/chosen": -0.33203125, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 15.875, "rewards/rejected": -17.5, "step": 1684 }, { "epoch": 0.62756052141527, "grad_norm": 0.185546875, "learning_rate": 6.595355833608735e-07, "logits/chosen": 0.265625, "logits/rejected": -0.48046875, "logps/chosen": -0.6953125, "logps/rejected": -3.015625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.46875, "rewards/margins": 11.625, "rewards/rejected": -15.125, "step": 1685 }, { "epoch": 0.6279329608938548, "grad_norm": 0.029052734375, "learning_rate": 6.584080367333458e-07, "logits/chosen": -0.1005859375, "logits/rejected": 0.1611328125, "logps/chosen": -0.984375, "logps/rejected": -3.453125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.9375, "rewards/margins": 12.3125, "rewards/rejected": -17.25, "step": 1686 }, { "epoch": 0.6283054003724394, "grad_norm": 0.0048828125, "learning_rate": 6.572808986022877e-07, "logits/chosen": 0.061279296875, "logits/rejected": 0.322265625, "logps/chosen": -0.197265625, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.984375, "rewards/margins": 15.8125, "rewards/rejected": -16.75, "step": 1687 }, { "epoch": 0.6286778398510242, "grad_norm": 0.05078125, "learning_rate": 6.561541708735237e-07, "logits/chosen": 0.109375, "logits/rejected": 0.67578125, "logps/chosen": -0.88671875, "logps/rejected": -2.953125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.4375, "rewards/margins": 10.25, "rewards/rejected": -14.75, "step": 1688 }, { "epoch": 0.6290502793296089, "grad_norm": 0.0164794921875, "learning_rate": 6.55027855452185e-07, "logits/chosen": 0.045166015625, "logits/rejected": 0.154296875, "logps/chosen": -0.2275390625, "logps/rejected": -2.921875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.140625, "rewards/margins": 13.5, "rewards/rejected": -14.625, "step": 1689 }, { "epoch": 0.6294227188081937, "grad_norm": 0.00537109375, "learning_rate": 6.539019542427045e-07, "logits/chosen": -0.007659912109375, "logits/rejected": -0.341796875, "logps/chosen": -0.2119140625, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0625, "rewards/margins": 15.0625, "rewards/rejected": -16.125, "step": 1690 }, { "epoch": 0.6297951582867783, "grad_norm": 0.224609375, "learning_rate": 6.527764691488165e-07, "logits/chosen": 0.043212890625, "logits/rejected": -0.1533203125, "logps/chosen": -0.173828125, "logps/rejected": -2.1875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.87109375, "rewards/margins": 10.125, "rewards/rejected": -11.0, "step": 1691 }, { "epoch": 0.6301675977653631, "grad_norm": 0.14453125, "learning_rate": 6.516514020735501e-07, "logits/chosen": -0.1279296875, "logits/rejected": -0.412109375, "logps/chosen": -0.5703125, "logps/rejected": -2.890625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 11.625, "rewards/rejected": -14.5, "step": 1692 }, { "epoch": 0.6305400372439478, "grad_norm": 0.000499725341796875, "learning_rate": 6.505267549192285e-07, "logits/chosen": 0.019775390625, "logits/rejected": 0.150390625, "logps/chosen": -0.0986328125, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4921875, "rewards/margins": 14.875, "rewards/rejected": -15.375, "step": 1693 }, { "epoch": 0.6309124767225326, "grad_norm": 0.1376953125, "learning_rate": 6.494025295874644e-07, "logits/chosen": 0.09521484375, "logits/rejected": 0.427734375, "logps/chosen": -0.251953125, "logps/rejected": -3.015625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2578125, "rewards/margins": 13.875, "rewards/rejected": -15.125, "step": 1694 }, { "epoch": 0.6312849162011173, "grad_norm": 0.2421875, "learning_rate": 6.482787279791575e-07, "logits/chosen": 0.1796875, "logits/rejected": 0.93359375, "logps/chosen": -0.65625, "logps/rejected": -2.9375, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 11.375, "rewards/rejected": -14.625, "step": 1695 }, { "epoch": 0.631657355679702, "grad_norm": 0.0004825592041015625, "learning_rate": 6.471553519944913e-07, "logits/chosen": 0.2119140625, "logits/rejected": 0.369140625, "logps/chosen": -0.396484375, "logps/rejected": -3.703125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.984375, "rewards/margins": 16.5, "rewards/rejected": -18.5, "step": 1696 }, { "epoch": 0.6320297951582868, "grad_norm": 0.0002651214599609375, "learning_rate": 6.460324035329287e-07, "logits/chosen": 0.0927734375, "logits/rejected": 0.2099609375, "logps/chosen": -0.1376953125, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6875, "rewards/margins": 15.875, "rewards/rejected": -16.5, "step": 1697 }, { "epoch": 0.6324022346368715, "grad_norm": 6.03125, "learning_rate": 6.449098844932111e-07, "logits/chosen": 0.10986328125, "logits/rejected": 0.92578125, "logps/chosen": -0.25, "logps/rejected": -2.640625, "loss": 0.0096, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 11.9375, "rewards/rejected": -13.1875, "step": 1698 }, { "epoch": 0.6327746741154563, "grad_norm": 0.000408172607421875, "learning_rate": 6.437877967733523e-07, "logits/chosen": 0.11767578125, "logits/rejected": 0.55078125, "logps/chosen": -0.158203125, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.79296875, "rewards/margins": 18.0, "rewards/rejected": -18.75, "step": 1699 }, { "epoch": 0.633147113594041, "grad_norm": 0.0026092529296875, "learning_rate": 6.426661422706379e-07, "logits/chosen": -0.06787109375, "logits/rejected": 0.158203125, "logps/chosen": -0.734375, "logps/rejected": -3.421875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.671875, "rewards/margins": 13.4375, "rewards/rejected": -17.125, "step": 1700 }, { "epoch": 0.6335195530726258, "grad_norm": 0.01422119140625, "learning_rate": 6.415449228816209e-07, "logits/chosen": 0.033447265625, "logits/rejected": 0.5390625, "logps/chosen": -0.1796875, "logps/rejected": -2.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8984375, "rewards/margins": 13.0625, "rewards/rejected": -14.0, "step": 1701 }, { "epoch": 0.6338919925512104, "grad_norm": 0.00014591217041015625, "learning_rate": 6.404241405021182e-07, "logits/chosen": 0.1513671875, "logits/rejected": 0.490234375, "logps/chosen": -0.25, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 18.0, "rewards/rejected": -19.25, "step": 1702 }, { "epoch": 0.6342644320297952, "grad_norm": 0.00011205673217773438, "learning_rate": 6.393037970272077e-07, "logits/chosen": 0.2333984375, "logits/rejected": 0.2734375, "logps/chosen": -0.41796875, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.09375, "rewards/margins": 17.5, "rewards/rejected": -19.5, "step": 1703 }, { "epoch": 0.6346368715083799, "grad_norm": 0.00025177001953125, "learning_rate": 6.381838943512259e-07, "logits/chosen": 0.11572265625, "logits/rejected": 0.306640625, "logps/chosen": -0.279296875, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3984375, "rewards/margins": 17.625, "rewards/rejected": -19.0, "step": 1704 }, { "epoch": 0.6350093109869647, "grad_norm": 0.00131988525390625, "learning_rate": 6.370644343677629e-07, "logits/chosen": 0.080078125, "logits/rejected": 0.625, "logps/chosen": -0.5234375, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.609375, "rewards/margins": 14.75, "rewards/rejected": -17.375, "step": 1705 }, { "epoch": 0.6353817504655493, "grad_norm": 0.00011491775512695312, "learning_rate": 6.359454189696613e-07, "logits/chosen": -0.046630859375, "logits/rejected": 0.58984375, "logps/chosen": -0.267578125, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 17.625, "rewards/rejected": -19.0, "step": 1706 }, { "epoch": 0.6357541899441341, "grad_norm": 5.030632019042969e-05, "learning_rate": 6.348268500490112e-07, "logits/chosen": 0.126953125, "logits/rejected": 0.267578125, "logps/chosen": -0.359375, "logps/rejected": -3.921875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 17.75, "rewards/rejected": -19.625, "step": 1707 }, { "epoch": 0.6361266294227188, "grad_norm": 26.75, "learning_rate": 6.337087294971481e-07, "logits/chosen": -0.287109375, "logits/rejected": 0.333984375, "logps/chosen": -0.40234375, "logps/rejected": -2.1875, "loss": 0.0615, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0, "rewards/margins": 9.0, "rewards/rejected": -11.0, "step": 1708 }, { "epoch": 0.6364990689013036, "grad_norm": 3.296875, "learning_rate": 6.325910592046493e-07, "logits/chosen": 0.054443359375, "logits/rejected": -0.2451171875, "logps/chosen": -0.484375, "logps/rejected": -2.5625, "loss": 0.005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 10.4375, "rewards/rejected": -12.875, "step": 1709 }, { "epoch": 0.6368715083798883, "grad_norm": 0.000125885009765625, "learning_rate": 6.314738410613314e-07, "logits/chosen": 0.265625, "logits/rejected": 0.6953125, "logps/chosen": -0.302734375, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.515625, "rewards/margins": 17.75, "rewards/rejected": -19.25, "step": 1710 }, { "epoch": 0.637243947858473, "grad_norm": 0.0106201171875, "learning_rate": 6.303570769562453e-07, "logits/chosen": 0.0010528564453125, "logits/rejected": -0.09130859375, "logps/chosen": -0.208984375, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 14.5, "rewards/rejected": -15.5, "step": 1711 }, { "epoch": 0.6376163873370577, "grad_norm": 0.000759124755859375, "learning_rate": 6.292407687776754e-07, "logits/chosen": 0.07958984375, "logits/rejected": -0.0908203125, "logps/chosen": -0.23828125, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1875, "rewards/margins": 15.8125, "rewards/rejected": -17.0, "step": 1712 }, { "epoch": 0.6379888268156425, "grad_norm": 0.00628662109375, "learning_rate": 6.28124918413134e-07, "logits/chosen": 0.15234375, "logits/rejected": -0.00634765625, "logps/chosen": -0.2177734375, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0859375, "rewards/margins": 16.0, "rewards/rejected": -17.25, "step": 1713 }, { "epoch": 0.6383612662942272, "grad_norm": 0.0025787353515625, "learning_rate": 6.270095277493602e-07, "logits/chosen": 0.11962890625, "logits/rejected": -0.08935546875, "logps/chosen": -0.1484375, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7421875, "rewards/margins": 15.375, "rewards/rejected": -16.125, "step": 1714 }, { "epoch": 0.638733705772812, "grad_norm": 8.0108642578125e-05, "learning_rate": 6.258945986723158e-07, "logits/chosen": 0.1513671875, "logits/rejected": 0.484375, "logps/chosen": -0.1005859375, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.50390625, "rewards/margins": 17.125, "rewards/rejected": -17.625, "step": 1715 }, { "epoch": 0.6391061452513966, "grad_norm": 0.000415802001953125, "learning_rate": 6.247801330671813e-07, "logits/chosen": 0.1259765625, "logits/rejected": 0.06982421875, "logps/chosen": -0.44921875, "logps/rejected": -3.765625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25, "rewards/margins": 16.5, "rewards/rejected": -18.75, "step": 1716 }, { "epoch": 0.6394785847299814, "grad_norm": 6.103515625e-05, "learning_rate": 6.236661328183546e-07, "logits/chosen": 0.07568359375, "logits/rejected": 0.345703125, "logps/chosen": -0.1962890625, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9765625, "rewards/margins": 17.875, "rewards/rejected": -18.875, "step": 1717 }, { "epoch": 0.6398510242085661, "grad_norm": 0.02099609375, "learning_rate": 6.225525998094456e-07, "logits/chosen": 0.0123291015625, "logits/rejected": 0.01031494140625, "logps/chosen": -0.302734375, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5078125, "rewards/margins": 13.5625, "rewards/rejected": -15.0625, "step": 1718 }, { "epoch": 0.6402234636871509, "grad_norm": 8.106231689453125e-05, "learning_rate": 6.214395359232754e-07, "logits/chosen": 0.173828125, "logits/rejected": 0.2333984375, "logps/chosen": -0.166015625, "logps/rejected": -3.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.828125, "rewards/margins": 18.75, "rewards/rejected": -19.75, "step": 1719 }, { "epoch": 0.6405959031657356, "grad_norm": 6.29425048828125e-05, "learning_rate": 6.203269430418711e-07, "logits/chosen": 0.21875, "logits/rejected": 0.5390625, "logps/chosen": -0.3125, "logps/rejected": -3.859375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5625, "rewards/margins": 17.75, "rewards/rejected": -19.25, "step": 1720 }, { "epoch": 0.6409683426443203, "grad_norm": 0.00335693359375, "learning_rate": 6.192148230464638e-07, "logits/chosen": -0.00189971923828125, "logits/rejected": 0.28515625, "logps/chosen": -0.275390625, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3828125, "rewards/margins": 16.5, "rewards/rejected": -17.875, "step": 1721 }, { "epoch": 0.641340782122905, "grad_norm": 0.000179290771484375, "learning_rate": 6.181031778174844e-07, "logits/chosen": 0.1376953125, "logits/rejected": 0.5625, "logps/chosen": -0.275390625, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.375, "rewards/margins": 16.625, "rewards/rejected": -18.0, "step": 1722 }, { "epoch": 0.6417132216014898, "grad_norm": 21.125, "learning_rate": 6.169920092345617e-07, "logits/chosen": -0.1826171875, "logits/rejected": 0.455078125, "logps/chosen": -0.74609375, "logps/rejected": -2.4375, "loss": 0.031, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.71875, "rewards/margins": 8.5, "rewards/rejected": -12.25, "step": 1723 }, { "epoch": 0.6420856610800745, "grad_norm": 3.981590270996094e-05, "learning_rate": 6.158813191765184e-07, "logits/chosen": 0.177734375, "logits/rejected": 0.3046875, "logps/chosen": -0.30078125, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5078125, "rewards/margins": 18.0, "rewards/rejected": -19.5, "step": 1724 }, { "epoch": 0.6424581005586593, "grad_norm": 0.0205078125, "learning_rate": 6.147711095213676e-07, "logits/chosen": 0.1865234375, "logits/rejected": 0.400390625, "logps/chosen": -0.515625, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5625, "rewards/margins": 16.125, "rewards/rejected": -18.75, "step": 1725 }, { "epoch": 0.6428305400372439, "grad_norm": 0.0022430419921875, "learning_rate": 6.13661382146311e-07, "logits/chosen": 0.11083984375, "logits/rejected": 0.55859375, "logps/chosen": -0.2421875, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2109375, "rewards/margins": 15.25, "rewards/rejected": -16.375, "step": 1726 }, { "epoch": 0.6432029795158287, "grad_norm": 2.1576881408691406e-05, "learning_rate": 6.125521389277339e-07, "logits/chosen": 0.1416015625, "logits/rejected": 0.2412109375, "logps/chosen": -0.08935546875, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4453125, "rewards/margins": 18.25, "rewards/rejected": -18.75, "step": 1727 }, { "epoch": 0.6435754189944134, "grad_norm": 3.337860107421875e-05, "learning_rate": 6.114433817412034e-07, "logits/chosen": -0.0196533203125, "logits/rejected": 0.447265625, "logps/chosen": -0.19921875, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9921875, "rewards/margins": 18.5, "rewards/rejected": -19.375, "step": 1728 }, { "epoch": 0.6439478584729982, "grad_norm": 6.246566772460938e-05, "learning_rate": 6.103351124614655e-07, "logits/chosen": 0.05126953125, "logits/rejected": 0.349609375, "logps/chosen": -0.267578125, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 17.0, "rewards/rejected": -18.5, "step": 1729 }, { "epoch": 0.6443202979515829, "grad_norm": 0.72265625, "learning_rate": 6.092273329624397e-07, "logits/chosen": 0.00787353515625, "logits/rejected": 0.482421875, "logps/chosen": -0.8125, "logps/rejected": -3.25, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0625, "rewards/margins": 12.1875, "rewards/rejected": -16.25, "step": 1730 }, { "epoch": 0.6446927374301676, "grad_norm": 1.7578125, "learning_rate": 6.081200451172185e-07, "logits/chosen": 0.0625, "logits/rejected": 0.9140625, "logps/chosen": -0.431640625, "logps/rejected": -2.921875, "loss": 0.0026, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 12.4375, "rewards/rejected": -14.625, "step": 1731 }, { "epoch": 0.6450651769087523, "grad_norm": 0.005096435546875, "learning_rate": 6.07013250798063e-07, "logits/chosen": 0.07958984375, "logits/rejected": -0.25390625, "logps/chosen": -0.220703125, "logps/rejected": -2.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1015625, "rewards/margins": 13.25, "rewards/rejected": -14.375, "step": 1732 }, { "epoch": 0.6454376163873371, "grad_norm": 0.00421142578125, "learning_rate": 6.05906951876399e-07, "logits/chosen": -0.2001953125, "logits/rejected": 0.173828125, "logps/chosen": -0.546875, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.734375, "rewards/margins": 13.125, "rewards/rejected": -15.875, "step": 1733 }, { "epoch": 0.6458100558659218, "grad_norm": 0.0169677734375, "learning_rate": 6.048011502228157e-07, "logits/chosen": -0.1318359375, "logits/rejected": 0.19921875, "logps/chosen": -0.62109375, "logps/rejected": -3.046875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.109375, "rewards/margins": 12.125, "rewards/rejected": -15.25, "step": 1734 }, { "epoch": 0.6461824953445066, "grad_norm": 0.375, "learning_rate": 6.036958477070608e-07, "logits/chosen": 0.002685546875, "logits/rejected": -0.1806640625, "logps/chosen": -0.41015625, "logps/rejected": -3.125, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.046875, "rewards/margins": 13.5625, "rewards/rejected": -15.625, "step": 1735 }, { "epoch": 0.6465549348230912, "grad_norm": 2.4199485778808594e-05, "learning_rate": 6.025910461980384e-07, "logits/chosen": 0.0157470703125, "logits/rejected": 0.2353515625, "logps/chosen": -0.115234375, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.57421875, "rewards/margins": 18.25, "rewards/rejected": -18.75, "step": 1736 }, { "epoch": 0.646927374301676, "grad_norm": 0.00013256072998046875, "learning_rate": 6.014867475638047e-07, "logits/chosen": 0.12060546875, "logits/rejected": 0.380859375, "logps/chosen": -0.40625, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 16.375, "rewards/rejected": -18.375, "step": 1737 }, { "epoch": 0.6472998137802607, "grad_norm": 143.0, "learning_rate": 6.003829536715674e-07, "logits/chosen": 0.06884765625, "logits/rejected": -0.09228515625, "logps/chosen": -1.1875, "logps/rejected": -2.3125, "loss": 0.3926, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.9375, "rewards/margins": 5.6875, "rewards/rejected": -11.625, "step": 1738 }, { "epoch": 0.6476722532588455, "grad_norm": 0.00034332275390625, "learning_rate": 5.992796663876788e-07, "logits/chosen": -0.00872802734375, "logits/rejected": 0.220703125, "logps/chosen": -0.359375, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 17.375, "rewards/rejected": -19.25, "step": 1739 }, { "epoch": 0.6480446927374302, "grad_norm": 0.0703125, "learning_rate": 5.981768875776357e-07, "logits/chosen": 0.0322265625, "logits/rejected": -0.33984375, "logps/chosen": -0.287109375, "logps/rejected": -2.90625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4296875, "rewards/margins": 13.125, "rewards/rejected": -14.5625, "step": 1740 }, { "epoch": 0.6484171322160149, "grad_norm": 0.00015544891357421875, "learning_rate": 5.970746191060745e-07, "logits/chosen": -0.0211181640625, "logits/rejected": 0.56640625, "logps/chosen": -0.265625, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 16.75, "rewards/rejected": -18.125, "step": 1741 }, { "epoch": 0.6487895716945996, "grad_norm": 4.1875, "learning_rate": 5.959728628367693e-07, "logits/chosen": 0.1376953125, "logits/rejected": -0.423828125, "logps/chosen": -0.71484375, "logps/rejected": -2.15625, "loss": 0.0058, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5625, "rewards/margins": 7.1875, "rewards/rejected": -10.75, "step": 1742 }, { "epoch": 0.6491620111731844, "grad_norm": 0.0002498626708984375, "learning_rate": 5.948716206326277e-07, "logits/chosen": 0.040283203125, "logits/rejected": 0.37890625, "logps/chosen": -0.16796875, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8359375, "rewards/margins": 16.625, "rewards/rejected": -17.375, "step": 1743 }, { "epoch": 0.6495344506517691, "grad_norm": 0.003662109375, "learning_rate": 5.937708943556885e-07, "logits/chosen": 0.15625, "logits/rejected": -0.0177001953125, "logps/chosen": -1.1875, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.9375, "rewards/margins": 13.3125, "rewards/rejected": -19.25, "step": 1744 }, { "epoch": 0.6499068901303539, "grad_norm": 0.419921875, "learning_rate": 5.926706858671179e-07, "logits/chosen": -0.337890625, "logits/rejected": -0.80078125, "logps/chosen": -0.3984375, "logps/rejected": -3.015625, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.984375, "rewards/margins": 13.125, "rewards/rejected": -15.0625, "step": 1745 }, { "epoch": 0.6502793296089385, "grad_norm": 0.00016307830810546875, "learning_rate": 5.91570997027206e-07, "logits/chosen": 0.17578125, "logits/rejected": 0.330078125, "logps/chosen": -0.359375, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7890625, "rewards/margins": 17.625, "rewards/rejected": -19.5, "step": 1746 }, { "epoch": 0.6506517690875233, "grad_norm": 0.01361083984375, "learning_rate": 5.904718296953655e-07, "logits/chosen": 0.05029296875, "logits/rejected": -0.244140625, "logps/chosen": -0.1806640625, "logps/rejected": -3.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.90234375, "rewards/margins": 14.25, "rewards/rejected": -15.125, "step": 1747 }, { "epoch": 0.651024208566108, "grad_norm": 0.2109375, "learning_rate": 5.893731857301269e-07, "logits/chosen": 0.2099609375, "logits/rejected": 0.71484375, "logps/chosen": -1.015625, "logps/rejected": -3.21875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.09375, "rewards/margins": 10.9375, "rewards/rejected": -16.0, "step": 1748 }, { "epoch": 0.6513966480446928, "grad_norm": 0.05322265625, "learning_rate": 5.882750669891348e-07, "logits/chosen": 0.038330078125, "logits/rejected": 0.21875, "logps/chosen": -0.5234375, "logps/rejected": -2.984375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.625, "rewards/margins": 12.3125, "rewards/rejected": -14.9375, "step": 1749 }, { "epoch": 0.6517690875232774, "grad_norm": 0.0011138916015625, "learning_rate": 5.871774753291468e-07, "logits/chosen": -0.0269775390625, "logits/rejected": 0.294921875, "logps/chosen": -0.201171875, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0078125, "rewards/margins": 15.0625, "rewards/rejected": -16.125, "step": 1750 }, { "epoch": 0.6521415270018622, "grad_norm": 0.008544921875, "learning_rate": 5.86080412606029e-07, "logits/chosen": 0.068359375, "logits/rejected": 0.49609375, "logps/chosen": -0.77734375, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.890625, "rewards/margins": 14.25, "rewards/rejected": -18.25, "step": 1751 }, { "epoch": 0.6525139664804469, "grad_norm": 0.000148773193359375, "learning_rate": 5.849838806747529e-07, "logits/chosen": 0.044921875, "logits/rejected": 0.62109375, "logps/chosen": -0.1943359375, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.97265625, "rewards/margins": 17.375, "rewards/rejected": -18.25, "step": 1752 }, { "epoch": 0.6528864059590317, "grad_norm": 0.000347137451171875, "learning_rate": 5.838878813893929e-07, "logits/chosen": 0.0908203125, "logits/rejected": 0.5859375, "logps/chosen": -0.2333984375, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.171875, "rewards/margins": 17.0, "rewards/rejected": -18.25, "step": 1753 }, { "epoch": 0.6532588454376164, "grad_norm": 0.0013275146484375, "learning_rate": 5.827924166031223e-07, "logits/chosen": 0.0927734375, "logits/rejected": 0.4765625, "logps/chosen": -0.28125, "logps/rejected": -3.515625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.40625, "rewards/margins": 16.25, "rewards/rejected": -17.625, "step": 1754 }, { "epoch": 0.6536312849162011, "grad_norm": 0.0185546875, "learning_rate": 5.816974881682111e-07, "logits/chosen": 0.1181640625, "logits/rejected": 0.48828125, "logps/chosen": -0.5625, "logps/rejected": -3.453125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8125, "rewards/margins": 14.5, "rewards/rejected": -17.25, "step": 1755 }, { "epoch": 0.6540037243947858, "grad_norm": 0.0220947265625, "learning_rate": 5.806030979360216e-07, "logits/chosen": -0.06396484375, "logits/rejected": 0.326171875, "logps/chosen": -1.046875, "logps/rejected": -3.390625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.21875, "rewards/margins": 11.8125, "rewards/rejected": -17.0, "step": 1756 }, { "epoch": 0.6543761638733706, "grad_norm": 0.00018787384033203125, "learning_rate": 5.795092477570077e-07, "logits/chosen": 0.083984375, "logits/rejected": 0.51953125, "logps/chosen": -0.1005859375, "logps/rejected": -3.296875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5, "rewards/margins": 16.0, "rewards/rejected": -16.5, "step": 1757 }, { "epoch": 0.6547486033519553, "grad_norm": 0.006256103515625, "learning_rate": 5.784159394807085e-07, "logits/chosen": -0.04638671875, "logits/rejected": 0.412109375, "logps/chosen": -0.09716796875, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.484375, "rewards/margins": 15.125, "rewards/rejected": -15.625, "step": 1758 }, { "epoch": 0.6551210428305401, "grad_norm": 0.0022430419921875, "learning_rate": 5.773231749557479e-07, "logits/chosen": 0.08935546875, "logits/rejected": 0.458984375, "logps/chosen": -0.69140625, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.46875, "rewards/margins": 13.625, "rewards/rejected": -17.125, "step": 1759 }, { "epoch": 0.6554934823091247, "grad_norm": 0.0004291534423828125, "learning_rate": 5.762309560298294e-07, "logits/chosen": 0.12060546875, "logits/rejected": 0.53515625, "logps/chosen": -0.13671875, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6796875, "rewards/margins": 17.5, "rewards/rejected": -18.125, "step": 1760 }, { "epoch": 0.6558659217877095, "grad_norm": 0.02880859375, "learning_rate": 5.751392845497349e-07, "logits/chosen": 0.01214599609375, "logits/rejected": 0.298828125, "logps/chosen": -0.1953125, "logps/rejected": -2.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9765625, "rewards/margins": 12.125, "rewards/rejected": -13.125, "step": 1761 }, { "epoch": 0.6562383612662942, "grad_norm": 7.05718994140625e-05, "learning_rate": 5.740481623613202e-07, "logits/chosen": 0.031005859375, "logits/rejected": 0.255859375, "logps/chosen": -0.37890625, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 17.625, "rewards/rejected": -19.5, "step": 1762 }, { "epoch": 0.656610800744879, "grad_norm": 0.000400543212890625, "learning_rate": 5.729575913095123e-07, "logits/chosen": -0.1962890625, "logits/rejected": 0.18359375, "logps/chosen": -0.1884765625, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9453125, "rewards/margins": 16.75, "rewards/rejected": -17.75, "step": 1763 }, { "epoch": 0.6569832402234637, "grad_norm": 2.46875, "learning_rate": 5.718675732383066e-07, "logits/chosen": 0.07080078125, "logits/rejected": 0.43359375, "logps/chosen": -0.263671875, "logps/rejected": -2.953125, "loss": 0.0017, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3203125, "rewards/margins": 13.5, "rewards/rejected": -14.8125, "step": 1764 }, { "epoch": 0.6573556797020484, "grad_norm": 8.940696716308594e-06, "learning_rate": 5.707781099907628e-07, "logits/chosen": 0.1669921875, "logits/rejected": 0.33984375, "logps/chosen": -0.255859375, "logps/rejected": -4.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.28125, "rewards/margins": 19.375, "rewards/rejected": -20.625, "step": 1765 }, { "epoch": 0.6577281191806331, "grad_norm": 7.34375, "learning_rate": 5.696892034090038e-07, "logits/chosen": -0.043701171875, "logits/rejected": -0.875, "logps/chosen": -0.310546875, "logps/rejected": -1.40625, "loss": 0.0212, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.546875, "rewards/margins": 5.5, "rewards/rejected": -7.03125, "step": 1766 }, { "epoch": 0.6581005586592179, "grad_norm": 0.0152587890625, "learning_rate": 5.686008553342098e-07, "logits/chosen": 0.1181640625, "logits/rejected": -0.142578125, "logps/chosen": -0.4765625, "logps/rejected": -2.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 11.875, "rewards/rejected": -14.25, "step": 1767 }, { "epoch": 0.6584729981378026, "grad_norm": 0.205078125, "learning_rate": 5.675130676066177e-07, "logits/chosen": 0.0279541015625, "logits/rejected": -0.044921875, "logps/chosen": -0.09619140625, "logps/rejected": -2.0, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.48046875, "rewards/margins": 9.625, "rewards/rejected": -10.0625, "step": 1768 }, { "epoch": 0.6588454376163874, "grad_norm": 0.01055908203125, "learning_rate": 5.664258420655162e-07, "logits/chosen": 0.061767578125, "logits/rejected": 0.1650390625, "logps/chosen": -0.4765625, "logps/rejected": -2.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 12.125, "rewards/rejected": -14.5, "step": 1769 }, { "epoch": 0.659217877094972, "grad_norm": 0.00173187255859375, "learning_rate": 5.653391805492442e-07, "logits/chosen": -0.0228271484375, "logits/rejected": 0.40625, "logps/chosen": -0.50390625, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.515625, "rewards/margins": 15.625, "rewards/rejected": -18.25, "step": 1770 }, { "epoch": 0.6595903165735568, "grad_norm": 1.25, "learning_rate": 5.642530848951858e-07, "logits/chosen": 0.0546875, "logits/rejected": -0.3515625, "logps/chosen": -0.640625, "logps/rejected": -2.609375, "loss": 0.0014, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.203125, "rewards/margins": 9.875, "rewards/rejected": -13.0625, "step": 1771 }, { "epoch": 0.6599627560521415, "grad_norm": 1.7890625, "learning_rate": 5.631675569397696e-07, "logits/chosen": -0.0830078125, "logits/rejected": 0.146484375, "logps/chosen": -0.56640625, "logps/rejected": -2.671875, "loss": 0.0032, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.828125, "rewards/margins": 10.5, "rewards/rejected": -13.375, "step": 1772 }, { "epoch": 0.6603351955307263, "grad_norm": 2.484375, "learning_rate": 5.620825985184632e-07, "logits/chosen": 0.166015625, "logits/rejected": -0.921875, "logps/chosen": -0.66015625, "logps/rejected": -2.28125, "loss": 0.0037, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3125, "rewards/margins": 8.0625, "rewards/rejected": -11.375, "step": 1773 }, { "epoch": 0.660707635009311, "grad_norm": 0.62890625, "learning_rate": 5.60998211465772e-07, "logits/chosen": 0.0255126953125, "logits/rejected": 0.251953125, "logps/chosen": -0.8828125, "logps/rejected": -3.65625, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.40625, "rewards/margins": 13.8125, "rewards/rejected": -18.25, "step": 1774 }, { "epoch": 0.6610800744878957, "grad_norm": 0.06201171875, "learning_rate": 5.599143976152346e-07, "logits/chosen": -0.0238037109375, "logits/rejected": 0.451171875, "logps/chosen": -0.4140625, "logps/rejected": -2.78125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 11.875, "rewards/rejected": -13.9375, "step": 1775 }, { "epoch": 0.6614525139664804, "grad_norm": 0.00072479248046875, "learning_rate": 5.588311587994213e-07, "logits/chosen": -0.203125, "logits/rejected": 0.306640625, "logps/chosen": -0.58984375, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9375, "rewards/margins": 15.875, "rewards/rejected": -18.75, "step": 1776 }, { "epoch": 0.6618249534450652, "grad_norm": 2.1576881408691406e-05, "learning_rate": 5.577484968499292e-07, "logits/chosen": 0.01165771484375, "logits/rejected": 0.306640625, "logps/chosen": -0.376953125, "logps/rejected": -4.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8828125, "rewards/margins": 18.25, "rewards/rejected": -20.125, "step": 1777 }, { "epoch": 0.6621973929236499, "grad_norm": 3.4332275390625e-05, "learning_rate": 5.566664135973808e-07, "logits/chosen": 0.015869140625, "logits/rejected": 0.45703125, "logps/chosen": -0.220703125, "logps/rejected": -3.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1015625, "rewards/margins": 18.875, "rewards/rejected": -20.0, "step": 1778 }, { "epoch": 0.6625698324022347, "grad_norm": 0.00848388671875, "learning_rate": 5.555849108714193e-07, "logits/chosen": 0.09228515625, "logits/rejected": -0.296875, "logps/chosen": -0.3203125, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6015625, "rewards/margins": 14.5, "rewards/rejected": -16.125, "step": 1779 }, { "epoch": 0.6629422718808193, "grad_norm": 0.01019287109375, "learning_rate": 5.545039905007067e-07, "logits/chosen": -0.0693359375, "logits/rejected": -0.265625, "logps/chosen": -0.162109375, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8046875, "rewards/margins": 16.25, "rewards/rejected": -17.0, "step": 1780 }, { "epoch": 0.6633147113594041, "grad_norm": 3.453125, "learning_rate": 5.534236543129207e-07, "logits/chosen": 0.115234375, "logits/rejected": -1.140625, "logps/chosen": -0.7109375, "logps/rejected": -1.984375, "loss": 0.0047, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5625, "rewards/margins": 6.375, "rewards/rejected": -9.875, "step": 1781 }, { "epoch": 0.6636871508379888, "grad_norm": 0.00112152099609375, "learning_rate": 5.523439041347505e-07, "logits/chosen": 0.01080322265625, "logits/rejected": 0.33203125, "logps/chosen": -0.279296875, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.390625, "rewards/margins": 14.3125, "rewards/rejected": -15.75, "step": 1782 }, { "epoch": 0.6640595903165736, "grad_norm": 9.34600830078125e-05, "learning_rate": 5.51264741791895e-07, "logits/chosen": 0.0615234375, "logits/rejected": 0.5390625, "logps/chosen": -0.314453125, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5703125, "rewards/margins": 17.0, "rewards/rejected": -18.625, "step": 1783 }, { "epoch": 0.6644320297951583, "grad_norm": 0.000568389892578125, "learning_rate": 5.501861691090586e-07, "logits/chosen": -0.0096435546875, "logits/rejected": 0.30859375, "logps/chosen": -0.1328125, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6640625, "rewards/margins": 16.5, "rewards/rejected": -17.125, "step": 1784 }, { "epoch": 0.664804469273743, "grad_norm": 0.009033203125, "learning_rate": 5.491081879099496e-07, "logits/chosen": 0.036865234375, "logits/rejected": 0.1337890625, "logps/chosen": -0.54296875, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.71875, "rewards/margins": 13.9375, "rewards/rejected": -16.75, "step": 1785 }, { "epoch": 0.6651769087523277, "grad_norm": 0.00014495849609375, "learning_rate": 5.480308000172754e-07, "logits/chosen": -0.00469970703125, "logits/rejected": 0.30859375, "logps/chosen": -0.19921875, "logps/rejected": -3.515625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.99609375, "rewards/margins": 16.5, "rewards/rejected": -17.5, "step": 1786 }, { "epoch": 0.6655493482309125, "grad_norm": 0.000438690185546875, "learning_rate": 5.469540072527404e-07, "logits/chosen": -0.1650390625, "logits/rejected": -0.17578125, "logps/chosen": -0.30078125, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5, "rewards/margins": 15.375, "rewards/rejected": -16.875, "step": 1787 }, { "epoch": 0.6659217877094972, "grad_norm": 0.01092529296875, "learning_rate": 5.458778114370427e-07, "logits/chosen": 0.056884765625, "logits/rejected": 0.50390625, "logps/chosen": -0.71875, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.59375, "rewards/margins": 12.625, "rewards/rejected": -16.25, "step": 1788 }, { "epoch": 0.666294227188082, "grad_norm": 0.00015926361083984375, "learning_rate": 5.448022143898713e-07, "logits/chosen": 0.0128173828125, "logits/rejected": 0.07275390625, "logps/chosen": -0.326171875, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6328125, "rewards/margins": 16.25, "rewards/rejected": -17.75, "step": 1789 }, { "epoch": 0.6666666666666666, "grad_norm": 9.870529174804688e-05, "learning_rate": 5.437272179299022e-07, "logits/chosen": 0.1826171875, "logits/rejected": 0.36328125, "logps/chosen": -0.70703125, "logps/rejected": -4.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.53125, "rewards/margins": 17.125, "rewards/rejected": -20.625, "step": 1790 }, { "epoch": 0.6670391061452514, "grad_norm": 0.000621795654296875, "learning_rate": 5.426528238747969e-07, "logits/chosen": 0.09375, "logits/rejected": 0.546875, "logps/chosen": -0.2265625, "logps/rejected": -3.578125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1328125, "rewards/margins": 16.75, "rewards/rejected": -18.0, "step": 1791 }, { "epoch": 0.6674115456238361, "grad_norm": 0.07421875, "learning_rate": 5.41579034041197e-07, "logits/chosen": 0.01904296875, "logits/rejected": 0.4453125, "logps/chosen": -0.46875, "logps/rejected": -3.0, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.34375, "rewards/margins": 12.75, "rewards/rejected": -15.0, "step": 1792 }, { "epoch": 0.6677839851024209, "grad_norm": 0.0004138946533203125, "learning_rate": 5.405058502447236e-07, "logits/chosen": 0.1474609375, "logits/rejected": 0.318359375, "logps/chosen": -0.671875, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.359375, "rewards/margins": 15.8125, "rewards/rejected": -19.25, "step": 1793 }, { "epoch": 0.6681564245810055, "grad_norm": 0.0004634857177734375, "learning_rate": 5.39433274299972e-07, "logits/chosen": -0.130859375, "logits/rejected": 0.474609375, "logps/chosen": -0.1181640625, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.58984375, "rewards/margins": 17.625, "rewards/rejected": -18.25, "step": 1794 }, { "epoch": 0.6685288640595903, "grad_norm": 3.546476364135742e-06, "learning_rate": 5.383613080205115e-07, "logits/chosen": -0.08544921875, "logits/rejected": 0.345703125, "logps/chosen": -0.1572265625, "logps/rejected": -4.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7890625, "rewards/margins": 20.25, "rewards/rejected": -21.0, "step": 1795 }, { "epoch": 0.668901303538175, "grad_norm": 0.03173828125, "learning_rate": 5.372899532188783e-07, "logits/chosen": 0.0498046875, "logits/rejected": -0.08642578125, "logps/chosen": -0.22265625, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1171875, "rewards/margins": 15.625, "rewards/rejected": -16.75, "step": 1796 }, { "epoch": 0.6692737430167598, "grad_norm": 0.0015106201171875, "learning_rate": 5.362192117065765e-07, "logits/chosen": 0.038818359375, "logits/rejected": 0.47265625, "logps/chosen": -0.9296875, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.65625, "rewards/margins": 15.6875, "rewards/rejected": -20.25, "step": 1797 }, { "epoch": 0.6696461824953445, "grad_norm": 12.625, "learning_rate": 5.351490852940719e-07, "logits/chosen": -0.04345703125, "logits/rejected": -0.031005859375, "logps/chosen": -0.671875, "logps/rejected": -2.28125, "loss": 0.0283, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.34375, "rewards/margins": 8.0, "rewards/rejected": -11.375, "step": 1798 }, { "epoch": 0.6700186219739293, "grad_norm": 8.630752563476562e-05, "learning_rate": 5.34079575790791e-07, "logits/chosen": 0.1787109375, "logits/rejected": 0.453125, "logps/chosen": -0.32421875, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.625, "rewards/margins": 16.875, "rewards/rejected": -18.5, "step": 1799 }, { "epoch": 0.6703910614525139, "grad_norm": 0.0033416748046875, "learning_rate": 5.330106850051175e-07, "logits/chosen": 0.154296875, "logits/rejected": -0.041259765625, "logps/chosen": -0.6328125, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.15625, "rewards/margins": 12.875, "rewards/rejected": -16.0, "step": 1800 }, { "epoch": 0.6707635009310987, "grad_norm": 0.006195068359375, "learning_rate": 5.319424147443878e-07, "logits/chosen": -0.1826171875, "logits/rejected": 0.29296875, "logps/chosen": -0.5703125, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 12.875, "rewards/rejected": -15.75, "step": 1801 }, { "epoch": 0.6711359404096834, "grad_norm": 0.01141357421875, "learning_rate": 5.308747668148902e-07, "logits/chosen": 0.06982421875, "logits/rejected": 0.26953125, "logps/chosen": -0.40625, "logps/rejected": -2.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 12.625, "rewards/rejected": -14.625, "step": 1802 }, { "epoch": 0.6715083798882682, "grad_norm": 0.158203125, "learning_rate": 5.298077430218596e-07, "logits/chosen": 0.06787109375, "logits/rejected": -0.1025390625, "logps/chosen": -0.3359375, "logps/rejected": -3.0, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6796875, "rewards/margins": 13.3125, "rewards/rejected": -15.0, "step": 1803 }, { "epoch": 0.6718808193668528, "grad_norm": 1.1484375, "learning_rate": 5.287413451694774e-07, "logits/chosen": 0.01519775390625, "logits/rejected": -0.5234375, "logps/chosen": -0.80078125, "logps/rejected": -2.984375, "loss": 0.0017, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0, "rewards/margins": 10.9375, "rewards/rejected": -14.9375, "step": 1804 }, { "epoch": 0.6722532588454376, "grad_norm": 0.000720977783203125, "learning_rate": 5.276755750608647e-07, "logits/chosen": 0.16796875, "logits/rejected": 0.384765625, "logps/chosen": -0.44140625, "logps/rejected": -3.703125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 16.25, "rewards/rejected": -18.5, "step": 1805 }, { "epoch": 0.6726256983240223, "grad_norm": 0.11669921875, "learning_rate": 5.266104344980823e-07, "logits/chosen": 0.05712890625, "logits/rejected": 0.60546875, "logps/chosen": -0.43359375, "logps/rejected": -3.375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 14.75, "rewards/rejected": -16.875, "step": 1806 }, { "epoch": 0.6729981378026071, "grad_norm": 0.002716064453125, "learning_rate": 5.255459252821258e-07, "logits/chosen": 0.01336669921875, "logits/rejected": 0.5625, "logps/chosen": -0.32421875, "logps/rejected": -2.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.625, "rewards/margins": 13.25, "rewards/rejected": -14.875, "step": 1807 }, { "epoch": 0.6733705772811918, "grad_norm": 2.046875, "learning_rate": 5.244820492129243e-07, "logits/chosen": 0.279296875, "logits/rejected": -0.390625, "logps/chosen": -0.56640625, "logps/rejected": -2.84375, "loss": 0.0036, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.828125, "rewards/margins": 11.375, "rewards/rejected": -14.1875, "step": 1808 }, { "epoch": 0.6737430167597765, "grad_norm": 0.00110626220703125, "learning_rate": 5.234188080893352e-07, "logits/chosen": 0.1337890625, "logits/rejected": 0.04736328125, "logps/chosen": -0.328125, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.640625, "rewards/margins": 15.375, "rewards/rejected": -17.0, "step": 1809 }, { "epoch": 0.6741154562383612, "grad_norm": 0.00026702880859375, "learning_rate": 5.223562037091429e-07, "logits/chosen": 0.0186767578125, "logits/rejected": 0.3046875, "logps/chosen": -0.36328125, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8203125, "rewards/margins": 17.0, "rewards/rejected": -18.75, "step": 1810 }, { "epoch": 0.674487895716946, "grad_norm": 0.0002956390380859375, "learning_rate": 5.212942378690552e-07, "logits/chosen": 0.193359375, "logits/rejected": 0.419921875, "logps/chosen": -0.48828125, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 15.5625, "rewards/rejected": -18.0, "step": 1811 }, { "epoch": 0.6748603351955307, "grad_norm": 0.044677734375, "learning_rate": 5.202329123647005e-07, "logits/chosen": 0.11279296875, "logits/rejected": 0.8046875, "logps/chosen": -0.408203125, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.046875, "rewards/margins": 14.0625, "rewards/rejected": -16.125, "step": 1812 }, { "epoch": 0.6752327746741155, "grad_norm": 0.000118255615234375, "learning_rate": 5.191722289906229e-07, "logits/chosen": 0.1025390625, "logits/rejected": 0.2255859375, "logps/chosen": -0.26953125, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 16.5, "rewards/rejected": -17.75, "step": 1813 }, { "epoch": 0.6756052141527001, "grad_norm": 5.5730342864990234e-06, "learning_rate": 5.181121895402832e-07, "logits/chosen": -0.0019989013671875, "logits/rejected": 0.158203125, "logps/chosen": -0.0771484375, "logps/rejected": -4.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.38671875, "rewards/margins": 20.5, "rewards/rejected": -21.0, "step": 1814 }, { "epoch": 0.6759776536312849, "grad_norm": 102.5, "learning_rate": 5.170527958060521e-07, "logits/chosen": -0.15234375, "logits/rejected": 0.482421875, "logps/chosen": -0.94140625, "logps/rejected": -2.78125, "loss": 0.1426, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.71875, "rewards/margins": 9.1875, "rewards/rejected": -13.875, "step": 1815 }, { "epoch": 0.6763500931098696, "grad_norm": 0.01141357421875, "learning_rate": 5.159940495792078e-07, "logits/chosen": 0.0201416015625, "logits/rejected": -0.25390625, "logps/chosen": -0.2060546875, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.03125, "rewards/margins": 14.0, "rewards/rejected": -15.0, "step": 1816 }, { "epoch": 0.6767225325884544, "grad_norm": 5.626678466796875e-05, "learning_rate": 5.149359526499346e-07, "logits/chosen": 0.1357421875, "logits/rejected": 0.296875, "logps/chosen": -0.32421875, "logps/rejected": -3.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6171875, "rewards/margins": 17.25, "rewards/rejected": -19.0, "step": 1817 }, { "epoch": 0.6770949720670391, "grad_norm": 0.027099609375, "learning_rate": 5.138785068073192e-07, "logits/chosen": 0.0439453125, "logits/rejected": -0.205078125, "logps/chosen": -0.359375, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 13.875, "rewards/rejected": -15.625, "step": 1818 }, { "epoch": 0.6774674115456238, "grad_norm": 0.02197265625, "learning_rate": 5.128217138393466e-07, "logits/chosen": -0.0458984375, "logits/rejected": 0.2412109375, "logps/chosen": -0.8671875, "logps/rejected": -3.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.3125, "rewards/margins": 13.375, "rewards/rejected": -17.75, "step": 1819 }, { "epoch": 0.6778398510242085, "grad_norm": 0.388671875, "learning_rate": 5.117655755328987e-07, "logits/chosen": 0.017333984375, "logits/rejected": -0.69921875, "logps/chosen": -0.41796875, "logps/rejected": -2.3125, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.078125, "rewards/margins": 9.5, "rewards/rejected": -11.625, "step": 1820 }, { "epoch": 0.6782122905027933, "grad_norm": 2.046875, "learning_rate": 5.107100936737493e-07, "logits/chosen": -0.01409912109375, "logits/rejected": 0.353515625, "logps/chosen": -0.205078125, "logps/rejected": -2.84375, "loss": 0.0041, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0234375, "rewards/margins": 13.1875, "rewards/rejected": -14.25, "step": 1821 }, { "epoch": 0.678584729981378, "grad_norm": 0.006561279296875, "learning_rate": 5.096552700465629e-07, "logits/chosen": 0.28125, "logits/rejected": 0.193359375, "logps/chosen": -0.359375, "logps/rejected": -3.171875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7890625, "rewards/margins": 14.0625, "rewards/rejected": -15.875, "step": 1822 }, { "epoch": 0.6789571694599628, "grad_norm": 5.173683166503906e-05, "learning_rate": 5.086011064348919e-07, "logits/chosen": 0.042724609375, "logits/rejected": 0.2177734375, "logps/chosen": -0.1142578125, "logps/rejected": -3.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5703125, "rewards/margins": 19.25, "rewards/rejected": -19.75, "step": 1823 }, { "epoch": 0.6793296089385474, "grad_norm": 0.0002536773681640625, "learning_rate": 5.075476046211718e-07, "logits/chosen": 0.0029296875, "logits/rejected": 0.017822265625, "logps/chosen": -0.251953125, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2578125, "rewards/margins": 15.6875, "rewards/rejected": -17.0, "step": 1824 }, { "epoch": 0.6797020484171322, "grad_norm": 0.0003337860107421875, "learning_rate": 5.064947663867185e-07, "logits/chosen": 0.17578125, "logits/rejected": 0.326171875, "logps/chosen": -0.224609375, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.125, "rewards/margins": 17.25, "rewards/rejected": -18.375, "step": 1825 }, { "epoch": 0.6800744878957169, "grad_norm": 0.021484375, "learning_rate": 5.05442593511727e-07, "logits/chosen": 0.220703125, "logits/rejected": -0.10986328125, "logps/chosen": -0.44140625, "logps/rejected": -2.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 11.625, "rewards/rejected": -13.875, "step": 1826 }, { "epoch": 0.6804469273743017, "grad_norm": 0.003814697265625, "learning_rate": 5.043910877752669e-07, "logits/chosen": -0.1162109375, "logits/rejected": 0.140625, "logps/chosen": -0.216796875, "logps/rejected": -3.234375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.078125, "rewards/margins": 15.125, "rewards/rejected": -16.25, "step": 1827 }, { "epoch": 0.6808193668528864, "grad_norm": 0.0023040771484375, "learning_rate": 5.033402509552793e-07, "logits/chosen": 0.09716796875, "logits/rejected": 0.03857421875, "logps/chosen": -0.2109375, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0546875, "rewards/margins": 15.875, "rewards/rejected": -16.875, "step": 1828 }, { "epoch": 0.6811918063314711, "grad_norm": 1.1563301086425781e-05, "learning_rate": 5.022900848285755e-07, "logits/chosen": 0.25390625, "logits/rejected": 0.447265625, "logps/chosen": -0.09033203125, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.451171875, "rewards/margins": 19.125, "rewards/rejected": -19.5, "step": 1829 }, { "epoch": 0.6815642458100558, "grad_norm": 7.534027099609375e-05, "learning_rate": 5.01240591170831e-07, "logits/chosen": 0.072265625, "logits/rejected": 0.0986328125, "logps/chosen": -0.1572265625, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.78515625, "rewards/margins": 17.875, "rewards/rejected": -18.625, "step": 1830 }, { "epoch": 0.6819366852886406, "grad_norm": 6.29425048828125e-05, "learning_rate": 5.001917717565853e-07, "logits/chosen": 0.0615234375, "logits/rejected": 0.609375, "logps/chosen": -0.1455078125, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7265625, "rewards/margins": 17.5, "rewards/rejected": -18.25, "step": 1831 }, { "epoch": 0.6823091247672253, "grad_norm": 0.015380859375, "learning_rate": 4.991436283592387e-07, "logits/chosen": 0.068359375, "logits/rejected": 0.265625, "logps/chosen": -0.69921875, "logps/rejected": -3.265625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5, "rewards/margins": 12.875, "rewards/rejected": -16.375, "step": 1832 }, { "epoch": 0.6826815642458101, "grad_norm": 1.5234375, "learning_rate": 4.980961627510464e-07, "logits/chosen": -0.1259765625, "logits/rejected": -0.984375, "logps/chosen": -0.412109375, "logps/rejected": -2.078125, "loss": 0.0023, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 8.375, "rewards/rejected": -10.375, "step": 1833 }, { "epoch": 0.6830540037243947, "grad_norm": 1.0728836059570312e-05, "learning_rate": 4.970493767031191e-07, "logits/chosen": 0.1142578125, "logits/rejected": 0.46484375, "logps/chosen": -0.248046875, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2421875, "rewards/margins": 19.0, "rewards/rejected": -20.25, "step": 1834 }, { "epoch": 0.6834264432029795, "grad_norm": 0.0001964569091796875, "learning_rate": 4.960032719854182e-07, "logits/chosen": 0.12255859375, "logits/rejected": 0.51953125, "logps/chosen": -0.55078125, "logps/rejected": -4.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 17.75, "rewards/rejected": -20.5, "step": 1835 }, { "epoch": 0.6837988826815642, "grad_norm": 0.0030670166015625, "learning_rate": 4.949578503667524e-07, "logits/chosen": 0.007354736328125, "logits/rejected": 0.478515625, "logps/chosen": -0.3203125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 15.8125, "rewards/rejected": -17.375, "step": 1836 }, { "epoch": 0.684171322160149, "grad_norm": 0.0791015625, "learning_rate": 4.939131136147766e-07, "logits/chosen": 0.1083984375, "logits/rejected": 0.2734375, "logps/chosen": -1.0859375, "logps/rejected": -3.75, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4375, "rewards/margins": 13.25, "rewards/rejected": -18.75, "step": 1837 }, { "epoch": 0.6845437616387338, "grad_norm": 0.287109375, "learning_rate": 4.928690634959862e-07, "logits/chosen": 0.2197265625, "logits/rejected": -0.38671875, "logps/chosen": -0.37890625, "logps/rejected": -2.9375, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 12.75, "rewards/rejected": -14.6875, "step": 1838 }, { "epoch": 0.6849162011173184, "grad_norm": 0.0016021728515625, "learning_rate": 4.918257017757165e-07, "logits/chosen": 0.10009765625, "logits/rejected": 0.466796875, "logps/chosen": -0.7421875, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.703125, "rewards/margins": 14.25, "rewards/rejected": -18.0, "step": 1839 }, { "epoch": 0.6852886405959032, "grad_norm": 0.000335693359375, "learning_rate": 4.907830302181389e-07, "logits/chosen": 0.076171875, "logits/rejected": 0.3203125, "logps/chosen": -0.31640625, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.578125, "rewards/margins": 16.75, "rewards/rejected": -18.25, "step": 1840 }, { "epoch": 0.6856610800744879, "grad_norm": 0.01519775390625, "learning_rate": 4.897410505862575e-07, "logits/chosen": 0.09521484375, "logits/rejected": 0.609375, "logps/chosen": -0.7890625, "logps/rejected": -3.484375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9375, "rewards/margins": 13.5, "rewards/rejected": -17.5, "step": 1841 }, { "epoch": 0.6860335195530727, "grad_norm": 0.138671875, "learning_rate": 4.886997646419067e-07, "logits/chosen": 0.06884765625, "logits/rejected": -0.396484375, "logps/chosen": -0.328125, "logps/rejected": -2.15625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.640625, "rewards/margins": 9.125, "rewards/rejected": -10.8125, "step": 1842 }, { "epoch": 0.6864059590316574, "grad_norm": 0.07861328125, "learning_rate": 4.876591741457479e-07, "logits/chosen": 0.1103515625, "logits/rejected": 0.48828125, "logps/chosen": -0.34765625, "logps/rejected": -3.015625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.734375, "rewards/margins": 13.375, "rewards/rejected": -15.125, "step": 1843 }, { "epoch": 0.6867783985102421, "grad_norm": 0.06494140625, "learning_rate": 4.866192808572663e-07, "logits/chosen": 0.00628662109375, "logits/rejected": 0.08203125, "logps/chosen": -0.75, "logps/rejected": -3.265625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.734375, "rewards/margins": 12.625, "rewards/rejected": -16.375, "step": 1844 }, { "epoch": 0.6871508379888268, "grad_norm": 166.0, "learning_rate": 4.855800865347688e-07, "logits/chosen": 0.109375, "logits/rejected": -0.60546875, "logps/chosen": -1.0546875, "logps/rejected": -2.5, "loss": 0.2773, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.25, "rewards/margins": 7.25, "rewards/rejected": -12.5, "step": 1845 }, { "epoch": 0.6875232774674116, "grad_norm": 0.0004787445068359375, "learning_rate": 4.845415929353798e-07, "logits/chosen": 0.1357421875, "logits/rejected": 0.462890625, "logps/chosen": -0.22265625, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1171875, "rewards/margins": 15.375, "rewards/rejected": -16.5, "step": 1846 }, { "epoch": 0.6878957169459963, "grad_norm": 0.00014495849609375, "learning_rate": 4.835038018150392e-07, "logits/chosen": 0.0277099609375, "logits/rejected": 0.443359375, "logps/chosen": -0.7578125, "logps/rejected": -4.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.78125, "rewards/margins": 17.25, "rewards/rejected": -21.0, "step": 1847 }, { "epoch": 0.6882681564245811, "grad_norm": 4.935264587402344e-05, "learning_rate": 4.824667149284992e-07, "logits/chosen": 0.0130615234375, "logits/rejected": 0.474609375, "logps/chosen": -0.43359375, "logps/rejected": -3.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.171875, "rewards/margins": 17.5, "rewards/rejected": -19.75, "step": 1848 }, { "epoch": 0.6886405959031657, "grad_norm": 9.6875, "learning_rate": 4.814303340293214e-07, "logits/chosen": -0.1845703125, "logits/rejected": 3.933906555175781e-05, "logps/chosen": -0.462890625, "logps/rejected": -2.21875, "loss": 0.0173, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3125, "rewards/margins": 8.75, "rewards/rejected": -11.0625, "step": 1849 }, { "epoch": 0.6890130353817505, "grad_norm": 0.0037994384765625, "learning_rate": 4.803946608698731e-07, "logits/chosen": -0.058837890625, "logits/rejected": 0.37890625, "logps/chosen": -0.51171875, "logps/rejected": -3.296875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5625, "rewards/margins": 13.9375, "rewards/rejected": -16.5, "step": 1850 }, { "epoch": 0.6893854748603352, "grad_norm": 0.0003261566162109375, "learning_rate": 4.793596972013255e-07, "logits/chosen": 0.251953125, "logits/rejected": 0.251953125, "logps/chosen": -0.30078125, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5078125, "rewards/margins": 16.75, "rewards/rejected": -18.25, "step": 1851 }, { "epoch": 0.68975791433892, "grad_norm": 62.5, "learning_rate": 4.783254447736496e-07, "logits/chosen": 0.052978515625, "logits/rejected": 0.2578125, "logps/chosen": -0.408203125, "logps/rejected": -2.328125, "loss": 0.1279, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.046875, "rewards/margins": 9.625, "rewards/rejected": -11.6875, "step": 1852 }, { "epoch": 0.6901303538175046, "grad_norm": 129.0, "learning_rate": 4.772919053356145e-07, "logits/chosen": -0.053955078125, "logits/rejected": 0.39453125, "logps/chosen": -0.640625, "logps/rejected": -2.015625, "loss": 0.168, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.203125, "rewards/margins": 6.875, "rewards/rejected": -10.125, "step": 1853 }, { "epoch": 0.6905027932960894, "grad_norm": 0.000354766845703125, "learning_rate": 4.762590806347834e-07, "logits/chosen": -0.1328125, "logits/rejected": 0.5078125, "logps/chosen": -0.326171875, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6328125, "rewards/margins": 16.0, "rewards/rejected": -17.75, "step": 1854 }, { "epoch": 0.6908752327746741, "grad_norm": 7.867813110351562e-06, "learning_rate": 4.752269724175105e-07, "logits/chosen": 0.01904296875, "logits/rejected": 0.330078125, "logps/chosen": -0.25390625, "logps/rejected": -4.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.265625, "rewards/margins": 19.25, "rewards/rejected": -20.625, "step": 1855 }, { "epoch": 0.6912476722532589, "grad_norm": 0.0673828125, "learning_rate": 4.7419558242893887e-07, "logits/chosen": 0.173828125, "logits/rejected": -0.416015625, "logps/chosen": -0.205078125, "logps/rejected": -2.78125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0234375, "rewards/margins": 12.8125, "rewards/rejected": -13.875, "step": 1856 }, { "epoch": 0.6916201117318436, "grad_norm": 0.0291748046875, "learning_rate": 4.731649124129978e-07, "logits/chosen": 0.1005859375, "logits/rejected": -0.061279296875, "logps/chosen": -0.359375, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 13.875, "rewards/rejected": -15.625, "step": 1857 }, { "epoch": 0.6919925512104284, "grad_norm": 0.0004405975341796875, "learning_rate": 4.721349641123983e-07, "logits/chosen": -0.01068115234375, "logits/rejected": 0.275390625, "logps/chosen": -0.453125, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.265625, "rewards/margins": 15.25, "rewards/rejected": -17.5, "step": 1858 }, { "epoch": 0.692364990689013, "grad_norm": 7.62939453125e-05, "learning_rate": 4.711057392686319e-07, "logits/chosen": 0.220703125, "logits/rejected": 0.40234375, "logps/chosen": -0.2373046875, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1875, "rewards/margins": 17.0, "rewards/rejected": -18.125, "step": 1859 }, { "epoch": 0.6927374301675978, "grad_norm": 0.54296875, "learning_rate": 4.7007723962196526e-07, "logits/chosen": -0.04150390625, "logits/rejected": -0.046630859375, "logps/chosen": -0.4921875, "logps/rejected": -2.0625, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.453125, "rewards/margins": 7.8125, "rewards/rejected": -10.25, "step": 1860 }, { "epoch": 0.6931098696461825, "grad_norm": 1.6689300537109375e-05, "learning_rate": 4.690494669114412e-07, "logits/chosen": 0.1806640625, "logits/rejected": 0.41796875, "logps/chosen": -0.2109375, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 18.25, "rewards/rejected": -19.375, "step": 1861 }, { "epoch": 0.6934823091247673, "grad_norm": 1.0013580322265625e-05, "learning_rate": 4.6802242287487204e-07, "logits/chosen": 0.06494140625, "logits/rejected": 0.44921875, "logps/chosen": -0.138671875, "logps/rejected": -4.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6953125, "rewards/margins": 21.625, "rewards/rejected": -22.25, "step": 1862 }, { "epoch": 0.693854748603352, "grad_norm": 0.0030517578125, "learning_rate": 4.669961092488376e-07, "logits/chosen": 0.08837890625, "logits/rejected": 0.1259765625, "logps/chosen": -0.451171875, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25, "rewards/margins": 15.25, "rewards/rejected": -17.5, "step": 1863 }, { "epoch": 0.6942271880819367, "grad_norm": 6.375, "learning_rate": 4.6597052776868345e-07, "logits/chosen": 0.0888671875, "logits/rejected": 0.279296875, "logps/chosen": -0.5, "logps/rejected": -2.375, "loss": 0.0096, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5, "rewards/margins": 9.375, "rewards/rejected": -11.875, "step": 1864 }, { "epoch": 0.6945996275605214, "grad_norm": 6.29425048828125e-05, "learning_rate": 4.649456801685169e-07, "logits/chosen": 0.051513671875, "logits/rejected": 0.23828125, "logps/chosen": -0.201171875, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0078125, "rewards/margins": 17.25, "rewards/rejected": -18.25, "step": 1865 }, { "epoch": 0.6949720670391062, "grad_norm": 0.016845703125, "learning_rate": 4.639215681812047e-07, "logits/chosen": -0.0703125, "logits/rejected": -0.35546875, "logps/chosen": -0.427734375, "logps/rejected": -2.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.140625, "rewards/margins": 12.375, "rewards/rejected": -14.5, "step": 1866 }, { "epoch": 0.6953445065176909, "grad_norm": 1.9453125, "learning_rate": 4.6289819353836986e-07, "logits/chosen": 0.068359375, "logits/rejected": -0.1298828125, "logps/chosen": -0.2216796875, "logps/rejected": -2.3125, "loss": 0.0028, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.109375, "rewards/margins": 10.5, "rewards/rejected": -11.625, "step": 1867 }, { "epoch": 0.6957169459962756, "grad_norm": 0.0003910064697265625, "learning_rate": 4.618755579703877e-07, "logits/chosen": 0.0250244140625, "logits/rejected": 0.32421875, "logps/chosen": -0.171875, "logps/rejected": -3.359375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.859375, "rewards/margins": 15.875, "rewards/rejected": -16.75, "step": 1868 }, { "epoch": 0.6960893854748603, "grad_norm": 0.177734375, "learning_rate": 4.6085366320638434e-07, "logits/chosen": -0.01025390625, "logits/rejected": 0.68359375, "logps/chosen": -0.2177734375, "logps/rejected": -2.609375, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.09375, "rewards/margins": 11.9375, "rewards/rejected": -13.0, "step": 1869 }, { "epoch": 0.6964618249534451, "grad_norm": 1.1324882507324219e-05, "learning_rate": 4.598325109742344e-07, "logits/chosen": 0.0267333984375, "logits/rejected": 0.61328125, "logps/chosen": -0.1240234375, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.62109375, "rewards/margins": 19.0, "rewards/rejected": -19.5, "step": 1870 }, { "epoch": 0.6968342644320298, "grad_norm": 3.890625, "learning_rate": 4.5881210300055605e-07, "logits/chosen": -0.1513671875, "logits/rejected": -0.32421875, "logps/chosen": -0.75, "logps/rejected": -2.71875, "loss": 0.005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.75, "rewards/margins": 9.875, "rewards/rejected": -13.625, "step": 1871 }, { "epoch": 0.6972067039106146, "grad_norm": 5.459785461425781e-05, "learning_rate": 4.5779244101070823e-07, "logits/chosen": 0.10009765625, "logits/rejected": 0.3984375, "logps/chosen": -0.390625, "logps/rejected": -4.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.953125, "rewards/margins": 18.25, "rewards/rejected": -20.125, "step": 1872 }, { "epoch": 0.6975791433891992, "grad_norm": 0.0057373046875, "learning_rate": 4.5677352672878975e-07, "logits/chosen": 0.177734375, "logits/rejected": -0.1748046875, "logps/chosen": -0.32421875, "logps/rejected": -2.828125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 12.5, "rewards/rejected": -14.125, "step": 1873 }, { "epoch": 0.697951582867784, "grad_norm": 5.066394805908203e-06, "learning_rate": 4.5575536187763477e-07, "logits/chosen": 0.21875, "logits/rejected": 0.5546875, "logps/chosen": -0.234375, "logps/rejected": -4.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.171875, "rewards/margins": 19.75, "rewards/rejected": -21.0, "step": 1874 }, { "epoch": 0.6983240223463687, "grad_norm": 0.0264892578125, "learning_rate": 4.547379481788107e-07, "logits/chosen": 0.109375, "logits/rejected": 0.3203125, "logps/chosen": -0.5078125, "logps/rejected": -3.078125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.53125, "rewards/margins": 12.875, "rewards/rejected": -15.375, "step": 1875 }, { "epoch": 0.6986964618249535, "grad_norm": 0.0013885498046875, "learning_rate": 4.537212873526137e-07, "logits/chosen": 0.11181640625, "logits/rejected": -0.10498046875, "logps/chosen": -0.7734375, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.875, "rewards/margins": 14.1875, "rewards/rejected": -18.0, "step": 1876 }, { "epoch": 0.6990689013035382, "grad_norm": 2.6345252990722656e-05, "learning_rate": 4.5270538111806804e-07, "logits/chosen": 0.150390625, "logits/rejected": 0.341796875, "logps/chosen": -0.29296875, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.46875, "rewards/margins": 18.0, "rewards/rejected": -19.5, "step": 1877 }, { "epoch": 0.6994413407821229, "grad_norm": 0.0001811981201171875, "learning_rate": 4.516902311929217e-07, "logits/chosen": 0.05126953125, "logits/rejected": 0.427734375, "logps/chosen": -0.216796875, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.078125, "rewards/margins": 18.25, "rewards/rejected": -19.375, "step": 1878 }, { "epoch": 0.6998137802607076, "grad_norm": 0.00040435791015625, "learning_rate": 4.506758392936438e-07, "logits/chosen": -0.095703125, "logits/rejected": 0.361328125, "logps/chosen": -0.4765625, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 16.25, "rewards/rejected": -18.625, "step": 1879 }, { "epoch": 0.7001862197392924, "grad_norm": 0.0067138671875, "learning_rate": 4.4966220713542186e-07, "logits/chosen": -0.050537109375, "logits/rejected": -0.2021484375, "logps/chosen": -0.3359375, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.671875, "rewards/margins": 14.625, "rewards/rejected": -16.25, "step": 1880 }, { "epoch": 0.7005586592178771, "grad_norm": 0.003997802734375, "learning_rate": 4.4864933643215876e-07, "logits/chosen": -0.21484375, "logits/rejected": 0.423828125, "logps/chosen": -0.58203125, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.890625, "rewards/margins": 15.5, "rewards/rejected": -18.375, "step": 1881 }, { "epoch": 0.7009310986964619, "grad_norm": 0.03564453125, "learning_rate": 4.4763722889646987e-07, "logits/chosen": 0.046875, "logits/rejected": -0.169921875, "logps/chosen": -0.5078125, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.53125, "rewards/margins": 13.6875, "rewards/rejected": -16.25, "step": 1882 }, { "epoch": 0.7013035381750465, "grad_norm": 0.0002899169921875, "learning_rate": 4.4662588623968007e-07, "logits/chosen": 0.09814453125, "logits/rejected": 0.296875, "logps/chosen": -0.310546875, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.546875, "rewards/margins": 16.0, "rewards/rejected": -17.625, "step": 1883 }, { "epoch": 0.7016759776536313, "grad_norm": 8.058547973632812e-05, "learning_rate": 4.4561531017182145e-07, "logits/chosen": 0.07177734375, "logits/rejected": 0.6328125, "logps/chosen": -0.25, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 18.0, "rewards/rejected": -19.25, "step": 1884 }, { "epoch": 0.702048417132216, "grad_norm": 0.000102996826171875, "learning_rate": 4.446055024016288e-07, "logits/chosen": -0.1171875, "logits/rejected": 0.5078125, "logps/chosen": -0.28515625, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.421875, "rewards/margins": 16.75, "rewards/rejected": -18.25, "step": 1885 }, { "epoch": 0.7024208566108008, "grad_norm": 0.08544921875, "learning_rate": 4.4359646463653876e-07, "logits/chosen": -0.123046875, "logits/rejected": 0.27734375, "logps/chosen": -0.46875, "logps/rejected": -2.671875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.34375, "rewards/margins": 11.0, "rewards/rejected": -13.375, "step": 1886 }, { "epoch": 0.7027932960893855, "grad_norm": 0.00170135498046875, "learning_rate": 4.425881985826857e-07, "logits/chosen": -0.02783203125, "logits/rejected": 0.345703125, "logps/chosen": -0.3359375, "logps/rejected": -3.203125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.671875, "rewards/margins": 14.3125, "rewards/rejected": -16.0, "step": 1887 }, { "epoch": 0.7031657355679702, "grad_norm": 0.005126953125, "learning_rate": 4.415807059448993e-07, "logits/chosen": 0.166015625, "logits/rejected": 0.0625, "logps/chosen": -0.59765625, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0, "rewards/margins": 14.3125, "rewards/rejected": -17.25, "step": 1888 }, { "epoch": 0.7035381750465549, "grad_norm": 0.34765625, "learning_rate": 4.4057398842670147e-07, "logits/chosen": 0.0244140625, "logits/rejected": -0.326171875, "logps/chosen": -0.515625, "logps/rejected": -3.09375, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.59375, "rewards/margins": 12.75, "rewards/rejected": -15.375, "step": 1889 }, { "epoch": 0.7039106145251397, "grad_norm": 0.0005645751953125, "learning_rate": 4.395680477303033e-07, "logits/chosen": 0.05859375, "logits/rejected": 0.4921875, "logps/chosen": -0.42578125, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 16.0, "rewards/rejected": -18.0, "step": 1890 }, { "epoch": 0.7042830540037244, "grad_norm": 0.0003566741943359375, "learning_rate": 4.385628855566025e-07, "logits/chosen": -0.298828125, "logits/rejected": 0.330078125, "logps/chosen": -0.625, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.125, "rewards/margins": 15.25, "rewards/rejected": -18.375, "step": 1891 }, { "epoch": 0.7046554934823092, "grad_norm": 378.0, "learning_rate": 4.37558503605181e-07, "logits/chosen": -0.072265625, "logits/rejected": 0.33984375, "logps/chosen": -0.73828125, "logps/rejected": -2.28125, "loss": 0.7773, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -3.6875, "rewards/margins": 7.71875, "rewards/rejected": -11.375, "step": 1892 }, { "epoch": 0.7050279329608938, "grad_norm": 18.25, "learning_rate": 4.365549035742999e-07, "logits/chosen": 0.07763671875, "logits/rejected": -0.412109375, "logps/chosen": -0.56640625, "logps/rejected": -2.453125, "loss": 0.025, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.828125, "rewards/margins": 9.375, "rewards/rejected": -12.25, "step": 1893 }, { "epoch": 0.7054003724394786, "grad_norm": 0.001800537109375, "learning_rate": 4.3555208716089984e-07, "logits/chosen": -0.07080078125, "logits/rejected": 0.17578125, "logps/chosen": -0.6171875, "logps/rejected": -3.421875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.09375, "rewards/margins": 14.0, "rewards/rejected": -17.125, "step": 1894 }, { "epoch": 0.7057728119180633, "grad_norm": 0.04638671875, "learning_rate": 4.3455005606059576e-07, "logits/chosen": 0.007568359375, "logits/rejected": 0.78125, "logps/chosen": -0.55078125, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 13.375, "rewards/rejected": -16.125, "step": 1895 }, { "epoch": 0.7061452513966481, "grad_norm": 0.43359375, "learning_rate": 4.3354881196767483e-07, "logits/chosen": -0.0174560546875, "logits/rejected": 0.515625, "logps/chosen": -0.47265625, "logps/rejected": -2.296875, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.359375, "rewards/margins": 9.125, "rewards/rejected": -11.5, "step": 1896 }, { "epoch": 0.7065176908752328, "grad_norm": 0.0034637451171875, "learning_rate": 4.325483565750935e-07, "logits/chosen": 0.1044921875, "logits/rejected": 0.181640625, "logps/chosen": -0.4140625, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 14.875, "rewards/rejected": -17.0, "step": 1897 }, { "epoch": 0.7068901303538175, "grad_norm": 7.8125, "learning_rate": 4.3154869157447467e-07, "logits/chosen": 0.03173828125, "logits/rejected": 0.41015625, "logps/chosen": -1.140625, "logps/rejected": -3.09375, "loss": 0.0082, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.71875, "rewards/margins": 9.6875, "rewards/rejected": -15.375, "step": 1898 }, { "epoch": 0.7072625698324022, "grad_norm": 0.006134033203125, "learning_rate": 4.3054981865610487e-07, "logits/chosen": -0.08837890625, "logits/rejected": -0.2119140625, "logps/chosen": -0.111328125, "logps/rejected": -3.109375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5546875, "rewards/margins": 15.0, "rewards/rejected": -15.5, "step": 1899 }, { "epoch": 0.707635009310987, "grad_norm": 0.00011920928955078125, "learning_rate": 4.295517395089312e-07, "logits/chosen": -0.03955078125, "logits/rejected": 0.1552734375, "logps/chosen": -0.515625, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.578125, "rewards/margins": 16.5, "rewards/rejected": -19.25, "step": 1900 }, { "epoch": 0.7080074487895717, "grad_norm": 8.75, "learning_rate": 4.2855445582055896e-07, "logits/chosen": 0.060791015625, "logits/rejected": 0.61328125, "logps/chosen": -0.30859375, "logps/rejected": -2.625, "loss": 0.0075, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5390625, "rewards/margins": 11.5625, "rewards/rejected": -13.0625, "step": 1901 }, { "epoch": 0.7083798882681565, "grad_norm": 0.00070953369140625, "learning_rate": 4.275579692772476e-07, "logits/chosen": -0.08154296875, "logits/rejected": 0.228515625, "logps/chosen": -0.50390625, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.515625, "rewards/margins": 14.875, "rewards/rejected": -17.375, "step": 1902 }, { "epoch": 0.7087523277467411, "grad_norm": 7.808208465576172e-06, "learning_rate": 4.265622815639096e-07, "logits/chosen": 0.26171875, "logits/rejected": 0.328125, "logps/chosen": -0.1767578125, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8828125, "rewards/margins": 19.5, "rewards/rejected": -20.25, "step": 1903 }, { "epoch": 0.7091247672253259, "grad_norm": 38.0, "learning_rate": 4.2556739436410626e-07, "logits/chosen": 0.0263671875, "logits/rejected": 0.326171875, "logps/chosen": -0.232421875, "logps/rejected": -2.1875, "loss": 0.1104, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.15625, "rewards/margins": 9.8125, "rewards/rejected": -10.9375, "step": 1904 }, { "epoch": 0.7094972067039106, "grad_norm": 0.000896453857421875, "learning_rate": 4.2457330936004595e-07, "logits/chosen": -0.0004787445068359375, "logits/rejected": 0.294921875, "logps/chosen": -0.375, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8828125, "rewards/margins": 14.5625, "rewards/rejected": -16.5, "step": 1905 }, { "epoch": 0.7098696461824954, "grad_norm": 0.0179443359375, "learning_rate": 4.2358002823257945e-07, "logits/chosen": 0.035888671875, "logits/rejected": 0.15234375, "logps/chosen": -0.77734375, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.875, "rewards/margins": 14.0625, "rewards/rejected": -18.0, "step": 1906 }, { "epoch": 0.71024208566108, "grad_norm": 0.392578125, "learning_rate": 4.2258755266119894e-07, "logits/chosen": 0.048828125, "logits/rejected": -0.47265625, "logps/chosen": -0.3046875, "logps/rejected": -2.953125, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5234375, "rewards/margins": 13.3125, "rewards/rejected": -14.8125, "step": 1907 }, { "epoch": 0.7106145251396648, "grad_norm": 0.001129150390625, "learning_rate": 4.2159588432403554e-07, "logits/chosen": 0.3046875, "logits/rejected": 0.2177734375, "logps/chosen": -0.45703125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.28125, "rewards/margins": 15.125, "rewards/rejected": -17.5, "step": 1908 }, { "epoch": 0.7109869646182495, "grad_norm": 0.267578125, "learning_rate": 4.2060502489785424e-07, "logits/chosen": 0.12890625, "logits/rejected": 0.66015625, "logps/chosen": -0.12109375, "logps/rejected": -2.921875, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.60546875, "rewards/margins": 14.0, "rewards/rejected": -14.625, "step": 1909 }, { "epoch": 0.7113594040968343, "grad_norm": 7.05718994140625e-05, "learning_rate": 4.19614976058052e-07, "logits/chosen": -0.0009613037109375, "logits/rejected": 0.4140625, "logps/chosen": -0.126953125, "logps/rejected": -3.765625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.63671875, "rewards/margins": 18.25, "rewards/rejected": -18.875, "step": 1910 }, { "epoch": 0.711731843575419, "grad_norm": 0.0030670166015625, "learning_rate": 4.186257394786562e-07, "logits/chosen": 0.08203125, "logits/rejected": -0.1484375, "logps/chosen": -0.390625, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.953125, "rewards/margins": 13.375, "rewards/rejected": -15.3125, "step": 1911 }, { "epoch": 0.7121042830540038, "grad_norm": 0.0002841949462890625, "learning_rate": 4.176373168323203e-07, "logits/chosen": 0.052490234375, "logits/rejected": 0.244140625, "logps/chosen": -0.3828125, "logps/rejected": -3.484375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.921875, "rewards/margins": 15.5, "rewards/rejected": -17.375, "step": 1912 }, { "epoch": 0.7124767225325884, "grad_norm": 0.017578125, "learning_rate": 4.166497097903215e-07, "logits/chosen": 0.037841796875, "logits/rejected": -0.1484375, "logps/chosen": -0.61328125, "logps/rejected": -3.328125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.078125, "rewards/margins": 13.5625, "rewards/rejected": -16.625, "step": 1913 }, { "epoch": 0.7128491620111732, "grad_norm": 0.404296875, "learning_rate": 4.156629200225584e-07, "logits/chosen": -0.1708984375, "logits/rejected": -0.26171875, "logps/chosen": -0.376953125, "logps/rejected": -2.765625, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 11.9375, "rewards/rejected": -13.875, "step": 1914 }, { "epoch": 0.7132216014897579, "grad_norm": 0.05322265625, "learning_rate": 4.146769491975467e-07, "logits/chosen": -0.07666015625, "logits/rejected": 1.5234375, "logps/chosen": -0.3046875, "logps/rejected": -3.46875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.53125, "rewards/margins": 15.8125, "rewards/rejected": -17.25, "step": 1915 }, { "epoch": 0.7135940409683427, "grad_norm": 0.000202178955078125, "learning_rate": 4.1369179898241815e-07, "logits/chosen": 0.16015625, "logits/rejected": 0.1884765625, "logps/chosen": -0.419921875, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.09375, "rewards/margins": 16.25, "rewards/rejected": -18.25, "step": 1916 }, { "epoch": 0.7139664804469273, "grad_norm": 0.000720977783203125, "learning_rate": 4.1270747104291755e-07, "logits/chosen": 0.04736328125, "logits/rejected": -0.01904296875, "logps/chosen": -0.28515625, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.421875, "rewards/margins": 15.0, "rewards/rejected": -16.5, "step": 1917 }, { "epoch": 0.7143389199255121, "grad_norm": 7.915496826171875e-05, "learning_rate": 4.1172396704339776e-07, "logits/chosen": 0.08642578125, "logits/rejected": 0.51953125, "logps/chosen": -0.2109375, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 17.5, "rewards/rejected": -18.5, "step": 1918 }, { "epoch": 0.7147113594040968, "grad_norm": 0.0712890625, "learning_rate": 4.107412886468198e-07, "logits/chosen": -0.11083984375, "logits/rejected": 0.2099609375, "logps/chosen": -0.91796875, "logps/rejected": -3.65625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.59375, "rewards/margins": 13.75, "rewards/rejected": -18.25, "step": 1919 }, { "epoch": 0.7150837988826816, "grad_norm": 0.482421875, "learning_rate": 4.097594375147483e-07, "logits/chosen": 0.01239013671875, "logits/rejected": -0.259765625, "logps/chosen": -0.486328125, "logps/rejected": -2.78125, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 11.4375, "rewards/rejected": -13.875, "step": 1920 }, { "epoch": 0.7154562383612663, "grad_norm": 0.1767578125, "learning_rate": 4.087784153073488e-07, "logits/chosen": 0.05859375, "logits/rejected": 0.171875, "logps/chosen": -1.1640625, "logps/rejected": -3.0, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.8125, "rewards/margins": 9.125, "rewards/rejected": -15.0, "step": 1921 }, { "epoch": 0.715828677839851, "grad_norm": 0.0096435546875, "learning_rate": 4.077982236833862e-07, "logits/chosen": -0.0216064453125, "logits/rejected": 0.130859375, "logps/chosen": -0.4453125, "logps/rejected": -2.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 11.75, "rewards/rejected": -14.0, "step": 1922 }, { "epoch": 0.7162011173184357, "grad_norm": 0.04443359375, "learning_rate": 4.0681886430021944e-07, "logits/chosen": 0.00787353515625, "logits/rejected": 0.25390625, "logps/chosen": -0.4921875, "logps/rejected": -2.71875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.453125, "rewards/margins": 11.125, "rewards/rejected": -13.5625, "step": 1923 }, { "epoch": 0.7165735567970205, "grad_norm": 0.67578125, "learning_rate": 4.0584033881380167e-07, "logits/chosen": 0.038818359375, "logits/rejected": 0.640625, "logps/chosen": -0.2431640625, "logps/rejected": -3.046875, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2109375, "rewards/margins": 14.0, "rewards/rejected": -15.25, "step": 1924 }, { "epoch": 0.7169459962756052, "grad_norm": 0.000705718994140625, "learning_rate": 4.048626488786753e-07, "logits/chosen": 0.07421875, "logits/rejected": 0.349609375, "logps/chosen": -0.259765625, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.296875, "rewards/margins": 16.5, "rewards/rejected": -17.75, "step": 1925 }, { "epoch": 0.71731843575419, "grad_norm": 0.000415802001953125, "learning_rate": 4.0388579614797046e-07, "logits/chosen": -0.0216064453125, "logits/rejected": 0.375, "logps/chosen": -0.400390625, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.015625, "rewards/margins": 16.625, "rewards/rejected": -18.75, "step": 1926 }, { "epoch": 0.7176908752327746, "grad_norm": 0.0001983642578125, "learning_rate": 4.029097822734013e-07, "logits/chosen": 0.1630859375, "logits/rejected": 0.43359375, "logps/chosen": -0.296875, "logps/rejected": -3.984375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.484375, "rewards/margins": 18.375, "rewards/rejected": -19.875, "step": 1927 }, { "epoch": 0.7180633147113594, "grad_norm": 0.71484375, "learning_rate": 4.0193460890526364e-07, "logits/chosen": -0.003936767578125, "logits/rejected": 0.267578125, "logps/chosen": -0.75, "logps/rejected": -2.5, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.734375, "rewards/margins": 8.8125, "rewards/rejected": -12.5, "step": 1928 }, { "epoch": 0.7184357541899441, "grad_norm": 0.00019550323486328125, "learning_rate": 4.009602776924323e-07, "logits/chosen": 0.150390625, "logits/rejected": 0.263671875, "logps/chosen": -0.244140625, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.21875, "rewards/margins": 16.75, "rewards/rejected": -18.0, "step": 1929 }, { "epoch": 0.7188081936685289, "grad_norm": 0.0003681182861328125, "learning_rate": 3.9998679028235825e-07, "logits/chosen": 0.0206298828125, "logits/rejected": 0.51171875, "logps/chosen": -0.25, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 16.75, "rewards/rejected": -18.0, "step": 1930 }, { "epoch": 0.7191806331471136, "grad_norm": 0.0024566650390625, "learning_rate": 3.990141483210648e-07, "logits/chosen": 0.1953125, "logits/rejected": 0.41796875, "logps/chosen": -0.1689453125, "logps/rejected": -3.171875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.84375, "rewards/margins": 15.0, "rewards/rejected": -15.875, "step": 1931 }, { "epoch": 0.7195530726256983, "grad_norm": 0.005523681640625, "learning_rate": 3.9804235345314695e-07, "logits/chosen": 0.130859375, "logits/rejected": -0.27734375, "logps/chosen": -0.2138671875, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0703125, "rewards/margins": 15.9375, "rewards/rejected": -17.0, "step": 1932 }, { "epoch": 0.719925512104283, "grad_norm": 0.00048065185546875, "learning_rate": 3.9707140732176666e-07, "logits/chosen": 0.0546875, "logits/rejected": 0.3046875, "logps/chosen": -0.34375, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.71875, "rewards/margins": 15.3125, "rewards/rejected": -17.0, "step": 1933 }, { "epoch": 0.7202979515828678, "grad_norm": 0.0255126953125, "learning_rate": 3.9610131156865117e-07, "logits/chosen": -0.04296875, "logits/rejected": -0.150390625, "logps/chosen": -0.1787109375, "logps/rejected": -2.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.89453125, "rewards/margins": 12.875, "rewards/rejected": -13.75, "step": 1934 }, { "epoch": 0.7206703910614525, "grad_norm": 0.60546875, "learning_rate": 3.951320678340896e-07, "logits/chosen": -0.049560546875, "logits/rejected": 0.8671875, "logps/chosen": -0.359375, "logps/rejected": -3.0625, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 13.5625, "rewards/rejected": -15.375, "step": 1935 }, { "epoch": 0.7210428305400373, "grad_norm": 0.0026397705078125, "learning_rate": 3.941636777569305e-07, "logits/chosen": -0.1259765625, "logits/rejected": 0.408203125, "logps/chosen": -0.291015625, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4609375, "rewards/margins": 15.375, "rewards/rejected": -16.875, "step": 1936 }, { "epoch": 0.7214152700186219, "grad_norm": 0.0029449462890625, "learning_rate": 3.9319614297457904e-07, "logits/chosen": 0.12353515625, "logits/rejected": 0.1494140625, "logps/chosen": -0.361328125, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8046875, "rewards/margins": 13.8125, "rewards/rejected": -15.625, "step": 1937 }, { "epoch": 0.7217877094972067, "grad_norm": 0.11083984375, "learning_rate": 3.9222946512299414e-07, "logits/chosen": -0.036376953125, "logits/rejected": 0.33203125, "logps/chosen": -0.2158203125, "logps/rejected": -2.78125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.078125, "rewards/margins": 12.875, "rewards/rejected": -14.0, "step": 1938 }, { "epoch": 0.7221601489757914, "grad_norm": 1.895427703857422e-05, "learning_rate": 3.912636458366863e-07, "logits/chosen": 0.033447265625, "logits/rejected": 0.26953125, "logps/chosen": -0.1513671875, "logps/rejected": -4.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.75390625, "rewards/margins": 20.25, "rewards/rejected": -21.0, "step": 1939 }, { "epoch": 0.7225325884543762, "grad_norm": 0.00096893310546875, "learning_rate": 3.9029868674871327e-07, "logits/chosen": 0.0966796875, "logits/rejected": 0.345703125, "logps/chosen": -0.4140625, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 14.75, "rewards/rejected": -16.75, "step": 1940 }, { "epoch": 0.7229050279329609, "grad_norm": 5.4375, "learning_rate": 3.893345894906789e-07, "logits/chosen": 0.087890625, "logits/rejected": -0.89453125, "logps/chosen": -0.59375, "logps/rejected": -2.0625, "loss": 0.0092, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.96875, "rewards/margins": 7.34375, "rewards/rejected": -10.3125, "step": 1941 }, { "epoch": 0.7232774674115456, "grad_norm": 0.000484466552734375, "learning_rate": 3.883713556927302e-07, "logits/chosen": 0.08203125, "logits/rejected": 0.384765625, "logps/chosen": -0.267578125, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3359375, "rewards/margins": 15.875, "rewards/rejected": -17.25, "step": 1942 }, { "epoch": 0.7236499068901303, "grad_norm": 6.818771362304688e-05, "learning_rate": 3.8740898698355356e-07, "logits/chosen": 0.111328125, "logits/rejected": 0.38671875, "logps/chosen": -0.3046875, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5234375, "rewards/margins": 17.25, "rewards/rejected": -18.75, "step": 1943 }, { "epoch": 0.7240223463687151, "grad_norm": 0.002197265625, "learning_rate": 3.8644748499037337e-07, "logits/chosen": 0.0615234375, "logits/rejected": -0.1787109375, "logps/chosen": -0.16796875, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.83984375, "rewards/margins": 16.0, "rewards/rejected": -16.875, "step": 1944 }, { "epoch": 0.7243947858472998, "grad_norm": 0.000560760498046875, "learning_rate": 3.85486851338947e-07, "logits/chosen": 0.1005859375, "logits/rejected": 0.4140625, "logps/chosen": -0.57421875, "logps/rejected": -3.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.875, "rewards/margins": 16.875, "rewards/rejected": -19.75, "step": 1945 }, { "epoch": 0.7247672253258846, "grad_norm": 0.0004596710205078125, "learning_rate": 3.8452708765356574e-07, "logits/chosen": 0.0283203125, "logits/rejected": 0.466796875, "logps/chosen": -0.267578125, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 15.125, "rewards/rejected": -16.5, "step": 1946 }, { "epoch": 0.7251396648044692, "grad_norm": 0.64453125, "learning_rate": 3.835681955570486e-07, "logits/chosen": 0.0064697265625, "logits/rejected": 0.330078125, "logps/chosen": -0.330078125, "logps/rejected": -2.25, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.640625, "rewards/margins": 9.625, "rewards/rejected": -11.25, "step": 1947 }, { "epoch": 0.725512104283054, "grad_norm": 0.0019683837890625, "learning_rate": 3.8261017667074047e-07, "logits/chosen": -0.03271484375, "logits/rejected": 0.19140625, "logps/chosen": -0.33203125, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 13.625, "rewards/rejected": -15.3125, "step": 1948 }, { "epoch": 0.7258845437616387, "grad_norm": 4.887580871582031e-05, "learning_rate": 3.816530326145108e-07, "logits/chosen": 0.0177001953125, "logits/rejected": 0.421875, "logps/chosen": -0.42578125, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 18.125, "rewards/rejected": -20.25, "step": 1949 }, { "epoch": 0.7262569832402235, "grad_norm": 1.6808509826660156e-05, "learning_rate": 3.8069676500674924e-07, "logits/chosen": 0.10009765625, "logits/rejected": 0.2197265625, "logps/chosen": -0.205078125, "logps/rejected": -3.921875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0234375, "rewards/margins": 18.5, "rewards/rejected": -19.5, "step": 1950 }, { "epoch": 0.7266294227188081, "grad_norm": 0.000530242919921875, "learning_rate": 3.7974137546436363e-07, "logits/chosen": 0.1298828125, "logits/rejected": 0.3515625, "logps/chosen": -0.390625, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.953125, "rewards/margins": 17.25, "rewards/rejected": -19.25, "step": 1951 }, { "epoch": 0.7270018621973929, "grad_norm": 3.5625, "learning_rate": 3.7878686560277763e-07, "logits/chosen": -0.134765625, "logits/rejected": 0.3359375, "logps/chosen": -0.48828125, "logps/rejected": -2.46875, "loss": 0.0064, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 9.9375, "rewards/rejected": -12.375, "step": 1952 }, { "epoch": 0.7273743016759776, "grad_norm": 3.21875, "learning_rate": 3.778332370359261e-07, "logits/chosen": 0.16796875, "logits/rejected": -0.76171875, "logps/chosen": -0.45703125, "logps/rejected": -2.671875, "loss": 0.0052, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.28125, "rewards/margins": 11.0625, "rewards/rejected": -13.375, "step": 1953 }, { "epoch": 0.7277467411545624, "grad_norm": 0.0693359375, "learning_rate": 3.7688049137625495e-07, "logits/chosen": -0.07666015625, "logits/rejected": -0.2294921875, "logps/chosen": -0.326171875, "logps/rejected": -2.828125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.625, "rewards/margins": 12.5, "rewards/rejected": -14.125, "step": 1954 }, { "epoch": 0.7281191806331471, "grad_norm": 0.259765625, "learning_rate": 3.7592863023471734e-07, "logits/chosen": -0.07275390625, "logits/rejected": -0.1591796875, "logps/chosen": -0.51953125, "logps/rejected": -2.234375, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.59375, "rewards/margins": 8.625, "rewards/rejected": -11.1875, "step": 1955 }, { "epoch": 0.7284916201117319, "grad_norm": 2.8252601623535156e-05, "learning_rate": 3.7497765522077057e-07, "logits/chosen": -0.027099609375, "logits/rejected": 0.3515625, "logps/chosen": -0.185546875, "logps/rejected": -3.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.92578125, "rewards/margins": 18.0, "rewards/rejected": -19.0, "step": 1956 }, { "epoch": 0.7288640595903165, "grad_norm": 0.00016498565673828125, "learning_rate": 3.740275679423727e-07, "logits/chosen": 0.00543212890625, "logits/rejected": 0.40234375, "logps/chosen": -0.5546875, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.78125, "rewards/margins": 16.25, "rewards/rejected": -19.0, "step": 1957 }, { "epoch": 0.7292364990689013, "grad_norm": 1.4960765838623047e-05, "learning_rate": 3.730783700059821e-07, "logits/chosen": 0.15625, "logits/rejected": 0.462890625, "logps/chosen": -0.134765625, "logps/rejected": -3.921875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.671875, "rewards/margins": 19.0, "rewards/rejected": -19.625, "step": 1958 }, { "epoch": 0.729608938547486, "grad_norm": 0.06103515625, "learning_rate": 3.721300630165526e-07, "logits/chosen": 0.06298828125, "logits/rejected": -0.32421875, "logps/chosen": -0.39453125, "logps/rejected": -2.875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.984375, "rewards/margins": 12.375, "rewards/rejected": -14.375, "step": 1959 }, { "epoch": 0.7299813780260708, "grad_norm": 0.08447265625, "learning_rate": 3.711826485775323e-07, "logits/chosen": 0.0634765625, "logits/rejected": -0.51171875, "logps/chosen": -0.1796875, "logps/rejected": -2.4375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8984375, "rewards/margins": 11.25, "rewards/rejected": -12.1875, "step": 1960 }, { "epoch": 0.7303538175046554, "grad_norm": 5.9604644775390625e-05, "learning_rate": 3.7023612829085893e-07, "logits/chosen": 0.267578125, "logits/rejected": 0.59375, "logps/chosen": -0.2021484375, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.015625, "rewards/margins": 17.25, "rewards/rejected": -18.25, "step": 1961 }, { "epoch": 0.7307262569832402, "grad_norm": 0.169921875, "learning_rate": 3.692905037569595e-07, "logits/chosen": -0.08251953125, "logits/rejected": -0.15234375, "logps/chosen": -0.25390625, "logps/rejected": -2.40625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2734375, "rewards/margins": 10.6875, "rewards/rejected": -12.0, "step": 1962 }, { "epoch": 0.7310986964618249, "grad_norm": 2.9206275939941406e-05, "learning_rate": 3.683457765747457e-07, "logits/chosen": 0.259765625, "logits/rejected": 0.4140625, "logps/chosen": -0.5234375, "logps/rejected": -4.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.625, "rewards/margins": 18.0, "rewards/rejected": -20.625, "step": 1963 }, { "epoch": 0.7314711359404097, "grad_norm": 5.6875, "learning_rate": 3.6740194834161256e-07, "logits/chosen": 0.03662109375, "logits/rejected": 0.5078125, "logps/chosen": -0.482421875, "logps/rejected": -2.6875, "loss": 0.0075, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.40625, "rewards/margins": 11.0, "rewards/rejected": -13.375, "step": 1964 }, { "epoch": 0.7318435754189944, "grad_norm": 0.00133514404296875, "learning_rate": 3.664590206534345e-07, "logits/chosen": 0.012939453125, "logits/rejected": 0.279296875, "logps/chosen": -0.5703125, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 15.875, "rewards/rejected": -18.75, "step": 1965 }, { "epoch": 0.7322160148975791, "grad_norm": 0.0017547607421875, "learning_rate": 3.6551699510456374e-07, "logits/chosen": 0.054443359375, "logits/rejected": 0.361328125, "logps/chosen": -0.3828125, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.90625, "rewards/margins": 14.5, "rewards/rejected": -16.5, "step": 1966 }, { "epoch": 0.7325884543761638, "grad_norm": 1.5234375, "learning_rate": 3.6457587328782673e-07, "logits/chosen": 0.0849609375, "logits/rejected": -0.267578125, "logps/chosen": -0.31640625, "logps/rejected": -2.921875, "loss": 0.0025, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.578125, "rewards/margins": 13.0, "rewards/rejected": -14.625, "step": 1967 }, { "epoch": 0.7329608938547486, "grad_norm": 0.06494140625, "learning_rate": 3.636356567945222e-07, "logits/chosen": 0.05224609375, "logits/rejected": 0.65234375, "logps/chosen": -0.62109375, "logps/rejected": -3.109375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.09375, "rewards/margins": 12.4375, "rewards/rejected": -15.5, "step": 1968 }, { "epoch": 0.7333333333333333, "grad_norm": 0.0018157958984375, "learning_rate": 3.62696347214418e-07, "logits/chosen": -0.1484375, "logits/rejected": 0.43359375, "logps/chosen": -0.44921875, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25, "rewards/margins": 14.3125, "rewards/rejected": -16.5, "step": 1969 }, { "epoch": 0.7337057728119181, "grad_norm": 0.0230712890625, "learning_rate": 3.61757946135748e-07, "logits/chosen": -0.019775390625, "logits/rejected": -0.091796875, "logps/chosen": -0.2890625, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.453125, "rewards/margins": 14.5, "rewards/rejected": -15.9375, "step": 1970 }, { "epoch": 0.7340782122905027, "grad_norm": 0.041259765625, "learning_rate": 3.608204551452107e-07, "logits/chosen": 0.060546875, "logits/rejected": -0.359375, "logps/chosen": -0.296875, "logps/rejected": -2.875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.484375, "rewards/margins": 12.9375, "rewards/rejected": -14.4375, "step": 1971 }, { "epoch": 0.7344506517690875, "grad_norm": 0.0078125, "learning_rate": 3.598838758279653e-07, "logits/chosen": 0.06982421875, "logits/rejected": 0.0556640625, "logps/chosen": -0.625, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.125, "rewards/margins": 12.875, "rewards/rejected": -16.0, "step": 1972 }, { "epoch": 0.7348230912476722, "grad_norm": 0.0196533203125, "learning_rate": 3.589482097676298e-07, "logits/chosen": -0.06396484375, "logits/rejected": 0.5234375, "logps/chosen": -0.5703125, "logps/rejected": -3.078125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 12.5, "rewards/rejected": -15.375, "step": 1973 }, { "epoch": 0.735195530726257, "grad_norm": 0.6875, "learning_rate": 3.580134585462777e-07, "logits/chosen": 0.1826171875, "logits/rejected": -0.212890625, "logps/chosen": -0.34765625, "logps/rejected": -2.703125, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.734375, "rewards/margins": 11.75, "rewards/rejected": -13.5, "step": 1974 }, { "epoch": 0.7355679702048417, "grad_norm": 0.0036468505859375, "learning_rate": 3.5707962374443595e-07, "logits/chosen": 0.0262451171875, "logits/rejected": -0.08203125, "logps/chosen": -0.1240234375, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.62109375, "rewards/margins": 16.375, "rewards/rejected": -17.0, "step": 1975 }, { "epoch": 0.7359404096834264, "grad_norm": 0.046142578125, "learning_rate": 3.561467069410818e-07, "logits/chosen": -0.06591796875, "logits/rejected": -0.341796875, "logps/chosen": -0.193359375, "logps/rejected": -2.75, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.96875, "rewards/margins": 12.75, "rewards/rejected": -13.75, "step": 1976 }, { "epoch": 0.7363128491620111, "grad_norm": 0.00160980224609375, "learning_rate": 3.5521470971364066e-07, "logits/chosen": 0.0712890625, "logits/rejected": 0.322265625, "logps/chosen": -0.45703125, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.28125, "rewards/margins": 15.5, "rewards/rejected": -17.75, "step": 1977 }, { "epoch": 0.7366852886405959, "grad_norm": 0.01806640625, "learning_rate": 3.54283633637982e-07, "logits/chosen": -0.04345703125, "logits/rejected": -0.28125, "logps/chosen": -0.353515625, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.765625, "rewards/margins": 14.5, "rewards/rejected": -16.25, "step": 1978 }, { "epoch": 0.7370577281191807, "grad_norm": 0.0093994140625, "learning_rate": 3.5335348028841884e-07, "logits/chosen": -0.06396484375, "logits/rejected": -0.267578125, "logps/chosen": -0.1591796875, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.796875, "rewards/margins": 15.0, "rewards/rejected": -15.8125, "step": 1979 }, { "epoch": 0.7374301675977654, "grad_norm": 0.006927490234375, "learning_rate": 3.524242512377038e-07, "logits/chosen": -0.010009765625, "logits/rejected": 0.1357421875, "logps/chosen": -0.46875, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.34375, "rewards/margins": 13.0, "rewards/rejected": -15.375, "step": 1980 }, { "epoch": 0.7378026070763501, "grad_norm": 0.220703125, "learning_rate": 3.5149594805702634e-07, "logits/chosen": -0.21875, "logits/rejected": -0.49609375, "logps/chosen": -0.625, "logps/rejected": -2.53125, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.140625, "rewards/margins": 9.5625, "rewards/rejected": -12.6875, "step": 1981 }, { "epoch": 0.7381750465549348, "grad_norm": 0.00104522705078125, "learning_rate": 3.5056857231601063e-07, "logits/chosen": -0.1826171875, "logits/rejected": 0.12451171875, "logps/chosen": -0.23828125, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1875, "rewards/margins": 14.875, "rewards/rejected": -16.0, "step": 1982 }, { "epoch": 0.7385474860335196, "grad_norm": 0.01171875, "learning_rate": 3.4964212558271264e-07, "logits/chosen": 0.06298828125, "logits/rejected": -0.201171875, "logps/chosen": -0.0966796875, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.482421875, "rewards/margins": 16.5, "rewards/rejected": -17.0, "step": 1983 }, { "epoch": 0.7389199255121043, "grad_norm": 0.04150390625, "learning_rate": 3.487166094236175e-07, "logits/chosen": 0.02587890625, "logits/rejected": 0.58984375, "logps/chosen": -0.39453125, "logps/rejected": -2.53125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.96875, "rewards/margins": 10.75, "rewards/rejected": -12.6875, "step": 1984 }, { "epoch": 0.7392923649906891, "grad_norm": 0.00107574462890625, "learning_rate": 3.477920254036369e-07, "logits/chosen": 0.1328125, "logits/rejected": 0.357421875, "logps/chosen": -0.203125, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.015625, "rewards/margins": 17.0, "rewards/rejected": -18.0, "step": 1985 }, { "epoch": 0.7396648044692737, "grad_norm": 2.453125, "learning_rate": 3.468683750861066e-07, "logits/chosen": 0.1181640625, "logits/rejected": 0.71484375, "logps/chosen": -0.17578125, "logps/rejected": -2.359375, "loss": 0.0064, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.87890625, "rewards/margins": 10.875, "rewards/rejected": -11.75, "step": 1986 }, { "epoch": 0.7400372439478585, "grad_norm": 0.036376953125, "learning_rate": 3.459456600327829e-07, "logits/chosen": 0.09423828125, "logits/rejected": -0.41796875, "logps/chosen": -0.296875, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.484375, "rewards/margins": 14.0, "rewards/rejected": -15.5, "step": 1987 }, { "epoch": 0.7404096834264432, "grad_norm": 14.0, "learning_rate": 3.4502388180384157e-07, "logits/chosen": -0.365234375, "logits/rejected": -0.30859375, "logps/chosen": -0.4453125, "logps/rejected": -2.25, "loss": 0.0236, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 9.0, "rewards/rejected": -11.25, "step": 1988 }, { "epoch": 0.740782122905028, "grad_norm": 0.0003490447998046875, "learning_rate": 3.4410304195787386e-07, "logits/chosen": 0.07861328125, "logits/rejected": 0.40625, "logps/chosen": -0.625, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.109375, "rewards/margins": 16.375, "rewards/rejected": -19.5, "step": 1989 }, { "epoch": 0.7411545623836127, "grad_norm": 0.0034332275390625, "learning_rate": 3.431831420518849e-07, "logits/chosen": 0.023193359375, "logits/rejected": 0.140625, "logps/chosen": -0.447265625, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.234375, "rewards/margins": 13.375, "rewards/rejected": -15.625, "step": 1990 }, { "epoch": 0.7415270018621974, "grad_norm": 0.02197265625, "learning_rate": 3.422641836412895e-07, "logits/chosen": 0.255859375, "logits/rejected": 0.2265625, "logps/chosen": -0.7421875, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.703125, "rewards/margins": 11.25, "rewards/rejected": -15.0, "step": 1991 }, { "epoch": 0.7418994413407821, "grad_norm": 0.0029754638671875, "learning_rate": 3.4134616827991103e-07, "logits/chosen": 0.008544921875, "logits/rejected": 0.275390625, "logps/chosen": -0.3515625, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7578125, "rewards/margins": 16.0, "rewards/rejected": -17.75, "step": 1992 }, { "epoch": 0.7422718808193669, "grad_norm": 4.267692565917969e-05, "learning_rate": 3.4042909751997907e-07, "logits/chosen": 0.09521484375, "logits/rejected": 0.333984375, "logps/chosen": -0.30859375, "logps/rejected": -3.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.546875, "rewards/margins": 18.375, "rewards/rejected": -19.875, "step": 1993 }, { "epoch": 0.7426443202979516, "grad_norm": 14.25, "learning_rate": 3.3951297291212545e-07, "logits/chosen": -0.1103515625, "logits/rejected": 0.043701171875, "logps/chosen": -0.671875, "logps/rejected": -2.359375, "loss": 0.0366, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.359375, "rewards/margins": 8.4375, "rewards/rejected": -11.75, "step": 1994 }, { "epoch": 0.7430167597765364, "grad_norm": 0.01214599609375, "learning_rate": 3.385977960053814e-07, "logits/chosen": -0.03857421875, "logits/rejected": 0.337890625, "logps/chosen": -0.228515625, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.140625, "rewards/margins": 13.8125, "rewards/rejected": -15.0, "step": 1995 }, { "epoch": 0.743389199255121, "grad_norm": 0.03857421875, "learning_rate": 3.376835683471768e-07, "logits/chosen": 0.12060546875, "logits/rejected": 0.62890625, "logps/chosen": -0.1552734375, "logps/rejected": -3.015625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.77734375, "rewards/margins": 14.25, "rewards/rejected": -15.0625, "step": 1996 }, { "epoch": 0.7437616387337058, "grad_norm": 0.03955078125, "learning_rate": 3.367702914833362e-07, "logits/chosen": 0.177734375, "logits/rejected": 0.2412109375, "logps/chosen": -0.63671875, "logps/rejected": -3.265625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1875, "rewards/margins": 13.125, "rewards/rejected": -16.375, "step": 1997 }, { "epoch": 0.7441340782122905, "grad_norm": 22.125, "learning_rate": 3.3585796695807624e-07, "logits/chosen": 0.2314453125, "logits/rejected": 0.232421875, "logps/chosen": -0.296875, "logps/rejected": -2.359375, "loss": 0.0688, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.484375, "rewards/margins": 10.3125, "rewards/rejected": -11.75, "step": 1998 }, { "epoch": 0.7445065176908753, "grad_norm": 0.007476806640625, "learning_rate": 3.349465963140038e-07, "logits/chosen": 0.10595703125, "logits/rejected": 0.0693359375, "logps/chosen": -0.412109375, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 13.0, "rewards/rejected": -15.0625, "step": 1999 }, { "epoch": 0.74487895716946, "grad_norm": 4.26173210144043e-06, "learning_rate": 3.340361810921122e-07, "logits/chosen": 0.1025390625, "logits/rejected": 0.42578125, "logps/chosen": -0.2578125, "logps/rejected": -4.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2890625, "rewards/margins": 20.25, "rewards/rejected": -21.625, "step": 2000 }, { "epoch": 0.7452513966480447, "grad_norm": 0.000804901123046875, "learning_rate": 3.3312672283177946e-07, "logits/chosen": 0.0155029296875, "logits/rejected": 0.341796875, "logps/chosen": -0.28125, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.40625, "rewards/margins": 15.1875, "rewards/rejected": -16.625, "step": 2001 }, { "epoch": 0.7456238361266294, "grad_norm": 6.84375, "learning_rate": 3.322182230707666e-07, "logits/chosen": -0.1640625, "logits/rejected": 0.4296875, "logps/chosen": -1.2890625, "logps/rejected": -3.40625, "loss": 0.0064, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.46875, "rewards/margins": 10.5, "rewards/rejected": -17.0, "step": 2002 }, { "epoch": 0.7459962756052142, "grad_norm": 14.5625, "learning_rate": 3.313106833452122e-07, "logits/chosen": -0.0277099609375, "logits/rejected": -0.1611328125, "logps/chosen": -0.65625, "logps/rejected": -2.9375, "loss": 0.0228, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 11.4375, "rewards/rejected": -14.6875, "step": 2003 }, { "epoch": 0.7463687150837989, "grad_norm": 2.3603439331054688e-05, "learning_rate": 3.3040410518963285e-07, "logits/chosen": 0.138671875, "logits/rejected": 0.578125, "logps/chosen": -0.09326171875, "logps/rejected": -3.734375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.46484375, "rewards/margins": 18.25, "rewards/rejected": -18.625, "step": 2004 }, { "epoch": 0.7467411545623837, "grad_norm": 7.772445678710938e-05, "learning_rate": 3.2949849013691865e-07, "logits/chosen": 0.201171875, "logits/rejected": 0.55078125, "logps/chosen": -0.333984375, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.671875, "rewards/margins": 17.0, "rewards/rejected": -18.75, "step": 2005 }, { "epoch": 0.7471135940409683, "grad_norm": 0.154296875, "learning_rate": 3.2859383971833154e-07, "logits/chosen": 0.1328125, "logits/rejected": -0.296875, "logps/chosen": -0.224609375, "logps/rejected": -2.84375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.125, "rewards/margins": 13.0, "rewards/rejected": -14.1875, "step": 2006 }, { "epoch": 0.7474860335195531, "grad_norm": 5.8650970458984375e-05, "learning_rate": 3.2769015546350254e-07, "logits/chosen": 0.1552734375, "logits/rejected": 0.1484375, "logps/chosen": -0.322265625, "logps/rejected": -3.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 18.25, "rewards/rejected": -19.75, "step": 2007 }, { "epoch": 0.7478584729981378, "grad_norm": 0.1455078125, "learning_rate": 3.2678743890042826e-07, "logits/chosen": 0.0927734375, "logits/rejected": 0.84765625, "logps/chosen": -0.138671875, "logps/rejected": -2.421875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6953125, "rewards/margins": 11.375, "rewards/rejected": -12.125, "step": 2008 }, { "epoch": 0.7482309124767226, "grad_norm": 0.00054168701171875, "learning_rate": 3.258856915554698e-07, "logits/chosen": -0.130859375, "logits/rejected": 0.314453125, "logps/chosen": -0.1787109375, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.890625, "rewards/margins": 15.125, "rewards/rejected": -16.0, "step": 2009 }, { "epoch": 0.7486033519553073, "grad_norm": 0.0004138946533203125, "learning_rate": 3.249849149533494e-07, "logits/chosen": 0.055908203125, "logits/rejected": 0.53515625, "logps/chosen": -0.443359375, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 15.25, "rewards/rejected": -17.5, "step": 2010 }, { "epoch": 0.748975791433892, "grad_norm": 0.000858306884765625, "learning_rate": 3.2408511061714766e-07, "logits/chosen": -0.0279541015625, "logits/rejected": 0.2314453125, "logps/chosen": -0.53125, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.65625, "rewards/margins": 15.3125, "rewards/rejected": -18.0, "step": 2011 }, { "epoch": 0.7493482309124767, "grad_norm": 0.024169921875, "learning_rate": 3.231862800683015e-07, "logits/chosen": 0.25390625, "logits/rejected": 0.330078125, "logps/chosen": -0.703125, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5, "rewards/margins": 13.375, "rewards/rejected": -17.0, "step": 2012 }, { "epoch": 0.7497206703910615, "grad_norm": 0.00083160400390625, "learning_rate": 3.2228842482660093e-07, "logits/chosen": -0.09814453125, "logits/rejected": 0.41796875, "logps/chosen": -0.162109375, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8046875, "rewards/margins": 15.25, "rewards/rejected": -16.0, "step": 2013 }, { "epoch": 0.7500931098696462, "grad_norm": 0.000324249267578125, "learning_rate": 3.213915464101873e-07, "logits/chosen": 0.146484375, "logits/rejected": 0.263671875, "logps/chosen": -0.65625, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 15.5, "rewards/rejected": -18.75, "step": 2014 }, { "epoch": 0.750465549348231, "grad_norm": 3.981590270996094e-05, "learning_rate": 3.2049564633554985e-07, "logits/chosen": -0.0211181640625, "logits/rejected": 0.3828125, "logps/chosen": -0.314453125, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5703125, "rewards/margins": 18.0, "rewards/rejected": -19.5, "step": 2015 }, { "epoch": 0.7508379888268156, "grad_norm": 0.0028228759765625, "learning_rate": 3.1960072611752425e-07, "logits/chosen": 0.03369140625, "logits/rejected": 0.326171875, "logps/chosen": -0.310546875, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5546875, "rewards/margins": 17.75, "rewards/rejected": -19.25, "step": 2016 }, { "epoch": 0.7512104283054004, "grad_norm": 0.00022792816162109375, "learning_rate": 3.187067872692884e-07, "logits/chosen": -0.0057373046875, "logits/rejected": 0.478515625, "logps/chosen": -0.41796875, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.09375, "rewards/margins": 16.75, "rewards/rejected": -19.0, "step": 2017 }, { "epoch": 0.7515828677839851, "grad_norm": 3.296875, "learning_rate": 3.178138313023616e-07, "logits/chosen": 0.169921875, "logits/rejected": 0.462890625, "logps/chosen": -0.1494140625, "logps/rejected": -2.390625, "loss": 0.0067, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.74609375, "rewards/margins": 11.1875, "rewards/rejected": -11.9375, "step": 2018 }, { "epoch": 0.7519553072625699, "grad_norm": 0.002197265625, "learning_rate": 3.16921859726601e-07, "logits/chosen": 0.138671875, "logits/rejected": 0.2216796875, "logps/chosen": -0.1826171875, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9140625, "rewards/margins": 16.25, "rewards/rejected": -17.125, "step": 2019 }, { "epoch": 0.7523277467411545, "grad_norm": 0.00020694732666015625, "learning_rate": 3.1603087405019925e-07, "logits/chosen": 0.154296875, "logits/rejected": 0.38671875, "logps/chosen": -0.265625, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 16.25, "rewards/rejected": -17.5, "step": 2020 }, { "epoch": 0.7527001862197393, "grad_norm": 0.70703125, "learning_rate": 3.1514087577968223e-07, "logits/chosen": 0.03466796875, "logits/rejected": -0.2060546875, "logps/chosen": -0.29296875, "logps/rejected": -2.5, "loss": 0.0014, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.453125, "rewards/margins": 11.0625, "rewards/rejected": -12.5, "step": 2021 }, { "epoch": 0.753072625698324, "grad_norm": 7.82012939453125e-05, "learning_rate": 3.142518664199061e-07, "logits/chosen": -0.04345703125, "logits/rejected": 0.33203125, "logps/chosen": -0.423828125, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 17.125, "rewards/rejected": -19.25, "step": 2022 }, { "epoch": 0.7534450651769088, "grad_norm": 68.0, "learning_rate": 3.133638474740548e-07, "logits/chosen": 0.04296875, "logits/rejected": 0.03662109375, "logps/chosen": -0.21484375, "logps/rejected": -2.03125, "loss": 0.2344, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0703125, "rewards/margins": 9.125, "rewards/rejected": -10.1875, "step": 2023 }, { "epoch": 0.7538175046554935, "grad_norm": 0.00180816650390625, "learning_rate": 3.1247682044363796e-07, "logits/chosen": 0.17578125, "logits/rejected": 0.8203125, "logps/chosen": -0.166015625, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.828125, "rewards/margins": 16.5, "rewards/rejected": -17.375, "step": 2024 }, { "epoch": 0.7541899441340782, "grad_norm": 1.0546875, "learning_rate": 3.1159078682848726e-07, "logits/chosen": -0.01251220703125, "logits/rejected": -0.466796875, "logps/chosen": -0.4296875, "logps/rejected": -2.78125, "loss": 0.0017, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 11.75, "rewards/rejected": -13.875, "step": 2025 }, { "epoch": 0.7545623836126629, "grad_norm": 0.01019287109375, "learning_rate": 3.1070574812675566e-07, "logits/chosen": 0.1572265625, "logits/rejected": -0.0771484375, "logps/chosen": -0.15625, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.78515625, "rewards/margins": 15.1875, "rewards/rejected": -16.0, "step": 2026 }, { "epoch": 0.7549348230912477, "grad_norm": 0.181640625, "learning_rate": 3.098217058349132e-07, "logits/chosen": -0.036376953125, "logits/rejected": 0.71875, "logps/chosen": -0.44140625, "logps/rejected": -3.078125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.203125, "rewards/margins": 13.125, "rewards/rejected": -15.375, "step": 2027 }, { "epoch": 0.7553072625698324, "grad_norm": 0.0086669921875, "learning_rate": 3.089386614477454e-07, "logits/chosen": -0.1416015625, "logits/rejected": 0.46875, "logps/chosen": -0.1044921875, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5234375, "rewards/margins": 14.5, "rewards/rejected": -15.0, "step": 2028 }, { "epoch": 0.7556797020484172, "grad_norm": 0.1962890625, "learning_rate": 3.080566164583508e-07, "logits/chosen": 0.007781982421875, "logits/rejected": -0.71875, "logps/chosen": -0.298828125, "logps/rejected": -2.609375, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4921875, "rewards/margins": 11.625, "rewards/rejected": -13.125, "step": 2029 }, { "epoch": 0.7560521415270018, "grad_norm": 0.00124359130859375, "learning_rate": 3.0717557235813676e-07, "logits/chosen": 0.06689453125, "logits/rejected": 0.21875, "logps/chosen": -0.7421875, "logps/rejected": -3.890625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.703125, "rewards/margins": 15.75, "rewards/rejected": -19.5, "step": 2030 }, { "epoch": 0.7564245810055866, "grad_norm": 0.000225067138671875, "learning_rate": 3.062955306368201e-07, "logits/chosen": 0.0458984375, "logits/rejected": 0.427734375, "logps/chosen": -0.3515625, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7578125, "rewards/margins": 16.625, "rewards/rejected": -18.375, "step": 2031 }, { "epoch": 0.7567970204841713, "grad_norm": 0.044189453125, "learning_rate": 3.05416492782422e-07, "logits/chosen": 0.09130859375, "logits/rejected": 0.578125, "logps/chosen": -1.3671875, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.8125, "rewards/margins": 11.5625, "rewards/rejected": -18.375, "step": 2032 }, { "epoch": 0.7571694599627561, "grad_norm": 0.10546875, "learning_rate": 3.0453846028126536e-07, "logits/chosen": -0.193359375, "logits/rejected": 0.3828125, "logps/chosen": -0.875, "logps/rejected": -3.28125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.375, "rewards/margins": 12.0625, "rewards/rejected": -16.5, "step": 2033 }, { "epoch": 0.7575418994413408, "grad_norm": 4.9591064453125e-05, "learning_rate": 3.036614346179745e-07, "logits/chosen": 0.09814453125, "logits/rejected": 0.484375, "logps/chosen": -0.328125, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.640625, "rewards/margins": 17.5, "rewards/rejected": -19.25, "step": 2034 }, { "epoch": 0.7579143389199255, "grad_norm": 0.006011962890625, "learning_rate": 3.0278541727547086e-07, "logits/chosen": 0.2353515625, "logits/rejected": 0.2412109375, "logps/chosen": -0.85546875, "logps/rejected": -3.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.28125, "rewards/margins": 14.625, "rewards/rejected": -19.0, "step": 2035 }, { "epoch": 0.7582867783985102, "grad_norm": 6.723403930664062e-05, "learning_rate": 3.019104097349706e-07, "logits/chosen": -0.2412109375, "logits/rejected": 0.298828125, "logps/chosen": -0.2080078125, "logps/rejected": -3.921875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0390625, "rewards/margins": 18.5, "rewards/rejected": -19.5, "step": 2036 }, { "epoch": 0.758659217877095, "grad_norm": 1.5, "learning_rate": 3.010364134759832e-07, "logits/chosen": -0.166015625, "logits/rejected": 0.376953125, "logps/chosen": -0.625, "logps/rejected": -2.5625, "loss": 0.002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.125, "rewards/margins": 9.6875, "rewards/rejected": -12.875, "step": 2037 }, { "epoch": 0.7590316573556797, "grad_norm": 0.0002460479736328125, "learning_rate": 3.0016342997630723e-07, "logits/chosen": 0.0791015625, "logits/rejected": 0.15234375, "logps/chosen": -0.474609375, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 16.25, "rewards/rejected": -18.5, "step": 2038 }, { "epoch": 0.7594040968342645, "grad_norm": 0.007354736328125, "learning_rate": 2.992914607120292e-07, "logits/chosen": -0.0771484375, "logits/rejected": -0.1240234375, "logps/chosen": -0.26171875, "logps/rejected": -2.828125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125, "rewards/margins": 12.8125, "rewards/rejected": -14.125, "step": 2039 }, { "epoch": 0.7597765363128491, "grad_norm": 0.0947265625, "learning_rate": 2.9842050715752164e-07, "logits/chosen": 0.018798828125, "logits/rejected": 0.408203125, "logps/chosen": -0.396484375, "logps/rejected": -2.8125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.984375, "rewards/margins": 12.0, "rewards/rejected": -14.0, "step": 2040 }, { "epoch": 0.7601489757914339, "grad_norm": 0.1845703125, "learning_rate": 2.975505707854388e-07, "logits/chosen": -0.07177734375, "logits/rejected": -0.46484375, "logps/chosen": -0.271484375, "logps/rejected": -2.90625, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.359375, "rewards/margins": 13.1875, "rewards/rejected": -14.5625, "step": 2041 }, { "epoch": 0.7605214152700186, "grad_norm": 34.0, "learning_rate": 2.966816530667145e-07, "logits/chosen": 0.07421875, "logits/rejected": -0.025390625, "logps/chosen": -0.9453125, "logps/rejected": -2.25, "loss": 0.05, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.71875, "rewards/margins": 6.53125, "rewards/rejected": -11.25, "step": 2042 }, { "epoch": 0.7608938547486034, "grad_norm": 0.1435546875, "learning_rate": 2.9581375547056096e-07, "logits/chosen": 0.091796875, "logits/rejected": -0.79296875, "logps/chosen": -0.11279296875, "logps/rejected": -2.953125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5625, "rewards/margins": 14.125, "rewards/rejected": -14.75, "step": 2043 }, { "epoch": 0.7612662942271881, "grad_norm": 0.0260009765625, "learning_rate": 2.9494687946446563e-07, "logits/chosen": 0.12890625, "logits/rejected": 0.1484375, "logps/chosen": -0.49609375, "logps/rejected": -2.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.46875, "rewards/margins": 11.125, "rewards/rejected": -13.625, "step": 2044 }, { "epoch": 0.7616387337057728, "grad_norm": 0.0028533935546875, "learning_rate": 2.940810265141881e-07, "logits/chosen": 0.11572265625, "logits/rejected": -0.1962890625, "logps/chosen": -0.341796875, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.703125, "rewards/margins": 14.375, "rewards/rejected": -16.0, "step": 2045 }, { "epoch": 0.7620111731843575, "grad_norm": 0.00118255615234375, "learning_rate": 2.9321619808375864e-07, "logits/chosen": 0.11181640625, "logits/rejected": 0.4296875, "logps/chosen": -0.30859375, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.546875, "rewards/margins": 14.375, "rewards/rejected": -15.875, "step": 2046 }, { "epoch": 0.7623836126629423, "grad_norm": 2.2411346435546875e-05, "learning_rate": 2.9235239563547445e-07, "logits/chosen": 0.058837890625, "logits/rejected": 0.38671875, "logps/chosen": -0.2578125, "logps/rejected": -3.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.296875, "rewards/margins": 18.5, "rewards/rejected": -19.75, "step": 2047 }, { "epoch": 0.762756052141527, "grad_norm": 0.1376953125, "learning_rate": 2.914896206298982e-07, "logits/chosen": 0.0927734375, "logits/rejected": 0.78125, "logps/chosen": -0.306640625, "logps/rejected": -3.09375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.53125, "rewards/margins": 13.875, "rewards/rejected": -15.375, "step": 2048 }, { "epoch": 0.7631284916201118, "grad_norm": 0.15234375, "learning_rate": 2.906278745258565e-07, "logits/chosen": 0.09033203125, "logits/rejected": -0.318359375, "logps/chosen": -0.3515625, "logps/rejected": -2.96875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.75, "rewards/margins": 13.0625, "rewards/rejected": -14.8125, "step": 2049 }, { "epoch": 0.7635009310986964, "grad_norm": 0.00017833709716796875, "learning_rate": 2.8976715878043434e-07, "logits/chosen": 0.08544921875, "logits/rejected": 0.337890625, "logps/chosen": -0.37109375, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.84375, "rewards/margins": 16.875, "rewards/rejected": -18.75, "step": 2050 }, { "epoch": 0.7638733705772812, "grad_norm": 0.07275390625, "learning_rate": 2.8890747484897573e-07, "logits/chosen": 0.1806640625, "logits/rejected": 0.51953125, "logps/chosen": -0.53515625, "logps/rejected": -3.4375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6875, "rewards/margins": 14.5, "rewards/rejected": -17.125, "step": 2051 }, { "epoch": 0.7642458100558659, "grad_norm": 5.53131103515625e-05, "learning_rate": 2.880488241850796e-07, "logits/chosen": 0.0027008056640625, "logits/rejected": 0.2578125, "logps/chosen": -0.15234375, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.76171875, "rewards/margins": 17.375, "rewards/rejected": -18.125, "step": 2052 }, { "epoch": 0.7646182495344507, "grad_norm": 0.55078125, "learning_rate": 2.871912082405982e-07, "logits/chosen": -0.022216796875, "logits/rejected": 0.82421875, "logps/chosen": -0.439453125, "logps/rejected": -2.625, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.203125, "rewards/margins": 10.9375, "rewards/rejected": -13.125, "step": 2053 }, { "epoch": 0.7649906890130354, "grad_norm": 0.0023956298828125, "learning_rate": 2.863346284656341e-07, "logits/chosen": 0.06396484375, "logits/rejected": 0.06298828125, "logps/chosen": -0.7734375, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.875, "rewards/margins": 14.375, "rewards/rejected": -18.25, "step": 2054 }, { "epoch": 0.7653631284916201, "grad_norm": 0.03369140625, "learning_rate": 2.8547908630853684e-07, "logits/chosen": -0.32421875, "logits/rejected": 0.482421875, "logps/chosen": -0.203125, "logps/rejected": -2.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.015625, "rewards/margins": 13.8125, "rewards/rejected": -14.875, "step": 2055 }, { "epoch": 0.7657355679702048, "grad_norm": 0.000545501708984375, "learning_rate": 2.84624583215903e-07, "logits/chosen": 0.03662109375, "logits/rejected": 0.26171875, "logps/chosen": -0.431640625, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 15.125, "rewards/rejected": -17.25, "step": 2056 }, { "epoch": 0.7661080074487896, "grad_norm": 0.0576171875, "learning_rate": 2.8377112063257134e-07, "logits/chosen": 0.134765625, "logits/rejected": -0.1494140625, "logps/chosen": -0.5546875, "logps/rejected": -3.09375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.765625, "rewards/margins": 12.75, "rewards/rejected": -15.5, "step": 2057 }, { "epoch": 0.7664804469273743, "grad_norm": 1.9431114196777344e-05, "learning_rate": 2.829187000016217e-07, "logits/chosen": 0.15234375, "logits/rejected": 0.5390625, "logps/chosen": -0.1748046875, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.875, "rewards/margins": 18.375, "rewards/rejected": -19.25, "step": 2058 }, { "epoch": 0.7668528864059591, "grad_norm": 0.0546875, "learning_rate": 2.8206732276437197e-07, "logits/chosen": 0.1015625, "logits/rejected": -0.52734375, "logps/chosen": -0.37890625, "logps/rejected": -2.96875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 13.0, "rewards/rejected": -14.875, "step": 2059 }, { "epoch": 0.7672253258845437, "grad_norm": 0.0004119873046875, "learning_rate": 2.812169903603758e-07, "logits/chosen": -0.024658203125, "logits/rejected": 0.251953125, "logps/chosen": -0.310546875, "logps/rejected": -3.296875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5546875, "rewards/margins": 14.875, "rewards/rejected": -16.5, "step": 2060 }, { "epoch": 0.7675977653631285, "grad_norm": 9.584426879882812e-05, "learning_rate": 2.8036770422742e-07, "logits/chosen": 0.1875, "logits/rejected": 0.3125, "logps/chosen": -0.2734375, "logps/rejected": -3.953125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3671875, "rewards/margins": 18.5, "rewards/rejected": -19.75, "step": 2061 }, { "epoch": 0.7679702048417132, "grad_norm": 4.9173831939697266e-06, "learning_rate": 2.795194658015231e-07, "logits/chosen": 0.220703125, "logits/rejected": 0.357421875, "logps/chosen": -0.1318359375, "logps/rejected": -4.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.65625, "rewards/margins": 20.25, "rewards/rejected": -21.0, "step": 2062 }, { "epoch": 0.768342644320298, "grad_norm": 0.0223388671875, "learning_rate": 2.7867227651693065e-07, "logits/chosen": -0.1279296875, "logits/rejected": 0.64453125, "logps/chosen": -0.365234375, "logps/rejected": -2.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.828125, "rewards/margins": 11.5, "rewards/rejected": -13.3125, "step": 2063 }, { "epoch": 0.7687150837988826, "grad_norm": 2.9375, "learning_rate": 2.7782613780611544e-07, "logits/chosen": -0.10498046875, "logits/rejected": -0.296875, "logps/chosen": -0.43359375, "logps/rejected": -2.328125, "loss": 0.005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 9.4375, "rewards/rejected": -11.625, "step": 2064 }, { "epoch": 0.7690875232774674, "grad_norm": 4.410743713378906e-05, "learning_rate": 2.769810510997737e-07, "logits/chosen": 0.044677734375, "logits/rejected": 0.40625, "logps/chosen": -0.255859375, "logps/rejected": -4.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.28125, "rewards/margins": 18.75, "rewards/rejected": -20.125, "step": 2065 }, { "epoch": 0.7694599627560521, "grad_norm": 1.4066696166992188e-05, "learning_rate": 2.761370178268226e-07, "logits/chosen": 0.1552734375, "logits/rejected": 0.453125, "logps/chosen": -0.1552734375, "logps/rejected": -3.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.77734375, "rewards/margins": 19.0, "rewards/rejected": -19.75, "step": 2066 }, { "epoch": 0.7698324022346369, "grad_norm": 0.00011110305786132812, "learning_rate": 2.752940394143982e-07, "logits/chosen": 0.08740234375, "logits/rejected": 0.361328125, "logps/chosen": -0.255859375, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.28125, "rewards/margins": 17.75, "rewards/rejected": -19.0, "step": 2067 }, { "epoch": 0.7702048417132216, "grad_norm": 0.052490234375, "learning_rate": 2.7445211728785316e-07, "logits/chosen": 0.045654296875, "logits/rejected": 0.72265625, "logps/chosen": -0.16015625, "logps/rejected": -2.84375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.796875, "rewards/margins": 13.4375, "rewards/rejected": -14.25, "step": 2068 }, { "epoch": 0.7705772811918064, "grad_norm": 3.09375, "learning_rate": 2.7361125287075375e-07, "logits/chosen": -0.03515625, "logits/rejected": -0.796875, "logps/chosen": -0.298828125, "logps/rejected": -1.890625, "loss": 0.0051, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4921875, "rewards/margins": 7.96875, "rewards/rejected": -9.5, "step": 2069 }, { "epoch": 0.770949720670391, "grad_norm": 0.002593994140625, "learning_rate": 2.7277144758487814e-07, "logits/chosen": 0.1796875, "logits/rejected": 0.283203125, "logps/chosen": -0.515625, "logps/rejected": -3.203125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.59375, "rewards/margins": 13.375, "rewards/rejected": -16.0, "step": 2070 }, { "epoch": 0.7713221601489758, "grad_norm": 3.03125, "learning_rate": 2.7193270285021375e-07, "logits/chosen": 0.06298828125, "logits/rejected": -0.546875, "logps/chosen": -0.66015625, "logps/rejected": -2.734375, "loss": 0.0035, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3125, "rewards/margins": 10.375, "rewards/rejected": -13.6875, "step": 2071 }, { "epoch": 0.7716945996275605, "grad_norm": 1.7762184143066406e-05, "learning_rate": 2.71095020084954e-07, "logits/chosen": 0.01239013671875, "logits/rejected": 0.1337890625, "logps/chosen": -0.310546875, "logps/rejected": -3.984375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5546875, "rewards/margins": 18.5, "rewards/rejected": -20.0, "step": 2072 }, { "epoch": 0.7720670391061453, "grad_norm": 0.01275634765625, "learning_rate": 2.702584007054974e-07, "logits/chosen": 0.0654296875, "logits/rejected": -0.265625, "logps/chosen": -0.30078125, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5, "rewards/margins": 15.625, "rewards/rejected": -17.125, "step": 2073 }, { "epoch": 0.7724394785847299, "grad_norm": 0.033447265625, "learning_rate": 2.6942284612644443e-07, "logits/chosen": -0.037841796875, "logits/rejected": -0.09716796875, "logps/chosen": -0.30859375, "logps/rejected": -2.5625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5390625, "rewards/margins": 11.25, "rewards/rejected": -12.8125, "step": 2074 }, { "epoch": 0.7728119180633147, "grad_norm": 0.17578125, "learning_rate": 2.68588357760595e-07, "logits/chosen": 0.04736328125, "logits/rejected": 0.65234375, "logps/chosen": -0.99609375, "logps/rejected": -3.59375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.96875, "rewards/margins": 13.0, "rewards/rejected": -18.0, "step": 2075 }, { "epoch": 0.7731843575418994, "grad_norm": 0.0003223419189453125, "learning_rate": 2.677549370189459e-07, "logits/chosen": 0.06201171875, "logits/rejected": 0.470703125, "logps/chosen": -0.7109375, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.53125, "rewards/margins": 15.6875, "rewards/rejected": -19.25, "step": 2076 }, { "epoch": 0.7735567970204842, "grad_norm": 5.125999450683594e-06, "learning_rate": 2.6692258531068874e-07, "logits/chosen": -0.0181884765625, "logits/rejected": 0.4609375, "logps/chosen": -0.1103515625, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.55078125, "rewards/margins": 19.75, "rewards/rejected": -20.25, "step": 2077 }, { "epoch": 0.7739292364990689, "grad_norm": 0.00030517578125, "learning_rate": 2.660913040432086e-07, "logits/chosen": 0.040771484375, "logits/rejected": 0.1953125, "logps/chosen": -0.375, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.875, "rewards/margins": 17.25, "rewards/rejected": -19.25, "step": 2078 }, { "epoch": 0.7743016759776536, "grad_norm": 0.328125, "learning_rate": 2.652610946220796e-07, "logits/chosen": -0.10791015625, "logits/rejected": -0.49609375, "logps/chosen": -0.19140625, "logps/rejected": -2.625, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9609375, "rewards/margins": 12.125, "rewards/rejected": -13.125, "step": 2079 }, { "epoch": 0.7746741154562383, "grad_norm": 0.0001735687255859375, "learning_rate": 2.644319584510633e-07, "logits/chosen": 0.032470703125, "logits/rejected": 0.478515625, "logps/chosen": -0.234375, "logps/rejected": -3.703125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.171875, "rewards/margins": 17.25, "rewards/rejected": -18.5, "step": 2080 }, { "epoch": 0.7750465549348231, "grad_norm": 0.765625, "learning_rate": 2.636038969321073e-07, "logits/chosen": -0.3359375, "logits/rejected": 0.39453125, "logps/chosen": -1.5546875, "logps/rejected": -3.6875, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.75, "rewards/margins": 10.6875, "rewards/rejected": -18.5, "step": 2081 }, { "epoch": 0.7754189944134078, "grad_norm": 0.0086669921875, "learning_rate": 2.627769114653417e-07, "logits/chosen": 0.134765625, "logits/rejected": 0.6171875, "logps/chosen": -0.47265625, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.359375, "rewards/margins": 14.3125, "rewards/rejected": -16.75, "step": 2082 }, { "epoch": 0.7757914338919926, "grad_norm": 0.007110595703125, "learning_rate": 2.6195100344907736e-07, "logits/chosen": -0.0947265625, "logits/rejected": 0.328125, "logps/chosen": -1.109375, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.5625, "rewards/margins": 12.5625, "rewards/rejected": -18.125, "step": 2083 }, { "epoch": 0.7761638733705772, "grad_norm": 0.1513671875, "learning_rate": 2.611261742798034e-07, "logits/chosen": -0.06884765625, "logits/rejected": 0.41015625, "logps/chosen": -0.33984375, "logps/rejected": -2.71875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6953125, "rewards/margins": 11.875, "rewards/rejected": -13.625, "step": 2084 }, { "epoch": 0.776536312849162, "grad_norm": 0.000835418701171875, "learning_rate": 2.603024253521843e-07, "logits/chosen": -0.0615234375, "logits/rejected": 0.478515625, "logps/chosen": -0.51171875, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.546875, "rewards/margins": 15.5625, "rewards/rejected": -18.125, "step": 2085 }, { "epoch": 0.7769087523277467, "grad_norm": 0.208984375, "learning_rate": 2.5947975805905806e-07, "logits/chosen": 0.058349609375, "logits/rejected": 0.69140625, "logps/chosen": -0.46484375, "logps/rejected": -2.75, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.328125, "rewards/margins": 11.5, "rewards/rejected": -13.75, "step": 2086 }, { "epoch": 0.7772811918063315, "grad_norm": 8.296966552734375e-05, "learning_rate": 2.586581737914347e-07, "logits/chosen": 0.166015625, "logits/rejected": 0.328125, "logps/chosen": -0.240234375, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.203125, "rewards/margins": 17.75, "rewards/rejected": -19.0, "step": 2087 }, { "epoch": 0.7776536312849162, "grad_norm": 3.453125, "learning_rate": 2.578376739384923e-07, "logits/chosen": 0.02490234375, "logits/rejected": -0.6484375, "logps/chosen": -0.11865234375, "logps/rejected": -2.1875, "loss": 0.0059, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.59375, "rewards/margins": 10.3125, "rewards/rejected": -10.875, "step": 2088 }, { "epoch": 0.7780260707635009, "grad_norm": 0.006500244140625, "learning_rate": 2.5701825988757495e-07, "logits/chosen": -0.12890625, "logits/rejected": 0.466796875, "logps/chosen": -0.439453125, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1875, "rewards/margins": 13.375, "rewards/rejected": -15.5, "step": 2089 }, { "epoch": 0.7783985102420856, "grad_norm": 3.838539123535156e-05, "learning_rate": 2.561999330241915e-07, "logits/chosen": 0.0419921875, "logits/rejected": 0.294921875, "logps/chosen": -0.404296875, "logps/rejected": -4.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.015625, "rewards/margins": 19.5, "rewards/rejected": -21.5, "step": 2090 }, { "epoch": 0.7787709497206704, "grad_norm": 0.06494140625, "learning_rate": 2.553826947320123e-07, "logits/chosen": 0.0208740234375, "logits/rejected": 0.5546875, "logps/chosen": -0.5703125, "logps/rejected": -3.234375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.859375, "rewards/margins": 13.3125, "rewards/rejected": -16.25, "step": 2091 }, { "epoch": 0.7791433891992551, "grad_norm": 0.000415802001953125, "learning_rate": 2.545665463928674e-07, "logits/chosen": -0.0157470703125, "logits/rejected": 0.185546875, "logps/chosen": -0.349609375, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7421875, "rewards/margins": 16.0, "rewards/rejected": -17.75, "step": 2092 }, { "epoch": 0.7795158286778399, "grad_norm": 0.0001888275146484375, "learning_rate": 2.537514893867428e-07, "logits/chosen": 0.0159912109375, "logits/rejected": 0.41796875, "logps/chosen": -0.1953125, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9765625, "rewards/margins": 17.0, "rewards/rejected": -18.0, "step": 2093 }, { "epoch": 0.7798882681564245, "grad_norm": 0.0634765625, "learning_rate": 2.5293752509178063e-07, "logits/chosen": 0.0693359375, "logits/rejected": 0.478515625, "logps/chosen": -0.474609375, "logps/rejected": -3.265625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 13.9375, "rewards/rejected": -16.25, "step": 2094 }, { "epoch": 0.7802607076350093, "grad_norm": 3.314018249511719e-05, "learning_rate": 2.5212465488427445e-07, "logits/chosen": -0.00074005126953125, "logits/rejected": 0.384765625, "logps/chosen": -0.37890625, "logps/rejected": -4.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 18.25, "rewards/rejected": -20.0, "step": 2095 }, { "epoch": 0.780633147113594, "grad_norm": 0.0004730224609375, "learning_rate": 2.5131288013866846e-07, "logits/chosen": 0.1005859375, "logits/rejected": 0.54296875, "logps/chosen": -0.234375, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.171875, "rewards/margins": 16.0, "rewards/rejected": -17.25, "step": 2096 }, { "epoch": 0.7810055865921788, "grad_norm": 0.05224609375, "learning_rate": 2.505022022275541e-07, "logits/chosen": -0.130859375, "logits/rejected": 0.06982421875, "logps/chosen": -0.10888671875, "logps/rejected": -2.53125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.546875, "rewards/margins": 12.125, "rewards/rejected": -12.625, "step": 2097 }, { "epoch": 0.7813780260707635, "grad_norm": 0.0004177093505859375, "learning_rate": 2.496926225216686e-07, "logits/chosen": 0.154296875, "logits/rejected": 0.373046875, "logps/chosen": -0.228515625, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.140625, "rewards/margins": 15.375, "rewards/rejected": -16.5, "step": 2098 }, { "epoch": 0.7817504655493482, "grad_norm": 4.5, "learning_rate": 2.4888414238989205e-07, "logits/chosen": -0.0045166015625, "logits/rejected": 0.87890625, "logps/chosen": -0.322265625, "logps/rejected": -2.40625, "loss": 0.008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6171875, "rewards/margins": 10.4375, "rewards/rejected": -12.0625, "step": 2099 }, { "epoch": 0.7821229050279329, "grad_norm": 19.5, "learning_rate": 2.480767631992456e-07, "logits/chosen": 0.06396484375, "logits/rejected": -0.1875, "logps/chosen": -0.462890625, "logps/rejected": -2.5, "loss": 0.0486, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3125, "rewards/margins": 10.125, "rewards/rejected": -12.5, "step": 2100 }, { "epoch": 0.7824953445065177, "grad_norm": 0.00019550323486328125, "learning_rate": 2.4727048631488877e-07, "logits/chosen": -0.0164794921875, "logits/rejected": 0.41796875, "logps/chosen": -0.0908203125, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.455078125, "rewards/margins": 17.5, "rewards/rejected": -18.0, "step": 2101 }, { "epoch": 0.7828677839851024, "grad_norm": 0.004119873046875, "learning_rate": 2.4646531310011665e-07, "logits/chosen": 0.11181640625, "logits/rejected": -0.1240234375, "logps/chosen": -0.6015625, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.015625, "rewards/margins": 12.9375, "rewards/rejected": -15.9375, "step": 2102 }, { "epoch": 0.7832402234636872, "grad_norm": 0.259765625, "learning_rate": 2.4566124491635905e-07, "logits/chosen": -0.00970458984375, "logits/rejected": 0.0018463134765625, "logps/chosen": -0.671875, "logps/rejected": -2.5625, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.359375, "rewards/margins": 9.4375, "rewards/rejected": -12.75, "step": 2103 }, { "epoch": 0.7836126629422718, "grad_norm": 3.62396240234375e-05, "learning_rate": 2.44858283123177e-07, "logits/chosen": 0.12353515625, "logits/rejected": 0.3515625, "logps/chosen": -0.310546875, "logps/rejected": -4.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5546875, "rewards/margins": 18.5, "rewards/rejected": -20.0, "step": 2104 }, { "epoch": 0.7839851024208566, "grad_norm": 0.31640625, "learning_rate": 2.4405642907826065e-07, "logits/chosen": 0.10595703125, "logits/rejected": -0.32421875, "logps/chosen": -0.26953125, "logps/rejected": -3.0, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 13.75, "rewards/rejected": -15.0625, "step": 2105 }, { "epoch": 0.7843575418994413, "grad_norm": 0.345703125, "learning_rate": 2.4325568413742727e-07, "logits/chosen": 0.10302734375, "logits/rejected": -0.318359375, "logps/chosen": -0.3359375, "logps/rejected": -2.84375, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6875, "rewards/margins": 12.5, "rewards/rejected": -14.1875, "step": 2106 }, { "epoch": 0.7847299813780261, "grad_norm": 0.00019550323486328125, "learning_rate": 2.4245604965461875e-07, "logits/chosen": 0.022216796875, "logits/rejected": 0.37109375, "logps/chosen": -0.263671875, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125, "rewards/margins": 16.375, "rewards/rejected": -17.75, "step": 2107 }, { "epoch": 0.7851024208566107, "grad_norm": 0.00133514404296875, "learning_rate": 2.416575269818994e-07, "logits/chosen": 0.10498046875, "logits/rejected": 0.37890625, "logps/chosen": -0.1689453125, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.84375, "rewards/margins": 15.8125, "rewards/rejected": -16.625, "step": 2108 }, { "epoch": 0.7854748603351955, "grad_norm": 0.00836181640625, "learning_rate": 2.4086011746945393e-07, "logits/chosen": 0.0830078125, "logits/rejected": 0.005401611328125, "logps/chosen": -0.47265625, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 15.5, "rewards/rejected": -17.875, "step": 2109 }, { "epoch": 0.7858472998137802, "grad_norm": 0.0020904541015625, "learning_rate": 2.400638224655839e-07, "logits/chosen": 0.0177001953125, "logits/rejected": 0.345703125, "logps/chosen": -0.37109375, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.859375, "rewards/margins": 16.375, "rewards/rejected": -18.25, "step": 2110 }, { "epoch": 0.786219739292365, "grad_norm": 0.37890625, "learning_rate": 2.3926864331670726e-07, "logits/chosen": 0.005645751953125, "logits/rejected": -0.3125, "logps/chosen": -0.734375, "logps/rejected": -2.78125, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.65625, "rewards/margins": 10.25, "rewards/rejected": -13.875, "step": 2111 }, { "epoch": 0.7865921787709497, "grad_norm": 0.09765625, "learning_rate": 2.38474581367355e-07, "logits/chosen": -0.263671875, "logits/rejected": -0.640625, "logps/chosen": -0.10009765625, "logps/rejected": -2.28125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5, "rewards/margins": 10.875, "rewards/rejected": -11.375, "step": 2112 }, { "epoch": 0.7869646182495345, "grad_norm": 5.3125, "learning_rate": 2.3768163796016902e-07, "logits/chosen": 0.0113525390625, "logits/rejected": 0.052001953125, "logps/chosen": -0.462890625, "logps/rejected": -2.296875, "loss": 0.0085, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3125, "rewards/margins": 9.1875, "rewards/rejected": -11.5, "step": 2113 }, { "epoch": 0.7873370577281191, "grad_norm": 0.00156402587890625, "learning_rate": 2.3688981443590035e-07, "logits/chosen": 0.00689697265625, "logits/rejected": -0.07763671875, "logps/chosen": -0.158203125, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.79296875, "rewards/margins": 15.6875, "rewards/rejected": -16.5, "step": 2114 }, { "epoch": 0.7877094972067039, "grad_norm": 0.1337890625, "learning_rate": 2.360991121334052e-07, "logits/chosen": 0.0771484375, "logits/rejected": -0.173828125, "logps/chosen": -0.19140625, "logps/rejected": -2.84375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.95703125, "rewards/margins": 13.25, "rewards/rejected": -14.25, "step": 2115 }, { "epoch": 0.7880819366852886, "grad_norm": 0.052490234375, "learning_rate": 2.3530953238964564e-07, "logits/chosen": 0.045654296875, "logits/rejected": 0.953125, "logps/chosen": -0.279296875, "logps/rejected": -3.0, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3984375, "rewards/margins": 13.625, "rewards/rejected": -15.0, "step": 2116 }, { "epoch": 0.7884543761638734, "grad_norm": 4.023313522338867e-06, "learning_rate": 2.345210765396849e-07, "logits/chosen": -0.01446533203125, "logits/rejected": 0.4140625, "logps/chosen": -0.171875, "logps/rejected": -4.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.859375, "rewards/margins": 20.25, "rewards/rejected": -21.0, "step": 2117 }, { "epoch": 0.788826815642458, "grad_norm": 0.000217437744140625, "learning_rate": 2.3373374591668536e-07, "logits/chosen": 0.11767578125, "logits/rejected": 0.427734375, "logps/chosen": -0.1484375, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7421875, "rewards/margins": 16.375, "rewards/rejected": -17.125, "step": 2118 }, { "epoch": 0.7891992551210428, "grad_norm": 0.0108642578125, "learning_rate": 2.3294754185190744e-07, "logits/chosen": 0.201171875, "logits/rejected": 0.333984375, "logps/chosen": -0.49609375, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.484375, "rewards/margins": 12.9375, "rewards/rejected": -15.4375, "step": 2119 }, { "epoch": 0.7895716945996276, "grad_norm": 0.048828125, "learning_rate": 2.3216246567470653e-07, "logits/chosen": -0.130859375, "logits/rejected": -0.1376953125, "logps/chosen": -0.5859375, "logps/rejected": -2.78125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.921875, "rewards/margins": 11.0, "rewards/rejected": -13.9375, "step": 2120 }, { "epoch": 0.7899441340782123, "grad_norm": 0.0001239776611328125, "learning_rate": 2.3137851871253105e-07, "logits/chosen": 0.1044921875, "logits/rejected": 0.27734375, "logps/chosen": -0.39453125, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.96875, "rewards/margins": 16.5, "rewards/rejected": -18.5, "step": 2121 }, { "epoch": 0.7903165735567971, "grad_norm": 0.05859375, "learning_rate": 2.305957022909202e-07, "logits/chosen": -0.1884765625, "logits/rejected": -0.34375, "logps/chosen": -0.365234375, "logps/rejected": -2.890625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.828125, "rewards/margins": 12.625, "rewards/rejected": -14.4375, "step": 2122 }, { "epoch": 0.7906890130353817, "grad_norm": 3.125, "learning_rate": 2.298140177335008e-07, "logits/chosen": 0.2197265625, "logits/rejected": -0.9140625, "logps/chosen": -0.7578125, "logps/rejected": -2.84375, "loss": 0.0043, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.78125, "rewards/margins": 10.4375, "rewards/rejected": -14.25, "step": 2123 }, { "epoch": 0.7910614525139665, "grad_norm": 0.0001277923583984375, "learning_rate": 2.2903346636198655e-07, "logits/chosen": 0.111328125, "logits/rejected": 0.3046875, "logps/chosen": -0.296875, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.484375, "rewards/margins": 16.5, "rewards/rejected": -18.0, "step": 2124 }, { "epoch": 0.7914338919925512, "grad_norm": 5.626678466796875e-05, "learning_rate": 2.282540494961756e-07, "logits/chosen": 0.06982421875, "logits/rejected": 0.45703125, "logps/chosen": -0.2138671875, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0703125, "rewards/margins": 17.25, "rewards/rejected": -18.25, "step": 2125 }, { "epoch": 0.791806331471136, "grad_norm": 0.000415802001953125, "learning_rate": 2.27475768453947e-07, "logits/chosen": 0.37890625, "logits/rejected": 0.5234375, "logps/chosen": -0.20703125, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.03125, "rewards/margins": 15.125, "rewards/rejected": -16.125, "step": 2126 }, { "epoch": 0.7921787709497207, "grad_norm": 126.0, "learning_rate": 2.2669862455125912e-07, "logits/chosen": 0.01361083984375, "logits/rejected": -0.06005859375, "logps/chosen": -0.490234375, "logps/rejected": -2.484375, "loss": 0.4297, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.453125, "rewards/margins": 9.9375, "rewards/rejected": -12.375, "step": 2127 }, { "epoch": 0.7925512104283055, "grad_norm": 3.933906555175781e-05, "learning_rate": 2.2592261910214807e-07, "logits/chosen": 0.056396484375, "logits/rejected": 0.37890625, "logps/chosen": -0.314453125, "logps/rejected": -4.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5703125, "rewards/margins": 19.5, "rewards/rejected": -21.0, "step": 2128 }, { "epoch": 0.7929236499068901, "grad_norm": 0.000240325927734375, "learning_rate": 2.2514775341872516e-07, "logits/chosen": 0.240234375, "logits/rejected": 0.26953125, "logps/chosen": -0.546875, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 15.625, "rewards/rejected": -18.375, "step": 2129 }, { "epoch": 0.7932960893854749, "grad_norm": 7.963180541992188e-05, "learning_rate": 2.2437402881117414e-07, "logits/chosen": 0.039794921875, "logits/rejected": 0.345703125, "logps/chosen": -0.359375, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.796875, "rewards/margins": 17.75, "rewards/rejected": -19.5, "step": 2130 }, { "epoch": 0.7936685288640596, "grad_norm": 1.704692840576172e-05, "learning_rate": 2.236014465877497e-07, "logits/chosen": -0.0732421875, "logits/rejected": 0.384765625, "logps/chosen": -0.20703125, "logps/rejected": -3.984375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.03125, "rewards/margins": 19.0, "rewards/rejected": -20.0, "step": 2131 }, { "epoch": 0.7940409683426444, "grad_norm": 0.08056640625, "learning_rate": 2.228300080547742e-07, "logits/chosen": -0.08056640625, "logits/rejected": -0.66796875, "logps/chosen": -0.4375, "logps/rejected": -2.59375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1875, "rewards/margins": 10.8125, "rewards/rejected": -13.0, "step": 2132 }, { "epoch": 0.794413407821229, "grad_norm": 0.0016937255859375, "learning_rate": 2.2205971451663685e-07, "logits/chosen": 0.01507568359375, "logits/rejected": -0.25, "logps/chosen": -0.31640625, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.578125, "rewards/margins": 15.375, "rewards/rejected": -17.0, "step": 2133 }, { "epoch": 0.7947858472998138, "grad_norm": 5.4836273193359375e-05, "learning_rate": 2.212905672757914e-07, "logits/chosen": -0.0162353515625, "logits/rejected": 0.328125, "logps/chosen": -0.28125, "logps/rejected": -3.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.40625, "rewards/margins": 17.5, "rewards/rejected": -19.0, "step": 2134 }, { "epoch": 0.7951582867783985, "grad_norm": 6.84375, "learning_rate": 2.2052256763275194e-07, "logits/chosen": -0.08544921875, "logits/rejected": 0.259765625, "logps/chosen": -0.99609375, "logps/rejected": -3.125, "loss": 0.0091, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.0, "rewards/margins": 10.75, "rewards/rejected": -15.6875, "step": 2135 }, { "epoch": 0.7955307262569833, "grad_norm": 0.006256103515625, "learning_rate": 2.1975571688609308e-07, "logits/chosen": -0.111328125, "logits/rejected": 0.263671875, "logps/chosen": -0.42578125, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 12.875, "rewards/rejected": -15.0, "step": 2136 }, { "epoch": 0.795903165735568, "grad_norm": 0.1357421875, "learning_rate": 2.1899001633244668e-07, "logits/chosen": 0.08935546875, "logits/rejected": -0.3984375, "logps/chosen": -0.1796875, "logps/rejected": -3.109375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8984375, "rewards/margins": 14.6875, "rewards/rejected": -15.5625, "step": 2137 }, { "epoch": 0.7962756052141527, "grad_norm": 0.0002765655517578125, "learning_rate": 2.1822546726649976e-07, "logits/chosen": 0.0203857421875, "logits/rejected": 0.314453125, "logps/chosen": -0.1357421875, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6796875, "rewards/margins": 17.25, "rewards/rejected": -18.0, "step": 2138 }, { "epoch": 0.7966480446927374, "grad_norm": 0.20703125, "learning_rate": 2.1746207098099246e-07, "logits/chosen": 0.00341796875, "logits/rejected": -0.1865234375, "logps/chosen": -0.45703125, "logps/rejected": -2.734375, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.296875, "rewards/margins": 11.375, "rewards/rejected": -13.6875, "step": 2139 }, { "epoch": 0.7970204841713222, "grad_norm": 0.017333984375, "learning_rate": 2.1669982876671523e-07, "logits/chosen": 0.125, "logits/rejected": 1.1953125, "logps/chosen": -0.357421875, "logps/rejected": -3.015625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7890625, "rewards/margins": 13.3125, "rewards/rejected": -15.125, "step": 2140 }, { "epoch": 0.7973929236499069, "grad_norm": 0.0002899169921875, "learning_rate": 2.1593874191250761e-07, "logits/chosen": 0.041015625, "logits/rejected": 0.36328125, "logps/chosen": -0.322265625, "logps/rejected": -3.421875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 15.4375, "rewards/rejected": -17.0, "step": 2141 }, { "epoch": 0.7977653631284917, "grad_norm": 3.24249267578125e-05, "learning_rate": 2.151788117052557e-07, "logits/chosen": 0.138671875, "logits/rejected": 0.40234375, "logps/chosen": -0.2001953125, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0, "rewards/margins": 17.75, "rewards/rejected": -18.75, "step": 2142 }, { "epoch": 0.7981378026070763, "grad_norm": 0.00372314453125, "learning_rate": 2.144200394298895e-07, "logits/chosen": -0.310546875, "logits/rejected": 0.1767578125, "logps/chosen": -0.34375, "logps/rejected": -3.515625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7109375, "rewards/margins": 15.875, "rewards/rejected": -17.625, "step": 2143 }, { "epoch": 0.7985102420856611, "grad_norm": 3.552436828613281e-05, "learning_rate": 2.1366242636938147e-07, "logits/chosen": 0.091796875, "logits/rejected": 0.255859375, "logps/chosen": -0.28515625, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.421875, "rewards/margins": 18.125, "rewards/rejected": -19.5, "step": 2144 }, { "epoch": 0.7988826815642458, "grad_norm": 0.0107421875, "learning_rate": 2.1290597380474374e-07, "logits/chosen": 0.10546875, "logits/rejected": 0.52734375, "logps/chosen": -0.52734375, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.625, "rewards/margins": 12.8125, "rewards/rejected": -15.4375, "step": 2145 }, { "epoch": 0.7992551210428306, "grad_norm": 0.000396728515625, "learning_rate": 2.1215068301502628e-07, "logits/chosen": 0.04150390625, "logits/rejected": 0.373046875, "logps/chosen": -0.55859375, "logps/rejected": -3.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.796875, "rewards/margins": 16.75, "rewards/rejected": -19.625, "step": 2146 }, { "epoch": 0.7996275605214153, "grad_norm": 8.58306884765625e-05, "learning_rate": 2.1139655527731512e-07, "logits/chosen": 0.05224609375, "logits/rejected": 0.3828125, "logps/chosen": -0.0849609375, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.423828125, "rewards/margins": 17.625, "rewards/rejected": -18.0, "step": 2147 }, { "epoch": 0.8, "grad_norm": 0.0014495849609375, "learning_rate": 2.1064359186672868e-07, "logits/chosen": 0.032470703125, "logits/rejected": 0.25, "logps/chosen": -0.77734375, "logps/rejected": -3.640625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.875, "rewards/margins": 14.3125, "rewards/rejected": -18.25, "step": 2148 }, { "epoch": 0.8003724394785847, "grad_norm": 0.000194549560546875, "learning_rate": 2.0989179405641756e-07, "logits/chosen": 0.08251953125, "logits/rejected": 0.19140625, "logps/chosen": -0.26953125, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 17.0, "rewards/rejected": -18.25, "step": 2149 }, { "epoch": 0.8007448789571695, "grad_norm": 0.0001316070556640625, "learning_rate": 2.0914116311756136e-07, "logits/chosen": 0.12158203125, "logits/rejected": 0.32421875, "logps/chosen": -0.33984375, "logps/rejected": -3.640625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.703125, "rewards/margins": 16.5, "rewards/rejected": -18.25, "step": 2150 }, { "epoch": 0.8011173184357542, "grad_norm": 0.0654296875, "learning_rate": 2.083917003193666e-07, "logits/chosen": 0.055908203125, "logits/rejected": -0.369140625, "logps/chosen": -0.306640625, "logps/rejected": -3.0, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.53125, "rewards/margins": 13.375, "rewards/rejected": -14.9375, "step": 2151 }, { "epoch": 0.801489757914339, "grad_norm": 0.051513671875, "learning_rate": 2.0764340692906443e-07, "logits/chosen": 0.15625, "logits/rejected": -0.4296875, "logps/chosen": -0.5390625, "logps/rejected": -3.25, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6875, "rewards/margins": 13.625, "rewards/rejected": -16.25, "step": 2152 }, { "epoch": 0.8018621973929236, "grad_norm": 9.489059448242188e-05, "learning_rate": 2.06896284211909e-07, "logits/chosen": 0.1943359375, "logits/rejected": 0.63671875, "logps/chosen": -0.1923828125, "logps/rejected": -3.609375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.96484375, "rewards/margins": 17.0, "rewards/rejected": -18.0, "step": 2153 }, { "epoch": 0.8022346368715084, "grad_norm": 0.002044677734375, "learning_rate": 2.0615033343117497e-07, "logits/chosen": -0.1533203125, "logits/rejected": 0.30078125, "logps/chosen": -0.59765625, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0, "rewards/margins": 13.625, "rewards/rejected": -16.625, "step": 2154 }, { "epoch": 0.8026070763500931, "grad_norm": 0.03125, "learning_rate": 2.0540555584815533e-07, "logits/chosen": 0.115234375, "logits/rejected": 0.3046875, "logps/chosen": -0.1923828125, "logps/rejected": -2.703125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9609375, "rewards/margins": 12.5625, "rewards/rejected": -13.5, "step": 2155 }, { "epoch": 0.8029795158286779, "grad_norm": 16.25, "learning_rate": 2.046619527221597e-07, "logits/chosen": 0.09619140625, "logits/rejected": 0.2197265625, "logps/chosen": -0.416015625, "logps/rejected": -2.28125, "loss": 0.05, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.078125, "rewards/margins": 9.375, "rewards/rejected": -11.4375, "step": 2156 }, { "epoch": 0.8033519553072626, "grad_norm": 0.11279296875, "learning_rate": 2.0391952531051096e-07, "logits/chosen": -0.046630859375, "logits/rejected": -0.375, "logps/chosen": -0.25390625, "logps/rejected": -2.875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2734375, "rewards/margins": 13.125, "rewards/rejected": -14.375, "step": 2157 }, { "epoch": 0.8037243947858473, "grad_norm": 0.00017261505126953125, "learning_rate": 2.0317827486854505e-07, "logits/chosen": 0.07421875, "logits/rejected": 0.2265625, "logps/chosen": -0.287109375, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4375, "rewards/margins": 17.0, "rewards/rejected": -18.5, "step": 2158 }, { "epoch": 0.804096834264432, "grad_norm": 0.302734375, "learning_rate": 2.0243820264960727e-07, "logits/chosen": -0.0162353515625, "logits/rejected": 0.578125, "logps/chosen": -0.6015625, "logps/rejected": -2.8125, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0, "rewards/margins": 11.0625, "rewards/rejected": -14.0625, "step": 2159 }, { "epoch": 0.8044692737430168, "grad_norm": 0.000942230224609375, "learning_rate": 2.0169930990505094e-07, "logits/chosen": 0.08056640625, "logits/rejected": 0.2431640625, "logps/chosen": -0.30078125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5, "rewards/margins": 15.875, "rewards/rejected": -17.375, "step": 2160 }, { "epoch": 0.8048417132216015, "grad_norm": 0.00010251998901367188, "learning_rate": 2.009615978842352e-07, "logits/chosen": 0.0005950927734375, "logits/rejected": 0.474609375, "logps/chosen": -0.5390625, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6875, "rewards/margins": 16.875, "rewards/rejected": -19.5, "step": 2161 }, { "epoch": 0.8052141527001863, "grad_norm": 0.00011968612670898438, "learning_rate": 2.0022506783452197e-07, "logits/chosen": -0.103515625, "logits/rejected": 0.251953125, "logps/chosen": -0.31640625, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5859375, "rewards/margins": 16.75, "rewards/rejected": -18.375, "step": 2162 }, { "epoch": 0.8055865921787709, "grad_norm": 0.1943359375, "learning_rate": 1.994897210012757e-07, "logits/chosen": 0.1484375, "logits/rejected": 0.07861328125, "logps/chosen": -0.27734375, "logps/rejected": -2.640625, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.390625, "rewards/margins": 11.8125, "rewards/rejected": -13.1875, "step": 2163 }, { "epoch": 0.8059590316573557, "grad_norm": 0.09521484375, "learning_rate": 1.9875555862785988e-07, "logits/chosen": -0.0595703125, "logits/rejected": -0.3125, "logps/chosen": -0.220703125, "logps/rejected": -2.546875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.109375, "rewards/margins": 11.625, "rewards/rejected": -12.75, "step": 2164 }, { "epoch": 0.8063314711359404, "grad_norm": 5.15625, "learning_rate": 1.980225819556346e-07, "logits/chosen": -0.11572265625, "logits/rejected": 0.72265625, "logps/chosen": -0.271484375, "logps/rejected": -2.65625, "loss": 0.008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.359375, "rewards/margins": 11.875, "rewards/rejected": -13.25, "step": 2165 }, { "epoch": 0.8067039106145252, "grad_norm": 0.072265625, "learning_rate": 1.9729079222395575e-07, "logits/chosen": 0.039306640625, "logits/rejected": 0.42578125, "logps/chosen": -0.29296875, "logps/rejected": -2.875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4609375, "rewards/margins": 12.875, "rewards/rejected": -14.375, "step": 2166 }, { "epoch": 0.8070763500931099, "grad_norm": 57.0, "learning_rate": 1.9656019067017219e-07, "logits/chosen": -0.01409912109375, "logits/rejected": -0.2373046875, "logps/chosen": -0.7578125, "logps/rejected": -2.625, "loss": 0.1035, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.796875, "rewards/margins": 9.375, "rewards/rejected": -13.125, "step": 2167 }, { "epoch": 0.8074487895716946, "grad_norm": 0.001220703125, "learning_rate": 1.9583077852962362e-07, "logits/chosen": 0.1708984375, "logits/rejected": 0.330078125, "logps/chosen": -0.5, "logps/rejected": -3.390625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5, "rewards/margins": 14.4375, "rewards/rejected": -17.0, "step": 2168 }, { "epoch": 0.8078212290502793, "grad_norm": 0.0146484375, "learning_rate": 1.951025570356389e-07, "logits/chosen": 0.1240234375, "logits/rejected": -0.3828125, "logps/chosen": -0.384765625, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.921875, "rewards/margins": 13.375, "rewards/rejected": -15.3125, "step": 2169 }, { "epoch": 0.8081936685288641, "grad_norm": 0.001373291015625, "learning_rate": 1.9437552741953295e-07, "logits/chosen": 0.000885009765625, "logits/rejected": 0.58203125, "logps/chosen": -0.2236328125, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1171875, "rewards/margins": 15.625, "rewards/rejected": -16.75, "step": 2170 }, { "epoch": 0.8085661080074488, "grad_norm": 0.84375, "learning_rate": 1.9364969091060587e-07, "logits/chosen": 0.010986328125, "logits/rejected": 0.443359375, "logps/chosen": -0.26171875, "logps/rejected": -2.6875, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125, "rewards/margins": 12.0625, "rewards/rejected": -13.375, "step": 2171 }, { "epoch": 0.8089385474860336, "grad_norm": 0.34765625, "learning_rate": 1.92925048736141e-07, "logits/chosen": 0.08154296875, "logits/rejected": 0.0537109375, "logps/chosen": -0.3046875, "logps/rejected": -2.125, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.53125, "rewards/margins": 9.125, "rewards/rejected": -10.625, "step": 2172 }, { "epoch": 0.8093109869646182, "grad_norm": 0.3125, "learning_rate": 1.922016021214017e-07, "logits/chosen": 0.173828125, "logits/rejected": 0.10302734375, "logps/chosen": -0.388671875, "logps/rejected": -3.078125, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9375, "rewards/margins": 13.5, "rewards/rejected": -15.375, "step": 2173 }, { "epoch": 0.809683426443203, "grad_norm": 0.0001964569091796875, "learning_rate": 1.9147935228962918e-07, "logits/chosen": 0.04248046875, "logits/rejected": 0.47265625, "logps/chosen": -0.25, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 15.9375, "rewards/rejected": -17.25, "step": 2174 }, { "epoch": 0.8100558659217877, "grad_norm": 0.0026702880859375, "learning_rate": 1.9075830046204183e-07, "logits/chosen": 0.04541015625, "logits/rejected": -0.0634765625, "logps/chosen": -0.3515625, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.765625, "rewards/margins": 15.625, "rewards/rejected": -17.5, "step": 2175 }, { "epoch": 0.8104283054003725, "grad_norm": 0.0018463134765625, "learning_rate": 1.9003844785783238e-07, "logits/chosen": 0.041259765625, "logits/rejected": 0.3828125, "logps/chosen": -0.546875, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.734375, "rewards/margins": 16.5, "rewards/rejected": -19.25, "step": 2176 }, { "epoch": 0.8108007448789571, "grad_norm": 0.006256103515625, "learning_rate": 1.8931979569416582e-07, "logits/chosen": 0.02294921875, "logits/rejected": 0.64453125, "logps/chosen": -0.330078125, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 14.875, "rewards/rejected": -16.5, "step": 2177 }, { "epoch": 0.8111731843575419, "grad_norm": 6.03125, "learning_rate": 1.886023451861768e-07, "logits/chosen": -0.019775390625, "logits/rejected": 0.474609375, "logps/chosen": -0.30859375, "logps/rejected": -2.1875, "loss": 0.0109, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5390625, "rewards/margins": 9.4375, "rewards/rejected": -10.9375, "step": 2178 }, { "epoch": 0.8115456238361266, "grad_norm": 0.000499725341796875, "learning_rate": 1.8788609754696875e-07, "logits/chosen": -0.08056640625, "logits/rejected": 0.09033203125, "logps/chosen": -0.1484375, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.73828125, "rewards/margins": 17.25, "rewards/rejected": -18.0, "step": 2179 }, { "epoch": 0.8119180633147114, "grad_norm": 0.458984375, "learning_rate": 1.8717105398761126e-07, "logits/chosen": 0.042724609375, "logits/rejected": -0.291015625, "logps/chosen": -0.7890625, "logps/rejected": -3.109375, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9375, "rewards/margins": 11.625, "rewards/rejected": -15.5625, "step": 2180 }, { "epoch": 0.8122905027932961, "grad_norm": 0.001129150390625, "learning_rate": 1.8645721571713753e-07, "logits/chosen": 0.048095703125, "logits/rejected": 0.26171875, "logps/chosen": -0.55078125, "logps/rejected": -3.390625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 14.25, "rewards/rejected": -17.0, "step": 2181 }, { "epoch": 0.8126629422718809, "grad_norm": 0.035888671875, "learning_rate": 1.8574458394254332e-07, "logits/chosen": -0.12451171875, "logits/rejected": -0.3515625, "logps/chosen": -0.4140625, "logps/rejected": -2.59375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.078125, "rewards/margins": 11.0, "rewards/rejected": -13.0, "step": 2182 }, { "epoch": 0.8130353817504655, "grad_norm": 2.109375, "learning_rate": 1.8503315986878397e-07, "logits/chosen": 0.2216796875, "logits/rejected": 0.126953125, "logps/chosen": -0.30859375, "logps/rejected": -1.9140625, "loss": 0.0037, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5390625, "rewards/margins": 8.0, "rewards/rejected": -9.5625, "step": 2183 }, { "epoch": 0.8134078212290503, "grad_norm": 8.821487426757812e-06, "learning_rate": 1.843229446987729e-07, "logits/chosen": 0.04345703125, "logits/rejected": 0.44921875, "logps/chosen": -0.185546875, "logps/rejected": -4.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9296875, "rewards/margins": 19.75, "rewards/rejected": -20.75, "step": 2184 }, { "epoch": 0.813780260707635, "grad_norm": 0.212890625, "learning_rate": 1.8361393963337964e-07, "logits/chosen": 0.0093994140625, "logits/rejected": -0.376953125, "logps/chosen": -0.2236328125, "logps/rejected": -2.578125, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1171875, "rewards/margins": 11.8125, "rewards/rejected": -12.875, "step": 2185 }, { "epoch": 0.8141527001862198, "grad_norm": 0.66015625, "learning_rate": 1.8290614587142753e-07, "logits/chosen": -0.06103515625, "logits/rejected": 0.271484375, "logps/chosen": -1.015625, "logps/rejected": -3.25, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.0625, "rewards/margins": 11.1875, "rewards/rejected": -16.25, "step": 2186 }, { "epoch": 0.8145251396648044, "grad_norm": 2.6875, "learning_rate": 1.8219956460969136e-07, "logits/chosen": 0.193359375, "logits/rejected": -0.7421875, "logps/chosen": -0.890625, "logps/rejected": -2.8125, "loss": 0.0042, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.4375, "rewards/margins": 9.6875, "rewards/rejected": -14.125, "step": 2187 }, { "epoch": 0.8148975791433892, "grad_norm": 0.2451171875, "learning_rate": 1.8149419704289623e-07, "logits/chosen": 0.1171875, "logits/rejected": 0.56640625, "logps/chosen": -0.8515625, "logps/rejected": -3.25, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.25, "rewards/margins": 11.875, "rewards/rejected": -16.25, "step": 2188 }, { "epoch": 0.8152700186219739, "grad_norm": 0.38671875, "learning_rate": 1.8079004436371494e-07, "logits/chosen": -0.09521484375, "logits/rejected": 0.1201171875, "logps/chosen": -0.455078125, "logps/rejected": -2.375, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.28125, "rewards/margins": 9.625, "rewards/rejected": -11.875, "step": 2189 }, { "epoch": 0.8156424581005587, "grad_norm": 0.37109375, "learning_rate": 1.8008710776276612e-07, "logits/chosen": -0.16015625, "logits/rejected": -0.52734375, "logps/chosen": -0.203125, "logps/rejected": -2.46875, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.015625, "rewards/margins": 11.375, "rewards/rejected": -12.375, "step": 2190 }, { "epoch": 0.8160148975791434, "grad_norm": 0.37109375, "learning_rate": 1.793853884286122e-07, "logits/chosen": -0.0194091796875, "logits/rejected": -0.578125, "logps/chosen": -0.71484375, "logps/rejected": -2.328125, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.578125, "rewards/margins": 8.0625, "rewards/rejected": -11.625, "step": 2191 }, { "epoch": 0.8163873370577281, "grad_norm": 0.00014972686767578125, "learning_rate": 1.786848875477573e-07, "logits/chosen": 0.0986328125, "logits/rejected": 0.369140625, "logps/chosen": -0.244140625, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.21875, "rewards/margins": 18.125, "rewards/rejected": -19.375, "step": 2192 }, { "epoch": 0.8167597765363128, "grad_norm": 0.337890625, "learning_rate": 1.7798560630464534e-07, "logits/chosen": -0.0052490234375, "logits/rejected": -0.330078125, "logps/chosen": -0.2314453125, "logps/rejected": -2.65625, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.15625, "rewards/margins": 12.1875, "rewards/rejected": -13.375, "step": 2193 }, { "epoch": 0.8171322160148976, "grad_norm": 0.00238037109375, "learning_rate": 1.7728754588165823e-07, "logits/chosen": 0.10009765625, "logits/rejected": -0.1787109375, "logps/chosen": -0.140625, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.69921875, "rewards/margins": 15.25, "rewards/rejected": -15.9375, "step": 2194 }, { "epoch": 0.8175046554934823, "grad_norm": 0.0198974609375, "learning_rate": 1.7659070745911308e-07, "logits/chosen": -0.1962890625, "logits/rejected": 0.263671875, "logps/chosen": -0.490234375, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.453125, "rewards/margins": 12.5, "rewards/rejected": -14.9375, "step": 2195 }, { "epoch": 0.8178770949720671, "grad_norm": 0.000133514404296875, "learning_rate": 1.7589509221526126e-07, "logits/chosen": -0.0615234375, "logits/rejected": 0.2734375, "logps/chosen": -0.52734375, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.640625, "rewards/margins": 16.5, "rewards/rejected": -19.0, "step": 2196 }, { "epoch": 0.8182495344506517, "grad_norm": 2.28125, "learning_rate": 1.7520070132628591e-07, "logits/chosen": 0.11572265625, "logits/rejected": -0.279296875, "logps/chosen": -0.59765625, "logps/rejected": -2.046875, "loss": 0.0028, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0, "rewards/margins": 7.25, "rewards/rejected": -10.25, "step": 2197 }, { "epoch": 0.8186219739292365, "grad_norm": 0.0225830078125, "learning_rate": 1.7450753596629997e-07, "logits/chosen": 0.0673828125, "logits/rejected": -0.08935546875, "logps/chosen": -0.31640625, "logps/rejected": -2.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.578125, "rewards/margins": 13.25, "rewards/rejected": -14.8125, "step": 2198 }, { "epoch": 0.8189944134078212, "grad_norm": 0.003936767578125, "learning_rate": 1.7381559730734389e-07, "logits/chosen": 0.08447265625, "logits/rejected": 0.439453125, "logps/chosen": -0.546875, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.734375, "rewards/margins": 13.1875, "rewards/rejected": -15.9375, "step": 2199 }, { "epoch": 0.819366852886406, "grad_norm": 7.25, "learning_rate": 1.7312488651938426e-07, "logits/chosen": 0.05029296875, "logits/rejected": -0.6875, "logps/chosen": -0.484375, "logps/rejected": -1.84375, "loss": 0.0137, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 6.8125, "rewards/rejected": -9.25, "step": 2200 }, { "epoch": 0.8197392923649907, "grad_norm": 0.0032196044921875, "learning_rate": 1.7243540477031143e-07, "logits/chosen": 0.08740234375, "logits/rejected": 0.251953125, "logps/chosen": -0.435546875, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.171875, "rewards/margins": 16.25, "rewards/rejected": -18.5, "step": 2201 }, { "epoch": 0.8201117318435754, "grad_norm": 3.039836883544922e-05, "learning_rate": 1.7174715322593747e-07, "logits/chosen": -0.033203125, "logits/rejected": 0.4296875, "logps/chosen": -0.2099609375, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 17.875, "rewards/rejected": -18.875, "step": 2202 }, { "epoch": 0.8204841713221601, "grad_norm": 152.0, "learning_rate": 1.7106013304999485e-07, "logits/chosen": -0.12109375, "logits/rejected": -0.1123046875, "logps/chosen": -0.6640625, "logps/rejected": -2.40625, "loss": 0.3613, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3125, "rewards/margins": 8.75, "rewards/rejected": -12.0625, "step": 2203 }, { "epoch": 0.8208566108007449, "grad_norm": 0.00118255615234375, "learning_rate": 1.7037434540413323e-07, "logits/chosen": 0.248046875, "logits/rejected": 0.5078125, "logps/chosen": -0.65234375, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 14.5625, "rewards/rejected": -17.75, "step": 2204 }, { "epoch": 0.8212290502793296, "grad_norm": 7.534027099609375e-05, "learning_rate": 1.6968979144791866e-07, "logits/chosen": -0.1357421875, "logits/rejected": 0.287109375, "logps/chosen": -0.263671875, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125, "rewards/margins": 17.0, "rewards/rejected": -18.25, "step": 2205 }, { "epoch": 0.8216014897579144, "grad_norm": 0.0052490234375, "learning_rate": 1.6900647233883133e-07, "logits/chosen": 0.1796875, "logits/rejected": 0.2197265625, "logps/chosen": -0.765625, "logps/rejected": -4.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.828125, "rewards/margins": 16.625, "rewards/rejected": -20.5, "step": 2206 }, { "epoch": 0.821973929236499, "grad_norm": 0.00011730194091796875, "learning_rate": 1.6832438923226353e-07, "logits/chosen": 0.205078125, "logits/rejected": 0.0052490234375, "logps/chosen": -0.2578125, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.28125, "rewards/margins": 17.125, "rewards/rejected": -18.5, "step": 2207 }, { "epoch": 0.8223463687150838, "grad_norm": 0.001312255859375, "learning_rate": 1.676435432815169e-07, "logits/chosen": 0.181640625, "logits/rejected": 0.33984375, "logps/chosen": -0.076171875, "logps/rejected": -3.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.37890625, "rewards/margins": 14.75, "rewards/rejected": -15.1875, "step": 2208 }, { "epoch": 0.8227188081936685, "grad_norm": 0.0001239776611328125, "learning_rate": 1.669639356378019e-07, "logits/chosen": 0.1044921875, "logits/rejected": 0.388671875, "logps/chosen": -0.2265625, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1328125, "rewards/margins": 16.5, "rewards/rejected": -17.5, "step": 2209 }, { "epoch": 0.8230912476722533, "grad_norm": 7.963180541992188e-05, "learning_rate": 1.6628556745023552e-07, "logits/chosen": 0.0291748046875, "logits/rejected": 0.65234375, "logps/chosen": -0.310546875, "logps/rejected": -3.703125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5546875, "rewards/margins": 17.0, "rewards/rejected": -18.5, "step": 2210 }, { "epoch": 0.823463687150838, "grad_norm": 7.43865966796875e-05, "learning_rate": 1.6560843986583855e-07, "logits/chosen": 0.125, "logits/rejected": 0.447265625, "logps/chosen": -0.400390625, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0, "rewards/margins": 17.375, "rewards/rejected": -19.375, "step": 2211 }, { "epoch": 0.8238361266294227, "grad_norm": 0.00079345703125, "learning_rate": 1.6493255402953372e-07, "logits/chosen": 0.1337890625, "logits/rejected": 0.34375, "logps/chosen": -0.37890625, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 14.75, "rewards/rejected": -16.625, "step": 2212 }, { "epoch": 0.8242085661080074, "grad_norm": 2.0384788513183594e-05, "learning_rate": 1.6425791108414455e-07, "logits/chosen": 0.12255859375, "logits/rejected": 0.5625, "logps/chosen": -0.328125, "logps/rejected": -4.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.640625, "rewards/margins": 19.0, "rewards/rejected": -20.75, "step": 2213 }, { "epoch": 0.8245810055865922, "grad_norm": 4.38690185546875e-05, "learning_rate": 1.6358451217039333e-07, "logits/chosen": 0.09716796875, "logits/rejected": 0.244140625, "logps/chosen": -0.27734375, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3828125, "rewards/margins": 17.5, "rewards/rejected": -19.0, "step": 2214 }, { "epoch": 0.8249534450651769, "grad_norm": 0.310546875, "learning_rate": 1.6291235842689838e-07, "logits/chosen": -0.019775390625, "logits/rejected": -0.10498046875, "logps/chosen": -0.302734375, "logps/rejected": -2.96875, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.515625, "rewards/margins": 13.3125, "rewards/rejected": -14.8125, "step": 2215 }, { "epoch": 0.8253258845437617, "grad_norm": 7.390975952148438e-05, "learning_rate": 1.6224145099017312e-07, "logits/chosen": 0.1572265625, "logits/rejected": 0.359375, "logps/chosen": -0.220703125, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.109375, "rewards/margins": 17.375, "rewards/rejected": -18.5, "step": 2216 }, { "epoch": 0.8256983240223463, "grad_norm": 0.283203125, "learning_rate": 1.615717909946227e-07, "logits/chosen": -0.01055908203125, "logits/rejected": 0.12158203125, "logps/chosen": -0.5859375, "logps/rejected": -3.125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.921875, "rewards/margins": 12.75, "rewards/rejected": -15.625, "step": 2217 }, { "epoch": 0.8260707635009311, "grad_norm": 1.65625, "learning_rate": 1.6090337957254384e-07, "logits/chosen": -0.0107421875, "logits/rejected": -0.12890625, "logps/chosen": -0.8515625, "logps/rejected": -3.078125, "loss": 0.0021, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.28125, "rewards/margins": 11.125, "rewards/rejected": -15.375, "step": 2218 }, { "epoch": 0.8264432029795158, "grad_norm": 0.04833984375, "learning_rate": 1.602362178541226e-07, "logits/chosen": -0.1201171875, "logits/rejected": 0.427734375, "logps/chosen": -1.3203125, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.625, "rewards/margins": 11.25, "rewards/rejected": -17.875, "step": 2219 }, { "epoch": 0.8268156424581006, "grad_norm": 0.087890625, "learning_rate": 1.595703069674305e-07, "logits/chosen": 0.08154296875, "logits/rejected": 0.197265625, "logps/chosen": -1.34375, "logps/rejected": -3.65625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6875, "rewards/margins": 11.625, "rewards/rejected": -18.25, "step": 2220 }, { "epoch": 0.8271880819366852, "grad_norm": 0.0093994140625, "learning_rate": 1.5890564803842517e-07, "logits/chosen": -0.01312255859375, "logits/rejected": 0.40625, "logps/chosen": -0.73046875, "logps/rejected": -3.703125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.65625, "rewards/margins": 14.875, "rewards/rejected": -18.5, "step": 2221 }, { "epoch": 0.82756052141527, "grad_norm": 370.0, "learning_rate": 1.582422421909472e-07, "logits/chosen": 0.0286865234375, "logits/rejected": -0.482421875, "logps/chosen": -1.1015625, "logps/rejected": -2.5625, "loss": 1.5, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -5.5, "rewards/margins": 7.375, "rewards/rejected": -12.875, "step": 2222 }, { "epoch": 0.8279329608938547, "grad_norm": 0.048095703125, "learning_rate": 1.5758009054671807e-07, "logits/chosen": 0.036376953125, "logits/rejected": 0.478515625, "logps/chosen": -0.90234375, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.53125, "rewards/margins": 13.8125, "rewards/rejected": -18.25, "step": 2223 }, { "epoch": 0.8283054003724395, "grad_norm": 2.828125, "learning_rate": 1.5691919422533913e-07, "logits/chosen": 0.02099609375, "logits/rejected": 0.65234375, "logps/chosen": -0.55078125, "logps/rejected": -2.78125, "loss": 0.0038, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 11.25, "rewards/rejected": -14.0, "step": 2224 }, { "epoch": 0.8286778398510242, "grad_norm": 0.00689697265625, "learning_rate": 1.5625955434428837e-07, "logits/chosen": -0.0654296875, "logits/rejected": 0.11572265625, "logps/chosen": -0.25390625, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.265625, "rewards/margins": 14.5, "rewards/rejected": -15.75, "step": 2225 }, { "epoch": 0.829050279329609, "grad_norm": 0.1845703125, "learning_rate": 1.556011720189201e-07, "logits/chosen": 0.01092529296875, "logits/rejected": -0.4296875, "logps/chosen": -0.515625, "logps/rejected": -2.71875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.578125, "rewards/margins": 11.125, "rewards/rejected": -13.625, "step": 2226 }, { "epoch": 0.8294227188081936, "grad_norm": 0.0020751953125, "learning_rate": 1.5494404836246192e-07, "logits/chosen": 0.12451171875, "logits/rejected": 0.12255859375, "logps/chosen": -0.48046875, "logps/rejected": -3.453125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.40625, "rewards/margins": 14.875, "rewards/rejected": -17.25, "step": 2227 }, { "epoch": 0.8297951582867784, "grad_norm": 0.15625, "learning_rate": 1.5428818448601315e-07, "logits/chosen": -0.1748046875, "logits/rejected": -0.466796875, "logps/chosen": -0.189453125, "logps/rejected": -2.5, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.94921875, "rewards/margins": 11.5, "rewards/rejected": -12.5, "step": 2228 }, { "epoch": 0.8301675977653631, "grad_norm": 0.059326171875, "learning_rate": 1.536335814985433e-07, "logits/chosen": 0.0218505859375, "logits/rejected": -0.318359375, "logps/chosen": -0.5078125, "logps/rejected": -2.5, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.546875, "rewards/margins": 10.0, "rewards/rejected": -12.5625, "step": 2229 }, { "epoch": 0.8305400372439479, "grad_norm": 4.1961669921875e-05, "learning_rate": 1.529802405068896e-07, "logits/chosen": 0.259765625, "logits/rejected": 0.169921875, "logps/chosen": -0.3046875, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.515625, "rewards/margins": 17.875, "rewards/rejected": -19.375, "step": 2230 }, { "epoch": 0.8309124767225325, "grad_norm": 0.007415771484375, "learning_rate": 1.5232816261575567e-07, "logits/chosen": 0.10205078125, "logits/rejected": -0.2294921875, "logps/chosen": -0.3203125, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.59375, "rewards/margins": 13.875, "rewards/rejected": -15.4375, "step": 2231 }, { "epoch": 0.8312849162011173, "grad_norm": 0.00017070770263671875, "learning_rate": 1.516773489277092e-07, "logits/chosen": 0.1376953125, "logits/rejected": 0.349609375, "logps/chosen": -0.283203125, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.421875, "rewards/margins": 17.5, "rewards/rejected": -19.0, "step": 2232 }, { "epoch": 0.831657355679702, "grad_norm": 0.001129150390625, "learning_rate": 1.5102780054318064e-07, "logits/chosen": -0.060546875, "logits/rejected": 0.341796875, "logps/chosen": -0.1953125, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9765625, "rewards/margins": 15.3125, "rewards/rejected": -16.25, "step": 2233 }, { "epoch": 0.8320297951582868, "grad_norm": 0.002716064453125, "learning_rate": 1.503795185604603e-07, "logits/chosen": 0.1435546875, "logits/rejected": 0.1201171875, "logps/chosen": -0.2451171875, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2265625, "rewards/margins": 14.625, "rewards/rejected": -15.8125, "step": 2234 }, { "epoch": 0.8324022346368715, "grad_norm": 6.78125, "learning_rate": 1.49732504075698e-07, "logits/chosen": 0.1259765625, "logits/rejected": 0.96875, "logps/chosen": -0.38671875, "logps/rejected": -2.15625, "loss": 0.0082, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.921875, "rewards/margins": 8.9375, "rewards/rejected": -10.875, "step": 2235 }, { "epoch": 0.8327746741154562, "grad_norm": 0.0179443359375, "learning_rate": 1.4908675818289984e-07, "logits/chosen": 0.0947265625, "logits/rejected": 0.38671875, "logps/chosen": -0.5, "logps/rejected": -3.484375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5, "rewards/margins": 14.9375, "rewards/rejected": -17.375, "step": 2236 }, { "epoch": 0.8331471135940409, "grad_norm": 190.0, "learning_rate": 1.4844228197392715e-07, "logits/chosen": -0.265625, "logits/rejected": 0.1611328125, "logps/chosen": -0.8828125, "logps/rejected": -2.1875, "loss": 0.8945, "nll_loss": 0.0, "rewards/accuracies": 0.5, "rewards/chosen": -4.4375, "rewards/margins": 6.5, "rewards/rejected": -10.9375, "step": 2237 }, { "epoch": 0.8335195530726257, "grad_norm": 0.09033203125, "learning_rate": 1.4779907653849458e-07, "logits/chosen": 0.039306640625, "logits/rejected": 0.0299072265625, "logps/chosen": -0.4296875, "logps/rejected": -3.390625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 14.75, "rewards/rejected": -17.0, "step": 2238 }, { "epoch": 0.8338919925512104, "grad_norm": 0.0001926422119140625, "learning_rate": 1.4715714296416772e-07, "logits/chosen": 0.04931640625, "logits/rejected": 0.38671875, "logps/chosen": -0.263671875, "logps/rejected": -3.453125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3203125, "rewards/margins": 15.9375, "rewards/rejected": -17.25, "step": 2239 }, { "epoch": 0.8342644320297952, "grad_norm": 0.000591278076171875, "learning_rate": 1.4651648233636196e-07, "logits/chosen": 0.1416015625, "logits/rejected": 0.32421875, "logps/chosen": -0.45703125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.28125, "rewards/margins": 15.0, "rewards/rejected": -17.25, "step": 2240 }, { "epoch": 0.8346368715083798, "grad_norm": 0.022216796875, "learning_rate": 1.4587709573834028e-07, "logits/chosen": -0.1298828125, "logits/rejected": 0.1435546875, "logps/chosen": -0.48828125, "logps/rejected": -2.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 11.25, "rewards/rejected": -13.6875, "step": 2241 }, { "epoch": 0.8350093109869646, "grad_norm": 0.185546875, "learning_rate": 1.4523898425121088e-07, "logits/chosen": -0.0299072265625, "logits/rejected": -0.384765625, "logps/chosen": -0.373046875, "logps/rejected": -2.609375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8671875, "rewards/margins": 11.125, "rewards/rejected": -13.0, "step": 2242 }, { "epoch": 0.8353817504655493, "grad_norm": 0.00933837890625, "learning_rate": 1.4460214895392675e-07, "logits/chosen": -0.00830078125, "logits/rejected": 0.328125, "logps/chosen": -0.263671875, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3203125, "rewards/margins": 14.8125, "rewards/rejected": -16.125, "step": 2243 }, { "epoch": 0.8357541899441341, "grad_norm": 1.671875, "learning_rate": 1.4396659092328262e-07, "logits/chosen": 0.06591796875, "logits/rejected": -0.365234375, "logps/chosen": -0.458984375, "logps/rejected": -2.6875, "loss": 0.0023, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.296875, "rewards/margins": 11.125, "rewards/rejected": -13.4375, "step": 2244 }, { "epoch": 0.8361266294227188, "grad_norm": 9.775161743164062e-05, "learning_rate": 1.4333231123391367e-07, "logits/chosen": -0.12353515625, "logits/rejected": 0.400390625, "logps/chosen": -0.0673828125, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3359375, "rewards/margins": 17.875, "rewards/rejected": -18.25, "step": 2245 }, { "epoch": 0.8364990689013035, "grad_norm": 1.2874603271484375e-05, "learning_rate": 1.426993109582938e-07, "logits/chosen": 0.0625, "logits/rejected": 0.1376953125, "logps/chosen": -0.125, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.625, "rewards/margins": 18.75, "rewards/rejected": -19.25, "step": 2246 }, { "epoch": 0.8368715083798882, "grad_norm": 0.419921875, "learning_rate": 1.420675911667325e-07, "logits/chosen": 0.125, "logits/rejected": 0.44921875, "logps/chosen": -0.0966796875, "logps/rejected": -1.8125, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.482421875, "rewards/margins": 8.5625, "rewards/rejected": -9.0625, "step": 2247 }, { "epoch": 0.837243947858473, "grad_norm": 0.0029144287109375, "learning_rate": 1.4143715292737587e-07, "logits/chosen": 0.21484375, "logits/rejected": 0.0703125, "logps/chosen": -0.609375, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0625, "rewards/margins": 13.375, "rewards/rejected": -16.5, "step": 2248 }, { "epoch": 0.8376163873370577, "grad_norm": 0.1025390625, "learning_rate": 1.4080799730620221e-07, "logits/chosen": -0.025146484375, "logits/rejected": -0.578125, "logps/chosen": -0.26171875, "logps/rejected": -2.6875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125, "rewards/margins": 12.125, "rewards/rejected": -13.5, "step": 2249 }, { "epoch": 0.8379888268156425, "grad_norm": 2.7418136596679688e-05, "learning_rate": 1.4018012536702065e-07, "logits/chosen": -0.006805419921875, "logits/rejected": 0.2333984375, "logps/chosen": -0.177734375, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.88671875, "rewards/margins": 18.25, "rewards/rejected": -19.125, "step": 2250 }, { "epoch": 0.8383612662942271, "grad_norm": 0.0034637451171875, "learning_rate": 1.3955353817147036e-07, "logits/chosen": 0.0751953125, "logits/rejected": 0.359375, "logps/chosen": -0.404296875, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 13.0, "rewards/rejected": -15.0, "step": 2251 }, { "epoch": 0.8387337057728119, "grad_norm": 0.6875, "learning_rate": 1.3892823677901837e-07, "logits/chosen": -0.0517578125, "logits/rejected": 0.38671875, "logps/chosen": -0.419921875, "logps/rejected": -2.625, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.09375, "rewards/margins": 11.0, "rewards/rejected": -13.0625, "step": 2252 }, { "epoch": 0.8391061452513966, "grad_norm": 0.005157470703125, "learning_rate": 1.3830422224695712e-07, "logits/chosen": 0.05908203125, "logits/rejected": 0.2109375, "logps/chosen": -0.71875, "logps/rejected": -3.640625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.59375, "rewards/margins": 14.625, "rewards/rejected": -18.25, "step": 2253 }, { "epoch": 0.8394785847299814, "grad_norm": 0.2578125, "learning_rate": 1.376814956304037e-07, "logits/chosen": -0.0986328125, "logits/rejected": -0.51953125, "logps/chosen": -0.2392578125, "logps/rejected": -2.5625, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1953125, "rewards/margins": 11.625, "rewards/rejected": -12.8125, "step": 2254 }, { "epoch": 0.839851024208566, "grad_norm": 0.00171661376953125, "learning_rate": 1.3706005798229668e-07, "logits/chosen": 0.09716796875, "logits/rejected": -0.1328125, "logps/chosen": -0.32421875, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6171875, "rewards/margins": 14.5625, "rewards/rejected": -16.25, "step": 2255 }, { "epoch": 0.8402234636871508, "grad_norm": 1.1205673217773438e-05, "learning_rate": 1.3643991035339592e-07, "logits/chosen": 0.07080078125, "logits/rejected": 0.361328125, "logps/chosen": -0.287109375, "logps/rejected": -4.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4375, "rewards/margins": 19.25, "rewards/rejected": -20.75, "step": 2256 }, { "epoch": 0.8405959031657355, "grad_norm": 0.1474609375, "learning_rate": 1.3582105379228012e-07, "logits/chosen": 0.0810546875, "logits/rejected": -0.26171875, "logps/chosen": -0.41015625, "logps/rejected": -3.1875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 13.8125, "rewards/rejected": -15.875, "step": 2257 }, { "epoch": 0.8409683426443203, "grad_norm": 0.546875, "learning_rate": 1.352034893453446e-07, "logits/chosen": 0.09716796875, "logits/rejected": 0.59765625, "logps/chosen": -0.2216796875, "logps/rejected": -2.4375, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.109375, "rewards/margins": 11.0, "rewards/rejected": -12.125, "step": 2258 }, { "epoch": 0.841340782122905, "grad_norm": 3.8125, "learning_rate": 1.3458721805679974e-07, "logits/chosen": 0.0830078125, "logits/rejected": 0.453125, "logps/chosen": -0.16015625, "logps/rejected": -2.375, "loss": 0.0025, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.80078125, "rewards/margins": 11.0, "rewards/rejected": -11.8125, "step": 2259 }, { "epoch": 0.8417132216014898, "grad_norm": 0.01116943359375, "learning_rate": 1.3397224096866968e-07, "logits/chosen": -0.080078125, "logits/rejected": -0.224609375, "logps/chosen": -0.228515625, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.140625, "rewards/margins": 16.125, "rewards/rejected": -17.25, "step": 2260 }, { "epoch": 0.8420856610800744, "grad_norm": 4.220008850097656e-05, "learning_rate": 1.333585591207904e-07, "logits/chosen": -0.00982666015625, "logits/rejected": 0.419921875, "logps/chosen": -0.1298828125, "logps/rejected": -3.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6484375, "rewards/margins": 19.125, "rewards/rejected": -19.75, "step": 2261 }, { "epoch": 0.8424581005586592, "grad_norm": 6.151199340820312e-05, "learning_rate": 1.3274617355080767e-07, "logits/chosen": 0.1328125, "logits/rejected": 0.384765625, "logps/chosen": -0.251953125, "logps/rejected": -4.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2578125, "rewards/margins": 18.75, "rewards/rejected": -20.0, "step": 2262 }, { "epoch": 0.842830540037244, "grad_norm": 6.4849853515625e-05, "learning_rate": 1.3213508529417505e-07, "logits/chosen": -0.051025390625, "logits/rejected": 0.3046875, "logps/chosen": -0.2890625, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4375, "rewards/margins": 17.5, "rewards/rejected": -19.0, "step": 2263 }, { "epoch": 0.8432029795158287, "grad_norm": 4.40625, "learning_rate": 1.3152529538415307e-07, "logits/chosen": 0.07275390625, "logits/rejected": 0.08203125, "logps/chosen": -0.4765625, "logps/rejected": -2.0625, "loss": 0.0056, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 7.90625, "rewards/rejected": -10.25, "step": 2264 }, { "epoch": 0.8435754189944135, "grad_norm": 0.2138671875, "learning_rate": 1.3091680485180673e-07, "logits/chosen": 0.0625, "logits/rejected": 0.490234375, "logps/chosen": -0.37890625, "logps/rejected": -2.921875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 12.75, "rewards/rejected": -14.625, "step": 2265 }, { "epoch": 0.8439478584729981, "grad_norm": 0.00133514404296875, "learning_rate": 1.3030961472600405e-07, "logits/chosen": -0.07763671875, "logits/rejected": 0.48828125, "logps/chosen": -0.169921875, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.84765625, "rewards/margins": 16.125, "rewards/rejected": -17.0, "step": 2266 }, { "epoch": 0.8443202979515829, "grad_norm": 4.380941390991211e-06, "learning_rate": 1.2970372603341428e-07, "logits/chosen": -0.0478515625, "logits/rejected": 0.16796875, "logps/chosen": -0.130859375, "logps/rejected": -4.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.65625, "rewards/margins": 19.75, "rewards/rejected": -20.5, "step": 2267 }, { "epoch": 0.8446927374301676, "grad_norm": 0.1318359375, "learning_rate": 1.2909913979850589e-07, "logits/chosen": 0.08544921875, "logits/rejected": -0.34765625, "logps/chosen": -0.4140625, "logps/rejected": -2.75, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 11.6875, "rewards/rejected": -13.75, "step": 2268 }, { "epoch": 0.8450651769087524, "grad_norm": 0.015869140625, "learning_rate": 1.2849585704354549e-07, "logits/chosen": 0.01171875, "logits/rejected": -0.255859375, "logps/chosen": -0.4765625, "logps/rejected": -2.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 12.4375, "rewards/rejected": -14.8125, "step": 2269 }, { "epoch": 0.845437616387337, "grad_norm": 1.5546875, "learning_rate": 1.2789387878859522e-07, "logits/chosen": -0.0257568359375, "logits/rejected": 0.34375, "logps/chosen": -0.25, "logps/rejected": -2.703125, "loss": 0.0024, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 12.25, "rewards/rejected": -13.5, "step": 2270 }, { "epoch": 0.8458100558659218, "grad_norm": 0.1025390625, "learning_rate": 1.2729320605151224e-07, "logits/chosen": -0.043701171875, "logits/rejected": -0.42578125, "logps/chosen": -1.0625, "logps/rejected": -3.046875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.3125, "rewards/margins": 9.9375, "rewards/rejected": -15.25, "step": 2271 }, { "epoch": 0.8461824953445065, "grad_norm": 0.00024318695068359375, "learning_rate": 1.2669383984794505e-07, "logits/chosen": 0.0133056640625, "logits/rejected": 0.330078125, "logps/chosen": -0.373046875, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8671875, "rewards/margins": 16.375, "rewards/rejected": -18.25, "step": 2272 }, { "epoch": 0.8465549348230913, "grad_norm": 38.75, "learning_rate": 1.2609578119133425e-07, "logits/chosen": 0.02978515625, "logits/rejected": -0.6484375, "logps/chosen": -0.8046875, "logps/rejected": -2.75, "loss": 0.04, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0, "rewards/margins": 9.75, "rewards/rejected": -13.75, "step": 2273 }, { "epoch": 0.846927374301676, "grad_norm": 7.2479248046875e-05, "learning_rate": 1.2549903109290878e-07, "logits/chosen": 0.11865234375, "logits/rejected": 0.255859375, "logps/chosen": -0.27734375, "logps/rejected": -3.765625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3828125, "rewards/margins": 17.5, "rewards/rejected": -18.75, "step": 2274 }, { "epoch": 0.8472998137802608, "grad_norm": 0.00016689300537109375, "learning_rate": 1.2490359056168548e-07, "logits/chosen": 0.1357421875, "logits/rejected": 0.294921875, "logps/chosen": -0.2275390625, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.140625, "rewards/margins": 17.0, "rewards/rejected": -18.125, "step": 2275 }, { "epoch": 0.8476722532588454, "grad_norm": 21.0, "learning_rate": 1.243094606044666e-07, "logits/chosen": 0.0810546875, "logits/rejected": -0.27734375, "logps/chosen": -0.296875, "logps/rejected": -1.5859375, "loss": 0.0464, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.484375, "rewards/margins": 6.4375, "rewards/rejected": -7.9375, "step": 2276 }, { "epoch": 0.8480446927374302, "grad_norm": 131.0, "learning_rate": 1.2371664222583843e-07, "logits/chosen": 0.10595703125, "logits/rejected": 0.703125, "logps/chosen": -0.265625, "logps/rejected": -1.6953125, "loss": 0.334, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 7.125, "rewards/rejected": -8.4375, "step": 2277 }, { "epoch": 0.8484171322160149, "grad_norm": 57.0, "learning_rate": 1.2312513642816973e-07, "logits/chosen": -0.007568359375, "logits/rejected": 0.314453125, "logps/chosen": -0.47265625, "logps/rejected": -2.265625, "loss": 0.1338, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 9.0, "rewards/rejected": -11.375, "step": 2278 }, { "epoch": 0.8487895716945997, "grad_norm": 0.0002803802490234375, "learning_rate": 1.2253494421161e-07, "logits/chosen": 0.2001953125, "logits/rejected": 0.146484375, "logps/chosen": -0.33203125, "logps/rejected": -3.765625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 17.25, "rewards/rejected": -18.875, "step": 2279 }, { "epoch": 0.8491620111731844, "grad_norm": 0.1005859375, "learning_rate": 1.2194606657408698e-07, "logits/chosen": 0.07763671875, "logits/rejected": -0.328125, "logps/chosen": -0.6015625, "logps/rejected": -2.71875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0, "rewards/margins": 10.5625, "rewards/rejected": -13.5625, "step": 2280 }, { "epoch": 0.8495344506517691, "grad_norm": 0.003143310546875, "learning_rate": 1.213585045113064e-07, "logits/chosen": 0.1728515625, "logits/rejected": -0.296875, "logps/chosen": -0.14453125, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.72265625, "rewards/margins": 17.75, "rewards/rejected": -18.5, "step": 2281 }, { "epoch": 0.8499068901303538, "grad_norm": 0.007293701171875, "learning_rate": 1.2077225901674918e-07, "logits/chosen": 0.1826171875, "logits/rejected": 0.33203125, "logps/chosen": -0.78125, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.921875, "rewards/margins": 14.625, "rewards/rejected": -18.5, "step": 2282 }, { "epoch": 0.8502793296089386, "grad_norm": 0.2890625, "learning_rate": 1.2018733108167028e-07, "logits/chosen": 0.078125, "logits/rejected": 0.1484375, "logps/chosen": -0.353515625, "logps/rejected": -3.046875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.765625, "rewards/margins": 13.4375, "rewards/rejected": -15.25, "step": 2283 }, { "epoch": 0.8506517690875233, "grad_norm": 0.0205078125, "learning_rate": 1.1960372169509685e-07, "logits/chosen": 0.032470703125, "logits/rejected": -0.291015625, "logps/chosen": -0.3125, "logps/rejected": -2.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5625, "rewards/margins": 12.875, "rewards/rejected": -14.5, "step": 2284 }, { "epoch": 0.851024208566108, "grad_norm": 0.283203125, "learning_rate": 1.1902143184382635e-07, "logits/chosen": -0.02392578125, "logits/rejected": 0.33984375, "logps/chosen": -1.2109375, "logps/rejected": -3.21875, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0625, "rewards/margins": 10.125, "rewards/rejected": -16.125, "step": 2285 }, { "epoch": 0.8513966480446927, "grad_norm": 9.5367431640625e-05, "learning_rate": 1.184404625124254e-07, "logits/chosen": 0.0194091796875, "logits/rejected": 0.451171875, "logps/chosen": -0.1904296875, "logps/rejected": -3.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.953125, "rewards/margins": 18.25, "rewards/rejected": -19.25, "step": 2286 }, { "epoch": 0.8517690875232775, "grad_norm": 0.00131988525390625, "learning_rate": 1.1786081468322761e-07, "logits/chosen": 0.053955078125, "logits/rejected": 0.486328125, "logps/chosen": -0.72265625, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.625, "rewards/margins": 14.9375, "rewards/rejected": -18.5, "step": 2287 }, { "epoch": 0.8521415270018622, "grad_norm": 0.01385498046875, "learning_rate": 1.172824893363324e-07, "logits/chosen": 0.11474609375, "logits/rejected": -0.228515625, "logps/chosen": -0.201171875, "logps/rejected": -2.578125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0078125, "rewards/margins": 11.875, "rewards/rejected": -12.875, "step": 2288 }, { "epoch": 0.852513966480447, "grad_norm": 5.173683166503906e-05, "learning_rate": 1.1670548744960234e-07, "logits/chosen": 0.1650390625, "logits/rejected": 0.2314453125, "logps/chosen": -0.19140625, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.953125, "rewards/margins": 17.625, "rewards/rejected": -18.625, "step": 2289 }, { "epoch": 0.8528864059590316, "grad_norm": 1.5390625, "learning_rate": 1.1612980999866323e-07, "logits/chosen": 0.034423828125, "logits/rejected": -0.14453125, "logps/chosen": -0.48828125, "logps/rejected": -2.40625, "loss": 0.0019, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 9.5625, "rewards/rejected": -12.0, "step": 2290 }, { "epoch": 0.8532588454376164, "grad_norm": 1.5546875, "learning_rate": 1.1555545795690062e-07, "logits/chosen": 0.03564453125, "logits/rejected": 0.06689453125, "logps/chosen": -0.349609375, "logps/rejected": -2.046875, "loss": 0.0025, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.75, "rewards/margins": 8.5, "rewards/rejected": -10.25, "step": 2291 }, { "epoch": 0.8536312849162011, "grad_norm": 0.000213623046875, "learning_rate": 1.149824322954597e-07, "logits/chosen": 0.07080078125, "logits/rejected": 0.3203125, "logps/chosen": -0.2236328125, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1171875, "rewards/margins": 15.75, "rewards/rejected": -16.875, "step": 2292 }, { "epoch": 0.8540037243947859, "grad_norm": 0.02490234375, "learning_rate": 1.144107339832422e-07, "logits/chosen": 0.1630859375, "logits/rejected": -0.380859375, "logps/chosen": -0.134765625, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.671875, "rewards/margins": 14.25, "rewards/rejected": -14.9375, "step": 2293 }, { "epoch": 0.8543761638733706, "grad_norm": 0.003936767578125, "learning_rate": 1.138403639869058e-07, "logits/chosen": -0.00927734375, "logits/rejected": 0.103515625, "logps/chosen": -0.6640625, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3125, "rewards/margins": 13.5625, "rewards/rejected": -16.875, "step": 2294 }, { "epoch": 0.8547486033519553, "grad_norm": 0.005889892578125, "learning_rate": 1.1327132327086262e-07, "logits/chosen": 0.06884765625, "logits/rejected": 0.494140625, "logps/chosen": -0.349609375, "logps/rejected": -3.140625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.75, "rewards/margins": 14.0, "rewards/rejected": -15.75, "step": 2295 }, { "epoch": 0.85512104283054, "grad_norm": 0.0137939453125, "learning_rate": 1.1270361279727675e-07, "logits/chosen": 0.026123046875, "logits/rejected": 0.10107421875, "logps/chosen": -0.54296875, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.703125, "rewards/margins": 12.75, "rewards/rejected": -15.4375, "step": 2296 }, { "epoch": 0.8554934823091248, "grad_norm": 0.0693359375, "learning_rate": 1.121372335260629e-07, "logits/chosen": 0.02001953125, "logits/rejected": 0.185546875, "logps/chosen": -1.2578125, "logps/rejected": -3.5, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.3125, "rewards/margins": 11.1875, "rewards/rejected": -17.5, "step": 2297 }, { "epoch": 0.8558659217877095, "grad_norm": 0.00830078125, "learning_rate": 1.1157218641488514e-07, "logits/chosen": -0.1337890625, "logits/rejected": -0.1318359375, "logps/chosen": -0.185546875, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.92578125, "rewards/margins": 15.4375, "rewards/rejected": -16.375, "step": 2298 }, { "epoch": 0.8562383612662943, "grad_norm": 8.58306884765625e-06, "learning_rate": 1.1100847241915501e-07, "logits/chosen": 0.24609375, "logits/rejected": 0.58984375, "logps/chosen": -0.375, "logps/rejected": -4.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8671875, "rewards/margins": 19.5, "rewards/rejected": -21.25, "step": 2299 }, { "epoch": 0.8566108007448789, "grad_norm": 1.4453125, "learning_rate": 1.1044609249202996e-07, "logits/chosen": -0.10595703125, "logits/rejected": 0.578125, "logps/chosen": -0.5703125, "logps/rejected": -3.0, "loss": 0.0015, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 12.1875, "rewards/rejected": -15.0, "step": 2300 }, { "epoch": 0.8569832402234637, "grad_norm": 2.9087066650390625e-05, "learning_rate": 1.0988504758441196e-07, "logits/chosen": 0.038330078125, "logits/rejected": 0.328125, "logps/chosen": -0.41015625, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 18.25, "rewards/rejected": -20.25, "step": 2301 }, { "epoch": 0.8573556797020484, "grad_norm": 3.4809112548828125e-05, "learning_rate": 1.0932533864494506e-07, "logits/chosen": 0.185546875, "logits/rejected": 0.75390625, "logps/chosen": -0.154296875, "logps/rejected": -3.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7734375, "rewards/margins": 18.25, "rewards/rejected": -19.0, "step": 2302 }, { "epoch": 0.8577281191806332, "grad_norm": 0.000835418701171875, "learning_rate": 1.0876696662001454e-07, "logits/chosen": 0.01361083984375, "logits/rejected": 0.20703125, "logps/chosen": -0.177734375, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.88671875, "rewards/margins": 15.125, "rewards/rejected": -16.0, "step": 2303 }, { "epoch": 0.8581005586592179, "grad_norm": 0.000431060791015625, "learning_rate": 1.0820993245374617e-07, "logits/chosen": 0.0203857421875, "logits/rejected": 0.4375, "logps/chosen": -0.255859375, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2734375, "rewards/margins": 18.875, "rewards/rejected": -20.25, "step": 2304 }, { "epoch": 0.8584729981378026, "grad_norm": 0.08251953125, "learning_rate": 1.0765423708800206e-07, "logits/chosen": 0.1943359375, "logits/rejected": 0.234375, "logps/chosen": -0.1962890625, "logps/rejected": -3.0, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.98046875, "rewards/margins": 13.9375, "rewards/rejected": -14.9375, "step": 2305 }, { "epoch": 0.8588454376163873, "grad_norm": 0.0255126953125, "learning_rate": 1.0709988146238157e-07, "logits/chosen": -0.076171875, "logits/rejected": 0.279296875, "logps/chosen": -0.60546875, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.03125, "rewards/margins": 12.75, "rewards/rejected": -15.8125, "step": 2306 }, { "epoch": 0.8592178770949721, "grad_norm": 0.1533203125, "learning_rate": 1.0654686651421844e-07, "logits/chosen": 0.025634765625, "logits/rejected": -0.330078125, "logps/chosen": -0.53125, "logps/rejected": -2.4375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.65625, "rewards/margins": 9.625, "rewards/rejected": -12.25, "step": 2307 }, { "epoch": 0.8595903165735568, "grad_norm": 0.0013427734375, "learning_rate": 1.0599519317857968e-07, "logits/chosen": 0.06298828125, "logits/rejected": 0.3203125, "logps/chosen": -0.25390625, "logps/rejected": -3.234375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.265625, "rewards/margins": 14.875, "rewards/rejected": -16.125, "step": 2308 }, { "epoch": 0.8599627560521416, "grad_norm": 0.037353515625, "learning_rate": 1.0544486238826393e-07, "logits/chosen": 0.10400390625, "logits/rejected": -0.40234375, "logps/chosen": -0.259765625, "logps/rejected": -2.734375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.296875, "rewards/margins": 12.375, "rewards/rejected": -13.6875, "step": 2309 }, { "epoch": 0.8603351955307262, "grad_norm": 0.41796875, "learning_rate": 1.0489587507379925e-07, "logits/chosen": -0.1005859375, "logits/rejected": -0.251953125, "logps/chosen": -0.51171875, "logps/rejected": -2.53125, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5625, "rewards/margins": 10.125, "rewards/rejected": -12.6875, "step": 2310 }, { "epoch": 0.860707635009311, "grad_norm": 0.07080078125, "learning_rate": 1.0434823216344242e-07, "logits/chosen": 0.146484375, "logits/rejected": -0.318359375, "logps/chosen": -0.23046875, "logps/rejected": -2.59375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.15625, "rewards/margins": 11.75, "rewards/rejected": -12.875, "step": 2311 }, { "epoch": 0.8610800744878957, "grad_norm": 0.55078125, "learning_rate": 1.038019345831773e-07, "logits/chosen": -0.12158203125, "logits/rejected": 0.365234375, "logps/chosen": -0.453125, "logps/rejected": -2.59375, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.265625, "rewards/margins": 10.75, "rewards/rejected": -13.0, "step": 2312 }, { "epoch": 0.8614525139664805, "grad_norm": 0.037353515625, "learning_rate": 1.0325698325671254e-07, "logits/chosen": -0.0947265625, "logits/rejected": -0.380859375, "logps/chosen": -0.1748046875, "logps/rejected": -2.625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.875, "rewards/margins": 12.25, "rewards/rejected": -13.125, "step": 2313 }, { "epoch": 0.8618249534450652, "grad_norm": 0.0001697540283203125, "learning_rate": 1.0271337910548073e-07, "logits/chosen": 0.125, "logits/rejected": 0.388671875, "logps/chosen": -0.41796875, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.09375, "rewards/margins": 16.5, "rewards/rejected": -18.625, "step": 2314 }, { "epoch": 0.8621973929236499, "grad_norm": 0.0003223419189453125, "learning_rate": 1.0217112304863656e-07, "logits/chosen": -0.040283203125, "logits/rejected": 0.177734375, "logps/chosen": -0.2890625, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4453125, "rewards/margins": 15.8125, "rewards/rejected": -17.25, "step": 2315 }, { "epoch": 0.8625698324022346, "grad_norm": 0.0037841796875, "learning_rate": 1.0163021600305527e-07, "logits/chosen": -0.05615234375, "logits/rejected": 0.353515625, "logps/chosen": -0.84375, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.21875, "rewards/margins": 14.625, "rewards/rejected": -18.875, "step": 2316 }, { "epoch": 0.8629422718808194, "grad_norm": 0.0299072265625, "learning_rate": 1.0109065888333099e-07, "logits/chosen": 0.019775390625, "logits/rejected": 0.345703125, "logps/chosen": -0.734375, "logps/rejected": -3.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.65625, "rewards/margins": 11.4375, "rewards/rejected": -15.125, "step": 2317 }, { "epoch": 0.8633147113594041, "grad_norm": 0.7890625, "learning_rate": 1.0055245260177561e-07, "logits/chosen": -0.1376953125, "logits/rejected": 0.212890625, "logps/chosen": -0.412109375, "logps/rejected": -2.578125, "loss": 0.0012, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 10.8125, "rewards/rejected": -12.875, "step": 2318 }, { "epoch": 0.8636871508379889, "grad_norm": 0.07275390625, "learning_rate": 1.0001559806841662e-07, "logits/chosen": -0.11328125, "logits/rejected": -0.58203125, "logps/chosen": -0.130859375, "logps/rejected": -2.625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.65625, "rewards/margins": 12.5, "rewards/rejected": -13.125, "step": 2319 }, { "epoch": 0.8640595903165735, "grad_norm": 0.06591796875, "learning_rate": 9.9480096190996e-08, "logits/chosen": 0.08349609375, "logits/rejected": -0.53515625, "logps/chosen": -0.294921875, "logps/rejected": -2.671875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.46875, "rewards/margins": 11.875, "rewards/rejected": -13.375, "step": 2320 }, { "epoch": 0.8644320297951583, "grad_norm": 0.0023040771484375, "learning_rate": 9.894594787496874e-08, "logits/chosen": 0.1552734375, "logits/rejected": -0.1513671875, "logps/chosen": -0.0810546875, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.40625, "rewards/margins": 15.875, "rewards/rejected": -16.25, "step": 2321 }, { "epoch": 0.864804469273743, "grad_norm": 0.00020503997802734375, "learning_rate": 9.841315402350119e-08, "logits/chosen": 0.053955078125, "logits/rejected": 0.390625, "logps/chosen": -0.373046875, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8671875, "rewards/margins": 16.75, "rewards/rejected": -18.5, "step": 2322 }, { "epoch": 0.8651769087523278, "grad_norm": 0.0001544952392578125, "learning_rate": 9.788171553746913e-08, "logits/chosen": -0.0615234375, "logits/rejected": 0.326171875, "logps/chosen": -0.26953125, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 16.25, "rewards/rejected": -17.75, "step": 2323 }, { "epoch": 0.8655493482309125, "grad_norm": 0.0011444091796875, "learning_rate": 9.735163331545706e-08, "logits/chosen": -0.0016937255859375, "logits/rejected": 0.1796875, "logps/chosen": -0.265625, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 14.375, "rewards/rejected": -15.625, "step": 2324 }, { "epoch": 0.8659217877094972, "grad_norm": 0.000583648681640625, "learning_rate": 9.682290825375591e-08, "logits/chosen": -0.1796875, "logits/rejected": 0.65234375, "logps/chosen": -0.62890625, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.140625, "rewards/margins": 17.125, "rewards/rejected": -20.25, "step": 2325 }, { "epoch": 0.8662942271880819, "grad_norm": 52.5, "learning_rate": 9.629554124636228e-08, "logits/chosen": -0.040771484375, "logits/rejected": 0.036376953125, "logps/chosen": -0.41015625, "logps/rejected": -2.203125, "loss": 0.1216, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 9.0, "rewards/rejected": -11.0, "step": 2326 }, { "epoch": 0.8666666666666667, "grad_norm": 0.000629425048828125, "learning_rate": 9.576953318497577e-08, "logits/chosen": 0.054443359375, "logits/rejected": 0.271484375, "logps/chosen": -0.5078125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.546875, "rewards/margins": 14.75, "rewards/rejected": -17.25, "step": 2327 }, { "epoch": 0.8670391061452514, "grad_norm": 1.8835067749023438e-05, "learning_rate": 9.524488495899874e-08, "logits/chosen": 0.2060546875, "logits/rejected": 0.6171875, "logps/chosen": -0.072265625, "logps/rejected": -4.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.359375, "rewards/margins": 20.25, "rewards/rejected": -20.5, "step": 2328 }, { "epoch": 0.8674115456238362, "grad_norm": 0.00013446807861328125, "learning_rate": 9.472159745553428e-08, "logits/chosen": 0.2490234375, "logits/rejected": 0.431640625, "logps/chosen": -0.142578125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7109375, "rewards/margins": 16.5, "rewards/rejected": -17.25, "step": 2329 }, { "epoch": 0.8677839851024208, "grad_norm": 4.3392181396484375e-05, "learning_rate": 9.419967155938451e-08, "logits/chosen": 0.0849609375, "logits/rejected": 0.427734375, "logps/chosen": -0.37890625, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 17.5, "rewards/rejected": -19.5, "step": 2330 }, { "epoch": 0.8681564245810056, "grad_norm": 0.00016880035400390625, "learning_rate": 9.367910815304961e-08, "logits/chosen": 0.06689453125, "logits/rejected": 0.498046875, "logps/chosen": -0.189453125, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9453125, "rewards/margins": 16.5, "rewards/rejected": -17.5, "step": 2331 }, { "epoch": 0.8685288640595903, "grad_norm": 4.00543212890625e-05, "learning_rate": 9.315990811672501e-08, "logits/chosen": -0.047607421875, "logits/rejected": 0.328125, "logps/chosen": -0.35546875, "logps/rejected": -3.890625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.78125, "rewards/margins": 17.75, "rewards/rejected": -19.5, "step": 2332 }, { "epoch": 0.8689013035381751, "grad_norm": 0.345703125, "learning_rate": 9.264207232830235e-08, "logits/chosen": 0.130859375, "logits/rejected": 0.76953125, "logps/chosen": -0.2578125, "logps/rejected": -2.953125, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.296875, "rewards/margins": 13.4375, "rewards/rejected": -14.75, "step": 2333 }, { "epoch": 0.8692737430167597, "grad_norm": 0.0172119140625, "learning_rate": 9.212560166336553e-08, "logits/chosen": 0.00799560546875, "logits/rejected": 0.3828125, "logps/chosen": -0.890625, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.4375, "rewards/margins": 13.25, "rewards/rejected": -17.75, "step": 2334 }, { "epoch": 0.8696461824953445, "grad_norm": 0.00022792816162109375, "learning_rate": 9.161049699519034e-08, "logits/chosen": 0.296875, "logits/rejected": 0.388671875, "logps/chosen": -0.41015625, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 15.9375, "rewards/rejected": -18.0, "step": 2335 }, { "epoch": 0.8700186219739292, "grad_norm": 4.28125, "learning_rate": 9.109675919474293e-08, "logits/chosen": -0.1767578125, "logits/rejected": -0.2109375, "logps/chosen": -0.2255859375, "logps/rejected": -1.9375, "loss": 0.0075, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.125, "rewards/margins": 8.5625, "rewards/rejected": -9.75, "step": 2336 }, { "epoch": 0.870391061452514, "grad_norm": 0.00017070770263671875, "learning_rate": 9.058438913067858e-08, "logits/chosen": 0.115234375, "logits/rejected": 0.41015625, "logps/chosen": -0.3515625, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7578125, "rewards/margins": 16.875, "rewards/rejected": -18.625, "step": 2337 }, { "epoch": 0.8707635009310987, "grad_norm": 0.00750732421875, "learning_rate": 9.007338766933959e-08, "logits/chosen": 0.07373046875, "logits/rejected": 0.443359375, "logps/chosen": -0.38671875, "logps/rejected": -2.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9296875, "rewards/margins": 12.75, "rewards/rejected": -14.6875, "step": 2338 }, { "epoch": 0.8711359404096835, "grad_norm": 0.00010204315185546875, "learning_rate": 8.956375567475448e-08, "logits/chosen": 0.05224609375, "logits/rejected": 0.380859375, "logps/chosen": -0.361328125, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8125, "rewards/margins": 16.75, "rewards/rejected": -18.5, "step": 2339 }, { "epoch": 0.8715083798882681, "grad_norm": 0.007537841796875, "learning_rate": 8.905549400863548e-08, "logits/chosen": 0.00970458984375, "logits/rejected": -0.2314453125, "logps/chosen": -0.318359375, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.59375, "rewards/margins": 15.0625, "rewards/rejected": -16.625, "step": 2340 }, { "epoch": 0.8718808193668529, "grad_norm": 6.914138793945312e-05, "learning_rate": 8.854860353037853e-08, "logits/chosen": 0.10888671875, "logits/rejected": 0.263671875, "logps/chosen": -0.25390625, "logps/rejected": -3.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2734375, "rewards/margins": 17.625, "rewards/rejected": -19.0, "step": 2341 }, { "epoch": 0.8722532588454376, "grad_norm": 0.0012359619140625, "learning_rate": 8.804308509706105e-08, "logits/chosen": 0.0341796875, "logits/rejected": 0.34375, "logps/chosen": -0.212890625, "logps/rejected": -3.140625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0625, "rewards/margins": 14.625, "rewards/rejected": -15.75, "step": 2342 }, { "epoch": 0.8726256983240224, "grad_norm": 23.0, "learning_rate": 8.753893956344029e-08, "logits/chosen": 0.080078125, "logits/rejected": 0.578125, "logps/chosen": -0.380859375, "logps/rejected": -2.34375, "loss": 0.0564, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.90625, "rewards/margins": 9.875, "rewards/rejected": -11.75, "step": 2343 }, { "epoch": 0.872998137802607, "grad_norm": 1.8125, "learning_rate": 8.703616778195208e-08, "logits/chosen": 0.2451171875, "logits/rejected": -0.44921875, "logps/chosen": -0.251953125, "logps/rejected": -2.34375, "loss": 0.0025, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2578125, "rewards/margins": 10.5, "rewards/rejected": -11.75, "step": 2344 }, { "epoch": 0.8733705772811918, "grad_norm": 0.0169677734375, "learning_rate": 8.653477060270939e-08, "logits/chosen": -0.043701171875, "logits/rejected": -0.279296875, "logps/chosen": -0.185546875, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9296875, "rewards/margins": 14.875, "rewards/rejected": -15.8125, "step": 2345 }, { "epoch": 0.8737430167597765, "grad_norm": 0.45703125, "learning_rate": 8.603474887350138e-08, "logits/chosen": -0.0849609375, "logits/rejected": 0.470703125, "logps/chosen": -0.19140625, "logps/rejected": -2.828125, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9609375, "rewards/margins": 13.1875, "rewards/rejected": -14.1875, "step": 2346 }, { "epoch": 0.8741154562383613, "grad_norm": 0.017333984375, "learning_rate": 8.553610343979114e-08, "logits/chosen": 0.0947265625, "logits/rejected": 0.045654296875, "logps/chosen": -0.63671875, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1875, "rewards/margins": 13.1875, "rewards/rejected": -16.375, "step": 2347 }, { "epoch": 0.874487895716946, "grad_norm": 0.000637054443359375, "learning_rate": 8.503883514471483e-08, "logits/chosen": 0.09326171875, "logits/rejected": 0.03564453125, "logps/chosen": -0.337890625, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6875, "rewards/margins": 15.3125, "rewards/rejected": -17.0, "step": 2348 }, { "epoch": 0.8748603351955307, "grad_norm": 0.000568389892578125, "learning_rate": 8.454294482907966e-08, "logits/chosen": 0.162109375, "logits/rejected": 0.31640625, "logps/chosen": -0.7578125, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.78125, "rewards/margins": 15.0, "rewards/rejected": -18.75, "step": 2349 }, { "epoch": 0.8752327746741154, "grad_norm": 0.0673828125, "learning_rate": 8.404843333136335e-08, "logits/chosen": -0.083984375, "logits/rejected": 0.625, "logps/chosen": -0.49609375, "logps/rejected": -2.78125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.46875, "rewards/margins": 11.5, "rewards/rejected": -13.9375, "step": 2350 }, { "epoch": 0.8756052141527002, "grad_norm": 7.62939453125e-05, "learning_rate": 8.355530148771242e-08, "logits/chosen": -0.1171875, "logits/rejected": 0.275390625, "logps/chosen": -0.32421875, "logps/rejected": -3.703125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6171875, "rewards/margins": 16.875, "rewards/rejected": -18.5, "step": 2351 }, { "epoch": 0.8759776536312849, "grad_norm": 0.11572265625, "learning_rate": 8.306355013193989e-08, "logits/chosen": -0.1728515625, "logits/rejected": -0.32421875, "logps/chosen": -0.283203125, "logps/rejected": -2.6875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4140625, "rewards/margins": 11.9375, "rewards/rejected": -13.375, "step": 2352 }, { "epoch": 0.8763500931098697, "grad_norm": 0.39453125, "learning_rate": 8.257318009552508e-08, "logits/chosen": -0.0147705078125, "logits/rejected": -0.1767578125, "logps/chosen": -0.65234375, "logps/rejected": -3.375, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.265625, "rewards/margins": 13.5625, "rewards/rejected": -16.875, "step": 2353 }, { "epoch": 0.8767225325884543, "grad_norm": 0.0008392333984375, "learning_rate": 8.20841922076115e-08, "logits/chosen": -0.01312255859375, "logits/rejected": 0.3671875, "logps/chosen": -0.412109375, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 14.6875, "rewards/rejected": -16.75, "step": 2354 }, { "epoch": 0.8770949720670391, "grad_norm": 44.0, "learning_rate": 8.15965872950059e-08, "logits/chosen": -0.08642578125, "logits/rejected": 0.62890625, "logps/chosen": -0.33203125, "logps/rejected": -2.078125, "loss": 0.0898, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 8.75, "rewards/rejected": -10.375, "step": 2355 }, { "epoch": 0.8774674115456238, "grad_norm": 46.5, "learning_rate": 8.111036618217657e-08, "logits/chosen": -0.083984375, "logits/rejected": -0.21875, "logps/chosen": -0.62109375, "logps/rejected": -1.796875, "loss": 0.1133, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.09375, "rewards/margins": 5.875, "rewards/rejected": -9.0, "step": 2356 }, { "epoch": 0.8778398510242086, "grad_norm": 0.0228271484375, "learning_rate": 8.062552969125157e-08, "logits/chosen": -0.04443359375, "logits/rejected": 0.298828125, "logps/chosen": -0.5234375, "logps/rejected": -2.984375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.625, "rewards/margins": 12.375, "rewards/rejected": -14.9375, "step": 2357 }, { "epoch": 0.8782122905027933, "grad_norm": 37.25, "learning_rate": 8.014207864201813e-08, "logits/chosen": -0.1484375, "logits/rejected": -0.10302734375, "logps/chosen": -1.078125, "logps/rejected": -2.328125, "loss": 0.054, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.375, "rewards/margins": 6.25, "rewards/rejected": -11.625, "step": 2358 }, { "epoch": 0.878584729981378, "grad_norm": 0.005706787109375, "learning_rate": 7.9660013851921e-08, "logits/chosen": 0.099609375, "logits/rejected": 0.62109375, "logps/chosen": -0.38671875, "logps/rejected": -3.453125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9375, "rewards/margins": 15.3125, "rewards/rejected": -17.25, "step": 2359 }, { "epoch": 0.8789571694599627, "grad_norm": 8.046627044677734e-06, "learning_rate": 7.917933613606062e-08, "logits/chosen": 0.0703125, "logits/rejected": 0.28125, "logps/chosen": -0.28515625, "logps/rejected": -4.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4296875, "rewards/margins": 19.5, "rewards/rejected": -21.0, "step": 2360 }, { "epoch": 0.8793296089385475, "grad_norm": 0.0174560546875, "learning_rate": 7.870004630719257e-08, "logits/chosen": 0.173828125, "logits/rejected": 0.51953125, "logps/chosen": -0.302734375, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.515625, "rewards/margins": 14.125, "rewards/rejected": -15.625, "step": 2361 }, { "epoch": 0.8797020484171322, "grad_norm": 7.867813110351562e-05, "learning_rate": 7.82221451757252e-08, "logits/chosen": 0.10205078125, "logits/rejected": 0.361328125, "logps/chosen": -0.2578125, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.28125, "rewards/margins": 17.125, "rewards/rejected": -18.375, "step": 2362 }, { "epoch": 0.880074487895717, "grad_norm": 0.00482177734375, "learning_rate": 7.774563354971921e-08, "logits/chosen": -0.0283203125, "logits/rejected": 0.03662109375, "logps/chosen": -0.63671875, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.171875, "rewards/margins": 13.6875, "rewards/rejected": -16.875, "step": 2363 }, { "epoch": 0.8804469273743016, "grad_norm": 0.1611328125, "learning_rate": 7.727051223488572e-08, "logits/chosen": 0.0615234375, "logits/rejected": -0.28125, "logps/chosen": -0.33984375, "logps/rejected": -2.59375, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.703125, "rewards/margins": 11.25, "rewards/rejected": -13.0, "step": 2364 }, { "epoch": 0.8808193668528864, "grad_norm": 2.09808349609375e-05, "learning_rate": 7.679678203458459e-08, "logits/chosen": 0.10302734375, "logits/rejected": 0.32421875, "logps/chosen": -0.2021484375, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.015625, "rewards/margins": 18.5, "rewards/rejected": -19.5, "step": 2365 }, { "epoch": 0.8811918063314711, "grad_norm": 0.2578125, "learning_rate": 7.63244437498242e-08, "logits/chosen": 0.060791015625, "logits/rejected": 0.40234375, "logps/chosen": -0.208984375, "logps/rejected": -2.53125, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 11.625, "rewards/rejected": -12.625, "step": 2366 }, { "epoch": 0.8815642458100559, "grad_norm": 7.28125, "learning_rate": 7.585349817925907e-08, "logits/chosen": -0.0341796875, "logits/rejected": 0.419921875, "logps/chosen": -0.71875, "logps/rejected": -2.375, "loss": 0.0156, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.59375, "rewards/margins": 8.3125, "rewards/rejected": -11.875, "step": 2367 }, { "epoch": 0.8819366852886406, "grad_norm": 8.535385131835938e-05, "learning_rate": 7.538394611918884e-08, "logits/chosen": 0.0017852783203125, "logits/rejected": 0.330078125, "logps/chosen": -0.7265625, "logps/rejected": -4.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.640625, "rewards/margins": 17.0, "rewards/rejected": -20.5, "step": 2368 }, { "epoch": 0.8823091247672253, "grad_norm": 0.00067138671875, "learning_rate": 7.491578836355691e-08, "logits/chosen": 0.09326171875, "logits/rejected": 0.31640625, "logps/chosen": -0.384765625, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.921875, "rewards/margins": 16.75, "rewards/rejected": -18.75, "step": 2369 }, { "epoch": 0.88268156424581, "grad_norm": 0.28125, "learning_rate": 7.444902570394929e-08, "logits/chosen": -0.01300048828125, "logits/rejected": -0.3828125, "logps/chosen": -0.3671875, "logps/rejected": -2.8125, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.828125, "rewards/margins": 12.125, "rewards/rejected": -14.0, "step": 2370 }, { "epoch": 0.8830540037243948, "grad_norm": 0.1357421875, "learning_rate": 7.398365892959296e-08, "logits/chosen": -0.02197265625, "logits/rejected": 0.2578125, "logps/chosen": -0.5546875, "logps/rejected": -2.921875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.78125, "rewards/margins": 11.875, "rewards/rejected": -14.625, "step": 2371 }, { "epoch": 0.8834264432029795, "grad_norm": 0.0002803802490234375, "learning_rate": 7.351968882735467e-08, "logits/chosen": -0.1376953125, "logits/rejected": 0.431640625, "logps/chosen": -0.21875, "logps/rejected": -3.453125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.09375, "rewards/margins": 16.125, "rewards/rejected": -17.25, "step": 2372 }, { "epoch": 0.8837988826815643, "grad_norm": 0.005279541015625, "learning_rate": 7.305711618173962e-08, "logits/chosen": 0.08544921875, "logits/rejected": 0.81640625, "logps/chosen": -0.29296875, "logps/rejected": -2.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4609375, "rewards/margins": 12.625, "rewards/rejected": -14.125, "step": 2373 }, { "epoch": 0.8841713221601489, "grad_norm": 0.0002651214599609375, "learning_rate": 7.259594177489003e-08, "logits/chosen": 0.03759765625, "logits/rejected": 0.3828125, "logps/chosen": -0.142578125, "logps/rejected": -3.25, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7109375, "rewards/margins": 15.625, "rewards/rejected": -16.25, "step": 2374 }, { "epoch": 0.8845437616387337, "grad_norm": 0.000492095947265625, "learning_rate": 7.213616638658397e-08, "logits/chosen": 0.10888671875, "logits/rejected": 0.458984375, "logps/chosen": -0.26953125, "logps/rejected": -3.421875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 15.8125, "rewards/rejected": -17.125, "step": 2375 }, { "epoch": 0.8849162011173184, "grad_norm": 0.000377655029296875, "learning_rate": 7.167779079423404e-08, "logits/chosen": 0.2421875, "logits/rejected": 0.248046875, "logps/chosen": -0.171875, "logps/rejected": -3.390625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.859375, "rewards/margins": 16.125, "rewards/rejected": -17.0, "step": 2376 }, { "epoch": 0.8852886405959032, "grad_norm": 0.052734375, "learning_rate": 7.122081577288599e-08, "logits/chosen": 0.095703125, "logits/rejected": 0.0125732421875, "logps/chosen": -0.48828125, "logps/rejected": -3.234375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 13.75, "rewards/rejected": -16.25, "step": 2377 }, { "epoch": 0.8856610800744878, "grad_norm": 0.00173187255859375, "learning_rate": 7.076524209521724e-08, "logits/chosen": 0.173828125, "logits/rejected": 0.0185546875, "logps/chosen": -0.162109375, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8125, "rewards/margins": 16.625, "rewards/rejected": -17.5, "step": 2378 }, { "epoch": 0.8860335195530726, "grad_norm": 0.01068115234375, "learning_rate": 7.03110705315358e-08, "logits/chosen": -0.0025634765625, "logits/rejected": 0.4140625, "logps/chosen": -0.4765625, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 14.0, "rewards/rejected": -16.375, "step": 2379 }, { "epoch": 0.8864059590316573, "grad_norm": 9.012222290039062e-05, "learning_rate": 6.985830184977909e-08, "logits/chosen": 0.08984375, "logits/rejected": 0.357421875, "logps/chosen": -0.5859375, "logps/rejected": -3.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9375, "rewards/margins": 17.0, "rewards/rejected": -19.875, "step": 2380 }, { "epoch": 0.8867783985102421, "grad_norm": 7.724761962890625e-05, "learning_rate": 6.940693681551261e-08, "logits/chosen": 0.12451171875, "logits/rejected": 0.5078125, "logps/chosen": -0.1396484375, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.69921875, "rewards/margins": 17.5, "rewards/rejected": -18.25, "step": 2381 }, { "epoch": 0.8871508379888268, "grad_norm": 0.0201416015625, "learning_rate": 6.89569761919278e-08, "logits/chosen": -0.109375, "logits/rejected": 0.47265625, "logps/chosen": -0.7734375, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.875, "rewards/margins": 14.25, "rewards/rejected": -18.125, "step": 2382 }, { "epoch": 0.8875232774674116, "grad_norm": 0.470703125, "learning_rate": 6.850842073984193e-08, "logits/chosen": -0.01953125, "logits/rejected": 0.6484375, "logps/chosen": -0.12890625, "logps/rejected": -2.71875, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.640625, "rewards/margins": 12.9375, "rewards/rejected": -13.625, "step": 2383 }, { "epoch": 0.8878957169459962, "grad_norm": 0.0002689361572265625, "learning_rate": 6.806127121769654e-08, "logits/chosen": 0.06884765625, "logits/rejected": 0.58203125, "logps/chosen": -0.263671875, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125, "rewards/margins": 17.375, "rewards/rejected": -18.75, "step": 2384 }, { "epoch": 0.888268156424581, "grad_norm": 0.00029754638671875, "learning_rate": 6.761552838155542e-08, "logits/chosen": -0.039306640625, "logits/rejected": 0.404296875, "logps/chosen": -0.2041015625, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.015625, "rewards/margins": 15.9375, "rewards/rejected": -17.0, "step": 2385 }, { "epoch": 0.8886405959031657, "grad_norm": 121.0, "learning_rate": 6.71711929851045e-08, "logits/chosen": -0.0673828125, "logits/rejected": 0.82421875, "logps/chosen": -0.625, "logps/rejected": -1.453125, "loss": 0.3418, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.125, "rewards/margins": 4.15625, "rewards/rejected": -7.28125, "step": 2386 }, { "epoch": 0.8890130353817505, "grad_norm": 0.001953125, "learning_rate": 6.672826577964909e-08, "logits/chosen": 0.083984375, "logits/rejected": 0.2890625, "logps/chosen": -0.248046875, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2421875, "rewards/margins": 15.125, "rewards/rejected": -16.375, "step": 2387 }, { "epoch": 0.8893854748603351, "grad_norm": 2.0, "learning_rate": 6.628674751411411e-08, "logits/chosen": 0.0439453125, "logits/rejected": -0.44921875, "logps/chosen": -0.5546875, "logps/rejected": -2.875, "loss": 0.0026, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.765625, "rewards/margins": 11.625, "rewards/rejected": -14.375, "step": 2388 }, { "epoch": 0.8897579143389199, "grad_norm": 0.0111083984375, "learning_rate": 6.584663893504219e-08, "logits/chosen": 0.240234375, "logits/rejected": -0.0294189453125, "logps/chosen": -0.1259765625, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.62890625, "rewards/margins": 15.9375, "rewards/rejected": -16.5, "step": 2389 }, { "epoch": 0.8901303538175046, "grad_norm": 0.01611328125, "learning_rate": 6.540794078659216e-08, "logits/chosen": -0.0751953125, "logits/rejected": 0.384765625, "logps/chosen": -0.578125, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.875, "rewards/margins": 13.25, "rewards/rejected": -16.125, "step": 2390 }, { "epoch": 0.8905027932960894, "grad_norm": 0.000148773193359375, "learning_rate": 6.497065381053788e-08, "logits/chosen": 0.09912109375, "logits/rejected": 0.51953125, "logps/chosen": -0.1142578125, "logps/rejected": -3.453125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5703125, "rewards/margins": 16.75, "rewards/rejected": -17.25, "step": 2391 }, { "epoch": 0.8908752327746741, "grad_norm": 0.345703125, "learning_rate": 6.453477874626728e-08, "logits/chosen": 0.134765625, "logits/rejected": 0.30078125, "logps/chosen": -0.59765625, "logps/rejected": -2.8125, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.984375, "rewards/margins": 11.125, "rewards/rejected": -14.125, "step": 2392 }, { "epoch": 0.8912476722532588, "grad_norm": 30.125, "learning_rate": 6.410031633078113e-08, "logits/chosen": -0.0196533203125, "logits/rejected": 1.0625, "logps/chosen": -0.396484375, "logps/rejected": -2.1875, "loss": 0.0581, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.984375, "rewards/margins": 8.9375, "rewards/rejected": -10.9375, "step": 2393 }, { "epoch": 0.8916201117318435, "grad_norm": 0.1669921875, "learning_rate": 6.36672672986915e-08, "logits/chosen": 0.2109375, "logits/rejected": -0.1884765625, "logps/chosen": -0.404296875, "logps/rejected": -3.125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 13.625, "rewards/rejected": -15.625, "step": 2394 }, { "epoch": 0.8919925512104283, "grad_norm": 0.029541015625, "learning_rate": 6.323563238222039e-08, "logits/chosen": -0.0216064453125, "logits/rejected": 0.65625, "logps/chosen": -0.439453125, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1875, "rewards/margins": 13.625, "rewards/rejected": -15.8125, "step": 2395 }, { "epoch": 0.892364990689013, "grad_norm": 1.9311904907226562e-05, "learning_rate": 6.280541231119906e-08, "logits/chosen": 0.216796875, "logits/rejected": 0.21484375, "logps/chosen": -0.244140625, "logps/rejected": -3.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.21875, "rewards/margins": 18.625, "rewards/rejected": -19.75, "step": 2396 }, { "epoch": 0.8927374301675978, "grad_norm": 0.00051116943359375, "learning_rate": 6.237660781306634e-08, "logits/chosen": 0.032470703125, "logits/rejected": 0.3359375, "logps/chosen": -0.3984375, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9921875, "rewards/margins": 15.3125, "rewards/rejected": -17.25, "step": 2397 }, { "epoch": 0.8931098696461824, "grad_norm": 0.00104522705078125, "learning_rate": 6.194921961286756e-08, "logits/chosen": 0.173828125, "logits/rejected": 0.50390625, "logps/chosen": -0.42578125, "logps/rejected": -3.578125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 15.8125, "rewards/rejected": -18.0, "step": 2398 }, { "epoch": 0.8934823091247672, "grad_norm": 1.109375, "learning_rate": 6.152324843325324e-08, "logits/chosen": -0.0703125, "logits/rejected": 0.38671875, "logps/chosen": -0.546875, "logps/rejected": -2.859375, "loss": 0.0014, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 11.5625, "rewards/rejected": -14.25, "step": 2399 }, { "epoch": 0.8938547486033519, "grad_norm": 0.3984375, "learning_rate": 6.109869499447815e-08, "logits/chosen": 0.1982421875, "logits/rejected": -0.455078125, "logps/chosen": -0.318359375, "logps/rejected": -2.59375, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.59375, "rewards/margins": 11.4375, "rewards/rejected": -13.0, "step": 2400 }, { "epoch": 0.8942271880819367, "grad_norm": 0.000698089599609375, "learning_rate": 6.067556001439956e-08, "logits/chosen": 0.007659912109375, "logits/rejected": 0.306640625, "logps/chosen": -0.333984375, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.671875, "rewards/margins": 16.75, "rewards/rejected": -18.25, "step": 2401 }, { "epoch": 0.8945996275605214, "grad_norm": 3.546875, "learning_rate": 6.025384420847647e-08, "logits/chosen": -0.07275390625, "logits/rejected": -0.93359375, "logps/chosen": -0.49609375, "logps/rejected": -1.765625, "loss": 0.0092, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.484375, "rewards/margins": 6.3125, "rewards/rejected": -8.8125, "step": 2402 }, { "epoch": 0.8949720670391061, "grad_norm": 0.000370025634765625, "learning_rate": 5.983354828976838e-08, "logits/chosen": -0.1845703125, "logits/rejected": 0.12451171875, "logps/chosen": -0.224609375, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.125, "rewards/margins": 16.25, "rewards/rejected": -17.375, "step": 2403 }, { "epoch": 0.8953445065176909, "grad_norm": 0.177734375, "learning_rate": 5.941467296893372e-08, "logits/chosen": -0.0673828125, "logits/rejected": 0.328125, "logps/chosen": -0.37109375, "logps/rejected": -2.84375, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8515625, "rewards/margins": 12.375, "rewards/rejected": -14.25, "step": 2404 }, { "epoch": 0.8957169459962756, "grad_norm": 0.66015625, "learning_rate": 5.8997218954229154e-08, "logits/chosen": 0.0908203125, "logits/rejected": 0.447265625, "logps/chosen": -0.66796875, "logps/rejected": -2.46875, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.34375, "rewards/margins": 9.0, "rewards/rejected": -12.3125, "step": 2405 }, { "epoch": 0.8960893854748604, "grad_norm": 3.814697265625e-05, "learning_rate": 5.858118695150786e-08, "logits/chosen": 0.2060546875, "logits/rejected": 0.37890625, "logps/chosen": -0.1064453125, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.53125, "rewards/margins": 18.125, "rewards/rejected": -18.625, "step": 2406 }, { "epoch": 0.8964618249534451, "grad_norm": 0.244140625, "learning_rate": 5.816657766421892e-08, "logits/chosen": -0.0247802734375, "logits/rejected": 0.61328125, "logps/chosen": -0.2236328125, "logps/rejected": -2.78125, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1171875, "rewards/margins": 12.75, "rewards/rejected": -13.875, "step": 2407 }, { "epoch": 0.8968342644320298, "grad_norm": 0.640625, "learning_rate": 5.775339179340547e-08, "logits/chosen": 0.01470947265625, "logits/rejected": 0.796875, "logps/chosen": -0.3046875, "logps/rejected": -2.53125, "loss": 0.0011, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.53125, "rewards/margins": 11.125, "rewards/rejected": -12.625, "step": 2408 }, { "epoch": 0.8972067039106145, "grad_norm": 0.0004138946533203125, "learning_rate": 5.7341630037704156e-08, "logits/chosen": -0.11962890625, "logits/rejected": 0.0771484375, "logps/chosen": -0.546875, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.734375, "rewards/margins": 15.5625, "rewards/rejected": -18.25, "step": 2409 }, { "epoch": 0.8975791433891993, "grad_norm": 8.58306884765625e-05, "learning_rate": 5.693129309334356e-08, "logits/chosen": 0.0294189453125, "logits/rejected": 0.3828125, "logps/chosen": -0.2578125, "logps/rejected": -3.796875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.28125, "rewards/margins": 17.75, "rewards/rejected": -19.0, "step": 2410 }, { "epoch": 0.897951582867784, "grad_norm": 0.0033416748046875, "learning_rate": 5.652238165414317e-08, "logits/chosen": 0.04150390625, "logits/rejected": 0.326171875, "logps/chosen": -0.73046875, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.640625, "rewards/margins": 14.875, "rewards/rejected": -18.5, "step": 2411 }, { "epoch": 0.8983240223463688, "grad_norm": 1.015625, "learning_rate": 5.611489641151182e-08, "logits/chosen": 0.10107421875, "logits/rejected": 0.6484375, "logps/chosen": -0.197265625, "logps/rejected": -1.96875, "loss": 0.0014, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.984375, "rewards/margins": 8.875, "rewards/rejected": -9.875, "step": 2412 }, { "epoch": 0.8986964618249534, "grad_norm": 0.00011873245239257812, "learning_rate": 5.5708838054447285e-08, "logits/chosen": 0.06884765625, "logits/rejected": 0.37890625, "logps/chosen": -0.2890625, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4453125, "rewards/margins": 17.25, "rewards/rejected": -18.75, "step": 2413 }, { "epoch": 0.8990689013035382, "grad_norm": 0.10302734375, "learning_rate": 5.5304207269534433e-08, "logits/chosen": -0.033203125, "logits/rejected": -0.3359375, "logps/chosen": -0.216796875, "logps/rejected": -2.6875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0859375, "rewards/margins": 12.375, "rewards/rejected": -13.5, "step": 2414 }, { "epoch": 0.8994413407821229, "grad_norm": 5.4836273193359375e-05, "learning_rate": 5.490100474094439e-08, "logits/chosen": 0.058837890625, "logits/rejected": 0.458984375, "logps/chosen": -0.1357421875, "logps/rejected": -3.765625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6796875, "rewards/margins": 18.125, "rewards/rejected": -18.75, "step": 2415 }, { "epoch": 0.8998137802607077, "grad_norm": 0.10302734375, "learning_rate": 5.449923115043351e-08, "logits/chosen": 0.09228515625, "logits/rejected": 0.357421875, "logps/chosen": -0.64453125, "logps/rejected": -3.21875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.21875, "rewards/margins": 12.9375, "rewards/rejected": -16.125, "step": 2416 }, { "epoch": 0.9001862197392924, "grad_norm": 1.5, "learning_rate": 5.4098887177341376e-08, "logits/chosen": -0.318359375, "logits/rejected": -0.5390625, "logps/chosen": -0.59375, "logps/rejected": -2.90625, "loss": 0.0026, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.96875, "rewards/margins": 11.5625, "rewards/rejected": -14.5, "step": 2417 }, { "epoch": 0.9005586592178771, "grad_norm": 5.125, "learning_rate": 5.369997349859121e-08, "logits/chosen": 0.0390625, "logits/rejected": -1.1953125, "logps/chosen": -0.70703125, "logps/rejected": -2.25, "loss": 0.0061, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.53125, "rewards/margins": 7.75, "rewards/rejected": -11.25, "step": 2418 }, { "epoch": 0.9009310986964618, "grad_norm": 0.0047607421875, "learning_rate": 5.330249078868736e-08, "logits/chosen": 0.1376953125, "logits/rejected": 0.244140625, "logps/chosen": -0.69140625, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.46875, "rewards/margins": 13.75, "rewards/rejected": -17.25, "step": 2419 }, { "epoch": 0.9013035381750466, "grad_norm": 1.234375, "learning_rate": 5.2906439719714315e-08, "logits/chosen": -0.044189453125, "logits/rejected": -0.5859375, "logps/chosen": -0.7265625, "logps/rejected": -2.75, "loss": 0.0017, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.625, "rewards/margins": 10.1875, "rewards/rejected": -13.8125, "step": 2420 }, { "epoch": 0.9016759776536313, "grad_norm": 0.00323486328125, "learning_rate": 5.2511820961336294e-08, "logits/chosen": 0.0191650390625, "logits/rejected": 0.42578125, "logps/chosen": -0.232421875, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.15625, "rewards/margins": 14.375, "rewards/rejected": -15.5, "step": 2421 }, { "epoch": 0.9020484171322161, "grad_norm": 6.90625, "learning_rate": 5.2118635180795667e-08, "logits/chosen": 0.12890625, "logits/rejected": 0.7890625, "logps/chosen": -0.7734375, "logps/rejected": -2.625, "loss": 0.0099, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.859375, "rewards/margins": 9.25, "rewards/rejected": -13.125, "step": 2422 }, { "epoch": 0.9024208566108007, "grad_norm": 2.703125, "learning_rate": 5.1726883042911526e-08, "logits/chosen": -0.2294921875, "logits/rejected": 0.22265625, "logps/chosen": -0.1884765625, "logps/rejected": -1.953125, "loss": 0.0059, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9453125, "rewards/margins": 8.875, "rewards/rejected": -9.8125, "step": 2423 }, { "epoch": 0.9027932960893855, "grad_norm": 0.0021209716796875, "learning_rate": 5.133656521007952e-08, "logits/chosen": -0.021484375, "logits/rejected": 0.38671875, "logps/chosen": -0.2001953125, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0, "rewards/margins": 14.25, "rewards/rejected": -15.25, "step": 2424 }, { "epoch": 0.9031657355679702, "grad_norm": 0.0751953125, "learning_rate": 5.094768234226912e-08, "logits/chosen": -0.033447265625, "logits/rejected": 0.11279296875, "logps/chosen": -0.20703125, "logps/rejected": -2.21875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.03125, "rewards/margins": 10.0, "rewards/rejected": -11.125, "step": 2425 }, { "epoch": 0.903538175046555, "grad_norm": 1.2695789337158203e-05, "learning_rate": 5.056023509702434e-08, "logits/chosen": 0.054931640625, "logits/rejected": 0.431640625, "logps/chosen": -0.1953125, "logps/rejected": -4.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.98046875, "rewards/margins": 19.25, "rewards/rejected": -20.25, "step": 2426 }, { "epoch": 0.9039106145251397, "grad_norm": 8.5625, "learning_rate": 5.017422412946155e-08, "logits/chosen": 0.03857421875, "logits/rejected": 0.62890625, "logps/chosen": -0.2236328125, "logps/rejected": -2.3125, "loss": 0.0171, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1171875, "rewards/margins": 10.5, "rewards/rejected": -11.5625, "step": 2427 }, { "epoch": 0.9042830540037244, "grad_norm": 9.953975677490234e-06, "learning_rate": 4.9789650092268555e-08, "logits/chosen": 0.07275390625, "logits/rejected": 0.330078125, "logps/chosen": -0.14453125, "logps/rejected": -4.03125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7265625, "rewards/margins": 19.5, "rewards/rejected": -20.125, "step": 2428 }, { "epoch": 0.9046554934823091, "grad_norm": 0.01806640625, "learning_rate": 4.94065136357032e-08, "logits/chosen": 0.06884765625, "logits/rejected": 0.0625, "logps/chosen": -0.408203125, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 13.25, "rewards/rejected": -15.25, "step": 2429 }, { "epoch": 0.9050279329608939, "grad_norm": 0.00093841552734375, "learning_rate": 4.902481540759297e-08, "logits/chosen": -0.1357421875, "logits/rejected": 0.47265625, "logps/chosen": -0.287109375, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4375, "rewards/margins": 15.3125, "rewards/rejected": -16.75, "step": 2430 }, { "epoch": 0.9054003724394786, "grad_norm": 0.0078125, "learning_rate": 4.864455605333361e-08, "logits/chosen": -0.130859375, "logits/rejected": 0.404296875, "logps/chosen": -0.279296875, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3984375, "rewards/margins": 15.4375, "rewards/rejected": -16.75, "step": 2431 }, { "epoch": 0.9057728119180634, "grad_norm": 3.24249267578125e-05, "learning_rate": 4.82657362158875e-08, "logits/chosen": 0.0203857421875, "logits/rejected": 0.35546875, "logps/chosen": -0.228515625, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.140625, "rewards/margins": 17.75, "rewards/rejected": -19.0, "step": 2432 }, { "epoch": 0.906145251396648, "grad_norm": 0.0014495849609375, "learning_rate": 4.788835653578357e-08, "logits/chosen": 0.2099609375, "logits/rejected": -0.072265625, "logps/chosen": -0.419921875, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.09375, "rewards/margins": 15.0, "rewards/rejected": -17.0, "step": 2433 }, { "epoch": 0.9065176908752328, "grad_norm": 0.01446533203125, "learning_rate": 4.75124176511149e-08, "logits/chosen": -0.021240234375, "logits/rejected": -0.259765625, "logps/chosen": -0.26953125, "logps/rejected": -2.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 13.25, "rewards/rejected": -14.5625, "step": 2434 }, { "epoch": 0.9068901303538175, "grad_norm": 0.010498046875, "learning_rate": 4.713792019753902e-08, "logits/chosen": -0.0120849609375, "logits/rejected": -0.2470703125, "logps/chosen": -0.0625, "logps/rejected": -2.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3125, "rewards/margins": 14.5, "rewards/rejected": -14.875, "step": 2435 }, { "epoch": 0.9072625698324023, "grad_norm": 0.012939453125, "learning_rate": 4.67648648082764e-08, "logits/chosen": -0.07421875, "logits/rejected": 0.4765625, "logps/chosen": -0.66796875, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.34375, "rewards/margins": 13.6875, "rewards/rejected": -17.0, "step": 2436 }, { "epoch": 0.907635009310987, "grad_norm": 0.000377655029296875, "learning_rate": 4.639325211410836e-08, "logits/chosen": -0.003936767578125, "logits/rejected": 0.072265625, "logps/chosen": -0.3828125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9140625, "rewards/margins": 15.375, "rewards/rejected": -17.25, "step": 2437 }, { "epoch": 0.9080074487895717, "grad_norm": 0.07861328125, "learning_rate": 4.60230827433774e-08, "logits/chosen": 0.052978515625, "logits/rejected": 0.71875, "logps/chosen": -0.4765625, "logps/rejected": -3.46875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 14.875, "rewards/rejected": -17.25, "step": 2438 }, { "epoch": 0.9083798882681564, "grad_norm": 11.4375, "learning_rate": 4.5654357321985524e-08, "logits/chosen": 0.265625, "logits/rejected": 0.7578125, "logps/chosen": -0.19140625, "logps/rejected": -1.921875, "loss": 0.0405, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.953125, "rewards/margins": 8.625, "rewards/rejected": -9.625, "step": 2439 }, { "epoch": 0.9087523277467412, "grad_norm": 0.138671875, "learning_rate": 4.528707647339324e-08, "logits/chosen": 0.049072265625, "logits/rejected": 0.515625, "logps/chosen": -0.29296875, "logps/rejected": -2.53125, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.453125, "rewards/margins": 11.25, "rewards/rejected": -12.625, "step": 2440 }, { "epoch": 0.9091247672253259, "grad_norm": 0.0242919921875, "learning_rate": 4.4921240818618366e-08, "logits/chosen": 0.2470703125, "logits/rejected": 0.341796875, "logps/chosen": -0.87890625, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.375, "rewards/margins": 13.5, "rewards/rejected": -17.875, "step": 2441 }, { "epoch": 0.9094972067039107, "grad_norm": 0.076171875, "learning_rate": 4.45568509762352e-08, "logits/chosen": 0.0869140625, "logits/rejected": -0.6953125, "logps/chosen": -0.36328125, "logps/rejected": -2.46875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.828125, "rewards/margins": 10.5, "rewards/rejected": -12.3125, "step": 2442 }, { "epoch": 0.9098696461824953, "grad_norm": 0.0001811981201171875, "learning_rate": 4.41939075623732e-08, "logits/chosen": 0.048095703125, "logits/rejected": 0.35546875, "logps/chosen": -0.1953125, "logps/rejected": -3.515625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9765625, "rewards/margins": 16.625, "rewards/rejected": -17.5, "step": 2443 }, { "epoch": 0.9102420856610801, "grad_norm": 0.64453125, "learning_rate": 4.3832411190716556e-08, "logits/chosen": 0.047119140625, "logits/rejected": 0.48046875, "logps/chosen": -0.20703125, "logps/rejected": -2.859375, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.03125, "rewards/margins": 13.25, "rewards/rejected": -14.25, "step": 2444 }, { "epoch": 0.9106145251396648, "grad_norm": 0.000942230224609375, "learning_rate": 4.347236247250223e-08, "logits/chosen": 0.158203125, "logits/rejected": 0.259765625, "logps/chosen": -0.39453125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.96875, "rewards/margins": 15.375, "rewards/rejected": -17.25, "step": 2445 }, { "epoch": 0.9109869646182496, "grad_norm": 0.005401611328125, "learning_rate": 4.311376201651975e-08, "logits/chosen": -0.17578125, "logits/rejected": 0.41796875, "logps/chosen": -0.7734375, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.84375, "rewards/margins": 13.0, "rewards/rejected": -16.75, "step": 2446 }, { "epoch": 0.9113594040968342, "grad_norm": 9.584426879882812e-05, "learning_rate": 4.275661042910979e-08, "logits/chosen": 0.023193359375, "logits/rejected": 0.369140625, "logps/chosen": -0.33984375, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6953125, "rewards/margins": 17.75, "rewards/rejected": -19.5, "step": 2447 }, { "epoch": 0.911731843575419, "grad_norm": 4.03125, "learning_rate": 4.240090831416297e-08, "logits/chosen": 0.142578125, "logits/rejected": -0.474609375, "logps/chosen": -0.38671875, "logps/rejected": -2.46875, "loss": 0.0069, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9375, "rewards/margins": 10.5, "rewards/rejected": -12.375, "step": 2448 }, { "epoch": 0.9121042830540037, "grad_norm": 4.00543212890625e-05, "learning_rate": 4.2046656273119516e-08, "logits/chosen": 0.0712890625, "logits/rejected": 0.53125, "logps/chosen": -0.208984375, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 17.75, "rewards/rejected": -18.75, "step": 2449 }, { "epoch": 0.9124767225325885, "grad_norm": 0.0299072265625, "learning_rate": 4.169385490496717e-08, "logits/chosen": 0.06298828125, "logits/rejected": -0.1650390625, "logps/chosen": -0.15625, "logps/rejected": -2.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.78125, "rewards/margins": 13.25, "rewards/rejected": -14.0625, "step": 2450 }, { "epoch": 0.9128491620111732, "grad_norm": 0.00021648406982421875, "learning_rate": 4.1342504806241266e-08, "logits/chosen": 0.11865234375, "logits/rejected": 0.466796875, "logps/chosen": -0.421875, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.109375, "rewards/margins": 15.875, "rewards/rejected": -18.0, "step": 2451 }, { "epoch": 0.913221601489758, "grad_norm": 0.0024566650390625, "learning_rate": 4.0992606571023266e-08, "logits/chosen": 0.03173828125, "logits/rejected": -0.04443359375, "logps/chosen": -0.47265625, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 15.75, "rewards/rejected": -18.125, "step": 2452 }, { "epoch": 0.9135940409683426, "grad_norm": 5.507469177246094e-05, "learning_rate": 4.064416079093954e-08, "logits/chosen": -0.053955078125, "logits/rejected": 0.283203125, "logps/chosen": -0.294921875, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4765625, "rewards/margins": 17.375, "rewards/rejected": -18.75, "step": 2453 }, { "epoch": 0.9139664804469274, "grad_norm": 0.0003032684326171875, "learning_rate": 4.029716805516059e-08, "logits/chosen": 0.1171875, "logits/rejected": 0.59375, "logps/chosen": -0.357421875, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.78125, "rewards/margins": 15.875, "rewards/rejected": -17.75, "step": 2454 }, { "epoch": 0.9143389199255121, "grad_norm": 0.494140625, "learning_rate": 3.9951628950400097e-08, "logits/chosen": 0.037109375, "logits/rejected": 0.2001953125, "logps/chosen": -0.375, "logps/rejected": -2.3125, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.875, "rewards/margins": 9.6875, "rewards/rejected": -11.5625, "step": 2455 }, { "epoch": 0.9147113594040969, "grad_norm": 2.5625, "learning_rate": 3.96075440609139e-08, "logits/chosen": -0.0302734375, "logits/rejected": 0.5703125, "logps/chosen": -0.59375, "logps/rejected": -2.828125, "loss": 0.0041, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.96875, "rewards/margins": 11.1875, "rewards/rejected": -14.125, "step": 2456 }, { "epoch": 0.9150837988826815, "grad_norm": 0.000255584716796875, "learning_rate": 3.926491396849891e-08, "logits/chosen": -0.1396484375, "logits/rejected": 0.189453125, "logps/chosen": -0.32421875, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6171875, "rewards/margins": 16.5, "rewards/rejected": -18.125, "step": 2457 }, { "epoch": 0.9154562383612663, "grad_norm": 0.376953125, "learning_rate": 3.8923739252492314e-08, "logits/chosen": 0.1259765625, "logits/rejected": -0.6015625, "logps/chosen": -0.2470703125, "logps/rejected": -2.96875, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.234375, "rewards/margins": 13.625, "rewards/rejected": -14.8125, "step": 2458 }, { "epoch": 0.915828677839851, "grad_norm": 184.0, "learning_rate": 3.858402048977015e-08, "logits/chosen": -0.0167236328125, "logits/rejected": 0.31640625, "logps/chosen": -0.9609375, "logps/rejected": -2.9375, "loss": 0.1621, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.8125, "rewards/margins": 9.8125, "rewards/rejected": -14.625, "step": 2459 }, { "epoch": 0.9162011173184358, "grad_norm": 5.9604644775390625e-05, "learning_rate": 3.824575825474704e-08, "logits/chosen": -0.06884765625, "logits/rejected": 0.2578125, "logps/chosen": -0.29296875, "logps/rejected": -3.859375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.46875, "rewards/margins": 17.875, "rewards/rejected": -19.25, "step": 2460 }, { "epoch": 0.9165735567970205, "grad_norm": 1.329183578491211e-05, "learning_rate": 3.7908953119374564e-08, "logits/chosen": -0.007232666015625, "logits/rejected": 0.3828125, "logps/chosen": -0.1826171875, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9140625, "rewards/margins": 19.5, "rewards/rejected": -20.375, "step": 2461 }, { "epoch": 0.9169459962756052, "grad_norm": 33.5, "learning_rate": 3.7573605653140686e-08, "logits/chosen": -0.064453125, "logits/rejected": 0.2431640625, "logps/chosen": -0.26953125, "logps/rejected": -2.03125, "loss": 0.0859, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.359375, "rewards/margins": 8.75, "rewards/rejected": -10.125, "step": 2462 }, { "epoch": 0.9173184357541899, "grad_norm": 0.0003223419189453125, "learning_rate": 3.723971642306882e-08, "logits/chosen": 0.2333984375, "logits/rejected": -0.0250244140625, "logps/chosen": -0.81640625, "logps/rejected": -3.921875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.09375, "rewards/margins": 15.5625, "rewards/rejected": -19.625, "step": 2463 }, { "epoch": 0.9176908752327747, "grad_norm": 0.00177764892578125, "learning_rate": 3.690728599371609e-08, "logits/chosen": -0.0047607421875, "logits/rejected": 0.25390625, "logps/chosen": -0.16796875, "logps/rejected": -2.953125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.83984375, "rewards/margins": 13.875, "rewards/rejected": -14.75, "step": 2464 }, { "epoch": 0.9180633147113594, "grad_norm": 0.314453125, "learning_rate": 3.657631492717385e-08, "logits/chosen": 0.1357421875, "logits/rejected": -0.203125, "logps/chosen": -0.35546875, "logps/rejected": -2.125, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7734375, "rewards/margins": 8.8125, "rewards/rejected": -10.625, "step": 2465 }, { "epoch": 0.9184357541899442, "grad_norm": 0.00112152099609375, "learning_rate": 3.624680378306545e-08, "logits/chosen": 0.1357421875, "logits/rejected": 0.1240234375, "logps/chosen": -0.349609375, "logps/rejected": -3.484375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.75, "rewards/margins": 15.6875, "rewards/rejected": -17.375, "step": 2466 }, { "epoch": 0.9188081936685288, "grad_norm": 0.04833984375, "learning_rate": 3.591875311854562e-08, "logits/chosen": -0.0048828125, "logits/rejected": -0.134765625, "logps/chosen": -0.14453125, "logps/rejected": -2.921875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.72265625, "rewards/margins": 13.875, "rewards/rejected": -14.625, "step": 2467 }, { "epoch": 0.9191806331471136, "grad_norm": 0.00049591064453125, "learning_rate": 3.559216348829975e-08, "logits/chosen": 0.16015625, "logits/rejected": 0.291015625, "logps/chosen": -0.1328125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6640625, "rewards/margins": 16.625, "rewards/rejected": -17.25, "step": 2468 }, { "epoch": 0.9195530726256983, "grad_norm": 0.66015625, "learning_rate": 3.526703544454302e-08, "logits/chosen": 0.185546875, "logits/rejected": 0.1416015625, "logps/chosen": -1.5234375, "logps/rejected": -3.828125, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.625, "rewards/margins": 11.5, "rewards/rejected": -19.125, "step": 2469 }, { "epoch": 0.9199255121042831, "grad_norm": 0.0003070831298828125, "learning_rate": 3.494336953701899e-08, "logits/chosen": -0.0595703125, "logits/rejected": 0.1162109375, "logps/chosen": -0.40234375, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0, "rewards/margins": 15.5, "rewards/rejected": -17.5, "step": 2470 }, { "epoch": 0.9202979515828678, "grad_norm": 0.0069580078125, "learning_rate": 3.4621166312999264e-08, "logits/chosen": 0.0771484375, "logits/rejected": 0.1845703125, "logps/chosen": -1.03125, "logps/rejected": -3.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.15625, "rewards/margins": 12.5, "rewards/rejected": -17.75, "step": 2471 }, { "epoch": 0.9206703910614525, "grad_norm": 1.609375, "learning_rate": 3.430042631728186e-08, "logits/chosen": 0.0031890869140625, "logits/rejected": 0.51953125, "logps/chosen": -0.2099609375, "logps/rejected": -2.421875, "loss": 0.0021, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 11.0625, "rewards/rejected": -12.125, "step": 2472 }, { "epoch": 0.9210428305400372, "grad_norm": 8.678436279296875e-05, "learning_rate": 3.398115009219087e-08, "logits/chosen": 0.01318359375, "logits/rejected": 0.4609375, "logps/chosen": -0.1708984375, "logps/rejected": -3.703125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8515625, "rewards/margins": 17.625, "rewards/rejected": -18.5, "step": 2473 }, { "epoch": 0.921415270018622, "grad_norm": 0.0208740234375, "learning_rate": 3.366333817757575e-08, "logits/chosen": -0.185546875, "logits/rejected": 0.04541015625, "logps/chosen": -0.5703125, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 14.0, "rewards/rejected": -16.875, "step": 2474 }, { "epoch": 0.9217877094972067, "grad_norm": 0.00244140625, "learning_rate": 3.334699111080975e-08, "logits/chosen": 0.08203125, "logits/rejected": 0.41015625, "logps/chosen": -0.1845703125, "logps/rejected": -3.140625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.921875, "rewards/margins": 14.8125, "rewards/rejected": -15.75, "step": 2475 }, { "epoch": 0.9221601489757915, "grad_norm": 0.01190185546875, "learning_rate": 3.30321094267888e-08, "logits/chosen": 0.11865234375, "logits/rejected": 0.3359375, "logps/chosen": -0.4921875, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.46875, "rewards/margins": 15.625, "rewards/rejected": -18.0, "step": 2476 }, { "epoch": 0.9225325884543761, "grad_norm": 0.0003452301025390625, "learning_rate": 3.2718693657931705e-08, "logits/chosen": 0.1201171875, "logits/rejected": 0.408203125, "logps/chosen": -0.3203125, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6015625, "rewards/margins": 16.75, "rewards/rejected": -18.5, "step": 2477 }, { "epoch": 0.9229050279329609, "grad_norm": 0.32421875, "learning_rate": 3.240674433417846e-08, "logits/chosen": 0.036865234375, "logits/rejected": -0.15234375, "logps/chosen": -0.30859375, "logps/rejected": -3.21875, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.546875, "rewards/margins": 14.5625, "rewards/rejected": -16.125, "step": 2478 }, { "epoch": 0.9232774674115456, "grad_norm": 0.0002956390380859375, "learning_rate": 3.2096261982989536e-08, "logits/chosen": 0.0595703125, "logits/rejected": 0.1669921875, "logps/chosen": -0.4453125, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 15.5, "rewards/rejected": -17.75, "step": 2479 }, { "epoch": 0.9236499068901304, "grad_norm": 0.0001201629638671875, "learning_rate": 3.17872471293448e-08, "logits/chosen": -0.095703125, "logits/rejected": 0.11767578125, "logps/chosen": -0.302734375, "logps/rejected": -3.578125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.515625, "rewards/margins": 16.5, "rewards/rejected": -18.0, "step": 2480 }, { "epoch": 0.924022346368715, "grad_norm": 2.7298927307128906e-05, "learning_rate": 3.147970029574288e-08, "logits/chosen": 0.1376953125, "logits/rejected": 0.40625, "logps/chosen": -0.28125, "logps/rejected": -3.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.40625, "rewards/margins": 18.5, "rewards/rejected": -19.875, "step": 2481 }, { "epoch": 0.9243947858472998, "grad_norm": 3.4332275390625e-05, "learning_rate": 3.117362200220033e-08, "logits/chosen": 0.08935546875, "logits/rejected": 0.490234375, "logps/chosen": -0.38671875, "logps/rejected": -4.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9296875, "rewards/margins": 18.625, "rewards/rejected": -20.5, "step": 2482 }, { "epoch": 0.9247672253258845, "grad_norm": 0.0026092529296875, "learning_rate": 3.086901276625045e-08, "logits/chosen": 0.1767578125, "logits/rejected": 0.419921875, "logps/chosen": -0.828125, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.125, "rewards/margins": 14.3125, "rewards/rejected": -18.5, "step": 2483 }, { "epoch": 0.9251396648044693, "grad_norm": 1.823902130126953e-05, "learning_rate": 3.056587310294274e-08, "logits/chosen": -0.046630859375, "logits/rejected": 0.53515625, "logps/chosen": -0.267578125, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34375, "rewards/margins": 19.0, "rewards/rejected": -20.25, "step": 2484 }, { "epoch": 0.925512104283054, "grad_norm": 0.0096435546875, "learning_rate": 3.02642035248416e-08, "logits/chosen": 0.0859375, "logits/rejected": -0.0016326904296875, "logps/chosen": -0.326171875, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6328125, "rewards/margins": 14.8125, "rewards/rejected": -16.5, "step": 2485 }, { "epoch": 0.9258845437616388, "grad_norm": 0.0040283203125, "learning_rate": 2.996400454202611e-08, "logits/chosen": 0.181640625, "logits/rejected": 0.55859375, "logps/chosen": -0.37109375, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.859375, "rewards/margins": 15.3125, "rewards/rejected": -17.125, "step": 2486 }, { "epoch": 0.9262569832402234, "grad_norm": 0.0016937255859375, "learning_rate": 2.9665276662088435e-08, "logits/chosen": 0.0771484375, "logits/rejected": 0.3359375, "logps/chosen": -0.2197265625, "logps/rejected": -3.078125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1015625, "rewards/margins": 14.3125, "rewards/rejected": -15.375, "step": 2487 }, { "epoch": 0.9266294227188082, "grad_norm": 0.2412109375, "learning_rate": 2.936802039013373e-08, "logits/chosen": -0.025390625, "logits/rejected": 0.34375, "logps/chosen": -1.140625, "logps/rejected": -3.875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.71875, "rewards/margins": 13.625, "rewards/rejected": -19.375, "step": 2488 }, { "epoch": 0.9270018621973929, "grad_norm": 0.00970458984375, "learning_rate": 2.9072236228778245e-08, "logits/chosen": -0.0167236328125, "logits/rejected": 0.050048828125, "logps/chosen": -0.2470703125, "logps/rejected": -2.953125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.234375, "rewards/margins": 13.5, "rewards/rejected": -14.75, "step": 2489 }, { "epoch": 0.9273743016759777, "grad_norm": 1.6808509826660156e-05, "learning_rate": 2.8777924678149722e-08, "logits/chosen": 0.2041015625, "logits/rejected": 0.328125, "logps/chosen": -0.41796875, "logps/rejected": -4.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.09375, "rewards/margins": 19.0, "rewards/rejected": -21.0, "step": 2490 }, { "epoch": 0.9277467411545623, "grad_norm": 0.01318359375, "learning_rate": 2.848508623588569e-08, "logits/chosen": -0.15234375, "logits/rejected": 0.06396484375, "logps/chosen": -0.388671875, "logps/rejected": -2.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9375, "rewards/margins": 11.875, "rewards/rejected": -13.75, "step": 2491 }, { "epoch": 0.9281191806331471, "grad_norm": 2.9087066650390625e-05, "learning_rate": 2.8193721397132775e-08, "logits/chosen": 0.0703125, "logits/rejected": 0.3359375, "logps/chosen": -0.1171875, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5859375, "rewards/margins": 17.75, "rewards/rejected": -18.375, "step": 2492 }, { "epoch": 0.9284916201117318, "grad_norm": 84.0, "learning_rate": 2.790383065454609e-08, "logits/chosen": 0.0091552734375, "logits/rejected": 0.55859375, "logps/chosen": -1.15625, "logps/rejected": -3.21875, "loss": 0.1113, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.78125, "rewards/margins": 10.375, "rewards/rejected": -16.125, "step": 2493 }, { "epoch": 0.9288640595903166, "grad_norm": 1.90625, "learning_rate": 2.7615414498288247e-08, "logits/chosen": -0.240234375, "logits/rejected": -0.458984375, "logps/chosen": -1.1640625, "logps/rejected": -2.65625, "loss": 0.0031, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.8125, "rewards/margins": 7.5, "rewards/rejected": -13.375, "step": 2494 }, { "epoch": 0.9292364990689013, "grad_norm": 8.916854858398438e-05, "learning_rate": 2.7328473416028554e-08, "logits/chosen": 0.1494140625, "logits/rejected": 0.5, "logps/chosen": -0.201171875, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0, "rewards/margins": 16.75, "rewards/rejected": -17.75, "step": 2495 }, { "epoch": 0.929608938547486, "grad_norm": 0.00518798828125, "learning_rate": 2.7043007892942303e-08, "logits/chosen": -0.08056640625, "logits/rejected": 0.6484375, "logps/chosen": -0.59375, "logps/rejected": -3.21875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.96875, "rewards/margins": 13.125, "rewards/rejected": -16.125, "step": 2496 }, { "epoch": 0.9299813780260707, "grad_norm": 0.10205078125, "learning_rate": 2.67590184117095e-08, "logits/chosen": -0.24609375, "logits/rejected": -0.9140625, "logps/chosen": -0.42578125, "logps/rejected": -2.40625, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.125, "rewards/margins": 9.875, "rewards/rejected": -12.0, "step": 2497 }, { "epoch": 0.9303538175046555, "grad_norm": 0.01220703125, "learning_rate": 2.647650545251453e-08, "logits/chosen": -0.17578125, "logits/rejected": 0.1982421875, "logps/chosen": -0.5546875, "logps/rejected": -3.484375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.765625, "rewards/margins": 14.625, "rewards/rejected": -17.5, "step": 2498 }, { "epoch": 0.9307262569832402, "grad_norm": 0.03662109375, "learning_rate": 2.6195469493045386e-08, "logits/chosen": 0.08349609375, "logits/rejected": 0.57421875, "logps/chosen": -0.51171875, "logps/rejected": -2.71875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5625, "rewards/margins": 11.0, "rewards/rejected": -13.5, "step": 2499 }, { "epoch": 0.931098696461825, "grad_norm": 0.01446533203125, "learning_rate": 2.5915911008492554e-08, "logits/chosen": 0.046630859375, "logits/rejected": -0.56640625, "logps/chosen": -0.3671875, "logps/rejected": -3.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8359375, "rewards/margins": 13.875, "rewards/rejected": -15.625, "step": 2500 }, { "epoch": 0.9314711359404096, "grad_norm": 0.26171875, "learning_rate": 2.5637830471548218e-08, "logits/chosen": 0.1689453125, "logits/rejected": -0.061279296875, "logps/chosen": -0.7109375, "logps/rejected": -3.28125, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5625, "rewards/margins": 12.8125, "rewards/rejected": -16.375, "step": 2501 }, { "epoch": 0.9318435754189944, "grad_norm": 0.059814453125, "learning_rate": 2.536122835240566e-08, "logits/chosen": 0.1875, "logits/rejected": -0.287109375, "logps/chosen": -0.177734375, "logps/rejected": -2.28125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.88671875, "rewards/margins": 10.5, "rewards/rejected": -11.375, "step": 2502 }, { "epoch": 0.9322160148975791, "grad_norm": 0.9765625, "learning_rate": 2.508610511875847e-08, "logits/chosen": 0.11962890625, "logits/rejected": -0.1953125, "logps/chosen": -0.5234375, "logps/rejected": -2.390625, "loss": 0.0017, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.59375, "rewards/margins": 9.375, "rewards/rejected": -12.0, "step": 2503 }, { "epoch": 0.9325884543761639, "grad_norm": 225.0, "learning_rate": 2.4812461235799532e-08, "logits/chosen": -0.18359375, "logits/rejected": 0.3515625, "logps/chosen": -1.484375, "logps/rejected": -3.3125, "loss": 0.3926, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.4375, "rewards/margins": 9.1875, "rewards/rejected": -16.625, "step": 2504 }, { "epoch": 0.9329608938547486, "grad_norm": 0.0264892578125, "learning_rate": 2.4540297166220424e-08, "logits/chosen": 0.0947265625, "logits/rejected": 0.37890625, "logps/chosen": -0.369140625, "logps/rejected": -2.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.84375, "rewards/margins": 11.25, "rewards/rejected": -13.125, "step": 2505 }, { "epoch": 0.9333333333333333, "grad_norm": 0.005279541015625, "learning_rate": 2.426961337021034e-08, "logits/chosen": -0.0245361328125, "logits/rejected": 0.357421875, "logps/chosen": -0.30859375, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.546875, "rewards/margins": 16.125, "rewards/rejected": -17.75, "step": 2506 }, { "epoch": 0.933705772811918, "grad_norm": 0.0012969970703125, "learning_rate": 2.4000410305455964e-08, "logits/chosen": 0.1015625, "logits/rejected": 0.3359375, "logps/chosen": -0.31640625, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.578125, "rewards/margins": 14.875, "rewards/rejected": -16.375, "step": 2507 }, { "epoch": 0.9340782122905028, "grad_norm": 0.000301361083984375, "learning_rate": 2.373268842713999e-08, "logits/chosen": -0.045654296875, "logits/rejected": 0.4140625, "logps/chosen": -0.62109375, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.109375, "rewards/margins": 15.8125, "rewards/rejected": -19.0, "step": 2508 }, { "epoch": 0.9344506517690875, "grad_norm": 0.0242919921875, "learning_rate": 2.3466448187940812e-08, "logits/chosen": 0.1533203125, "logits/rejected": 0.421875, "logps/chosen": -0.78125, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.90625, "rewards/margins": 13.25, "rewards/rejected": -17.25, "step": 2509 }, { "epoch": 0.9348230912476723, "grad_norm": 0.1279296875, "learning_rate": 2.320169003803143e-08, "logits/chosen": 0.08935546875, "logits/rejected": -0.94140625, "logps/chosen": -0.451171875, "logps/rejected": -2.359375, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25, "rewards/margins": 9.5625, "rewards/rejected": -11.8125, "step": 2510 }, { "epoch": 0.9351955307262569, "grad_norm": 0.357421875, "learning_rate": 2.2938414425078845e-08, "logits/chosen": -0.11767578125, "logits/rejected": 0.6640625, "logps/chosen": -0.30078125, "logps/rejected": -2.84375, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5, "rewards/margins": 12.6875, "rewards/rejected": -14.1875, "step": 2511 }, { "epoch": 0.9355679702048417, "grad_norm": 0.0003337860107421875, "learning_rate": 2.2676621794243667e-08, "logits/chosen": 0.09228515625, "logits/rejected": 0.365234375, "logps/chosen": -0.44140625, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.203125, "rewards/margins": 16.0, "rewards/rejected": -18.25, "step": 2512 }, { "epoch": 0.9359404096834264, "grad_norm": 2.9325485229492188e-05, "learning_rate": 2.241631258817851e-08, "logits/chosen": 0.0186767578125, "logits/rejected": 0.2373046875, "logps/chosen": -0.1943359375, "logps/rejected": -3.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.97265625, "rewards/margins": 18.625, "rewards/rejected": -19.625, "step": 2513 }, { "epoch": 0.9363128491620112, "grad_norm": 0.00074005126953125, "learning_rate": 2.2157487247027995e-08, "logits/chosen": 0.1455078125, "logits/rejected": 0.392578125, "logps/chosen": -0.259765625, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.296875, "rewards/margins": 16.375, "rewards/rejected": -17.625, "step": 2514 }, { "epoch": 0.9366852886405959, "grad_norm": 1.1953125, "learning_rate": 2.190014620842765e-08, "logits/chosen": 0.138671875, "logits/rejected": 0.19921875, "logps/chosen": -0.32421875, "logps/rejected": -1.859375, "loss": 0.002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 7.6875, "rewards/rejected": -9.25, "step": 2515 }, { "epoch": 0.9370577281191806, "grad_norm": 1.078125, "learning_rate": 2.164428990750341e-08, "logits/chosen": 0.001190185546875, "logits/rejected": 0.447265625, "logps/chosen": -0.35546875, "logps/rejected": -2.921875, "loss": 0.0018, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7734375, "rewards/margins": 12.875, "rewards/rejected": -14.625, "step": 2516 }, { "epoch": 0.9374301675977653, "grad_norm": 2.47955322265625e-05, "learning_rate": 2.138991877687052e-08, "logits/chosen": 0.10595703125, "logits/rejected": 0.310546875, "logps/chosen": -0.2451171875, "logps/rejected": -3.9375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2265625, "rewards/margins": 18.5, "rewards/rejected": -19.75, "step": 2517 }, { "epoch": 0.9378026070763501, "grad_norm": 0.0001239776611328125, "learning_rate": 2.113703324663333e-08, "logits/chosen": 0.078125, "logits/rejected": 0.263671875, "logps/chosen": -0.4609375, "logps/rejected": -3.890625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3125, "rewards/margins": 17.125, "rewards/rejected": -19.5, "step": 2518 }, { "epoch": 0.9381750465549348, "grad_norm": 0.66015625, "learning_rate": 2.0885633744383802e-08, "logits/chosen": 0.12451171875, "logits/rejected": 0.65625, "logps/chosen": -0.50390625, "logps/rejected": -2.65625, "loss": 0.0011, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.515625, "rewards/margins": 10.75, "rewards/rejected": -13.25, "step": 2519 }, { "epoch": 0.9385474860335196, "grad_norm": 0.000396728515625, "learning_rate": 2.063572069520171e-08, "logits/chosen": 0.052734375, "logits/rejected": 0.2099609375, "logps/chosen": -0.4765625, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 16.25, "rewards/rejected": -18.5, "step": 2520 }, { "epoch": 0.9389199255121042, "grad_norm": 0.0021514892578125, "learning_rate": 2.038729452165323e-08, "logits/chosen": 0.21875, "logits/rejected": 0.0263671875, "logps/chosen": -0.3125, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5625, "rewards/margins": 13.875, "rewards/rejected": -15.4375, "step": 2521 }, { "epoch": 0.939292364990689, "grad_norm": 0.00110626220703125, "learning_rate": 2.0140355643790463e-08, "logits/chosen": 0.11962890625, "logits/rejected": 0.08154296875, "logps/chosen": -0.53125, "logps/rejected": -3.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.65625, "rewards/margins": 16.75, "rewards/rejected": -19.375, "step": 2522 }, { "epoch": 0.9396648044692737, "grad_norm": 8.7738037109375e-05, "learning_rate": 1.989490447915071e-08, "logits/chosen": -0.00970458984375, "logits/rejected": 0.326171875, "logps/chosen": -0.0830078125, "logps/rejected": -3.40625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4140625, "rewards/margins": 16.625, "rewards/rejected": -17.0, "step": 2523 }, { "epoch": 0.9400372439478585, "grad_norm": 0.005126953125, "learning_rate": 1.9650941442755685e-08, "logits/chosen": -0.1796875, "logits/rejected": 0.1943359375, "logps/chosen": -0.703125, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5, "rewards/margins": 12.9375, "rewards/rejected": -16.5, "step": 2524 }, { "epoch": 0.9404096834264432, "grad_norm": 0.1689453125, "learning_rate": 1.9408466947111224e-08, "logits/chosen": 0.080078125, "logits/rejected": 0.51171875, "logps/chosen": -0.380859375, "logps/rejected": -3.1875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.90625, "rewards/margins": 14.0625, "rewards/rejected": -16.0, "step": 2525 }, { "epoch": 0.9407821229050279, "grad_norm": 0.51171875, "learning_rate": 1.9167481402205866e-08, "logits/chosen": -0.2216796875, "logits/rejected": 0.53515625, "logps/chosen": -0.27734375, "logps/rejected": -2.546875, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3828125, "rewards/margins": 11.375, "rewards/rejected": -12.75, "step": 2526 }, { "epoch": 0.9411545623836126, "grad_norm": 0.0002651214599609375, "learning_rate": 1.8927985215510766e-08, "logits/chosen": -0.11572265625, "logits/rejected": 0.40625, "logps/chosen": -0.2734375, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.359375, "rewards/margins": 18.125, "rewards/rejected": -19.5, "step": 2527 }, { "epoch": 0.9415270018621974, "grad_norm": 0.0002307891845703125, "learning_rate": 1.86899787919786e-08, "logits/chosen": 0.16796875, "logits/rejected": 0.396484375, "logps/chosen": -0.255859375, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.28125, "rewards/margins": 15.8125, "rewards/rejected": -17.125, "step": 2528 }, { "epoch": 0.9418994413407821, "grad_norm": 0.0019073486328125, "learning_rate": 1.8453462534043452e-08, "logits/chosen": 0.2314453125, "logits/rejected": 0.2197265625, "logps/chosen": -0.25390625, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.265625, "rewards/margins": 13.75, "rewards/rejected": -15.0, "step": 2529 }, { "epoch": 0.9422718808193669, "grad_norm": 3.504753112792969e-05, "learning_rate": 1.8218436841619422e-08, "logits/chosen": 0.126953125, "logits/rejected": 0.30078125, "logps/chosen": -0.3671875, "logps/rejected": -4.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.84375, "rewards/margins": 18.875, "rewards/rejected": -20.75, "step": 2530 }, { "epoch": 0.9426443202979515, "grad_norm": 0.423828125, "learning_rate": 1.7984902112100232e-08, "logits/chosen": 0.1328125, "logits/rejected": -0.390625, "logps/chosen": -0.6796875, "logps/rejected": -2.859375, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.40625, "rewards/margins": 10.875, "rewards/rejected": -14.25, "step": 2531 }, { "epoch": 0.9430167597765363, "grad_norm": 64.0, "learning_rate": 1.7752858740359012e-08, "logits/chosen": 0.0751953125, "logits/rejected": 0.5234375, "logps/chosen": -0.578125, "logps/rejected": -2.09375, "loss": 0.1006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.875, "rewards/margins": 7.625, "rewards/rejected": -10.5, "step": 2532 }, { "epoch": 0.943389199255121, "grad_norm": 0.061279296875, "learning_rate": 1.7522307118746814e-08, "logits/chosen": -0.0478515625, "logits/rejected": -0.01483154296875, "logps/chosen": -0.19140625, "logps/rejected": -3.0625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.953125, "rewards/margins": 14.375, "rewards/rejected": -15.25, "step": 2533 }, { "epoch": 0.9437616387337058, "grad_norm": 6.67572021484375e-05, "learning_rate": 1.7293247637092613e-08, "logits/chosen": 0.04833984375, "logits/rejected": 0.416015625, "logps/chosen": -0.34375, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.71875, "rewards/margins": 17.0, "rewards/rejected": -18.75, "step": 2534 }, { "epoch": 0.9441340782122905, "grad_norm": 0.006439208984375, "learning_rate": 1.706568068270219e-08, "logits/chosen": 0.0228271484375, "logits/rejected": 0.65234375, "logps/chosen": -0.2470703125, "logps/rejected": -3.296875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.234375, "rewards/margins": 15.25, "rewards/rejected": -16.5, "step": 2535 }, { "epoch": 0.9445065176908752, "grad_norm": 0.0025634765625, "learning_rate": 1.6839606640357852e-08, "logits/chosen": -0.103515625, "logits/rejected": 0.287109375, "logps/chosen": -0.41015625, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.046875, "rewards/margins": 15.75, "rewards/rejected": -17.75, "step": 2536 }, { "epoch": 0.9448789571694599, "grad_norm": 0.061767578125, "learning_rate": 1.6615025892317425e-08, "logits/chosen": -0.11669921875, "logits/rejected": -0.494140625, "logps/chosen": -0.34765625, "logps/rejected": -3.1875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.734375, "rewards/margins": 14.25, "rewards/rejected": -16.0, "step": 2537 }, { "epoch": 0.9452513966480447, "grad_norm": 0.010986328125, "learning_rate": 1.6391938818314155e-08, "logits/chosen": -0.0986328125, "logits/rejected": 0.1689453125, "logps/chosen": -0.87890625, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.375, "rewards/margins": 13.375, "rewards/rejected": -17.75, "step": 2538 }, { "epoch": 0.9456238361266294, "grad_norm": 2.234375, "learning_rate": 1.6170345795555207e-08, "logits/chosen": 0.055908203125, "logits/rejected": -0.11865234375, "logps/chosen": -0.3984375, "logps/rejected": -2.375, "loss": 0.0041, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9921875, "rewards/margins": 9.9375, "rewards/rejected": -11.875, "step": 2539 }, { "epoch": 0.9459962756052142, "grad_norm": 0.2216796875, "learning_rate": 1.5950247198721966e-08, "logits/chosen": -0.048095703125, "logits/rejected": 0.4609375, "logps/chosen": -1.0625, "logps/rejected": -3.71875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.3125, "rewards/margins": 13.25, "rewards/rejected": -18.5, "step": 2540 }, { "epoch": 0.9463687150837988, "grad_norm": 0.2490234375, "learning_rate": 1.573164339996864e-08, "logits/chosen": -0.00579833984375, "logits/rejected": -0.1962890625, "logps/chosen": -0.39453125, "logps/rejected": -2.171875, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.96875, "rewards/margins": 8.875, "rewards/rejected": -10.875, "step": 2541 }, { "epoch": 0.9467411545623836, "grad_norm": 0.00017261505126953125, "learning_rate": 1.551453476892226e-08, "logits/chosen": -0.007476806640625, "logits/rejected": 0.07763671875, "logps/chosen": -0.341796875, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7109375, "rewards/margins": 16.5, "rewards/rejected": -18.25, "step": 2542 }, { "epoch": 0.9471135940409683, "grad_norm": 0.0257568359375, "learning_rate": 1.5298921672681474e-08, "logits/chosen": 0.0031890869140625, "logits/rejected": -0.482421875, "logps/chosen": -0.34765625, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.734375, "rewards/margins": 13.625, "rewards/rejected": -15.375, "step": 2543 }, { "epoch": 0.9474860335195531, "grad_norm": 0.034423828125, "learning_rate": 1.508480447581636e-08, "logits/chosen": -0.031982421875, "logits/rejected": 0.3671875, "logps/chosen": -0.60546875, "logps/rejected": -2.84375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.03125, "rewards/margins": 11.25, "rewards/rejected": -14.25, "step": 2544 }, { "epoch": 0.9478584729981379, "grad_norm": 0.000579833984375, "learning_rate": 1.487218354036751e-08, "logits/chosen": 0.1220703125, "logits/rejected": 0.11474609375, "logps/chosen": -0.0859375, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.427734375, "rewards/margins": 15.0, "rewards/rejected": -15.4375, "step": 2545 }, { "epoch": 0.9482309124767225, "grad_norm": 0.0203857421875, "learning_rate": 1.4661059225845752e-08, "logits/chosen": 0.205078125, "logits/rejected": -0.453125, "logps/chosen": -0.2451171875, "logps/rejected": -2.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2265625, "rewards/margins": 12.75, "rewards/rejected": -14.0, "step": 2546 }, { "epoch": 0.9486033519553073, "grad_norm": 4.315376281738281e-05, "learning_rate": 1.4451431889231124e-08, "logits/chosen": -0.0164794921875, "logits/rejected": 0.2109375, "logps/chosen": -0.13671875, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6796875, "rewards/margins": 18.0, "rewards/rejected": -18.75, "step": 2547 }, { "epoch": 0.948975791433892, "grad_norm": 0.000782012939453125, "learning_rate": 1.4243301884972803e-08, "logits/chosen": -0.07666015625, "logits/rejected": 0.25, "logps/chosen": -0.6015625, "logps/rejected": -3.546875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0, "rewards/margins": 14.6875, "rewards/rejected": -17.75, "step": 2548 }, { "epoch": 0.9493482309124768, "grad_norm": 1.4921875, "learning_rate": 1.403666956498768e-08, "logits/chosen": -0.078125, "logits/rejected": 0.279296875, "logps/chosen": -0.37890625, "logps/rejected": -2.875, "loss": 0.0016, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.890625, "rewards/margins": 12.4375, "rewards/rejected": -14.375, "step": 2549 }, { "epoch": 0.9497206703910615, "grad_norm": 7.271766662597656e-06, "learning_rate": 1.3831535278660977e-08, "logits/chosen": 0.25390625, "logits/rejected": 0.5703125, "logps/chosen": -0.1416015625, "logps/rejected": -4.125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.70703125, "rewards/margins": 20.0, "rewards/rejected": -20.625, "step": 2550 }, { "epoch": 0.9500931098696462, "grad_norm": 0.001983642578125, "learning_rate": 1.3627899372844543e-08, "logits/chosen": 0.00445556640625, "logits/rejected": 0.14453125, "logps/chosen": -0.3828125, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.921875, "rewards/margins": 14.0, "rewards/rejected": -15.9375, "step": 2551 }, { "epoch": 0.9504655493482309, "grad_norm": 0.0015869140625, "learning_rate": 1.3425762191856449e-08, "logits/chosen": 0.064453125, "logits/rejected": 0.4765625, "logps/chosen": -0.6875, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4375, "rewards/margins": 15.5, "rewards/rejected": -19.0, "step": 2552 }, { "epoch": 0.9508379888268157, "grad_norm": 0.17578125, "learning_rate": 1.3225124077481198e-08, "logits/chosen": 0.13671875, "logits/rejected": 0.5859375, "logps/chosen": -0.228515625, "logps/rejected": -2.765625, "loss": 0.0003, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.140625, "rewards/margins": 12.6875, "rewards/rejected": -13.875, "step": 2553 }, { "epoch": 0.9512104283054004, "grad_norm": 0.0001773834228515625, "learning_rate": 1.3025985368968217e-08, "logits/chosen": -0.0179443359375, "logits/rejected": 0.45703125, "logps/chosen": -0.4921875, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.453125, "rewards/margins": 16.25, "rewards/rejected": -18.625, "step": 2554 }, { "epoch": 0.9515828677839852, "grad_norm": 0.00121307373046875, "learning_rate": 1.2828346403031765e-08, "logits/chosen": 0.1435546875, "logits/rejected": 0.328125, "logps/chosen": -0.408203125, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 15.5625, "rewards/rejected": -17.5, "step": 2555 }, { "epoch": 0.9519553072625698, "grad_norm": 0.041259765625, "learning_rate": 1.2632207513850423e-08, "logits/chosen": 0.17578125, "logits/rejected": -0.443359375, "logps/chosen": -0.6484375, "logps/rejected": -2.9375, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.234375, "rewards/margins": 11.5, "rewards/rejected": -14.6875, "step": 2556 }, { "epoch": 0.9523277467411546, "grad_norm": 0.0001659393310546875, "learning_rate": 1.2437569033066009e-08, "logits/chosen": 0.28515625, "logits/rejected": 0.392578125, "logps/chosen": -0.47265625, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.359375, "rewards/margins": 18.0, "rewards/rejected": -20.25, "step": 2557 }, { "epoch": 0.9527001862197393, "grad_norm": 0.609375, "learning_rate": 1.2244431289783463e-08, "logits/chosen": 0.07568359375, "logits/rejected": 0.65234375, "logps/chosen": -0.255859375, "logps/rejected": -2.6875, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.28125, "rewards/margins": 12.1875, "rewards/rejected": -13.5, "step": 2558 }, { "epoch": 0.9530726256983241, "grad_norm": 0.00179290771484375, "learning_rate": 1.205279461057056e-08, "logits/chosen": 0.1484375, "logits/rejected": 0.0228271484375, "logps/chosen": -0.40625, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.03125, "rewards/margins": 14.625, "rewards/rejected": -16.75, "step": 2559 }, { "epoch": 0.9534450651769087, "grad_norm": 0.00109100341796875, "learning_rate": 1.1862659319456703e-08, "logits/chosen": -0.0218505859375, "logits/rejected": 0.212890625, "logps/chosen": -0.65625, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 14.5, "rewards/rejected": -17.75, "step": 2560 }, { "epoch": 0.9538175046554935, "grad_norm": 0.017333984375, "learning_rate": 1.1674025737932624e-08, "logits/chosen": 0.1279296875, "logits/rejected": -0.26171875, "logps/chosen": -0.33203125, "logps/rejected": -3.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 13.625, "rewards/rejected": -15.25, "step": 2561 }, { "epoch": 0.9541899441340782, "grad_norm": 1.0390625, "learning_rate": 1.1486894184949892e-08, "logits/chosen": 0.099609375, "logits/rejected": -0.96875, "logps/chosen": -0.3125, "logps/rejected": -2.234375, "loss": 0.0015, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5625, "rewards/margins": 9.625, "rewards/rejected": -11.125, "step": 2562 }, { "epoch": 0.954562383612663, "grad_norm": 0.46875, "learning_rate": 1.13012649769205e-08, "logits/chosen": 0.185546875, "logits/rejected": -0.32421875, "logps/chosen": -0.2578125, "logps/rejected": -2.765625, "loss": 0.0007, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2890625, "rewards/margins": 12.5, "rewards/rejected": -13.8125, "step": 2563 }, { "epoch": 0.9549348230912477, "grad_norm": 0.671875, "learning_rate": 1.1117138427716178e-08, "logits/chosen": 0.020263671875, "logits/rejected": -0.5625, "logps/chosen": -0.59375, "logps/rejected": -2.40625, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.96875, "rewards/margins": 9.125, "rewards/rejected": -12.0625, "step": 2564 }, { "epoch": 0.9553072625698324, "grad_norm": 0.00958251953125, "learning_rate": 1.0934514848667687e-08, "logits/chosen": -0.146484375, "logits/rejected": 0.39453125, "logps/chosen": -0.671875, "logps/rejected": -3.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.359375, "rewards/margins": 12.125, "rewards/rejected": -15.4375, "step": 2565 }, { "epoch": 0.9556797020484171, "grad_norm": 4.76837158203125e-05, "learning_rate": 1.075339454856472e-08, "logits/chosen": 0.1591796875, "logits/rejected": 0.4921875, "logps/chosen": -0.2265625, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1328125, "rewards/margins": 17.375, "rewards/rejected": -18.5, "step": 2566 }, { "epoch": 0.9560521415270019, "grad_norm": 1.6927719116210938e-05, "learning_rate": 1.0573777833655107e-08, "logits/chosen": 0.08984375, "logits/rejected": 0.396484375, "logps/chosen": -0.1650390625, "logps/rejected": -3.90625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.82421875, "rewards/margins": 18.625, "rewards/rejected": -19.5, "step": 2567 }, { "epoch": 0.9564245810055866, "grad_norm": 0.01226806640625, "learning_rate": 1.0395665007644305e-08, "logits/chosen": -0.037353515625, "logits/rejected": 0.189453125, "logps/chosen": -0.703125, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.515625, "rewards/margins": 13.6875, "rewards/rejected": -17.25, "step": 2568 }, { "epoch": 0.9567970204841714, "grad_norm": 0.0003223419189453125, "learning_rate": 1.0219056371694813e-08, "logits/chosen": 0.001312255859375, "logits/rejected": 0.35546875, "logps/chosen": -0.205078125, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.03125, "rewards/margins": 16.25, "rewards/rejected": -17.25, "step": 2569 }, { "epoch": 0.957169459962756, "grad_norm": 0.0004749298095703125, "learning_rate": 1.0043952224425856e-08, "logits/chosen": 0.04248046875, "logits/rejected": 0.443359375, "logps/chosen": -0.302734375, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.515625, "rewards/margins": 15.125, "rewards/rejected": -16.625, "step": 2570 }, { "epoch": 0.9575418994413408, "grad_norm": 0.000141143798828125, "learning_rate": 9.8703528619129e-09, "logits/chosen": 0.11279296875, "logits/rejected": 0.33984375, "logps/chosen": -0.12890625, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.640625, "rewards/margins": 17.75, "rewards/rejected": -18.375, "step": 2571 }, { "epoch": 0.9579143389199255, "grad_norm": 16.625, "learning_rate": 9.698258577686747e-09, "logits/chosen": 0.1435546875, "logits/rejected": -0.3359375, "logps/chosen": -0.75, "logps/rejected": -3.09375, "loss": 0.0156, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.75, "rewards/margins": 11.6875, "rewards/rejected": -15.5, "step": 2572 }, { "epoch": 0.9582867783985103, "grad_norm": 0.000415802001953125, "learning_rate": 9.527669662733728e-09, "logits/chosen": 0.1796875, "logits/rejected": 0.419921875, "logps/chosen": -0.62109375, "logps/rejected": -3.71875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.09375, "rewards/margins": 15.5, "rewards/rejected": -18.5, "step": 2573 }, { "epoch": 0.958659217877095, "grad_norm": 0.00140380859375, "learning_rate": 9.358586405494518e-09, "logits/chosen": 0.06005859375, "logits/rejected": 0.734375, "logps/chosen": -0.2578125, "logps/rejected": -3.515625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.296875, "rewards/margins": 16.25, "rewards/rejected": -17.5, "step": 2574 }, { "epoch": 0.9590316573556797, "grad_norm": 0.00064849853515625, "learning_rate": 9.191009091863922e-09, "logits/chosen": 0.064453125, "logits/rejected": 0.29296875, "logps/chosen": -0.498046875, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.484375, "rewards/margins": 15.125, "rewards/rejected": -17.625, "step": 2575 }, { "epoch": 0.9594040968342644, "grad_norm": 0.0067138671875, "learning_rate": 9.024938005190585e-09, "logits/chosen": -0.1220703125, "logits/rejected": -0.33203125, "logps/chosen": -0.345703125, "logps/rejected": -2.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7265625, "rewards/margins": 12.375, "rewards/rejected": -14.125, "step": 2576 }, { "epoch": 0.9597765363128492, "grad_norm": 0.00122833251953125, "learning_rate": 8.860373426276391e-09, "logits/chosen": 0.002288818359375, "logits/rejected": 0.2099609375, "logps/chosen": -0.55078125, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 16.0, "rewards/rejected": -18.75, "step": 2577 }, { "epoch": 0.9601489757914339, "grad_norm": 0.0067138671875, "learning_rate": 8.697315633375657e-09, "logits/chosen": -0.00628662109375, "logits/rejected": 0.48046875, "logps/chosen": -0.91796875, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.59375, "rewards/margins": 14.3125, "rewards/rejected": -19.0, "step": 2578 }, { "epoch": 0.9605214152700187, "grad_norm": 0.004425048828125, "learning_rate": 8.535764902195242e-09, "logits/chosen": 0.040771484375, "logits/rejected": -0.1201171875, "logps/chosen": -0.296875, "logps/rejected": -3.171875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4765625, "rewards/margins": 14.375, "rewards/rejected": -15.875, "step": 2579 }, { "epoch": 0.9608938547486033, "grad_norm": 0.0029144287109375, "learning_rate": 8.375721505893641e-09, "logits/chosen": -0.10107421875, "logits/rejected": 0.326171875, "logps/chosen": -0.71875, "logps/rejected": -3.734375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.59375, "rewards/margins": 15.0, "rewards/rejected": -18.625, "step": 2580 }, { "epoch": 0.9612662942271881, "grad_norm": 119.0, "learning_rate": 8.217185715080788e-09, "logits/chosen": -0.002105712890625, "logits/rejected": -0.1630859375, "logps/chosen": -0.416015625, "logps/rejected": -2.078125, "loss": 0.1089, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.078125, "rewards/margins": 8.3125, "rewards/rejected": -10.375, "step": 2581 }, { "epoch": 0.9616387337057728, "grad_norm": 0.062255859375, "learning_rate": 8.060157797817257e-09, "logits/chosen": -0.197265625, "logits/rejected": 0.095703125, "logps/chosen": -0.54296875, "logps/rejected": -2.546875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.71875, "rewards/margins": 10.0, "rewards/rejected": -12.75, "step": 2582 }, { "epoch": 0.9620111731843576, "grad_norm": 1.5703125, "learning_rate": 7.904638019614162e-09, "logits/chosen": 0.044677734375, "logits/rejected": -0.2001953125, "logps/chosen": -0.376953125, "logps/rejected": -2.875, "loss": 0.0018, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8828125, "rewards/margins": 12.5, "rewards/rejected": -14.375, "step": 2583 }, { "epoch": 0.9623836126629423, "grad_norm": 0.546875, "learning_rate": 7.750626643432557e-09, "logits/chosen": -0.045654296875, "logits/rejected": 0.57421875, "logps/chosen": -0.443359375, "logps/rejected": -2.9375, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.21875, "rewards/margins": 12.4375, "rewards/rejected": -14.625, "step": 2584 }, { "epoch": 0.962756052141527, "grad_norm": 0.001129150390625, "learning_rate": 7.598123929683131e-09, "logits/chosen": -0.0869140625, "logits/rejected": 0.50390625, "logps/chosen": -0.5390625, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.703125, "rewards/margins": 14.375, "rewards/rejected": -17.125, "step": 2585 }, { "epoch": 0.9631284916201117, "grad_norm": 4.4345855712890625e-05, "learning_rate": 7.447130136225421e-09, "logits/chosen": 0.07080078125, "logits/rejected": 0.462890625, "logps/chosen": -0.169921875, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8515625, "rewards/margins": 17.875, "rewards/rejected": -18.75, "step": 2586 }, { "epoch": 0.9635009310986965, "grad_norm": 0.000720977783203125, "learning_rate": 7.297645518367701e-09, "logits/chosen": 0.2021484375, "logits/rejected": 0.47265625, "logps/chosen": -0.35546875, "logps/rejected": -3.296875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7734375, "rewards/margins": 14.6875, "rewards/rejected": -16.5, "step": 2587 }, { "epoch": 0.9638733705772812, "grad_norm": 12.375, "learning_rate": 7.149670328866686e-09, "logits/chosen": -0.0084228515625, "logits/rejected": 0.64453125, "logps/chosen": -0.1630859375, "logps/rejected": -2.3125, "loss": 0.0361, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8125, "rewards/margins": 10.75, "rewards/rejected": -11.5625, "step": 2588 }, { "epoch": 0.964245810055866, "grad_norm": 0.00396728515625, "learning_rate": 7.003204817926434e-09, "logits/chosen": -0.0888671875, "logits/rejected": 0.408203125, "logps/chosen": -0.31640625, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5859375, "rewards/margins": 16.625, "rewards/rejected": -18.25, "step": 2589 }, { "epoch": 0.9646182495344506, "grad_norm": 0.0003414154052734375, "learning_rate": 6.858249233198643e-09, "logits/chosen": -0.0986328125, "logits/rejected": 0.41015625, "logps/chosen": -0.306640625, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.53125, "rewards/margins": 16.25, "rewards/rejected": -17.75, "step": 2590 }, { "epoch": 0.9649906890130354, "grad_norm": 0.0791015625, "learning_rate": 6.714803819781856e-09, "logits/chosen": 0.061279296875, "logits/rejected": -0.26953125, "logps/chosen": -0.53515625, "logps/rejected": -3.015625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.671875, "rewards/margins": 12.375, "rewards/rejected": -15.0625, "step": 2591 }, { "epoch": 0.9653631284916201, "grad_norm": 0.0001277923583984375, "learning_rate": 6.5728688202212555e-09, "logits/chosen": 0.1845703125, "logits/rejected": 0.0235595703125, "logps/chosen": -0.1611328125, "logps/rejected": -3.765625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8046875, "rewards/margins": 18.0, "rewards/rejected": -18.75, "step": 2592 }, { "epoch": 0.9657355679702049, "grad_norm": 0.09716796875, "learning_rate": 6.43244447450797e-09, "logits/chosen": 0.038330078125, "logits/rejected": -0.23828125, "logps/chosen": -0.1787109375, "logps/rejected": -2.75, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.890625, "rewards/margins": 12.875, "rewards/rejected": -13.75, "step": 2593 }, { "epoch": 0.9661080074487896, "grad_norm": 0.01068115234375, "learning_rate": 6.293531020078968e-09, "logits/chosen": 0.2080078125, "logits/rejected": -0.1552734375, "logps/chosen": -0.126953125, "logps/rejected": -2.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6328125, "rewards/margins": 13.375, "rewards/rejected": -14.0, "step": 2594 }, { "epoch": 0.9664804469273743, "grad_norm": 3.5, "learning_rate": 6.156128691816365e-09, "logits/chosen": 0.08203125, "logits/rejected": -0.291015625, "logps/chosen": -0.79296875, "logps/rejected": -2.78125, "loss": 0.0042, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.96875, "rewards/margins": 9.875, "rewards/rejected": -13.875, "step": 2595 }, { "epoch": 0.966852886405959, "grad_norm": 0.0001163482666015625, "learning_rate": 6.020237722047316e-09, "logits/chosen": -0.052490234375, "logits/rejected": 0.3984375, "logps/chosen": -0.232421875, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.15625, "rewards/margins": 17.0, "rewards/rejected": -18.0, "step": 2596 }, { "epoch": 0.9672253258845438, "grad_norm": 0.00020694732666015625, "learning_rate": 5.885858340543526e-09, "logits/chosen": 0.177734375, "logits/rejected": 0.416015625, "logps/chosen": -0.21875, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.09375, "rewards/margins": 17.125, "rewards/rejected": -18.25, "step": 2597 }, { "epoch": 0.9675977653631285, "grad_norm": 0.5390625, "learning_rate": 5.7529907745207384e-09, "logits/chosen": 0.12255859375, "logits/rejected": -0.1376953125, "logps/chosen": -0.62890625, "logps/rejected": -2.625, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.15625, "rewards/margins": 10.0, "rewards/rejected": -13.125, "step": 2598 }, { "epoch": 0.9679702048417133, "grad_norm": 0.1630859375, "learning_rate": 5.621635248638346e-09, "logits/chosen": -0.04541015625, "logits/rejected": -0.2275390625, "logps/chosen": -0.2890625, "logps/rejected": -2.6875, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.453125, "rewards/margins": 12.0, "rewards/rejected": -13.4375, "step": 2599 }, { "epoch": 0.9683426443202979, "grad_norm": 0.000690460205078125, "learning_rate": 5.4917919849993835e-09, "logits/chosen": 0.1416015625, "logits/rejected": 0.333984375, "logps/chosen": -0.255859375, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.28125, "rewards/margins": 14.5, "rewards/rejected": -15.75, "step": 2600 }, { "epoch": 0.9687150837988827, "grad_norm": 1.1796875, "learning_rate": 5.363461203149733e-09, "logits/chosen": 0.07177734375, "logits/rejected": 0.466796875, "logps/chosen": -0.275390625, "logps/rejected": -2.4375, "loss": 0.0017, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.375, "rewards/margins": 10.8125, "rewards/rejected": -12.1875, "step": 2601 }, { "epoch": 0.9690875232774674, "grad_norm": 190.0, "learning_rate": 5.236643120077622e-09, "logits/chosen": 0.07861328125, "logits/rejected": -0.10302734375, "logps/chosen": -0.86328125, "logps/rejected": -1.640625, "loss": 0.3242, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.3125, "rewards/margins": 3.859375, "rewards/rejected": -8.1875, "step": 2602 }, { "epoch": 0.9694599627560522, "grad_norm": 13.75, "learning_rate": 5.111337950214023e-09, "logits/chosen": -0.043701171875, "logits/rejected": -0.0625, "logps/chosen": -0.49609375, "logps/rejected": -2.640625, "loss": 0.0297, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.46875, "rewards/margins": 10.75, "rewards/rejected": -13.1875, "step": 2603 }, { "epoch": 0.9698324022346368, "grad_norm": 1.6640625, "learning_rate": 4.987545905431456e-09, "logits/chosen": 0.0274658203125, "logits/rejected": 0.796875, "logps/chosen": -0.208984375, "logps/rejected": -2.421875, "loss": 0.0037, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0390625, "rewards/margins": 11.0625, "rewards/rejected": -12.125, "step": 2604 }, { "epoch": 0.9702048417132216, "grad_norm": 9.250640869140625e-05, "learning_rate": 4.865267195043987e-09, "logits/chosen": -0.0142822265625, "logits/rejected": 0.380859375, "logps/chosen": -0.734375, "logps/rejected": -4.09375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6875, "rewards/margins": 16.875, "rewards/rejected": -20.5, "step": 2605 }, { "epoch": 0.9705772811918063, "grad_norm": 0.000644683837890625, "learning_rate": 4.744502025807029e-09, "logits/chosen": 0.013916015625, "logits/rejected": 0.30859375, "logps/chosen": -0.3359375, "logps/rejected": -3.46875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.671875, "rewards/margins": 15.625, "rewards/rejected": -17.25, "step": 2606 }, { "epoch": 0.9709497206703911, "grad_norm": 0.0001888275146484375, "learning_rate": 4.625250601916742e-09, "logits/chosen": -0.083984375, "logits/rejected": 0.341796875, "logps/chosen": -0.224609375, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1171875, "rewards/margins": 16.75, "rewards/rejected": -18.0, "step": 2607 }, { "epoch": 0.9713221601489758, "grad_norm": 0.01531982421875, "learning_rate": 4.507513125009632e-09, "logits/chosen": -0.2216796875, "logits/rejected": -0.314453125, "logps/chosen": -0.12890625, "logps/rejected": -2.875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.64453125, "rewards/margins": 13.75, "rewards/rejected": -14.4375, "step": 2608 }, { "epoch": 0.9716945996275606, "grad_norm": 0.361328125, "learning_rate": 4.391289794162456e-09, "logits/chosen": 0.0478515625, "logits/rejected": 0.7578125, "logps/chosen": -0.2490234375, "logps/rejected": -2.796875, "loss": 0.0006, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2421875, "rewards/margins": 12.75, "rewards/rejected": -14.0, "step": 2609 }, { "epoch": 0.9720670391061452, "grad_norm": 0.0048828125, "learning_rate": 4.276580805891816e-09, "logits/chosen": -0.0260009765625, "logits/rejected": -0.0255126953125, "logps/chosen": -0.5078125, "logps/rejected": -3.15625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.53125, "rewards/margins": 13.25, "rewards/rejected": -15.75, "step": 2610 }, { "epoch": 0.97243947858473, "grad_norm": 0.000171661376953125, "learning_rate": 4.163386354153764e-09, "logits/chosen": 0.062255859375, "logits/rejected": 0.3515625, "logps/chosen": -0.546875, "logps/rejected": -3.921875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 16.875, "rewards/rejected": -19.625, "step": 2611 }, { "epoch": 0.9728119180633147, "grad_norm": 0.00021457672119140625, "learning_rate": 4.051706630343499e-09, "logits/chosen": 0.0732421875, "logits/rejected": 0.48828125, "logps/chosen": -0.44921875, "logps/rejected": -4.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25, "rewards/margins": 17.75, "rewards/rejected": -20.0, "step": 2612 }, { "epoch": 0.9731843575418995, "grad_norm": 0.004852294921875, "learning_rate": 3.9415418232948695e-09, "logits/chosen": 0.19921875, "logits/rejected": 0.416015625, "logps/chosen": -0.40234375, "logps/rejected": -3.171875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0, "rewards/margins": 13.875, "rewards/rejected": -15.875, "step": 2613 }, { "epoch": 0.9735567970204841, "grad_norm": 109.0, "learning_rate": 3.832892119280373e-09, "logits/chosen": -0.06103515625, "logits/rejected": 0.322265625, "logps/chosen": -0.447265625, "logps/rejected": -1.734375, "loss": 0.1885, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.234375, "rewards/margins": 6.4375, "rewards/rejected": -8.6875, "step": 2614 }, { "epoch": 0.9739292364990689, "grad_norm": 0.0004367828369140625, "learning_rate": 3.7257577020108565e-09, "logits/chosen": 0.11962890625, "logits/rejected": 0.345703125, "logps/chosen": -0.197265625, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.984375, "rewards/margins": 15.9375, "rewards/rejected": -16.875, "step": 2615 }, { "epoch": 0.9743016759776536, "grad_norm": 0.369140625, "learning_rate": 3.620138752634616e-09, "logits/chosen": -0.162109375, "logits/rejected": 0.361328125, "logps/chosen": -0.396484375, "logps/rejected": -2.96875, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.984375, "rewards/margins": 12.875, "rewards/rejected": -14.8125, "step": 2616 }, { "epoch": 0.9746741154562384, "grad_norm": 2.682209014892578e-05, "learning_rate": 3.5160354497378975e-09, "logits/chosen": -0.06591796875, "logits/rejected": 0.25390625, "logps/chosen": -0.1171875, "logps/rejected": -3.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5859375, "rewards/margins": 18.25, "rewards/rejected": -18.875, "step": 2617 }, { "epoch": 0.9750465549348231, "grad_norm": 0.00022983551025390625, "learning_rate": 3.4134479693440965e-09, "logits/chosen": 0.04443359375, "logits/rejected": 0.3203125, "logps/chosen": -0.208984375, "logps/rejected": -3.359375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046875, "rewards/margins": 15.75, "rewards/rejected": -16.75, "step": 2618 }, { "epoch": 0.9754189944134078, "grad_norm": 0.439453125, "learning_rate": 3.3123764849135593e-09, "logits/chosen": 0.008056640625, "logits/rejected": -0.498046875, "logps/chosen": -0.265625, "logps/rejected": -2.71875, "loss": 0.0008, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.328125, "rewards/margins": 12.25, "rewards/rejected": -13.5625, "step": 2619 }, { "epoch": 0.9757914338919925, "grad_norm": 0.00726318359375, "learning_rate": 3.212821167343183e-09, "logits/chosen": 0.0947265625, "logits/rejected": 0.6015625, "logps/chosen": -0.48828125, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 15.3125, "rewards/rejected": -17.75, "step": 2620 }, { "epoch": 0.9761638733705773, "grad_norm": 1.046875, "learning_rate": 3.1147821849664136e-09, "logits/chosen": -0.04052734375, "logits/rejected": -0.2333984375, "logps/chosen": -0.41015625, "logps/rejected": -2.765625, "loss": 0.0013, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.046875, "rewards/margins": 11.75, "rewards/rejected": -13.8125, "step": 2621 }, { "epoch": 0.976536312849162, "grad_norm": 0.01214599609375, "learning_rate": 3.01825970355275e-09, "logits/chosen": 0.146484375, "logits/rejected": 0.30859375, "logps/chosen": -0.390625, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9609375, "rewards/margins": 16.125, "rewards/rejected": -18.125, "step": 2622 }, { "epoch": 0.9769087523277468, "grad_norm": 0.443359375, "learning_rate": 2.923253886307542e-09, "logits/chosen": 0.012939453125, "logits/rejected": -0.287109375, "logps/chosen": -0.5859375, "logps/rejected": -2.6875, "loss": 0.0005, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9375, "rewards/margins": 10.5, "rewards/rejected": -13.5, "step": 2623 }, { "epoch": 0.9772811918063314, "grad_norm": 0.000591278076171875, "learning_rate": 2.829764893871589e-09, "logits/chosen": 0.0277099609375, "logits/rejected": 0.375, "logps/chosen": -0.53515625, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.671875, "rewards/margins": 14.875, "rewards/rejected": -17.5, "step": 2624 }, { "epoch": 0.9776536312849162, "grad_norm": 3.265625, "learning_rate": 2.737792884321044e-09, "logits/chosen": -0.2265625, "logits/rejected": 0.1962890625, "logps/chosen": -0.4296875, "logps/rejected": -1.921875, "loss": 0.0069, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.15625, "rewards/margins": 7.4375, "rewards/rejected": -9.625, "step": 2625 }, { "epoch": 0.9780260707635009, "grad_norm": 0.0004425048828125, "learning_rate": 2.647338013167111e-09, "logits/chosen": -0.0128173828125, "logits/rejected": 0.283203125, "logps/chosen": -0.1376953125, "logps/rejected": -3.203125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6875, "rewards/margins": 15.375, "rewards/rejected": -16.0, "step": 2626 }, { "epoch": 0.9783985102420857, "grad_norm": 0.000667572021484375, "learning_rate": 2.5584004333555452e-09, "logits/chosen": 0.01202392578125, "logits/rejected": 0.2099609375, "logps/chosen": -0.404296875, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.015625, "rewards/margins": 16.25, "rewards/rejected": -18.25, "step": 2627 }, { "epoch": 0.9787709497206704, "grad_norm": 2.625, "learning_rate": 2.4709802952669554e-09, "logits/chosen": -0.038330078125, "logits/rejected": 0.640625, "logps/chosen": -0.5, "logps/rejected": -2.546875, "loss": 0.0042, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5, "rewards/margins": 10.25, "rewards/rejected": -12.75, "step": 2628 }, { "epoch": 0.9791433891992551, "grad_norm": 0.00133514404296875, "learning_rate": 2.3850777467156024e-09, "logits/chosen": 0.0255126953125, "logits/rejected": 0.48828125, "logps/chosen": -0.47265625, "logps/rejected": -3.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.375, "rewards/margins": 14.75, "rewards/rejected": -17.125, "step": 2629 }, { "epoch": 0.9795158286778398, "grad_norm": 0.87890625, "learning_rate": 2.3006929329502987e-09, "logits/chosen": 0.2275390625, "logits/rejected": 0.64453125, "logps/chosen": -0.33984375, "logps/rejected": -3.0, "loss": 0.0015, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6953125, "rewards/margins": 13.375, "rewards/rejected": -15.0, "step": 2630 }, { "epoch": 0.9798882681564246, "grad_norm": 0.004730224609375, "learning_rate": 2.217825996653211e-09, "logits/chosen": -0.05224609375, "logits/rejected": 0.212890625, "logps/chosen": -0.81640625, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.09375, "rewards/margins": 12.75, "rewards/rejected": -16.875, "step": 2631 }, { "epoch": 0.9802607076350093, "grad_norm": 0.00013637542724609375, "learning_rate": 2.1364770779402573e-09, "logits/chosen": 0.14453125, "logits/rejected": 0.63671875, "logps/chosen": -0.1572265625, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.78515625, "rewards/margins": 16.75, "rewards/rejected": -17.5, "step": 2632 }, { "epoch": 0.9806331471135941, "grad_norm": 1.34375, "learning_rate": 2.056646314360411e-09, "logits/chosen": 0.007293701171875, "logits/rejected": 0.81640625, "logps/chosen": -0.3671875, "logps/rejected": -2.578125, "loss": 0.0021, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8359375, "rewards/margins": 11.0625, "rewards/rejected": -12.875, "step": 2633 }, { "epoch": 0.9810055865921787, "grad_norm": 0.32421875, "learning_rate": 1.978333840895796e-09, "logits/chosen": 0.03564453125, "logits/rejected": 0.055419921875, "logps/chosen": -0.765625, "logps/rejected": -3.0625, "loss": 0.0004, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8125, "rewards/margins": 11.5, "rewards/rejected": -15.375, "step": 2634 }, { "epoch": 0.9813780260707635, "grad_norm": 0.1142578125, "learning_rate": 1.901539789961193e-09, "logits/chosen": 0.000888824462890625, "logits/rejected": 0.3125, "logps/chosen": -0.55859375, "logps/rejected": -3.265625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.796875, "rewards/margins": 13.5, "rewards/rejected": -16.25, "step": 2635 }, { "epoch": 0.9817504655493482, "grad_norm": 0.0035858154296875, "learning_rate": 1.8262642914042337e-09, "logits/chosen": 0.166015625, "logits/rejected": 0.236328125, "logps/chosen": -0.197265625, "logps/rejected": -2.84375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.984375, "rewards/margins": 13.25, "rewards/rejected": -14.25, "step": 2636 }, { "epoch": 0.982122905027933, "grad_norm": 0.0294189453125, "learning_rate": 1.7525074725045053e-09, "logits/chosen": 0.23046875, "logits/rejected": 0.06494140625, "logps/chosen": -0.9765625, "logps/rejected": -3.515625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.875, "rewards/margins": 12.6875, "rewards/rejected": -17.5, "step": 2637 }, { "epoch": 0.9824953445065177, "grad_norm": 0.001556396484375, "learning_rate": 1.6802694579740485e-09, "logits/chosen": -0.1806640625, "logits/rejected": 0.119140625, "logps/chosen": -0.251953125, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.265625, "rewards/margins": 15.625, "rewards/rejected": -16.875, "step": 2638 }, { "epoch": 0.9828677839851024, "grad_norm": 0.9375, "learning_rate": 1.609550369956658e-09, "logits/chosen": 0.0211181640625, "logits/rejected": 0.291015625, "logps/chosen": -0.94921875, "logps/rejected": -3.625, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.75, "rewards/margins": 13.3125, "rewards/rejected": -18.0, "step": 2639 }, { "epoch": 0.9832402234636871, "grad_norm": 0.0123291015625, "learning_rate": 1.5403503280279838e-09, "logits/chosen": 0.185546875, "logits/rejected": 0.91796875, "logps/chosen": -0.142578125, "logps/rejected": -2.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7109375, "rewards/margins": 11.875, "rewards/rejected": -12.625, "step": 2640 }, { "epoch": 0.9836126629422719, "grad_norm": 5.507469177246094e-05, "learning_rate": 1.4726694491951298e-09, "logits/chosen": 0.2177734375, "logits/rejected": 0.5390625, "logps/chosen": -0.142578125, "logps/rejected": -3.59375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.71484375, "rewards/margins": 17.25, "rewards/rejected": -18.0, "step": 2641 }, { "epoch": 0.9839851024208566, "grad_norm": 0.006134033203125, "learning_rate": 1.4065078478963543e-09, "logits/chosen": 0.146484375, "logits/rejected": 0.423828125, "logps/chosen": -0.376953125, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8828125, "rewards/margins": 16.0, "rewards/rejected": -17.875, "step": 2642 }, { "epoch": 0.9843575418994414, "grad_norm": 4.649162292480469e-05, "learning_rate": 1.3418656360013713e-09, "logits/chosen": 0.09521484375, "logits/rejected": 0.42578125, "logps/chosen": -0.1640625, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8203125, "rewards/margins": 17.5, "rewards/rejected": -18.25, "step": 2643 }, { "epoch": 0.984729981378026, "grad_norm": 338.0, "learning_rate": 1.2787429228105495e-09, "logits/chosen": -0.1435546875, "logits/rejected": 0.58984375, "logps/chosen": -1.296875, "logps/rejected": -3.125, "loss": 0.5352, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.46875, "rewards/margins": 9.125, "rewards/rejected": -15.5625, "step": 2644 }, { "epoch": 0.9851024208566108, "grad_norm": 4.172325134277344e-05, "learning_rate": 1.2171398150551127e-09, "logits/chosen": 0.099609375, "logits/rejected": 0.58984375, "logps/chosen": -0.177734375, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.890625, "rewards/margins": 17.5, "rewards/rejected": -18.25, "step": 2645 }, { "epoch": 0.9854748603351955, "grad_norm": 0.04638671875, "learning_rate": 1.15705641689694e-09, "logits/chosen": 0.038818359375, "logits/rejected": -0.22265625, "logps/chosen": -0.158203125, "logps/rejected": -2.78125, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.79296875, "rewards/margins": 13.0625, "rewards/rejected": -13.875, "step": 2646 }, { "epoch": 0.9858472998137803, "grad_norm": 0.0034637451171875, "learning_rate": 1.098492829928066e-09, "logits/chosen": 0.16796875, "logits/rejected": 0.77734375, "logps/chosen": -0.32421875, "logps/rejected": -2.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609375, "rewards/margins": 13.1875, "rewards/rejected": -14.8125, "step": 2647 }, { "epoch": 0.986219739292365, "grad_norm": 0.0003662109375, "learning_rate": 1.0414491531709813e-09, "logits/chosen": 0.125, "logits/rejected": 0.4296875, "logps/chosen": -0.361328125, "logps/rejected": -3.453125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8125, "rewards/margins": 15.5, "rewards/rejected": -17.25, "step": 2648 }, { "epoch": 0.9865921787709497, "grad_norm": 9.125, "learning_rate": 9.859254830782315e-10, "logits/chosen": 0.06982421875, "logits/rejected": -0.85546875, "logps/chosen": -0.53515625, "logps/rejected": -2.25, "loss": 0.0103, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.671875, "rewards/margins": 8.5625, "rewards/rejected": -11.25, "step": 2649 }, { "epoch": 0.9869646182495344, "grad_norm": 7.2479248046875e-05, "learning_rate": 9.319219135321188e-10, "logits/chosen": 0.146484375, "logits/rejected": 0.6328125, "logps/chosen": -0.1318359375, "logps/rejected": -3.5625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.66015625, "rewards/margins": 17.125, "rewards/rejected": -17.75, "step": 2650 }, { "epoch": 0.9873370577281192, "grad_norm": 7.4803829193115234e-06, "learning_rate": 8.794385358447009e-10, "logits/chosen": 0.10498046875, "logits/rejected": 0.462890625, "logps/chosen": -0.287109375, "logps/rejected": -4.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4375, "rewards/margins": 19.375, "rewards/rejected": -20.75, "step": 2651 }, { "epoch": 0.9877094972067039, "grad_norm": 0.00051116943359375, "learning_rate": 8.284754387575921e-10, "logits/chosen": 0.07763671875, "logits/rejected": 0.3046875, "logps/chosen": -0.484375, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.421875, "rewards/margins": 15.875, "rewards/rejected": -18.25, "step": 2652 }, { "epoch": 0.9880819366852887, "grad_norm": 0.8984375, "learning_rate": 7.790327084420622e-10, "logits/chosen": 0.0230712890625, "logits/rejected": 0.81640625, "logps/chosen": -0.5234375, "logps/rejected": -2.828125, "loss": 0.0011, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.625, "rewards/margins": 11.5, "rewards/rejected": -14.125, "step": 2653 }, { "epoch": 0.9884543761638733, "grad_norm": 0.00018024444580078125, "learning_rate": 7.311104284983383e-10, "logits/chosen": -0.06884765625, "logits/rejected": 0.4609375, "logps/chosen": -0.59375, "logps/rejected": -3.96875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.96875, "rewards/margins": 16.75, "rewards/rejected": -19.75, "step": 2654 }, { "epoch": 0.9888268156424581, "grad_norm": 2.4437904357910156e-05, "learning_rate": 6.847086799560031e-10, "logits/chosen": 0.1728515625, "logits/rejected": 0.53125, "logps/chosen": -0.3125, "logps/rejected": -4.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5625, "rewards/margins": 18.5, "rewards/rejected": -20.0, "step": 2655 }, { "epoch": 0.9891992551210428, "grad_norm": 0.00011110305786132812, "learning_rate": 6.398275412736964e-10, "logits/chosen": 0.279296875, "logits/rejected": 0.39453125, "logps/chosen": -0.3359375, "logps/rejected": -3.65625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6875, "rewards/margins": 16.625, "rewards/rejected": -18.25, "step": 2656 }, { "epoch": 0.9895716945996276, "grad_norm": 0.515625, "learning_rate": 5.964670883386147e-10, "logits/chosen": -0.177734375, "logits/rejected": -0.41796875, "logps/chosen": -0.33203125, "logps/rejected": -1.9375, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.65625, "rewards/margins": 8.0, "rewards/rejected": -9.6875, "step": 2657 }, { "epoch": 0.9899441340782122, "grad_norm": 0.0003204345703125, "learning_rate": 5.54627394467011e-10, "logits/chosen": -0.0166015625, "logits/rejected": 0.0089111328125, "logps/chosen": -0.75, "logps/rejected": -3.890625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.765625, "rewards/margins": 15.6875, "rewards/rejected": -19.5, "step": 2658 }, { "epoch": 0.990316573556797, "grad_norm": 0.0002307891845703125, "learning_rate": 5.143085304036954e-10, "logits/chosen": 0.134765625, "logits/rejected": 0.1943359375, "logps/chosen": -0.2578125, "logps/rejected": -3.6875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2890625, "rewards/margins": 17.125, "rewards/rejected": -18.375, "step": 2659 }, { "epoch": 0.9906890130353817, "grad_norm": 6.5, "learning_rate": 4.755105643218349e-10, "logits/chosen": 0.04345703125, "logits/rejected": -0.56640625, "logps/chosen": -0.78125, "logps/rejected": -2.90625, "loss": 0.0093, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.90625, "rewards/margins": 10.625, "rewards/rejected": -14.5, "step": 2660 }, { "epoch": 0.9910614525139665, "grad_norm": 0.0003223419189453125, "learning_rate": 4.382335618231536e-10, "logits/chosen": 0.25, "logits/rejected": 0.353515625, "logps/chosen": -0.38671875, "logps/rejected": -3.5, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9375, "rewards/margins": 15.5625, "rewards/rejected": -17.5, "step": 2661 }, { "epoch": 0.9914338919925512, "grad_norm": 0.00506591796875, "learning_rate": 4.0247758593763284e-10, "logits/chosen": 0.0888671875, "logits/rejected": 0.052490234375, "logps/chosen": -0.2421875, "logps/rejected": -2.78125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2109375, "rewards/margins": 12.75, "rewards/rejected": -14.0, "step": 2662 }, { "epoch": 0.991806331471136, "grad_norm": 0.0029296875, "learning_rate": 3.682426971232111e-10, "logits/chosen": 0.267578125, "logits/rejected": 0.61328125, "logps/chosen": -0.41015625, "logps/rejected": -3.421875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625, "rewards/margins": 15.0625, "rewards/rejected": -17.125, "step": 2663 }, { "epoch": 0.9921787709497206, "grad_norm": 0.03173828125, "learning_rate": 3.3552895326618423e-10, "logits/chosen": -0.09619140625, "logits/rejected": 0.390625, "logps/chosen": -0.271484375, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3515625, "rewards/margins": 14.5625, "rewards/rejected": -15.875, "step": 2664 }, { "epoch": 0.9925512104283054, "grad_norm": 1.049041748046875e-05, "learning_rate": 3.043364096806056e-10, "logits/chosen": 0.2734375, "logits/rejected": 0.314453125, "logps/chosen": -0.25, "logps/rejected": -4.0625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.25, "rewards/margins": 19.0, "rewards/rejected": -20.25, "step": 2665 }, { "epoch": 0.9929236499068901, "grad_norm": 1.890625, "learning_rate": 2.746651191084859e-10, "logits/chosen": 0.07958984375, "logits/rejected": -0.75, "logps/chosen": -0.47265625, "logps/rejected": -2.078125, "loss": 0.0022, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.359375, "rewards/margins": 8.0625, "rewards/rejected": -10.375, "step": 2666 }, { "epoch": 0.9932960893854749, "grad_norm": 0.00341796875, "learning_rate": 2.465151317196934e-10, "logits/chosen": 0.138671875, "logits/rejected": 0.58203125, "logps/chosen": -0.3359375, "logps/rejected": -3.0, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.671875, "rewards/margins": 13.3125, "rewards/rejected": -15.0, "step": 2667 }, { "epoch": 0.9936685288640595, "grad_norm": 0.55859375, "learning_rate": 2.1988649511155423e-10, "logits/chosen": -0.05224609375, "logits/rejected": 0.6640625, "logps/chosen": -0.388671875, "logps/rejected": -2.75, "loss": 0.0011, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9375, "rewards/margins": 11.8125, "rewards/rejected": -13.75, "step": 2668 }, { "epoch": 0.9940409683426443, "grad_norm": 0.0240478515625, "learning_rate": 1.9477925430935182e-10, "logits/chosen": 0.1298828125, "logits/rejected": 0.1123046875, "logps/chosen": -0.54296875, "logps/rejected": -3.3125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.703125, "rewards/margins": 13.875, "rewards/rejected": -16.5, "step": 2669 }, { "epoch": 0.994413407821229, "grad_norm": 0.00016117095947265625, "learning_rate": 1.711934517656277e-10, "logits/chosen": 0.109375, "logits/rejected": 0.447265625, "logps/chosen": -0.384765625, "logps/rejected": -3.625, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.921875, "rewards/margins": 16.125, "rewards/rejected": -18.0, "step": 2670 }, { "epoch": 0.9947858472998138, "grad_norm": 0.0078125, "learning_rate": 1.4912912736058103e-10, "logits/chosen": -0.10888671875, "logits/rejected": 0.208984375, "logps/chosen": -0.3046875, "logps/rejected": -3.28125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5234375, "rewards/margins": 14.875, "rewards/rejected": -16.375, "step": 2671 }, { "epoch": 0.9951582867783985, "grad_norm": 2.8252601623535156e-05, "learning_rate": 1.285863184016689e-10, "logits/chosen": 0.1806640625, "logits/rejected": 0.140625, "logps/chosen": -0.234375, "logps/rejected": -3.8125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.171875, "rewards/margins": 18.0, "rewards/rejected": -19.0, "step": 2672 }, { "epoch": 0.9955307262569832, "grad_norm": 0.578125, "learning_rate": 1.0956505962370632e-10, "logits/chosen": 0.2412109375, "logits/rejected": 0.0025177001953125, "logps/chosen": -0.177734375, "logps/rejected": -2.71875, "loss": 0.0009, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.890625, "rewards/margins": 12.6875, "rewards/rejected": -13.5625, "step": 2673 }, { "epoch": 0.9959031657355679, "grad_norm": 0.00014972686767578125, "learning_rate": 9.206538318896617e-11, "logits/chosen": 0.0625, "logits/rejected": 0.55859375, "logps/chosen": -0.328125, "logps/rejected": -3.671875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.640625, "rewards/margins": 16.75, "rewards/rejected": -18.375, "step": 2674 }, { "epoch": 0.9962756052141527, "grad_norm": 4.076957702636719e-05, "learning_rate": 7.608731868687934e-11, "logits/chosen": -0.1484375, "logits/rejected": 0.341796875, "logps/chosen": -0.16796875, "logps/rejected": -3.75, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.84375, "rewards/margins": 18.0, "rewards/rejected": -18.75, "step": 2675 }, { "epoch": 0.9966480446927374, "grad_norm": 0.03955078125, "learning_rate": 6.163089313383496e-11, "logits/chosen": -0.049072265625, "logits/rejected": -0.02734375, "logps/chosen": -0.44921875, "logps/rejected": -2.65625, "loss": 0.0001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.234375, "rewards/margins": 11.0, "rewards/rejected": -13.25, "step": 2676 }, { "epoch": 0.9970204841713222, "grad_norm": 0.0020599365234375, "learning_rate": 4.869613097358005e-11, "logits/chosen": -0.044921875, "logits/rejected": 0.1748046875, "logps/chosen": -0.484375, "logps/rejected": -3.1875, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.421875, "rewards/margins": 13.5, "rewards/rejected": -15.9375, "step": 2677 }, { "epoch": 0.9973929236499068, "grad_norm": 0.671875, "learning_rate": 3.728305407691978e-11, "logits/chosen": -0.0020599365234375, "logits/rejected": -0.373046875, "logps/chosen": -0.333984375, "logps/rejected": -2.890625, "loss": 0.001, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.671875, "rewards/margins": 12.75, "rewards/rejected": -14.4375, "step": 2678 }, { "epoch": 0.9977653631284916, "grad_norm": 0.00041961669921875, "learning_rate": 2.7391681741617544e-11, "logits/chosen": -0.04541015625, "logits/rejected": 0.33984375, "logps/chosen": -0.333984375, "logps/rejected": -3.53125, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.671875, "rewards/margins": 16.0, "rewards/rejected": -17.75, "step": 2679 }, { "epoch": 0.9981378026070763, "grad_norm": 0.0002689361572265625, "learning_rate": 1.902203069259478e-11, "logits/chosen": 0.06396484375, "logits/rejected": 0.3359375, "logps/chosen": -0.154296875, "logps/rejected": -3.34375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7734375, "rewards/margins": 16.0, "rewards/rejected": -16.75, "step": 2680 }, { "epoch": 0.9985102420856611, "grad_norm": 1.8104910850524902e-06, "learning_rate": 1.2174115081731163e-11, "logits/chosen": 0.1953125, "logits/rejected": 0.51171875, "logps/chosen": -0.203125, "logps/rejected": -4.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.015625, "rewards/margins": 21.0, "rewards/rejected": -22.0, "step": 2681 }, { "epoch": 0.9988826815642458, "grad_norm": 0.00152587890625, "learning_rate": 6.847946487864575e-12, "logits/chosen": 0.00982666015625, "logits/rejected": 0.330078125, "logps/chosen": -0.380859375, "logps/rejected": -3.375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.90625, "rewards/margins": 15.0, "rewards/rejected": -16.875, "step": 2682 }, { "epoch": 0.9992551210428305, "grad_norm": 0.08935546875, "learning_rate": 3.0435339165912897e-12, "logits/chosen": -0.047607421875, "logits/rejected": -0.026123046875, "logps/chosen": -0.5546875, "logps/rejected": -2.46875, "loss": 0.0002, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.78125, "rewards/margins": 9.625, "rewards/rejected": -12.375, "step": 2683 }, { "epoch": 0.9996275605214152, "grad_norm": 0.71875, "learning_rate": 7.608838007655549e-13, "logits/chosen": -0.0546875, "logits/rejected": -0.10986328125, "logps/chosen": -0.31640625, "logps/rejected": -1.8125, "loss": 0.0013, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5859375, "rewards/margins": 7.5, "rewards/rejected": -9.0625, "step": 2684 }, { "epoch": 1.0, "grad_norm": 0.0242919921875, "learning_rate": 0.0, "logits/chosen": -0.1142578125, "logits/rejected": -0.251953125, "logps/chosen": -0.251953125, "logps/rejected": -2.4375, "loss": 0.0, "nll_loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.265625, "rewards/margins": 10.9375, "rewards/rejected": -12.1875, "step": 2685 } ], "logging_steps": 1, "max_steps": 2685, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2685, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }