|
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 100,
"global_step": 2776,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
|
{ |
|
"epoch": 0.0007204610951008645, |
|
"grad_norm": 16.86738074547546, |
|
"learning_rate": 1.7985611510791367e-10, |
|
"logits/chosen": -1.901450514793396, |
|
"logits/rejected": -1.9076323509216309, |
|
"logps/chosen": -0.8524526953697205, |
|
"logps/rejected": -0.9626365900039673, |
|
"loss": 1.6316, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.704905390739441, |
|
"rewards/margins": 0.22036786377429962, |
|
"rewards/rejected": -1.9252731800079346, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.007204610951008645, |
|
"grad_norm": 20.67220170920981, |
|
"learning_rate": 1.7985611510791365e-09, |
|
"logits/chosen": -2.020613670349121, |
|
"logits/rejected": -2.006347894668579, |
|
"logps/chosen": -1.005244255065918, |
|
"logps/rejected": -1.1096515655517578, |
|
"loss": 1.6546, |
|
"rewards/accuracies": 0.5208333134651184, |
|
"rewards/chosen": -2.010488510131836, |
|
"rewards/margins": 0.20881448686122894, |
|
"rewards/rejected": -2.2193031311035156, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01440922190201729, |
|
"grad_norm": 26.108277039722253, |
|
"learning_rate": 3.597122302158273e-09, |
|
"logits/chosen": -2.0260705947875977, |
|
"logits/rejected": -2.022770643234253, |
|
"logps/chosen": -1.052295446395874, |
|
"logps/rejected": -1.1837208271026611, |
|
"loss": 1.6167, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.104590892791748, |
|
"rewards/margins": 0.26285091042518616, |
|
"rewards/rejected": -2.3674416542053223, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021613832853025938, |
|
"grad_norm": 20.47682519715639, |
|
"learning_rate": 5.3956834532374095e-09, |
|
"logits/chosen": -1.9848406314849854, |
|
"logits/rejected": -1.9775378704071045, |
|
"logps/chosen": -1.0540497303009033, |
|
"logps/rejected": -1.1514469385147095, |
|
"loss": 1.6715, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1080994606018066, |
|
"rewards/margins": 0.19479455053806305, |
|
"rewards/rejected": -2.302893877029419, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02881844380403458, |
|
"grad_norm": 22.578054082763025, |
|
"learning_rate": 7.194244604316546e-09, |
|
"logits/chosen": -2.0309205055236816, |
|
"logits/rejected": -2.030827045440674, |
|
"logps/chosen": -1.0357428789138794, |
|
"logps/rejected": -1.1376559734344482, |
|
"loss": 1.674, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.071485757827759, |
|
"rewards/margins": 0.20382657647132874, |
|
"rewards/rejected": -2.2753119468688965, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03602305475504323, |
|
"grad_norm": 17.189127890707947, |
|
"learning_rate": 8.992805755395683e-09, |
|
"logits/chosen": -1.9604355096817017, |
|
"logits/rejected": -1.9610908031463623, |
|
"logps/chosen": -0.9419905543327332, |
|
"logps/rejected": -1.0071475505828857, |
|
"loss": 1.7048, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.8839811086654663, |
|
"rewards/margins": 0.13031414151191711, |
|
"rewards/rejected": -2.0142951011657715, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.043227665706051875, |
|
"grad_norm": 24.394161121983817, |
|
"learning_rate": 1.0791366906474819e-08, |
|
"logits/chosen": -2.0403716564178467, |
|
"logits/rejected": -2.035911798477173, |
|
"logps/chosen": -1.0892378091812134, |
|
"logps/rejected": -1.1461578607559204, |
|
"loss": 1.7173, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.1784756183624268, |
|
"rewards/margins": 0.11384035646915436, |
|
"rewards/rejected": -2.292315721511841, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05043227665706052, |
|
"grad_norm": 23.080584749878106, |
|
"learning_rate": 1.2589928057553956e-08, |
|
"logits/chosen": -2.0298831462860107, |
|
"logits/rejected": -2.0174343585968018, |
|
"logps/chosen": -1.109933614730835, |
|
"logps/rejected": -1.2047233581542969, |
|
"loss": 1.6667, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.21986722946167, |
|
"rewards/margins": 0.18957947194576263, |
|
"rewards/rejected": -2.4094467163085938, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05763688760806916, |
|
"grad_norm": 28.510083775511152, |
|
"learning_rate": 1.4388489208633092e-08, |
|
"logits/chosen": -2.0415005683898926, |
|
"logits/rejected": -2.0385377407073975, |
|
"logps/chosen": -1.1662975549697876, |
|
"logps/rejected": -1.2378699779510498, |
|
"loss": 1.7003, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.332595109939575, |
|
"rewards/margins": 0.1431449055671692, |
|
"rewards/rejected": -2.4757399559020996, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06484149855907781, |
|
"grad_norm": 18.099831598265492, |
|
"learning_rate": 1.618705035971223e-08, |
|
"logits/chosen": -2.003298044204712, |
|
"logits/rejected": -2.004725933074951, |
|
"logps/chosen": -1.0415083169937134, |
|
"logps/rejected": -1.149029016494751, |
|
"loss": 1.6519, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.0830166339874268, |
|
"rewards/margins": 0.21504120528697968, |
|
"rewards/rejected": -2.298058032989502, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07204610951008646, |
|
"grad_norm": 21.6296417312396, |
|
"learning_rate": 1.7985611510791365e-08, |
|
"logits/chosen": -2.036734104156494, |
|
"logits/rejected": -2.0305848121643066, |
|
"logps/chosen": -1.0069749355316162, |
|
"logps/rejected": -1.1141220331192017, |
|
"loss": 1.654, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0139498710632324, |
|
"rewards/margins": 0.21429400146007538, |
|
"rewards/rejected": -2.2282440662384033, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0792507204610951, |
|
"grad_norm": 18.402417176588862, |
|
"learning_rate": 1.9784172661870502e-08, |
|
"logits/chosen": -1.9797817468643188, |
|
"logits/rejected": -1.9685176610946655, |
|
"logps/chosen": -1.0294291973114014, |
|
"logps/rejected": -1.1286334991455078, |
|
"loss": 1.6659, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0588583946228027, |
|
"rewards/margins": 0.19840820133686066, |
|
"rewards/rejected": -2.2572669982910156, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08645533141210375, |
|
"grad_norm": 20.729624339345094, |
|
"learning_rate": 2.1582733812949638e-08, |
|
"logits/chosen": -1.9758269786834717, |
|
"logits/rejected": -1.974029541015625, |
|
"logps/chosen": -0.964306652545929, |
|
"logps/rejected": -1.0657222270965576, |
|
"loss": 1.6486, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.928613305091858, |
|
"rewards/margins": 0.20283102989196777, |
|
"rewards/rejected": -2.1314444541931152, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0936599423631124, |
|
"grad_norm": 20.17439332595769, |
|
"learning_rate": 2.3381294964028775e-08, |
|
"logits/chosen": -2.0696139335632324, |
|
"logits/rejected": -2.068974733352661, |
|
"logps/chosen": -1.0797998905181885, |
|
"logps/rejected": -1.1516422033309937, |
|
"loss": 1.7012, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.159599781036377, |
|
"rewards/margins": 0.14368471503257751, |
|
"rewards/rejected": -2.3032844066619873, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10086455331412104, |
|
"grad_norm": 24.064780949371126, |
|
"learning_rate": 2.517985611510791e-08, |
|
"logits/chosen": -1.9815738201141357, |
|
"logits/rejected": -1.9751752614974976, |
|
"logps/chosen": -0.9776951670646667, |
|
"logps/rejected": -1.1230800151824951, |
|
"loss": 1.5974, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9553903341293335, |
|
"rewards/margins": 0.2907695770263672, |
|
"rewards/rejected": -2.2461600303649902, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.10806916426512968, |
|
"grad_norm": 23.00427709241572, |
|
"learning_rate": 2.6978417266187048e-08, |
|
"logits/chosen": -1.99484121799469, |
|
"logits/rejected": -1.9905335903167725, |
|
"logps/chosen": -1.0193841457366943, |
|
"logps/rejected": -1.1368898153305054, |
|
"loss": 1.6404, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0387682914733887, |
|
"rewards/margins": 0.23501136898994446, |
|
"rewards/rejected": -2.2737796306610107, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11527377521613832, |
|
"grad_norm": 20.432294969630874, |
|
"learning_rate": 2.8776978417266184e-08, |
|
"logits/chosen": -1.997571587562561, |
|
"logits/rejected": -1.9914041757583618, |
|
"logps/chosen": -0.947496771812439, |
|
"logps/rejected": -1.0964053869247437, |
|
"loss": 1.5792, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.894993543624878, |
|
"rewards/margins": 0.2978169918060303, |
|
"rewards/rejected": -2.1928107738494873, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12247838616714697, |
|
"grad_norm": 25.103842505396916, |
|
"learning_rate": 3.057553956834532e-08, |
|
"logits/chosen": -2.006762981414795, |
|
"logits/rejected": -1.9991118907928467, |
|
"logps/chosen": -1.0366116762161255, |
|
"logps/rejected": -1.1614980697631836, |
|
"loss": 1.6344, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.073223352432251, |
|
"rewards/margins": 0.2497730255126953, |
|
"rewards/rejected": -2.322996139526367, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12968299711815562, |
|
"grad_norm": 26.27968111051558, |
|
"learning_rate": 3.237410071942446e-08, |
|
"logits/chosen": -2.0409793853759766, |
|
"logits/rejected": -2.034149646759033, |
|
"logps/chosen": -1.0202006101608276, |
|
"logps/rejected": -1.108983039855957, |
|
"loss": 1.6865, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0404012203216553, |
|
"rewards/margins": 0.177564799785614, |
|
"rewards/rejected": -2.217966079711914, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13688760806916425, |
|
"grad_norm": 25.939125627248345, |
|
"learning_rate": 3.4172661870503594e-08, |
|
"logits/chosen": -2.0743298530578613, |
|
"logits/rejected": -2.072180986404419, |
|
"logps/chosen": -0.9696714282035828, |
|
"logps/rejected": -1.065748929977417, |
|
"loss": 1.6537, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9393428564071655, |
|
"rewards/margins": 0.19215507805347443, |
|
"rewards/rejected": -2.131497859954834, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1440922190201729, |
|
"grad_norm": 26.153404922121894, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -2.0394513607025146, |
|
"logits/rejected": -2.0364089012145996, |
|
"logps/chosen": -1.0258630514144897, |
|
"logps/rejected": -1.1529974937438965, |
|
"loss": 1.6189, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0517261028289795, |
|
"rewards/margins": 0.2542688548564911, |
|
"rewards/rejected": -2.305994987487793, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15129682997118155, |
|
"grad_norm": 23.72175833145625, |
|
"learning_rate": 3.776978417266187e-08, |
|
"logits/chosen": -2.034412384033203, |
|
"logits/rejected": -2.0315709114074707, |
|
"logps/chosen": -1.073925256729126, |
|
"logps/rejected": -1.1507259607315063, |
|
"loss": 1.6945, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.147850513458252, |
|
"rewards/margins": 0.1536014825105667, |
|
"rewards/rejected": -2.3014519214630127, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1585014409221902, |
|
"grad_norm": 18.049421620748525, |
|
"learning_rate": 3.9568345323741003e-08, |
|
"logits/chosen": -1.9837948083877563, |
|
"logits/rejected": -1.9797385931015015, |
|
"logps/chosen": -1.007852554321289, |
|
"logps/rejected": -1.1767760515213013, |
|
"loss": 1.5721, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.015705108642578, |
|
"rewards/margins": 0.33784690499305725, |
|
"rewards/rejected": -2.3535521030426025, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.16570605187319884, |
|
"grad_norm": 19.432289400345837, |
|
"learning_rate": 4.136690647482014e-08, |
|
"logits/chosen": -2.0252606868743896, |
|
"logits/rejected": -2.025735378265381, |
|
"logps/chosen": -1.0125794410705566, |
|
"logps/rejected": -1.1261564493179321, |
|
"loss": 1.6379, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0251588821411133, |
|
"rewards/margins": 0.22715386748313904, |
|
"rewards/rejected": -2.2523128986358643, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1729106628242075, |
|
"grad_norm": 25.42149329995876, |
|
"learning_rate": 4.3165467625899276e-08, |
|
"logits/chosen": -2.0474588871002197, |
|
"logits/rejected": -2.042466163635254, |
|
"logps/chosen": -1.0612871646881104, |
|
"logps/rejected": -1.1391594409942627, |
|
"loss": 1.7029, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1225743293762207, |
|
"rewards/margins": 0.15574422478675842, |
|
"rewards/rejected": -2.2783188819885254, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.18011527377521613, |
|
"grad_norm": 21.827295632014227, |
|
"learning_rate": 4.496402877697841e-08, |
|
"logits/chosen": -1.9690139293670654, |
|
"logits/rejected": -1.9651823043823242, |
|
"logps/chosen": -1.081837773323059, |
|
"logps/rejected": -1.173208236694336, |
|
"loss": 1.6762, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.163675546646118, |
|
"rewards/margins": 0.18274101614952087, |
|
"rewards/rejected": -2.346416473388672, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1873198847262248, |
|
"grad_norm": 23.883537952919536, |
|
"learning_rate": 4.676258992805755e-08, |
|
"logits/chosen": -1.9890559911727905, |
|
"logits/rejected": -1.9971050024032593, |
|
"logps/chosen": -1.1051918268203735, |
|
"logps/rejected": -1.2165734767913818, |
|
"loss": 1.6485, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.210383653640747, |
|
"rewards/margins": 0.22276310622692108, |
|
"rewards/rejected": -2.4331469535827637, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.19452449567723343, |
|
"grad_norm": 23.452229796100493, |
|
"learning_rate": 4.8561151079136686e-08, |
|
"logits/chosen": -2.0651626586914062, |
|
"logits/rejected": -2.0570404529571533, |
|
"logps/chosen": -1.0715770721435547, |
|
"logps/rejected": -1.2007033824920654, |
|
"loss": 1.6136, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.1431541442871094, |
|
"rewards/margins": 0.2582527697086334, |
|
"rewards/rejected": -2.401406764984131, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2017291066282421, |
|
"grad_norm": 29.05416335946149, |
|
"learning_rate": 4.999992091672379e-08, |
|
"logits/chosen": -2.0108678340911865, |
|
"logits/rejected": -2.0090978145599365, |
|
"logps/chosen": -0.9353054761886597, |
|
"logps/rejected": -1.0496169328689575, |
|
"loss": 1.6345, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.8706109523773193, |
|
"rewards/margins": 0.22862282395362854, |
|
"rewards/rejected": -2.099233865737915, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.20893371757925072, |
|
"grad_norm": 24.746568624535307, |
|
"learning_rate": 4.999715305459108e-08, |
|
"logits/chosen": -2.0434165000915527, |
|
"logits/rejected": -2.045293092727661, |
|
"logps/chosen": -1.0135209560394287, |
|
"logps/rejected": -1.1082584857940674, |
|
"loss": 1.6735, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0270419120788574, |
|
"rewards/margins": 0.18947532773017883, |
|
"rewards/rejected": -2.2165169715881348, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.21613832853025935, |
|
"grad_norm": 23.704466162259774, |
|
"learning_rate": 4.9990431528966836e-08, |
|
"logits/chosen": -2.0209240913391113, |
|
"logits/rejected": -2.012465476989746, |
|
"logps/chosen": -1.0895938873291016, |
|
"logps/rejected": -1.1909050941467285, |
|
"loss": 1.653, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.179187774658203, |
|
"rewards/margins": 0.2026222050189972, |
|
"rewards/rejected": -2.381810188293457, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22334293948126802, |
|
"grad_norm": 21.29734105171553, |
|
"learning_rate": 4.997975740295813e-08, |
|
"logits/chosen": -1.9576565027236938, |
|
"logits/rejected": -1.9576654434204102, |
|
"logps/chosen": -1.0862493515014648, |
|
"logps/rejected": -1.172272801399231, |
|
"loss": 1.6787, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1724987030029297, |
|
"rewards/margins": 0.17204709351062775, |
|
"rewards/rejected": -2.344545602798462, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.23054755043227665, |
|
"grad_norm": 18.963202937894597, |
|
"learning_rate": 4.996513236483331e-08, |
|
"logits/chosen": -2.033639907836914, |
|
"logits/rejected": -2.025113821029663, |
|
"logps/chosen": -1.008597493171692, |
|
"logps/rejected": -1.1411330699920654, |
|
"loss": 1.623, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.017194986343384, |
|
"rewards/margins": 0.26507094502449036, |
|
"rewards/rejected": -2.282266139984131, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2377521613832853, |
|
"grad_norm": 18.118259435513053, |
|
"learning_rate": 4.9946558727754974e-08, |
|
"logits/chosen": -2.013747453689575, |
|
"logits/rejected": -2.015979528427124, |
|
"logps/chosen": -1.0458552837371826, |
|
"logps/rejected": -1.0690838098526, |
|
"loss": 1.7903, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -2.0917105674743652, |
|
"rewards/margins": 0.04645707830786705, |
|
"rewards/rejected": -2.1381676197052, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.24495677233429394, |
|
"grad_norm": 21.713334152733406, |
|
"learning_rate": 4.9924039429414086e-08, |
|
"logits/chosen": -2.0605921745300293, |
|
"logits/rejected": -2.0548267364501953, |
|
"logps/chosen": -1.0870946645736694, |
|
"logps/rejected": -1.1672402620315552, |
|
"loss": 1.6817, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.174189329147339, |
|
"rewards/margins": 0.1602911800146103, |
|
"rewards/rejected": -2.3344805240631104, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2521613832853026, |
|
"grad_norm": 21.7230708429387, |
|
"learning_rate": 4.989757803156537e-08, |
|
"logits/chosen": -1.9891109466552734, |
|
"logits/rejected": -1.983432412147522, |
|
"logps/chosen": -0.988193690776825, |
|
"logps/rejected": -1.115260362625122, |
|
"loss": 1.6191, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.97638738155365, |
|
"rewards/margins": 0.2541332542896271, |
|
"rewards/rejected": -2.230520725250244, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.25936599423631124, |
|
"grad_norm": 24.29869182695538, |
|
"learning_rate": 4.986717871946393e-08, |
|
"logits/chosen": -1.995234727859497, |
|
"logits/rejected": -1.9911472797393799, |
|
"logps/chosen": -1.0861847400665283, |
|
"logps/rejected": -1.202515959739685, |
|
"loss": 1.6318, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.1723694801330566, |
|
"rewards/margins": 0.23266229033470154, |
|
"rewards/rejected": -2.40503191947937, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2665706051873199, |
|
"grad_norm": 21.151241640685495, |
|
"learning_rate": 4.983284630120331e-08, |
|
"logits/chosen": -2.0005943775177, |
|
"logits/rejected": -2.0005276203155518, |
|
"logps/chosen": -1.050954818725586, |
|
"logps/rejected": -1.180293083190918, |
|
"loss": 1.6091, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.101909637451172, |
|
"rewards/margins": 0.25867652893066406, |
|
"rewards/rejected": -2.360586166381836, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2737752161383285, |
|
"grad_norm": 18.702979404643415, |
|
"learning_rate": 4.979458620695505e-08, |
|
"logits/chosen": -2.0280709266662598, |
|
"logits/rejected": -2.0321407318115234, |
|
"logps/chosen": -1.01195228099823, |
|
"logps/rejected": -1.0852843523025513, |
|
"loss": 1.7127, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.02390456199646, |
|
"rewards/margins": 0.14666402339935303, |
|
"rewards/rejected": -2.1705687046051025, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.28097982708933716, |
|
"grad_norm": 18.04619323368983, |
|
"learning_rate": 4.975240448810977e-08, |
|
"logits/chosen": -2.0287792682647705, |
|
"logits/rejected": -2.0225093364715576, |
|
"logps/chosen": -1.0217763185501099, |
|
"logps/rejected": -1.1489847898483276, |
|
"loss": 1.614, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0435526371002197, |
|
"rewards/margins": 0.25441741943359375, |
|
"rewards/rejected": -2.2979695796966553, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2881844380403458, |
|
"grad_norm": 22.031189167579363, |
|
"learning_rate": 4.970630781632009e-08, |
|
"logits/chosen": -2.034381628036499, |
|
"logits/rejected": -2.034792423248291, |
|
"logps/chosen": -0.9954347610473633, |
|
"logps/rejected": -1.0486609935760498, |
|
"loss": 1.7224, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.9908695220947266, |
|
"rewards/margins": 0.10645285993814468, |
|
"rewards/rejected": -2.0973219871520996, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2953890489913545, |
|
"grad_norm": 21.234234457439968, |
|
"learning_rate": 4.965630348244542e-08, |
|
"logits/chosen": -2.0295231342315674, |
|
"logits/rejected": -2.027569532394409, |
|
"logps/chosen": -1.0738043785095215, |
|
"logps/rejected": -1.1459261178970337, |
|
"loss": 1.7042, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.147608757019043, |
|
"rewards/margins": 0.14424346387386322, |
|
"rewards/rejected": -2.2918522357940674, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3025936599423631, |
|
"grad_norm": 19.466288881985548, |
|
"learning_rate": 4.9602399395398786e-08, |
|
"logits/chosen": -2.0115177631378174, |
|
"logits/rejected": -2.0157418251037598, |
|
"logps/chosen": -1.0445759296417236, |
|
"logps/rejected": -1.1231411695480347, |
|
"loss": 1.6872, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0891518592834473, |
|
"rewards/margins": 0.15713071823120117, |
|
"rewards/rejected": -2.2462823390960693, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.30979827089337175, |
|
"grad_norm": 21.116060561215924, |
|
"learning_rate": 4.95446040808959e-08, |
|
"logits/chosen": -1.9870742559432983, |
|
"logits/rejected": -1.9879848957061768, |
|
"logps/chosen": -1.0581797361373901, |
|
"logps/rejected": -1.099675178527832, |
|
"loss": 1.7526, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -2.1163594722747803, |
|
"rewards/margins": 0.08299090713262558, |
|
"rewards/rejected": -2.199350357055664, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3170028818443804, |
|
"grad_norm": 18.736237528011962, |
|
"learning_rate": 4.948292668010676e-08, |
|
"logits/chosen": -1.9880409240722656, |
|
"logits/rejected": -1.988071084022522, |
|
"logps/chosen": -1.0214247703552246, |
|
"logps/rejected": -1.1438568830490112, |
|
"loss": 1.6286, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.042849540710449, |
|
"rewards/margins": 0.2448642998933792, |
|
"rewards/rejected": -2.2877137660980225, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3242074927953891, |
|
"grad_norm": 20.43882931641836, |
|
"learning_rate": 4.941737694820975e-08, |
|
"logits/chosen": -2.0112996101379395, |
|
"logits/rejected": -2.0076920986175537, |
|
"logps/chosen": -1.144315242767334, |
|
"logps/rejected": -1.1844433546066284, |
|
"loss": 1.7537, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.288630485534668, |
|
"rewards/margins": 0.08025630563497543, |
|
"rewards/rejected": -2.368886709213257, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3314121037463977, |
|
"grad_norm": 28.239708991055796, |
|
"learning_rate": 4.93479652528488e-08, |
|
"logits/chosen": -2.007514476776123, |
|
"logits/rejected": -2.0019874572753906, |
|
"logps/chosen": -1.1697793006896973, |
|
"logps/rejected": -1.2875298261642456, |
|
"loss": 1.6351, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.3395586013793945, |
|
"rewards/margins": 0.23550114035606384, |
|
"rewards/rejected": -2.575059652328491, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.33861671469740634, |
|
"grad_norm": 26.337152572859576, |
|
"learning_rate": 4.9274702572493555e-08, |
|
"logits/chosen": -2.040773868560791, |
|
"logits/rejected": -2.028566598892212, |
|
"logps/chosen": -1.0992854833602905, |
|
"logps/rejected": -1.2018510103225708, |
|
"loss": 1.6561, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.198570966720581, |
|
"rewards/margins": 0.20513089001178741, |
|
"rewards/rejected": -2.4037020206451416, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.345821325648415, |
|
"grad_norm": 25.04785849160215, |
|
"learning_rate": 4.9197600494702955e-08, |
|
"logits/chosen": -2.10146164894104, |
|
"logits/rejected": -2.091294765472412, |
|
"logps/chosen": -0.9840625524520874, |
|
"logps/rejected": -1.106227993965149, |
|
"loss": 1.6196, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.9681251049041748, |
|
"rewards/margins": 0.24433092772960663, |
|
"rewards/rejected": -2.212455987930298, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3530259365994236, |
|
"grad_norm": 21.442417002640138, |
|
"learning_rate": 4.9116671214292526e-08, |
|
"logits/chosen": -2.017040491104126, |
|
"logits/rejected": -2.0160226821899414, |
|
"logps/chosen": -0.98698490858078, |
|
"logps/rejected": -1.1014493703842163, |
|
"loss": 1.6301, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.97396981716156, |
|
"rewards/margins": 0.22892877459526062, |
|
"rewards/rejected": -2.2028987407684326, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.36023054755043227, |
|
"grad_norm": 20.760566848988777, |
|
"learning_rate": 4.903192753140557e-08, |
|
"logits/chosen": -2.019878387451172, |
|
"logits/rejected": -2.0063798427581787, |
|
"logps/chosen": -1.0110963582992554, |
|
"logps/rejected": -1.1396064758300781, |
|
"loss": 1.6125, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0221927165985107, |
|
"rewards/margins": 0.2570200562477112, |
|
"rewards/rejected": -2.2792129516601562, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36743515850144093, |
|
"grad_norm": 21.964679850201083, |
|
"learning_rate": 4.894338284948866e-08, |
|
"logits/chosen": -2.088066577911377, |
|
"logits/rejected": -2.081502676010132, |
|
"logps/chosen": -1.0429285764694214, |
|
"logps/rejected": -1.1580368280410767, |
|
"loss": 1.6406, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.0858571529388428, |
|
"rewards/margins": 0.23021626472473145, |
|
"rewards/rejected": -2.3160736560821533, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3746397694524496, |
|
"grad_norm": 18.869869897878324, |
|
"learning_rate": 4.8851051173171656e-08, |
|
"logits/chosen": -2.0141520500183105, |
|
"logits/rejected": -2.0126335620880127, |
|
"logps/chosen": -1.1057158708572388, |
|
"logps/rejected": -1.2025840282440186, |
|
"loss": 1.6603, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.2114317417144775, |
|
"rewards/margins": 0.1937362551689148, |
|
"rewards/rejected": -2.405168056488037, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3818443804034582, |
|
"grad_norm": 20.027593416077924, |
|
"learning_rate": 4.8754947106052696e-08, |
|
"logits/chosen": -1.9756828546524048, |
|
"logits/rejected": -1.9658939838409424, |
|
"logps/chosen": -0.983010470867157, |
|
"logps/rejected": -1.060978651046753, |
|
"loss": 1.6924, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.966020941734314, |
|
"rewards/margins": 0.15593627095222473, |
|
"rewards/rejected": -2.121957302093506, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.38904899135446686, |
|
"grad_norm": 25.513748888084603, |
|
"learning_rate": 4.865508584838841e-08, |
|
"logits/chosen": -2.0021884441375732, |
|
"logits/rejected": -1.9952195882797241, |
|
"logps/chosen": -1.02981436252594, |
|
"logps/rejected": -1.1320369243621826, |
|
"loss": 1.6593, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.05962872505188, |
|
"rewards/margins": 0.2044449746608734, |
|
"rewards/rejected": -2.2640738487243652, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3962536023054755, |
|
"grad_norm": 20.013888186622026, |
|
"learning_rate": 4.855148319468979e-08, |
|
"logits/chosen": -1.9607187509536743, |
|
"logits/rejected": -1.9611743688583374, |
|
"logps/chosen": -0.9991506338119507, |
|
"logps/rejected": -1.0771278142929077, |
|
"loss": 1.7089, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.9983012676239014, |
|
"rewards/margins": 0.15595442056655884, |
|
"rewards/rejected": -2.1542556285858154, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4034582132564842, |
|
"grad_norm": 19.766020419371234, |
|
"learning_rate": 4.8444155531224065e-08, |
|
"logits/chosen": -2.0277419090270996, |
|
"logits/rejected": -2.0282044410705566, |
|
"logps/chosen": -1.0792930126190186, |
|
"logps/rejected": -1.1801952123641968, |
|
"loss": 1.6624, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.158586025238037, |
|
"rewards/margins": 0.20180411636829376, |
|
"rewards/rejected": -2.3603904247283936, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4106628242074928, |
|
"grad_norm": 18.138026924157977, |
|
"learning_rate": 4.833311983342292e-08, |
|
"logits/chosen": -2.031890869140625, |
|
"logits/rejected": -2.0176992416381836, |
|
"logps/chosen": -1.0929630994796753, |
|
"logps/rejected": -1.2075343132019043, |
|
"loss": 1.6422, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.1859261989593506, |
|
"rewards/margins": 0.22914230823516846, |
|
"rewards/rejected": -2.4150686264038086, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.41786743515850144, |
|
"grad_norm": 21.60868876989794, |
|
"learning_rate": 4.821839366319768e-08, |
|
"logits/chosen": -2.0339295864105225, |
|
"logits/rejected": -2.0323901176452637, |
|
"logps/chosen": -0.9861429929733276, |
|
"logps/rejected": -1.0666794776916504, |
|
"loss": 1.6891, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.9722859859466553, |
|
"rewards/margins": 0.16107279062271118, |
|
"rewards/rejected": -2.133358955383301, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4250720461095101, |
|
"grad_norm": 24.05826502769421, |
|
"learning_rate": 4.8099995166161536e-08, |
|
"logits/chosen": -2.0223276615142822, |
|
"logits/rejected": -2.022588014602661, |
|
"logps/chosen": -1.0257574319839478, |
|
"logps/rejected": -1.1649348735809326, |
|
"loss": 1.5961, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0515148639678955, |
|
"rewards/margins": 0.27835482358932495, |
|
"rewards/rejected": -2.3298697471618652, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4322766570605187, |
|
"grad_norm": 22.540205396639372, |
|
"learning_rate": 4.797794306875963e-08, |
|
"logits/chosen": -2.0817413330078125, |
|
"logits/rejected": -2.0777947902679443, |
|
"logps/chosen": -1.032037615776062, |
|
"logps/rejected": -1.1749569177627563, |
|
"loss": 1.597, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.064075231552124, |
|
"rewards/margins": 0.28583866357803345, |
|
"rewards/rejected": -2.3499138355255127, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.43948126801152737, |
|
"grad_norm": 24.185799296259226, |
|
"learning_rate": 4.785225667530716e-08, |
|
"logits/chosen": -2.0293679237365723, |
|
"logits/rejected": -2.019531488418579, |
|
"logps/chosen": -1.0911657810211182, |
|
"logps/rejected": -1.1501439809799194, |
|
"loss": 1.7163, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.1823315620422363, |
|
"rewards/margins": 0.11795620620250702, |
|
"rewards/rejected": -2.300287961959839, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.44668587896253603, |
|
"grad_norm": 28.363607554135946, |
|
"learning_rate": 4.772295586493613e-08, |
|
"logits/chosen": -2.0295448303222656, |
|
"logits/rejected": -2.0222747325897217, |
|
"logps/chosen": -0.9944518804550171, |
|
"logps/rejected": -1.082177758216858, |
|
"loss": 1.6763, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.9889037609100342, |
|
"rewards/margins": 0.1754516065120697, |
|
"rewards/rejected": -2.164355516433716, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4538904899135447, |
|
"grad_norm": 23.047596709189968, |
|
"learning_rate": 4.759006108845116e-08, |
|
"logits/chosen": -2.039217472076416, |
|
"logits/rejected": -2.0391504764556885, |
|
"logps/chosen": -1.0260940790176392, |
|
"logps/rejected": -1.1543083190917969, |
|
"loss": 1.6159, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0521881580352783, |
|
"rewards/margins": 0.2564285397529602, |
|
"rewards/rejected": -2.3086166381835938, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 18.761921757090445, |
|
"learning_rate": 4.7453593365094926e-08, |
|
"logits/chosen": -1.9543355703353882, |
|
"logits/rejected": -1.955255150794983, |
|
"logps/chosen": -1.0088173151016235, |
|
"logps/rejected": -1.116272211074829, |
|
"loss": 1.6458, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.017634630203247, |
|
"rewards/margins": 0.21490976214408875, |
|
"rewards/rejected": -2.232544422149658, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.46829971181556196, |
|
"grad_norm": 24.54554910010224, |
|
"learning_rate": 4.731357427922361e-08, |
|
"logits/chosen": -2.053588390350342, |
|
"logits/rejected": -2.038914442062378, |
|
"logps/chosen": -1.0363117456436157, |
|
"logps/rejected": -1.1169908046722412, |
|
"loss": 1.6897, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.0726234912872314, |
|
"rewards/margins": 0.1613583266735077, |
|
"rewards/rejected": -2.2339816093444824, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4755043227665706, |
|
"grad_norm": 26.37090068258741, |
|
"learning_rate": 4.71700259768931e-08, |
|
"logits/chosen": -2.032832145690918, |
|
"logits/rejected": -2.0338807106018066, |
|
"logps/chosen": -1.0859205722808838, |
|
"logps/rejected": -1.174154281616211, |
|
"loss": 1.6831, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.1718411445617676, |
|
"rewards/margins": 0.17646734416484833, |
|
"rewards/rejected": -2.348308563232422, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4827089337175792, |
|
"grad_norm": 24.073708164903586, |
|
"learning_rate": 4.7022971162356176e-08, |
|
"logits/chosen": -2.0062692165374756, |
|
"logits/rejected": -1.9969465732574463, |
|
"logps/chosen": -1.060937523841858, |
|
"logps/rejected": -1.1657707691192627, |
|
"loss": 1.6525, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.121875047683716, |
|
"rewards/margins": 0.20966656506061554, |
|
"rewards/rejected": -2.3315415382385254, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4899135446685879, |
|
"grad_norm": 21.794836443162072, |
|
"learning_rate": 4.6872433094471577e-08, |
|
"logits/chosen": -1.962633490562439, |
|
"logits/rejected": -1.953546166419983, |
|
"logps/chosen": -1.0352494716644287, |
|
"logps/rejected": -1.1061433553695679, |
|
"loss": 1.7057, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0704989433288574, |
|
"rewards/margins": 0.1417877972126007, |
|
"rewards/rejected": -2.2122867107391357, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.49711815561959655, |
|
"grad_norm": 29.899603243129143, |
|
"learning_rate": 4.671843558302522e-08, |
|
"logits/chosen": -2.025979518890381, |
|
"logits/rejected": -2.020822525024414, |
|
"logps/chosen": -1.1030082702636719, |
|
"logps/rejected": -1.2090809345245361, |
|
"loss": 1.6608, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.2060165405273438, |
|
"rewards/margins": 0.21214473247528076, |
|
"rewards/rejected": -2.4181618690490723, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5043227665706052, |
|
"grad_norm": 23.0987112187863, |
|
"learning_rate": 4.656100298496439e-08, |
|
"logits/chosen": -2.0181777477264404, |
|
"logits/rejected": -2.0126051902770996, |
|
"logps/chosen": -1.017418622970581, |
|
"logps/rejected": -1.1510635614395142, |
|
"loss": 1.6139, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.034837245941162, |
|
"rewards/margins": 0.267289936542511, |
|
"rewards/rejected": -2.3021271228790283, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5115273775216138, |
|
"grad_norm": 21.978629277918778, |
|
"learning_rate": 4.640016020054527e-08, |
|
"logits/chosen": -1.9824316501617432, |
|
"logits/rejected": -1.979188323020935, |
|
"logps/chosen": -0.8954793810844421, |
|
"logps/rejected": -1.0217931270599365, |
|
"loss": 1.6279, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.7909587621688843, |
|
"rewards/margins": 0.25262752175331116, |
|
"rewards/rejected": -2.043586254119873, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5187319884726225, |
|
"grad_norm": 21.932059439411073, |
|
"learning_rate": 4.6235932669394676e-08, |
|
"logits/chosen": -2.011847972869873, |
|
"logits/rejected": -2.0056471824645996, |
|
"logps/chosen": -1.0409430265426636, |
|
"logps/rejected": -1.1656509637832642, |
|
"loss": 1.6179, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.081886053085327, |
|
"rewards/margins": 0.24941587448120117, |
|
"rewards/rejected": -2.3313019275665283, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5259365994236311, |
|
"grad_norm": 23.306210789596093, |
|
"learning_rate": 4.6068346366486325e-08, |
|
"logits/chosen": -2.013566732406616, |
|
"logits/rejected": -2.002554178237915, |
|
"logps/chosen": -1.0203325748443604, |
|
"logps/rejected": -1.104425072669983, |
|
"loss": 1.6898, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0406651496887207, |
|
"rewards/margins": 0.168185293674469, |
|
"rewards/rejected": -2.208850145339966, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5331412103746398, |
|
"grad_norm": 20.259951331338133, |
|
"learning_rate": 4.589742779803259e-08, |
|
"logits/chosen": -1.9962953329086304, |
|
"logits/rejected": -2.003087043762207, |
|
"logps/chosen": -1.017392635345459, |
|
"logps/rejected": -1.0792462825775146, |
|
"loss": 1.7292, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -2.034785270690918, |
|
"rewards/margins": 0.12370713800191879, |
|
"rewards/rejected": -2.1584925651550293, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5403458213256485, |
|
"grad_norm": 26.41919083462638, |
|
"learning_rate": 4.5723203997292146e-08, |
|
"logits/chosen": -2.014768123626709, |
|
"logits/rejected": -2.009498119354248, |
|
"logps/chosen": -1.0986969470977783, |
|
"logps/rejected": -1.1911667585372925, |
|
"loss": 1.6761, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.1973938941955566, |
|
"rewards/margins": 0.18493981659412384, |
|
"rewards/rejected": -2.382333517074585, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.547550432276657, |
|
"grad_norm": 22.856174101882264, |
|
"learning_rate": 4.554570252029421e-08, |
|
"logits/chosen": -1.9752864837646484, |
|
"logits/rejected": -1.9790761470794678, |
|
"logps/chosen": -1.0023285150527954, |
|
"logps/rejected": -1.1136945486068726, |
|
"loss": 1.6506, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.004657030105591, |
|
"rewards/margins": 0.22273211181163788, |
|
"rewards/rejected": -2.227389097213745, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5547550432276657, |
|
"grad_norm": 24.809677260951396, |
|
"learning_rate": 4.536495144148021e-08, |
|
"logits/chosen": -1.9749062061309814, |
|
"logits/rejected": -1.9776439666748047, |
|
"logps/chosen": -0.974290668964386, |
|
"logps/rejected": -1.1291836500167847, |
|
"loss": 1.5873, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.948581337928772, |
|
"rewards/margins": 0.3097858428955078, |
|
"rewards/rejected": -2.2583673000335693, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5619596541786743, |
|
"grad_norm": 25.374366616475292, |
|
"learning_rate": 4.518097934926339e-08, |
|
"logits/chosen": -1.9943599700927734, |
|
"logits/rejected": -1.993017554283142, |
|
"logps/chosen": -1.0385878086090088, |
|
"logps/rejected": -1.1217256784439087, |
|
"loss": 1.681, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0771756172180176, |
|
"rewards/margins": 0.16627538204193115, |
|
"rewards/rejected": -2.2434513568878174, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.569164265129683, |
|
"grad_norm": 20.014037840378, |
|
"learning_rate": 4.499381534150714e-08, |
|
"logits/chosen": -2.0125200748443604, |
|
"logits/rejected": -2.0065932273864746, |
|
"logps/chosen": -1.0741461515426636, |
|
"logps/rejected": -1.2335011959075928, |
|
"loss": 1.5826, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.148292303085327, |
|
"rewards/margins": 0.31871041655540466, |
|
"rewards/rejected": -2.4670023918151855, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5763688760806917, |
|
"grad_norm": 21.859452356308726, |
|
"learning_rate": 4.48034890209227e-08, |
|
"logits/chosen": -2.037662982940674, |
|
"logits/rejected": -2.0339465141296387, |
|
"logps/chosen": -1.0423095226287842, |
|
"logps/rejected": -1.1221187114715576, |
|
"loss": 1.6972, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0846190452575684, |
|
"rewards/margins": 0.1596187800168991, |
|
"rewards/rejected": -2.2442374229431152, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5835734870317003, |
|
"grad_norm": 18.638956420354962, |
|
"learning_rate": 4.4610030490387154e-08, |
|
"logits/chosen": -2.01869797706604, |
|
"logits/rejected": -2.0211963653564453, |
|
"logps/chosen": -1.0117915868759155, |
|
"logps/rejected": -1.1026709079742432, |
|
"loss": 1.6729, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.023583173751831, |
|
"rewards/margins": 0.18175864219665527, |
|
"rewards/rejected": -2.2053418159484863, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.590778097982709, |
|
"grad_norm": 24.338984987337728, |
|
"learning_rate": 4.4413470348182124e-08, |
|
"logits/chosen": -2.020242214202881, |
|
"logits/rejected": -2.0103182792663574, |
|
"logps/chosen": -1.0275824069976807, |
|
"logps/rejected": -1.1156085729599, |
|
"loss": 1.6885, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0551648139953613, |
|
"rewards/margins": 0.1760522723197937, |
|
"rewards/rejected": -2.2312171459198, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5979827089337176, |
|
"grad_norm": 21.286386599253383, |
|
"learning_rate": 4.421383968315427e-08, |
|
"logits/chosen": -2.000786781311035, |
|
"logits/rejected": -1.9987096786499023, |
|
"logps/chosen": -0.9581828117370605, |
|
"logps/rejected": -1.079776644706726, |
|
"loss": 1.6345, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.916365623474121, |
|
"rewards/margins": 0.24318790435791016, |
|
"rewards/rejected": -2.159553289413452, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6051873198847262, |
|
"grad_norm": 18.656428501584173, |
|
"learning_rate": 4.4011170069798126e-08, |
|
"logits/chosen": -2.02819561958313, |
|
"logits/rejected": -2.0282022953033447, |
|
"logps/chosen": -1.0859931707382202, |
|
"logps/rejected": -1.1599055528640747, |
|
"loss": 1.7042, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1719863414764404, |
|
"rewards/margins": 0.14782461524009705, |
|
"rewards/rejected": -2.3198111057281494, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6123919308357348, |
|
"grad_norm": 18.413528164156084, |
|
"learning_rate": 4.380549356326208e-08, |
|
"logits/chosen": -2.0375325679779053, |
|
"logits/rejected": -2.031755208969116, |
|
"logps/chosen": -1.0479528903961182, |
|
"logps/rejected": -1.1553928852081299, |
|
"loss": 1.6564, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0959057807922363, |
|
"rewards/margins": 0.21487931907176971, |
|
"rewards/rejected": -2.3107857704162598, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6195965417867435, |
|
"grad_norm": 20.693940592783264, |
|
"learning_rate": 4.359684269427848e-08, |
|
"logits/chosen": -2.009840726852417, |
|
"logits/rejected": -2.0057568550109863, |
|
"logps/chosen": -1.1601811647415161, |
|
"logps/rejected": -1.2186861038208008, |
|
"loss": 1.7248, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.3203623294830322, |
|
"rewards/margins": 0.11700980365276337, |
|
"rewards/rejected": -2.4373722076416016, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6268011527377522, |
|
"grad_norm": 23.036875621985878, |
|
"learning_rate": 4.3385250464018355e-08, |
|
"logits/chosen": -2.0459847450256348, |
|
"logits/rejected": -2.0400002002716064, |
|
"logps/chosen": -1.0027254819869995, |
|
"logps/rejected": -1.122941017150879, |
|
"loss": 1.6293, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.005450963973999, |
|
"rewards/margins": 0.2404308319091797, |
|
"rewards/rejected": -2.245882034301758, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6340057636887608, |
|
"grad_norm": 23.415166967310086, |
|
"learning_rate": 4.3170750338871806e-08, |
|
"logits/chosen": -2.034942626953125, |
|
"logits/rejected": -2.032872438430786, |
|
"logps/chosen": -1.0137414932250977, |
|
"logps/rejected": -1.0794751644134521, |
|
"loss": 1.7127, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0274829864501953, |
|
"rewards/margins": 0.13146750628948212, |
|
"rewards/rejected": -2.1589503288269043, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6412103746397695, |
|
"grad_norm": 21.97241223292353, |
|
"learning_rate": 4.295337624515485e-08, |
|
"logits/chosen": -2.0610389709472656, |
|
"logits/rejected": -2.0595154762268066, |
|
"logps/chosen": -1.014024019241333, |
|
"logps/rejected": -1.1112914085388184, |
|
"loss": 1.6585, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.028048038482666, |
|
"rewards/margins": 0.1945347934961319, |
|
"rewards/rejected": -2.2225828170776367, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6484149855907781, |
|
"grad_norm": 23.8540421436762, |
|
"learning_rate": 4.273316256374342e-08, |
|
"logits/chosen": -1.9776846170425415, |
|
"logits/rejected": -1.9790922403335571, |
|
"logps/chosen": -1.14115309715271, |
|
"logps/rejected": -1.2139250040054321, |
|
"loss": 1.7115, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.28230619430542, |
|
"rewards/margins": 0.14554361999034882, |
|
"rewards/rejected": -2.4278500080108643, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6556195965417867, |
|
"grad_norm": 22.88947778871445, |
|
"learning_rate": 4.2510144124635605e-08, |
|
"logits/chosen": -1.9872970581054688, |
|
"logits/rejected": -1.9910697937011719, |
|
"logps/chosen": -1.0412156581878662, |
|
"logps/rejected": -1.1080083847045898, |
|
"loss": 1.7088, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.0824313163757324, |
|
"rewards/margins": 0.1335853785276413, |
|
"rewards/rejected": -2.2160167694091797, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6628242074927954, |
|
"grad_norm": 25.19603603966565, |
|
"learning_rate": 4.22843562014427e-08, |
|
"logits/chosen": -2.016618490219116, |
|
"logits/rejected": -2.0112040042877197, |
|
"logps/chosen": -0.9714315533638, |
|
"logps/rejected": -1.0625754594802856, |
|
"loss": 1.675, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.9428631067276, |
|
"rewards/margins": 0.18228788673877716, |
|
"rewards/rejected": -2.1251509189605713, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.670028818443804, |
|
"grad_norm": 20.60480228984638, |
|
"learning_rate": 4.205583450581023e-08, |
|
"logits/chosen": -2.050994873046875, |
|
"logits/rejected": -2.0482630729675293, |
|
"logps/chosen": -1.0324729681015015, |
|
"logps/rejected": -1.1509991884231567, |
|
"loss": 1.6261, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.064945936203003, |
|
"rewards/margins": 0.23705241084098816, |
|
"rewards/rejected": -2.3019983768463135, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6772334293948127, |
|
"grad_norm": 22.35823019195503, |
|
"learning_rate": 4.1824615181769577e-08, |
|
"logits/chosen": -2.0053532123565674, |
|
"logits/rejected": -2.0111613273620605, |
|
"logps/chosen": -1.1246191263198853, |
|
"logps/rejected": -1.1943109035491943, |
|
"loss": 1.6997, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.2492382526397705, |
|
"rewards/margins": 0.13938355445861816, |
|
"rewards/rejected": -2.3886218070983887, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6844380403458213, |
|
"grad_norm": 18.162143432621694, |
|
"learning_rate": 4.1590734800021354e-08, |
|
"logits/chosen": -1.9661105871200562, |
|
"logits/rejected": -1.9702253341674805, |
|
"logps/chosen": -1.0093214511871338, |
|
"logps/rejected": -1.1327455043792725, |
|
"loss": 1.633, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0186429023742676, |
|
"rewards/margins": 0.24684815108776093, |
|
"rewards/rejected": -2.265491008758545, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.69164265129683, |
|
"grad_norm": 23.965182376610645, |
|
"learning_rate": 4.1354230352151143e-08, |
|
"logits/chosen": -2.041020154953003, |
|
"logits/rejected": -2.040239095687866, |
|
"logps/chosen": -1.0478591918945312, |
|
"logps/rejected": -1.1593987941741943, |
|
"loss": 1.6429, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0957183837890625, |
|
"rewards/margins": 0.223079115152359, |
|
"rewards/rejected": -2.3187975883483887, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6988472622478387, |
|
"grad_norm": 24.464714064797324, |
|
"learning_rate": 4.111513924477878e-08, |
|
"logits/chosen": -2.043121337890625, |
|
"logits/rejected": -2.0391170978546143, |
|
"logps/chosen": -0.9660174250602722, |
|
"logps/rejected": -1.0939273834228516, |
|
"loss": 1.6115, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.9320348501205444, |
|
"rewards/margins": 0.2558196187019348, |
|
"rewards/rejected": -2.187854766845703, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7060518731988472, |
|
"grad_norm": 20.201261103575792, |
|
"learning_rate": 4.087349929364192e-08, |
|
"logits/chosen": -1.9678367376327515, |
|
"logits/rejected": -1.9675403833389282, |
|
"logps/chosen": -1.1067652702331543, |
|
"logps/rejected": -1.2355201244354248, |
|
"loss": 1.6115, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.2135305404663086, |
|
"rewards/margins": 0.25750917196273804, |
|
"rewards/rejected": -2.4710402488708496, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7132564841498559, |
|
"grad_norm": 25.954821168667177, |
|
"learning_rate": 4.062934871761497e-08, |
|
"logits/chosen": -2.0314321517944336, |
|
"logits/rejected": -2.0284628868103027, |
|
"logps/chosen": -1.1079853773117065, |
|
"logps/rejected": -1.2067363262176514, |
|
"loss": 1.6694, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.215970754623413, |
|
"rewards/margins": 0.19750212132930756, |
|
"rewards/rejected": -2.4134726524353027, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7204610951008645, |
|
"grad_norm": 22.341732516108657, |
|
"learning_rate": 4.038272613266419e-08, |
|
"logits/chosen": -2.0092735290527344, |
|
"logits/rejected": -2.0061213970184326, |
|
"logps/chosen": -1.00548255443573, |
|
"logps/rejected": -1.1272741556167603, |
|
"loss": 1.6421, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.01096510887146, |
|
"rewards/margins": 0.24358315765857697, |
|
"rewards/rejected": -2.2545483112335205, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7276657060518732, |
|
"grad_norm": 26.857801922416726, |
|
"learning_rate": 4.0133670545740014e-08, |
|
"logits/chosen": -2.0216879844665527, |
|
"logits/rejected": -2.0182127952575684, |
|
"logps/chosen": -0.9998480677604675, |
|
"logps/rejected": -1.0946764945983887, |
|
"loss": 1.6911, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -1.999696135520935, |
|
"rewards/margins": 0.1896568387746811, |
|
"rewards/rejected": -2.1893529891967773, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7348703170028819, |
|
"grad_norm": 20.733064848722005, |
|
"learning_rate": 3.988222134860755e-08, |
|
"logits/chosen": -2.016014337539673, |
|
"logits/rejected": -2.0111701488494873, |
|
"logps/chosen": -1.0305395126342773, |
|
"logps/rejected": -1.1271092891693115, |
|
"loss": 1.6525, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0610790252685547, |
|
"rewards/margins": 0.19313934445381165, |
|
"rewards/rejected": -2.254218578338623, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7420749279538905, |
|
"grad_norm": 20.161762859239953, |
|
"learning_rate": 3.962841831161617e-08, |
|
"logits/chosen": -1.9683122634887695, |
|
"logits/rejected": -1.9676278829574585, |
|
"logps/chosen": -1.0196747779846191, |
|
"logps/rejected": -1.1555341482162476, |
|
"loss": 1.6269, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.0393495559692383, |
|
"rewards/margins": 0.2717186510562897, |
|
"rewards/rejected": -2.311068296432495, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7492795389048992, |
|
"grad_norm": 20.049038068269237, |
|
"learning_rate": 3.937230157740931e-08, |
|
"logits/chosen": -2.0240588188171387, |
|
"logits/rejected": -2.018101692199707, |
|
"logps/chosen": -1.0107640027999878, |
|
"logps/rejected": -1.0943820476531982, |
|
"loss": 1.6788, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.0215280055999756, |
|
"rewards/margins": 0.1672360599040985, |
|
"rewards/rejected": -2.1887640953063965, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7564841498559077, |
|
"grad_norm": 21.02175811345582, |
|
"learning_rate": 3.9113911654575246e-08, |
|
"logits/chosen": -1.967104196548462, |
|
"logits/rejected": -1.9632108211517334, |
|
"logps/chosen": -0.935411810874939, |
|
"logps/rejected": -1.0687172412872314, |
|
"loss": 1.6138, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.870823621749878, |
|
"rewards/margins": 0.2666108012199402, |
|
"rewards/rejected": -2.137434482574463, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7636887608069164, |
|
"grad_norm": 21.519122032376863, |
|
"learning_rate": 3.885328941124014e-08, |
|
"logits/chosen": -1.9991518259048462, |
|
"logits/rejected": -1.993080496788025, |
|
"logps/chosen": -1.0368106365203857, |
|
"logps/rejected": -1.1475738286972046, |
|
"loss": 1.6499, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0736212730407715, |
|
"rewards/margins": 0.2215261161327362, |
|
"rewards/rejected": -2.295147657394409, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.770893371757925, |
|
"grad_norm": 23.262898735103672, |
|
"learning_rate": 3.8590476068604106e-08, |
|
"logits/chosen": -2.00036358833313, |
|
"logits/rejected": -1.998552918434143, |
|
"logps/chosen": -1.071908712387085, |
|
"logps/rejected": -1.2022292613983154, |
|
"loss": 1.6331, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.14381742477417, |
|
"rewards/margins": 0.2606413960456848, |
|
"rewards/rejected": -2.404458522796631, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7780979827089337, |
|
"grad_norm": 24.381883140284064, |
|
"learning_rate": 3.832551319442151e-08, |
|
"logits/chosen": -2.025217056274414, |
|
"logits/rejected": -2.0259487628936768, |
|
"logps/chosen": -1.0844666957855225, |
|
"logps/rejected": -1.1969218254089355, |
|
"loss": 1.6485, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.168933391571045, |
|
"rewards/margins": 0.2249099314212799, |
|
"rewards/rejected": -2.393843650817871, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7853025936599424, |
|
"grad_norm": 27.86719418333446, |
|
"learning_rate": 3.8058442696426404e-08, |
|
"logits/chosen": -2.0382745265960693, |
|
"logits/rejected": -2.030484676361084, |
|
"logps/chosen": -1.1074187755584717, |
|
"logps/rejected": -1.211817979812622, |
|
"loss": 1.6631, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.2148375511169434, |
|
"rewards/margins": 0.20879819989204407, |
|
"rewards/rejected": -2.423635959625244, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.792507204610951, |
|
"grad_norm": 27.071495780290135, |
|
"learning_rate": 3.7789306815704216e-08, |
|
"logits/chosen": -2.026120185852051, |
|
"logits/rejected": -2.020829916000366, |
|
"logps/chosen": -1.0431629419326782, |
|
"logps/rejected": -1.1798484325408936, |
|
"loss": 1.6059, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0863258838653564, |
|
"rewards/margins": 0.27337145805358887, |
|
"rewards/rejected": -2.359696865081787, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7997118155619597, |
|
"grad_norm": 18.88096690716272, |
|
"learning_rate": 3.7518148120010705e-08, |
|
"logits/chosen": -2.0271174907684326, |
|
"logits/rejected": -2.0201258659362793, |
|
"logps/chosen": -1.0074636936187744, |
|
"logps/rejected": -1.1304162740707397, |
|
"loss": 1.6266, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.014927387237549, |
|
"rewards/margins": 0.24590542912483215, |
|
"rewards/rejected": -2.2608325481414795, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8069164265129684, |
|
"grad_norm": 21.911720790106997, |
|
"learning_rate": 3.7245009497039244e-08, |
|
"logits/chosen": -1.9778887033462524, |
|
"logits/rejected": -1.9702438116073608, |
|
"logps/chosen": -1.045611023902893, |
|
"logps/rejected": -1.1082854270935059, |
|
"loss": 1.7162, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.091222047805786, |
|
"rewards/margins": 0.1253490000963211, |
|
"rewards/rejected": -2.2165708541870117, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8141210374639769, |
|
"grad_norm": 20.41807103685389, |
|
"learning_rate": 3.696993414763753e-08, |
|
"logits/chosen": -2.0123400688171387, |
|
"logits/rejected": -2.0082285404205322, |
|
"logps/chosen": -0.9978957176208496, |
|
"logps/rejected": -1.085761308670044, |
|
"loss": 1.6839, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -1.9957914352416992, |
|
"rewards/margins": 0.17573121190071106, |
|
"rewards/rejected": -2.171522617340088, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8213256484149856, |
|
"grad_norm": 19.092407867541205, |
|
"learning_rate": 3.66929655789747e-08, |
|
"logits/chosen": -2.0528008937835693, |
|
"logits/rejected": -2.051527738571167, |
|
"logps/chosen": -1.046097993850708, |
|
"logps/rejected": -1.165477991104126, |
|
"loss": 1.6311, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.092195987701416, |
|
"rewards/margins": 0.23876003921031952, |
|
"rewards/rejected": -2.330955982208252, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8285302593659942, |
|
"grad_norm": 21.260673310622927, |
|
"learning_rate": 3.64141475976601e-08, |
|
"logits/chosen": -2.041018009185791, |
|
"logits/rejected": -2.0345215797424316, |
|
"logps/chosen": -1.0739690065383911, |
|
"logps/rejected": -1.1673578023910522, |
|
"loss": 1.6763, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.1479380130767822, |
|
"rewards/margins": 0.18677765130996704, |
|
"rewards/rejected": -2.3347156047821045, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8357348703170029, |
|
"grad_norm": 26.34484214118367, |
|
"learning_rate": 3.61335243028146e-08, |
|
"logits/chosen": -1.9964382648468018, |
|
"logits/rejected": -1.9950309991836548, |
|
"logps/chosen": -1.0375313758850098, |
|
"logps/rejected": -1.1169970035552979, |
|
"loss": 1.706, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0750627517700195, |
|
"rewards/margins": 0.15893153846263885, |
|
"rewards/rejected": -2.2339940071105957, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8429394812680115, |
|
"grad_norm": 24.32927550103409, |
|
"learning_rate": 3.585114007909562e-08, |
|
"logits/chosen": -1.9961084127426147, |
|
"logits/rejected": -1.9874632358551025, |
|
"logps/chosen": -1.0144343376159668, |
|
"logps/rejected": -1.1264407634735107, |
|
"loss": 1.6405, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.0288686752319336, |
|
"rewards/margins": 0.2240130603313446, |
|
"rewards/rejected": -2.2528815269470215, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8501440922190202, |
|
"grad_norm": 26.36978810424015, |
|
"learning_rate": 3.556703958967716e-08, |
|
"logits/chosen": -1.9620872735977173, |
|
"logits/rejected": -1.9580965042114258, |
|
"logps/chosen": -1.059852123260498, |
|
"logps/rejected": -1.1655702590942383, |
|
"loss": 1.6635, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.119704246520996, |
|
"rewards/margins": 0.21143603324890137, |
|
"rewards/rejected": -2.3311405181884766, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8573487031700289, |
|
"grad_norm": 20.25819384222818, |
|
"learning_rate": 3.528126776918559e-08, |
|
"logits/chosen": -2.0512845516204834, |
|
"logits/rejected": -2.0443384647369385, |
|
"logps/chosen": -1.071276068687439, |
|
"logps/rejected": -1.1480839252471924, |
|
"loss": 1.6952, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.142552137374878, |
|
"rewards/margins": 0.15361574292182922, |
|
"rewards/rejected": -2.2961678504943848, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8645533141210374, |
|
"grad_norm": 25.590655270691236, |
|
"learning_rate": 3.499386981659262e-08, |
|
"logits/chosen": -1.9815905094146729, |
|
"logits/rejected": -1.9693584442138672, |
|
"logps/chosen": -1.086232304573059, |
|
"logps/rejected": -1.1749858856201172, |
|
"loss": 1.6708, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.172464609146118, |
|
"rewards/margins": 0.17750723659992218, |
|
"rewards/rejected": -2.3499717712402344, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8717579250720461, |
|
"grad_norm": 22.74687405755938, |
|
"learning_rate": 3.47048911880664e-08, |
|
"logits/chosen": -1.971374750137329, |
|
"logits/rejected": -1.979832410812378, |
|
"logps/chosen": -0.9375821352005005, |
|
"logps/rejected": -1.0928103923797607, |
|
"loss": 1.5833, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.875164270401001, |
|
"rewards/margins": 0.31045612692832947, |
|
"rewards/rejected": -2.1856207847595215, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8789625360230547, |
|
"grad_norm": 20.348332311917044, |
|
"learning_rate": 3.4414377589782e-08, |
|
"logits/chosen": -1.998797059059143, |
|
"logits/rejected": -1.9944807291030884, |
|
"logps/chosen": -1.008535623550415, |
|
"logps/rejected": -1.1152924299240112, |
|
"loss": 1.6564, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.01707124710083, |
|
"rewards/margins": 0.21351365745067596, |
|
"rewards/rejected": -2.2305848598480225, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8861671469740634, |
|
"grad_norm": 20.10685961084486, |
|
"learning_rate": 3.412237497069226e-08, |
|
"logits/chosen": -1.9737958908081055, |
|
"logits/rejected": -1.9617822170257568, |
|
"logps/chosen": -0.9835951924324036, |
|
"logps/rejected": -1.0770342350006104, |
|
"loss": 1.6699, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.9671903848648071, |
|
"rewards/margins": 0.1868787556886673, |
|
"rewards/rejected": -2.1540684700012207, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8933717579250721, |
|
"grad_norm": 24.001680453622406, |
|
"learning_rate": 3.382892951526036e-08, |
|
"logits/chosen": -1.9840329885482788, |
|
"logits/rejected": -1.9820177555084229, |
|
"logps/chosen": -0.9354456067085266, |
|
"logps/rejected": -1.0733263492584229, |
|
"loss": 1.6076, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.8708912134170532, |
|
"rewards/margins": 0.2757616937160492, |
|
"rewards/rejected": -2.1466526985168457, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9005763688760807, |
|
"grad_norm": 21.463794442952526, |
|
"learning_rate": 3.353408763615502e-08, |
|
"logits/chosen": -2.019768238067627, |
|
"logits/rejected": -2.0219955444335938, |
|
"logps/chosen": -1.0638011693954468, |
|
"logps/rejected": -1.219588041305542, |
|
"loss": 1.5926, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.1276023387908936, |
|
"rewards/margins": 0.31157371401786804, |
|
"rewards/rejected": -2.439176082611084, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9077809798270894, |
|
"grad_norm": 28.944721871130742, |
|
"learning_rate": 3.323789596690971e-08, |
|
"logits/chosen": -2.0201096534729004, |
|
"logits/rejected": -2.02502703666687, |
|
"logps/chosen": -1.1166651248931885, |
|
"logps/rejected": -1.2446470260620117, |
|
"loss": 1.6227, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.233330249786377, |
|
"rewards/margins": 0.25596344470977783, |
|
"rewards/rejected": -2.4892940521240234, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9149855907780979, |
|
"grad_norm": 20.87871503781233, |
|
"learning_rate": 3.294040135454681e-08, |
|
"logits/chosen": -1.9817100763320923, |
|
"logits/rejected": -1.975229024887085, |
|
"logps/chosen": -0.9639909863471985, |
|
"logps/rejected": -1.0865840911865234, |
|
"loss": 1.6229, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.927981972694397, |
|
"rewards/margins": 0.2451862096786499, |
|
"rewards/rejected": -2.173168182373047, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"grad_norm": 24.375366521893024, |
|
"learning_rate": 3.264165085216817e-08, |
|
"logits/chosen": -2.073403835296631, |
|
"logits/rejected": -2.0691773891448975, |
|
"logps/chosen": -1.0255587100982666, |
|
"logps/rejected": -1.1626403331756592, |
|
"loss": 1.6102, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.051117420196533, |
|
"rewards/margins": 0.2741633653640747, |
|
"rewards/rejected": -2.3252806663513184, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9293948126801153, |
|
"grad_norm": 19.708714819889934, |
|
"learning_rate": 3.2341691711512854e-08, |
|
"logits/chosen": -2.0348494052886963, |
|
"logits/rejected": -2.0337963104248047, |
|
"logps/chosen": -0.993812084197998, |
|
"logps/rejected": -1.0996659994125366, |
|
"loss": 1.648, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.987624168395996, |
|
"rewards/margins": 0.2117079198360443, |
|
"rewards/rejected": -2.1993319988250732, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9365994236311239, |
|
"grad_norm": 25.666809748463557, |
|
"learning_rate": 3.204057137548371e-08, |
|
"logits/chosen": -2.0750679969787598, |
|
"logits/rejected": -2.077117919921875, |
|
"logps/chosen": -1.0476573705673218, |
|
"logps/rejected": -1.1119945049285889, |
|
"loss": 1.7181, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.0953147411346436, |
|
"rewards/margins": 0.12867406010627747, |
|
"rewards/rejected": -2.2239890098571777, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9438040345821326, |
|
"grad_norm": 24.64906958910084, |
|
"learning_rate": 3.173833747064351e-08, |
|
"logits/chosen": -2.0429582595825195, |
|
"logits/rejected": -2.0435373783111572, |
|
"logps/chosen": -0.9831833839416504, |
|
"logps/rejected": -1.0801106691360474, |
|
"loss": 1.6616, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9663667678833008, |
|
"rewards/margins": 0.19385461509227753, |
|
"rewards/rejected": -2.1602213382720947, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9510086455331412, |
|
"grad_norm": 24.10994908767819, |
|
"learning_rate": 3.143503779968213e-08, |
|
"logits/chosen": -2.0116159915924072, |
|
"logits/rejected": -2.005117893218994, |
|
"logps/chosen": -1.0751299858093262, |
|
"logps/rejected": -1.2202341556549072, |
|
"loss": 1.594, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1502599716186523, |
|
"rewards/margins": 0.29020795226097107, |
|
"rewards/rejected": -2.4404683113098145, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9582132564841499, |
|
"grad_norm": 17.63664818825136, |
|
"learning_rate": 3.113072033385589e-08, |
|
"logits/chosen": -2.0390655994415283, |
|
"logits/rejected": -2.0347390174865723, |
|
"logps/chosen": -1.0593435764312744, |
|
"logps/rejected": -1.1827442646026611, |
|
"loss": 1.6315, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.118687152862549, |
|
"rewards/margins": 0.24680104851722717, |
|
"rewards/rejected": -2.3654885292053223, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9654178674351584, |
|
"grad_norm": 29.367574205793645, |
|
"learning_rate": 3.082543320540015e-08, |
|
"logits/chosen": -2.010270357131958, |
|
"logits/rejected": -2.0109667778015137, |
|
"logps/chosen": -1.0154287815093994, |
|
"logps/rejected": -1.132817029953003, |
|
"loss": 1.641, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.030857563018799, |
|
"rewards/margins": 0.23477670550346375, |
|
"rewards/rejected": -2.265634059906006, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9726224783861671, |
|
"grad_norm": 18.34859679144091, |
|
"learning_rate": 3.051922469991655e-08, |
|
"logits/chosen": -1.9400978088378906, |
|
"logits/rejected": -1.9382463693618774, |
|
"logps/chosen": -1.0125986337661743, |
|
"logps/rejected": -1.087135910987854, |
|
"loss": 1.7066, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.0251972675323486, |
|
"rewards/margins": 0.149074524641037, |
|
"rewards/rejected": -2.174271821975708, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9798270893371758, |
|
"grad_norm": 18.971281667579635, |
|
"learning_rate": 3.0212143248735886e-08, |
|
"logits/chosen": -1.9810125827789307, |
|
"logits/rejected": -1.9789069890975952, |
|
"logps/chosen": -0.961616039276123, |
|
"logps/rejected": -1.0554723739624023, |
|
"loss": 1.676, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.923232078552246, |
|
"rewards/margins": 0.18771259486675262, |
|
"rewards/rejected": -2.1109447479248047, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9870317002881844, |
|
"grad_norm": 24.96298831746159, |
|
"learning_rate": 2.9904237421258046e-08, |
|
"logits/chosen": -2.00824236869812, |
|
"logits/rejected": -2.0035648345947266, |
|
"logps/chosen": -0.9997411966323853, |
|
"logps/rejected": -1.1224539279937744, |
|
"loss": 1.6216, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9994823932647705, |
|
"rewards/margins": 0.24542562663555145, |
|
"rewards/rejected": -2.244907855987549, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9942363112391931, |
|
"grad_norm": 23.916004353098554, |
|
"learning_rate": 2.9595555917269997e-08, |
|
"logits/chosen": -1.9720462560653687, |
|
"logits/rejected": -1.968483328819275, |
|
"logps/chosen": -1.0500491857528687, |
|
"logps/rejected": -1.1262010335922241, |
|
"loss": 1.6923, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.1000983715057373, |
|
"rewards/margins": 0.1523038148880005, |
|
"rewards/rejected": -2.2524020671844482, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.0014409221902016, |
|
"grad_norm": 31.544680289629245, |
|
"learning_rate": 2.928614755924327e-08, |
|
"logits/chosen": -2.049835443496704, |
|
"logits/rejected": -2.050297737121582, |
|
"logps/chosen": -1.0141699314117432, |
|
"logps/rejected": -1.12843656539917, |
|
"loss": 1.645, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0283398628234863, |
|
"rewards/margins": 0.22853314876556396, |
|
"rewards/rejected": -2.25687313079834, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.0086455331412103, |
|
"grad_norm": 22.171264495293624, |
|
"learning_rate": 2.8976061284611908e-08, |
|
"logits/chosen": -1.9490327835083008, |
|
"logits/rejected": -1.9464311599731445, |
|
"logps/chosen": -1.058870553970337, |
|
"logps/rejected": -1.195708990097046, |
|
"loss": 1.627, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.117741107940674, |
|
"rewards/margins": 0.27367717027664185, |
|
"rewards/rejected": -2.391417980194092, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.015850144092219, |
|
"grad_norm": 26.077670247013415, |
|
"learning_rate": 2.8665346138032327e-08, |
|
"logits/chosen": -1.9846904277801514, |
|
"logits/rejected": -1.9889650344848633, |
|
"logps/chosen": -1.0112468004226685, |
|
"logps/rejected": -1.1389497518539429, |
|
"loss": 1.6326, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.022493600845337, |
|
"rewards/margins": 0.2554059326648712, |
|
"rewards/rejected": -2.2778995037078857, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.0230547550432276, |
|
"grad_norm": 21.678779034806855, |
|
"learning_rate": 2.8354051263626227e-08, |
|
"logits/chosen": -2.014312505722046, |
|
"logits/rejected": -2.0092453956604004, |
|
"logps/chosen": -1.1152961254119873, |
|
"logps/rejected": -1.2331750392913818, |
|
"loss": 1.6569, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.2305922508239746, |
|
"rewards/margins": 0.23575782775878906, |
|
"rewards/rejected": -2.4663500785827637, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.0302593659942363, |
|
"grad_norm": 24.392573956049755, |
|
"learning_rate": 2.8042225897207648e-08, |
|
"logits/chosen": -2.05131196975708, |
|
"logits/rejected": -2.0500950813293457, |
|
"logps/chosen": -0.9399210214614868, |
|
"logps/rejected": -1.0278102159500122, |
|
"loss": 1.6851, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -1.8798420429229736, |
|
"rewards/margins": 0.17577846348285675, |
|
"rewards/rejected": -2.0556204319000244, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.037463976945245, |
|
"grad_norm": 26.584656132405183, |
|
"learning_rate": 2.7729919358495728e-08, |
|
"logits/chosen": -2.0092933177948, |
|
"logits/rejected": -2.0025553703308105, |
|
"logps/chosen": -1.1368526220321655, |
|
"logps/rejected": -1.2202892303466797, |
|
"loss": 1.6972, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.273705244064331, |
|
"rewards/margins": 0.16687336564064026, |
|
"rewards/rejected": -2.4405784606933594, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.0446685878962536, |
|
"grad_norm": 19.430388178160936, |
|
"learning_rate": 2.741718104331393e-08, |
|
"logits/chosen": -2.06870698928833, |
|
"logits/rejected": -2.0779881477355957, |
|
"logps/chosen": -0.9939098358154297, |
|
"logps/rejected": -1.1466522216796875, |
|
"loss": 1.593, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9878196716308594, |
|
"rewards/margins": 0.30548471212387085, |
|
"rewards/rejected": -2.293304443359375, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.0518731988472623, |
|
"grad_norm": 19.07915573550666, |
|
"learning_rate": 2.710406041577751e-08, |
|
"logits/chosen": -1.9887897968292236, |
|
"logits/rejected": -1.9764807224273682, |
|
"logps/chosen": -0.9991080164909363, |
|
"logps/rejected": -1.1605703830718994, |
|
"loss": 1.568, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.9982160329818726, |
|
"rewards/margins": 0.3229246735572815, |
|
"rewards/rejected": -2.321140766143799, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.059077809798271, |
|
"grad_norm": 19.453172319867473, |
|
"learning_rate": 2.679060700046994e-08, |
|
"logits/chosen": -2.0260889530181885, |
|
"logits/rejected": -2.0165085792541504, |
|
"logps/chosen": -0.9590311050415039, |
|
"logps/rejected": -1.092279076576233, |
|
"loss": 1.6143, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.9180622100830078, |
|
"rewards/margins": 0.266495943069458, |
|
"rewards/rejected": -2.184558153152466, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.0662824207492796, |
|
"grad_norm": 19.938817408083903, |
|
"learning_rate": 2.647687037460996e-08, |
|
"logits/chosen": -1.9772266149520874, |
|
"logits/rejected": -1.9762458801269531, |
|
"logps/chosen": -1.005311369895935, |
|
"logps/rejected": -1.1241614818572998, |
|
"loss": 1.6424, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.01062273979187, |
|
"rewards/margins": 0.23770026862621307, |
|
"rewards/rejected": -2.2483229637145996, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.0734870317002883, |
|
"grad_norm": 22.895606799094736, |
|
"learning_rate": 2.616290016021016e-08, |
|
"logits/chosen": -1.9965251684188843, |
|
"logits/rejected": -1.993642807006836, |
|
"logps/chosen": -1.1106324195861816, |
|
"logps/rejected": -1.1621644496917725, |
|
"loss": 1.736, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.2212648391723633, |
|
"rewards/margins": 0.10306410491466522, |
|
"rewards/rejected": -2.324328899383545, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.080691642651297, |
|
"grad_norm": 21.445560487885672, |
|
"learning_rate": 2.584874601622854e-08, |
|
"logits/chosen": -1.999669075012207, |
|
"logits/rejected": -1.986670732498169, |
|
"logps/chosen": -1.0090255737304688, |
|
"logps/rejected": -1.12135910987854, |
|
"loss": 1.6376, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0180511474609375, |
|
"rewards/margins": 0.2246670424938202, |
|
"rewards/rejected": -2.24271821975708, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0878962536023056, |
|
"grad_norm": 20.277040631917046, |
|
"learning_rate": 2.5534457630714267e-08, |
|
"logits/chosen": -2.044276714324951, |
|
"logits/rejected": -2.0462608337402344, |
|
"logps/chosen": -1.014711618423462, |
|
"logps/rejected": -1.1253163814544678, |
|
"loss": 1.65, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.029423236846924, |
|
"rewards/margins": 0.22120928764343262, |
|
"rewards/rejected": -2.2506327629089355, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.0951008645533142, |
|
"grad_norm": 18.830969455717604, |
|
"learning_rate": 2.5220084712948764e-08, |
|
"logits/chosen": -1.985337495803833, |
|
"logits/rejected": -1.9856802225112915, |
|
"logps/chosen": -1.097121000289917, |
|
"logps/rejected": -1.1095194816589355, |
|
"loss": 1.7974, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -2.194242000579834, |
|
"rewards/margins": 0.024796944111585617, |
|
"rewards/rejected": -2.219038963317871, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.1023054755043227, |
|
"grad_norm": 20.22397260920369, |
|
"learning_rate": 2.490567698558343e-08, |
|
"logits/chosen": -2.030097723007202, |
|
"logits/rejected": -2.0213112831115723, |
|
"logps/chosen": -0.9496325254440308, |
|
"logps/rejected": -1.1169651746749878, |
|
"loss": 1.5633, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.8992650508880615, |
|
"rewards/margins": 0.3346652686595917, |
|
"rewards/rejected": -2.2339303493499756, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.1095100864553313, |
|
"grad_norm": 27.8479778302733, |
|
"learning_rate": 2.4591284176775326e-08, |
|
"logits/chosen": -2.033017158508301, |
|
"logits/rejected": -2.0267956256866455, |
|
"logps/chosen": -1.0237690210342407, |
|
"logps/rejected": -1.148371934890747, |
|
"loss": 1.6197, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.0475380420684814, |
|
"rewards/margins": 0.24920621514320374, |
|
"rewards/rejected": -2.296743869781494, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.11671469740634, |
|
"grad_norm": 21.41619902791393, |
|
"learning_rate": 2.4276956012321926e-08, |
|
"logits/chosen": -2.0108845233917236, |
|
"logits/rejected": -2.0055813789367676, |
|
"logps/chosen": -1.026641607284546, |
|
"logps/rejected": -1.133044719696045, |
|
"loss": 1.6573, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.053283214569092, |
|
"rewards/margins": 0.21280638873577118, |
|
"rewards/rejected": -2.26608943939209, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.1239193083573487, |
|
"grad_norm": 29.804837301654878, |
|
"learning_rate": 2.3962742207796268e-08, |
|
"logits/chosen": -2.0699496269226074, |
|
"logits/rejected": -2.0638415813446045, |
|
"logps/chosen": -1.0469386577606201, |
|
"logps/rejected": -1.1839386224746704, |
|
"loss": 1.6112, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0938773155212402, |
|
"rewards/margins": 0.2739998698234558, |
|
"rewards/rejected": -2.367877244949341, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.1311239193083573, |
|
"grad_norm": 19.072257470295916, |
|
"learning_rate": 2.364869246068368e-08, |
|
"logits/chosen": -2.049614429473877, |
|
"logits/rejected": -2.047759771347046, |
|
"logps/chosen": -1.0033305883407593, |
|
"logps/rejected": -1.1312639713287354, |
|
"loss": 1.6191, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0066611766815186, |
|
"rewards/margins": 0.255866676568985, |
|
"rewards/rejected": -2.2625279426574707, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.138328530259366, |
|
"grad_norm": 35.24129814573759, |
|
"learning_rate": 2.3334856442521435e-08, |
|
"logits/chosen": -2.021381139755249, |
|
"logits/rejected": -2.0155997276306152, |
|
"logps/chosen": -1.0470027923583984, |
|
"logps/rejected": -1.1944993734359741, |
|
"loss": 1.6022, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.094005584716797, |
|
"rewards/margins": 0.29499319195747375, |
|
"rewards/rejected": -2.3889987468719482, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.1455331412103746, |
|
"grad_norm": 19.787153457776164, |
|
"learning_rate": 2.3021283791042474e-08, |
|
"logits/chosen": -1.9919068813323975, |
|
"logits/rejected": -1.9873685836791992, |
|
"logps/chosen": -0.9659102559089661, |
|
"logps/rejected": -1.1006077527999878, |
|
"loss": 1.6015, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.9318205118179321, |
|
"rewards/margins": 0.26939502358436584, |
|
"rewards/rejected": -2.2012155055999756, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.1527377521613833, |
|
"grad_norm": 24.041299463029606, |
|
"learning_rate": 2.2708024102324454e-08, |
|
"logits/chosen": -2.0335323810577393, |
|
"logits/rejected": -2.0249056816101074, |
|
"logps/chosen": -1.0667665004730225, |
|
"logps/rejected": -1.1801570653915405, |
|
"loss": 1.6442, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.133533000946045, |
|
"rewards/margins": 0.2267809808254242, |
|
"rewards/rejected": -2.360314130783081, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.159942363112392, |
|
"grad_norm": 24.12111757342678, |
|
"learning_rate": 2.23951269229454e-08, |
|
"logits/chosen": -1.9858362674713135, |
|
"logits/rejected": -1.9893901348114014, |
|
"logps/chosen": -1.0084689855575562, |
|
"logps/rejected": -1.100411295890808, |
|
"loss": 1.6765, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0169379711151123, |
|
"rewards/margins": 0.18388447165489197, |
|
"rewards/rejected": -2.200822591781616, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.1671469740634006, |
|
"grad_norm": 24.80127928292751, |
|
"learning_rate": 2.2082641742147238e-08, |
|
"logits/chosen": -2.059333562850952, |
|
"logits/rejected": -2.060598373413086, |
|
"logps/chosen": -1.0564236640930176, |
|
"logps/rejected": -1.1844590902328491, |
|
"loss": 1.6208, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.112847328186035, |
|
"rewards/margins": 0.25607064366340637, |
|
"rewards/rejected": -2.3689181804656982, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.1743515850144093, |
|
"grad_norm": 20.13313537520615, |
|
"learning_rate": 2.177061798400832e-08, |
|
"logits/chosen": -1.9521719217300415, |
|
"logits/rejected": -1.9484403133392334, |
|
"logps/chosen": -1.0265686511993408, |
|
"logps/rejected": -1.0971999168395996, |
|
"loss": 1.7039, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0531373023986816, |
|
"rewards/margins": 0.14126275479793549, |
|
"rewards/rejected": -2.194399833679199, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.181556195965418, |
|
"grad_norm": 19.495579780217277, |
|
"learning_rate": 2.145910499962628e-08, |
|
"logits/chosen": -1.9787580966949463, |
|
"logits/rejected": -1.9796241521835327, |
|
"logps/chosen": -1.0164722204208374, |
|
"logps/rejected": -1.1089410781860352, |
|
"loss": 1.6796, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.032944440841675, |
|
"rewards/margins": 0.1849372386932373, |
|
"rewards/rejected": -2.2178821563720703, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.1887608069164266, |
|
"grad_norm": 25.48988945819277, |
|
"learning_rate": 2.1148152059312437e-08, |
|
"logits/chosen": -2.0072319507598877, |
|
"logits/rejected": -2.0051167011260986, |
|
"logps/chosen": -1.005936861038208, |
|
"logps/rejected": -1.077120304107666, |
|
"loss": 1.707, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.011873722076416, |
|
"rewards/margins": 0.14236697554588318, |
|
"rewards/rejected": -2.154240608215332, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.195965417867435, |
|
"grad_norm": 21.152990734847933, |
|
"learning_rate": 2.0837808344799028e-08, |
|
"logits/chosen": -1.9343931674957275, |
|
"logits/rejected": -1.938122034072876, |
|
"logps/chosen": -0.9725497961044312, |
|
"logps/rejected": -1.0786354541778564, |
|
"loss": 1.659, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9450995922088623, |
|
"rewards/margins": 0.21217119693756104, |
|
"rewards/rejected": -2.157270908355713, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.2031700288184437, |
|
"grad_norm": 26.377266925973135, |
|
"learning_rate": 2.052812294146033e-08, |
|
"logits/chosen": -2.03047776222229, |
|
"logits/rejected": -2.027374744415283, |
|
"logps/chosen": -1.0395774841308594, |
|
"logps/rejected": -1.1767067909240723, |
|
"loss": 1.6092, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0791549682617188, |
|
"rewards/margins": 0.27425867319107056, |
|
"rewards/rejected": -2.3534135818481445, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.2103746397694524, |
|
"grad_norm": 23.34792034910019, |
|
"learning_rate": 2.0219144830549163e-08, |
|
"logits/chosen": -1.971253752708435, |
|
"logits/rejected": -1.9633668661117554, |
|
"logps/chosen": -1.011054515838623, |
|
"logps/rejected": -1.148241400718689, |
|
"loss": 1.6033, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.022109031677246, |
|
"rewards/margins": 0.2743736207485199, |
|
"rewards/rejected": -2.296482801437378, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.217579250720461, |
|
"grad_norm": 21.51470689698241, |
|
"learning_rate": 1.9910922881449716e-08, |
|
"logits/chosen": -2.011819362640381, |
|
"logits/rejected": -2.01347017288208, |
|
"logps/chosen": -1.0252724885940552, |
|
"logps/rejected": -1.1493511199951172, |
|
"loss": 1.6223, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0505449771881104, |
|
"rewards/margins": 0.2481573075056076, |
|
"rewards/rejected": -2.2987022399902344, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.2247838616714697, |
|
"grad_norm": 26.201233097415667, |
|
"learning_rate": 1.9603505843948214e-08, |
|
"logits/chosen": -1.9847558736801147, |
|
"logits/rejected": -1.9857571125030518, |
|
"logps/chosen": -1.078355073928833, |
|
"logps/rejected": -1.2002760171890259, |
|
"loss": 1.6286, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.156710147857666, |
|
"rewards/margins": 0.2438419610261917, |
|
"rewards/rejected": -2.4005520343780518, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.2319884726224783, |
|
"grad_norm": 23.36257553020346, |
|
"learning_rate": 1.929694234052239e-08, |
|
"logits/chosen": -2.0325675010681152, |
|
"logits/rejected": -2.021353244781494, |
|
"logps/chosen": -0.9391233325004578, |
|
"logps/rejected": -1.0919990539550781, |
|
"loss": 1.5888, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.8782466650009155, |
|
"rewards/margins": 0.3057512640953064, |
|
"rewards/rejected": -2.1839981079101562, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.239193083573487, |
|
"grad_norm": 19.47841832843257, |
|
"learning_rate": 1.8991280858651157e-08, |
|
"logits/chosen": -1.9777787923812866, |
|
"logits/rejected": -1.9743425846099854, |
|
"logps/chosen": -0.9831492304801941, |
|
"logps/rejected": -1.1153368949890137, |
|
"loss": 1.6254, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9662984609603882, |
|
"rewards/margins": 0.2643755078315735, |
|
"rewards/rejected": -2.2306737899780273, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.2463976945244957, |
|
"grad_norm": 29.21200511886615, |
|
"learning_rate": 1.868656974314557e-08, |
|
"logits/chosen": -2.0204837322235107, |
|
"logits/rejected": -2.0202364921569824, |
|
"logps/chosen": -1.032915711402893, |
|
"logps/rejected": -1.1591503620147705, |
|
"loss": 1.6285, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.065831422805786, |
|
"rewards/margins": 0.2524695098400116, |
|
"rewards/rejected": -2.318300724029541, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.2536023054755043, |
|
"grad_norm": 19.748132847469815, |
|
"learning_rate": 1.8382857188502422e-08, |
|
"logits/chosen": -2.005788803100586, |
|
"logits/rejected": -2.0102851390838623, |
|
"logps/chosen": -1.0914397239685059, |
|
"logps/rejected": -1.2216801643371582, |
|
"loss": 1.6228, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1828794479370117, |
|
"rewards/margins": 0.26048025488853455, |
|
"rewards/rejected": -2.4433603286743164, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.260806916426513, |
|
"grad_norm": 21.739700200932262, |
|
"learning_rate": 1.8080191231281594e-08, |
|
"logits/chosen": -1.9720125198364258, |
|
"logits/rejected": -1.9585212469100952, |
|
"logps/chosen": -1.0438520908355713, |
|
"logps/rejected": -1.120289921760559, |
|
"loss": 1.7131, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.0877041816711426, |
|
"rewards/margins": 0.15287601947784424, |
|
"rewards/rejected": -2.240579843521118, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.2680115273775217, |
|
"grad_norm": 22.41910419325543, |
|
"learning_rate": 1.7778619742508345e-08, |
|
"logits/chosen": -2.0582499504089355, |
|
"logits/rejected": -2.063490390777588, |
|
"logps/chosen": -1.131137728691101, |
|
"logps/rejected": -1.2315350770950317, |
|
"loss": 1.6769, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.262275457382202, |
|
"rewards/margins": 0.20079448819160461, |
|
"rewards/rejected": -2.4630701541900635, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.2752161383285303, |
|
"grad_norm": 21.482297600422083, |
|
"learning_rate": 1.7478190420101796e-08, |
|
"logits/chosen": -2.036742687225342, |
|
"logits/rejected": -2.032109260559082, |
|
"logps/chosen": -1.050010085105896, |
|
"logps/rejected": -1.183793544769287, |
|
"loss": 1.6195, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.100020170211792, |
|
"rewards/margins": 0.2675670385360718, |
|
"rewards/rejected": -2.367587089538574, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.282420749279539, |
|
"grad_norm": 27.784893259936197, |
|
"learning_rate": 1.717895078133088e-08, |
|
"logits/chosen": -2.0239245891571045, |
|
"logits/rejected": -2.0181150436401367, |
|
"logps/chosen": -1.1388823986053467, |
|
"logps/rejected": -1.2011306285858154, |
|
"loss": 1.7186, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.2777647972106934, |
|
"rewards/margins": 0.12449675798416138, |
|
"rewards/rejected": -2.402261257171631, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.2896253602305476, |
|
"grad_norm": 15.507226212932258, |
|
"learning_rate": 1.688094815529873e-08, |
|
"logits/chosen": -1.978154182434082, |
|
"logits/rejected": -1.9796226024627686, |
|
"logps/chosen": -0.9750539660453796, |
|
"logps/rejected": -1.0688438415527344, |
|
"loss": 1.6649, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.9501079320907593, |
|
"rewards/margins": 0.18757975101470947, |
|
"rewards/rejected": -2.1376876831054688, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.2968299711815563, |
|
"grad_norm": 23.291152738627, |
|
"learning_rate": 1.658422967545693e-08, |
|
"logits/chosen": -2.0640625953674316, |
|
"logits/rejected": -2.0586647987365723, |
|
"logps/chosen": -1.0171083211898804, |
|
"logps/rejected": -1.207260251045227, |
|
"loss": 1.5423, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0342166423797607, |
|
"rewards/margins": 0.38030365109443665, |
|
"rewards/rejected": -2.414520502090454, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.304034582132565, |
|
"grad_norm": 24.79688067458518, |
|
"learning_rate": 1.6288842272150614e-08, |
|
"logits/chosen": -1.9829915761947632, |
|
"logits/rejected": -1.9848480224609375, |
|
"logps/chosen": -0.9952167272567749, |
|
"logps/rejected": -1.1160945892333984, |
|
"loss": 1.6428, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9904334545135498, |
|
"rewards/margins": 0.24175576865673065, |
|
"rewards/rejected": -2.232189178466797, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.3112391930835736, |
|
"grad_norm": 31.41860730573992, |
|
"learning_rate": 1.5994832665195853e-08, |
|
"logits/chosen": -2.014812469482422, |
|
"logits/rejected": -2.0102453231811523, |
|
"logps/chosen": -0.9722223281860352, |
|
"logps/rejected": -1.119134545326233, |
|
"loss": 1.5972, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9444446563720703, |
|
"rewards/margins": 0.29382452368736267, |
|
"rewards/rejected": -2.238269090652466, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.318443804034582, |
|
"grad_norm": 18.540859576791913, |
|
"learning_rate": 1.5702247356490134e-08, |
|
"logits/chosen": -1.985517144203186, |
|
"logits/rejected": -1.9945405721664429, |
|
"logps/chosen": -1.0170261859893799, |
|
"logps/rejected": -1.1490360498428345, |
|
"loss": 1.6267, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0340523719787598, |
|
"rewards/margins": 0.2640196681022644, |
|
"rewards/rejected": -2.298072099685669, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.3256484149855907, |
|
"grad_norm": 20.824160350380623, |
|
"learning_rate": 1.541113262265748e-08, |
|
"logits/chosen": -1.9940106868743896, |
|
"logits/rejected": -1.9816381931304932, |
|
"logps/chosen": -0.9921188354492188, |
|
"logps/rejected": -1.1125357151031494, |
|
"loss": 1.6419, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.9842376708984375, |
|
"rewards/margins": 0.2408340871334076, |
|
"rewards/rejected": -2.225071430206299, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.3328530259365994, |
|
"grad_norm": 20.85405330465015, |
|
"learning_rate": 1.5121534507729073e-08, |
|
"logits/chosen": -2.0356698036193848, |
|
"logits/rejected": -2.029043197631836, |
|
"logps/chosen": -0.9926624298095703, |
|
"logps/rejected": -1.119905710220337, |
|
"loss": 1.6287, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.9853248596191406, |
|
"rewards/margins": 0.25448688864707947, |
|
"rewards/rejected": -2.239811420440674, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.340057636887608, |
|
"grad_norm": 20.598069658664354, |
|
"learning_rate": 1.4833498815860756e-08, |
|
"logits/chosen": -2.0153400897979736, |
|
"logits/rejected": -2.012422800064087, |
|
"logps/chosen": -1.0502384901046753, |
|
"logps/rejected": -1.1984398365020752, |
|
"loss": 1.5909, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.1004769802093506, |
|
"rewards/margins": 0.2964025139808655, |
|
"rewards/rejected": -2.3968796730041504, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.3472622478386167, |
|
"grad_norm": 24.169363767878462, |
|
"learning_rate": 1.4547071104088443e-08, |
|
"logits/chosen": -1.9878826141357422, |
|
"logits/rejected": -1.976165771484375, |
|
"logps/chosen": -0.9248664975166321, |
|
"logps/rejected": -1.0936037302017212, |
|
"loss": 1.5519, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8497329950332642, |
|
"rewards/margins": 0.33747443556785583, |
|
"rewards/rejected": -2.1872074604034424, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.3544668587896254, |
|
"grad_norm": 22.28586277053279, |
|
"learning_rate": 1.4262296675122592e-08, |
|
"logits/chosen": -1.9980617761611938, |
|
"logits/rejected": -1.993934988975525, |
|
"logps/chosen": -1.0281975269317627, |
|
"logps/rejected": -1.1155600547790527, |
|
"loss": 1.6881, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0563950538635254, |
|
"rewards/margins": 0.1747252643108368, |
|
"rewards/rejected": -2.2311201095581055, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.361671469740634, |
|
"grad_norm": 25.91029433001521, |
|
"learning_rate": 1.3979220570182902e-08, |
|
"logits/chosen": -1.9705870151519775, |
|
"logits/rejected": -1.9714816808700562, |
|
"logps/chosen": -1.0206265449523926, |
|
"logps/rejected": -1.153298258781433, |
|
"loss": 1.612, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.041253089904785, |
|
"rewards/margins": 0.26534369587898254, |
|
"rewards/rejected": -2.306596517562866, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.3688760806916427, |
|
"grad_norm": 18.134528292331787, |
|
"learning_rate": 1.369788756187445e-08, |
|
"logits/chosen": -1.9956543445587158, |
|
"logits/rejected": -2.0001983642578125, |
|
"logps/chosen": -1.0346391201019287, |
|
"logps/rejected": -1.1629480123519897, |
|
"loss": 1.6253, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0692782402038574, |
|
"rewards/margins": 0.2566176652908325, |
|
"rewards/rejected": -2.3258960247039795, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.3760806916426513, |
|
"grad_norm": 24.41945869354139, |
|
"learning_rate": 1.3418342147106212e-08, |
|
"logits/chosen": -2.027116537094116, |
|
"logits/rejected": -2.0312001705169678, |
|
"logps/chosen": -1.052958607673645, |
|
"logps/rejected": -1.2006646394729614, |
|
"loss": 1.5902, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.10591721534729, |
|
"rewards/margins": 0.29541200399398804, |
|
"rewards/rejected": -2.401329278945923, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.38328530259366, |
|
"grad_norm": 20.07983364842535, |
|
"learning_rate": 1.3140628540053218e-08, |
|
"logits/chosen": -2.0397636890411377, |
|
"logits/rejected": -2.039858341217041, |
|
"logps/chosen": -0.9341287612915039, |
|
"logps/rejected": -1.1015546321868896, |
|
"loss": 1.5639, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.8682575225830078, |
|
"rewards/margins": 0.334852010011673, |
|
"rewards/rejected": -2.2031092643737793, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.3904899135446687, |
|
"grad_norm": 21.51361282786986, |
|
"learning_rate": 1.286479066516345e-08, |
|
"logits/chosen": -1.9665225744247437, |
|
"logits/rejected": -1.9671493768692017, |
|
"logps/chosen": -1.0339243412017822, |
|
"logps/rejected": -1.096381425857544, |
|
"loss": 1.7206, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.0678486824035645, |
|
"rewards/margins": 0.12491452693939209, |
|
"rewards/rejected": -2.192762851715088, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.397694524495677, |
|
"grad_norm": 18.366780492328118, |
|
"learning_rate": 1.2590872150210574e-08, |
|
"logits/chosen": -2.0531787872314453, |
|
"logits/rejected": -2.0568125247955322, |
|
"logps/chosen": -0.9896559715270996, |
|
"logps/rejected": -1.0928928852081299, |
|
"loss": 1.6574, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9793119430541992, |
|
"rewards/margins": 0.20647385716438293, |
|
"rewards/rejected": -2.1857857704162598, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.4048991354466858, |
|
"grad_norm": 20.45388742088791, |
|
"learning_rate": 1.2318916319393555e-08, |
|
"logits/chosen": -2.0167720317840576, |
|
"logits/rejected": -2.011418581008911, |
|
"logps/chosen": -0.9759003520011902, |
|
"logps/rejected": -1.079329490661621, |
|
"loss": 1.6557, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9518007040023804, |
|
"rewards/margins": 0.20685818791389465, |
|
"rewards/rejected": -2.158658981323242, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.4121037463976944, |
|
"grad_norm": 22.381857191050415, |
|
"learning_rate": 1.2048966186484282e-08, |
|
"logits/chosen": -1.9989935159683228, |
|
"logits/rejected": -1.995570421218872, |
|
"logps/chosen": -1.0002979040145874, |
|
"logps/rejected": -1.1596596240997314, |
|
"loss": 1.5749, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.000595808029175, |
|
"rewards/margins": 0.3187234103679657, |
|
"rewards/rejected": -2.319319248199463, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.419308357348703, |
|
"grad_norm": 26.68547824879629, |
|
"learning_rate": 1.1781064448024333e-08, |
|
"logits/chosen": -2.0441131591796875, |
|
"logits/rejected": -2.0380544662475586, |
|
"logps/chosen": -1.0262136459350586, |
|
"logps/rejected": -1.1702954769134521, |
|
"loss": 1.6003, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.052427291870117, |
|
"rewards/margins": 0.288163959980011, |
|
"rewards/rejected": -2.3405909538269043, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.4265129682997117, |
|
"grad_norm": 22.583692034377382, |
|
"learning_rate": 1.1515253476571923e-08, |
|
"logits/chosen": -2.01566481590271, |
|
"logits/rejected": -2.0159027576446533, |
|
"logps/chosen": -1.0137196779251099, |
|
"logps/rejected": -1.1474217176437378, |
|
"loss": 1.6281, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0274393558502197, |
|
"rewards/margins": 0.2674042582511902, |
|
"rewards/rejected": -2.2948434352874756, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.4337175792507204, |
|
"grad_norm": 20.384401270587638, |
|
"learning_rate": 1.1251575314000034e-08, |
|
"logits/chosen": -1.9947608709335327, |
|
"logits/rejected": -1.9923511743545532, |
|
"logps/chosen": -0.996396541595459, |
|
"logps/rejected": -1.1149427890777588, |
|
"loss": 1.6386, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.992793083190918, |
|
"rewards/margins": 0.23709246516227722, |
|
"rewards/rejected": -2.2298855781555176, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.440922190201729, |
|
"grad_norm": 18.925724233722455, |
|
"learning_rate": 1.0990071664846861e-08, |
|
"logits/chosen": -2.055290460586548, |
|
"logits/rejected": -2.047081232070923, |
|
"logps/chosen": -0.9910066723823547, |
|
"logps/rejected": -1.1598938703536987, |
|
"loss": 1.5608, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9820133447647095, |
|
"rewards/margins": 0.33777478337287903, |
|
"rewards/rejected": -2.3197877407073975, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.4481268011527377, |
|
"grad_norm": 19.05620802651582, |
|
"learning_rate": 1.0730783889719711e-08, |
|
"logits/chosen": -1.999265432357788, |
|
"logits/rejected": -1.992251992225647, |
|
"logps/chosen": -1.0045114755630493, |
|
"logps/rejected": -1.145806908607483, |
|
"loss": 1.6002, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0090229511260986, |
|
"rewards/margins": 0.2825910151004791, |
|
"rewards/rejected": -2.291613817214966, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.4553314121037464, |
|
"grad_norm": 21.429538223490898, |
|
"learning_rate": 1.0473752998753114e-08, |
|
"logits/chosen": -2.030794143676758, |
|
"logits/rejected": -2.0306010246276855, |
|
"logps/chosen": -1.0277831554412842, |
|
"logps/rejected": -1.1082103252410889, |
|
"loss": 1.6945, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.0555663108825684, |
|
"rewards/margins": 0.16085436940193176, |
|
"rewards/rejected": -2.2164206504821777, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.462536023054755, |
|
"grad_norm": 22.050870113308804, |
|
"learning_rate": 1.0219019645122575e-08, |
|
"logits/chosen": -2.045313835144043, |
|
"logits/rejected": -2.04082989692688, |
|
"logps/chosen": -0.9714498519897461, |
|
"logps/rejected": -1.1051620244979858, |
|
"loss": 1.6095, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.9428997039794922, |
|
"rewards/margins": 0.26742416620254517, |
|
"rewards/rejected": -2.2103240489959717, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.4697406340057637, |
|
"grad_norm": 26.157508425601968, |
|
"learning_rate": 9.966624118614611e-09, |
|
"logits/chosen": -2.032585620880127, |
|
"logits/rejected": -2.0330874919891357, |
|
"logps/chosen": -0.9972192645072937, |
|
"logps/rejected": -1.1311516761779785, |
|
"loss": 1.6103, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9944385290145874, |
|
"rewards/margins": 0.26786476373672485, |
|
"rewards/rejected": -2.262303352355957, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.4769452449567724, |
|
"grad_norm": 23.13602722680495, |
|
"learning_rate": 9.71660633925438e-09, |
|
"logits/chosen": -2.020906925201416, |
|
"logits/rejected": -2.011904001235962, |
|
"logps/chosen": -1.1174659729003906, |
|
"logps/rejected": -1.2822870016098022, |
|
"loss": 1.5902, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.2349319458007812, |
|
"rewards/margins": 0.32964205741882324, |
|
"rewards/rejected": -2.5645740032196045, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.484149855907781, |
|
"grad_norm": 24.5077374796, |
|
"learning_rate": 9.469005850991705e-09, |
|
"logits/chosen": -2.0096890926361084, |
|
"logits/rejected": -2.0027241706848145, |
|
"logps/chosen": -1.0285447835922241, |
|
"logps/rejected": -1.0884907245635986, |
|
"loss": 1.726, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.0570895671844482, |
|
"rewards/margins": 0.11989171802997589, |
|
"rewards/rejected": -2.1769814491271973, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.4913544668587897, |
|
"grad_norm": 28.391707951147488, |
|
"learning_rate": 9.223861815446682e-09, |
|
"logits/chosen": -2.0371224880218506, |
|
"logits/rejected": -2.022277593612671, |
|
"logps/chosen": -1.1392552852630615, |
|
"logps/rejected": -1.2324645519256592, |
|
"loss": 1.6607, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.278510570526123, |
|
"rewards/margins": 0.18641893565654755, |
|
"rewards/rejected": -2.4649291038513184, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.4985590778097984, |
|
"grad_norm": 21.381110932577513, |
|
"learning_rate": 8.981213005715627e-09, |
|
"logits/chosen": -1.949776291847229, |
|
"logits/rejected": -1.952210783958435, |
|
"logps/chosen": -1.0574856996536255, |
|
"logps/rejected": -1.1887096166610718, |
|
"loss": 1.6153, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.114971399307251, |
|
"rewards/margins": 0.26244813203811646, |
|
"rewards/rejected": -2.3774192333221436, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.505763688760807, |
|
"grad_norm": 22.794596171681757, |
|
"learning_rate": 8.741097800238617e-09, |
|
"logits/chosen": -2.036937713623047, |
|
"logits/rejected": -2.031627893447876, |
|
"logps/chosen": -1.0450177192687988, |
|
"logps/rejected": -1.1693742275238037, |
|
"loss": 1.6249, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0900354385375977, |
|
"rewards/margins": 0.24871286749839783, |
|
"rewards/rejected": -2.3387484550476074, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.5129682997118157, |
|
"grad_norm": 21.632409202600087, |
|
"learning_rate": 8.503554176729341e-09, |
|
"logits/chosen": -1.989061713218689, |
|
"logits/rejected": -1.9979686737060547, |
|
"logps/chosen": -0.9342214465141296, |
|
"logps/rejected": -1.0600559711456299, |
|
"loss": 1.6299, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.8684428930282593, |
|
"rewards/margins": 0.2516690790653229, |
|
"rewards/rejected": -2.1201119422912598, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.5201729106628243, |
|
"grad_norm": 24.73490951688704, |
|
"learning_rate": 8.268619706168376e-09, |
|
"logits/chosen": -1.9841842651367188, |
|
"logits/rejected": -1.9747326374053955, |
|
"logps/chosen": -1.015921950340271, |
|
"logps/rejected": -1.1286394596099854, |
|
"loss": 1.6542, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.031843900680542, |
|
"rewards/margins": 0.2254345417022705, |
|
"rewards/rejected": -2.2572789192199707, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.527377521613833, |
|
"grad_norm": 21.05879085590256, |
|
"learning_rate": 8.036331546860777e-09, |
|
"logits/chosen": -1.908087134361267, |
|
"logits/rejected": -1.9043972492218018, |
|
"logps/chosen": -1.0467476844787598, |
|
"logps/rejected": -1.1627973318099976, |
|
"loss": 1.6392, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0934953689575195, |
|
"rewards/margins": 0.232099249958992, |
|
"rewards/rejected": -2.325594663619995, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.5345821325648417, |
|
"grad_norm": 22.29777503336236, |
|
"learning_rate": 7.806726438559003e-09, |
|
"logits/chosen": -1.9843000173568726, |
|
"logits/rejected": -1.9898990392684937, |
|
"logps/chosen": -1.0579800605773926, |
|
"logps/rejected": -1.1668568849563599, |
|
"loss": 1.6476, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.115960121154785, |
|
"rewards/margins": 0.21775400638580322, |
|
"rewards/rejected": -2.3337137699127197, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.54178674351585, |
|
"grad_norm": 23.19617849497256, |
|
"learning_rate": 7.579840696651938e-09, |
|
"logits/chosen": -1.9837186336517334, |
|
"logits/rejected": -1.975818395614624, |
|
"logps/chosen": -1.062901258468628, |
|
"logps/rejected": -1.1917634010314941, |
|
"loss": 1.624, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.125802516937256, |
|
"rewards/margins": 0.25772443413734436, |
|
"rewards/rejected": -2.3835268020629883, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.5489913544668588, |
|
"grad_norm": 28.073689367264755, |
|
"learning_rate": 7.355710206421098e-09, |
|
"logits/chosen": -1.939091444015503, |
|
"logits/rejected": -1.9357961416244507, |
|
"logps/chosen": -1.0485239028930664, |
|
"logps/rejected": -1.1550390720367432, |
|
"loss": 1.6545, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.097047805786133, |
|
"rewards/margins": 0.2130306214094162, |
|
"rewards/rejected": -2.3100781440734863, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.5561959654178674, |
|
"grad_norm": 22.523519226977555, |
|
"learning_rate": 7.134370417364849e-09, |
|
"logits/chosen": -2.002406120300293, |
|
"logits/rejected": -2.003296136856079, |
|
"logps/chosen": -1.1109195947647095, |
|
"logps/rejected": -1.1835315227508545, |
|
"loss": 1.7081, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.221839189529419, |
|
"rewards/margins": 0.14522375166416168, |
|
"rewards/rejected": -2.367063045501709, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.563400576368876, |
|
"grad_norm": 21.935686976360966, |
|
"learning_rate": 6.915856337591572e-09, |
|
"logits/chosen": -1.994996428489685, |
|
"logits/rejected": -1.9938243627548218, |
|
"logps/chosen": -0.9442762136459351, |
|
"logps/rejected": -1.0544160604476929, |
|
"loss": 1.6479, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.8885524272918701, |
|
"rewards/margins": 0.22027930617332458, |
|
"rewards/rejected": -2.1088321208953857, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.5706051873198847, |
|
"grad_norm": 25.455047703257026, |
|
"learning_rate": 6.700202528282603e-09, |
|
"logits/chosen": -1.9900974035263062, |
|
"logits/rejected": -1.9881980419158936, |
|
"logps/chosen": -1.0776493549346924, |
|
"logps/rejected": -1.2152760028839111, |
|
"loss": 1.6069, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.1552987098693848, |
|
"rewards/margins": 0.27525344491004944, |
|
"rewards/rejected": -2.4305520057678223, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.5778097982708934, |
|
"grad_norm": 24.84944470319713, |
|
"learning_rate": 6.487443098225892e-09, |
|
"logits/chosen": -2.0501418113708496, |
|
"logits/rejected": -2.0468356609344482, |
|
"logps/chosen": -1.031200885772705, |
|
"logps/rejected": -1.1816002130508423, |
|
"loss": 1.5922, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.06240177154541, |
|
"rewards/margins": 0.30079856514930725, |
|
"rewards/rejected": -2.3632004261016846, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.585014409221902, |
|
"grad_norm": 20.909336414895183, |
|
"learning_rate": 6.277611698421179e-09, |
|
"logits/chosen": -2.01149845123291, |
|
"logits/rejected": -2.007169723510742, |
|
"logps/chosen": -1.0256855487823486, |
|
"logps/rejected": -1.1708935499191284, |
|
"loss": 1.5954, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0513710975646973, |
|
"rewards/margins": 0.29041624069213867, |
|
"rewards/rejected": -2.341787099838257, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.5922190201729105, |
|
"grad_norm": 20.206486553431677, |
|
"learning_rate": 6.070741516757608e-09, |
|
"logits/chosen": -2.0089163780212402, |
|
"logits/rejected": -2.0105767250061035, |
|
"logps/chosen": -1.0603289604187012, |
|
"logps/rejected": -1.1521384716033936, |
|
"loss": 1.6878, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.1206579208374023, |
|
"rewards/margins": 0.18361885845661163, |
|
"rewards/rejected": -2.304276943206787, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.5994236311239192, |
|
"grad_norm": 22.350708958975336, |
|
"learning_rate": 5.866865272764607e-09, |
|
"logits/chosen": -2.014432668685913, |
|
"logits/rejected": -2.0136168003082275, |
|
"logps/chosen": -1.0870046615600586, |
|
"logps/rejected": -1.2768774032592773, |
|
"loss": 1.5475, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.174009323120117, |
|
"rewards/margins": 0.3797454833984375, |
|
"rewards/rejected": -2.5537548065185547, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.6066282420749278, |
|
"grad_norm": 23.288337045237636, |
|
"learning_rate": 5.666015212436795e-09, |
|
"logits/chosen": -2.0216281414031982, |
|
"logits/rejected": -2.0152599811553955, |
|
"logps/chosen": -1.0820379257202148, |
|
"logps/rejected": -1.2009848356246948, |
|
"loss": 1.6402, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1640758514404297, |
|
"rewards/margins": 0.2378939837217331, |
|
"rewards/rejected": -2.4019696712493896, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.6138328530259365, |
|
"grad_norm": 27.01583069454552, |
|
"learning_rate": 5.46822310313379e-09, |
|
"logits/chosen": -1.9930493831634521, |
|
"logits/rejected": -1.9963033199310303, |
|
"logps/chosen": -0.9150048494338989, |
|
"logps/rejected": -1.0773169994354248, |
|
"loss": 1.5742, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.8300096988677979, |
|
"rewards/margins": 0.32462412118911743, |
|
"rewards/rejected": -2.1546339988708496, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.6210374639769451, |
|
"grad_norm": 21.967134306318375, |
|
"learning_rate": 5.273520228555767e-09, |
|
"logits/chosen": -2.0560572147369385, |
|
"logits/rejected": -2.0469000339508057, |
|
"logps/chosen": -1.0821588039398193, |
|
"logps/rejected": -1.2086551189422607, |
|
"loss": 1.6406, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.1643176078796387, |
|
"rewards/margins": 0.25299257040023804, |
|
"rewards/rejected": -2.4173102378845215, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.6282420749279538, |
|
"grad_norm": 24.925763712447488, |
|
"learning_rate": 5.081937383795484e-09, |
|
"logits/chosen": -1.9834489822387695, |
|
"logits/rejected": -1.9737904071807861, |
|
"logps/chosen": -0.9765061140060425, |
|
"logps/rejected": -1.092413306236267, |
|
"loss": 1.6417, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.953012228012085, |
|
"rewards/margins": 0.2318144291639328, |
|
"rewards/rejected": -2.184826612472534, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.6354466858789625, |
|
"grad_norm": 24.22018193079342, |
|
"learning_rate": 4.893504870467588e-09, |
|
"logits/chosen": -2.0181922912597656, |
|
"logits/rejected": -2.016724109649658, |
|
"logps/chosen": -1.0404140949249268, |
|
"logps/rejected": -1.1570017337799072, |
|
"loss": 1.6379, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0808281898498535, |
|
"rewards/margins": 0.2331748753786087, |
|
"rewards/rejected": -2.3140034675598145, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.6426512968299711, |
|
"grad_norm": 26.66835743730832, |
|
"learning_rate": 4.708252491915951e-09, |
|
"logits/chosen": -1.979501724243164, |
|
"logits/rejected": -1.9686695337295532, |
|
"logps/chosen": -1.1171302795410156, |
|
"logps/rejected": -1.232032060623169, |
|
"loss": 1.6298, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.2342605590820312, |
|
"rewards/margins": 0.22980327904224396, |
|
"rewards/rejected": -2.464064121246338, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.6498559077809798, |
|
"grad_norm": 22.00835339861606, |
|
"learning_rate": 4.526209548499877e-09, |
|
"logits/chosen": -1.9684820175170898, |
|
"logits/rejected": -1.9653692245483398, |
|
"logps/chosen": -1.0476069450378418, |
|
"logps/rejected": -1.1009780168533325, |
|
"loss": 1.7427, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0952138900756836, |
|
"rewards/margins": 0.10674209892749786, |
|
"rewards/rejected": -2.201956033706665, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.6570605187319885, |
|
"grad_norm": 24.136328386644493, |
|
"learning_rate": 4.347404832959775e-09, |
|
"logits/chosen": -2.0073463916778564, |
|
"logits/rejected": -1.9953422546386719, |
|
"logps/chosen": -1.00043785572052, |
|
"logps/rejected": -1.1069265604019165, |
|
"loss": 1.6592, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.00087571144104, |
|
"rewards/margins": 0.21297720074653625, |
|
"rewards/rejected": -2.213853120803833, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.6642651296829971, |
|
"grad_norm": 22.057010770613108, |
|
"learning_rate": 4.171866625863229e-09, |
|
"logits/chosen": -1.9719337224960327, |
|
"logits/rejected": -1.968071699142456, |
|
"logps/chosen": -1.0742835998535156, |
|
"logps/rejected": -1.1518973112106323, |
|
"loss": 1.7035, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.1485671997070312, |
|
"rewards/margins": 0.15522751212120056, |
|
"rewards/rejected": -2.3037946224212646, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.6714697406340058, |
|
"grad_norm": 24.46288263177118, |
|
"learning_rate": 3.9996226911319546e-09, |
|
"logits/chosen": -2.044360637664795, |
|
"logits/rejected": -2.044651746749878, |
|
"logps/chosen": -1.0162107944488525, |
|
"logps/rejected": -1.1387598514556885, |
|
"loss": 1.635, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.032421588897705, |
|
"rewards/margins": 0.24509792029857635, |
|
"rewards/rejected": -2.277519702911377, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.6786743515850144, |
|
"grad_norm": 22.36911873035069, |
|
"learning_rate": 3.830700271650567e-09, |
|
"logits/chosen": -2.045510768890381, |
|
"logits/rejected": -2.0477523803710938, |
|
"logps/chosen": -0.9628473520278931, |
|
"logps/rejected": -1.124125599861145, |
|
"loss": 1.5848, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9256947040557861, |
|
"rewards/margins": 0.3225559890270233, |
|
"rewards/rejected": -2.24825119972229, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.685878962536023, |
|
"grad_norm": 22.36024023960374, |
|
"learning_rate": 3.665126084957723e-09, |
|
"logits/chosen": -1.9869651794433594, |
|
"logits/rejected": -1.9849302768707275, |
|
"logps/chosen": -0.9548214077949524, |
|
"logps/rejected": -1.1073800325393677, |
|
"loss": 1.5961, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9096428155899048, |
|
"rewards/margins": 0.30511730909347534, |
|
"rewards/rejected": -2.2147600650787354, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.6930835734870318, |
|
"grad_norm": 26.20538266440962, |
|
"learning_rate": 3.502926319020327e-09, |
|
"logits/chosen": -1.953912377357483, |
|
"logits/rejected": -1.943302869796753, |
|
"logps/chosen": -1.060532569885254, |
|
"logps/rejected": -1.1716785430908203, |
|
"loss": 1.6529, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.121065139770508, |
|
"rewards/margins": 0.22229187190532684, |
|
"rewards/rejected": -2.3433570861816406, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.7002881844380404, |
|
"grad_norm": 21.248350350748197, |
|
"learning_rate": 3.3441266280915427e-09, |
|
"logits/chosen": -1.9893739223480225, |
|
"logits/rejected": -1.9806448221206665, |
|
"logps/chosen": -0.9349273443222046, |
|
"logps/rejected": -1.122243046760559, |
|
"loss": 1.5398, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8698546886444092, |
|
"rewards/margins": 0.3746311664581299, |
|
"rewards/rejected": -2.244486093521118, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.707492795389049, |
|
"grad_norm": 21.37718363400938, |
|
"learning_rate": 3.1887521286532023e-09, |
|
"logits/chosen": -2.0375638008117676, |
|
"logits/rejected": -2.030097484588623, |
|
"logps/chosen": -1.0926265716552734, |
|
"logps/rejected": -1.156776785850525, |
|
"loss": 1.7217, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.185253143310547, |
|
"rewards/margins": 0.12830035388469696, |
|
"rewards/rejected": -2.31355357170105, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.7146974063400577, |
|
"grad_norm": 22.764740803633266, |
|
"learning_rate": 3.0368273954432698e-09, |
|
"logits/chosen": -2.0525734424591064, |
|
"logits/rejected": -2.0523922443389893, |
|
"logps/chosen": -1.1625360250473022, |
|
"logps/rejected": -1.2549631595611572, |
|
"loss": 1.6774, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.3250720500946045, |
|
"rewards/margins": 0.18485429883003235, |
|
"rewards/rejected": -2.5099263191223145, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.7219020172910664, |
|
"grad_norm": 23.35653645378998, |
|
"learning_rate": 2.888376457568964e-09, |
|
"logits/chosen": -2.067924737930298, |
|
"logits/rejected": -2.062840461730957, |
|
"logps/chosen": -1.0438759326934814, |
|
"logps/rejected": -1.1699734926223755, |
|
"loss": 1.6262, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.087751865386963, |
|
"rewards/margins": 0.25219520926475525, |
|
"rewards/rejected": -2.339946985244751, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.729106628242075, |
|
"grad_norm": 18.117475149068106, |
|
"learning_rate": 2.7434227947062324e-09, |
|
"logits/chosen": -2.028148651123047, |
|
"logits/rejected": -2.022372245788574, |
|
"logps/chosen": -1.0291882753372192, |
|
"logps/rejected": -1.1964164972305298, |
|
"loss": 1.5828, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0583765506744385, |
|
"rewards/margins": 0.3344564139842987, |
|
"rewards/rejected": -2.3928329944610596, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.7363112391930837, |
|
"grad_norm": 25.38586926098497, |
|
"learning_rate": 2.6019893333860954e-09, |
|
"logits/chosen": -2.0227832794189453, |
|
"logits/rejected": -2.0191638469696045, |
|
"logps/chosen": -1.0688263177871704, |
|
"logps/rejected": -1.1506297588348389, |
|
"loss": 1.6948, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.137652635574341, |
|
"rewards/margins": 0.16360695660114288, |
|
"rewards/rejected": -2.3012595176696777, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.7435158501440924, |
|
"grad_norm": 23.482865925242617, |
|
"learning_rate": 2.4640984433684758e-09, |
|
"logits/chosen": -2.0008485317230225, |
|
"logits/rejected": -1.9945675134658813, |
|
"logps/chosen": -0.9947766065597534, |
|
"logps/rejected": -1.1175730228424072, |
|
"loss": 1.6519, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.9895532131195068, |
|
"rewards/margins": 0.24559286236763, |
|
"rewards/rejected": -2.2351460456848145, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.7507204610951008, |
|
"grad_norm": 21.083704816516438, |
|
"learning_rate": 2.3297719341040856e-09, |
|
"logits/chosen": -1.9876625537872314, |
|
"logits/rejected": -1.9809608459472656, |
|
"logps/chosen": -1.0115702152252197, |
|
"logps/rejected": -1.1943610906600952, |
|
"loss": 1.5407, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.0231404304504395, |
|
"rewards/margins": 0.3655821681022644, |
|
"rewards/rejected": -2.3887221813201904, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.7579250720461095, |
|
"grad_norm": 22.214397294857353, |
|
"learning_rate": 2.199031051284972e-09, |
|
"logits/chosen": -2.029468297958374, |
|
"logits/rejected": -2.0226287841796875, |
|
"logps/chosen": -1.0236752033233643, |
|
"logps/rejected": -1.0963797569274902, |
|
"loss": 1.7051, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0473504066467285, |
|
"rewards/margins": 0.14540909230709076, |
|
"rewards/rejected": -2.1927595138549805, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.7651296829971181, |
|
"grad_norm": 22.659240095278513, |
|
"learning_rate": 2.0718964734841667e-09, |
|
"logits/chosen": -2.0216195583343506, |
|
"logits/rejected": -2.0182180404663086, |
|
"logps/chosen": -1.1063746213912964, |
|
"logps/rejected": -1.191334843635559, |
|
"loss": 1.6945, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.2127492427825928, |
|
"rewards/margins": 0.16992013156414032, |
|
"rewards/rejected": -2.382669687271118, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.7723342939481268, |
|
"grad_norm": 17.951670482135615, |
|
"learning_rate": 1.948388308885102e-09, |
|
"logits/chosen": -2.0658774375915527, |
|
"logits/rejected": -2.0580222606658936, |
|
"logps/chosen": -0.9547419548034668, |
|
"logps/rejected": -1.0859801769256592, |
|
"loss": 1.6238, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9094839096069336, |
|
"rewards/margins": 0.2624765932559967, |
|
"rewards/rejected": -2.1719603538513184, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.7795389048991355, |
|
"grad_norm": 26.064153513909826, |
|
"learning_rate": 1.8285260921011846e-09, |
|
"logits/chosen": -2.047785520553589, |
|
"logits/rejected": -2.038276433944702, |
|
"logps/chosen": -1.1647402048110962, |
|
"logps/rejected": -1.2458956241607666, |
|
"loss": 1.7181, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.3294804096221924, |
|
"rewards/margins": 0.16231082379817963, |
|
"rewards/rejected": -2.491791248321533, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.7867435158501441, |
|
"grad_norm": 19.498652452320453, |
|
"learning_rate": 1.712328781086131e-09, |
|
"logits/chosen": -1.9989614486694336, |
|
"logits/rejected": -1.9959602355957031, |
|
"logps/chosen": -1.0413289070129395, |
|
"logps/rejected": -1.1756139993667603, |
|
"loss": 1.6205, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.082657814025879, |
|
"rewards/margins": 0.2685699760913849, |
|
"rewards/rejected": -2.3512279987335205, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.7939481268011528, |
|
"grad_norm": 19.579842521680934, |
|
"learning_rate": 1.59981475413547e-09, |
|
"logits/chosen": -1.982996940612793, |
|
"logits/rejected": -1.9784364700317383, |
|
"logps/chosen": -0.9387162923812866, |
|
"logps/rejected": -1.0598050355911255, |
|
"loss": 1.6288, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.8774325847625732, |
|
"rewards/margins": 0.24217729270458221, |
|
"rewards/rejected": -2.119610071182251, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.8011527377521612, |
|
"grad_norm": 21.279275035018046, |
|
"learning_rate": 1.491001806979772e-09, |
|
"logits/chosen": -1.986486792564392, |
|
"logits/rejected": -1.9824678897857666, |
|
"logps/chosen": -1.0033233165740967, |
|
"logps/rejected": -1.1795743703842163, |
|
"loss": 1.5452, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.0066466331481934, |
|
"rewards/margins": 0.35250231623649597, |
|
"rewards/rejected": -2.3591487407684326, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.8083573487031699, |
|
"grad_norm": 22.81377086234191, |
|
"learning_rate": 1.3859071499699698e-09, |
|
"logits/chosen": -1.9927501678466797, |
|
"logits/rejected": -1.9867427349090576, |
|
"logps/chosen": -1.0045326948165894, |
|
"logps/rejected": -1.1114981174468994, |
|
"loss": 1.6603, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0090653896331787, |
|
"rewards/margins": 0.2139308750629425, |
|
"rewards/rejected": -2.222996234893799, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.8155619596541785, |
|
"grad_norm": 22.243359575974143, |
|
"learning_rate": 1.2845474053553156e-09, |
|
"logits/chosen": -1.967153549194336, |
|
"logits/rejected": -1.9660823345184326, |
|
"logps/chosen": -1.0100979804992676, |
|
"logps/rejected": -1.1443712711334229, |
|
"loss": 1.6266, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.020195960998535, |
|
"rewards/margins": 0.26854681968688965, |
|
"rewards/rejected": -2.2887425422668457, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.8227665706051872, |
|
"grad_norm": 20.940981692691256, |
|
"learning_rate": 1.1869386046543222e-09, |
|
"logits/chosen": -1.9824377298355103, |
|
"logits/rejected": -1.9759935140609741, |
|
"logps/chosen": -1.0096970796585083, |
|
"logps/rejected": -1.1292235851287842, |
|
"loss": 1.6418, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0193941593170166, |
|
"rewards/margins": 0.23905305564403534, |
|
"rewards/rejected": -2.2584471702575684, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.8299711815561959, |
|
"grad_norm": 23.31599170849686, |
|
"learning_rate": 1.0930961861191302e-09, |
|
"logits/chosen": -1.979270339012146, |
|
"logits/rejected": -1.9758975505828857, |
|
"logps/chosen": -0.9248117208480835, |
|
"logps/rejected": -1.046858549118042, |
|
"loss": 1.6419, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.849623441696167, |
|
"rewards/margins": 0.2440938651561737, |
|
"rewards/rejected": -2.093717098236084, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.8371757925072045, |
|
"grad_norm": 25.087005895011544, |
|
"learning_rate": 1.003034992293733e-09, |
|
"logits/chosen": -2.0150609016418457, |
|
"logits/rejected": -2.0050504207611084, |
|
"logps/chosen": -0.9422048330307007, |
|
"logps/rejected": -1.1044690608978271, |
|
"loss": 1.5718, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8844096660614014, |
|
"rewards/margins": 0.3245285153388977, |
|
"rewards/rejected": -2.2089381217956543, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.8443804034582132, |
|
"grad_norm": 21.9012224134329, |
|
"learning_rate": 9.16769267666434e-10, |
|
"logits/chosen": -1.9812209606170654, |
|
"logits/rejected": -1.9651670455932617, |
|
"logps/chosen": -0.984574019908905, |
|
"logps/rejected": -1.0492041110992432, |
|
"loss": 1.7224, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.96914803981781, |
|
"rewards/margins": 0.12925995886325836, |
|
"rewards/rejected": -2.0984082221984863, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.8515850144092219, |
|
"grad_norm": 29.798850096367598, |
|
"learning_rate": 8.343126564168412e-10, |
|
"logits/chosen": -2.023308277130127, |
|
"logits/rejected": -2.0172171592712402, |
|
"logps/chosen": -1.0593010187149048, |
|
"logps/rejected": -1.1926862001419067, |
|
"loss": 1.6084, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.1186020374298096, |
|
"rewards/margins": 0.2667704224586487, |
|
"rewards/rejected": -2.3853724002838135, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.8587896253602305, |
|
"grad_norm": 23.081298452800397, |
|
"learning_rate": 7.55678200257856e-10, |
|
"logits/chosen": -1.986285924911499, |
|
"logits/rejected": -1.9805923700332642, |
|
"logps/chosen": -1.0580508708953857, |
|
"logps/rejected": -1.134381651878357, |
|
"loss": 1.7026, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.1161017417907715, |
|
"rewards/margins": 0.1526612788438797, |
|
"rewards/rejected": -2.268763303756714, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.8659942363112392, |
|
"grad_norm": 19.289572768646487, |
|
"learning_rate": 6.808783363729364e-10, |
|
"logits/chosen": -1.9875192642211914, |
|
"logits/rejected": -1.9795039892196655, |
|
"logps/chosen": -0.9947047233581543, |
|
"logps/rejected": -1.128720998764038, |
|
"loss": 1.6133, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9894094467163086, |
|
"rewards/margins": 0.2680323123931885, |
|
"rewards/rejected": -2.257441997528076, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.8731988472622478, |
|
"grad_norm": 19.644352031599038, |
|
"learning_rate": 6.099248954489794e-10, |
|
"logits/chosen": -2.0019712448120117, |
|
"logits/rejected": -2.0046229362487793, |
|
"logps/chosen": -0.9696542024612427, |
|
"logps/rejected": -1.0707123279571533, |
|
"loss": 1.6688, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -1.9393084049224854, |
|
"rewards/margins": 0.20211617648601532, |
|
"rewards/rejected": -2.1414246559143066, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.8804034582132565, |
|
"grad_norm": 20.13597620650645, |
|
"learning_rate": 5.428290998051116e-10, |
|
"logits/chosen": -1.991913080215454, |
|
"logits/rejected": -1.9869178533554077, |
|
"logps/chosen": -0.9798202514648438, |
|
"logps/rejected": -1.0955702066421509, |
|
"loss": 1.63, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9596405029296875, |
|
"rewards/margins": 0.2314998209476471, |
|
"rewards/rejected": -2.1911404132843018, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.8876080691642652, |
|
"grad_norm": 24.79934802241825, |
|
"learning_rate": 4.796015616177401e-10, |
|
"logits/chosen": -1.9711564779281616, |
|
"logits/rejected": -1.9690234661102295, |
|
"logps/chosen": -1.028472900390625, |
|
"logps/rejected": -1.1603518724441528, |
|
"loss": 1.6215, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.05694580078125, |
|
"rewards/margins": 0.2637581527233124, |
|
"rewards/rejected": -2.3207037448883057, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.8948126801152738, |
|
"grad_norm": 22.569211582194647, |
|
"learning_rate": 4.2025228124205335e-10, |
|
"logits/chosen": -2.0415008068084717, |
|
"logits/rejected": -2.0464255809783936, |
|
"logps/chosen": -1.1165236234664917, |
|
"logps/rejected": -1.1973609924316406, |
|
"loss": 1.6887, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.2330472469329834, |
|
"rewards/margins": 0.1616745889186859, |
|
"rewards/rejected": -2.3947219848632812, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.9020172910662825, |
|
"grad_norm": 29.251729021197608, |
|
"learning_rate": 3.64790645630339e-10, |
|
"logits/chosen": -2.0050177574157715, |
|
"logits/rejected": -1.9978790283203125, |
|
"logps/chosen": -1.0866100788116455, |
|
"logps/rejected": -1.1683754920959473, |
|
"loss": 1.7045, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.173220157623291, |
|
"rewards/margins": 0.16353097558021545, |
|
"rewards/rejected": -2.3367509841918945, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.9092219020172911, |
|
"grad_norm": 26.686501597422076, |
|
"learning_rate": 3.1322542684729945e-10, |
|
"logits/chosen": -1.9958645105361938, |
|
"logits/rejected": -1.9863427877426147, |
|
"logps/chosen": -1.1009094715118408, |
|
"logps/rejected": -1.2481192350387573, |
|
"loss": 1.5988, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.2018189430236816, |
|
"rewards/margins": 0.29441922903060913, |
|
"rewards/rejected": -2.4962384700775146, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.9164265129682998, |
|
"grad_norm": 24.025977247368846, |
|
"learning_rate": 2.6556478068261447e-10, |
|
"logits/chosen": -2.011669158935547, |
|
"logits/rejected": -2.0208234786987305, |
|
"logps/chosen": -1.0808687210083008, |
|
"logps/rejected": -1.210559606552124, |
|
"loss": 1.6254, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1617374420166016, |
|
"rewards/margins": 0.2593816816806793, |
|
"rewards/rejected": -2.421119213104248, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.9236311239193085, |
|
"grad_norm": 23.159728407542556, |
|
"learning_rate": 2.2181624536098952e-10, |
|
"logits/chosen": -2.06213641166687, |
|
"logits/rejected": -2.0583481788635254, |
|
"logps/chosen": -1.054503321647644, |
|
"logps/rejected": -1.183261513710022, |
|
"loss": 1.624, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.109006643295288, |
|
"rewards/margins": 0.2575165629386902, |
|
"rewards/rejected": -2.366523027420044, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.9308357348703171, |
|
"grad_norm": 24.285074754554824, |
|
"learning_rate": 1.819867403498737e-10, |
|
"logits/chosen": -2.0223278999328613, |
|
"logits/rejected": -2.0300443172454834, |
|
"logps/chosen": -1.0237973928451538, |
|
"logps/rejected": -1.1488239765167236, |
|
"loss": 1.616, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.0475947856903076, |
|
"rewards/margins": 0.250053346157074, |
|
"rewards/rejected": -2.2976479530334473, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.9380403458213258, |
|
"grad_norm": 22.444579015136416, |
|
"learning_rate": 1.4608256526505157e-10, |
|
"logits/chosen": -1.9494588375091553, |
|
"logits/rejected": -1.9468958377838135, |
|
"logps/chosen": -1.1418302059173584, |
|
"logps/rejected": -1.2206158638000488, |
|
"loss": 1.6977, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.283660411834717, |
|
"rewards/margins": 0.15757131576538086, |
|
"rewards/rejected": -2.4412317276000977, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.9452449567723344, |
|
"grad_norm": 26.381118967286472, |
|
"learning_rate": 1.1410939887425141e-10, |
|
"logits/chosen": -2.0545475482940674, |
|
"logits/rejected": -2.041222095489502, |
|
"logps/chosen": -0.9961267709732056, |
|
"logps/rejected": -1.0998473167419434, |
|
"loss": 1.6658, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.9922535419464111, |
|
"rewards/margins": 0.20744113624095917, |
|
"rewards/rejected": -2.1996946334838867, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.952449567723343, |
|
"grad_norm": 24.607639118161103, |
|
"learning_rate": 8.607229819898865e-11, |
|
"logits/chosen": -2.02439022064209, |
|
"logits/rejected": -2.0220091342926025, |
|
"logps/chosen": -1.0608162879943848, |
|
"logps/rejected": -1.1870168447494507, |
|
"loss": 1.6251, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.1216325759887695, |
|
"rewards/margins": 0.2524010241031647, |
|
"rewards/rejected": -2.3740336894989014, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.9596541786743515, |
|
"grad_norm": 25.59822036865297, |
|
"learning_rate": 6.19756977147029e-11, |
|
"logits/chosen": -2.024225950241089, |
|
"logits/rejected": -2.0201265811920166, |
|
"logps/chosen": -0.9928004145622253, |
|
"logps/rejected": -1.1591542959213257, |
|
"loss": 1.5624, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.9856008291244507, |
|
"rewards/margins": 0.3327081501483917, |
|
"rewards/rejected": -2.3183085918426514, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.9668587896253602, |
|
"grad_norm": 19.72589484365043, |
|
"learning_rate": 4.1823408649391265e-11, |
|
"logits/chosen": -1.9768123626708984, |
|
"logits/rejected": -1.9772489070892334, |
|
"logps/chosen": -1.0286433696746826, |
|
"logps/rejected": -1.1329104900360107, |
|
"loss": 1.6564, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0572867393493652, |
|
"rewards/margins": 0.2085343301296234, |
|
"rewards/rejected": -2.2658209800720215, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.9740634005763689, |
|
"grad_norm": 22.64173380411296, |
|
"learning_rate": 2.5618618380812694e-11, |
|
"logits/chosen": -2.074462413787842, |
|
"logits/rejected": -2.0713300704956055, |
|
"logps/chosen": -0.9406960606575012, |
|
"logps/rejected": -1.0647523403167725, |
|
"loss": 1.6234, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.8813921213150024, |
|
"rewards/margins": 0.24811263382434845, |
|
"rewards/rejected": -2.129504680633545, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.9812680115273775, |
|
"grad_norm": 18.289142985147425, |
|
"learning_rate": 1.3363889932338501e-11, |
|
"logits/chosen": -1.980743408203125, |
|
"logits/rejected": -1.9810924530029297, |
|
"logps/chosen": -1.0640665292739868, |
|
"logps/rejected": -1.2089464664459229, |
|
"loss": 1.5996, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.1281330585479736, |
|
"rewards/margins": 0.2897598147392273, |
|
"rewards/rejected": -2.4178929328918457, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.9884726224783862, |
|
"grad_norm": 21.489973290523885, |
|
"learning_rate": 5.061161567596061e-12, |
|
"logits/chosen": -2.085202217102051, |
|
"logits/rejected": -2.083030939102173, |
|
"logps/chosen": -1.0205986499786377, |
|
"logps/rejected": -1.12671959400177, |
|
"loss": 1.6554, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0411972999572754, |
|
"rewards/margins": 0.2122419774532318, |
|
"rewards/rejected": -2.25343918800354, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.9956772334293948, |
|
"grad_norm": 27.92987313520658, |
|
"learning_rate": 7.11746483889053e-13, |
|
"logits/chosen": -2.0238137245178223, |
|
"logits/rejected": -2.018078327178955, |
|
"logps/chosen": -1.0771089792251587, |
|
"logps/rejected": -1.1666157245635986, |
|
"loss": 1.6928, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.1542179584503174, |
|
"rewards/margins": 0.17901378870010376, |
|
"rewards/rejected": -2.3332314491271973, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 2776, |
|
"total_flos": 0.0, |
|
"train_loss": 1.6477044489696322, |
|
"train_runtime": 3633.5789, |
|
"train_samples_per_second": 12.22, |
|
"train_steps_per_second": 0.764 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2776, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |