|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9993240502906584, |
|
"eval_steps": 1000, |
|
"global_step": 462, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.127659574468085e-08, |
|
"logits/chosen": -2.4600229263305664, |
|
"logits/rejected": -2.442487955093384, |
|
"logps/chosen": -419.2090759277344, |
|
"logps/rejected": -388.2476501464844, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/mix_margin": -1.1801719779214181e-07, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -2.3608736991882324, |
|
"logits/rejected": -2.3287594318389893, |
|
"logps/chosen": -318.4012451171875, |
|
"logps/rejected": -263.6573181152344, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.0007825422217138112, |
|
"rewards/confidence": -0.008881219662725925, |
|
"rewards/confidence_mean_diff": 0.008881219662725925, |
|
"rewards/confidence_moving_diff": 0.001681807334534824, |
|
"rewards/margins": 0.0013569907750934362, |
|
"rewards/mix_margin": 0.0007381609757430851, |
|
"rewards/real_percentage": 15.428571701049805, |
|
"rewards/rejected": -0.0005744485533796251, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -2.4250235557556152, |
|
"logits/rejected": -2.4130008220672607, |
|
"logps/chosen": -301.75958251953125, |
|
"logps/rejected": -263.33489990234375, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.012543246150016785, |
|
"rewards/confidence": -0.011264830827713013, |
|
"rewards/confidence_mean_diff": 0.011264830827713013, |
|
"rewards/confidence_moving_diff": 0.00037633898318745196, |
|
"rewards/margins": 0.007124939002096653, |
|
"rewards/mix_margin": 0.003652904648333788, |
|
"rewards/real_percentage": 7.199999809265137, |
|
"rewards/rejected": 0.005418307613581419, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.382978723404255e-07, |
|
"logits/chosen": -2.3625423908233643, |
|
"logits/rejected": -2.3323898315429688, |
|
"logps/chosen": -357.43328857421875, |
|
"logps/rejected": -314.91290283203125, |
|
"loss": 0.6747, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.04857476428151131, |
|
"rewards/confidence": -0.043907828629016876, |
|
"rewards/confidence_mean_diff": 0.043907828629016876, |
|
"rewards/confidence_moving_diff": 0.007038711104542017, |
|
"rewards/margins": 0.0557299479842186, |
|
"rewards/mix_margin": 0.028763342648744583, |
|
"rewards/real_percentage": 7.0, |
|
"rewards/rejected": -0.007155182305723429, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.51063829787234e-07, |
|
"logits/chosen": -2.328860282897949, |
|
"logits/rejected": -2.324850082397461, |
|
"logps/chosen": -328.1289978027344, |
|
"logps/rejected": -297.20880126953125, |
|
"loss": 0.658, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05925966426730156, |
|
"rewards/confidence": -0.1791446954011917, |
|
"rewards/confidence_mean_diff": 0.1791446954011917, |
|
"rewards/confidence_moving_diff": 0.016184808686375618, |
|
"rewards/margins": 0.08581773936748505, |
|
"rewards/mix_margin": 0.04467679560184479, |
|
"rewards/real_percentage": 7.0, |
|
"rewards/rejected": -0.1450774073600769, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.998710660154897e-07, |
|
"logits/chosen": -2.2127022743225098, |
|
"logits/rejected": -2.2161879539489746, |
|
"logps/chosen": -326.4324645996094, |
|
"logps/rejected": -318.18798828125, |
|
"loss": 0.6406, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.1789505034685135, |
|
"rewards/confidence": -0.23482057452201843, |
|
"rewards/confidence_mean_diff": 0.23482057452201843, |
|
"rewards/confidence_moving_diff": -9.797290113056079e-05, |
|
"rewards/margins": 0.10209941864013672, |
|
"rewards/mix_margin": 0.05361374467611313, |
|
"rewards/real_percentage": 6.199999809265137, |
|
"rewards/rejected": -0.2810499370098114, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.975807556654536e-07, |
|
"logits/chosen": -2.08450984954834, |
|
"logits/rejected": -2.0751709938049316, |
|
"logps/chosen": -337.4300537109375, |
|
"logps/rejected": -329.66619873046875, |
|
"loss": 0.6214, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.40547019243240356, |
|
"rewards/confidence": -0.24584360420703888, |
|
"rewards/confidence_mean_diff": 0.24584360420703888, |
|
"rewards/confidence_moving_diff": -0.008521117269992828, |
|
"rewards/margins": 0.17091026902198792, |
|
"rewards/mix_margin": 0.09404207020998001, |
|
"rewards/real_percentage": 5.400000095367432, |
|
"rewards/rejected": -0.5763804912567139, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.92440347807533e-07, |
|
"logits/chosen": -1.7078907489776611, |
|
"logits/rejected": -1.717552900314331, |
|
"logps/chosen": -378.1355895996094, |
|
"logps/rejected": -374.5966796875, |
|
"loss": 0.5925, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.8885644674301147, |
|
"rewards/confidence": -0.3056587278842926, |
|
"rewards/confidence_mean_diff": 0.3056587278842926, |
|
"rewards/confidence_moving_diff": 0.03268023580312729, |
|
"rewards/margins": 0.28360408544540405, |
|
"rewards/mix_margin": 0.13858327269554138, |
|
"rewards/real_percentage": 7.800000190734863, |
|
"rewards/rejected": -1.1721686124801636, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.844792862324257e-07, |
|
"logits/chosen": -1.7334696054458618, |
|
"logits/rejected": -1.7242892980575562, |
|
"logps/chosen": -395.7420959472656, |
|
"logps/rejected": -401.59197998046875, |
|
"loss": 0.5764, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.094656229019165, |
|
"rewards/confidence": -0.3173515498638153, |
|
"rewards/confidence_mean_diff": 0.3173515498638153, |
|
"rewards/confidence_moving_diff": -0.02396375872194767, |
|
"rewards/margins": 0.48389825224876404, |
|
"rewards/mix_margin": 0.21666650474071503, |
|
"rewards/real_percentage": 5.599999904632568, |
|
"rewards/rejected": -1.5785545110702515, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.737431711798862e-07, |
|
"logits/chosen": -1.9971452951431274, |
|
"logits/rejected": -1.9761130809783936, |
|
"logps/chosen": -391.64276123046875, |
|
"logps/rejected": -360.64886474609375, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9299066662788391, |
|
"rewards/confidence": -0.339626669883728, |
|
"rewards/confidence_mean_diff": 0.339626669883728, |
|
"rewards/confidence_moving_diff": 0.0005624383920803666, |
|
"rewards/margins": 0.3824290335178375, |
|
"rewards/mix_margin": 0.22833208739757538, |
|
"rewards/real_percentage": 6.599999904632568, |
|
"rewards/rejected": -1.312335729598999, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.602934981446803e-07, |
|
"logits/chosen": -2.2859737873077393, |
|
"logits/rejected": -2.2752556800842285, |
|
"logps/chosen": -375.16729736328125, |
|
"logps/rejected": -376.62249755859375, |
|
"loss": 0.5654, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.9961857795715332, |
|
"rewards/confidence": -0.3247523903846741, |
|
"rewards/confidence_mean_diff": 0.3247523903846741, |
|
"rewards/confidence_moving_diff": 0.013505702838301659, |
|
"rewards/margins": 0.4200294017791748, |
|
"rewards/mix_margin": 0.20285817980766296, |
|
"rewards/real_percentage": 6.400000095367432, |
|
"rewards/rejected": -1.416215181350708, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.442073056359603e-07, |
|
"logits/chosen": -2.292510986328125, |
|
"logits/rejected": -2.240940570831299, |
|
"logps/chosen": -380.4697570800781, |
|
"logps/rejected": -363.1268005371094, |
|
"loss": 0.5619, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.0786364078521729, |
|
"rewards/confidence": -0.28994521498680115, |
|
"rewards/confidence_mean_diff": 0.28994521498680115, |
|
"rewards/confidence_moving_diff": -0.01229217927902937, |
|
"rewards/margins": 0.4588547348976135, |
|
"rewards/mix_margin": 0.2650415003299713, |
|
"rewards/real_percentage": 5.800000190734863, |
|
"rewards/rejected": -1.5374912023544312, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.255767339076622e-07, |
|
"logits/chosen": -2.248251438140869, |
|
"logits/rejected": -2.198671340942383, |
|
"logps/chosen": -444.0166931152344, |
|
"logps/rejected": -446.957275390625, |
|
"loss": 0.5569, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.5204365253448486, |
|
"rewards/confidence": -0.38544487953186035, |
|
"rewards/confidence_mean_diff": 0.38544487953186035, |
|
"rewards/confidence_moving_diff": 0.014522197656333447, |
|
"rewards/margins": 0.4478101134300232, |
|
"rewards/mix_margin": 0.22497253119945526, |
|
"rewards/real_percentage": 7.0, |
|
"rewards/rejected": -1.9682468175888062, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.045084971874737e-07, |
|
"logits/chosen": -2.147189140319824, |
|
"logits/rejected": -2.145460605621338, |
|
"logps/chosen": -368.4799499511719, |
|
"logps/rejected": -378.8326416015625, |
|
"loss": 0.5498, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.2452112436294556, |
|
"rewards/confidence": -0.3624282479286194, |
|
"rewards/confidence_mean_diff": 0.3624282479286194, |
|
"rewards/confidence_moving_diff": 0.014734315685927868, |
|
"rewards/margins": 0.4154941439628601, |
|
"rewards/mix_margin": 0.19120317697525024, |
|
"rewards/real_percentage": 7.800000190734863, |
|
"rewards/rejected": -1.660705327987671, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.811232724274034e-07, |
|
"logits/chosen": -2.002075433731079, |
|
"logits/rejected": -2.005988597869873, |
|
"logps/chosen": -417.646728515625, |
|
"logps/rejected": -431.691162109375, |
|
"loss": 0.537, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.487528920173645, |
|
"rewards/confidence": -0.4086700975894928, |
|
"rewards/confidence_mean_diff": 0.4086700975894928, |
|
"rewards/confidence_moving_diff": -0.02756505273282528, |
|
"rewards/margins": 0.5225194096565247, |
|
"rewards/mix_margin": 0.2395971715450287, |
|
"rewards/real_percentage": 6.0, |
|
"rewards/rejected": -2.0100481510162354, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.555550080771272e-07, |
|
"logits/chosen": -2.0403356552124023, |
|
"logits/rejected": -2.0266761779785156, |
|
"logps/chosen": -392.81732177734375, |
|
"logps/rejected": -392.935791015625, |
|
"loss": 0.5576, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.3171683549880981, |
|
"rewards/confidence": -0.3989728093147278, |
|
"rewards/confidence_mean_diff": 0.3989728093147278, |
|
"rewards/confidence_moving_diff": 0.0009117841836996377, |
|
"rewards/margins": 0.5276567339897156, |
|
"rewards/mix_margin": 0.2687237858772278, |
|
"rewards/real_percentage": 6.0, |
|
"rewards/rejected": -1.8448251485824585, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.279501568393994e-07, |
|
"logits/chosen": -2.0831470489501953, |
|
"logits/rejected": -2.0378174781799316, |
|
"logps/chosen": -440.0665588378906, |
|
"logps/rejected": -452.62176513671875, |
|
"loss": 0.5631, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.973459243774414, |
|
"rewards/confidence": -0.37913259863853455, |
|
"rewards/confidence_mean_diff": 0.37913259863853455, |
|
"rewards/confidence_moving_diff": -0.007635933347046375, |
|
"rewards/margins": 0.44591131806373596, |
|
"rewards/mix_margin": 0.2109535038471222, |
|
"rewards/real_percentage": 6.400000095367432, |
|
"rewards/rejected": -2.419370412826538, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.984668368022335e-07, |
|
"logits/chosen": -2.058879852294922, |
|
"logits/rejected": -2.0460941791534424, |
|
"logps/chosen": -469.419921875, |
|
"logps/rejected": -465.22857666015625, |
|
"loss": 0.5595, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.6018226146697998, |
|
"rewards/confidence": -0.3371889293193817, |
|
"rewards/confidence_mean_diff": 0.3371889293193817, |
|
"rewards/confidence_moving_diff": 0.02232498861849308, |
|
"rewards/margins": 0.4381260275840759, |
|
"rewards/mix_margin": 0.2263106405735016, |
|
"rewards/real_percentage": 7.199999809265137, |
|
"rewards/rejected": -2.0399487018585205, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 7.672739257528134e-07, |
|
"logits/chosen": -2.1524975299835205, |
|
"logits/rejected": -2.1426620483398438, |
|
"logps/chosen": -395.23260498046875, |
|
"logps/rejected": -424.2964782714844, |
|
"loss": 0.5497, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.5090861320495605, |
|
"rewards/confidence": -0.5779476165771484, |
|
"rewards/confidence_mean_diff": 0.5779476165771484, |
|
"rewards/confidence_moving_diff": -0.008263492956757545, |
|
"rewards/margins": 0.48516401648521423, |
|
"rewards/mix_margin": 0.2023618519306183, |
|
"rewards/real_percentage": 6.599999904632568, |
|
"rewards/rejected": -1.9942500591278076, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 7.345500938608219e-07, |
|
"logits/chosen": -2.2230193614959717, |
|
"logits/rejected": -2.2007393836975098, |
|
"logps/chosen": -421.42156982421875, |
|
"logps/rejected": -427.07110595703125, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.7702022790908813, |
|
"rewards/confidence": -0.30366313457489014, |
|
"rewards/confidence_mean_diff": 0.30366313457489014, |
|
"rewards/confidence_moving_diff": -0.005598017480224371, |
|
"rewards/margins": 0.6354817152023315, |
|
"rewards/mix_margin": 0.295926570892334, |
|
"rewards/real_percentage": 6.599999904632568, |
|
"rewards/rejected": -2.405683755874634, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 7.004827802718889e-07, |
|
"logits/chosen": -2.1584935188293457, |
|
"logits/rejected": -2.1630496978759766, |
|
"logps/chosen": -462.48199462890625, |
|
"logps/rejected": -469.06536865234375, |
|
"loss": 0.5574, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9269781112670898, |
|
"rewards/confidence": -0.3838357627391815, |
|
"rewards/confidence_mean_diff": 0.3838357627391815, |
|
"rewards/confidence_moving_diff": -0.017508838325738907, |
|
"rewards/margins": 0.6354564428329468, |
|
"rewards/mix_margin": 0.31543755531311035, |
|
"rewards/real_percentage": 5.0, |
|
"rewards/rejected": -2.562434434890747, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.652671194731395e-07, |
|
"logits/chosen": -2.107018232345581, |
|
"logits/rejected": -2.055001974105835, |
|
"logps/chosen": -462.31072998046875, |
|
"logps/rejected": -467.524658203125, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.9643453359603882, |
|
"rewards/confidence": -0.46903911232948303, |
|
"rewards/confidence_mean_diff": 0.46903911232948303, |
|
"rewards/confidence_moving_diff": 0.035244446247816086, |
|
"rewards/margins": 0.6127676963806152, |
|
"rewards/mix_margin": 0.2926762104034424, |
|
"rewards/real_percentage": 6.800000190734863, |
|
"rewards/rejected": -2.577113151550293, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 6.291048235805233e-07, |
|
"logits/chosen": -2.1444382667541504, |
|
"logits/rejected": -2.1225123405456543, |
|
"logps/chosen": -446.4590759277344, |
|
"logps/rejected": -433.69573974609375, |
|
"loss": 0.5804, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3874393701553345, |
|
"rewards/confidence": -0.41097918152809143, |
|
"rewards/confidence_mean_diff": 0.41097918152809143, |
|
"rewards/confidence_moving_diff": -0.002999979304149747, |
|
"rewards/margins": 0.44998854398727417, |
|
"rewards/mix_margin": 0.27227410674095154, |
|
"rewards/real_percentage": 6.0, |
|
"rewards/rejected": -1.837428092956543, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.922030269500808e-07, |
|
"logits/chosen": -2.0928385257720947, |
|
"logits/rejected": -2.0588412284851074, |
|
"logps/chosen": -425.83514404296875, |
|
"logps/rejected": -431.23382568359375, |
|
"loss": 0.5415, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7153390645980835, |
|
"rewards/confidence": -0.31595852971076965, |
|
"rewards/confidence_mean_diff": 0.31595852971076965, |
|
"rewards/confidence_moving_diff": -0.013669434003531933, |
|
"rewards/margins": 0.5658494234085083, |
|
"rewards/mix_margin": 0.25558221340179443, |
|
"rewards/real_percentage": 6.0, |
|
"rewards/rejected": -2.281188488006592, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.547730997311105e-07, |
|
"logits/chosen": -1.9946399927139282, |
|
"logits/rejected": -1.9523468017578125, |
|
"logps/chosen": -428.6726989746094, |
|
"logps/rejected": -438.1825256347656, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.0739216804504395, |
|
"rewards/confidence": -0.345465749502182, |
|
"rewards/confidence_mean_diff": 0.345465749502182, |
|
"rewards/confidence_moving_diff": 0.006545326206833124, |
|
"rewards/margins": 0.6022626161575317, |
|
"rewards/mix_margin": 0.3206036686897278, |
|
"rewards/real_percentage": 5.400000095367432, |
|
"rewards/rejected": -2.6761844158172607, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5.17029437157094e-07, |
|
"logits/chosen": -2.0504579544067383, |
|
"logits/rejected": -2.0380096435546875, |
|
"logps/chosen": -455.2330017089844, |
|
"logps/rejected": -487.70477294921875, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.9092988967895508, |
|
"rewards/confidence": -0.521675169467926, |
|
"rewards/confidence_mean_diff": 0.521675169467926, |
|
"rewards/confidence_moving_diff": 0.03468245267868042, |
|
"rewards/margins": 0.6157360672950745, |
|
"rewards/mix_margin": 0.2263031005859375, |
|
"rewards/real_percentage": 6.800000190734863, |
|
"rewards/rejected": -2.5250351428985596, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.791882315092155e-07, |
|
"logits/chosen": -2.040631055831909, |
|
"logits/rejected": -2.0134525299072266, |
|
"logps/chosen": -465.2564392089844, |
|
"logps/rejected": -469.700927734375, |
|
"loss": 0.5102, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.8632171154022217, |
|
"rewards/confidence": -0.4674126207828522, |
|
"rewards/confidence_mean_diff": 0.4674126207828522, |
|
"rewards/confidence_moving_diff": -0.04609960317611694, |
|
"rewards/margins": 0.6828121542930603, |
|
"rewards/mix_margin": 0.34053856134414673, |
|
"rewards/real_percentage": 6.0, |
|
"rewards/rejected": -2.546029567718506, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.414662337865529e-07, |
|
"logits/chosen": -2.028301477432251, |
|
"logits/rejected": -2.0357513427734375, |
|
"logps/chosen": -431.650390625, |
|
"logps/rejected": -428.67626953125, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.9017683267593384, |
|
"rewards/confidence": -0.36343130469322205, |
|
"rewards/confidence_mean_diff": 0.36343130469322205, |
|
"rewards/confidence_moving_diff": 0.005257821176201105, |
|
"rewards/margins": 0.6798229217529297, |
|
"rewards/mix_margin": 0.36580324172973633, |
|
"rewards/real_percentage": 6.400000095367432, |
|
"rewards/rejected": -2.5815913677215576, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.04079512175984e-07, |
|
"logits/chosen": -2.102811336517334, |
|
"logits/rejected": -2.081491708755493, |
|
"logps/chosen": -445.09539794921875, |
|
"logps/rejected": -462.83721923828125, |
|
"loss": 0.5192, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.2766060829162598, |
|
"rewards/confidence": -0.33939874172210693, |
|
"rewards/confidence_mean_diff": 0.33939874172210693, |
|
"rewards/confidence_moving_diff": 0.027284782379865646, |
|
"rewards/margins": 0.4541402757167816, |
|
"rewards/mix_margin": 0.2212882786989212, |
|
"rewards/real_percentage": 7.400000095367432, |
|
"rewards/rejected": -2.730746030807495, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.672422144331785e-07, |
|
"logits/chosen": -2.1012184619903564, |
|
"logits/rejected": -2.076214075088501, |
|
"logps/chosen": -435.23114013671875, |
|
"logps/rejected": -446.3954162597656, |
|
"loss": 0.5413, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.6879991292953491, |
|
"rewards/confidence": -0.48170310258865356, |
|
"rewards/confidence_mean_diff": 0.48170310258865356, |
|
"rewards/confidence_moving_diff": -0.0009890676010400057, |
|
"rewards/margins": 0.6249912977218628, |
|
"rewards/mix_margin": 0.30173999071121216, |
|
"rewards/real_percentage": 6.199999809265137, |
|
"rewards/rejected": -2.312990665435791, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.311653412636468e-07, |
|
"logits/chosen": -2.0913848876953125, |
|
"logits/rejected": -2.060335874557495, |
|
"logps/chosen": -439.56683349609375, |
|
"logps/rejected": -458.71905517578125, |
|
"loss": 0.5183, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.9180190563201904, |
|
"rewards/confidence": -0.5002374649047852, |
|
"rewards/confidence_mean_diff": 0.5002374649047852, |
|
"rewards/confidence_moving_diff": -0.026549097150564194, |
|
"rewards/margins": 0.6147478818893433, |
|
"rewards/mix_margin": 0.26649755239486694, |
|
"rewards/real_percentage": 5.800000190734863, |
|
"rewards/rejected": -2.532766819000244, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.9605553772980796e-07, |
|
"logits/chosen": -2.05464243888855, |
|
"logits/rejected": -2.0151495933532715, |
|
"logps/chosen": -452.279296875, |
|
"logps/rejected": -461.88897705078125, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.083768129348755, |
|
"rewards/confidence": -0.394141286611557, |
|
"rewards/confidence_mean_diff": 0.394141286611557, |
|
"rewards/confidence_moving_diff": 0.00520284753292799, |
|
"rewards/margins": 0.5840972661972046, |
|
"rewards/mix_margin": 0.2817578911781311, |
|
"rewards/real_percentage": 6.0, |
|
"rewards/rejected": -2.66786527633667, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.6211390960678407e-07, |
|
"logits/chosen": -2.120527505874634, |
|
"logits/rejected": -2.0766115188598633, |
|
"logps/chosen": -476.22369384765625, |
|
"logps/rejected": -501.09716796875, |
|
"loss": 0.5131, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.364851474761963, |
|
"rewards/confidence": -0.44711294770240784, |
|
"rewards/confidence_mean_diff": 0.44711294770240784, |
|
"rewards/confidence_moving_diff": 0.000249391800025478, |
|
"rewards/margins": 0.6894611716270447, |
|
"rewards/mix_margin": 0.2901640832424164, |
|
"rewards/real_percentage": 5.599999904632568, |
|
"rewards/rejected": -3.0543129444122314, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.29534871466734e-07, |
|
"logits/chosen": -2.0330708026885986, |
|
"logits/rejected": -2.0034308433532715, |
|
"logps/chosen": -484.76837158203125, |
|
"logps/rejected": -491.1351013183594, |
|
"loss": 0.5358, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.210871934890747, |
|
"rewards/confidence": -0.45081382989883423, |
|
"rewards/confidence_mean_diff": 0.45081382989883423, |
|
"rewards/confidence_moving_diff": -0.0033632635604590178, |
|
"rewards/margins": 0.6040040254592896, |
|
"rewards/mix_margin": 0.27995753288269043, |
|
"rewards/real_percentage": 5.599999904632568, |
|
"rewards/rejected": -2.814876079559326, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.9850503308978828e-07, |
|
"logits/chosen": -2.0655150413513184, |
|
"logits/rejected": -2.0272319316864014, |
|
"logps/chosen": -479.78338623046875, |
|
"logps/rejected": -496.1671447753906, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.3855416774749756, |
|
"rewards/confidence": -0.4124049246311188, |
|
"rewards/confidence_mean_diff": 0.4124049246311188, |
|
"rewards/confidence_moving_diff": 0.016355862841010094, |
|
"rewards/margins": 0.6651355624198914, |
|
"rewards/mix_margin": 0.3446359634399414, |
|
"rewards/real_percentage": 6.400000095367432, |
|
"rewards/rejected": -3.0506770610809326, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6920213058013022e-07, |
|
"logits/chosen": -1.9955543279647827, |
|
"logits/rejected": -1.9886165857315063, |
|
"logps/chosen": -456.9586486816406, |
|
"logps/rejected": -483.20550537109375, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.1107418537139893, |
|
"rewards/confidence": -0.5677846670150757, |
|
"rewards/confidence_mean_diff": 0.5677846670150757, |
|
"rewards/confidence_moving_diff": 0.0054519325494766235, |
|
"rewards/margins": 0.7480586171150208, |
|
"rewards/mix_margin": 0.2732301950454712, |
|
"rewards/real_percentage": 6.400000095367432, |
|
"rewards/rejected": -2.8588004112243652, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.4179400830968412e-07, |
|
"logits/chosen": -2.0179452896118164, |
|
"logits/rejected": -2.005185842514038, |
|
"logps/chosen": -468.8108825683594, |
|
"logps/rejected": -483.4927673339844, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.018651247024536, |
|
"rewards/confidence": -0.41795068979263306, |
|
"rewards/confidence_mean_diff": 0.41795068979263306, |
|
"rewards/confidence_moving_diff": -0.030550379306077957, |
|
"rewards/margins": 0.7332804799079895, |
|
"rewards/mix_margin": 0.33876484632492065, |
|
"rewards/real_percentage": 5.0, |
|
"rewards/rejected": -2.75193190574646, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1643765752075468e-07, |
|
"logits/chosen": -2.078735828399658, |
|
"logits/rejected": -2.0652003288269043, |
|
"logps/chosen": -422.59869384765625, |
|
"logps/rejected": -423.21875, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.1171858310699463, |
|
"rewards/confidence": -0.4289500117301941, |
|
"rewards/confidence_mean_diff": 0.4289500117301941, |
|
"rewards/confidence_moving_diff": 0.009287357330322266, |
|
"rewards/margins": 0.5101779103279114, |
|
"rewards/mix_margin": 0.2586424648761749, |
|
"rewards/real_percentage": 6.800000190734863, |
|
"rewards/rejected": -2.627363681793213, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.327831709440792e-08, |
|
"logits/chosen": -1.9588645696640015, |
|
"logits/rejected": -1.9197273254394531, |
|
"logps/chosen": -449.98785400390625, |
|
"logps/rejected": -502.673583984375, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.004117250442505, |
|
"rewards/confidence": -0.36164262890815735, |
|
"rewards/confidence_mean_diff": 0.36164262890815735, |
|
"rewards/confidence_moving_diff": -0.014251338317990303, |
|
"rewards/margins": 0.7769009470939636, |
|
"rewards/mix_margin": 0.3241749405860901, |
|
"rewards/real_percentage": 6.0, |
|
"rewards/rejected": -2.781017780303955, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.244864163531162e-08, |
|
"logits/chosen": -2.064495325088501, |
|
"logits/rejected": -2.0268824100494385, |
|
"logps/chosen": -422.8828125, |
|
"logps/rejected": -445.21575927734375, |
|
"loss": 0.5537, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9303261041641235, |
|
"rewards/confidence": -0.4756762981414795, |
|
"rewards/confidence_mean_diff": 0.4756762981414795, |
|
"rewards/confidence_moving_diff": -0.0012016535038128495, |
|
"rewards/margins": 0.5886819362640381, |
|
"rewards/mix_margin": 0.27653414011001587, |
|
"rewards/real_percentage": 7.0, |
|
"rewards/rejected": -2.519007921218872, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5.4067941638174795e-08, |
|
"logits/chosen": -2.077833414077759, |
|
"logits/rejected": -2.0256028175354004, |
|
"logps/chosen": -427.36444091796875, |
|
"logps/rejected": -447.19708251953125, |
|
"loss": 0.5163, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.0830533504486084, |
|
"rewards/confidence": -0.3510534167289734, |
|
"rewards/confidence_mean_diff": 0.3510534167289734, |
|
"rewards/confidence_moving_diff": 0.025139298290014267, |
|
"rewards/margins": 0.7282416224479675, |
|
"rewards/mix_margin": 0.3104473054409027, |
|
"rewards/real_percentage": 7.0, |
|
"rewards/rejected": -2.8112950325012207, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.824150008803767e-08, |
|
"logits/chosen": -2.0504865646362305, |
|
"logits/rejected": -2.014496326446533, |
|
"logps/chosen": -397.7694091796875, |
|
"logps/rejected": -405.7162170410156, |
|
"loss": 0.5226, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7523311376571655, |
|
"rewards/confidence": -0.31372588872909546, |
|
"rewards/confidence_mean_diff": 0.31372588872909546, |
|
"rewards/confidence_moving_diff": -0.014554053544998169, |
|
"rewards/margins": 0.6601498126983643, |
|
"rewards/mix_margin": 0.33540448546409607, |
|
"rewards/real_percentage": 6.0, |
|
"rewards/rejected": -2.4124810695648193, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.5059969408867844e-08, |
|
"logits/chosen": -2.0050809383392334, |
|
"logits/rejected": -1.9919992685317993, |
|
"logps/chosen": -398.9284362792969, |
|
"logps/rejected": -417.0933532714844, |
|
"loss": 0.5172, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.4770046472549438, |
|
"rewards/confidence": -0.3290501832962036, |
|
"rewards/confidence_mean_diff": 0.3290501832962036, |
|
"rewards/confidence_moving_diff": 0.02498219534754753, |
|
"rewards/margins": 0.6767237186431885, |
|
"rewards/mix_margin": 0.34894901514053345, |
|
"rewards/real_percentage": 6.400000095367432, |
|
"rewards/rejected": -2.1537282466888428, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.4598852214685486e-08, |
|
"logits/chosen": -2.111842632293701, |
|
"logits/rejected": -2.0859241485595703, |
|
"logps/chosen": -419.46783447265625, |
|
"logps/rejected": -449.5113220214844, |
|
"loss": 0.5041, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.6683928966522217, |
|
"rewards/confidence": -0.38691821694374084, |
|
"rewards/confidence_mean_diff": 0.38691821694374084, |
|
"rewards/confidence_moving_diff": -0.019811248406767845, |
|
"rewards/margins": 0.7039911150932312, |
|
"rewards/mix_margin": 0.30413728952407837, |
|
"rewards/real_percentage": 5.800000190734863, |
|
"rewards/rejected": -2.3723835945129395, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.918068837427127e-09, |
|
"logits/chosen": -2.06069016456604, |
|
"logits/rejected": -2.0733203887939453, |
|
"logps/chosen": -453.7708435058594, |
|
"logps/rejected": -459.8556213378906, |
|
"loss": 0.5331, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.9116617441177368, |
|
"rewards/confidence": -0.3324928283691406, |
|
"rewards/confidence_mean_diff": 0.3324928283691406, |
|
"rewards/confidence_moving_diff": 0.008450254797935486, |
|
"rewards/margins": 0.7425110936164856, |
|
"rewards/mix_margin": 0.3466404378414154, |
|
"rewards/real_percentage": 7.0, |
|
"rewards/rejected": -2.654172897338867, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.0616141087114737e-09, |
|
"logits/chosen": -2.075761079788208, |
|
"logits/rejected": -2.0503153800964355, |
|
"logps/chosen": -475.33038330078125, |
|
"logps/rejected": -491.11932373046875, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.7454744577407837, |
|
"rewards/confidence": -0.26364636421203613, |
|
"rewards/confidence_mean_diff": 0.26364636421203613, |
|
"rewards/confidence_moving_diff": -0.0049579874612390995, |
|
"rewards/margins": 0.8323339223861694, |
|
"rewards/mix_margin": 0.3889670968055725, |
|
"rewards/real_percentage": 6.0, |
|
"rewards/rejected": -2.577807903289795, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.7305361427451014e-11, |
|
"logits/chosen": -2.111032724380493, |
|
"logits/rejected": -2.093623638153076, |
|
"logps/chosen": -483.09967041015625, |
|
"logps/rejected": -487.7115173339844, |
|
"loss": 0.5639, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.091290235519409, |
|
"rewards/confidence": -0.47068697214126587, |
|
"rewards/confidence_mean_diff": 0.47068697214126587, |
|
"rewards/confidence_moving_diff": 0.013580495491623878, |
|
"rewards/margins": 0.5157801508903503, |
|
"rewards/mix_margin": 0.26322659850120544, |
|
"rewards/real_percentage": 5.800000190734863, |
|
"rewards/rejected": -2.6070706844329834, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 462, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5550967540059771, |
|
"train_runtime": 33981.529, |
|
"train_samples_per_second": 0.871, |
|
"train_steps_per_second": 0.014 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 462, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|