|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"eval_steps": 100, |
|
"global_step": 1540, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": -1.8663302659988403, |
|
"logits/rejected": -1.870653748512268, |
|
"logps/chosen": -36.98862075805664, |
|
"logps/rejected": -33.65410232543945, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.00017976858362089843, |
|
"rewards/margins": 0.0004200442635919899, |
|
"rewards/rejected": -0.00024027563631534576, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": -1.997056245803833, |
|
"logits/rejected": -1.9996883869171143, |
|
"logps/chosen": -29.644180297851562, |
|
"logps/rejected": -29.042306900024414, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -1.9853896446875297e-05, |
|
"rewards/margins": -0.00023122904531192034, |
|
"rewards/rejected": 0.00021137515432201326, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -1.9208399057388306, |
|
"logits/rejected": -1.9181444644927979, |
|
"logps/chosen": -31.4141902923584, |
|
"logps/rejected": -33.234039306640625, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 1.908373087644577e-05, |
|
"rewards/margins": 0.0001748828508425504, |
|
"rewards/rejected": -0.00015579909086227417, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": -2.016510009765625, |
|
"logits/rejected": -2.007784843444824, |
|
"logps/chosen": -32.554359436035156, |
|
"logps/rejected": -32.493995666503906, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.00022449034440796822, |
|
"rewards/margins": 3.7771409552078694e-05, |
|
"rewards/rejected": 0.00018671892757993191, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": -1.8620290756225586, |
|
"logits/rejected": -1.851264238357544, |
|
"logps/chosen": -33.51964569091797, |
|
"logps/rejected": -35.407535552978516, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.000378346536308527, |
|
"rewards/margins": -6.972800474613905e-05, |
|
"rewards/rejected": 0.0004480745701584965, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": -1.941019058227539, |
|
"logits/rejected": -1.9429725408554077, |
|
"logps/chosen": -32.50640869140625, |
|
"logps/rejected": -33.16156768798828, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0009377357782796025, |
|
"rewards/margins": 0.000839641026686877, |
|
"rewards/rejected": 9.809464972931892e-05, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": -2.071539878845215, |
|
"logits/rejected": -2.0764715671539307, |
|
"logps/chosen": -33.93183898925781, |
|
"logps/rejected": -36.525550842285156, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.000566466711461544, |
|
"rewards/margins": 0.0001332084066234529, |
|
"rewards/rejected": 0.0004332582466304302, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": -1.9310013055801392, |
|
"logits/rejected": -1.9341026544570923, |
|
"logps/chosen": -34.225975036621094, |
|
"logps/rejected": -34.53061294555664, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0020183573942631483, |
|
"rewards/margins": 0.0014004094991832972, |
|
"rewards/rejected": 0.0006179477786645293, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": -1.9398448467254639, |
|
"logits/rejected": -1.9443466663360596, |
|
"logps/chosen": -32.252197265625, |
|
"logps/rejected": -32.24930953979492, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0021522401366382837, |
|
"rewards/margins": 0.0010562599636614323, |
|
"rewards/rejected": 0.0010959801729768515, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": -2.037635564804077, |
|
"logits/rejected": -2.0356411933898926, |
|
"logps/chosen": -31.99556541442871, |
|
"logps/rejected": -31.139856338500977, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0024526813067495823, |
|
"rewards/margins": 0.0011891307076439261, |
|
"rewards/rejected": 0.0012635505991056561, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.232027292251587, |
|
"eval_logits/rejected": -2.227186441421509, |
|
"eval_logps/chosen": -33.85300064086914, |
|
"eval_logps/rejected": -37.352054595947266, |
|
"eval_loss": 0.49996307492256165, |
|
"eval_rewards/accuracies": 0.5182723999023438, |
|
"eval_rewards/chosen": 0.0018155159195885062, |
|
"eval_rewards/margins": 0.00016987840353976935, |
|
"eval_rewards/rejected": 0.0016456374432891607, |
|
"eval_runtime": 145.752, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": -1.9933750629425049, |
|
"logits/rejected": -1.9910228252410889, |
|
"logps/chosen": -32.92595291137695, |
|
"logps/rejected": -33.842830657958984, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0031786723993718624, |
|
"rewards/margins": 0.0013132576132193208, |
|
"rewards/rejected": 0.0018654146697372198, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": -2.003373861312866, |
|
"logits/rejected": -1.9950469732284546, |
|
"logps/chosen": -32.12664031982422, |
|
"logps/rejected": -31.956974029541016, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0031826242338865995, |
|
"rewards/margins": 0.0011372944572940469, |
|
"rewards/rejected": 0.002045330125838518, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": -2.0322792530059814, |
|
"logits/rejected": -2.024336099624634, |
|
"logps/chosen": -30.118215560913086, |
|
"logps/rejected": -31.88967514038086, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0035750004462897778, |
|
"rewards/margins": 0.0019416653085500002, |
|
"rewards/rejected": 0.0016333358362317085, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": -1.9619420766830444, |
|
"logits/rejected": -1.9721254110336304, |
|
"logps/chosen": -31.056041717529297, |
|
"logps/rejected": -32.40135955810547, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0036362011451274157, |
|
"rewards/margins": 0.002301006345078349, |
|
"rewards/rejected": 0.0013351945672184229, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": -1.873915433883667, |
|
"logits/rejected": -1.8750746250152588, |
|
"logps/chosen": -33.66141891479492, |
|
"logps/rejected": -34.53316116333008, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.005312725435942411, |
|
"rewards/margins": 0.003071808721870184, |
|
"rewards/rejected": 0.002240917179733515, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": -1.9251092672348022, |
|
"logits/rejected": -1.921754240989685, |
|
"logps/chosen": -35.78556823730469, |
|
"logps/rejected": -32.478919982910156, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0038611083291471004, |
|
"rewards/margins": 0.0014227699721232057, |
|
"rewards/rejected": 0.0024383387062698603, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": -2.0260913372039795, |
|
"logits/rejected": -2.0187911987304688, |
|
"logps/chosen": -33.21813201904297, |
|
"logps/rejected": -31.20585060119629, |
|
"loss": 0.499, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.005797281861305237, |
|
"rewards/margins": 0.00433726841583848, |
|
"rewards/rejected": 0.0014600132126361132, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": -2.0327439308166504, |
|
"logits/rejected": -2.0379390716552734, |
|
"logps/chosen": -31.954986572265625, |
|
"logps/rejected": -32.14731979370117, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.00589799415320158, |
|
"rewards/margins": 0.0025197656359523535, |
|
"rewards/rejected": 0.0033782287500798702, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": -2.0335724353790283, |
|
"logits/rejected": -2.0308480262756348, |
|
"logps/chosen": -31.060842514038086, |
|
"logps/rejected": -31.11257553100586, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.004281100817024708, |
|
"rewards/margins": 0.002224702388048172, |
|
"rewards/rejected": 0.0020563979633152485, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": -1.9044897556304932, |
|
"logits/rejected": -1.9091441631317139, |
|
"logps/chosen": -31.081249237060547, |
|
"logps/rejected": -32.595829010009766, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0052503314800560474, |
|
"rewards/margins": 0.0032992898486554623, |
|
"rewards/rejected": 0.0019510419806465507, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.2288246154785156, |
|
"eval_logits/rejected": -2.2240161895751953, |
|
"eval_logps/chosen": -33.74216842651367, |
|
"eval_logps/rejected": -37.281192779541016, |
|
"eval_loss": 0.49986347556114197, |
|
"eval_rewards/accuracies": 0.5747508406639099, |
|
"eval_rewards/chosen": 0.0029238576535135508, |
|
"eval_rewards/margins": 0.0005695598665624857, |
|
"eval_rewards/rejected": 0.002354297786951065, |
|
"eval_runtime": 145.8032, |
|
"eval_samples_per_second": 2.352, |
|
"eval_steps_per_second": 0.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": -2.016956090927124, |
|
"logits/rejected": -2.0275344848632812, |
|
"logps/chosen": -31.490520477294922, |
|
"logps/rejected": -33.689552307128906, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.004807817749679089, |
|
"rewards/margins": 0.003076353808864951, |
|
"rewards/rejected": 0.0017314634751528502, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": -1.9092109203338623, |
|
"logits/rejected": -1.9238770008087158, |
|
"logps/chosen": -29.577035903930664, |
|
"logps/rejected": -31.389028549194336, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.00534503348171711, |
|
"rewards/margins": 0.003653537482023239, |
|
"rewards/rejected": 0.0016914959996938705, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": -1.9660460948944092, |
|
"logits/rejected": -1.9700311422348022, |
|
"logps/chosen": -32.787933349609375, |
|
"logps/rejected": -31.389507293701172, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.006295348517596722, |
|
"rewards/margins": 0.004697396419942379, |
|
"rewards/rejected": 0.001597951864823699, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": -1.9643852710723877, |
|
"logits/rejected": -1.9426368474960327, |
|
"logps/chosen": -33.608482360839844, |
|
"logps/rejected": -34.86387252807617, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.005452706944197416, |
|
"rewards/margins": 0.004583484493196011, |
|
"rewards/rejected": 0.0008692230330780149, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": -2.006474256515503, |
|
"logits/rejected": -2.003183364868164, |
|
"logps/chosen": -32.441749572753906, |
|
"logps/rejected": -35.984519958496094, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.004833144135773182, |
|
"rewards/margins": 0.00242913281545043, |
|
"rewards/rejected": 0.002404011320322752, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": -1.8745994567871094, |
|
"logits/rejected": -1.8721377849578857, |
|
"logps/chosen": -33.70234298706055, |
|
"logps/rejected": -35.24811553955078, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.004860556218773127, |
|
"rewards/margins": 0.002295448211953044, |
|
"rewards/rejected": 0.0025651075411587954, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": -1.8601783514022827, |
|
"logits/rejected": -1.8576805591583252, |
|
"logps/chosen": -33.903480529785156, |
|
"logps/rejected": -31.573394775390625, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0049412003718316555, |
|
"rewards/margins": 0.0029618421103805304, |
|
"rewards/rejected": 0.001979358261451125, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": -1.963313341140747, |
|
"logits/rejected": -1.9528875350952148, |
|
"logps/chosen": -34.740501403808594, |
|
"logps/rejected": -31.602115631103516, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.006161376368254423, |
|
"rewards/margins": 0.0037725958973169327, |
|
"rewards/rejected": 0.002388780238106847, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": -2.058793544769287, |
|
"logits/rejected": -2.043962240219116, |
|
"logps/chosen": -30.4116268157959, |
|
"logps/rejected": -32.341819763183594, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.005124981049448252, |
|
"rewards/margins": 0.0020247932989150286, |
|
"rewards/rejected": 0.003100187750533223, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": -1.9307619333267212, |
|
"logits/rejected": -1.9283145666122437, |
|
"logps/chosen": -32.04631042480469, |
|
"logps/rejected": -30.6721134185791, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.008808004669845104, |
|
"rewards/margins": 0.007361026015132666, |
|
"rewards/rejected": 0.0014469798188656569, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.2289085388183594, |
|
"eval_logits/rejected": -2.2240960597991943, |
|
"eval_logps/chosen": -33.75447463989258, |
|
"eval_logps/rejected": -37.274105072021484, |
|
"eval_loss": 0.499908983707428, |
|
"eval_rewards/accuracies": 0.5041528344154358, |
|
"eval_rewards/chosen": 0.0028007798828184605, |
|
"eval_rewards/margins": 0.00037563726073130965, |
|
"eval_rewards/rejected": 0.00242514256387949, |
|
"eval_runtime": 145.6422, |
|
"eval_samples_per_second": 2.355, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.84533120650964e-06, |
|
"logits/chosen": -1.914014458656311, |
|
"logits/rejected": -1.9108896255493164, |
|
"logps/chosen": -31.016780853271484, |
|
"logps/rejected": -33.5390739440918, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.005865688901394606, |
|
"rewards/margins": 0.0038349279202520847, |
|
"rewards/rejected": 0.002030761446803808, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.825108134172131e-06, |
|
"logits/chosen": -1.9622209072113037, |
|
"logits/rejected": -1.9501073360443115, |
|
"logps/chosen": -33.98822784423828, |
|
"logps/rejected": -33.42522048950195, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.0060784295201301575, |
|
"rewards/margins": 0.00494221830740571, |
|
"rewards/rejected": 0.0011362109798938036, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.80369052967602e-06, |
|
"logits/chosen": -1.997842788696289, |
|
"logits/rejected": -1.9965225458145142, |
|
"logps/chosen": -32.7388801574707, |
|
"logps/rejected": -32.22772216796875, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.00746917212381959, |
|
"rewards/margins": 0.0049505168572068214, |
|
"rewards/rejected": 0.002518654800951481, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.781089396387968e-06, |
|
"logits/chosen": -2.0849790573120117, |
|
"logits/rejected": -2.06939435005188, |
|
"logps/chosen": -33.35835266113281, |
|
"logps/rejected": -32.73654556274414, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.008210408501327038, |
|
"rewards/margins": 0.004395507741719484, |
|
"rewards/rejected": 0.0038149021565914154, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits/chosen": -1.9578001499176025, |
|
"logits/rejected": -1.9570014476776123, |
|
"logps/chosen": -32.38352966308594, |
|
"logps/rejected": -32.16341781616211, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.008985674008727074, |
|
"rewards/margins": 0.005968388635665178, |
|
"rewards/rejected": 0.0030172846745699644, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.73238359114687e-06, |
|
"logits/chosen": -1.9111387729644775, |
|
"logits/rejected": -1.9213663339614868, |
|
"logps/chosen": -31.330036163330078, |
|
"logps/rejected": -34.964256286621094, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.008746545761823654, |
|
"rewards/margins": 0.005501560866832733, |
|
"rewards/rejected": 0.003244984894990921, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.706303941965804e-06, |
|
"logits/chosen": -2.0449702739715576, |
|
"logits/rejected": -2.0385282039642334, |
|
"logps/chosen": -32.85245132446289, |
|
"logps/rejected": -28.954187393188477, |
|
"loss": 0.4987, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.0079951835796237, |
|
"rewards/margins": 0.005272808950394392, |
|
"rewards/rejected": 0.002722373930737376, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.679090796681225e-06, |
|
"logits/chosen": -1.901015281677246, |
|
"logits/rejected": -1.9031999111175537, |
|
"logps/chosen": -33.20323944091797, |
|
"logps/rejected": -30.677053451538086, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.010358055122196674, |
|
"rewards/margins": 0.00840230192989111, |
|
"rewards/rejected": 0.001955752493813634, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.650758136138454e-06, |
|
"logits/chosen": -1.9289333820343018, |
|
"logits/rejected": -1.9276466369628906, |
|
"logps/chosen": -33.32128143310547, |
|
"logps/rejected": -35.63185119628906, |
|
"loss": 0.4975, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": 0.01063510961830616, |
|
"rewards/margins": 0.010393550619482994, |
|
"rewards/rejected": 0.00024155918799806386, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.621320516337559e-06, |
|
"logits/chosen": -1.863186240196228, |
|
"logits/rejected": -1.8549854755401611, |
|
"logps/chosen": -30.498249053955078, |
|
"logps/rejected": -36.088096618652344, |
|
"loss": 0.4967, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.012757278978824615, |
|
"rewards/margins": 0.014232242479920387, |
|
"rewards/rejected": -0.001474961405619979, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_logits/chosen": -2.2094738483428955, |
|
"eval_logits/rejected": -2.2046518325805664, |
|
"eval_logps/chosen": -33.672157287597656, |
|
"eval_logps/rejected": -37.215171813964844, |
|
"eval_loss": 0.4998500943183899, |
|
"eval_rewards/accuracies": 0.5622923374176025, |
|
"eval_rewards/chosen": 0.0036239090841263533, |
|
"eval_rewards/margins": 0.0006094546988606453, |
|
"eval_rewards/rejected": 0.003014454385265708, |
|
"eval_runtime": 146.2447, |
|
"eval_samples_per_second": 2.345, |
|
"eval_steps_per_second": 0.294, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.590793060955158e-06, |
|
"logits/chosen": -2.0367393493652344, |
|
"logits/rejected": -2.0395472049713135, |
|
"logps/chosen": -31.686859130859375, |
|
"logps/rejected": -34.91094970703125, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.013670777902007103, |
|
"rewards/margins": 0.0148518281057477, |
|
"rewards/rejected": -0.0011810490395873785, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.559191453574582e-06, |
|
"logits/chosen": -1.8766273260116577, |
|
"logits/rejected": -1.8752492666244507, |
|
"logps/chosen": -27.93256187438965, |
|
"logps/rejected": -32.38922882080078, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.011349949985742569, |
|
"rewards/margins": 0.012065614573657513, |
|
"rewards/rejected": -0.0007156648789532483, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.52653192962838e-06, |
|
"logits/chosen": -1.834745168685913, |
|
"logits/rejected": -1.8277431726455688, |
|
"logps/chosen": -32.58226013183594, |
|
"logps/rejected": -34.0352783203125, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.014918235130608082, |
|
"rewards/margins": 0.012818296439945698, |
|
"rewards/rejected": 0.0020999389234930277, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.492831268057307e-06, |
|
"logits/chosen": -2.005305767059326, |
|
"logits/rejected": -2.0002236366271973, |
|
"logps/chosen": -30.26675033569336, |
|
"logps/rejected": -32.066688537597656, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.015165433287620544, |
|
"rewards/margins": 0.01615620031952858, |
|
"rewards/rejected": -0.0009907669154927135, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.458106782690094e-06, |
|
"logits/chosen": -1.8884214162826538, |
|
"logits/rejected": -1.892564058303833, |
|
"logps/chosen": -32.953224182128906, |
|
"logps/rejected": -32.72943878173828, |
|
"loss": 0.4961, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.01562627963721752, |
|
"rewards/margins": 0.017034271731972694, |
|
"rewards/rejected": -0.0014079909306019545, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.422376313348405e-06, |
|
"logits/chosen": -1.8940017223358154, |
|
"logits/rejected": -1.888357162475586, |
|
"logps/chosen": -33.757293701171875, |
|
"logps/rejected": -35.25668716430664, |
|
"loss": 0.4956, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.016622314229607582, |
|
"rewards/margins": 0.01969819888472557, |
|
"rewards/rejected": -0.003075886517763138, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.3856582166815696e-06, |
|
"logits/chosen": -1.9210342168807983, |
|
"logits/rejected": -1.920659065246582, |
|
"logps/chosen": -32.557655334472656, |
|
"logps/rejected": -34.18804168701172, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.01610674150288105, |
|
"rewards/margins": 0.01613234356045723, |
|
"rewards/rejected": -2.560380380600691e-05, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.347971356735789e-06, |
|
"logits/chosen": -1.9694734811782837, |
|
"logits/rejected": -1.9510208368301392, |
|
"logps/chosen": -32.425758361816406, |
|
"logps/rejected": -33.256771087646484, |
|
"loss": 0.4956, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.01735007017850876, |
|
"rewards/margins": 0.01921800896525383, |
|
"rewards/rejected": -0.001867939019575715, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.309335095262675e-06, |
|
"logits/chosen": -1.934520959854126, |
|
"logits/rejected": -1.934033751487732, |
|
"logps/chosen": -29.948516845703125, |
|
"logps/rejected": -31.202651977539062, |
|
"loss": 0.4961, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.016627881675958633, |
|
"rewards/margins": 0.016193937510252, |
|
"rewards/rejected": 0.0004339427687227726, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.269769281772082e-06, |
|
"logits/chosen": -1.8985036611557007, |
|
"logits/rejected": -1.891710638999939, |
|
"logps/chosen": -30.8389892578125, |
|
"logps/rejected": -34.76033020019531, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.018311362713575363, |
|
"rewards/margins": 0.01964724436402321, |
|
"rewards/rejected": -0.0013358818832784891, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_logits/chosen": -2.1807026863098145, |
|
"eval_logits/rejected": -2.1759257316589355, |
|
"eval_logps/chosen": -33.619056701660156, |
|
"eval_logps/rejected": -37.18906021118164, |
|
"eval_loss": 0.4997919797897339, |
|
"eval_rewards/accuracies": 0.5772424936294556, |
|
"eval_rewards/chosen": 0.0041549475863575935, |
|
"eval_rewards/margins": 0.0008793265442363918, |
|
"eval_rewards/rejected": 0.003275620751082897, |
|
"eval_runtime": 145.8925, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.22929424333435e-06, |
|
"logits/chosen": -1.892907738685608, |
|
"logits/rejected": -1.8967831134796143, |
|
"logps/chosen": -27.704700469970703, |
|
"logps/rejected": -32.9903450012207, |
|
"loss": 0.4963, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.014701254665851593, |
|
"rewards/margins": 0.016011826694011688, |
|
"rewards/rejected": -0.001310571446083486, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.1879307741372085e-06, |
|
"logits/chosen": -1.8976653814315796, |
|
"logits/rejected": -1.9082101583480835, |
|
"logps/chosen": -31.580432891845703, |
|
"logps/rejected": -30.9519100189209, |
|
"loss": 0.4955, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.016818998381495476, |
|
"rewards/margins": 0.01982123777270317, |
|
"rewards/rejected": -0.003002240788191557, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.145700124802693e-06, |
|
"logits/chosen": -1.8399875164031982, |
|
"logits/rejected": -1.8377044200897217, |
|
"logps/chosen": -29.908676147460938, |
|
"logps/rejected": -30.362207412719727, |
|
"loss": 0.4955, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.01758403517305851, |
|
"rewards/margins": 0.019438493996858597, |
|
"rewards/rejected": -0.0018544571939855814, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.102623991469562e-06, |
|
"logits/chosen": -1.920372724533081, |
|
"logits/rejected": -1.9134242534637451, |
|
"logps/chosen": -32.44926071166992, |
|
"logps/rejected": -33.200767517089844, |
|
"loss": 0.4955, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.017835671082139015, |
|
"rewards/margins": 0.019668519496917725, |
|
"rewards/rejected": -0.0018328496953472495, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.058724504646834e-06, |
|
"logits/chosen": -1.885063886642456, |
|
"logits/rejected": -1.8914177417755127, |
|
"logps/chosen": -30.19329261779785, |
|
"logps/rejected": -32.799842834472656, |
|
"loss": 0.4963, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.015598910860717297, |
|
"rewards/margins": 0.01600998267531395, |
|
"rewards/rejected": -0.0004110717272851616, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.014024217844167e-06, |
|
"logits/chosen": -1.9558032751083374, |
|
"logits/rejected": -1.9331070184707642, |
|
"logps/chosen": -29.753009796142578, |
|
"logps/rejected": -33.04769515991211, |
|
"loss": 0.4959, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.0168455857783556, |
|
"rewards/margins": 0.017361946403980255, |
|
"rewards/rejected": -0.0005163621390238404, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.968546095984911e-06, |
|
"logits/chosen": -1.8903181552886963, |
|
"logits/rejected": -1.8854230642318726, |
|
"logps/chosen": -30.62668228149414, |
|
"logps/rejected": -32.05100631713867, |
|
"loss": 0.4959, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.01790049858391285, |
|
"rewards/margins": 0.01720140501856804, |
|
"rewards/rejected": 0.0006990955444052815, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.922313503607806e-06, |
|
"logits/chosen": -1.930395483970642, |
|
"logits/rejected": -1.932220220565796, |
|
"logps/chosen": -32.589778900146484, |
|
"logps/rejected": -35.16767883300781, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.017860129475593567, |
|
"rewards/margins": 0.021527757868170738, |
|
"rewards/rejected": -0.0036676295567303896, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.875350192863368e-06, |
|
"logits/chosen": -1.9129533767700195, |
|
"logits/rejected": -1.9123092889785767, |
|
"logps/chosen": -28.789093017578125, |
|
"logps/rejected": -31.536624908447266, |
|
"loss": 0.4956, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.018029652535915375, |
|
"rewards/margins": 0.01877954974770546, |
|
"rewards/rejected": -0.0007498954655602574, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 3.8276802913111436e-06, |
|
"logits/chosen": -1.9223308563232422, |
|
"logits/rejected": -1.9202518463134766, |
|
"logps/chosen": -31.292776107788086, |
|
"logps/rejected": -32.150634765625, |
|
"loss": 0.4964, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.017055068165063858, |
|
"rewards/margins": 0.015561839565634727, |
|
"rewards/rejected": 0.0014932285994291306, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_logits/chosen": -2.1569008827209473, |
|
"eval_logits/rejected": -2.1521544456481934, |
|
"eval_logps/chosen": -33.592525482177734, |
|
"eval_logps/rejected": -37.25735855102539, |
|
"eval_loss": 0.4995650053024292, |
|
"eval_rewards/accuracies": 0.5685215592384338, |
|
"eval_rewards/chosen": 0.004420237150043249, |
|
"eval_rewards/margins": 0.0018276458140462637, |
|
"eval_rewards/rejected": 0.0025925911031663418, |
|
"eval_runtime": 145.7896, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 3.7793282895240927e-06, |
|
"logits/chosen": -1.9622409343719482, |
|
"logits/rejected": -1.968703031539917, |
|
"logps/chosen": -30.59027099609375, |
|
"logps/rejected": -32.09941864013672, |
|
"loss": 0.4961, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.016471117734909058, |
|
"rewards/margins": 0.016833433881402016, |
|
"rewards/rejected": -0.0003623150405474007, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.730319028506478e-06, |
|
"logits/chosen": -1.9154777526855469, |
|
"logits/rejected": -1.913214921951294, |
|
"logps/chosen": -32.721229553222656, |
|
"logps/rejected": -31.03836441040039, |
|
"loss": 0.4951, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.0203465037047863, |
|
"rewards/margins": 0.020875070244073868, |
|
"rewards/rejected": -0.000528569333255291, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.6806776869317074e-06, |
|
"logits/chosen": -1.8615707159042358, |
|
"logits/rejected": -1.8549633026123047, |
|
"logps/chosen": -33.12920379638672, |
|
"logps/rejected": -32.32065963745117, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.023350173607468605, |
|
"rewards/margins": 0.02298470214009285, |
|
"rewards/rejected": 0.00036546969204209745, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.6304297682067146e-06, |
|
"logits/chosen": -1.8696269989013672, |
|
"logits/rejected": -1.8758395910263062, |
|
"logps/chosen": -32.165340423583984, |
|
"logps/rejected": -33.2056770324707, |
|
"loss": 0.4958, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.018139610067009926, |
|
"rewards/margins": 0.01779373362660408, |
|
"rewards/rejected": 0.0003458770806901157, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.579601087369492e-06, |
|
"logits/chosen": -1.952419638633728, |
|
"logits/rejected": -1.966408371925354, |
|
"logps/chosen": -30.081417083740234, |
|
"logps/rejected": -31.9990291595459, |
|
"loss": 0.4958, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.017710469663143158, |
|
"rewards/margins": 0.017723901197314262, |
|
"rewards/rejected": -1.3430044418782927e-05, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.5282177578265295e-06, |
|
"logits/chosen": -1.828285574913025, |
|
"logits/rejected": -1.8253133296966553, |
|
"logps/chosen": -31.497600555419922, |
|
"logps/rejected": -35.03656005859375, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.024253567680716515, |
|
"rewards/margins": 0.02761664055287838, |
|
"rewards/rejected": -0.0033630705438554287, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.476306177936961e-06, |
|
"logits/chosen": -1.9160382747650146, |
|
"logits/rejected": -1.916337251663208, |
|
"logps/chosen": -29.65035057067871, |
|
"logps/rejected": -34.16764831542969, |
|
"loss": 0.4957, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.01635240949690342, |
|
"rewards/margins": 0.01976504735648632, |
|
"rewards/rejected": -0.003412640420719981, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.423893017450324e-06, |
|
"logits/chosen": -1.8645700216293335, |
|
"logits/rejected": -1.861588716506958, |
|
"logps/chosen": -29.15572166442871, |
|
"logps/rejected": -33.00225067138672, |
|
"loss": 0.4957, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.01716628111898899, |
|
"rewards/margins": 0.01901078037917614, |
|
"rewards/rejected": -0.0018444998422637582, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.3710052038048794e-06, |
|
"logits/chosen": -1.8963727951049805, |
|
"logits/rejected": -1.8964437246322632, |
|
"logps/chosen": -27.984289169311523, |
|
"logps/rejected": -30.99312973022461, |
|
"loss": 0.4943, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.02178804762661457, |
|
"rewards/margins": 0.025014396756887436, |
|
"rewards/rejected": -0.0032263505272567272, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"logits/chosen": -1.8081165552139282, |
|
"logits/rejected": -1.81146240234375, |
|
"logps/chosen": -32.05728530883789, |
|
"logps/rejected": -31.581283569335938, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.024211909621953964, |
|
"rewards/margins": 0.025377869606018066, |
|
"rewards/rejected": -0.001165962778031826, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_logits/chosen": -2.1417086124420166, |
|
"eval_logits/rejected": -2.1370012760162354, |
|
"eval_logps/chosen": -33.69810485839844, |
|
"eval_logps/rejected": -37.340553283691406, |
|
"eval_loss": 0.49963295459747314, |
|
"eval_rewards/accuracies": 0.579734206199646, |
|
"eval_rewards/chosen": 0.0033644884824752808, |
|
"eval_rewards/margins": 0.0016038385219871998, |
|
"eval_rewards/rejected": 0.0017606498440727592, |
|
"eval_runtime": 145.8796, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.2639145321045933e-06, |
|
"logits/chosen": -1.8899507522583008, |
|
"logits/rejected": -1.8811132907867432, |
|
"logps/chosen": -34.376182556152344, |
|
"logps/rejected": -32.027015686035156, |
|
"loss": 0.4948, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.021358376368880272, |
|
"rewards/margins": 0.02257036231458187, |
|
"rewards/rejected": -0.0012119871098548174, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.2097666922441107e-06, |
|
"logits/chosen": -1.9003782272338867, |
|
"logits/rejected": -1.9015012979507446, |
|
"logps/chosen": -34.15888214111328, |
|
"logps/rejected": -33.193199157714844, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.022765014320611954, |
|
"rewards/margins": 0.023936782032251358, |
|
"rewards/rejected": -0.0011717682937160134, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.1552542073477554e-06, |
|
"logits/chosen": -1.9175533056259155, |
|
"logits/rejected": -1.9151859283447266, |
|
"logps/chosen": -30.27239990234375, |
|
"logps/rejected": -33.10813522338867, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.02290928363800049, |
|
"rewards/margins": 0.024021681398153305, |
|
"rewards/rejected": -0.0011123981093987823, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.100405083388799e-06, |
|
"logits/chosen": -1.8999090194702148, |
|
"logits/rejected": -1.9051405191421509, |
|
"logps/chosen": -29.600543975830078, |
|
"logps/rejected": -33.40522384643555, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.021851813420653343, |
|
"rewards/margins": 0.02435588836669922, |
|
"rewards/rejected": -0.0025040716864168644, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.0452474992899645e-06, |
|
"logits/chosen": -1.8462251424789429, |
|
"logits/rejected": -1.8450710773468018, |
|
"logps/chosen": -30.871395111083984, |
|
"logps/rejected": -34.77960205078125, |
|
"loss": 0.4947, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.022535843774676323, |
|
"rewards/margins": 0.02347227931022644, |
|
"rewards/rejected": -0.0009364362922497094, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.989809792446417e-06, |
|
"logits/chosen": -1.720664381980896, |
|
"logits/rejected": -1.7157522439956665, |
|
"logps/chosen": -33.3805046081543, |
|
"logps/rejected": -35.51945877075195, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.026179391890764236, |
|
"rewards/margins": 0.02928983047604561, |
|
"rewards/rejected": -0.003110440680757165, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits/chosen": -1.8583590984344482, |
|
"logits/rejected": -1.8622472286224365, |
|
"logps/chosen": -33.08837127685547, |
|
"logps/rejected": -33.761695861816406, |
|
"loss": 0.4948, |
|
"rewards/accuracies": 0.8541666269302368, |
|
"rewards/chosen": 0.021252866834402084, |
|
"rewards/margins": 0.022921394556760788, |
|
"rewards/rejected": -0.0016685245791450143, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 2.878208065043501e-06, |
|
"logits/chosen": -1.803155541419983, |
|
"logits/rejected": -1.8014135360717773, |
|
"logps/chosen": -30.931873321533203, |
|
"logps/rejected": -35.34140396118164, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.029697230085730553, |
|
"rewards/margins": 0.039064671844244, |
|
"rewards/rejected": -0.00936744175851345, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.8221013802485974e-06, |
|
"logits/chosen": -1.8476654291152954, |
|
"logits/rejected": -1.8463468551635742, |
|
"logps/chosen": -30.539403915405273, |
|
"logps/rejected": -33.56371307373047, |
|
"loss": 0.4925, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.02836332656443119, |
|
"rewards/margins": 0.033134736120700836, |
|
"rewards/rejected": -0.004771408159285784, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.76582921478147e-06, |
|
"logits/chosen": -1.7794796228408813, |
|
"logits/rejected": -1.773779273033142, |
|
"logps/chosen": -31.581567764282227, |
|
"logps/rejected": -31.875568389892578, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.028015125542879105, |
|
"rewards/margins": 0.03212819993495941, |
|
"rewards/rejected": -0.004113074392080307, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_logits/chosen": -2.132253885269165, |
|
"eval_logits/rejected": -2.1275634765625, |
|
"eval_logps/chosen": -33.6678466796875, |
|
"eval_logps/rejected": -37.38642120361328, |
|
"eval_loss": 0.499472975730896, |
|
"eval_rewards/accuracies": 0.594684362411499, |
|
"eval_rewards/chosen": 0.0036670216359198093, |
|
"eval_rewards/margins": 0.002365043619647622, |
|
"eval_rewards/rejected": 0.0013019782491028309, |
|
"eval_runtime": 145.9155, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.7094204786572254e-06, |
|
"logits/chosen": -1.875432014465332, |
|
"logits/rejected": -1.8828538656234741, |
|
"logps/chosen": -29.421600341796875, |
|
"logps/rejected": -34.89478302001953, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.027066577225923538, |
|
"rewards/margins": 0.033455990254879, |
|
"rewards/rejected": -0.006389413960278034, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.6529041520546072e-06, |
|
"logits/chosen": -1.8495336771011353, |
|
"logits/rejected": -1.852142572402954, |
|
"logps/chosen": -30.283716201782227, |
|
"logps/rejected": -33.47504425048828, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.022873621433973312, |
|
"rewards/margins": 0.02532605454325676, |
|
"rewards/rejected": -0.00245243264362216, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.5963092704273302e-06, |
|
"logits/chosen": -1.748708963394165, |
|
"logits/rejected": -1.7528718709945679, |
|
"logps/chosen": -29.96748924255371, |
|
"logps/rejected": -35.57471466064453, |
|
"loss": 0.4927, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.026239663362503052, |
|
"rewards/margins": 0.032885000109672546, |
|
"rewards/rejected": -0.0066453381441533566, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.53966490958702e-06, |
|
"logits/chosen": -1.8163249492645264, |
|
"logits/rejected": -1.8123382329940796, |
|
"logps/chosen": -30.351696014404297, |
|
"logps/rejected": -33.14829635620117, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.026169534772634506, |
|
"rewards/margins": 0.027706000953912735, |
|
"rewards/rejected": -0.0015364640858024359, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.4830001707654135e-06, |
|
"logits/chosen": -1.8956245183944702, |
|
"logits/rejected": -1.8976259231567383, |
|
"logps/chosen": -29.960979461669922, |
|
"logps/rejected": -36.380279541015625, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.02814355492591858, |
|
"rewards/margins": 0.03532697632908821, |
|
"rewards/rejected": -0.007183422800153494, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.4263441656635054e-06, |
|
"logits/chosen": -1.7016551494598389, |
|
"logits/rejected": -1.6961944103240967, |
|
"logps/chosen": -33.43014144897461, |
|
"logps/rejected": -33.36838150024414, |
|
"loss": 0.4925, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.028515327721834183, |
|
"rewards/margins": 0.03328012302517891, |
|
"rewards/rejected": -0.004764794372022152, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.3697260014953107e-06, |
|
"logits/chosen": -1.7582767009735107, |
|
"logits/rejected": -1.7583192586898804, |
|
"logps/chosen": -32.9239616394043, |
|
"logps/rejected": -35.554019927978516, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.029188454151153564, |
|
"rewards/margins": 0.036265987902879715, |
|
"rewards/rejected": -0.007077532354742289, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.3131747660339396e-06, |
|
"logits/chosen": -1.8046823740005493, |
|
"logits/rejected": -1.7934318780899048, |
|
"logps/chosen": -31.343753814697266, |
|
"logps/rejected": -33.549922943115234, |
|
"loss": 0.4925, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.027628283947706223, |
|
"rewards/margins": 0.033907197415828705, |
|
"rewards/rejected": -0.006278916262090206, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.256719512667651e-06, |
|
"logits/chosen": -1.9076982736587524, |
|
"logits/rejected": -1.9121280908584595, |
|
"logps/chosen": -30.531978607177734, |
|
"logps/rejected": -33.27050018310547, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.027979601174592972, |
|
"rewards/margins": 0.0363197848200798, |
|
"rewards/rejected": -0.008340183645486832, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.2003892454735786e-06, |
|
"logits/chosen": -1.826246976852417, |
|
"logits/rejected": -1.8192167282104492, |
|
"logps/chosen": -31.618709564208984, |
|
"logps/rejected": -33.17729949951172, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.032353680580854416, |
|
"rewards/margins": 0.04010532423853874, |
|
"rewards/rejected": -0.007751642260700464, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_logits/chosen": -2.1153886318206787, |
|
"eval_logits/rejected": -2.1107325553894043, |
|
"eval_logps/chosen": -33.72140884399414, |
|
"eval_logps/rejected": -37.456180572509766, |
|
"eval_loss": 0.4994627833366394, |
|
"eval_rewards/accuracies": 0.5739202499389648, |
|
"eval_rewards/chosen": 0.003131402190774679, |
|
"eval_rewards/margins": 0.0025269899051636457, |
|
"eval_rewards/rejected": 0.0006044124602340162, |
|
"eval_runtime": 145.8928, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.1442129043167877e-06, |
|
"logits/chosen": -1.8176963329315186, |
|
"logits/rejected": -1.8180383443832397, |
|
"logps/chosen": -28.452808380126953, |
|
"logps/rejected": -35.59280014038086, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.027143558487296104, |
|
"rewards/margins": 0.036174654960632324, |
|
"rewards/rejected": -0.00903109647333622, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.088219349982323e-06, |
|
"logits/chosen": -1.772467017173767, |
|
"logits/rejected": -1.764556646347046, |
|
"logps/chosen": -29.524948120117188, |
|
"logps/rejected": -34.33641815185547, |
|
"loss": 0.4934, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.023139983415603638, |
|
"rewards/margins": 0.030790451914072037, |
|
"rewards/rejected": -0.007650467567145824, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.0324373493478803e-06, |
|
"logits/chosen": -1.9374393224716187, |
|
"logits/rejected": -1.9376726150512695, |
|
"logps/chosen": -27.288341522216797, |
|
"logps/rejected": -33.29496383666992, |
|
"loss": 0.493, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.027086907997727394, |
|
"rewards/margins": 0.03138797730207443, |
|
"rewards/rejected": -0.0043010651133954525, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.976895560604729e-06, |
|
"logits/chosen": -1.8147531747817993, |
|
"logits/rejected": -1.8243385553359985, |
|
"logps/chosen": -31.1882266998291, |
|
"logps/rejected": -33.76173400878906, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.03236837312579155, |
|
"rewards/margins": 0.040089257061481476, |
|
"rewards/rejected": -0.007720877416431904, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.921622518534466e-06, |
|
"logits/chosen": -1.8579801321029663, |
|
"logits/rejected": -1.861673355102539, |
|
"logps/chosen": -28.247013092041016, |
|
"logps/rejected": -32.32887268066406, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.024007126688957214, |
|
"rewards/margins": 0.02993503212928772, |
|
"rewards/rejected": -0.0059279040433466434, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.8666466198491794e-06, |
|
"logits/chosen": -1.8509804010391235, |
|
"logits/rejected": -1.8471883535385132, |
|
"logps/chosen": -31.077259063720703, |
|
"logps/rejected": -34.63513946533203, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.029979607090353966, |
|
"rewards/margins": 0.037267137318849564, |
|
"rewards/rejected": -0.0072875297628343105, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.8119961086025376e-06, |
|
"logits/chosen": -1.7676801681518555, |
|
"logits/rejected": -1.7701276540756226, |
|
"logps/chosen": -29.73238754272461, |
|
"logps/rejected": -35.80852127075195, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.030803611502051353, |
|
"rewards/margins": 0.03707288205623627, |
|
"rewards/rejected": -0.0062692672945559025, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.7576990616793139e-06, |
|
"logits/chosen": -1.8003818988800049, |
|
"logits/rejected": -1.7940855026245117, |
|
"logps/chosen": -32.70087814331055, |
|
"logps/rejected": -37.19306945800781, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.027041900902986526, |
|
"rewards/margins": 0.03635306656360626, |
|
"rewards/rejected": -0.009311167523264885, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.7037833743707892e-06, |
|
"logits/chosen": -1.776210069656372, |
|
"logits/rejected": -1.7711480855941772, |
|
"logps/chosen": -28.2900390625, |
|
"logps/rejected": -36.815948486328125, |
|
"loss": 0.4922, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.02712910808622837, |
|
"rewards/margins": 0.03538975864648819, |
|
"rewards/rejected": -0.008260652422904968, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.6502767460434588e-06, |
|
"logits/chosen": -1.7549470663070679, |
|
"logits/rejected": -1.7445472478866577, |
|
"logps/chosen": -29.535968780517578, |
|
"logps/rejected": -30.032791137695312, |
|
"loss": 0.4941, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.022127199918031693, |
|
"rewards/margins": 0.026457766070961952, |
|
"rewards/rejected": -0.004330565221607685, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_logits/chosen": -2.1005983352661133, |
|
"eval_logits/rejected": -2.095959424972534, |
|
"eval_logps/chosen": -33.75029373168945, |
|
"eval_logps/rejected": -37.502525329589844, |
|
"eval_loss": 0.49945273995399475, |
|
"eval_rewards/accuracies": 0.6058970093727112, |
|
"eval_rewards/chosen": 0.0028425909113138914, |
|
"eval_rewards/margins": 0.002701645949855447, |
|
"eval_rewards/rejected": 0.00014094497601035982, |
|
"eval_runtime": 145.8673, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.5972066659083796e-06, |
|
"logits/chosen": -1.8609699010849, |
|
"logits/rejected": -1.8604313135147095, |
|
"logps/chosen": -29.16253662109375, |
|
"logps/rejected": -30.912038803100586, |
|
"loss": 0.4927, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.026616577059030533, |
|
"rewards/margins": 0.03315214067697525, |
|
"rewards/rejected": -0.006535563617944717, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.5446003988985041e-06, |
|
"logits/chosen": -1.9033771753311157, |
|
"logits/rejected": -1.9041751623153687, |
|
"logps/chosen": -29.415115356445312, |
|
"logps/rejected": -31.9776611328125, |
|
"loss": 0.4927, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.025284653529524803, |
|
"rewards/margins": 0.033403180539608, |
|
"rewards/rejected": -0.008118532598018646, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.4924849716612211e-06, |
|
"logits/chosen": -1.870643973350525, |
|
"logits/rejected": -1.8740373849868774, |
|
"logps/chosen": -29.589406967163086, |
|
"logps/rejected": -28.370319366455078, |
|
"loss": 0.4929, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.02690659835934639, |
|
"rewards/margins": 0.030772637575864792, |
|
"rewards/rejected": -0.003866040613502264, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.440887158673332e-06, |
|
"logits/chosen": -1.8781054019927979, |
|
"logits/rejected": -1.8704578876495361, |
|
"logps/chosen": -28.706920623779297, |
|
"logps/rejected": -34.413326263427734, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.02470974065363407, |
|
"rewards/margins": 0.03384070843458176, |
|
"rewards/rejected": -0.009130971506237984, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.3898334684855647e-06, |
|
"logits/chosen": -1.818250298500061, |
|
"logits/rejected": -1.8290458917617798, |
|
"logps/chosen": -30.612024307250977, |
|
"logps/rejected": -32.82752227783203, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.02941080555319786, |
|
"rewards/margins": 0.035235948860645294, |
|
"rewards/rejected": -0.005825136322528124, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.3393501301037245e-06, |
|
"logits/chosen": -1.8953624963760376, |
|
"logits/rejected": -1.8862526416778564, |
|
"logps/chosen": -30.589797973632812, |
|
"logps/rejected": -37.13585662841797, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.029109802097082138, |
|
"rewards/margins": 0.03715446963906288, |
|
"rewards/rejected": -0.008044666610658169, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.2894630795134454e-06, |
|
"logits/chosen": -1.8016560077667236, |
|
"logits/rejected": -1.8037437200546265, |
|
"logps/chosen": -32.77046203613281, |
|
"logps/rejected": -33.44564437866211, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.03218822926282883, |
|
"rewards/margins": 0.039665985852479935, |
|
"rewards/rejected": -0.007477754261344671, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.2401979463554984e-06, |
|
"logits/chosen": -1.9271634817123413, |
|
"logits/rejected": -1.9280281066894531, |
|
"logps/chosen": -30.242328643798828, |
|
"logps/rejected": -35.20170211791992, |
|
"loss": 0.4917, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.028153110295534134, |
|
"rewards/margins": 0.03901328891515732, |
|
"rewards/rejected": -0.010860181413590908, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.1915800407584705e-06, |
|
"logits/chosen": -1.9012151956558228, |
|
"logits/rejected": -1.9054481983184814, |
|
"logps/chosen": -28.445846557617188, |
|
"logps/rejected": -34.31275177001953, |
|
"loss": 0.493, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.02447468414902687, |
|
"rewards/margins": 0.03212384134531021, |
|
"rewards/rejected": -0.0076491571962833405, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.1436343403356019e-06, |
|
"logits/chosen": -1.8907396793365479, |
|
"logits/rejected": -1.895939588546753, |
|
"logps/chosen": -31.212976455688477, |
|
"logps/rejected": -30.585351943969727, |
|
"loss": 0.4943, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0235602967441082, |
|
"rewards/margins": 0.02539706788957119, |
|
"rewards/rejected": -0.001836769632063806, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_logits/chosen": -2.0964860916137695, |
|
"eval_logits/rejected": -2.091848373413086, |
|
"eval_logps/chosen": -33.77998352050781, |
|
"eval_logps/rejected": -37.53815841674805, |
|
"eval_loss": 0.4994538724422455, |
|
"eval_rewards/accuracies": 0.5826411843299866, |
|
"eval_rewards/chosen": 0.0025456694420427084, |
|
"eval_rewards/margins": 0.0027610675897449255, |
|
"eval_rewards/rejected": -0.0002153978421119973, |
|
"eval_runtime": 145.7726, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.0963854773524548e-06, |
|
"logits/chosen": -1.8822052478790283, |
|
"logits/rejected": -1.8824293613433838, |
|
"logps/chosen": -30.01492691040039, |
|
"logps/rejected": -31.45803451538086, |
|
"loss": 0.4923, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.029252270236611366, |
|
"rewards/margins": 0.03367346525192261, |
|
"rewards/rejected": -0.004421197809278965, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.049857726072005e-06, |
|
"logits/chosen": -1.7276147603988647, |
|
"logits/rejected": -1.7294237613677979, |
|
"logps/chosen": -31.513330459594727, |
|
"logps/rejected": -33.66464614868164, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.030516481027007103, |
|
"rewards/margins": 0.03675025328993797, |
|
"rewards/rejected": -0.006233775056898594, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0040749902836508e-06, |
|
"logits/chosen": -1.7628084421157837, |
|
"logits/rejected": -1.760263442993164, |
|
"logps/chosen": -28.515111923217773, |
|
"logps/rejected": -31.614593505859375, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.024966087192296982, |
|
"rewards/margins": 0.02945462428033352, |
|
"rewards/rejected": -0.00448854174464941, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 9.59060791022566e-07, |
|
"logits/chosen": -1.8981006145477295, |
|
"logits/rejected": -1.8926544189453125, |
|
"logps/chosen": -30.10824966430664, |
|
"logps/rejected": -33.54905319213867, |
|
"loss": 0.4922, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.02931547723710537, |
|
"rewards/margins": 0.035265903919935226, |
|
"rewards/rejected": -0.005950425285845995, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 9.148382544856885e-07, |
|
"logits/chosen": -1.759209394454956, |
|
"logits/rejected": -1.7495086193084717, |
|
"logps/chosen": -30.550457000732422, |
|
"logps/rejected": -31.75992202758789, |
|
"loss": 0.4925, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.02901277504861355, |
|
"rewards/margins": 0.03368794918060303, |
|
"rewards/rejected": -0.004675174597650766, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 8.714301001505568e-07, |
|
"logits/chosen": -1.8278675079345703, |
|
"logits/rejected": -1.8278887271881104, |
|
"logps/chosen": -30.80059814453125, |
|
"logps/rejected": -31.803054809570312, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.028811518102884293, |
|
"rewards/margins": 0.0348275825381279, |
|
"rewards/rejected": -0.00601606722921133, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 8.288586291031025e-07, |
|
"logits/chosen": -1.902727484703064, |
|
"logits/rejected": -1.8974792957305908, |
|
"logps/chosen": -30.870708465576172, |
|
"logps/rejected": -33.48421859741211, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.026552587747573853, |
|
"rewards/margins": 0.03198261931538582, |
|
"rewards/rejected": -0.005430030170828104, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 7.871457125803897e-07, |
|
"logits/chosen": -1.7634168863296509, |
|
"logits/rejected": -1.7711273431777954, |
|
"logps/chosen": -30.915302276611328, |
|
"logps/rejected": -33.02967071533203, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.024176979437470436, |
|
"rewards/margins": 0.030859291553497314, |
|
"rewards/rejected": -0.006682313047349453, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 7.463127807341966e-07, |
|
"logits/chosen": -1.8290866613388062, |
|
"logits/rejected": -1.8232570886611938, |
|
"logps/chosen": -29.006099700927734, |
|
"logps/rejected": -34.01789093017578, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.03167518228292465, |
|
"rewards/margins": 0.03901957720518112, |
|
"rewards/rejected": -0.007344390265643597, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 7.063808116212021e-07, |
|
"logits/chosen": -1.7752304077148438, |
|
"logits/rejected": -1.7772098779678345, |
|
"logps/chosen": -30.416156768798828, |
|
"logps/rejected": -33.775291442871094, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.03074287250638008, |
|
"rewards/margins": 0.0408952571451664, |
|
"rewards/rejected": -0.010152382776141167, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_logits/chosen": -2.0958993434906006, |
|
"eval_logits/rejected": -2.0912692546844482, |
|
"eval_logps/chosen": -33.775428771972656, |
|
"eval_logps/rejected": -37.52570724487305, |
|
"eval_loss": 0.4994668662548065, |
|
"eval_rewards/accuracies": 0.6009136438369751, |
|
"eval_rewards/chosen": 0.0025912297423928976, |
|
"eval_rewards/margins": 0.002682073274627328, |
|
"eval_rewards/rejected": -9.084340126719326e-05, |
|
"eval_runtime": 145.907, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 6.673703204254348e-07, |
|
"logits/chosen": -1.7069997787475586, |
|
"logits/rejected": -1.7061100006103516, |
|
"logps/chosen": -32.673728942871094, |
|
"logps/rejected": -33.5372314453125, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.0337875559926033, |
|
"rewards/margins": 0.042066238820552826, |
|
"rewards/rejected": -0.008278685621917248, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 6.293013489185315e-07, |
|
"logits/chosen": -1.8786895275115967, |
|
"logits/rejected": -1.8728017807006836, |
|
"logps/chosen": -28.626781463623047, |
|
"logps/rejected": -33.97628402709961, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.030769089236855507, |
|
"rewards/margins": 0.04174653813242912, |
|
"rewards/rejected": -0.010977448895573616, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 5.921934551632086e-07, |
|
"logits/chosen": -1.7307627201080322, |
|
"logits/rejected": -1.7194693088531494, |
|
"logps/chosen": -31.0261173248291, |
|
"logps/rejected": -33.56462478637695, |
|
"loss": 0.4912, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.033595647662878036, |
|
"rewards/margins": 0.040303632616996765, |
|
"rewards/rejected": -0.006707982625812292, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 5.560657034652405e-07, |
|
"logits/chosen": -1.8212391138076782, |
|
"logits/rejected": -1.8145866394042969, |
|
"logps/chosen": -28.291839599609375, |
|
"logps/rejected": -29.779611587524414, |
|
"loss": 0.4929, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.024083953350782394, |
|
"rewards/margins": 0.033204466104507446, |
|
"rewards/rejected": -0.009120511822402477, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 5.2093665457911e-07, |
|
"logits/chosen": -1.8461530208587646, |
|
"logits/rejected": -1.853011131286621, |
|
"logps/chosen": -32.12201690673828, |
|
"logps/rejected": -32.083274841308594, |
|
"loss": 0.4913, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.032733749598264694, |
|
"rewards/margins": 0.0383833646774292, |
|
"rewards/rejected": -0.005649610888212919, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 4.868243561723535e-07, |
|
"logits/chosen": -1.8253049850463867, |
|
"logits/rejected": -1.8258377313613892, |
|
"logps/chosen": -30.2758846282959, |
|
"logps/rejected": -33.74597930908203, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.027526726946234703, |
|
"rewards/margins": 0.03683784604072571, |
|
"rewards/rejected": -0.00931111816316843, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 4.537463335535161e-07, |
|
"logits/chosen": -1.7631717920303345, |
|
"logits/rejected": -1.7627891302108765, |
|
"logps/chosen": -30.03763771057129, |
|
"logps/rejected": -34.385276794433594, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.03053201362490654, |
|
"rewards/margins": 0.039178695529699326, |
|
"rewards/rejected": -0.008646685630083084, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 4.217195806684629e-07, |
|
"logits/chosen": -1.648390531539917, |
|
"logits/rejected": -1.6438019275665283, |
|
"logps/chosen": -32.16303253173828, |
|
"logps/rejected": -31.598995208740234, |
|
"loss": 0.4915, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.0307441595941782, |
|
"rewards/margins": 0.03746918961405754, |
|
"rewards/rejected": -0.00672503188252449, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.907605513696808e-07, |
|
"logits/chosen": -1.8533127307891846, |
|
"logits/rejected": -1.838148832321167, |
|
"logps/chosen": -31.7786865234375, |
|
"logps/rejected": -35.894744873046875, |
|
"loss": 0.4922, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.025688907131552696, |
|
"rewards/margins": 0.03698316216468811, |
|
"rewards/rejected": -0.011294253170490265, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.6088515096305675e-07, |
|
"logits/chosen": -1.7974460124969482, |
|
"logits/rejected": -1.802198052406311, |
|
"logps/chosen": -30.211551666259766, |
|
"logps/rejected": -37.193172454833984, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.0330607071518898, |
|
"rewards/margins": 0.04396023601293564, |
|
"rewards/rejected": -0.010899528861045837, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_logits/chosen": -2.0954809188842773, |
|
"eval_logits/rejected": -2.0908546447753906, |
|
"eval_logps/chosen": -33.78174591064453, |
|
"eval_logps/rejected": -37.53657150268555, |
|
"eval_loss": 0.49945539236068726, |
|
"eval_rewards/accuracies": 0.6034052968025208, |
|
"eval_rewards/chosen": 0.0025280837435275316, |
|
"eval_rewards/margins": 0.002727580489590764, |
|
"eval_rewards/rejected": -0.0001994967897189781, |
|
"eval_runtime": 145.8892, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.321087280364757e-07, |
|
"logits/chosen": -1.770353078842163, |
|
"logits/rejected": -1.770532250404358, |
|
"logps/chosen": -32.75363540649414, |
|
"logps/rejected": -37.96880340576172, |
|
"loss": 0.4904, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.03490384668111801, |
|
"rewards/margins": 0.04375072568655014, |
|
"rewards/rejected": -0.008846879936754704, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 3.044460665744284e-07, |
|
"logits/chosen": -1.8576828241348267, |
|
"logits/rejected": -1.8563992977142334, |
|
"logps/chosen": -29.168567657470703, |
|
"logps/rejected": -31.714990615844727, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.02925288677215576, |
|
"rewards/margins": 0.036869172006845474, |
|
"rewards/rejected": -0.007616284303367138, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 2.779113783626916e-07, |
|
"logits/chosen": -1.766406774520874, |
|
"logits/rejected": -1.7676417827606201, |
|
"logps/chosen": -31.04288673400879, |
|
"logps/rejected": -34.331932067871094, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.03331366181373596, |
|
"rewards/margins": 0.04014625400304794, |
|
"rewards/rejected": -0.006832593586295843, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 2.5251829568697204e-07, |
|
"logits/chosen": -1.8259414434432983, |
|
"logits/rejected": -1.8247743844985962, |
|
"logps/chosen": -28.54180335998535, |
|
"logps/rejected": -32.83352279663086, |
|
"loss": 0.4923, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.026568632572889328, |
|
"rewards/margins": 0.03498411923646927, |
|
"rewards/rejected": -0.008415484800934792, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 2.2827986432927774e-07, |
|
"logits/chosen": -1.8422014713287354, |
|
"logits/rejected": -1.8278350830078125, |
|
"logps/chosen": -31.403839111328125, |
|
"logps/rejected": -37.33091354370117, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.02912832237780094, |
|
"rewards/margins": 0.039793141186237335, |
|
"rewards/rejected": -0.010664817877113819, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 2.0520853686560177e-07, |
|
"logits/chosen": -1.834238052368164, |
|
"logits/rejected": -1.8465309143066406, |
|
"logps/chosen": -29.14828872680664, |
|
"logps/rejected": -32.92185592651367, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.029201796278357506, |
|
"rewards/margins": 0.03602294623851776, |
|
"rewards/rejected": -0.006821149028837681, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.833161662683672e-07, |
|
"logits/chosen": -1.937150239944458, |
|
"logits/rejected": -1.9364910125732422, |
|
"logps/chosen": -28.896930694580078, |
|
"logps/rejected": -37.03318786621094, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.03133498877286911, |
|
"rewards/margins": 0.043358031660318375, |
|
"rewards/rejected": -0.012023041024804115, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1.626139998169246e-07, |
|
"logits/chosen": -1.7997983694076538, |
|
"logits/rejected": -1.8073842525482178, |
|
"logps/chosen": -30.809356689453125, |
|
"logps/rejected": -38.418704986572266, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.03371125087141991, |
|
"rewards/margins": 0.04340182989835739, |
|
"rewards/rejected": -0.009690576232969761, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.4311267331922535e-07, |
|
"logits/chosen": -1.756156325340271, |
|
"logits/rejected": -1.7522109746932983, |
|
"logps/chosen": -31.52239418029785, |
|
"logps/rejected": -32.12908172607422, |
|
"loss": 0.4915, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0324481800198555, |
|
"rewards/margins": 0.03776510804891586, |
|
"rewards/rejected": -0.005316923372447491, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.2482220564763669e-07, |
|
"logits/chosen": -1.9121891260147095, |
|
"logits/rejected": -1.90972101688385, |
|
"logps/chosen": -28.551815032958984, |
|
"logps/rejected": -32.69978713989258, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.027447333559393883, |
|
"rewards/margins": 0.03300053998827934, |
|
"rewards/rejected": -0.0055532073602080345, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_logits/chosen": -2.0949766635894775, |
|
"eval_logits/rejected": -2.0903432369232178, |
|
"eval_logps/chosen": -33.78425216674805, |
|
"eval_logps/rejected": -37.54228591918945, |
|
"eval_loss": 0.4994584321975708, |
|
"eval_rewards/accuracies": 0.5921927094459534, |
|
"eval_rewards/chosen": 0.0025030241813510656, |
|
"eval_rewards/margins": 0.0027596852742135525, |
|
"eval_rewards/rejected": -0.0002566613839007914, |
|
"eval_runtime": 145.7016, |
|
"eval_samples_per_second": 2.354, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.0775199359171346e-07, |
|
"logits/chosen": -1.8566265106201172, |
|
"logits/rejected": -1.8526551723480225, |
|
"logps/chosen": -30.65496826171875, |
|
"logps/rejected": -30.23004722595215, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.029875636100769043, |
|
"rewards/margins": 0.036631517112255096, |
|
"rewards/rejected": -0.0067558870650827885, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 9.191080703056604e-08, |
|
"logits/chosen": -1.8138599395751953, |
|
"logits/rejected": -1.8149265050888062, |
|
"logps/chosen": -30.453847885131836, |
|
"logps/rejected": -35.29817581176758, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.029110288247466087, |
|
"rewards/margins": 0.0328519269824028, |
|
"rewards/rejected": -0.0037416405975818634, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 7.730678442730539e-08, |
|
"logits/chosen": -1.7643918991088867, |
|
"logits/rejected": -1.7578294277191162, |
|
"logps/chosen": -31.150136947631836, |
|
"logps/rejected": -37.63862228393555, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.0299163106828928, |
|
"rewards/margins": 0.039408471435308456, |
|
"rewards/rejected": -0.009492164477705956, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 6.394742864787806e-08, |
|
"logits/chosen": -1.7766668796539307, |
|
"logits/rejected": -1.7712846994400024, |
|
"logps/chosen": -26.59188461303711, |
|
"logps/rejected": -32.42449188232422, |
|
"loss": 0.4925, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.027213579043745995, |
|
"rewards/margins": 0.03415192291140556, |
|
"rewards/rejected": -0.006938344333320856, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 5.183960310644748e-08, |
|
"logits/chosen": -1.8049747943878174, |
|
"logits/rejected": -1.7948274612426758, |
|
"logps/chosen": -30.14188575744629, |
|
"logps/rejected": -36.50958251953125, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.023396430537104607, |
|
"rewards/margins": 0.03563863784074783, |
|
"rewards/rejected": -0.012242205440998077, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 4.098952823928693e-08, |
|
"logits/chosen": -1.776219129562378, |
|
"logits/rejected": -1.773436188697815, |
|
"logps/chosen": -30.742990493774414, |
|
"logps/rejected": -31.301162719726562, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.025667887181043625, |
|
"rewards/margins": 0.029771283268928528, |
|
"rewards/rejected": -0.004103394225239754, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 3.1402778309014284e-08, |
|
"logits/chosen": -1.835860013961792, |
|
"logits/rejected": -1.8421319723129272, |
|
"logps/chosen": -28.727758407592773, |
|
"logps/rejected": -33.723846435546875, |
|
"loss": 0.4909, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.033414192497730255, |
|
"rewards/margins": 0.04102843254804611, |
|
"rewards/rejected": -0.007614238653331995, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.3084278540791427e-08, |
|
"logits/chosen": -1.8385893106460571, |
|
"logits/rejected": -1.8487203121185303, |
|
"logps/chosen": -28.86956787109375, |
|
"logps/rejected": -30.35264015197754, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.02825688198208809, |
|
"rewards/margins": 0.03549762815237045, |
|
"rewards/rejected": -0.007240750826895237, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.6038302591975807e-08, |
|
"logits/chosen": -1.7700088024139404, |
|
"logits/rejected": -1.7631587982177734, |
|
"logps/chosen": -31.02837562561035, |
|
"logps/rejected": -33.01637649536133, |
|
"loss": 0.4924, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.02628110721707344, |
|
"rewards/margins": 0.03434007614850998, |
|
"rewards/rejected": -0.008058969862759113, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 1.0268470356514237e-08, |
|
"logits/chosen": -1.823885202407837, |
|
"logits/rejected": -1.8207124471664429, |
|
"logps/chosen": -30.59462547302246, |
|
"logps/rejected": -34.283512115478516, |
|
"loss": 0.4912, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.030309468507766724, |
|
"rewards/margins": 0.04054233804345131, |
|
"rewards/rejected": -0.010232868604362011, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"eval_logits/chosen": -2.0954155921936035, |
|
"eval_logits/rejected": -2.090785264968872, |
|
"eval_logps/chosen": -33.78346252441406, |
|
"eval_logps/rejected": -37.54273223876953, |
|
"eval_loss": 0.49945247173309326, |
|
"eval_rewards/accuracies": 0.6034052968025208, |
|
"eval_rewards/chosen": 0.0025108722038567066, |
|
"eval_rewards/margins": 0.0027720185462385416, |
|
"eval_rewards/rejected": -0.00026114637148566544, |
|
"eval_runtime": 145.8069, |
|
"eval_samples_per_second": 2.352, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 5.777746105209147e-09, |
|
"logits/chosen": -1.900957465171814, |
|
"logits/rejected": -1.9016454219818115, |
|
"logps/chosen": -26.679061889648438, |
|
"logps/rejected": -33.479774475097656, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.03137214854359627, |
|
"rewards/margins": 0.03876541927456856, |
|
"rewards/rejected": -0.007393266074359417, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 2.5684369628148352e-09, |
|
"logits/chosen": -1.7551101446151733, |
|
"logits/rejected": -1.7550216913223267, |
|
"logps/chosen": -30.129684448242188, |
|
"logps/rejected": -34.65699005126953, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.029121821746230125, |
|
"rewards/margins": 0.036954350769519806, |
|
"rewards/rejected": -0.007832523435354233, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 6.421917227455999e-10, |
|
"logits/chosen": -1.9060754776000977, |
|
"logits/rejected": -1.9034589529037476, |
|
"logps/chosen": -28.90531349182129, |
|
"logps/rejected": -32.31726837158203, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.02568744495511055, |
|
"rewards/margins": 0.03402363136410713, |
|
"rewards/rejected": -0.008336183615028858, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.890435814857483, |
|
"logits/rejected": -1.8915026187896729, |
|
"logps/chosen": -28.199377059936523, |
|
"logps/rejected": -30.4346981048584, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": 0.024205626919865608, |
|
"rewards/margins": 0.030650783330202103, |
|
"rewards/rejected": -0.006445156875997782, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 1540, |
|
"total_flos": 0.0, |
|
"train_loss": 0.39746343532165923, |
|
"train_runtime": 10799.7737, |
|
"train_samples_per_second": 1.14, |
|
"train_steps_per_second": 0.143 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1540, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|