|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 100, |
|
"global_step": 314, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.5625e-08, |
|
"logits/generated": -3.0857884883880615, |
|
"logits/real": -2.988919258117676, |
|
"logps/generated": -105.47663879394531, |
|
"logps/real": -159.43017578125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/generated": -3.0524420738220215, |
|
"logits/real": -2.888369560241699, |
|
"logps/generated": -98.16116333007812, |
|
"logps/real": -124.24053192138672, |
|
"loss": 0.4499, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/generated": -0.5709724426269531, |
|
"rewards/margins": 0.7495355606079102, |
|
"rewards/real": 0.17856311798095703, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/generated": -3.0165483951568604, |
|
"logits/real": -2.757232189178467, |
|
"logps/generated": -125.3254165649414, |
|
"logps/real": -120.93900299072266, |
|
"loss": 0.0982, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.5599706172943115, |
|
"rewards/margins": 3.3923709392547607, |
|
"rewards/real": 0.8324005007743835, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/generated": -2.9717984199523926, |
|
"logits/real": -2.703868865966797, |
|
"logps/generated": -137.14996337890625, |
|
"logps/real": -138.37025451660156, |
|
"loss": 0.0423, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.964935302734375, |
|
"rewards/margins": 4.9714460372924805, |
|
"rewards/real": 1.0065107345581055, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.858156028368794e-07, |
|
"logits/generated": -2.896350145339966, |
|
"logits/real": -2.632850408554077, |
|
"logps/generated": -149.0387420654297, |
|
"logps/real": -131.4845733642578, |
|
"loss": 0.0209, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.3386735916137695, |
|
"rewards/margins": 6.378259658813477, |
|
"rewards/real": 1.0395863056182861, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.6808510638297873e-07, |
|
"logits/generated": -2.864506244659424, |
|
"logits/real": -2.635793685913086, |
|
"logps/generated": -161.58311462402344, |
|
"logps/real": -133.5462646484375, |
|
"loss": 0.0157, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.72866678237915, |
|
"rewards/margins": 7.655924320220947, |
|
"rewards/real": 0.9272577166557312, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.50354609929078e-07, |
|
"logits/generated": -2.8477931022644043, |
|
"logits/real": -2.5890610218048096, |
|
"logps/generated": -163.30792236328125, |
|
"logps/real": -120.96208190917969, |
|
"loss": 0.016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.271246433258057, |
|
"rewards/margins": 7.847572326660156, |
|
"rewards/real": 0.5763252973556519, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.326241134751773e-07, |
|
"logits/generated": -2.817225694656372, |
|
"logits/real": -2.618878126144409, |
|
"logps/generated": -178.3530731201172, |
|
"logps/real": -133.58116149902344, |
|
"loss": 0.0134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.513948440551758, |
|
"rewards/margins": 9.06941032409668, |
|
"rewards/real": 0.5554608702659607, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.148936170212766e-07, |
|
"logits/generated": -2.743182420730591, |
|
"logits/real": -2.5085911750793457, |
|
"logps/generated": -182.21102905273438, |
|
"logps/real": -141.52438354492188, |
|
"loss": 0.0132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.143477439880371, |
|
"rewards/margins": 8.894549369812012, |
|
"rewards/real": -0.24892878532409668, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.971631205673759e-07, |
|
"logits/generated": -2.667142152786255, |
|
"logits/real": -2.3680872917175293, |
|
"logps/generated": -221.4196014404297, |
|
"logps/real": -156.9970703125, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.756684303283691, |
|
"rewards/margins": 11.24407958984375, |
|
"rewards/real": -0.5126041769981384, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.7943262411347514e-07, |
|
"logits/generated": -2.716907024383545, |
|
"logits/real": -2.384260654449463, |
|
"logps/generated": -195.7281951904297, |
|
"logps/real": -141.03294372558594, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.495864868164062, |
|
"rewards/margins": 10.102469444274902, |
|
"rewards/real": -0.39339518547058105, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/generated": -2.708615779876709, |
|
"eval_logits/real": -2.4488255977630615, |
|
"eval_logps/generated": -208.8447265625, |
|
"eval_logps/real": -143.2593536376953, |
|
"eval_loss": 0.008355233818292618, |
|
"eval_rewards/accuracies": 0.9992038011550903, |
|
"eval_rewards/generated": -11.555154800415039, |
|
"eval_rewards/margins": 10.695781707763672, |
|
"eval_rewards/real": -0.8593728542327881, |
|
"eval_runtime": 338.0923, |
|
"eval_samples_per_second": 14.789, |
|
"eval_steps_per_second": 0.464, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.617021276595745e-07, |
|
"logits/generated": -2.6799185276031494, |
|
"logits/real": -2.4362854957580566, |
|
"logps/generated": -229.9918975830078, |
|
"logps/real": -144.79042053222656, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.137945175170898, |
|
"rewards/margins": 11.87834358215332, |
|
"rewards/real": -1.259602427482605, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.4397163120567375e-07, |
|
"logits/generated": -2.70621919631958, |
|
"logits/real": -2.4253954887390137, |
|
"logps/generated": -228.62405395507812, |
|
"logps/real": -158.47494506835938, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.207649230957031, |
|
"rewards/margins": 12.149269104003906, |
|
"rewards/real": -1.0583809614181519, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.2624113475177305e-07, |
|
"logits/generated": -2.662909746170044, |
|
"logits/real": -2.3800911903381348, |
|
"logps/generated": -227.56619262695312, |
|
"logps/real": -156.76431274414062, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.599740982055664, |
|
"rewards/margins": 12.402183532714844, |
|
"rewards/real": -1.1975574493408203, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.085106382978723e-07, |
|
"logits/generated": -2.69126033782959, |
|
"logits/real": -2.4004807472229004, |
|
"logps/generated": -229.597900390625, |
|
"logps/real": -146.2201385498047, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.650607109069824, |
|
"rewards/margins": 12.299604415893555, |
|
"rewards/real": -1.3510032892227173, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.907801418439716e-07, |
|
"logits/generated": -2.6775763034820557, |
|
"logits/real": -2.324918508529663, |
|
"logps/generated": -218.3529510498047, |
|
"logps/real": -140.95547485351562, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.812429428100586, |
|
"rewards/margins": 12.05975341796875, |
|
"rewards/real": -0.752677321434021, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.730496453900709e-07, |
|
"logits/generated": -2.6712429523468018, |
|
"logits/real": -2.2760770320892334, |
|
"logps/generated": -196.49375915527344, |
|
"logps/real": -121.7732925415039, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.348185539245605, |
|
"rewards/margins": 9.94708251953125, |
|
"rewards/real": -0.40110310912132263, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.5531914893617016e-07, |
|
"logits/generated": -2.6528658866882324, |
|
"logits/real": -2.1584651470184326, |
|
"logps/generated": -213.1004638671875, |
|
"logps/real": -121.10084533691406, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.93848991394043, |
|
"rewards/margins": 11.793214797973633, |
|
"rewards/real": -0.14527548849582672, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.375886524822695e-07, |
|
"logits/generated": -2.6257143020629883, |
|
"logits/real": -2.2728638648986816, |
|
"logps/generated": -228.39248657226562, |
|
"logps/real": -152.61045837402344, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.100919723510742, |
|
"rewards/margins": 12.766759872436523, |
|
"rewards/real": -0.3341600298881531, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.198581560283688e-07, |
|
"logits/generated": -2.609994411468506, |
|
"logits/real": -2.2576498985290527, |
|
"logps/generated": -222.8015594482422, |
|
"logps/real": -145.87921142578125, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -13.061747550964355, |
|
"rewards/margins": 12.396347999572754, |
|
"rewards/real": -0.6654000282287598, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.0212765957446807e-07, |
|
"logits/generated": -2.600148916244507, |
|
"logits/real": -2.1764094829559326, |
|
"logps/generated": -236.9789581298828, |
|
"logps/real": -137.09503173828125, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.19157886505127, |
|
"rewards/margins": 13.38233470916748, |
|
"rewards/real": -0.8092441558837891, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_logits/generated": -2.600484609603882, |
|
"eval_logits/real": -2.2709195613861084, |
|
"eval_logps/generated": -234.47348022460938, |
|
"eval_logps/real": -144.0410919189453, |
|
"eval_loss": 0.0056638033129274845, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/generated": -14.118030548095703, |
|
"eval_rewards/margins": 13.180484771728516, |
|
"eval_rewards/real": -0.9375430345535278, |
|
"eval_runtime": 334.9223, |
|
"eval_samples_per_second": 14.929, |
|
"eval_steps_per_second": 0.469, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.8439716312056735e-07, |
|
"logits/generated": -2.617143154144287, |
|
"logits/real": -2.263977289199829, |
|
"logps/generated": -242.97866821289062, |
|
"logps/real": -152.24502563476562, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.723902702331543, |
|
"rewards/margins": 13.91853141784668, |
|
"rewards/real": -0.8053719401359558, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"logits/generated": -2.5576019287109375, |
|
"logits/real": -2.3169806003570557, |
|
"logps/generated": -233.3745880126953, |
|
"logps/real": -145.44570922851562, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.311078071594238, |
|
"rewards/margins": 13.486970901489258, |
|
"rewards/real": -0.8241075277328491, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.4893617021276595e-07, |
|
"logits/generated": -2.553480386734009, |
|
"logits/real": -2.315396785736084, |
|
"logps/generated": -243.16702270507812, |
|
"logps/real": -168.13119506835938, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.701271057128906, |
|
"rewards/margins": 13.865699768066406, |
|
"rewards/real": -0.8355696797370911, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.3120567375886523e-07, |
|
"logits/generated": -2.6132516860961914, |
|
"logits/real": -2.2840161323547363, |
|
"logps/generated": -246.56961059570312, |
|
"logps/real": -152.701171875, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.995321273803711, |
|
"rewards/margins": 14.276300430297852, |
|
"rewards/real": -0.7190229892730713, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.1347517730496453e-07, |
|
"logits/generated": -2.591722249984741, |
|
"logits/real": -2.277804374694824, |
|
"logps/generated": -263.14129638671875, |
|
"logps/real": -158.726806640625, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.25307273864746, |
|
"rewards/margins": 15.087217330932617, |
|
"rewards/real": -1.1658554077148438, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.574468085106382e-08, |
|
"logits/generated": -2.569918155670166, |
|
"logits/real": -2.1422371864318848, |
|
"logps/generated": -244.6776123046875, |
|
"logps/real": -145.0582733154297, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.619791984558105, |
|
"rewards/margins": 14.159820556640625, |
|
"rewards/real": -1.4599710702896118, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 7.801418439716311e-08, |
|
"logits/generated": -2.539750099182129, |
|
"logits/real": -2.137476682662964, |
|
"logps/generated": -265.46429443359375, |
|
"logps/real": -163.37588500976562, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.41511344909668, |
|
"rewards/margins": 14.795074462890625, |
|
"rewards/real": -1.6200393438339233, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.02836879432624e-08, |
|
"logits/generated": -2.574856996536255, |
|
"logits/real": -2.0835378170013428, |
|
"logps/generated": -262.6173095703125, |
|
"logps/real": -142.27984619140625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.557262420654297, |
|
"rewards/margins": 15.236516952514648, |
|
"rewards/real": -1.3207473754882812, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.25531914893617e-08, |
|
"logits/generated": -2.5489261150360107, |
|
"logits/real": -2.2142765522003174, |
|
"logps/generated": -268.306396484375, |
|
"logps/real": -147.05963134765625, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.089282989501953, |
|
"rewards/margins": 15.543319702148438, |
|
"rewards/real": -1.5459634065628052, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.4822695035460993e-08, |
|
"logits/generated": -2.556087017059326, |
|
"logits/real": -2.1201999187469482, |
|
"logps/generated": -249.31875610351562, |
|
"logps/real": -138.080322265625, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.890460014343262, |
|
"rewards/margins": 14.16200065612793, |
|
"rewards/real": -1.728456735610962, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_logits/generated": -2.5437142848968506, |
|
"eval_logits/real": -2.19522762298584, |
|
"eval_logps/generated": -257.48858642578125, |
|
"eval_logps/real": -151.0966796875, |
|
"eval_loss": 0.005383754149079323, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/generated": -16.419538497924805, |
|
"eval_rewards/margins": 14.776435852050781, |
|
"eval_rewards/real": -1.6431050300598145, |
|
"eval_runtime": 335.2989, |
|
"eval_samples_per_second": 14.912, |
|
"eval_steps_per_second": 0.468, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 7.092198581560283e-09, |
|
"logits/generated": -2.5656914710998535, |
|
"logits/real": -2.300166368484497, |
|
"logps/generated": -260.5529479980469, |
|
"logps/real": -153.91053771972656, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.434940338134766, |
|
"rewards/margins": 14.854705810546875, |
|
"rewards/real": -1.5802339315414429, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 314, |
|
"total_flos": 0.0, |
|
"train_loss": 0.025043586236395084, |
|
"train_runtime": 2649.1704, |
|
"train_samples_per_second": 3.775, |
|
"train_steps_per_second": 0.119 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 314, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|