|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 987, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_losses": 0.6931471824645996, |
|
"epoch": 0.0, |
|
"grad_norm": 2.1006424602315215, |
|
"learning_rate": 5.050505050505051e-09, |
|
"logits/chosen": -2.589059352874756, |
|
"logits/rejected": -2.6418559551239014, |
|
"logps/chosen": -212.42770385742188, |
|
"logps/rejected": -135.43048095703125, |
|
"loss": 0.6931, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_losses": 0.6925780773162842, |
|
"epoch": 0.03, |
|
"grad_norm": 12.811305441645697, |
|
"learning_rate": 5.05050505050505e-08, |
|
"logits/chosen": -2.977125644683838, |
|
"logits/rejected": -2.981342315673828, |
|
"logps/chosen": -426.8561096191406, |
|
"logps/rejected": -265.4306640625, |
|
"loss": 0.6957, |
|
"positive_losses": 0.01630179025232792, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": 0.0003335709043312818, |
|
"rewards/margins": 0.0011399724753573537, |
|
"rewards/margins_max": 0.002273110207170248, |
|
"rewards/margins_min": 6.834672603872605e-06, |
|
"rewards/margins_std": 0.0016024988144636154, |
|
"rewards/rejected": -0.0008064016001299024, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_losses": 0.6931944489479065, |
|
"epoch": 0.06, |
|
"grad_norm": 17.598175196981984, |
|
"learning_rate": 1.01010101010101e-07, |
|
"logits/chosen": -2.992300271987915, |
|
"logits/rejected": -2.8757376670837402, |
|
"logps/chosen": -376.1276550292969, |
|
"logps/rejected": -280.2401428222656, |
|
"loss": 0.7005, |
|
"positive_losses": 0.06999759376049042, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0003719997184816748, |
|
"rewards/margins": -9.284308907808736e-05, |
|
"rewards/margins_max": 0.0014239997835829854, |
|
"rewards/margins_min": -0.0016096860636025667, |
|
"rewards/margins_std": 0.0021451397333294153, |
|
"rewards/rejected": 0.000464842829387635, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_losses": 0.6936646699905396, |
|
"epoch": 0.09, |
|
"grad_norm": 16.284910292127993, |
|
"learning_rate": 1.5151515151515152e-07, |
|
"logits/chosen": -2.8964314460754395, |
|
"logits/rejected": -2.9219629764556885, |
|
"logps/chosen": -204.34725952148438, |
|
"logps/rejected": -278.52960205078125, |
|
"loss": 0.698, |
|
"positive_losses": 0.042078591883182526, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": 0.0006226692348718643, |
|
"rewards/margins": -0.0010327090276405215, |
|
"rewards/margins_max": 0.0002273331192554906, |
|
"rewards/margins_min": -0.002292751334607601, |
|
"rewards/margins_std": 0.001781968749128282, |
|
"rewards/rejected": 0.001655378146097064, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_losses": 0.6926015615463257, |
|
"epoch": 0.12, |
|
"grad_norm": 7.600951496593335, |
|
"learning_rate": 2.02020202020202e-07, |
|
"logits/chosen": -2.841174602508545, |
|
"logits/rejected": -2.759981632232666, |
|
"logps/chosen": -320.43280029296875, |
|
"logps/rejected": -249.27462768554688, |
|
"loss": 0.6944, |
|
"positive_losses": 0.0006469726795330644, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0031745159067213535, |
|
"rewards/margins": 0.0010936097241938114, |
|
"rewards/margins_max": 0.0026608791667968035, |
|
"rewards/margins_min": -0.0004736593982670456, |
|
"rewards/margins_std": 0.0022164531983435154, |
|
"rewards/rejected": 0.0020809059496968985, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_losses": 0.6930263638496399, |
|
"epoch": 0.15, |
|
"grad_norm": 9.105946335609067, |
|
"learning_rate": 2.525252525252525e-07, |
|
"logits/chosen": -2.829671859741211, |
|
"logits/rejected": -2.848930835723877, |
|
"logps/chosen": -227.5389404296875, |
|
"logps/rejected": -240.80026245117188, |
|
"loss": 0.694, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.004696831107139587, |
|
"rewards/margins": 0.00024281651712954044, |
|
"rewards/margins_max": 0.0018194876611232758, |
|
"rewards/margins_min": -0.0013338546268641949, |
|
"rewards/margins_std": 0.0022297496907413006, |
|
"rewards/rejected": 0.004454014357179403, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_losses": 0.6927760243415833, |
|
"epoch": 0.18, |
|
"grad_norm": 1.9982033646829467, |
|
"learning_rate": 3.0303030303030305e-07, |
|
"logits/chosen": -2.807051420211792, |
|
"logits/rejected": -2.722658157348633, |
|
"logps/chosen": -333.5701599121094, |
|
"logps/rejected": -296.24591064453125, |
|
"loss": 0.6935, |
|
"positive_losses": 0.01206359826028347, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.007529796566814184, |
|
"rewards/margins": 0.0007501507643610239, |
|
"rewards/margins_max": 0.004166206810623407, |
|
"rewards/margins_min": -0.002665905049070716, |
|
"rewards/margins_std": 0.004831031896173954, |
|
"rewards/rejected": 0.006779646035283804, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_losses": 0.692486584186554, |
|
"epoch": 0.21, |
|
"grad_norm": 9.583384726566226, |
|
"learning_rate": 3.535353535353535e-07, |
|
"logits/chosen": -2.6647205352783203, |
|
"logits/rejected": -2.6472456455230713, |
|
"logps/chosen": -305.70159912109375, |
|
"logps/rejected": -241.12240600585938, |
|
"loss": 0.6938, |
|
"positive_losses": 0.006031799130141735, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.007261182181537151, |
|
"rewards/margins": 0.0013255949597805738, |
|
"rewards/margins_max": 0.003496175166219473, |
|
"rewards/margins_min": -0.0008449858287349343, |
|
"rewards/margins_std": 0.0030696645844727755, |
|
"rewards/rejected": 0.0059355879202485085, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_losses": 0.6925214529037476, |
|
"epoch": 0.24, |
|
"grad_norm": 2.10104329669103, |
|
"learning_rate": 4.04040404040404e-07, |
|
"logits/chosen": -2.965867280960083, |
|
"logits/rejected": -2.8706986904144287, |
|
"logps/chosen": -324.9981689453125, |
|
"logps/rejected": -331.00262451171875, |
|
"loss": 0.6938, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.009126158431172371, |
|
"rewards/margins": 0.0012557277223095298, |
|
"rewards/margins_max": 0.0036045038141310215, |
|
"rewards/margins_min": -0.0010930480202659965, |
|
"rewards/margins_std": 0.0033216706942766905, |
|
"rewards/rejected": 0.007870429195463657, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_losses": 0.6932989358901978, |
|
"epoch": 0.27, |
|
"grad_norm": 1.9836450871378364, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -2.7672338485717773, |
|
"logits/rejected": -2.6813530921936035, |
|
"logps/chosen": -280.62030029296875, |
|
"logps/rejected": -277.8973388671875, |
|
"loss": 0.6933, |
|
"positive_losses": 0.0020202635787427425, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.007793852128088474, |
|
"rewards/margins": -0.00029996776720508933, |
|
"rewards/margins_max": 0.0021644725929945707, |
|
"rewards/margins_min": -0.0027644080109894276, |
|
"rewards/margins_std": 0.003485244931653142, |
|
"rewards/rejected": 0.008093819953501225, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_losses": 0.6919553279876709, |
|
"epoch": 0.3, |
|
"grad_norm": 1.713342030808654, |
|
"learning_rate": 4.99998435471878e-07, |
|
"logits/chosen": -2.971386432647705, |
|
"logits/rejected": -2.915938377380371, |
|
"logps/chosen": -269.40509033203125, |
|
"logps/rejected": -256.8268737792969, |
|
"loss": 0.6932, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.008469906635582447, |
|
"rewards/margins": 0.002389331813901663, |
|
"rewards/margins_max": 0.004788221325725317, |
|
"rewards/margins_min": -9.557651537761558e-06, |
|
"rewards/margins_std": 0.0033925422467291355, |
|
"rewards/rejected": 0.006080574356019497, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_dpo_losses": 0.6925311088562012, |
|
"eval_logits/chosen": -2.8593878746032715, |
|
"eval_logits/rejected": -2.8141348361968994, |
|
"eval_logps/chosen": -284.26416015625, |
|
"eval_logps/rejected": -258.3495178222656, |
|
"eval_loss": 0.6929529905319214, |
|
"eval_positive_losses": 0.0051858690567314625, |
|
"eval_rewards/accuracies": 0.579365074634552, |
|
"eval_rewards/chosen": 0.009570538066327572, |
|
"eval_rewards/margins": 0.0012392366770654917, |
|
"eval_rewards/margins_max": 0.006621083710342646, |
|
"eval_rewards/margins_min": -0.003959333524107933, |
|
"eval_rewards/margins_std": 0.004715622402727604, |
|
"eval_rewards/rejected": 0.00833130069077015, |
|
"eval_runtime": 282.6491, |
|
"eval_samples_per_second": 7.076, |
|
"eval_steps_per_second": 0.223, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_losses": 0.6927061676979065, |
|
"epoch": 0.33, |
|
"grad_norm": 2.1416302847338655, |
|
"learning_rate": 4.998107157902955e-07, |
|
"logits/chosen": -2.841651201248169, |
|
"logits/rejected": -2.74540638923645, |
|
"logps/chosen": -310.16998291015625, |
|
"logps/rejected": -254.25537109375, |
|
"loss": 0.6931, |
|
"positive_losses": 0.009991454891860485, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.009014981798827648, |
|
"rewards/margins": 0.0008873892948031425, |
|
"rewards/margins_max": 0.0038191110361367464, |
|
"rewards/margins_min": -0.0020443324465304613, |
|
"rewards/margins_std": 0.004146079998463392, |
|
"rewards/rejected": 0.008127592504024506, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_losses": 0.6933146715164185, |
|
"epoch": 0.36, |
|
"grad_norm": 2.01446405141137, |
|
"learning_rate": 4.993103596812268e-07, |
|
"logits/chosen": -2.7760932445526123, |
|
"logits/rejected": -2.853368043899536, |
|
"logps/chosen": -241.74014282226562, |
|
"logps/rejected": -251.635009765625, |
|
"loss": 0.6928, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.011283518746495247, |
|
"rewards/margins": -0.00032261465094052255, |
|
"rewards/margins_max": 0.0035319500602781773, |
|
"rewards/margins_min": -0.00417717918753624, |
|
"rewards/margins_std": 0.00545117724686861, |
|
"rewards/rejected": 0.011606132611632347, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_losses": 0.6912622451782227, |
|
"epoch": 0.4, |
|
"grad_norm": 1.6295192671904115, |
|
"learning_rate": 4.984979933369709e-07, |
|
"logits/chosen": -2.9324135780334473, |
|
"logits/rejected": -2.867285966873169, |
|
"logps/chosen": -442.0322265625, |
|
"logps/rejected": -269.4384765625, |
|
"loss": 0.6928, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.01560944877564907, |
|
"rewards/margins": 0.0037853687535971403, |
|
"rewards/margins_max": 0.007000140845775604, |
|
"rewards/margins_min": 0.0005705966614186764, |
|
"rewards/margins_std": 0.004546374548226595, |
|
"rewards/rejected": 0.011824080720543861, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_losses": 0.6920586824417114, |
|
"epoch": 0.43, |
|
"grad_norm": 1.9106724203537326, |
|
"learning_rate": 4.973746334285336e-07, |
|
"logits/chosen": -2.803515911102295, |
|
"logits/rejected": -2.8981337547302246, |
|
"logps/chosen": -340.9748229980469, |
|
"logps/rejected": -325.1402282714844, |
|
"loss": 0.6917, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.01538917887955904, |
|
"rewards/margins": 0.002193502150475979, |
|
"rewards/margins_max": 0.005477132275700569, |
|
"rewards/margins_min": -0.001090128207579255, |
|
"rewards/margins_std": 0.004643755033612251, |
|
"rewards/rejected": 0.013195676729083061, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_losses": 0.6921483278274536, |
|
"epoch": 0.46, |
|
"grad_norm": 1.4481594604817483, |
|
"learning_rate": 4.959416858332709e-07, |
|
"logits/chosen": -2.855631113052368, |
|
"logits/rejected": -2.820399045944214, |
|
"logps/chosen": -271.1258544921875, |
|
"logps/rejected": -282.1231689453125, |
|
"loss": 0.6932, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.014782702550292015, |
|
"rewards/margins": 0.002010155702009797, |
|
"rewards/margins_max": 0.00527102081105113, |
|
"rewards/margins_min": -0.001250709407031536, |
|
"rewards/margins_std": 0.004611559212207794, |
|
"rewards/rejected": 0.012772548012435436, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_losses": 0.6921746730804443, |
|
"epoch": 0.49, |
|
"grad_norm": 2.122964894274462, |
|
"learning_rate": 4.942009438754421e-07, |
|
"logits/chosen": -2.8902671337127686, |
|
"logits/rejected": -2.875211238861084, |
|
"logps/chosen": -210.3759765625, |
|
"logps/rejected": -202.38551330566406, |
|
"loss": 0.6939, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.014994035474956036, |
|
"rewards/margins": 0.0019599986262619495, |
|
"rewards/margins_max": 0.006691869348287582, |
|
"rewards/margins_min": -0.0027718725614249706, |
|
"rewards/margins_std": 0.006691875867545605, |
|
"rewards/rejected": 0.013034040108323097, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_losses": 0.690701961517334, |
|
"epoch": 0.52, |
|
"grad_norm": 1.585863985509441, |
|
"learning_rate": 4.921545860818778e-07, |
|
"logits/chosen": -2.8698291778564453, |
|
"logits/rejected": -2.769212484359741, |
|
"logps/chosen": -274.9241943359375, |
|
"logps/rejected": -276.3659973144531, |
|
"loss": 0.6909, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.019051264971494675, |
|
"rewards/margins": 0.004921003244817257, |
|
"rewards/margins_max": 0.009086159989237785, |
|
"rewards/margins_min": 0.0007558457436971366, |
|
"rewards/margins_std": 0.005890422035008669, |
|
"rewards/rejected": 0.014130261726677418, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_losses": 0.692548394203186, |
|
"epoch": 0.55, |
|
"grad_norm": 8.258786695285371, |
|
"learning_rate": 4.898051734555674e-07, |
|
"logits/chosen": -2.8515067100524902, |
|
"logits/rejected": -2.865828037261963, |
|
"logps/chosen": -193.68429565429688, |
|
"logps/rejected": -217.70962524414062, |
|
"loss": 0.6921, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.016250569373369217, |
|
"rewards/margins": 0.0012078474974259734, |
|
"rewards/margins_max": 0.0032145425211638212, |
|
"rewards/margins_min": -0.0007988469442352653, |
|
"rewards/margins_std": 0.002837894717231393, |
|
"rewards/rejected": 0.01504272036254406, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_losses": 0.6910722255706787, |
|
"epoch": 0.58, |
|
"grad_norm": 9.556509896855202, |
|
"learning_rate": 4.871556462705816e-07, |
|
"logits/chosen": -2.663051128387451, |
|
"logits/rejected": -2.8675954341888428, |
|
"logps/chosen": -174.3250732421875, |
|
"logps/rejected": -239.2481231689453, |
|
"loss": 0.692, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.020512599498033524, |
|
"rewards/margins": 0.004181156400591135, |
|
"rewards/margins_max": 0.009443456307053566, |
|
"rewards/margins_min": -0.0010811425745487213, |
|
"rewards/margins_std": 0.007442014757543802, |
|
"rewards/rejected": 0.016331443563103676, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_losses": 0.6924042701721191, |
|
"epoch": 0.61, |
|
"grad_norm": 2.1686233219670368, |
|
"learning_rate": 4.842093203923387e-07, |
|
"logits/chosen": -2.906074047088623, |
|
"logits/rejected": -2.8612585067749023, |
|
"logps/chosen": -219.8831787109375, |
|
"logps/rejected": -310.6004638671875, |
|
"loss": 0.6919, |
|
"positive_losses": 0.0378265380859375, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.02058028057217598, |
|
"rewards/margins": 0.0015535615384578705, |
|
"rewards/margins_max": 0.009957343339920044, |
|
"rewards/margins_min": -0.006850218866020441, |
|
"rewards/margins_std": 0.011884740553796291, |
|
"rewards/rejected": 0.01902671717107296, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_dpo_losses": 0.6904077529907227, |
|
"eval_logits/chosen": -2.858304977416992, |
|
"eval_logits/rejected": -2.813277244567871, |
|
"eval_logps/chosen": -282.7998962402344, |
|
"eval_logps/rejected": -257.31414794921875, |
|
"eval_loss": 0.6915329694747925, |
|
"eval_positive_losses": 0.01193891279399395, |
|
"eval_rewards/accuracies": 0.6666666865348816, |
|
"eval_rewards/chosen": 0.024213315919041634, |
|
"eval_rewards/margins": 0.0055284094996750355, |
|
"eval_rewards/margins_max": 0.019484752789139748, |
|
"eval_rewards/margins_min": -0.008329018950462341, |
|
"eval_rewards/margins_std": 0.012371117249131203, |
|
"eval_rewards/rejected": 0.018684905022382736, |
|
"eval_runtime": 282.1184, |
|
"eval_samples_per_second": 7.089, |
|
"eval_steps_per_second": 0.223, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_losses": 0.6858643293380737, |
|
"epoch": 0.64, |
|
"grad_norm": 1.962735267678132, |
|
"learning_rate": 4.809698831278217e-07, |
|
"logits/chosen": -2.9653470516204834, |
|
"logits/rejected": -2.9085001945495605, |
|
"logps/chosen": -375.43804931640625, |
|
"logps/rejected": -293.70599365234375, |
|
"loss": 0.6913, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.032657068222761154, |
|
"rewards/margins": 0.014679856598377228, |
|
"rewards/margins_max": 0.026673411950469017, |
|
"rewards/margins_min": 0.002686301711946726, |
|
"rewards/margins_std": 0.01696144789457321, |
|
"rewards/rejected": 0.017977211624383926, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_losses": 0.6917377710342407, |
|
"epoch": 0.67, |
|
"grad_norm": 1.9665123469481351, |
|
"learning_rate": 4.774413886109386e-07, |
|
"logits/chosen": -2.9416866302490234, |
|
"logits/rejected": -2.848280429840088, |
|
"logps/chosen": -281.5489807128906, |
|
"logps/rejected": -228.3254852294922, |
|
"loss": 0.6904, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.024796273559331894, |
|
"rewards/margins": 0.002851649885997176, |
|
"rewards/margins_max": 0.00997900776565075, |
|
"rewards/margins_min": -0.004275707062333822, |
|
"rewards/margins_std": 0.010079605504870415, |
|
"rewards/rejected": 0.021944623440504074, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_losses": 0.6900176405906677, |
|
"epoch": 0.7, |
|
"grad_norm": 8.719769818128936, |
|
"learning_rate": 4.736282527288008e-07, |
|
"logits/chosen": -2.9049994945526123, |
|
"logits/rejected": -2.930379629135132, |
|
"logps/chosen": -204.6458740234375, |
|
"logps/rejected": -201.35501098632812, |
|
"loss": 0.6932, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.02477971278131008, |
|
"rewards/margins": 0.006342197768390179, |
|
"rewards/margins_max": 0.015214154496788979, |
|
"rewards/margins_min": -0.0025297594256699085, |
|
"rewards/margins_std": 0.012546841986477375, |
|
"rewards/rejected": 0.018437515944242477, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_losses": 0.6900538802146912, |
|
"epoch": 0.73, |
|
"grad_norm": 2.0583669693189552, |
|
"learning_rate": 4.6953524759527053e-07, |
|
"logits/chosen": -3.0629992485046387, |
|
"logits/rejected": -2.9669101238250732, |
|
"logps/chosen": -341.70391845703125, |
|
"logps/rejected": -327.5718078613281, |
|
"loss": 0.6911, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.029439503327012062, |
|
"rewards/margins": 0.006293328013271093, |
|
"rewards/margins_max": 0.019671425223350525, |
|
"rewards/margins_min": -0.0070847696624696255, |
|
"rewards/margins_std": 0.018919486552476883, |
|
"rewards/rejected": 0.023146172985434532, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_losses": 0.6918362379074097, |
|
"epoch": 0.76, |
|
"grad_norm": 1.9864876382070107, |
|
"learning_rate": 4.6516749557869185e-07, |
|
"logits/chosen": -2.9660696983337402, |
|
"logits/rejected": -2.871572494506836, |
|
"logps/chosen": -272.2618408203125, |
|
"logps/rejected": -243.72860717773438, |
|
"loss": 0.6926, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.030278582125902176, |
|
"rewards/margins": 0.002722159493714571, |
|
"rewards/margins_max": 0.014538141898810863, |
|
"rewards/margins_min": -0.009093823842704296, |
|
"rewards/margins_std": 0.016710322350263596, |
|
"rewards/rejected": 0.027556419372558594, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_losses": 0.6899689435958862, |
|
"epoch": 0.79, |
|
"grad_norm": 2.12838016274278, |
|
"learning_rate": 4.6053046289128194e-07, |
|
"logits/chosen": -2.956109046936035, |
|
"logits/rejected": -2.9426538944244385, |
|
"logps/chosen": -241.18115234375, |
|
"logps/rejected": -204.5635528564453, |
|
"loss": 0.6896, |
|
"positive_losses": 0.00806427001953125, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.029516180977225304, |
|
"rewards/margins": 0.006419041194021702, |
|
"rewards/margins_max": 0.014670031145215034, |
|
"rewards/margins_min": -0.0018319508526474237, |
|
"rewards/margins_std": 0.011668664403259754, |
|
"rewards/rejected": 0.023097142577171326, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_losses": 0.6896145343780518, |
|
"epoch": 0.82, |
|
"grad_norm": 2.012252233192398, |
|
"learning_rate": 4.5562995274820283e-07, |
|
"logits/chosen": -2.819646120071411, |
|
"logits/rejected": -2.845240354537964, |
|
"logps/chosen": -307.28582763671875, |
|
"logps/rejected": -313.50897216796875, |
|
"loss": 0.6913, |
|
"positive_losses": 0.06691131740808487, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.025536488741636276, |
|
"rewards/margins": 0.007166997995227575, |
|
"rewards/margins_max": 0.01585778035223484, |
|
"rewards/margins_min": -0.0015237818006426096, |
|
"rewards/margins_std": 0.012290618382394314, |
|
"rewards/rejected": 0.018369488418102264, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_losses": 0.6887096166610718, |
|
"epoch": 0.85, |
|
"grad_norm": 2.181123977999317, |
|
"learning_rate": 4.5047209810487705e-07, |
|
"logits/chosen": -2.971086025238037, |
|
"logits/rejected": -2.9251911640167236, |
|
"logps/chosen": -351.91864013671875, |
|
"logps/rejected": -323.68658447265625, |
|
"loss": 0.6897, |
|
"positive_losses": 0.03092041052877903, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.04124215617775917, |
|
"rewards/margins": 0.009057796560227871, |
|
"rewards/margins_max": 0.017794614657759666, |
|
"rewards/margins_min": 0.00032097622170113027, |
|
"rewards/margins_std": 0.012355729006230831, |
|
"rewards/rejected": 0.032184358686208725, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_losses": 0.6910039186477661, |
|
"epoch": 0.88, |
|
"grad_norm": 1.7264365586137154, |
|
"learning_rate": 4.450633539816351e-07, |
|
"logits/chosen": -2.669238567352295, |
|
"logits/rejected": -2.6185073852539062, |
|
"logps/chosen": -229.40542602539062, |
|
"logps/rejected": -268.8614501953125, |
|
"loss": 0.6898, |
|
"positive_losses": 0.0014183043967932463, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.029668454080820084, |
|
"rewards/margins": 0.00436769612133503, |
|
"rewards/margins_max": 0.01580682210624218, |
|
"rewards/margins_min": -0.007071429397910833, |
|
"rewards/margins_std": 0.01617736741900444, |
|
"rewards/rejected": 0.025300756096839905, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_losses": 0.688437819480896, |
|
"epoch": 0.91, |
|
"grad_norm": 7.040817600804112, |
|
"learning_rate": 4.394104893853007e-07, |
|
"logits/chosen": -3.0286426544189453, |
|
"logits/rejected": -2.9285566806793213, |
|
"logps/chosen": -266.41265869140625, |
|
"logps/rejected": -276.48675537109375, |
|
"loss": 0.6903, |
|
"positive_losses": 0.08214263617992401, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.037517085671424866, |
|
"rewards/margins": 0.009616566821932793, |
|
"rewards/margins_max": 0.027836177498102188, |
|
"rewards/margins_min": -0.008603041991591454, |
|
"rewards/margins_std": 0.025766421109437943, |
|
"rewards/rejected": 0.027900516986846924, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_dpo_losses": 0.6876209378242493, |
|
"eval_logits/chosen": -2.853738784790039, |
|
"eval_logits/rejected": -2.8086090087890625, |
|
"eval_logps/chosen": -281.2695007324219, |
|
"eval_logps/rejected": -256.35443115234375, |
|
"eval_loss": 0.6898627877235413, |
|
"eval_positive_losses": 0.016462840139865875, |
|
"eval_rewards/accuracies": 0.6666666865348816, |
|
"eval_rewards/chosen": 0.03951713442802429, |
|
"eval_rewards/margins": 0.011234736070036888, |
|
"eval_rewards/margins_max": 0.0378512442111969, |
|
"eval_rewards/margins_min": -0.014283931814134121, |
|
"eval_rewards/margins_std": 0.023247137665748596, |
|
"eval_rewards/rejected": 0.028282402083277702, |
|
"eval_runtime": 281.1088, |
|
"eval_samples_per_second": 7.115, |
|
"eval_steps_per_second": 0.224, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_losses": 0.6858936548233032, |
|
"epoch": 0.94, |
|
"grad_norm": 1.534301986044828, |
|
"learning_rate": 4.33520578837825e-07, |
|
"logits/chosen": -2.7751994132995605, |
|
"logits/rejected": -2.767688035964966, |
|
"logps/chosen": -286.08197021484375, |
|
"logps/rejected": -240.5784912109375, |
|
"loss": 0.6894, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.04037940502166748, |
|
"rewards/margins": 0.014671111479401588, |
|
"rewards/margins_max": 0.025094876065850258, |
|
"rewards/margins_min": 0.004247347824275494, |
|
"rewards/margins_std": 0.014741426333785057, |
|
"rewards/rejected": 0.02570829726755619, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_losses": 0.6850165128707886, |
|
"epoch": 0.97, |
|
"grad_norm": 1.9190817282141555, |
|
"learning_rate": 4.2740099352256954e-07, |
|
"logits/chosen": -2.9025392532348633, |
|
"logits/rejected": -2.907529592514038, |
|
"logps/chosen": -202.9761505126953, |
|
"logps/rejected": -240.7790985107422, |
|
"loss": 0.6868, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.04233746603131294, |
|
"rewards/margins": 0.016391225159168243, |
|
"rewards/margins_max": 0.023528441786766052, |
|
"rewards/margins_min": 0.00925400760024786, |
|
"rewards/margins_std": 0.010093549266457558, |
|
"rewards/rejected": 0.02594623900949955, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_losses": 0.6829380989074707, |
|
"epoch": 1.0, |
|
"grad_norm": 2.017378170300957, |
|
"learning_rate": 4.2105939205932005e-07, |
|
"logits/chosen": -2.9684784412384033, |
|
"logits/rejected": -2.8465845584869385, |
|
"logps/chosen": -384.70379638671875, |
|
"logps/rejected": -371.2561950683594, |
|
"loss": 0.6893, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.056094102561473846, |
|
"rewards/margins": 0.02067440189421177, |
|
"rewards/margins_max": 0.03426554799079895, |
|
"rewards/margins_min": 0.007083254866302013, |
|
"rewards/margins_std": 0.019220784306526184, |
|
"rewards/rejected": 0.03541969880461693, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_losses": 0.6854172348976135, |
|
"epoch": 1.03, |
|
"grad_norm": 2.002276198207298, |
|
"learning_rate": 4.145037109195757e-07, |
|
"logits/chosen": -2.9032974243164062, |
|
"logits/rejected": -2.9131686687469482, |
|
"logps/chosen": -265.29217529296875, |
|
"logps/rejected": -192.96426391601562, |
|
"loss": 0.6866, |
|
"positive_losses": 0.01656951941549778, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.044899724423885345, |
|
"rewards/margins": 0.01578436978161335, |
|
"rewards/margins_max": 0.03546427935361862, |
|
"rewards/margins_min": -0.003895541187375784, |
|
"rewards/margins_std": 0.02783159911632538, |
|
"rewards/rejected": 0.029115352779626846, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_losses": 0.6908459067344666, |
|
"epoch": 1.06, |
|
"grad_norm": 1.847200301127122, |
|
"learning_rate": 4.077421544941079e-07, |
|
"logits/chosen": -2.8273587226867676, |
|
"logits/rejected": -2.8343966007232666, |
|
"logps/chosen": -225.12521362304688, |
|
"logps/rejected": -211.3616180419922, |
|
"loss": 0.6867, |
|
"positive_losses": 0.089324951171875, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.038552217185497284, |
|
"rewards/margins": 0.004919026046991348, |
|
"rewards/margins_max": 0.021458707749843597, |
|
"rewards/margins_min": -0.011620650999248028, |
|
"rewards/margins_std": 0.0233906377106905, |
|
"rewards/rejected": 0.03363319858908653, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_losses": 0.6812031865119934, |
|
"epoch": 1.09, |
|
"grad_norm": 1.9045252266933426, |
|
"learning_rate": 4.0078318482522114e-07, |
|
"logits/chosen": -2.8550143241882324, |
|
"logits/rejected": -2.851294755935669, |
|
"logps/chosen": -236.28811645507812, |
|
"logps/rejected": -254.1648406982422, |
|
"loss": 0.6854, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.05590778589248657, |
|
"rewards/margins": 0.024477163329720497, |
|
"rewards/margins_max": 0.046524226665496826, |
|
"rewards/margins_min": 0.0024301104713231325, |
|
"rewards/margins_std": 0.031179243698716164, |
|
"rewards/rejected": 0.031430620700120926, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_losses": 0.6793321371078491, |
|
"epoch": 1.12, |
|
"grad_norm": 2.2246778576418613, |
|
"learning_rate": 3.9363551101656356e-07, |
|
"logits/chosen": -2.8856711387634277, |
|
"logits/rejected": -2.7337353229522705, |
|
"logps/chosen": -360.27142333984375, |
|
"logps/rejected": -211.5453338623047, |
|
"loss": 0.6838, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.05444517731666565, |
|
"rewards/margins": 0.02811727486550808, |
|
"rewards/margins_max": 0.04928036779165268, |
|
"rewards/margins_min": 0.006954180542379618, |
|
"rewards/margins_std": 0.029929134994745255, |
|
"rewards/rejected": 0.02632790245115757, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_losses": 0.6877564787864685, |
|
"epoch": 1.16, |
|
"grad_norm": 2.093111994313669, |
|
"learning_rate": 3.8630807833374277e-07, |
|
"logits/chosen": -2.9553494453430176, |
|
"logits/rejected": -2.8332180976867676, |
|
"logps/chosen": -270.87994384765625, |
|
"logps/rejected": -317.43060302734375, |
|
"loss": 0.6851, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.052814941853284836, |
|
"rewards/margins": 0.011218393221497536, |
|
"rewards/margins_max": 0.03882121667265892, |
|
"rewards/margins_min": -0.01638443022966385, |
|
"rewards/margins_std": 0.039036285132169724, |
|
"rewards/rejected": 0.04159654304385185, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_losses": 0.6842631101608276, |
|
"epoch": 1.19, |
|
"grad_norm": 1.9478894060100704, |
|
"learning_rate": 3.7881005700938627e-07, |
|
"logits/chosen": -2.883104085922241, |
|
"logits/rejected": -2.9553048610687256, |
|
"logps/chosen": -286.1315002441406, |
|
"logps/rejected": -273.46533203125, |
|
"loss": 0.6845, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.061849962919950485, |
|
"rewards/margins": 0.01832859218120575, |
|
"rewards/margins_max": 0.04364844784140587, |
|
"rewards/margins_min": -0.006991264875978231, |
|
"rewards/margins_std": 0.03580768406391144, |
|
"rewards/rejected": 0.043521374464035034, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_losses": 0.6858700513839722, |
|
"epoch": 1.22, |
|
"grad_norm": 2.027188274912056, |
|
"learning_rate": 3.711508307666567e-07, |
|
"logits/chosen": -2.8832974433898926, |
|
"logits/rejected": -2.911393404006958, |
|
"logps/chosen": -202.76405334472656, |
|
"logps/rejected": -219.851318359375, |
|
"loss": 0.6832, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.053437333554029465, |
|
"rewards/margins": 0.014844384975731373, |
|
"rewards/margins_max": 0.03350861743092537, |
|
"rewards/margins_min": -0.0038198456168174744, |
|
"rewards/margins_std": 0.02639520727097988, |
|
"rewards/rejected": 0.038592949509620667, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_dpo_losses": 0.6846545934677124, |
|
"eval_logits/chosen": -2.850677490234375, |
|
"eval_logits/rejected": -2.8056881427764893, |
|
"eval_logps/chosen": -279.9754638671875, |
|
"eval_logps/rejected": -255.6741485595703, |
|
"eval_loss": 0.6891601085662842, |
|
"eval_positive_losses": 0.030417578294873238, |
|
"eval_rewards/accuracies": 0.7023809552192688, |
|
"eval_rewards/chosen": 0.052457116544246674, |
|
"eval_rewards/margins": 0.01737220212817192, |
|
"eval_rewards/margins_max": 0.05574695020914078, |
|
"eval_rewards/margins_min": -0.019588936120271683, |
|
"eval_rewards/margins_std": 0.03368803858757019, |
|
"eval_rewards/rejected": 0.03508492186665535, |
|
"eval_runtime": 281.5128, |
|
"eval_samples_per_second": 7.104, |
|
"eval_steps_per_second": 0.224, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_losses": 0.6754962205886841, |
|
"epoch": 1.25, |
|
"grad_norm": 1.8539817875046798, |
|
"learning_rate": 3.6333998507558596e-07, |
|
"logits/chosen": -2.7948994636535645, |
|
"logits/rejected": -2.838921070098877, |
|
"logps/chosen": -243.80331420898438, |
|
"logps/rejected": -267.6720275878906, |
|
"loss": 0.6841, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.061250943690538406, |
|
"rewards/margins": 0.03602340444922447, |
|
"rewards/margins_max": 0.05897093936800957, |
|
"rewards/margins_min": 0.013075867667794228, |
|
"rewards/margins_std": 0.032452717423439026, |
|
"rewards/rejected": 0.025227541103959084, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_losses": 0.6807809472084045, |
|
"epoch": 1.28, |
|
"grad_norm": 6.364237058452619, |
|
"learning_rate": 3.5538729515692354e-07, |
|
"logits/chosen": -2.943338394165039, |
|
"logits/rejected": -2.7816109657287598, |
|
"logps/chosen": -335.1874084472656, |
|
"logps/rejected": -279.122802734375, |
|
"loss": 0.6895, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.06752719730138779, |
|
"rewards/margins": 0.025191819295287132, |
|
"rewards/margins_max": 0.04893922060728073, |
|
"rewards/margins_min": 0.001444423571228981, |
|
"rewards/margins_std": 0.03358389437198639, |
|
"rewards/rejected": 0.04233537241816521, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_losses": 0.6775410175323486, |
|
"epoch": 1.31, |
|
"grad_norm": 2.1288430860755887, |
|
"learning_rate": 3.4730271374851453e-07, |
|
"logits/chosen": -2.8643321990966797, |
|
"logits/rejected": -2.8559885025024414, |
|
"logps/chosen": -335.872802734375, |
|
"logps/rejected": -249.48501586914062, |
|
"loss": 0.6787, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.06267213821411133, |
|
"rewards/margins": 0.03193260729312897, |
|
"rewards/margins_max": 0.05750267580151558, |
|
"rewards/margins_min": 0.006362536456435919, |
|
"rewards/margins_std": 0.03616153448820114, |
|
"rewards/rejected": 0.03073953464627266, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_losses": 0.6760178208351135, |
|
"epoch": 1.34, |
|
"grad_norm": 2.302278973535758, |
|
"learning_rate": 3.390963586495142e-07, |
|
"logits/chosen": -2.7527740001678467, |
|
"logits/rejected": -2.7448365688323975, |
|
"logps/chosen": -291.7049560546875, |
|
"logps/rejected": -197.5590362548828, |
|
"loss": 0.6836, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.06701546162366867, |
|
"rewards/margins": 0.03498576581478119, |
|
"rewards/margins_max": 0.05517752841114998, |
|
"rewards/margins_min": 0.01479400135576725, |
|
"rewards/margins_std": 0.02855546772480011, |
|
"rewards/rejected": 0.03202969953417778, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_losses": 0.6835552453994751, |
|
"epoch": 1.37, |
|
"grad_norm": 7.531724083885525, |
|
"learning_rate": 3.3077850005803125e-07, |
|
"logits/chosen": -3.047640562057495, |
|
"logits/rejected": -2.9399123191833496, |
|
"logps/chosen": -273.14666748046875, |
|
"logps/rejected": -232.124267578125, |
|
"loss": 0.6811, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.05829363316297531, |
|
"rewards/margins": 0.019574137404561043, |
|
"rewards/margins_max": 0.040946681052446365, |
|
"rewards/margins_min": -0.001798404031433165, |
|
"rewards/margins_std": 0.03022533655166626, |
|
"rewards/rejected": 0.03871949762105942, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_losses": 0.6802719831466675, |
|
"epoch": 1.4, |
|
"grad_norm": 2.1783384024286963, |
|
"learning_rate": 3.2235954771804384e-07, |
|
"logits/chosen": -2.899862051010132, |
|
"logits/rejected": -2.8675050735473633, |
|
"logps/chosen": -288.083740234375, |
|
"logps/rejected": -268.29876708984375, |
|
"loss": 0.683, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0625755786895752, |
|
"rewards/margins": 0.026135718449950218, |
|
"rewards/margins_max": 0.04849334806203842, |
|
"rewards/margins_min": 0.0037780862767249346, |
|
"rewards/margins_std": 0.03161846846342087, |
|
"rewards/rejected": 0.03643985837697983, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_losses": 0.6933653354644775, |
|
"epoch": 1.43, |
|
"grad_norm": 6.179682870277185, |
|
"learning_rate": 3.138500378916752e-07, |
|
"logits/chosen": -2.8621251583099365, |
|
"logits/rejected": -2.8821539878845215, |
|
"logps/chosen": -181.8573455810547, |
|
"logps/rejected": -222.8794708251953, |
|
"loss": 0.6844, |
|
"positive_losses": 0.0033210753463208675, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.04994913935661316, |
|
"rewards/margins": -5.03659236983367e-07, |
|
"rewards/margins_max": 0.01866455376148224, |
|
"rewards/margins_min": -0.018665555864572525, |
|
"rewards/margins_std": 0.0263963732868433, |
|
"rewards/rejected": 0.04994964599609375, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_losses": 0.6902952790260315, |
|
"epoch": 1.46, |
|
"grad_norm": 2.096436783641514, |
|
"learning_rate": 3.0526062017313247e-07, |
|
"logits/chosen": -2.8722407817840576, |
|
"logits/rejected": -2.9027137756347656, |
|
"logps/chosen": -245.9871826171875, |
|
"logps/rejected": -332.47283935546875, |
|
"loss": 0.6832, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0516946017742157, |
|
"rewards/margins": 0.006326904986053705, |
|
"rewards/margins_max": 0.03602270036935806, |
|
"rewards/margins_min": -0.023368891328573227, |
|
"rewards/margins_std": 0.041996195912361145, |
|
"rewards/rejected": 0.04536769539117813, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_losses": 0.6820996999740601, |
|
"epoch": 1.49, |
|
"grad_norm": 2.214654285326634, |
|
"learning_rate": 2.966020441608114e-07, |
|
"logits/chosen": -2.940601110458374, |
|
"logits/rejected": -2.950160026550293, |
|
"logps/chosen": -408.1592712402344, |
|
"logps/rejected": -367.73052978515625, |
|
"loss": 0.683, |
|
"positive_losses": 0.10776062309741974, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.06694754213094711, |
|
"rewards/margins": 0.02292126789689064, |
|
"rewards/margins_max": 0.04947842285037041, |
|
"rewards/margins_min": -0.0036358858924359083, |
|
"rewards/margins_std": 0.037557490170001984, |
|
"rewards/rejected": 0.04402626305818558, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_losses": 0.6780664324760437, |
|
"epoch": 1.52, |
|
"grad_norm": 2.3489902153022464, |
|
"learning_rate": 2.8788514600424653e-07, |
|
"logits/chosen": -2.9777138233184814, |
|
"logits/rejected": -2.8717598915100098, |
|
"logps/chosen": -271.792236328125, |
|
"logps/rejected": -217.39794921875, |
|
"loss": 0.6776, |
|
"positive_losses": 0.04692535474896431, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.07212454080581665, |
|
"rewards/margins": 0.031092515215277672, |
|
"rewards/margins_max": 0.05294063687324524, |
|
"rewards/margins_min": 0.009244391694664955, |
|
"rewards/margins_std": 0.03089791163802147, |
|
"rewards/rejected": 0.04103202372789383, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_dpo_losses": 0.6824542880058289, |
|
"eval_logits/chosen": -2.8494935035705566, |
|
"eval_logits/rejected": -2.8047306537628174, |
|
"eval_logps/chosen": -278.7507629394531, |
|
"eval_logps/rejected": -254.91441345214844, |
|
"eval_loss": 0.6883726716041565, |
|
"eval_positive_losses": 0.04435727000236511, |
|
"eval_rewards/accuracies": 0.6904761791229248, |
|
"eval_rewards/chosen": 0.06470431387424469, |
|
"eval_rewards/margins": 0.02202197164297104, |
|
"eval_rewards/margins_max": 0.07098579406738281, |
|
"eval_rewards/margins_min": -0.025560587644577026, |
|
"eval_rewards/margins_std": 0.04328176751732826, |
|
"eval_rewards/rejected": 0.042682331055402756, |
|
"eval_runtime": 282.2305, |
|
"eval_samples_per_second": 7.086, |
|
"eval_steps_per_second": 0.223, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_losses": 0.684786319732666, |
|
"epoch": 1.55, |
|
"grad_norm": 1.8216734943221224, |
|
"learning_rate": 2.791208348427426e-07, |
|
"logits/chosen": -2.806025981903076, |
|
"logits/rejected": -2.7332119941711426, |
|
"logps/chosen": -350.2364501953125, |
|
"logps/rejected": -264.30572509765625, |
|
"loss": 0.6831, |
|
"positive_losses": 0.04189910739660263, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.05516824126243591, |
|
"rewards/margins": 0.017182758077979088, |
|
"rewards/margins_max": 0.042931947857141495, |
|
"rewards/margins_min": -0.008566427044570446, |
|
"rewards/margins_std": 0.036414846777915955, |
|
"rewards/rejected": 0.037985485047101974, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_losses": 0.6818202137947083, |
|
"epoch": 1.58, |
|
"grad_norm": 7.65808980206578, |
|
"learning_rate": 2.703200791526611e-07, |
|
"logits/chosen": -3.0046448707580566, |
|
"logits/rejected": -2.982851505279541, |
|
"logps/chosen": -296.64495849609375, |
|
"logps/rejected": -249.4301300048828, |
|
"loss": 0.6837, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.07770957797765732, |
|
"rewards/margins": 0.02354852482676506, |
|
"rewards/margins_max": 0.04674399644136429, |
|
"rewards/margins_min": 0.000353056937456131, |
|
"rewards/margins_std": 0.03280334919691086, |
|
"rewards/rejected": 0.05416105315089226, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_losses": 0.6779841184616089, |
|
"epoch": 1.61, |
|
"grad_norm": 2.0053415493661566, |
|
"learning_rate": 2.6149389302044487e-07, |
|
"logits/chosen": -2.849656105041504, |
|
"logits/rejected": -2.743305206298828, |
|
"logps/chosen": -389.1894836425781, |
|
"logps/rejected": -333.93048095703125, |
|
"loss": 0.6807, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.08877243846654892, |
|
"rewards/margins": 0.031258475035429, |
|
"rewards/margins_max": 0.05623454600572586, |
|
"rewards/margins_min": 0.006282416172325611, |
|
"rewards/margins_std": 0.03532148897647858, |
|
"rewards/rejected": 0.05751396343111992, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_losses": 0.6694897413253784, |
|
"epoch": 1.64, |
|
"grad_norm": 1.8763346054855339, |
|
"learning_rate": 2.526533223585641e-07, |
|
"logits/chosen": -2.7003915309906006, |
|
"logits/rejected": -2.768725872039795, |
|
"logps/chosen": -374.7425231933594, |
|
"logps/rejected": -326.8890075683594, |
|
"loss": 0.6778, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.08555268496274948, |
|
"rewards/margins": 0.048603277653455734, |
|
"rewards/margins_max": 0.07389990985393524, |
|
"rewards/margins_min": 0.023306647315621376, |
|
"rewards/margins_std": 0.0357748381793499, |
|
"rewards/rejected": 0.03694940358400345, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_losses": 0.6740890741348267, |
|
"epoch": 1.67, |
|
"grad_norm": 7.77121120757971, |
|
"learning_rate": 2.438094310816307e-07, |
|
"logits/chosen": -2.7915353775024414, |
|
"logits/rejected": -2.8357605934143066, |
|
"logps/chosen": -232.6016387939453, |
|
"logps/rejected": -294.88299560546875, |
|
"loss": 0.6826, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.06982794404029846, |
|
"rewards/margins": 0.03929734230041504, |
|
"rewards/margins_max": 0.07744838297367096, |
|
"rewards/margins_min": 0.0011462997645139694, |
|
"rewards/margins_std": 0.05395371839404106, |
|
"rewards/rejected": 0.030530598014593124, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_losses": 0.6748778820037842, |
|
"epoch": 1.7, |
|
"grad_norm": 2.150634086609646, |
|
"learning_rate": 2.3497328725998406e-07, |
|
"logits/chosen": -2.8007655143737793, |
|
"logits/rejected": -2.8195149898529053, |
|
"logps/chosen": -272.33734130859375, |
|
"logps/rejected": -247.6636199951172, |
|
"loss": 0.6794, |
|
"positive_losses": 0.18440398573875427, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.08441298454999924, |
|
"rewards/margins": 0.03730101138353348, |
|
"rewards/margins_max": 0.05858795717358589, |
|
"rewards/margins_min": 0.016014060005545616, |
|
"rewards/margins_std": 0.030104290693998337, |
|
"rewards/rejected": 0.047111984342336655, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_losses": 0.6834501028060913, |
|
"epoch": 1.73, |
|
"grad_norm": 1.9329413652442144, |
|
"learning_rate": 2.261559492680755e-07, |
|
"logits/chosen": -3.00966215133667, |
|
"logits/rejected": -2.9343342781066895, |
|
"logps/chosen": -364.2731018066406, |
|
"logps/rejected": -270.2425537109375, |
|
"loss": 0.6883, |
|
"positive_losses": 0.03923187404870987, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.07380800694227219, |
|
"rewards/margins": 0.020400431007146835, |
|
"rewards/margins_max": 0.051741547882556915, |
|
"rewards/margins_min": -0.010940684005618095, |
|
"rewards/margins_std": 0.04432303458452225, |
|
"rewards/rejected": 0.05340757966041565, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_losses": 0.683830738067627, |
|
"epoch": 1.76, |
|
"grad_norm": 6.498857250997875, |
|
"learning_rate": 2.1736845194498716e-07, |
|
"logits/chosen": -2.8374483585357666, |
|
"logits/rejected": -2.778815507888794, |
|
"logps/chosen": -294.1668395996094, |
|
"logps/rejected": -277.2710876464844, |
|
"loss": 0.6838, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0776522159576416, |
|
"rewards/margins": 0.018975799903273582, |
|
"rewards/margins_max": 0.03656230866909027, |
|
"rewards/margins_min": 0.0013892930001020432, |
|
"rewards/margins_std": 0.02487107738852501, |
|
"rewards/rejected": 0.05867641046643257, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_losses": 0.6692390441894531, |
|
"epoch": 1.79, |
|
"grad_norm": 14.442968348535452, |
|
"learning_rate": 2.0862179278440504e-07, |
|
"logits/chosen": -2.752941608428955, |
|
"logits/rejected": -2.595552921295166, |
|
"logps/chosen": -326.27947998046875, |
|
"logps/rejected": -363.86175537109375, |
|
"loss": 0.6826, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.09450069069862366, |
|
"rewards/margins": 0.04905655235052109, |
|
"rewards/margins_max": 0.08510641753673553, |
|
"rewards/margins_min": 0.013006689958274364, |
|
"rewards/margins_std": 0.05098220705986023, |
|
"rewards/rejected": 0.04544413462281227, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_losses": 0.6739311218261719, |
|
"epoch": 1.82, |
|
"grad_norm": 1.8571868330601113, |
|
"learning_rate": 1.9992691817133024e-07, |
|
"logits/chosen": -2.814953327178955, |
|
"logits/rejected": -2.798966884613037, |
|
"logps/chosen": -228.36764526367188, |
|
"logps/rejected": -220.4230499267578, |
|
"loss": 0.677, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.07805188000202179, |
|
"rewards/margins": 0.03935372084379196, |
|
"rewards/margins_max": 0.06514634191989899, |
|
"rewards/margins_min": 0.013561112806200981, |
|
"rewards/margins_std": 0.0364762619137764, |
|
"rewards/rejected": 0.03869815915822983, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_dpo_losses": 0.6810709238052368, |
|
"eval_logits/chosen": -2.8493857383728027, |
|
"eval_logits/rejected": -2.804689407348633, |
|
"eval_logps/chosen": -277.5360412597656, |
|
"eval_logps/rejected": -253.9931640625, |
|
"eval_loss": 0.6872838139533997, |
|
"eval_positive_losses": 0.04589047655463219, |
|
"eval_rewards/accuracies": 0.682539701461792, |
|
"eval_rewards/chosen": 0.07685134559869766, |
|
"eval_rewards/margins": 0.02495652064681053, |
|
"eval_rewards/margins_max": 0.08033440262079239, |
|
"eval_rewards/margins_min": -0.02796892076730728, |
|
"eval_rewards/margins_std": 0.04838396981358528, |
|
"eval_rewards/rejected": 0.05189482495188713, |
|
"eval_runtime": 282.2305, |
|
"eval_samples_per_second": 7.086, |
|
"eval_steps_per_second": 0.223, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_losses": 0.6791930794715881, |
|
"epoch": 1.85, |
|
"grad_norm": 2.108916508331074, |
|
"learning_rate": 1.912947096827513e-07, |
|
"logits/chosen": -2.977818250656128, |
|
"logits/rejected": -2.8510375022888184, |
|
"logps/chosen": -303.4037170410156, |
|
"logps/rejected": -240.5347137451172, |
|
"loss": 0.6825, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.08365954458713531, |
|
"rewards/margins": 0.028567474335432053, |
|
"rewards/margins_max": 0.05369936302304268, |
|
"rewards/margins_min": 0.003435590071603656, |
|
"rewards/margins_std": 0.03554185479879379, |
|
"rewards/rejected": 0.05509205535054207, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_losses": 0.6858159303665161, |
|
"epoch": 1.88, |
|
"grad_norm": 2.0427241279215775, |
|
"learning_rate": 1.8273597046942452e-07, |
|
"logits/chosen": -2.8828835487365723, |
|
"logits/rejected": -2.7817318439483643, |
|
"logps/chosen": -252.5178985595703, |
|
"logps/rejected": -227.63070678710938, |
|
"loss": 0.6838, |
|
"positive_losses": 0.16164550185203552, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.06277048587799072, |
|
"rewards/margins": 0.015206271782517433, |
|
"rewards/margins_max": 0.04130411148071289, |
|
"rewards/margins_min": -0.010891567915678024, |
|
"rewards/margins_std": 0.03690791875123978, |
|
"rewards/rejected": 0.04756421223282814, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_losses": 0.6795656681060791, |
|
"epoch": 1.91, |
|
"grad_norm": 4.79445166991863, |
|
"learning_rate": 1.742614117358029e-07, |
|
"logits/chosen": -2.863675594329834, |
|
"logits/rejected": -2.8280770778656006, |
|
"logps/chosen": -238.7898712158203, |
|
"logps/rejected": -213.51846313476562, |
|
"loss": 0.6827, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.06867232173681259, |
|
"rewards/margins": 0.028171828016638756, |
|
"rewards/margins_max": 0.06020760536193848, |
|
"rewards/margins_min": -0.0038639488629996777, |
|
"rewards/margins_std": 0.04530543461441994, |
|
"rewards/rejected": 0.040500491857528687, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_losses": 0.6790474057197571, |
|
"epoch": 1.95, |
|
"grad_norm": 1.9164560727509983, |
|
"learning_rate": 1.6588163933503686e-07, |
|
"logits/chosen": -2.7663025856018066, |
|
"logits/rejected": -2.7693681716918945, |
|
"logps/chosen": -180.65650939941406, |
|
"logps/rejected": -231.96517944335938, |
|
"loss": 0.6845, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.05268232896924019, |
|
"rewards/margins": 0.028681308031082153, |
|
"rewards/margins_max": 0.053858645260334015, |
|
"rewards/margins_min": 0.0035039703361690044, |
|
"rewards/margins_std": 0.035606130957603455, |
|
"rewards/rejected": 0.02400101348757744, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_losses": 0.682746946811676, |
|
"epoch": 1.98, |
|
"grad_norm": 2.1910688443112587, |
|
"learning_rate": 1.5760714049581947e-07, |
|
"logits/chosen": -2.835636615753174, |
|
"logits/rejected": -2.823740243911743, |
|
"logps/chosen": -217.59878540039062, |
|
"logps/rejected": -202.2057342529297, |
|
"loss": 0.6825, |
|
"positive_losses": 0.016841506585478783, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.05880022048950195, |
|
"rewards/margins": 0.021725308150053024, |
|
"rewards/margins_max": 0.05336114764213562, |
|
"rewards/margins_min": -0.009910527616739273, |
|
"rewards/margins_std": 0.04473983123898506, |
|
"rewards/rejected": 0.03707490488886833, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_losses": 0.6814676523208618, |
|
"epoch": 2.01, |
|
"grad_norm": 1.922230509309539, |
|
"learning_rate": 1.4944827069769122e-07, |
|
"logits/chosen": -3.023953914642334, |
|
"logits/rejected": -2.9771876335144043, |
|
"logps/chosen": -332.7305603027344, |
|
"logps/rejected": -338.9562072753906, |
|
"loss": 0.6765, |
|
"positive_losses": 0.052425384521484375, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.054904378950595856, |
|
"rewards/margins": 0.02420732006430626, |
|
"rewards/margins_max": 0.05272083729505539, |
|
"rewards/margins_min": -0.004306197632104158, |
|
"rewards/margins_std": 0.04032420367002487, |
|
"rewards/rejected": 0.03069705329835415, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_losses": 0.6808332204818726, |
|
"epoch": 2.04, |
|
"grad_norm": 2.128169444960246, |
|
"learning_rate": 1.4141524071122656e-07, |
|
"logits/chosen": -2.90728497505188, |
|
"logits/rejected": -2.9327125549316406, |
|
"logps/chosen": -294.28411865234375, |
|
"logps/rejected": -306.5546569824219, |
|
"loss": 0.6781, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.08569607883691788, |
|
"rewards/margins": 0.026000851765275, |
|
"rewards/margins_max": 0.06097273901104927, |
|
"rewards/margins_min": -0.008971036411821842, |
|
"rewards/margins_std": 0.049457717686891556, |
|
"rewards/rejected": 0.059695225208997726, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_losses": 0.6652864217758179, |
|
"epoch": 2.07, |
|
"grad_norm": 2.0596328226644616, |
|
"learning_rate": 1.3351810381932387e-07, |
|
"logits/chosen": -2.879633665084839, |
|
"logits/rejected": -2.866821765899658, |
|
"logps/chosen": -277.6556091308594, |
|
"logps/rejected": -225.43563842773438, |
|
"loss": 0.6728, |
|
"positive_losses": 0.06625213474035263, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.09781384468078613, |
|
"rewards/margins": 0.05752427130937576, |
|
"rewards/margins_max": 0.09321505576372147, |
|
"rewards/margins_min": 0.02183348871767521, |
|
"rewards/margins_std": 0.0504743866622448, |
|
"rewards/rejected": 0.04028957709670067, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_losses": 0.6790655255317688, |
|
"epoch": 2.1, |
|
"grad_norm": 2.366475517541238, |
|
"learning_rate": 1.2576674323558928e-07, |
|
"logits/chosen": -2.954852819442749, |
|
"logits/rejected": -2.885873794555664, |
|
"logps/chosen": -260.78973388671875, |
|
"logps/rejected": -258.0441589355469, |
|
"loss": 0.6782, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.08070608973503113, |
|
"rewards/margins": 0.02996811270713806, |
|
"rewards/margins_max": 0.07714684307575226, |
|
"rewards/margins_min": -0.017210613936185837, |
|
"rewards/margins_std": 0.06672079861164093, |
|
"rewards/rejected": 0.050737977027893066, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_losses": 0.677202582359314, |
|
"epoch": 2.13, |
|
"grad_norm": 2.333653791310998, |
|
"learning_rate": 1.1817085973556152e-07, |
|
"logits/chosen": -2.869096279144287, |
|
"logits/rejected": -2.842957019805908, |
|
"logps/chosen": -240.80905151367188, |
|
"logps/rejected": -281.9169006347656, |
|
"loss": 0.6796, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.08999605476856232, |
|
"rewards/margins": 0.032623276114463806, |
|
"rewards/margins_max": 0.05816454440355301, |
|
"rewards/margins_min": 0.007082013878971338, |
|
"rewards/margins_std": 0.03612080588936806, |
|
"rewards/rejected": 0.057372767478227615, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_dpo_losses": 0.6800086498260498, |
|
"eval_logits/chosen": -2.8477256298065186, |
|
"eval_logits/rejected": -2.802640676498413, |
|
"eval_logps/chosen": -277.2366027832031, |
|
"eval_logps/rejected": -253.92018127441406, |
|
"eval_loss": 0.687153697013855, |
|
"eval_positive_losses": 0.05479403957724571, |
|
"eval_rewards/accuracies": 0.682539701461792, |
|
"eval_rewards/chosen": 0.0798461064696312, |
|
"eval_rewards/margins": 0.0272214412689209, |
|
"eval_rewards/margins_max": 0.08652784675359726, |
|
"eval_rewards/margins_min": -0.029835335910320282, |
|
"eval_rewards/margins_std": 0.05211076885461807, |
|
"eval_rewards/rejected": 0.0526246652007103, |
|
"eval_runtime": 281.0737, |
|
"eval_samples_per_second": 7.116, |
|
"eval_steps_per_second": 0.224, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_losses": 0.6692663431167603, |
|
"epoch": 2.16, |
|
"grad_norm": 1.7167790990262888, |
|
"learning_rate": 1.1073995951625664e-07, |
|
"logits/chosen": -2.9118266105651855, |
|
"logits/rejected": -2.8686835765838623, |
|
"logps/chosen": -400.4477844238281, |
|
"logps/rejected": -264.7125549316406, |
|
"loss": 0.677, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.10832663625478745, |
|
"rewards/margins": 0.0492747537791729, |
|
"rewards/margins_max": 0.08400574326515198, |
|
"rewards/margins_min": 0.014543762430548668, |
|
"rewards/margins_std": 0.049117036163806915, |
|
"rewards/rejected": 0.05905189365148544, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_losses": 0.670403778553009, |
|
"epoch": 2.19, |
|
"grad_norm": 1.8644942647733735, |
|
"learning_rate": 1.0348334229922676e-07, |
|
"logits/chosen": -2.9610977172851562, |
|
"logits/rejected": -2.86533784866333, |
|
"logps/chosen": -368.39862060546875, |
|
"logps/rejected": -269.9642028808594, |
|
"loss": 0.6754, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.08919496089220047, |
|
"rewards/margins": 0.04688384756445885, |
|
"rewards/margins_max": 0.08247358351945877, |
|
"rewards/margins_min": 0.01129411906003952, |
|
"rewards/margins_std": 0.0503314845263958, |
|
"rewards/rejected": 0.04231110215187073, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_losses": 0.6769901514053345, |
|
"epoch": 2.22, |
|
"grad_norm": 2.180859431848076, |
|
"learning_rate": 9.6410089692021e-08, |
|
"logits/chosen": -2.9250197410583496, |
|
"logits/rejected": -2.9387307167053223, |
|
"logps/chosen": -361.2002868652344, |
|
"logps/rejected": -294.02313232421875, |
|
"loss": 0.6754, |
|
"positive_losses": 0.214344784617424, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.07332402467727661, |
|
"rewards/margins": 0.033169932663440704, |
|
"rewards/margins_max": 0.05973232537508011, |
|
"rewards/margins_min": 0.0066075399518013, |
|
"rewards/margins_std": 0.037564896047115326, |
|
"rewards/rejected": 0.04015409201383591, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_losses": 0.6715595126152039, |
|
"epoch": 2.25, |
|
"grad_norm": 2.04722557268885, |
|
"learning_rate": 8.95290538226148e-08, |
|
"logits/chosen": -2.7215945720672607, |
|
"logits/rejected": -2.681288957595825, |
|
"logps/chosen": -238.29833984375, |
|
"logps/rejected": -157.76754760742188, |
|
"loss": 0.6755, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.08287638425827026, |
|
"rewards/margins": 0.04476654529571533, |
|
"rewards/margins_max": 0.07331255823373795, |
|
"rewards/margins_min": 0.016220541670918465, |
|
"rewards/margins_std": 0.040370143949985504, |
|
"rewards/rejected": 0.03810984641313553, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_losses": 0.6766847372055054, |
|
"epoch": 2.28, |
|
"grad_norm": 7.254140526558107, |
|
"learning_rate": 8.284884626103164e-08, |
|
"logits/chosen": -2.8504419326782227, |
|
"logits/rejected": -2.817267656326294, |
|
"logps/chosen": -230.2511749267578, |
|
"logps/rejected": -262.9688415527344, |
|
"loss": 0.6787, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.07979516685009003, |
|
"rewards/margins": 0.03387243300676346, |
|
"rewards/margins_max": 0.05933348461985588, |
|
"rewards/margins_min": 0.008411377668380737, |
|
"rewards/margins_std": 0.036007367074489594, |
|
"rewards/rejected": 0.04592275246977806, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_losses": 0.6724633574485779, |
|
"epoch": 2.31, |
|
"grad_norm": 2.1195303141123993, |
|
"learning_rate": 7.637782724202072e-08, |
|
"logits/chosen": -2.8214287757873535, |
|
"logits/rejected": -2.8338170051574707, |
|
"logps/chosen": -247.1202850341797, |
|
"logps/rejected": -217.5040740966797, |
|
"loss": 0.684, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.07794385403394699, |
|
"rewards/margins": 0.04256601259112358, |
|
"rewards/margins_max": 0.07706017792224884, |
|
"rewards/margins_min": 0.008071859367191792, |
|
"rewards/margins_std": 0.0487821027636528, |
|
"rewards/rejected": 0.03537784516811371, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_losses": 0.6884227991104126, |
|
"epoch": 2.34, |
|
"grad_norm": 6.591694443648593, |
|
"learning_rate": 7.01240952022801e-08, |
|
"logits/chosen": -2.8677477836608887, |
|
"logits/rejected": -2.832568883895874, |
|
"logps/chosen": -238.2762451171875, |
|
"logps/rejected": -235.57272338867188, |
|
"loss": 0.6773, |
|
"positive_losses": 0.13628844916820526, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.06362812221050262, |
|
"rewards/margins": 0.009944443590939045, |
|
"rewards/margins_max": 0.03966521471738815, |
|
"rewards/margins_min": -0.019776327535510063, |
|
"rewards/margins_std": 0.04203151911497116, |
|
"rewards/rejected": 0.053683679550886154, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_losses": 0.6836405396461487, |
|
"epoch": 2.37, |
|
"grad_norm": 2.3289966359685415, |
|
"learning_rate": 6.409547664531733e-08, |
|
"logits/chosen": -2.867485523223877, |
|
"logits/rejected": -2.867396831512451, |
|
"logps/chosen": -221.3419189453125, |
|
"logps/rejected": -208.03866577148438, |
|
"loss": 0.6768, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.06145999953150749, |
|
"rewards/margins": 0.019474489614367485, |
|
"rewards/margins_max": 0.04479853808879852, |
|
"rewards/margins_min": -0.00584955420345068, |
|
"rewards/margins_std": 0.03581360727548599, |
|
"rewards/rejected": 0.04198550805449486, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_losses": 0.6730223894119263, |
|
"epoch": 2.4, |
|
"grad_norm": 2.003874601891291, |
|
"learning_rate": 5.829951634663324e-08, |
|
"logits/chosen": -2.7790565490722656, |
|
"logits/rejected": -2.7391462326049805, |
|
"logps/chosen": -245.35067749023438, |
|
"logps/rejected": -180.18910217285156, |
|
"loss": 0.6751, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.08253587782382965, |
|
"rewards/margins": 0.04125886410474777, |
|
"rewards/margins_max": 0.0634620413184166, |
|
"rewards/margins_min": 0.019055673852562904, |
|
"rewards/margins_std": 0.03140005096793175, |
|
"rewards/rejected": 0.041277021169662476, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_losses": 0.6658504605293274, |
|
"epoch": 2.43, |
|
"grad_norm": 2.0012405839420944, |
|
"learning_rate": 5.274346791148601e-08, |
|
"logits/chosen": -2.9153144359588623, |
|
"logits/rejected": -2.84319806098938, |
|
"logps/chosen": -366.9844665527344, |
|
"logps/rejected": -288.7860412597656, |
|
"loss": 0.6778, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.08800138533115387, |
|
"rewards/margins": 0.05663619562983513, |
|
"rewards/margins_max": 0.09046684950590134, |
|
"rewards/margins_min": 0.022805536165833473, |
|
"rewards/margins_std": 0.04784378036856651, |
|
"rewards/rejected": 0.031365178525447845, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_dpo_losses": 0.6795361638069153, |
|
"eval_logits/chosen": -2.8473622798919678, |
|
"eval_logits/rejected": -2.8024210929870605, |
|
"eval_logps/chosen": -277.22216796875, |
|
"eval_logps/rejected": -254.00743103027344, |
|
"eval_loss": 0.6875322461128235, |
|
"eval_positive_losses": 0.060370072722435, |
|
"eval_rewards/accuracies": 0.682539701461792, |
|
"eval_rewards/chosen": 0.07999046891927719, |
|
"eval_rewards/margins": 0.0282380823045969, |
|
"eval_rewards/margins_max": 0.08973579108715057, |
|
"eval_rewards/margins_min": -0.030655015259981155, |
|
"eval_rewards/margins_std": 0.05399897322058678, |
|
"eval_rewards/rejected": 0.05175239220261574, |
|
"eval_runtime": 281.3076, |
|
"eval_samples_per_second": 7.11, |
|
"eval_steps_per_second": 0.224, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_losses": 0.6727338433265686, |
|
"epoch": 2.46, |
|
"grad_norm": 7.411310583604656, |
|
"learning_rate": 4.743428469705335e-08, |
|
"logits/chosen": -2.6556921005249023, |
|
"logits/rejected": -2.691716432571411, |
|
"logps/chosen": -270.0729064941406, |
|
"logps/rejected": -222.0420379638672, |
|
"loss": 0.6752, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.08230028301477432, |
|
"rewards/margins": 0.04306231066584587, |
|
"rewards/margins_max": 0.09098808467388153, |
|
"rewards/margins_min": -0.004863465670496225, |
|
"rewards/margins_std": 0.06777729094028473, |
|
"rewards/rejected": 0.03923797979950905, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_losses": 0.6766294240951538, |
|
"epoch": 2.49, |
|
"grad_norm": 2.0160351712928146, |
|
"learning_rate": 4.2378611110352707e-08, |
|
"logits/chosen": -2.8572134971618652, |
|
"logits/rejected": -2.9511802196502686, |
|
"logps/chosen": -193.8530731201172, |
|
"logps/rejected": -267.66522216796875, |
|
"loss": 0.6747, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.06872855871915817, |
|
"rewards/margins": 0.03442535921931267, |
|
"rewards/margins_max": 0.07178723812103271, |
|
"rewards/margins_min": -0.0029365241061896086, |
|
"rewards/margins_std": 0.05283767729997635, |
|
"rewards/rejected": 0.034303199499845505, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_losses": 0.6828433275222778, |
|
"epoch": 2.52, |
|
"grad_norm": 2.1498185591320773, |
|
"learning_rate": 3.75827742928107e-08, |
|
"logits/chosen": -2.9401209354400635, |
|
"logits/rejected": -2.8616251945495605, |
|
"logps/chosen": -311.86260986328125, |
|
"logps/rejected": -264.8041076660156, |
|
"loss": 0.6773, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.087630495429039, |
|
"rewards/margins": 0.022305091843008995, |
|
"rewards/margins_max": 0.07450206577777863, |
|
"rewards/margins_min": -0.029891883954405785, |
|
"rewards/margins_std": 0.0738176628947258, |
|
"rewards/rejected": 0.06532540172338486, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_losses": 0.6704001426696777, |
|
"epoch": 2.55, |
|
"grad_norm": 11.453340827604897, |
|
"learning_rate": 3.305277620188826e-08, |
|
"logits/chosen": -2.805208921432495, |
|
"logits/rejected": -2.840792179107666, |
|
"logps/chosen": -336.3541564941406, |
|
"logps/rejected": -344.42413330078125, |
|
"loss": 0.6733, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.09611042588949203, |
|
"rewards/margins": 0.04694870114326477, |
|
"rewards/margins_max": 0.08145497739315033, |
|
"rewards/margins_min": 0.012442430481314659, |
|
"rewards/margins_std": 0.04879923164844513, |
|
"rewards/rejected": 0.04916173219680786, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_losses": 0.6803882122039795, |
|
"epoch": 2.58, |
|
"grad_norm": 2.3437007644568086, |
|
"learning_rate": 2.879428609967105e-08, |
|
"logits/chosen": -2.921419620513916, |
|
"logits/rejected": -2.8526148796081543, |
|
"logps/chosen": -216.5243377685547, |
|
"logps/rejected": -294.4681091308594, |
|
"loss": 0.6739, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.07691861689090729, |
|
"rewards/margins": 0.02676830254495144, |
|
"rewards/margins_max": 0.05155067518353462, |
|
"rewards/margins_min": 0.001985931070521474, |
|
"rewards/margins_std": 0.035047560930252075, |
|
"rewards/rejected": 0.0501503124833107, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_losses": 0.679145872592926, |
|
"epoch": 2.61, |
|
"grad_norm": 2.249308373857521, |
|
"learning_rate": 2.4812633457826666e-08, |
|
"logits/chosen": -2.868685007095337, |
|
"logits/rejected": -2.853390693664551, |
|
"logps/chosen": -259.72357177734375, |
|
"logps/rejected": -335.20465087890625, |
|
"loss": 0.6762, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0714501366019249, |
|
"rewards/margins": 0.028765708208084106, |
|
"rewards/margins_max": 0.051595449447631836, |
|
"rewards/margins_min": 0.00593597162514925, |
|
"rewards/margins_std": 0.03228612244129181, |
|
"rewards/rejected": 0.04268442839384079, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_losses": 0.6712166666984558, |
|
"epoch": 2.64, |
|
"grad_norm": 1.888865132587614, |
|
"learning_rate": 2.1112801287806375e-08, |
|
"logits/chosen": -2.803732395172119, |
|
"logits/rejected": -2.8339738845825195, |
|
"logps/chosen": -336.16632080078125, |
|
"logps/rejected": -346.7503662109375, |
|
"loss": 0.6767, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.10950883477926254, |
|
"rewards/margins": 0.04584169760346413, |
|
"rewards/margins_max": 0.0953935980796814, |
|
"rewards/margins_min": -0.003710202407091856, |
|
"rewards/margins_std": 0.07007695734500885, |
|
"rewards/rejected": 0.06366713345050812, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_losses": 0.663690447807312, |
|
"epoch": 2.67, |
|
"grad_norm": 2.5335259272331125, |
|
"learning_rate": 1.7699419904640022e-08, |
|
"logits/chosen": -2.864894151687622, |
|
"logits/rejected": -2.822705030441284, |
|
"logps/chosen": -253.73703002929688, |
|
"logps/rejected": -260.36767578125, |
|
"loss": 0.6763, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.1016063317656517, |
|
"rewards/margins": 0.060955654829740524, |
|
"rewards/margins_max": 0.10850463807582855, |
|
"rewards/margins_min": 0.013406666927039623, |
|
"rewards/margins_std": 0.06724441796541214, |
|
"rewards/rejected": 0.040650688111782074, |
|
"step": 880 |
|
}, |
|
{ |
|
"dpo_losses": 0.6715737581253052, |
|
"epoch": 2.71, |
|
"grad_norm": 11.524954878911132, |
|
"learning_rate": 1.4576761132127912e-08, |
|
"logits/chosen": -3.0019936561584473, |
|
"logits/rejected": -2.8649494647979736, |
|
"logps/chosen": -330.13470458984375, |
|
"logps/rejected": -248.55508422851562, |
|
"loss": 0.674, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.07323987782001495, |
|
"rewards/margins": 0.044342849403619766, |
|
"rewards/margins_max": 0.0754455104470253, |
|
"rewards/margins_min": 0.013240176253020763, |
|
"rewards/margins_std": 0.04398581385612488, |
|
"rewards/rejected": 0.028897028416395187, |
|
"step": 890 |
|
}, |
|
{ |
|
"dpo_losses": 0.6749640107154846, |
|
"epoch": 2.74, |
|
"grad_norm": 1.8036243560449348, |
|
"learning_rate": 1.1748732956682023e-08, |
|
"logits/chosen": -2.7051098346710205, |
|
"logits/rejected": -2.655970335006714, |
|
"logps/chosen": -257.9394836425781, |
|
"logps/rejected": -181.5701446533203, |
|
"loss": 0.6739, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.07508902996778488, |
|
"rewards/margins": 0.03800603747367859, |
|
"rewards/margins_max": 0.09310616552829742, |
|
"rewards/margins_min": -0.017094094306230545, |
|
"rewards/margins_std": 0.07792335003614426, |
|
"rewards/rejected": 0.03708299994468689, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_dpo_losses": 0.679327130317688, |
|
"eval_logits/chosen": -2.8477160930633545, |
|
"eval_logits/rejected": -2.802847385406494, |
|
"eval_logps/chosen": -277.20367431640625, |
|
"eval_logps/rejected": -254.03451538085938, |
|
"eval_loss": 0.6877949833869934, |
|
"eval_positive_losses": 0.06513993442058563, |
|
"eval_rewards/accuracies": 0.670634925365448, |
|
"eval_rewards/chosen": 0.08017554879188538, |
|
"eval_rewards/margins": 0.028694164007902145, |
|
"eval_rewards/margins_max": 0.09142320603132248, |
|
"eval_rewards/margins_min": -0.03173976391553879, |
|
"eval_rewards/margins_std": 0.05502929911017418, |
|
"eval_rewards/rejected": 0.05148139223456383, |
|
"eval_runtime": 282.246, |
|
"eval_samples_per_second": 7.086, |
|
"eval_steps_per_second": 0.223, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_losses": 0.6807221174240112, |
|
"epoch": 2.77, |
|
"grad_norm": 7.587296620491664, |
|
"learning_rate": 9.218874636507556e-09, |
|
"logits/chosen": -2.8844008445739746, |
|
"logits/rejected": -2.8472609519958496, |
|
"logps/chosen": -232.85934448242188, |
|
"logps/rejected": -256.0882873535156, |
|
"loss": 0.6782, |
|
"positive_losses": 0.17994289100170135, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.07232363522052765, |
|
"rewards/margins": 0.02574983797967434, |
|
"rewards/margins_max": 0.06079038232564926, |
|
"rewards/margins_min": -0.009290714748203754, |
|
"rewards/margins_std": 0.049554821103811264, |
|
"rewards/rejected": 0.04657380282878876, |
|
"step": 910 |
|
}, |
|
{ |
|
"dpo_losses": 0.674708902835846, |
|
"epoch": 2.8, |
|
"grad_norm": 2.0209217117752267, |
|
"learning_rate": 6.9903522722450645e-09, |
|
"logits/chosen": -2.6785788536071777, |
|
"logits/rejected": -2.683105945587158, |
|
"logps/chosen": -205.6979522705078, |
|
"logps/rejected": -194.4645233154297, |
|
"loss": 0.6772, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.06881477683782578, |
|
"rewards/margins": 0.03808169811964035, |
|
"rewards/margins_max": 0.06708801537752151, |
|
"rewards/margins_min": 0.00907537154853344, |
|
"rewards/margins_std": 0.041021138429641724, |
|
"rewards/rejected": 0.030733084306120872, |
|
"step": 920 |
|
}, |
|
{ |
|
"dpo_losses": 0.6782993078231812, |
|
"epoch": 2.83, |
|
"grad_norm": 2.1553504283654608, |
|
"learning_rate": 5.065954844616721e-09, |
|
"logits/chosen": -2.8473258018493652, |
|
"logits/rejected": -2.778991222381592, |
|
"logps/chosen": -272.9423522949219, |
|
"logps/rejected": -323.91290283203125, |
|
"loss": 0.6829, |
|
"positive_losses": 0.127574160695076, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.07656483352184296, |
|
"rewards/margins": 0.03158252686262131, |
|
"rewards/margins_max": 0.08533596247434616, |
|
"rewards/margins_min": -0.022170905023813248, |
|
"rewards/margins_std": 0.07601883262395859, |
|
"rewards/rejected": 0.04498231038451195, |
|
"step": 930 |
|
}, |
|
{ |
|
"dpo_losses": 0.6669495701789856, |
|
"epoch": 2.86, |
|
"grad_norm": 1.8894258589844486, |
|
"learning_rate": 3.4480907240357904e-09, |
|
"logits/chosen": -2.886190176010132, |
|
"logits/rejected": -2.852233409881592, |
|
"logps/chosen": -237.05038452148438, |
|
"logps/rejected": -192.5042724609375, |
|
"loss": 0.6785, |
|
"positive_losses": 0.08552627265453339, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.08725001662969589, |
|
"rewards/margins": 0.05445738509297371, |
|
"rewards/margins_max": 0.08135871589183807, |
|
"rewards/margins_min": 0.027556050568819046, |
|
"rewards/margins_std": 0.03804423287510872, |
|
"rewards/rejected": 0.03279264643788338, |
|
"step": 940 |
|
}, |
|
{ |
|
"dpo_losses": 0.6740903854370117, |
|
"epoch": 2.89, |
|
"grad_norm": 7.272085291531218, |
|
"learning_rate": 2.1387846565474044e-09, |
|
"logits/chosen": -2.7927393913269043, |
|
"logits/rejected": -2.80344295501709, |
|
"logps/chosen": -295.1185607910156, |
|
"logps/rejected": -264.1958312988281, |
|
"loss": 0.6773, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.09159694612026215, |
|
"rewards/margins": 0.039398252964019775, |
|
"rewards/margins_max": 0.07913588732481003, |
|
"rewards/margins_min": -0.00033939722925424576, |
|
"rewards/margins_std": 0.056197524070739746, |
|
"rewards/rejected": 0.05219869688153267, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_losses": 0.676175594329834, |
|
"epoch": 2.92, |
|
"grad_norm": 2.0704055477321535, |
|
"learning_rate": 1.1396752298723499e-09, |
|
"logits/chosen": -2.6965384483337402, |
|
"logits/rejected": -2.7728757858276367, |
|
"logps/chosen": -243.5664825439453, |
|
"logps/rejected": -246.42904663085938, |
|
"loss": 0.6762, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.07029319554567337, |
|
"rewards/margins": 0.03469633311033249, |
|
"rewards/margins_max": 0.0634746104478836, |
|
"rewards/margins_min": 0.005918048322200775, |
|
"rewards/margins_std": 0.040698640048503876, |
|
"rewards/rejected": 0.03559685871005058, |
|
"step": 960 |
|
}, |
|
{ |
|
"dpo_losses": 0.6741344332695007, |
|
"epoch": 2.95, |
|
"grad_norm": 4.375655860786601, |
|
"learning_rate": 4.520128227257225e-10, |
|
"logits/chosen": -2.9053943157196045, |
|
"logits/rejected": -2.903526782989502, |
|
"logps/chosen": -324.132568359375, |
|
"logps/rejected": -269.60394287109375, |
|
"loss": 0.6787, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.10518592596054077, |
|
"rewards/margins": 0.04040554165840149, |
|
"rewards/margins_max": 0.09129390865564346, |
|
"rewards/margins_min": -0.010482819750905037, |
|
"rewards/margins_std": 0.07196702063083649, |
|
"rewards/rejected": 0.06478038430213928, |
|
"step": 970 |
|
}, |
|
{ |
|
"dpo_losses": 0.6782342791557312, |
|
"epoch": 2.98, |
|
"grad_norm": 2.102961531885413, |
|
"learning_rate": 7.665803997633502e-11, |
|
"logits/chosen": -2.785641670227051, |
|
"logits/rejected": -2.674614667892456, |
|
"logps/chosen": -221.5739288330078, |
|
"logps/rejected": -235.2669677734375, |
|
"loss": 0.6753, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.06868946552276611, |
|
"rewards/margins": 0.030555952340364456, |
|
"rewards/margins_max": 0.06132732704281807, |
|
"rewards/margins_min": -0.00021542570902965963, |
|
"rewards/margins_std": 0.04351729154586792, |
|
"rewards/rejected": 0.03813350945711136, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 987, |
|
"total_flos": 0.0, |
|
"train_loss": 0.684076373458754, |
|
"train_runtime": 8695.656, |
|
"train_samples_per_second": 1.816, |
|
"train_steps_per_second": 0.114 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 987, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|