|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9981298423724285, |
|
"eval_steps": 500, |
|
"global_step": 467, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0021373230029388193, |
|
"grad_norm": 2.51713228225708, |
|
"learning_rate": 1.0638297872340425e-08, |
|
"logits/chosen": -1.1381689310073853, |
|
"logits/rejected": -0.9913416504859924, |
|
"logps/chosen": -0.2839311957359314, |
|
"logps/rejected": -0.2955534756183624, |
|
"loss": 1.6097, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5678623914718628, |
|
"rewards/margins": 0.023244591429829597, |
|
"rewards/rejected": -0.5911069512367249, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004274646005877639, |
|
"grad_norm": 6.541850566864014, |
|
"learning_rate": 2.127659574468085e-08, |
|
"logits/chosen": -1.0311710834503174, |
|
"logits/rejected": -0.8901023864746094, |
|
"logps/chosen": -0.24952735006809235, |
|
"logps/rejected": -0.24253402650356293, |
|
"loss": 1.6096, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4990547001361847, |
|
"rewards/margins": -0.013986671343445778, |
|
"rewards/rejected": -0.48506805300712585, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006411969008816457, |
|
"grad_norm": 5.6596479415893555, |
|
"learning_rate": 3.191489361702127e-08, |
|
"logits/chosen": -0.9279628992080688, |
|
"logits/rejected": -0.8305555582046509, |
|
"logps/chosen": -0.2633163630962372, |
|
"logps/rejected": -0.26702702045440674, |
|
"loss": 1.6174, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5266327261924744, |
|
"rewards/margins": 0.007421246729791164, |
|
"rewards/rejected": -0.5340540409088135, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.008549292011755277, |
|
"grad_norm": 3.8121635913848877, |
|
"learning_rate": 4.25531914893617e-08, |
|
"logits/chosen": -0.8504582047462463, |
|
"logits/rejected": -0.7527742981910706, |
|
"logps/chosen": -0.2771408259868622, |
|
"logps/rejected": -0.26471394300460815, |
|
"loss": 1.6393, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5542816519737244, |
|
"rewards/margins": -0.024853792041540146, |
|
"rewards/rejected": -0.5294278860092163, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.010686615014694095, |
|
"grad_norm": 6.048301696777344, |
|
"learning_rate": 5.3191489361702123e-08, |
|
"logits/chosen": -1.156632661819458, |
|
"logits/rejected": -1.2128832340240479, |
|
"logps/chosen": -0.28773820400238037, |
|
"logps/rejected": -0.29937219619750977, |
|
"loss": 1.6108, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5754764080047607, |
|
"rewards/margins": 0.023267941549420357, |
|
"rewards/rejected": -0.5987443923950195, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.012823938017632914, |
|
"grad_norm": 3.6442198753356934, |
|
"learning_rate": 6.382978723404254e-08, |
|
"logits/chosen": -1.0647015571594238, |
|
"logits/rejected": -1.031942367553711, |
|
"logps/chosen": -0.25931063294410706, |
|
"logps/rejected": -0.28003033995628357, |
|
"loss": 1.6079, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5186212658882141, |
|
"rewards/margins": 0.04143940657377243, |
|
"rewards/rejected": -0.5600606799125671, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.014961261020571734, |
|
"grad_norm": 5.595146656036377, |
|
"learning_rate": 7.446808510638298e-08, |
|
"logits/chosen": -0.7785481810569763, |
|
"logits/rejected": -0.7654089331626892, |
|
"logps/chosen": -0.25532105565071106, |
|
"logps/rejected": -0.24814245104789734, |
|
"loss": 1.6092, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5106421113014221, |
|
"rewards/margins": -0.01435722503811121, |
|
"rewards/rejected": -0.4962849020957947, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.017098584023510555, |
|
"grad_norm": 2.9471020698547363, |
|
"learning_rate": 8.51063829787234e-08, |
|
"logits/chosen": -1.0282069444656372, |
|
"logits/rejected": -1.0483824014663696, |
|
"logps/chosen": -0.24546000361442566, |
|
"logps/rejected": -0.2658500373363495, |
|
"loss": 1.5902, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4909200072288513, |
|
"rewards/margins": 0.04078003019094467, |
|
"rewards/rejected": -0.531700074672699, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01923590702644937, |
|
"grad_norm": 3.132836103439331, |
|
"learning_rate": 9.574468085106382e-08, |
|
"logits/chosen": -0.9889479875564575, |
|
"logits/rejected": -0.8638209104537964, |
|
"logps/chosen": -0.27614107728004456, |
|
"logps/rejected": -0.2566734254360199, |
|
"loss": 1.6173, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.5522821545600891, |
|
"rewards/margins": -0.03893527761101723, |
|
"rewards/rejected": -0.5133468508720398, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02137323002938819, |
|
"grad_norm": 5.624292850494385, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -1.0719839334487915, |
|
"logits/rejected": -1.0015329122543335, |
|
"logps/chosen": -0.32535240054130554, |
|
"logps/rejected": -0.31745338439941406, |
|
"loss": 1.6211, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.6507048010826111, |
|
"rewards/margins": -0.015798063948750496, |
|
"rewards/rejected": -0.6349067687988281, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02351055303232701, |
|
"grad_norm": 5.1507039070129395, |
|
"learning_rate": 1.1702127659574468e-07, |
|
"logits/chosen": -0.9715439677238464, |
|
"logits/rejected": -0.8908199071884155, |
|
"logps/chosen": -0.2835432291030884, |
|
"logps/rejected": -0.2507440745830536, |
|
"loss": 1.612, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5670864582061768, |
|
"rewards/margins": -0.0655982717871666, |
|
"rewards/rejected": -0.5014881491661072, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02564787603526583, |
|
"grad_norm": 2.2926666736602783, |
|
"learning_rate": 1.2765957446808508e-07, |
|
"logits/chosen": -0.9799962639808655, |
|
"logits/rejected": -1.0184035301208496, |
|
"logps/chosen": -0.29446908831596375, |
|
"logps/rejected": -0.26765191555023193, |
|
"loss": 1.6202, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5889381766319275, |
|
"rewards/margins": -0.05363432317972183, |
|
"rewards/rejected": -0.5353038311004639, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.027785199038204648, |
|
"grad_norm": 5.308409690856934, |
|
"learning_rate": 1.3829787234042553e-07, |
|
"logits/chosen": -0.8681848049163818, |
|
"logits/rejected": -0.8799771070480347, |
|
"logps/chosen": -0.3181426227092743, |
|
"logps/rejected": -0.3121987581253052, |
|
"loss": 1.6031, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6362852454185486, |
|
"rewards/margins": -0.011887717992067337, |
|
"rewards/rejected": -0.6243975162506104, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.029922522041143467, |
|
"grad_norm": 4.573068618774414, |
|
"learning_rate": 1.4893617021276595e-07, |
|
"logits/chosen": -0.8867932558059692, |
|
"logits/rejected": -0.861649751663208, |
|
"logps/chosen": -0.312772661447525, |
|
"logps/rejected": -0.29462364315986633, |
|
"loss": 1.6226, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.62554532289505, |
|
"rewards/margins": -0.03629804030060768, |
|
"rewards/rejected": -0.5892472863197327, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03205984504408229, |
|
"grad_norm": 4.1025872230529785, |
|
"learning_rate": 1.5957446808510638e-07, |
|
"logits/chosen": -1.1116752624511719, |
|
"logits/rejected": -0.9415389895439148, |
|
"logps/chosen": -0.27133169770240784, |
|
"logps/rejected": -0.29030919075012207, |
|
"loss": 1.5818, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5426633954048157, |
|
"rewards/margins": 0.037955012172460556, |
|
"rewards/rejected": -0.5806183815002441, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03419716804702111, |
|
"grad_norm": 3.307173728942871, |
|
"learning_rate": 1.702127659574468e-07, |
|
"logits/chosen": -0.9105625152587891, |
|
"logits/rejected": -0.8872620463371277, |
|
"logps/chosen": -0.2662544846534729, |
|
"logps/rejected": -0.28296971321105957, |
|
"loss": 1.6112, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5325089693069458, |
|
"rewards/margins": 0.03343046456575394, |
|
"rewards/rejected": -0.5659394264221191, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.03633449104995993, |
|
"grad_norm": 6.173768997192383, |
|
"learning_rate": 1.8085106382978725e-07, |
|
"logits/chosen": -0.7553848028182983, |
|
"logits/rejected": -0.7946615815162659, |
|
"logps/chosen": -0.277927964925766, |
|
"logps/rejected": -0.28916075825691223, |
|
"loss": 1.5928, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.555855929851532, |
|
"rewards/margins": 0.02246551401913166, |
|
"rewards/rejected": -0.5783215165138245, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.03847181405289874, |
|
"grad_norm": 3.708397626876831, |
|
"learning_rate": 1.9148936170212765e-07, |
|
"logits/chosen": -1.0742344856262207, |
|
"logits/rejected": -1.1560362577438354, |
|
"logps/chosen": -0.2530558407306671, |
|
"logps/rejected": -0.2565101981163025, |
|
"loss": 1.6245, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5061116814613342, |
|
"rewards/margins": 0.006908770650625229, |
|
"rewards/rejected": -0.513020396232605, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04060913705583756, |
|
"grad_norm": 4.8654351234436035, |
|
"learning_rate": 2.0212765957446807e-07, |
|
"logits/chosen": -1.1306225061416626, |
|
"logits/rejected": -1.0444625616073608, |
|
"logps/chosen": -0.2724864184856415, |
|
"logps/rejected": -0.2817416787147522, |
|
"loss": 1.6247, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.544972836971283, |
|
"rewards/margins": 0.018510470166802406, |
|
"rewards/rejected": -0.5634833574295044, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04274646005877638, |
|
"grad_norm": 3.5271363258361816, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -1.0348137617111206, |
|
"logits/rejected": -1.0212081670761108, |
|
"logps/chosen": -0.2397567480802536, |
|
"logps/rejected": -0.23578569293022156, |
|
"loss": 1.6172, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4795134961605072, |
|
"rewards/margins": -0.007942091673612595, |
|
"rewards/rejected": -0.4715713858604431, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0448837830617152, |
|
"grad_norm": 7.901147842407227, |
|
"learning_rate": 2.2340425531914892e-07, |
|
"logits/chosen": -1.1679033041000366, |
|
"logits/rejected": -1.0415174961090088, |
|
"logps/chosen": -0.33534738421440125, |
|
"logps/rejected": -0.27388396859169006, |
|
"loss": 1.6502, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.6706947684288025, |
|
"rewards/margins": -0.12292689830064774, |
|
"rewards/rejected": -0.5477679371833801, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.04702110606465402, |
|
"grad_norm": 2.3991823196411133, |
|
"learning_rate": 2.3404255319148937e-07, |
|
"logits/chosen": -1.0736172199249268, |
|
"logits/rejected": -1.0771551132202148, |
|
"logps/chosen": -0.2646552622318268, |
|
"logps/rejected": -0.2733539938926697, |
|
"loss": 1.6048, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.5293105244636536, |
|
"rewards/margins": 0.01739754155278206, |
|
"rewards/rejected": -0.5467079877853394, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04915842906759284, |
|
"grad_norm": 4.812252998352051, |
|
"learning_rate": 2.4468085106382976e-07, |
|
"logits/chosen": -0.8147614002227783, |
|
"logits/rejected": -0.9166449904441833, |
|
"logps/chosen": -0.28619590401649475, |
|
"logps/rejected": -0.2908383309841156, |
|
"loss": 1.5764, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5723918080329895, |
|
"rewards/margins": 0.009284832514822483, |
|
"rewards/rejected": -0.5816766619682312, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.05129575207053166, |
|
"grad_norm": 5.214301109313965, |
|
"learning_rate": 2.5531914893617016e-07, |
|
"logits/chosen": -1.0316184759140015, |
|
"logits/rejected": -1.0412724018096924, |
|
"logps/chosen": -0.23989242315292358, |
|
"logps/rejected": -0.26728230714797974, |
|
"loss": 1.602, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.47978484630584717, |
|
"rewards/margins": 0.0547797717154026, |
|
"rewards/rejected": -0.5345646142959595, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.053433075073470476, |
|
"grad_norm": 3.372835636138916, |
|
"learning_rate": 2.659574468085106e-07, |
|
"logits/chosen": -1.0795375108718872, |
|
"logits/rejected": -0.9741866588592529, |
|
"logps/chosen": -0.28838473558425903, |
|
"logps/rejected": -0.32610005140304565, |
|
"loss": 1.6016, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5767694711685181, |
|
"rewards/margins": 0.07543070614337921, |
|
"rewards/rejected": -0.6522001028060913, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.055570398076409296, |
|
"grad_norm": 3.9052999019622803, |
|
"learning_rate": 2.7659574468085106e-07, |
|
"logits/chosen": -1.2568001747131348, |
|
"logits/rejected": -1.1107139587402344, |
|
"logps/chosen": -0.30466389656066895, |
|
"logps/rejected": -0.2980763614177704, |
|
"loss": 1.6209, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6093277931213379, |
|
"rewards/margins": -0.0131750563159585, |
|
"rewards/rejected": -0.5961527228355408, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.057707721079348115, |
|
"grad_norm": 3.9069981575012207, |
|
"learning_rate": 2.872340425531915e-07, |
|
"logits/chosen": -1.0098018646240234, |
|
"logits/rejected": -0.9794459342956543, |
|
"logps/chosen": -0.2699134051799774, |
|
"logps/rejected": -0.28315117955207825, |
|
"loss": 1.6203, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5398268103599548, |
|
"rewards/margins": 0.026475582271814346, |
|
"rewards/rejected": -0.5663023591041565, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.059845044082286934, |
|
"grad_norm": 4.644921779632568, |
|
"learning_rate": 2.978723404255319e-07, |
|
"logits/chosen": -0.8839479088783264, |
|
"logits/rejected": -0.9320971965789795, |
|
"logps/chosen": -0.2668587565422058, |
|
"logps/rejected": -0.27507418394088745, |
|
"loss": 1.6176, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5337175130844116, |
|
"rewards/margins": 0.016430813819169998, |
|
"rewards/rejected": -0.5501483678817749, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.061982367085225754, |
|
"grad_norm": 3.2341363430023193, |
|
"learning_rate": 3.085106382978723e-07, |
|
"logits/chosen": -1.0859841108322144, |
|
"logits/rejected": -1.0080296993255615, |
|
"logps/chosen": -0.2636515498161316, |
|
"logps/rejected": -0.2644122838973999, |
|
"loss": 1.6185, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5273030996322632, |
|
"rewards/margins": 0.0015215259045362473, |
|
"rewards/rejected": -0.5288245677947998, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.06411969008816458, |
|
"grad_norm": 5.580157279968262, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -1.170966386795044, |
|
"logits/rejected": -0.9350689053535461, |
|
"logps/chosen": -0.2749802768230438, |
|
"logps/rejected": -0.2526704668998718, |
|
"loss": 1.6164, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.5499605536460876, |
|
"rewards/margins": -0.04461963474750519, |
|
"rewards/rejected": -0.5053409337997437, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06625701309110339, |
|
"grad_norm": 4.681908130645752, |
|
"learning_rate": 3.2978723404255315e-07, |
|
"logits/chosen": -1.0664238929748535, |
|
"logits/rejected": -0.9249334335327148, |
|
"logps/chosen": -0.26851335167884827, |
|
"logps/rejected": -0.3246592581272125, |
|
"loss": 1.5989, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5370267033576965, |
|
"rewards/margins": 0.11229176819324493, |
|
"rewards/rejected": -0.649318516254425, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.06839433609404222, |
|
"grad_norm": 7.798113822937012, |
|
"learning_rate": 3.404255319148936e-07, |
|
"logits/chosen": -0.8868415951728821, |
|
"logits/rejected": -0.8269252777099609, |
|
"logps/chosen": -0.26608309149742126, |
|
"logps/rejected": -0.29178884625434875, |
|
"loss": 1.6, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5321661829948425, |
|
"rewards/margins": 0.051411453634500504, |
|
"rewards/rejected": -0.5835776925086975, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.07053165909698103, |
|
"grad_norm": 3.8565964698791504, |
|
"learning_rate": 3.5106382978723405e-07, |
|
"logits/chosen": -1.075560450553894, |
|
"logits/rejected": -0.9206546545028687, |
|
"logps/chosen": -0.3033750355243683, |
|
"logps/rejected": -0.2647935748100281, |
|
"loss": 1.6255, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.6067500710487366, |
|
"rewards/margins": -0.07716288417577744, |
|
"rewards/rejected": -0.5295871496200562, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.07266898209991986, |
|
"grad_norm": 4.738920211791992, |
|
"learning_rate": 3.617021276595745e-07, |
|
"logits/chosen": -1.0078967809677124, |
|
"logits/rejected": -0.9841946363449097, |
|
"logps/chosen": -0.29649823904037476, |
|
"logps/rejected": -0.3331226706504822, |
|
"loss": 1.6, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5929964780807495, |
|
"rewards/margins": 0.07324886322021484, |
|
"rewards/rejected": -0.6662453413009644, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.07480630510285867, |
|
"grad_norm": 15.601210594177246, |
|
"learning_rate": 3.7234042553191484e-07, |
|
"logits/chosen": -1.0271260738372803, |
|
"logits/rejected": -1.0070686340332031, |
|
"logps/chosen": -0.2500755488872528, |
|
"logps/rejected": -0.2826491892337799, |
|
"loss": 1.6131, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5001510977745056, |
|
"rewards/margins": 0.06514722108840942, |
|
"rewards/rejected": -0.5652983784675598, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07694362810579748, |
|
"grad_norm": 7.088011264801025, |
|
"learning_rate": 3.829787234042553e-07, |
|
"logits/chosen": -0.7224124670028687, |
|
"logits/rejected": -0.5971524119377136, |
|
"logps/chosen": -0.2726445198059082, |
|
"logps/rejected": -0.2940409481525421, |
|
"loss": 1.603, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5452890396118164, |
|
"rewards/margins": 0.04279275983572006, |
|
"rewards/rejected": -0.5880818963050842, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.07908095110873631, |
|
"grad_norm": 4.70820426940918, |
|
"learning_rate": 3.9361702127659574e-07, |
|
"logits/chosen": -0.979728102684021, |
|
"logits/rejected": -0.9153163433074951, |
|
"logps/chosen": -0.27593153715133667, |
|
"logps/rejected": -0.26201528310775757, |
|
"loss": 1.6146, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5518630743026733, |
|
"rewards/margins": -0.02783256769180298, |
|
"rewards/rejected": -0.5240305662155151, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.08121827411167512, |
|
"grad_norm": 8.7505464553833, |
|
"learning_rate": 4.0425531914893614e-07, |
|
"logits/chosen": -0.8443434238433838, |
|
"logits/rejected": -0.8855568170547485, |
|
"logps/chosen": -0.29990217089653015, |
|
"logps/rejected": -0.2905019521713257, |
|
"loss": 1.6493, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5998043417930603, |
|
"rewards/margins": -0.0188005194067955, |
|
"rewards/rejected": -0.5810039043426514, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.08335559711461395, |
|
"grad_norm": 5.359803676605225, |
|
"learning_rate": 4.148936170212766e-07, |
|
"logits/chosen": -1.0560702085494995, |
|
"logits/rejected": -1.1278265714645386, |
|
"logps/chosen": -0.25392618775367737, |
|
"logps/rejected": -0.2735791802406311, |
|
"loss": 1.5949, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5078523755073547, |
|
"rewards/margins": 0.039305973798036575, |
|
"rewards/rejected": -0.5471583604812622, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.08549292011755276, |
|
"grad_norm": 3.1088671684265137, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -1.0592100620269775, |
|
"logits/rejected": -1.0815989971160889, |
|
"logps/chosen": -0.2885398864746094, |
|
"logps/rejected": -0.2929195761680603, |
|
"loss": 1.6321, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5770797729492188, |
|
"rewards/margins": 0.00875941477715969, |
|
"rewards/rejected": -0.5858391523361206, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08763024312049159, |
|
"grad_norm": 8.073966026306152, |
|
"learning_rate": 4.3617021276595744e-07, |
|
"logits/chosen": -1.0096590518951416, |
|
"logits/rejected": -0.8713966012001038, |
|
"logps/chosen": -0.30629193782806396, |
|
"logps/rejected": -0.33664122223854065, |
|
"loss": 1.5914, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6125838756561279, |
|
"rewards/margins": 0.06069856137037277, |
|
"rewards/rejected": -0.6732824444770813, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0897675661234304, |
|
"grad_norm": 5.209786891937256, |
|
"learning_rate": 4.4680851063829783e-07, |
|
"logits/chosen": -1.0377849340438843, |
|
"logits/rejected": -0.8914337754249573, |
|
"logps/chosen": -0.2845829427242279, |
|
"logps/rejected": -0.3244422674179077, |
|
"loss": 1.6121, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5691658854484558, |
|
"rewards/margins": 0.079718679189682, |
|
"rewards/rejected": -0.6488845348358154, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.09190488912636922, |
|
"grad_norm": 4.667476177215576, |
|
"learning_rate": 4.574468085106383e-07, |
|
"logits/chosen": -0.7347361445426941, |
|
"logits/rejected": -0.7869642376899719, |
|
"logps/chosen": -0.3507947623729706, |
|
"logps/rejected": -0.27199897170066833, |
|
"loss": 1.6222, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.7015895247459412, |
|
"rewards/margins": -0.1575915366411209, |
|
"rewards/rejected": -0.5439979434013367, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.09404221212930804, |
|
"grad_norm": 14.311481475830078, |
|
"learning_rate": 4.6808510638297873e-07, |
|
"logits/chosen": -0.8943421840667725, |
|
"logits/rejected": -0.836614727973938, |
|
"logps/chosen": -0.4167774021625519, |
|
"logps/rejected": -0.430794894695282, |
|
"loss": 1.597, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8335548043251038, |
|
"rewards/margins": 0.028035037219524384, |
|
"rewards/rejected": -0.861589789390564, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.09617953513224686, |
|
"grad_norm": 3.08385968208313, |
|
"learning_rate": 4.787234042553192e-07, |
|
"logits/chosen": -0.9741953015327454, |
|
"logits/rejected": -0.8605018258094788, |
|
"logps/chosen": -0.2905868887901306, |
|
"logps/rejected": -0.29014959931373596, |
|
"loss": 1.6179, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5811737775802612, |
|
"rewards/margins": -0.0008745882660150528, |
|
"rewards/rejected": -0.5802991986274719, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09831685813518568, |
|
"grad_norm": 3.141914129257202, |
|
"learning_rate": 4.893617021276595e-07, |
|
"logits/chosen": -0.8467612266540527, |
|
"logits/rejected": -0.8879311084747314, |
|
"logps/chosen": -0.2710065543651581, |
|
"logps/rejected": -0.28622525930404663, |
|
"loss": 1.6098, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5420131087303162, |
|
"rewards/margins": 0.03043745458126068, |
|
"rewards/rejected": -0.5724505186080933, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1004541811381245, |
|
"grad_norm": 5.874278545379639, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.9935128688812256, |
|
"logits/rejected": -1.0635360479354858, |
|
"logps/chosen": -0.2610815465450287, |
|
"logps/rejected": -0.2970622777938843, |
|
"loss": 1.5882, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5221630930900574, |
|
"rewards/margins": 0.07196150720119476, |
|
"rewards/rejected": -0.5941245555877686, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.10259150414106331, |
|
"grad_norm": 3.674631118774414, |
|
"learning_rate": 4.999930062653174e-07, |
|
"logits/chosen": -0.7607293725013733, |
|
"logits/rejected": -0.9387491941452026, |
|
"logps/chosen": -0.30105069279670715, |
|
"logps/rejected": -0.29622718691825867, |
|
"loss": 1.6263, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6021013855934143, |
|
"rewards/margins": -0.009646959602832794, |
|
"rewards/rejected": -0.5924543738365173, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.10472882714400214, |
|
"grad_norm": 3.2993836402893066, |
|
"learning_rate": 4.999720254525684e-07, |
|
"logits/chosen": -1.041825294494629, |
|
"logits/rejected": -0.8979977965354919, |
|
"logps/chosen": -0.3147028684616089, |
|
"logps/rejected": -0.32463401556015015, |
|
"loss": 1.5836, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6294057369232178, |
|
"rewards/margins": 0.019862275570631027, |
|
"rewards/rejected": -0.6492680311203003, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.10686615014694095, |
|
"grad_norm": 3.6394598484039307, |
|
"learning_rate": 4.999370587356267e-07, |
|
"logits/chosen": -1.0319520235061646, |
|
"logits/rejected": -0.9399799108505249, |
|
"logps/chosen": -0.3198903501033783, |
|
"logps/rejected": -0.33650463819503784, |
|
"loss": 1.61, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.6397807002067566, |
|
"rewards/margins": 0.03322865813970566, |
|
"rewards/rejected": -0.6730092763900757, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10900347314987978, |
|
"grad_norm": 3.5822248458862305, |
|
"learning_rate": 4.998881080708758e-07, |
|
"logits/chosen": -0.7624353170394897, |
|
"logits/rejected": -0.7781803011894226, |
|
"logps/chosen": -0.22195202112197876, |
|
"logps/rejected": -0.2529197931289673, |
|
"loss": 1.6014, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4439040422439575, |
|
"rewards/margins": 0.061935484409332275, |
|
"rewards/rejected": -0.5058395862579346, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.11114079615281859, |
|
"grad_norm": 4.502132415771484, |
|
"learning_rate": 4.998251761970996e-07, |
|
"logits/chosen": -0.934096097946167, |
|
"logits/rejected": -0.9894377589225769, |
|
"logps/chosen": -0.3010854721069336, |
|
"logps/rejected": -0.2971184551715851, |
|
"loss": 1.6238, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6021709442138672, |
|
"rewards/margins": -0.00793398916721344, |
|
"rewards/rejected": -0.5942369103431702, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.11327811915575742, |
|
"grad_norm": 14.495563507080078, |
|
"learning_rate": 4.997482666353286e-07, |
|
"logits/chosen": -0.9065138101577759, |
|
"logits/rejected": -0.8083285093307495, |
|
"logps/chosen": -0.2879031002521515, |
|
"logps/rejected": -0.30471161007881165, |
|
"loss": 1.6036, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.575806200504303, |
|
"rewards/margins": 0.03361699730157852, |
|
"rewards/rejected": -0.6094232201576233, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.11541544215869623, |
|
"grad_norm": 5.210042953491211, |
|
"learning_rate": 4.996573836886434e-07, |
|
"logits/chosen": -1.012821912765503, |
|
"logits/rejected": -0.935365617275238, |
|
"logps/chosen": -0.27059802412986755, |
|
"logps/rejected": -0.28305694460868835, |
|
"loss": 1.5922, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5411960482597351, |
|
"rewards/margins": 0.024917850270867348, |
|
"rewards/rejected": -0.5661138892173767, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.11755276516163506, |
|
"grad_norm": 3.4929800033569336, |
|
"learning_rate": 4.995525324419337e-07, |
|
"logits/chosen": -1.03290593624115, |
|
"logits/rejected": -0.8397963047027588, |
|
"logps/chosen": -0.23197168111801147, |
|
"logps/rejected": -0.257206529378891, |
|
"loss": 1.6012, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.46394336223602295, |
|
"rewards/margins": 0.05046967417001724, |
|
"rewards/rejected": -0.514413058757782, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.11969008816457387, |
|
"grad_norm": 6.639918804168701, |
|
"learning_rate": 4.99433718761614e-07, |
|
"logits/chosen": -0.8676168918609619, |
|
"logits/rejected": -0.8751212954521179, |
|
"logps/chosen": -0.2813611626625061, |
|
"logps/rejected": -0.28943243622779846, |
|
"loss": 1.602, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5627223253250122, |
|
"rewards/margins": 0.01614254154264927, |
|
"rewards/rejected": -0.5788648724555969, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.1218274111675127, |
|
"grad_norm": 3.159461736679077, |
|
"learning_rate": 4.993009492952949e-07, |
|
"logits/chosen": -0.9598115682601929, |
|
"logits/rejected": -0.9728808999061584, |
|
"logps/chosen": -0.2418256551027298, |
|
"logps/rejected": -0.27858078479766846, |
|
"loss": 1.6025, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4836513102054596, |
|
"rewards/margins": 0.07351024448871613, |
|
"rewards/rejected": -0.5571615695953369, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.12396473417045151, |
|
"grad_norm": 3.1260619163513184, |
|
"learning_rate": 4.991542314714122e-07, |
|
"logits/chosen": -1.1715333461761475, |
|
"logits/rejected": -1.0372506380081177, |
|
"logps/chosen": -0.2886565625667572, |
|
"logps/rejected": -0.3048909306526184, |
|
"loss": 1.6142, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5773131251335144, |
|
"rewards/margins": 0.03246863931417465, |
|
"rewards/rejected": -0.6097818613052368, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.12610205717339032, |
|
"grad_norm": 4.803882598876953, |
|
"learning_rate": 4.989935734988097e-07, |
|
"logits/chosen": -0.8652929663658142, |
|
"logits/rejected": -0.9138813018798828, |
|
"logps/chosen": -0.22791269421577454, |
|
"logps/rejected": -0.2620168924331665, |
|
"loss": 1.5999, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4558253884315491, |
|
"rewards/margins": 0.06820837408304214, |
|
"rewards/rejected": -0.524033784866333, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.12823938017632916, |
|
"grad_norm": 2.9545466899871826, |
|
"learning_rate": 4.988189843662815e-07, |
|
"logits/chosen": -0.9540647864341736, |
|
"logits/rejected": -0.9105108380317688, |
|
"logps/chosen": -0.28050848841667175, |
|
"logps/rejected": -0.2682150602340698, |
|
"loss": 1.6229, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5610169768333435, |
|
"rewards/margins": -0.024586813524365425, |
|
"rewards/rejected": -0.5364301204681396, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13037670317926797, |
|
"grad_norm": 5.4623260498046875, |
|
"learning_rate": 4.986304738420683e-07, |
|
"logits/chosen": -0.8594868779182434, |
|
"logits/rejected": -0.8749207854270935, |
|
"logps/chosen": -0.23750001192092896, |
|
"logps/rejected": -0.24768495559692383, |
|
"loss": 1.5863, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4750000238418579, |
|
"rewards/margins": 0.020369907841086388, |
|
"rewards/rejected": -0.49536991119384766, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.13251402618220678, |
|
"grad_norm": 5.195383548736572, |
|
"learning_rate": 4.984280524733107e-07, |
|
"logits/chosen": -0.8988451361656189, |
|
"logits/rejected": -1.0471916198730469, |
|
"logps/chosen": -0.2563616931438446, |
|
"logps/rejected": -0.264529824256897, |
|
"loss": 1.628, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5127233862876892, |
|
"rewards/margins": 0.01633620262145996, |
|
"rewards/rejected": -0.529059648513794, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1346513491851456, |
|
"grad_norm": 3.365633249282837, |
|
"learning_rate": 4.982117315854593e-07, |
|
"logits/chosen": -0.9563354253768921, |
|
"logits/rejected": -1.143921971321106, |
|
"logps/chosen": -0.27564752101898193, |
|
"logps/rejected": -0.2891802191734314, |
|
"loss": 1.6243, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5512950420379639, |
|
"rewards/margins": 0.02706541307270527, |
|
"rewards/rejected": -0.5783604383468628, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.13678867218808444, |
|
"grad_norm": 3.6298470497131348, |
|
"learning_rate": 4.979815232816416e-07, |
|
"logits/chosen": -0.9835873246192932, |
|
"logits/rejected": -0.8579452037811279, |
|
"logps/chosen": -0.2935434579849243, |
|
"logps/rejected": -0.26197710633277893, |
|
"loss": 1.6428, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.5870869159698486, |
|
"rewards/margins": -0.06313266605138779, |
|
"rewards/rejected": -0.5239542126655579, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.13892599519102325, |
|
"grad_norm": 5.261904239654541, |
|
"learning_rate": 4.977374404419837e-07, |
|
"logits/chosen": -1.0193111896514893, |
|
"logits/rejected": -1.036008358001709, |
|
"logps/chosen": -0.27654433250427246, |
|
"logps/rejected": -0.25757479667663574, |
|
"loss": 1.5985, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.6913608908653259, |
|
"rewards/margins": -0.047423895448446274, |
|
"rewards/rejected": -0.6439369320869446, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14106331819396206, |
|
"grad_norm": 3.326939582824707, |
|
"learning_rate": 4.974794967228907e-07, |
|
"logits/chosen": -1.0054104328155518, |
|
"logits/rejected": -0.9754442572593689, |
|
"logps/chosen": -0.2905897796154022, |
|
"logps/rejected": -0.32264938950538635, |
|
"loss": 1.6248, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7264744639396667, |
|
"rewards/margins": 0.08014895021915436, |
|
"rewards/rejected": -0.8066234588623047, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.14320064119690087, |
|
"grad_norm": 5.669600486755371, |
|
"learning_rate": 4.972077065562821e-07, |
|
"logits/chosen": -0.9552958607673645, |
|
"logits/rejected": -1.0761511325836182, |
|
"logps/chosen": -0.3276459574699402, |
|
"logps/rejected": -0.32107335329055786, |
|
"loss": 1.6203, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8191148638725281, |
|
"rewards/margins": -0.01643138751387596, |
|
"rewards/rejected": -0.8026834726333618, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.14533796419983971, |
|
"grad_norm": 3.257904052734375, |
|
"learning_rate": 4.969220851487844e-07, |
|
"logits/chosen": -0.9927914142608643, |
|
"logits/rejected": -0.9472739696502686, |
|
"logps/chosen": -0.3458186686038971, |
|
"logps/rejected": -0.34241756796836853, |
|
"loss": 1.6191, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8645466566085815, |
|
"rewards/margins": -0.008502773940563202, |
|
"rewards/rejected": -0.8560439348220825, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.14747528720277853, |
|
"grad_norm": 5.560789585113525, |
|
"learning_rate": 4.966226484808803e-07, |
|
"logits/chosen": -0.9344061613082886, |
|
"logits/rejected": -0.8273663520812988, |
|
"logps/chosen": -0.2849215567111969, |
|
"logps/rejected": -0.31608855724334717, |
|
"loss": 1.6123, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.7123039960861206, |
|
"rewards/margins": 0.0779174268245697, |
|
"rewards/rejected": -0.7902213335037231, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.14961261020571734, |
|
"grad_norm": 3.70934796333313, |
|
"learning_rate": 4.963094133060148e-07, |
|
"logits/chosen": -0.9611161947250366, |
|
"logits/rejected": -0.8749902248382568, |
|
"logps/chosen": -0.2869144380092621, |
|
"logps/rejected": -0.23931002616882324, |
|
"loss": 1.6348, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.7172860503196716, |
|
"rewards/margins": -0.11901099979877472, |
|
"rewards/rejected": -0.5982750654220581, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15174993320865615, |
|
"grad_norm": 3.771442413330078, |
|
"learning_rate": 4.959823971496574e-07, |
|
"logits/chosen": -1.0483484268188477, |
|
"logits/rejected": -0.9827014803886414, |
|
"logps/chosen": -0.3061015009880066, |
|
"logps/rejected": -0.3094024658203125, |
|
"loss": 1.5879, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7652537822723389, |
|
"rewards/margins": 0.00825244840234518, |
|
"rewards/rejected": -0.7735061645507812, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.15388725621159496, |
|
"grad_norm": 3.6872432231903076, |
|
"learning_rate": 4.956416183083221e-07, |
|
"logits/chosen": -1.0115149021148682, |
|
"logits/rejected": -1.0020099878311157, |
|
"logps/chosen": -0.26311925053596497, |
|
"logps/rejected": -0.27171316742897034, |
|
"loss": 1.5697, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6577981114387512, |
|
"rewards/margins": 0.02148478478193283, |
|
"rewards/rejected": -0.6792829036712646, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1560245792145338, |
|
"grad_norm": 7.885510444641113, |
|
"learning_rate": 4.952870958485431e-07, |
|
"logits/chosen": -0.7439613938331604, |
|
"logits/rejected": -0.7543243169784546, |
|
"logps/chosen": -0.32277047634124756, |
|
"logps/rejected": -0.44049495458602905, |
|
"loss": 1.5719, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8069261312484741, |
|
"rewards/margins": 0.2943112254142761, |
|
"rewards/rejected": -1.1012372970581055, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.15816190221747262, |
|
"grad_norm": 14.005949020385742, |
|
"learning_rate": 4.949188496058089e-07, |
|
"logits/chosen": -0.8661502599716187, |
|
"logits/rejected": -0.9138545989990234, |
|
"logps/chosen": -0.27060988545417786, |
|
"logps/rejected": -0.25004029273986816, |
|
"loss": 1.6381, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6765246987342834, |
|
"rewards/margins": -0.05142403393983841, |
|
"rewards/rejected": -0.6251006722450256, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.16029922522041143, |
|
"grad_norm": 7.343827247619629, |
|
"learning_rate": 4.945369001834514e-07, |
|
"logits/chosen": -1.07318115234375, |
|
"logits/rejected": -1.0178194046020508, |
|
"logps/chosen": -0.2654929459095001, |
|
"logps/rejected": -0.29686206579208374, |
|
"loss": 1.5458, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.6637323498725891, |
|
"rewards/margins": 0.07842274755239487, |
|
"rewards/rejected": -0.7421550750732422, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16243654822335024, |
|
"grad_norm": 4.01411247253418, |
|
"learning_rate": 4.941412689514941e-07, |
|
"logits/chosen": -1.162184238433838, |
|
"logits/rejected": -1.2236565351486206, |
|
"logps/chosen": -0.2647251486778259, |
|
"logps/rejected": -0.2977098226547241, |
|
"loss": 1.6206, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6618129014968872, |
|
"rewards/margins": 0.08246167004108429, |
|
"rewards/rejected": -0.7442746162414551, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.16457387122628908, |
|
"grad_norm": 4.315869331359863, |
|
"learning_rate": 4.937319780454559e-07, |
|
"logits/chosen": -0.8569203019142151, |
|
"logits/rejected": -0.7959333062171936, |
|
"logps/chosen": -0.29102951288223267, |
|
"logps/rejected": -0.31862419843673706, |
|
"loss": 1.5992, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.727573812007904, |
|
"rewards/margins": 0.06898671388626099, |
|
"rewards/rejected": -0.7965604662895203, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.1667111942292279, |
|
"grad_norm": 6.3516645431518555, |
|
"learning_rate": 4.933090503651128e-07, |
|
"logits/chosen": -0.9815778136253357, |
|
"logits/rejected": -0.9455960988998413, |
|
"logps/chosen": -0.290622353553772, |
|
"logps/rejected": -0.25920000672340393, |
|
"loss": 1.6019, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.7265558838844299, |
|
"rewards/margins": -0.07855589687824249, |
|
"rewards/rejected": -0.6479999423027039, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1688485172321667, |
|
"grad_norm": 5.576763153076172, |
|
"learning_rate": 4.928725095732168e-07, |
|
"logits/chosen": -0.7572908401489258, |
|
"logits/rejected": -0.8643375039100647, |
|
"logps/chosen": -0.28876882791519165, |
|
"logps/rejected": -0.38018882274627686, |
|
"loss": 1.574, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.721921980381012, |
|
"rewards/margins": 0.22855007648468018, |
|
"rewards/rejected": -0.9504721164703369, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.17098584023510552, |
|
"grad_norm": 6.904773235321045, |
|
"learning_rate": 4.924223800941717e-07, |
|
"logits/chosen": -1.1600089073181152, |
|
"logits/rejected": -1.001929759979248, |
|
"logps/chosen": -0.3185364007949829, |
|
"logps/rejected": -0.2833505868911743, |
|
"loss": 1.5885, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.796341061592102, |
|
"rewards/margins": -0.08796463906764984, |
|
"rewards/rejected": -0.708376407623291, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17312316323804436, |
|
"grad_norm": 5.542295932769775, |
|
"learning_rate": 4.919586871126667e-07, |
|
"logits/chosen": -1.1290327310562134, |
|
"logits/rejected": -1.0776805877685547, |
|
"logps/chosen": -0.28904592990875244, |
|
"logps/rejected": -0.32642093300819397, |
|
"loss": 1.5823, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7226147651672363, |
|
"rewards/margins": 0.09343745559453964, |
|
"rewards/rejected": -0.816052258014679, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.17526048624098317, |
|
"grad_norm": 7.346787929534912, |
|
"learning_rate": 4.91481456572267e-07, |
|
"logits/chosen": -1.008028507232666, |
|
"logits/rejected": -0.7614388465881348, |
|
"logps/chosen": -0.276422917842865, |
|
"logps/rejected": -0.28925520181655884, |
|
"loss": 1.5471, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6910573244094849, |
|
"rewards/margins": 0.03208072483539581, |
|
"rewards/rejected": -0.7231380343437195, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.17739780924392198, |
|
"grad_norm": 3.4823226928710938, |
|
"learning_rate": 4.909907151739633e-07, |
|
"logits/chosen": -0.8054043650627136, |
|
"logits/rejected": -0.8212348222732544, |
|
"logps/chosen": -0.25493186712265015, |
|
"logps/rejected": -0.2324959635734558, |
|
"loss": 1.6075, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.637329638004303, |
|
"rewards/margins": -0.05608966201543808, |
|
"rewards/rejected": -0.5812399983406067, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.1795351322468608, |
|
"grad_norm": 8.512069702148438, |
|
"learning_rate": 4.904864903746765e-07, |
|
"logits/chosen": -0.8016963601112366, |
|
"logits/rejected": -0.8472069501876831, |
|
"logps/chosen": -0.3017991781234741, |
|
"logps/rejected": -0.30026912689208984, |
|
"loss": 1.6386, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7544978857040405, |
|
"rewards/margins": -0.003825142979621887, |
|
"rewards/rejected": -0.7506727576255798, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.18167245524979964, |
|
"grad_norm": 3.27510666847229, |
|
"learning_rate": 4.899688103857222e-07, |
|
"logits/chosen": -0.9057269096374512, |
|
"logits/rejected": -0.8979475498199463, |
|
"logps/chosen": -0.24959440529346466, |
|
"logps/rejected": -0.3118637502193451, |
|
"loss": 1.5594, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.623986005783081, |
|
"rewards/margins": 0.15567341446876526, |
|
"rewards/rejected": -0.7796593904495239, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.18380977825273845, |
|
"grad_norm": 3.2452337741851807, |
|
"learning_rate": 4.894377041712326e-07, |
|
"logits/chosen": -0.6997116208076477, |
|
"logits/rejected": -0.6495150327682495, |
|
"logps/chosen": -0.2519880533218384, |
|
"logps/rejected": -0.30695033073425293, |
|
"loss": 1.585, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6299701929092407, |
|
"rewards/margins": 0.137405663728714, |
|
"rewards/rejected": -0.7673758268356323, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.18594710125567726, |
|
"grad_norm": 6.7908430099487305, |
|
"learning_rate": 4.888932014465352e-07, |
|
"logits/chosen": -0.8975124359130859, |
|
"logits/rejected": -0.8113777041435242, |
|
"logps/chosen": -0.2837047576904297, |
|
"logps/rejected": -0.2963961958885193, |
|
"loss": 1.5927, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.709261953830719, |
|
"rewards/margins": 0.03172856196761131, |
|
"rewards/rejected": -0.7409905195236206, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.18808442425861607, |
|
"grad_norm": 4.965595722198486, |
|
"learning_rate": 4.883353326764906e-07, |
|
"logits/chosen": -0.8913217186927795, |
|
"logits/rejected": -0.8421756625175476, |
|
"logps/chosen": -0.25936800241470337, |
|
"logps/rejected": -0.45224201679229736, |
|
"loss": 1.5572, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.648419976234436, |
|
"rewards/margins": 0.4821849763393402, |
|
"rewards/rejected": -1.1306049823760986, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.1902217472615549, |
|
"grad_norm": 5.781017780303955, |
|
"learning_rate": 4.877641290737883e-07, |
|
"logits/chosen": -0.9931791424751282, |
|
"logits/rejected": -0.9962902665138245, |
|
"logps/chosen": -0.2539224624633789, |
|
"logps/rejected": -0.2921288311481476, |
|
"loss": 1.6042, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6348061561584473, |
|
"rewards/margins": 0.0955159068107605, |
|
"rewards/rejected": -0.730322003364563, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.19235907026449373, |
|
"grad_norm": 5.002528190612793, |
|
"learning_rate": 4.871796225971999e-07, |
|
"logits/chosen": -0.9850423336029053, |
|
"logits/rejected": -0.857207179069519, |
|
"logps/chosen": -0.27758607268333435, |
|
"logps/rejected": -0.3058236241340637, |
|
"loss": 1.5967, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6939651966094971, |
|
"rewards/margins": 0.0705939531326294, |
|
"rewards/rejected": -0.7645590901374817, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19449639326743254, |
|
"grad_norm": 5.870463848114014, |
|
"learning_rate": 4.86581845949791e-07, |
|
"logits/chosen": -0.949233889579773, |
|
"logits/rejected": -1.0075958967208862, |
|
"logps/chosen": -0.2556777596473694, |
|
"logps/rejected": -0.2905680537223816, |
|
"loss": 1.5559, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.6391944289207458, |
|
"rewards/margins": 0.08722572773694992, |
|
"rewards/rejected": -0.7264201045036316, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.19663371627037135, |
|
"grad_norm": 4.762439727783203, |
|
"learning_rate": 4.859708325770919e-07, |
|
"logits/chosen": -1.1257095336914062, |
|
"logits/rejected": -1.173663854598999, |
|
"logps/chosen": -0.28581249713897705, |
|
"logps/rejected": -0.3704802989959717, |
|
"loss": 1.5915, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7145313024520874, |
|
"rewards/margins": 0.21166956424713135, |
|
"rewards/rejected": -0.926200807094574, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.1987710392733102, |
|
"grad_norm": 5.9837117195129395, |
|
"learning_rate": 4.853466166652258e-07, |
|
"logits/chosen": -0.9948515295982361, |
|
"logits/rejected": -0.9665160179138184, |
|
"logps/chosen": -0.2551361620426178, |
|
"logps/rejected": -0.28811219334602356, |
|
"loss": 1.5882, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6378403902053833, |
|
"rewards/margins": 0.08244016021490097, |
|
"rewards/rejected": -0.7202805280685425, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.200908362276249, |
|
"grad_norm": 4.58953857421875, |
|
"learning_rate": 4.847092331389964e-07, |
|
"logits/chosen": -0.7557870149612427, |
|
"logits/rejected": -0.7804038524627686, |
|
"logps/chosen": -0.26233580708503723, |
|
"logps/rejected": -0.28490206599235535, |
|
"loss": 1.6059, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6558394432067871, |
|
"rewards/margins": 0.056415725499391556, |
|
"rewards/rejected": -0.7122551798820496, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.20304568527918782, |
|
"grad_norm": 4.404232978820801, |
|
"learning_rate": 4.840587176599343e-07, |
|
"logits/chosen": -1.1708656549453735, |
|
"logits/rejected": -1.1824274063110352, |
|
"logps/chosen": -0.3498944640159607, |
|
"logps/rejected": -0.3052523732185364, |
|
"loss": 1.5518, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.8747361898422241, |
|
"rewards/margins": -0.1116051897406578, |
|
"rewards/rejected": -0.7631310224533081, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.20518300828212663, |
|
"grad_norm": 2.755133628845215, |
|
"learning_rate": 4.833951066243004e-07, |
|
"logits/chosen": -0.9821409583091736, |
|
"logits/rejected": -0.9246101975440979, |
|
"logps/chosen": -0.29376041889190674, |
|
"logps/rejected": -0.2656431794166565, |
|
"loss": 1.6092, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.7344010472297668, |
|
"rewards/margins": -0.07029299437999725, |
|
"rewards/rejected": -0.6641080379486084, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.20732033128506547, |
|
"grad_norm": 8.961865425109863, |
|
"learning_rate": 4.82718437161051e-07, |
|
"logits/chosen": -0.9781126976013184, |
|
"logits/rejected": -1.0274431705474854, |
|
"logps/chosen": -0.2688814699649811, |
|
"logps/rejected": -0.25695058703422546, |
|
"loss": 1.6414, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6722037196159363, |
|
"rewards/margins": -0.029827285557985306, |
|
"rewards/rejected": -0.6423764228820801, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.20945765428800428, |
|
"grad_norm": 3.496291160583496, |
|
"learning_rate": 4.820287471297597e-07, |
|
"logits/chosen": -1.110063076019287, |
|
"logits/rejected": -0.9798667430877686, |
|
"logps/chosen": -0.2772579789161682, |
|
"logps/rejected": -0.284493625164032, |
|
"loss": 1.6027, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6931449174880981, |
|
"rewards/margins": 0.01808912120759487, |
|
"rewards/rejected": -0.7112340927124023, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.2115949772909431, |
|
"grad_norm": 9.36062240600586, |
|
"learning_rate": 4.813260751184992e-07, |
|
"logits/chosen": -1.0408313274383545, |
|
"logits/rejected": -0.9097151160240173, |
|
"logps/chosen": -0.2336195558309555, |
|
"logps/rejected": -0.28545060753822327, |
|
"loss": 1.5888, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5840489268302917, |
|
"rewards/margins": 0.12957759201526642, |
|
"rewards/rejected": -0.7136265635490417, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.2137323002938819, |
|
"grad_norm": 4.372857570648193, |
|
"learning_rate": 4.806104604416823e-07, |
|
"logits/chosen": -1.1981866359710693, |
|
"logits/rejected": -1.1812773942947388, |
|
"logps/chosen": -0.40545234084129333, |
|
"logps/rejected": -0.32747963070869446, |
|
"loss": 1.6366, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0136308670043945, |
|
"rewards/margins": -0.194931760430336, |
|
"rewards/rejected": -0.8186991214752197, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21586962329682075, |
|
"grad_norm": 6.457290172576904, |
|
"learning_rate": 4.798819431378626e-07, |
|
"logits/chosen": -0.9583615064620972, |
|
"logits/rejected": -0.9292630553245544, |
|
"logps/chosen": -0.2667827904224396, |
|
"logps/rejected": -0.3141520917415619, |
|
"loss": 1.5717, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6669570207595825, |
|
"rewards/margins": 0.11842326819896698, |
|
"rewards/rejected": -0.7853802442550659, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.21800694629975956, |
|
"grad_norm": 4.259753704071045, |
|
"learning_rate": 4.79140563967494e-07, |
|
"logits/chosen": -0.9554131031036377, |
|
"logits/rejected": -0.9235316514968872, |
|
"logps/chosen": -0.2790910005569458, |
|
"logps/rejected": -0.29358065128326416, |
|
"loss": 1.5956, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6977274417877197, |
|
"rewards/margins": 0.03622422739863396, |
|
"rewards/rejected": -0.7339516878128052, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.22014426930269837, |
|
"grad_norm": 7.2334675788879395, |
|
"learning_rate": 4.783863644106502e-07, |
|
"logits/chosen": -0.958928108215332, |
|
"logits/rejected": -0.9119776487350464, |
|
"logps/chosen": -0.2611943483352661, |
|
"logps/rejected": -0.29755640029907227, |
|
"loss": 1.5837, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6529859304428101, |
|
"rewards/margins": 0.09090512990951538, |
|
"rewards/rejected": -0.7438910007476807, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.22228159230563718, |
|
"grad_norm": 3.2508132457733154, |
|
"learning_rate": 4.776193866647039e-07, |
|
"logits/chosen": -1.073838710784912, |
|
"logits/rejected": -0.9083616733551025, |
|
"logps/chosen": -0.2882213294506073, |
|
"logps/rejected": -0.2734883427619934, |
|
"loss": 1.606, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7205533385276794, |
|
"rewards/margins": -0.03683248162269592, |
|
"rewards/rejected": -0.6837208867073059, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.224418915308576, |
|
"grad_norm": 5.259939193725586, |
|
"learning_rate": 4.768396736419662e-07, |
|
"logits/chosen": -0.9633040428161621, |
|
"logits/rejected": -0.9958257675170898, |
|
"logps/chosen": -0.2812567949295044, |
|
"logps/rejected": -0.3445313572883606, |
|
"loss": 1.6165, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.703141987323761, |
|
"rewards/margins": 0.1581864058971405, |
|
"rewards/rejected": -0.8613283634185791, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.22655623831151483, |
|
"grad_norm": 4.128396511077881, |
|
"learning_rate": 4.7604726896728496e-07, |
|
"logits/chosen": -0.898779571056366, |
|
"logits/rejected": -0.8008460998535156, |
|
"logps/chosen": -0.3449317216873169, |
|
"logps/rejected": -0.3174844980239868, |
|
"loss": 1.581, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.862329363822937, |
|
"rewards/margins": -0.06861816346645355, |
|
"rewards/rejected": -0.793711245059967, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.22869356131445365, |
|
"grad_norm": 7.073979377746582, |
|
"learning_rate": 4.752422169756047e-07, |
|
"logits/chosen": -0.7736971378326416, |
|
"logits/rejected": -0.7141239643096924, |
|
"logps/chosen": -0.27883169054985046, |
|
"logps/rejected": -0.28934141993522644, |
|
"loss": 1.6038, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6970791816711426, |
|
"rewards/margins": 0.02627432905137539, |
|
"rewards/rejected": -0.7233536243438721, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.23083088431739246, |
|
"grad_norm": 3.607881784439087, |
|
"learning_rate": 4.744245627094858e-07, |
|
"logits/chosen": -0.7748329043388367, |
|
"logits/rejected": -0.7313745021820068, |
|
"logps/chosen": -0.3115028142929077, |
|
"logps/rejected": -0.3770483732223511, |
|
"loss": 1.6484, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7787570357322693, |
|
"rewards/margins": 0.1638639271259308, |
|
"rewards/rejected": -0.9426208734512329, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.23296820732033127, |
|
"grad_norm": 6.025390148162842, |
|
"learning_rate": 4.735943519165842e-07, |
|
"logits/chosen": -0.8779905438423157, |
|
"logits/rejected": -0.9295673966407776, |
|
"logps/chosen": -0.2856750786304474, |
|
"logps/rejected": -0.3149481415748596, |
|
"loss": 1.6163, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7141878008842468, |
|
"rewards/margins": 0.07318252325057983, |
|
"rewards/rejected": -0.7873702645301819, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.2351055303232701, |
|
"grad_norm": 10.654799461364746, |
|
"learning_rate": 4.7275163104709194e-07, |
|
"logits/chosen": -1.139617681503296, |
|
"logits/rejected": -1.037335753440857, |
|
"logps/chosen": -0.3125270903110504, |
|
"logps/rejected": -0.42321839928627014, |
|
"loss": 1.5969, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7813177704811096, |
|
"rewards/margins": 0.27672821283340454, |
|
"rewards/rejected": -1.0580458641052246, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.23724285332620892, |
|
"grad_norm": 3.1225905418395996, |
|
"learning_rate": 4.718964472511385e-07, |
|
"logits/chosen": -0.7755342125892639, |
|
"logits/rejected": -0.9119763374328613, |
|
"logps/chosen": -0.26263684034347534, |
|
"logps/rejected": -0.2584255635738373, |
|
"loss": 1.6006, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6565921306610107, |
|
"rewards/margins": -0.010528111830353737, |
|
"rewards/rejected": -0.6460639834403992, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.23938017632914774, |
|
"grad_norm": 3.907759189605713, |
|
"learning_rate": 4.710288483761524e-07, |
|
"logits/chosen": -0.805738091468811, |
|
"logits/rejected": -0.8327180743217468, |
|
"logps/chosen": -0.26873674988746643, |
|
"logps/rejected": -0.27950698137283325, |
|
"loss": 1.5569, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6718418598175049, |
|
"rewards/margins": 0.02692551538348198, |
|
"rewards/rejected": -0.6987674236297607, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.24151749933208655, |
|
"grad_norm": 5.125892162322998, |
|
"learning_rate": 4.7014888296418447e-07, |
|
"logits/chosen": -0.8660019040107727, |
|
"logits/rejected": -0.7626081109046936, |
|
"logps/chosen": -0.27202802896499634, |
|
"logps/rejected": -0.3179852068424225, |
|
"loss": 1.5133, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.680070161819458, |
|
"rewards/margins": 0.11489284038543701, |
|
"rewards/rejected": -0.7949629426002502, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.2436548223350254, |
|
"grad_norm": 3.331281900405884, |
|
"learning_rate": 4.692566002491916e-07, |
|
"logits/chosen": -0.9860325455665588, |
|
"logits/rejected": -1.0227984189987183, |
|
"logps/chosen": -0.277464359998703, |
|
"logps/rejected": -0.3393504023551941, |
|
"loss": 1.5764, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6936609745025635, |
|
"rewards/margins": 0.15471497178077698, |
|
"rewards/rejected": -0.8483759164810181, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.2457921453379642, |
|
"grad_norm": 4.2767205238342285, |
|
"learning_rate": 4.683520501542824e-07, |
|
"logits/chosen": -1.1069515943527222, |
|
"logits/rejected": -0.9956479668617249, |
|
"logps/chosen": -0.26621949672698975, |
|
"logps/rejected": -0.2311069816350937, |
|
"loss": 1.6116, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6655487418174744, |
|
"rewards/margins": -0.08778128772974014, |
|
"rewards/rejected": -0.5777674317359924, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.24792946834090301, |
|
"grad_norm": 4.450298309326172, |
|
"learning_rate": 4.6743528328892384e-07, |
|
"logits/chosen": -1.089507818222046, |
|
"logits/rejected": -1.0226225852966309, |
|
"logps/chosen": -0.31000208854675293, |
|
"logps/rejected": -0.3048384189605713, |
|
"loss": 1.5601, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7750052213668823, |
|
"rewards/margins": -0.012909159064292908, |
|
"rewards/rejected": -0.7620960474014282, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.25006679134384185, |
|
"grad_norm": 8.30679988861084, |
|
"learning_rate": 4.6650635094610966e-07, |
|
"logits/chosen": -1.019626498222351, |
|
"logits/rejected": -1.0024278163909912, |
|
"logps/chosen": -0.27769044041633606, |
|
"logps/rejected": -0.31382811069488525, |
|
"loss": 1.603, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6942261457443237, |
|
"rewards/margins": 0.09034418314695358, |
|
"rewards/rejected": -0.7845702767372131, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.25220411434678064, |
|
"grad_norm": 2.7810580730438232, |
|
"learning_rate": 4.655653050994906e-07, |
|
"logits/chosen": -0.8939322829246521, |
|
"logits/rejected": -0.9443778991699219, |
|
"logps/chosen": -0.3001169264316559, |
|
"logps/rejected": -0.27608948945999146, |
|
"loss": 1.6033, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7502923011779785, |
|
"rewards/margins": -0.06006866693496704, |
|
"rewards/rejected": -0.6902236938476562, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.2543414373497195, |
|
"grad_norm": 7.52852201461792, |
|
"learning_rate": 4.646121984004665e-07, |
|
"logits/chosen": -1.0176833868026733, |
|
"logits/rejected": -0.9106737971305847, |
|
"logps/chosen": -0.2855750620365143, |
|
"logps/rejected": -0.2689260244369507, |
|
"loss": 1.6369, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7139376401901245, |
|
"rewards/margins": -0.041622575372457504, |
|
"rewards/rejected": -0.6723150610923767, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.2564787603526583, |
|
"grad_norm": 5.235323429107666, |
|
"learning_rate": 4.636470841752404e-07, |
|
"logits/chosen": -0.894492506980896, |
|
"logits/rejected": -0.8580023050308228, |
|
"logps/chosen": -0.2390415519475937, |
|
"logps/rejected": -0.3226756751537323, |
|
"loss": 1.5698, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5976038575172424, |
|
"rewards/margins": 0.20908531546592712, |
|
"rewards/rejected": -0.8066891431808472, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2586160833555971, |
|
"grad_norm": 7.31561803817749, |
|
"learning_rate": 4.626700164218349e-07, |
|
"logits/chosen": -1.1262331008911133, |
|
"logits/rejected": -1.1069376468658447, |
|
"logps/chosen": -0.32872867584228516, |
|
"logps/rejected": -0.4012628495693207, |
|
"loss": 1.5619, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8218216896057129, |
|
"rewards/margins": 0.18133553862571716, |
|
"rewards/rejected": -1.003157138824463, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.26075340635853594, |
|
"grad_norm": 4.992301940917969, |
|
"learning_rate": 4.6168104980707103e-07, |
|
"logits/chosen": -0.947390079498291, |
|
"logits/rejected": -0.9287791848182678, |
|
"logps/chosen": -0.3660104274749756, |
|
"logps/rejected": -0.34243422746658325, |
|
"loss": 1.6722, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.9150261282920837, |
|
"rewards/margins": -0.058940548449754715, |
|
"rewards/rejected": -0.8560855388641357, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.26289072936147473, |
|
"grad_norm": 9.114599227905273, |
|
"learning_rate": 4.606802396635098e-07, |
|
"logits/chosen": -1.0491164922714233, |
|
"logits/rejected": -1.0415245294570923, |
|
"logps/chosen": -0.2906876802444458, |
|
"logps/rejected": -0.2940623164176941, |
|
"loss": 1.6062, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7267192006111145, |
|
"rewards/margins": 0.008436577394604683, |
|
"rewards/rejected": -0.7351557612419128, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.26502805236441357, |
|
"grad_norm": 13.20346736907959, |
|
"learning_rate": 4.59667641986356e-07, |
|
"logits/chosen": -0.9480360746383667, |
|
"logits/rejected": -0.9648789167404175, |
|
"logps/chosen": -0.30809280276298523, |
|
"logps/rejected": -0.39009833335876465, |
|
"loss": 1.5673, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.770232081413269, |
|
"rewards/margins": 0.20501384139060974, |
|
"rewards/rejected": -0.9752458930015564, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.2671653753673524, |
|
"grad_norm": 10.775737762451172, |
|
"learning_rate": 4.5864331343032565e-07, |
|
"logits/chosen": -0.9860743880271912, |
|
"logits/rejected": -0.9669252634048462, |
|
"logps/chosen": -0.4254220724105835, |
|
"logps/rejected": -0.42529717087745667, |
|
"loss": 1.6036, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.063555121421814, |
|
"rewards/margins": -0.0003122463822364807, |
|
"rewards/rejected": -1.0632429122924805, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2693026983702912, |
|
"grad_norm": 3.9531850814819336, |
|
"learning_rate": 4.576073113064759e-07, |
|
"logits/chosen": -0.9061692953109741, |
|
"logits/rejected": -1.030226707458496, |
|
"logps/chosen": -0.2965227961540222, |
|
"logps/rejected": -0.3571414351463318, |
|
"loss": 1.5663, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7413069605827332, |
|
"rewards/margins": 0.15154659748077393, |
|
"rewards/rejected": -0.8928536176681519, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.27144002137323003, |
|
"grad_norm": 10.59150218963623, |
|
"learning_rate": 4.565596935789987e-07, |
|
"logits/chosen": -1.0731703042984009, |
|
"logits/rejected": -1.0575220584869385, |
|
"logps/chosen": -0.3338828682899475, |
|
"logps/rejected": -0.36648380756378174, |
|
"loss": 1.5818, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.8347071409225464, |
|
"rewards/margins": 0.08150236308574677, |
|
"rewards/rejected": -0.9162094593048096, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.2735773443761689, |
|
"grad_norm": 10.513736724853516, |
|
"learning_rate": 4.555005188619775e-07, |
|
"logits/chosen": -0.8747404217720032, |
|
"logits/rejected": -0.8733081817626953, |
|
"logps/chosen": -0.24837706983089447, |
|
"logps/rejected": -0.297157883644104, |
|
"loss": 1.5767, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6209426522254944, |
|
"rewards/margins": 0.12195204943418503, |
|
"rewards/rejected": -0.74289470911026, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.27571466737910766, |
|
"grad_norm": 5.887775421142578, |
|
"learning_rate": 4.5442984641610784e-07, |
|
"logits/chosen": -1.126139760017395, |
|
"logits/rejected": -1.0465540885925293, |
|
"logps/chosen": -0.2965885102748871, |
|
"logps/rejected": -0.2864833474159241, |
|
"loss": 1.6037, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.7414712905883789, |
|
"rewards/margins": -0.025262875482439995, |
|
"rewards/rejected": -0.7162083387374878, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.2778519903820465, |
|
"grad_norm": 4.381558418273926, |
|
"learning_rate": 4.533477361453819e-07, |
|
"logits/chosen": -1.0439306497573853, |
|
"logits/rejected": -1.1324841976165771, |
|
"logps/chosen": -0.3036992847919464, |
|
"logps/rejected": -0.3638463020324707, |
|
"loss": 1.5841, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7592483162879944, |
|
"rewards/margins": 0.15036745369434357, |
|
"rewards/rejected": -0.9096157550811768, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2799893133849853, |
|
"grad_norm": 4.211307048797607, |
|
"learning_rate": 4.5225424859373684e-07, |
|
"logits/chosen": -0.9729929566383362, |
|
"logits/rejected": -0.971265435218811, |
|
"logps/chosen": -0.3391942083835602, |
|
"logps/rejected": -0.35309362411499023, |
|
"loss": 1.595, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8479855060577393, |
|
"rewards/margins": 0.03474842756986618, |
|
"rewards/rejected": -0.8827340602874756, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.2821266363879241, |
|
"grad_norm": 9.722661018371582, |
|
"learning_rate": 4.511494449416671e-07, |
|
"logits/chosen": -0.8604239225387573, |
|
"logits/rejected": -0.790294885635376, |
|
"logps/chosen": -0.25934475660324097, |
|
"logps/rejected": -0.2542663812637329, |
|
"loss": 1.6546, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6483618021011353, |
|
"rewards/margins": -0.012695923447608948, |
|
"rewards/rejected": -0.6356659531593323, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.28426395939086296, |
|
"grad_norm": 3.2997727394104004, |
|
"learning_rate": 4.500333870028016e-07, |
|
"logits/chosen": -1.0789867639541626, |
|
"logits/rejected": -1.073919653892517, |
|
"logps/chosen": -0.25591588020324707, |
|
"logps/rejected": -0.2593124806880951, |
|
"loss": 1.5489, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6397897601127625, |
|
"rewards/margins": 0.008491499349474907, |
|
"rewards/rejected": -0.6482812166213989, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.28640128239380175, |
|
"grad_norm": 3.918221950531006, |
|
"learning_rate": 4.489061372204452e-07, |
|
"logits/chosen": -0.9510654211044312, |
|
"logits/rejected": -0.880722165107727, |
|
"logps/chosen": -0.2889711260795593, |
|
"logps/rejected": -0.32566970586776733, |
|
"loss": 1.5822, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7224277257919312, |
|
"rewards/margins": 0.09174646437168121, |
|
"rewards/rejected": -0.8141741752624512, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.2885386053967406, |
|
"grad_norm": 9.736861228942871, |
|
"learning_rate": 4.4776775866408533e-07, |
|
"logits/chosen": -1.0732065439224243, |
|
"logits/rejected": -0.9681872725486755, |
|
"logps/chosen": -0.41927701234817505, |
|
"logps/rejected": -0.2924247086048126, |
|
"loss": 1.5615, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -1.0481925010681152, |
|
"rewards/margins": -0.3171307146549225, |
|
"rewards/rejected": -0.7310618162155151, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29067592839967943, |
|
"grad_norm": 3.712836503982544, |
|
"learning_rate": 4.4661831502586244e-07, |
|
"logits/chosen": -0.9898865222930908, |
|
"logits/rejected": -0.958566427230835, |
|
"logps/chosen": -0.3362237811088562, |
|
"logps/rejected": -0.3830156624317169, |
|
"loss": 1.5408, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8405593633651733, |
|
"rewards/margins": 0.11697974801063538, |
|
"rewards/rejected": -0.9575392007827759, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.2928132514026182, |
|
"grad_norm": 6.5986409187316895, |
|
"learning_rate": 4.4545787061700746e-07, |
|
"logits/chosen": -0.9952265620231628, |
|
"logits/rejected": -0.9618417024612427, |
|
"logps/chosen": -0.33093225955963135, |
|
"logps/rejected": -0.3158915042877197, |
|
"loss": 1.6151, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.8273307085037231, |
|
"rewards/margins": -0.037601932883262634, |
|
"rewards/rejected": -0.7897287607192993, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.29495057440555705, |
|
"grad_norm": 11.264966011047363, |
|
"learning_rate": 4.442864903642427e-07, |
|
"logits/chosen": -0.9705032706260681, |
|
"logits/rejected": -1.010439395904541, |
|
"logps/chosen": -0.29408299922943115, |
|
"logps/rejected": -0.32492977380752563, |
|
"loss": 1.6785, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7352074980735779, |
|
"rewards/margins": 0.07711698114871979, |
|
"rewards/rejected": -0.8123244047164917, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.29708789740849584, |
|
"grad_norm": 3.294029951095581, |
|
"learning_rate": 4.4310423980614986e-07, |
|
"logits/chosen": -0.9771057963371277, |
|
"logits/rejected": -0.8812280893325806, |
|
"logps/chosen": -0.27679648995399475, |
|
"logps/rejected": -0.301949143409729, |
|
"loss": 1.574, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6919912099838257, |
|
"rewards/margins": 0.06288158893585205, |
|
"rewards/rejected": -0.7548727989196777, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.2992252204114347, |
|
"grad_norm": 6.963750839233398, |
|
"learning_rate": 4.4191118508950277e-07, |
|
"logits/chosen": -0.9832889437675476, |
|
"logits/rejected": -1.041925311088562, |
|
"logps/chosen": -0.3261723518371582, |
|
"logps/rejected": -0.36972764134407043, |
|
"loss": 1.6004, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8154308199882507, |
|
"rewards/margins": 0.10888823121786118, |
|
"rewards/rejected": -0.9243191480636597, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3013625434143735, |
|
"grad_norm": 4.65049934387207, |
|
"learning_rate": 4.407073929655666e-07, |
|
"logits/chosen": -0.8786113858222961, |
|
"logits/rejected": -0.8743698000907898, |
|
"logps/chosen": -0.3489750027656555, |
|
"logps/rejected": -0.34578827023506165, |
|
"loss": 1.613, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8724376559257507, |
|
"rewards/margins": -0.007966993376612663, |
|
"rewards/rejected": -0.8644705414772034, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.3034998664173123, |
|
"grad_norm": 5.086356163024902, |
|
"learning_rate": 4.394929307863632e-07, |
|
"logits/chosen": -1.178961157798767, |
|
"logits/rejected": -1.1221526861190796, |
|
"logps/chosen": -0.30651959776878357, |
|
"logps/rejected": -0.27971014380455017, |
|
"loss": 1.5804, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7662990093231201, |
|
"rewards/margins": -0.0670236200094223, |
|
"rewards/rejected": -0.6992753744125366, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.30563718942025114, |
|
"grad_norm": 4.039379119873047, |
|
"learning_rate": 4.3826786650090273e-07, |
|
"logits/chosen": -1.1027284860610962, |
|
"logits/rejected": -1.0947867631912231, |
|
"logps/chosen": -0.3164759874343872, |
|
"logps/rejected": -0.4346773028373718, |
|
"loss": 1.5507, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.791189968585968, |
|
"rewards/margins": 0.29550325870513916, |
|
"rewards/rejected": -1.086693286895752, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.3077745124231899, |
|
"grad_norm": 5.900667190551758, |
|
"learning_rate": 4.370322686513817e-07, |
|
"logits/chosen": -0.8383625149726868, |
|
"logits/rejected": -0.7769290804862976, |
|
"logps/chosen": -0.2520799934864044, |
|
"logps/rejected": -0.23787729442119598, |
|
"loss": 1.5816, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.6302000284194946, |
|
"rewards/margins": -0.03550675883889198, |
|
"rewards/rejected": -0.5946931838989258, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.30991183542612877, |
|
"grad_norm": 5.484445095062256, |
|
"learning_rate": 4.357862063693485e-07, |
|
"logits/chosen": -0.9914720058441162, |
|
"logits/rejected": -1.0965267419815063, |
|
"logps/chosen": -0.2822697162628174, |
|
"logps/rejected": -0.3381388485431671, |
|
"loss": 1.5638, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7056742906570435, |
|
"rewards/margins": 0.13967278599739075, |
|
"rewards/rejected": -0.8453471064567566, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.3120491584290676, |
|
"grad_norm": 2.739793539047241, |
|
"learning_rate": 4.345297493718352e-07, |
|
"logits/chosen": -0.9342893362045288, |
|
"logits/rejected": -0.8757031559944153, |
|
"logps/chosen": -0.512154221534729, |
|
"logps/rejected": -0.6000754237174988, |
|
"loss": 1.5743, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2803857326507568, |
|
"rewards/margins": 0.2198028415441513, |
|
"rewards/rejected": -1.5001884698867798, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3141864814320064, |
|
"grad_norm": 4.219600677490234, |
|
"learning_rate": 4.332629679574565e-07, |
|
"logits/chosen": -0.7380187511444092, |
|
"logits/rejected": -0.8239220380783081, |
|
"logps/chosen": -0.24688729643821716, |
|
"logps/rejected": -0.2952543795108795, |
|
"loss": 1.5583, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6172181963920593, |
|
"rewards/margins": 0.12091781944036484, |
|
"rewards/rejected": -0.7381359934806824, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.31632380443494523, |
|
"grad_norm": 7.114157676696777, |
|
"learning_rate": 4.319859330024777e-07, |
|
"logits/chosen": -0.950808048248291, |
|
"logits/rejected": -0.873584508895874, |
|
"logps/chosen": -0.28023314476013184, |
|
"logps/rejected": -0.37878137826919556, |
|
"loss": 1.575, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7005828022956848, |
|
"rewards/margins": 0.2463706135749817, |
|
"rewards/rejected": -0.9469534754753113, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.3184611274378841, |
|
"grad_norm": 5.033133029937744, |
|
"learning_rate": 4.3069871595684787e-07, |
|
"logits/chosen": -0.9993598461151123, |
|
"logits/rejected": -1.1495643854141235, |
|
"logps/chosen": -0.31961789727211, |
|
"logps/rejected": -0.4130839705467224, |
|
"loss": 1.6089, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7990447878837585, |
|
"rewards/margins": 0.2336651086807251, |
|
"rewards/rejected": -1.0327098369598389, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.32059845044082286, |
|
"grad_norm": 3.7931158542633057, |
|
"learning_rate": 4.294013888402029e-07, |
|
"logits/chosen": -1.0581141710281372, |
|
"logits/rejected": -0.958967924118042, |
|
"logps/chosen": -0.30636316537857056, |
|
"logps/rejected": -0.31132641434669495, |
|
"loss": 1.6122, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7659078240394592, |
|
"rewards/margins": 0.012408185750246048, |
|
"rewards/rejected": -0.7783160209655762, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3227357734437617, |
|
"grad_norm": 4.442758560180664, |
|
"learning_rate": 4.280940242378362e-07, |
|
"logits/chosen": -0.9492220878601074, |
|
"logits/rejected": -0.9829614162445068, |
|
"logps/chosen": -0.26527076959609985, |
|
"logps/rejected": -0.5424583554267883, |
|
"loss": 1.5182, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6631768941879272, |
|
"rewards/margins": 0.6929690837860107, |
|
"rewards/rejected": -1.3561458587646484, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.3248730964467005, |
|
"grad_norm": 4.347165107727051, |
|
"learning_rate": 4.2677669529663686e-07, |
|
"logits/chosen": -0.9675495624542236, |
|
"logits/rejected": -0.9267060160636902, |
|
"logps/chosen": -0.28724977374076843, |
|
"logps/rejected": -0.27893343567848206, |
|
"loss": 1.6064, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7181244492530823, |
|
"rewards/margins": -0.02079082280397415, |
|
"rewards/rejected": -0.697333574295044, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3270104194496393, |
|
"grad_norm": 3.866643190383911, |
|
"learning_rate": 4.254494757209979e-07, |
|
"logits/chosen": -1.0312570333480835, |
|
"logits/rejected": -0.8400145173072815, |
|
"logps/chosen": -0.2714364230632782, |
|
"logps/rejected": -0.3370465636253357, |
|
"loss": 1.5993, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6785910725593567, |
|
"rewards/margins": 0.16402524709701538, |
|
"rewards/rejected": -0.8426163196563721, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.32914774245257816, |
|
"grad_norm": 7.2947211265563965, |
|
"learning_rate": 4.2411243976869173e-07, |
|
"logits/chosen": -1.1030328273773193, |
|
"logits/rejected": -1.107038140296936, |
|
"logps/chosen": -0.31799790263175964, |
|
"logps/rejected": -0.3556910455226898, |
|
"loss": 1.5435, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7949947714805603, |
|
"rewards/margins": 0.09423284232616425, |
|
"rewards/rejected": -0.8892276287078857, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.33128506545551695, |
|
"grad_norm": 6.094887733459473, |
|
"learning_rate": 4.227656622467162e-07, |
|
"logits/chosen": -0.9807777404785156, |
|
"logits/rejected": -0.9574925303459167, |
|
"logps/chosen": -0.36069509387016296, |
|
"logps/rejected": -0.411272257566452, |
|
"loss": 1.5455, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9017376899719238, |
|
"rewards/margins": 0.12644296884536743, |
|
"rewards/rejected": -1.028180718421936, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3334223884584558, |
|
"grad_norm": 7.840887069702148, |
|
"learning_rate": 4.2140921850710855e-07, |
|
"logits/chosen": -1.1150490045547485, |
|
"logits/rejected": -1.1116127967834473, |
|
"logps/chosen": -0.2742304801940918, |
|
"logps/rejected": -0.3083428740501404, |
|
"loss": 1.546, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6855762004852295, |
|
"rewards/margins": 0.08528101444244385, |
|
"rewards/rejected": -0.7708572149276733, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.3355597114613946, |
|
"grad_norm": 15.734699249267578, |
|
"learning_rate": 4.200431844427298e-07, |
|
"logits/chosen": -0.9994797706604004, |
|
"logits/rejected": -1.077652931213379, |
|
"logps/chosen": -0.3408905565738678, |
|
"logps/rejected": -0.605131208896637, |
|
"loss": 1.5829, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8522265553474426, |
|
"rewards/margins": 0.6606015563011169, |
|
"rewards/rejected": -1.51282799243927, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.3376970344643334, |
|
"grad_norm": 4.568877696990967, |
|
"learning_rate": 4.186676364830186e-07, |
|
"logits/chosen": -0.8166912794113159, |
|
"logits/rejected": -0.9158197641372681, |
|
"logps/chosen": -0.3100201189517975, |
|
"logps/rejected": -0.4257528781890869, |
|
"loss": 1.5949, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7750502824783325, |
|
"rewards/margins": 0.28933185338974, |
|
"rewards/rejected": -1.0643821954727173, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.33983435746727225, |
|
"grad_norm": 7.091736793518066, |
|
"learning_rate": 4.172826515897145e-07, |
|
"logits/chosen": -0.9496626853942871, |
|
"logits/rejected": -0.8826749920845032, |
|
"logps/chosen": -0.2823619842529297, |
|
"logps/rejected": -0.25573766231536865, |
|
"loss": 1.6079, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7059049606323242, |
|
"rewards/margins": -0.06656082719564438, |
|
"rewards/rejected": -0.6393441557884216, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.34197168047021104, |
|
"grad_norm": 9.935672760009766, |
|
"learning_rate": 4.158883072525528e-07, |
|
"logits/chosen": -1.139492392539978, |
|
"logits/rejected": -0.9911923408508301, |
|
"logps/chosen": -0.24080964922904968, |
|
"logps/rejected": -0.23250696063041687, |
|
"loss": 1.5373, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6020240783691406, |
|
"rewards/margins": -0.020756704732775688, |
|
"rewards/rejected": -0.5812674164772034, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3441090034731499, |
|
"grad_norm": 15.794203758239746, |
|
"learning_rate": 4.1448468148492814e-07, |
|
"logits/chosen": -1.019397258758545, |
|
"logits/rejected": -0.9881049394607544, |
|
"logps/chosen": -0.3946765065193176, |
|
"logps/rejected": -0.3796921670436859, |
|
"loss": 1.5431, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9866912364959717, |
|
"rewards/margins": -0.03746076300740242, |
|
"rewards/rejected": -0.9492304921150208, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.3462463264760887, |
|
"grad_norm": 3.839250087738037, |
|
"learning_rate": 4.130718528195303e-07, |
|
"logits/chosen": -0.9311838746070862, |
|
"logits/rejected": -0.8956501483917236, |
|
"logps/chosen": -0.282693088054657, |
|
"logps/rejected": -0.2629316449165344, |
|
"loss": 1.5818, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7067327499389648, |
|
"rewards/margins": -0.04940361529588699, |
|
"rewards/rejected": -0.6573290824890137, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.3483836494790275, |
|
"grad_norm": 5.865959644317627, |
|
"learning_rate": 4.1164990030394985e-07, |
|
"logits/chosen": -1.0395972728729248, |
|
"logits/rejected": -0.9770699143409729, |
|
"logps/chosen": -0.3128069043159485, |
|
"logps/rejected": -0.31642264127731323, |
|
"loss": 1.5692, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.7820172905921936, |
|
"rewards/margins": 0.009039390832185745, |
|
"rewards/rejected": -0.791056752204895, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.35052097248196634, |
|
"grad_norm": 4.154603481292725, |
|
"learning_rate": 4.10218903496256e-07, |
|
"logits/chosen": -1.0948988199234009, |
|
"logits/rejected": -0.9907031059265137, |
|
"logps/chosen": -0.30839213728904724, |
|
"logps/rejected": -0.29299482703208923, |
|
"loss": 1.5829, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.7709803581237793, |
|
"rewards/margins": -0.03849326819181442, |
|
"rewards/rejected": -0.7324870824813843, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.3526582954849052, |
|
"grad_norm": 5.253880500793457, |
|
"learning_rate": 4.087789424605447e-07, |
|
"logits/chosen": -1.0539865493774414, |
|
"logits/rejected": -0.9663246870040894, |
|
"logps/chosen": -0.26886874437332153, |
|
"logps/rejected": -0.43172940611839294, |
|
"loss": 1.5157, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.672171950340271, |
|
"rewards/margins": 0.40715163946151733, |
|
"rewards/rejected": -1.0793235301971436, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.35479561848784397, |
|
"grad_norm": 11.49240493774414, |
|
"learning_rate": 4.0733009776245937e-07, |
|
"logits/chosen": -0.9969057440757751, |
|
"logits/rejected": -1.0402690172195435, |
|
"logps/chosen": -0.3554040193557739, |
|
"logps/rejected": -0.396072655916214, |
|
"loss": 1.584, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8885101079940796, |
|
"rewards/margins": 0.10167157649993896, |
|
"rewards/rejected": -0.9901816248893738, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.3569329414907828, |
|
"grad_norm": 5.116168975830078, |
|
"learning_rate": 4.058724504646834e-07, |
|
"logits/chosen": -0.9382141828536987, |
|
"logits/rejected": -0.8863942623138428, |
|
"logps/chosen": -0.261793315410614, |
|
"logps/rejected": -0.45277461409568787, |
|
"loss": 1.539, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.6544832587242126, |
|
"rewards/margins": 0.47745317220687866, |
|
"rewards/rejected": -1.1319365501403809, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.3590702644937216, |
|
"grad_norm": 10.550248146057129, |
|
"learning_rate": 4.0440608212240445e-07, |
|
"logits/chosen": -1.0490831136703491, |
|
"logits/rejected": -1.1039912700653076, |
|
"logps/chosen": -0.3632212281227112, |
|
"logps/rejected": -0.3736804723739624, |
|
"loss": 1.5845, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9080529808998108, |
|
"rewards/margins": 0.026148155331611633, |
|
"rewards/rejected": -0.934201180934906, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.36120758749666043, |
|
"grad_norm": 4.120011806488037, |
|
"learning_rate": 4.0293107477875156e-07, |
|
"logits/chosen": -0.914804220199585, |
|
"logits/rejected": -0.9306747317314148, |
|
"logps/chosen": -0.3597089350223541, |
|
"logps/rejected": -0.39882034063339233, |
|
"loss": 1.5235, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8992725014686584, |
|
"rewards/margins": 0.09777843952178955, |
|
"rewards/rejected": -0.9970508813858032, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.36334491049959927, |
|
"grad_norm": 3.9872193336486816, |
|
"learning_rate": 4.0144751096020497e-07, |
|
"logits/chosen": -1.0519163608551025, |
|
"logits/rejected": -0.9880449175834656, |
|
"logps/chosen": -0.27723756432533264, |
|
"logps/rejected": -0.3831270933151245, |
|
"loss": 1.5744, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6930938959121704, |
|
"rewards/margins": 0.26472383737564087, |
|
"rewards/rejected": -0.9578177332878113, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36548223350253806, |
|
"grad_norm": 5.331676006317139, |
|
"learning_rate": 3.999554736719785e-07, |
|
"logits/chosen": -1.1113324165344238, |
|
"logits/rejected": -1.1892024278640747, |
|
"logps/chosen": -0.3108530640602112, |
|
"logps/rejected": -0.5784565806388855, |
|
"loss": 1.4846, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7771324515342712, |
|
"rewards/margins": 0.6690089702606201, |
|
"rewards/rejected": -1.4461414813995361, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.3676195565054769, |
|
"grad_norm": 6.586511611938477, |
|
"learning_rate": 3.9845504639337535e-07, |
|
"logits/chosen": -1.2047513723373413, |
|
"logits/rejected": -1.1406968832015991, |
|
"logps/chosen": -0.3595273196697235, |
|
"logps/rejected": -0.32145068049430847, |
|
"loss": 1.5328, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8988182544708252, |
|
"rewards/margins": -0.0951915979385376, |
|
"rewards/rejected": -0.8036267757415771, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.36975687950841574, |
|
"grad_norm": 6.111835479736328, |
|
"learning_rate": 3.9694631307311825e-07, |
|
"logits/chosen": -0.8004586696624756, |
|
"logits/rejected": -0.7772153615951538, |
|
"logps/chosen": -0.4090813100337982, |
|
"logps/rejected": -0.4898335635662079, |
|
"loss": 1.559, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0227031707763672, |
|
"rewards/margins": 0.20188063383102417, |
|
"rewards/rejected": -1.2245839834213257, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.3718942025113545, |
|
"grad_norm": 7.0863189697265625, |
|
"learning_rate": 3.954293581246514e-07, |
|
"logits/chosen": -0.9679336547851562, |
|
"logits/rejected": -0.9125540256500244, |
|
"logps/chosen": -0.29369306564331055, |
|
"logps/rejected": -0.31403255462646484, |
|
"loss": 1.5375, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7342327237129211, |
|
"rewards/margins": 0.05084871128201485, |
|
"rewards/rejected": -0.7850814461708069, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.37403152551429336, |
|
"grad_norm": 7.140958309173584, |
|
"learning_rate": 3.939042664214184e-07, |
|
"logits/chosen": -0.949452817440033, |
|
"logits/rejected": -1.0473122596740723, |
|
"logps/chosen": -0.2707624137401581, |
|
"logps/rejected": -0.32049351930618286, |
|
"loss": 1.5626, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6769061088562012, |
|
"rewards/margins": 0.12432771921157837, |
|
"rewards/rejected": -0.8012337684631348, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.37616884851723215, |
|
"grad_norm": 7.0456695556640625, |
|
"learning_rate": 3.92371123292113e-07, |
|
"logits/chosen": -1.0727981328964233, |
|
"logits/rejected": -1.1329890489578247, |
|
"logps/chosen": -0.29705438017845154, |
|
"logps/rejected": -0.3278125524520874, |
|
"loss": 1.6107, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7426358461380005, |
|
"rewards/margins": 0.07689555734395981, |
|
"rewards/rejected": -0.8195314407348633, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.378306171520171, |
|
"grad_norm": 5.836486339569092, |
|
"learning_rate": 3.908300145159055e-07, |
|
"logits/chosen": -0.9942230582237244, |
|
"logits/rejected": -1.0356171131134033, |
|
"logps/chosen": -0.31931719183921814, |
|
"logps/rejected": -0.33853164315223694, |
|
"loss": 1.5837, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7982929348945618, |
|
"rewards/margins": 0.0480361208319664, |
|
"rewards/rejected": -0.8463290929794312, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.3804434945231098, |
|
"grad_norm": 8.505417823791504, |
|
"learning_rate": 3.8928102631764304e-07, |
|
"logits/chosen": -1.0212180614471436, |
|
"logits/rejected": -1.087773323059082, |
|
"logps/chosen": -0.3532945513725281, |
|
"logps/rejected": -0.5901373028755188, |
|
"loss": 1.5557, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.883236289024353, |
|
"rewards/margins": 0.5921069979667664, |
|
"rewards/rejected": -1.4753433465957642, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.3825808175260486, |
|
"grad_norm": 6.116640090942383, |
|
"learning_rate": 3.877242453630256e-07, |
|
"logits/chosen": -1.2131381034851074, |
|
"logits/rejected": -1.0686910152435303, |
|
"logps/chosen": -0.3515666127204895, |
|
"logps/rejected": -0.3958896994590759, |
|
"loss": 1.5671, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8789165019989014, |
|
"rewards/margins": 0.11080773174762726, |
|
"rewards/rejected": -0.9897242784500122, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.38471814052898745, |
|
"grad_norm": 6.355064868927002, |
|
"learning_rate": 3.8615975875375676e-07, |
|
"logits/chosen": -0.9339985847473145, |
|
"logits/rejected": -0.9060691595077515, |
|
"logps/chosen": -0.32276052236557007, |
|
"logps/rejected": -0.37401843070983887, |
|
"loss": 1.548, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8069013953208923, |
|
"rewards/margins": 0.1281447559595108, |
|
"rewards/rejected": -0.9350461959838867, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.38685546353192624, |
|
"grad_norm": 6.534996509552002, |
|
"learning_rate": 3.8458765402267056e-07, |
|
"logits/chosen": -0.8938146233558655, |
|
"logits/rejected": -0.9069436192512512, |
|
"logps/chosen": -0.336931049823761, |
|
"logps/rejected": -0.4913772940635681, |
|
"loss": 1.5787, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8423275947570801, |
|
"rewards/margins": 0.3861156404018402, |
|
"rewards/rejected": -1.2284431457519531, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.3889927865348651, |
|
"grad_norm": 10.956029891967773, |
|
"learning_rate": 3.8300801912883414e-07, |
|
"logits/chosen": -1.0703511238098145, |
|
"logits/rejected": -0.9989842176437378, |
|
"logps/chosen": -0.26583123207092285, |
|
"logps/rejected": -0.2977861762046814, |
|
"loss": 1.5609, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6645781397819519, |
|
"rewards/margins": 0.07988730072975159, |
|
"rewards/rejected": -0.7444654107093811, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.3911301095378039, |
|
"grad_norm": 10.217528343200684, |
|
"learning_rate": 3.8142094245262615e-07, |
|
"logits/chosen": -1.145703673362732, |
|
"logits/rejected": -1.0282764434814453, |
|
"logps/chosen": -0.3538467586040497, |
|
"logps/rejected": -0.3405742645263672, |
|
"loss": 1.5855, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.8846168518066406, |
|
"rewards/margins": -0.03318122401833534, |
|
"rewards/rejected": -0.8514357209205627, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.3932674325407427, |
|
"grad_norm": 4.681653022766113, |
|
"learning_rate": 3.7982651279079227e-07, |
|
"logits/chosen": -1.2552436590194702, |
|
"logits/rejected": -1.259030818939209, |
|
"logps/chosen": -0.2886826992034912, |
|
"logps/rejected": -0.4662485718727112, |
|
"loss": 1.5609, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7217066884040833, |
|
"rewards/margins": 0.44391465187072754, |
|
"rewards/rejected": -1.1656213998794556, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.39540475554368154, |
|
"grad_norm": 4.339652061462402, |
|
"learning_rate": 3.7822481935147655e-07, |
|
"logits/chosen": -1.0260683298110962, |
|
"logits/rejected": -1.015075922012329, |
|
"logps/chosen": -0.36714547872543335, |
|
"logps/rejected": -0.5204967260360718, |
|
"loss": 1.5682, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.9178636074066162, |
|
"rewards/margins": 0.38337817788124084, |
|
"rewards/rejected": -1.3012418746948242, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.3975420785466204, |
|
"grad_norm": 5.974206924438477, |
|
"learning_rate": 3.766159517492307e-07, |
|
"logits/chosen": -1.0455535650253296, |
|
"logits/rejected": -1.1319448947906494, |
|
"logps/chosen": -0.41289687156677246, |
|
"logps/rejected": -0.613991379737854, |
|
"loss": 1.5825, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.0322421789169312, |
|
"rewards/margins": 0.5027362108230591, |
|
"rewards/rejected": -1.5349783897399902, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.39967940154955917, |
|
"grad_norm": 8.767956733703613, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": -1.0032697916030884, |
|
"logits/rejected": -0.9564570784568787, |
|
"logps/chosen": -0.31954333186149597, |
|
"logps/rejected": -0.4057242274284363, |
|
"loss": 1.6033, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7988582849502563, |
|
"rewards/margins": 0.21545226871967316, |
|
"rewards/rejected": -1.014310598373413, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.401816724552498, |
|
"grad_norm": 4.35204553604126, |
|
"learning_rate": 3.7337705451608667e-07, |
|
"logits/chosen": -1.1166412830352783, |
|
"logits/rejected": -1.0849709510803223, |
|
"logps/chosen": -0.3008464574813843, |
|
"logps/rejected": -0.2960435450077057, |
|
"loss": 1.5105, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7521160840988159, |
|
"rewards/margins": -0.012007185257971287, |
|
"rewards/rejected": -0.7401089072227478, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.4039540475554368, |
|
"grad_norm": 3.929826021194458, |
|
"learning_rate": 3.717472061010918e-07, |
|
"logits/chosen": -1.1040568351745605, |
|
"logits/rejected": -1.062517523765564, |
|
"logps/chosen": -0.3373297154903412, |
|
"logps/rejected": -0.5283687710762024, |
|
"loss": 1.5152, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8433243632316589, |
|
"rewards/margins": 0.47759759426116943, |
|
"rewards/rejected": -1.3209218978881836, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.40609137055837563, |
|
"grad_norm": 4.574549198150635, |
|
"learning_rate": 3.7011054594483443e-07, |
|
"logits/chosen": -1.1240224838256836, |
|
"logits/rejected": -1.0487711429595947, |
|
"logps/chosen": -0.3029400706291199, |
|
"logps/rejected": -0.41601306200027466, |
|
"loss": 1.4632, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7573502063751221, |
|
"rewards/margins": 0.28268247842788696, |
|
"rewards/rejected": -1.0400326251983643, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.40822869356131447, |
|
"grad_norm": 4.018647193908691, |
|
"learning_rate": 3.6846716561824967e-07, |
|
"logits/chosen": -0.80363529920578, |
|
"logits/rejected": -0.9596213102340698, |
|
"logps/chosen": -0.3076530694961548, |
|
"logps/rejected": -0.5633202195167542, |
|
"loss": 1.5163, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7691327333450317, |
|
"rewards/margins": 0.6391679048538208, |
|
"rewards/rejected": -1.408300518989563, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.41036601656425326, |
|
"grad_norm": 7.332089424133301, |
|
"learning_rate": 3.668171570682655e-07, |
|
"logits/chosen": -0.9585205316543579, |
|
"logits/rejected": -0.9636404514312744, |
|
"logps/chosen": -0.33684462308883667, |
|
"logps/rejected": -0.3766506016254425, |
|
"loss": 1.5671, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8421115279197693, |
|
"rewards/margins": 0.09951499849557877, |
|
"rewards/rejected": -0.9416265487670898, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.4125033395671921, |
|
"grad_norm": 8.853985786437988, |
|
"learning_rate": 3.6516061261265805e-07, |
|
"logits/chosen": -1.027462363243103, |
|
"logits/rejected": -0.9135668873786926, |
|
"logps/chosen": -0.3370886445045471, |
|
"logps/rejected": -0.3812939524650574, |
|
"loss": 1.5598, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.8427215814590454, |
|
"rewards/margins": 0.11051319539546967, |
|
"rewards/rejected": -0.953234851360321, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.41464066257013094, |
|
"grad_norm": 5.765879154205322, |
|
"learning_rate": 3.634976249348867e-07, |
|
"logits/chosen": -1.1132540702819824, |
|
"logits/rejected": -1.003641963005066, |
|
"logps/chosen": -0.3518536686897278, |
|
"logps/rejected": -0.5063703656196594, |
|
"loss": 1.5071, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8796342015266418, |
|
"rewards/margins": 0.38629183173179626, |
|
"rewards/rejected": -1.2659261226654053, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.4167779855730697, |
|
"grad_norm": 17.148714065551758, |
|
"learning_rate": 3.618282870789081e-07, |
|
"logits/chosen": -1.041336178779602, |
|
"logits/rejected": -1.0308490991592407, |
|
"logps/chosen": -0.4422120749950409, |
|
"logps/rejected": -0.4290231466293335, |
|
"loss": 1.6783, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1055301427841187, |
|
"rewards/margins": -0.032972272485494614, |
|
"rewards/rejected": -1.072557806968689, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.41891530857600856, |
|
"grad_norm": 4.9743332862854, |
|
"learning_rate": 3.601526924439709e-07, |
|
"logits/chosen": -0.9943188428878784, |
|
"logits/rejected": -1.029951810836792, |
|
"logps/chosen": -0.2909929156303406, |
|
"logps/rejected": -0.3154396116733551, |
|
"loss": 1.5771, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7274821996688843, |
|
"rewards/margins": 0.061116717755794525, |
|
"rewards/rejected": -0.7885990142822266, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.42105263157894735, |
|
"grad_norm": 6.192495346069336, |
|
"learning_rate": 3.584709347793895e-07, |
|
"logits/chosen": -0.8082910776138306, |
|
"logits/rejected": -0.8116950988769531, |
|
"logps/chosen": -0.2856646478176117, |
|
"logps/rejected": -0.30446913838386536, |
|
"loss": 1.5157, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.7141616344451904, |
|
"rewards/margins": 0.04701121151447296, |
|
"rewards/rejected": -0.7611728310585022, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.4231899545818862, |
|
"grad_norm": 4.891373157501221, |
|
"learning_rate": 3.567831081792992e-07, |
|
"logits/chosen": -1.0285996198654175, |
|
"logits/rejected": -1.034073829650879, |
|
"logps/chosen": -0.3283870220184326, |
|
"logps/rejected": -0.5464656949043274, |
|
"loss": 1.4871, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8209674954414368, |
|
"rewards/margins": 0.5451965928077698, |
|
"rewards/rejected": -1.366164207458496, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.425327277584825, |
|
"grad_norm": 13.869108200073242, |
|
"learning_rate": 3.550893070773914e-07, |
|
"logits/chosen": -1.0854626893997192, |
|
"logits/rejected": -1.0260361433029175, |
|
"logps/chosen": -0.39059579372406006, |
|
"logps/rejected": -0.4412023425102234, |
|
"loss": 1.6672, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9764894247055054, |
|
"rewards/margins": 0.12651631236076355, |
|
"rewards/rejected": -1.1030058860778809, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.4274646005877638, |
|
"grad_norm": 29.342126846313477, |
|
"learning_rate": 3.5338962624163016e-07, |
|
"logits/chosen": -1.1286933422088623, |
|
"logits/rejected": -1.1019514799118042, |
|
"logps/chosen": -0.29572370648384094, |
|
"logps/rejected": -0.3438429832458496, |
|
"loss": 1.6118, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7393092513084412, |
|
"rewards/margins": 0.12029813230037689, |
|
"rewards/rejected": -0.8596073985099792, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42960192359070265, |
|
"grad_norm": 4.435629367828369, |
|
"learning_rate": 3.516841607689501e-07, |
|
"logits/chosen": -1.1759017705917358, |
|
"logits/rejected": -1.0626184940338135, |
|
"logps/chosen": -0.3442676067352295, |
|
"logps/rejected": -0.3576590120792389, |
|
"loss": 1.5321, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8606690168380737, |
|
"rewards/margins": 0.03347862884402275, |
|
"rewards/rejected": -0.8941476345062256, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.4317392465936415, |
|
"grad_norm": 5.45989990234375, |
|
"learning_rate": 3.499730060799352e-07, |
|
"logits/chosen": -1.1944599151611328, |
|
"logits/rejected": -1.1447770595550537, |
|
"logps/chosen": -0.300496369600296, |
|
"logps/rejected": -0.3771470785140991, |
|
"loss": 1.4774, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7512409687042236, |
|
"rewards/margins": 0.19162678718566895, |
|
"rewards/rejected": -0.9428676962852478, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.4338765695965803, |
|
"grad_norm": 4.396944046020508, |
|
"learning_rate": 3.482562579134809e-07, |
|
"logits/chosen": -0.9371283054351807, |
|
"logits/rejected": -0.9887581467628479, |
|
"logps/chosen": -0.34337079524993896, |
|
"logps/rejected": -0.31941717863082886, |
|
"loss": 1.5624, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.8584270477294922, |
|
"rewards/margins": -0.05988417938351631, |
|
"rewards/rejected": -0.798542857170105, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.4360138925995191, |
|
"grad_norm": 5.779623508453369, |
|
"learning_rate": 3.465340123214365e-07, |
|
"logits/chosen": -0.9840802550315857, |
|
"logits/rejected": -0.9649553298950195, |
|
"logps/chosen": -0.5713462829589844, |
|
"logps/rejected": -0.7279367446899414, |
|
"loss": 1.5474, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4283654689788818, |
|
"rewards/margins": 0.39147651195526123, |
|
"rewards/rejected": -1.8198421001434326, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.4381512156024579, |
|
"grad_norm": 10.535792350769043, |
|
"learning_rate": 3.448063656632321e-07, |
|
"logits/chosen": -1.1214243173599243, |
|
"logits/rejected": -1.0236384868621826, |
|
"logps/chosen": -0.327178418636322, |
|
"logps/rejected": -0.3443678021430969, |
|
"loss": 1.5847, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.8179460167884827, |
|
"rewards/margins": 0.042973555624485016, |
|
"rewards/rejected": -0.8609195351600647, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44028853860539674, |
|
"grad_norm": 5.442493915557861, |
|
"learning_rate": 3.430734146004863e-07, |
|
"logits/chosen": -1.1191673278808594, |
|
"logits/rejected": -0.9904736876487732, |
|
"logps/chosen": -0.2607005536556244, |
|
"logps/rejected": -0.2681718170642853, |
|
"loss": 1.542, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.6517513990402222, |
|
"rewards/margins": 0.01867814175784588, |
|
"rewards/rejected": -0.670429527759552, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.4424258616083356, |
|
"grad_norm": 6.850170612335205, |
|
"learning_rate": 3.413352560915988e-07, |
|
"logits/chosen": -1.0275464057922363, |
|
"logits/rejected": -1.0052015781402588, |
|
"logps/chosen": -0.3867985010147095, |
|
"logps/rejected": -0.4938412010669708, |
|
"loss": 1.6312, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9669963121414185, |
|
"rewards/margins": 0.267606645822525, |
|
"rewards/rejected": -1.234602928161621, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.44456318461127436, |
|
"grad_norm": 9.965657234191895, |
|
"learning_rate": 3.39591987386325e-07, |
|
"logits/chosen": -0.9659216403961182, |
|
"logits/rejected": -0.9130998253822327, |
|
"logps/chosen": -0.33372846245765686, |
|
"logps/rejected": -0.3092671036720276, |
|
"loss": 1.5355, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.834321141242981, |
|
"rewards/margins": -0.061153292655944824, |
|
"rewards/rejected": -0.7731677889823914, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.4467005076142132, |
|
"grad_norm": 5.595789909362793, |
|
"learning_rate": 3.378437060203357e-07, |
|
"logits/chosen": -1.2547951936721802, |
|
"logits/rejected": -1.1610562801361084, |
|
"logps/chosen": -0.34088316559791565, |
|
"logps/rejected": -0.34324803948402405, |
|
"loss": 1.6059, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8522078990936279, |
|
"rewards/margins": 0.00591224804520607, |
|
"rewards/rejected": -0.8581202030181885, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.448837830617152, |
|
"grad_norm": 17.92057991027832, |
|
"learning_rate": 3.360905098097587e-07, |
|
"logits/chosen": -1.0579925775527954, |
|
"logits/rejected": -0.9834758043289185, |
|
"logps/chosen": -0.38748034834861755, |
|
"logps/rejected": -0.6860374808311462, |
|
"loss": 1.5363, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9687008857727051, |
|
"rewards/margins": 0.7463930249214172, |
|
"rewards/rejected": -1.715093970298767, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45097515362009083, |
|
"grad_norm": 6.570519924163818, |
|
"learning_rate": 3.343324968457075e-07, |
|
"logits/chosen": -1.0359179973602295, |
|
"logits/rejected": -0.9564209580421448, |
|
"logps/chosen": -0.38825637102127075, |
|
"logps/rejected": -0.3802332878112793, |
|
"loss": 1.5384, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.970641016960144, |
|
"rewards/margins": -0.02005772665143013, |
|
"rewards/rejected": -0.950583279132843, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.45311247662302967, |
|
"grad_norm": 5.518048286437988, |
|
"learning_rate": 3.325697654887918e-07, |
|
"logits/chosen": -0.998512327671051, |
|
"logits/rejected": -0.9381792545318604, |
|
"logps/chosen": -0.3794736862182617, |
|
"logps/rejected": -0.6236636638641357, |
|
"loss": 1.5091, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9486840963363647, |
|
"rewards/margins": 0.6104748249053955, |
|
"rewards/rejected": -1.5591590404510498, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.45524979962596845, |
|
"grad_norm": 12.084184646606445, |
|
"learning_rate": 3.30802414363615e-07, |
|
"logits/chosen": -0.9403542280197144, |
|
"logits/rejected": -0.6737431287765503, |
|
"logps/chosen": -0.4244030714035034, |
|
"logps/rejected": -0.43834903836250305, |
|
"loss": 1.4581, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.0610076189041138, |
|
"rewards/margins": 0.03486503288149834, |
|
"rewards/rejected": -1.0958726406097412, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.4573871226289073, |
|
"grad_norm": 3.5294582843780518, |
|
"learning_rate": 3.2903054235325613e-07, |
|
"logits/chosen": -1.1825759410858154, |
|
"logits/rejected": -1.210655927658081, |
|
"logps/chosen": -0.3315200209617615, |
|
"logps/rejected": -0.46745753288269043, |
|
"loss": 1.5312, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8287999629974365, |
|
"rewards/margins": 0.33984383940696716, |
|
"rewards/rejected": -1.168643832206726, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.45952444563184613, |
|
"grad_norm": 6.134922027587891, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": -1.1072171926498413, |
|
"logits/rejected": -1.208855152130127, |
|
"logps/chosen": -0.4051734209060669, |
|
"logps/rejected": -0.6289750337600708, |
|
"loss": 1.5473, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.012933611869812, |
|
"rewards/margins": 0.5595039129257202, |
|
"rewards/rejected": -1.5724375247955322, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.4616617686347849, |
|
"grad_norm": 6.10336446762085, |
|
"learning_rate": 3.2547363246847546e-07, |
|
"logits/chosen": -1.0125056505203247, |
|
"logits/rejected": -1.0291041135787964, |
|
"logps/chosen": -0.3960397243499756, |
|
"logps/rejected": -0.6897832751274109, |
|
"loss": 1.5091, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.990099310874939, |
|
"rewards/margins": 0.7343588471412659, |
|
"rewards/rejected": -1.72445809841156, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.46379909163772376, |
|
"grad_norm": 9.434686660766602, |
|
"learning_rate": 3.2368879360272606e-07, |
|
"logits/chosen": -1.0608569383621216, |
|
"logits/rejected": -1.0038235187530518, |
|
"logps/chosen": -0.4567071199417114, |
|
"logps/rejected": -0.42994168400764465, |
|
"loss": 1.616, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1417678594589233, |
|
"rewards/margins": -0.06691357493400574, |
|
"rewards/rejected": -1.0748542547225952, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.46593641464066254, |
|
"grad_norm": 5.21524715423584, |
|
"learning_rate": 3.218998318580043e-07, |
|
"logits/chosen": -1.1354548931121826, |
|
"logits/rejected": -1.0435974597930908, |
|
"logps/chosen": -0.2741296589374542, |
|
"logps/rejected": -0.37144631147384644, |
|
"loss": 1.6029, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6853241920471191, |
|
"rewards/margins": 0.24329157173633575, |
|
"rewards/rejected": -0.9286156892776489, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.4680737376436014, |
|
"grad_norm": 4.108745574951172, |
|
"learning_rate": 3.201068473265007e-07, |
|
"logits/chosen": -0.8878648281097412, |
|
"logits/rejected": -0.8645142316818237, |
|
"logps/chosen": -0.32466036081314087, |
|
"logps/rejected": -0.28847843408584595, |
|
"loss": 1.6023, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.811650812625885, |
|
"rewards/margins": -0.09045480191707611, |
|
"rewards/rejected": -0.7211960554122925, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.4702110606465402, |
|
"grad_norm": 17.760408401489258, |
|
"learning_rate": 3.1830994032548e-07, |
|
"logits/chosen": -1.197770595550537, |
|
"logits/rejected": -1.0971354246139526, |
|
"logps/chosen": -0.44655174016952515, |
|
"logps/rejected": -0.5050027370452881, |
|
"loss": 1.6185, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1163791418075562, |
|
"rewards/margins": 0.14612753689289093, |
|
"rewards/rejected": -1.2625068426132202, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.472348383649479, |
|
"grad_norm": 24.489158630371094, |
|
"learning_rate": 3.1650921139166874e-07, |
|
"logits/chosen": -0.9091489315032959, |
|
"logits/rejected": -0.9671614766120911, |
|
"logps/chosen": -0.2689306437969208, |
|
"logps/rejected": -0.2791651487350464, |
|
"loss": 1.6576, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6723266243934631, |
|
"rewards/margins": 0.02558620274066925, |
|
"rewards/rejected": -0.6979128122329712, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.47448570665241785, |
|
"grad_norm": 4.240891933441162, |
|
"learning_rate": 3.147047612756302e-07, |
|
"logits/chosen": -1.1410434246063232, |
|
"logits/rejected": -0.9494026303291321, |
|
"logps/chosen": -0.3623463809490204, |
|
"logps/rejected": -0.3546559810638428, |
|
"loss": 1.5634, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9058659672737122, |
|
"rewards/margins": -0.01922605186700821, |
|
"rewards/rejected": -0.8866399526596069, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.4766230296553567, |
|
"grad_norm": 11.909400939941406, |
|
"learning_rate": 3.128966909361271e-07, |
|
"logits/chosen": -1.0778872966766357, |
|
"logits/rejected": -0.9947598576545715, |
|
"logps/chosen": -0.2876349687576294, |
|
"logps/rejected": -0.3500506281852722, |
|
"loss": 1.5763, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7190873622894287, |
|
"rewards/margins": 0.15603923797607422, |
|
"rewards/rejected": -0.8751266002655029, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.4787603526582955, |
|
"grad_norm": 3.9968485832214355, |
|
"learning_rate": 3.110851015344735e-07, |
|
"logits/chosen": -1.043594241142273, |
|
"logits/rejected": -1.0751991271972656, |
|
"logps/chosen": -0.3403151333332062, |
|
"logps/rejected": -0.45080384612083435, |
|
"loss": 1.4964, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.850787878036499, |
|
"rewards/margins": 0.27622172236442566, |
|
"rewards/rejected": -1.127009630203247, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.4808976756612343, |
|
"grad_norm": 4.30190372467041, |
|
"learning_rate": 3.0927009442887437e-07, |
|
"logits/chosen": -0.9305320978164673, |
|
"logits/rejected": -1.0111606121063232, |
|
"logps/chosen": -0.32919758558273315, |
|
"logps/rejected": -0.34503474831581116, |
|
"loss": 1.5875, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8229939341545105, |
|
"rewards/margins": 0.0395929217338562, |
|
"rewards/rejected": -0.8625868558883667, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4830349986641731, |
|
"grad_norm": 5.68215799331665, |
|
"learning_rate": 3.074517711687549e-07, |
|
"logits/chosen": -0.9502861499786377, |
|
"logits/rejected": -0.9219777584075928, |
|
"logps/chosen": -0.40744659304618835, |
|
"logps/rejected": -0.4551170766353607, |
|
"loss": 1.5318, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0186164379119873, |
|
"rewards/margins": 0.11917618662118912, |
|
"rewards/rejected": -1.137792706489563, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.48517232166711194, |
|
"grad_norm": 5.924420356750488, |
|
"learning_rate": 3.056302334890786e-07, |
|
"logits/chosen": -1.0599088668823242, |
|
"logits/rejected": -0.9398927688598633, |
|
"logps/chosen": -0.2768517732620239, |
|
"logps/rejected": -0.3650413155555725, |
|
"loss": 1.5365, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.692129373550415, |
|
"rewards/margins": 0.22047390043735504, |
|
"rewards/rejected": -0.9126032590866089, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.4873096446700508, |
|
"grad_norm": 9.18790340423584, |
|
"learning_rate": 3.038055833046555e-07, |
|
"logits/chosen": -1.23221755027771, |
|
"logits/rejected": -1.1094015836715698, |
|
"logps/chosen": -0.3468588590621948, |
|
"logps/rejected": -0.533679723739624, |
|
"loss": 1.5544, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8671470880508423, |
|
"rewards/margins": 0.4670522212982178, |
|
"rewards/rejected": -1.3341991901397705, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.48944696767298956, |
|
"grad_norm": 3.236159563064575, |
|
"learning_rate": 3.0197792270443976e-07, |
|
"logits/chosen": -1.101015567779541, |
|
"logits/rejected": -0.980370044708252, |
|
"logps/chosen": -0.5276182293891907, |
|
"logps/rejected": -0.2907797694206238, |
|
"loss": 1.6115, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.3190455436706543, |
|
"rewards/margins": -0.592096209526062, |
|
"rewards/rejected": -0.7269493341445923, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.4915842906759284, |
|
"grad_norm": 6.479002475738525, |
|
"learning_rate": 3.001473539458182e-07, |
|
"logits/chosen": -1.1139984130859375, |
|
"logits/rejected": -1.0145281553268433, |
|
"logps/chosen": -0.40499821305274963, |
|
"logps/rejected": -0.5032440423965454, |
|
"loss": 1.5857, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0124956369400024, |
|
"rewards/margins": 0.24561452865600586, |
|
"rewards/rejected": -1.2581101655960083, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49372161367886724, |
|
"grad_norm": 11.510492324829102, |
|
"learning_rate": 2.983139794488883e-07, |
|
"logits/chosen": -1.1703720092773438, |
|
"logits/rejected": -1.0775160789489746, |
|
"logps/chosen": -0.4314059615135193, |
|
"logps/rejected": -0.39874231815338135, |
|
"loss": 1.6011, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.078514814376831, |
|
"rewards/margins": -0.08165915310382843, |
|
"rewards/rejected": -0.9968557953834534, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.49585893668180603, |
|
"grad_norm": 5.559615612030029, |
|
"learning_rate": 2.964779017907287e-07, |
|
"logits/chosen": -1.0301462411880493, |
|
"logits/rejected": -1.0727837085723877, |
|
"logps/chosen": -0.40483570098876953, |
|
"logps/rejected": -0.45073747634887695, |
|
"loss": 1.5311, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0120892524719238, |
|
"rewards/margins": 0.114754319190979, |
|
"rewards/rejected": -1.1268435716629028, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.49799625968474487, |
|
"grad_norm": 4.339590549468994, |
|
"learning_rate": 2.9463922369965915e-07, |
|
"logits/chosen": -0.9359559416770935, |
|
"logits/rejected": -0.9321252703666687, |
|
"logps/chosen": -0.35180893540382385, |
|
"logps/rejected": -0.536721408367157, |
|
"loss": 1.5723, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.8795222640037537, |
|
"rewards/margins": 0.4622812271118164, |
|
"rewards/rejected": -1.3418035507202148, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.5001335826876837, |
|
"grad_norm": 4.583502292633057, |
|
"learning_rate": 2.927980480494938e-07, |
|
"logits/chosen": -1.0992170572280884, |
|
"logits/rejected": -1.0070428848266602, |
|
"logps/chosen": -0.36073118448257446, |
|
"logps/rejected": -0.3927144706249237, |
|
"loss": 1.5751, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9018279314041138, |
|
"rewards/margins": 0.07995828986167908, |
|
"rewards/rejected": -0.9817862510681152, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.5022709056906225, |
|
"grad_norm": 4.529286861419678, |
|
"learning_rate": 2.909544778537844e-07, |
|
"logits/chosen": -1.1656326055526733, |
|
"logits/rejected": -1.0929317474365234, |
|
"logps/chosen": -0.3711916506290436, |
|
"logps/rejected": -0.3863615393638611, |
|
"loss": 1.5221, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.9279791116714478, |
|
"rewards/margins": 0.03792468085885048, |
|
"rewards/rejected": -0.9659038186073303, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5044082286935613, |
|
"grad_norm": 10.745676040649414, |
|
"learning_rate": 2.8910861626005773e-07, |
|
"logits/chosen": -1.058958649635315, |
|
"logits/rejected": -0.9348481297492981, |
|
"logps/chosen": -0.31637054681777954, |
|
"logps/rejected": -0.33828607201576233, |
|
"loss": 1.4931, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.7909263968467712, |
|
"rewards/margins": 0.05478885397315025, |
|
"rewards/rejected": -0.8457151055335999, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.5065455516965002, |
|
"grad_norm": 4.350003242492676, |
|
"learning_rate": 2.872605665440436e-07, |
|
"logits/chosen": -1.155067801475525, |
|
"logits/rejected": -1.044098138809204, |
|
"logps/chosen": -0.4006834626197815, |
|
"logps/rejected": -0.3987181484699249, |
|
"loss": 1.5417, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.001708745956421, |
|
"rewards/margins": -0.004913315176963806, |
|
"rewards/rejected": -0.9967952966690063, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.508682874699439, |
|
"grad_norm": 4.5963358879089355, |
|
"learning_rate": 2.8541043210389726e-07, |
|
"logits/chosen": -0.9011512994766235, |
|
"logits/rejected": -0.9799545407295227, |
|
"logps/chosen": -0.30112171173095703, |
|
"logps/rejected": -0.4484432339668274, |
|
"loss": 1.4859, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7528042793273926, |
|
"rewards/margins": 0.36830389499664307, |
|
"rewards/rejected": -1.1211082935333252, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.5108201977023777, |
|
"grad_norm": 5.283090591430664, |
|
"learning_rate": 2.8355831645441387e-07, |
|
"logits/chosen": -1.2146248817443848, |
|
"logits/rejected": -1.2574325799942017, |
|
"logps/chosen": -0.3450472354888916, |
|
"logps/rejected": -0.4763634204864502, |
|
"loss": 1.4888, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.862618088722229, |
|
"rewards/margins": 0.32829049229621887, |
|
"rewards/rejected": -1.1909085512161255, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.5129575207053166, |
|
"grad_norm": 6.43093729019165, |
|
"learning_rate": 2.817043232212371e-07, |
|
"logits/chosen": -1.2071186304092407, |
|
"logits/rejected": -1.1450533866882324, |
|
"logps/chosen": -0.3647967576980591, |
|
"logps/rejected": -0.4625674784183502, |
|
"loss": 1.5268, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9119919538497925, |
|
"rewards/margins": 0.24442686140537262, |
|
"rewards/rejected": -1.156418800354004, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5150948437082554, |
|
"grad_norm": 9.717195510864258, |
|
"learning_rate": 2.7984855613506106e-07, |
|
"logits/chosen": -1.1946227550506592, |
|
"logits/rejected": -1.1376502513885498, |
|
"logps/chosen": -0.29764774441719055, |
|
"logps/rejected": -0.302202045917511, |
|
"loss": 1.5322, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.74411940574646, |
|
"rewards/margins": 0.01138581894338131, |
|
"rewards/rejected": -0.7555052042007446, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.5172321667111942, |
|
"grad_norm": 6.479928493499756, |
|
"learning_rate": 2.7799111902582693e-07, |
|
"logits/chosen": -1.2251317501068115, |
|
"logits/rejected": -1.0719342231750488, |
|
"logps/chosen": -0.3156971037387848, |
|
"logps/rejected": -0.2400185763835907, |
|
"loss": 1.5725, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.789242684841156, |
|
"rewards/margins": -0.18919625878334045, |
|
"rewards/rejected": -0.6000465154647827, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.5193694897141331, |
|
"grad_norm": 8.900924682617188, |
|
"learning_rate": 2.761321158169134e-07, |
|
"logits/chosen": -1.0849740505218506, |
|
"logits/rejected": -1.1516170501708984, |
|
"logps/chosen": -0.35676899552345276, |
|
"logps/rejected": -0.5772523283958435, |
|
"loss": 1.4924, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.8919224739074707, |
|
"rewards/margins": 0.5512083172798157, |
|
"rewards/rejected": -1.4431307315826416, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.5215068127170719, |
|
"grad_norm": 5.021285533905029, |
|
"learning_rate": 2.74271650519322e-07, |
|
"logits/chosen": -1.1510225534439087, |
|
"logits/rejected": -1.1225014925003052, |
|
"logps/chosen": -0.3502144515514374, |
|
"logps/rejected": -0.48459944128990173, |
|
"loss": 1.5383, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.8755362033843994, |
|
"rewards/margins": 0.3359624445438385, |
|
"rewards/rejected": -1.2114986181259155, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.5236441357200107, |
|
"grad_norm": 6.033867359161377, |
|
"learning_rate": 2.7240982722585837e-07, |
|
"logits/chosen": -1.0076422691345215, |
|
"logits/rejected": -1.0045421123504639, |
|
"logps/chosen": -0.3226780295372009, |
|
"logps/rejected": -0.37450891733169556, |
|
"loss": 1.5745, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8066950440406799, |
|
"rewards/margins": 0.1295773833990097, |
|
"rewards/rejected": -0.9362723231315613, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5257814587229495, |
|
"grad_norm": 7.610095977783203, |
|
"learning_rate": 2.705467501053076e-07, |
|
"logits/chosen": -1.3070695400238037, |
|
"logits/rejected": -1.360163688659668, |
|
"logps/chosen": -0.4132193624973297, |
|
"logps/rejected": -0.5460841059684753, |
|
"loss": 1.5482, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.033048391342163, |
|
"rewards/margins": 0.3321617841720581, |
|
"rewards/rejected": -1.3652101755142212, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.5279187817258884, |
|
"grad_norm": 5.6094770431518555, |
|
"learning_rate": 2.6868252339660607e-07, |
|
"logits/chosen": -0.9480774998664856, |
|
"logits/rejected": -0.9445351362228394, |
|
"logps/chosen": -0.5733252763748169, |
|
"logps/rejected": -1.0633982419967651, |
|
"loss": 1.5284, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.433313250541687, |
|
"rewards/margins": 1.225182294845581, |
|
"rewards/rejected": -2.6584954261779785, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.5300561047288271, |
|
"grad_norm": 16.856760025024414, |
|
"learning_rate": 2.6681725140300995e-07, |
|
"logits/chosen": -1.1925255060195923, |
|
"logits/rejected": -1.1129463911056519, |
|
"logps/chosen": -0.28040605783462524, |
|
"logps/rejected": -0.36458098888397217, |
|
"loss": 1.5368, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7010151743888855, |
|
"rewards/margins": 0.21043738722801208, |
|
"rewards/rejected": -0.9114525318145752, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.5321934277317659, |
|
"grad_norm": 6.675515174865723, |
|
"learning_rate": 2.6495103848625854e-07, |
|
"logits/chosen": -1.2934060096740723, |
|
"logits/rejected": -1.17371666431427, |
|
"logps/chosen": -0.3621112108230591, |
|
"logps/rejected": -0.4810316562652588, |
|
"loss": 1.5586, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9052779674530029, |
|
"rewards/margins": 0.29730114340782166, |
|
"rewards/rejected": -1.2025790214538574, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.5343307507347048, |
|
"grad_norm": 15.49481201171875, |
|
"learning_rate": 2.63083989060736e-07, |
|
"logits/chosen": -1.019038438796997, |
|
"logits/rejected": -0.9999558925628662, |
|
"logps/chosen": -0.431622713804245, |
|
"logps/rejected": -0.620360255241394, |
|
"loss": 1.55, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0790568590164185, |
|
"rewards/margins": 0.47184401750564575, |
|
"rewards/rejected": -1.550900936126709, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5364680737376436, |
|
"grad_norm": 4.874868392944336, |
|
"learning_rate": 2.6121620758762875e-07, |
|
"logits/chosen": -1.1522804498672485, |
|
"logits/rejected": -1.144692301750183, |
|
"logps/chosen": -0.40826401114463806, |
|
"logps/rejected": -0.4715278744697571, |
|
"loss": 1.5317, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0206599235534668, |
|
"rewards/margins": 0.15815965831279755, |
|
"rewards/rejected": -1.1788195371627808, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.5386053967405824, |
|
"grad_norm": 4.113776206970215, |
|
"learning_rate": 2.593477985690815e-07, |
|
"logits/chosen": -1.0712709426879883, |
|
"logits/rejected": -1.1005451679229736, |
|
"logps/chosen": -0.5715100765228271, |
|
"logps/rejected": -0.6493417620658875, |
|
"loss": 1.5129, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.4287753105163574, |
|
"rewards/margins": 0.19457919895648956, |
|
"rewards/rejected": -1.6233545541763306, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.5407427197435213, |
|
"grad_norm": 17.1635799407959, |
|
"learning_rate": 2.574788665423496e-07, |
|
"logits/chosen": -0.9928967356681824, |
|
"logits/rejected": -0.9838371276855469, |
|
"logps/chosen": -0.3351861536502838, |
|
"logps/rejected": -0.3290242850780487, |
|
"loss": 1.5488, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.8379653692245483, |
|
"rewards/margins": -0.01540469378232956, |
|
"rewards/rejected": -0.822560727596283, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.5428800427464601, |
|
"grad_norm": 8.28346061706543, |
|
"learning_rate": 2.5560951607395126e-07, |
|
"logits/chosen": -1.1226708889007568, |
|
"logits/rejected": -1.0680346488952637, |
|
"logps/chosen": -0.3342251777648926, |
|
"logps/rejected": -0.3822442591190338, |
|
"loss": 1.5604, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8355628252029419, |
|
"rewards/margins": 0.12004776298999786, |
|
"rewards/rejected": -0.9556106925010681, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.5450173657493989, |
|
"grad_norm": 12.173513412475586, |
|
"learning_rate": 2.537398517538159e-07, |
|
"logits/chosen": -1.1180171966552734, |
|
"logits/rejected": -1.1232236623764038, |
|
"logps/chosen": -0.3291173279285431, |
|
"logps/rejected": -0.5288177132606506, |
|
"loss": 1.4907, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8227933645248413, |
|
"rewards/margins": 0.49925094842910767, |
|
"rewards/rejected": -1.3220442533493042, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5471546887523377, |
|
"grad_norm": 4.996148109436035, |
|
"learning_rate": 2.518699781894332e-07, |
|
"logits/chosen": -1.0864285230636597, |
|
"logits/rejected": -1.0856531858444214, |
|
"logps/chosen": -0.46618932485580444, |
|
"logps/rejected": -0.9816129803657532, |
|
"loss": 1.5076, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.165473222732544, |
|
"rewards/margins": 1.2885593175888062, |
|
"rewards/rejected": -2.4540326595306396, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.5492920117552765, |
|
"grad_norm": 5.049304008483887, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -0.9920480251312256, |
|
"logits/rejected": -0.897991418838501, |
|
"logps/chosen": -0.3009772002696991, |
|
"logps/rejected": -0.3982135057449341, |
|
"loss": 1.6439, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7524430155754089, |
|
"rewards/margins": 0.24309074878692627, |
|
"rewards/rejected": -0.9955337643623352, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.5514293347582153, |
|
"grad_norm": 4.575491428375244, |
|
"learning_rate": 2.4813002181056676e-07, |
|
"logits/chosen": -1.0483980178833008, |
|
"logits/rejected": -1.040475845336914, |
|
"logps/chosen": -0.2760324478149414, |
|
"logps/rejected": -0.5634697675704956, |
|
"loss": 1.5932, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6900811791419983, |
|
"rewards/margins": 0.718593180179596, |
|
"rewards/rejected": -1.4086742401123047, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.5535666577611541, |
|
"grad_norm": 4.031703948974609, |
|
"learning_rate": 2.4626014824618413e-07, |
|
"logits/chosen": -1.2272746562957764, |
|
"logits/rejected": -1.2073853015899658, |
|
"logps/chosen": -0.4353184700012207, |
|
"logps/rejected": -0.5070162415504456, |
|
"loss": 1.5153, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0882960557937622, |
|
"rewards/margins": 0.17924460768699646, |
|
"rewards/rejected": -1.2675405740737915, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.555703980764093, |
|
"grad_norm": 8.027057647705078, |
|
"learning_rate": 2.4439048392604877e-07, |
|
"logits/chosen": -0.953754186630249, |
|
"logits/rejected": -0.9900184869766235, |
|
"logps/chosen": -0.2740909159183502, |
|
"logps/rejected": -0.3458973467350006, |
|
"loss": 1.5291, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6852273344993591, |
|
"rewards/margins": 0.17951609194278717, |
|
"rewards/rejected": -0.8647434711456299, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5578413037670318, |
|
"grad_norm": 7.306129455566406, |
|
"learning_rate": 2.4252113345765043e-07, |
|
"logits/chosen": -0.9035928845405579, |
|
"logits/rejected": -0.8614873290061951, |
|
"logps/chosen": -0.2865443229675293, |
|
"logps/rejected": -0.32079729437828064, |
|
"loss": 1.5665, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7163608074188232, |
|
"rewards/margins": 0.0856325626373291, |
|
"rewards/rejected": -0.8019933104515076, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.5599786267699706, |
|
"grad_norm": 3.2643260955810547, |
|
"learning_rate": 2.406522014309186e-07, |
|
"logits/chosen": -1.1808401346206665, |
|
"logits/rejected": -1.1874431371688843, |
|
"logps/chosen": -0.5122575163841248, |
|
"logps/rejected": -0.8233806490898132, |
|
"loss": 1.5851, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2806435823440552, |
|
"rewards/margins": 0.7778077721595764, |
|
"rewards/rejected": -2.0584514141082764, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.5621159497729095, |
|
"grad_norm": 3.0484321117401123, |
|
"learning_rate": 2.3878379241237134e-07, |
|
"logits/chosen": -1.1015522480010986, |
|
"logits/rejected": -1.1043397188186646, |
|
"logps/chosen": -0.5216892957687378, |
|
"logps/rejected": -0.5477871298789978, |
|
"loss": 1.4888, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3042232990264893, |
|
"rewards/margins": 0.06524449586868286, |
|
"rewards/rejected": -1.3694677352905273, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.5642532727758482, |
|
"grad_norm": 8.040013313293457, |
|
"learning_rate": 2.3691601093926402e-07, |
|
"logits/chosen": -1.0679914951324463, |
|
"logits/rejected": -1.041649580001831, |
|
"logps/chosen": -0.4239467978477478, |
|
"logps/rejected": -0.427889347076416, |
|
"loss": 1.6854, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0598669052124023, |
|
"rewards/margins": 0.009856484830379486, |
|
"rewards/rejected": -1.06972336769104, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.566390595778787, |
|
"grad_norm": 10.049110412597656, |
|
"learning_rate": 2.3504896151374144e-07, |
|
"logits/chosen": -1.1767913103103638, |
|
"logits/rejected": -1.2240692377090454, |
|
"logps/chosen": -0.4159534275531769, |
|
"logps/rejected": -0.5419010519981384, |
|
"loss": 1.5352, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0398834943771362, |
|
"rewards/margins": 0.3148690462112427, |
|
"rewards/rejected": -1.3547526597976685, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5685279187817259, |
|
"grad_norm": 8.153444290161133, |
|
"learning_rate": 2.3318274859699008e-07, |
|
"logits/chosen": -1.063308596611023, |
|
"logits/rejected": -1.164639949798584, |
|
"logps/chosen": -0.2907513678073883, |
|
"logps/rejected": -0.5243133306503296, |
|
"loss": 1.6477, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7268784046173096, |
|
"rewards/margins": 0.5839048624038696, |
|
"rewards/rejected": -1.3107832670211792, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.5706652417846647, |
|
"grad_norm": 8.460691452026367, |
|
"learning_rate": 2.3131747660339394e-07, |
|
"logits/chosen": -1.2165307998657227, |
|
"logits/rejected": -1.1944361925125122, |
|
"logps/chosen": -0.5601080656051636, |
|
"logps/rejected": -0.47026118636131287, |
|
"loss": 1.5634, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.4002699851989746, |
|
"rewards/margins": -0.2246171087026596, |
|
"rewards/rejected": -1.1756529808044434, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.5728025647876035, |
|
"grad_norm": 13.028862953186035, |
|
"learning_rate": 2.2945324989469243e-07, |
|
"logits/chosen": -1.0125137567520142, |
|
"logits/rejected": -0.9787082672119141, |
|
"logps/chosen": -0.3832467794418335, |
|
"logps/rejected": -0.7757288217544556, |
|
"loss": 1.4993, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.958116888999939, |
|
"rewards/margins": 0.9812053442001343, |
|
"rewards/rejected": -1.9393221139907837, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.5749398877905424, |
|
"grad_norm": 10.404675483703613, |
|
"learning_rate": 2.2759017277414164e-07, |
|
"logits/chosen": -1.1808825731277466, |
|
"logits/rejected": -1.1194167137145996, |
|
"logps/chosen": -0.43756401538848877, |
|
"logps/rejected": -0.3946504294872284, |
|
"loss": 1.6378, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -1.0939099788665771, |
|
"rewards/margins": -0.10728396475315094, |
|
"rewards/rejected": -0.9866260290145874, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.5770772107934812, |
|
"grad_norm": 7.459733963012695, |
|
"learning_rate": 2.2572834948067795e-07, |
|
"logits/chosen": -0.9175713062286377, |
|
"logits/rejected": -0.9572230577468872, |
|
"logps/chosen": -0.2940235137939453, |
|
"logps/rejected": -0.3464244604110718, |
|
"loss": 1.6275, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.7350587844848633, |
|
"rewards/margins": 0.13100232183933258, |
|
"rewards/rejected": -0.8660610914230347, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.57921453379642, |
|
"grad_norm": 6.2825493812561035, |
|
"learning_rate": 2.2386788418308665e-07, |
|
"logits/chosen": -1.0154887437820435, |
|
"logits/rejected": -1.0528539419174194, |
|
"logps/chosen": -0.5251376628875732, |
|
"logps/rejected": -0.7547603845596313, |
|
"loss": 1.5214, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3128442764282227, |
|
"rewards/margins": 0.5740568041801453, |
|
"rewards/rejected": -1.8869010210037231, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.5813518567993589, |
|
"grad_norm": 4.264716148376465, |
|
"learning_rate": 2.2200888097417302e-07, |
|
"logits/chosen": -1.043276071548462, |
|
"logits/rejected": -0.9186975955963135, |
|
"logps/chosen": -0.39481961727142334, |
|
"logps/rejected": -0.5242050886154175, |
|
"loss": 1.5337, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9870489835739136, |
|
"rewards/margins": 0.32346370816230774, |
|
"rewards/rejected": -1.310512661933899, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.5834891798022976, |
|
"grad_norm": 4.6611199378967285, |
|
"learning_rate": 2.2015144386493895e-07, |
|
"logits/chosen": -0.9979356527328491, |
|
"logits/rejected": -0.9526849985122681, |
|
"logps/chosen": -0.39222562313079834, |
|
"logps/rejected": -0.46204763650894165, |
|
"loss": 1.4999, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9805639982223511, |
|
"rewards/margins": 0.1745550036430359, |
|
"rewards/rejected": -1.1551190614700317, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.5856265028052364, |
|
"grad_norm": 4.630171298980713, |
|
"learning_rate": 2.1829567677876297e-07, |
|
"logits/chosen": -0.9676195979118347, |
|
"logits/rejected": -0.9666755199432373, |
|
"logps/chosen": -0.35061851143836975, |
|
"logps/rejected": -0.35894879698753357, |
|
"loss": 1.609, |
|
"rewards/accuracies": 0.125, |
|
"rewards/chosen": -0.8765462636947632, |
|
"rewards/margins": 0.020825695246458054, |
|
"rewards/rejected": -0.8973720073699951, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.5877638258081752, |
|
"grad_norm": 11.871662139892578, |
|
"learning_rate": 2.164416835455862e-07, |
|
"logits/chosen": -0.7467477321624756, |
|
"logits/rejected": -0.6393258571624756, |
|
"logps/chosen": -0.502008318901062, |
|
"logps/rejected": -0.44732266664505005, |
|
"loss": 1.5655, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.2550209760665894, |
|
"rewards/margins": -0.13671430945396423, |
|
"rewards/rejected": -1.1183066368103027, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5899011488111141, |
|
"grad_norm": 3.6022424697875977, |
|
"learning_rate": 2.1458956789610277e-07, |
|
"logits/chosen": -1.2034939527511597, |
|
"logits/rejected": -1.0202971696853638, |
|
"logps/chosen": -0.3793608248233795, |
|
"logps/rejected": -0.33719444274902344, |
|
"loss": 1.5627, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9484022259712219, |
|
"rewards/margins": -0.10541604459285736, |
|
"rewards/rejected": -0.8429861068725586, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.5920384718140529, |
|
"grad_norm": 6.574893474578857, |
|
"learning_rate": 2.1273943345595635e-07, |
|
"logits/chosen": -1.2551283836364746, |
|
"logits/rejected": -1.2000017166137695, |
|
"logps/chosen": -0.4082186818122864, |
|
"logps/rejected": -0.6158214807510376, |
|
"loss": 1.5529, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0205466747283936, |
|
"rewards/margins": 0.51900714635849, |
|
"rewards/rejected": -1.5395537614822388, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.5941757948169917, |
|
"grad_norm": 5.325026035308838, |
|
"learning_rate": 2.1089138373994222e-07, |
|
"logits/chosen": -1.0981683731079102, |
|
"logits/rejected": -1.093741774559021, |
|
"logps/chosen": -0.4150196313858032, |
|
"logps/rejected": -0.5339372754096985, |
|
"loss": 1.5688, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0375490188598633, |
|
"rewards/margins": 0.2972941994667053, |
|
"rewards/rejected": -1.3348432779312134, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.5963131178199306, |
|
"grad_norm": 17.316631317138672, |
|
"learning_rate": 2.0904552214621556e-07, |
|
"logits/chosen": -1.1414576768875122, |
|
"logits/rejected": -1.1112793684005737, |
|
"logps/chosen": -0.6599245071411133, |
|
"logps/rejected": -0.3380126357078552, |
|
"loss": 1.6195, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.6498112678527832, |
|
"rewards/margins": -0.8047796487808228, |
|
"rewards/rejected": -0.8450315594673157, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.5984504408228694, |
|
"grad_norm": 4.380247116088867, |
|
"learning_rate": 2.072019519505062e-07, |
|
"logits/chosen": -0.9662964940071106, |
|
"logits/rejected": -0.9849826693534851, |
|
"logps/chosen": -0.36699962615966797, |
|
"logps/rejected": -0.3455093204975128, |
|
"loss": 1.5053, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.9174990653991699, |
|
"rewards/margins": -0.053725723177194595, |
|
"rewards/rejected": -0.8637734055519104, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6005877638258081, |
|
"grad_norm": 7.828254699707031, |
|
"learning_rate": 2.0536077630034085e-07, |
|
"logits/chosen": -0.9694425463676453, |
|
"logits/rejected": -0.8208516240119934, |
|
"logps/chosen": -0.4557761251926422, |
|
"logps/rejected": -0.6782093048095703, |
|
"loss": 1.6184, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1394402980804443, |
|
"rewards/margins": 0.5560829043388367, |
|
"rewards/rejected": -1.6955231428146362, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.602725086828747, |
|
"grad_norm": 7.952332496643066, |
|
"learning_rate": 2.0352209820927135e-07, |
|
"logits/chosen": -0.9816855192184448, |
|
"logits/rejected": -0.8845440149307251, |
|
"logps/chosen": -0.3230597972869873, |
|
"logps/rejected": -0.4072348475456238, |
|
"loss": 1.5012, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8076494932174683, |
|
"rewards/margins": 0.21043761074543, |
|
"rewards/rejected": -1.0180871486663818, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.6048624098316858, |
|
"grad_norm": 8.255958557128906, |
|
"learning_rate": 2.0168602055111173e-07, |
|
"logits/chosen": -1.1203254461288452, |
|
"logits/rejected": -1.107031226158142, |
|
"logps/chosen": -0.6907448768615723, |
|
"logps/rejected": -0.5776211023330688, |
|
"loss": 1.6044, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.7268621921539307, |
|
"rewards/margins": -0.28280916810035706, |
|
"rewards/rejected": -1.4440529346466064, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.6069997328346246, |
|
"grad_norm": 11.462747573852539, |
|
"learning_rate": 1.998526460541818e-07, |
|
"logits/chosen": -1.0083472728729248, |
|
"logits/rejected": -0.9932087659835815, |
|
"logps/chosen": -0.5044468641281128, |
|
"logps/rejected": -0.4264739453792572, |
|
"loss": 1.6586, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2611171007156372, |
|
"rewards/margins": -0.19493228197097778, |
|
"rewards/rejected": -1.0661848783493042, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.6091370558375635, |
|
"grad_norm": 4.765872478485107, |
|
"learning_rate": 1.980220772955602e-07, |
|
"logits/chosen": -1.035547137260437, |
|
"logits/rejected": -1.0857359170913696, |
|
"logps/chosen": -0.41963931918144226, |
|
"logps/rejected": -0.592042863368988, |
|
"loss": 1.5091, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.049098253250122, |
|
"rewards/margins": 0.43100887537002563, |
|
"rewards/rejected": -1.4801071882247925, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6112743788405023, |
|
"grad_norm": 7.985474586486816, |
|
"learning_rate": 1.961944166953445e-07, |
|
"logits/chosen": -0.8251385688781738, |
|
"logits/rejected": -0.9071054458618164, |
|
"logps/chosen": -0.3732144236564636, |
|
"logps/rejected": -0.4104728400707245, |
|
"loss": 1.5169, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9330360889434814, |
|
"rewards/margins": 0.09314604848623276, |
|
"rewards/rejected": -1.0261821746826172, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.6134117018434411, |
|
"grad_norm": 7.316262722015381, |
|
"learning_rate": 1.9436976651092142e-07, |
|
"logits/chosen": -0.9544340372085571, |
|
"logits/rejected": -0.898868203163147, |
|
"logps/chosen": -0.35007724165916443, |
|
"logps/rejected": -0.495257705450058, |
|
"loss": 1.6104, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8751930594444275, |
|
"rewards/margins": 0.3629511594772339, |
|
"rewards/rejected": -1.2381441593170166, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.6155490248463799, |
|
"grad_norm": 3.980193614959717, |
|
"learning_rate": 1.9254822883124517e-07, |
|
"logits/chosen": -1.2356715202331543, |
|
"logits/rejected": -1.1466394662857056, |
|
"logps/chosen": -0.418282151222229, |
|
"logps/rejected": -0.5082724094390869, |
|
"loss": 1.5123, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0457054376602173, |
|
"rewards/margins": 0.22497567534446716, |
|
"rewards/rejected": -1.2706811428070068, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.6176863478493188, |
|
"grad_norm": 6.745012283325195, |
|
"learning_rate": 1.9072990557112564e-07, |
|
"logits/chosen": -1.2313592433929443, |
|
"logits/rejected": -1.1524139642715454, |
|
"logps/chosen": -0.34664469957351685, |
|
"logps/rejected": -0.5482650399208069, |
|
"loss": 1.5042, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8666118383407593, |
|
"rewards/margins": 0.504050612449646, |
|
"rewards/rejected": -1.3706625699996948, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.6198236708522575, |
|
"grad_norm": 9.481700897216797, |
|
"learning_rate": 1.8891489846552644e-07, |
|
"logits/chosen": -1.081266164779663, |
|
"logits/rejected": -1.085394263267517, |
|
"logps/chosen": -0.37148210406303406, |
|
"logps/rejected": -0.5117133855819702, |
|
"loss": 1.5743, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.928705096244812, |
|
"rewards/margins": 0.35057833790779114, |
|
"rewards/rejected": -1.2792835235595703, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6219609938551963, |
|
"grad_norm": 5.410580635070801, |
|
"learning_rate": 1.8710330906387286e-07, |
|
"logits/chosen": -1.0288105010986328, |
|
"logits/rejected": -1.0116032361984253, |
|
"logps/chosen": -0.3644832670688629, |
|
"logps/rejected": -0.4690641462802887, |
|
"loss": 1.4934, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9112080931663513, |
|
"rewards/margins": 0.261452317237854, |
|
"rewards/rejected": -1.1726603507995605, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.6240983168581352, |
|
"grad_norm": 12.723451614379883, |
|
"learning_rate": 1.8529523872436977e-07, |
|
"logits/chosen": -1.0818991661071777, |
|
"logits/rejected": -1.0822491645812988, |
|
"logps/chosen": -0.2900945246219635, |
|
"logps/rejected": -0.44144943356513977, |
|
"loss": 1.6044, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.7252362370491028, |
|
"rewards/margins": 0.37838736176490784, |
|
"rewards/rejected": -1.103623628616333, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.626235639861074, |
|
"grad_norm": 18.391582489013672, |
|
"learning_rate": 1.8349078860833124e-07, |
|
"logits/chosen": -1.2308346033096313, |
|
"logits/rejected": -1.1760648488998413, |
|
"logps/chosen": -0.6205483675003052, |
|
"logps/rejected": -0.3508188724517822, |
|
"loss": 1.6365, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.5513708591461182, |
|
"rewards/margins": -0.6743236780166626, |
|
"rewards/rejected": -0.8770472407341003, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.6283729628640128, |
|
"grad_norm": 4.619331359863281, |
|
"learning_rate": 1.8169005967452e-07, |
|
"logits/chosen": -1.2816352844238281, |
|
"logits/rejected": -1.2922104597091675, |
|
"logps/chosen": -0.3991982340812683, |
|
"logps/rejected": -0.504127562046051, |
|
"loss": 1.5704, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9979956150054932, |
|
"rewards/margins": 0.2623233199119568, |
|
"rewards/rejected": -1.2603188753128052, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.6305102858669517, |
|
"grad_norm": 5.379226207733154, |
|
"learning_rate": 1.7989315267349933e-07, |
|
"logits/chosen": -1.0375932455062866, |
|
"logits/rejected": -0.987227737903595, |
|
"logps/chosen": -0.5034650564193726, |
|
"logps/rejected": -0.6317480206489563, |
|
"loss": 1.5085, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2586627006530762, |
|
"rewards/margins": 0.3207073211669922, |
|
"rewards/rejected": -1.5793699026107788, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6326476088698905, |
|
"grad_norm": 15.063772201538086, |
|
"learning_rate": 1.781001681419957e-07, |
|
"logits/chosen": -1.0075099468231201, |
|
"logits/rejected": -0.9762495160102844, |
|
"logps/chosen": -0.5012757778167725, |
|
"logps/rejected": -0.4941572844982147, |
|
"loss": 1.4992, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2531894445419312, |
|
"rewards/margins": -0.01779627427458763, |
|
"rewards/rejected": -1.2353932857513428, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.6347849318728293, |
|
"grad_norm": 6.187438488006592, |
|
"learning_rate": 1.763112063972739e-07, |
|
"logits/chosen": -1.086329460144043, |
|
"logits/rejected": -1.003612995147705, |
|
"logps/chosen": -0.43468421697616577, |
|
"logps/rejected": -0.623386800289154, |
|
"loss": 1.5243, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0867105722427368, |
|
"rewards/margins": 0.4717563986778259, |
|
"rewards/rejected": -1.5584670305252075, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.6369222548757681, |
|
"grad_norm": 16.858556747436523, |
|
"learning_rate": 1.745263675315245e-07, |
|
"logits/chosen": -0.9435803890228271, |
|
"logits/rejected": -0.9401760697364807, |
|
"logps/chosen": -0.41162610054016113, |
|
"logps/rejected": -0.7601633667945862, |
|
"loss": 1.5119, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0290653705596924, |
|
"rewards/margins": 0.871343195438385, |
|
"rewards/rejected": -1.9004085063934326, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.6390595778787069, |
|
"grad_norm": 9.424429893493652, |
|
"learning_rate": 1.7274575140626315e-07, |
|
"logits/chosen": -1.1454746723175049, |
|
"logits/rejected": -1.0624852180480957, |
|
"logps/chosen": -0.5017335414886475, |
|
"logps/rejected": -0.5628975629806519, |
|
"loss": 1.6427, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2543339729309082, |
|
"rewards/margins": 0.1529100388288498, |
|
"rewards/rejected": -1.4072438478469849, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.6411969008816457, |
|
"grad_norm": 5.96793794631958, |
|
"learning_rate": 1.7096945764674398e-07, |
|
"logits/chosen": -0.9207834005355835, |
|
"logits/rejected": -0.9202168583869934, |
|
"logps/chosen": -0.3956447243690491, |
|
"logps/rejected": -0.40050509572029114, |
|
"loss": 1.6262, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9891117811203003, |
|
"rewards/margins": 0.012150941416621208, |
|
"rewards/rejected": -1.0012627840042114, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6433342238845846, |
|
"grad_norm": 5.713474750518799, |
|
"learning_rate": 1.6919758563638502e-07, |
|
"logits/chosen": -0.9401556253433228, |
|
"logits/rejected": -0.8605173826217651, |
|
"logps/chosen": -0.4167335331439972, |
|
"logps/rejected": -0.5897922515869141, |
|
"loss": 1.527, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.041833758354187, |
|
"rewards/margins": 0.4326467216014862, |
|
"rewards/rejected": -1.4744806289672852, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.6454715468875234, |
|
"grad_norm": 11.05538272857666, |
|
"learning_rate": 1.674302345112083e-07, |
|
"logits/chosen": -1.0368751287460327, |
|
"logits/rejected": -1.1599576473236084, |
|
"logps/chosen": -0.42875924706459045, |
|
"logps/rejected": -0.7052878737449646, |
|
"loss": 1.4713, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.071898102760315, |
|
"rewards/margins": 0.6913214325904846, |
|
"rewards/rejected": -1.7632195949554443, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.6476088698904622, |
|
"grad_norm": 7.134030818939209, |
|
"learning_rate": 1.656675031542925e-07, |
|
"logits/chosen": -1.1293184757232666, |
|
"logits/rejected": -1.1065864562988281, |
|
"logps/chosen": -0.4180205166339874, |
|
"logps/rejected": -0.4501388669013977, |
|
"loss": 1.5318, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.045051097869873, |
|
"rewards/margins": 0.08029599487781525, |
|
"rewards/rejected": -1.1253471374511719, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.649746192893401, |
|
"grad_norm": 9.838216781616211, |
|
"learning_rate": 1.6390949019024118e-07, |
|
"logits/chosen": -1.2255228757858276, |
|
"logits/rejected": -1.0440919399261475, |
|
"logps/chosen": -0.34637248516082764, |
|
"logps/rejected": -0.332830548286438, |
|
"loss": 1.4995, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8659312725067139, |
|
"rewards/margins": -0.0338548980653286, |
|
"rewards/rejected": -0.8320763111114502, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.6518835158963399, |
|
"grad_norm": 3.633300542831421, |
|
"learning_rate": 1.621562939796643e-07, |
|
"logits/chosen": -1.0595769882202148, |
|
"logits/rejected": -1.060139536857605, |
|
"logps/chosen": -0.4522903561592102, |
|
"logps/rejected": -0.7426398992538452, |
|
"loss": 1.5239, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.1307260990142822, |
|
"rewards/margins": 0.7258738279342651, |
|
"rewards/rejected": -1.8565996885299683, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6540208388992786, |
|
"grad_norm": 6.867697715759277, |
|
"learning_rate": 1.6040801261367493e-07, |
|
"logits/chosen": -1.1086713075637817, |
|
"logits/rejected": -1.1967618465423584, |
|
"logps/chosen": -0.42471063137054443, |
|
"logps/rejected": -0.4879637360572815, |
|
"loss": 1.4941, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0617766380310059, |
|
"rewards/margins": 0.15813271701335907, |
|
"rewards/rejected": -1.2199093103408813, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.6561581619022174, |
|
"grad_norm": 9.760129928588867, |
|
"learning_rate": 1.5866474390840124e-07, |
|
"logits/chosen": -1.0829546451568604, |
|
"logits/rejected": -1.0814926624298096, |
|
"logps/chosen": -0.4132816791534424, |
|
"logps/rejected": -0.586501955986023, |
|
"loss": 1.5785, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.033204197883606, |
|
"rewards/margins": 0.43305063247680664, |
|
"rewards/rejected": -1.466254711151123, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.6582954849051563, |
|
"grad_norm": 4.1318511962890625, |
|
"learning_rate": 1.569265853995137e-07, |
|
"logits/chosen": -0.9130831360816956, |
|
"logits/rejected": -1.0174808502197266, |
|
"logps/chosen": -0.3561224639415741, |
|
"logps/rejected": -0.47521257400512695, |
|
"loss": 1.4836, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8903061747550964, |
|
"rewards/margins": 0.2977251708507538, |
|
"rewards/rejected": -1.1880314350128174, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.6604328079080951, |
|
"grad_norm": 5.264586925506592, |
|
"learning_rate": 1.5519363433676791e-07, |
|
"logits/chosen": -1.2580910921096802, |
|
"logits/rejected": -1.2599362134933472, |
|
"logps/chosen": -0.42455989122390747, |
|
"logps/rejected": -0.5628042221069336, |
|
"loss": 1.6118, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0613996982574463, |
|
"rewards/margins": 0.34561091661453247, |
|
"rewards/rejected": -1.4070106744766235, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.6625701309110339, |
|
"grad_norm": 19.9966983795166, |
|
"learning_rate": 1.5346598767856345e-07, |
|
"logits/chosen": -0.8979520201683044, |
|
"logits/rejected": -0.9155081510543823, |
|
"logps/chosen": -0.335290789604187, |
|
"logps/rejected": -0.43495267629623413, |
|
"loss": 1.6585, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8382269740104675, |
|
"rewards/margins": 0.24915480613708496, |
|
"rewards/rejected": -1.0873818397521973, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6647074539139728, |
|
"grad_norm": 10.383206367492676, |
|
"learning_rate": 1.517437420865191e-07, |
|
"logits/chosen": -1.4863961935043335, |
|
"logits/rejected": -1.2771762609481812, |
|
"logps/chosen": -0.33609017729759216, |
|
"logps/rejected": -0.5989465713500977, |
|
"loss": 1.5329, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.8402254581451416, |
|
"rewards/margins": 0.6571409106254578, |
|
"rewards/rejected": -1.4973664283752441, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.6668447769169116, |
|
"grad_norm": 4.85746431350708, |
|
"learning_rate": 1.500269939200648e-07, |
|
"logits/chosen": -1.1300700902938843, |
|
"logits/rejected": -1.1000648736953735, |
|
"logps/chosen": -0.4227021336555481, |
|
"logps/rejected": -0.3756105899810791, |
|
"loss": 1.572, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.0567553043365479, |
|
"rewards/margins": -0.1177288144826889, |
|
"rewards/rejected": -0.939026415348053, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.6689820999198504, |
|
"grad_norm": 15.065166473388672, |
|
"learning_rate": 1.4831583923104998e-07, |
|
"logits/chosen": -1.2687509059906006, |
|
"logits/rejected": -1.2387371063232422, |
|
"logps/chosen": -0.37272506952285767, |
|
"logps/rejected": -0.4628967344760895, |
|
"loss": 1.5731, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9318127632141113, |
|
"rewards/margins": 0.22542911767959595, |
|
"rewards/rejected": -1.157241940498352, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.6711194229227893, |
|
"grad_norm": 4.742990970611572, |
|
"learning_rate": 1.4661037375836987e-07, |
|
"logits/chosen": -1.1166198253631592, |
|
"logits/rejected": -1.1708546876907349, |
|
"logps/chosen": -0.40323004126548767, |
|
"logps/rejected": -0.49593961238861084, |
|
"loss": 1.5854, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.0080751180648804, |
|
"rewards/margins": 0.2317739725112915, |
|
"rewards/rejected": -1.2398490905761719, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.673256745925728, |
|
"grad_norm": 6.135270595550537, |
|
"learning_rate": 1.4491069292260866e-07, |
|
"logits/chosen": -1.0454214811325073, |
|
"logits/rejected": -0.9797170162200928, |
|
"logps/chosen": -0.4790341258049011, |
|
"logps/rejected": -0.5334821343421936, |
|
"loss": 1.5961, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.1975852251052856, |
|
"rewards/margins": 0.13612008094787598, |
|
"rewards/rejected": -1.3337054252624512, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6753940689286668, |
|
"grad_norm": 5.529047012329102, |
|
"learning_rate": 1.432168918207009e-07, |
|
"logits/chosen": -0.9548214673995972, |
|
"logits/rejected": -1.037723183631897, |
|
"logps/chosen": -0.34955471754074097, |
|
"logps/rejected": -0.6267892122268677, |
|
"loss": 1.5352, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.87388676404953, |
|
"rewards/margins": 0.6930862069129944, |
|
"rewards/rejected": -1.5669729709625244, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.6775313919316056, |
|
"grad_norm": 4.692312240600586, |
|
"learning_rate": 1.4152906522061047e-07, |
|
"logits/chosen": -1.0882840156555176, |
|
"logits/rejected": -1.0136744976043701, |
|
"logps/chosen": -0.31677815318107605, |
|
"logps/rejected": -0.42752817273139954, |
|
"loss": 1.522, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7919453978538513, |
|
"rewards/margins": 0.2768751382827759, |
|
"rewards/rejected": -1.0688204765319824, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.6796687149345445, |
|
"grad_norm": 10.370941162109375, |
|
"learning_rate": 1.3984730755602903e-07, |
|
"logits/chosen": -1.161927580833435, |
|
"logits/rejected": -1.0617276430130005, |
|
"logps/chosen": -0.534487783908844, |
|
"logps/rejected": -0.6021491289138794, |
|
"loss": 1.5201, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3362195491790771, |
|
"rewards/margins": 0.1691533476114273, |
|
"rewards/rejected": -1.5053728818893433, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.6818060379374833, |
|
"grad_norm": 9.610735893249512, |
|
"learning_rate": 1.381717129210918e-07, |
|
"logits/chosen": -1.1923787593841553, |
|
"logits/rejected": -1.2188538312911987, |
|
"logps/chosen": -0.375224769115448, |
|
"logps/rejected": -0.7198653817176819, |
|
"loss": 1.5779, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9380618929862976, |
|
"rewards/margins": 0.8616017699241638, |
|
"rewards/rejected": -1.7996635437011719, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.6839433609404221, |
|
"grad_norm": 5.218975067138672, |
|
"learning_rate": 1.365023750651133e-07, |
|
"logits/chosen": -1.1556600332260132, |
|
"logits/rejected": -1.095563530921936, |
|
"logps/chosen": -0.37500467896461487, |
|
"logps/rejected": -0.4297294020652771, |
|
"loss": 1.5315, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9375116229057312, |
|
"rewards/margins": 0.1368117332458496, |
|
"rewards/rejected": -1.0743234157562256, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.686080683943361, |
|
"grad_norm": 6.690799236297607, |
|
"learning_rate": 1.3483938738734195e-07, |
|
"logits/chosen": -0.8860509395599365, |
|
"logits/rejected": -0.8542050123214722, |
|
"logps/chosen": -0.3055468201637268, |
|
"logps/rejected": -0.3592091202735901, |
|
"loss": 1.5345, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7638670802116394, |
|
"rewards/margins": 0.13415566086769104, |
|
"rewards/rejected": -0.8980227708816528, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.6882180069462998, |
|
"grad_norm": 9.405998229980469, |
|
"learning_rate": 1.3318284293173449e-07, |
|
"logits/chosen": -0.9992817640304565, |
|
"logits/rejected": -0.9634179472923279, |
|
"logps/chosen": -0.4300232529640198, |
|
"logps/rejected": -0.4082144498825073, |
|
"loss": 1.5484, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.075058102607727, |
|
"rewards/margins": -0.05452210083603859, |
|
"rewards/rejected": -1.0205360651016235, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.6903553299492385, |
|
"grad_norm": 5.07054328918457, |
|
"learning_rate": 1.3153283438175034e-07, |
|
"logits/chosen": -1.0605663061141968, |
|
"logits/rejected": -1.0907377004623413, |
|
"logps/chosen": -0.3852520287036896, |
|
"logps/rejected": -0.4656079113483429, |
|
"loss": 1.5678, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9631301164627075, |
|
"rewards/margins": 0.2008897364139557, |
|
"rewards/rejected": -1.1640198230743408, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.6924926529521774, |
|
"grad_norm": 7.885300159454346, |
|
"learning_rate": 1.2988945405516565e-07, |
|
"logits/chosen": -1.0609923601150513, |
|
"logits/rejected": -1.0959070920944214, |
|
"logps/chosen": -0.4137347638607025, |
|
"logps/rejected": -0.5460120439529419, |
|
"loss": 1.5155, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.034337043762207, |
|
"rewards/margins": 0.33069324493408203, |
|
"rewards/rejected": -1.3650301694869995, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.6946299759551162, |
|
"grad_norm": 5.987451553344727, |
|
"learning_rate": 1.2825279389890818e-07, |
|
"logits/chosen": -0.9947149753570557, |
|
"logits/rejected": -1.0909423828125, |
|
"logps/chosen": -0.4138132333755493, |
|
"logps/rejected": -0.4551887512207031, |
|
"loss": 1.4521, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0345330238342285, |
|
"rewards/margins": 0.10343889147043228, |
|
"rewards/rejected": -1.1379718780517578, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.696767298958055, |
|
"grad_norm": 5.460362434387207, |
|
"learning_rate": 1.2662294548391328e-07, |
|
"logits/chosen": -1.1488416194915771, |
|
"logits/rejected": -0.9424848556518555, |
|
"logps/chosen": -0.48600658774375916, |
|
"logps/rejected": -0.7618072628974915, |
|
"loss": 1.5489, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.215016484260559, |
|
"rewards/margins": 0.6895018219947815, |
|
"rewards/rejected": -1.9045181274414062, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.6989046219609939, |
|
"grad_norm": 10.205648422241211, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"logits/chosen": -0.9662617444992065, |
|
"logits/rejected": -1.1120105981826782, |
|
"logps/chosen": -0.40102994441986084, |
|
"logps/rejected": -0.6069145798683167, |
|
"loss": 1.5435, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0025748014450073, |
|
"rewards/margins": 0.5147115588188171, |
|
"rewards/rejected": -1.5172864198684692, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.7010419449639327, |
|
"grad_norm": 8.57025146484375, |
|
"learning_rate": 1.2338404825076935e-07, |
|
"logits/chosen": -1.1456284523010254, |
|
"logits/rejected": -1.0507254600524902, |
|
"logps/chosen": -0.4316751956939697, |
|
"logps/rejected": -0.44297924637794495, |
|
"loss": 1.5203, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0791878700256348, |
|
"rewards/margins": 0.02826026827096939, |
|
"rewards/rejected": -1.1074482202529907, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.7031792679668715, |
|
"grad_norm": 9.405667304992676, |
|
"learning_rate": 1.2177518064852345e-07, |
|
"logits/chosen": -1.1795152425765991, |
|
"logits/rejected": -1.0673104524612427, |
|
"logps/chosen": -0.37221118807792664, |
|
"logps/rejected": -0.4177697002887726, |
|
"loss": 1.5138, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.930527925491333, |
|
"rewards/margins": 0.11389636248350143, |
|
"rewards/rejected": -1.044424295425415, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.7053165909698104, |
|
"grad_norm": 6.466906547546387, |
|
"learning_rate": 1.201734872092077e-07, |
|
"logits/chosen": -1.0346126556396484, |
|
"logits/rejected": -1.0439854860305786, |
|
"logps/chosen": -0.42790815234184265, |
|
"logps/rejected": -1.0795375108718872, |
|
"loss": 1.4512, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.0697704553604126, |
|
"rewards/margins": 1.6290733814239502, |
|
"rewards/rejected": -2.6988439559936523, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7074539139727491, |
|
"grad_norm": 4.390846252441406, |
|
"learning_rate": 1.185790575473738e-07, |
|
"logits/chosen": -1.1390395164489746, |
|
"logits/rejected": -1.1135765314102173, |
|
"logps/chosen": -0.5658525824546814, |
|
"logps/rejected": -0.5814335942268372, |
|
"loss": 1.4646, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.4146316051483154, |
|
"rewards/margins": 0.03895253688097, |
|
"rewards/rejected": -1.45358407497406, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.7095912369756879, |
|
"grad_norm": 5.300051212310791, |
|
"learning_rate": 1.1699198087116588e-07, |
|
"logits/chosen": -1.015911340713501, |
|
"logits/rejected": -1.11297607421875, |
|
"logps/chosen": -0.4078459143638611, |
|
"logps/rejected": -0.6018810868263245, |
|
"loss": 1.5814, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.019614815711975, |
|
"rewards/margins": 0.48508787155151367, |
|
"rewards/rejected": -1.5047025680541992, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.7117285599786267, |
|
"grad_norm": 5.75014066696167, |
|
"learning_rate": 1.1541234597732947e-07, |
|
"logits/chosen": -1.0877783298492432, |
|
"logits/rejected": -1.0997726917266846, |
|
"logps/chosen": -0.3489132523536682, |
|
"logps/rejected": -0.43700623512268066, |
|
"loss": 1.4605, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8722831606864929, |
|
"rewards/margins": 0.22023235261440277, |
|
"rewards/rejected": -1.0925155878067017, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.7138658829815656, |
|
"grad_norm": 5.7690229415893555, |
|
"learning_rate": 1.1384024124624322e-07, |
|
"logits/chosen": -0.9815250635147095, |
|
"logits/rejected": -0.9534754753112793, |
|
"logps/chosen": -0.42468035221099854, |
|
"logps/rejected": -0.42991912364959717, |
|
"loss": 1.5096, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.0617008209228516, |
|
"rewards/margins": 0.013096902519464493, |
|
"rewards/rejected": -1.0747978687286377, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.7160032059845044, |
|
"grad_norm": 6.530084609985352, |
|
"learning_rate": 1.1227575463697439e-07, |
|
"logits/chosen": -1.0770314931869507, |
|
"logits/rejected": -1.1593232154846191, |
|
"logps/chosen": -0.41134944558143616, |
|
"logps/rejected": -0.46647369861602783, |
|
"loss": 1.5333, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0283737182617188, |
|
"rewards/margins": 0.1378105729818344, |
|
"rewards/rejected": -1.1661843061447144, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7181405289874432, |
|
"grad_norm": 6.972837448120117, |
|
"learning_rate": 1.1071897368235694e-07, |
|
"logits/chosen": -1.0634236335754395, |
|
"logits/rejected": -1.1836671829223633, |
|
"logps/chosen": -0.35514572262763977, |
|
"logps/rejected": -0.6676814556121826, |
|
"loss": 1.4464, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8878642320632935, |
|
"rewards/margins": 0.781339168548584, |
|
"rewards/rejected": -1.6692036390304565, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.7202778519903821, |
|
"grad_norm": 6.366727828979492, |
|
"learning_rate": 1.0916998548409447e-07, |
|
"logits/chosen": -1.0254015922546387, |
|
"logits/rejected": -0.9457456469535828, |
|
"logps/chosen": -0.3946457803249359, |
|
"logps/rejected": -0.4669502377510071, |
|
"loss": 1.617, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.986614465713501, |
|
"rewards/margins": 0.1807611733675003, |
|
"rewards/rejected": -1.1673755645751953, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.7224151749933209, |
|
"grad_norm": 6.886241436004639, |
|
"learning_rate": 1.0762887670788701e-07, |
|
"logits/chosen": -0.8809665441513062, |
|
"logits/rejected": -0.7807790637016296, |
|
"logps/chosen": -0.32795268297195435, |
|
"logps/rejected": -0.4252588152885437, |
|
"loss": 1.4383, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8198816776275635, |
|
"rewards/margins": 0.2432653307914734, |
|
"rewards/rejected": -1.063146948814392, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.7245524979962596, |
|
"grad_norm": 4.194058418273926, |
|
"learning_rate": 1.0609573357858165e-07, |
|
"logits/chosen": -1.1363167762756348, |
|
"logits/rejected": -1.1581072807312012, |
|
"logps/chosen": -0.4632134437561035, |
|
"logps/rejected": -0.6518194079399109, |
|
"loss": 1.6039, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1580334901809692, |
|
"rewards/margins": 0.47151511907577515, |
|
"rewards/rejected": -1.6295486688613892, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.7266898209991985, |
|
"grad_norm": 23.59259033203125, |
|
"learning_rate": 1.0457064187534861e-07, |
|
"logits/chosen": -1.0518946647644043, |
|
"logits/rejected": -0.9888642430305481, |
|
"logps/chosen": -0.407795786857605, |
|
"logps/rejected": -0.45109352469444275, |
|
"loss": 1.6088, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -1.0194894075393677, |
|
"rewards/margins": 0.1082444041967392, |
|
"rewards/rejected": -1.127733826637268, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7288271440021373, |
|
"grad_norm": 5.5164079666137695, |
|
"learning_rate": 1.0305368692688174e-07, |
|
"logits/chosen": -1.0163506269454956, |
|
"logits/rejected": -0.9497014284133911, |
|
"logps/chosen": -0.43188926577568054, |
|
"logps/rejected": -0.5361363291740417, |
|
"loss": 1.5212, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0797233581542969, |
|
"rewards/margins": 0.2606176435947418, |
|
"rewards/rejected": -1.3403409719467163, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.7309644670050761, |
|
"grad_norm": 10.590493202209473, |
|
"learning_rate": 1.0154495360662463e-07, |
|
"logits/chosen": -0.7582399249076843, |
|
"logits/rejected": -0.7725558280944824, |
|
"logps/chosen": -0.3883350193500519, |
|
"logps/rejected": -0.4608793556690216, |
|
"loss": 1.4818, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9708375930786133, |
|
"rewards/margins": 0.18136097490787506, |
|
"rewards/rejected": -1.152198314666748, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.733101790008015, |
|
"grad_norm": 5.3863677978515625, |
|
"learning_rate": 1.0004452632802158e-07, |
|
"logits/chosen": -1.0777875185012817, |
|
"logits/rejected": -1.0332427024841309, |
|
"logps/chosen": -0.6136234402656555, |
|
"logps/rejected": -0.6208893656730652, |
|
"loss": 1.4504, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.5340585708618164, |
|
"rewards/margins": 0.018164925277233124, |
|
"rewards/rejected": -1.552223563194275, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.7352391130109538, |
|
"grad_norm": 4.131237506866455, |
|
"learning_rate": 9.855248903979505e-08, |
|
"logits/chosen": -1.1028010845184326, |
|
"logits/rejected": -1.063793420791626, |
|
"logps/chosen": -0.4896419644355774, |
|
"logps/rejected": -0.5126334428787231, |
|
"loss": 1.496, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.224104881286621, |
|
"rewards/margins": 0.05747878551483154, |
|
"rewards/rejected": -1.2815836668014526, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.7373764360138926, |
|
"grad_norm": 6.84834623336792, |
|
"learning_rate": 9.706892522124838e-08, |
|
"logits/chosen": -1.0494110584259033, |
|
"logits/rejected": -0.994105339050293, |
|
"logps/chosen": -0.5251023173332214, |
|
"logps/rejected": -0.5692360401153564, |
|
"loss": 1.5667, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -1.312755823135376, |
|
"rewards/margins": 0.11033419519662857, |
|
"rewards/rejected": -1.4230899810791016, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7395137590168315, |
|
"grad_norm": 7.503670692443848, |
|
"learning_rate": 9.559391787759554e-08, |
|
"logits/chosen": -1.304071307182312, |
|
"logits/rejected": -1.1836615800857544, |
|
"logps/chosen": -0.5006979703903198, |
|
"logps/rejected": -0.4764450788497925, |
|
"loss": 1.5827, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2517449855804443, |
|
"rewards/margins": -0.06063230335712433, |
|
"rewards/rejected": -1.191112756729126, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.7416510820197703, |
|
"grad_norm": 7.5821757316589355, |
|
"learning_rate": 9.412754953531663e-08, |
|
"logits/chosen": -1.0126221179962158, |
|
"logits/rejected": -1.0061360597610474, |
|
"logps/chosen": -0.536862850189209, |
|
"logps/rejected": -0.7791385650634766, |
|
"loss": 1.5476, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.342157244682312, |
|
"rewards/margins": 0.6056893467903137, |
|
"rewards/rejected": -1.9478464126586914, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.743788405022709, |
|
"grad_norm": 6.262937545776367, |
|
"learning_rate": 9.266990223754067e-08, |
|
"logits/chosen": -0.9866530895233154, |
|
"logits/rejected": -1.087868571281433, |
|
"logps/chosen": -0.35664424300193787, |
|
"logps/rejected": -0.7218388915061951, |
|
"loss": 1.5054, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8916106224060059, |
|
"rewards/margins": 0.9129866361618042, |
|
"rewards/rejected": -1.8045971393585205, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.7459257280256478, |
|
"grad_norm": 4.314223766326904, |
|
"learning_rate": 9.12210575394553e-08, |
|
"logits/chosen": -1.0736174583435059, |
|
"logits/rejected": -1.0914644002914429, |
|
"logps/chosen": -0.4206693172454834, |
|
"logps/rejected": -0.4104337692260742, |
|
"loss": 1.58, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.0516732931137085, |
|
"rewards/margins": -0.025588899850845337, |
|
"rewards/rejected": -1.0260844230651855, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.7480630510285867, |
|
"grad_norm": 12.095142364501953, |
|
"learning_rate": 8.978109650374396e-08, |
|
"logits/chosen": -1.0621310472488403, |
|
"logits/rejected": -1.0466017723083496, |
|
"logps/chosen": -0.4226948618888855, |
|
"logps/rejected": -0.46644341945648193, |
|
"loss": 1.5575, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0567370653152466, |
|
"rewards/margins": 0.10937156528234482, |
|
"rewards/rejected": -1.16610848903656, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7502003740315255, |
|
"grad_norm": 6.449507236480713, |
|
"learning_rate": 8.835009969605011e-08, |
|
"logits/chosen": -1.1138099431991577, |
|
"logits/rejected": -1.0220037698745728, |
|
"logps/chosen": -0.3483428359031677, |
|
"logps/rejected": -0.3484461307525635, |
|
"loss": 1.5018, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8708571195602417, |
|
"rewards/margins": 0.0002581886947154999, |
|
"rewards/rejected": -0.8711153268814087, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.7523376970344643, |
|
"grad_norm": 6.543509483337402, |
|
"learning_rate": 8.692814718046978e-08, |
|
"logits/chosen": -1.1011321544647217, |
|
"logits/rejected": -1.0573300123214722, |
|
"logps/chosen": -0.6217561364173889, |
|
"logps/rejected": -0.5529008507728577, |
|
"loss": 1.6158, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.55439031124115, |
|
"rewards/margins": -0.1721382439136505, |
|
"rewards/rejected": -1.3822520971298218, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.7544750200374032, |
|
"grad_norm": 6.191348552703857, |
|
"learning_rate": 8.551531851507185e-08, |
|
"logits/chosen": -1.0794535875320435, |
|
"logits/rejected": -0.9287205934524536, |
|
"logps/chosen": -0.3870071768760681, |
|
"logps/rejected": -0.4184566140174866, |
|
"loss": 1.5651, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9675179719924927, |
|
"rewards/margins": 0.07862359285354614, |
|
"rewards/rejected": -1.046141505241394, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.756612343040342, |
|
"grad_norm": 5.186205863952637, |
|
"learning_rate": 8.411169274744723e-08, |
|
"logits/chosen": -1.0084459781646729, |
|
"logits/rejected": -1.0072932243347168, |
|
"logps/chosen": -0.3501359224319458, |
|
"logps/rejected": -0.4932965040206909, |
|
"loss": 1.4669, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8753398656845093, |
|
"rewards/margins": 0.35790154337882996, |
|
"rewards/rejected": -1.233241319656372, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.7587496660432808, |
|
"grad_norm": 8.382218360900879, |
|
"learning_rate": 8.271734841028552e-08, |
|
"logits/chosen": -1.0127713680267334, |
|
"logits/rejected": -1.0045888423919678, |
|
"logps/chosen": -0.4923154413700104, |
|
"logps/rejected": -0.4377010464668274, |
|
"loss": 1.5886, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.2307885885238647, |
|
"rewards/margins": -0.13653598725795746, |
|
"rewards/rejected": -1.094252586364746, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7608869890462197, |
|
"grad_norm": 34.09469985961914, |
|
"learning_rate": 8.133236351698142e-08, |
|
"logits/chosen": -1.191988229751587, |
|
"logits/rejected": -1.0870414972305298, |
|
"logps/chosen": -0.5740368366241455, |
|
"logps/rejected": -0.943623960018158, |
|
"loss": 1.5604, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.4350918531417847, |
|
"rewards/margins": 0.9239681959152222, |
|
"rewards/rejected": -2.359060049057007, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.7630243120491584, |
|
"grad_norm": 15.886775970458984, |
|
"learning_rate": 7.99568155572701e-08, |
|
"logits/chosen": -1.2273132801055908, |
|
"logits/rejected": -1.1660319566726685, |
|
"logps/chosen": -0.5724566578865051, |
|
"logps/rejected": -0.6214060187339783, |
|
"loss": 1.5502, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.4311414957046509, |
|
"rewards/margins": 0.12237339466810226, |
|
"rewards/rejected": -1.5535149574279785, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.7651616350520972, |
|
"grad_norm": 5.216745376586914, |
|
"learning_rate": 7.859078149289144e-08, |
|
"logits/chosen": -1.0447226762771606, |
|
"logits/rejected": -1.0748844146728516, |
|
"logps/chosen": -0.3527478873729706, |
|
"logps/rejected": -0.45652708411216736, |
|
"loss": 1.4919, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8818696737289429, |
|
"rewards/margins": 0.2594480514526367, |
|
"rewards/rejected": -1.1413178443908691, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.7672989580550361, |
|
"grad_norm": 7.554361820220947, |
|
"learning_rate": 7.723433775328384e-08, |
|
"logits/chosen": -1.0472806692123413, |
|
"logits/rejected": -1.2351243495941162, |
|
"logps/chosen": -0.486628919839859, |
|
"logps/rejected": -0.8915761709213257, |
|
"loss": 1.5042, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2165722846984863, |
|
"rewards/margins": 1.012368083000183, |
|
"rewards/rejected": -2.22894024848938, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.7694362810579749, |
|
"grad_norm": 5.030363082885742, |
|
"learning_rate": 7.588756023130833e-08, |
|
"logits/chosen": -0.7821986079216003, |
|
"logits/rejected": -0.8699120283126831, |
|
"logps/chosen": -0.4785808324813843, |
|
"logps/rejected": -0.5831509828567505, |
|
"loss": 1.4942, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1964521408081055, |
|
"rewards/margins": 0.261425256729126, |
|
"rewards/rejected": -1.4578773975372314, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7715736040609137, |
|
"grad_norm": 7.105344295501709, |
|
"learning_rate": 7.455052427900213e-08, |
|
"logits/chosen": -1.215461254119873, |
|
"logits/rejected": -1.0217854976654053, |
|
"logps/chosen": -0.3912314176559448, |
|
"logps/rejected": -0.3650144636631012, |
|
"loss": 1.6164, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9780785441398621, |
|
"rewards/margins": -0.06554235517978668, |
|
"rewards/rejected": -0.9125362038612366, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.7737109270638525, |
|
"grad_norm": 5.5884199142456055, |
|
"learning_rate": 7.322330470336313e-08, |
|
"logits/chosen": -1.0982252359390259, |
|
"logits/rejected": -1.0146585702896118, |
|
"logps/chosen": -0.32143884897232056, |
|
"logps/rejected": -0.495330274105072, |
|
"loss": 1.4397, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8035971522331238, |
|
"rewards/margins": 0.4347284734249115, |
|
"rewards/rejected": -1.2383257150650024, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.7758482500667914, |
|
"grad_norm": 10.065922737121582, |
|
"learning_rate": 7.190597576216384e-08, |
|
"logits/chosen": -0.8641526699066162, |
|
"logits/rejected": -0.8766672015190125, |
|
"logps/chosen": -0.46004733443260193, |
|
"logps/rejected": -0.4904063940048218, |
|
"loss": 1.5026, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.150118350982666, |
|
"rewards/margins": 0.07589760422706604, |
|
"rewards/rejected": -1.2260159254074097, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.7779855730697302, |
|
"grad_norm": 9.752578735351562, |
|
"learning_rate": 7.059861115979701e-08, |
|
"logits/chosen": -1.0331135988235474, |
|
"logits/rejected": -1.0849862098693848, |
|
"logps/chosen": -0.4356737434864044, |
|
"logps/rejected": -0.4790940582752228, |
|
"loss": 1.6159, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0891844034194946, |
|
"rewards/margins": 0.1085507869720459, |
|
"rewards/rejected": -1.1977351903915405, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.7801228960726689, |
|
"grad_norm": 6.089657783508301, |
|
"learning_rate": 6.930128404315214e-08, |
|
"logits/chosen": -1.0164854526519775, |
|
"logits/rejected": -1.006756067276001, |
|
"logps/chosen": -0.6724580526351929, |
|
"logps/rejected": -0.6376264095306396, |
|
"loss": 1.4724, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.6811450719833374, |
|
"rewards/margins": -0.08707903325557709, |
|
"rewards/rejected": -1.5940660238265991, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7822602190756078, |
|
"grad_norm": 4.81856107711792, |
|
"learning_rate": 6.801406699752229e-08, |
|
"logits/chosen": -1.1662445068359375, |
|
"logits/rejected": -1.0658493041992188, |
|
"logps/chosen": -0.4783725440502167, |
|
"logps/rejected": -0.4546273946762085, |
|
"loss": 1.6376, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.1959314346313477, |
|
"rewards/margins": -0.059362899512052536, |
|
"rewards/rejected": -1.1365684270858765, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.7843975420785466, |
|
"grad_norm": 9.418230056762695, |
|
"learning_rate": 6.673703204254347e-08, |
|
"logits/chosen": -1.2206920385360718, |
|
"logits/rejected": -1.2510498762130737, |
|
"logps/chosen": -0.594007670879364, |
|
"logps/rejected": -0.8274646997451782, |
|
"loss": 1.4951, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.4850192070007324, |
|
"rewards/margins": 0.5836424827575684, |
|
"rewards/rejected": -2.068661689758301, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.7865348650814854, |
|
"grad_norm": 10.291698455810547, |
|
"learning_rate": 6.547025062816486e-08, |
|
"logits/chosen": -0.8960355520248413, |
|
"logits/rejected": -0.9344096779823303, |
|
"logps/chosen": -0.36661607027053833, |
|
"logps/rejected": -0.43485212326049805, |
|
"loss": 1.554, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.916540265083313, |
|
"rewards/margins": 0.17059014737606049, |
|
"rewards/rejected": -1.0871303081512451, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.7886721880844243, |
|
"grad_norm": 4.118884086608887, |
|
"learning_rate": 6.42137936306514e-08, |
|
"logits/chosen": -1.0395543575286865, |
|
"logits/rejected": -0.9369036555290222, |
|
"logps/chosen": -0.3800487816333771, |
|
"logps/rejected": -0.35273048281669617, |
|
"loss": 1.5453, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9501218795776367, |
|
"rewards/margins": -0.06829574704170227, |
|
"rewards/rejected": -0.8818261623382568, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.7908095110873631, |
|
"grad_norm": 10.194417953491211, |
|
"learning_rate": 6.296773134861824e-08, |
|
"logits/chosen": -1.06082284450531, |
|
"logits/rejected": -1.0715018510818481, |
|
"logps/chosen": -0.45035964250564575, |
|
"logps/rejected": -0.49578312039375305, |
|
"loss": 1.5399, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.125899076461792, |
|
"rewards/margins": 0.11355876922607422, |
|
"rewards/rejected": -1.2394579648971558, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7929468340903019, |
|
"grad_norm": 12.731304168701172, |
|
"learning_rate": 6.173213349909728e-08, |
|
"logits/chosen": -1.1646618843078613, |
|
"logits/rejected": -1.0550154447555542, |
|
"logps/chosen": -0.45987626910209656, |
|
"logps/rejected": -0.6932123303413391, |
|
"loss": 1.4858, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1496906280517578, |
|
"rewards/margins": 0.583340048789978, |
|
"rewards/rejected": -1.7330307960510254, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.7950841570932408, |
|
"grad_norm": 4.002368927001953, |
|
"learning_rate": 6.050706921363672e-08, |
|
"logits/chosen": -1.1935923099517822, |
|
"logits/rejected": -1.2162138223648071, |
|
"logps/chosen": -0.39056870341300964, |
|
"logps/rejected": -0.5482085943222046, |
|
"loss": 1.472, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9764216542243958, |
|
"rewards/margins": 0.39409980177879333, |
|
"rewards/rejected": -1.3705215454101562, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.7972214800961795, |
|
"grad_norm": 14.192221641540527, |
|
"learning_rate": 5.929260703443337e-08, |
|
"logits/chosen": -0.7887646555900574, |
|
"logits/rejected": -0.8968151211738586, |
|
"logps/chosen": -0.3553355634212494, |
|
"logps/rejected": -0.43609827756881714, |
|
"loss": 1.5724, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8883388638496399, |
|
"rewards/margins": 0.20190683007240295, |
|
"rewards/rejected": -1.0902457237243652, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.7993588030991183, |
|
"grad_norm": 14.193215370178223, |
|
"learning_rate": 5.808881491049722e-08, |
|
"logits/chosen": -1.265504240989685, |
|
"logits/rejected": -1.3089518547058105, |
|
"logps/chosen": -0.5325222611427307, |
|
"logps/rejected": -0.530707836151123, |
|
"loss": 1.5907, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.331305742263794, |
|
"rewards/margins": -0.004536189138889313, |
|
"rewards/rejected": -1.3267695903778076, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.8014961261020572, |
|
"grad_norm": 8.60618782043457, |
|
"learning_rate": 5.6895760193850145e-08, |
|
"logits/chosen": -1.0075315237045288, |
|
"logits/rejected": -1.0355682373046875, |
|
"logps/chosen": -0.4078059792518616, |
|
"logps/rejected": -0.6130856275558472, |
|
"loss": 1.5543, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.019515037536621, |
|
"rewards/margins": 0.5131990313529968, |
|
"rewards/rejected": -1.5327140092849731, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.803633449104996, |
|
"grad_norm": 8.175145149230957, |
|
"learning_rate": 5.571350963575727e-08, |
|
"logits/chosen": -1.1598341464996338, |
|
"logits/rejected": -0.9834379553794861, |
|
"logps/chosen": -0.4177122414112091, |
|
"logps/rejected": -0.35740387439727783, |
|
"loss": 1.5343, |
|
"rewards/accuracies": 0.1875, |
|
"rewards/chosen": -1.0442806482315063, |
|
"rewards/margins": -0.1507708877325058, |
|
"rewards/rejected": -0.8935096859931946, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.8057707721079348, |
|
"grad_norm": 20.987957000732422, |
|
"learning_rate": 5.454212938299255e-08, |
|
"logits/chosen": -1.140153169631958, |
|
"logits/rejected": -1.0446147918701172, |
|
"logps/chosen": -0.5767669677734375, |
|
"logps/rejected": -0.4474826455116272, |
|
"loss": 1.5854, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -1.4419174194335938, |
|
"rewards/margins": -0.32321077585220337, |
|
"rewards/rejected": -1.1187067031860352, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.8079080951108736, |
|
"grad_norm": 7.252152919769287, |
|
"learning_rate": 5.338168497413756e-08, |
|
"logits/chosen": -1.1144384145736694, |
|
"logits/rejected": -1.2679189443588257, |
|
"logps/chosen": -0.3241596817970276, |
|
"logps/rejected": -0.5525774955749512, |
|
"loss": 1.5213, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8103991150856018, |
|
"rewards/margins": 0.5710446238517761, |
|
"rewards/rejected": -1.3814438581466675, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.8100454181138125, |
|
"grad_norm": 5.948306083679199, |
|
"learning_rate": 5.223224133591475e-08, |
|
"logits/chosen": -1.212296724319458, |
|
"logits/rejected": -1.0987417697906494, |
|
"logps/chosen": -0.7626843452453613, |
|
"logps/rejected": -1.2524211406707764, |
|
"loss": 1.4664, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.9067108631134033, |
|
"rewards/margins": 1.2243417501449585, |
|
"rewards/rejected": -3.1310529708862305, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.8121827411167513, |
|
"grad_norm": 5.803626537322998, |
|
"learning_rate": 5.109386277955477e-08, |
|
"logits/chosen": -1.1813595294952393, |
|
"logits/rejected": -1.1350992918014526, |
|
"logps/chosen": -0.45435211062431335, |
|
"logps/rejected": -0.5456478595733643, |
|
"loss": 1.4602, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.1358802318572998, |
|
"rewards/margins": 0.22823941707611084, |
|
"rewards/rejected": -1.364119529724121, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.81432006411969, |
|
"grad_norm": 4.112217903137207, |
|
"learning_rate": 4.996661299719845e-08, |
|
"logits/chosen": -0.8679373264312744, |
|
"logits/rejected": -0.8792969584465027, |
|
"logps/chosen": -0.4242730736732483, |
|
"logps/rejected": -0.7051547765731812, |
|
"loss": 1.5246, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.060682773590088, |
|
"rewards/margins": 0.7022043466567993, |
|
"rewards/rejected": -1.7628870010375977, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.8164573871226289, |
|
"grad_norm": 10.623425483703613, |
|
"learning_rate": 4.885055505833291e-08, |
|
"logits/chosen": -1.2642873525619507, |
|
"logits/rejected": -1.189084768295288, |
|
"logps/chosen": -0.4796138405799866, |
|
"logps/rejected": -0.5438456535339355, |
|
"loss": 1.5916, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.199034571647644, |
|
"rewards/margins": 0.16057954728603363, |
|
"rewards/rejected": -1.3596141338348389, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.8185947101255677, |
|
"grad_norm": 5.901862144470215, |
|
"learning_rate": 4.774575140626316e-08, |
|
"logits/chosen": -1.0387252569198608, |
|
"logits/rejected": -0.9353397488594055, |
|
"logps/chosen": -0.43210557103157043, |
|
"logps/rejected": -0.474994421005249, |
|
"loss": 1.4878, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0802638530731201, |
|
"rewards/margins": 0.1072220653295517, |
|
"rewards/rejected": -1.187485933303833, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.8207320331285065, |
|
"grad_norm": 10.615392684936523, |
|
"learning_rate": 4.6652263854618016e-08, |
|
"logits/chosen": -1.2227771282196045, |
|
"logits/rejected": -1.2380874156951904, |
|
"logps/chosen": -0.42436325550079346, |
|
"logps/rejected": -0.6415535807609558, |
|
"loss": 1.4763, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0609081983566284, |
|
"rewards/margins": 0.5429757833480835, |
|
"rewards/rejected": -1.6038841009140015, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.8228693561314454, |
|
"grad_norm": 7.720395565032959, |
|
"learning_rate": 4.557015358389216e-08, |
|
"logits/chosen": -0.9876857995986938, |
|
"logits/rejected": -1.0623700618743896, |
|
"logps/chosen": -0.37730199098587036, |
|
"logps/rejected": -0.587026059627533, |
|
"loss": 1.4647, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9432549476623535, |
|
"rewards/margins": 0.5243102312088013, |
|
"rewards/rejected": -1.4675650596618652, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8250066791343842, |
|
"grad_norm": 8.550374984741211, |
|
"learning_rate": 4.449948113802254e-08, |
|
"logits/chosen": -1.203737735748291, |
|
"logits/rejected": -1.1764881610870361, |
|
"logps/chosen": -0.33095237612724304, |
|
"logps/rejected": -0.3797753155231476, |
|
"loss": 1.5067, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.827380895614624, |
|
"rewards/margins": 0.12205736339092255, |
|
"rewards/rejected": -0.9494383335113525, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.827144002137323, |
|
"grad_norm": 8.158326148986816, |
|
"learning_rate": 4.3440306421001324e-08, |
|
"logits/chosen": -1.0478994846343994, |
|
"logits/rejected": -1.0275698900222778, |
|
"logps/chosen": -0.4138807952404022, |
|
"logps/rejected": -0.5256016254425049, |
|
"loss": 1.4697, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0347020626068115, |
|
"rewards/margins": 0.2793022096157074, |
|
"rewards/rejected": -1.3140041828155518, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.8292813251402619, |
|
"grad_norm": 4.749565124511719, |
|
"learning_rate": 4.2392688693524055e-08, |
|
"logits/chosen": -1.0925198793411255, |
|
"logits/rejected": -1.053438663482666, |
|
"logps/chosen": -0.4893158972263336, |
|
"logps/rejected": -0.7248774766921997, |
|
"loss": 1.4731, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2232897281646729, |
|
"rewards/margins": 0.5889038443565369, |
|
"rewards/rejected": -1.8121936321258545, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.8314186481432007, |
|
"grad_norm": 17.479476928710938, |
|
"learning_rate": 4.1356686569674335e-08, |
|
"logits/chosen": -1.237557291984558, |
|
"logits/rejected": -1.1833033561706543, |
|
"logps/chosen": -0.6519225835800171, |
|
"logps/rejected": -0.6998899579048157, |
|
"loss": 1.5504, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.6298065185546875, |
|
"rewards/margins": 0.1199183464050293, |
|
"rewards/rejected": -1.7497249841690063, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.8335559711461394, |
|
"grad_norm": 6.035606384277344, |
|
"learning_rate": 4.0332358013644015e-08, |
|
"logits/chosen": -1.16561758518219, |
|
"logits/rejected": -1.1693965196609497, |
|
"logps/chosen": -0.6768723726272583, |
|
"logps/rejected": -1.003299593925476, |
|
"loss": 1.4768, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.692180871963501, |
|
"rewards/margins": 0.8160682916641235, |
|
"rewards/rejected": -2.508248805999756, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8356932941490782, |
|
"grad_norm": 6.278528213500977, |
|
"learning_rate": 3.9319760336490205e-08, |
|
"logits/chosen": -0.7999597191810608, |
|
"logits/rejected": -0.6943086385726929, |
|
"logps/chosen": -0.3784443140029907, |
|
"logps/rejected": -0.4581376612186432, |
|
"loss": 1.5857, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9461109042167664, |
|
"rewards/margins": 0.1992333084344864, |
|
"rewards/rejected": -1.1453441381454468, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.8378306171520171, |
|
"grad_norm": 4.925440788269043, |
|
"learning_rate": 3.831895019292897e-08, |
|
"logits/chosen": -1.1565301418304443, |
|
"logits/rejected": -1.1625264883041382, |
|
"logps/chosen": -0.5009636878967285, |
|
"logps/rejected": -0.8770073056221008, |
|
"loss": 1.4676, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2524092197418213, |
|
"rewards/margins": 0.9401088953018188, |
|
"rewards/rejected": -2.1925179958343506, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.8399679401549559, |
|
"grad_norm": 16.391368865966797, |
|
"learning_rate": 3.732998357816514e-08, |
|
"logits/chosen": -1.0616164207458496, |
|
"logits/rejected": -1.0247597694396973, |
|
"logps/chosen": -0.5045540928840637, |
|
"logps/rejected": -0.467951238155365, |
|
"loss": 1.6318, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.261385202407837, |
|
"rewards/margins": -0.09150727093219757, |
|
"rewards/rejected": -1.1698780059814453, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.8421052631578947, |
|
"grad_norm": 8.550324440002441, |
|
"learning_rate": 3.635291582475963e-08, |
|
"logits/chosen": -0.9775318503379822, |
|
"logits/rejected": -0.8906686902046204, |
|
"logps/chosen": -0.5394979119300842, |
|
"logps/rejected": -0.5499922037124634, |
|
"loss": 1.5031, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3487448692321777, |
|
"rewards/margins": 0.02623593807220459, |
|
"rewards/rejected": -1.3749808073043823, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.8442425861608336, |
|
"grad_norm": 4.05560302734375, |
|
"learning_rate": 3.538780159953347e-08, |
|
"logits/chosen": -1.0048534870147705, |
|
"logits/rejected": -0.839844822883606, |
|
"logps/chosen": -0.4153073728084564, |
|
"logps/rejected": -0.35160398483276367, |
|
"loss": 1.5959, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -1.0382684469223022, |
|
"rewards/margins": -0.1592584103345871, |
|
"rewards/rejected": -0.8790099620819092, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8463799091637724, |
|
"grad_norm": 7.721834182739258, |
|
"learning_rate": 3.4434694900509345e-08, |
|
"logits/chosen": -1.1735496520996094, |
|
"logits/rejected": -1.150696039199829, |
|
"logps/chosen": -0.5112527012825012, |
|
"logps/rejected": -0.6088312864303589, |
|
"loss": 1.5405, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2781318426132202, |
|
"rewards/margins": 0.24394652247428894, |
|
"rewards/rejected": -1.522078275680542, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.8485172321667112, |
|
"grad_norm": 5.097099781036377, |
|
"learning_rate": 3.349364905389032e-08, |
|
"logits/chosen": -1.2030649185180664, |
|
"logits/rejected": -1.08577561378479, |
|
"logps/chosen": -0.47224241495132446, |
|
"logps/rejected": -0.6983097791671753, |
|
"loss": 1.5699, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1806060075759888, |
|
"rewards/margins": 0.5651683807373047, |
|
"rewards/rejected": -1.745774507522583, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.85065455516965, |
|
"grad_norm": 9.325733184814453, |
|
"learning_rate": 3.256471671107616e-08, |
|
"logits/chosen": -0.9340042471885681, |
|
"logits/rejected": -0.946205198764801, |
|
"logps/chosen": -0.7264373302459717, |
|
"logps/rejected": -0.6860026121139526, |
|
"loss": 1.5801, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.8160933256149292, |
|
"rewards/margins": -0.1010868027806282, |
|
"rewards/rejected": -1.7150065898895264, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.8527918781725888, |
|
"grad_norm": 4.532627105712891, |
|
"learning_rate": 3.1647949845717585e-08, |
|
"logits/chosen": -0.8892905712127686, |
|
"logits/rejected": -0.8267837166786194, |
|
"logps/chosen": -0.4368503987789154, |
|
"logps/rejected": -0.5416733026504517, |
|
"loss": 1.4184, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0921260118484497, |
|
"rewards/margins": 0.2620573043823242, |
|
"rewards/rejected": -1.3541834354400635, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.8549292011755276, |
|
"grad_norm": 3.611345052719116, |
|
"learning_rate": 3.074339975080836e-08, |
|
"logits/chosen": -0.9715834259986877, |
|
"logits/rejected": -0.9480459690093994, |
|
"logps/chosen": -0.685413122177124, |
|
"logps/rejected": -0.7482943534851074, |
|
"loss": 1.4719, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.71353280544281, |
|
"rewards/margins": 0.15720298886299133, |
|
"rewards/rejected": -1.870735764503479, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8570665241784665, |
|
"grad_norm": 49.35193634033203, |
|
"learning_rate": 2.98511170358155e-08, |
|
"logits/chosen": -0.959058940410614, |
|
"logits/rejected": -0.9538683295249939, |
|
"logps/chosen": -0.43192583322525024, |
|
"logps/rejected": -0.4624338746070862, |
|
"loss": 1.6139, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0798146724700928, |
|
"rewards/margins": 0.07627001404762268, |
|
"rewards/rejected": -1.156084656715393, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.8592038471814053, |
|
"grad_norm": 11.570405006408691, |
|
"learning_rate": 2.8971151623847584e-08, |
|
"logits/chosen": -1.0528483390808105, |
|
"logits/rejected": -0.9815914630889893, |
|
"logps/chosen": -0.584007740020752, |
|
"logps/rejected": -0.6078099608421326, |
|
"loss": 1.5858, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4600192308425903, |
|
"rewards/margins": 0.05950555205345154, |
|
"rewards/rejected": -1.5195249319076538, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.8613411701843441, |
|
"grad_norm": 5.403092861175537, |
|
"learning_rate": 2.8103552748861475e-08, |
|
"logits/chosen": -1.052750825881958, |
|
"logits/rejected": -1.0424790382385254, |
|
"logps/chosen": -0.6250475645065308, |
|
"logps/rejected": -0.6444798111915588, |
|
"loss": 1.6129, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.5626189708709717, |
|
"rewards/margins": 0.04858069866895676, |
|
"rewards/rejected": -1.6111997365951538, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.863478493187283, |
|
"grad_norm": 5.894848346710205, |
|
"learning_rate": 2.724836895290805e-08, |
|
"logits/chosen": -1.0178946256637573, |
|
"logits/rejected": -0.8687437772750854, |
|
"logps/chosen": -0.36898887157440186, |
|
"logps/rejected": -0.7522455453872681, |
|
"loss": 1.5337, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9224721789360046, |
|
"rewards/margins": 0.958141565322876, |
|
"rewards/rejected": -1.8806138038635254, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.8656158161902218, |
|
"grad_norm": 6.724638938903809, |
|
"learning_rate": 2.6405648083415833e-08, |
|
"logits/chosen": -1.1396384239196777, |
|
"logits/rejected": -1.0131150484085083, |
|
"logps/chosen": -0.6166858673095703, |
|
"logps/rejected": -0.5131340026855469, |
|
"loss": 1.5427, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.5417147874832153, |
|
"rewards/margins": -0.258879691362381, |
|
"rewards/rejected": -1.2828351259231567, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8677531391931605, |
|
"grad_norm": 4.763136863708496, |
|
"learning_rate": 2.55754372905142e-08, |
|
"logits/chosen": -1.1291677951812744, |
|
"logits/rejected": -1.0440433025360107, |
|
"logps/chosen": -0.44505226612091064, |
|
"logps/rejected": -0.46291491389274597, |
|
"loss": 1.499, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1126307249069214, |
|
"rewards/margins": 0.04465658590197563, |
|
"rewards/rejected": -1.157287359237671, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.8698904621960993, |
|
"grad_norm": 5.994953155517578, |
|
"learning_rate": 2.475778302439524e-08, |
|
"logits/chosen": -1.0867843627929688, |
|
"logits/rejected": -1.1085426807403564, |
|
"logps/chosen": -0.6850754022598267, |
|
"logps/rejected": -0.8681538105010986, |
|
"loss": 1.54, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.7126885652542114, |
|
"rewards/margins": 0.4576959013938904, |
|
"rewards/rejected": -2.170384645462036, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.8720277851990382, |
|
"grad_norm": 4.274337291717529, |
|
"learning_rate": 2.3952731032714973e-08, |
|
"logits/chosen": -0.8507957458496094, |
|
"logits/rejected": -0.8216973543167114, |
|
"logps/chosen": -0.352754145860672, |
|
"logps/rejected": -0.6458288431167603, |
|
"loss": 1.4806, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8818854093551636, |
|
"rewards/margins": 0.7326868176460266, |
|
"rewards/rejected": -1.6145721673965454, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.874165108201977, |
|
"grad_norm": 19.687917709350586, |
|
"learning_rate": 2.3160326358033778e-08, |
|
"logits/chosen": -1.0179362297058105, |
|
"logits/rejected": -0.9422796964645386, |
|
"logps/chosen": -0.6053561568260193, |
|
"logps/rejected": -1.013503074645996, |
|
"loss": 1.4768, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5133905410766602, |
|
"rewards/margins": 1.0203672647476196, |
|
"rewards/rejected": -2.5337576866149902, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.8763024312049158, |
|
"grad_norm": 12.98466968536377, |
|
"learning_rate": 2.2380613335296033e-08, |
|
"logits/chosen": -0.8576774597167969, |
|
"logits/rejected": -0.9601131677627563, |
|
"logps/chosen": -0.42103275656700134, |
|
"logps/rejected": -0.41525495052337646, |
|
"loss": 1.5959, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0525819063186646, |
|
"rewards/margins": -0.014444507658481598, |
|
"rewards/rejected": -1.0381373167037964, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8784397542078547, |
|
"grad_norm": 7.204395294189453, |
|
"learning_rate": 2.1613635589349756e-08, |
|
"logits/chosen": -0.9204460978507996, |
|
"logits/rejected": -0.9429717063903809, |
|
"logps/chosen": -0.3754885196685791, |
|
"logps/rejected": -0.4179832935333252, |
|
"loss": 1.5211, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9387211799621582, |
|
"rewards/margins": 0.10623697191476822, |
|
"rewards/rejected": -1.0449581146240234, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.8805770772107935, |
|
"grad_norm": 14.507763862609863, |
|
"learning_rate": 2.085943603250595e-08, |
|
"logits/chosen": -0.9056552648544312, |
|
"logits/rejected": -0.8666099309921265, |
|
"logps/chosen": -0.4410094618797302, |
|
"logps/rejected": -0.6149858236312866, |
|
"loss": 1.5065, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1025235652923584, |
|
"rewards/margins": 0.43494072556495667, |
|
"rewards/rejected": -1.5374643802642822, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.8827144002137323, |
|
"grad_norm": 4.710812568664551, |
|
"learning_rate": 2.0118056862137354e-08, |
|
"logits/chosen": -0.9734061360359192, |
|
"logits/rejected": -0.8919450044631958, |
|
"logps/chosen": -0.4355347156524658, |
|
"logps/rejected": -0.4121510982513428, |
|
"loss": 1.5909, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.0888367891311646, |
|
"rewards/margins": -0.058458905667066574, |
|
"rewards/rejected": -1.030377745628357, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.8848517232166712, |
|
"grad_norm": 6.681394100189209, |
|
"learning_rate": 1.938953955831771e-08, |
|
"logits/chosen": -1.089871883392334, |
|
"logits/rejected": -1.0652118921279907, |
|
"logps/chosen": -0.45682665705680847, |
|
"logps/rejected": -0.5210414528846741, |
|
"loss": 1.4889, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1420665979385376, |
|
"rewards/margins": 0.1605370044708252, |
|
"rewards/rejected": -1.3026037216186523, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.88698904621961, |
|
"grad_norm": 10.179585456848145, |
|
"learning_rate": 1.8673924881500823e-08, |
|
"logits/chosen": -0.9976410269737244, |
|
"logits/rejected": -1.0120102167129517, |
|
"logps/chosen": -0.7260380387306213, |
|
"logps/rejected": -0.9380686283111572, |
|
"loss": 1.5325, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8150951862335205, |
|
"rewards/margins": 0.5300765037536621, |
|
"rewards/rejected": -2.3451716899871826, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8891263692225487, |
|
"grad_norm": 6.529333591461182, |
|
"learning_rate": 1.797125287024029e-08, |
|
"logits/chosen": -1.0898345708847046, |
|
"logits/rejected": -1.1249827146530151, |
|
"logps/chosen": -0.5659042596817017, |
|
"logps/rejected": -0.8098978996276855, |
|
"loss": 1.4598, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.414760947227478, |
|
"rewards/margins": 0.6099839210510254, |
|
"rewards/rejected": -2.024744749069214, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.8912636922254876, |
|
"grad_norm": 18.293628692626953, |
|
"learning_rate": 1.7281562838948966e-08, |
|
"logits/chosen": -0.9206041693687439, |
|
"logits/rejected": -0.9363532066345215, |
|
"logps/chosen": -0.6177800893783569, |
|
"logps/rejected": -0.5964070558547974, |
|
"loss": 1.6686, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.5444501638412476, |
|
"rewards/margins": -0.05343271791934967, |
|
"rewards/rejected": -1.4910173416137695, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.8934010152284264, |
|
"grad_norm": 6.754497528076172, |
|
"learning_rate": 1.6604893375699592e-08, |
|
"logits/chosen": -1.1073287725448608, |
|
"logits/rejected": -0.9858848452568054, |
|
"logps/chosen": -0.4562823176383972, |
|
"logps/rejected": -0.5044858455657959, |
|
"loss": 1.5667, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1407058238983154, |
|
"rewards/margins": 0.12050891667604446, |
|
"rewards/rejected": -1.2612147331237793, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.8955383382313652, |
|
"grad_norm": 4.217410087585449, |
|
"learning_rate": 1.5941282340065697e-08, |
|
"logits/chosen": -1.1437349319458008, |
|
"logits/rejected": -1.2322208881378174, |
|
"logps/chosen": -0.4406435191631317, |
|
"logps/rejected": -0.6784199476242065, |
|
"loss": 1.4618, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1016088724136353, |
|
"rewards/margins": 0.5944410562515259, |
|
"rewards/rejected": -1.6960498094558716, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.897675661234304, |
|
"grad_norm": 16.80787467956543, |
|
"learning_rate": 1.5290766861003475e-08, |
|
"logits/chosen": -0.9160170555114746, |
|
"logits/rejected": -0.8729619383811951, |
|
"logps/chosen": -0.3433056175708771, |
|
"logps/rejected": -0.37045544385910034, |
|
"loss": 1.6206, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.8582640290260315, |
|
"rewards/margins": 0.06787460297346115, |
|
"rewards/rejected": -0.926138699054718, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8998129842372429, |
|
"grad_norm": 13.28708553314209, |
|
"learning_rate": 1.4653383334774228e-08, |
|
"logits/chosen": -1.0449622869491577, |
|
"logits/rejected": -1.0788357257843018, |
|
"logps/chosen": -0.5669997930526733, |
|
"logps/rejected": -0.7596959471702576, |
|
"loss": 1.5144, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.4174995422363281, |
|
"rewards/margins": 0.4817403554916382, |
|
"rewards/rejected": -1.8992400169372559, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.9019503072401817, |
|
"grad_norm": 7.049681663513184, |
|
"learning_rate": 1.4029167422908105e-08, |
|
"logits/chosen": -1.148837924003601, |
|
"logits/rejected": -1.0951889753341675, |
|
"logps/chosen": -0.4990730583667755, |
|
"logps/rejected": -0.607469916343689, |
|
"loss": 1.5152, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2476826906204224, |
|
"rewards/margins": 0.2709922194480896, |
|
"rewards/rejected": -1.5186748504638672, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.9040876302431204, |
|
"grad_norm": 6.772598743438721, |
|
"learning_rate": 1.3418154050208936e-08, |
|
"logits/chosen": -0.9771215319633484, |
|
"logits/rejected": -0.9951186776161194, |
|
"logps/chosen": -0.5036316514015198, |
|
"logps/rejected": -0.6173194646835327, |
|
"loss": 1.5277, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2590792179107666, |
|
"rewards/margins": 0.28421932458877563, |
|
"rewards/rejected": -1.543298602104187, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.9062249532460593, |
|
"grad_norm": 5.654405117034912, |
|
"learning_rate": 1.2820377402800064e-08, |
|
"logits/chosen": -0.8762426972389221, |
|
"logits/rejected": -0.6818346381187439, |
|
"logps/chosen": -0.4185902774333954, |
|
"logps/rejected": -0.9549139738082886, |
|
"loss": 1.4405, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0464756488800049, |
|
"rewards/margins": 1.3408091068267822, |
|
"rewards/rejected": -2.387284755706787, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.9083622762489981, |
|
"grad_norm": 9.220845222473145, |
|
"learning_rate": 1.2235870926211616e-08, |
|
"logits/chosen": -0.9336291551589966, |
|
"logits/rejected": -0.9068048596382141, |
|
"logps/chosen": -0.4764101505279541, |
|
"logps/rejected": -0.6432890295982361, |
|
"loss": 1.5286, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1910252571105957, |
|
"rewards/margins": 0.41719722747802734, |
|
"rewards/rejected": -1.608222484588623, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9104995992519369, |
|
"grad_norm": 6.728222846984863, |
|
"learning_rate": 1.1664667323509347e-08, |
|
"logits/chosen": -1.0641913414001465, |
|
"logits/rejected": -0.9187748432159424, |
|
"logps/chosen": -0.3940516710281372, |
|
"logps/rejected": -0.4065035283565521, |
|
"loss": 1.5201, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.985129177570343, |
|
"rewards/margins": 0.031129609793424606, |
|
"rewards/rejected": -1.0162588357925415, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.9126369222548758, |
|
"grad_norm": 7.619234561920166, |
|
"learning_rate": 1.1106798553464802e-08, |
|
"logits/chosen": -0.954318642616272, |
|
"logits/rejected": -0.908359706401825, |
|
"logps/chosen": -0.4075399935245514, |
|
"logps/rejected": -0.4785918891429901, |
|
"loss": 1.5005, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0188499689102173, |
|
"rewards/margins": 0.17762985825538635, |
|
"rewards/rejected": -1.1964799165725708, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.9147742452578146, |
|
"grad_norm": 6.409261226654053, |
|
"learning_rate": 1.0562295828767387e-08, |
|
"logits/chosen": -1.0399943590164185, |
|
"logits/rejected": -1.0340969562530518, |
|
"logps/chosen": -0.3984678387641907, |
|
"logps/rejected": -0.5552449226379395, |
|
"loss": 1.4632, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.9961696863174438, |
|
"rewards/margins": 0.39194267988204956, |
|
"rewards/rejected": -1.3881123065948486, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.9169115682607534, |
|
"grad_norm": 10.060012817382812, |
|
"learning_rate": 1.0031189614277763e-08, |
|
"logits/chosen": -0.9579813480377197, |
|
"logits/rejected": -0.9383722543716431, |
|
"logps/chosen": -0.5487989187240601, |
|
"logps/rejected": -0.5818829536437988, |
|
"loss": 1.5268, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.371997356414795, |
|
"rewards/margins": 0.0827101320028305, |
|
"rewards/rejected": -1.4547075033187866, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.9190488912636923, |
|
"grad_norm": 6.2424139976501465, |
|
"learning_rate": 9.513509625323518e-09, |
|
"logits/chosen": -0.9296804666519165, |
|
"logits/rejected": -0.9260187745094299, |
|
"logps/chosen": -0.39111167192459106, |
|
"logps/rejected": -0.45873600244522095, |
|
"loss": 1.4814, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9777792096138, |
|
"rewards/margins": 0.16906076669692993, |
|
"rewards/rejected": -1.14683997631073, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.921186214266631, |
|
"grad_norm": 10.799667358398438, |
|
"learning_rate": 9.009284826036689e-09, |
|
"logits/chosen": -0.892406702041626, |
|
"logits/rejected": -0.9241263270378113, |
|
"logps/chosen": -0.5018429160118103, |
|
"logps/rejected": -0.6716207265853882, |
|
"loss": 1.4844, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2546073198318481, |
|
"rewards/margins": 0.42444440722465515, |
|
"rewards/rejected": -1.6790517568588257, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.9233235372695698, |
|
"grad_norm": 4.4624104499816895, |
|
"learning_rate": 8.518543427732949e-09, |
|
"logits/chosen": -0.9802000522613525, |
|
"logits/rejected": -0.9559367895126343, |
|
"logps/chosen": -0.38735339045524597, |
|
"logps/rejected": -0.6876262426376343, |
|
"loss": 1.481, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.9683833718299866, |
|
"rewards/margins": 0.7506821155548096, |
|
"rewards/rejected": -1.7190656661987305, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.9254608602725087, |
|
"grad_norm": 7.951170444488525, |
|
"learning_rate": 8.041312887333396e-09, |
|
"logits/chosen": -0.9957330226898193, |
|
"logits/rejected": -0.9468004703521729, |
|
"logps/chosen": -0.4312437176704407, |
|
"logps/rejected": -0.5361820459365845, |
|
"loss": 1.4571, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0781091451644897, |
|
"rewards/margins": 0.26234593987464905, |
|
"rewards/rejected": -1.3404550552368164, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.9275981832754475, |
|
"grad_norm": 12.679637908935547, |
|
"learning_rate": 7.577619905828281e-09, |
|
"logits/chosen": -1.0287508964538574, |
|
"logits/rejected": -0.9482178688049316, |
|
"logps/chosen": -0.40496230125427246, |
|
"logps/rejected": -0.38896051049232483, |
|
"loss": 1.4673, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.0124057531356812, |
|
"rewards/margins": -0.040004514157772064, |
|
"rewards/rejected": -0.9724011421203613, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.9297355062783863, |
|
"grad_norm": 6.805285930633545, |
|
"learning_rate": 7.127490426783123e-09, |
|
"logits/chosen": -1.1413686275482178, |
|
"logits/rejected": -1.09022057056427, |
|
"logps/chosen": -0.5725710391998291, |
|
"logps/rejected": -0.6725842952728271, |
|
"loss": 1.5341, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.4314275979995728, |
|
"rewards/margins": 0.2500333786010742, |
|
"rewards/rejected": -1.6814608573913574, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9318728292813251, |
|
"grad_norm": 5.759010314941406, |
|
"learning_rate": 6.6909496348871445e-09, |
|
"logits/chosen": -1.1474663019180298, |
|
"logits/rejected": -1.1745803356170654, |
|
"logps/chosen": -0.6815481185913086, |
|
"logps/rejected": -0.7699655294418335, |
|
"loss": 1.5144, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.7038702964782715, |
|
"rewards/margins": 0.2210434526205063, |
|
"rewards/rejected": -1.9249136447906494, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.934010152284264, |
|
"grad_norm": 4.481965065002441, |
|
"learning_rate": 6.268021954544095e-09, |
|
"logits/chosen": -0.9412963390350342, |
|
"logits/rejected": -0.9516785144805908, |
|
"logps/chosen": -0.3796120285987854, |
|
"logps/rejected": -0.37879857420921326, |
|
"loss": 1.5547, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.9490300416946411, |
|
"rewards/margins": -0.002033662050962448, |
|
"rewards/rejected": -0.9469964504241943, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.9361474752872028, |
|
"grad_norm": 6.431197166442871, |
|
"learning_rate": 5.858731048505927e-09, |
|
"logits/chosen": -1.076103687286377, |
|
"logits/rejected": -1.0976781845092773, |
|
"logps/chosen": -0.4042336046695709, |
|
"logps/rejected": -0.612984836101532, |
|
"loss": 1.4394, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0105839967727661, |
|
"rewards/margins": 0.5218778848648071, |
|
"rewards/rejected": -1.5324620008468628, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.9382847982901416, |
|
"grad_norm": 11.778403282165527, |
|
"learning_rate": 5.463099816548577e-09, |
|
"logits/chosen": -1.085112452507019, |
|
"logits/rejected": -1.024551272392273, |
|
"logps/chosen": -0.3431437611579895, |
|
"logps/rejected": -0.5487081408500671, |
|
"loss": 1.5601, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.8578594923019409, |
|
"rewards/margins": 0.5139108896255493, |
|
"rewards/rejected": -1.3717702627182007, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.9404221212930804, |
|
"grad_norm": 4.792006492614746, |
|
"learning_rate": 5.08115039419113e-09, |
|
"logits/chosen": -0.9755135774612427, |
|
"logits/rejected": -0.8824871778488159, |
|
"logps/chosen": -0.3806512653827667, |
|
"logps/rejected": -0.5343747735023499, |
|
"loss": 1.52, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9516281485557556, |
|
"rewards/margins": 0.38430866599082947, |
|
"rewards/rejected": -1.3359367847442627, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9425594442960192, |
|
"grad_norm": 6.083741188049316, |
|
"learning_rate": 4.712904151456864e-09, |
|
"logits/chosen": -0.9559481143951416, |
|
"logits/rejected": -0.8965126872062683, |
|
"logps/chosen": -0.4216436743736267, |
|
"logps/rejected": -0.4634767770767212, |
|
"loss": 1.4695, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0541093349456787, |
|
"rewards/margins": 0.1045827567577362, |
|
"rewards/rejected": -1.1586920022964478, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.944696767298958, |
|
"grad_norm": 4.866091251373291, |
|
"learning_rate": 4.358381691677931e-09, |
|
"logits/chosen": -0.9368703961372375, |
|
"logits/rejected": -0.8826941251754761, |
|
"logps/chosen": -0.3301496207714081, |
|
"logps/rejected": -0.38065895438194275, |
|
"loss": 1.4942, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8253740072250366, |
|
"rewards/margins": 0.1262734830379486, |
|
"rewards/rejected": -0.9516474008560181, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9468340903018969, |
|
"grad_norm": 7.398251056671143, |
|
"learning_rate": 4.0176028503425826e-09, |
|
"logits/chosen": -1.079075574874878, |
|
"logits/rejected": -1.00128972530365, |
|
"logps/chosen": -0.45567309856414795, |
|
"logps/rejected": -0.39884287118911743, |
|
"loss": 1.5388, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -1.1391828060150146, |
|
"rewards/margins": -0.14207565784454346, |
|
"rewards/rejected": -0.9971071481704712, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.9489714133048357, |
|
"grad_norm": 9.869709014892578, |
|
"learning_rate": 3.6905866939851983e-09, |
|
"logits/chosen": -1.1050983667373657, |
|
"logits/rejected": -1.0123190879821777, |
|
"logps/chosen": -0.44024163484573364, |
|
"logps/rejected": -0.3926200568675995, |
|
"loss": 1.4883, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1006040573120117, |
|
"rewards/margins": -0.1190539002418518, |
|
"rewards/rejected": -0.9815501570701599, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.9511087363077745, |
|
"grad_norm": 4.558749198913574, |
|
"learning_rate": 3.3773515191196646e-09, |
|
"logits/chosen": -1.0600441694259644, |
|
"logits/rejected": -1.0637009143829346, |
|
"logps/chosen": -0.4824512004852295, |
|
"logps/rejected": -0.594225287437439, |
|
"loss": 1.5854, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2061281204223633, |
|
"rewards/margins": 0.2794351577758789, |
|
"rewards/rejected": -1.4855631589889526, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9532460593107134, |
|
"grad_norm": 7.49786376953125, |
|
"learning_rate": 3.077914851215585e-09, |
|
"logits/chosen": -1.06005859375, |
|
"logits/rejected": -1.0157767534255981, |
|
"logps/chosen": -0.5099815130233765, |
|
"logps/rejected": -0.6151185035705566, |
|
"loss": 1.4851, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.274953842163086, |
|
"rewards/margins": 0.26284244656562805, |
|
"rewards/rejected": -1.5377962589263916, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.9553833823136522, |
|
"grad_norm": 5.8320746421813965, |
|
"learning_rate": 2.7922934437178692e-09, |
|
"logits/chosen": -0.8938602209091187, |
|
"logits/rejected": -0.9562631249427795, |
|
"logps/chosen": -0.37319353222846985, |
|
"logps/rejected": -0.37881189584732056, |
|
"loss": 1.4395, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.9329838752746582, |
|
"rewards/margins": 0.0140459556132555, |
|
"rewards/rejected": -0.9470298290252686, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.957520705316591, |
|
"grad_norm": 3.9899652004241943, |
|
"learning_rate": 2.5205032771092592e-09, |
|
"logits/chosen": -0.9867920279502869, |
|
"logits/rejected": -0.9630347490310669, |
|
"logps/chosen": -0.39268139004707336, |
|
"logps/rejected": -0.635047435760498, |
|
"loss": 1.5503, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9817034006118774, |
|
"rewards/margins": 0.6059151291847229, |
|
"rewards/rejected": -1.5876185894012451, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.9596580283195298, |
|
"grad_norm": 7.979334354400635, |
|
"learning_rate": 2.2625595580163247e-09, |
|
"logits/chosen": -1.1002980470657349, |
|
"logits/rejected": -1.0980544090270996, |
|
"logps/chosen": -0.7607989311218262, |
|
"logps/rejected": -0.9997137784957886, |
|
"loss": 1.5401, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.9019973278045654, |
|
"rewards/margins": 0.5972872972488403, |
|
"rewards/rejected": -2.4992847442626953, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.9617953513224686, |
|
"grad_norm": 21.347997665405273, |
|
"learning_rate": 2.0184767183584474e-09, |
|
"logits/chosen": -0.8544862866401672, |
|
"logits/rejected": -0.8984670042991638, |
|
"logps/chosen": -0.5437809228897095, |
|
"logps/rejected": -0.6033496856689453, |
|
"loss": 1.4988, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.359452247619629, |
|
"rewards/margins": 0.14892183244228363, |
|
"rewards/rejected": -1.5083742141723633, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9639326743254074, |
|
"grad_norm": 8.341211318969727, |
|
"learning_rate": 1.7882684145406612e-09, |
|
"logits/chosen": -1.0166016817092896, |
|
"logits/rejected": -1.0338623523712158, |
|
"logps/chosen": -0.5460320711135864, |
|
"logps/rejected": -0.5169766545295715, |
|
"loss": 1.5717, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.3650802373886108, |
|
"rewards/margins": -0.07263859361410141, |
|
"rewards/rejected": -1.292441725730896, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.9660699973283462, |
|
"grad_norm": 12.869359970092773, |
|
"learning_rate": 1.5719475266893489e-09, |
|
"logits/chosen": -1.0716265439987183, |
|
"logits/rejected": -1.0736249685287476, |
|
"logps/chosen": -0.4328761696815491, |
|
"logps/rejected": -0.5213409066200256, |
|
"loss": 1.5382, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.0821905136108398, |
|
"rewards/margins": 0.22116179764270782, |
|
"rewards/rejected": -1.3033523559570312, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.9682073203312851, |
|
"grad_norm": 5.998004913330078, |
|
"learning_rate": 1.3695261579316775e-09, |
|
"logits/chosen": -1.1411190032958984, |
|
"logits/rejected": -1.0635725259780884, |
|
"logps/chosen": -0.6178188323974609, |
|
"logps/rejected": -0.592136561870575, |
|
"loss": 1.6393, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -1.5445469617843628, |
|
"rewards/margins": -0.064205601811409, |
|
"rewards/rejected": -1.4803414344787598, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.9703446433342239, |
|
"grad_norm": 7.896289348602295, |
|
"learning_rate": 1.1810156337183908e-09, |
|
"logits/chosen": -1.009376883506775, |
|
"logits/rejected": -0.9895673394203186, |
|
"logps/chosen": -0.7131325602531433, |
|
"logps/rejected": -0.6051285266876221, |
|
"loss": 1.5398, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.7828314304351807, |
|
"rewards/margins": -0.2700101137161255, |
|
"rewards/rejected": -1.5128213167190552, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.9724819663371627, |
|
"grad_norm": 6.323320388793945, |
|
"learning_rate": 1.0064265011902328e-09, |
|
"logits/chosen": -1.0121572017669678, |
|
"logits/rejected": -0.9521965980529785, |
|
"logps/chosen": -0.5518695712089539, |
|
"logps/rejected": -0.5250836610794067, |
|
"loss": 1.5786, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -1.379673957824707, |
|
"rewards/margins": -0.06696499139070511, |
|
"rewards/rejected": -1.312708854675293, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9746192893401016, |
|
"grad_norm": 6.093278408050537, |
|
"learning_rate": 8.457685285878091e-10, |
|
"logits/chosen": -0.9040694236755371, |
|
"logits/rejected": -1.0100067853927612, |
|
"logps/chosen": -0.6011037230491638, |
|
"logps/rejected": -0.9459134340286255, |
|
"loss": 1.4765, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.502759337425232, |
|
"rewards/margins": 0.8620242476463318, |
|
"rewards/rejected": -2.364783525466919, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.9767566123430403, |
|
"grad_norm": 7.644316673278809, |
|
"learning_rate": 6.990507047049676e-10, |
|
"logits/chosen": -1.1642239093780518, |
|
"logits/rejected": -1.327940583229065, |
|
"logps/chosen": -0.7490012645721436, |
|
"logps/rejected": -0.8202410340309143, |
|
"loss": 1.6319, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.8725032806396484, |
|
"rewards/margins": 0.1780991107225418, |
|
"rewards/rejected": -2.0506021976470947, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.9788939353459791, |
|
"grad_norm": 4.500396728515625, |
|
"learning_rate": 5.662812383859794e-10, |
|
"logits/chosen": -1.057512640953064, |
|
"logits/rejected": -1.0183889865875244, |
|
"logps/chosen": -0.5618507862091064, |
|
"logps/rejected": -0.7458251714706421, |
|
"loss": 1.5519, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4046270847320557, |
|
"rewards/margins": 0.4599360227584839, |
|
"rewards/rejected": -1.86456298828125, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.981031258348918, |
|
"grad_norm": 5.931386470794678, |
|
"learning_rate": 4.4746755806621126e-10, |
|
"logits/chosen": -1.0571941137313843, |
|
"logits/rejected": -1.1213597059249878, |
|
"logps/chosen": -0.642679750919342, |
|
"logps/rejected": -0.7841815948486328, |
|
"loss": 1.4134, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6066994667053223, |
|
"rewards/margins": 0.3537544906139374, |
|
"rewards/rejected": -1.9604538679122925, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.9831685813518568, |
|
"grad_norm": 9.15275764465332, |
|
"learning_rate": 3.4261631135654167e-10, |
|
"logits/chosen": -0.8693954348564148, |
|
"logits/rejected": -0.7475937008857727, |
|
"logps/chosen": -0.3651605546474457, |
|
"logps/rejected": -0.42595067620277405, |
|
"loss": 1.4887, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9129014015197754, |
|
"rewards/margins": 0.15197524428367615, |
|
"rewards/rejected": -1.064876675605774, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9853059043547956, |
|
"grad_norm": 11.78666877746582, |
|
"learning_rate": 2.5173336467135263e-10, |
|
"logits/chosen": -1.1248339414596558, |
|
"logits/rejected": -1.013832688331604, |
|
"logps/chosen": -0.48437923192977905, |
|
"logps/rejected": -0.5153346657752991, |
|
"loss": 1.487, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2109479904174805, |
|
"rewards/margins": 0.07738865166902542, |
|
"rewards/rejected": -1.2883366346359253, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.9874432273577345, |
|
"grad_norm": 5.332235336303711, |
|
"learning_rate": 1.7482380290034792e-10, |
|
"logits/chosen": -1.0759484767913818, |
|
"logits/rejected": -0.9958518147468567, |
|
"logps/chosen": -0.4139256775379181, |
|
"logps/rejected": -0.7975391745567322, |
|
"loss": 1.3972, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0348142385482788, |
|
"rewards/margins": 0.9590335488319397, |
|
"rewards/rejected": -1.9938479661941528, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.9895805503606733, |
|
"grad_norm": 5.443465709686279, |
|
"learning_rate": 1.1189192912416933e-10, |
|
"logits/chosen": -1.0934535264968872, |
|
"logits/rejected": -1.0068289041519165, |
|
"logps/chosen": -0.48808667063713074, |
|
"logps/rejected": -0.6034483313560486, |
|
"loss": 1.4738, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2202166318893433, |
|
"rewards/margins": 0.2884041368961334, |
|
"rewards/rejected": -1.5086207389831543, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.9917178733636121, |
|
"grad_norm": 5.87878942489624, |
|
"learning_rate": 6.294126437336733e-11, |
|
"logits/chosen": -1.0164133310317993, |
|
"logits/rejected": -0.9699276685714722, |
|
"logps/chosen": -0.4373021125793457, |
|
"logps/rejected": -0.5335453152656555, |
|
"loss": 1.4684, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.0932552814483643, |
|
"rewards/margins": 0.24060802161693573, |
|
"rewards/rejected": -1.3338632583618164, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.9938551963665508, |
|
"grad_norm": 4.464337348937988, |
|
"learning_rate": 2.797454743164174e-11, |
|
"logits/chosen": -1.2042040824890137, |
|
"logits/rejected": -1.055449366569519, |
|
"logps/chosen": -0.3999456763267517, |
|
"logps/rejected": -0.4777381420135498, |
|
"loss": 1.5844, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9998641610145569, |
|
"rewards/margins": 0.19448117911815643, |
|
"rewards/rejected": -1.1943453550338745, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9959925193694897, |
|
"grad_norm": 9.290578842163086, |
|
"learning_rate": 6.993734682547714e-12, |
|
"logits/chosen": -0.8978444933891296, |
|
"logits/rejected": -0.8331681489944458, |
|
"logps/chosen": -0.521608293056488, |
|
"logps/rejected": -0.5517727136611938, |
|
"loss": 1.5965, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.304020643234253, |
|
"rewards/margins": 0.07541122287511826, |
|
"rewards/rejected": -1.379431962966919, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.9981298423724285, |
|
"grad_norm": 12.642468452453613, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.8587179183959961, |
|
"logits/rejected": -0.8208239078521729, |
|
"logps/chosen": -0.4356221556663513, |
|
"logps/rejected": -0.39131855964660645, |
|
"loss": 1.4854, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0890554189682007, |
|
"rewards/margins": -0.11075909435749054, |
|
"rewards/rejected": -0.9782962799072266, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.9981298423724285, |
|
"step": 467, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0, |
|
"train_runtime": 0.0036, |
|
"train_samples_per_second": 16407823.488, |
|
"train_steps_per_second": 127972.035 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 467, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 32, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|