{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 385,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.282051282051282e-07,
      "logits/chosen": -1.7278180122375488,
      "logits/rejected": -1.7377450466156006,
      "logps/chosen": -29.553977966308594,
      "logps/rejected": -42.813133239746094,
      "loss": 1.0,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.282051282051282e-06,
      "logits/chosen": -1.866492748260498,
      "logits/rejected": -1.87080979347229,
      "logps/chosen": -36.97657775878906,
      "logps/rejected": -33.65824890136719,
      "loss": 0.9236,
      "rewards/accuracies": 0.5277777910232544,
      "rewards/chosen": 0.015009618364274502,
      "rewards/margins": 0.02909613959491253,
      "rewards/rejected": -0.014086521230638027,
      "step": 10
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.564102564102564e-06,
      "logits/chosen": -1.9977840185165405,
      "logits/rejected": -2.000425100326538,
      "logps/chosen": -29.640512466430664,
      "logps/rejected": -29.048751831054688,
      "loss": 1.0528,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 0.0008407801506109536,
      "rewards/margins": -0.0065057664178311825,
      "rewards/rejected": 0.007346546743065119,
      "step": 20
    },
    {
      "epoch": 0.08,
      "learning_rate": 3.846153846153847e-06,
      "logits/chosen": -1.9210313558578491,
      "logits/rejected": -1.9183330535888672,
      "logps/chosen": -31.377187728881836,
      "logps/rejected": -33.214942932128906,
      "loss": 0.976,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.019451653584837914,
      "rewards/margins": 0.01769269071519375,
      "rewards/rejected": 0.001758962869644165,
      "step": 30
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.999896948438434e-06,
      "logits/chosen": -2.0173258781433105,
      "logits/rejected": -2.008592128753662,
      "logps/chosen": -32.55642318725586,
      "logps/rejected": -32.49436569213867,
      "loss": 1.0336,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": 0.010191624984145164,
      "rewards/margins": 0.001039800001308322,
      "rewards/rejected": 0.009151825681328773,
      "step": 40
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.987541037542187e-06,
      "logits/chosen": -1.8626506328582764,
      "logits/rejected": -1.8518873453140259,
      "logps/chosen": -33.54867172241211,
      "logps/rejected": -35.45621109008789,
      "loss": 1.0318,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.004401391837745905,
      "rewards/margins": 0.006334079895168543,
      "rewards/rejected": -0.001932688057422638,
      "step": 50
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.954691471941119e-06,
      "logits/chosen": -1.940718650817871,
      "logits/rejected": -1.9426720142364502,
      "logps/chosen": -32.53395462036133,
      "logps/rejected": -33.20496368408203,
      "loss": 0.9445,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.03311632573604584,
      "rewards/margins": 0.04990752786397934,
      "rewards/rejected": -0.016791202127933502,
      "step": 60
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.901618883413549e-06,
      "logits/chosen": -2.0724740028381348,
      "logits/rejected": -2.077458381652832,
      "logps/chosen": -33.9911994934082,
      "logps/rejected": -36.61388397216797,
      "loss": 1.1022,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.001358801149763167,
      "rewards/margins": 0.021143654361367226,
      "rewards/rejected": -0.022502455860376358,
      "step": 70
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.828760511501322e-06,
      "logits/chosen": -1.936197280883789,
      "logits/rejected": -1.9393237829208374,
      "logps/chosen": -34.28167724609375,
      "logps/rejected": -34.63819885253906,
      "loss": 0.8186,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.07306724786758423,
      "rewards/margins": 0.09596274793148041,
      "rewards/rejected": -0.02289549633860588,
      "step": 80
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.7367166013034295e-06,
      "logits/chosen": -1.9451515674591064,
      "logits/rejected": -1.949669599533081,
      "logps/chosen": -32.39059066772461,
      "logps/rejected": -32.346839904785156,
      "loss": 1.0017,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.03841521218419075,
      "rewards/margins": 0.03238191828131676,
      "rewards/rejected": 0.006033292505890131,
      "step": 90
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.626245458345211e-06,
      "logits/chosen": -2.042168617248535,
      "logits/rejected": -2.040160655975342,
      "logps/chosen": -32.12788772583008,
      "logps/rejected": -31.280298233032227,
      "loss": 0.8581,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.05647433549165726,
      "rewards/margins": 0.06351961940526962,
      "rewards/rejected": -0.007045289967209101,
      "step": 100
    },
    {
      "epoch": 0.26,
      "eval_logits/chosen": -2.237440824508667,
      "eval_logits/rejected": -2.232595682144165,
      "eval_logps/chosen": -34.017024993896484,
      "eval_logps/rejected": -37.50282287597656,
      "eval_loss": 1.0730067491531372,
      "eval_rewards/accuracies": 0.5199335813522339,
      "eval_rewards/chosen": 0.00876238290220499,
      "eval_rewards/margins": 0.001864485559053719,
      "eval_rewards/rejected": 0.0068978965282440186,
      "eval_runtime": 146.0042,
      "eval_samples_per_second": 2.349,
      "eval_steps_per_second": 0.295,
      "step": 100
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.498257201263691e-06,
      "logits/chosen": -1.9984264373779297,
      "logits/rejected": -1.9960410594940186,
      "logps/chosen": -33.10862350463867,
      "logps/rejected": -34.00126266479492,
      "loss": 1.1665,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": 0.0675957053899765,
      "rewards/margins": 0.05354113504290581,
      "rewards/rejected": 0.014054578728973866,
      "step": 110
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.353806263777678e-06,
      "logits/chosen": -2.0099892616271973,
      "logits/rejected": -2.001642942428589,
      "logps/chosen": -32.312686920166016,
      "logps/rejected": -32.10304641723633,
      "loss": 0.9851,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.06610965728759766,
      "rewards/margins": 0.03688037022948265,
      "rewards/rejected": 0.029229288920760155,
      "step": 120
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.1940827077152755e-06,
      "logits/chosen": -2.0365984439849854,
      "logits/rejected": -2.0286123752593994,
      "logps/chosen": -30.3278865814209,
      "logps/rejected": -32.04685592651367,
      "loss": 0.9819,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.07391555607318878,
      "rewards/margins": 0.07084138691425323,
      "rewards/rejected": 0.0030741647351533175,
      "step": 130
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.0204024186666215e-06,
      "logits/chosen": -1.9668314456939697,
      "logits/rejected": -1.9770755767822266,
      "logps/chosen": -31.2120304107666,
      "logps/rejected": -32.57902526855469,
      "loss": 0.8178,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.10381648689508438,
      "rewards/margins": 0.12588787078857422,
      "rewards/rejected": -0.02207140065729618,
      "step": 140
    },
    {
      "epoch": 0.39,
      "learning_rate": 3.834196265035119e-06,
      "logits/chosen": -1.8799388408660889,
      "logits/rejected": -1.881087064743042,
      "logps/chosen": -33.97100830078125,
      "logps/rejected": -34.84876251220703,
      "loss": 0.8351,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.11084038019180298,
      "rewards/margins": 0.15659382939338684,
      "rewards/rejected": -0.04575346037745476,
      "step": 150
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.636998309800573e-06,
      "logits/chosen": -1.9326753616333008,
      "logits/rejected": -1.9292488098144531,
      "logps/chosen": -36.040306091308594,
      "logps/rejected": -32.739051818847656,
      "loss": 0.8539,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.0656842365860939,
      "rewards/margins": 0.07383431494235992,
      "rewards/rejected": -0.008150083012878895,
      "step": 160
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.4304331721118078e-06,
      "logits/chosen": -2.0338892936706543,
      "logits/rejected": -2.026510238647461,
      "logps/chosen": -33.518821716308594,
      "logps/rejected": -31.37355613708496,
      "loss": 0.7141,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.13951851427555084,
      "rewards/margins": 0.15037165582180023,
      "rewards/rejected": -0.010853144340217113,
      "step": 170
    },
    {
      "epoch": 0.47,
      "learning_rate": 3.2162026428305436e-06,
      "logits/chosen": -2.039998769760132,
      "logits/rejected": -2.045238733291626,
      "logps/chosen": -32.28400421142578,
      "logps/rejected": -32.450523376464844,
      "loss": 0.7652,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.13039177656173706,
      "rewards/margins": 0.11308407783508301,
      "rewards/rejected": 0.017307698726654053,
      "step": 180
    },
    {
      "epoch": 0.49,
      "learning_rate": 2.996071664294641e-06,
      "logits/chosen": -2.0408711433410645,
      "logits/rejected": -2.0380892753601074,
      "logps/chosen": -31.287479400634766,
      "logps/rejected": -31.33124351501465,
      "loss": 0.8251,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.10073776543140411,
      "rewards/margins": 0.10725078731775284,
      "rewards/rejected": -0.0065130265429615974,
      "step": 190
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.7718537898066833e-06,
      "logits/chosen": -1.9110311269760132,
      "logits/rejected": -1.915704369544983,
      "logps/chosen": -31.336145401000977,
      "logps/rejected": -32.791221618652344,
      "loss": 0.899,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.13506832718849182,
      "rewards/margins": 0.13521215319633484,
      "rewards/rejected": -0.0001438349427189678,
      "step": 200
    },
    {
      "epoch": 0.52,
      "eval_logits/chosen": -2.2354750633239746,
      "eval_logits/rejected": -2.2306265830993652,
      "eval_logps/chosen": -34.03763198852539,
      "eval_logps/rejected": -37.534156799316406,
      "eval_loss": 1.0715795755386353,
      "eval_rewards/accuracies": 0.5070598125457764,
      "eval_rewards/chosen": -0.0015398082323372364,
      "eval_rewards/margins": 0.007228231523185968,
      "eval_rewards/rejected": -0.008768039755523205,
      "eval_runtime": 145.8306,
      "eval_samples_per_second": 2.352,
      "eval_steps_per_second": 0.295,
      "step": 200
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.5453962426402006e-06,
      "logits/chosen": -2.0229578018188477,
      "logits/rejected": -2.033618450164795,
      "logps/chosen": -31.776050567626953,
      "logps/rejected": -33.90400314331055,
      "loss": 0.7465,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.09762789309024811,
      "rewards/margins": 0.11827856302261353,
      "rewards/rejected": -0.020650675520300865,
      "step": 210
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.3185646976551794e-06,
      "logits/chosen": -1.9135267734527588,
      "logits/rejected": -1.9282987117767334,
      "logps/chosen": -29.896175384521484,
      "logps/rejected": -31.5633487701416,
      "loss": 0.75,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.10768184810876846,
      "rewards/margins": 0.11026783287525177,
      "rewards/rejected": -0.002585983369499445,
      "step": 220
    },
    {
      "epoch": 0.6,
      "learning_rate": 2.0932279108998323e-06,
      "logits/chosen": -1.9711973667144775,
      "logits/rejected": -1.9751732349395752,
      "logps/chosen": -33.15174102783203,
      "logps/rejected": -31.605077743530273,
      "loss": 0.7191,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.1328631192445755,
      "rewards/margins": 0.1607515811920166,
      "rewards/rejected": -0.027888456359505653,
      "step": 230
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8712423238279358e-06,
      "logits/chosen": -1.969957709312439,
      "logits/rejected": -1.9480478763580322,
      "logps/chosen": -33.9122200012207,
      "logps/rejected": -35.02121353149414,
      "loss": 0.6955,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.12076646089553833,
      "rewards/margins": 0.15597540140151978,
      "rewards/rejected": -0.03520893678069115,
      "step": 240
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.6544367689701824e-06,
      "logits/chosen": -2.0103070735931396,
      "logits/rejected": -2.006990909576416,
      "logps/chosen": -32.72673797607422,
      "logps/rejected": -36.23841094970703,
      "loss": 0.8178,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.09916610270738602,
      "rewards/margins": 0.10590960830450058,
      "rewards/rejected": -0.006743511650711298,
      "step": 250
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.4445974030621963e-06,
      "logits/chosen": -1.8776795864105225,
      "logits/rejected": -1.875245451927185,
      "logps/chosen": -34.003971099853516,
      "logps/rejected": -35.510765075683594,
      "loss": 0.889,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.09221391379833221,
      "rewards/margins": 0.09528535604476929,
      "rewards/rejected": -0.0030714483000338078,
      "step": 260
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.243452991757889e-06,
      "logits/chosen": -1.863521933555603,
      "logits/rejected": -1.8610206842422485,
      "logps/chosen": -34.20132827758789,
      "logps/rejected": -31.76943016052246,
      "loss": 0.8268,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.09813599288463593,
      "rewards/margins": 0.09718601405620575,
      "rewards/rejected": 0.000949984765611589,
      "step": 270
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.0526606671603523e-06,
      "logits/chosen": -1.9676616191864014,
      "logits/rejected": -1.957082748413086,
      "logps/chosen": -35.020606994628906,
      "logps/rejected": -31.831247329711914,
      "loss": 0.6669,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.168013796210289,
      "rewards/margins": 0.16314153373241425,
      "rewards/rejected": 0.00487226527184248,
      "step": 280
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.737922755071455e-07,
      "logits/chosen": -2.0636610984802246,
      "logits/rejected": -2.0486764907836914,
      "logps/chosen": -30.683029174804688,
      "logps/rejected": -32.61827850341797,
      "loss": 0.894,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.12054909765720367,
      "rewards/margins": 0.10377003997564316,
      "rewards/rejected": 0.016779040917754173,
      "step": 290
    },
    {
      "epoch": 0.78,
      "learning_rate": 7.08321427484816e-07,
      "logits/chosen": -1.9357779026031494,
      "logits/rejected": -1.9332023859024048,
      "logps/chosen": -32.54056930541992,
      "logps/rejected": -30.850332260131836,
      "loss": 0.6105,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.1932690441608429,
      "rewards/margins": 0.21003055572509766,
      "rewards/rejected": -0.01676151715219021,
      "step": 300
    },
    {
      "epoch": 0.78,
      "eval_logits/chosen": -2.233783006668091,
      "eval_logits/rejected": -2.228926658630371,
      "eval_logps/chosen": -34.01414108276367,
      "eval_logps/rejected": -37.53648376464844,
      "eval_loss": 1.0314662456512451,
      "eval_rewards/accuracies": 0.560215950012207,
      "eval_rewards/chosen": 0.010205330327153206,
      "eval_rewards/margins": 0.020134516060352325,
      "eval_rewards/rejected": -0.009929186664521694,
      "eval_runtime": 145.6316,
      "eval_samples_per_second": 2.355,
      "eval_steps_per_second": 0.295,
      "step": 300
    },
    {
      "epoch": 0.81,
      "learning_rate": 5.576113578589035e-07,
      "logits/chosen": -1.9184128046035767,
      "logits/rejected": -1.9151279926300049,
      "logps/chosen": -31.325061798095703,
      "logps/rejected": -33.77220916748047,
      "loss": 0.7283,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.13914386928081512,
      "rewards/margins": 0.15417365729808807,
      "rewards/rejected": -0.01502978801727295,
      "step": 310
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.229036944380913e-07,
      "logits/chosen": -1.968726396560669,
      "logits/rejected": -1.9564485549926758,
      "logps/chosen": -34.366207122802734,
      "logps/rejected": -33.61689376831055,
      "loss": 0.6804,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.11493507772684097,
      "rewards/margins": 0.1539594680070877,
      "rewards/rejected": -0.03902440145611763,
      "step": 320
    },
    {
      "epoch": 0.86,
      "learning_rate": 3.053082288996112e-07,
      "logits/chosen": -2.0041136741638184,
      "logits/rejected": -2.002657651901245,
      "logps/chosen": -33.25464630126953,
      "logps/rejected": -32.49077606201172,
      "loss": 0.7724,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.11557211726903915,
      "rewards/margins": 0.12116815894842148,
      "rewards/rejected": -0.005596047732979059,
      "step": 330
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.0579377374915805e-07,
      "logits/chosen": -2.0917208194732666,
      "logits/rejected": -2.0759525299072266,
      "logps/chosen": -33.83209991455078,
      "logps/rejected": -33.08992004394531,
      "loss": 0.7606,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.17364642024040222,
      "rewards/margins": 0.15958845615386963,
      "rewards/rejected": 0.014057991094887257,
      "step": 340
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.2518018074041684e-07,
      "logits/chosen": -1.9635206460952759,
      "logits/rejected": -1.9625988006591797,
      "logps/chosen": -32.91681671142578,
      "logps/rejected": -32.46485137939453,
      "loss": 0.7913,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.18264031410217285,
      "rewards/margins": 0.182492196559906,
      "rewards/rejected": 0.00014809667482040823,
      "step": 350
    },
    {
      "epoch": 0.94,
      "learning_rate": 6.41315865106129e-08,
      "logits/chosen": -1.9189882278442383,
      "logits/rejected": -1.9293220043182373,
      "logps/chosen": -31.9406681060791,
      "logps/rejected": -35.306640625,
      "loss": 0.7574,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.1320098638534546,
      "rewards/margins": 0.1409510374069214,
      "rewards/rejected": -0.008941170759499073,
      "step": 360
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.3150941078050325e-08,
      "logits/chosen": -2.0582680702209473,
      "logits/rejected": -2.051753520965576,
      "logps/chosen": -33.39839553833008,
      "logps/rejected": -29.18343734741211,
      "loss": 0.7663,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.12678876519203186,
      "rewards/margins": 0.1052960604429245,
      "rewards/rejected": 0.02149270847439766,
      "step": 370
    },
    {
      "epoch": 0.99,
      "learning_rate": 2.575864278703266e-09,
      "logits/chosen": -1.9175922870635986,
      "logits/rejected": -1.9197555780410767,
      "logps/chosen": -33.878089904785156,
      "logps/rejected": -30.871530532836914,
      "loss": 0.6999,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.18047982454299927,
      "rewards/margins": 0.17993128299713135,
      "rewards/rejected": 0.0005485474830493331,
      "step": 380
    },
    {
      "epoch": 1.0,
      "step": 385,
      "total_flos": 0.0,
      "train_loss": 0.8446982934877469,
      "train_runtime": 3252.3399,
      "train_samples_per_second": 0.947,
      "train_steps_per_second": 0.118
    }
  ],
  "logging_steps": 10,
  "max_steps": 385,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}