|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.973977695167286, |
|
"eval_steps": 500, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2144.0, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": -2.582127094268799, |
|
"logits/rejected": -2.6067237854003906, |
|
"logps/chosen": -1126.8817138671875, |
|
"logps/rejected": -1078.447509765625, |
|
"loss": 0.6576, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.08093704283237457, |
|
"rewards/margins": 0.09326125681400299, |
|
"rewards/rejected": -0.012324221432209015, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 504.0, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"logits/chosen": -2.340122699737549, |
|
"logits/rejected": -2.652778387069702, |
|
"logps/chosen": -1099.25048828125, |
|
"logps/rejected": -1232.9901123046875, |
|
"loss": 0.2009, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.4397642612457275, |
|
"rewards/margins": 2.55800461769104, |
|
"rewards/rejected": -1.1182401180267334, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 36.0, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -2.513775110244751, |
|
"logits/rejected": -2.662311315536499, |
|
"logps/chosen": -990.4671020507812, |
|
"logps/rejected": -1043.4093017578125, |
|
"loss": 0.0709, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.9243946075439453, |
|
"rewards/margins": 6.777369499206543, |
|
"rewards/rejected": -3.8529744148254395, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.703125, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -2.4629788398742676, |
|
"logits/rejected": -2.802347183227539, |
|
"logps/chosen": -1008.41064453125, |
|
"logps/rejected": -1297.322265625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.369072914123535, |
|
"rewards/margins": 15.539718627929688, |
|
"rewards/rejected": -7.170646667480469, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.000362396240234375, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": -2.5417537689208984, |
|
"logits/rejected": -2.6880087852478027, |
|
"logps/chosen": -926.39892578125, |
|
"logps/rejected": -1187.432861328125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.67939281463623, |
|
"rewards/margins": 24.91078758239746, |
|
"rewards/rejected": -14.23139476776123, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -2.577080249786377, |
|
"logits/rejected": -2.733059883117676, |
|
"logps/chosen": -908.2595825195312, |
|
"logps/rejected": -1309.6556396484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.903298377990723, |
|
"rewards/margins": 34.511558532714844, |
|
"rewards/rejected": -25.608257293701172, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 4.0978193283081055e-07, |
|
"learning_rate": 4.995770395678171e-06, |
|
"logits/chosen": -2.651310920715332, |
|
"logits/rejected": -2.6355812549591064, |
|
"logps/chosen": -873.0777587890625, |
|
"logps/rejected": -1328.905517578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.599026679992676, |
|
"rewards/margins": 44.44927215576172, |
|
"rewards/rejected": -32.850250244140625, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 4.983095894354858e-06, |
|
"logits/chosen": -2.5453743934631348, |
|
"logits/rejected": -2.668313503265381, |
|
"logps/chosen": -1026.492919921875, |
|
"logps/rejected": -1623.9166259765625, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.174985885620117, |
|
"rewards/margins": 48.78560256958008, |
|
"rewards/rejected": -36.610618591308594, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.0002899169921875, |
|
"learning_rate": 4.962019382530521e-06, |
|
"logits/chosen": -2.688816785812378, |
|
"logits/rejected": -2.8246970176696777, |
|
"logps/chosen": -866.5418090820312, |
|
"logps/rejected": -1398.448974609375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.687978744506836, |
|
"rewards/margins": 49.26004409790039, |
|
"rewards/rejected": -33.57206344604492, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.448803909122944e-09, |
|
"learning_rate": 4.93261217644956e-06, |
|
"logits/chosen": -2.555048704147339, |
|
"logits/rejected": -2.771728515625, |
|
"logps/chosen": -826.4230346679688, |
|
"logps/rejected": -1372.0035400390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 14.595741271972656, |
|
"rewards/margins": 46.010047912597656, |
|
"rewards/rejected": -31.414306640625, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 14.5, |
|
"learning_rate": 4.894973780788722e-06, |
|
"logits/chosen": -2.570012331008911, |
|
"logits/rejected": -2.613524913787842, |
|
"logps/chosen": -867.3160400390625, |
|
"logps/rejected": -1359.3463134765625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.941835403442383, |
|
"rewards/margins": 41.53192901611328, |
|
"rewards/rejected": -24.590087890625, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 7.420778274536133e-06, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -2.6153066158294678, |
|
"logits/rejected": -2.857034206390381, |
|
"logps/chosen": -774.2218627929688, |
|
"logps/rejected": -1181.09619140625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 21.74435043334961, |
|
"rewards/margins": 31.659595489501953, |
|
"rewards/rejected": -9.915243148803711, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.00081634521484375, |
|
"learning_rate": 4.7955402672006855e-06, |
|
"logits/chosen": -2.738781690597534, |
|
"logits/rejected": -2.8861005306243896, |
|
"logps/chosen": -704.155517578125, |
|
"logps/rejected": -1052.4534912109375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 25.42382049560547, |
|
"rewards/margins": 30.588428497314453, |
|
"rewards/rejected": -5.164605617523193, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.555308699607849e-07, |
|
"learning_rate": 4.734081600808531e-06, |
|
"logits/chosen": -2.744210720062256, |
|
"logits/rejected": -2.9681973457336426, |
|
"logps/chosen": -755.2449340820312, |
|
"logps/rejected": -1124.8590087890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 26.51047706604004, |
|
"rewards/margins": 34.19602966308594, |
|
"rewards/rejected": -7.685556888580322, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.0208740234375, |
|
"learning_rate": 4.665063509461098e-06, |
|
"logits/chosen": -2.6066527366638184, |
|
"logits/rejected": -2.763363838195801, |
|
"logps/chosen": -718.3406982421875, |
|
"logps/rejected": -1171.5050048828125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 23.655757904052734, |
|
"rewards/margins": 39.56992721557617, |
|
"rewards/rejected": -15.914169311523438, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 2.0605511963367462e-08, |
|
"learning_rate": 4.588719528532342e-06, |
|
"logits/chosen": -2.6591174602508545, |
|
"logits/rejected": -2.818765640258789, |
|
"logps/chosen": -713.7478637695312, |
|
"logps/rejected": -1214.9329833984375, |
|
"loss": 0.0178, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 25.415401458740234, |
|
"rewards/margins": 46.337589263916016, |
|
"rewards/rejected": -20.922182083129883, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 8.866190910339355e-07, |
|
"learning_rate": 4.50530798188761e-06, |
|
"logits/chosen": -2.592603921890259, |
|
"logits/rejected": -2.5666003227233887, |
|
"logps/chosen": -672.0972900390625, |
|
"logps/rejected": -1153.8974609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 21.85224151611328, |
|
"rewards/margins": 44.60143280029297, |
|
"rewards/rejected": -22.749195098876953, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.0032501220703125, |
|
"learning_rate": 4.415111107797445e-06, |
|
"logits/chosen": -2.5358242988586426, |
|
"logits/rejected": -2.7607996463775635, |
|
"logps/chosen": -837.4728393554688, |
|
"logps/rejected": -1421.790771484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 21.30091667175293, |
|
"rewards/margins": 51.24127197265625, |
|
"rewards/rejected": -29.94034767150879, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.00067901611328125, |
|
"learning_rate": 4.318434103932622e-06, |
|
"logits/chosen": -2.5782151222229004, |
|
"logits/rejected": -2.708712339401245, |
|
"logps/chosen": -784.4146728515625, |
|
"logps/rejected": -1424.0408935546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 24.907550811767578, |
|
"rewards/margins": 59.107765197753906, |
|
"rewards/rejected": -34.20021438598633, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.0652001947164536e-08, |
|
"learning_rate": 4.215604094671835e-06, |
|
"logits/chosen": -2.598839044570923, |
|
"logits/rejected": -2.6679890155792236, |
|
"logps/chosen": -733.5455322265625, |
|
"logps/rejected": -1301.297119140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 27.65069007873535, |
|
"rewards/margins": 55.6466178894043, |
|
"rewards/rejected": -27.995929718017578, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 3.073364496231079e-08, |
|
"learning_rate": 4.106969024216348e-06, |
|
"logits/chosen": -2.4979000091552734, |
|
"logits/rejected": -2.702669620513916, |
|
"logps/chosen": -800.4815673828125, |
|
"logps/rejected": -1545.734130859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 28.3299503326416, |
|
"rewards/margins": 66.7006607055664, |
|
"rewards/rejected": -38.37070846557617, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 1.1980533599853516e-05, |
|
"learning_rate": 3.992896479256966e-06, |
|
"logits/chosen": -2.5696396827697754, |
|
"logits/rejected": -2.6518044471740723, |
|
"logps/chosen": -811.0010986328125, |
|
"logps/rejected": -1455.451904296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 24.791704177856445, |
|
"rewards/margins": 59.291259765625, |
|
"rewards/rejected": -34.49955749511719, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 1.178705133497715e-09, |
|
"learning_rate": 3.8737724451770155e-06, |
|
"logits/chosen": -2.480198383331299, |
|
"logits/rejected": -2.7152700424194336, |
|
"logps/chosen": -748.87255859375, |
|
"logps/rejected": -1521.2923583984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 24.639190673828125, |
|
"rewards/margins": 62.03853225708008, |
|
"rewards/rejected": -37.39933395385742, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 1.7508864402770996e-07, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -2.5954699516296387, |
|
"logits/rejected": -2.7862911224365234, |
|
"logps/chosen": -828.2017822265625, |
|
"logps/rejected": -1500.0533447265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 24.49906349182129, |
|
"rewards/margins": 57.625587463378906, |
|
"rewards/rejected": -33.12652587890625, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 2.9921531677246094e-05, |
|
"learning_rate": 3.621997950501156e-06, |
|
"logits/chosen": -2.5512304306030273, |
|
"logits/rejected": -2.5572822093963623, |
|
"logps/chosen": -721.7669067382812, |
|
"logps/rejected": -1303.0982666015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 24.202795028686523, |
|
"rewards/margins": 57.88828659057617, |
|
"rewards/rejected": -33.68549346923828, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 1.0477378964424133e-09, |
|
"learning_rate": 3.4901994150978926e-06, |
|
"logits/chosen": -2.5931153297424316, |
|
"logits/rejected": -2.693915843963623, |
|
"logps/chosen": -727.4300537109375, |
|
"logps/rejected": -1358.4566650390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 24.488115310668945, |
|
"rewards/margins": 57.87506866455078, |
|
"rewards/rejected": -33.38694763183594, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.0002765655517578125, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": -2.6825664043426514, |
|
"logits/rejected": -2.786104679107666, |
|
"logps/chosen": -796.1810302734375, |
|
"logps/rejected": -1376.611328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 25.74446678161621, |
|
"rewards/margins": 58.13447189331055, |
|
"rewards/rejected": -32.39000701904297, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 5.030632019042969e-05, |
|
"learning_rate": 3.217008081777726e-06, |
|
"logits/chosen": -2.5538275241851807, |
|
"logits/rejected": -2.7200071811676025, |
|
"logps/chosen": -782.6590576171875, |
|
"logps/rejected": -1443.415283203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 27.598276138305664, |
|
"rewards/margins": 58.656410217285156, |
|
"rewards/rejected": -31.05812644958496, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 3.6209821701049805e-06, |
|
"learning_rate": 3.0765396768561005e-06, |
|
"logits/chosen": -2.6117515563964844, |
|
"logits/rejected": -2.74174165725708, |
|
"logps/chosen": -670.0432739257812, |
|
"logps/rejected": -1294.08642578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 26.823715209960938, |
|
"rewards/margins": 60.58592987060547, |
|
"rewards/rejected": -33.76221466064453, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 1.1932570487260818e-09, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits/chosen": -2.6705422401428223, |
|
"logits/rejected": -2.7561261653900146, |
|
"logps/chosen": -830.1842041015625, |
|
"logps/rejected": -1481.5352783203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 24.070798873901367, |
|
"rewards/margins": 62.027305603027344, |
|
"rewards/rejected": -37.956504821777344, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 7.927417755126953e-06, |
|
"learning_rate": 2.7902322853130758e-06, |
|
"logits/chosen": -2.5545754432678223, |
|
"logits/rejected": -2.740570068359375, |
|
"logps/chosen": -853.0714111328125, |
|
"logps/rejected": -1508.432373046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 25.61324119567871, |
|
"rewards/margins": 58.5531120300293, |
|
"rewards/rejected": -32.93987274169922, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 1.5178830414797062e-16, |
|
"learning_rate": 2.6453620722761897e-06, |
|
"logits/chosen": -2.5684103965759277, |
|
"logits/rejected": -2.6356844902038574, |
|
"logps/chosen": -695.0123291015625, |
|
"logps/rejected": -1333.6201171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 26.32758140563965, |
|
"rewards/margins": 56.49725341796875, |
|
"rewards/rejected": -30.169677734375, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 4.0512531995773315e-08, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -2.6028122901916504, |
|
"logits/rejected": -2.621495246887207, |
|
"logps/chosen": -689.859130859375, |
|
"logps/rejected": -1285.8802490234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 26.484960556030273, |
|
"rewards/margins": 60.581703186035156, |
|
"rewards/rejected": -34.09674835205078, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 5.811452865600586e-07, |
|
"learning_rate": 2.3546379277238107e-06, |
|
"logits/chosen": -2.5608181953430176, |
|
"logits/rejected": -2.633148431777954, |
|
"logps/chosen": -689.5059204101562, |
|
"logps/rejected": -1259.192138671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 24.118501663208008, |
|
"rewards/margins": 56.985939025878906, |
|
"rewards/rejected": -32.86743927001953, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 2.637505531311035e-06, |
|
"learning_rate": 2.2097677146869242e-06, |
|
"logits/chosen": -2.5463547706604004, |
|
"logits/rejected": -2.6621968746185303, |
|
"logps/chosen": -845.2557373046875, |
|
"logps/rejected": -1509.9571533203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 20.753326416015625, |
|
"rewards/margins": 60.85497283935547, |
|
"rewards/rejected": -40.101646423339844, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 2.8033131371785203e-15, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -2.5142176151275635, |
|
"logits/rejected": -2.800828456878662, |
|
"logps/chosen": -768.0780029296875, |
|
"logps/rejected": -1504.419677734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 26.789424896240234, |
|
"rewards/margins": 65.28126525878906, |
|
"rewards/rejected": -38.49182891845703, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 7.450580596923828e-06, |
|
"learning_rate": 1.9234603231439e-06, |
|
"logits/chosen": -2.5999841690063477, |
|
"logits/rejected": -2.785568952560425, |
|
"logps/chosen": -742.1795654296875, |
|
"logps/rejected": -1414.2978515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 26.651920318603516, |
|
"rewards/margins": 61.58074951171875, |
|
"rewards/rejected": -34.92882537841797, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 3.841705620288849e-08, |
|
"learning_rate": 1.7829919182222752e-06, |
|
"logits/chosen": -2.735452175140381, |
|
"logits/rejected": -2.7289414405822754, |
|
"logps/chosen": -814.5637817382812, |
|
"logps/rejected": -1430.7174072265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 28.13421630859375, |
|
"rewards/margins": 61.936744689941406, |
|
"rewards/rejected": -33.802528381347656, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 6.892264536872972e-13, |
|
"learning_rate": 1.6449496416858285e-06, |
|
"logits/chosen": -2.515446186065674, |
|
"logits/rejected": -2.742518663406372, |
|
"logps/chosen": -745.94970703125, |
|
"logps/rejected": -1403.6351318359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 26.278427124023438, |
|
"rewards/margins": 64.49276733398438, |
|
"rewards/rejected": -38.21433639526367, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 1.1757947504520416e-08, |
|
"learning_rate": 1.509800584902108e-06, |
|
"logits/chosen": -2.478830575942993, |
|
"logits/rejected": -2.7162294387817383, |
|
"logps/chosen": -780.1927490234375, |
|
"logps/rejected": -1506.2659912109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 26.495025634765625, |
|
"rewards/margins": 63.679115295410156, |
|
"rewards/rejected": -37.184085845947266, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 1.519918441772461e-05, |
|
"learning_rate": 1.3780020494988447e-06, |
|
"logits/chosen": -2.5534565448760986, |
|
"logits/rejected": -2.703151226043701, |
|
"logps/chosen": -684.3609619140625, |
|
"logps/rejected": -1303.3043212890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 23.173385620117188, |
|
"rewards/margins": 54.865631103515625, |
|
"rewards/rejected": -31.692245483398438, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.0001277923583984375, |
|
"learning_rate": 1.2500000000000007e-06, |
|
"logits/chosen": -2.566140651702881, |
|
"logits/rejected": -2.703296661376953, |
|
"logps/chosen": -764.0347290039062, |
|
"logps/rejected": -1441.303955078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 25.557802200317383, |
|
"rewards/margins": 61.8077392578125, |
|
"rewards/rejected": -36.249942779541016, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 3.110617399215698e-07, |
|
"learning_rate": 1.1262275548229852e-06, |
|
"logits/chosen": -2.577171564102173, |
|
"logits/rejected": -2.6875264644622803, |
|
"logps/chosen": -733.8834228515625, |
|
"logps/rejected": -1341.32763671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 23.420785903930664, |
|
"rewards/margins": 59.020118713378906, |
|
"rewards/rejected": -35.59933090209961, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 1.8917489796876907e-09, |
|
"learning_rate": 1.0071035207430352e-06, |
|
"logits/chosen": -2.5544378757476807, |
|
"logits/rejected": -2.641348361968994, |
|
"logps/chosen": -771.989013671875, |
|
"logps/rejected": -1348.832275390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 24.312747955322266, |
|
"rewards/margins": 58.93672561645508, |
|
"rewards/rejected": -34.62397003173828, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 8.221832104027271e-10, |
|
"learning_rate": 8.930309757836517e-07, |
|
"logits/chosen": -2.6528477668762207, |
|
"logits/rejected": -2.757702350616455, |
|
"logps/chosen": -788.48828125, |
|
"logps/rejected": -1497.994140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 24.537336349487305, |
|
"rewards/margins": 68.33438110351562, |
|
"rewards/rejected": -43.79704284667969, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.000446319580078125, |
|
"learning_rate": 7.843959053281663e-07, |
|
"logits/chosen": -2.631401777267456, |
|
"logits/rejected": -2.7087044715881348, |
|
"logps/chosen": -811.6467895507812, |
|
"logps/rejected": -1447.6087646484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 25.546356201171875, |
|
"rewards/margins": 62.1228141784668, |
|
"rewards/rejected": -36.576454162597656, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 0.001556396484375, |
|
"learning_rate": 6.815658960673782e-07, |
|
"logits/chosen": -2.496450901031494, |
|
"logits/rejected": -2.640903949737549, |
|
"logps/chosen": -756.075439453125, |
|
"logps/rejected": -1518.998046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 22.391010284423828, |
|
"rewards/margins": 63.806968688964844, |
|
"rewards/rejected": -41.41596603393555, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 3.361701965332031e-05, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -2.5900871753692627, |
|
"logits/rejected": -2.6478281021118164, |
|
"logps/chosen": -654.4774780273438, |
|
"logps/rejected": -1188.7408447265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 25.036846160888672, |
|
"rewards/margins": 53.53025436401367, |
|
"rewards/rejected": -28.493408203125, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 2.1191226551309228e-10, |
|
"learning_rate": 4.946920181123904e-07, |
|
"logits/chosen": -2.509355068206787, |
|
"logits/rejected": -2.687610149383545, |
|
"logps/chosen": -787.9483642578125, |
|
"logps/rejected": -1455.489013671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 23.417675018310547, |
|
"rewards/margins": 57.781227111816406, |
|
"rewards/rejected": -34.363548278808594, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 1.210719347000122e-08, |
|
"learning_rate": 4.1128047146765936e-07, |
|
"logits/chosen": -2.5654799938201904, |
|
"logits/rejected": -2.7495007514953613, |
|
"logps/chosen": -880.7611083984375, |
|
"logps/rejected": -1492.9364013671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 26.42355728149414, |
|
"rewards/margins": 58.61255645751953, |
|
"rewards/rejected": -32.188995361328125, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 3.552713678800501e-12, |
|
"learning_rate": 3.3493649053890325e-07, |
|
"logits/chosen": -2.5684847831726074, |
|
"logits/rejected": -2.7665956020355225, |
|
"logps/chosen": -717.5662231445312, |
|
"logps/rejected": -1405.523681640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 25.795669555664062, |
|
"rewards/margins": 55.92620849609375, |
|
"rewards/rejected": -30.130529403686523, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 4.1470733097570545e-18, |
|
"learning_rate": 2.6591839919146963e-07, |
|
"logits/chosen": -2.5511410236358643, |
|
"logits/rejected": -2.642638921737671, |
|
"logps/chosen": -770.4322509765625, |
|
"logps/rejected": -1513.5374755859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 26.562414169311523, |
|
"rewards/margins": 67.80595397949219, |
|
"rewards/rejected": -41.24353790283203, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 2.775341272354126e-07, |
|
"learning_rate": 2.044597327993153e-07, |
|
"logits/chosen": -2.5555355548858643, |
|
"logits/rejected": -2.715888738632202, |
|
"logps/chosen": -807.5404052734375, |
|
"logps/rejected": -1584.75927734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 25.03497886657715, |
|
"rewards/margins": 64.63668823242188, |
|
"rewards/rejected": -39.601707458496094, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 4.007461029686965e-12, |
|
"learning_rate": 1.507684480352292e-07, |
|
"logits/chosen": -2.597036123275757, |
|
"logits/rejected": -2.643857479095459, |
|
"logps/chosen": -831.2688598632812, |
|
"logps/rejected": -1632.8023681640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 24.538908004760742, |
|
"rewards/margins": 74.41605377197266, |
|
"rewards/rejected": -49.87714385986328, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 4.172325134277344e-05, |
|
"learning_rate": 1.0502621921127776e-07, |
|
"logits/chosen": -2.5798580646514893, |
|
"logits/rejected": -2.7736194133758545, |
|
"logps/chosen": -767.0318603515625, |
|
"logps/rejected": -1508.767822265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 25.648067474365234, |
|
"rewards/margins": 66.6827163696289, |
|
"rewards/rejected": -41.034645080566406, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 1.9818544387817383e-06, |
|
"learning_rate": 6.738782355044048e-08, |
|
"logits/chosen": -2.5585246086120605, |
|
"logits/rejected": -2.666764736175537, |
|
"logps/chosen": -772.8341064453125, |
|
"logps/rejected": -1421.703857421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 27.29400634765625, |
|
"rewards/margins": 63.170677185058594, |
|
"rewards/rejected": -35.87666702270508, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 1.2631062418222427e-08, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": -2.512103319168091, |
|
"logits/rejected": -2.690737009048462, |
|
"logps/chosen": -810.3479614257812, |
|
"logps/rejected": -1493.773193359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 22.784202575683594, |
|
"rewards/margins": 61.64385223388672, |
|
"rewards/rejected": -38.859649658203125, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 8.009374141693115e-08, |
|
"learning_rate": 1.6904105645142443e-08, |
|
"logits/chosen": -2.618516445159912, |
|
"logits/rejected": -2.7000651359558105, |
|
"logps/chosen": -769.9617309570312, |
|
"logps/rejected": -1351.812255859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 25.189565658569336, |
|
"rewards/margins": 57.4492073059082, |
|
"rewards/rejected": -32.259639739990234, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 9.324138683375338e-17, |
|
"learning_rate": 4.229604321829561e-09, |
|
"logits/chosen": -2.5340938568115234, |
|
"logits/rejected": -2.880659818649292, |
|
"logps/chosen": -801.6094970703125, |
|
"logps/rejected": -1518.06201171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 26.99312400817871, |
|
"rewards/margins": 64.31195068359375, |
|
"rewards/rejected": -37.3188362121582, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 3.241002559661865e-07, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -2.616868019104004, |
|
"logits/rejected": -2.702866792678833, |
|
"logps/chosen": -725.1414184570312, |
|
"logps/rejected": -1320.96435546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 25.64286231994629, |
|
"rewards/margins": 60.58977508544922, |
|
"rewards/rejected": -34.94690704345703, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|