|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-08, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2820512820512818e-07, |
|
"logits/chosen": -1.866065502166748, |
|
"logits/rejected": -1.8703795671463013, |
|
"logps/chosen": -36.988380432128906, |
|
"logps/rejected": -33.66728210449219, |
|
"loss": 0.9945, |
|
"rewards/accuracies": 0.5694444179534912, |
|
"rewards/chosen": 0.0018219809280708432, |
|
"rewards/margins": 0.00554280448704958, |
|
"rewards/rejected": -0.0037208227440714836, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5641025641025636e-07, |
|
"logits/chosen": -1.997332215309143, |
|
"logits/rejected": -1.999983549118042, |
|
"logps/chosen": -29.625896453857422, |
|
"logps/rejected": -29.035802841186523, |
|
"loss": 1.0011, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0016297042602673173, |
|
"rewards/margins": -0.0011343134101480246, |
|
"rewards/rejected": 0.00276401755400002, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": -1.9199495315551758, |
|
"logits/rejected": -1.917249321937561, |
|
"logps/chosen": -31.421478271484375, |
|
"logps/rejected": -33.2115364074707, |
|
"loss": 1.0012, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0005385443801060319, |
|
"rewards/margins": -0.0012309944722801447, |
|
"rewards/rejected": 0.0006924502667970955, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438433e-07, |
|
"logits/chosen": -2.0169284343719482, |
|
"logits/rejected": -2.008178949356079, |
|
"logps/chosen": -32.59435272216797, |
|
"logps/rejected": -32.49193572998047, |
|
"loss": 1.0038, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0017543068388476968, |
|
"rewards/margins": -0.0038279418367892504, |
|
"rewards/rejected": 0.0020736351143568754, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542186e-07, |
|
"logits/chosen": -1.86457097530365, |
|
"logits/rejected": -1.8537908792495728, |
|
"logps/chosen": -33.56566619873047, |
|
"logps/rejected": -35.423240661621094, |
|
"loss": 1.0037, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0008188748615793884, |
|
"rewards/margins": -0.003729355288669467, |
|
"rewards/rejected": 0.0029104803688824177, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941118e-07, |
|
"logits/chosen": -1.9449050426483154, |
|
"logits/rejected": -1.9468472003936768, |
|
"logps/chosen": -32.59955596923828, |
|
"logps/rejected": -33.1828498840332, |
|
"loss": 0.9988, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 6.278457294683903e-05, |
|
"rewards/margins": 0.0012094037374481559, |
|
"rewards/rejected": -0.0011466194409877062, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413548e-07, |
|
"logits/chosen": -2.079878330230713, |
|
"logits/rejected": -2.084862232208252, |
|
"logps/chosen": -33.98878860473633, |
|
"logps/rejected": -36.574462890625, |
|
"loss": 0.9995, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -3.0292372684925795e-05, |
|
"rewards/margins": 0.0005289571708999574, |
|
"rewards/rejected": -0.0005592495435848832, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-07, |
|
"logits/chosen": -1.9425569772720337, |
|
"logits/rejected": -1.9457191228866577, |
|
"logps/chosen": -34.40068054199219, |
|
"logps/rejected": -34.5762939453125, |
|
"loss": 0.9989, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.002712726127356291, |
|
"rewards/margins": 0.0011012849863618612, |
|
"rewards/rejected": 0.0016114413738250732, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.736716601303429e-07, |
|
"logits/chosen": -1.9507396221160889, |
|
"logits/rejected": -1.9552500247955322, |
|
"logps/chosen": -32.460357666015625, |
|
"logps/rejected": -32.354434967041016, |
|
"loss": 0.9997, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0007068099221214652, |
|
"rewards/margins": 0.00025928454124368727, |
|
"rewards/rejected": 0.0004475254681892693, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.62624545834521e-07, |
|
"logits/chosen": -2.0492873191833496, |
|
"logits/rejected": -2.0472888946533203, |
|
"logps/chosen": -32.23810958862305, |
|
"logps/rejected": -31.260278701782227, |
|
"loss": 1.0003, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.00027285455144010484, |
|
"rewards/margins": -0.00032049461151473224, |
|
"rewards/rejected": 0.0005933489883318543, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.243312358856201, |
|
"eval_logits/rejected": -2.238436222076416, |
|
"eval_logps/chosen": -34.023216247558594, |
|
"eval_logps/rejected": -37.49723434448242, |
|
"eval_loss": 1.0008214712142944, |
|
"eval_rewards/accuracies": 0.49833887815475464, |
|
"eval_rewards/chosen": 0.0011338649783283472, |
|
"eval_rewards/margins": -0.0008048001327551901, |
|
"eval_rewards/rejected": 0.0019386651692911983, |
|
"eval_runtime": 146.1271, |
|
"eval_samples_per_second": 2.347, |
|
"eval_steps_per_second": 0.294, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4982572012636904e-07, |
|
"logits/chosen": -2.005169153213501, |
|
"logits/rejected": -2.0027499198913574, |
|
"logps/chosen": -33.2365837097168, |
|
"logps/rejected": -34.01953125, |
|
"loss": 1.0003, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.0007230007322505116, |
|
"rewards/margins": -0.00026112591149285436, |
|
"rewards/rejected": 0.0009841264691203833, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777677e-07, |
|
"logits/chosen": -2.0166728496551514, |
|
"logits/rejected": -2.0083022117614746, |
|
"logps/chosen": -32.457847595214844, |
|
"logps/rejected": -32.18357467651367, |
|
"loss": 0.9991, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0012942379107698798, |
|
"rewards/margins": 0.0009121716138906777, |
|
"rewards/rejected": -0.002206409815698862, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.194082707715275e-07, |
|
"logits/chosen": -2.0462448596954346, |
|
"logits/rejected": -2.038203001022339, |
|
"logps/chosen": -30.475027084350586, |
|
"logps/rejected": -32.046302795410156, |
|
"loss": 1.0006, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 6.882836896693334e-05, |
|
"rewards/margins": -0.0006015264661982656, |
|
"rewards/rejected": 0.0006703549297526479, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020402418666621e-07, |
|
"logits/chosen": -1.976986289024353, |
|
"logits/rejected": -1.9872560501098633, |
|
"logps/chosen": -31.407278060913086, |
|
"logps/rejected": -32.543296813964844, |
|
"loss": 0.9979, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0012384230503812432, |
|
"rewards/margins": 0.0020802102517336607, |
|
"rewards/rejected": -0.0008417873759754002, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8341962650351185e-07, |
|
"logits/chosen": -1.8905508518218994, |
|
"logits/rejected": -1.891632080078125, |
|
"logps/chosen": -34.20501708984375, |
|
"logps/rejected": -34.77235412597656, |
|
"loss": 0.9997, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0012326312717050314, |
|
"rewards/margins": 0.00027702696388587356, |
|
"rewards/rejected": -0.001509658177383244, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800572e-07, |
|
"logits/chosen": -1.94281005859375, |
|
"logits/rejected": -1.939327597618103, |
|
"logps/chosen": -36.144107818603516, |
|
"logps/rejected": -32.72822570800781, |
|
"loss": 0.9967, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0027573786210268736, |
|
"rewards/margins": 0.0033046435564756393, |
|
"rewards/rejected": -0.0005472644697874784, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.430433172111807e-07, |
|
"logits/chosen": -2.0420708656311035, |
|
"logits/rejected": -2.0346803665161133, |
|
"logps/chosen": -33.771934509277344, |
|
"logps/rejected": -31.371145248413086, |
|
"loss": 0.9955, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.002593145240098238, |
|
"rewards/margins": 0.004522555507719517, |
|
"rewards/rejected": -0.0019294100347906351, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.216202642830543e-07, |
|
"logits/chosen": -2.0475738048553467, |
|
"logits/rejected": -2.0528526306152344, |
|
"logps/chosen": -32.524593353271484, |
|
"logps/rejected": -32.510643005371094, |
|
"loss": 0.9954, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0020192908123135567, |
|
"rewards/margins": 0.004569421522319317, |
|
"rewards/rejected": -0.002550130244344473, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9960716642946403e-07, |
|
"logits/chosen": -2.048490524291992, |
|
"logits/rejected": -2.0457024574279785, |
|
"logps/chosen": -31.492746353149414, |
|
"logps/rejected": -31.319293975830078, |
|
"loss": 1.0003, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0003792368806898594, |
|
"rewards/margins": -0.0002712804707698524, |
|
"rewards/rejected": -0.0001079567227861844, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.771853789806683e-07, |
|
"logits/chosen": -1.9185683727264404, |
|
"logits/rejected": -1.9232347011566162, |
|
"logps/chosen": -31.5926513671875, |
|
"logps/rejected": -32.78697204589844, |
|
"loss": 0.999, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.001363297225907445, |
|
"rewards/margins": 0.0009675474721007049, |
|
"rewards/rejected": 0.0003957498411182314, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.243807315826416, |
|
"eval_logits/rejected": -2.2389235496520996, |
|
"eval_logps/chosen": -34.018714904785156, |
|
"eval_logps/rejected": -37.517478942871094, |
|
"eval_loss": 0.9983054399490356, |
|
"eval_rewards/accuracies": 0.5328072905540466, |
|
"eval_rewards/chosen": 0.0015839905245229602, |
|
"eval_rewards/margins": 0.0016701342537999153, |
|
"eval_rewards/rejected": -8.614380931248888e-05, |
|
"eval_runtime": 145.691, |
|
"eval_samples_per_second": 2.354, |
|
"eval_steps_per_second": 0.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402e-07, |
|
"logits/chosen": -2.0318503379821777, |
|
"logits/rejected": -2.042539596557617, |
|
"logps/chosen": -31.948400497436523, |
|
"logps/rejected": -33.86983871459961, |
|
"loss": 0.997, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0022904814686626196, |
|
"rewards/margins": 0.0030044156592339277, |
|
"rewards/rejected": -0.0007139344816096127, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.318564697655179e-07, |
|
"logits/chosen": -1.9251388311386108, |
|
"logits/rejected": -1.9399843215942383, |
|
"logps/chosen": -30.099853515625, |
|
"logps/rejected": -31.55409812927246, |
|
"loss": 0.9992, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0011685896897688508, |
|
"rewards/margins": 0.000760918774176389, |
|
"rewards/rejected": 0.00040767071186564863, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.093227910899832e-07, |
|
"logits/chosen": -1.9835479259490967, |
|
"logits/rejected": -1.9875112771987915, |
|
"logps/chosen": -33.387638092041016, |
|
"logps/rejected": -31.554845809936523, |
|
"loss": 0.9965, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0029833675362169743, |
|
"rewards/margins": 0.003537458134815097, |
|
"rewards/rejected": -0.0005540908314287663, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279356e-07, |
|
"logits/chosen": -1.9826898574829102, |
|
"logits/rejected": -1.9607274532318115, |
|
"logps/chosen": -34.158443450927734, |
|
"logps/rejected": -34.963783264160156, |
|
"loss": 0.9992, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.00046964577632024884, |
|
"rewards/margins": 0.0008290981058962643, |
|
"rewards/rejected": -0.0012987437658011913, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.654436768970182e-07, |
|
"logits/chosen": -2.024381160736084, |
|
"logits/rejected": -2.0210862159729004, |
|
"logps/chosen": -32.9254035949707, |
|
"logps/rejected": -36.251712799072266, |
|
"loss": 0.9974, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -3.328696038806811e-05, |
|
"rewards/margins": 0.0026453982573002577, |
|
"rewards/rejected": -0.0026786853559315205, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.444597403062196e-07, |
|
"logits/chosen": -1.8911311626434326, |
|
"logits/rejected": -1.8886839151382446, |
|
"logps/chosen": -34.194557189941406, |
|
"logps/rejected": -35.51445770263672, |
|
"loss": 0.9996, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.0006158108590170741, |
|
"rewards/margins": 0.0003673938917927444, |
|
"rewards/rejected": -0.0009832048090174794, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2434529917578887e-07, |
|
"logits/chosen": -1.8759450912475586, |
|
"logits/rejected": -1.8734045028686523, |
|
"logps/chosen": -34.40558624267578, |
|
"logps/rejected": -31.752349853515625, |
|
"loss": 1.0027, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0007984804688021541, |
|
"rewards/margins": -0.0026967611629515886, |
|
"rewards/rejected": 0.0018982805777341127, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603521e-07, |
|
"logits/chosen": -1.980015754699707, |
|
"logits/rejected": -1.9693737030029297, |
|
"logps/chosen": -35.33230209350586, |
|
"logps/rejected": -31.845691680908203, |
|
"loss": 0.9971, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.002433922840282321, |
|
"rewards/margins": 0.0029038069769740105, |
|
"rewards/rejected": -0.0004698836710304022, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071453e-08, |
|
"logits/chosen": -2.0756278038024902, |
|
"logits/rejected": -2.060606002807617, |
|
"logps/chosen": -30.907390594482422, |
|
"logps/rejected": -32.64055252075195, |
|
"loss": 0.9995, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.001673347083851695, |
|
"rewards/margins": 0.0005450047319754958, |
|
"rewards/rejected": 0.0011283422354608774, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-08, |
|
"logits/chosen": -1.946616768836975, |
|
"logits/rejected": -1.9440828561782837, |
|
"logps/chosen": -32.894561767578125, |
|
"logps/rejected": -30.812387466430664, |
|
"loss": 0.9972, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.003255209419876337, |
|
"rewards/margins": 0.002812772523611784, |
|
"rewards/rejected": 0.00044243651791475713, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.2434821128845215, |
|
"eval_logits/rejected": -2.2386035919189453, |
|
"eval_logps/chosen": -34.017669677734375, |
|
"eval_logps/rejected": -37.50018310546875, |
|
"eval_loss": 0.9999711513519287, |
|
"eval_rewards/accuracies": 0.490448534488678, |
|
"eval_rewards/chosen": 0.0016884810756891966, |
|
"eval_rewards/margins": 4.470993735594675e-05, |
|
"eval_rewards/rejected": 0.001643771305680275, |
|
"eval_runtime": 145.8271, |
|
"eval_samples_per_second": 2.352, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.576113578589034e-08, |
|
"logits/chosen": -1.9287067651748657, |
|
"logits/rejected": -1.925451636314392, |
|
"logps/chosen": -31.603496551513672, |
|
"logps/rejected": -33.734046936035156, |
|
"loss": 1.0008, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -1.4918483429937623e-05, |
|
"rewards/margins": -0.000825587019789964, |
|
"rewards/rejected": 0.0008106685127131641, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.229036944380912e-08, |
|
"logits/chosen": -1.9802377223968506, |
|
"logits/rejected": -1.9679291248321533, |
|
"logps/chosen": -34.585323333740234, |
|
"logps/rejected": -33.57084274291992, |
|
"loss": 0.9957, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.00107504369225353, |
|
"rewards/margins": 0.0042753927409648895, |
|
"rewards/rejected": -0.0032003491651266813, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.053082288996112e-08, |
|
"logits/chosen": -2.015906810760498, |
|
"logits/rejected": -2.014427661895752, |
|
"logps/chosen": -33.49116516113281, |
|
"logps/rejected": -32.47978973388672, |
|
"loss": 1.0005, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0005370675935409963, |
|
"rewards/margins": -0.0005161463050171733, |
|
"rewards/rejected": -2.0921288523823023e-05, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.05793773749158e-08, |
|
"logits/chosen": -2.1030631065368652, |
|
"logits/rejected": -2.0872654914855957, |
|
"logps/chosen": -34.18492889404297, |
|
"logps/rejected": -33.08319854736328, |
|
"loss": 1.004, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.0005535687087103724, |
|
"rewards/margins": -0.00403733691200614, |
|
"rewards/rejected": 0.003483767854049802, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.251801807404168e-08, |
|
"logits/chosen": -1.9745471477508545, |
|
"logits/rejected": -1.9736032485961914, |
|
"logps/chosen": -33.23271942138672, |
|
"logps/rejected": -32.4765510559082, |
|
"loss": 0.9939, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.004937793128192425, |
|
"rewards/margins": 0.006078074686229229, |
|
"rewards/rejected": -0.001140281674452126, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.41315865106129e-09, |
|
"logits/chosen": -1.9305438995361328, |
|
"logits/rejected": -1.940913438796997, |
|
"logps/chosen": -32.22040939331055, |
|
"logps/rejected": -35.28728103637695, |
|
"loss": 1.0017, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.0015721082454547286, |
|
"rewards/margins": -0.001719816355034709, |
|
"rewards/rejected": 0.00014770813868381083, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3150941078050324e-09, |
|
"logits/chosen": -2.069648265838623, |
|
"logits/rejected": -2.0630898475646973, |
|
"logps/chosen": -33.63695526123047, |
|
"logps/rejected": -29.226470947265625, |
|
"loss": 0.9985, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0015011827927082777, |
|
"rewards/margins": 0.0015057541895657778, |
|
"rewards/rejected": -4.571699719235767e-06, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.575864278703266e-10, |
|
"logits/chosen": -1.928865671157837, |
|
"logits/rejected": -1.9310123920440674, |
|
"logps/chosen": -34.243560791015625, |
|
"logps/rejected": -30.892742156982422, |
|
"loss": 0.9984, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0004508168494794518, |
|
"rewards/margins": 0.0015608124667778611, |
|
"rewards/rejected": -0.0020116292871534824, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.9991211135666092, |
|
"train_runtime": 3253.9902, |
|
"train_samples_per_second": 0.946, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|