|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 352, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.028409090909090908, |
|
"grad_norm": 58.42705245846632, |
|
"learning_rate": 1.3888888888888888e-07, |
|
"logits/chosen": -2.8592312335968018, |
|
"logits/rejected": -2.642709732055664, |
|
"logps/chosen": -390.5020446777344, |
|
"logps/rejected": -607.8412475585938, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.002577512990683317, |
|
"rewards/margins": 0.013913804665207863, |
|
"rewards/rejected": -0.011336291208863258, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.056818181818181816, |
|
"grad_norm": 18.51862119745116, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"logits/chosen": -2.84271502494812, |
|
"logits/rejected": -2.694936513900757, |
|
"logps/chosen": -328.5304870605469, |
|
"logps/rejected": -774.9099731445312, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.08614631742238998, |
|
"rewards/margins": 0.6608496904373169, |
|
"rewards/rejected": -0.5747033357620239, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08522727272727272, |
|
"grad_norm": 3.947671256913515, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.8618407249450684, |
|
"logits/rejected": -2.6804850101470947, |
|
"logps/chosen": -294.7425842285156, |
|
"logps/rejected": -1098.802978515625, |
|
"loss": 0.1417, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.37539467215538025, |
|
"rewards/margins": 4.5838799476623535, |
|
"rewards/rejected": -4.208485126495361, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11363636363636363, |
|
"grad_norm": 1.2673223440201191, |
|
"learning_rate": 4.998023493068254e-07, |
|
"logits/chosen": -2.8695826530456543, |
|
"logits/rejected": -2.690202236175537, |
|
"logps/chosen": -310.9261169433594, |
|
"logps/rejected": -2008.798583984375, |
|
"loss": 0.0271, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3582938015460968, |
|
"rewards/margins": 13.367321968078613, |
|
"rewards/rejected": -13.009028434753418, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.14204545454545456, |
|
"grad_norm": 0.0479749771589853, |
|
"learning_rate": 4.975823666181255e-07, |
|
"logits/chosen": -2.8763322830200195, |
|
"logits/rejected": -2.6664085388183594, |
|
"logps/chosen": -403.7674255371094, |
|
"logps/rejected": -3682.93896484375, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4113216996192932, |
|
"rewards/margins": 29.878662109375, |
|
"rewards/rejected": -30.28998374938965, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.17045454545454544, |
|
"grad_norm": 0.6274546818497669, |
|
"learning_rate": 4.929173350101024e-07, |
|
"logits/chosen": -3.0023722648620605, |
|
"logits/rejected": -2.7470011711120605, |
|
"logps/chosen": -463.00946044921875, |
|
"logps/rejected": -4437.8525390625, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.0311436653137207, |
|
"rewards/margins": 36.9188346862793, |
|
"rewards/rejected": -37.949981689453125, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.19886363636363635, |
|
"grad_norm": 0.0844518740673388, |
|
"learning_rate": 4.858533249305336e-07, |
|
"logits/chosen": -3.005385398864746, |
|
"logits/rejected": -2.6852545738220215, |
|
"logps/chosen": -471.344970703125, |
|
"logps/rejected": -4398.6142578125, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9218431711196899, |
|
"rewards/margins": 36.93693923950195, |
|
"rewards/rejected": -37.858787536621094, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.22727272727272727, |
|
"grad_norm": 5.427351151177568, |
|
"learning_rate": 4.764600984163808e-07, |
|
"logits/chosen": -3.0055181980133057, |
|
"logits/rejected": -2.524444103240967, |
|
"logps/chosen": -475.0348205566406, |
|
"logps/rejected": -5109.64990234375, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6871398687362671, |
|
"rewards/margins": 43.67203903198242, |
|
"rewards/rejected": -44.35917663574219, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2556818181818182, |
|
"grad_norm": 0.009091790561130925, |
|
"learning_rate": 4.6483042014491527e-07, |
|
"logits/chosen": -3.004645824432373, |
|
"logits/rejected": -2.3897948265075684, |
|
"logps/chosen": -470.91943359375, |
|
"logps/rejected": -4687.4931640625, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.0880612134933472, |
|
"rewards/margins": 39.41301727294922, |
|
"rewards/rejected": -40.50108337402344, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2840909090909091, |
|
"grad_norm": 0.055190493723617784, |
|
"learning_rate": 4.510791413176912e-07, |
|
"logits/chosen": -2.8832428455352783, |
|
"logits/rejected": -1.8006477355957031, |
|
"logps/chosen": -491.24505615234375, |
|
"logps/rejected": -5191.5498046875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2177506685256958, |
|
"rewards/margins": 43.63993453979492, |
|
"rewards/rejected": -44.857688903808594, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2840909090909091, |
|
"eval_logits/chosen": -2.7411134243011475, |
|
"eval_logits/rejected": -1.4200084209442139, |
|
"eval_logps/chosen": -499.48809814453125, |
|
"eval_logps/rejected": -5114.40576171875, |
|
"eval_loss": 0.0009676189511083066, |
|
"eval_rewards/accuracies": 0.9979838728904724, |
|
"eval_rewards/chosen": -1.3250634670257568, |
|
"eval_rewards/margins": 43.411109924316406, |
|
"eval_rewards/rejected": -44.73617172241211, |
|
"eval_runtime": 196.2044, |
|
"eval_samples_per_second": 19.903, |
|
"eval_steps_per_second": 0.316, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 0.03671469805558989, |
|
"learning_rate": 4.353420654246546e-07, |
|
"logits/chosen": -2.5657219886779785, |
|
"logits/rejected": -1.2966344356536865, |
|
"logps/chosen": -516.1082763671875, |
|
"logps/rejected": -4920.09814453125, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4068708419799805, |
|
"rewards/margins": 41.80142593383789, |
|
"rewards/rejected": -43.20829391479492, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3409090909090909, |
|
"grad_norm": 0.046355425167055216, |
|
"learning_rate": 4.177746070897592e-07, |
|
"logits/chosen": -2.7508440017700195, |
|
"logits/rejected": -1.5980149507522583, |
|
"logps/chosen": -527.7090454101562, |
|
"logps/rejected": -5251.87744140625, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4619219303131104, |
|
"rewards/margins": 44.68457794189453, |
|
"rewards/rejected": -46.1464958190918, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3693181818181818, |
|
"grad_norm": 0.22007447567081792, |
|
"learning_rate": 3.9855025724292763e-07, |
|
"logits/chosen": -2.9421451091766357, |
|
"logits/rejected": -1.7615553140640259, |
|
"logps/chosen": -534.1954345703125, |
|
"logps/rejected": -5053.0048828125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.6374390125274658, |
|
"rewards/margins": 42.48723220825195, |
|
"rewards/rejected": -44.124671936035156, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3977272727272727, |
|
"grad_norm": 2.5714609658759042, |
|
"learning_rate": 3.7785886977585555e-07, |
|
"logits/chosen": -2.755537271499634, |
|
"logits/rejected": -1.0718333721160889, |
|
"logps/chosen": -519.0208129882812, |
|
"logps/rejected": -5410.51708984375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5298444032669067, |
|
"rewards/margins": 46.15542984008789, |
|
"rewards/rejected": -47.68526840209961, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.42613636363636365, |
|
"grad_norm": 0.3002663453248257, |
|
"learning_rate": 3.5590478660213206e-07, |
|
"logits/chosen": -2.406147003173828, |
|
"logits/rejected": -0.27999475598335266, |
|
"logps/chosen": -545.5628051757812, |
|
"logps/rejected": -5466.4716796875, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.67499577999115, |
|
"rewards/margins": 45.86994171142578, |
|
"rewards/rejected": -47.544944763183594, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.45454545454545453, |
|
"grad_norm": 0.5345626100174099, |
|
"learning_rate": 3.3290481963801696e-07, |
|
"logits/chosen": -2.146878242492676, |
|
"logits/rejected": 0.38504794239997864, |
|
"logps/chosen": -508.6912536621094, |
|
"logps/rejected": -5496.20166015625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3855329751968384, |
|
"rewards/margins": 47.411582946777344, |
|
"rewards/rejected": -48.797119140625, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.48295454545454547, |
|
"grad_norm": 0.011551400933576704, |
|
"learning_rate": 3.0908610963322626e-07, |
|
"logits/chosen": -2.115241289138794, |
|
"logits/rejected": 0.22601358592510223, |
|
"logps/chosen": -550.0446166992188, |
|
"logps/rejected": -5791.59521484375, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6242635250091553, |
|
"rewards/margins": 49.00857925415039, |
|
"rewards/rejected": -50.632843017578125, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5113636363636364, |
|
"grad_norm": 0.0026766351641471543, |
|
"learning_rate": 2.846838829972671e-07, |
|
"logits/chosen": -2.1634111404418945, |
|
"logits/rejected": 0.14969149231910706, |
|
"logps/chosen": -528.2894287109375, |
|
"logps/rejected": -5540.1259765625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6738510131835938, |
|
"rewards/margins": 47.597564697265625, |
|
"rewards/rejected": -49.27141571044922, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5397727272727273, |
|
"grad_norm": 0.032545430377750574, |
|
"learning_rate": 2.5993912877423147e-07, |
|
"logits/chosen": -2.0492312908172607, |
|
"logits/rejected": 0.02273269183933735, |
|
"logps/chosen": -494.49713134765625, |
|
"logps/rejected": -5951.44970703125, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4888134002685547, |
|
"rewards/margins": 51.109580993652344, |
|
"rewards/rejected": -52.5984001159668, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5681818181818182, |
|
"grad_norm": 0.33060341294021806, |
|
"learning_rate": 2.3509621870754504e-07, |
|
"logits/chosen": -1.8956499099731445, |
|
"logits/rejected": 0.8085635304450989, |
|
"logps/chosen": -514.856201171875, |
|
"logps/rejected": -5108.5361328125, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4972646236419678, |
|
"rewards/margins": 43.27967071533203, |
|
"rewards/rejected": -44.77693557739258, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5681818181818182, |
|
"eval_logits/chosen": -1.8780713081359863, |
|
"eval_logits/rejected": 0.7428802251815796, |
|
"eval_logps/chosen": -529.2301635742188, |
|
"eval_logps/rejected": -5479.81494140625, |
|
"eval_loss": 0.0004189308965578675, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -1.6224839687347412, |
|
"eval_rewards/margins": 46.76777648925781, |
|
"eval_rewards/rejected": -48.390262603759766, |
|
"eval_runtime": 194.4022, |
|
"eval_samples_per_second": 20.087, |
|
"eval_steps_per_second": 0.319, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5965909090909091, |
|
"grad_norm": 0.2151284325761924, |
|
"learning_rate": 2.1040049389819624e-07, |
|
"logits/chosen": -1.7524973154067993, |
|
"logits/rejected": 0.9734399914741516, |
|
"logps/chosen": -544.6936645507812, |
|
"logps/rejected": -5410.6865234375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5405043363571167, |
|
"rewards/margins": 46.24732208251953, |
|
"rewards/rejected": -47.78782272338867, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 2.3334722364043823, |
|
"learning_rate": 1.8609584188988133e-07, |
|
"logits/chosen": -1.2117726802825928, |
|
"logits/rejected": 0.9446122050285339, |
|
"logps/chosen": -568.7277221679688, |
|
"logps/rejected": -5055.0498046875, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0412721633911133, |
|
"rewards/margins": 42.01286697387695, |
|
"rewards/rejected": -44.05413055419922, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6534090909090909, |
|
"grad_norm": 70.71977531846203, |
|
"learning_rate": 1.624222881090439e-07, |
|
"logits/chosen": -1.3626362085342407, |
|
"logits/rejected": 0.9213559031486511, |
|
"logps/chosen": -592.7008056640625, |
|
"logps/rejected": -5952.9228515625, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.9803613424301147, |
|
"rewards/margins": 50.69305419921875, |
|
"rewards/rejected": -52.67341995239258, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 0.049503027606470004, |
|
"learning_rate": 1.3961362544602212e-07, |
|
"logits/chosen": -1.4228966236114502, |
|
"logits/rejected": 1.1803163290023804, |
|
"logps/chosen": -560.458251953125, |
|
"logps/rejected": -5114.12890625, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8444585800170898, |
|
"rewards/margins": 43.356101989746094, |
|
"rewards/rejected": -45.20056915283203, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7102272727272727, |
|
"grad_norm": 0.31518289932805543, |
|
"learning_rate": 1.1789510538684522e-07, |
|
"logits/chosen": -1.6409775018692017, |
|
"logits/rejected": 0.999941349029541, |
|
"logps/chosen": -527.280517578125, |
|
"logps/rejected": -6313.17236328125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7524694204330444, |
|
"rewards/margins": 54.08344268798828, |
|
"rewards/rejected": -55.835906982421875, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7386363636363636, |
|
"grad_norm": 0.19426521854792267, |
|
"learning_rate": 9.748121349736891e-08, |
|
"logits/chosen": -1.6752300262451172, |
|
"logits/rejected": 0.9494975805282593, |
|
"logps/chosen": -575.4473266601562, |
|
"logps/rejected": -5758.55859375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9161920547485352, |
|
"rewards/margins": 48.88811111450195, |
|
"rewards/rejected": -50.80430221557617, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7670454545454546, |
|
"grad_norm": 0.00641608946538556, |
|
"learning_rate": 7.857355122839673e-08, |
|
"logits/chosen": -1.775024652481079, |
|
"logits/rejected": 0.8411375880241394, |
|
"logps/chosen": -561.006103515625, |
|
"logps/rejected": -5389.9775390625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.801565408706665, |
|
"rewards/margins": 45.40861129760742, |
|
"rewards/rejected": -47.21017837524414, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7954545454545454, |
|
"grad_norm": 0.07974689002553936, |
|
"learning_rate": 6.135884496044244e-08, |
|
"logits/chosen": -1.6470428705215454, |
|
"logits/rejected": 1.1842314004898071, |
|
"logps/chosen": -544.4002685546875, |
|
"logps/rejected": -5383.25, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6522743701934814, |
|
"rewards/margins": 45.73133087158203, |
|
"rewards/rejected": -47.38361358642578, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.8238636363636364, |
|
"grad_norm": 0.0651111213625187, |
|
"learning_rate": 4.600710195020982e-08, |
|
"logits/chosen": -1.5382473468780518, |
|
"logits/rejected": 1.2690740823745728, |
|
"logps/chosen": -565.579833984375, |
|
"logps/rejected": -5386.55859375, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7643659114837646, |
|
"rewards/margins": 45.688636779785156, |
|
"rewards/rejected": -47.4530029296875, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.8522727272727273, |
|
"grad_norm": 0.7977586596409562, |
|
"learning_rate": 3.2669931390104374e-08, |
|
"logits/chosen": -1.57468581199646, |
|
"logits/rejected": 1.145819902420044, |
|
"logps/chosen": -520.630859375, |
|
"logps/rejected": -5860.75927734375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6437629461288452, |
|
"rewards/margins": 50.755577087402344, |
|
"rewards/rejected": -52.39934158325195, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8522727272727273, |
|
"eval_logits/chosen": -1.4805512428283691, |
|
"eval_logits/rejected": 1.2551480531692505, |
|
"eval_logps/chosen": -540.1784057617188, |
|
"eval_logps/rejected": -5602.4775390625, |
|
"eval_loss": 0.0003319734532851726, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -1.7319667339324951, |
|
"eval_rewards/margins": 47.88492202758789, |
|
"eval_rewards/rejected": -49.61688995361328, |
|
"eval_runtime": 195.4681, |
|
"eval_samples_per_second": 19.978, |
|
"eval_steps_per_second": 0.317, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8806818181818182, |
|
"grad_norm": 0.1653589500509016, |
|
"learning_rate": 2.147904716149135e-08, |
|
"logits/chosen": -1.4495469331741333, |
|
"logits/rejected": 1.214980959892273, |
|
"logps/chosen": -526.7190551757812, |
|
"logps/rejected": -5688.666015625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6465237140655518, |
|
"rewards/margins": 48.23511505126953, |
|
"rewards/rejected": -49.88164520263672, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 0.028900316674960968, |
|
"learning_rate": 1.254496706805433e-08, |
|
"logits/chosen": -1.584967851638794, |
|
"logits/rejected": 1.173344373703003, |
|
"logps/chosen": -558.8123779296875, |
|
"logps/rejected": -5757.21240234375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.761392593383789, |
|
"rewards/margins": 49.29091262817383, |
|
"rewards/rejected": -51.052303314208984, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 0.1098326915964376, |
|
"learning_rate": 5.955921395237318e-09, |
|
"logits/chosen": -1.5144588947296143, |
|
"logits/rejected": 1.1384176015853882, |
|
"logps/chosen": -516.386962890625, |
|
"logps/rejected": -5688.7119140625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6111881732940674, |
|
"rewards/margins": 48.798397064208984, |
|
"rewards/rejected": -50.40958786010742, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.9659090909090909, |
|
"grad_norm": 0.17586461845284926, |
|
"learning_rate": 1.7769815745066474e-09, |
|
"logits/chosen": -1.7140228748321533, |
|
"logits/rejected": 1.0387569665908813, |
|
"logps/chosen": -531.6962890625, |
|
"logps/rejected": -5153.82958984375, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6271555423736572, |
|
"rewards/margins": 43.823490142822266, |
|
"rewards/rejected": -45.45064163208008, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9943181818181818, |
|
"grad_norm": 0.8169470643038225, |
|
"learning_rate": 4.9417557483610875e-11, |
|
"logits/chosen": -1.4863841533660889, |
|
"logits/rejected": 1.150782823562622, |
|
"logps/chosen": -551.8464965820312, |
|
"logps/rejected": -5518.20068359375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.775787353515625, |
|
"rewards/margins": 46.930641174316406, |
|
"rewards/rejected": -48.706424713134766, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 352, |
|
"total_flos": 0.0, |
|
"train_loss": 0.03994455389971563, |
|
"train_runtime": 9328.4885, |
|
"train_samples_per_second": 4.824, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 352, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|