zephyr-7b-dpo-full-magpi-3 / trainer_state.json
sfulay's picture
Model save
b281371 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 352,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.028409090909090908,
"grad_norm": 58.42705245846632,
"learning_rate": 1.3888888888888888e-07,
"logits/chosen": -2.8592312335968018,
"logits/rejected": -2.642709732055664,
"logps/chosen": -390.5020446777344,
"logps/rejected": -607.8412475585938,
"loss": 0.6868,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.002577512990683317,
"rewards/margins": 0.013913804665207863,
"rewards/rejected": -0.011336291208863258,
"step": 10
},
{
"epoch": 0.056818181818181816,
"grad_norm": 18.51862119745116,
"learning_rate": 2.7777777777777776e-07,
"logits/chosen": -2.84271502494812,
"logits/rejected": -2.694936513900757,
"logps/chosen": -328.5304870605469,
"logps/rejected": -774.9099731445312,
"loss": 0.4819,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 0.08614631742238998,
"rewards/margins": 0.6608496904373169,
"rewards/rejected": -0.5747033357620239,
"step": 20
},
{
"epoch": 0.08522727272727272,
"grad_norm": 3.947671256913515,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -2.8618407249450684,
"logits/rejected": -2.6804850101470947,
"logps/chosen": -294.7425842285156,
"logps/rejected": -1098.802978515625,
"loss": 0.1417,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.37539467215538025,
"rewards/margins": 4.5838799476623535,
"rewards/rejected": -4.208485126495361,
"step": 30
},
{
"epoch": 0.11363636363636363,
"grad_norm": 1.2673223440201191,
"learning_rate": 4.998023493068254e-07,
"logits/chosen": -2.8695826530456543,
"logits/rejected": -2.690202236175537,
"logps/chosen": -310.9261169433594,
"logps/rejected": -2008.798583984375,
"loss": 0.0271,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.3582938015460968,
"rewards/margins": 13.367321968078613,
"rewards/rejected": -13.009028434753418,
"step": 40
},
{
"epoch": 0.14204545454545456,
"grad_norm": 0.0479749771589853,
"learning_rate": 4.975823666181255e-07,
"logits/chosen": -2.8763322830200195,
"logits/rejected": -2.6664085388183594,
"logps/chosen": -403.7674255371094,
"logps/rejected": -3682.93896484375,
"loss": 0.0033,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.4113216996192932,
"rewards/margins": 29.878662109375,
"rewards/rejected": -30.28998374938965,
"step": 50
},
{
"epoch": 0.17045454545454544,
"grad_norm": 0.6274546818497669,
"learning_rate": 4.929173350101024e-07,
"logits/chosen": -3.0023722648620605,
"logits/rejected": -2.7470011711120605,
"logps/chosen": -463.00946044921875,
"logps/rejected": -4437.8525390625,
"loss": 0.0031,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -1.0311436653137207,
"rewards/margins": 36.9188346862793,
"rewards/rejected": -37.949981689453125,
"step": 60
},
{
"epoch": 0.19886363636363635,
"grad_norm": 0.0844518740673388,
"learning_rate": 4.858533249305336e-07,
"logits/chosen": -3.005385398864746,
"logits/rejected": -2.6852545738220215,
"logps/chosen": -471.344970703125,
"logps/rejected": -4398.6142578125,
"loss": 0.0034,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.9218431711196899,
"rewards/margins": 36.93693923950195,
"rewards/rejected": -37.858787536621094,
"step": 70
},
{
"epoch": 0.22727272727272727,
"grad_norm": 5.427351151177568,
"learning_rate": 4.764600984163808e-07,
"logits/chosen": -3.0055181980133057,
"logits/rejected": -2.524444103240967,
"logps/chosen": -475.0348205566406,
"logps/rejected": -5109.64990234375,
"loss": 0.0027,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.6871398687362671,
"rewards/margins": 43.67203903198242,
"rewards/rejected": -44.35917663574219,
"step": 80
},
{
"epoch": 0.2556818181818182,
"grad_norm": 0.009091790561130925,
"learning_rate": 4.6483042014491527e-07,
"logits/chosen": -3.004645824432373,
"logits/rejected": -2.3897948265075684,
"logps/chosen": -470.91943359375,
"logps/rejected": -4687.4931640625,
"loss": 0.0112,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -1.0880612134933472,
"rewards/margins": 39.41301727294922,
"rewards/rejected": -40.50108337402344,
"step": 90
},
{
"epoch": 0.2840909090909091,
"grad_norm": 0.055190493723617784,
"learning_rate": 4.510791413176912e-07,
"logits/chosen": -2.8832428455352783,
"logits/rejected": -1.8006477355957031,
"logps/chosen": -491.24505615234375,
"logps/rejected": -5191.5498046875,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.2177506685256958,
"rewards/margins": 43.63993453979492,
"rewards/rejected": -44.857688903808594,
"step": 100
},
{
"epoch": 0.2840909090909091,
"eval_logits/chosen": -2.7411134243011475,
"eval_logits/rejected": -1.4200084209442139,
"eval_logps/chosen": -499.48809814453125,
"eval_logps/rejected": -5114.40576171875,
"eval_loss": 0.0009676189511083066,
"eval_rewards/accuracies": 0.9979838728904724,
"eval_rewards/chosen": -1.3250634670257568,
"eval_rewards/margins": 43.411109924316406,
"eval_rewards/rejected": -44.73617172241211,
"eval_runtime": 196.2044,
"eval_samples_per_second": 19.903,
"eval_steps_per_second": 0.316,
"step": 100
},
{
"epoch": 0.3125,
"grad_norm": 0.03671469805558989,
"learning_rate": 4.353420654246546e-07,
"logits/chosen": -2.5657219886779785,
"logits/rejected": -1.2966344356536865,
"logps/chosen": -516.1082763671875,
"logps/rejected": -4920.09814453125,
"loss": 0.0032,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.4068708419799805,
"rewards/margins": 41.80142593383789,
"rewards/rejected": -43.20829391479492,
"step": 110
},
{
"epoch": 0.3409090909090909,
"grad_norm": 0.046355425167055216,
"learning_rate": 4.177746070897592e-07,
"logits/chosen": -2.7508440017700195,
"logits/rejected": -1.5980149507522583,
"logps/chosen": -527.7090454101562,
"logps/rejected": -5251.87744140625,
"loss": 0.0014,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.4619219303131104,
"rewards/margins": 44.68457794189453,
"rewards/rejected": -46.1464958190918,
"step": 120
},
{
"epoch": 0.3693181818181818,
"grad_norm": 0.22007447567081792,
"learning_rate": 3.9855025724292763e-07,
"logits/chosen": -2.9421451091766357,
"logits/rejected": -1.7615553140640259,
"logps/chosen": -534.1954345703125,
"logps/rejected": -5053.0048828125,
"loss": 0.0022,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -1.6374390125274658,
"rewards/margins": 42.48723220825195,
"rewards/rejected": -44.124671936035156,
"step": 130
},
{
"epoch": 0.3977272727272727,
"grad_norm": 2.5714609658759042,
"learning_rate": 3.7785886977585555e-07,
"logits/chosen": -2.755537271499634,
"logits/rejected": -1.0718333721160889,
"logps/chosen": -519.0208129882812,
"logps/rejected": -5410.51708984375,
"loss": 0.0009,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.5298444032669067,
"rewards/margins": 46.15542984008789,
"rewards/rejected": -47.68526840209961,
"step": 140
},
{
"epoch": 0.42613636363636365,
"grad_norm": 0.3002663453248257,
"learning_rate": 3.5590478660213206e-07,
"logits/chosen": -2.406147003173828,
"logits/rejected": -0.27999475598335266,
"logps/chosen": -545.5628051757812,
"logps/rejected": -5466.4716796875,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.67499577999115,
"rewards/margins": 45.86994171142578,
"rewards/rejected": -47.544944763183594,
"step": 150
},
{
"epoch": 0.45454545454545453,
"grad_norm": 0.5345626100174099,
"learning_rate": 3.3290481963801696e-07,
"logits/chosen": -2.146878242492676,
"logits/rejected": 0.38504794239997864,
"logps/chosen": -508.6912536621094,
"logps/rejected": -5496.20166015625,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.3855329751968384,
"rewards/margins": 47.411582946777344,
"rewards/rejected": -48.797119140625,
"step": 160
},
{
"epoch": 0.48295454545454547,
"grad_norm": 0.011551400933576704,
"learning_rate": 3.0908610963322626e-07,
"logits/chosen": -2.115241289138794,
"logits/rejected": 0.22601358592510223,
"logps/chosen": -550.0446166992188,
"logps/rejected": -5791.59521484375,
"loss": 0.0042,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6242635250091553,
"rewards/margins": 49.00857925415039,
"rewards/rejected": -50.632843017578125,
"step": 170
},
{
"epoch": 0.5113636363636364,
"grad_norm": 0.0026766351641471543,
"learning_rate": 2.846838829972671e-07,
"logits/chosen": -2.1634111404418945,
"logits/rejected": 0.14969149231910706,
"logps/chosen": -528.2894287109375,
"logps/rejected": -5540.1259765625,
"loss": 0.0026,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6738510131835938,
"rewards/margins": 47.597564697265625,
"rewards/rejected": -49.27141571044922,
"step": 180
},
{
"epoch": 0.5397727272727273,
"grad_norm": 0.032545430377750574,
"learning_rate": 2.5993912877423147e-07,
"logits/chosen": -2.0492312908172607,
"logits/rejected": 0.02273269183933735,
"logps/chosen": -494.49713134765625,
"logps/rejected": -5951.44970703125,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.4888134002685547,
"rewards/margins": 51.109580993652344,
"rewards/rejected": -52.5984001159668,
"step": 190
},
{
"epoch": 0.5681818181818182,
"grad_norm": 0.33060341294021806,
"learning_rate": 2.3509621870754504e-07,
"logits/chosen": -1.8956499099731445,
"logits/rejected": 0.8085635304450989,
"logps/chosen": -514.856201171875,
"logps/rejected": -5108.5361328125,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.4972646236419678,
"rewards/margins": 43.27967071533203,
"rewards/rejected": -44.77693557739258,
"step": 200
},
{
"epoch": 0.5681818181818182,
"eval_logits/chosen": -1.8780713081359863,
"eval_logits/rejected": 0.7428802251815796,
"eval_logps/chosen": -529.2301635742188,
"eval_logps/rejected": -5479.81494140625,
"eval_loss": 0.0004189308965578675,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -1.6224839687347412,
"eval_rewards/margins": 46.76777648925781,
"eval_rewards/rejected": -48.390262603759766,
"eval_runtime": 194.4022,
"eval_samples_per_second": 20.087,
"eval_steps_per_second": 0.319,
"step": 200
},
{
"epoch": 0.5965909090909091,
"grad_norm": 0.2151284325761924,
"learning_rate": 2.1040049389819624e-07,
"logits/chosen": -1.7524973154067993,
"logits/rejected": 0.9734399914741516,
"logps/chosen": -544.6936645507812,
"logps/rejected": -5410.6865234375,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.5405043363571167,
"rewards/margins": 46.24732208251953,
"rewards/rejected": -47.78782272338867,
"step": 210
},
{
"epoch": 0.625,
"grad_norm": 2.3334722364043823,
"learning_rate": 1.8609584188988133e-07,
"logits/chosen": -1.2117726802825928,
"logits/rejected": 0.9446122050285339,
"logps/chosen": -568.7277221679688,
"logps/rejected": -5055.0498046875,
"loss": 0.0018,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.0412721633911133,
"rewards/margins": 42.01286697387695,
"rewards/rejected": -44.05413055419922,
"step": 220
},
{
"epoch": 0.6534090909090909,
"grad_norm": 70.71977531846203,
"learning_rate": 1.624222881090439e-07,
"logits/chosen": -1.3626362085342407,
"logits/rejected": 0.9213559031486511,
"logps/chosen": -592.7008056640625,
"logps/rejected": -5952.9228515625,
"loss": 0.0147,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -1.9803613424301147,
"rewards/margins": 50.69305419921875,
"rewards/rejected": -52.67341995239258,
"step": 230
},
{
"epoch": 0.6818181818181818,
"grad_norm": 0.049503027606470004,
"learning_rate": 1.3961362544602212e-07,
"logits/chosen": -1.4228966236114502,
"logits/rejected": 1.1803163290023804,
"logps/chosen": -560.458251953125,
"logps/rejected": -5114.12890625,
"loss": 0.001,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.8444585800170898,
"rewards/margins": 43.356101989746094,
"rewards/rejected": -45.20056915283203,
"step": 240
},
{
"epoch": 0.7102272727272727,
"grad_norm": 0.31518289932805543,
"learning_rate": 1.1789510538684522e-07,
"logits/chosen": -1.6409775018692017,
"logits/rejected": 0.999941349029541,
"logps/chosen": -527.280517578125,
"logps/rejected": -6313.17236328125,
"loss": 0.0006,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.7524694204330444,
"rewards/margins": 54.08344268798828,
"rewards/rejected": -55.835906982421875,
"step": 250
},
{
"epoch": 0.7386363636363636,
"grad_norm": 0.19426521854792267,
"learning_rate": 9.748121349736891e-08,
"logits/chosen": -1.6752300262451172,
"logits/rejected": 0.9494975805282593,
"logps/chosen": -575.4473266601562,
"logps/rejected": -5758.55859375,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.9161920547485352,
"rewards/margins": 48.88811111450195,
"rewards/rejected": -50.80430221557617,
"step": 260
},
{
"epoch": 0.7670454545454546,
"grad_norm": 0.00641608946538556,
"learning_rate": 7.857355122839673e-08,
"logits/chosen": -1.775024652481079,
"logits/rejected": 0.8411375880241394,
"logps/chosen": -561.006103515625,
"logps/rejected": -5389.9775390625,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.801565408706665,
"rewards/margins": 45.40861129760742,
"rewards/rejected": -47.21017837524414,
"step": 270
},
{
"epoch": 0.7954545454545454,
"grad_norm": 0.07974689002553936,
"learning_rate": 6.135884496044244e-08,
"logits/chosen": -1.6470428705215454,
"logits/rejected": 1.1842314004898071,
"logps/chosen": -544.4002685546875,
"logps/rejected": -5383.25,
"loss": 0.0015,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6522743701934814,
"rewards/margins": 45.73133087158203,
"rewards/rejected": -47.38361358642578,
"step": 280
},
{
"epoch": 0.8238636363636364,
"grad_norm": 0.0651111213625187,
"learning_rate": 4.600710195020982e-08,
"logits/chosen": -1.5382473468780518,
"logits/rejected": 1.2690740823745728,
"logps/chosen": -565.579833984375,
"logps/rejected": -5386.55859375,
"loss": 0.0025,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.7643659114837646,
"rewards/margins": 45.688636779785156,
"rewards/rejected": -47.4530029296875,
"step": 290
},
{
"epoch": 0.8522727272727273,
"grad_norm": 0.7977586596409562,
"learning_rate": 3.2669931390104374e-08,
"logits/chosen": -1.57468581199646,
"logits/rejected": 1.145819902420044,
"logps/chosen": -520.630859375,
"logps/rejected": -5860.75927734375,
"loss": 0.0008,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6437629461288452,
"rewards/margins": 50.755577087402344,
"rewards/rejected": -52.39934158325195,
"step": 300
},
{
"epoch": 0.8522727272727273,
"eval_logits/chosen": -1.4805512428283691,
"eval_logits/rejected": 1.2551480531692505,
"eval_logps/chosen": -540.1784057617188,
"eval_logps/rejected": -5602.4775390625,
"eval_loss": 0.0003319734532851726,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -1.7319667339324951,
"eval_rewards/margins": 47.88492202758789,
"eval_rewards/rejected": -49.61688995361328,
"eval_runtime": 195.4681,
"eval_samples_per_second": 19.978,
"eval_steps_per_second": 0.317,
"step": 300
},
{
"epoch": 0.8806818181818182,
"grad_norm": 0.1653589500509016,
"learning_rate": 2.147904716149135e-08,
"logits/chosen": -1.4495469331741333,
"logits/rejected": 1.214980959892273,
"logps/chosen": -526.7190551757812,
"logps/rejected": -5688.666015625,
"loss": 0.0012,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6465237140655518,
"rewards/margins": 48.23511505126953,
"rewards/rejected": -49.88164520263672,
"step": 310
},
{
"epoch": 0.9090909090909091,
"grad_norm": 0.028900316674960968,
"learning_rate": 1.254496706805433e-08,
"logits/chosen": -1.584967851638794,
"logits/rejected": 1.173344373703003,
"logps/chosen": -558.8123779296875,
"logps/rejected": -5757.21240234375,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.761392593383789,
"rewards/margins": 49.29091262817383,
"rewards/rejected": -51.052303314208984,
"step": 320
},
{
"epoch": 0.9375,
"grad_norm": 0.1098326915964376,
"learning_rate": 5.955921395237318e-09,
"logits/chosen": -1.5144588947296143,
"logits/rejected": 1.1384176015853882,
"logps/chosen": -516.386962890625,
"logps/rejected": -5688.7119140625,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6111881732940674,
"rewards/margins": 48.798397064208984,
"rewards/rejected": -50.40958786010742,
"step": 330
},
{
"epoch": 0.9659090909090909,
"grad_norm": 0.17586461845284926,
"learning_rate": 1.7769815745066474e-09,
"logits/chosen": -1.7140228748321533,
"logits/rejected": 1.0387569665908813,
"logps/chosen": -531.6962890625,
"logps/rejected": -5153.82958984375,
"loss": 0.0006,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6271555423736572,
"rewards/margins": 43.823490142822266,
"rewards/rejected": -45.45064163208008,
"step": 340
},
{
"epoch": 0.9943181818181818,
"grad_norm": 0.8169470643038225,
"learning_rate": 4.9417557483610875e-11,
"logits/chosen": -1.4863841533660889,
"logits/rejected": 1.150782823562622,
"logps/chosen": -551.8464965820312,
"logps/rejected": -5518.20068359375,
"loss": 0.0009,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.775787353515625,
"rewards/margins": 46.930641174316406,
"rewards/rejected": -48.706424713134766,
"step": 350
},
{
"epoch": 1.0,
"step": 352,
"total_flos": 0.0,
"train_loss": 0.03994455389971563,
"train_runtime": 9328.4885,
"train_samples_per_second": 4.824,
"train_steps_per_second": 0.038
}
],
"logging_steps": 10,
"max_steps": 352,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}