ZhangShenao's picture
Model save
8c1b113 verified
raw
history blame contribute delete
No virus
10.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984301412872841,
"eval_steps": 500,
"global_step": 159,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006279434850863423,
"grad_norm": 49.46455363166422,
"learning_rate": 6.25e-09,
"logits/chosen": 0.7729572653770447,
"logits/rejected": 0.9946910738945007,
"logps/chosen": -236.32302856445312,
"logps/pi_response": -130.86985778808594,
"logps/ref_response": -130.86985778808594,
"logps/rejected": -603.046630859375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06279434850863422,
"grad_norm": 39.43803954404861,
"learning_rate": 6.25e-08,
"logits/chosen": 0.586125373840332,
"logits/rejected": 0.9812673926353455,
"logps/chosen": -321.5859680175781,
"logps/pi_response": -114.55306243896484,
"logps/ref_response": -114.55144500732422,
"logps/rejected": -546.476318359375,
"loss": 0.6928,
"rewards/accuracies": 0.4097222089767456,
"rewards/chosen": -0.002124057849869132,
"rewards/margins": -0.0025205437559634447,
"rewards/rejected": 0.0003964858187828213,
"step": 10
},
{
"epoch": 0.12558869701726844,
"grad_norm": 35.21511776692514,
"learning_rate": 9.980706626858606e-08,
"logits/chosen": 0.71666020154953,
"logits/rejected": 1.056302785873413,
"logps/chosen": -268.4955139160156,
"logps/pi_response": -120.29206848144531,
"logps/ref_response": -120.35611724853516,
"logps/rejected": -554.8519897460938,
"loss": 0.6871,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.0067259399220347404,
"rewards/margins": 0.013305353932082653,
"rewards/rejected": -0.020031295716762543,
"step": 20
},
{
"epoch": 0.18838304552590268,
"grad_norm": 29.26357792211296,
"learning_rate": 9.765362502737097e-08,
"logits/chosen": 0.7042983174324036,
"logits/rejected": 1.0865741968154907,
"logps/chosen": -327.14837646484375,
"logps/pi_response": -118.11744689941406,
"logps/ref_response": -118.01436614990234,
"logps/rejected": -522.3547973632812,
"loss": 0.6647,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.05294172838330269,
"rewards/margins": 0.0403742715716362,
"rewards/rejected": -0.09331601113080978,
"step": 30
},
{
"epoch": 0.25117739403453687,
"grad_norm": 29.237988897280104,
"learning_rate": 9.320944188084241e-08,
"logits/chosen": 0.7036560773849487,
"logits/rejected": 1.0425139665603638,
"logps/chosen": -314.95477294921875,
"logps/pi_response": -113.6474609375,
"logps/ref_response": -112.6651840209961,
"logps/rejected": -582.9769897460938,
"loss": 0.6238,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.14075346291065216,
"rewards/margins": 0.1760648787021637,
"rewards/rejected": -0.31681832671165466,
"step": 40
},
{
"epoch": 0.3139717425431711,
"grad_norm": 28.58811527839098,
"learning_rate": 8.668815171119019e-08,
"logits/chosen": 0.7629910707473755,
"logits/rejected": 1.156697154045105,
"logps/chosen": -294.5866394042969,
"logps/pi_response": -118.6247329711914,
"logps/ref_response": -117.3294906616211,
"logps/rejected": -556.7156372070312,
"loss": 0.5895,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.15429341793060303,
"rewards/margins": 0.2478162795305252,
"rewards/rejected": -0.40210968255996704,
"step": 50
},
{
"epoch": 0.37676609105180536,
"grad_norm": 19.524336970843382,
"learning_rate": 7.840323733655779e-08,
"logits/chosen": 0.7207380533218384,
"logits/rejected": 1.0942904949188232,
"logps/chosen": -313.4345703125,
"logps/pi_response": -113.78782653808594,
"logps/ref_response": -110.54020690917969,
"logps/rejected": -610.8480224609375,
"loss": 0.5704,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.30726358294487,
"rewards/margins": 0.4200159013271332,
"rewards/rejected": -0.7272794842720032,
"step": 60
},
{
"epoch": 0.43956043956043955,
"grad_norm": 19.57511396533311,
"learning_rate": 6.87529601804781e-08,
"logits/chosen": 0.9113609194755554,
"logits/rejected": 1.2666047811508179,
"logps/chosen": -296.62127685546875,
"logps/pi_response": -117.84214782714844,
"logps/ref_response": -111.63108825683594,
"logps/rejected": -629.7382202148438,
"loss": 0.5413,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.41433459520339966,
"rewards/margins": 0.5893218517303467,
"rewards/rejected": -1.0036565065383911,
"step": 70
},
{
"epoch": 0.5023547880690737,
"grad_norm": 23.55600516973884,
"learning_rate": 5.8201215576551086e-08,
"logits/chosen": 0.7490819692611694,
"logits/rejected": 1.2997609376907349,
"logps/chosen": -338.2764587402344,
"logps/pi_response": -127.49137878417969,
"logps/ref_response": -119.87553405761719,
"logps/rejected": -695.3623657226562,
"loss": 0.5438,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.4643521308898926,
"rewards/margins": 0.773566484451294,
"rewards/rejected": -1.2379186153411865,
"step": 80
},
{
"epoch": 0.565149136577708,
"grad_norm": 21.827438682416545,
"learning_rate": 4.725523300678362e-08,
"logits/chosen": 0.931191086769104,
"logits/rejected": 1.334916353225708,
"logps/chosen": -402.0022888183594,
"logps/pi_response": -120.39073181152344,
"logps/ref_response": -110.82243347167969,
"logps/rejected": -706.8969116210938,
"loss": 0.5457,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.6253440380096436,
"rewards/margins": 0.7057360410690308,
"rewards/rejected": -1.3310800790786743,
"step": 90
},
{
"epoch": 0.6279434850863422,
"grad_norm": 15.954130788308259,
"learning_rate": 3.644119323817915e-08,
"logits/chosen": 0.9159143567085266,
"logits/rejected": 1.2248215675354004,
"logps/chosen": -411.3912658691406,
"logps/pi_response": -127.1515884399414,
"logps/ref_response": -119.00660705566406,
"logps/rejected": -621.9088745117188,
"loss": 0.5465,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.6569873690605164,
"rewards/margins": 0.44270920753479004,
"rewards/rejected": -1.0996966361999512,
"step": 100
},
{
"epoch": 0.6907378335949764,
"grad_norm": 18.598007666864472,
"learning_rate": 2.6278934458271994e-08,
"logits/chosen": 0.8910084962844849,
"logits/rejected": 1.170478105545044,
"logps/chosen": -335.10687255859375,
"logps/pi_response": -121.1594009399414,
"logps/ref_response": -113.50992584228516,
"logps/rejected": -652.3834228515625,
"loss": 0.5584,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.4804770052433014,
"rewards/margins": 0.6785348057746887,
"rewards/rejected": -1.159011960029602,
"step": 110
},
{
"epoch": 0.7535321821036107,
"grad_norm": 19.38573962620474,
"learning_rate": 1.725696330273575e-08,
"logits/chosen": 0.8399345278739929,
"logits/rejected": 1.1444717645645142,
"logps/chosen": -328.6500244140625,
"logps/pi_response": -122.32352447509766,
"logps/ref_response": -115.1312026977539,
"logps/rejected": -664.1232299804688,
"loss": 0.5217,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.43035322427749634,
"rewards/margins": 0.7509421706199646,
"rewards/rejected": -1.181295394897461,
"step": 120
},
{
"epoch": 0.8163265306122449,
"grad_norm": 17.23175686548954,
"learning_rate": 9.808972011828054e-09,
"logits/chosen": 0.7406191229820251,
"logits/rejected": 1.2706494331359863,
"logps/chosen": -332.7467956542969,
"logps/pi_response": -127.5960922241211,
"logps/ref_response": -121.97221374511719,
"logps/rejected": -654.1246948242188,
"loss": 0.5466,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.4437400698661804,
"rewards/margins": 0.6695939898490906,
"rewards/rejected": -1.1133341789245605,
"step": 130
},
{
"epoch": 0.8791208791208791,
"grad_norm": 17.704873543452532,
"learning_rate": 4.2929905518041705e-09,
"logits/chosen": 0.8205103874206543,
"logits/rejected": 1.2819491624832153,
"logps/chosen": -346.9167175292969,
"logps/pi_response": -123.3003158569336,
"logps/ref_response": -116.151611328125,
"logps/rejected": -589.7174072265625,
"loss": 0.5379,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.4823363423347473,
"rewards/margins": 0.5467069745063782,
"rewards/rejected": -1.029043436050415,
"step": 140
},
{
"epoch": 0.9419152276295133,
"grad_norm": 20.711700427076533,
"learning_rate": 9.741758728888216e-10,
"logits/chosen": 0.6927149891853333,
"logits/rejected": 1.12680983543396,
"logps/chosen": -402.5865783691406,
"logps/pi_response": -140.15695190429688,
"logps/ref_response": -133.6136016845703,
"logps/rejected": -667.4969482421875,
"loss": 0.5318,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.5095483064651489,
"rewards/margins": 0.6510818600654602,
"rewards/rejected": -1.160630226135254,
"step": 150
},
{
"epoch": 0.9984301412872841,
"step": 159,
"total_flos": 0.0,
"train_loss": 0.5759439048527172,
"train_runtime": 4356.4071,
"train_samples_per_second": 4.678,
"train_steps_per_second": 0.036
}
],
"logging_steps": 10,
"max_steps": 159,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}