|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984,
  "eval_steps": 500,
  "global_step": 156,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-08,
      "logits/chosen": -2.9335875511169434,
      "logits/rejected": -2.708059310913086,
      "logps/chosen": -367.5562744140625,
      "logps/rejected": -316.3052062988281,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.7960729598999023,
      "logits/rejected": -2.7260401248931885,
      "logps/chosen": -274.3499755859375,
      "logps/rejected": -233.05935668945312,
      "loss": 0.6928,
      "rewards/accuracies": 0.4861111044883728,
      "rewards/chosen": 7.348848885158077e-05,
      "rewards/margins": 0.00036154486588202417,
      "rewards/rejected": -0.0002880564716178924,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.989935734988097e-07,
      "logits/chosen": -2.7711944580078125,
      "logits/rejected": -2.701967716217041,
      "logps/chosen": -303.2231140136719,
      "logps/rejected": -258.30401611328125,
      "loss": 0.6874,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 0.02738715335726738,
      "rewards/margins": 0.014244809746742249,
      "rewards/rejected": 0.013142342679202557,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.877641290737883e-07,
      "logits/chosen": -2.702401638031006,
      "logits/rejected": -2.6644959449768066,
      "logps/chosen": -298.130859375,
      "logps/rejected": -281.53778076171875,
      "loss": 0.6707,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.0661591961979866,
      "rewards/margins": 0.05363141745328903,
      "rewards/rejected": 0.012527775950729847,
      "step": 30
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.646121984004665e-07,
      "logits/chosen": -2.713608741760254,
      "logits/rejected": -2.580498218536377,
      "logps/chosen": -287.88385009765625,
      "logps/rejected": -264.4161376953125,
      "loss": 0.648,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.0008304916555061936,
      "rewards/margins": 0.12442084401845932,
      "rewards/rejected": -0.12359035015106201,
      "step": 40
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.3069871595684787e-07,
      "logits/chosen": -2.5537896156311035,
      "logits/rejected": -2.501384735107422,
      "logps/chosen": -269.0035705566406,
      "logps/rejected": -289.73883056640625,
      "loss": 0.6372,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.021835196763277054,
      "rewards/margins": 0.18140152096748352,
      "rewards/rejected": -0.20323672890663147,
      "step": 50
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.877242453630256e-07,
      "logits/chosen": -2.577971935272217,
      "logits/rejected": -2.5247650146484375,
      "logps/chosen": -304.6644592285156,
      "logps/rejected": -324.11370849609375,
      "loss": 0.608,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.12026672065258026,
      "rewards/margins": 0.289926141500473,
      "rewards/rejected": -0.4101928174495697,
      "step": 60
    },
    {
      "epoch": 0.45,
      "learning_rate": 3.378437060203357e-07,
      "logits/chosen": -2.5257785320281982,
      "logits/rejected": -2.4621853828430176,
      "logps/chosen": -316.51153564453125,
      "logps/rejected": -312.25164794921875,
      "loss": 0.6138,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.1019580215215683,
      "rewards/margins": 0.32222574949264526,
      "rewards/rejected": -0.42418378591537476,
      "step": 70
    },
    {
      "epoch": 0.51,
      "learning_rate": 2.8355831645441387e-07,
      "logits/chosen": -2.629840135574341,
      "logits/rejected": -2.492844343185425,
      "logps/chosen": -348.1588439941406,
      "logps/rejected": -312.782470703125,
      "loss": 0.5935,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.14375950396060944,
      "rewards/margins": 0.3931245803833008,
      "rewards/rejected": -0.5368840098381042,
      "step": 80
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.2759017277414164e-07,
      "logits/chosen": -2.368922710418701,
      "logits/rejected": -2.346982479095459,
      "logps/chosen": -337.67193603515625,
      "logps/rejected": -351.6241760253906,
      "loss": 0.5782,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.3990400731563568,
      "rewards/margins": 0.3560941815376282,
      "rewards/rejected": -0.7551342844963074,
      "step": 90
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.7274575140626315e-07,
      "logits/chosen": -2.164156675338745,
      "logits/rejected": -2.03346586227417,
      "logps/chosen": -310.364013671875,
      "logps/rejected": -301.4464111328125,
      "loss": 0.5887,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.2869732081890106,
      "rewards/margins": 0.44102102518081665,
      "rewards/rejected": -0.7279942035675049,
      "step": 100
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.2177518064852348e-07,
      "logits/chosen": -1.8856881856918335,
      "logits/rejected": -1.5883822441101074,
      "logps/chosen": -327.2856750488281,
      "logps/rejected": -338.85137939453125,
      "loss": 0.5656,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.3426055610179901,
      "rewards/margins": 0.4826560616493225,
      "rewards/rejected": -0.8252617120742798,
      "step": 110
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.723433775328384e-08,
      "logits/chosen": -1.5672389268875122,
      "logits/rejected": -1.4351329803466797,
      "logps/chosen": -334.88446044921875,
      "logps/rejected": -357.7071838378906,
      "loss": 0.5718,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.46880921721458435,
      "rewards/margins": 0.29977938532829285,
      "rewards/rejected": -0.768588662147522,
      "step": 120
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.1356686569674335e-08,
      "logits/chosen": -1.5653654336929321,
      "logits/rejected": -1.2208584547042847,
      "logps/chosen": -330.20635986328125,
      "logps/rejected": -355.11126708984375,
      "loss": 0.5632,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -0.34367313981056213,
      "rewards/margins": 0.5312191247940063,
      "rewards/rejected": -0.8748922348022461,
      "step": 130
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.5941282340065697e-08,
      "logits/chosen": -1.5312021970748901,
      "logits/rejected": -1.239989995956421,
      "logps/chosen": -325.05908203125,
      "logps/rejected": -337.0903015136719,
      "loss": 0.5504,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.2957434356212616,
      "rewards/margins": 0.4055696427822113,
      "rewards/rejected": -0.7013131380081177,
      "step": 140
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.2625595580163247e-09,
      "logits/chosen": -1.4374409914016724,
      "logits/rejected": -1.128422498703003,
      "logps/chosen": -334.95135498046875,
      "logps/rejected": -365.4905700683594,
      "loss": 0.5687,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": -0.3784746527671814,
      "rewards/margins": 0.64166659116745,
      "rewards/rejected": -1.020141363143921,
      "step": 150
    },
    {
      "epoch": 1.0,
      "step": 156,
      "total_flos": 0.0,
      "train_loss": 0.3960995582433847,
      "train_runtime": 6783.1442,
      "train_samples_per_second": 2.948,
      "train_steps_per_second": 0.023
    }
  ],
  "logging_steps": 10,
  "max_steps": 156,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|