{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9874476987447699,
  "eval_steps": 500,
  "global_step": 59,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.016736401673640166,
      "grad_norm": 11.88069732420037,
      "learning_rate": 8.333333333333333e-08,
      "logits/chosen": -2.8434743881225586,
      "logits/rejected": -2.769583225250244,
      "logps/chosen": -178.4530487060547,
      "logps/pi_response": -97.4625244140625,
      "logps/ref_response": -97.4625244140625,
      "logps/rejected": -233.52891540527344,
      "loss": 0.6956,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.16736401673640167,
      "grad_norm": 11.505588607871543,
      "learning_rate": 4.930057285201027e-07,
      "logits/chosen": -2.7551352977752686,
      "logits/rejected": -2.7301125526428223,
      "logps/chosen": -225.8229522705078,
      "logps/pi_response": -120.52629852294922,
      "logps/ref_response": -119.74172973632812,
      "logps/rejected": -265.8622131347656,
      "loss": 0.6875,
      "rewards/accuracies": 0.5972222089767456,
      "rewards/chosen": -0.03128578141331673,
      "rewards/margins": 0.02152625285089016,
      "rewards/rejected": -0.05281204357743263,
      "step": 10
    },
    {
      "epoch": 0.33472803347280333,
      "grad_norm": 25.557753221670836,
      "learning_rate": 4.187457503795526e-07,
      "logits/chosen": -2.8003687858581543,
      "logits/rejected": -2.7502455711364746,
      "logps/chosen": -259.4373474121094,
      "logps/pi_response": -115.7847900390625,
      "logps/ref_response": -118.667724609375,
      "logps/rejected": -311.7029113769531,
      "loss": 0.6364,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.24674002826213837,
      "rewards/margins": 0.29084205627441406,
      "rewards/rejected": -0.5375820994377136,
      "step": 20
    },
    {
      "epoch": 0.502092050209205,
      "grad_norm": 15.538988265977032,
      "learning_rate": 2.8691164100062034e-07,
      "logits/chosen": -2.782900094985962,
      "logits/rejected": -2.7508139610290527,
      "logps/chosen": -289.4466247558594,
      "logps/pi_response": -149.1678466796875,
      "logps/ref_response": -124.9267807006836,
      "logps/rejected": -358.2657470703125,
      "loss": 0.5877,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.4117729663848877,
      "rewards/margins": 0.5106114745140076,
      "rewards/rejected": -0.9223844408988953,
      "step": 30
    },
    {
      "epoch": 0.6694560669456067,
      "grad_norm": 19.459335671099513,
      "learning_rate": 1.4248369943086995e-07,
      "logits/chosen": -2.680762767791748,
      "logits/rejected": -2.6408679485321045,
      "logps/chosen": -269.59991455078125,
      "logps/pi_response": -149.72483825683594,
      "logps/ref_response": -115.91612243652344,
      "logps/rejected": -360.96343994140625,
      "loss": 0.5607,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.552818238735199,
      "rewards/margins": 0.5624674558639526,
      "rewards/rejected": -1.1152856349945068,
      "step": 40
    },
    {
      "epoch": 0.8368200836820083,
      "grad_norm": 21.120309284129263,
      "learning_rate": 3.473909705816111e-08,
      "logits/chosen": -2.7053980827331543,
      "logits/rejected": -2.6527323722839355,
      "logps/chosen": -259.5418701171875,
      "logps/pi_response": -140.0457305908203,
      "logps/ref_response": -106.76103210449219,
      "logps/rejected": -333.9945373535156,
      "loss": 0.5657,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.5184835195541382,
      "rewards/margins": 0.5243555307388306,
      "rewards/rejected": -1.0428390502929688,
      "step": 50
    },
    {
      "epoch": 0.9874476987447699,
      "step": 59,
      "total_flos": 0.0,
      "train_loss": 0.6013881675267624,
      "train_runtime": 1334.1164,
      "train_samples_per_second": 11.456,
      "train_steps_per_second": 0.044
    }
  ],
  "logging_steps": 10,
  "max_steps": 59,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}