{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 93, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16129032258064516, "grad_norm": 50.222183840612765, "learning_rate": 5e-07, "logits/chosen": -1.225976586341858, "logits/rejected": -1.1897461414337158, "logps/chosen": -123.3499984741211, "logps/rejected": -135.125, "loss": 5.5301, "rewards/accuracies": 0.28125, "rewards/chosen": -0.0023483275435864925, "rewards/margins": 0.003292083740234375, "rewards/rejected": -0.00562210101634264, "step": 5 }, { "epoch": 0.3225806451612903, "grad_norm": 46.021197534461706, "learning_rate": 5e-07, "logits/chosen": -1.2423827648162842, "logits/rejected": -1.1926758289337158, "logps/chosen": -117.51249694824219, "logps/rejected": -127.3125, "loss": 5.5391, "rewards/accuracies": 0.3125, "rewards/chosen": 0.005465698428452015, "rewards/margins": 0.0017150879139080644, "rewards/rejected": 0.0037551880814135075, "step": 10 }, { "epoch": 0.4838709677419355, "grad_norm": 46.89746866641082, "learning_rate": 5e-07, "logits/chosen": -1.228906273841858, "logits/rejected": -1.205468773841858, "logps/chosen": -121.6500015258789, "logps/rejected": -128.0500030517578, "loss": 5.5246, "rewards/accuracies": 0.37187498807907104, "rewards/chosen": 0.00469894427806139, "rewards/margins": 0.0062500000931322575, "rewards/rejected": -0.0015663147205486894, "step": 15 }, { "epoch": 0.6451612903225806, "grad_norm": 44.09582250258885, "learning_rate": 5e-07, "logits/chosen": -1.206640601158142, "logits/rejected": -1.17529296875, "logps/chosen": -120.48750305175781, "logps/rejected": -127.73750305175781, "loss": 5.4723, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.02676239050924778, "rewards/margins": 0.020679473876953125, "rewards/rejected": 0.006100463680922985, "step": 20 }, { "epoch": 0.8064516129032258, "grad_norm": 41.92350954266875, "learning_rate": 5e-07, "logits/chosen": -1.205664038658142, "logits/rejected": -1.183007836341858, "logps/chosen": -119.30000305175781, "logps/rejected": -126.0, "loss": 5.4928, "rewards/accuracies": 0.4312500059604645, "rewards/chosen": 0.03284301608800888, "rewards/margins": 0.01783447340130806, "rewards/rejected": 0.015004729852080345, "step": 25 }, { "epoch": 0.967741935483871, "grad_norm": 42.97249437985483, "learning_rate": 5e-07, "logits/chosen": -1.1775391101837158, "logits/rejected": -1.1438477039337158, "logps/chosen": -121.01249694824219, "logps/rejected": -130.27499389648438, "loss": 5.4334, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": 0.0032699585426598787, "rewards/margins": 0.03898162767291069, "rewards/rejected": -0.03567352145910263, "step": 30 }, { "epoch": 1.129032258064516, "grad_norm": 33.66788683488558, "learning_rate": 5e-07, "logits/chosen": -1.1749999523162842, "logits/rejected": -1.1409180164337158, "logps/chosen": -118.7750015258789, "logps/rejected": -131.21249389648438, "loss": 4.4444, "rewards/accuracies": 0.8356249928474426, "rewards/chosen": 0.10396194458007812, "rewards/margins": 0.3320721387863159, "rewards/rejected": -0.22795777022838593, "step": 35 }, { "epoch": 1.2903225806451613, "grad_norm": 30.157940166128338, "learning_rate": 5e-07, "logits/chosen": -1.16796875, "logits/rejected": -1.1378905773162842, "logps/chosen": -120.0875015258789, "logps/rejected": -133.0124969482422, "loss": 4.1528, "rewards/accuracies": 0.8843749761581421, "rewards/chosen": 0.11828994750976562, "rewards/margins": 0.4443908631801605, "rewards/rejected": -0.325979620218277, "step": 40 }, { "epoch": 1.4516129032258065, "grad_norm": 28.845039720008064, "learning_rate": 5e-07, "logits/chosen": -1.1624023914337158, "logits/rejected": -1.11767578125, "logps/chosen": -119.375, "logps/rejected": -133.625, "loss": 3.8436, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.09718475490808487, "rewards/margins": 0.565844714641571, "rewards/rejected": -0.4686523377895355, "step": 45 }, { "epoch": 1.6129032258064515, "grad_norm": 29.354865646916156, "learning_rate": 5e-07, "logits/chosen": -1.145410180091858, "logits/rejected": -1.113867163658142, "logps/chosen": -119.1875, "logps/rejected": -135.89999389648438, "loss": 3.7449, "rewards/accuracies": 0.921875, "rewards/chosen": 0.025782013311982155, "rewards/margins": 0.617114245891571, "rewards/rejected": -0.591235339641571, "step": 50 }, { "epoch": 1.7741935483870968, "grad_norm": 29.833521782828086, "learning_rate": 5e-07, "logits/chosen": -1.1676757335662842, "logits/rejected": -1.1394531726837158, "logps/chosen": -121.8375015258789, "logps/rejected": -134.39999389648438, "loss": 3.6219, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.0078277587890625, "rewards/margins": 0.644726574420929, "rewards/rejected": -0.65252685546875, "step": 55 }, { "epoch": 1.935483870967742, "grad_norm": 31.512770746698088, "learning_rate": 5e-07, "logits/chosen": -1.18359375, "logits/rejected": -1.137597680091858, "logps/chosen": -121.2125015258789, "logps/rejected": -137.0500030517578, "loss": 3.3899, "rewards/accuracies": 0.9468749761581421, "rewards/chosen": -0.1258491575717926, "rewards/margins": 0.773388683795929, "rewards/rejected": -0.899609386920929, "step": 60 }, { "epoch": 2.096774193548387, "grad_norm": 22.595598612147207, "learning_rate": 5e-07, "logits/chosen": -1.2158203125, "logits/rejected": -1.17626953125, "logps/chosen": -121.875, "logps/rejected": -142.0124969482422, "loss": 2.6071, "rewards/accuracies": 0.9706250429153442, "rewards/chosen": -0.17760467529296875, "rewards/margins": 1.14990234375, "rewards/rejected": -1.328222632408142, "step": 65 }, { "epoch": 2.258064516129032, "grad_norm": 19.154167422104837, "learning_rate": 5e-07, "logits/chosen": -1.2336914539337158, "logits/rejected": -1.1828124523162842, "logps/chosen": -123.92500305175781, "logps/rejected": -147.1750030517578, "loss": 2.0145, "rewards/accuracies": 0.9906250238418579, "rewards/chosen": -0.23938903212547302, "rewards/margins": 1.580664038658142, "rewards/rejected": -1.8203125, "step": 70 }, { "epoch": 2.4193548387096775, "grad_norm": 19.199349829794635, "learning_rate": 5e-07, "logits/chosen": -1.231835961341858, "logits/rejected": -1.1955077648162842, "logps/chosen": -124.19999694824219, "logps/rejected": -151.8125, "loss": 1.782, "rewards/accuracies": 0.984375, "rewards/chosen": -0.36355286836624146, "rewards/margins": 1.8154296875, "rewards/rejected": -2.177539110183716, "step": 75 }, { "epoch": 2.5806451612903225, "grad_norm": 24.963797478719403, "learning_rate": 5e-07, "logits/chosen": -1.2677733898162842, "logits/rejected": -1.2166016101837158, "logps/chosen": -129.03750610351562, "logps/rejected": -157.3625030517578, "loss": 1.5875, "rewards/accuracies": 0.984375, "rewards/chosen": -0.652294933795929, "rewards/margins": 2.1304688453674316, "rewards/rejected": -2.7826170921325684, "step": 80 }, { "epoch": 2.741935483870968, "grad_norm": 25.696120056907905, "learning_rate": 5e-07, "logits/chosen": -1.3058593273162842, "logits/rejected": -1.253320336341858, "logps/chosen": -130.8625030517578, "logps/rejected": -160.22500610351562, "loss": 1.3192, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9574218988418579, "rewards/margins": 2.357617139816284, "rewards/rejected": -3.315234422683716, "step": 85 }, { "epoch": 2.903225806451613, "grad_norm": 21.21732885239019, "learning_rate": 5e-07, "logits/chosen": -1.330468773841858, "logits/rejected": -1.279296875, "logps/chosen": -131.71249389648438, "logps/rejected": -169.0500030517578, "loss": 1.1231, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -1.255859375, "rewards/margins": 2.912304639816284, "rewards/rejected": -4.168359279632568, "step": 90 }, { "epoch": 3.0, "step": 93, "total_flos": 0.0, "train_loss": 3.6152802436582503, "train_runtime": 1025.2608, "train_samples_per_second": 5.797, "train_steps_per_second": 0.091 } ], "logging_steps": 5, "max_steps": 93, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }