{ "best_metric": 0.21592645, "best_model_checkpoint": "/home/patrickbarker/output/qwen2-vl-7b-instruct/v6-20250103-052220/checkpoint-414", "epoch": 3.0, "eval_steps": 200, "global_step": 414, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "acc": 0.95261294, "epoch": 0.007272727272727273, "grad_norm": 1.9728064130980805, "learning_rate": 0.0, "loss": 0.22823659, "memory(GiB)": 31.98, "step": 1, "train_speed(iter/s)": 0.054965 }, { "acc": 0.93504357, "epoch": 0.03636363636363636, "grad_norm": 2.237331671502764, "learning_rate": 4.526808302869428e-06, "loss": 0.35742611, "memory(GiB)": 33.01, "step": 5, "train_speed(iter/s)": 0.120168 }, { "acc": 0.91343489, "epoch": 0.07272727272727272, "grad_norm": 3.274625633110072, "learning_rate": 6.476398521807292e-06, "loss": 0.45717983, "memory(GiB)": 33.01, "step": 10, "train_speed(iter/s)": 0.14108 }, { "acc": 0.93629456, "epoch": 0.10909090909090909, "grad_norm": 3.424189078760059, "learning_rate": 7.6168356916586906e-06, "loss": 0.34272318, "memory(GiB)": 33.01, "step": 15, "train_speed(iter/s)": 0.149645 }, { "acc": 0.92510157, "epoch": 0.14545454545454545, "grad_norm": 2.2347104663373494, "learning_rate": 8.425988740745155e-06, "loss": 0.38463933, "memory(GiB)": 33.01, "step": 20, "train_speed(iter/s)": 0.154322 }, { "acc": 0.92444115, "epoch": 0.18181818181818182, "grad_norm": 2.999692166672811, "learning_rate": 9.053616605738856e-06, "loss": 0.38348885, "memory(GiB)": 33.01, "step": 25, "train_speed(iter/s)": 0.15729 }, { "acc": 0.89717693, "epoch": 0.21818181818181817, "grad_norm": 3.151051933103401, "learning_rate": 9.566425910596553e-06, "loss": 0.50090494, "memory(GiB)": 33.01, "step": 30, "train_speed(iter/s)": 0.159318 }, { "acc": 0.8918396, "epoch": 0.2545454545454545, "grad_norm": 3.929292818540404, "learning_rate": 9.999999999999999e-06, "loss": 0.53159599, "memory(GiB)": 33.01, "step": 35, "train_speed(iter/s)": 0.160758 }, { "acc": 0.91250048, "epoch": 0.2909090909090909, "grad_norm": 2.9644423662409465, "learning_rate": 9.998540216393926e-06, "loss": 0.44619522, "memory(GiB)": 33.01, "step": 40, "train_speed(iter/s)": 0.161864 }, { "acc": 0.91625786, "epoch": 0.32727272727272727, "grad_norm": 2.8793186353393256, "learning_rate": 9.994161718048217e-06, "loss": 0.46977191, "memory(GiB)": 33.01, "step": 45, "train_speed(iter/s)": 0.162757 }, { "acc": 0.91005774, "epoch": 0.36363636363636365, "grad_norm": 2.649498967933029, "learning_rate": 9.986867061882612e-06, "loss": 0.4955514, "memory(GiB)": 33.01, "step": 50, "train_speed(iter/s)": 0.163456 }, { "acc": 0.90930538, "epoch": 0.4, "grad_norm": 3.239689305737002, "learning_rate": 9.976660507770886e-06, "loss": 0.48896346, "memory(GiB)": 33.01, "step": 55, "train_speed(iter/s)": 0.164017 }, { "acc": 0.89578419, "epoch": 0.43636363636363634, "grad_norm": 4.172977184581541, "learning_rate": 9.963548016053221e-06, "loss": 0.53288736, "memory(GiB)": 33.01, "step": 60, "train_speed(iter/s)": 0.164492 }, { "acc": 0.90814514, "epoch": 0.4727272727272727, "grad_norm": 2.4035166321477672, "learning_rate": 9.94753724405553e-06, "loss": 0.43980942, "memory(GiB)": 33.01, "step": 65, "train_speed(iter/s)": 0.164893 }, { "acc": 0.9009388, "epoch": 0.509090909090909, "grad_norm": 2.792817778420491, "learning_rate": 9.92863754161779e-06, "loss": 0.52926388, "memory(GiB)": 33.01, "step": 70, "train_speed(iter/s)": 0.165233 }, { "acc": 0.90013046, "epoch": 0.5454545454545454, "grad_norm": 2.6334892567714436, "learning_rate": 
9.906859945633999e-06, "loss": 0.49731359, "memory(GiB)": 33.01, "step": 75, "train_speed(iter/s)": 0.165533 }, { "acc": 0.9045433, "epoch": 0.5818181818181818, "grad_norm": 2.332185471916977, "learning_rate": 9.882217173606948e-06, "loss": 0.51631742, "memory(GiB)": 33.01, "step": 80, "train_speed(iter/s)": 0.165793 }, { "acc": 0.92099228, "epoch": 0.6181818181818182, "grad_norm": 2.3620310828105935, "learning_rate": 9.854723616221548e-06, "loss": 0.41591549, "memory(GiB)": 33.01, "step": 85, "train_speed(iter/s)": 0.166024 }, { "acc": 0.91060734, "epoch": 0.6545454545454545, "grad_norm": 2.2844767249274214, "learning_rate": 9.824395328941086e-06, "loss": 0.44734068, "memory(GiB)": 33.01, "step": 90, "train_speed(iter/s)": 0.16623 }, { "acc": 0.91947842, "epoch": 0.6909090909090909, "grad_norm": 2.654847718943916, "learning_rate": 9.791250022631277e-06, "loss": 0.4483614, "memory(GiB)": 33.01, "step": 95, "train_speed(iter/s)": 0.166415 }, { "acc": 0.90607023, "epoch": 0.7272727272727273, "grad_norm": 2.846204394974626, "learning_rate": 9.755307053217622e-06, "loss": 0.47331667, "memory(GiB)": 33.01, "step": 100, "train_speed(iter/s)": 0.16658 }, { "acc": 0.91776829, "epoch": 0.7636363636363637, "grad_norm": 4.193055969054884, "learning_rate": 9.716587410382087e-06, "loss": 0.45284424, "memory(GiB)": 33.01, "step": 105, "train_speed(iter/s)": 0.166736 }, { "acc": 0.90584297, "epoch": 0.8, "grad_norm": 2.3839939588937082, "learning_rate": 9.675113705305733e-06, "loss": 0.49337497, "memory(GiB)": 33.01, "step": 110, "train_speed(iter/s)": 0.166886 }, { "acc": 0.90228806, "epoch": 0.8363636363636363, "grad_norm": 2.6045227101709214, "learning_rate": 9.630910157464404e-06, "loss": 0.48910527, "memory(GiB)": 33.01, "step": 115, "train_speed(iter/s)": 0.16701 }, { "acc": 0.90632572, "epoch": 0.8727272727272727, "grad_norm": 2.1215112041093906, "learning_rate": 9.584002580485256e-06, "loss": 0.4827064, "memory(GiB)": 33.01, "step": 120, "train_speed(iter/s)": 0.16713 }, { "acc": 0.91771622, "epoch": 0.9090909090909091, "grad_norm": 2.8141571001065424, "learning_rate": 9.534418367072303e-06, "loss": 0.40849586, "memory(GiB)": 33.01, "step": 125, "train_speed(iter/s)": 0.167246 }, { "acc": 0.89680672, "epoch": 0.9454545454545454, "grad_norm": 2.048597621204809, "learning_rate": 9.482186473009871e-06, "loss": 0.52411914, "memory(GiB)": 33.01, "step": 130, "train_speed(iter/s)": 0.167353 }, { "acc": 0.90298738, "epoch": 0.9818181818181818, "grad_norm": 2.887900153774404, "learning_rate": 9.427337400253224e-06, "loss": 0.5198132, "memory(GiB)": 33.01, "step": 135, "train_speed(iter/s)": 0.167445 }, { "epoch": 1.0, "eval_acc": 0.9165654110976104, "eval_loss": 0.2242395281791687, "eval_runtime": 12.914, "eval_samples_per_second": 8.983, "eval_steps_per_second": 1.162, "step": 138 }, { "acc": 0.8111371, "epoch": 1.0145454545454546, "grad_norm": 2.8557900772261893, "learning_rate": 9.381595246879296e-06, "loss": 0.41494226, "memory(GiB)": 33.01, "step": 140, "train_speed(iter/s)": 0.155909 }, { "acc": 0.92820644, "epoch": 1.050909090909091, "grad_norm": 3.457089179525002, "learning_rate": 9.322116984002575e-06, "loss": 0.36117687, "memory(GiB)": 33.01, "step": 145, "train_speed(iter/s)": 0.15618 }, { "acc": 0.91620007, "epoch": 1.0872727272727274, "grad_norm": 2.202599146322218, "learning_rate": 9.260115018505599e-06, "loss": 0.44102664, "memory(GiB)": 33.01, "step": 150, "train_speed(iter/s)": 0.156606 }, { "acc": 0.91883535, "epoch": 1.1236363636363635, "grad_norm": 2.3114291842279697, "learning_rate": 
9.195625557790217e-06, "loss": 0.41582127, "memory(GiB)": 33.01, "step": 155, "train_speed(iter/s)": 0.156919 }, { "acc": 0.93434258, "epoch": 1.16, "grad_norm": 5.406474002236419, "learning_rate": 9.128686261885441e-06, "loss": 0.35148592, "memory(GiB)": 33.01, "step": 160, "train_speed(iter/s)": 0.157294 }, { "acc": 0.91467266, "epoch": 1.1963636363636363, "grad_norm": 3.861068773297179, "learning_rate": 9.059336221455045e-06, "loss": 0.42474766, "memory(GiB)": 33.01, "step": 165, "train_speed(iter/s)": 0.157643 }, { "acc": 0.91754112, "epoch": 1.2327272727272727, "grad_norm": 3.4508767564011182, "learning_rate": 8.987615934969708e-06, "loss": 0.44358654, "memory(GiB)": 33.01, "step": 170, "train_speed(iter/s)": 0.157971 }, { "acc": 0.89289169, "epoch": 1.269090909090909, "grad_norm": 2.9350868391082434, "learning_rate": 8.913567285057077e-06, "loss": 0.51100779, "memory(GiB)": 33.01, "step": 175, "train_speed(iter/s)": 0.158289 }, { "acc": 0.91759186, "epoch": 1.3054545454545454, "grad_norm": 2.3549681642790072, "learning_rate": 8.837233514043455e-06, "loss": 0.40699306, "memory(GiB)": 33.01, "step": 180, "train_speed(iter/s)": 0.158589 }, { "acc": 0.9234314, "epoch": 1.3418181818181818, "grad_norm": 2.097178456402854, "learning_rate": 8.758659198701528e-06, "loss": 0.38527017, "memory(GiB)": 33.01, "step": 185, "train_speed(iter/s)": 0.158868 }, { "acc": 0.90927277, "epoch": 1.3781818181818182, "grad_norm": 2.0240919955649277, "learning_rate": 8.677890224218774e-06, "loss": 0.43820124, "memory(GiB)": 33.01, "step": 190, "train_speed(iter/s)": 0.159135 }, { "acc": 0.92771797, "epoch": 1.4145454545454546, "grad_norm": 1.8263954768378603, "learning_rate": 8.594973757401804e-06, "loss": 0.38704326, "memory(GiB)": 33.01, "step": 195, "train_speed(iter/s)": 0.159384 }, { "acc": 0.92044001, "epoch": 1.450909090909091, "grad_norm": 2.1904738367354772, "learning_rate": 8.50995821913228e-06, "loss": 0.40301366, "memory(GiB)": 33.01, "step": 200, "train_speed(iter/s)": 0.159623 }, { "acc": 0.91810284, "epoch": 1.4872727272727273, "grad_norm": 2.322191659362731, "learning_rate": 8.42289325609048e-06, "loss": 0.42446461, "memory(GiB)": 33.01, "step": 205, "train_speed(iter/s)": 0.159854 }, { "acc": 0.91816349, "epoch": 1.5236363636363637, "grad_norm": 3.6766273070353996, "learning_rate": 8.333829711763038e-06, "loss": 0.40156956, "memory(GiB)": 33.01, "step": 210, "train_speed(iter/s)": 0.160076 }, { "acc": 0.9263833, "epoch": 1.56, "grad_norm": 4.222791406049376, "learning_rate": 8.242819596751778e-06, "loss": 0.40653038, "memory(GiB)": 33.01, "step": 215, "train_speed(iter/s)": 0.160284 }, { "acc": 0.90884829, "epoch": 1.5963636363636362, "grad_norm": 2.774384141884033, "learning_rate": 8.149916058400986e-06, "loss": 0.4541821, "memory(GiB)": 33.01, "step": 220, "train_speed(iter/s)": 0.160482 }, { "acc": 0.92306767, "epoch": 1.6327272727272728, "grad_norm": 1.8808774186906907, "learning_rate": 8.055173349760858e-06, "loss": 0.38071015, "memory(GiB)": 33.01, "step": 225, "train_speed(iter/s)": 0.160675 }, { "acc": 0.92462444, "epoch": 1.669090909090909, "grad_norm": 2.6660330848450147, "learning_rate": 7.958646797905251e-06, "loss": 0.40280228, "memory(GiB)": 33.01, "step": 230, "train_speed(iter/s)": 0.160867 }, { "acc": 0.90507927, "epoch": 1.7054545454545456, "grad_norm": 2.396279628543684, "learning_rate": 7.860392771622222e-06, "loss": 0.49732876, "memory(GiB)": 33.01, "step": 235, "train_speed(iter/s)": 0.161052 }, { "acc": 0.90813217, "epoch": 1.7418181818181817, "grad_norm": 
1.959098632103441, "learning_rate": 7.760468648496251e-06, "loss": 0.45160952, "memory(GiB)": 33.01, "step": 240, "train_speed(iter/s)": 0.161223 }, { "acc": 0.90353422, "epoch": 1.7781818181818183, "grad_norm": 3.350380496148853, "learning_rate": 7.658932781401341e-06, "loss": 0.49037123, "memory(GiB)": 33.01, "step": 245, "train_speed(iter/s)": 0.161388 }, { "acc": 0.91233768, "epoch": 1.8145454545454545, "grad_norm": 2.375193909280913, "learning_rate": 7.5558444644245855e-06, "loss": 0.45447264, "memory(GiB)": 33.01, "step": 250, "train_speed(iter/s)": 0.161547 }, { "acc": 0.90856533, "epoch": 1.850909090909091, "grad_norm": 2.7061925296961937, "learning_rate": 7.451263898240091e-06, "loss": 0.47929668, "memory(GiB)": 33.01, "step": 255, "train_speed(iter/s)": 0.161704 }, { "acc": 0.92107229, "epoch": 1.8872727272727272, "grad_norm": 3.242205419295514, "learning_rate": 7.345252154953482e-06, "loss": 0.39586713, "memory(GiB)": 33.01, "step": 260, "train_speed(iter/s)": 0.161853 }, { "acc": 0.91105738, "epoch": 1.9236363636363636, "grad_norm": 3.5795963065620744, "learning_rate": 7.2378711424375056e-06, "loss": 0.46428795, "memory(GiB)": 33.01, "step": 265, "train_speed(iter/s)": 0.161996 }, { "acc": 0.92524033, "epoch": 1.96, "grad_norm": 3.652004803975923, "learning_rate": 7.129183568179582e-06, "loss": 0.34946482, "memory(GiB)": 33.01, "step": 270, "train_speed(iter/s)": 0.162127 }, { "acc": 0.91721325, "epoch": 1.9963636363636363, "grad_norm": 2.386708723548124, "learning_rate": 7.019252902662391e-06, "loss": 0.45530472, "memory(GiB)": 33.01, "step": 275, "train_speed(iter/s)": 0.162264 }, { "epoch": 2.0, "eval_acc": 0.9177804779262859, "eval_loss": 0.2162669152021408, "eval_runtime": 12.9627, "eval_samples_per_second": 8.949, "eval_steps_per_second": 1.157, "step": 276 }, { "acc": 0.83070297, "epoch": 2.0290909090909093, "grad_norm": 2.535544826305509, "learning_rate": 6.908143342298905e-06, "loss": 0.36057291, "memory(GiB)": 33.01, "step": 280, "train_speed(iter/s)": 0.156683 }, { "acc": 0.93970337, "epoch": 2.0654545454545454, "grad_norm": 2.276921539430186, "learning_rate": 6.795919771943491e-06, "loss": 0.32403946, "memory(GiB)": 33.01, "step": 285, "train_speed(iter/s)": 0.156901 }, { "acc": 0.92168732, "epoch": 2.101818181818182, "grad_norm": 4.127638603634917, "learning_rate": 6.682647727000975e-06, "loss": 0.41061325, "memory(GiB)": 33.01, "step": 290, "train_speed(iter/s)": 0.157112 }, { "acc": 0.92185087, "epoch": 2.138181818181818, "grad_norm": 2.613159698814904, "learning_rate": 6.5683933551558196e-06, "loss": 0.41276412, "memory(GiB)": 33.01, "step": 295, "train_speed(iter/s)": 0.157311 }, { "acc": 0.93119574, "epoch": 2.174545454545455, "grad_norm": 2.4146244471425997, "learning_rate": 6.4532233777437355e-06, "loss": 0.34114873, "memory(GiB)": 33.01, "step": 300, "train_speed(iter/s)": 0.157504 }, { "acc": 0.92488451, "epoch": 2.210909090909091, "grad_norm": 2.907208779112958, "learning_rate": 6.337205050788301e-06, "loss": 0.38522263, "memory(GiB)": 33.01, "step": 305, "train_speed(iter/s)": 0.15769 }, { "acc": 0.92167654, "epoch": 2.247272727272727, "grad_norm": 2.9896138007592024, "learning_rate": 6.220406125725334e-06, "loss": 0.37358305, "memory(GiB)": 33.01, "step": 310, "train_speed(iter/s)": 0.15787 }, { "acc": 0.9312851, "epoch": 2.2836363636363637, "grad_norm": 4.445824844631244, "learning_rate": 6.102894809837971e-06, "loss": 0.36833365, "memory(GiB)": 33.01, "step": 315, "train_speed(iter/s)": 0.158047 }, { "acc": 0.93003368, "epoch": 2.32, "grad_norm": 
2.3181584670775464, "learning_rate": 5.984739726425528e-06, "loss": 0.36130757, "memory(GiB)": 33.01, "step": 320, "train_speed(iter/s)": 0.15822 }, { "acc": 0.91338739, "epoch": 2.3563636363636364, "grad_norm": 2.3002867827492235, "learning_rate": 5.866009874729421e-06, "loss": 0.44016724, "memory(GiB)": 33.01, "step": 325, "train_speed(iter/s)": 0.158388 }, { "acc": 0.93581047, "epoch": 2.3927272727272726, "grad_norm": 2.212658837030889, "learning_rate": 5.746774589639565e-06, "loss": 0.3611378, "memory(GiB)": 33.01, "step": 330, "train_speed(iter/s)": 0.158548 }, { "acc": 0.91852398, "epoch": 2.429090909090909, "grad_norm": 3.5098558925283343, "learning_rate": 5.6271035012047395e-06, "loss": 0.4051528, "memory(GiB)": 33.01, "step": 335, "train_speed(iter/s)": 0.158706 }, { "acc": 0.92445469, "epoch": 2.4654545454545453, "grad_norm": 2.3254480753766833, "learning_rate": 5.507066493970609e-06, "loss": 0.38657694, "memory(GiB)": 33.01, "step": 340, "train_speed(iter/s)": 0.158859 }, { "acc": 0.94319763, "epoch": 2.501818181818182, "grad_norm": 2.546749274650209, "learning_rate": 5.386733666169105e-06, "loss": 0.3047235, "memory(GiB)": 33.01, "step": 345, "train_speed(iter/s)": 0.159007 }, { "acc": 0.92226295, "epoch": 2.538181818181818, "grad_norm": 2.061710328743839, "learning_rate": 5.266175288783046e-06, "loss": 0.38313189, "memory(GiB)": 33.01, "step": 350, "train_speed(iter/s)": 0.159149 }, { "acc": 0.93342876, "epoch": 2.5745454545454547, "grad_norm": 3.3978986222055916, "learning_rate": 5.1454617645098595e-06, "loss": 0.32807801, "memory(GiB)": 33.01, "step": 355, "train_speed(iter/s)": 0.159288 }, { "acc": 0.92254868, "epoch": 2.610909090909091, "grad_norm": 2.36401523000578, "learning_rate": 5.024663586648378e-06, "loss": 0.37164879, "memory(GiB)": 33.01, "step": 360, "train_speed(iter/s)": 0.159425 }, { "acc": 0.93187628, "epoch": 2.6472727272727274, "grad_norm": 2.0346344864902726, "learning_rate": 4.903851297932749e-06, "loss": 0.35205598, "memory(GiB)": 33.01, "step": 365, "train_speed(iter/s)": 0.159489 }, { "acc": 0.92569065, "epoch": 2.6836363636363636, "grad_norm": 2.7461167183892883, "learning_rate": 4.783095449337462e-06, "loss": 0.35683942, "memory(GiB)": 33.01, "step": 370, "train_speed(iter/s)": 0.159622 }, { "acc": 0.92851734, "epoch": 2.7199999999999998, "grad_norm": 2.7763819290375316, "learning_rate": 4.6624665588775675e-06, "loss": 0.35814347, "memory(GiB)": 33.01, "step": 375, "train_speed(iter/s)": 0.159722 }, { "acc": 0.94960651, "epoch": 2.7563636363636363, "grad_norm": 2.4971767216968956, "learning_rate": 4.542035070428136e-06, "loss": 0.27659984, "memory(GiB)": 33.01, "step": 380, "train_speed(iter/s)": 0.159847 }, { "acc": 0.92807074, "epoch": 2.792727272727273, "grad_norm": 2.683132297237531, "learning_rate": 4.4218713125870236e-06, "loss": 0.37527924, "memory(GiB)": 33.01, "step": 385, "train_speed(iter/s)": 0.159969 }, { "acc": 0.93306198, "epoch": 2.829090909090909, "grad_norm": 2.295750889124929, "learning_rate": 4.302045457604953e-06, "loss": 0.33568211, "memory(GiB)": 33.01, "step": 390, "train_speed(iter/s)": 0.160089 }, { "acc": 0.92898979, "epoch": 2.8654545454545453, "grad_norm": 2.07143338080191, "learning_rate": 4.182627480406894e-06, "loss": 0.34494858, "memory(GiB)": 33.01, "step": 395, "train_speed(iter/s)": 0.160203 }, { "acc": 0.92927322, "epoch": 2.901818181818182, "grad_norm": 3.061148841919245, "learning_rate": 4.063687117728671e-06, "loss": 0.37803557, "memory(GiB)": 33.01, "step": 400, "train_speed(iter/s)": 0.160316 }, { "acc": 
0.93406506, "epoch": 2.9381818181818184, "grad_norm": 2.3908961716983748, "learning_rate": 3.94529382739268e-06, "loss": 0.32702701, "memory(GiB)": 33.01, "step": 405, "train_speed(iter/s)": 0.160429 }, { "acc": 0.91922884, "epoch": 2.9745454545454546, "grad_norm": 3.1103789917359834, "learning_rate": 3.82751674774648e-06, "loss": 0.40710459, "memory(GiB)": 33.01, "step": 410, "train_speed(iter/s)": 0.160539 }, { "epoch": 3.0, "eval_acc": 0.9214256784123127, "eval_loss": 0.21592645347118378, "eval_runtime": 12.9338, "eval_samples_per_second": 8.969, "eval_steps_per_second": 1.16, "step": 414 } ], "logging_steps": 5, "max_steps": 685, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 492366226620416.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }