{ "best_metric": 0.6225172281265259, "best_model_checkpoint": "ProbeMedicalYonseiMAILab/medllama3-v20-with-prefix_prompt/checkpoint-1980", "epoch": 17.0, "eval_steps": 500, "global_step": 2805, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.303951367781155, "grad_norm": 29.728609085083008, "learning_rate": 9.400000000000001e-07, "loss": 0.6959, "step": 50 }, { "epoch": 0.60790273556231, "grad_norm": 95.18992614746094, "learning_rate": 1.9200000000000003e-06, "loss": 0.6149, "step": 100 }, { "epoch": 0.9118541033434651, "grad_norm": 81.67002868652344, "learning_rate": 2.92e-06, "loss": 0.7238, "step": 150 }, { "epoch": 1.0, "eval_accuracy": 0.6461187214611872, "eval_balanced_accuracy": 0.6213951381549148, "eval_loss": 0.6600713133811951, "eval_runtime": 44.9636, "eval_samples_per_second": 9.741, "eval_steps_per_second": 1.223, "step": 165 }, { "epoch": 1.2127659574468086, "grad_norm": 22.406213760375977, "learning_rate": 3.920000000000001e-06, "loss": 0.7947, "step": 200 }, { "epoch": 1.5167173252279635, "grad_norm": 65.21754455566406, "learning_rate": 4.92e-06, "loss": 0.7316, "step": 250 }, { "epoch": 1.8206686930091185, "grad_norm": 20.789213180541992, "learning_rate": 5.9e-06, "loss": 0.7253, "step": 300 }, { "epoch": 2.0, "eval_accuracy": 0.6118721461187214, "eval_balanced_accuracy": 0.6298490470568392, "eval_loss": 0.63615483045578, "eval_runtime": 44.9888, "eval_samples_per_second": 9.736, "eval_steps_per_second": 1.223, "step": 330 }, { "epoch": 2.121580547112462, "grad_norm": 28.46219253540039, "learning_rate": 6.9e-06, "loss": 0.7171, "step": 350 }, { "epoch": 2.425531914893617, "grad_norm": 115.24221801757812, "learning_rate": 7.9e-06, "loss": 0.9459, "step": 400 }, { "epoch": 2.729483282674772, "grad_norm": 84.25735473632812, "learning_rate": 8.880000000000001e-06, "loss": 0.9118, "step": 450 }, { "epoch": 3.0, "eval_accuracy": 0.3493150684931507, "eval_balanced_accuracy": 0.5720554272517321, "eval_loss": 0.8440132141113281, "eval_runtime": 45.0008, "eval_samples_per_second": 9.733, "eval_steps_per_second": 1.222, "step": 495 }, { "epoch": 3.0303951367781155, "grad_norm": 65.59783935546875, "learning_rate": 9.88e-06, "loss": 0.7725, "step": 500 }, { "epoch": 3.3343465045592704, "grad_norm": 46.10994338989258, "learning_rate": 9.841726618705037e-06, "loss": 0.7673, "step": 550 }, { "epoch": 3.6382978723404253, "grad_norm": 98.0461196899414, "learning_rate": 9.661870503597123e-06, "loss": 0.7604, "step": 600 }, { "epoch": 3.9422492401215807, "grad_norm": 39.60095977783203, "learning_rate": 9.482014388489208e-06, "loss": 0.7387, "step": 650 }, { "epoch": 4.0, "eval_accuracy": 0.3904109589041096, "eval_balanced_accuracy": 0.639176755447942, "eval_loss": 0.919624924659729, "eval_runtime": 44.9909, "eval_samples_per_second": 9.735, "eval_steps_per_second": 1.222, "step": 660 }, { "epoch": 4.243161094224924, "grad_norm": 26.217370986938477, "learning_rate": 9.302158273381295e-06, "loss": 0.8143, "step": 700 }, { "epoch": 4.547112462006079, "grad_norm": 44.249088287353516, "learning_rate": 9.122302158273381e-06, "loss": 0.8331, "step": 750 }, { "epoch": 4.851063829787234, "grad_norm": 11.06800651550293, "learning_rate": 8.942446043165468e-06, "loss": 0.8555, "step": 800 }, { "epoch": 5.0, "eval_accuracy": 0.6757990867579908, "eval_balanced_accuracy": 0.6786494755244755, "eval_loss": 0.8303006887435913, "eval_runtime": 44.9703, "eval_samples_per_second": 9.74, "eval_steps_per_second": 1.223, "step": 825 }, { "epoch": 5.151975683890577, "grad_norm": 240.76536560058594, "learning_rate": 8.762589928057554e-06, "loss": 0.7793, "step": 850 }, { "epoch": 5.455927051671733, "grad_norm": 30.626108169555664, "learning_rate": 8.582733812949641e-06, "loss": 0.8786, "step": 900 }, { "epoch": 5.759878419452887, "grad_norm": 8.145743370056152, "learning_rate": 8.402877697841727e-06, "loss": 0.7603, "step": 950 }, { "epoch": 6.0, "eval_accuracy": 0.6415525114155252, "eval_balanced_accuracy": 0.6506368028107159, "eval_loss": 0.6329528093338013, "eval_runtime": 44.9464, "eval_samples_per_second": 9.745, "eval_steps_per_second": 1.224, "step": 990 }, { "epoch": 6.060790273556231, "grad_norm": 124.13258361816406, "learning_rate": 8.223021582733814e-06, "loss": 0.6977, "step": 1000 }, { "epoch": 6.364741641337386, "grad_norm": 25.195247650146484, "learning_rate": 8.0431654676259e-06, "loss": 0.6591, "step": 1050 }, { "epoch": 6.668693009118541, "grad_norm": 78.33740997314453, "learning_rate": 7.863309352517987e-06, "loss": 0.667, "step": 1100 }, { "epoch": 6.972644376899696, "grad_norm": 77.23793029785156, "learning_rate": 7.683453237410072e-06, "loss": 0.7386, "step": 1150 }, { "epoch": 7.0, "eval_accuracy": 0.6666666666666666, "eval_balanced_accuracy": 0.6141959798994976, "eval_loss": 0.7898709177970886, "eval_runtime": 44.9232, "eval_samples_per_second": 9.75, "eval_steps_per_second": 1.224, "step": 1155 }, { "epoch": 7.27355623100304, "grad_norm": 71.74555969238281, "learning_rate": 7.503597122302159e-06, "loss": 0.6621, "step": 1200 }, { "epoch": 7.577507598784194, "grad_norm": 44.78120422363281, "learning_rate": 7.323741007194245e-06, "loss": 0.7375, "step": 1250 }, { "epoch": 7.88145896656535, "grad_norm": 8.3840970993042, "learning_rate": 7.1438848920863315e-06, "loss": 0.7636, "step": 1300 }, { "epoch": 8.0, "eval_accuracy": 0.4657534246575342, "eval_balanced_accuracy": 0.6618908716540837, "eval_loss": 0.6962416172027588, "eval_runtime": 44.9904, "eval_samples_per_second": 9.735, "eval_steps_per_second": 1.222, "step": 1320 }, { "epoch": 8.182370820668693, "grad_norm": 5.842133522033691, "learning_rate": 6.964028776978418e-06, "loss": 0.9421, "step": 1350 }, { "epoch": 8.486322188449847, "grad_norm": 57.9466667175293, "learning_rate": 6.784172661870504e-06, "loss": 0.7452, "step": 1400 }, { "epoch": 8.790273556231003, "grad_norm": 65.29971313476562, "learning_rate": 6.604316546762591e-06, "loss": 0.7203, "step": 1450 }, { "epoch": 9.0, "eval_accuracy": 0.591324200913242, "eval_balanced_accuracy": 0.6261770244821092, "eval_loss": 0.6226291656494141, "eval_runtime": 44.9238, "eval_samples_per_second": 9.75, "eval_steps_per_second": 1.224, "step": 1485 }, { "epoch": 9.091185410334347, "grad_norm": 19.527257919311523, "learning_rate": 6.424460431654676e-06, "loss": 0.6548, "step": 1500 }, { "epoch": 9.395136778115502, "grad_norm": 93.42425537109375, "learning_rate": 6.244604316546763e-06, "loss": 0.6294, "step": 1550 }, { "epoch": 9.699088145896656, "grad_norm": 59.397743225097656, "learning_rate": 6.064748201438849e-06, "loss": 0.6695, "step": 1600 }, { "epoch": 10.0, "grad_norm": 47.82102966308594, "learning_rate": 5.884892086330935e-06, "loss": 0.6849, "step": 1650 }, { "epoch": 10.0, "eval_accuracy": 0.6666666666666666, "eval_balanced_accuracy": 0.6116620111731843, "eval_loss": 0.7656034231185913, "eval_runtime": 44.9267, "eval_samples_per_second": 9.749, "eval_steps_per_second": 1.224, "step": 1650 }, { "epoch": 10.303951367781155, "grad_norm": 86.78540802001953, "learning_rate": 5.705035971223022e-06, "loss": 0.6763, "step": 1700 }, { "epoch": 10.60790273556231, "grad_norm": 30.66022300720215, "learning_rate": 5.525179856115108e-06, "loss": 0.6076, "step": 1750 }, { "epoch": 10.911854103343465, "grad_norm": 92.97509765625, "learning_rate": 5.345323741007195e-06, "loss": 0.5737, "step": 1800 }, { "epoch": 11.0, "eval_accuracy": 0.684931506849315, "eval_balanced_accuracy": 0.6411216029479503, "eval_loss": 0.7753307223320007, "eval_runtime": 44.9215, "eval_samples_per_second": 9.75, "eval_steps_per_second": 1.224, "step": 1815 }, { "epoch": 11.212765957446809, "grad_norm": 15.782450675964355, "learning_rate": 5.165467625899281e-06, "loss": 0.5926, "step": 1850 }, { "epoch": 11.516717325227964, "grad_norm": 15.262404441833496, "learning_rate": 4.985611510791367e-06, "loss": 0.6144, "step": 1900 }, { "epoch": 11.820668693009118, "grad_norm": 9.566486358642578, "learning_rate": 4.805755395683454e-06, "loss": 0.5891, "step": 1950 }, { "epoch": 12.0, "eval_accuracy": 0.5547945205479452, "eval_balanced_accuracy": 0.6379267841735486, "eval_loss": 0.6225172281265259, "eval_runtime": 44.9682, "eval_samples_per_second": 9.74, "eval_steps_per_second": 1.223, "step": 1980 }, { "epoch": 12.121580547112462, "grad_norm": 11.156400680541992, "learning_rate": 4.62589928057554e-06, "loss": 0.583, "step": 2000 }, { "epoch": 12.425531914893616, "grad_norm": 54.37253189086914, "learning_rate": 4.446043165467626e-06, "loss": 0.6269, "step": 2050 }, { "epoch": 12.729483282674773, "grad_norm": 22.50881576538086, "learning_rate": 4.266187050359712e-06, "loss": 0.5992, "step": 2100 }, { "epoch": 13.0, "eval_accuracy": 0.5684931506849316, "eval_balanced_accuracy": 0.6185281521178554, "eval_loss": 0.6588040590286255, "eval_runtime": 44.99, "eval_samples_per_second": 9.735, "eval_steps_per_second": 1.222, "step": 2145 }, { "epoch": 13.030395136778116, "grad_norm": 28.85976219177246, "learning_rate": 4.086330935251799e-06, "loss": 0.6465, "step": 2150 }, { "epoch": 13.33434650455927, "grad_norm": 91.04498291015625, "learning_rate": 3.906474820143885e-06, "loss": 0.5114, "step": 2200 }, { "epoch": 13.638297872340425, "grad_norm": 19.850927352905273, "learning_rate": 3.7266187050359714e-06, "loss": 0.5409, "step": 2250 }, { "epoch": 13.94224924012158, "grad_norm": 87.89186096191406, "learning_rate": 3.5467625899280578e-06, "loss": 0.5283, "step": 2300 }, { "epoch": 14.0, "eval_accuracy": 0.6278538812785388, "eval_balanced_accuracy": 0.6307748461779121, "eval_loss": 0.6853114366531372, "eval_runtime": 44.9733, "eval_samples_per_second": 9.739, "eval_steps_per_second": 1.223, "step": 2310 }, { "epoch": 14.243161094224924, "grad_norm": 17.08485984802246, "learning_rate": 3.366906474820144e-06, "loss": 0.4606, "step": 2350 }, { "epoch": 14.54711246200608, "grad_norm": 41.410186767578125, "learning_rate": 3.1870503597122306e-06, "loss": 0.5069, "step": 2400 }, { "epoch": 14.851063829787234, "grad_norm": 15.93907642364502, "learning_rate": 3.0071942446043166e-06, "loss": 0.4854, "step": 2450 }, { "epoch": 15.0, "eval_accuracy": 0.5525114155251142, "eval_balanced_accuracy": 0.6256951102588687, "eval_loss": 0.7648139595985413, "eval_runtime": 44.933, "eval_samples_per_second": 9.748, "eval_steps_per_second": 1.224, "step": 2475 }, { "epoch": 15.151975683890578, "grad_norm": 52.72271728515625, "learning_rate": 2.827338129496403e-06, "loss": 0.3968, "step": 2500 }, { "epoch": 15.455927051671733, "grad_norm": 97.08126831054688, "learning_rate": 2.6474820143884894e-06, "loss": 0.3851, "step": 2550 }, { "epoch": 15.759878419452887, "grad_norm": 44.035125732421875, "learning_rate": 2.4676258992805758e-06, "loss": 0.3738, "step": 2600 }, { "epoch": 16.0, "eval_accuracy": 0.6118721461187214, "eval_balanced_accuracy": 0.5976641414141415, "eval_loss": 0.8674690127372742, "eval_runtime": 45.0541, "eval_samples_per_second": 9.722, "eval_steps_per_second": 1.221, "step": 2640 }, { "epoch": 16.060790273556233, "grad_norm": 29.919408798217773, "learning_rate": 2.287769784172662e-06, "loss": 0.3726, "step": 2650 }, { "epoch": 16.364741641337385, "grad_norm": 35.357173919677734, "learning_rate": 2.1079136690647486e-06, "loss": 0.2964, "step": 2700 }, { "epoch": 16.66869300911854, "grad_norm": 48.781551361083984, "learning_rate": 1.9280575539568346e-06, "loss": 0.2764, "step": 2750 }, { "epoch": 16.972644376899694, "grad_norm": 11.10974407196045, "learning_rate": 1.748201438848921e-06, "loss": 0.2891, "step": 2800 }, { "epoch": 17.0, "eval_accuracy": 0.5707762557077626, "eval_balanced_accuracy": 0.5602689793866265, "eval_loss": 1.030253291130066, "eval_runtime": 44.9833, "eval_samples_per_second": 9.737, "eval_steps_per_second": 1.223, "step": 2805 }, { "epoch": 17.0, "step": 2805, "total_flos": 2.5261629350510592e+17, "train_loss": 0.6479321214179516, "train_runtime": 9469.6106, "train_samples_per_second": 2.777, "train_steps_per_second": 0.346 } ], "logging_steps": 50, "max_steps": 3280, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 1e-05 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.5261629350510592e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }