{ "best_metric": 0.25979954936421096, "best_model_checkpoint": "./logo-matching-base/checkpoint-510", "epoch": 20.0, "eval_steps": 500, "global_step": 680, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 72.06623077392578, "learning_rate": 9.5e-06, "loss": 0.6833, "step": 34 }, { "epoch": 1.0, "eval_adjusted_mutual_info_score": 0.0959962302587181, "eval_adjusted_rand_score": 0.06912863690017566, "eval_completeness_score": 0.5109803955924982, "eval_fowlkes_mallows_score": 0.44395093511191686, "eval_homogeneity_score": 0.12370192092796624, "eval_loss": 0.06912863690017566, "eval_pair_confusion_matrix": [ [ 16212, 31438 ], [ 2986, 10620 ] ], "step": 34 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 9e-06, "loss": 0.5711, "step": 68 }, { "epoch": 2.0, "eval_adjusted_mutual_info_score": 0.16050612062870448, "eval_adjusted_rand_score": 0.04788887456487486, "eval_completeness_score": 0.4801879169228485, "eval_fowlkes_mallows_score": 0.326241320471177, "eval_homogeneity_score": 0.24777791806632807, "eval_loss": 0.04788887456487486, "eval_pair_confusion_matrix": [ [ 30178, 17472 ], [ 7800, 5806 ] ], "step": 68 }, { "epoch": 3.0, "grad_norm": 0.0, "learning_rate": 8.5e-06, "loss": 0.5048, "step": 102 }, { "epoch": 3.0, "eval_adjusted_mutual_info_score": 0.14453579947468986, "eval_adjusted_rand_score": 0.06941336059571244, "eval_completeness_score": 0.48534870125531976, "eval_fowlkes_mallows_score": 0.35623205660821267, "eval_homogeneity_score": 0.20687927372388243, "eval_loss": 0.06941336059571244, "eval_pair_confusion_matrix": [ [ 28728, 18922 ], [ 6962, 6644 ] ], "step": 102 }, { "epoch": 4.0, "grad_norm": 0.0, "learning_rate": 8.000000000000001e-06, "loss": 0.4474, "step": 136 }, { "epoch": 4.0, "eval_adjusted_mutual_info_score": 0.15197952267992879, "eval_adjusted_rand_score": 0.06395362023662775, "eval_completeness_score": 0.5053597877227305, "eval_fowlkes_mallows_score": 0.36009183925771726, "eval_homogeneity_score": 0.201135083549786, "eval_loss": 0.06395362023662775, "eval_pair_confusion_matrix": [ [ 27632, 20018 ], [ 6716, 6890 ] ], "step": 136 }, { "epoch": 5.0, "grad_norm": 53.12451934814453, "learning_rate": 7.500000000000001e-06, "loss": 0.4433, "step": 170 }, { "epoch": 5.0, "eval_adjusted_mutual_info_score": 0.09225927782155577, "eval_adjusted_rand_score": 0.04497696614477031, "eval_completeness_score": 0.4856685890606832, "eval_fowlkes_mallows_score": 0.41794001145778076, "eval_homogeneity_score": 0.12954647762487131, "eval_loss": 0.04497696614477031, "eval_pair_confusion_matrix": [ [ 16938, 30712 ], [ 3792, 9814 ] ], "step": 170 }, { "epoch": 6.0, "grad_norm": 0.0, "learning_rate": 7e-06, "loss": 0.4582, "step": 204 }, { "epoch": 6.0, "eval_adjusted_mutual_info_score": 0.1550123095541971, "eval_adjusted_rand_score": 0.1235971988422464, "eval_completeness_score": 0.7625668447266793, "eval_fowlkes_mallows_score": 0.5091255095660694, "eval_homogeneity_score": 0.13490749128374505, "eval_loss": 0.1235971988422464, "eval_pair_confusion_matrix": [ [ 13668, 33982 ], [ 754, 12852 ] ], "step": 204 }, { "epoch": 7.0, "grad_norm": 24.434818267822266, "learning_rate": 6.5000000000000004e-06, "loss": 0.4384, "step": 238 }, { "epoch": 7.0, "eval_adjusted_mutual_info_score": 0.17177361413411174, "eval_adjusted_rand_score": 0.08374811610033726, "eval_completeness_score": 0.5222737901409524, "eval_fowlkes_mallows_score": 0.3647858508374196, "eval_homogeneity_score": 0.2242118568770657, "eval_loss": 0.08374811610033726, "eval_pair_confusion_matrix": [ [ 29196, 18454 ], [ 6850, 6756 ] ], "step": 238 }, { "epoch": 8.0, "grad_norm": 0.0, "learning_rate": 6e-06, "loss": 0.4388, "step": 272 }, { "epoch": 8.0, "eval_adjusted_mutual_info_score": 0.20449439835224686, "eval_adjusted_rand_score": 0.20212814342460073, "eval_completeness_score": 0.5850787856442333, "eval_fowlkes_mallows_score": 0.4533371718722817, "eval_homogeneity_score": 0.22520931487447624, "eval_loss": 0.20212814342460073, "eval_pair_confusion_matrix": [ [ 30796, 16854 ], [ 5202, 8404 ] ], "step": 272 }, { "epoch": 9.0, "grad_norm": 0.0, "learning_rate": 5.500000000000001e-06, "loss": 0.4136, "step": 306 }, { "epoch": 9.0, "eval_adjusted_mutual_info_score": 0.11177382718405922, "eval_adjusted_rand_score": 0.09640002189335507, "eval_completeness_score": 0.6506809526372479, "eval_fowlkes_mallows_score": 0.4899932693994939, "eval_homogeneity_score": 0.10817341204672741, "eval_loss": 0.09640002189335507, "eval_pair_confusion_matrix": [ [ 12770, 34880 ], [ 1174, 12432 ] ], "step": 306 }, { "epoch": 10.0, "grad_norm": 0.0, "learning_rate": 5e-06, "loss": 0.4148, "step": 340 }, { "epoch": 10.0, "eval_adjusted_mutual_info_score": 0.13291287868141516, "eval_adjusted_rand_score": 0.0173768729388201, "eval_completeness_score": 0.46335582053405955, "eval_fowlkes_mallows_score": 0.3147845140860169, "eval_homogeneity_score": 0.20737389396161876, "eval_loss": 0.0173768729388201, "eval_pair_confusion_matrix": [ [ 28346, 19304 ], [ 7786, 5820 ] ], "step": 340 }, { "epoch": 11.0, "grad_norm": 0.0, "learning_rate": 4.5e-06, "loss": 0.4146, "step": 374 }, { "epoch": 11.0, "eval_adjusted_mutual_info_score": 0.15463237739434937, "eval_adjusted_rand_score": 0.05775589302463435, "eval_completeness_score": 0.5013447616164763, "eval_fowlkes_mallows_score": 0.34999174740596783, "eval_homogeneity_score": 0.21319532171325858, "eval_loss": 0.05775589302463435, "eval_pair_confusion_matrix": [ [ 28252, 19398 ], [ 7026, 6580 ] ], "step": 374 }, { "epoch": 12.0, "grad_norm": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 0.4096, "step": 408 }, { "epoch": 12.0, "eval_adjusted_mutual_info_score": 0.15954128854018185, "eval_adjusted_rand_score": 0.08353262118433151, "eval_completeness_score": 0.48657807669089076, "eval_fowlkes_mallows_score": 0.3488784926098972, "eval_homogeneity_score": 0.23781381627812734, "eval_loss": 0.08353262118433151, "eval_pair_confusion_matrix": [ [ 31128, 16522 ], [ 7482, 6124 ] ], "step": 408 }, { "epoch": 13.0, "grad_norm": 0.0, "learning_rate": 3.5e-06, "loss": 0.3973, "step": 442 }, { "epoch": 13.0, "eval_adjusted_mutual_info_score": 0.19041946370338364, "eval_adjusted_rand_score": 0.12553878006056823, "eval_completeness_score": 0.5046617692993569, "eval_fowlkes_mallows_score": 0.36189400936638344, "eval_homogeneity_score": 0.27375639307416655, "eval_loss": 0.12553878006056823, "eval_pair_confusion_matrix": [ [ 33812, 13838 ], [ 7670, 5936 ] ], "step": 442 }, { "epoch": 14.0, "grad_norm": 0.0, "learning_rate": 3e-06, "loss": 0.4051, "step": 476 }, { "epoch": 14.0, "eval_adjusted_mutual_info_score": 0.18691214822396793, "eval_adjusted_rand_score": 0.18158266255584393, "eval_completeness_score": 0.5245970271191535, "eval_fowlkes_mallows_score": 0.41690193788215973, "eval_homogeneity_score": 0.24324643119902978, "eval_loss": 0.18158266255584393, "eval_pair_confusion_matrix": [ [ 33010, 14640 ], [ 6422, 7184 ] ], "step": 476 }, { "epoch": 15.0, "grad_norm": 20.364652633666992, "learning_rate": 2.5e-06, "loss": 0.4062, "step": 510 }, { "epoch": 15.0, "eval_adjusted_mutual_info_score": 0.21823634710165685, "eval_adjusted_rand_score": 0.25979954936421096, "eval_completeness_score": 0.5641057686568595, "eval_fowlkes_mallows_score": 0.47949730143044716, "eval_homogeneity_score": 0.2648407343665406, "eval_loss": 0.25979954936421096, "eval_pair_confusion_matrix": [ [ 33538, 14112 ], [ 5216, 8390 ] ], "step": 510 }, { "epoch": 16.0, "grad_norm": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.4025, "step": 544 }, { "epoch": 16.0, "eval_adjusted_mutual_info_score": 0.10056939420043334, "eval_adjusted_rand_score": 0.0907629141131465, "eval_completeness_score": 0.5696556835173626, "eval_fowlkes_mallows_score": 0.4759933991482618, "eval_homogeneity_score": 0.11380033927304208, "eval_loss": 0.0907629141131465, "eval_pair_confusion_matrix": [ [ 14202, 33448 ], [ 1794, 11812 ] ], "step": 544 }, { "epoch": 17.0, "grad_norm": 0.0, "learning_rate": 1.5e-06, "loss": 0.4043, "step": 578 }, { "epoch": 17.0, "eval_adjusted_mutual_info_score": 0.11845707888523377, "eval_adjusted_rand_score": 0.06151994460143414, "eval_completeness_score": 0.5431310507543884, "eval_fowlkes_mallows_score": 0.4323980307250435, "eval_homogeneity_score": 0.14340786222528185, "eval_loss": 0.06151994460143414, "eval_pair_confusion_matrix": [ [ 16966, 30684 ], [ 3408, 10198 ] ], "step": 578 }, { "epoch": 18.0, "grad_norm": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 0.4013, "step": 612 }, { "epoch": 18.0, "eval_adjusted_mutual_info_score": 0.20098591140511965, "eval_adjusted_rand_score": 0.24115050477114428, "eval_completeness_score": 0.5343711342342489, "eval_fowlkes_mallows_score": 0.45788735752549203, "eval_homogeneity_score": 0.2597787469313228, "eval_loss": 0.24115050477114428, "eval_pair_confusion_matrix": [ [ 34176, 13474 ], [ 5818, 7788 ] ], "step": 612 }, { "epoch": 19.0, "grad_norm": 0.0, "learning_rate": 5.000000000000001e-07, "loss": 0.4006, "step": 646 }, { "epoch": 19.0, "eval_adjusted_mutual_info_score": 0.252153502376998, "eval_adjusted_rand_score": 0.24023922903374736, "eval_completeness_score": 0.5460417717065094, "eval_fowlkes_mallows_score": 0.4233209236936553, "eval_homogeneity_score": 0.34453853702602516, "eval_loss": 0.24023922903374736, "eval_pair_confusion_matrix": [ [ 37986, 9664 ], [ 7382, 6224 ] ], "step": 646 }, { "epoch": 20.0, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 0.4044, "step": 680 }, { "epoch": 20.0, "eval_adjusted_mutual_info_score": 0.08179538296319702, "eval_adjusted_rand_score": 0.040227652358330604, "eval_completeness_score": 0.48262386100058413, "eval_fowlkes_mallows_score": 0.423484876527761, "eval_homogeneity_score": 0.11562820805581293, "eval_loss": 0.040227652358330604, "eval_pair_confusion_matrix": [ [ 15374, 32276 ], [ 3428, 10178 ] ], "step": 680 }, { "epoch": 20.0, "step": 680, "total_flos": 0.0, "train_loss": 0.44298483904670266, "train_runtime": 986.129, "train_samples_per_second": 21.843, "train_steps_per_second": 0.69 } ], "logging_steps": 500, "max_steps": 680, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }