|
{ |
|
"best_metric": 0.25979954936421096, |
|
"best_model_checkpoint": "./logo-matching-base/checkpoint-510", |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 680, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 72.06623077392578, |
|
"learning_rate": 9.5e-06, |
|
"loss": 0.6833, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_adjusted_mutual_info_score": 0.0959962302587181, |
|
"eval_adjusted_rand_score": 0.06912863690017566, |
|
"eval_completeness_score": 0.5109803955924982, |
|
"eval_fowlkes_mallows_score": 0.44395093511191686, |
|
"eval_homogeneity_score": 0.12370192092796624, |
|
"eval_loss": 0.06912863690017566, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
16212, |
|
31438 |
|
], |
|
[ |
|
2986, |
|
10620 |
|
] |
|
], |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9e-06, |
|
"loss": 0.5711, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_adjusted_mutual_info_score": 0.16050612062870448, |
|
"eval_adjusted_rand_score": 0.04788887456487486, |
|
"eval_completeness_score": 0.4801879169228485, |
|
"eval_fowlkes_mallows_score": 0.326241320471177, |
|
"eval_homogeneity_score": 0.24777791806632807, |
|
"eval_loss": 0.04788887456487486, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
30178, |
|
17472 |
|
], |
|
[ |
|
7800, |
|
5806 |
|
] |
|
], |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.5e-06, |
|
"loss": 0.5048, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_adjusted_mutual_info_score": 0.14453579947468986, |
|
"eval_adjusted_rand_score": 0.06941336059571244, |
|
"eval_completeness_score": 0.48534870125531976, |
|
"eval_fowlkes_mallows_score": 0.35623205660821267, |
|
"eval_homogeneity_score": 0.20687927372388243, |
|
"eval_loss": 0.06941336059571244, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
28728, |
|
18922 |
|
], |
|
[ |
|
6962, |
|
6644 |
|
] |
|
], |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.4474, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_adjusted_mutual_info_score": 0.15197952267992879, |
|
"eval_adjusted_rand_score": 0.06395362023662775, |
|
"eval_completeness_score": 0.5053597877227305, |
|
"eval_fowlkes_mallows_score": 0.36009183925771726, |
|
"eval_homogeneity_score": 0.201135083549786, |
|
"eval_loss": 0.06395362023662775, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
27632, |
|
20018 |
|
], |
|
[ |
|
6716, |
|
6890 |
|
] |
|
], |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 53.12451934814453, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.4433, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_adjusted_mutual_info_score": 0.09225927782155577, |
|
"eval_adjusted_rand_score": 0.04497696614477031, |
|
"eval_completeness_score": 0.4856685890606832, |
|
"eval_fowlkes_mallows_score": 0.41794001145778076, |
|
"eval_homogeneity_score": 0.12954647762487131, |
|
"eval_loss": 0.04497696614477031, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
16938, |
|
30712 |
|
], |
|
[ |
|
3792, |
|
9814 |
|
] |
|
], |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7e-06, |
|
"loss": 0.4582, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_adjusted_mutual_info_score": 0.1550123095541971, |
|
"eval_adjusted_rand_score": 0.1235971988422464, |
|
"eval_completeness_score": 0.7625668447266793, |
|
"eval_fowlkes_mallows_score": 0.5091255095660694, |
|
"eval_homogeneity_score": 0.13490749128374505, |
|
"eval_loss": 0.1235971988422464, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
13668, |
|
33982 |
|
], |
|
[ |
|
754, |
|
12852 |
|
] |
|
], |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 24.434818267822266, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 0.4384, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_adjusted_mutual_info_score": 0.17177361413411174, |
|
"eval_adjusted_rand_score": 0.08374811610033726, |
|
"eval_completeness_score": 0.5222737901409524, |
|
"eval_fowlkes_mallows_score": 0.3647858508374196, |
|
"eval_homogeneity_score": 0.2242118568770657, |
|
"eval_loss": 0.08374811610033726, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
29196, |
|
18454 |
|
], |
|
[ |
|
6850, |
|
6756 |
|
] |
|
], |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6e-06, |
|
"loss": 0.4388, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_adjusted_mutual_info_score": 0.20449439835224686, |
|
"eval_adjusted_rand_score": 0.20212814342460073, |
|
"eval_completeness_score": 0.5850787856442333, |
|
"eval_fowlkes_mallows_score": 0.4533371718722817, |
|
"eval_homogeneity_score": 0.22520931487447624, |
|
"eval_loss": 0.20212814342460073, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
30796, |
|
16854 |
|
], |
|
[ |
|
5202, |
|
8404 |
|
] |
|
], |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 0.4136, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_adjusted_mutual_info_score": 0.11177382718405922, |
|
"eval_adjusted_rand_score": 0.09640002189335507, |
|
"eval_completeness_score": 0.6506809526372479, |
|
"eval_fowlkes_mallows_score": 0.4899932693994939, |
|
"eval_homogeneity_score": 0.10817341204672741, |
|
"eval_loss": 0.09640002189335507, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
12770, |
|
34880 |
|
], |
|
[ |
|
1174, |
|
12432 |
|
] |
|
], |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4148, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_adjusted_mutual_info_score": 0.13291287868141516, |
|
"eval_adjusted_rand_score": 0.0173768729388201, |
|
"eval_completeness_score": 0.46335582053405955, |
|
"eval_fowlkes_mallows_score": 0.3147845140860169, |
|
"eval_homogeneity_score": 0.20737389396161876, |
|
"eval_loss": 0.0173768729388201, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
28346, |
|
19304 |
|
], |
|
[ |
|
7786, |
|
5820 |
|
] |
|
], |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.4146, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_adjusted_mutual_info_score": 0.15463237739434937, |
|
"eval_adjusted_rand_score": 0.05775589302463435, |
|
"eval_completeness_score": 0.5013447616164763, |
|
"eval_fowlkes_mallows_score": 0.34999174740596783, |
|
"eval_homogeneity_score": 0.21319532171325858, |
|
"eval_loss": 0.05775589302463435, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
28252, |
|
19398 |
|
], |
|
[ |
|
7026, |
|
6580 |
|
] |
|
], |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.4096, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_adjusted_mutual_info_score": 0.15954128854018185, |
|
"eval_adjusted_rand_score": 0.08353262118433151, |
|
"eval_completeness_score": 0.48657807669089076, |
|
"eval_fowlkes_mallows_score": 0.3488784926098972, |
|
"eval_homogeneity_score": 0.23781381627812734, |
|
"eval_loss": 0.08353262118433151, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
31128, |
|
16522 |
|
], |
|
[ |
|
7482, |
|
6124 |
|
] |
|
], |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.3973, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_adjusted_mutual_info_score": 0.19041946370338364, |
|
"eval_adjusted_rand_score": 0.12553878006056823, |
|
"eval_completeness_score": 0.5046617692993569, |
|
"eval_fowlkes_mallows_score": 0.36189400936638344, |
|
"eval_homogeneity_score": 0.27375639307416655, |
|
"eval_loss": 0.12553878006056823, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
33812, |
|
13838 |
|
], |
|
[ |
|
7670, |
|
5936 |
|
] |
|
], |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3e-06, |
|
"loss": 0.4051, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_adjusted_mutual_info_score": 0.18691214822396793, |
|
"eval_adjusted_rand_score": 0.18158266255584393, |
|
"eval_completeness_score": 0.5245970271191535, |
|
"eval_fowlkes_mallows_score": 0.41690193788215973, |
|
"eval_homogeneity_score": 0.24324643119902978, |
|
"eval_loss": 0.18158266255584393, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
33010, |
|
14640 |
|
], |
|
[ |
|
6422, |
|
7184 |
|
] |
|
], |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 20.364652633666992, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.4062, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_adjusted_mutual_info_score": 0.21823634710165685, |
|
"eval_adjusted_rand_score": 0.25979954936421096, |
|
"eval_completeness_score": 0.5641057686568595, |
|
"eval_fowlkes_mallows_score": 0.47949730143044716, |
|
"eval_homogeneity_score": 0.2648407343665406, |
|
"eval_loss": 0.25979954936421096, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
33538, |
|
14112 |
|
], |
|
[ |
|
5216, |
|
8390 |
|
] |
|
], |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.4025, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_adjusted_mutual_info_score": 0.10056939420043334, |
|
"eval_adjusted_rand_score": 0.0907629141131465, |
|
"eval_completeness_score": 0.5696556835173626, |
|
"eval_fowlkes_mallows_score": 0.4759933991482618, |
|
"eval_homogeneity_score": 0.11380033927304208, |
|
"eval_loss": 0.0907629141131465, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
14202, |
|
33448 |
|
], |
|
[ |
|
1794, |
|
11812 |
|
] |
|
], |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.4043, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_adjusted_mutual_info_score": 0.11845707888523377, |
|
"eval_adjusted_rand_score": 0.06151994460143414, |
|
"eval_completeness_score": 0.5431310507543884, |
|
"eval_fowlkes_mallows_score": 0.4323980307250435, |
|
"eval_homogeneity_score": 0.14340786222528185, |
|
"eval_loss": 0.06151994460143414, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
16966, |
|
30684 |
|
], |
|
[ |
|
3408, |
|
10198 |
|
] |
|
], |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.4013, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_adjusted_mutual_info_score": 0.20098591140511965, |
|
"eval_adjusted_rand_score": 0.24115050477114428, |
|
"eval_completeness_score": 0.5343711342342489, |
|
"eval_fowlkes_mallows_score": 0.45788735752549203, |
|
"eval_homogeneity_score": 0.2597787469313228, |
|
"eval_loss": 0.24115050477114428, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
34176, |
|
13474 |
|
], |
|
[ |
|
5818, |
|
7788 |
|
] |
|
], |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 0.4006, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_adjusted_mutual_info_score": 0.252153502376998, |
|
"eval_adjusted_rand_score": 0.24023922903374736, |
|
"eval_completeness_score": 0.5460417717065094, |
|
"eval_fowlkes_mallows_score": 0.4233209236936553, |
|
"eval_homogeneity_score": 0.34453853702602516, |
|
"eval_loss": 0.24023922903374736, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
37986, |
|
9664 |
|
], |
|
[ |
|
7382, |
|
6224 |
|
] |
|
], |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.4044, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_adjusted_mutual_info_score": 0.08179538296319702, |
|
"eval_adjusted_rand_score": 0.040227652358330604, |
|
"eval_completeness_score": 0.48262386100058413, |
|
"eval_fowlkes_mallows_score": 0.423484876527761, |
|
"eval_homogeneity_score": 0.11562820805581293, |
|
"eval_loss": 0.040227652358330604, |
|
"eval_pair_confusion_matrix": [ |
|
[ |
|
15374, |
|
32276 |
|
], |
|
[ |
|
3428, |
|
10178 |
|
] |
|
], |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 680, |
|
"total_flos": 0.0, |
|
"train_loss": 0.44298483904670266, |
|
"train_runtime": 986.129, |
|
"train_samples_per_second": 21.843, |
|
"train_steps_per_second": 0.69 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 680, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|