|
{ |
|
"best_metric": 0.04811817407608032, |
|
"best_model_checkpoint": "./logo-matching-base/checkpoint-47", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 470, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2127659574468085, |
|
"grad_norm": 0.07344582676887512, |
|
"learning_rate": 0.00019574468085106384, |
|
"loss": 0.0184, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.425531914893617, |
|
"grad_norm": 0.07934936881065369, |
|
"learning_rate": 0.00019148936170212768, |
|
"loss": 0.0158, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.6382978723404256, |
|
"grad_norm": 0.06698207557201385, |
|
"learning_rate": 0.0001872340425531915, |
|
"loss": 0.0147, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.851063829787234, |
|
"grad_norm": 0.08733490109443665, |
|
"learning_rate": 0.00018297872340425532, |
|
"loss": 0.014, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.28807339449541286, |
|
"eval_loss": 0.04811817407608032, |
|
"eval_runtime": 6.3598, |
|
"eval_samples_per_second": 85.695, |
|
"eval_steps_per_second": 10.849, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.0638297872340425, |
|
"grad_norm": 0.062165793031454086, |
|
"learning_rate": 0.00017872340425531915, |
|
"loss": 0.0126, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.2765957446808511, |
|
"grad_norm": 0.07251156866550446, |
|
"learning_rate": 0.00017446808510638298, |
|
"loss": 0.0128, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.4893617021276595, |
|
"grad_norm": 0.0481877438724041, |
|
"learning_rate": 0.00017021276595744682, |
|
"loss": 0.0126, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.702127659574468, |
|
"grad_norm": 0.07360873371362686, |
|
"learning_rate": 0.00016595744680851065, |
|
"loss": 0.0121, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.9148936170212765, |
|
"grad_norm": 0.058246735483407974, |
|
"learning_rate": 0.00016170212765957446, |
|
"loss": 0.0127, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.12844036697247707, |
|
"eval_loss": 0.054430264979600906, |
|
"eval_runtime": 6.5223, |
|
"eval_samples_per_second": 83.559, |
|
"eval_steps_per_second": 10.579, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"grad_norm": 0.06463072448968887, |
|
"learning_rate": 0.00015744680851063832, |
|
"loss": 0.0113, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.3404255319148937, |
|
"grad_norm": 0.055768080055713654, |
|
"learning_rate": 0.00015319148936170213, |
|
"loss": 0.0099, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.5531914893617023, |
|
"grad_norm": 0.08255070447921753, |
|
"learning_rate": 0.00014893617021276596, |
|
"loss": 0.0105, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.7659574468085104, |
|
"grad_norm": 0.07059154659509659, |
|
"learning_rate": 0.0001446808510638298, |
|
"loss": 0.0092, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.978723404255319, |
|
"grad_norm": 0.0670301541686058, |
|
"learning_rate": 0.00014042553191489363, |
|
"loss": 0.0097, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.13394495412844037, |
|
"eval_loss": 0.056532666087150574, |
|
"eval_runtime": 6.5935, |
|
"eval_samples_per_second": 82.657, |
|
"eval_steps_per_second": 10.465, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 3.1914893617021276, |
|
"grad_norm": 0.05259150639176369, |
|
"learning_rate": 0.00013617021276595746, |
|
"loss": 0.0094, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.404255319148936, |
|
"grad_norm": 0.08173543959856033, |
|
"learning_rate": 0.00013191489361702127, |
|
"loss": 0.0096, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.617021276595745, |
|
"grad_norm": 0.04590131714940071, |
|
"learning_rate": 0.00012765957446808513, |
|
"loss": 0.0071, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.829787234042553, |
|
"grad_norm": 0.03957865759730339, |
|
"learning_rate": 0.00012340425531914893, |
|
"loss": 0.0062, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.1669724770642202, |
|
"eval_loss": 0.0624094121158123, |
|
"eval_runtime": 6.6046, |
|
"eval_samples_per_second": 82.518, |
|
"eval_steps_per_second": 10.447, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 4.042553191489362, |
|
"grad_norm": 0.07129650563001633, |
|
"learning_rate": 0.00011914893617021277, |
|
"loss": 0.0056, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.25531914893617, |
|
"grad_norm": 0.056660715490579605, |
|
"learning_rate": 0.00011489361702127661, |
|
"loss": 0.0061, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.468085106382979, |
|
"grad_norm": 0.05685529112815857, |
|
"learning_rate": 0.00011063829787234043, |
|
"loss": 0.005, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 4.680851063829787, |
|
"grad_norm": 0.03731105104088783, |
|
"learning_rate": 0.00010638297872340425, |
|
"loss": 0.006, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.8936170212765955, |
|
"grad_norm": 0.039578877389431, |
|
"learning_rate": 0.00010212765957446809, |
|
"loss": 0.0051, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.23669724770642203, |
|
"eval_loss": 0.059408094733953476, |
|
"eval_runtime": 6.822, |
|
"eval_samples_per_second": 79.889, |
|
"eval_steps_per_second": 10.114, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 5.1063829787234045, |
|
"grad_norm": 0.05410230532288551, |
|
"learning_rate": 9.787234042553192e-05, |
|
"loss": 0.0044, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"grad_norm": 0.03862292692065239, |
|
"learning_rate": 9.361702127659576e-05, |
|
"loss": 0.0037, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.531914893617021, |
|
"grad_norm": 0.06473053991794586, |
|
"learning_rate": 8.936170212765958e-05, |
|
"loss": 0.0046, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 5.74468085106383, |
|
"grad_norm": 0.03953048214316368, |
|
"learning_rate": 8.510638297872341e-05, |
|
"loss": 0.0039, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 5.957446808510638, |
|
"grad_norm": 0.050171270966529846, |
|
"learning_rate": 8.085106382978723e-05, |
|
"loss": 0.0037, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.20917431192660552, |
|
"eval_loss": 0.06461313366889954, |
|
"eval_runtime": 5.4957, |
|
"eval_samples_per_second": 99.169, |
|
"eval_steps_per_second": 12.555, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 6.170212765957447, |
|
"grad_norm": 0.05051916465163231, |
|
"learning_rate": 7.659574468085106e-05, |
|
"loss": 0.0035, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 6.382978723404255, |
|
"grad_norm": 0.03177861496806145, |
|
"learning_rate": 7.23404255319149e-05, |
|
"loss": 0.0029, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.595744680851064, |
|
"grad_norm": 0.01716785505414009, |
|
"learning_rate": 6.808510638297873e-05, |
|
"loss": 0.0026, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 6.808510638297872, |
|
"grad_norm": 0.02455182373523712, |
|
"learning_rate": 6.382978723404256e-05, |
|
"loss": 0.0023, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.1981651376146789, |
|
"eval_loss": 0.06650757044553757, |
|
"eval_runtime": 6.6722, |
|
"eval_samples_per_second": 81.683, |
|
"eval_steps_per_second": 10.341, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 7.0212765957446805, |
|
"grad_norm": 0.06645756959915161, |
|
"learning_rate": 5.9574468085106384e-05, |
|
"loss": 0.0023, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 7.23404255319149, |
|
"grad_norm": 0.036258358508348465, |
|
"learning_rate": 5.531914893617022e-05, |
|
"loss": 0.0022, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 7.446808510638298, |
|
"grad_norm": 0.055592458695173264, |
|
"learning_rate": 5.1063829787234044e-05, |
|
"loss": 0.0027, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 7.659574468085106, |
|
"grad_norm": 0.02403583563864231, |
|
"learning_rate": 4.680851063829788e-05, |
|
"loss": 0.0015, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 7.872340425531915, |
|
"grad_norm": 0.01888449862599373, |
|
"learning_rate": 4.2553191489361704e-05, |
|
"loss": 0.0015, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.1596330275229358, |
|
"eval_loss": 0.06884702295064926, |
|
"eval_runtime": 6.3823, |
|
"eval_samples_per_second": 85.393, |
|
"eval_steps_per_second": 10.811, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 8.085106382978724, |
|
"grad_norm": 0.012278878130018711, |
|
"learning_rate": 3.829787234042553e-05, |
|
"loss": 0.0015, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 8.297872340425531, |
|
"grad_norm": 0.029290180653333664, |
|
"learning_rate": 3.4042553191489365e-05, |
|
"loss": 0.0013, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 8.51063829787234, |
|
"grad_norm": 0.01336819026619196, |
|
"learning_rate": 2.9787234042553192e-05, |
|
"loss": 0.0012, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.72340425531915, |
|
"grad_norm": 0.01985483057796955, |
|
"learning_rate": 2.5531914893617022e-05, |
|
"loss": 0.0013, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 8.936170212765958, |
|
"grad_norm": 0.026989364996552467, |
|
"learning_rate": 2.1276595744680852e-05, |
|
"loss": 0.0013, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.181651376146789, |
|
"eval_loss": 0.0706261619925499, |
|
"eval_runtime": 6.7059, |
|
"eval_samples_per_second": 81.272, |
|
"eval_steps_per_second": 10.29, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 9.148936170212766, |
|
"grad_norm": 0.02496664598584175, |
|
"learning_rate": 1.7021276595744682e-05, |
|
"loss": 0.0014, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 9.361702127659575, |
|
"grad_norm": 0.01364427525550127, |
|
"learning_rate": 1.2765957446808511e-05, |
|
"loss": 0.0009, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 9.574468085106384, |
|
"grad_norm": 0.010134860873222351, |
|
"learning_rate": 8.510638297872341e-06, |
|
"loss": 0.0009, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 9.787234042553191, |
|
"grad_norm": 0.03484776243567467, |
|
"learning_rate": 4.255319148936171e-06, |
|
"loss": 0.0011, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.0072807134129107, |
|
"learning_rate": 0.0, |
|
"loss": 0.0007, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.1743119266055046, |
|
"eval_loss": 0.07116351276636124, |
|
"eval_runtime": 6.8862, |
|
"eval_samples_per_second": 79.144, |
|
"eval_steps_per_second": 10.02, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 470, |
|
"total_flos": 5.916629591779738e+17, |
|
"train_loss": 0.006165030080468413, |
|
"train_runtime": 454.1808, |
|
"train_samples_per_second": 16.469, |
|
"train_steps_per_second": 1.035 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 470, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.916629591779738e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|