{ "best_metric": 0.04811817407608032, "best_model_checkpoint": "./logo-matching-base/checkpoint-47", "epoch": 10.0, "eval_steps": 500, "global_step": 470, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2127659574468085, "grad_norm": 0.07344582676887512, "learning_rate": 0.00019574468085106384, "loss": 0.0184, "step": 10 }, { "epoch": 0.425531914893617, "grad_norm": 0.07934936881065369, "learning_rate": 0.00019148936170212768, "loss": 0.0158, "step": 20 }, { "epoch": 0.6382978723404256, "grad_norm": 0.06698207557201385, "learning_rate": 0.0001872340425531915, "loss": 0.0147, "step": 30 }, { "epoch": 0.851063829787234, "grad_norm": 0.08733490109443665, "learning_rate": 0.00018297872340425532, "loss": 0.014, "step": 40 }, { "epoch": 1.0, "eval_accuracy": 0.28807339449541286, "eval_loss": 0.04811817407608032, "eval_runtime": 6.3598, "eval_samples_per_second": 85.695, "eval_steps_per_second": 10.849, "step": 47 }, { "epoch": 1.0638297872340425, "grad_norm": 0.062165793031454086, "learning_rate": 0.00017872340425531915, "loss": 0.0126, "step": 50 }, { "epoch": 1.2765957446808511, "grad_norm": 0.07251156866550446, "learning_rate": 0.00017446808510638298, "loss": 0.0128, "step": 60 }, { "epoch": 1.4893617021276595, "grad_norm": 0.0481877438724041, "learning_rate": 0.00017021276595744682, "loss": 0.0126, "step": 70 }, { "epoch": 1.702127659574468, "grad_norm": 0.07360873371362686, "learning_rate": 0.00016595744680851065, "loss": 0.0121, "step": 80 }, { "epoch": 1.9148936170212765, "grad_norm": 0.058246735483407974, "learning_rate": 0.00016170212765957446, "loss": 0.0127, "step": 90 }, { "epoch": 2.0, "eval_accuracy": 0.12844036697247707, "eval_loss": 0.054430264979600906, "eval_runtime": 6.5223, "eval_samples_per_second": 83.559, "eval_steps_per_second": 10.579, "step": 94 }, { "epoch": 2.127659574468085, "grad_norm": 0.06463072448968887, "learning_rate": 0.00015744680851063832, "loss": 0.0113, "step": 100 }, { "epoch": 2.3404255319148937, "grad_norm": 0.055768080055713654, "learning_rate": 0.00015319148936170213, "loss": 0.0099, "step": 110 }, { "epoch": 2.5531914893617023, "grad_norm": 0.08255070447921753, "learning_rate": 0.00014893617021276596, "loss": 0.0105, "step": 120 }, { "epoch": 2.7659574468085104, "grad_norm": 0.07059154659509659, "learning_rate": 0.0001446808510638298, "loss": 0.0092, "step": 130 }, { "epoch": 2.978723404255319, "grad_norm": 0.0670301541686058, "learning_rate": 0.00014042553191489363, "loss": 0.0097, "step": 140 }, { "epoch": 3.0, "eval_accuracy": 0.13394495412844037, "eval_loss": 0.056532666087150574, "eval_runtime": 6.5935, "eval_samples_per_second": 82.657, "eval_steps_per_second": 10.465, "step": 141 }, { "epoch": 3.1914893617021276, "grad_norm": 0.05259150639176369, "learning_rate": 0.00013617021276595746, "loss": 0.0094, "step": 150 }, { "epoch": 3.404255319148936, "grad_norm": 0.08173543959856033, "learning_rate": 0.00013191489361702127, "loss": 0.0096, "step": 160 }, { "epoch": 3.617021276595745, "grad_norm": 0.04590131714940071, "learning_rate": 0.00012765957446808513, "loss": 0.0071, "step": 170 }, { "epoch": 3.829787234042553, "grad_norm": 0.03957865759730339, "learning_rate": 0.00012340425531914893, "loss": 0.0062, "step": 180 }, { "epoch": 4.0, "eval_accuracy": 0.1669724770642202, "eval_loss": 0.0624094121158123, "eval_runtime": 6.6046, "eval_samples_per_second": 82.518, "eval_steps_per_second": 10.447, "step": 188 }, { "epoch": 4.042553191489362, "grad_norm": 0.07129650563001633, "learning_rate": 0.00011914893617021277, "loss": 0.0056, "step": 190 }, { "epoch": 4.25531914893617, "grad_norm": 0.056660715490579605, "learning_rate": 0.00011489361702127661, "loss": 0.0061, "step": 200 }, { "epoch": 4.468085106382979, "grad_norm": 0.05685529112815857, "learning_rate": 0.00011063829787234043, "loss": 0.005, "step": 210 }, { "epoch": 4.680851063829787, "grad_norm": 0.03731105104088783, "learning_rate": 0.00010638297872340425, "loss": 0.006, "step": 220 }, { "epoch": 4.8936170212765955, "grad_norm": 0.039578877389431, "learning_rate": 0.00010212765957446809, "loss": 0.0051, "step": 230 }, { "epoch": 5.0, "eval_accuracy": 0.23669724770642203, "eval_loss": 0.059408094733953476, "eval_runtime": 6.822, "eval_samples_per_second": 79.889, "eval_steps_per_second": 10.114, "step": 235 }, { "epoch": 5.1063829787234045, "grad_norm": 0.05410230532288551, "learning_rate": 9.787234042553192e-05, "loss": 0.0044, "step": 240 }, { "epoch": 5.319148936170213, "grad_norm": 0.03862292692065239, "learning_rate": 9.361702127659576e-05, "loss": 0.0037, "step": 250 }, { "epoch": 5.531914893617021, "grad_norm": 0.06473053991794586, "learning_rate": 8.936170212765958e-05, "loss": 0.0046, "step": 260 }, { "epoch": 5.74468085106383, "grad_norm": 0.03953048214316368, "learning_rate": 8.510638297872341e-05, "loss": 0.0039, "step": 270 }, { "epoch": 5.957446808510638, "grad_norm": 0.050171270966529846, "learning_rate": 8.085106382978723e-05, "loss": 0.0037, "step": 280 }, { "epoch": 6.0, "eval_accuracy": 0.20917431192660552, "eval_loss": 0.06461313366889954, "eval_runtime": 5.4957, "eval_samples_per_second": 99.169, "eval_steps_per_second": 12.555, "step": 282 }, { "epoch": 6.170212765957447, "grad_norm": 0.05051916465163231, "learning_rate": 7.659574468085106e-05, "loss": 0.0035, "step": 290 }, { "epoch": 6.382978723404255, "grad_norm": 0.03177861496806145, "learning_rate": 7.23404255319149e-05, "loss": 0.0029, "step": 300 }, { "epoch": 6.595744680851064, "grad_norm": 0.01716785505414009, "learning_rate": 6.808510638297873e-05, "loss": 0.0026, "step": 310 }, { "epoch": 6.808510638297872, "grad_norm": 0.02455182373523712, "learning_rate": 6.382978723404256e-05, "loss": 0.0023, "step": 320 }, { "epoch": 7.0, "eval_accuracy": 0.1981651376146789, "eval_loss": 0.06650757044553757, "eval_runtime": 6.6722, "eval_samples_per_second": 81.683, "eval_steps_per_second": 10.341, "step": 329 }, { "epoch": 7.0212765957446805, "grad_norm": 0.06645756959915161, "learning_rate": 5.9574468085106384e-05, "loss": 0.0023, "step": 330 }, { "epoch": 7.23404255319149, "grad_norm": 0.036258358508348465, "learning_rate": 5.531914893617022e-05, "loss": 0.0022, "step": 340 }, { "epoch": 7.446808510638298, "grad_norm": 0.055592458695173264, "learning_rate": 5.1063829787234044e-05, "loss": 0.0027, "step": 350 }, { "epoch": 7.659574468085106, "grad_norm": 0.02403583563864231, "learning_rate": 4.680851063829788e-05, "loss": 0.0015, "step": 360 }, { "epoch": 7.872340425531915, "grad_norm": 0.01888449862599373, "learning_rate": 4.2553191489361704e-05, "loss": 0.0015, "step": 370 }, { "epoch": 8.0, "eval_accuracy": 0.1596330275229358, "eval_loss": 0.06884702295064926, "eval_runtime": 6.3823, "eval_samples_per_second": 85.393, "eval_steps_per_second": 10.811, "step": 376 }, { "epoch": 8.085106382978724, "grad_norm": 0.012278878130018711, "learning_rate": 3.829787234042553e-05, "loss": 0.0015, "step": 380 }, { "epoch": 8.297872340425531, "grad_norm": 0.029290180653333664, "learning_rate": 3.4042553191489365e-05, "loss": 0.0013, "step": 390 }, { "epoch": 8.51063829787234, "grad_norm": 0.01336819026619196, "learning_rate": 2.9787234042553192e-05, "loss": 0.0012, "step": 400 }, { "epoch": 8.72340425531915, "grad_norm": 0.01985483057796955, "learning_rate": 2.5531914893617022e-05, "loss": 0.0013, "step": 410 }, { "epoch": 8.936170212765958, "grad_norm": 0.026989364996552467, "learning_rate": 2.1276595744680852e-05, "loss": 0.0013, "step": 420 }, { "epoch": 9.0, "eval_accuracy": 0.181651376146789, "eval_loss": 0.0706261619925499, "eval_runtime": 6.7059, "eval_samples_per_second": 81.272, "eval_steps_per_second": 10.29, "step": 423 }, { "epoch": 9.148936170212766, "grad_norm": 0.02496664598584175, "learning_rate": 1.7021276595744682e-05, "loss": 0.0014, "step": 430 }, { "epoch": 9.361702127659575, "grad_norm": 0.01364427525550127, "learning_rate": 1.2765957446808511e-05, "loss": 0.0009, "step": 440 }, { "epoch": 9.574468085106384, "grad_norm": 0.010134860873222351, "learning_rate": 8.510638297872341e-06, "loss": 0.0009, "step": 450 }, { "epoch": 9.787234042553191, "grad_norm": 0.03484776243567467, "learning_rate": 4.255319148936171e-06, "loss": 0.0011, "step": 460 }, { "epoch": 10.0, "grad_norm": 0.0072807134129107, "learning_rate": 0.0, "loss": 0.0007, "step": 470 }, { "epoch": 10.0, "eval_accuracy": 0.1743119266055046, "eval_loss": 0.07116351276636124, "eval_runtime": 6.8862, "eval_samples_per_second": 79.144, "eval_steps_per_second": 10.02, "step": 470 }, { "epoch": 10.0, "step": 470, "total_flos": 5.916629591779738e+17, "train_loss": 0.006165030080468413, "train_runtime": 454.1808, "train_samples_per_second": 16.469, "train_steps_per_second": 1.035 } ], "logging_steps": 10, "max_steps": 470, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.916629591779738e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }