{ "best_metric": 0.7103222012519836, "best_model_checkpoint": "../artifacts/LlaMa3-QLoRA-PatentMatch-v0.1/checkpoint-60", "epoch": 0.5660377358490566, "eval_steps": 20, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009433962264150943, "grad_norm": 20.337095260620117, "learning_rate": 2e-05, "loss": 0.5913, "step": 1 }, { "epoch": 0.018867924528301886, "grad_norm": 11.506393432617188, "learning_rate": 2e-05, "loss": 0.6775, "step": 2 }, { "epoch": 0.02830188679245283, "grad_norm": 9.703904151916504, "learning_rate": 2e-05, "loss": 0.576, "step": 3 }, { "epoch": 0.03773584905660377, "grad_norm": 11.118324279785156, "learning_rate": 2e-05, "loss": 0.5084, "step": 4 }, { "epoch": 0.04716981132075472, "grad_norm": 13.329315185546875, "learning_rate": 2e-05, "loss": 0.5712, "step": 5 }, { "epoch": 0.05660377358490566, "grad_norm": 29.63173484802246, "learning_rate": 2e-05, "loss": 0.7234, "step": 6 }, { "epoch": 0.0660377358490566, "grad_norm": 17.787134170532227, "learning_rate": 2e-05, "loss": 0.6053, "step": 7 }, { "epoch": 0.07547169811320754, "grad_norm": 30.639591217041016, "learning_rate": 2e-05, "loss": 0.6873, "step": 8 }, { "epoch": 0.08490566037735849, "grad_norm": 9.612072944641113, "learning_rate": 2e-05, "loss": 0.5541, "step": 9 }, { "epoch": 0.09433962264150944, "grad_norm": 8.989519119262695, "learning_rate": 2e-05, "loss": 0.7858, "step": 10 }, { "epoch": 0.10377358490566038, "grad_norm": 17.486469268798828, "learning_rate": 2e-05, "loss": 0.9176, "step": 11 }, { "epoch": 0.11320754716981132, "grad_norm": 35.29791259765625, "learning_rate": 2e-05, "loss": 0.6558, "step": 12 }, { "epoch": 0.12264150943396226, "grad_norm": 19.468692779541016, "learning_rate": 2e-05, "loss": 0.6085, "step": 13 }, { "epoch": 0.1320754716981132, "grad_norm": 9.410886764526367, "learning_rate": 2e-05, "loss": 0.6229, "step": 14 }, { "epoch": 0.14150943396226415, "grad_norm": 11.87700080871582, "learning_rate": 2e-05, "loss": 0.5764, "step": 15 }, { "epoch": 0.1509433962264151, "grad_norm": 20.188251495361328, "learning_rate": 2e-05, "loss": 0.8275, "step": 16 }, { "epoch": 0.16037735849056603, "grad_norm": 28.298933029174805, "learning_rate": 2e-05, "loss": 0.5896, "step": 17 }, { "epoch": 0.16981132075471697, "grad_norm": 47.8366813659668, "learning_rate": 2e-05, "loss": 0.8496, "step": 18 }, { "epoch": 0.1792452830188679, "grad_norm": 36.19501495361328, "learning_rate": 2e-05, "loss": 0.6756, "step": 19 }, { "epoch": 0.18867924528301888, "grad_norm": 22.574682235717773, "learning_rate": 2e-05, "loss": 0.6366, "step": 20 }, { "epoch": 0.18867924528301888, "eval_loss": 0.7738199234008789, "eval_runtime": 18.5622, "eval_samples_per_second": 15.893, "eval_steps_per_second": 3.179, "step": 20 }, { "epoch": 0.19811320754716982, "grad_norm": 47.26109313964844, "learning_rate": 2e-05, "loss": 0.8049, "step": 21 }, { "epoch": 0.20754716981132076, "grad_norm": 14.115569114685059, "learning_rate": 2e-05, "loss": 0.6604, "step": 22 }, { "epoch": 0.2169811320754717, "grad_norm": 25.182506561279297, "learning_rate": 2e-05, "loss": 0.7591, "step": 23 }, { "epoch": 0.22641509433962265, "grad_norm": 11.066629409790039, "learning_rate": 2e-05, "loss": 0.6497, "step": 24 }, { "epoch": 0.2358490566037736, "grad_norm": 8.666443824768066, "learning_rate": 2e-05, "loss": 0.5788, "step": 25 }, { "epoch": 0.24528301886792453, "grad_norm": 7.663419723510742, "learning_rate": 2e-05, "loss": 0.7128, "step": 26 }, { "epoch": 0.25471698113207547, "grad_norm": 30.738019943237305, "learning_rate": 2e-05, "loss": 0.7349, "step": 27 }, { "epoch": 0.2641509433962264, "grad_norm": 29.7031307220459, "learning_rate": 2e-05, "loss": 0.7618, "step": 28 }, { "epoch": 0.27358490566037735, "grad_norm": 36.29247283935547, "learning_rate": 2e-05, "loss": 0.6923, "step": 29 }, { "epoch": 0.2830188679245283, "grad_norm": 16.721107482910156, "learning_rate": 2e-05, "loss": 0.5942, "step": 30 }, { "epoch": 0.29245283018867924, "grad_norm": 36.51066970825195, "learning_rate": 2e-05, "loss": 0.7745, "step": 31 }, { "epoch": 0.3018867924528302, "grad_norm": 13.144597053527832, "learning_rate": 2e-05, "loss": 0.6199, "step": 32 }, { "epoch": 0.3113207547169811, "grad_norm": 24.113306045532227, "learning_rate": 2e-05, "loss": 0.6653, "step": 33 }, { "epoch": 0.32075471698113206, "grad_norm": 34.57608413696289, "learning_rate": 2e-05, "loss": 0.5586, "step": 34 }, { "epoch": 0.330188679245283, "grad_norm": 15.308676719665527, "learning_rate": 2e-05, "loss": 0.7438, "step": 35 }, { "epoch": 0.33962264150943394, "grad_norm": 34.94574737548828, "learning_rate": 2e-05, "loss": 0.7437, "step": 36 }, { "epoch": 0.3490566037735849, "grad_norm": 53.19334030151367, "learning_rate": 2e-05, "loss": 0.8349, "step": 37 }, { "epoch": 0.3584905660377358, "grad_norm": 38.979618072509766, "learning_rate": 2e-05, "loss": 0.6599, "step": 38 }, { "epoch": 0.36792452830188677, "grad_norm": 30.653545379638672, "learning_rate": 2e-05, "loss": 0.6782, "step": 39 }, { "epoch": 0.37735849056603776, "grad_norm": 28.044891357421875, "learning_rate": 2e-05, "loss": 0.7945, "step": 40 }, { "epoch": 0.37735849056603776, "eval_loss": 0.7332659959793091, "eval_runtime": 19.1011, "eval_samples_per_second": 15.444, "eval_steps_per_second": 3.089, "step": 40 }, { "epoch": 0.3867924528301887, "grad_norm": 7.029095649719238, "learning_rate": 2e-05, "loss": 0.6402, "step": 41 }, { "epoch": 0.39622641509433965, "grad_norm": 31.614521026611328, "learning_rate": 2e-05, "loss": 0.6392, "step": 42 }, { "epoch": 0.4056603773584906, "grad_norm": 8.320149421691895, "learning_rate": 2e-05, "loss": 0.5229, "step": 43 }, { "epoch": 0.41509433962264153, "grad_norm": 18.34058380126953, "learning_rate": 2e-05, "loss": 0.6935, "step": 44 }, { "epoch": 0.42452830188679247, "grad_norm": 36.57161331176758, "learning_rate": 2e-05, "loss": 0.6399, "step": 45 }, { "epoch": 0.4339622641509434, "grad_norm": 7.638645172119141, "learning_rate": 2e-05, "loss": 0.7015, "step": 46 }, { "epoch": 0.44339622641509435, "grad_norm": 18.424884796142578, "learning_rate": 2e-05, "loss": 0.7157, "step": 47 }, { "epoch": 0.4528301886792453, "grad_norm": 51.02284240722656, "learning_rate": 2e-05, "loss": 0.8159, "step": 48 }, { "epoch": 0.46226415094339623, "grad_norm": 29.55755615234375, "learning_rate": 2e-05, "loss": 0.6529, "step": 49 }, { "epoch": 0.4716981132075472, "grad_norm": 12.764640808105469, "learning_rate": 2e-05, "loss": 0.6704, "step": 50 }, { "epoch": 0.4811320754716981, "grad_norm": 17.65540313720703, "learning_rate": 2e-05, "loss": 0.6762, "step": 51 }, { "epoch": 0.49056603773584906, "grad_norm": 10.487552642822266, "learning_rate": 2e-05, "loss": 0.6094, "step": 52 }, { "epoch": 0.5, "grad_norm": 10.158540725708008, "learning_rate": 2e-05, "loss": 0.6539, "step": 53 }, { "epoch": 0.5094339622641509, "grad_norm": 27.807415008544922, "learning_rate": 2e-05, "loss": 0.7554, "step": 54 }, { "epoch": 0.5188679245283019, "grad_norm": 39.26100540161133, "learning_rate": 2e-05, "loss": 0.8584, "step": 55 }, { "epoch": 0.5283018867924528, "grad_norm": 8.890057563781738, "learning_rate": 2e-05, "loss": 0.7872, "step": 56 }, { "epoch": 0.5377358490566038, "grad_norm": 11.212479591369629, "learning_rate": 2e-05, "loss": 0.8501, "step": 57 }, { "epoch": 0.5471698113207547, "grad_norm": 8.871652603149414, "learning_rate": 2e-05, "loss": 0.6034, "step": 58 }, { "epoch": 0.5566037735849056, "grad_norm": 13.393775939941406, "learning_rate": 2e-05, "loss": 0.5953, "step": 59 }, { "epoch": 0.5660377358490566, "grad_norm": 16.56597328186035, "learning_rate": 2e-05, "loss": 0.5595, "step": 60 }, { "epoch": 0.5660377358490566, "eval_loss": 0.7103222012519836, "eval_runtime": 19.0536, "eval_samples_per_second": 15.483, "eval_steps_per_second": 3.097, "step": 60 } ], "logging_steps": 1, "max_steps": 106, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.305665388744704e+16, "train_batch_size": 5, "trial_name": null, "trial_params": null }