{ "best_metric": 0.6870357394218445, "best_model_checkpoint": "../artifacts/LlaMa3-QLoRA-PatentMatch-v0.1/checkpoint-80", "epoch": 0.9433962264150944, "eval_steps": 20, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009433962264150943, "grad_norm": 20.337095260620117, "learning_rate": 2e-05, "loss": 0.5913, "step": 1 }, { "epoch": 0.018867924528301886, "grad_norm": 11.506393432617188, "learning_rate": 2e-05, "loss": 0.6775, "step": 2 }, { "epoch": 0.02830188679245283, "grad_norm": 9.703904151916504, "learning_rate": 2e-05, "loss": 0.576, "step": 3 }, { "epoch": 0.03773584905660377, "grad_norm": 11.118324279785156, "learning_rate": 2e-05, "loss": 0.5084, "step": 4 }, { "epoch": 0.04716981132075472, "grad_norm": 13.329315185546875, "learning_rate": 2e-05, "loss": 0.5712, "step": 5 }, { "epoch": 0.05660377358490566, "grad_norm": 29.63173484802246, "learning_rate": 2e-05, "loss": 0.7234, "step": 6 }, { "epoch": 0.0660377358490566, "grad_norm": 17.787134170532227, "learning_rate": 2e-05, "loss": 0.6053, "step": 7 }, { "epoch": 0.07547169811320754, "grad_norm": 30.639591217041016, "learning_rate": 2e-05, "loss": 0.6873, "step": 8 }, { "epoch": 0.08490566037735849, "grad_norm": 9.612072944641113, "learning_rate": 2e-05, "loss": 0.5541, "step": 9 }, { "epoch": 0.09433962264150944, "grad_norm": 8.989519119262695, "learning_rate": 2e-05, "loss": 0.7858, "step": 10 }, { "epoch": 0.10377358490566038, "grad_norm": 17.486469268798828, "learning_rate": 2e-05, "loss": 0.9176, "step": 11 }, { "epoch": 0.11320754716981132, "grad_norm": 35.29791259765625, "learning_rate": 2e-05, "loss": 0.6558, "step": 12 }, { "epoch": 0.12264150943396226, "grad_norm": 19.468692779541016, "learning_rate": 2e-05, "loss": 0.6085, "step": 13 }, { "epoch": 0.1320754716981132, "grad_norm": 9.410886764526367, "learning_rate": 2e-05, "loss": 0.6229, "step": 14 }, { "epoch": 0.14150943396226415, "grad_norm": 11.87700080871582, "learning_rate": 2e-05, "loss": 0.5764, "step": 15 }, { "epoch": 0.1509433962264151, "grad_norm": 20.188251495361328, "learning_rate": 2e-05, "loss": 0.8275, "step": 16 }, { "epoch": 0.16037735849056603, "grad_norm": 28.298933029174805, "learning_rate": 2e-05, "loss": 0.5896, "step": 17 }, { "epoch": 0.16981132075471697, "grad_norm": 47.8366813659668, "learning_rate": 2e-05, "loss": 0.8496, "step": 18 }, { "epoch": 0.1792452830188679, "grad_norm": 36.19501495361328, "learning_rate": 2e-05, "loss": 0.6756, "step": 19 }, { "epoch": 0.18867924528301888, "grad_norm": 22.574682235717773, "learning_rate": 2e-05, "loss": 0.6366, "step": 20 }, { "epoch": 0.18867924528301888, "eval_loss": 0.7738199234008789, "eval_runtime": 18.5622, "eval_samples_per_second": 15.893, "eval_steps_per_second": 3.179, "step": 20 }, { "epoch": 0.19811320754716982, "grad_norm": 47.26109313964844, "learning_rate": 2e-05, "loss": 0.8049, "step": 21 }, { "epoch": 0.20754716981132076, "grad_norm": 14.115569114685059, "learning_rate": 2e-05, "loss": 0.6604, "step": 22 }, { "epoch": 0.2169811320754717, "grad_norm": 25.182506561279297, "learning_rate": 2e-05, "loss": 0.7591, "step": 23 }, { "epoch": 0.22641509433962265, "grad_norm": 11.066629409790039, "learning_rate": 2e-05, "loss": 0.6497, "step": 24 }, { "epoch": 0.2358490566037736, "grad_norm": 8.666443824768066, "learning_rate": 2e-05, "loss": 0.5788, "step": 25 }, { "epoch": 0.24528301886792453, "grad_norm": 7.663419723510742, "learning_rate": 2e-05, "loss": 0.7128, "step": 26 }, { "epoch": 0.25471698113207547, "grad_norm": 30.738019943237305, "learning_rate": 2e-05, "loss": 0.7349, "step": 27 }, { "epoch": 0.2641509433962264, "grad_norm": 29.7031307220459, "learning_rate": 2e-05, "loss": 0.7618, "step": 28 }, { "epoch": 0.27358490566037735, "grad_norm": 36.29247283935547, "learning_rate": 2e-05, "loss": 0.6923, "step": 29 }, { "epoch": 0.2830188679245283, "grad_norm": 16.721107482910156, "learning_rate": 2e-05, "loss": 0.5942, "step": 30 }, { "epoch": 0.29245283018867924, "grad_norm": 36.51066970825195, "learning_rate": 2e-05, "loss": 0.7745, "step": 31 }, { "epoch": 0.3018867924528302, "grad_norm": 13.144597053527832, "learning_rate": 2e-05, "loss": 0.6199, "step": 32 }, { "epoch": 0.3113207547169811, "grad_norm": 24.113306045532227, "learning_rate": 2e-05, "loss": 0.6653, "step": 33 }, { "epoch": 0.32075471698113206, "grad_norm": 34.57608413696289, "learning_rate": 2e-05, "loss": 0.5586, "step": 34 }, { "epoch": 0.330188679245283, "grad_norm": 15.308676719665527, "learning_rate": 2e-05, "loss": 0.7438, "step": 35 }, { "epoch": 0.33962264150943394, "grad_norm": 34.94574737548828, "learning_rate": 2e-05, "loss": 0.7437, "step": 36 }, { "epoch": 0.3490566037735849, "grad_norm": 53.19334030151367, "learning_rate": 2e-05, "loss": 0.8349, "step": 37 }, { "epoch": 0.3584905660377358, "grad_norm": 38.979618072509766, "learning_rate": 2e-05, "loss": 0.6599, "step": 38 }, { "epoch": 0.36792452830188677, "grad_norm": 30.653545379638672, "learning_rate": 2e-05, "loss": 0.6782, "step": 39 }, { "epoch": 0.37735849056603776, "grad_norm": 28.044891357421875, "learning_rate": 2e-05, "loss": 0.7945, "step": 40 }, { "epoch": 0.37735849056603776, "eval_loss": 0.7332659959793091, "eval_runtime": 19.1011, "eval_samples_per_second": 15.444, "eval_steps_per_second": 3.089, "step": 40 }, { "epoch": 0.3867924528301887, "grad_norm": 7.029095649719238, "learning_rate": 2e-05, "loss": 0.6402, "step": 41 }, { "epoch": 0.39622641509433965, "grad_norm": 31.614521026611328, "learning_rate": 2e-05, "loss": 0.6392, "step": 42 }, { "epoch": 0.4056603773584906, "grad_norm": 8.320149421691895, "learning_rate": 2e-05, "loss": 0.5229, "step": 43 }, { "epoch": 0.41509433962264153, "grad_norm": 18.34058380126953, "learning_rate": 2e-05, "loss": 0.6935, "step": 44 }, { "epoch": 0.42452830188679247, "grad_norm": 36.57161331176758, "learning_rate": 2e-05, "loss": 0.6399, "step": 45 }, { "epoch": 0.4339622641509434, "grad_norm": 7.638645172119141, "learning_rate": 2e-05, "loss": 0.7015, "step": 46 }, { "epoch": 0.44339622641509435, "grad_norm": 18.424884796142578, "learning_rate": 2e-05, "loss": 0.7157, "step": 47 }, { "epoch": 0.4528301886792453, "grad_norm": 51.02284240722656, "learning_rate": 2e-05, "loss": 0.8159, "step": 48 }, { "epoch": 0.46226415094339623, "grad_norm": 29.55755615234375, "learning_rate": 2e-05, "loss": 0.6529, "step": 49 }, { "epoch": 0.4716981132075472, "grad_norm": 12.764640808105469, "learning_rate": 2e-05, "loss": 0.6704, "step": 50 }, { "epoch": 0.4811320754716981, "grad_norm": 17.65540313720703, "learning_rate": 2e-05, "loss": 0.6762, "step": 51 }, { "epoch": 0.49056603773584906, "grad_norm": 10.487552642822266, "learning_rate": 2e-05, "loss": 0.6094, "step": 52 }, { "epoch": 0.5, "grad_norm": 10.158540725708008, "learning_rate": 2e-05, "loss": 0.6539, "step": 53 }, { "epoch": 0.5094339622641509, "grad_norm": 27.807415008544922, "learning_rate": 2e-05, "loss": 0.7554, "step": 54 }, { "epoch": 0.5188679245283019, "grad_norm": 39.26100540161133, "learning_rate": 2e-05, "loss": 0.8584, "step": 55 }, { "epoch": 0.5283018867924528, "grad_norm": 8.890057563781738, "learning_rate": 2e-05, "loss": 0.7872, "step": 56 }, { "epoch": 0.5377358490566038, "grad_norm": 11.212479591369629, "learning_rate": 2e-05, "loss": 0.8501, "step": 57 }, { "epoch": 0.5471698113207547, "grad_norm": 8.871652603149414, "learning_rate": 2e-05, "loss": 0.6034, "step": 58 }, { "epoch": 0.5566037735849056, "grad_norm": 13.393775939941406, "learning_rate": 2e-05, "loss": 0.5953, "step": 59 }, { "epoch": 0.5660377358490566, "grad_norm": 16.56597328186035, "learning_rate": 2e-05, "loss": 0.5595, "step": 60 }, { "epoch": 0.5660377358490566, "eval_loss": 0.7103222012519836, "eval_runtime": 19.0536, "eval_samples_per_second": 15.483, "eval_steps_per_second": 3.097, "step": 60 }, { "epoch": 0.5754716981132075, "grad_norm": 66.63365936279297, "learning_rate": 2e-05, "loss": 0.8609, "step": 61 }, { "epoch": 0.5849056603773585, "grad_norm": 43.89859390258789, "learning_rate": 2e-05, "loss": 0.7555, "step": 62 }, { "epoch": 0.5943396226415094, "grad_norm": 54.232025146484375, "learning_rate": 2e-05, "loss": 0.7666, "step": 63 }, { "epoch": 0.6037735849056604, "grad_norm": 10.439966201782227, "learning_rate": 2e-05, "loss": 0.6019, "step": 64 }, { "epoch": 0.6132075471698113, "grad_norm": 15.057198524475098, "learning_rate": 2e-05, "loss": 0.6797, "step": 65 }, { "epoch": 0.6226415094339622, "grad_norm": 8.816701889038086, "learning_rate": 2e-05, "loss": 0.8066, "step": 66 }, { "epoch": 0.6320754716981132, "grad_norm": 16.436609268188477, "learning_rate": 2e-05, "loss": 0.5891, "step": 67 }, { "epoch": 0.6415094339622641, "grad_norm": 27.5755672454834, "learning_rate": 2e-05, "loss": 0.6204, "step": 68 }, { "epoch": 0.6509433962264151, "grad_norm": 26.33946990966797, "learning_rate": 2e-05, "loss": 0.671, "step": 69 }, { "epoch": 0.660377358490566, "grad_norm": 64.1870346069336, "learning_rate": 2e-05, "loss": 0.7638, "step": 70 }, { "epoch": 0.6698113207547169, "grad_norm": 21.89188003540039, "learning_rate": 2e-05, "loss": 0.6771, "step": 71 }, { "epoch": 0.6792452830188679, "grad_norm": 8.088455200195312, "learning_rate": 2e-05, "loss": 0.6761, "step": 72 }, { "epoch": 0.6886792452830188, "grad_norm": 11.988521575927734, "learning_rate": 2e-05, "loss": 0.6315, "step": 73 }, { "epoch": 0.6981132075471698, "grad_norm": 8.751002311706543, "learning_rate": 2e-05, "loss": 0.5967, "step": 74 }, { "epoch": 0.7075471698113207, "grad_norm": 22.44446563720703, "learning_rate": 2e-05, "loss": 0.5986, "step": 75 }, { "epoch": 0.7169811320754716, "grad_norm": 6.895334243774414, "learning_rate": 2e-05, "loss": 0.6324, "step": 76 }, { "epoch": 0.7264150943396226, "grad_norm": 8.335739135742188, "learning_rate": 2e-05, "loss": 0.6581, "step": 77 }, { "epoch": 0.7358490566037735, "grad_norm": 6.27984619140625, "learning_rate": 2e-05, "loss": 0.6899, "step": 78 }, { "epoch": 0.7452830188679245, "grad_norm": 13.635252952575684, "learning_rate": 2e-05, "loss": 0.7032, "step": 79 }, { "epoch": 0.7547169811320755, "grad_norm": 5.515637397766113, "learning_rate": 2e-05, "loss": 0.5121, "step": 80 }, { "epoch": 0.7547169811320755, "eval_loss": 0.6870357394218445, "eval_runtime": 19.1439, "eval_samples_per_second": 15.41, "eval_steps_per_second": 3.082, "step": 80 }, { "epoch": 0.7641509433962265, "grad_norm": 14.854217529296875, "learning_rate": 2e-05, "loss": 0.7582, "step": 81 }, { "epoch": 0.7735849056603774, "grad_norm": 19.503761291503906, "learning_rate": 2e-05, "loss": 0.7394, "step": 82 }, { "epoch": 0.7830188679245284, "grad_norm": 5.10677433013916, "learning_rate": 2e-05, "loss": 0.5536, "step": 83 }, { "epoch": 0.7924528301886793, "grad_norm": 48.037845611572266, "learning_rate": 2e-05, "loss": 0.7501, "step": 84 }, { "epoch": 0.8018867924528302, "grad_norm": 28.357952117919922, "learning_rate": 2e-05, "loss": 0.7174, "step": 85 }, { "epoch": 0.8113207547169812, "grad_norm": 18.693449020385742, "learning_rate": 2e-05, "loss": 0.8174, "step": 86 }, { "epoch": 0.8207547169811321, "grad_norm": 36.01970672607422, "learning_rate": 2e-05, "loss": 0.7863, "step": 87 }, { "epoch": 0.8301886792452831, "grad_norm": 63.98431396484375, "learning_rate": 2e-05, "loss": 0.7538, "step": 88 }, { "epoch": 0.839622641509434, "grad_norm": 7.736374855041504, "learning_rate": 2e-05, "loss": 0.6478, "step": 89 }, { "epoch": 0.8490566037735849, "grad_norm": 9.201268196105957, "learning_rate": 2e-05, "loss": 0.7841, "step": 90 }, { "epoch": 0.8584905660377359, "grad_norm": 26.842529296875, "learning_rate": 2e-05, "loss": 0.7152, "step": 91 }, { "epoch": 0.8679245283018868, "grad_norm": 21.91474723815918, "learning_rate": 2e-05, "loss": 0.5827, "step": 92 }, { "epoch": 0.8773584905660378, "grad_norm": 9.022438049316406, "learning_rate": 2e-05, "loss": 0.6294, "step": 93 }, { "epoch": 0.8867924528301887, "grad_norm": 9.270819664001465, "learning_rate": 2e-05, "loss": 0.6174, "step": 94 }, { "epoch": 0.8962264150943396, "grad_norm": 11.497746467590332, "learning_rate": 2e-05, "loss": 0.7267, "step": 95 }, { "epoch": 0.9056603773584906, "grad_norm": 19.90700912475586, "learning_rate": 2e-05, "loss": 0.665, "step": 96 }, { "epoch": 0.9150943396226415, "grad_norm": 26.896240234375, "learning_rate": 2e-05, "loss": 0.7505, "step": 97 }, { "epoch": 0.9245283018867925, "grad_norm": 12.731915473937988, "learning_rate": 2e-05, "loss": 0.6568, "step": 98 }, { "epoch": 0.9339622641509434, "grad_norm": 29.186397552490234, "learning_rate": 2e-05, "loss": 0.701, "step": 99 }, { "epoch": 0.9433962264150944, "grad_norm": 5.476395130157471, "learning_rate": 2e-05, "loss": 0.5688, "step": 100 }, { "epoch": 0.9433962264150944, "eval_loss": 0.689711332321167, "eval_runtime": 19.1115, "eval_samples_per_second": 15.436, "eval_steps_per_second": 3.087, "step": 100 } ], "logging_steps": 1, "max_steps": 106, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.862540230721536e+16, "train_batch_size": 5, "trial_name": null, "trial_params": null }