{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 3.3398852348327637, "learning_rate": 4.999950652140343e-05, "loss": 0.288, "num_input_tokens_seen": 1192, "step": 1 }, { "epoch": 0.04, "grad_norm": 3.534205436706543, "learning_rate": 4.9998026105095405e-05, "loss": 0.1627, "num_input_tokens_seen": 2208, "step": 2 }, { "epoch": 0.06, "grad_norm": 3.9795141220092773, "learning_rate": 4.999555880952023e-05, "loss": 0.2259, "num_input_tokens_seen": 3320, "step": 3 }, { "epoch": 0.08, "grad_norm": 2.712265729904175, "learning_rate": 4.99921047320825e-05, "loss": 0.3248, "num_input_tokens_seen": 4536, "step": 4 }, { "epoch": 0.1, "grad_norm": 2.0970170497894287, "learning_rate": 4.998766400914329e-05, "loss": 0.1867, "num_input_tokens_seen": 5960, "step": 5 }, { "epoch": 0.12, "grad_norm": 2.16365909576416, "learning_rate": 4.998223681601473e-05, "loss": 0.1558, "num_input_tokens_seen": 7304, "step": 6 }, { "epoch": 0.14, "grad_norm": 1.9363147020339966, "learning_rate": 4.9975823366953124e-05, "loss": 0.1863, "num_input_tokens_seen": 8744, "step": 7 }, { "epoch": 0.16, "grad_norm": 1.874936819076538, "learning_rate": 4.996842391515044e-05, "loss": 0.1679, "num_input_tokens_seen": 10120, "step": 8 }, { "epoch": 0.18, "grad_norm": 7.298994541168213, "learning_rate": 4.996003875272438e-05, "loss": 0.3627, "num_input_tokens_seen": 10816, "step": 9 }, { "epoch": 0.2, "grad_norm": 1.7786474227905273, "learning_rate": 4.995066821070679e-05, "loss": 0.1466, "num_input_tokens_seen": 11920, "step": 10 }, { "epoch": 0.22, "grad_norm": 1.9229803085327148, "learning_rate": 4.994031265903063e-05, "loss": 0.2029, "num_input_tokens_seen": 13216, "step": 11 }, { "epoch": 0.24, "grad_norm": 1.230123519897461, "learning_rate": 4.992897250651535e-05, "loss": 0.1142, "num_input_tokens_seen": 14552, "step": 12 }, { "epoch": 0.26, "grad_norm": 2.0229177474975586, "learning_rate": 4.991664820085074e-05, "loss": 0.2142, "num_input_tokens_seen": 15600, "step": 13 }, { "epoch": 0.28, "grad_norm": 1.4955450296401978, "learning_rate": 4.990334022857932e-05, "loss": 0.149, "num_input_tokens_seen": 16744, "step": 14 }, { "epoch": 0.3, "grad_norm": 3.070120334625244, "learning_rate": 4.9889049115077005e-05, "loss": 0.2267, "num_input_tokens_seen": 17424, "step": 15 }, { "epoch": 0.32, "grad_norm": 2.16508150100708, "learning_rate": 4.987377542453251e-05, "loss": 0.1499, "num_input_tokens_seen": 18408, "step": 16 }, { "epoch": 0.34, "grad_norm": 3.1738064289093018, "learning_rate": 4.9857519759924974e-05, "loss": 0.4279, "num_input_tokens_seen": 19288, "step": 17 }, { "epoch": 0.36, "grad_norm": 2.7662558555603027, "learning_rate": 4.984028276300021e-05, "loss": 0.2957, "num_input_tokens_seen": 20336, "step": 18 }, { "epoch": 0.38, "grad_norm": 1.4056564569473267, "learning_rate": 4.982206511424534e-05, "loss": 0.1899, "num_input_tokens_seen": 21528, "step": 19 }, { "epoch": 0.4, "grad_norm": 1.4555004835128784, "learning_rate": 4.980286753286195e-05, "loss": 0.1429, "num_input_tokens_seen": 22680, "step": 20 }, { "epoch": 0.42, "grad_norm": 1.6009600162506104, "learning_rate": 4.978269077673767e-05, "loss": 0.2084, "num_input_tokens_seen": 23800, "step": 21 }, { "epoch": 0.44, "grad_norm": 1.1936904191970825, "learning_rate": 4.976153564241628e-05, "loss": 0.0474, "num_input_tokens_seen": 24784, "step": 22 }, { "epoch": 0.46, "grad_norm": 2.241938829421997, "learning_rate": 4.9739402965066276e-05, "loss": 0.1757, "num_input_tokens_seen": 25688, "step": 23 }, { "epoch": 0.48, "grad_norm": 1.735016942024231, "learning_rate": 4.971629361844785e-05, "loss": 0.2083, "num_input_tokens_seen": 27016, "step": 24 }, { "epoch": 0.5, "grad_norm": 1.648120403289795, "learning_rate": 4.9692208514878444e-05, "loss": 0.1653, "num_input_tokens_seen": 28200, "step": 25 }, { "epoch": 0.52, "grad_norm": 2.088919162750244, "learning_rate": 4.96671486051967e-05, "loss": 0.2277, "num_input_tokens_seen": 29240, "step": 26 }, { "epoch": 0.54, "grad_norm": 1.9720033407211304, "learning_rate": 4.9641114878724956e-05, "loss": 0.2262, "num_input_tokens_seen": 30264, "step": 27 }, { "epoch": 0.56, "grad_norm": 2.137261390686035, "learning_rate": 4.9614108363230135e-05, "loss": 0.2119, "num_input_tokens_seen": 31376, "step": 28 }, { "epoch": 0.58, "grad_norm": 1.7072184085845947, "learning_rate": 4.958613012488324e-05, "loss": 0.186, "num_input_tokens_seen": 32288, "step": 29 }, { "epoch": 0.6, "grad_norm": 1.6640106439590454, "learning_rate": 4.9557181268217227e-05, "loss": 0.1381, "num_input_tokens_seen": 33336, "step": 30 }, { "epoch": 0.62, "grad_norm": 0.743667721748352, "learning_rate": 4.952726293608335e-05, "loss": 0.1521, "num_input_tokens_seen": 35264, "step": 31 }, { "epoch": 0.64, "grad_norm": 1.3096036911010742, "learning_rate": 4.949637630960617e-05, "loss": 0.2232, "num_input_tokens_seen": 36496, "step": 32 }, { "epoch": 0.66, "grad_norm": 1.943765640258789, "learning_rate": 4.9464522608136805e-05, "loss": 0.0252, "num_input_tokens_seen": 37312, "step": 33 }, { "epoch": 0.68, "grad_norm": 1.8293421268463135, "learning_rate": 4.943170308920484e-05, "loss": 0.264, "num_input_tokens_seen": 38376, "step": 34 }, { "epoch": 0.7, "grad_norm": 2.6471211910247803, "learning_rate": 4.939791904846869e-05, "loss": 0.1645, "num_input_tokens_seen": 39168, "step": 35 }, { "epoch": 0.72, "grad_norm": 1.6533693075180054, "learning_rate": 4.9363171819664434e-05, "loss": 0.1802, "num_input_tokens_seen": 40440, "step": 36 }, { "epoch": 0.74, "grad_norm": 1.3770309686660767, "learning_rate": 4.9327462774553166e-05, "loss": 0.2206, "num_input_tokens_seen": 41760, "step": 37 }, { "epoch": 0.76, "grad_norm": 1.6882014274597168, "learning_rate": 4.929079332286685e-05, "loss": 0.2886, "num_input_tokens_seen": 42944, "step": 38 }, { "epoch": 0.78, "grad_norm": 1.9799898862838745, "learning_rate": 4.925316491225265e-05, "loss": 0.1897, "num_input_tokens_seen": 44008, "step": 39 }, { "epoch": 0.8, "grad_norm": 1.551802158355713, "learning_rate": 4.9214579028215776e-05, "loss": 0.1935, "num_input_tokens_seen": 45168, "step": 40 }, { "epoch": 0.82, "grad_norm": 1.0445095300674438, "learning_rate": 4.917503719406088e-05, "loss": 0.1071, "num_input_tokens_seen": 46432, "step": 41 }, { "epoch": 0.84, "grad_norm": 3.2616090774536133, "learning_rate": 4.913454097083185e-05, "loss": 0.5737, "num_input_tokens_seen": 47360, "step": 42 }, { "epoch": 0.86, "grad_norm": 0.9618259072303772, "learning_rate": 4.909309195725025e-05, "loss": 0.114, "num_input_tokens_seen": 49008, "step": 43 }, { "epoch": 0.88, "grad_norm": 1.5821013450622559, "learning_rate": 4.905069178965215e-05, "loss": 0.2676, "num_input_tokens_seen": 50264, "step": 44 }, { "epoch": 0.9, "grad_norm": 1.1913903951644897, "learning_rate": 4.900734214192358e-05, "loss": 0.1061, "num_input_tokens_seen": 51368, "step": 45 }, { "epoch": 0.92, "grad_norm": 1.8395439386367798, "learning_rate": 4.89630447254344e-05, "loss": 0.2161, "num_input_tokens_seen": 52304, "step": 46 }, { "epoch": 0.94, "grad_norm": 0.736319363117218, "learning_rate": 4.891780128897077e-05, "loss": 0.1163, "num_input_tokens_seen": 53712, "step": 47 }, { "epoch": 0.96, "grad_norm": 1.5373475551605225, "learning_rate": 4.887161361866608e-05, "loss": 0.1609, "num_input_tokens_seen": 54704, "step": 48 }, { "epoch": 0.98, "grad_norm": 1.3580759763717651, "learning_rate": 4.882448353793048e-05, "loss": 0.1673, "num_input_tokens_seen": 55880, "step": 49 }, { "epoch": 1.0, "grad_norm": 2.0536258220672607, "learning_rate": 4.877641290737884e-05, "loss": 0.1955, "num_input_tokens_seen": 56784, "step": 50 } ], "logging_steps": 1, "max_steps": 500, "num_input_tokens_seen": 56784, "num_train_epochs": 10, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2564102936199168.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }