{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9874476987447699, "eval_steps": 59, "global_step": 59, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016736401673640166, "grad_norm": 0.3126992881298065, "learning_rate": 2e-05, "loss": 2.4388, "step": 1 }, { "epoch": 0.016736401673640166, "eval_loss": 2.2736361026763916, "eval_runtime": 44.9772, "eval_samples_per_second": 37.419, "eval_steps_per_second": 0.8, "step": 1 }, { "epoch": 0.03347280334728033, "grad_norm": 0.31262004375457764, "learning_rate": 4e-05, "loss": 2.3915, "step": 2 }, { "epoch": 0.0502092050209205, "grad_norm": 0.3161173462867737, "learning_rate": 6e-05, "loss": 2.4118, "step": 3 }, { "epoch": 0.06694560669456066, "grad_norm": 0.32605665922164917, "learning_rate": 8e-05, "loss": 2.3872, "step": 4 }, { "epoch": 0.08368200836820083, "grad_norm": 0.32243478298187256, "learning_rate": 0.0001, "loss": 2.3368, "step": 5 }, { "epoch": 0.100418410041841, "grad_norm": 0.3141494393348694, "learning_rate": 0.00012, "loss": 2.2966, "step": 6 }, { "epoch": 0.11715481171548117, "grad_norm": 0.330599308013916, "learning_rate": 0.00014, "loss": 2.2379, "step": 7 }, { "epoch": 0.13389121338912133, "grad_norm": 0.33452969789505005, "learning_rate": 0.00016, "loss": 2.1949, "step": 8 }, { "epoch": 0.1506276150627615, "grad_norm": 0.2835342288017273, "learning_rate": 0.00018, "loss": 2.156, "step": 9 }, { "epoch": 0.16736401673640167, "grad_norm": 0.23091256618499756, "learning_rate": 0.0002, "loss": 2.1109, "step": 10 }, { "epoch": 0.18410041841004185, "grad_norm": 0.2284015268087387, "learning_rate": 0.00019999033847063811, "loss": 2.0782, "step": 11 }, { "epoch": 0.200836820083682, "grad_norm": 0.23981927335262299, "learning_rate": 0.00019996135574945544, "loss": 2.053, "step": 12 }, { "epoch": 0.2175732217573222, "grad_norm": 0.23807936906814575, "learning_rate": 0.00019991305743680013, "loss": 2.0294, "step": 13 }, { "epoch": 0.23430962343096234, "grad_norm": 0.20766477286815643, "learning_rate": 0.0001998454528653836, "loss": 1.9947, "step": 14 }, { "epoch": 0.2510460251046025, "grad_norm": 0.1892593652009964, "learning_rate": 0.00019975855509847686, "loss": 1.9643, "step": 15 }, { "epoch": 0.26778242677824265, "grad_norm": 0.19022946059703827, "learning_rate": 0.00019965238092738643, "loss": 1.9275, "step": 16 }, { "epoch": 0.28451882845188287, "grad_norm": 0.18370254337787628, "learning_rate": 0.00019952695086820975, "loss": 1.9512, "step": 17 }, { "epoch": 0.301255230125523, "grad_norm": 0.16774943470954895, "learning_rate": 0.0001993822891578708, "loss": 1.8883, "step": 18 }, { "epoch": 0.3179916317991632, "grad_norm": 0.17572854459285736, "learning_rate": 0.0001992184237494368, "loss": 1.938, "step": 19 }, { "epoch": 0.33472803347280333, "grad_norm": 0.21952244639396667, "learning_rate": 0.0001990353863067169, "loss": 1.8937, "step": 20 }, { "epoch": 0.3514644351464435, "grad_norm": 0.2579434812068939, "learning_rate": 0.0001988332121981436, "loss": 1.8797, "step": 21 }, { "epoch": 0.3682008368200837, "grad_norm": 0.29942768812179565, "learning_rate": 0.00019861194048993863, "loss": 1.8766, "step": 22 }, { "epoch": 0.38493723849372385, "grad_norm": 0.326593279838562, "learning_rate": 0.0001983716139385641, "loss": 1.8567, "step": 23 }, { "epoch": 0.401673640167364, "grad_norm": 0.2769831418991089, "learning_rate": 0.0001981122789824607, "loss": 1.839, "step": 24 }, { "epoch": 0.41841004184100417, "grad_norm": 0.2902805507183075, "learning_rate": 0.00019783398573307428, "loss": 1.8622, "step": 25 }, { "epoch": 0.4351464435146444, "grad_norm": 0.2152428776025772, "learning_rate": 0.00019753678796517282, "loss": 1.8393, "step": 26 }, { "epoch": 0.45188284518828453, "grad_norm": 0.1541067361831665, "learning_rate": 0.00019722074310645553, "loss": 1.8326, "step": 27 }, { "epoch": 0.4686192468619247, "grad_norm": 0.12413746118545532, "learning_rate": 0.00019688591222645607, "loss": 1.8079, "step": 28 }, { "epoch": 0.48535564853556484, "grad_norm": 0.11909659206867218, "learning_rate": 0.000196532360024742, "loss": 1.8044, "step": 29 }, { "epoch": 0.502092050209205, "grad_norm": 0.16260313987731934, "learning_rate": 0.0001961601548184129, "loss": 1.7888, "step": 30 }, { "epoch": 0.5188284518828452, "grad_norm": 0.17894335091114044, "learning_rate": 0.00019576936852889936, "loss": 1.8263, "step": 31 }, { "epoch": 0.5355648535564853, "grad_norm": 0.21706554293632507, "learning_rate": 0.00019536007666806556, "loss": 1.8151, "step": 32 }, { "epoch": 0.5523012552301255, "grad_norm": 0.17296762764453888, "learning_rate": 0.0001949323583236181, "loss": 1.7901, "step": 33 }, { "epoch": 0.5690376569037657, "grad_norm": 0.1761002093553543, "learning_rate": 0.0001944862961438239, "loss": 1.8126, "step": 34 }, { "epoch": 0.5857740585774058, "grad_norm": 0.14914190769195557, "learning_rate": 0.00019402197632153992, "loss": 1.7797, "step": 35 }, { "epoch": 0.602510460251046, "grad_norm": 0.13111546635627747, "learning_rate": 0.00019353948857755803, "loss": 1.7916, "step": 36 }, { "epoch": 0.6192468619246861, "grad_norm": 0.12080994248390198, "learning_rate": 0.00019303892614326836, "loss": 1.7834, "step": 37 }, { "epoch": 0.6359832635983264, "grad_norm": 0.15622606873512268, "learning_rate": 0.00019252038574264405, "loss": 1.794, "step": 38 }, { "epoch": 0.6527196652719666, "grad_norm": 0.1345275640487671, "learning_rate": 0.00019198396757355118, "loss": 1.7554, "step": 39 }, { "epoch": 0.6694560669456067, "grad_norm": 0.18387249112129211, "learning_rate": 0.00019142977528838762, "loss": 1.7675, "step": 40 }, { "epoch": 0.6861924686192469, "grad_norm": 0.165897399187088, "learning_rate": 0.00019085791597405404, "loss": 1.7631, "step": 41 }, { "epoch": 0.702928870292887, "grad_norm": 0.13675889372825623, "learning_rate": 0.00019026850013126157, "loss": 1.754, "step": 42 }, { "epoch": 0.7196652719665272, "grad_norm": 0.16969387233257294, "learning_rate": 0.00018966164165317966, "loss": 1.745, "step": 43 }, { "epoch": 0.7364016736401674, "grad_norm": 0.16098380088806152, "learning_rate": 0.00018903745780342839, "loss": 1.7609, "step": 44 }, { "epoch": 0.7531380753138075, "grad_norm": 0.12338632345199585, "learning_rate": 0.0001883960691934196, "loss": 1.7584, "step": 45 }, { "epoch": 0.7698744769874477, "grad_norm": 0.1335158348083496, "learning_rate": 0.00018773759975905098, "loss": 1.7302, "step": 46 }, { "epoch": 0.7866108786610879, "grad_norm": 0.11967575550079346, "learning_rate": 0.00018706217673675811, "loss": 1.7323, "step": 47 }, { "epoch": 0.803347280334728, "grad_norm": 0.12446475774049759, "learning_rate": 0.0001863699306389282, "loss": 1.7217, "step": 48 }, { "epoch": 0.8200836820083682, "grad_norm": 0.13070392608642578, "learning_rate": 0.00018566099522868119, "loss": 1.7192, "step": 49 }, { "epoch": 0.8368200836820083, "grad_norm": 0.15041972696781158, "learning_rate": 0.00018493550749402278, "loss": 1.7386, "step": 50 }, { "epoch": 0.8535564853556485, "grad_norm": 0.12314204126596451, "learning_rate": 0.00018419360762137395, "loss": 1.7465, "step": 51 }, { "epoch": 0.8702928870292888, "grad_norm": 0.15009309351444244, "learning_rate": 0.00018343543896848273, "loss": 1.7216, "step": 52 }, { "epoch": 0.8870292887029289, "grad_norm": 0.12467087060213089, "learning_rate": 0.00018266114803672318, "loss": 1.7043, "step": 53 }, { "epoch": 0.9037656903765691, "grad_norm": 0.1297266036272049, "learning_rate": 0.00018187088444278674, "loss": 1.7069, "step": 54 }, { "epoch": 0.9205020920502092, "grad_norm": 0.1195509061217308, "learning_rate": 0.00018106480088977172, "loss": 1.7305, "step": 55 }, { "epoch": 0.9372384937238494, "grad_norm": 0.14450602233409882, "learning_rate": 0.00018024305313767646, "loss": 1.7171, "step": 56 }, { "epoch": 0.9539748953974896, "grad_norm": 0.16594989597797394, "learning_rate": 0.00017940579997330165, "loss": 1.7239, "step": 57 }, { "epoch": 0.9707112970711297, "grad_norm": 0.19210928678512573, "learning_rate": 0.00017855320317956784, "loss": 1.7081, "step": 58 }, { "epoch": 0.9874476987447699, "grad_norm": 0.16463987529277802, "learning_rate": 0.00017768542750425426, "loss": 1.7178, "step": 59 }, { "epoch": 0.9874476987447699, "eval_loss": 1.7365927696228027, "eval_runtime": 44.9816, "eval_samples_per_second": 37.415, "eval_steps_per_second": 0.8, "step": 59 } ], "logging_steps": 1, "max_steps": 236, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 59, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.409845473312768e+17, "train_batch_size": 48, "trial_name": null, "trial_params": null }