{ "best_metric": 3.468397378921509, "best_model_checkpoint": "/datadrive/disk1/pierpaolo/llm/itlangadapt/bloom-1b3_it/checkpoint-50000", "epoch": 5.236680193742637, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 0.00098, "loss": 3.4734, "step": 1000 }, { "epoch": 0.21, "learning_rate": 0.00096, "loss": 3.466, "step": 2000 }, { "epoch": 0.31, "learning_rate": 0.00094, "loss": 3.461, "step": 3000 }, { "epoch": 0.42, "learning_rate": 0.00092, "loss": 3.4725, "step": 4000 }, { "epoch": 0.52, "learning_rate": 0.0009000000000000001, "loss": 3.4614, "step": 5000 }, { "epoch": 0.52, "eval_loss": 3.469589948654175, "eval_runtime": 358.4452, "eval_samples_per_second": 10.624, "eval_steps_per_second": 5.312, "step": 5000 }, { "epoch": 0.63, "learning_rate": 0.00088, "loss": 3.4653, "step": 6000 }, { "epoch": 0.73, "learning_rate": 0.00086, "loss": 3.4618, "step": 7000 }, { "epoch": 0.84, "learning_rate": 0.00084, "loss": 3.4707, "step": 8000 }, { "epoch": 0.94, "learning_rate": 0.00082002, "loss": 3.4706, "step": 9000 }, { "epoch": 1.05, "learning_rate": 0.0008000199999999999, "loss": 3.4694, "step": 10000 }, { "epoch": 1.05, "eval_loss": 3.4690916538238525, "eval_runtime": 358.2981, "eval_samples_per_second": 10.628, "eval_steps_per_second": 5.314, "step": 10000 }, { "epoch": 1.15, "learning_rate": 0.0007800200000000001, "loss": 3.4686, "step": 11000 }, { "epoch": 1.26, "learning_rate": 0.00076002, "loss": 3.4636, "step": 12000 }, { "epoch": 1.36, "learning_rate": 0.0007400400000000001, "loss": 3.463, "step": 13000 }, { "epoch": 1.47, "learning_rate": 0.00072006, "loss": 3.4694, "step": 14000 }, { "epoch": 1.57, "learning_rate": 0.0007000600000000001, "loss": 3.4709, "step": 15000 }, { "epoch": 1.57, "eval_loss": 3.4688496589660645, "eval_runtime": 358.397, "eval_samples_per_second": 10.625, "eval_steps_per_second": 5.313, "step": 15000 }, { "epoch": 1.68, "learning_rate": 0.00068006, "loss": 3.4627, "step": 16000 }, { "epoch": 1.78, "learning_rate": 0.0006600800000000001, "loss": 3.4658, "step": 17000 }, { "epoch": 1.89, "learning_rate": 0.00064008, "loss": 3.4661, "step": 18000 }, { "epoch": 1.99, "learning_rate": 0.00062008, "loss": 3.4615, "step": 19000 }, { "epoch": 2.09, "learning_rate": 0.0006001, "loss": 3.481, "step": 20000 }, { "epoch": 2.09, "eval_loss": 3.468804359436035, "eval_runtime": 359.4028, "eval_samples_per_second": 10.595, "eval_steps_per_second": 5.298, "step": 20000 }, { "epoch": 2.2, "learning_rate": 0.0005801, "loss": 3.465, "step": 21000 }, { "epoch": 2.3, "learning_rate": 0.00056012, "loss": 3.4656, "step": 22000 }, { "epoch": 2.41, "learning_rate": 0.00054012, "loss": 3.472, "step": 23000 }, { "epoch": 2.51, "learning_rate": 0.00052012, "loss": 3.4574, "step": 24000 }, { "epoch": 2.62, "learning_rate": 0.0005001200000000001, "loss": 3.4638, "step": 25000 }, { "epoch": 2.62, "eval_loss": 3.468665599822998, "eval_runtime": 358.6249, "eval_samples_per_second": 10.618, "eval_steps_per_second": 5.309, "step": 25000 }, { "epoch": 2.72, "learning_rate": 0.00048012, "loss": 3.4611, "step": 26000 }, { "epoch": 2.83, "learning_rate": 0.00046012, "loss": 3.4631, "step": 27000 }, { "epoch": 2.93, "learning_rate": 0.00044014, "loss": 3.4626, "step": 28000 }, { "epoch": 3.04, "learning_rate": 0.00042014, "loss": 3.469, "step": 29000 }, { "epoch": 3.14, "learning_rate": 0.00040018, "loss": 3.4704, "step": 30000 }, { "epoch": 3.14, "eval_loss": 3.468623399734497, "eval_runtime": 359.7336, "eval_samples_per_second": 10.586, "eval_steps_per_second": 5.293, "step": 30000 }, { "epoch": 3.25, "learning_rate": 0.00038018000000000004, "loss": 3.4659, "step": 31000 }, { "epoch": 3.35, "learning_rate": 0.00036018, "loss": 3.4706, "step": 32000 }, { "epoch": 3.46, "learning_rate": 0.00034018, "loss": 3.4659, "step": 33000 }, { "epoch": 3.56, "learning_rate": 0.00032018000000000004, "loss": 3.4669, "step": 34000 }, { "epoch": 3.67, "learning_rate": 0.00030018, "loss": 3.4617, "step": 35000 }, { "epoch": 3.67, "eval_loss": 3.4684813022613525, "eval_runtime": 361.1986, "eval_samples_per_second": 10.543, "eval_steps_per_second": 5.271, "step": 35000 }, { "epoch": 3.77, "learning_rate": 0.00028020000000000003, "loss": 3.4703, "step": 36000 }, { "epoch": 3.88, "learning_rate": 0.00026024, "loss": 3.4606, "step": 37000 }, { "epoch": 3.98, "learning_rate": 0.00024026, "loss": 3.4549, "step": 38000 }, { "epoch": 4.08, "learning_rate": 0.00022026, "loss": 3.4701, "step": 39000 }, { "epoch": 4.19, "learning_rate": 0.00020026, "loss": 3.4636, "step": 40000 }, { "epoch": 4.19, "eval_loss": 3.468475103378296, "eval_runtime": 358.6453, "eval_samples_per_second": 10.618, "eval_steps_per_second": 5.309, "step": 40000 }, { "epoch": 4.29, "learning_rate": 0.00018026, "loss": 3.4541, "step": 41000 }, { "epoch": 4.4, "learning_rate": 0.00016026000000000001, "loss": 3.4724, "step": 42000 }, { "epoch": 4.5, "learning_rate": 0.00014026, "loss": 3.4683, "step": 43000 }, { "epoch": 4.61, "learning_rate": 0.00012026, "loss": 3.463, "step": 44000 }, { "epoch": 4.71, "learning_rate": 0.00010026, "loss": 3.4668, "step": 45000 }, { "epoch": 4.71, "eval_loss": 3.4684269428253174, "eval_runtime": 358.1004, "eval_samples_per_second": 10.634, "eval_steps_per_second": 5.317, "step": 45000 }, { "epoch": 4.82, "learning_rate": 8.028000000000001e-05, "loss": 3.4696, "step": 46000 }, { "epoch": 4.92, "learning_rate": 6.028e-05, "loss": 3.4605, "step": 47000 }, { "epoch": 5.03, "learning_rate": 4.028000000000001e-05, "loss": 3.4645, "step": 48000 }, { "epoch": 5.13, "learning_rate": 2.028e-05, "loss": 3.4704, "step": 49000 }, { "epoch": 5.24, "learning_rate": 2.7999999999999997e-07, "loss": 3.4623, "step": 50000 }, { "epoch": 5.24, "eval_loss": 3.468397378921509, "eval_runtime": 358.1322, "eval_samples_per_second": 10.633, "eval_steps_per_second": 5.316, "step": 50000 } ], "max_steps": 50000, "num_train_epochs": 6, "total_flos": 2.9704946454626304e+18, "trial_name": null, "trial_params": null }