{
  "best_metric": 1.2135709524154663,
  "best_model_checkpoint": "./outputs/checkpoint-4000",
  "epoch": 2.9143897996357016,
  "eval_steps": 100,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 2.1514,
      "step": 100
    },
    {
      "epoch": 0.07,
      "eval_loss": 1.9972127676010132,
      "eval_runtime": 132.9222,
      "eval_samples_per_second": 47.201,
      "eval_steps_per_second": 5.906,
      "step": 100
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 1.9494,
      "step": 200
    },
    {
      "epoch": 0.15,
      "eval_loss": 1.9086177349090576,
      "eval_runtime": 132.9394,
      "eval_samples_per_second": 47.194,
      "eval_steps_per_second": 5.905,
      "step": 200
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 1.8786,
      "step": 300
    },
    {
      "epoch": 0.22,
      "eval_loss": 1.8495125770568848,
      "eval_runtime": 132.8956,
      "eval_samples_per_second": 47.21,
      "eval_steps_per_second": 5.907,
      "step": 300
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 1.8283,
      "step": 400
    },
    {
      "epoch": 0.29,
      "eval_loss": 1.8054330348968506,
      "eval_runtime": 132.877,
      "eval_samples_per_second": 47.217,
      "eval_steps_per_second": 5.908,
      "step": 400
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002,
      "loss": 1.7777,
      "step": 500
    },
    {
      "epoch": 0.36,
      "eval_loss": 1.7643086910247803,
      "eval_runtime": 132.9187,
      "eval_samples_per_second": 47.202,
      "eval_steps_per_second": 5.906,
      "step": 500
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0002,
      "loss": 1.7496,
      "step": 600
    },
    {
      "epoch": 0.44,
      "eval_loss": 1.7355191707611084,
      "eval_runtime": 132.8029,
      "eval_samples_per_second": 47.243,
      "eval_steps_per_second": 5.911,
      "step": 600
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0002,
      "loss": 1.712,
      "step": 700
    },
    {
      "epoch": 0.51,
      "eval_loss": 1.703617811203003,
      "eval_runtime": 132.9047,
      "eval_samples_per_second": 47.207,
      "eval_steps_per_second": 5.906,
      "step": 700
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0002,
      "loss": 1.691,
      "step": 800
    },
    {
      "epoch": 0.58,
      "eval_loss": 1.675934076309204,
      "eval_runtime": 132.9001,
      "eval_samples_per_second": 47.208,
      "eval_steps_per_second": 5.907,
      "step": 800
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.0002,
      "loss": 1.6552,
      "step": 900
    },
    {
      "epoch": 0.66,
      "eval_loss": 1.653730869293213,
      "eval_runtime": 132.8143,
      "eval_samples_per_second": 47.239,
      "eval_steps_per_second": 5.911,
      "step": 900
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.0002,
      "loss": 1.6412,
      "step": 1000
    },
    {
      "epoch": 0.73,
      "eval_loss": 1.629328966140747,
      "eval_runtime": 132.9175,
      "eval_samples_per_second": 47.202,
      "eval_steps_per_second": 5.906,
      "step": 1000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0002,
      "loss": 1.6211,
      "step": 1100
    },
    {
      "epoch": 0.8,
      "eval_loss": 1.6079449653625488,
      "eval_runtime": 132.8611,
      "eval_samples_per_second": 47.222,
      "eval_steps_per_second": 5.908,
      "step": 1100
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0002,
      "loss": 1.5861,
      "step": 1200
    },
    {
      "epoch": 0.87,
      "eval_loss": 1.5884170532226562,
      "eval_runtime": 132.8296,
      "eval_samples_per_second": 47.233,
      "eval_steps_per_second": 5.91,
      "step": 1200
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.0002,
      "loss": 1.5925,
      "step": 1300
    },
    {
      "epoch": 0.95,
      "eval_loss": 1.5686501264572144,
      "eval_runtime": 132.8517,
      "eval_samples_per_second": 47.226,
      "eval_steps_per_second": 5.909,
      "step": 1300
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.0002,
      "loss": 1.5467,
      "step": 1400
    },
    {
      "epoch": 1.02,
      "eval_loss": 1.5463733673095703,
      "eval_runtime": 132.8859,
      "eval_samples_per_second": 47.213,
      "eval_steps_per_second": 5.907,
      "step": 1400
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.0002,
      "loss": 1.5182,
      "step": 1500
    },
    {
      "epoch": 1.09,
      "eval_loss": 1.5295236110687256,
      "eval_runtime": 132.8424,
      "eval_samples_per_second": 47.229,
      "eval_steps_per_second": 5.909,
      "step": 1500
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.0002,
      "loss": 1.5007,
      "step": 1600
    },
    {
      "epoch": 1.17,
      "eval_loss": 1.5132012367248535,
      "eval_runtime": 132.7525,
      "eval_samples_per_second": 47.261,
      "eval_steps_per_second": 5.913,
      "step": 1600
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.0002,
      "loss": 1.4987,
      "step": 1700
    },
    {
      "epoch": 1.24,
      "eval_loss": 1.496045708656311,
      "eval_runtime": 132.8633,
      "eval_samples_per_second": 47.221,
      "eval_steps_per_second": 5.908,
      "step": 1700
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.0002,
      "loss": 1.471,
      "step": 1800
    },
    {
      "epoch": 1.31,
      "eval_loss": 1.4813148975372314,
      "eval_runtime": 132.8337,
      "eval_samples_per_second": 47.232,
      "eval_steps_per_second": 5.91,
      "step": 1800
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.0002,
      "loss": 1.4629,
      "step": 1900
    },
    {
      "epoch": 1.38,
      "eval_loss": 1.4688937664031982,
      "eval_runtime": 132.906,
      "eval_samples_per_second": 47.206,
      "eval_steps_per_second": 5.906,
      "step": 1900
    },
    {
      "epoch": 1.46,
      "learning_rate": 0.0002,
      "loss": 1.4448,
      "step": 2000
    },
    {
      "epoch": 1.46,
      "eval_loss": 1.4537802934646606,
      "eval_runtime": 132.7872,
      "eval_samples_per_second": 47.249,
      "eval_steps_per_second": 5.912,
      "step": 2000
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.0002,
      "loss": 1.4308,
      "step": 2100
    },
    {
      "epoch": 1.53,
      "eval_loss": 1.437719464302063,
      "eval_runtime": 132.854,
      "eval_samples_per_second": 47.225,
      "eval_steps_per_second": 5.909,
      "step": 2100
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0002,
      "loss": 1.4061,
      "step": 2200
    },
    {
      "epoch": 1.6,
      "eval_loss": 1.423782467842102,
      "eval_runtime": 132.744,
      "eval_samples_per_second": 47.264,
      "eval_steps_per_second": 5.914,
      "step": 2200
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.0002,
      "loss": 1.4041,
      "step": 2300
    },
    {
      "epoch": 1.68,
      "eval_loss": 1.4104719161987305,
      "eval_runtime": 132.8625,
      "eval_samples_per_second": 47.222,
      "eval_steps_per_second": 5.908,
      "step": 2300
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.0002,
      "loss": 1.404,
      "step": 2400
    },
    {
      "epoch": 1.75,
      "eval_loss": 1.3974003791809082,
      "eval_runtime": 132.8724,
      "eval_samples_per_second": 47.218,
      "eval_steps_per_second": 5.908,
      "step": 2400
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.0002,
      "loss": 1.3756,
      "step": 2500
    },
    {
      "epoch": 1.82,
      "eval_loss": 1.3845711946487427,
      "eval_runtime": 132.8814,
      "eval_samples_per_second": 47.215,
      "eval_steps_per_second": 5.908,
      "step": 2500
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.0002,
      "loss": 1.3654,
      "step": 2600
    },
    {
      "epoch": 1.89,
      "eval_loss": 1.3717011213302612,
      "eval_runtime": 132.776,
      "eval_samples_per_second": 47.253,
      "eval_steps_per_second": 5.912,
      "step": 2600
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.0002,
      "loss": 1.3535,
      "step": 2700
    },
    {
      "epoch": 1.97,
      "eval_loss": 1.3580347299575806,
      "eval_runtime": 132.7648,
      "eval_samples_per_second": 47.256,
      "eval_steps_per_second": 5.913,
      "step": 2700
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.0002,
      "loss": 1.3256,
      "step": 2800
    },
    {
      "epoch": 2.04,
      "eval_loss": 1.3460556268692017,
      "eval_runtime": 132.8473,
      "eval_samples_per_second": 47.227,
      "eval_steps_per_second": 5.909,
      "step": 2800
    },
    {
      "epoch": 2.11,
      "learning_rate": 0.0002,
      "loss": 1.299,
      "step": 2900
    },
    {
      "epoch": 2.11,
      "eval_loss": 1.333855152130127,
      "eval_runtime": 132.9091,
      "eval_samples_per_second": 47.205,
      "eval_steps_per_second": 5.906,
      "step": 2900
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.0002,
      "loss": 1.3002,
      "step": 3000
    },
    {
      "epoch": 2.19,
      "eval_loss": 1.3230880498886108,
      "eval_runtime": 132.8618,
      "eval_samples_per_second": 47.222,
      "eval_steps_per_second": 5.908,
      "step": 3000
    },
    {
      "epoch": 2.26,
      "learning_rate": 0.0002,
      "loss": 1.2797,
      "step": 3100
    },
    {
      "epoch": 2.26,
      "eval_loss": 1.3130332231521606,
      "eval_runtime": 132.7387,
      "eval_samples_per_second": 47.266,
      "eval_steps_per_second": 5.914,
      "step": 3100
    },
    {
      "epoch": 2.33,
      "learning_rate": 0.0002,
      "loss": 1.2834,
      "step": 3200
    },
    {
      "epoch": 2.33,
      "eval_loss": 1.3008962869644165,
      "eval_runtime": 132.866,
      "eval_samples_per_second": 47.221,
      "eval_steps_per_second": 5.908,
      "step": 3200
    },
    {
      "epoch": 2.4,
      "learning_rate": 0.0002,
      "loss": 1.266,
      "step": 3300
    },
    {
      "epoch": 2.4,
      "eval_loss": 1.28789484500885,
      "eval_runtime": 132.8174,
      "eval_samples_per_second": 47.238,
      "eval_steps_per_second": 5.91,
      "step": 3300
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.0002,
      "loss": 1.2491,
      "step": 3400
    },
    {
      "epoch": 2.48,
      "eval_loss": 1.2794678211212158,
      "eval_runtime": 132.939,
      "eval_samples_per_second": 47.195,
      "eval_steps_per_second": 5.905,
      "step": 3400
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.0002,
      "loss": 1.2486,
      "step": 3500
    },
    {
      "epoch": 2.55,
      "eval_loss": 1.2680211067199707,
      "eval_runtime": 132.8824,
      "eval_samples_per_second": 47.215,
      "eval_steps_per_second": 5.907,
      "step": 3500
    },
    {
      "epoch": 2.62,
      "learning_rate": 0.0002,
      "loss": 1.2372,
      "step": 3600
    },
    {
      "epoch": 2.62,
      "eval_loss": 1.2562363147735596,
      "eval_runtime": 132.9828,
      "eval_samples_per_second": 47.179,
      "eval_steps_per_second": 5.903,
      "step": 3600
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.0002,
      "loss": 1.2199,
      "step": 3700
    },
    {
      "epoch": 2.7,
      "eval_loss": 1.2433452606201172,
      "eval_runtime": 132.8033,
      "eval_samples_per_second": 47.243,
      "eval_steps_per_second": 5.911,
      "step": 3700
    },
    {
      "epoch": 2.77,
      "learning_rate": 0.0002,
      "loss": 1.1973,
      "step": 3800
    },
    {
      "epoch": 2.77,
      "eval_loss": 1.233286738395691,
      "eval_runtime": 132.8372,
      "eval_samples_per_second": 47.231,
      "eval_steps_per_second": 5.909,
      "step": 3800
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.0002,
      "loss": 1.2073,
      "step": 3900
    },
    {
      "epoch": 2.84,
      "eval_loss": 1.2219091653823853,
      "eval_runtime": 132.7605,
      "eval_samples_per_second": 47.258,
      "eval_steps_per_second": 5.913,
      "step": 3900
    },
    {
      "epoch": 2.91,
      "learning_rate": 0.0002,
      "loss": 1.1906,
      "step": 4000
    },
    {
      "epoch": 2.91,
      "eval_loss": 1.2135709524154663,
      "eval_runtime": 132.8138,
      "eval_samples_per_second": 47.239,
      "eval_steps_per_second": 5.911,
      "step": 4000
    }
  ],
  "logging_steps": 100,
  "max_steps": 4116,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 2.3840611478920397e+17,
  "trial_name": null,
  "trial_params": null
}