|
{ |
|
"best_metric": 1.408768892288208, |
|
"best_model_checkpoint": "./outputs/checkpoint-2400", |
|
"epoch": 1.7489981785063753, |
|
"eval_steps": 100, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1823, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.0118842124938965, |
|
"eval_runtime": 144.2983, |
|
"eval_samples_per_second": 43.479, |
|
"eval_steps_per_second": 5.44, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 1.962, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.9193025827407837, |
|
"eval_runtime": 144.1022, |
|
"eval_samples_per_second": 43.539, |
|
"eval_steps_per_second": 5.448, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8883, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.8596361875534058, |
|
"eval_runtime": 144.0831, |
|
"eval_samples_per_second": 43.544, |
|
"eval_steps_per_second": 5.448, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8371, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.813263177871704, |
|
"eval_runtime": 144.1028, |
|
"eval_samples_per_second": 43.538, |
|
"eval_steps_per_second": 5.447, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7855, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.772437572479248, |
|
"eval_runtime": 144.0494, |
|
"eval_samples_per_second": 43.555, |
|
"eval_steps_per_second": 5.45, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 1.757, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.7428079843521118, |
|
"eval_runtime": 144.0319, |
|
"eval_samples_per_second": 43.56, |
|
"eval_steps_per_second": 5.45, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7183, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.7120596170425415, |
|
"eval_runtime": 144.1455, |
|
"eval_samples_per_second": 43.525, |
|
"eval_steps_per_second": 5.446, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6973, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.6833879947662354, |
|
"eval_runtime": 144.034, |
|
"eval_samples_per_second": 43.559, |
|
"eval_steps_per_second": 5.45, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 1.662, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.6580077409744263, |
|
"eval_runtime": 144.0204, |
|
"eval_samples_per_second": 43.563, |
|
"eval_steps_per_second": 5.451, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6473, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.6349676847457886, |
|
"eval_runtime": 144.1987, |
|
"eval_samples_per_second": 43.509, |
|
"eval_steps_per_second": 5.444, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6273, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.6135053634643555, |
|
"eval_runtime": 144.1005, |
|
"eval_samples_per_second": 43.539, |
|
"eval_steps_per_second": 5.448, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5919, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.5944637060165405, |
|
"eval_runtime": 144.0899, |
|
"eval_samples_per_second": 43.542, |
|
"eval_steps_per_second": 5.448, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5994, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.5728504657745361, |
|
"eval_runtime": 144.043, |
|
"eval_samples_per_second": 43.556, |
|
"eval_steps_per_second": 5.45, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5528, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 1.552846074104309, |
|
"eval_runtime": 144.0891, |
|
"eval_samples_per_second": 43.543, |
|
"eval_steps_per_second": 5.448, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5246, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 1.5355615615844727, |
|
"eval_runtime": 144.0408, |
|
"eval_samples_per_second": 43.557, |
|
"eval_steps_per_second": 5.45, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5062, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 1.5179370641708374, |
|
"eval_runtime": 144.0242, |
|
"eval_samples_per_second": 43.562, |
|
"eval_steps_per_second": 5.45, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5038, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 1.5012134313583374, |
|
"eval_runtime": 144.0166, |
|
"eval_samples_per_second": 43.564, |
|
"eval_steps_per_second": 5.451, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5144, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 1.511275291442871, |
|
"eval_runtime": 133.6708, |
|
"eval_samples_per_second": 46.936, |
|
"eval_steps_per_second": 5.873, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4715, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 1.48880934715271, |
|
"eval_runtime": 133.6301, |
|
"eval_samples_per_second": 46.95, |
|
"eval_steps_per_second": 5.874, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4621, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 1.4694921970367432, |
|
"eval_runtime": 133.7845, |
|
"eval_samples_per_second": 46.896, |
|
"eval_steps_per_second": 5.868, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4364, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 1.4534579515457153, |
|
"eval_runtime": 133.5069, |
|
"eval_samples_per_second": 46.994, |
|
"eval_steps_per_second": 5.88, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4388, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 1.4382610321044922, |
|
"eval_runtime": 133.6146, |
|
"eval_samples_per_second": 46.956, |
|
"eval_steps_per_second": 5.875, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4139, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 1.4250658750534058, |
|
"eval_runtime": 133.6118, |
|
"eval_samples_per_second": 46.957, |
|
"eval_steps_per_second": 5.875, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4145, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 1.408768892288208, |
|
"eval_runtime": 133.5488, |
|
"eval_samples_per_second": 46.979, |
|
"eval_steps_per_second": 5.878, |
|
"step": 2400 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4116, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 1.4305055049825485e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|