|
{ |
|
"best_metric": 2.331069231033325, |
|
"best_model_checkpoint": "dq158/morbius/checkpoint-13150", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 13150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8098859315589354e-05, |
|
"loss": 2.6388, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.619771863117871e-05, |
|
"loss": 2.6102, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.4296577946768064e-05, |
|
"loss": 2.6153, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.2395437262357415e-05, |
|
"loss": 2.6423, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.0494296577946774e-05, |
|
"loss": 2.6085, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.039845626314436104, |
|
"eval_brevity_penalty": 1.0, |
|
"eval_length_ratio": 1.0584780223326664, |
|
"eval_loss": 2.3793318271636963, |
|
"eval_precisions": [ |
|
0.11484440108136675, |
|
0.05086452177719413, |
|
0.022402389588222743, |
|
0.019262093750807972 |
|
], |
|
"eval_reference_length": 46479, |
|
"eval_runtime": 201.6056, |
|
"eval_samples_per_second": 17.395, |
|
"eval_steps_per_second": 1.453, |
|
"eval_translation_length": 49197, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.8593155893536125e-05, |
|
"loss": 2.529, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.669201520912548e-05, |
|
"loss": 2.5228, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.479087452471483e-05, |
|
"loss": 2.4726, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.288973384030418e-05, |
|
"loss": 2.5319, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.098859315589354e-05, |
|
"loss": 2.5537, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.04511134441808281, |
|
"eval_brevity_penalty": 0.9857642100346898, |
|
"eval_length_ratio": 0.98586458400568, |
|
"eval_loss": 2.3537790775299072, |
|
"eval_precisions": [ |
|
0.12435074854873206, |
|
0.053338059789672695, |
|
0.02736549165120594, |
|
0.024163621427155037 |
|
], |
|
"eval_reference_length": 46479, |
|
"eval_runtime": 200.2247, |
|
"eval_samples_per_second": 17.515, |
|
"eval_steps_per_second": 1.463, |
|
"eval_translation_length": 45822, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 2.908745247148289e-05, |
|
"loss": 2.4999, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.7186311787072245e-05, |
|
"loss": 2.4502, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.5285171102661596e-05, |
|
"loss": 2.4936, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.338403041825095e-05, |
|
"loss": 2.4053, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.1482889733840306e-05, |
|
"loss": 2.427, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.047839363303602896, |
|
"eval_brevity_penalty": 0.9798114045807085, |
|
"eval_length_ratio": 0.9800124787538458, |
|
"eval_loss": 2.3411617279052734, |
|
"eval_precisions": [ |
|
0.12566410537870473, |
|
0.05610922151130985, |
|
0.029971974257836827, |
|
0.026891236083357122 |
|
], |
|
"eval_reference_length": 46479, |
|
"eval_runtime": 200.7411, |
|
"eval_samples_per_second": 17.47, |
|
"eval_steps_per_second": 1.46, |
|
"eval_translation_length": 45550, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.958174904942966e-05, |
|
"loss": 2.4461, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.7680608365019013e-05, |
|
"loss": 2.4042, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.5779467680608364e-05, |
|
"loss": 2.4027, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.387832699619772e-05, |
|
"loss": 2.3927, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.1977186311787073e-05, |
|
"loss": 2.3733, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 1.0076045627376426e-05, |
|
"loss": 2.3716, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.04869515928692814, |
|
"eval_brevity_penalty": 0.9823324657661657, |
|
"eval_length_ratio": 0.9824867144301728, |
|
"eval_loss": 2.3347413539886475, |
|
"eval_precisions": [ |
|
0.12663965838169275, |
|
0.0574505431946487, |
|
0.030477866031926728, |
|
0.027230821761893922 |
|
], |
|
"eval_reference_length": 46479, |
|
"eval_runtime": 200.6451, |
|
"eval_samples_per_second": 17.479, |
|
"eval_steps_per_second": 1.46, |
|
"eval_translation_length": 45665, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 8.17490494296578e-06, |
|
"loss": 2.3618, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 6.273764258555133e-06, |
|
"loss": 2.3667, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 4.3726235741444865e-06, |
|
"loss": 2.377, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 2.4714828897338406e-06, |
|
"loss": 2.346, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 5.70342205323194e-07, |
|
"loss": 2.3494, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.04900460577934908, |
|
"eval_brevity_penalty": 0.9780979152370445, |
|
"eval_length_ratio": 0.978334301512511, |
|
"eval_loss": 2.331069231033325, |
|
"eval_precisions": [ |
|
0.12658339197748064, |
|
0.058000714881448825, |
|
0.031020853918560506, |
|
0.0276665140764477 |
|
], |
|
"eval_reference_length": 46479, |
|
"eval_runtime": 201.3997, |
|
"eval_samples_per_second": 17.413, |
|
"eval_steps_per_second": 1.455, |
|
"eval_translation_length": 45472, |
|
"step": 13150 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 13150, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 1.080547102162944e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|