|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 258, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11627906976744186, |
|
"grad_norm": 10.613214492797852, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 0.6914, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.23255813953488372, |
|
"grad_norm": 13.879897117614746, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 0.6682, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3488372093023256, |
|
"grad_norm": 12.199045181274414, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 0.6228, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.46511627906976744, |
|
"grad_norm": 10.077858924865723, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 0.5581, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5813953488372093, |
|
"grad_norm": 9.077909469604492, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.4533, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6976744186046512, |
|
"grad_norm": 12.368229866027832, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.3712, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.813953488372093, |
|
"grad_norm": 8.568652153015137, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 0.2595, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9302325581395349, |
|
"grad_norm": 4.570730209350586, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 0.1839, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0465116279069768, |
|
"grad_norm": 7.3009562492370605, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 0.1171, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.1627906976744187, |
|
"grad_norm": 17.301179885864258, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.1089, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1627906976744187, |
|
"eval_accuracy": 0.9883720930232558, |
|
"eval_accuracy_label_clickbait": 0.9827586206896551, |
|
"eval_accuracy_label_factual": 0.9941176470588236, |
|
"eval_f1": 0.9883724860836951, |
|
"eval_loss": 0.06170095503330231, |
|
"eval_precision": 0.9884396971335857, |
|
"eval_recall": 0.9883720930232558, |
|
"eval_runtime": 0.2786, |
|
"eval_samples_per_second": 1234.948, |
|
"eval_steps_per_second": 78.979, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.2790697674418605, |
|
"grad_norm": 9.898296356201172, |
|
"learning_rate": 4.4e-06, |
|
"loss": 0.0528, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.3953488372093024, |
|
"grad_norm": 0.6471260190010071, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 0.0235, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.5116279069767442, |
|
"grad_norm": 0.5717675089836121, |
|
"learning_rate": 5.2e-06, |
|
"loss": 0.0456, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.627906976744186, |
|
"grad_norm": 0.4758104085922241, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 0.0329, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.744186046511628, |
|
"grad_norm": 0.17176453769207, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0422, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.8604651162790697, |
|
"grad_norm": 0.1672956943511963, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.0325, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.9767441860465116, |
|
"grad_norm": 8.929553031921387, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 0.0577, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.0930232558139537, |
|
"grad_norm": 6.927422523498535, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 0.0329, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.2093023255813953, |
|
"grad_norm": 101.96356964111328, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 0.0076, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.3255813953488373, |
|
"grad_norm": 0.08046901226043701, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.0118, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.3255813953488373, |
|
"eval_accuracy": 0.997093023255814, |
|
"eval_accuracy_label_clickbait": 0.9942528735632183, |
|
"eval_accuracy_label_factual": 1.0, |
|
"eval_f1": 0.9970930969577605, |
|
"eval_loss": 0.009262952022254467, |
|
"eval_precision": 0.9971100231198151, |
|
"eval_recall": 0.997093023255814, |
|
"eval_runtime": 0.2762, |
|
"eval_samples_per_second": 1245.451, |
|
"eval_steps_per_second": 79.651, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.441860465116279, |
|
"grad_norm": 8.304059028625488, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.0752, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.558139534883721, |
|
"grad_norm": 0.26231056451797485, |
|
"learning_rate": 8.8e-06, |
|
"loss": 0.0019, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.6744186046511627, |
|
"grad_norm": 0.04355992376804352, |
|
"learning_rate": 9.200000000000002e-06, |
|
"loss": 0.0076, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.7906976744186047, |
|
"grad_norm": 27.33185386657715, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 0.029, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.9069767441860463, |
|
"grad_norm": 0.038574591279029846, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0015, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 258, |
|
"total_flos": 11163167404740.0, |
|
"train_loss": 0.1740486863350799, |
|
"train_runtime": 20.4599, |
|
"train_samples_per_second": 403.08, |
|
"train_steps_per_second": 12.61 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 1.0, |
|
"eval_accuracy_label_clickbait": 1.0, |
|
"eval_accuracy_label_factual": 1.0, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.001034915097989142, |
|
"eval_precision": 1.0, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 0.2775, |
|
"eval_samples_per_second": 1239.448, |
|
"eval_steps_per_second": 79.267, |
|
"step": 258 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 258, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 11163167404740.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|