{
  "best_metric": 0.39284127950668335,
  "best_model_checkpoint": "StressTweetRobertaSentiment/checkpoint-180",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 180,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03888888888888889,
      "grad_norm": 4.96373987197876,
      "learning_rate": 6.481481481481481e-06,
      "loss": 0.6582,
      "step": 7
    },
    {
      "epoch": 0.07777777777777778,
      "grad_norm": 7.41157341003418,
      "learning_rate": 1.2962962962962962e-05,
      "loss": 0.5382,
      "step": 14
    },
    {
      "epoch": 0.11666666666666667,
      "grad_norm": 19.304912567138672,
      "learning_rate": 1.9444444444444445e-05,
      "loss": 0.4156,
      "step": 21
    },
    {
      "epoch": 0.15555555555555556,
      "grad_norm": 16.010875701904297,
      "learning_rate": 2.5925925925925925e-05,
      "loss": 0.4644,
      "step": 28
    },
    {
      "epoch": 0.19444444444444445,
      "grad_norm": 40.6972770690918,
      "learning_rate": 3.240740740740741e-05,
      "loss": 0.4544,
      "step": 35
    },
    {
      "epoch": 0.23333333333333334,
      "grad_norm": 10.888315200805664,
      "learning_rate": 3.888888888888889e-05,
      "loss": 0.3438,
      "step": 42
    },
    {
      "epoch": 0.2722222222222222,
      "grad_norm": 36.86009979248047,
      "learning_rate": 4.5370370370370374e-05,
      "loss": 0.362,
      "step": 49
    },
    {
      "epoch": 0.3111111111111111,
      "grad_norm": 36.744590759277344,
      "learning_rate": 4.9794238683127575e-05,
      "loss": 0.5263,
      "step": 56
    },
    {
      "epoch": 0.35,
      "grad_norm": 12.836268424987793,
      "learning_rate": 4.9074074074074075e-05,
      "loss": 0.3602,
      "step": 63
    },
    {
      "epoch": 0.3888888888888889,
      "grad_norm": 7.51406717300415,
      "learning_rate": 4.835390946502058e-05,
      "loss": 0.6603,
      "step": 70
    },
    {
      "epoch": 0.42777777777777776,
      "grad_norm": 9.410354614257812,
      "learning_rate": 4.763374485596708e-05,
      "loss": 0.6094,
      "step": 77
    },
    {
      "epoch": 0.4666666666666667,
      "grad_norm": 53.80480194091797,
      "learning_rate": 4.691358024691358e-05,
      "loss": 0.3819,
      "step": 84
    },
    {
      "epoch": 0.5055555555555555,
      "grad_norm": 4.867987155914307,
      "learning_rate": 4.6193415637860084e-05,
      "loss": 0.5925,
      "step": 91
    },
    {
      "epoch": 0.5444444444444444,
      "grad_norm": 0.8935460448265076,
      "learning_rate": 4.5473251028806584e-05,
      "loss": 0.2471,
      "step": 98
    },
    {
      "epoch": 0.5833333333333334,
      "grad_norm": 8.400871276855469,
      "learning_rate": 4.4753086419753084e-05,
      "loss": 0.4062,
      "step": 105
    },
    {
      "epoch": 0.6222222222222222,
      "grad_norm": 14.911149024963379,
      "learning_rate": 4.403292181069959e-05,
      "loss": 0.6447,
      "step": 112
    },
    {
      "epoch": 0.6611111111111111,
      "grad_norm": 20.210161209106445,
      "learning_rate": 4.331275720164609e-05,
      "loss": 0.4248,
      "step": 119
    },
    {
      "epoch": 0.7,
      "grad_norm": 17.20897674560547,
      "learning_rate": 4.259259259259259e-05,
      "loss": 0.6514,
      "step": 126
    },
    {
      "epoch": 0.7388888888888889,
      "grad_norm": 15.010047912597656,
      "learning_rate": 4.18724279835391e-05,
      "loss": 0.327,
      "step": 133
    },
    {
      "epoch": 0.7777777777777778,
      "grad_norm": 23.96828269958496,
      "learning_rate": 4.11522633744856e-05,
      "loss": 0.3802,
      "step": 140
    },
    {
      "epoch": 0.8166666666666667,
      "grad_norm": 45.84319305419922,
      "learning_rate": 4.04320987654321e-05,
      "loss": 0.4437,
      "step": 147
    },
    {
      "epoch": 0.8555555555555555,
      "grad_norm": 21.39622688293457,
      "learning_rate": 3.971193415637861e-05,
      "loss": 0.7226,
      "step": 154
    },
    {
      "epoch": 0.8944444444444445,
      "grad_norm": 7.082496166229248,
      "learning_rate": 3.89917695473251e-05,
      "loss": 0.2913,
      "step": 161
    },
    {
      "epoch": 0.9333333333333333,
      "grad_norm": 4.158477783203125,
      "learning_rate": 3.82716049382716e-05,
      "loss": 0.3775,
      "step": 168
    },
    {
      "epoch": 0.9722222222222222,
      "grad_norm": 5.003448486328125,
      "learning_rate": 3.755144032921811e-05,
      "loss": 0.3336,
      "step": 175
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8831168831168831,
      "eval_auc": 0.957133746355685,
      "eval_f1": 0.91,
      "eval_loss": 0.39284127950668335,
      "eval_precision": 0.8921568627450981,
      "eval_recall": 0.9285714285714286,
      "eval_runtime": 49.1707,
      "eval_samples_per_second": 6.264,
      "eval_steps_per_second": 0.407,
      "step": 180
    }
  ],
  "logging_steps": 7,
  "max_steps": 540,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 94391091110400.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}