|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.431522846221924, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5653, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7218045112781954, |
|
"eval_f1": 0.6333236742973052, |
|
"eval_loss": 0.5245500802993774, |
|
"eval_precision": 0.6540233301136786, |
|
"eval_recall": 0.6256592107655937, |
|
"eval_runtime": 4.6575, |
|
"eval_samples_per_second": 85.668, |
|
"eval_steps_per_second": 10.735, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.8715531826019287, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5167, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7293233082706767, |
|
"eval_f1": 0.6854598540145985, |
|
"eval_loss": 0.521543025970459, |
|
"eval_precision": 0.6804511278195489, |
|
"eval_recall": 0.6934897254046191, |
|
"eval_runtime": 5.0566, |
|
"eval_samples_per_second": 78.907, |
|
"eval_steps_per_second": 9.888, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.720804691314697, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4984, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7443609022556391, |
|
"eval_f1": 0.6916257501363885, |
|
"eval_loss": 0.49747392535209656, |
|
"eval_precision": 0.6916257501363885, |
|
"eval_recall": 0.6916257501363885, |
|
"eval_runtime": 5.0553, |
|
"eval_samples_per_second": 78.927, |
|
"eval_steps_per_second": 9.891, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.2620692253112793, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4765, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7418546365914787, |
|
"eval_f1": 0.6619339448031918, |
|
"eval_loss": 0.4854496121406555, |
|
"eval_precision": 0.6836634025138848, |
|
"eval_recall": 0.6523458810692853, |
|
"eval_runtime": 5.0451, |
|
"eval_samples_per_second": 79.087, |
|
"eval_steps_per_second": 9.911, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 4.330729007720947, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4797, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7719298245614035, |
|
"eval_f1": 0.7320072332730561, |
|
"eval_loss": 0.485201358795166, |
|
"eval_precision": 0.7269805119926199, |
|
"eval_recall": 0.7386342971449354, |
|
"eval_runtime": 5.0467, |
|
"eval_samples_per_second": 79.062, |
|
"eval_steps_per_second": 9.907, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.9019949436187744, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.4668, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7669172932330827, |
|
"eval_f1": 0.7195344091304183, |
|
"eval_loss": 0.47377943992614746, |
|
"eval_precision": 0.7189969238192895, |
|
"eval_recall": 0.7200854700854701, |
|
"eval_runtime": 5.0582, |
|
"eval_samples_per_second": 78.882, |
|
"eval_steps_per_second": 9.885, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 14.114830017089844, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.4622, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7719298245614035, |
|
"eval_f1": 0.7295157072938161, |
|
"eval_loss": 0.47686928510665894, |
|
"eval_precision": 0.7260557184750733, |
|
"eval_recall": 0.7336333878887071, |
|
"eval_runtime": 5.045, |
|
"eval_samples_per_second": 79.088, |
|
"eval_steps_per_second": 9.911, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.223310947418213, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4621, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7493734335839599, |
|
"eval_f1": 0.6685826300750881, |
|
"eval_loss": 0.46253928542137146, |
|
"eval_precision": 0.6949044585987261, |
|
"eval_recall": 0.6576650300054556, |
|
"eval_runtime": 5.052, |
|
"eval_samples_per_second": 78.979, |
|
"eval_steps_per_second": 9.897, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.2552032470703125, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.4561, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7769423558897243, |
|
"eval_f1": 0.7199470035725271, |
|
"eval_loss": 0.4609311521053314, |
|
"eval_precision": 0.7310853634383045, |
|
"eval_recall": 0.7121749408983451, |
|
"eval_runtime": 5.0567, |
|
"eval_samples_per_second": 78.905, |
|
"eval_steps_per_second": 9.888, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.524580240249634, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.4519, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7669172932330827, |
|
"eval_f1": 0.6822078533807219, |
|
"eval_loss": 0.4608353078365326, |
|
"eval_precision": 0.7252321981424149, |
|
"eval_recall": 0.6675759228950718, |
|
"eval_runtime": 5.0558, |
|
"eval_samples_per_second": 78.919, |
|
"eval_steps_per_second": 9.89, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 3.3073718547821045, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.4413, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7694235588972431, |
|
"eval_f1": 0.7079992363497518, |
|
"eval_loss": 0.4543740451335907, |
|
"eval_precision": 0.7214646464646465, |
|
"eval_recall": 0.6993544280778323, |
|
"eval_runtime": 5.0512, |
|
"eval_samples_per_second": 78.992, |
|
"eval_steps_per_second": 9.899, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 6.039628982543945, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4449, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7844611528822055, |
|
"eval_f1": 0.7412841546534773, |
|
"eval_loss": 0.4569094777107239, |
|
"eval_precision": 0.7401260504201681, |
|
"eval_recall": 0.7424986361156574, |
|
"eval_runtime": 5.0491, |
|
"eval_samples_per_second": 79.023, |
|
"eval_steps_per_second": 9.903, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.8723957538604736, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.4506, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7644110275689223, |
|
"eval_f1": 0.6821309919316564, |
|
"eval_loss": 0.4527250826358795, |
|
"eval_precision": 0.7196598101265823, |
|
"eval_recall": 0.6683033278777959, |
|
"eval_runtime": 5.0532, |
|
"eval_samples_per_second": 78.959, |
|
"eval_steps_per_second": 9.895, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 2.8429393768310547, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.4446, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7794486215538847, |
|
"eval_f1": 0.7120834426659669, |
|
"eval_loss": 0.4487856924533844, |
|
"eval_precision": 0.7379122870605291, |
|
"eval_recall": 0.69894526277505, |
|
"eval_runtime": 5.0594, |
|
"eval_samples_per_second": 78.864, |
|
"eval_steps_per_second": 9.883, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 2.688943386077881, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.4426, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7869674185463659, |
|
"eval_f1": 0.7355039968804835, |
|
"eval_loss": 0.44907739758491516, |
|
"eval_precision": 0.7435604353145727, |
|
"eval_recall": 0.7292689579923622, |
|
"eval_runtime": 5.0481, |
|
"eval_samples_per_second": 79.039, |
|
"eval_steps_per_second": 9.905, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 2.842677593231201, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4409, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7719298245614035, |
|
"eval_f1": 0.7068474127421138, |
|
"eval_loss": 0.44651278853416443, |
|
"eval_precision": 0.725706313219393, |
|
"eval_recall": 0.696126568466994, |
|
"eval_runtime": 5.0391, |
|
"eval_samples_per_second": 79.18, |
|
"eval_steps_per_second": 9.922, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.7264163494110107, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.4348, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7869674185463659, |
|
"eval_f1": 0.7355039968804835, |
|
"eval_loss": 0.4473975598812103, |
|
"eval_precision": 0.7435604353145727, |
|
"eval_recall": 0.7292689579923622, |
|
"eval_runtime": 5.0762, |
|
"eval_samples_per_second": 78.602, |
|
"eval_steps_per_second": 9.85, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.3701038360595703, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4478, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7844611528822055, |
|
"eval_f1": 0.7301509908776345, |
|
"eval_loss": 0.4460136294364929, |
|
"eval_precision": 0.7407832589871425, |
|
"eval_recall": 0.7224949990907438, |
|
"eval_runtime": 5.1002, |
|
"eval_samples_per_second": 78.233, |
|
"eval_steps_per_second": 9.804, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.103198289871216, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.4382, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7869674185463659, |
|
"eval_f1": 0.7309977236133474, |
|
"eval_loss": 0.44484180212020874, |
|
"eval_precision": 0.7447157190635452, |
|
"eval_recall": 0.7217675941080197, |
|
"eval_runtime": 5.061, |
|
"eval_samples_per_second": 78.838, |
|
"eval_steps_per_second": 9.879, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 6.061123847961426, |
|
"learning_rate": 0.0, |
|
"loss": 0.4313, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7869674185463659, |
|
"eval_f1": 0.7325336550973572, |
|
"eval_loss": 0.445127934217453, |
|
"eval_precision": 0.7442562883739354, |
|
"eval_recall": 0.7242680487361338, |
|
"eval_runtime": 5.049, |
|
"eval_samples_per_second": 79.025, |
|
"eval_steps_per_second": 9.903, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7588990440528000.0, |
|
"train_loss": 0.4626342241881324, |
|
"train_runtime": 1944.0727, |
|
"train_samples_per_second": 37.427, |
|
"train_steps_per_second": 1.255 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7588990440528000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|