lilt-xlm-roberta-base-finetuned-DocLayNet-large_paragraphs_ml512-v1-cp5000
/
checkpoint-4700
/trainer_state.json
{ | |
"best_metric": 0.8966457253905286, | |
"best_model_checkpoint": "DocLayNet/lilt-xlm-roberta-base-finetuned-DocLayNet-large_paragraphs_ml512-v1\\checkpoint-3300", | |
"epoch": 0.24949570018048625, | |
"eval_steps": 100, | |
"global_step": 4700, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.01, | |
"eval_accuracy": 0.7151480521438768, | |
"eval_f1": 0.7151480521438768, | |
"eval_loss": 0.8854337930679321, | |
"eval_precision": 0.7151480521438768, | |
"eval_recall": 0.7151480521438768, | |
"eval_runtime": 178.7293, | |
"eval_samples_per_second": 89.291, | |
"eval_steps_per_second": 5.584, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.01, | |
"eval_accuracy": 0.7820361348614215, | |
"eval_f1": 0.7820361348614215, | |
"eval_loss": 0.6539337038993835, | |
"eval_precision": 0.7820361348614215, | |
"eval_recall": 0.7820361348614215, | |
"eval_runtime": 180.7707, | |
"eval_samples_per_second": 88.283, | |
"eval_steps_per_second": 5.521, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.02, | |
"eval_accuracy": 0.7994587607645203, | |
"eval_f1": 0.7994587607645203, | |
"eval_loss": 0.6309346556663513, | |
"eval_precision": 0.7994587607645203, | |
"eval_recall": 0.7994587607645203, | |
"eval_runtime": 181.7301, | |
"eval_samples_per_second": 87.817, | |
"eval_steps_per_second": 5.492, | |
"step": 300 | |
}, | |
{ | |
"epoch": 0.02, | |
"eval_accuracy": 0.7618061280264906, | |
"eval_f1": 0.7618061280264905, | |
"eval_loss": 0.6511958837509155, | |
"eval_precision": 0.7618061280264906, | |
"eval_recall": 0.7618061280264906, | |
"eval_runtime": 180.2148, | |
"eval_samples_per_second": 88.555, | |
"eval_steps_per_second": 5.538, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 1.9473404820044594e-05, | |
"loss": 0.7733, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.03, | |
"eval_accuracy": 0.8603838870222617, | |
"eval_f1": 0.8603838870222617, | |
"eval_loss": 0.45408895611763, | |
"eval_precision": 0.8603838870222617, | |
"eval_recall": 0.8603838870222617, | |
"eval_runtime": 179.2793, | |
"eval_samples_per_second": 89.018, | |
"eval_steps_per_second": 5.567, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.03, | |
"eval_accuracy": 0.8186640239589626, | |
"eval_f1": 0.8186640239589626, | |
"eval_loss": 0.588881254196167, | |
"eval_precision": 0.8186640239589626, | |
"eval_recall": 0.8186640239589626, | |
"eval_runtime": 178.9676, | |
"eval_samples_per_second": 89.173, | |
"eval_steps_per_second": 5.576, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.04, | |
"eval_accuracy": 0.8374099877288685, | |
"eval_f1": 0.8374099877288685, | |
"eval_loss": 0.5701665878295898, | |
"eval_precision": 0.8374099877288685, | |
"eval_recall": 0.8374099877288685, | |
"eval_runtime": 179.5722, | |
"eval_samples_per_second": 88.872, | |
"eval_steps_per_second": 5.558, | |
"step": 700 | |
}, | |
{ | |
"epoch": 0.04, | |
"eval_accuracy": 0.8645771022868747, | |
"eval_f1": 0.8645771022868747, | |
"eval_loss": 0.45831653475761414, | |
"eval_precision": 0.8645771022868747, | |
"eval_recall": 0.8645771022868747, | |
"eval_runtime": 178.4787, | |
"eval_samples_per_second": 89.417, | |
"eval_steps_per_second": 5.592, | |
"step": 800 | |
}, | |
{ | |
"epoch": 0.05, | |
"eval_accuracy": 0.8684816433991331, | |
"eval_f1": 0.8684816433991331, | |
"eval_loss": 0.4440499544143677, | |
"eval_precision": 0.8684816433991331, | |
"eval_recall": 0.8684816433991331, | |
"eval_runtime": 179.4524, | |
"eval_samples_per_second": 88.932, | |
"eval_steps_per_second": 5.561, | |
"step": 900 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 1.894256290476696e-05, | |
"loss": 0.4221, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.05, | |
"eval_accuracy": 0.8310532043326915, | |
"eval_f1": 0.8310532043326915, | |
"eval_loss": 0.49495837092399597, | |
"eval_precision": 0.8310532043326915, | |
"eval_recall": 0.8310532043326915, | |
"eval_runtime": 177.3364, | |
"eval_samples_per_second": 89.993, | |
"eval_steps_per_second": 5.628, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.06, | |
"eval_accuracy": 0.8420863105955319, | |
"eval_f1": 0.8420863105955319, | |
"eval_loss": 0.563329815864563, | |
"eval_precision": 0.8420863105955319, | |
"eval_recall": 0.8420863105955319, | |
"eval_runtime": 177.8626, | |
"eval_samples_per_second": 89.727, | |
"eval_steps_per_second": 5.611, | |
"step": 1100 | |
}, | |
{ | |
"epoch": 0.06, | |
"eval_accuracy": 0.850556069746056, | |
"eval_f1": 0.850556069746056, | |
"eval_loss": 0.4411052465438843, | |
"eval_precision": 0.850556069746056, | |
"eval_recall": 0.850556069746056, | |
"eval_runtime": 177.3605, | |
"eval_samples_per_second": 89.981, | |
"eval_steps_per_second": 5.627, | |
"step": 1200 | |
}, | |
{ | |
"epoch": 0.07, | |
"eval_accuracy": 0.8742392543278803, | |
"eval_f1": 0.8742392543278803, | |
"eval_loss": 0.4354064464569092, | |
"eval_precision": 0.8742392543278803, | |
"eval_recall": 0.8742392543278803, | |
"eval_runtime": 177.2446, | |
"eval_samples_per_second": 90.039, | |
"eval_steps_per_second": 5.631, | |
"step": 1300 | |
}, | |
{ | |
"epoch": 0.07, | |
"eval_accuracy": 0.8777866727774323, | |
"eval_f1": 0.8777866727774322, | |
"eval_loss": 0.3980981111526489, | |
"eval_precision": 0.8777866727774323, | |
"eval_recall": 0.8777866727774323, | |
"eval_runtime": 177.4209, | |
"eval_samples_per_second": 89.95, | |
"eval_steps_per_second": 5.625, | |
"step": 1400 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 1.8412782673319888e-05, | |
"loss": 0.3623, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 0.08, | |
"eval_accuracy": 0.8690242714449679, | |
"eval_f1": 0.8690242714449679, | |
"eval_loss": 0.42456910014152527, | |
"eval_precision": 0.8690242714449679, | |
"eval_recall": 0.8690242714449679, | |
"eval_runtime": 177.8504, | |
"eval_samples_per_second": 89.733, | |
"eval_steps_per_second": 5.611, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 0.08, | |
"eval_accuracy": 0.8831891450582655, | |
"eval_f1": 0.8831891450582655, | |
"eval_loss": 0.4083055853843689, | |
"eval_precision": 0.8831891450582655, | |
"eval_recall": 0.8831891450582655, | |
"eval_runtime": 177.1688, | |
"eval_samples_per_second": 90.078, | |
"eval_steps_per_second": 5.633, | |
"step": 1600 | |
}, | |
{ | |
"epoch": 0.09, | |
"eval_accuracy": 0.8346978395070913, | |
"eval_f1": 0.8346978395070913, | |
"eval_loss": 0.5854523181915283, | |
"eval_precision": 0.8346978395070913, | |
"eval_recall": 0.8346978395070913, | |
"eval_runtime": 177.5366, | |
"eval_samples_per_second": 89.891, | |
"eval_steps_per_second": 5.621, | |
"step": 1700 | |
}, | |
{ | |
"epoch": 0.1, | |
"eval_accuracy": 0.8867871558850343, | |
"eval_f1": 0.8867871558850343, | |
"eval_loss": 0.39148494601249695, | |
"eval_precision": 0.8867871558850343, | |
"eval_recall": 0.8867871558850343, | |
"eval_runtime": 177.8685, | |
"eval_samples_per_second": 89.724, | |
"eval_steps_per_second": 5.611, | |
"step": 1800 | |
}, | |
{ | |
"epoch": 0.1, | |
"eval_accuracy": 0.8668835194835213, | |
"eval_f1": 0.8668835194835214, | |
"eval_loss": 0.44608378410339355, | |
"eval_precision": 0.8668835194835213, | |
"eval_recall": 0.8668835194835213, | |
"eval_runtime": 177.5721, | |
"eval_samples_per_second": 89.873, | |
"eval_steps_per_second": 5.62, | |
"step": 1900 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 1.7881940758042255e-05, | |
"loss": 0.3762, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 0.11, | |
"eval_accuracy": 0.8843170574679805, | |
"eval_f1": 0.8843170574679804, | |
"eval_loss": 0.3827630281448364, | |
"eval_precision": 0.8843170574679805, | |
"eval_recall": 0.8843170574679805, | |
"eval_runtime": 177.7736, | |
"eval_samples_per_second": 89.771, | |
"eval_steps_per_second": 5.614, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 0.11, | |
"eval_accuracy": 0.8833111619680236, | |
"eval_f1": 0.8833111619680236, | |
"eval_loss": 0.41039273142814636, | |
"eval_precision": 0.8833111619680236, | |
"eval_recall": 0.8833111619680236, | |
"eval_runtime": 177.7324, | |
"eval_samples_per_second": 89.792, | |
"eval_steps_per_second": 5.615, | |
"step": 2100 | |
}, | |
{ | |
"epoch": 0.12, | |
"eval_accuracy": 0.8817051019932405, | |
"eval_f1": 0.8817051019932405, | |
"eval_loss": 0.41124606132507324, | |
"eval_precision": 0.8817051019932405, | |
"eval_recall": 0.8817051019932405, | |
"eval_runtime": 177.9802, | |
"eval_samples_per_second": 89.667, | |
"eval_steps_per_second": 5.607, | |
"step": 2200 | |
}, | |
{ | |
"epoch": 0.12, | |
"eval_accuracy": 0.8908226419736186, | |
"eval_f1": 0.8908226419736186, | |
"eval_loss": 0.3715788424015045, | |
"eval_precision": 0.8908226419736186, | |
"eval_recall": 0.8908226419736186, | |
"eval_runtime": 181.2362, | |
"eval_samples_per_second": 88.056, | |
"eval_steps_per_second": 5.507, | |
"step": 2300 | |
}, | |
{ | |
"epoch": 0.13, | |
"eval_accuracy": 0.8952380668950267, | |
"eval_f1": 0.8952380668950267, | |
"eval_loss": 0.38714146614074707, | |
"eval_precision": 0.8952380668950267, | |
"eval_recall": 0.8952380668950267, | |
"eval_runtime": 185.8786, | |
"eval_samples_per_second": 85.857, | |
"eval_steps_per_second": 5.369, | |
"step": 2400 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 1.7351098842764628e-05, | |
"loss": 0.3049, | |
"step": 2500 | |
}, | |
{ | |
"epoch": 0.13, | |
"eval_accuracy": 0.8558087489100319, | |
"eval_f1": 0.8558087489100319, | |
"eval_loss": 0.5354240536689758, | |
"eval_precision": 0.8558087489100319, | |
"eval_recall": 0.8558087489100319, | |
"eval_runtime": 179.1088, | |
"eval_samples_per_second": 89.102, | |
"eval_steps_per_second": 5.572, | |
"step": 2500 | |
}, | |
{ | |
"epoch": 0.14, | |
"eval_accuracy": 0.8584157443477899, | |
"eval_f1": 0.8584157443477899, | |
"eval_loss": 0.5085635781288147, | |
"eval_precision": 0.8584157443477899, | |
"eval_recall": 0.8584157443477899, | |
"eval_runtime": 178.0144, | |
"eval_samples_per_second": 89.65, | |
"eval_steps_per_second": 5.606, | |
"step": 2600 | |
}, | |
{ | |
"epoch": 0.14, | |
"eval_accuracy": 0.8527087257962596, | |
"eval_f1": 0.8527087257962596, | |
"eval_loss": 0.5149290561676025, | |
"eval_precision": 0.8527087257962596, | |
"eval_recall": 0.8527087257962596, | |
"eval_runtime": 178.6366, | |
"eval_samples_per_second": 89.338, | |
"eval_steps_per_second": 5.587, | |
"step": 2700 | |
}, | |
{ | |
"epoch": 0.15, | |
"eval_accuracy": 0.8276535949852042, | |
"eval_f1": 0.8276535949852042, | |
"eval_loss": 0.7321985363960266, | |
"eval_precision": 0.8276535949852042, | |
"eval_recall": 0.8276535949852042, | |
"eval_runtime": 180.6283, | |
"eval_samples_per_second": 88.353, | |
"eval_steps_per_second": 5.525, | |
"step": 2800 | |
}, | |
{ | |
"epoch": 0.15, | |
"eval_accuracy": 0.8446089854045952, | |
"eval_f1": 0.8446089854045952, | |
"eval_loss": 0.5994013547897339, | |
"eval_precision": 0.8446089854045952, | |
"eval_recall": 0.8446089854045952, | |
"eval_runtime": 181.3155, | |
"eval_samples_per_second": 88.018, | |
"eval_steps_per_second": 5.504, | |
"step": 2900 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 1.6820256927486995e-05, | |
"loss": 0.272, | |
"step": 3000 | |
}, | |
{ | |
"epoch": 0.16, | |
"eval_accuracy": 0.8696780043192002, | |
"eval_f1": 0.8696780043192002, | |
"eval_loss": 0.5098685622215271, | |
"eval_precision": 0.8696780043192002, | |
"eval_recall": 0.8696780043192002, | |
"eval_runtime": 182.3242, | |
"eval_samples_per_second": 87.531, | |
"eval_steps_per_second": 5.474, | |
"step": 3000 | |
}, | |
{ | |
"epoch": 0.16, | |
"eval_accuracy": 0.8661514180249728, | |
"eval_f1": 0.8661514180249729, | |
"eval_loss": 0.46759557723999023, | |
"eval_precision": 0.8661514180249728, | |
"eval_recall": 0.8661514180249728, | |
"eval_runtime": 185.214, | |
"eval_samples_per_second": 86.165, | |
"eval_steps_per_second": 5.388, | |
"step": 3100 | |
}, | |
{ | |
"epoch": 0.17, | |
"eval_accuracy": 0.8626764161153585, | |
"eval_f1": 0.8626764161153585, | |
"eval_loss": 0.5452213287353516, | |
"eval_precision": 0.8626764161153585, | |
"eval_recall": 0.8626764161153585, | |
"eval_runtime": 181.9867, | |
"eval_samples_per_second": 87.693, | |
"eval_steps_per_second": 5.484, | |
"step": 3200 | |
}, | |
{ | |
"epoch": 0.18, | |
"eval_accuracy": 0.8966457253905286, | |
"eval_f1": 0.8966457253905286, | |
"eval_loss": 0.38049089908599854, | |
"eval_precision": 0.8966457253905286, | |
"eval_recall": 0.8966457253905286, | |
"eval_runtime": 183.5682, | |
"eval_samples_per_second": 86.938, | |
"eval_steps_per_second": 5.437, | |
"step": 3300 | |
}, | |
{ | |
"epoch": 0.18, | |
"eval_accuracy": 0.887366488204536, | |
"eval_f1": 0.887366488204536, | |
"eval_loss": 0.41261956095695496, | |
"eval_precision": 0.887366488204536, | |
"eval_recall": 0.887366488204536, | |
"eval_runtime": 186.6798, | |
"eval_samples_per_second": 85.489, | |
"eval_steps_per_second": 5.346, | |
"step": 3400 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 1.629047669603992e-05, | |
"loss": 0.3686, | |
"step": 3500 | |
}, | |
{ | |
"epoch": 0.19, | |
"eval_accuracy": 0.8557442684292654, | |
"eval_f1": 0.8557442684292654, | |
"eval_loss": 0.5042837262153625, | |
"eval_precision": 0.8557442684292654, | |
"eval_recall": 0.8557442684292654, | |
"eval_runtime": 186.7579, | |
"eval_samples_per_second": 85.453, | |
"eval_steps_per_second": 5.344, | |
"step": 3500 | |
}, | |
{ | |
"epoch": 0.19, | |
"eval_accuracy": 0.8617965055547454, | |
"eval_f1": 0.8617965055547454, | |
"eval_loss": 0.46751198172569275, | |
"eval_precision": 0.8617965055547454, | |
"eval_recall": 0.8617965055547454, | |
"eval_runtime": 198.6805, | |
"eval_samples_per_second": 80.325, | |
"eval_steps_per_second": 5.023, | |
"step": 3600 | |
}, | |
{ | |
"epoch": 0.2, | |
"eval_accuracy": 0.8682514976831667, | |
"eval_f1": 0.8682514976831667, | |
"eval_loss": 0.4736296832561493, | |
"eval_precision": 0.8682514976831667, | |
"eval_recall": 0.8682514976831667, | |
"eval_runtime": 198.48, | |
"eval_samples_per_second": 80.406, | |
"eval_steps_per_second": 5.028, | |
"step": 3700 | |
}, | |
{ | |
"epoch": 0.2, | |
"eval_accuracy": 0.8476574241337543, | |
"eval_f1": 0.8476574241337542, | |
"eval_loss": 0.5243175029754639, | |
"eval_precision": 0.8476574241337543, | |
"eval_recall": 0.8476574241337543, | |
"eval_runtime": 193.5062, | |
"eval_samples_per_second": 82.473, | |
"eval_steps_per_second": 5.157, | |
"step": 3800 | |
}, | |
{ | |
"epoch": 0.21, | |
"eval_accuracy": 0.8786953515525412, | |
"eval_f1": 0.8786953515525412, | |
"eval_loss": 0.4317740201950073, | |
"eval_precision": 0.8786953515525412, | |
"eval_recall": 0.8786953515525412, | |
"eval_runtime": 191.9223, | |
"eval_samples_per_second": 83.153, | |
"eval_steps_per_second": 5.2, | |
"step": 3900 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 1.575963478076229e-05, | |
"loss": 0.2712, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 0.21, | |
"eval_accuracy": 0.8783630290747447, | |
"eval_f1": 0.8783630290747447, | |
"eval_loss": 0.43737560510635376, | |
"eval_precision": 0.8783630290747447, | |
"eval_recall": 0.8783630290747447, | |
"eval_runtime": 190.6186, | |
"eval_samples_per_second": 83.722, | |
"eval_steps_per_second": 5.236, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 0.22, | |
"eval_accuracy": 0.8703069370085224, | |
"eval_f1": 0.8703069370085224, | |
"eval_loss": 0.5073068737983704, | |
"eval_precision": 0.8703069370085224, | |
"eval_recall": 0.8703069370085224, | |
"eval_runtime": 186.0675, | |
"eval_samples_per_second": 85.77, | |
"eval_steps_per_second": 5.364, | |
"step": 4100 | |
}, | |
{ | |
"epoch": 0.22, | |
"eval_accuracy": 0.8908900984765742, | |
"eval_f1": 0.8908900984765742, | |
"eval_loss": 0.4120965301990509, | |
"eval_precision": 0.8908900984765742, | |
"eval_recall": 0.8908900984765742, | |
"eval_runtime": 178.3397, | |
"eval_samples_per_second": 89.487, | |
"eval_steps_per_second": 5.596, | |
"step": 4200 | |
}, | |
{ | |
"epoch": 0.23, | |
"eval_accuracy": 0.8742471903870516, | |
"eval_f1": 0.8742471903870516, | |
"eval_loss": 0.481146901845932, | |
"eval_precision": 0.8742471903870516, | |
"eval_recall": 0.8742471903870516, | |
"eval_runtime": 179.3822, | |
"eval_samples_per_second": 88.966, | |
"eval_steps_per_second": 5.564, | |
"step": 4300 | |
}, | |
{ | |
"epoch": 0.23, | |
"eval_accuracy": 0.8648419682617153, | |
"eval_f1": 0.8648419682617154, | |
"eval_loss": 0.5382417440414429, | |
"eval_precision": 0.8648419682617153, | |
"eval_recall": 0.8648419682617153, | |
"eval_runtime": 180.6189, | |
"eval_samples_per_second": 88.357, | |
"eval_steps_per_second": 5.525, | |
"step": 4400 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 1.5228792865484661e-05, | |
"loss": 0.2641, | |
"step": 4500 | |
}, | |
{ | |
"epoch": 0.24, | |
"eval_accuracy": 0.8675372523577536, | |
"eval_f1": 0.8675372523577536, | |
"eval_loss": 0.5521109104156494, | |
"eval_precision": 0.8675372523577536, | |
"eval_recall": 0.8675372523577536, | |
"eval_runtime": 178.45, | |
"eval_samples_per_second": 89.431, | |
"eval_steps_per_second": 5.593, | |
"step": 4500 | |
}, | |
{ | |
"epoch": 0.24, | |
"eval_accuracy": 0.8621943005207047, | |
"eval_f1": 0.8621943005207047, | |
"eval_loss": 0.5831220149993896, | |
"eval_precision": 0.8621943005207047, | |
"eval_recall": 0.8621943005207047, | |
"eval_runtime": 258.7275, | |
"eval_samples_per_second": 61.683, | |
"eval_steps_per_second": 3.857, | |
"step": 4600 | |
}, | |
{ | |
"epoch": 0.25, | |
"eval_accuracy": 0.871682851267339, | |
"eval_f1": 0.871682851267339, | |
"eval_loss": 0.46240246295928955, | |
"eval_precision": 0.871682851267339, | |
"eval_recall": 0.871682851267339, | |
"eval_runtime": 2013.7143, | |
"eval_samples_per_second": 7.925, | |
"eval_steps_per_second": 0.496, | |
"step": 4700 | |
} | |
], | |
"logging_steps": 500, | |
"max_steps": 18838, | |
"num_train_epochs": 1, | |
"save_steps": 100, | |
"total_flos": 1.04592761806848e+16, | |
"trial_name": null, | |
"trial_params": null | |
} | |