File size: 11,092 Bytes
40603e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
2023-02-22 10:29:59,083 [INFO ] |experiment_dir |gigafida_multi_case_fg_sloberta256 |
2023-02-22 10:29:59,083 [INFO ] |train_path |...an-publishers-mala-velika-multilabel/train.json|
2023-02-22 10:29:59,083 [INFO ] |dev_path |...lean-publishers-mala-velika-multilabel/dev.json|
2023-02-22 10:29:59,083 [INFO ] |pretrained_name_or_path |EMBEDDIA/sloberta |
2023-02-22 10:29:59,083 [INFO ] |max_length |256 |
2023-02-22 10:29:59,083 [INFO ] |num_epochs |3 |
2023-02-22 10:29:59,083 [INFO ] |learning_rate |2e-05 |
2023-02-22 10:29:59,083 [INFO ] |batch_size |32 |
2023-02-22 10:29:59,083 [INFO ] |validate_every_n_steps |300000 |
2023-02-22 10:29:59,084 [INFO ] |early_stopping_rounds |999 |
2023-02-22 10:29:59,084 [INFO ] |input_column |words_lc |
2023-02-22 10:29:59,084 [INFO ] |target_column |case |
2023-02-22 10:29:59,084 [INFO ] |class_encoding_path |...-publishers-mala-velika-multilabel/classes.json|
2023-02-22 10:29:59,084 [INFO ] |use_cpu |False |
2023-02-22 10:29:59,084 [INFO ] Using class encoding:
{
"LOWER_OTHER": 0,
"LOWER_HYPERCORRECTION": 1,
"LOWER_ADJ_SKI": 2,
"LOWER_ENTITY_PART": 3,
"UPPER_OTHER": 4,
"UPPER_BEGIN": 5,
"UPPER_ENTITY": 6,
"UPPER_DIRECT_SPEECH": 7,
"UPPER_ADJ_OTHER": 8,
"UPPER_ALLUC_OTHER": 9,
"UPPER_ALLUC_BEGIN": 10,
"UPPER_ALLUC_ENTITY": 11,
"UNCAP": -100
}
2023-02-22 13:34:10,808 [INFO ] Loaded 2652880 training examples, 166737 validation examples.
2023-02-22 13:34:10,912 [INFO ] Epoch #1
2023-02-22 13:34:10,913 [INFO ] Subset #1
2023-02-22 14:52:36,449 [INFO ] Training loss: 0.0271
2023-02-22 15:10:55,902 [INFO ] Dev macro F1 = 0.8745
2023-02-22 15:10:55,903 [INFO ] New best macro F1 = 0.8745
([0.995, 0.7376, 0.9809, 0.7119, 0.643, 0.9873, 0.9536, 0.9807, 0.9202, 0.7135, 0.9679, 0.9025]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-22 15:10:57,034 [INFO ] Subset #2
2023-02-22 16:29:16,755 [INFO ] Training loss: 0.0174
2023-02-22 16:47:35,042 [INFO ] Dev macro F1 = 0.8908
2023-02-22 16:47:35,043 [INFO ] New best macro F1 = 0.8908
([0.9956, 0.7957, 0.9821, 0.7471, 0.6683, 0.9887, 0.9579, 0.9832, 0.9285, 0.756, 0.9706, 0.9165]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-22 16:47:36,125 [INFO ] Subset #3
2023-02-22 18:05:55,544 [INFO ] Training loss: 0.0139
2023-02-22 18:24:12,701 [INFO ] Dev macro F1 = 0.8978
2023-02-22 18:24:12,702 [INFO ] New best macro F1 = 0.8978
([0.9958, 0.7877, 0.9835, 0.7566, 0.6912, 0.9891, 0.9604, 0.9836, 0.9317, 0.7991, 0.974, 0.9208]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-22 18:24:13,804 [INFO ] Subset #4
2023-02-22 19:42:27,790 [INFO ] Training loss: 0.0121
2023-02-22 20:00:41,833 [INFO ] Dev macro F1 = 0.9007
2023-02-22 20:00:41,834 [INFO ] New best macro F1 = 0.9007
([0.9959, 0.8197, 0.984, 0.7597, 0.6966, 0.9893, 0.9616, 0.9835, 0.9334, 0.7914, 0.9737, 0.9193]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-22 20:00:43,000 [INFO ] Subset #5
2023-02-22 21:18:59,194 [INFO ] Training loss: 0.0110
2023-02-22 21:37:11,791 [INFO ] Dev macro F1 = 0.9038
2023-02-22 21:37:11,792 [INFO ] New best macro F1 = 0.9038
([0.9961, 0.8296, 0.9838, 0.7709, 0.6851, 0.9899, 0.9623, 0.9813, 0.9343, 0.8091, 0.9761, 0.9268]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-22 21:37:12,829 [INFO ] Subset #6
2023-02-22 22:55:32,855 [INFO ] Training loss: 0.0102
2023-02-22 23:13:49,305 [INFO ] Dev macro F1 = 0.9084
2023-02-22 23:13:49,306 [INFO ] New best macro F1 = 0.9084
([0.9961, 0.8353, 0.9846, 0.7777, 0.713, 0.9898, 0.9635, 0.9847, 0.9374, 0.8131, 0.9764, 0.9293]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-22 23:13:50,334 [INFO ] Subset #7
2023-02-23 00:32:18,993 [INFO ] Training loss: 0.0096
2023-02-23 00:50:32,151 [INFO ] Dev macro F1 = 0.9085
2023-02-23 00:50:32,151 [INFO ] New best macro F1 = 0.9085
([0.9961, 0.8392, 0.9849, 0.7667, 0.7116, 0.9899, 0.9639, 0.9852, 0.9376, 0.8197, 0.9774, 0.9299]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 00:50:33,174 [INFO ] Subset #8
2023-02-23 02:08:49,459 [INFO ] Training loss: 0.0092
2023-02-23 02:27:04,519 [INFO ] Dev macro F1 = 0.9106
2023-02-23 02:27:04,521 [INFO ] New best macro F1 = 0.9106
([0.9963, 0.8464, 0.9853, 0.7808, 0.7133, 0.99, 0.9645, 0.9851, 0.9372, 0.8192, 0.9771, 0.9321]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 02:27:05,553 [INFO ] Subset #9
2023-02-23 03:33:09,800 [INFO ] Training loss: 0.0089
2023-02-23 03:51:29,228 [INFO ] Dev macro F1 = 0.9117
2023-02-23 03:51:29,228 [INFO ] New best macro F1 = 0.9117
([0.9963, 0.8437, 0.9852, 0.7787, 0.7232, 0.9901, 0.9643, 0.9851, 0.9391, 0.8272, 0.9777, 0.9298]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 03:51:30,347 [INFO ] Epoch #2
2023-02-23 03:51:30,347 [INFO ] Subset #1
2023-02-23 05:09:58,233 [INFO ] Training loss: 0.0052
2023-02-23 05:28:16,763 [INFO ] Dev macro F1 = 0.9133
2023-02-23 05:28:16,763 [INFO ] New best macro F1 = 0.9133
([0.9963, 0.8438, 0.9854, 0.7847, 0.7282, 0.99, 0.9647, 0.9855, 0.9393, 0.8333, 0.9776, 0.9303]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 05:28:17,805 [INFO ] Subset #2
2023-02-23 06:46:44,412 [INFO ] Training loss: 0.0052
2023-02-23 07:04:59,668 [INFO ] Dev macro F1 = 0.9145
2023-02-23 07:04:59,668 [INFO ] New best macro F1 = 0.9145
([0.9963, 0.8482, 0.9857, 0.7795, 0.7377, 0.99, 0.9652, 0.9852, 0.9405, 0.8362, 0.9778, 0.9312]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 07:05:00,671 [INFO ] Subset #3
2023-02-23 08:23:23,919 [INFO ] Training loss: 0.0052
2023-02-23 08:41:39,100 [INFO ] Dev macro F1 = 0.9148
2023-02-23 08:41:39,101 [INFO ] New best macro F1 = 0.9148
([0.9962, 0.8421, 0.9853, 0.7931, 0.7393, 0.9901, 0.9652, 0.9851, 0.9392, 0.8313, 0.9772, 0.9331]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 08:41:40,114 [INFO ] Subset #4
2023-02-23 10:00:17,635 [INFO ] Training loss: 0.0052
2023-02-23 10:18:31,362 [INFO ] Dev macro F1 = 0.9161
2023-02-23 10:18:31,362 [INFO ] New best macro F1 = 0.9161
([0.9964, 0.851, 0.9857, 0.7905, 0.7375, 0.9902, 0.966, 0.9853, 0.9413, 0.8361, 0.9779, 0.9347]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 10:18:32,356 [INFO ] Subset #5
2023-02-23 11:36:54,299 [INFO ] Training loss: 0.0052
2023-02-23 11:55:05,378 [INFO ] Dev macro F1 = 0.9147
2023-02-23 11:55:05,379 [INFO ] Subset #6
2023-02-23 13:13:16,174 [INFO ] Training loss: 0.0052
2023-02-23 13:31:28,612 [INFO ] Dev macro F1 = 0.9139
2023-02-23 13:31:28,613 [INFO ] Subset #7
2023-02-23 14:49:40,826 [INFO ] Training loss: 0.0052
2023-02-23 15:07:51,390 [INFO ] Dev macro F1 = 0.9159
2023-02-23 15:07:51,390 [INFO ] Subset #8
2023-02-23 16:25:51,975 [INFO ] Training loss: 0.0052
2023-02-23 16:44:06,088 [INFO ] Dev macro F1 = 0.9163
2023-02-23 16:44:06,089 [INFO ] New best macro F1 = 0.9163
([0.9963, 0.8482, 0.9861, 0.7968, 0.7263, 0.9904, 0.9654, 0.9859, 0.9423, 0.8414, 0.979, 0.9371]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 16:44:07,077 [INFO ] Subset #9
2023-02-23 17:49:59,943 [INFO ] Training loss: 0.0052
2023-02-23 18:08:12,023 [INFO ] Dev macro F1 = 0.9175
2023-02-23 18:08:12,024 [INFO ] New best macro F1 = 0.9175
([0.9965, 0.8428, 0.9859, 0.7962, 0.7459, 0.9904, 0.967, 0.9854, 0.9427, 0.8409, 0.9785, 0.9373]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 18:08:13,106 [INFO ] Epoch #3
2023-02-23 18:08:13,107 [INFO ] Subset #1
2023-02-23 19:26:19,659 [INFO ] Training loss: 0.0042
2023-02-23 19:44:30,411 [INFO ] Dev macro F1 = 0.9180
2023-02-23 19:44:30,412 [INFO ] New best macro F1 = 0.9180
([0.9964, 0.8528, 0.9858, 0.7957, 0.7448, 0.9903, 0.9662, 0.9849, 0.9417, 0.8425, 0.9784, 0.9365]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 19:44:31,428 [INFO ] Subset #2
2023-02-23 21:02:32,847 [INFO ] Training loss: 0.0042
2023-02-23 21:20:42,523 [INFO ] Dev macro F1 = 0.9171
2023-02-23 21:20:42,523 [INFO ] Subset #3
2023-02-23 22:38:50,203 [INFO ] Training loss: 0.0043
2023-02-23 22:57:02,020 [INFO ] Dev macro F1 = 0.9191
2023-02-23 22:57:02,020 [INFO ] New best macro F1 = 0.9191
([0.9964, 0.8509, 0.9858, 0.7998, 0.7527, 0.9903, 0.9666, 0.9855, 0.9421, 0.8418, 0.9791, 0.9379]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 22:57:03,088 [INFO ] Subset #4
2023-02-24 00:15:28,073 [INFO ] Training loss: 0.0043
2023-02-24 00:33:40,728 [INFO ] Dev macro F1 = 0.9189
2023-02-24 00:33:40,729 [INFO ] Subset #5
2023-02-24 01:52:05,939 [INFO ] Training loss: 0.0043
2023-02-24 02:10:18,483 [INFO ] Dev macro F1 = 0.9184
2023-02-24 02:10:18,484 [INFO ] Subset #6
2023-02-24 03:28:31,701 [INFO ] Training loss: 0.0043
2023-02-24 03:46:44,872 [INFO ] Dev macro F1 = 0.9199
2023-02-24 03:46:44,873 [INFO ] New best macro F1 = 0.9199
([0.9965, 0.8544, 0.9862, 0.7997, 0.7532, 0.9905, 0.967, 0.9849, 0.943, 0.8459, 0.9797, 0.9384]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-24 03:46:45,868 [INFO ] Subset #7
2023-02-24 05:04:57,214 [INFO ] Training loss: 0.0043
2023-02-24 05:23:09,199 [INFO ] Dev macro F1 = 0.9177
2023-02-24 05:23:09,200 [INFO ] Subset #8
2023-02-24 06:41:25,821 [INFO ] Training loss: 0.0043
2023-02-24 06:59:39,709 [INFO ] Dev macro F1 = 0.9190
2023-02-24 06:59:39,710 [INFO ] Subset #9
2023-02-24 08:05:25,875 [INFO ] Training loss: 0.0043
2023-02-24 08:23:40,031 [INFO ] Dev macro F1 = 0.9184
2023-02-24 08:23:40,031 [INFO ] Training took 154169.21s
2023-02-24 08:23:40,031 [INFO ] Best dev F1: 0.9199, using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
|