2023-02-22 10:29:59,083 [INFO ] |experiment_dir |gigafida_multi_case_fg_sloberta256 | 2023-02-22 10:29:59,083 [INFO ] |train_path |...an-publishers-mala-velika-multilabel/train.json| 2023-02-22 10:29:59,083 [INFO ] |dev_path |...lean-publishers-mala-velika-multilabel/dev.json| 2023-02-22 10:29:59,083 [INFO ] |pretrained_name_or_path |EMBEDDIA/sloberta | 2023-02-22 10:29:59,083 [INFO ] |max_length |256 | 2023-02-22 10:29:59,083 [INFO ] |num_epochs |3 | 2023-02-22 10:29:59,083 [INFO ] |learning_rate |2e-05 | 2023-02-22 10:29:59,083 [INFO ] |batch_size |32 | 2023-02-22 10:29:59,083 [INFO ] |validate_every_n_steps |300000 | 2023-02-22 10:29:59,084 [INFO ] |early_stopping_rounds |999 | 2023-02-22 10:29:59,084 [INFO ] |input_column |words_lc | 2023-02-22 10:29:59,084 [INFO ] |target_column |case | 2023-02-22 10:29:59,084 [INFO ] |class_encoding_path |...-publishers-mala-velika-multilabel/classes.json| 2023-02-22 10:29:59,084 [INFO ] |use_cpu |False | 2023-02-22 10:29:59,084 [INFO ] Using class encoding: { "LOWER_OTHER": 0, "LOWER_HYPERCORRECTION": 1, "LOWER_ADJ_SKI": 2, "LOWER_ENTITY_PART": 3, "UPPER_OTHER": 4, "UPPER_BEGIN": 5, "UPPER_ENTITY": 6, "UPPER_DIRECT_SPEECH": 7, "UPPER_ADJ_OTHER": 8, "UPPER_ALLUC_OTHER": 9, "UPPER_ALLUC_BEGIN": 10, "UPPER_ALLUC_ENTITY": 11, "UNCAP": -100 } 2023-02-22 13:34:10,808 [INFO ] Loaded 2652880 training examples, 166737 validation examples. 2023-02-22 13:34:10,912 [INFO ] Epoch #1 2023-02-22 13:34:10,913 [INFO ] Subset #1 2023-02-22 14:52:36,449 [INFO ] Training loss: 0.0271 2023-02-22 15:10:55,902 [INFO ] Dev macro F1 = 0.8745 2023-02-22 15:10:55,903 [INFO ] New best macro F1 = 0.8745 ([0.995, 0.7376, 0.9809, 0.7119, 0.643, 0.9873, 0.9536, 0.9807, 0.9202, 0.7135, 0.9679, 0.9025]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-22 15:10:57,034 [INFO ] Subset #2 2023-02-22 16:29:16,755 [INFO ] Training loss: 0.0174 2023-02-22 16:47:35,042 [INFO ] Dev macro F1 = 0.8908 2023-02-22 16:47:35,043 [INFO ] New best macro F1 = 0.8908 ([0.9956, 0.7957, 0.9821, 0.7471, 0.6683, 0.9887, 0.9579, 0.9832, 0.9285, 0.756, 0.9706, 0.9165]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-22 16:47:36,125 [INFO ] Subset #3 2023-02-22 18:05:55,544 [INFO ] Training loss: 0.0139 2023-02-22 18:24:12,701 [INFO ] Dev macro F1 = 0.8978 2023-02-22 18:24:12,702 [INFO ] New best macro F1 = 0.8978 ([0.9958, 0.7877, 0.9835, 0.7566, 0.6912, 0.9891, 0.9604, 0.9836, 0.9317, 0.7991, 0.974, 0.9208]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-22 18:24:13,804 [INFO ] Subset #4 2023-02-22 19:42:27,790 [INFO ] Training loss: 0.0121 2023-02-22 20:00:41,833 [INFO ] Dev macro F1 = 0.9007 2023-02-22 20:00:41,834 [INFO ] New best macro F1 = 0.9007 ([0.9959, 0.8197, 0.984, 0.7597, 0.6966, 0.9893, 0.9616, 0.9835, 0.9334, 0.7914, 0.9737, 0.9193]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-22 20:00:43,000 [INFO ] Subset #5 2023-02-22 21:18:59,194 [INFO ] Training loss: 0.0110 2023-02-22 21:37:11,791 [INFO ] Dev macro F1 = 0.9038 2023-02-22 21:37:11,792 [INFO ] New best macro F1 = 0.9038 ([0.9961, 0.8296, 0.9838, 0.7709, 0.6851, 0.9899, 0.9623, 0.9813, 0.9343, 0.8091, 0.9761, 0.9268]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-22 21:37:12,829 [INFO ] Subset #6 2023-02-22 22:55:32,855 [INFO ] Training loss: 0.0102 2023-02-22 23:13:49,305 [INFO ] Dev macro F1 = 0.9084 2023-02-22 23:13:49,306 [INFO ] New best macro F1 = 0.9084 ([0.9961, 0.8353, 0.9846, 0.7777, 0.713, 0.9898, 0.9635, 0.9847, 0.9374, 0.8131, 0.9764, 0.9293]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-22 23:13:50,334 [INFO ] Subset #7 2023-02-23 00:32:18,993 [INFO ] Training loss: 0.0096 2023-02-23 00:50:32,151 [INFO ] Dev macro F1 = 0.9085 2023-02-23 00:50:32,151 [INFO ] New best macro F1 = 0.9085 ([0.9961, 0.8392, 0.9849, 0.7667, 0.7116, 0.9899, 0.9639, 0.9852, 0.9376, 0.8197, 0.9774, 0.9299]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-23 00:50:33,174 [INFO ] Subset #8 2023-02-23 02:08:49,459 [INFO ] Training loss: 0.0092 2023-02-23 02:27:04,519 [INFO ] Dev macro F1 = 0.9106 2023-02-23 02:27:04,521 [INFO ] New best macro F1 = 0.9106 ([0.9963, 0.8464, 0.9853, 0.7808, 0.7133, 0.99, 0.9645, 0.9851, 0.9372, 0.8192, 0.9771, 0.9321]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-23 02:27:05,553 [INFO ] Subset #9 2023-02-23 03:33:09,800 [INFO ] Training loss: 0.0089 2023-02-23 03:51:29,228 [INFO ] Dev macro F1 = 0.9117 2023-02-23 03:51:29,228 [INFO ] New best macro F1 = 0.9117 ([0.9963, 0.8437, 0.9852, 0.7787, 0.7232, 0.9901, 0.9643, 0.9851, 0.9391, 0.8272, 0.9777, 0.9298]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-23 03:51:30,347 [INFO ] Epoch #2 2023-02-23 03:51:30,347 [INFO ] Subset #1 2023-02-23 05:09:58,233 [INFO ] Training loss: 0.0052 2023-02-23 05:28:16,763 [INFO ] Dev macro F1 = 0.9133 2023-02-23 05:28:16,763 [INFO ] New best macro F1 = 0.9133 ([0.9963, 0.8438, 0.9854, 0.7847, 0.7282, 0.99, 0.9647, 0.9855, 0.9393, 0.8333, 0.9776, 0.9303]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-23 05:28:17,805 [INFO ] Subset #2 2023-02-23 06:46:44,412 [INFO ] Training loss: 0.0052 2023-02-23 07:04:59,668 [INFO ] Dev macro F1 = 0.9145 2023-02-23 07:04:59,668 [INFO ] New best macro F1 = 0.9145 ([0.9963, 0.8482, 0.9857, 0.7795, 0.7377, 0.99, 0.9652, 0.9852, 0.9405, 0.8362, 0.9778, 0.9312]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-23 07:05:00,671 [INFO ] Subset #3 2023-02-23 08:23:23,919 [INFO ] Training loss: 0.0052 2023-02-23 08:41:39,100 [INFO ] Dev macro F1 = 0.9148 2023-02-23 08:41:39,101 [INFO ] New best macro F1 = 0.9148 ([0.9962, 0.8421, 0.9853, 0.7931, 0.7393, 0.9901, 0.9652, 0.9851, 0.9392, 0.8313, 0.9772, 0.9331]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-23 08:41:40,114 [INFO ] Subset #4 2023-02-23 10:00:17,635 [INFO ] Training loss: 0.0052 2023-02-23 10:18:31,362 [INFO ] Dev macro F1 = 0.9161 2023-02-23 10:18:31,362 [INFO ] New best macro F1 = 0.9161 ([0.9964, 0.851, 0.9857, 0.7905, 0.7375, 0.9902, 0.966, 0.9853, 0.9413, 0.8361, 0.9779, 0.9347]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-23 10:18:32,356 [INFO ] Subset #5 2023-02-23 11:36:54,299 [INFO ] Training loss: 0.0052 2023-02-23 11:55:05,378 [INFO ] Dev macro F1 = 0.9147 2023-02-23 11:55:05,379 [INFO ] Subset #6 2023-02-23 13:13:16,174 [INFO ] Training loss: 0.0052 2023-02-23 13:31:28,612 [INFO ] Dev macro F1 = 0.9139 2023-02-23 13:31:28,613 [INFO ] Subset #7 2023-02-23 14:49:40,826 [INFO ] Training loss: 0.0052 2023-02-23 15:07:51,390 [INFO ] Dev macro F1 = 0.9159 2023-02-23 15:07:51,390 [INFO ] Subset #8 2023-02-23 16:25:51,975 [INFO ] Training loss: 0.0052 2023-02-23 16:44:06,088 [INFO ] Dev macro F1 = 0.9163 2023-02-23 16:44:06,089 [INFO ] New best macro F1 = 0.9163 ([0.9963, 0.8482, 0.9861, 0.7968, 0.7263, 0.9904, 0.9654, 0.9859, 0.9423, 0.8414, 0.979, 0.9371]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-23 16:44:07,077 [INFO ] Subset #9 2023-02-23 17:49:59,943 [INFO ] Training loss: 0.0052 2023-02-23 18:08:12,023 [INFO ] Dev macro F1 = 0.9175 2023-02-23 18:08:12,024 [INFO ] New best macro F1 = 0.9175 ([0.9965, 0.8428, 0.9859, 0.7962, 0.7459, 0.9904, 0.967, 0.9854, 0.9427, 0.8409, 0.9785, 0.9373]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-23 18:08:13,106 [INFO ] Epoch #3 2023-02-23 18:08:13,107 [INFO ] Subset #1 2023-02-23 19:26:19,659 [INFO ] Training loss: 0.0042 2023-02-23 19:44:30,411 [INFO ] Dev macro F1 = 0.9180 2023-02-23 19:44:30,412 [INFO ] New best macro F1 = 0.9180 ([0.9964, 0.8528, 0.9858, 0.7957, 0.7448, 0.9903, 0.9662, 0.9849, 0.9417, 0.8425, 0.9784, 0.9365]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-23 19:44:31,428 [INFO ] Subset #2 2023-02-23 21:02:32,847 [INFO ] Training loss: 0.0042 2023-02-23 21:20:42,523 [INFO ] Dev macro F1 = 0.9171 2023-02-23 21:20:42,523 [INFO ] Subset #3 2023-02-23 22:38:50,203 [INFO ] Training loss: 0.0043 2023-02-23 22:57:02,020 [INFO ] Dev macro F1 = 0.9191 2023-02-23 22:57:02,020 [INFO ] New best macro F1 = 0.9191 ([0.9964, 0.8509, 0.9858, 0.7998, 0.7527, 0.9903, 0.9666, 0.9855, 0.9421, 0.8418, 0.9791, 0.9379]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-23 22:57:03,088 [INFO ] Subset #4 2023-02-24 00:15:28,073 [INFO ] Training loss: 0.0043 2023-02-24 00:33:40,728 [INFO ] Dev macro F1 = 0.9189 2023-02-24 00:33:40,729 [INFO ] Subset #5 2023-02-24 01:52:05,939 [INFO ] Training loss: 0.0043 2023-02-24 02:10:18,483 [INFO ] Dev macro F1 = 0.9184 2023-02-24 02:10:18,484 [INFO ] Subset #6 2023-02-24 03:28:31,701 [INFO ] Training loss: 0.0043 2023-02-24 03:46:44,872 [INFO ] Dev macro F1 = 0.9199 2023-02-24 03:46:44,873 [INFO ] New best macro F1 = 0.9199 ([0.9965, 0.8544, 0.9862, 0.7997, 0.7532, 0.9905, 0.967, 0.9849, 0.943, 0.8459, 0.9797, 0.9384]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 2023-02-24 03:46:45,868 [INFO ] Subset #7 2023-02-24 05:04:57,214 [INFO ] Training loss: 0.0043 2023-02-24 05:23:09,199 [INFO ] Dev macro F1 = 0.9177 2023-02-24 05:23:09,200 [INFO ] Subset #8 2023-02-24 06:41:25,821 [INFO ] Training loss: 0.0043 2023-02-24 06:59:39,709 [INFO ] Dev macro F1 = 0.9190 2023-02-24 06:59:39,710 [INFO ] Subset #9 2023-02-24 08:05:25,875 [INFO ] Training loss: 0.0043 2023-02-24 08:23:40,031 [INFO ] Dev macro F1 = 0.9184 2023-02-24 08:23:40,031 [INFO ] Training took 154169.21s 2023-02-24 08:23:40,031 [INFO ] Best dev F1: 0.9199, using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]