File size: 11,092 Bytes
40603e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
2023-02-22 10:29:59,083 [INFO ]  |experiment_dir                |gigafida_multi_case_fg_sloberta256                |
2023-02-22 10:29:59,083 [INFO ]  |train_path                    |...an-publishers-mala-velika-multilabel/train.json|
2023-02-22 10:29:59,083 [INFO ]  |dev_path                      |...lean-publishers-mala-velika-multilabel/dev.json|
2023-02-22 10:29:59,083 [INFO ]  |pretrained_name_or_path       |EMBEDDIA/sloberta                                 |
2023-02-22 10:29:59,083 [INFO ]  |max_length                    |256                                               |
2023-02-22 10:29:59,083 [INFO ]  |num_epochs                    |3                                                 |
2023-02-22 10:29:59,083 [INFO ]  |learning_rate                 |2e-05                                             |
2023-02-22 10:29:59,083 [INFO ]  |batch_size                    |32                                                |
2023-02-22 10:29:59,083 [INFO ]  |validate_every_n_steps        |300000                                            |
2023-02-22 10:29:59,084 [INFO ]  |early_stopping_rounds         |999                                               |
2023-02-22 10:29:59,084 [INFO ]  |input_column                  |words_lc                                          |
2023-02-22 10:29:59,084 [INFO ]  |target_column                 |case                                              |
2023-02-22 10:29:59,084 [INFO ]  |class_encoding_path           |...-publishers-mala-velika-multilabel/classes.json|
2023-02-22 10:29:59,084 [INFO ]  |use_cpu                       |False                                             |
2023-02-22 10:29:59,084 [INFO ]  Using class encoding:
{
    "LOWER_OTHER": 0,
    "LOWER_HYPERCORRECTION": 1,
    "LOWER_ADJ_SKI": 2,
    "LOWER_ENTITY_PART": 3,
    "UPPER_OTHER": 4,
    "UPPER_BEGIN": 5,
    "UPPER_ENTITY": 6,
    "UPPER_DIRECT_SPEECH": 7,
    "UPPER_ADJ_OTHER": 8,
    "UPPER_ALLUC_OTHER": 9,
    "UPPER_ALLUC_BEGIN": 10,
    "UPPER_ALLUC_ENTITY": 11,
    "UNCAP": -100
}
2023-02-22 13:34:10,808 [INFO ]  Loaded 2652880 training examples, 166737 validation examples.
2023-02-22 13:34:10,912 [INFO ]  Epoch #1
2023-02-22 13:34:10,913 [INFO ]  	Subset #1
2023-02-22 14:52:36,449 [INFO ]  		Training loss:  0.0271
2023-02-22 15:10:55,902 [INFO ]  		Dev macro F1 = 0.8745
2023-02-22 15:10:55,903 [INFO ]  			New best macro F1 = 0.8745
 ([0.995, 0.7376, 0.9809, 0.7119, 0.643, 0.9873, 0.9536, 0.9807, 0.9202, 0.7135, 0.9679, 0.9025]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-22 15:10:57,034 [INFO ]  	Subset #2
2023-02-22 16:29:16,755 [INFO ]  		Training loss:  0.0174
2023-02-22 16:47:35,042 [INFO ]  		Dev macro F1 = 0.8908
2023-02-22 16:47:35,043 [INFO ]  			New best macro F1 = 0.8908
 ([0.9956, 0.7957, 0.9821, 0.7471, 0.6683, 0.9887, 0.9579, 0.9832, 0.9285, 0.756, 0.9706, 0.9165]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-22 16:47:36,125 [INFO ]  	Subset #3
2023-02-22 18:05:55,544 [INFO ]  		Training loss:  0.0139
2023-02-22 18:24:12,701 [INFO ]  		Dev macro F1 = 0.8978
2023-02-22 18:24:12,702 [INFO ]  			New best macro F1 = 0.8978
 ([0.9958, 0.7877, 0.9835, 0.7566, 0.6912, 0.9891, 0.9604, 0.9836, 0.9317, 0.7991, 0.974, 0.9208]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-22 18:24:13,804 [INFO ]  	Subset #4
2023-02-22 19:42:27,790 [INFO ]  		Training loss:  0.0121
2023-02-22 20:00:41,833 [INFO ]  		Dev macro F1 = 0.9007
2023-02-22 20:00:41,834 [INFO ]  			New best macro F1 = 0.9007
 ([0.9959, 0.8197, 0.984, 0.7597, 0.6966, 0.9893, 0.9616, 0.9835, 0.9334, 0.7914, 0.9737, 0.9193]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-22 20:00:43,000 [INFO ]  	Subset #5
2023-02-22 21:18:59,194 [INFO ]  		Training loss:  0.0110
2023-02-22 21:37:11,791 [INFO ]  		Dev macro F1 = 0.9038
2023-02-22 21:37:11,792 [INFO ]  			New best macro F1 = 0.9038
 ([0.9961, 0.8296, 0.9838, 0.7709, 0.6851, 0.9899, 0.9623, 0.9813, 0.9343, 0.8091, 0.9761, 0.9268]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-22 21:37:12,829 [INFO ]  	Subset #6
2023-02-22 22:55:32,855 [INFO ]  		Training loss:  0.0102
2023-02-22 23:13:49,305 [INFO ]  		Dev macro F1 = 0.9084
2023-02-22 23:13:49,306 [INFO ]  			New best macro F1 = 0.9084
 ([0.9961, 0.8353, 0.9846, 0.7777, 0.713, 0.9898, 0.9635, 0.9847, 0.9374, 0.8131, 0.9764, 0.9293]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-22 23:13:50,334 [INFO ]  	Subset #7
2023-02-23 00:32:18,993 [INFO ]  		Training loss:  0.0096
2023-02-23 00:50:32,151 [INFO ]  		Dev macro F1 = 0.9085
2023-02-23 00:50:32,151 [INFO ]  			New best macro F1 = 0.9085
 ([0.9961, 0.8392, 0.9849, 0.7667, 0.7116, 0.9899, 0.9639, 0.9852, 0.9376, 0.8197, 0.9774, 0.9299]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 00:50:33,174 [INFO ]  	Subset #8
2023-02-23 02:08:49,459 [INFO ]  		Training loss:  0.0092
2023-02-23 02:27:04,519 [INFO ]  		Dev macro F1 = 0.9106
2023-02-23 02:27:04,521 [INFO ]  			New best macro F1 = 0.9106
 ([0.9963, 0.8464, 0.9853, 0.7808, 0.7133, 0.99, 0.9645, 0.9851, 0.9372, 0.8192, 0.9771, 0.9321]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 02:27:05,553 [INFO ]  	Subset #9
2023-02-23 03:33:09,800 [INFO ]  		Training loss:  0.0089
2023-02-23 03:51:29,228 [INFO ]  		Dev macro F1 = 0.9117
2023-02-23 03:51:29,228 [INFO ]  			New best macro F1 = 0.9117
 ([0.9963, 0.8437, 0.9852, 0.7787, 0.7232, 0.9901, 0.9643, 0.9851, 0.9391, 0.8272, 0.9777, 0.9298]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 03:51:30,347 [INFO ]  Epoch #2
2023-02-23 03:51:30,347 [INFO ]  	Subset #1
2023-02-23 05:09:58,233 [INFO ]  		Training loss:  0.0052
2023-02-23 05:28:16,763 [INFO ]  		Dev macro F1 = 0.9133
2023-02-23 05:28:16,763 [INFO ]  			New best macro F1 = 0.9133
 ([0.9963, 0.8438, 0.9854, 0.7847, 0.7282, 0.99, 0.9647, 0.9855, 0.9393, 0.8333, 0.9776, 0.9303]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 05:28:17,805 [INFO ]  	Subset #2
2023-02-23 06:46:44,412 [INFO ]  		Training loss:  0.0052
2023-02-23 07:04:59,668 [INFO ]  		Dev macro F1 = 0.9145
2023-02-23 07:04:59,668 [INFO ]  			New best macro F1 = 0.9145
 ([0.9963, 0.8482, 0.9857, 0.7795, 0.7377, 0.99, 0.9652, 0.9852, 0.9405, 0.8362, 0.9778, 0.9312]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 07:05:00,671 [INFO ]  	Subset #3
2023-02-23 08:23:23,919 [INFO ]  		Training loss:  0.0052
2023-02-23 08:41:39,100 [INFO ]  		Dev macro F1 = 0.9148
2023-02-23 08:41:39,101 [INFO ]  			New best macro F1 = 0.9148
 ([0.9962, 0.8421, 0.9853, 0.7931, 0.7393, 0.9901, 0.9652, 0.9851, 0.9392, 0.8313, 0.9772, 0.9331]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 08:41:40,114 [INFO ]  	Subset #4
2023-02-23 10:00:17,635 [INFO ]  		Training loss:  0.0052
2023-02-23 10:18:31,362 [INFO ]  		Dev macro F1 = 0.9161
2023-02-23 10:18:31,362 [INFO ]  			New best macro F1 = 0.9161
 ([0.9964, 0.851, 0.9857, 0.7905, 0.7375, 0.9902, 0.966, 0.9853, 0.9413, 0.8361, 0.9779, 0.9347]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 10:18:32,356 [INFO ]  	Subset #5
2023-02-23 11:36:54,299 [INFO ]  		Training loss:  0.0052
2023-02-23 11:55:05,378 [INFO ]  		Dev macro F1 = 0.9147
2023-02-23 11:55:05,379 [INFO ]  	Subset #6
2023-02-23 13:13:16,174 [INFO ]  		Training loss:  0.0052
2023-02-23 13:31:28,612 [INFO ]  		Dev macro F1 = 0.9139
2023-02-23 13:31:28,613 [INFO ]  	Subset #7
2023-02-23 14:49:40,826 [INFO ]  		Training loss:  0.0052
2023-02-23 15:07:51,390 [INFO ]  		Dev macro F1 = 0.9159
2023-02-23 15:07:51,390 [INFO ]  	Subset #8
2023-02-23 16:25:51,975 [INFO ]  		Training loss:  0.0052
2023-02-23 16:44:06,088 [INFO ]  		Dev macro F1 = 0.9163
2023-02-23 16:44:06,089 [INFO ]  			New best macro F1 = 0.9163
 ([0.9963, 0.8482, 0.9861, 0.7968, 0.7263, 0.9904, 0.9654, 0.9859, 0.9423, 0.8414, 0.979, 0.9371]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 16:44:07,077 [INFO ]  	Subset #9
2023-02-23 17:49:59,943 [INFO ]  		Training loss:  0.0052
2023-02-23 18:08:12,023 [INFO ]  		Dev macro F1 = 0.9175
2023-02-23 18:08:12,024 [INFO ]  			New best macro F1 = 0.9175
 ([0.9965, 0.8428, 0.9859, 0.7962, 0.7459, 0.9904, 0.967, 0.9854, 0.9427, 0.8409, 0.9785, 0.9373]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 18:08:13,106 [INFO ]  Epoch #3
2023-02-23 18:08:13,107 [INFO ]  	Subset #1
2023-02-23 19:26:19,659 [INFO ]  		Training loss:  0.0042
2023-02-23 19:44:30,411 [INFO ]  		Dev macro F1 = 0.9180
2023-02-23 19:44:30,412 [INFO ]  			New best macro F1 = 0.9180
 ([0.9964, 0.8528, 0.9858, 0.7957, 0.7448, 0.9903, 0.9662, 0.9849, 0.9417, 0.8425, 0.9784, 0.9365]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 19:44:31,428 [INFO ]  	Subset #2
2023-02-23 21:02:32,847 [INFO ]  		Training loss:  0.0042
2023-02-23 21:20:42,523 [INFO ]  		Dev macro F1 = 0.9171
2023-02-23 21:20:42,523 [INFO ]  	Subset #3
2023-02-23 22:38:50,203 [INFO ]  		Training loss:  0.0043
2023-02-23 22:57:02,020 [INFO ]  		Dev macro F1 = 0.9191
2023-02-23 22:57:02,020 [INFO ]  			New best macro F1 = 0.9191
 ([0.9964, 0.8509, 0.9858, 0.7998, 0.7527, 0.9903, 0.9666, 0.9855, 0.9421, 0.8418, 0.9791, 0.9379]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-23 22:57:03,088 [INFO ]  	Subset #4
2023-02-24 00:15:28,073 [INFO ]  		Training loss:  0.0043
2023-02-24 00:33:40,728 [INFO ]  		Dev macro F1 = 0.9189
2023-02-24 00:33:40,729 [INFO ]  	Subset #5
2023-02-24 01:52:05,939 [INFO ]  		Training loss:  0.0043
2023-02-24 02:10:18,483 [INFO ]  		Dev macro F1 = 0.9184
2023-02-24 02:10:18,484 [INFO ]  	Subset #6
2023-02-24 03:28:31,701 [INFO ]  		Training loss:  0.0043
2023-02-24 03:46:44,872 [INFO ]  		Dev macro F1 = 0.9199
2023-02-24 03:46:44,873 [INFO ]  			New best macro F1 = 0.9199
 ([0.9965, 0.8544, 0.9862, 0.7997, 0.7532, 0.9905, 0.967, 0.9849, 0.943, 0.8459, 0.9797, 0.9384]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
2023-02-24 03:46:45,868 [INFO ]  	Subset #7
2023-02-24 05:04:57,214 [INFO ]  		Training loss:  0.0043
2023-02-24 05:23:09,199 [INFO ]  		Dev macro F1 = 0.9177
2023-02-24 05:23:09,200 [INFO ]  	Subset #8
2023-02-24 06:41:25,821 [INFO ]  		Training loss:  0.0043
2023-02-24 06:59:39,709 [INFO ]  		Dev macro F1 = 0.9190
2023-02-24 06:59:39,710 [INFO ]  	Subset #9
2023-02-24 08:05:25,875 [INFO ]  		Training loss:  0.0043
2023-02-24 08:23:40,031 [INFO ]  		Dev macro F1 = 0.9184
2023-02-24 08:23:40,031 [INFO ]  Training took 154169.21s
2023-02-24 08:23:40,031 [INFO ]  Best dev F1: 0.9199, using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]