Matej Klemen committed on
Commit
40603e9
1 Parent(s): 2344e98

Add model files

Browse files
classes.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "LOWER_OTHER": 0,
3
+ "LOWER_HYPERCORRECTION": 1,
4
+ "LOWER_ADJ_SKI": 2,
5
+ "LOWER_ENTITY_PART": 3,
6
+ "UPPER_OTHER": 4,
7
+ "UPPER_BEGIN": 5,
8
+ "UPPER_ENTITY": 6,
9
+ "UPPER_DIRECT_SPEECH": 7,
10
+ "UPPER_ADJ_OTHER": 8,
11
+ "UPPER_ALLUC_OTHER": 9,
12
+ "UPPER_ALLUC_BEGIN": 10,
13
+ "UPPER_ALLUC_ENTITY": 11,
14
+ "UNCAP": -100
15
+ }
config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "EMBEDDIA/sloberta",
3
+ "architectures": [
4
+ "CamembertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "LABEL_0",
16
+ "1": "LABEL_1",
17
+ "2": "LABEL_2",
18
+ "3": "LABEL_3",
19
+ "4": "LABEL_4",
20
+ "5": "LABEL_5",
21
+ "6": "LABEL_6",
22
+ "7": "LABEL_7",
23
+ "8": "LABEL_8",
24
+ "9": "LABEL_9",
25
+ "10": "LABEL_10",
26
+ "11": "LABEL_11"
27
+ },
28
+ "initializer_range": 0.02,
29
+ "intermediate_size": 3072,
30
+ "label2id": {
31
+ "LABEL_0": 0,
32
+ "LABEL_1": 1,
33
+ "LABEL_10": 10,
34
+ "LABEL_11": 11,
35
+ "LABEL_2": 2,
36
+ "LABEL_3": 3,
37
+ "LABEL_4": 4,
38
+ "LABEL_5": 5,
39
+ "LABEL_6": 6,
40
+ "LABEL_7": 7,
41
+ "LABEL_8": 8,
42
+ "LABEL_9": 9
43
+ },
44
+ "layer_norm_eps": 1e-05,
45
+ "max_position_embeddings": 514,
46
+ "model_type": "camembert",
47
+ "num_attention_heads": 12,
48
+ "num_hidden_layers": 12,
49
+ "pad_token_id": 1,
50
+ "position_embedding_type": "absolute",
51
+ "problem_type": "multi_label_classification",
52
+ "torch_dtype": "float32",
53
+ "transformers_version": "4.25.1",
54
+ "type_vocab_size": 1,
55
+ "use_cache": true,
56
+ "vocab_size": 32005
57
+ }
label_thresholds.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ 0.45,
3
+ 0.58,
4
+ 0.481,
5
+ 0.433,
6
+ 0.446,
7
+ 0.469,
8
+ 0.503,
9
+ 0.417,
10
+ 0.508,
11
+ 0.485,
12
+ 0.517,
13
+ 0.449
14
+ ]
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5437a8fb2bcac05130f79e9525699fc30045d478ead488f0ec6cab06ad72a3c
3
+ size 440233393
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34b589385a2320549143ab23b0ccf82cc99a82685701cdabe0fad847bd0479ff
3
+ size 800013
special_tokens_map.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<s>NOTUSED",
4
+ "</s>NOTUSED"
5
+ ],
6
+ "bos_token": "<s>",
7
+ "cls_token": "<s>",
8
+ "eos_token": "</s>",
9
+ "mask_token": {
10
+ "content": "<mask>",
11
+ "lstrip": true,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<pad>",
17
+ "sep_token": "</s>",
18
+ "unk_token": "<unk>"
19
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<s>NOTUSED",
4
+ "</s>NOTUSED"
5
+ ],
6
+ "bos_token": "<s>",
7
+ "cls_token": "<s>",
8
+ "do_lower_case": false,
9
+ "eos_token": "</s>",
10
+ "mask_token": {
11
+ "__type": "AddedToken",
12
+ "content": "<mask>",
13
+ "lstrip": true,
14
+ "normalized": true,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "model_max_length": 512,
19
+ "name_or_path": "EMBEDDIA/sloberta",
20
+ "pad_token": "<pad>",
21
+ "sep_token": "</s>",
22
+ "sp_model_kwargs": {},
23
+ "special_tokens_map_file": null,
24
+ "tokenizer_class": "CamembertTokenizer",
25
+ "unk_token": "<unk>"
26
+ }
train.log ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-02-22 10:29:59,083 [INFO ] |experiment_dir |gigafida_multi_case_fg_sloberta256 |
2
+ 2023-02-22 10:29:59,083 [INFO ] |train_path |...an-publishers-mala-velika-multilabel/train.json|
3
+ 2023-02-22 10:29:59,083 [INFO ] |dev_path |...lean-publishers-mala-velika-multilabel/dev.json|
4
+ 2023-02-22 10:29:59,083 [INFO ] |pretrained_name_or_path |EMBEDDIA/sloberta |
5
+ 2023-02-22 10:29:59,083 [INFO ] |max_length |256 |
6
+ 2023-02-22 10:29:59,083 [INFO ] |num_epochs |3 |
7
+ 2023-02-22 10:29:59,083 [INFO ] |learning_rate |2e-05 |
8
+ 2023-02-22 10:29:59,083 [INFO ] |batch_size |32 |
9
+ 2023-02-22 10:29:59,083 [INFO ] |validate_every_n_steps |300000 |
10
+ 2023-02-22 10:29:59,084 [INFO ] |early_stopping_rounds |999 |
11
+ 2023-02-22 10:29:59,084 [INFO ] |input_column |words_lc |
12
+ 2023-02-22 10:29:59,084 [INFO ] |target_column |case |
13
+ 2023-02-22 10:29:59,084 [INFO ] |class_encoding_path |...-publishers-mala-velika-multilabel/classes.json|
14
+ 2023-02-22 10:29:59,084 [INFO ] |use_cpu |False |
15
+ 2023-02-22 10:29:59,084 [INFO ] Using class encoding:
16
+ {
17
+ "LOWER_OTHER": 0,
18
+ "LOWER_HYPERCORRECTION": 1,
19
+ "LOWER_ADJ_SKI": 2,
20
+ "LOWER_ENTITY_PART": 3,
21
+ "UPPER_OTHER": 4,
22
+ "UPPER_BEGIN": 5,
23
+ "UPPER_ENTITY": 6,
24
+ "UPPER_DIRECT_SPEECH": 7,
25
+ "UPPER_ADJ_OTHER": 8,
26
+ "UPPER_ALLUC_OTHER": 9,
27
+ "UPPER_ALLUC_BEGIN": 10,
28
+ "UPPER_ALLUC_ENTITY": 11,
29
+ "UNCAP": -100
30
+ }
31
+ 2023-02-22 13:34:10,808 [INFO ] Loaded 2652880 training examples, 166737 validation examples.
32
+ 2023-02-22 13:34:10,912 [INFO ] Epoch #1
33
+ 2023-02-22 13:34:10,913 [INFO ] Subset #1
34
+ 2023-02-22 14:52:36,449 [INFO ] Training loss: 0.0271
35
+ 2023-02-22 15:10:55,902 [INFO ] Dev macro F1 = 0.8745
36
+ 2023-02-22 15:10:55,903 [INFO ] New best macro F1 = 0.8745
37
+ ([0.995, 0.7376, 0.9809, 0.7119, 0.643, 0.9873, 0.9536, 0.9807, 0.9202, 0.7135, 0.9679, 0.9025]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
38
+ 2023-02-22 15:10:57,034 [INFO ] Subset #2
39
+ 2023-02-22 16:29:16,755 [INFO ] Training loss: 0.0174
40
+ 2023-02-22 16:47:35,042 [INFO ] Dev macro F1 = 0.8908
41
+ 2023-02-22 16:47:35,043 [INFO ] New best macro F1 = 0.8908
42
+ ([0.9956, 0.7957, 0.9821, 0.7471, 0.6683, 0.9887, 0.9579, 0.9832, 0.9285, 0.756, 0.9706, 0.9165]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
43
+ 2023-02-22 16:47:36,125 [INFO ] Subset #3
44
+ 2023-02-22 18:05:55,544 [INFO ] Training loss: 0.0139
45
+ 2023-02-22 18:24:12,701 [INFO ] Dev macro F1 = 0.8978
46
+ 2023-02-22 18:24:12,702 [INFO ] New best macro F1 = 0.8978
47
+ ([0.9958, 0.7877, 0.9835, 0.7566, 0.6912, 0.9891, 0.9604, 0.9836, 0.9317, 0.7991, 0.974, 0.9208]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
48
+ 2023-02-22 18:24:13,804 [INFO ] Subset #4
49
+ 2023-02-22 19:42:27,790 [INFO ] Training loss: 0.0121
50
+ 2023-02-22 20:00:41,833 [INFO ] Dev macro F1 = 0.9007
51
+ 2023-02-22 20:00:41,834 [INFO ] New best macro F1 = 0.9007
52
+ ([0.9959, 0.8197, 0.984, 0.7597, 0.6966, 0.9893, 0.9616, 0.9835, 0.9334, 0.7914, 0.9737, 0.9193]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
53
+ 2023-02-22 20:00:43,000 [INFO ] Subset #5
54
+ 2023-02-22 21:18:59,194 [INFO ] Training loss: 0.0110
55
+ 2023-02-22 21:37:11,791 [INFO ] Dev macro F1 = 0.9038
56
+ 2023-02-22 21:37:11,792 [INFO ] New best macro F1 = 0.9038
57
+ ([0.9961, 0.8296, 0.9838, 0.7709, 0.6851, 0.9899, 0.9623, 0.9813, 0.9343, 0.8091, 0.9761, 0.9268]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
58
+ 2023-02-22 21:37:12,829 [INFO ] Subset #6
59
+ 2023-02-22 22:55:32,855 [INFO ] Training loss: 0.0102
60
+ 2023-02-22 23:13:49,305 [INFO ] Dev macro F1 = 0.9084
61
+ 2023-02-22 23:13:49,306 [INFO ] New best macro F1 = 0.9084
62
+ ([0.9961, 0.8353, 0.9846, 0.7777, 0.713, 0.9898, 0.9635, 0.9847, 0.9374, 0.8131, 0.9764, 0.9293]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
63
+ 2023-02-22 23:13:50,334 [INFO ] Subset #7
64
+ 2023-02-23 00:32:18,993 [INFO ] Training loss: 0.0096
65
+ 2023-02-23 00:50:32,151 [INFO ] Dev macro F1 = 0.9085
66
+ 2023-02-23 00:50:32,151 [INFO ] New best macro F1 = 0.9085
67
+ ([0.9961, 0.8392, 0.9849, 0.7667, 0.7116, 0.9899, 0.9639, 0.9852, 0.9376, 0.8197, 0.9774, 0.9299]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
68
+ 2023-02-23 00:50:33,174 [INFO ] Subset #8
69
+ 2023-02-23 02:08:49,459 [INFO ] Training loss: 0.0092
70
+ 2023-02-23 02:27:04,519 [INFO ] Dev macro F1 = 0.9106
71
+ 2023-02-23 02:27:04,521 [INFO ] New best macro F1 = 0.9106
72
+ ([0.9963, 0.8464, 0.9853, 0.7808, 0.7133, 0.99, 0.9645, 0.9851, 0.9372, 0.8192, 0.9771, 0.9321]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
73
+ 2023-02-23 02:27:05,553 [INFO ] Subset #9
74
+ 2023-02-23 03:33:09,800 [INFO ] Training loss: 0.0089
75
+ 2023-02-23 03:51:29,228 [INFO ] Dev macro F1 = 0.9117
76
+ 2023-02-23 03:51:29,228 [INFO ] New best macro F1 = 0.9117
77
+ ([0.9963, 0.8437, 0.9852, 0.7787, 0.7232, 0.9901, 0.9643, 0.9851, 0.9391, 0.8272, 0.9777, 0.9298]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
78
+ 2023-02-23 03:51:30,347 [INFO ] Epoch #2
79
+ 2023-02-23 03:51:30,347 [INFO ] Subset #1
80
+ 2023-02-23 05:09:58,233 [INFO ] Training loss: 0.0052
81
+ 2023-02-23 05:28:16,763 [INFO ] Dev macro F1 = 0.9133
82
+ 2023-02-23 05:28:16,763 [INFO ] New best macro F1 = 0.9133
83
+ ([0.9963, 0.8438, 0.9854, 0.7847, 0.7282, 0.99, 0.9647, 0.9855, 0.9393, 0.8333, 0.9776, 0.9303]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
84
+ 2023-02-23 05:28:17,805 [INFO ] Subset #2
85
+ 2023-02-23 06:46:44,412 [INFO ] Training loss: 0.0052
86
+ 2023-02-23 07:04:59,668 [INFO ] Dev macro F1 = 0.9145
87
+ 2023-02-23 07:04:59,668 [INFO ] New best macro F1 = 0.9145
88
+ ([0.9963, 0.8482, 0.9857, 0.7795, 0.7377, 0.99, 0.9652, 0.9852, 0.9405, 0.8362, 0.9778, 0.9312]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
89
+ 2023-02-23 07:05:00,671 [INFO ] Subset #3
90
+ 2023-02-23 08:23:23,919 [INFO ] Training loss: 0.0052
91
+ 2023-02-23 08:41:39,100 [INFO ] Dev macro F1 = 0.9148
92
+ 2023-02-23 08:41:39,101 [INFO ] New best macro F1 = 0.9148
93
+ ([0.9962, 0.8421, 0.9853, 0.7931, 0.7393, 0.9901, 0.9652, 0.9851, 0.9392, 0.8313, 0.9772, 0.9331]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
94
+ 2023-02-23 08:41:40,114 [INFO ] Subset #4
95
+ 2023-02-23 10:00:17,635 [INFO ] Training loss: 0.0052
96
+ 2023-02-23 10:18:31,362 [INFO ] Dev macro F1 = 0.9161
97
+ 2023-02-23 10:18:31,362 [INFO ] New best macro F1 = 0.9161
98
+ ([0.9964, 0.851, 0.9857, 0.7905, 0.7375, 0.9902, 0.966, 0.9853, 0.9413, 0.8361, 0.9779, 0.9347]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
99
+ 2023-02-23 10:18:32,356 [INFO ] Subset #5
100
+ 2023-02-23 11:36:54,299 [INFO ] Training loss: 0.0052
101
+ 2023-02-23 11:55:05,378 [INFO ] Dev macro F1 = 0.9147
102
+ 2023-02-23 11:55:05,379 [INFO ] Subset #6
103
+ 2023-02-23 13:13:16,174 [INFO ] Training loss: 0.0052
104
+ 2023-02-23 13:31:28,612 [INFO ] Dev macro F1 = 0.9139
105
+ 2023-02-23 13:31:28,613 [INFO ] Subset #7
106
+ 2023-02-23 14:49:40,826 [INFO ] Training loss: 0.0052
107
+ 2023-02-23 15:07:51,390 [INFO ] Dev macro F1 = 0.9159
108
+ 2023-02-23 15:07:51,390 [INFO ] Subset #8
109
+ 2023-02-23 16:25:51,975 [INFO ] Training loss: 0.0052
110
+ 2023-02-23 16:44:06,088 [INFO ] Dev macro F1 = 0.9163
111
+ 2023-02-23 16:44:06,089 [INFO ] New best macro F1 = 0.9163
112
+ ([0.9963, 0.8482, 0.9861, 0.7968, 0.7263, 0.9904, 0.9654, 0.9859, 0.9423, 0.8414, 0.979, 0.9371]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
113
+ 2023-02-23 16:44:07,077 [INFO ] Subset #9
114
+ 2023-02-23 17:49:59,943 [INFO ] Training loss: 0.0052
115
+ 2023-02-23 18:08:12,023 [INFO ] Dev macro F1 = 0.9175
116
+ 2023-02-23 18:08:12,024 [INFO ] New best macro F1 = 0.9175
117
+ ([0.9965, 0.8428, 0.9859, 0.7962, 0.7459, 0.9904, 0.967, 0.9854, 0.9427, 0.8409, 0.9785, 0.9373]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
118
+ 2023-02-23 18:08:13,106 [INFO ] Epoch #3
119
+ 2023-02-23 18:08:13,107 [INFO ] Subset #1
120
+ 2023-02-23 19:26:19,659 [INFO ] Training loss: 0.0042
121
+ 2023-02-23 19:44:30,411 [INFO ] Dev macro F1 = 0.9180
122
+ 2023-02-23 19:44:30,412 [INFO ] New best macro F1 = 0.9180
123
+ ([0.9964, 0.8528, 0.9858, 0.7957, 0.7448, 0.9903, 0.9662, 0.9849, 0.9417, 0.8425, 0.9784, 0.9365]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
124
+ 2023-02-23 19:44:31,428 [INFO ] Subset #2
125
+ 2023-02-23 21:02:32,847 [INFO ] Training loss: 0.0042
126
+ 2023-02-23 21:20:42,523 [INFO ] Dev macro F1 = 0.9171
127
+ 2023-02-23 21:20:42,523 [INFO ] Subset #3
128
+ 2023-02-23 22:38:50,203 [INFO ] Training loss: 0.0043
129
+ 2023-02-23 22:57:02,020 [INFO ] Dev macro F1 = 0.9191
130
+ 2023-02-23 22:57:02,020 [INFO ] New best macro F1 = 0.9191
131
+ ([0.9964, 0.8509, 0.9858, 0.7998, 0.7527, 0.9903, 0.9666, 0.9855, 0.9421, 0.8418, 0.9791, 0.9379]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
132
+ 2023-02-23 22:57:03,088 [INFO ] Subset #4
133
+ 2023-02-24 00:15:28,073 [INFO ] Training loss: 0.0043
134
+ 2023-02-24 00:33:40,728 [INFO ] Dev macro F1 = 0.9189
135
+ 2023-02-24 00:33:40,729 [INFO ] Subset #5
136
+ 2023-02-24 01:52:05,939 [INFO ] Training loss: 0.0043
137
+ 2023-02-24 02:10:18,483 [INFO ] Dev macro F1 = 0.9184
138
+ 2023-02-24 02:10:18,484 [INFO ] Subset #6
139
+ 2023-02-24 03:28:31,701 [INFO ] Training loss: 0.0043
140
+ 2023-02-24 03:46:44,872 [INFO ] Dev macro F1 = 0.9199
141
+ 2023-02-24 03:46:44,873 [INFO ] New best macro F1 = 0.9199
142
+ ([0.9965, 0.8544, 0.9862, 0.7997, 0.7532, 0.9905, 0.967, 0.9849, 0.943, 0.8459, 0.9797, 0.9384]), using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
143
+ 2023-02-24 03:46:45,868 [INFO ] Subset #7
144
+ 2023-02-24 05:04:57,214 [INFO ] Training loss: 0.0043
145
+ 2023-02-24 05:23:09,199 [INFO ] Dev macro F1 = 0.9177
146
+ 2023-02-24 05:23:09,200 [INFO ] Subset #8
147
+ 2023-02-24 06:41:25,821 [INFO ] Training loss: 0.0043
148
+ 2023-02-24 06:59:39,709 [INFO ] Dev macro F1 = 0.9190
149
+ 2023-02-24 06:59:39,710 [INFO ] Subset #9
150
+ 2023-02-24 08:05:25,875 [INFO ] Training loss: 0.0043
151
+ 2023-02-24 08:23:40,031 [INFO ] Dev macro F1 = 0.9184
152
+ 2023-02-24 08:23:40,031 [INFO ] Training took 154169.21s
153
+ 2023-02-24 08:23:40,031 [INFO ] Best dev F1: 0.9199, using thresholds: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]