Upload folder using huggingface_hub
Browse files- best-model.pt +3 -0
- dev.tsv +0 -0
- final-model.pt +3 -0
- loss.tsv +11 -0
- runs/events.out.tfevents.1696977887.de2e83fddbee.1120.2 +3 -0
- test.tsv +0 -0
- training.log +263 -0
best-model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ac75fcc05dcfa85fa2d2bf53a3d3520f71deaac2809e50ffc041167824509ae
|
3 |
+
size 870817519
|
dev.tsv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
final-model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ef67a7cdd3806a7914a09bca499d50e76ac36d18039d1475220d854d6f6b165
|
3 |
+
size 870817636
|
loss.tsv
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
EPOCH TIMESTAMP LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
|
2 |
+
1 22:54:33 0.0001 1.1277 0.2332 0.4517 0.3878 0.4173 0.2914
|
3 |
+
2 23:04:05 0.0001 0.1584 0.1155 0.7497 0.7660 0.7577 0.6333
|
4 |
+
3 23:13:52 0.0001 0.0792 0.1301 0.7382 0.7905 0.7635 0.6371
|
5 |
+
4 23:23:54 0.0001 0.0573 0.1505 0.7555 0.7986 0.7765 0.6544
|
6 |
+
5 23:33:26 0.0001 0.0415 0.1764 0.7632 0.7891 0.7759 0.6510
|
7 |
+
6 23:43:07 0.0001 0.0292 0.1814 0.7785 0.7986 0.7884 0.6663
|
8 |
+
7 23:52:35 0.0001 0.0222 0.1975 0.7870 0.7891 0.7880 0.6674
|
9 |
+
8 00:01:59 0.0000 0.0159 0.2195 0.7468 0.7864 0.7661 0.6380
|
10 |
+
9 00:11:36 0.0000 0.0109 0.2320 0.7692 0.8027 0.7856 0.6644
|
11 |
+
10 00:20:58 0.0000 0.0093 0.2328 0.7666 0.8000 0.7830 0.6614
|
runs/events.out.tfevents.1696977887.de2e83fddbee.1120.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c403dc3f1087947c539979213b410bc1e7c0a2340e69e6595e7ea0cd642211e
|
3 |
+
size 999862
|
test.tsv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
training.log
ADDED
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-10-10 22:44:47,590 ----------------------------------------------------------------------------------------------------
|
2 |
+
2023-10-10 22:44:47,592 Model: "SequenceTagger(
|
3 |
+
(embeddings): ByT5Embeddings(
|
4 |
+
(model): T5EncoderModel(
|
5 |
+
(shared): Embedding(384, 1472)
|
6 |
+
(encoder): T5Stack(
|
7 |
+
(embed_tokens): Embedding(384, 1472)
|
8 |
+
(block): ModuleList(
|
9 |
+
(0): T5Block(
|
10 |
+
(layer): ModuleList(
|
11 |
+
(0): T5LayerSelfAttention(
|
12 |
+
(SelfAttention): T5Attention(
|
13 |
+
(q): Linear(in_features=1472, out_features=384, bias=False)
|
14 |
+
(k): Linear(in_features=1472, out_features=384, bias=False)
|
15 |
+
(v): Linear(in_features=1472, out_features=384, bias=False)
|
16 |
+
(o): Linear(in_features=384, out_features=1472, bias=False)
|
17 |
+
(relative_attention_bias): Embedding(32, 6)
|
18 |
+
)
|
19 |
+
(layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
20 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
21 |
+
)
|
22 |
+
(1): T5LayerFF(
|
23 |
+
(DenseReluDense): T5DenseGatedActDense(
|
24 |
+
(wi_0): Linear(in_features=1472, out_features=3584, bias=False)
|
25 |
+
(wi_1): Linear(in_features=1472, out_features=3584, bias=False)
|
26 |
+
(wo): Linear(in_features=3584, out_features=1472, bias=False)
|
27 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
28 |
+
(act): NewGELUActivation()
|
29 |
+
)
|
30 |
+
(layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
31 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
32 |
+
)
|
33 |
+
)
|
34 |
+
)
|
35 |
+
(1-11): 11 x T5Block(
|
36 |
+
(layer): ModuleList(
|
37 |
+
(0): T5LayerSelfAttention(
|
38 |
+
(SelfAttention): T5Attention(
|
39 |
+
(q): Linear(in_features=1472, out_features=384, bias=False)
|
40 |
+
(k): Linear(in_features=1472, out_features=384, bias=False)
|
41 |
+
(v): Linear(in_features=1472, out_features=384, bias=False)
|
42 |
+
(o): Linear(in_features=384, out_features=1472, bias=False)
|
43 |
+
)
|
44 |
+
(layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
45 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
46 |
+
)
|
47 |
+
(1): T5LayerFF(
|
48 |
+
(DenseReluDense): T5DenseGatedActDense(
|
49 |
+
(wi_0): Linear(in_features=1472, out_features=3584, bias=False)
|
50 |
+
(wi_1): Linear(in_features=1472, out_features=3584, bias=False)
|
51 |
+
(wo): Linear(in_features=3584, out_features=1472, bias=False)
|
52 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
53 |
+
(act): NewGELUActivation()
|
54 |
+
)
|
55 |
+
(layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
56 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
57 |
+
)
|
58 |
+
)
|
59 |
+
)
|
60 |
+
)
|
61 |
+
(final_layer_norm): FusedRMSNorm(torch.Size([1472]), eps=1e-06, elementwise_affine=True)
|
62 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
63 |
+
)
|
64 |
+
)
|
65 |
+
)
|
66 |
+
(locked_dropout): LockedDropout(p=0.5)
|
67 |
+
(linear): Linear(in_features=1472, out_features=17, bias=True)
|
68 |
+
(loss_function): CrossEntropyLoss()
|
69 |
+
)"
|
70 |
+
2023-10-10 22:44:47,593 ----------------------------------------------------------------------------------------------------
|
71 |
+
2023-10-10 22:44:47,593 MultiCorpus: 7142 train + 698 dev + 2570 test sentences
|
72 |
+
- NER_HIPE_2022 Corpus: 7142 train + 698 dev + 2570 test sentences - /root/.flair/datasets/ner_hipe_2022/v2.1/newseye/fr/with_doc_seperator
|
73 |
+
2023-10-10 22:44:47,593 ----------------------------------------------------------------------------------------------------
|
74 |
+
2023-10-10 22:44:47,593 Train: 7142 sentences
|
75 |
+
2023-10-10 22:44:47,593 (train_with_dev=False, train_with_test=False)
|
76 |
+
2023-10-10 22:44:47,593 ----------------------------------------------------------------------------------------------------
|
77 |
+
2023-10-10 22:44:47,593 Training Params:
|
78 |
+
2023-10-10 22:44:47,593 - learning_rate: "0.00015"
|
79 |
+
2023-10-10 22:44:47,593 - mini_batch_size: "4"
|
80 |
+
2023-10-10 22:44:47,593 - max_epochs: "10"
|
81 |
+
2023-10-10 22:44:47,594 - shuffle: "True"
|
82 |
+
2023-10-10 22:44:47,594 ----------------------------------------------------------------------------------------------------
|
83 |
+
2023-10-10 22:44:47,594 Plugins:
|
84 |
+
2023-10-10 22:44:47,594 - TensorboardLogger
|
85 |
+
2023-10-10 22:44:47,594 - LinearScheduler | warmup_fraction: '0.1'
|
86 |
+
2023-10-10 22:44:47,594 ----------------------------------------------------------------------------------------------------
|
87 |
+
2023-10-10 22:44:47,594 Final evaluation on model from best epoch (best-model.pt)
|
88 |
+
2023-10-10 22:44:47,594 - metric: "('micro avg', 'f1-score')"
|
89 |
+
2023-10-10 22:44:47,594 ----------------------------------------------------------------------------------------------------
|
90 |
+
2023-10-10 22:44:47,594 Computation:
|
91 |
+
2023-10-10 22:44:47,594 - compute on device: cuda:0
|
92 |
+
2023-10-10 22:44:47,594 - embedding storage: none
|
93 |
+
2023-10-10 22:44:47,594 ----------------------------------------------------------------------------------------------------
|
94 |
+
2023-10-10 22:44:47,594 Model training base path: "hmbench-newseye/fr-hmbyt5-preliminary/byt5-small-historic-multilingual-span20-flax-bs4-wsFalse-e10-lr0.00015-poolingfirst-layers-1-crfFalse-1"
|
95 |
+
2023-10-10 22:44:47,594 ----------------------------------------------------------------------------------------------------
|
96 |
+
2023-10-10 22:44:47,595 ----------------------------------------------------------------------------------------------------
|
97 |
+
2023-10-10 22:44:47,595 Logging anything other than scalars to TensorBoard is currently not supported.
|
98 |
+
2023-10-10 22:45:45,122 epoch 1 - iter 178/1786 - loss 2.82297678 - time (sec): 57.53 - samples/sec: 442.90 - lr: 0.000015 - momentum: 0.000000
|
99 |
+
2023-10-10 22:46:40,872 epoch 1 - iter 356/1786 - loss 2.69802388 - time (sec): 113.28 - samples/sec: 445.03 - lr: 0.000030 - momentum: 0.000000
|
100 |
+
2023-10-10 22:47:36,688 epoch 1 - iter 534/1786 - loss 2.43012170 - time (sec): 169.09 - samples/sec: 443.81 - lr: 0.000045 - momentum: 0.000000
|
101 |
+
2023-10-10 22:48:32,330 epoch 1 - iter 712/1786 - loss 2.13148166 - time (sec): 224.73 - samples/sec: 442.71 - lr: 0.000060 - momentum: 0.000000
|
102 |
+
2023-10-10 22:49:29,955 epoch 1 - iter 890/1786 - loss 1.82875030 - time (sec): 282.36 - samples/sec: 445.19 - lr: 0.000075 - momentum: 0.000000
|
103 |
+
2023-10-10 22:50:25,069 epoch 1 - iter 1068/1786 - loss 1.63107710 - time (sec): 337.47 - samples/sec: 440.99 - lr: 0.000090 - momentum: 0.000000
|
104 |
+
2023-10-10 22:51:20,820 epoch 1 - iter 1246/1786 - loss 1.46922238 - time (sec): 393.22 - samples/sec: 438.43 - lr: 0.000105 - momentum: 0.000000
|
105 |
+
2023-10-10 22:52:17,591 epoch 1 - iter 1424/1786 - loss 1.33149873 - time (sec): 449.99 - samples/sec: 439.23 - lr: 0.000120 - momentum: 0.000000
|
106 |
+
2023-10-10 22:53:14,658 epoch 1 - iter 1602/1786 - loss 1.21920691 - time (sec): 507.06 - samples/sec: 440.59 - lr: 0.000134 - momentum: 0.000000
|
107 |
+
2023-10-10 22:54:11,490 epoch 1 - iter 1780/1786 - loss 1.13034640 - time (sec): 563.89 - samples/sec: 439.76 - lr: 0.000149 - momentum: 0.000000
|
108 |
+
2023-10-10 22:54:13,204 ----------------------------------------------------------------------------------------------------
|
109 |
+
2023-10-10 22:54:13,205 EPOCH 1 done: loss 1.1277 - lr: 0.000149
|
110 |
+
2023-10-10 22:54:33,067 DEV : loss 0.23323342204093933 - f1-score (micro avg) 0.4173
|
111 |
+
2023-10-10 22:54:33,096 saving best model
|
112 |
+
2023-10-10 22:54:33,952 ----------------------------------------------------------------------------------------------------
|
113 |
+
2023-10-10 22:55:30,220 epoch 2 - iter 178/1786 - loss 0.24102985 - time (sec): 56.27 - samples/sec: 470.07 - lr: 0.000148 - momentum: 0.000000
|
114 |
+
2023-10-10 22:56:23,566 epoch 2 - iter 356/1786 - loss 0.23283162 - time (sec): 109.61 - samples/sec: 460.21 - lr: 0.000147 - momentum: 0.000000
|
115 |
+
2023-10-10 22:57:18,800 epoch 2 - iter 534/1786 - loss 0.21609252 - time (sec): 164.85 - samples/sec: 450.65 - lr: 0.000145 - momentum: 0.000000
|
116 |
+
2023-10-10 22:58:13,255 epoch 2 - iter 712/1786 - loss 0.20093014 - time (sec): 219.30 - samples/sec: 452.98 - lr: 0.000143 - momentum: 0.000000
|
117 |
+
2023-10-10 22:59:08,194 epoch 2 - iter 890/1786 - loss 0.18923384 - time (sec): 274.24 - samples/sec: 452.74 - lr: 0.000142 - momentum: 0.000000
|
118 |
+
2023-10-10 23:00:01,459 epoch 2 - iter 1068/1786 - loss 0.18291744 - time (sec): 327.50 - samples/sec: 452.31 - lr: 0.000140 - momentum: 0.000000
|
119 |
+
2023-10-10 23:00:56,471 epoch 2 - iter 1246/1786 - loss 0.17526909 - time (sec): 382.52 - samples/sec: 453.63 - lr: 0.000138 - momentum: 0.000000
|
120 |
+
2023-10-10 23:01:52,071 epoch 2 - iter 1424/1786 - loss 0.16882287 - time (sec): 438.12 - samples/sec: 455.71 - lr: 0.000137 - momentum: 0.000000
|
121 |
+
2023-10-10 23:02:46,754 epoch 2 - iter 1602/1786 - loss 0.16338648 - time (sec): 492.80 - samples/sec: 453.92 - lr: 0.000135 - momentum: 0.000000
|
122 |
+
2023-10-10 23:03:41,905 epoch 2 - iter 1780/1786 - loss 0.15840008 - time (sec): 547.95 - samples/sec: 452.81 - lr: 0.000133 - momentum: 0.000000
|
123 |
+
2023-10-10 23:03:43,567 ----------------------------------------------------------------------------------------------------
|
124 |
+
2023-10-10 23:03:43,567 EPOCH 2 done: loss 0.1584 - lr: 0.000133
|
125 |
+
2023-10-10 23:04:05,708 DEV : loss 0.11548721790313721 - f1-score (micro avg) 0.7577
|
126 |
+
2023-10-10 23:04:05,741 saving best model
|
127 |
+
2023-10-10 23:04:15,609 ----------------------------------------------------------------------------------------------------
|
128 |
+
2023-10-10 23:05:10,541 epoch 3 - iter 178/1786 - loss 0.08136549 - time (sec): 54.93 - samples/sec: 434.94 - lr: 0.000132 - momentum: 0.000000
|
129 |
+
2023-10-10 23:06:06,807 epoch 3 - iter 356/1786 - loss 0.07652555 - time (sec): 111.19 - samples/sec: 444.12 - lr: 0.000130 - momentum: 0.000000
|
130 |
+
2023-10-10 23:07:00,995 epoch 3 - iter 534/1786 - loss 0.08122126 - time (sec): 165.38 - samples/sec: 447.71 - lr: 0.000128 - momentum: 0.000000
|
131 |
+
2023-10-10 23:07:56,758 epoch 3 - iter 712/1786 - loss 0.08475757 - time (sec): 221.15 - samples/sec: 440.39 - lr: 0.000127 - momentum: 0.000000
|
132 |
+
2023-10-10 23:08:52,555 epoch 3 - iter 890/1786 - loss 0.08475656 - time (sec): 276.94 - samples/sec: 445.13 - lr: 0.000125 - momentum: 0.000000
|
133 |
+
2023-10-10 23:09:48,953 epoch 3 - iter 1068/1786 - loss 0.08234623 - time (sec): 333.34 - samples/sec: 444.34 - lr: 0.000123 - momentum: 0.000000
|
134 |
+
2023-10-10 23:10:44,473 epoch 3 - iter 1246/1786 - loss 0.07964466 - time (sec): 388.86 - samples/sec: 444.66 - lr: 0.000122 - momentum: 0.000000
|
135 |
+
2023-10-10 23:11:41,277 epoch 3 - iter 1424/1786 - loss 0.07944389 - time (sec): 445.66 - samples/sec: 445.15 - lr: 0.000120 - momentum: 0.000000
|
136 |
+
2023-10-10 23:12:37,418 epoch 3 - iter 1602/1786 - loss 0.07901652 - time (sec): 501.80 - samples/sec: 448.61 - lr: 0.000118 - momentum: 0.000000
|
137 |
+
2023-10-10 23:13:29,629 epoch 3 - iter 1780/1786 - loss 0.07923333 - time (sec): 554.02 - samples/sec: 447.67 - lr: 0.000117 - momentum: 0.000000
|
138 |
+
2023-10-10 23:13:31,248 ----------------------------------------------------------------------------------------------------
|
139 |
+
2023-10-10 23:13:31,248 EPOCH 3 done: loss 0.0792 - lr: 0.000117
|
140 |
+
2023-10-10 23:13:52,415 DEV : loss 0.1301306039094925 - f1-score (micro avg) 0.7635
|
141 |
+
2023-10-10 23:13:52,446 saving best model
|
142 |
+
2023-10-10 23:13:59,901 ----------------------------------------------------------------------------------------------------
|
143 |
+
2023-10-10 23:14:57,003 epoch 4 - iter 178/1786 - loss 0.05275662 - time (sec): 57.10 - samples/sec: 436.39 - lr: 0.000115 - momentum: 0.000000
|
144 |
+
2023-10-10 23:15:52,667 epoch 4 - iter 356/1786 - loss 0.05729787 - time (sec): 112.76 - samples/sec: 436.43 - lr: 0.000113 - momentum: 0.000000
|
145 |
+
2023-10-10 23:16:49,319 epoch 4 - iter 534/1786 - loss 0.05946890 - time (sec): 169.41 - samples/sec: 435.58 - lr: 0.000112 - momentum: 0.000000
|
146 |
+
2023-10-10 23:17:46,635 epoch 4 - iter 712/1786 - loss 0.06229628 - time (sec): 226.73 - samples/sec: 437.60 - lr: 0.000110 - momentum: 0.000000
|
147 |
+
2023-10-10 23:18:44,994 epoch 4 - iter 890/1786 - loss 0.05943268 - time (sec): 285.09 - samples/sec: 440.37 - lr: 0.000108 - momentum: 0.000000
|
148 |
+
2023-10-10 23:19:41,425 epoch 4 - iter 1068/1786 - loss 0.05885487 - time (sec): 341.52 - samples/sec: 441.36 - lr: 0.000107 - momentum: 0.000000
|
149 |
+
2023-10-10 23:20:39,891 epoch 4 - iter 1246/1786 - loss 0.05697691 - time (sec): 399.99 - samples/sec: 442.67 - lr: 0.000105 - momentum: 0.000000
|
150 |
+
2023-10-10 23:21:37,350 epoch 4 - iter 1424/1786 - loss 0.05690202 - time (sec): 457.45 - samples/sec: 440.56 - lr: 0.000103 - momentum: 0.000000
|
151 |
+
2023-10-10 23:22:34,147 epoch 4 - iter 1602/1786 - loss 0.05713425 - time (sec): 514.24 - samples/sec: 437.45 - lr: 0.000102 - momentum: 0.000000
|
152 |
+
2023-10-10 23:23:29,165 epoch 4 - iter 1780/1786 - loss 0.05724047 - time (sec): 569.26 - samples/sec: 435.87 - lr: 0.000100 - momentum: 0.000000
|
153 |
+
2023-10-10 23:23:30,870 ----------------------------------------------------------------------------------------------------
|
154 |
+
2023-10-10 23:23:30,870 EPOCH 4 done: loss 0.0573 - lr: 0.000100
|
155 |
+
2023-10-10 23:23:54,716 DEV : loss 0.15049181878566742 - f1-score (micro avg) 0.7765
|
156 |
+
2023-10-10 23:23:54,760 saving best model
|
157 |
+
2023-10-10 23:23:58,096 ----------------------------------------------------------------------------------------------------
|
158 |
+
2023-10-10 23:24:54,936 epoch 5 - iter 178/1786 - loss 0.03945234 - time (sec): 56.84 - samples/sec: 447.40 - lr: 0.000098 - momentum: 0.000000
|
159 |
+
2023-10-10 23:25:50,552 epoch 5 - iter 356/1786 - loss 0.04091510 - time (sec): 112.45 - samples/sec: 432.68 - lr: 0.000097 - momentum: 0.000000
|
160 |
+
2023-10-10 23:26:46,765 epoch 5 - iter 534/1786 - loss 0.03892648 - time (sec): 168.66 - samples/sec: 441.54 - lr: 0.000095 - momentum: 0.000000
|
161 |
+
2023-10-10 23:27:42,683 epoch 5 - iter 712/1786 - loss 0.04293506 - time (sec): 224.58 - samples/sec: 448.27 - lr: 0.000093 - momentum: 0.000000
|
162 |
+
2023-10-10 23:28:33,681 epoch 5 - iter 890/1786 - loss 0.04244048 - time (sec): 275.58 - samples/sec: 448.05 - lr: 0.000092 - momentum: 0.000000
|
163 |
+
2023-10-10 23:29:27,510 epoch 5 - iter 1068/1786 - loss 0.04236998 - time (sec): 329.41 - samples/sec: 447.40 - lr: 0.000090 - momentum: 0.000000
|
164 |
+
2023-10-10 23:30:20,804 epoch 5 - iter 1246/1786 - loss 0.04272781 - time (sec): 382.70 - samples/sec: 450.66 - lr: 0.000088 - momentum: 0.000000
|
165 |
+
2023-10-10 23:31:16,093 epoch 5 - iter 1424/1786 - loss 0.04290125 - time (sec): 437.99 - samples/sec: 452.49 - lr: 0.000087 - momentum: 0.000000
|
166 |
+
2023-10-10 23:32:09,690 epoch 5 - iter 1602/1786 - loss 0.04192998 - time (sec): 491.59 - samples/sec: 453.80 - lr: 0.000085 - momentum: 0.000000
|
167 |
+
2023-10-10 23:33:03,194 epoch 5 - iter 1780/1786 - loss 0.04145570 - time (sec): 545.09 - samples/sec: 455.03 - lr: 0.000083 - momentum: 0.000000
|
168 |
+
2023-10-10 23:33:04,778 ----------------------------------------------------------------------------------------------------
|
169 |
+
2023-10-10 23:33:04,779 EPOCH 5 done: loss 0.0415 - lr: 0.000083
|
170 |
+
2023-10-10 23:33:26,148 DEV : loss 0.1764456331729889 - f1-score (micro avg) 0.7759
|
171 |
+
2023-10-10 23:33:26,179 ----------------------------------------------------------------------------------------------------
|
172 |
+
2023-10-10 23:34:19,998 epoch 6 - iter 178/1786 - loss 0.02795317 - time (sec): 53.82 - samples/sec: 463.22 - lr: 0.000082 - momentum: 0.000000
|
173 |
+
2023-10-10 23:35:15,410 epoch 6 - iter 356/1786 - loss 0.02843129 - time (sec): 109.23 - samples/sec: 453.52 - lr: 0.000080 - momentum: 0.000000
|
174 |
+
2023-10-10 23:36:11,923 epoch 6 - iter 534/1786 - loss 0.02722917 - time (sec): 165.74 - samples/sec: 453.47 - lr: 0.000078 - momentum: 0.000000
|
175 |
+
2023-10-10 23:37:06,711 epoch 6 - iter 712/1786 - loss 0.02848990 - time (sec): 220.53 - samples/sec: 449.83 - lr: 0.000077 - momentum: 0.000000
|
176 |
+
2023-10-10 23:38:02,420 epoch 6 - iter 890/1786 - loss 0.02740472 - time (sec): 276.24 - samples/sec: 445.37 - lr: 0.000075 - momentum: 0.000000
|
177 |
+
2023-10-10 23:38:58,416 epoch 6 - iter 1068/1786 - loss 0.02716161 - time (sec): 332.23 - samples/sec: 445.09 - lr: 0.000073 - momentum: 0.000000
|
178 |
+
2023-10-10 23:39:56,084 epoch 6 - iter 1246/1786 - loss 0.02693100 - time (sec): 389.90 - samples/sec: 446.84 - lr: 0.000072 - momentum: 0.000000
|
179 |
+
2023-10-10 23:40:51,935 epoch 6 - iter 1424/1786 - loss 0.02765192 - time (sec): 445.75 - samples/sec: 446.68 - lr: 0.000070 - momentum: 0.000000
|
180 |
+
2023-10-10 23:41:49,667 epoch 6 - iter 1602/1786 - loss 0.02865346 - time (sec): 503.49 - samples/sec: 446.24 - lr: 0.000068 - momentum: 0.000000
|
181 |
+
2023-10-10 23:42:43,134 epoch 6 - iter 1780/1786 - loss 0.02926603 - time (sec): 556.95 - samples/sec: 445.33 - lr: 0.000067 - momentum: 0.000000
|
182 |
+
2023-10-10 23:42:44,853 ----------------------------------------------------------------------------------------------------
|
183 |
+
2023-10-10 23:42:44,853 EPOCH 6 done: loss 0.0292 - lr: 0.000067
|
184 |
+
2023-10-10 23:43:07,588 DEV : loss 0.18137316405773163 - f1-score (micro avg) 0.7884
|
185 |
+
2023-10-10 23:43:07,619 saving best model
|
186 |
+
2023-10-10 23:43:10,492 ----------------------------------------------------------------------------------------------------
|
187 |
+
2023-10-10 23:44:05,615 epoch 7 - iter 178/1786 - loss 0.01383948 - time (sec): 55.12 - samples/sec: 460.35 - lr: 0.000065 - momentum: 0.000000
|
188 |
+
2023-10-10 23:44:59,535 epoch 7 - iter 356/1786 - loss 0.01746561 - time (sec): 109.04 - samples/sec: 446.78 - lr: 0.000063 - momentum: 0.000000
|
189 |
+
2023-10-10 23:45:54,596 epoch 7 - iter 534/1786 - loss 0.01663426 - time (sec): 164.10 - samples/sec: 452.10 - lr: 0.000062 - momentum: 0.000000
|
190 |
+
2023-10-10 23:46:48,264 epoch 7 - iter 712/1786 - loss 0.01902346 - time (sec): 217.77 - samples/sec: 454.40 - lr: 0.000060 - momentum: 0.000000
|
191 |
+
2023-10-10 23:47:41,899 epoch 7 - iter 890/1786 - loss 0.02013304 - time (sec): 271.40 - samples/sec: 453.06 - lr: 0.000058 - momentum: 0.000000
|
192 |
+
2023-10-10 23:48:36,745 epoch 7 - iter 1068/1786 - loss 0.01908802 - time (sec): 326.25 - samples/sec: 454.51 - lr: 0.000057 - momentum: 0.000000
|
193 |
+
2023-10-10 23:49:31,014 epoch 7 - iter 1246/1786 - loss 0.02158022 - time (sec): 380.52 - samples/sec: 455.03 - lr: 0.000055 - momentum: 0.000000
|
194 |
+
2023-10-10 23:50:23,849 epoch 7 - iter 1424/1786 - loss 0.02132979 - time (sec): 433.35 - samples/sec: 453.35 - lr: 0.000053 - momentum: 0.000000
|
195 |
+
2023-10-10 23:51:17,892 epoch 7 - iter 1602/1786 - loss 0.02171688 - time (sec): 487.40 - samples/sec: 457.34 - lr: 0.000052 - momentum: 0.000000
|
196 |
+
2023-10-10 23:52:11,674 epoch 7 - iter 1780/1786 - loss 0.02202637 - time (sec): 541.18 - samples/sec: 458.44 - lr: 0.000050 - momentum: 0.000000
|
197 |
+
2023-10-10 23:52:13,249 ----------------------------------------------------------------------------------------------------
|
198 |
+
2023-10-10 23:52:13,249 EPOCH 7 done: loss 0.0222 - lr: 0.000050
|
199 |
+
2023-10-10 23:52:35,192 DEV : loss 0.19753895699977875 - f1-score (micro avg) 0.788
|
200 |
+
2023-10-10 23:52:35,227 ----------------------------------------------------------------------------------------------------
|
201 |
+
2023-10-10 23:53:28,735 epoch 8 - iter 178/1786 - loss 0.01237478 - time (sec): 53.51 - samples/sec: 458.92 - lr: 0.000048 - momentum: 0.000000
|
202 |
+
2023-10-10 23:54:21,786 epoch 8 - iter 356/1786 - loss 0.01268093 - time (sec): 106.56 - samples/sec: 455.22 - lr: 0.000047 - momentum: 0.000000
|
203 |
+
2023-10-10 23:55:14,892 epoch 8 - iter 534/1786 - loss 0.01457889 - time (sec): 159.66 - samples/sec: 451.29 - lr: 0.000045 - momentum: 0.000000
|
204 |
+
2023-10-10 23:56:10,090 epoch 8 - iter 712/1786 - loss 0.01501835 - time (sec): 214.86 - samples/sec: 456.91 - lr: 0.000043 - momentum: 0.000000
|
205 |
+
2023-10-10 23:57:03,967 epoch 8 - iter 890/1786 - loss 0.01553035 - time (sec): 268.74 - samples/sec: 456.57 - lr: 0.000042 - momentum: 0.000000
|
206 |
+
2023-10-10 23:57:57,805 epoch 8 - iter 1068/1786 - loss 0.01603908 - time (sec): 322.58 - samples/sec: 452.35 - lr: 0.000040 - momentum: 0.000000
|
207 |
+
2023-10-10 23:58:51,476 epoch 8 - iter 1246/1786 - loss 0.01600935 - time (sec): 376.25 - samples/sec: 453.99 - lr: 0.000038 - momentum: 0.000000
|
208 |
+
2023-10-10 23:59:45,167 epoch 8 - iter 1424/1786 - loss 0.01580202 - time (sec): 429.94 - samples/sec: 453.91 - lr: 0.000037 - momentum: 0.000000
|
209 |
+
2023-10-11 00:00:39,583 epoch 8 - iter 1602/1786 - loss 0.01601921 - time (sec): 484.35 - samples/sec: 456.64 - lr: 0.000035 - momentum: 0.000000
|
210 |
+
2023-10-11 00:01:35,445 epoch 8 - iter 1780/1786 - loss 0.01577049 - time (sec): 540.22 - samples/sec: 458.60 - lr: 0.000033 - momentum: 0.000000
|
211 |
+
2023-10-11 00:01:37,335 ----------------------------------------------------------------------------------------------------
|
212 |
+
2023-10-11 00:01:37,336 EPOCH 8 done: loss 0.0159 - lr: 0.000033
|
213 |
+
2023-10-11 00:01:59,826 DEV : loss 0.21947550773620605 - f1-score (micro avg) 0.7661
|
214 |
+
2023-10-11 00:01:59,858 ----------------------------------------------------------------------------------------------------
|
215 |
+
2023-10-11 00:02:55,469 epoch 9 - iter 178/1786 - loss 0.01523368 - time (sec): 55.61 - samples/sec: 447.74 - lr: 0.000032 - momentum: 0.000000
|
216 |
+
2023-10-11 00:03:49,363 epoch 9 - iter 356/1786 - loss 0.01407707 - time (sec): 109.50 - samples/sec: 445.95 - lr: 0.000030 - momentum: 0.000000
|
217 |
+
2023-10-11 00:04:44,566 epoch 9 - iter 534/1786 - loss 0.01448829 - time (sec): 164.71 - samples/sec: 454.38 - lr: 0.000028 - momentum: 0.000000
|
218 |
+
2023-10-11 00:05:37,166 epoch 9 - iter 712/1786 - loss 0.01334062 - time (sec): 217.31 - samples/sec: 449.99 - lr: 0.000027 - momentum: 0.000000
|
219 |
+
2023-10-11 00:06:31,408 epoch 9 - iter 890/1786 - loss 0.01253925 - time (sec): 271.55 - samples/sec: 448.63 - lr: 0.000025 - momentum: 0.000000
|
220 |
+
2023-10-11 00:07:27,023 epoch 9 - iter 1068/1786 - loss 0.01207849 - time (sec): 327.16 - samples/sec: 446.90 - lr: 0.000023 - momentum: 0.000000
|
221 |
+
2023-10-11 00:08:22,340 epoch 9 - iter 1246/1786 - loss 0.01195030 - time (sec): 382.48 - samples/sec: 445.36 - lr: 0.000022 - momentum: 0.000000
|
222 |
+
2023-10-11 00:09:18,578 epoch 9 - iter 1424/1786 - loss 0.01110923 - time (sec): 438.72 - samples/sec: 446.22 - lr: 0.000020 - momentum: 0.000000
|
223 |
+
2023-10-11 00:10:14,720 epoch 9 - iter 1602/1786 - loss 0.01105152 - time (sec): 494.86 - samples/sec: 446.93 - lr: 0.000018 - momentum: 0.000000
|
224 |
+
2023-10-11 00:11:11,874 epoch 9 - iter 1780/1786 - loss 0.01082556 - time (sec): 552.01 - samples/sec: 449.12 - lr: 0.000017 - momentum: 0.000000
|
225 |
+
2023-10-11 00:11:13,673 ----------------------------------------------------------------------------------------------------
|
226 |
+
2023-10-11 00:11:13,673 EPOCH 9 done: loss 0.0109 - lr: 0.000017
|
227 |
+
2023-10-11 00:11:35,988 DEV : loss 0.23203261196613312 - f1-score (micro avg) 0.7856
|
228 |
+
2023-10-11 00:11:36,018 ----------------------------------------------------------------------------------------------------
|
229 |
+
2023-10-11 00:12:32,054 epoch 10 - iter 178/1786 - loss 0.00835357 - time (sec): 56.03 - samples/sec: 450.39 - lr: 0.000015 - momentum: 0.000000
|
230 |
+
2023-10-11 00:13:26,270 epoch 10 - iter 356/1786 - loss 0.00922388 - time (sec): 110.25 - samples/sec: 446.76 - lr: 0.000013 - momentum: 0.000000
|
231 |
+
2023-10-11 00:14:18,625 epoch 10 - iter 534/1786 - loss 0.00920895 - time (sec): 162.60 - samples/sec: 444.35 - lr: 0.000012 - momentum: 0.000000
|
232 |
+
2023-10-11 00:15:12,635 epoch 10 - iter 712/1786 - loss 0.00888713 - time (sec): 216.61 - samples/sec: 455.30 - lr: 0.000010 - momentum: 0.000000
|
233 |
+
2023-10-11 00:16:07,255 epoch 10 - iter 890/1786 - loss 0.00878696 - time (sec): 271.23 - samples/sec: 460.18 - lr: 0.000008 - momentum: 0.000000
|
234 |
+
2023-10-11 00:16:59,266 epoch 10 - iter 1068/1786 - loss 0.00942608 - time (sec): 323.25 - samples/sec: 460.00 - lr: 0.000007 - momentum: 0.000000
|
235 |
+
2023-10-11 00:17:54,373 epoch 10 - iter 1246/1786 - loss 0.00924649 - time (sec): 378.35 - samples/sec: 462.85 - lr: 0.000005 - momentum: 0.000000
|
236 |
+
2023-10-11 00:18:47,015 epoch 10 - iter 1424/1786 - loss 0.01026711 - time (sec): 430.99 - samples/sec: 460.94 - lr: 0.000003 - momentum: 0.000000
|
237 |
+
2023-10-11 00:19:40,614 epoch 10 - iter 1602/1786 - loss 0.00973267 - time (sec): 484.59 - samples/sec: 459.29 - lr: 0.000002 - momentum: 0.000000
|
238 |
+
2023-10-11 00:20:34,899 epoch 10 - iter 1780/1786 - loss 0.00929539 - time (sec): 538.88 - samples/sec: 460.31 - lr: 0.000000 - momentum: 0.000000
|
239 |
+
2023-10-11 00:20:36,575 ----------------------------------------------------------------------------------------------------
|
240 |
+
2023-10-11 00:20:36,575 EPOCH 10 done: loss 0.0093 - lr: 0.000000
|
241 |
+
2023-10-11 00:20:58,741 DEV : loss 0.2327549308538437 - f1-score (micro avg) 0.783
|
242 |
+
2023-10-11 00:20:59,663 ----------------------------------------------------------------------------------------------------
|
243 |
+
2023-10-11 00:20:59,665 Loading model from best epoch ...
|
244 |
+
2023-10-11 00:21:03,556 SequenceTagger predicts: Dictionary with 17 tags: O, S-PER, B-PER, E-PER, I-PER, S-LOC, B-LOC, E-LOC, I-LOC, S-ORG, B-ORG, E-ORG, I-ORG, S-HumanProd, B-HumanProd, E-HumanProd, I-HumanProd
|
245 |
+
2023-10-11 00:22:14,198
|
246 |
+
Results:
|
247 |
+
- F-score (micro) 0.7025
|
248 |
+
- F-score (macro) 0.6076
|
249 |
+
- Accuracy 0.5549
|
250 |
+
|
251 |
+
By class:
|
252 |
+
precision recall f1-score support
|
253 |
+
|
254 |
+
LOC 0.6983 0.7123 0.7052 1095
|
255 |
+
PER 0.7871 0.7816 0.7843 1012
|
256 |
+
ORG 0.4790 0.5434 0.5092 357
|
257 |
+
HumanProd 0.3455 0.5758 0.4318 33
|
258 |
+
|
259 |
+
micro avg 0.6909 0.7145 0.7025 2497
|
260 |
+
macro avg 0.5775 0.6533 0.6076 2497
|
261 |
+
weighted avg 0.6983 0.7145 0.7057 2497
|
262 |
+
|
263 |
+
2023-10-11 00:22:14,198 ----------------------------------------------------------------------------------------------------
|