2023-10-24 16:04:44,877 ----------------------------------------------------------------------------------------------------
2023-10-24 16:04:44,878 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(64001, 768)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
              (intermediate_act_fn): GELUActivation()
            )
            (output): BertOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
      (pooler): BertPooler(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (activation): Tanh()
      )
    )
  )
  (locked_dropout): LockedDropout(p=0.5)
  (linear): Linear(in_features=768, out_features=13, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2023-10-24 16:04:44,878 ----------------------------------------------------------------------------------------------------
2023-10-24 16:04:44,878 MultiCorpus: 7936 train + 992 dev + 992 test sentences
 - NER_ICDAR_EUROPEANA Corpus: 7936 train + 992 dev + 992 test sentences - /home/ubuntu/.flair/datasets/ner_icdar_europeana/fr
2023-10-24 16:04:44,878 ----------------------------------------------------------------------------------------------------
2023-10-24 16:04:44,878 Train: 7936 sentences
2023-10-24 16:04:44,878 (train_with_dev=False, train_with_test=False)
2023-10-24 16:04:44,879 ----------------------------------------------------------------------------------------------------
2023-10-24 16:04:44,879 Training Params:
2023-10-24 16:04:44,879 - learning_rate: "5e-05"
2023-10-24 16:04:44,879 - mini_batch_size: "4"
2023-10-24 16:04:44,879 - max_epochs: "10"
2023-10-24 16:04:44,879 - shuffle: "True"
2023-10-24 16:04:44,879 ----------------------------------------------------------------------------------------------------
2023-10-24 16:04:44,879 Plugins:
2023-10-24 16:04:44,879 - TensorboardLogger
2023-10-24 16:04:44,879 - LinearScheduler | warmup_fraction: '0.1'
2023-10-24 16:04:44,879 ----------------------------------------------------------------------------------------------------
2023-10-24 16:04:44,879 Final evaluation on model from best epoch (best-model.pt)
2023-10-24 16:04:44,879 - metric: "('micro avg', 'f1-score')"
2023-10-24 16:04:44,879 ----------------------------------------------------------------------------------------------------
2023-10-24 16:04:44,879 Computation:
2023-10-24 16:04:44,879 - compute on device: cuda:0
2023-10-24 16:04:44,879 - embedding storage: none
2023-10-24 16:04:44,879 ----------------------------------------------------------------------------------------------------
2023-10-24 16:04:44,879 Model training base path: "hmbench-icdar/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-2"
2023-10-24 16:04:44,879 ----------------------------------------------------------------------------------------------------
2023-10-24 16:04:44,879 ----------------------------------------------------------------------------------------------------
2023-10-24 16:04:44,879 Logging anything other than scalars to TensorBoard is currently not supported.
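
The configuration logged above can be reproduced approximately with Flair's fine-tuning API. The following is a minimal sketch, assuming a recent flair release that ships the NER_ICDAR_EUROPEANA dataset class and the dbmdz/bert-base-historic-multilingual-64k-td-cased checkpoint on the Hugging Face Hub; values not printed in the log (e.g. hidden_size) are assumptions, not the exact hmBench training script.

from flair.datasets import NER_ICDAR_EUROPEANA
from flair.embeddings import TransformerWordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

# French split of the ICDAR-Europeana NER corpus (7936/992/992 sentences above).
corpus = NER_ICDAR_EUROPEANA(language="fr")
label_dictionary = corpus.make_label_dictionary(label_type="ner")

# Last layer only, first-subtoken pooling, fine-tuned end to end
# (matches "poolingfirst-layers-1" in the base path above).
embeddings = TransformerWordEmbeddings(
    model="dbmdz/bert-base-historic-multilingual-64k-td-cased",
    layers="-1",
    subtoken_pooling="first",
    fine_tune=True,
)

# Plain linear head without CRF or RNN, as in the printed architecture
# (LockedDropout(0.5) + Linear(768 -> 13) + CrossEntropyLoss).
tagger = SequenceTagger(
    hidden_size=256,  # unused without an RNN; this value is an assumption
    embeddings=embeddings,
    tag_dictionary=label_dictionary,
    tag_type="ner",
    use_crf=False,
    use_rnn=False,
    reproject_embeddings=False,
)

# fine_tune() uses AdamW with a linear schedule; warmup_fraction 0.1 and the
# remaining hyperparameters mirror the "Training Params" block above.
trainer = ModelTrainer(tagger, corpus)
trainer.fine_tune(
    "hmbench-icdar/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-2",
    learning_rate=5e-05,
    mini_batch_size=4,
    max_epochs=10,
)
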
2023-10-24 16:04:56,741 epoch 1 - iter 198/1984 - loss 1.30874305 - time (sec): 11.86 - samples/sec: 1327.48 - lr: 0.000005 - momentum: 0.000000
2023-10-24 16:05:08,929 epoch 1 - iter 396/1984 - loss 0.77290785 - time (sec): 24.05 - samples/sec: 1366.71 - lr: 0.000010 - momentum: 0.000000
2023-10-24 16:05:21,110 epoch 1 - iter 594/1984 - loss 0.57572628 - time (sec): 36.23 - samples/sec: 1376.56 - lr: 0.000015 - momentum: 0.000000
2023-10-24 16:05:33,378 epoch 1 - iter 792/1984 - loss 0.46495084 - time (sec): 48.50 - samples/sec: 1393.71 - lr: 0.000020 - momentum: 0.000000
2023-10-24 16:05:45,281 epoch 1 - iter 990/1984 - loss 0.40496031 - time (sec): 60.40 - samples/sec: 1372.95 - lr: 0.000025 - momentum: 0.000000
2023-10-24 16:05:57,339 epoch 1 - iter 1188/1984 - loss 0.35918387 - time (sec): 72.46 - samples/sec: 1368.87 - lr: 0.000030 - momentum: 0.000000
2023-10-24 16:06:09,207 epoch 1 - iter 1386/1984 - loss 0.33254399 - time (sec): 84.33 - samples/sec: 1357.26 - lr: 0.000035 - momentum: 0.000000
2023-10-24 16:06:21,228 epoch 1 - iter 1584/1984 - loss 0.30875914 - time (sec): 96.35 - samples/sec: 1352.95 - lr: 0.000040 - momentum: 0.000000
2023-10-24 16:06:33,559 epoch 1 - iter 1782/1984 - loss 0.28915180 - time (sec): 108.68 - samples/sec: 1356.10 - lr: 0.000045 - momentum: 0.000000
2023-10-24 16:06:45,621 epoch 1 - iter 1980/1984 - loss 0.27550886 - time (sec): 120.74 - samples/sec: 1353.99 - lr: 0.000050 - momentum: 0.000000
2023-10-24 16:06:45,883 ----------------------------------------------------------------------------------------------------
2023-10-24 16:06:45,883 EPOCH 1 done: loss 0.2751 - lr: 0.000050
2023-10-24 16:06:48,984 DEV : loss 0.1138407364487648 - f1-score (micro avg) 0.66
2023-10-24 16:06:48,999 saving best model
2023-10-24 16:06:49,463 ----------------------------------------------------------------------------------------------------
2023-10-24 16:07:01,531 epoch 2 - iter 198/1984 - loss 0.11655675 - time (sec): 12.07 - samples/sec: 1378.59 - lr: 0.000049 - momentum: 0.000000
2023-10-24 16:07:13,599 epoch 2 - iter 396/1984 - loss 0.12768765 - time (sec): 24.13 - samples/sec: 1366.33 - lr: 0.000049 - momentum: 0.000000
2023-10-24 16:07:25,662 epoch 2 - iter 594/1984 - loss 0.12928172 - time (sec): 36.20 - samples/sec: 1354.01 - lr: 0.000048 - momentum: 0.000000
2023-10-24 16:07:37,968 epoch 2 - iter 792/1984 - loss 0.12515423 - time (sec): 48.50 - samples/sec: 1358.05 - lr: 0.000048 - momentum: 0.000000
2023-10-24 16:07:50,011 epoch 2 - iter 990/1984 - loss 0.12468271 - time (sec): 60.55 - samples/sec: 1352.43 - lr: 0.000047 - momentum: 0.000000
2023-10-24 16:08:02,219 epoch 2 - iter 1188/1984 - loss 0.12585547 - time (sec): 72.76 - samples/sec: 1350.88 - lr: 0.000047 - momentum: 0.000000
2023-10-24 16:08:14,508 epoch 2 - iter 1386/1984 - loss 0.12536731 - time (sec): 85.04 - samples/sec: 1356.27 - lr: 0.000046 - momentum: 0.000000
2023-10-24 16:08:26,927 epoch 2 - iter 1584/1984 - loss 0.12683513 - time (sec): 97.46 - samples/sec: 1354.08 - lr: 0.000046 - momentum: 0.000000
2023-10-24 16:08:38,923 epoch 2 - iter 1782/1984 - loss 0.12567024 - time (sec): 109.46 - samples/sec: 1349.32 - lr: 0.000045 - momentum: 0.000000
2023-10-24 16:08:50,905 epoch 2 - iter 1980/1984 - loss 0.12355541 - time (sec): 121.44 - samples/sec: 1348.76 - lr: 0.000044 - momentum: 0.000000
2023-10-24 16:08:51,136 ----------------------------------------------------------------------------------------------------
2023-10-24 16:08:51,136 EPOCH 2 done: loss 0.1236 - lr: 0.000044
2023-10-24 16:08:54,244 DEV : loss 0.11781438440084457 - f1-score (micro avg) 0.7224
2023-10-24 16:08:54,259 saving best model
2023-10-24 16:08:54,870 ----------------------------------------------------------------------------------------------------
2023-10-24 16:09:07,592 epoch 3 - iter 198/1984 - loss 0.09570974 - time (sec): 12.72 - samples/sec: 1320.29 - lr: 0.000044 - momentum: 0.000000
2023-10-24 16:09:19,560 epoch 3 - iter 396/1984 - loss 0.09234026 - time (sec): 24.69 - samples/sec: 1318.45 - lr: 0.000043 - momentum: 0.000000
2023-10-24 16:09:31,705 epoch 3 - iter 594/1984 - loss 0.09370577 - time (sec): 36.83 - samples/sec: 1341.85 - lr: 0.000043 - momentum: 0.000000
2023-10-24 16:09:43,846 epoch 3 - iter 792/1984 - loss 0.09168960 - time (sec): 48.97 - samples/sec: 1353.86 - lr: 0.000042 - momentum: 0.000000
2023-10-24 16:09:55,897 epoch 3 - iter 990/1984 - loss 0.09458487 - time (sec): 61.03 - samples/sec: 1343.19 - lr: 0.000042 - momentum: 0.000000
2023-10-24 16:10:08,031 epoch 3 - iter 1188/1984 - loss 0.09320143 - time (sec): 73.16 - samples/sec: 1339.84 - lr: 0.000041 - momentum: 0.000000
2023-10-24 16:10:20,096 epoch 3 - iter 1386/1984 - loss 0.09112387 - time (sec): 85.22 - samples/sec: 1342.96 - lr: 0.000041 - momentum: 0.000000
2023-10-24 16:10:32,057 epoch 3 - iter 1584/1984 - loss 0.09073603 - time (sec): 97.19 - samples/sec: 1343.93 - lr: 0.000040 - momentum: 0.000000
2023-10-24 16:10:44,110 epoch 3 - iter 1782/1984 - loss 0.09058074 - time (sec): 109.24 - samples/sec: 1344.60 - lr: 0.000039 - momentum: 0.000000
2023-10-24 16:10:56,367 epoch 3 - iter 1980/1984 - loss 0.09067995 - time (sec): 121.50 - samples/sec: 1347.52 - lr: 0.000039 - momentum: 0.000000
2023-10-24 16:10:56,599 ----------------------------------------------------------------------------------------------------
2023-10-24 16:10:56,599 EPOCH 3 done: loss 0.0906 - lr: 0.000039
2023-10-24 16:10:59,703 DEV : loss 0.1346270591020584 - f1-score (micro avg) 0.7411
2023-10-24 16:10:59,718 saving best model
2023-10-24 16:11:00,312 ----------------------------------------------------------------------------------------------------
2023-10-24 16:11:12,521 epoch 4 - iter 198/1984 - loss 0.05402007 - time (sec): 12.21 - samples/sec: 1388.06 - lr: 0.000038 - momentum: 0.000000
2023-10-24 16:11:24,580 epoch 4 - iter 396/1984 - loss 0.06102748 - time (sec): 24.27 - samples/sec: 1343.10 - lr: 0.000038 - momentum: 0.000000
2023-10-24 16:11:36,914 epoch 4 - iter 594/1984 - loss 0.06492659 - time (sec): 36.60 - samples/sec: 1366.14 - lr: 0.000037 - momentum: 0.000000
2023-10-24 16:11:48,948 epoch 4 - iter 792/1984 - loss 0.06649210 - time (sec): 48.63 - samples/sec: 1356.46 - lr: 0.000037 - momentum: 0.000000
2023-10-24 16:12:01,047 epoch 4 - iter 990/1984 - loss 0.06862370 - time (sec): 60.73 - samples/sec: 1355.00 - lr: 0.000036 - momentum: 0.000000
2023-10-24 16:12:13,286 epoch 4 - iter 1188/1984 - loss 0.06836085 - time (sec): 72.97 - samples/sec: 1355.36 - lr: 0.000036 - momentum: 0.000000
2023-10-24 16:12:25,253 epoch 4 - iter 1386/1984 - loss 0.06765039 - time (sec): 84.94 - samples/sec: 1349.99 - lr: 0.000035 - momentum: 0.000000
2023-10-24 16:12:37,532 epoch 4 - iter 1584/1984 - loss 0.07146222 - time (sec): 97.22 - samples/sec: 1348.16 - lr: 0.000034 - momentum: 0.000000
2023-10-24 16:12:49,618 epoch 4 - iter 1782/1984 - loss 0.07238990 - time (sec): 109.30 - samples/sec: 1349.96 - lr: 0.000034 - momentum: 0.000000
2023-10-24 16:13:01,723 epoch 4 - iter 1980/1984 - loss 0.07174639 - time (sec): 121.41 - samples/sec: 1348.26 - lr: 0.000033 - momentum: 0.000000
2023-10-24 16:13:01,960 ----------------------------------------------------------------------------------------------------
2023-10-24 16:13:01,960 EPOCH 4 done: loss 0.0716 - lr: 0.000033
2023-10-24 16:13:05,387 DEV : loss 0.1819346696138382 - f1-score (micro avg) 0.7121
2023-10-24 16:13:05,402 ----------------------------------------------------------------------------------------------------
2023-10-24 16:13:17,704 epoch 5 - iter 198/1984 - loss 0.04521262 - time (sec): 12.30 - samples/sec: 1372.08 - lr: 0.000033 - momentum: 0.000000
2023-10-24 16:13:29,758 epoch 5 - iter 396/1984 - loss 0.04965805 - time (sec): 24.35 - samples/sec: 1333.01 - lr: 0.000032 - momentum: 0.000000
2023-10-24 16:13:42,101 epoch 5 - iter 594/1984 - loss 0.05396032 - time (sec): 36.70 - samples/sec: 1348.68 - lr: 0.000032 - momentum: 0.000000
2023-10-24 16:13:54,145 epoch 5 - iter 792/1984 - loss 0.05332345 - time (sec): 48.74 - samples/sec: 1336.01 - lr: 0.000031 - momentum: 0.000000
2023-10-24 16:14:06,219 epoch 5 - iter 990/1984 - loss 0.05289847 - time (sec): 60.82 - samples/sec: 1333.40 - lr: 0.000031 - momentum: 0.000000
2023-10-24 16:14:18,370 epoch 5 - iter 1188/1984 - loss 0.05257156 - time (sec): 72.97 - samples/sec: 1340.50 - lr: 0.000030 - momentum: 0.000000
2023-10-24 16:14:30,344 epoch 5 - iter 1386/1984 - loss 0.05371135 - time (sec): 84.94 - samples/sec: 1336.50 - lr: 0.000029 - momentum: 0.000000
2023-10-24 16:14:42,427 epoch 5 - iter 1584/1984 - loss 0.05289029 - time (sec): 97.02 - samples/sec: 1335.24 - lr: 0.000029 - momentum: 0.000000
2023-10-24 16:14:54,750 epoch 5 - iter 1782/1984 - loss 0.05253860 - time (sec): 109.35 - samples/sec: 1344.21 - lr: 0.000028 - momentum: 0.000000
2023-10-24 16:15:06,900 epoch 5 - iter 1980/1984 - loss 0.05342353 - time (sec): 121.50 - samples/sec: 1347.29 - lr: 0.000028 - momentum: 0.000000
2023-10-24 16:15:07,145 ----------------------------------------------------------------------------------------------------
2023-10-24 16:15:07,145 EPOCH 5 done: loss 0.0534 - lr: 0.000028
2023-10-24 16:15:10,265 DEV : loss 0.1831832379102707 - f1-score (micro avg) 0.7547
2023-10-24 16:15:10,281 saving best model
2023-10-24 16:15:10,863 ----------------------------------------------------------------------------------------------------
2023-10-24 16:15:22,992 epoch 6 - iter 198/1984 - loss 0.04237849 - time (sec): 12.13 - samples/sec: 1337.47 - lr: 0.000027 - momentum: 0.000000
2023-10-24 16:15:35,132 epoch 6 - iter 396/1984 - loss 0.04244041 - time (sec): 24.27 - samples/sec: 1330.21 - lr: 0.000027 - momentum: 0.000000
2023-10-24 16:15:47,336 epoch 6 - iter 594/1984 - loss 0.04119784 - time (sec): 36.47 - samples/sec: 1321.23 - lr: 0.000026 - momentum: 0.000000
2023-10-24 16:15:59,255 epoch 6 - iter 792/1984 - loss 0.03907748 - time (sec): 48.39 - samples/sec: 1322.25 - lr: 0.000026 - momentum: 0.000000
2023-10-24 16:16:11,471 epoch 6 - iter 990/1984 - loss 0.03932677 - time (sec): 60.61 - samples/sec: 1329.87 - lr: 0.000025 - momentum: 0.000000
2023-10-24 16:16:23,682 epoch 6 - iter 1188/1984 - loss 0.03914299 - time (sec): 72.82 - samples/sec: 1346.94 - lr: 0.000024 - momentum: 0.000000
2023-10-24 16:16:35,824 epoch 6 - iter 1386/1984 - loss 0.03960027 - time (sec): 84.96 - samples/sec: 1346.44 - lr: 0.000024 - momentum: 0.000000
2023-10-24 16:16:47,954 epoch 6 - iter 1584/1984 - loss 0.03955263 - time (sec): 97.09 - samples/sec: 1343.36 - lr: 0.000023 - momentum: 0.000000
2023-10-24 16:17:00,498 epoch 6 - iter 1782/1984 - loss 0.03906932 - time (sec): 109.63 - samples/sec: 1338.19 - lr: 0.000023 - momentum: 0.000000
2023-10-24 16:17:12,589 epoch 6 - iter 1980/1984 - loss 0.03897848 - time (sec): 121.73 - samples/sec: 1344.46 - lr: 0.000022 - momentum: 0.000000
2023-10-24 16:17:12,829 ----------------------------------------------------------------------------------------------------
2023-10-24 16:17:12,829 EPOCH 6 done: loss 0.0389 - lr: 0.000022
2023-10-24 16:17:15,940 DEV : loss 0.19957636296749115 - f1-score (micro avg) 0.7514
2023-10-24 16:17:15,955 ----------------------------------------------------------------------------------------------------
2023-10-24 16:17:28,168 epoch 7 - iter 198/1984 - loss 0.02502948 - time (sec): 12.21 - samples/sec: 1380.74 - lr: 0.000022 - momentum: 0.000000
2023-10-24 16:17:40,287 epoch 7 - iter 396/1984 - loss 0.02588075 - time (sec): 24.33 - samples/sec: 1401.14 - lr: 0.000021 - momentum: 0.000000
2023-10-24 16:17:52,407 epoch 7 - iter 594/1984 - loss 0.02615491 - time (sec): 36.45 - samples/sec: 1368.78 - lr: 0.000021 - momentum: 0.000000
2023-10-24 16:18:04,399 epoch 7 - iter 792/1984 - loss 0.02913086 - time (sec): 48.44 - samples/sec: 1355.33 - lr: 0.000020 - momentum: 0.000000
2023-10-24 16:18:16,592 epoch 7 - iter 990/1984 - loss 0.02832750 - time (sec): 60.64 - samples/sec: 1358.80 - lr: 0.000019 - momentum: 0.000000
2023-10-24 16:18:28,567 epoch 7 - iter 1188/1984 - loss 0.02868687 - time (sec): 72.61 - samples/sec: 1354.60 - lr: 0.000019 - momentum: 0.000000
2023-10-24 16:18:40,812 epoch 7 - iter 1386/1984 - loss 0.02776447 - time (sec): 84.86 - samples/sec: 1358.22 - lr: 0.000018 - momentum: 0.000000
2023-10-24 16:18:53,016 epoch 7 - iter 1584/1984 - loss 0.02756283 - time (sec): 97.06 - samples/sec: 1358.28 - lr: 0.000018 - momentum: 0.000000
2023-10-24 16:19:05,288 epoch 7 - iter 1782/1984 - loss 0.02799328 - time (sec): 109.33 - samples/sec: 1357.02 - lr: 0.000017 - momentum: 0.000000
2023-10-24 16:19:17,165 epoch 7 - iter 1980/1984 - loss 0.02831462 - time (sec): 121.21 - samples/sec: 1350.06 - lr: 0.000017 - momentum: 0.000000
2023-10-24 16:19:17,411 ----------------------------------------------------------------------------------------------------
2023-10-24 16:19:17,411 EPOCH 7 done: loss 0.0285 - lr: 0.000017
2023-10-24 16:19:20,522 DEV : loss 0.2192317098379135 - f1-score (micro avg) 0.752
2023-10-24 16:19:20,537 ----------------------------------------------------------------------------------------------------
2023-10-24 16:19:32,740 epoch 8 - iter 198/1984 - loss 0.01571927 - time (sec): 12.20 - samples/sec: 1352.95 - lr: 0.000016 - momentum: 0.000000
2023-10-24 16:19:45,022 epoch 8 - iter 396/1984 - loss 0.02057122 - time (sec): 24.48 - samples/sec: 1364.10 - lr: 0.000016 - momentum: 0.000000
2023-10-24 16:19:57,061 epoch 8 - iter 594/1984 - loss 0.02018865 - time (sec): 36.52 - samples/sec: 1354.89 - lr: 0.000015 - momentum: 0.000000
2023-10-24 16:20:09,359 epoch 8 - iter 792/1984 - loss 0.01942023 - time (sec): 48.82 - samples/sec: 1337.22 - lr: 0.000014 - momentum: 0.000000
2023-10-24 16:20:21,446 epoch 8 - iter 990/1984 - loss 0.01908064 - time (sec): 60.91 - samples/sec: 1341.88 - lr: 0.000014 - momentum: 0.000000
2023-10-24 16:20:33,588 epoch 8 - iter 1188/1984 - loss 0.01893982 - time (sec): 73.05 - samples/sec: 1344.69 - lr: 0.000013 - momentum: 0.000000
2023-10-24 16:20:45,658 epoch 8 - iter 1386/1984 - loss 0.01968258 - time (sec): 85.12 - samples/sec: 1345.13 - lr: 0.000013 - momentum: 0.000000
2023-10-24 16:20:57,835 epoch 8 - iter 1584/1984 - loss 0.01929883 - time (sec): 97.30 - samples/sec: 1347.10 - lr: 0.000012 - momentum: 0.000000
2023-10-24 16:21:09,932 epoch 8 - iter 1782/1984 - loss 0.01879306 - time (sec): 109.39 - samples/sec: 1350.71 - lr: 0.000012 - momentum: 0.000000
2023-10-24 16:21:22,126 epoch 8 - iter 1980/1984 - loss 0.01895222 - time (sec): 121.59 - samples/sec: 1346.49 - lr: 0.000011 - momentum: 0.000000
2023-10-24 16:21:22,360 ----------------------------------------------------------------------------------------------------
2023-10-24 16:21:22,360 EPOCH 8 done: loss 0.0189 - lr: 0.000011
2023-10-24 16:21:25,487 DEV : loss 0.22360068559646606 - f1-score (micro avg) 0.7521
2023-10-24 16:21:25,502 ----------------------------------------------------------------------------------------------------
2023-10-24 16:21:37,492 epoch 9 - iter 198/1984 - loss 0.01775440 - time (sec): 11.99 - samples/sec: 1336.38 - lr: 0.000011 - momentum: 0.000000
2023-10-24 16:21:49,622 epoch 9 - iter 396/1984 - loss 0.01720248 - time (sec): 24.12 - samples/sec: 1342.89 - lr: 0.000010 - momentum: 0.000000
2023-10-24 16:22:01,654 epoch 9 - iter 594/1984 - loss 0.01399946 - time (sec): 36.15 - samples/sec: 1334.74 - lr: 0.000009 - momentum: 0.000000
2023-10-24 16:22:13,714 epoch 9 - iter 792/1984 - loss 0.01448509 - time (sec): 48.21 - samples/sec: 1341.70 - lr: 0.000009 - momentum: 0.000000
2023-10-24 16:22:26,082 epoch 9 - iter 990/1984 - loss 0.01346876 - time (sec): 60.58 - samples/sec: 1345.87 - lr: 0.000008 - momentum: 0.000000
2023-10-24 16:22:38,079 epoch 9 - iter 1188/1984 - loss 0.01320978 - time (sec): 72.58 - samples/sec: 1341.53 - lr: 0.000008 - momentum: 0.000000
2023-10-24 16:22:50,327 epoch 9 - iter 1386/1984 - loss 0.01288739 - time (sec): 84.82 - samples/sec: 1337.84 - lr: 0.000007 - momentum: 0.000000
2023-10-24 16:23:02,743 epoch 9 - iter 1584/1984 - loss 0.01244493 - time (sec): 97.24 - samples/sec: 1346.31 - lr: 0.000007 - momentum: 0.000000
2023-10-24 16:23:14,948 epoch 9 - iter 1782/1984 - loss 0.01309784 - time (sec): 109.44 - samples/sec: 1352.32 - lr: 0.000006 - momentum: 0.000000
2023-10-24 16:23:27,037 epoch 9 - iter 1980/1984 - loss 0.01325666 - time (sec): 121.53 - samples/sec: 1347.10 - lr: 0.000006 - momentum: 0.000000
2023-10-24 16:23:27,275 ----------------------------------------------------------------------------------------------------
2023-10-24 16:23:27,275 EPOCH 9 done: loss 0.0132 - lr: 0.000006
2023-10-24 16:23:30,725 DEV : loss 0.23320023715496063 - f1-score (micro avg) 0.7554
2023-10-24 16:23:30,740 saving best model
2023-10-24 16:23:31,359 ----------------------------------------------------------------------------------------------------
2023-10-24 16:23:43,428 epoch 10 - iter 198/1984 - loss 0.01269146 - time (sec): 12.07 - samples/sec: 1382.28 - lr: 0.000005 - momentum: 0.000000
2023-10-24 16:23:55,427 epoch 10 - iter 396/1984 - loss 0.01140904 - time (sec): 24.07 - samples/sec: 1352.02 - lr: 0.000004 - momentum: 0.000000
2023-10-24 16:24:07,511 epoch 10 - iter 594/1984 - loss 0.01014037 - time (sec): 36.15 - samples/sec: 1356.57 - lr: 0.000004 - momentum: 0.000000
2023-10-24 16:24:20,208 epoch 10 - iter 792/1984 - loss 0.00961988 - time (sec): 48.85 - samples/sec: 1369.07 - lr: 0.000003 - momentum: 0.000000
2023-10-24 16:24:32,356 epoch 10 - iter 990/1984 - loss 0.00934250 - time (sec): 61.00 - samples/sec: 1360.44 - lr: 0.000003 - momentum: 0.000000
2023-10-24 16:24:44,568 epoch 10 - iter 1188/1984 - loss 0.00880262 - time (sec): 73.21 - samples/sec: 1366.91 - lr: 0.000002 - momentum: 0.000000
2023-10-24 16:24:56,562 epoch 10 - iter 1386/1984 - loss 0.00886579 - time (sec): 85.20 - samples/sec: 1360.69 - lr: 0.000002 - momentum: 0.000000
2023-10-24 16:25:08,616 epoch 10 - iter 1584/1984 - loss 0.00840621 - time (sec): 97.26 - samples/sec: 1364.15 - lr: 0.000001 - momentum: 0.000000
2023-10-24 16:25:20,602 epoch 10 - iter 1782/1984 - loss 0.00865685 - time (sec): 109.24 - samples/sec: 1355.92 - lr: 0.000001 - momentum: 0.000000
2023-10-24 16:25:32,569 epoch 10 - iter 1980/1984 - loss 0.00901465 - time (sec): 121.21 - samples/sec: 1349.08 - lr: 0.000000 - momentum: 0.000000
2023-10-24 16:25:32,857 ----------------------------------------------------------------------------------------------------
2023-10-24 16:25:32,857 EPOCH 10 done: loss 0.0090 - lr: 0.000000
2023-10-24 16:25:35,980 DEV : loss 0.25083112716674805 - f1-score (micro avg) 0.7613
2023-10-24 16:25:35,995 saving best model
2023-10-24 16:25:37,068 ----------------------------------------------------------------------------------------------------
2023-10-24 16:25:37,069 Loading model from best epoch ...
2023-10-24 16:25:38,537 SequenceTagger predicts: Dictionary with 13 tags: O, S-PER, B-PER, E-PER, I-PER, S-LOC, B-LOC, E-LOC, I-LOC, S-ORG, B-ORG, E-ORG, I-ORG
2023-10-24 16:25:41,628
Results:
- F-score (micro) 0.779
- F-score (macro) 0.6989
- Accuracy 0.6605

By class:
              precision    recall  f1-score   support

         LOC     0.8326    0.8504    0.8414       655
         PER     0.6911    0.7623    0.7249       223
         ORG     0.5922    0.4803    0.5304       127

   micro avg     0.7741    0.7841    0.7790      1005
   macro avg     0.7053    0.6977    0.6989      1005
weighted avg     0.7708    0.7841    0.7763      1005

2023-10-24 16:25:41,628 ----------------------------------------------------------------------------------------------------
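
For inference, the saved checkpoint can be loaded back with Flair. A minimal usage sketch, assuming the best-model.pt file under the "Model training base path" above; the French example sentence is hypothetical:

from flair.data import Sentence
from flair.models import SequenceTagger

# Path assumed from the base path logged above plus best-model.pt.
tagger = SequenceTagger.load(
    "hmbench-icdar/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-2/best-model.pt"
)

sentence = Sentence("Victor Hugo est né à Besançon.")
tagger.predict(sentence)

# Entity spans are decoded from the 13-tag BIOES dictionary (PER/LOC/ORG).
for span in sentence.get_spans("ner"):
    print(span.text, span.tag, span.score)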