Update README.md
Browse files
README.md
CHANGED
@@ -73,5 +73,32 @@ print(ids)
|
|
73 |
|
74 |
# Citation
|
75 |
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
|
|
73 |
|
74 |
# Citation
|
75 |
|
76 |
+
```bibtex
|
77 |
+
@inproceedings{piskorski-etal-2024-cross-lingual,
|
78 |
+
title = "Cross-lingual Named Entity Corpus for {S}lavic Languages",
|
79 |
+
author = "Piskorski, Jakub and
|
80 |
+
Marci{\'n}czuk, Micha{\l} and
|
81 |
+
Yangarber, Roman",
|
82 |
+
editor = "Calzolari, Nicoletta and
|
83 |
+
Kan, Min-Yen and
|
84 |
+
Hoste, Veronique and
|
85 |
+
Lenci, Alessandro and
|
86 |
+
Sakti, Sakriani and
|
87 |
+
Xue, Nianwen",
|
88 |
+
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
|
89 |
+
month = may,
|
90 |
+
year = "2024",
|
91 |
+
address = "Torino, Italy",
|
92 |
+
publisher = "ELRA and ICCL",
|
93 |
+
url = "https://aclanthology.org/2024.lrec-main.369",
|
94 |
+
pages = "4143--4157",
|
95 |
+
abstract = "This paper presents a corpus manually annotated with named entities for six Slavic languages {---} Bulgarian, Czech, Polish, Slovenian, Russian,
|
96 |
+
and Ukrainian. This work is the result of a series of shared tasks, conducted in 2017{--}2023 as a part of the Workshops on Slavic Natural
|
97 |
+
Language Processing. The corpus consists of 5,017 documents on seven topics. The documents are annotated with five classes of named entities.
|
98 |
+
Each entity is described by a category, a lemma, and a unique cross-lingual identifier. We provide two train-tune dataset splits
|
99 |
+
{---} single topic out and cross topics. For each split, we set benchmarks using a transformer-based neural network architecture
|
100 |
+
with the pre-trained multilingual models {---} XLM-RoBERTa-large for named entity mention recognition and categorization,
|
101 |
+
and mT5-large for named entity lemmatization and linking.",
|
102 |
+
}
|
103 |
+
```
|
104 |
|