Update README.md
Browse files
README.md
CHANGED
@@ -42,5 +42,23 @@ The model was trained on [Japanese CC-100](http://data.statmt.org/cc-100/ja.txt.
|
|
42 |
# Tokenization
|
43 |
The model uses a [sentencepiece](https://github.com/google/sentencepiece)-based tokenizer; the vocabulary was trained on the Japanese Wikipedia using the official sentencepiece training script.
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
# License
|
46 |
[The MIT license](https://opensource.org/licenses/MIT)
|
|
|
42 |
# Tokenization
|
43 |
The model uses a [sentencepiece](https://github.com/google/sentencepiece)-based tokenizer; the vocabulary was trained on the Japanese Wikipedia using the official sentencepiece training script.
|
44 |
|
45 |
+
# How to cite
|
46 |
+
~~~
|
47 |
+
@misc{rinna-japanese-gpt2-small,
|
48 |
+
title = {rinna/japanese-gpt2-small},
|
49 |
+
author = {Zhao, Tianyu and Sawada, Kei},
|
50 |
+
url = {https://huggingface.co/rinna/japanese-gpt2-small},
|
51 |
+
}
|
52 |
+
|
53 |
+
@inproceedings{sawada2024release,
|
54 |
+
title = {Release of Pre-Trained Models for the {J}apanese Language},
|
55 |
+
author = {Sawada, Kei and Zhao, Tianyu and Shing, Makoto and Mitsui, Kentaro and Kaga, Akio and Hono, Yukiya and Wakatsuki, Toshiaki and Mitsuda, Koh},
|
56 |
+
booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
|
57 |
+
month = {5},
|
58 |
+
year = {2024},
|
59 |
+
url = {https://arxiv.org/abs/2404.01657},
|
60 |
+
}
|
61 |
+
~~~
|
62 |
+
|
63 |
# License
|
64 |
[The MIT license](https://opensource.org/licenses/MIT)
|