Spaces:
Build error
Build error
Initial Commit
Browse files- .gitignore +1 -0
- app.py +121 -0
- novel2vec_skipgram_gensim4_100dim.model +3 -0
- requirements.txt +1 -0
- tests.py +4 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__/*
|
app.py
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from gradio import (
|
2 |
+
Blocks,
|
3 |
+
Tabs,
|
4 |
+
TabItem,
|
5 |
+
Textbox,
|
6 |
+
Markdown,
|
7 |
+
Button,
|
8 |
+
Group,
|
9 |
+
Label,
|
10 |
+
Row,
|
11 |
+
Examples,
|
12 |
+
)
|
13 |
+
from gensim.models import Word2Vec
|
14 |
+
import numpy as np
|
15 |
+
import re
|
16 |
+
|
17 |
+
# Load the trained Word2Vec model once, at import time. The click handlers in
# this module read it through the module-level name `novel2vec`. The "./" path
# is relative, so it is resolved against the process's working directory.
novel2vec: Word2Vec = Word2Vec.load("./novel2vec_skipgram_gensim4_100dim.model")
|
18 |
+
|
19 |
+
|
20 |
+
def on_show_novel_vector_click(input_novel_title):
    """Render the stored vector of one novel title as text.

    Args:
        input_novel_title: Title to look up in the module-level ``novel2vec``
            model.

    Returns:
        The vector components formatted with three decimals and separated by
        single spaces, or a "not in the database" message (in Japanese) when
        the title is not a key of the model.
    """
    word_vectors = novel2vec.wv
    if input_novel_title in word_vectors:
        components = word_vectors.get_vector(input_novel_title)
        return " ".join(f"{value:.3f}" for value in components)
    return f"『{input_novel_title}』はデータベースにありません"
|
27 |
+
|
28 |
+
|
29 |
+
def create_show_novel_vector_tab():
    """Build the tab that displays the raw vector of a single novel.

    Creates a button and a one-line title textbox in a row, plus a Markdown
    output, and wires the button to ``on_show_novel_vector_click``.

    NOTE(review): the nesting below was reconstructed from a flattened diff
    view in which indentation was lost; confirm it against the repository.
    """
    with TabItem("小説ベクトルを見る"):
        with Row():
            show_button = Button("ベクトルを見る")
            title_box = Textbox(label="小説の題名", max_lines=1)
        vector_view = Markdown()

    # The textbox value is passed to the handler; its return value is shown
    # in the Markdown component.
    show_button.click(
        fn=on_show_novel_vector_click,
        inputs=title_box,
        outputs=vector_view,
    )
|
40 |
+
|
41 |
+
|
42 |
+
def parse_novel_formula(novel_formula):
    """Split a "novel formula" into added and subtracted titles.

    The formula is a sequence of titles separated by ``+`` or ``-``. A title
    goes to the positive list unless the most recent operator before it was
    ``-``. Every ASCII space is removed from each title (including spaces
    inside it), and empty pieces (e.g. between doubled operators) are ignored.
    Because ``-`` is always treated as an operator, a title containing a
    hyphen is split at that hyphen.

    Args:
        novel_formula: Formula text such as ``"A + B - C"``.

    Returns:
        A ``(positive, negative)`` tuple of lists of title strings.
    """
    positive = []
    negative = []
    target = positive  # titles before any operator count as added
    # The capturing group makes re.split keep the operators as tokens.
    for raw_token in re.split(r"([\+\-])", novel_formula):
        token = raw_token.replace(" ", "")
        if token == "":
            continue
        if token == "+":
            target = positive
        elif token == "-":
            target = negative
        else:
            target.append(token)
    return positive, negative
|
57 |
+
|
58 |
+
|
59 |
+
def on_start_calc_click(input_novel_formula):
    """Evaluate a novel formula and return the most similar novels.

    Args:
        input_novel_formula: A single title, or several titles combined with
            ``+`` / ``-`` (parsed by ``parse_novel_formula``). ``None`` is
            treated like an empty string.

    Returns:
        A ``(label_data, message)`` pair. On success ``label_data`` maps each
        of up to 20 result titles to its similarity score and ``message`` is
        ``""``. When the formula cannot be evaluated, ``label_data`` is
        ``None`` and ``message`` explains why (in Japanese).
    """
    positive, negative = parse_novel_formula(input_novel_formula or "")

    unknown_titles = [
        title for title in positive + negative if title not in novel2vec.wv
    ]
    if unknown_titles:
        lines = ["** 以下の題名がデータベースにありません **"]
        lines.extend(f" * {title}" for title in unknown_titles)
        return None, "\n".join(lines)

    if not positive:
        # An empty formula, or one with only subtracted titles, has no
        # positive term. gensim's most_similar_cosmul rejects such a query
        # with ValueError, so report it to the user instead of crashing.
        return None, "加算する小説の題名を1つ以上入力してください。"

    result = novel2vec.wv.most_similar_cosmul(
        positive=positive, negative=negative, topn=20
    )
    # `result` is a list of (title, score) pairs.
    return dict(result), ""
|
73 |
+
|
74 |
+
|
75 |
+
def create_calc_novels_tab():
    """Build the tab for similarity search and novel arithmetic.

    Creates the formula textbox and submit button, a Label and a Markdown
    output, clickable examples and a short usage note, then wires the button
    to ``on_start_calc_click``.

    NOTE(review): the nesting below was reconstructed from a flattened diff
    view in which indentation was lost; confirm it against the repository.
    """
    with TabItem("小説の演算を行う"):
        with Group():
            with Row():
                calc_button = Button("演算する", variant="primary")
                formula_box = Textbox(label="小説の題名または小説式")
        with Group():
            result_label = Label(label="演算結果")
            message_view = Markdown("")
        with Group():
            Examples(
                examples=["世界から猫が消えたなら", "君の膵臓を食べたい + 涼宮ハルヒの憂鬱 - 三日間の幸福"],
                inputs=formula_box,
                outputs=[result_label, message_view],
                fn=on_start_calc_click,
            )
            Markdown(
                """
                * 小説の題名を1つ入力→ **類似した小説を検索** できます。
                * 小説の題名を足し算したり引き算する→ **小説を演算** できます。
                """
            )

    # The handler returns (label_data, message), matching the two outputs.
    calc_button.click(
        fn=on_start_calc_click,
        inputs=formula_box,
        outputs=[result_label, message_view],
    )
|
103 |
+
|
104 |
+
|
105 |
+
def main():
    """Assemble the Novel2Vec console and launch it.

    The page consists of an introductory Markdown section followed by two
    tabs: novel arithmetic first, then the raw-vector viewer.

    NOTE(review): indentation was reconstructed from a flattened diff view;
    confirm the nesting against the repository.
    """
    with Blocks() as console:
        Markdown("""
        ## Novel2Vec Console
        Twitterハッシュタグ`名刺代わりの小説10選`をWord2Vecで学習させ、小説をベクトル化しました。
        ツイートデータは[GINK03様が公開されているデータ](https://github.com/GINK03/novel_recommend/blob/master/var/shosetsu_dataset.csv)を用いました。
        詳しくは[私の記事を参照](https://note.com/omiyayimo/n/n0301112dbcc7)ください。
        """)
        with Tabs():
            create_calc_novels_tab()
            create_show_novel_vector_tab()

    console.launch()


# Start the UI only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
novel2vec_skipgram_gensim4_100dim.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4cc9693de2040a63333ec5220fe6ff39adceff24f239e8d5a8b7b6d2f647cdb
|
3 |
+
size 3680495
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
gensim
|
tests.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def parse_novel_formula_test():
    """Check parse_novel_formula on a formula with leading and doubled operators.

    Prints the parsed result and asserts that it matches the expected
    ``(positive, negative)`` split, so a regression fails loudly instead of
    having to be spotted in the printed output.
    """
    # Imported inside the function, as before; importing `app` also runs its
    # module-level code.
    from app import parse_novel_formula

    parsed = parse_novel_formula("-A+ B ++C-D+E")
    print(parsed)
    # "A" and "D" follow "-", everything else is added; the empty piece
    # between the doubled "+" is dropped.
    assert parsed == (["B", "C", "E"], ["A", "D"]), parsed


parse_novel_formula_test()
|