mya-mya committed on
Commit
ce4ac29
·
1 Parent(s): 2ef5116

Initial Commit

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. app.py +121 -0
  3. novel2vec_skipgram_gensim4_100dim.model +3 -0
  4. requirements.txt +1 -0
  5. tests.py +4 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__/*
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gradio import (
2
+ Blocks,
3
+ Tabs,
4
+ TabItem,
5
+ Textbox,
6
+ Markdown,
7
+ Button,
8
+ Group,
9
+ Label,
10
+ Row,
11
+ Examples,
12
+ )
13
+ from gensim.models import Word2Vec
14
+ import numpy as np
15
+ import re
16
+
17
# Load the trained model once at import time. The path is resolved relative
# to this file rather than the current working directory, so the app (and
# anything importing it) works no matter where the process was started from.
from pathlib import Path

novel2vec: Word2Vec = Word2Vec.load(
    str(Path(__file__).resolve().parent / "novel2vec_skipgram_gensim4_100dim.model")
)
18
+
19
+
20
def on_show_novel_vector_click(input_novel_title):
    """Return the embedding vector of a novel formatted as text.

    The title is first looked up exactly as typed. If that fails, it is
    looked up again with ASCII spaces removed and surrounding whitespace
    trimmed, so that a title that is accepted in a novel formula is also
    accepted here.

    Args:
        input_novel_title: Novel title typed into the textbox.

    Returns:
        The vector components, each formatted with three decimals and
        separated by single spaces, or a message saying that the title is
        not in the database.
    """
    title = input_novel_title
    if title not in novel2vec.wv:
        # Fallback only: anything that matched exactly before still does.
        normalized = input_novel_title.replace(" ", "").strip()
        if normalized not in novel2vec.wv:
            return f"『{input_novel_title}』はデータベースにありません"
        title = normalized

    vector = novel2vec.wv.get_vector(title)
    return " ".join(f"{component:.3f}" for component in vector)
27
+
28
+
29
def create_show_novel_vector_tab():
    """Build the tab that shows the raw vector of a single novel.

    The tab holds a button and a single-line title textbox side by side,
    followed by a Markdown area. Clicking the button passes the textbox
    value to ``on_show_novel_vector_click`` and writes the returned text
    into the Markdown area.
    """
    with TabItem("小説ベクトルを見る"):
        with Row():
            show_button = Button("ベクトルを見る")
            title_box = Textbox(label="小説の題名", max_lines=1)
        vector_view = Markdown()
        show_button.click(
            fn=on_show_novel_vector_click,
            inputs=title_box,
            outputs=vector_view,
        )
40
+
41
+
42
def parse_novel_formula(novel_formula):
    """Split a novel formula into titles to add and titles to subtract.

    A formula is a run of novel titles joined by ``+`` and ``-``, for
    example ``"A + B - C"``. A title belongs to the sign that precedes it;
    titles before any sign are treated as added. Empty terms (such as the
    one between the two signs in ``"A ++ B"``) are ignored.

    ASCII spaces are removed from every term, and any other whitespace
    around a term (newlines, tabs, full-width spaces) is trimmed so that
    multi-line input or IME-typed spacing does not become part of a title.

    Args:
        novel_formula: Formula text entered by the user.

    Returns:
        A ``(positive, negative)`` tuple of title lists, each in input order.
    """
    positive = []
    negative = []
    target = positive
    # The capturing group keeps the operators in the split result.
    for token in re.split(r"([\+\-])", novel_formula):
        token = token.replace(" ", "").strip()
        if not token:
            continue
        if token == "+":
            target = positive
        elif token == "-":
            target = negative
        else:
            target.append(token)
    return positive, negative
57
+
58
+
59
def on_start_calc_click(input_novel_formula):
    """Evaluate a novel formula and return the most similar novels.

    Args:
        input_novel_formula: A single title, or titles joined by ``+``/``-``.

    Returns:
        A ``(label_data, message)`` pair. On success ``label_data`` maps up
        to 20 titles to their similarity score and ``message`` is empty.
        On failure ``label_data`` is ``None`` and ``message`` explains the
        problem (unknown titles, or no title to add).
    """
    positive, negative = parse_novel_formula(input_novel_formula)

    unknown_titles = [
        title for title in positive + negative if title not in novel2vec.wv
    ]
    if unknown_titles:
        error_message = "** 以下の題名がデータベースにありません **"
        error_message += "".join(f"\n * {title}" for title in unknown_titles)
        return None, error_message

    # An empty formula, or one made only of subtracted titles, leaves nothing
    # to search from; the similarity query needs at least one positive term,
    # so report the problem instead of letting the call fail.
    if not positive:
        return None, "足し算する題名（「-」の付かない題名）を1つ以上入力してください"

    result = novel2vec.wv.most_similar_cosmul(
        positive=positive, negative=negative, topn=20
    )
    return dict(result), ""
73
+
74
+
75
def create_calc_novels_tab():
    """Build the tab that searches or does arithmetic with novel titles.

    The tab has three groups: the run button with the formula textbox, the
    result label with a Markdown area for messages, and clickable example
    formulas with a short usage note. Clicking the button passes the
    textbox value to ``on_start_calc_click`` and writes its two return
    values to the label and the Markdown area.
    """
    with TabItem("小説の演算を行う"):
        with Group():
            with Row():
                run_button = Button("演算する", variant="primary")
                formula_box = Textbox(label="小説の題名または小説式")
        with Group():
            result_label = Label(label="演算結果")
            message_view = Markdown("")
        # The examples and the button feed the same pair of output components.
        result_components = [result_label, message_view]
        with Group():
            Examples(
                examples=["世界から猫が消えたなら", "君の膵臓を食べたい + 涼宮ハルヒの憂鬱 - 三日間の幸福"],
                inputs=formula_box,
                outputs=result_components,
                fn=on_start_calc_click,
            )
            Markdown(
                """
                * 小説の題名を1つ入力→ **類似した小説を検索** できます。
                * 小説の題名を足し算したり引き算する→ **小説を演算** できます。
                """
            )

        run_button.click(
            fn=on_start_calc_click,
            inputs=formula_box,
            outputs=result_components,
        )
103
+
104
+
105
def main():
    """Assemble the Novel2Vec console and start the Gradio app.

    The page consists of an introductory Markdown header followed by two
    tabs: novel arithmetic first, raw vector display second.
    """
    with Blocks() as app_ui:
        Markdown("""
        ## Novel2Vec Console
        Twitterハッシュタグ`名刺代わりの小説10選`をWord2Vecで学習させ、小説をベクトル化しました。
        ツイートデータは[GINK03様が公開されているデータ](https://github.com/GINK03/novel_recommend/blob/master/var/shosetsu_dataset.csv)を用いました。
        詳しくは[私の記事を参照](https://note.com/omiyayimo/n/n0301112dbcc7)ください。
        """)
        with Tabs():
            create_calc_novels_tab()
            create_show_novel_vector_tab()

    app_ui.launch()


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
novel2vec_skipgram_gensim4_100dim.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4cc9693de2040a63333ec5220fe6ff39adceff24f239e8d5a8b7b6d2f647cdb
3
+ size 3680495
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ gensim
tests.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
def parse_novel_formula_test():
    """Check that ``parse_novel_formula`` sorts titles by their sign.

    The formula mixes a leading minus, stray spaces and a doubled plus.
    The parsed result is still printed, and is now also asserted so that a
    regression fails the run instead of only changing the output.
    """
    from app import parse_novel_formula

    result = parse_novel_formula("-A+ B ++C-D+E")
    print(result)

    positive, negative = result
    assert list(positive) == ["B", "C", "E"], positive
    assert list(negative) == ["A", "D"], negative


parse_novel_formula_test()