# Spaces: Build error
from gradio import ( | |
Blocks, | |
Tabs, | |
TabItem, | |
Textbox, | |
Markdown, | |
Button, | |
Group, | |
Label, | |
Row, | |
Examples, | |
) | |
from gensim.models import Word2Vec | |
import numpy as np | |
import re | |
# Load the saved Word2Vec model once at import time; the click handlers in
# this file look titles up through ``novel2vec.wv``.
novel2vec: Word2Vec = Word2Vec.load("./novel2vec_skipgram_gensim4_100dim.model")
def on_show_novel_vector_click(input_novel_title):
    """Render the vector of one novel title as text.

    Args:
        input_novel_title: Title typed into the textbox.

    Returns:
        The vector components formatted with three decimals and joined by
        single spaces, or a "not in the database" message when the title is
        missing from ``novel2vec.wv``.
    """
    if input_novel_title in novel2vec.wv:
        components = novel2vec.wv.get_vector(input_novel_title)
        return " ".join(f"{value:.3f}" for value in components)
    return f"『{input_novel_title}』はデータベースにありません"
def create_show_novel_vector_tab():
    """Build the tab that shows the vector of a single novel title.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been lost; confirm which components belong in the row.
    """
    with TabItem("小説ベクトルを見る"):
        with Row():
            show_button = Button("ベクトルを見る")
            title_box = Textbox(label="小説の題名", max_lines=1)
        vector_view = Markdown()
        # Clicking the button passes the textbox content to the handler and
        # writes the returned text into the Markdown component.
        show_button.click(
            on_show_novel_vector_click,
            inputs=title_box,
            outputs=vector_view,
        )
def parse_novel_formula(novel_formula: str) -> tuple:
    """Split a novel formula into titles to add and titles to subtract.

    The formula is a sequence of titles separated by ``+`` or ``-``. Titles
    go to the positive list until a ``-`` is seen, and to the negative list
    until the next ``+``. Titles before any operator count as positive.

    Args:
        novel_formula: Text such as ``"A + B - C"``; a single title is valid.

    Returns:
        ``(positive, negative)``: two lists of title strings in input order.
        Both are empty when the formula contains no title.
    """
    positive = []
    negative = []
    target = positive
    # The capturing group keeps the operators themselves in the split result.
    for token in re.split(r"([\+\-])", novel_formula):
        # ASCII spaces are dropped everywhere (unchanged behavior). strip()
        # additionally removes newlines, tabs and full-width spaces left at
        # the edges, which would otherwise stay glued to the title (or form a
        # whitespace-only "title") and make the vocabulary lookup fail.
        token = token.replace(" ", "").strip()
        if not token:
            continue
        if token == "+":
            target = positive
        elif token == "-":
            target = negative
        else:
            target.append(token)
    return positive, negative
def on_start_calc_click(input_novel_formula):
    """Evaluate a novel formula and return the most similar novels.

    Args:
        input_novel_formula: A single title or a formula such as
            ``"A + B - C"``.

    Returns:
        ``(label_data, message)``. On success ``label_data`` maps up to 20
        titles to the score returned by ``most_similar_cosmul`` and
        ``message`` is empty. On failure ``label_data`` is ``None`` and
        ``message`` is Markdown text describing the problem.
    """
    positive, negative = parse_novel_formula(input_novel_formula)

    unknown_titles = [
        title for title in positive + negative if title not in novel2vec.wv
    ]
    if unknown_titles:
        lines = ["** 以下の題名がデータベースにありません **"]
        lines.extend(f" * {title}" for title in unknown_titles)
        return None, "\n".join(lines)

    if not positive:
        # most_similar_cosmul raises ValueError when there is nothing to add
        # (empty input, or only subtracted titles); report that to the user
        # instead of letting the handler crash.
        return None, "** 足し合わせる小説の題名を1つ以上入力してください **"

    result = novel2vec.wv.most_similar_cosmul(
        positive=positive, negative=negative, topn=20
    )
    # result is a sequence of (title, score) pairs.
    return dict(result), ""
def create_calc_novels_tab():
    """Build the tab for similarity search and novel arithmetic.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been lost; confirm the grouping of the components.
    """
    sample_formulas = ["世界から猫が消えたなら", "君の膵臓を食べたい + 涼宮ハルヒの憂鬱 - 三日間の幸福"]
    # Kept flush-left so the Markdown list markers start at column 0.
    usage_notes = """
* 小説の題名を1つ入力→ **類似した小説を検索** できます。
* 小説の題名を足し算したり引き算する→ **小説を演算** できます。
"""
    with TabItem("小説の演算を行う"):
        with Group():
            with Row():
                calc_button = Button("演算する", variant="primary")
                formula_box = Textbox(label="小説の題名または小説式")
        with Group():
            result_label = Label(label="演算結果")
            message_view = Markdown("")
        with Group():
            # The examples use the same handler and outputs as the button.
            Examples(
                examples=sample_formulas,
                inputs=formula_box,
                outputs=[result_label, message_view],
                fn=on_start_calc_click,
            )
            Markdown(usage_notes)
        calc_button.click(
            on_start_calc_click,
            inputs=formula_box,
            outputs=[result_label, message_view],
        )
def main():
    """Assemble the Novel2Vec console and launch it.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been lost; confirm that ``launch()`` belongs after the
    ``with Blocks()`` context.
    """
    # Kept flush-left so the Markdown heading starts at column 0.
    intro_text = """
## Novel2Vec Console
Twitterハッシュタグ`名刺代わりの小説10選`をWord2Vecで学習させ、小説をベクトル化しました。
ツイートデータは[GINK03様が公開されているデータ](https://github.com/GINK03/novel_recommend/blob/master/var/shosetsu_dataset.csv)を用いました。
詳しくは[私の記事を参照](https://note.com/omiyayimo/n/n0301112dbcc7)ください。
"""
    with Blocks() as demo:
        Markdown(intro_text)
        with Tabs():
            # The calculation tab is created first, then the vector tab.
            create_calc_novels_tab()
            create_show_novel_vector_tab()
    demo.launch()
# Build and launch the interface only when this file is run as a script.
if __name__ == "__main__":
    main()