|
import json |
|
|
|
import gradio as gr |
|
import pandas as pd |
|
from pyserini.search.lucene import LuceneSearcher |
|
|
|
# Open the Lucene index stored under INDEX_DIR once at import time and set the
# searcher's language to Japanese; the search handler reuses this instance.
INDEX_DIR = "indexes/docs"
INDEX_LANGUAGE = "ja"

searcher = LuceneSearcher(INDEX_DIR)
searcher.set_language(INDEX_LANGUAGE)
|
|
|
|
|
def show_search_results(query):
    """Search the index for *query* and return the hits as a DataFrame.

    For every hit, the stored raw document is parsed as JSON and its
    ``url``, ``description`` and ``project_name`` fields are read. The
    project name is emitted as a Markdown link pointing at the URL.

    Args:
        query: Search text. ``None``, an empty string, or a string made up
            only of whitespace yields an empty result without touching the
            index.

    Returns:
        A ``pandas.DataFrame`` with the columns ``project_name`` and
        ``description``, one row per hit (at most 100 rows).

    Raises:
        KeyError: If a stored document lacks one of the expected fields.
    """
    columns = ["project_name", "description"]

    # This handler is wired to the textbox "change" event, so it also runs
    # when the box is cleared; there is nothing to look up in that case.
    if query is None or not query.strip():
        return pd.DataFrame(columns=columns)

    rows = []
    for hit in searcher.search(query, k=100):
        record = json.loads(searcher.doc(hit.docid).raw())

        url = record["url"]
        description = record["description"]
        project_name = record["project_name"]

        rows.append(
            {
                "project_name": f"[{project_name}]({url})",
                "description": description,
            }
        )

    # Passing columns explicitly keeps the schema stable when there are no hits.
    return pd.DataFrame(rows, columns=columns)
|
|
|
|
|
# Build the search page: a heading, a query box, and a results table that is
# refreshed by show_search_results every time the query text changes.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Awesome Japanese NLP resources search π
        You can search for open-source software from [500+ Japanese NLP repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources).
        """
    )

    query = gr.Textbox(
        label="Search English or Japanese words",
        placeholder="llm",
    )

    # Cells are declared as Markdown; show_search_results puts Markdown links
    # in the project_name column.
    df = gr.DataFrame(
        type="pandas",
        datatype="markdown",
        height=1000,
    )

    # Re-run the search on every edit of the textbox.
    query.change(
        fn=show_search_results,
        inputs=query,
        outputs=df,
    )

demo.launch()
|
|