"""Gradio demo that searches a Lucene index of Japanese NLP repositories."""

import json

import gradio as gr
import pandas as pd
from pyserini.search.lucene import LuceneSearcher

# Location of the prebuilt Lucene index, relative to the working directory.
INDEX_DIR = "indexes/docs"
# Default maximum number of hits requested per query.
MAX_HITS = 100

searcher = LuceneSearcher(INDEX_DIR)
searcher.set_language("ja")


def show_search_results(query, k=MAX_HITS):
    """Search the index and return the hits as a two-column DataFrame.

    Args:
        query: Text typed into the search box. ``None``, empty, or
            whitespace-only input yields an empty result without touching
            the index.
        k: Maximum number of hits to request from the searcher.

    Returns:
        A ``pandas.DataFrame`` with the columns ``project_name`` (a Markdown
        link built from each document's ``project_name`` and ``url`` fields)
        and ``description``.

    Raises:
        KeyError: If a stored document lacks ``url``, ``description`` or
            ``project_name``.
    """
    links = []
    descriptions = []

    # The textbox fires on every change, including when it is cleared, so a
    # blank query is a normal input; skip the search for it.
    if query and query.strip():
        for hit in searcher.search(query, k=k):
            # Each stored document's raw payload is a JSON object.
            record = json.loads(searcher.doc(hit.docid).raw())
            links.append(f"[{record['project_name']}]({record['url']})")
            descriptions.append(record["description"])

    return pd.DataFrame({"project_name": links, "description": descriptions})


with gr.Blocks() as demo:
    gr.Markdown(
        """
    # Awesome Japanese NLP resources search 🔎
    You can search for open-source software from [500+ Japanese NLP repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources).
    """
    )

    query = gr.Textbox(
        label="Search English or Japanese words", placeholder="llm"
    )
    # The "markdown" datatype lets the project_name links render as links.
    df = gr.DataFrame(type="pandas", datatype="markdown", height=1000)

    # Re-run the search whenever the textbox content changes.
    query.change(fn=show_search_results, inputs=query, outputs=df)

demo.launch()