File size: 1,254 Bytes
c5b16f9 1864742 c5b16f9 1864742 94349ba 1864742 94349ba 1864742 94349ba 1864742 94349ba 1864742 94349ba 1864742 94349ba 1864742 94349ba 1864742 94349ba 1864742 94349ba 1864742 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import json
import gradio as gr
import pandas as pd
from pyserini.search.lucene import LuceneSearcher
# Open the Lucene index stored under "indexes/docs". The resulting module-level
# searcher is the one show_search_results uses for every query.
searcher = LuceneSearcher("indexes/docs")
# Set the searcher's language to Japanese ("ja").
# NOTE(review): presumably needed so Japanese query text is tokenized the same
# way the index was built — confirm against the Pyserini documentation.
searcher.set_language("ja")
def show_search_results(query, k=100):
    """Search the index and return the hits as a two-column table.

    Each hit's stored document is parsed as JSON and must provide the keys
    ``project_name``, ``url`` and ``description``. The project name is
    rendered as a Markdown link pointing at the URL.

    Args:
        query: Free-text search string typed by the user.
        k: Maximum number of hits to request from the searcher.

    Returns:
        A ``pandas.DataFrame`` with the columns ``project_name`` and
        ``description``, one row per hit. The frame is empty (but keeps both
        columns) when the query is blank or nothing matches.
    """
    columns = {"project_name": [], "description": []}

    # The UI calls this on every textbox change, including when the box is
    # cleared; a blank query cannot match anything, so skip the index lookup.
    if not query or not query.strip():
        return pd.DataFrame(columns)

    for hit in searcher.search(query, k=k):
        # The hit only carries the id; fetch the stored document to get the
        # raw JSON payload that was indexed.
        record = json.loads(searcher.doc(hit.docid).raw())
        url = record["url"]
        description = record["description"]
        project_name = record["project_name"]
        columns["project_name"].append(f"[{project_name}]({url})")
        columns["description"].append(description)

    return pd.DataFrame(columns)
# Introductory Markdown shown at the top of the page.
INTRO_MARKDOWN = """
# Awesome Japanese NLP resources search 🔎
You can search for open-source software from [500+ Japanese NLP repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources).
"""

with gr.Blocks() as demo:
    gr.Markdown(INTRO_MARKDOWN)

    # Free-text search box; every edit re-runs the search.
    query = gr.Textbox(
        label="Search English or Japanese words",
        placeholder="llm",
    )

    # Result table; cells are rendered as Markdown so project names become links.
    df = gr.DataFrame(
        type="pandas",
        datatype="markdown",
        height=1000,
    )

    # Event listeners must be registered while the Blocks context is open.
    query.change(
        fn=show_search_results,
        inputs=query,
        outputs=df,
    )

# Start the web server for the demo.
demo.launch()
|