update app.py
Browse files
README.md
CHANGED
@@ -3,8 +3,8 @@ title: Awesome Japanese Nlp Resources Search
|
|
3 |
emoji: ๐ข
|
4 |
colorFrom: gray
|
5 |
colorTo: indigo
|
6 |
-
sdk:
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
|
|
3 |
emoji: ๐ข
|
4 |
colorFrom: gray
|
5 |
colorTo: indigo
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 4.36.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
app.py
CHANGED
@@ -1,110 +1,46 @@
|
|
1 |
import json
|
2 |
|
3 |
-
import
|
|
|
4 |
from pyserini.search.lucene import LuceneSearcher
|
5 |
|
|
|
|
|
6 |
|
7 |
-
class SearchApplication:
|
8 |
-
def __init__(self):
|
9 |
-
self.title = "Awesome Japanese NLP resources search"
|
10 |
|
11 |
-
|
12 |
-
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
)
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
" repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources)."
|
28 |
-
)
|
29 |
-
st.write("#")
|
30 |
|
31 |
-
|
32 |
-
|
33 |
|
34 |
-
def set_page_config(self):
|
35 |
-
st.set_page_config(
|
36 |
-
page_title=self.title,
|
37 |
-
page_icon="๐",
|
38 |
-
layout="centered",
|
39 |
-
)
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
45 |
|
46 |
-
|
47 |
-
|
|
|
|
|
48 |
|
49 |
-
|
50 |
-
with word1:
|
51 |
-
button1 = st.button("Python")
|
52 |
-
if button1:
|
53 |
-
self.query = "Python"
|
54 |
|
55 |
-
|
56 |
-
button2 = st.button("ChatGPT")
|
57 |
-
if button2:
|
58 |
-
self.query = "ChatGPT"
|
59 |
-
|
60 |
-
with word3:
|
61 |
-
button3 = st.button("辞書")
|
62 |
-
if button3:
|
63 |
-
self.query = "辞書"
|
64 |
-
|
65 |
-
with word4:
|
66 |
-
button4 = st.button("コーパス")
|
67 |
-
if button4:
|
68 |
-
self.query = "Corpus"
|
69 |
-
|
70 |
-
def show_search_results(self):
|
71 |
-
if self.query or self.search_button:
|
72 |
-
st.write("#")
|
73 |
-
|
74 |
-
search_results = self.searcher.search(self.query, k=500)
|
75 |
-
num_search_results = len(search_results)
|
76 |
-
st.write(f"{num_search_results} results")
|
77 |
-
|
78 |
-
for result in sorted(
|
79 |
-
search_results,
|
80 |
-
key=lambda x: json.loads(self.searcher.doc(x.docid).raw())[
|
81 |
-
"stargazers_count"
|
82 |
-
],
|
83 |
-
reverse=True,
|
84 |
-
):
|
85 |
-
docid = result.docid
|
86 |
-
doc = self.searcher.doc(docid)
|
87 |
-
data_json = json.loads(doc.raw())
|
88 |
-
|
89 |
-
description = data_json["description"]
|
90 |
-
url = data_json["url"]
|
91 |
-
project_name = data_json["project_name"]
|
92 |
-
main_topic = data_json["main_topic"]
|
93 |
-
sub_topic = data_json["sub_topic"]
|
94 |
-
|
95 |
-
st.write("---")
|
96 |
-
st.subheader(f"[{project_name}]({url})")
|
97 |
-
st.markdown(description)
|
98 |
-
if sub_topic is None:
|
99 |
-
st.caption(f"{main_topic}")
|
100 |
-
else:
|
101 |
-
st.caption(f"{main_topic} / {sub_topic}")
|
102 |
-
st.write("#")
|
103 |
-
|
104 |
-
|
105 |
-
def main():
|
106 |
-
SearchApplication()
|
107 |
-
|
108 |
-
|
109 |
-
if __name__ == "__main__":
|
110 |
-
main()
|
|
|
1 |
import json
|
2 |
|
3 |
+
import gradio as gr
|
4 |
+
import pandas as pd
|
5 |
from pyserini.search.lucene import LuceneSearcher
|
6 |
|
7 |
+
searcher = LuceneSearcher("indexes/docs")
|
8 |
+
searcher.set_language("ja")
|
9 |
|
|
|
|
|
|
|
10 |
|
11 |
+
def show_search_results(query):
|
12 |
+
search_results = searcher.search(query, k=100)
|
13 |
|
14 |
+
data = {"project_name": [], "description": []}
|
15 |
+
for result in search_results:
|
16 |
+
docid = result.docid
|
17 |
+
doc = searcher.doc(docid)
|
18 |
+
data_json = json.loads(doc.raw())
|
|
|
19 |
|
20 |
+
url = data_json["url"]
|
21 |
+
description = data_json["description"]
|
22 |
+
project_name = data_json["project_name"]
|
23 |
|
24 |
+
data["project_name"].append(f"[{project_name}]({url})")
|
25 |
+
data["description"].append(description)
|
|
|
|
|
|
|
26 |
|
27 |
+
data = pd.DataFrame(data)
|
28 |
+
return data
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
+
with gr.Blocks() as demo:
|
32 |
+
gr.Markdown(
|
33 |
+
"""
|
34 |
+
# Awesome Japanese NLP resources search ๐
|
35 |
+
You can search for open-source software from [500+ Japanese NLP repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources).
|
36 |
+
"""
|
37 |
+
)
|
38 |
|
39 |
+
query = gr.Textbox(
|
40 |
+
label="Search English or Japanese words", placeholder="llm"
|
41 |
+
)
|
42 |
+
df = gr.DataFrame(type="pandas", datatype="markdown", height=1000)
|
43 |
|
44 |
+
query.change(fn=show_search_results, inputs=query, outputs=df)
|
|
|
|
|
|
|
|
|
45 |
|
46 |
+
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|