File size: 1,648 Bytes
c5b16f9 fa55c84 c5b16f9 fa55c84 c5b16f9 cff5397 c5b16f9 cff5397 c5b16f9 cff5397 c5b16f9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import json
import logging

import streamlit as st
from pyserini.search.lucene import LuceneSearcher
# Search engine: a Lucene searcher over the index directory below, with its
# language set to the given code.
INDEX_DIR = "indexes/docs"
INDEX_LANGUAGE = "ja"

searcher = LuceneSearcher(INDEX_DIR)
searcher.set_language(INDEX_LANGUAGE)
# UI: page configuration, heading, intro text, and the search row.
APP_TITLE = "awesome-japanese-nlp-resources-search"

# Page-level settings are applied before any other Streamlit element is drawn.
st.set_page_config(page_title=APP_TITLE, page_icon="π", layout="centered")

# Three columns are created here but nothing below ever writes into them.
cola, colb, colc = st.columns([5, 4, 5])

st.header(f"{APP_TITLE} π")

intro_text = (
    "You can search for open-source software from "
    "[400+ Japanese NLP repositories]"
    "(https://github.com/taishi-i/awesome-japanese-nlp-resources)."
)
st.markdown(intro_text)

# Search row: a wide text box (9 parts) next to a narrow button (1 part).
query_col, button_col = st.columns([9, 1])
search_query = query_col.text_input(label="", placeholder="Search")

# An empty markdown heading is written above the button — presumably a
# vertical spacer so the button lines up with the text box (TODO confirm).
button_col.write("#")
button_clicked = button_col.button("π")
# Results: run the query against the index and render one entry per hit.
logger = logging.getLogger(__name__)

# Upper bound on the number of hits requested from the searcher.
MAX_RESULTS = 100_000

if search_query or button_clicked:
    search_results = searcher.search(search_query, k=MAX_RESULTS)

    # Hit counter shown above the result list.
    # NOTE(review): align="light" is not a valid HTML alignment value
    # (possibly "right" or "left" was intended); kept unchanged until the
    # intended layout is confirmed.
    st.write(
        '<p align="light" style="color:grey;">'
        f" {len(search_results):,} repositories </p>",
        unsafe_allow_html=True,
    )

    for result in search_results:
        # Decode the stored document. A malformed record is skipped (and
        # logged) instead of aborting the rendering of every remaining hit.
        try:
            data_json = json.loads(result.raw)
            description = data_json["description"]
            url = data_json["url"]
            project_name = data_json["project_name"]
            main_topic = data_json["main_topic"]
        except (TypeError, ValueError, KeyError):
            logger.warning("Skipping search hit with malformed stored data")
            continue
        # A missing sub_topic is treated the same as an explicit null.
        sub_topic = data_json.get("sub_topic")

        # Rendering stays best-effort per hit, but only ordinary errors are
        # caught: a bare `except:` would also swallow BaseException
        # subclasses such as KeyboardInterrupt.
        try:
            st.markdown(f"### [{project_name}]({url})")
            st.markdown(f"{description}")
            if sub_topic is None:
                st.text(f"{main_topic}")
            else:
                st.text(f"{main_topic} / {sub_topic}")
            # Two empty markdown elements are written after each entry.
            st.markdown("")
            st.markdown("")
        except Exception:
            logger.exception("Failed to render search hit %r", project_name)
|