"""Streamlit search page over the Lucene index stored in ``indexes/docs``.

The script opens the index, draws a query box with a search button, and
lists every hit as a linked project title, its description and its topics.
"""

import json
import logging

import streamlit as st
from pyserini.search.lucene import LuceneSearcher

logger = logging.getLogger(__name__)

# Search engine
searcher = LuceneSearcher("indexes/docs")
searcher.set_language("ja")


def _render_repository(record):
    """Render one search hit.

    Args:
        record: Dict decoded from a hit's raw JSON. It must provide the keys
            ``description``, ``url``, ``project_name``, ``main_topic`` and
            ``sub_topic``.

    Raises:
        KeyError: If one of the required keys is missing from ``record``.
    """
    # Key lookups stay outside the ``try`` so that a malformed record is
    # reported instead of being hidden by the rendering fallback below.
    description = record["description"]
    url = record["url"]
    project_name = record["project_name"]
    main_topic = record["main_topic"]
    sub_topic = record["sub_topic"]

    try:
        st.markdown(f"### [{project_name}]({url})")
        st.markdown(f"{description}")
        if sub_topic is None:
            st.text(f"{main_topic}")
        else:
            st.text(f"{main_topic} / {sub_topic}")
        # Two empty markdown elements act as vertical spacing between hits.
        st.markdown("")
        st.markdown("")
    except Exception:
        # Rendering is best-effort: one bad entry must not hide the remaining
        # results. Catch ``Exception`` rather than using a bare ``except`` so
        # that BaseException-derived signals (KeyboardInterrupt, SystemExit)
        # still propagate, and keep a traceback for diagnosis.
        logger.exception("Could not render search result %r", project_name)


# UI
st.set_page_config(
    page_title="awesome-japanese-nlp-resources-search",
    page_icon="😎",
    layout="centered",
)

# The returned column objects are never used; the call itself is retained so
# that the page layout stays exactly as before.
st.columns([5, 4, 5])

st.header("awesome-japanese-nlp-resources-search 😎")
st.markdown(
    "You can search for open-source software from [400+ Japanese NLP"
    " repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources)."
)

col1, col2 = st.columns([9, 1])
with col1:
    search_query = st.text_input(label="", placeholder="Search")

with col2:
    # Heading-sized spacer that pushes the button down next to the text box.
    st.write("#")
    button_clicked = st.button("🔎")

if search_query or button_clicked:
    search_results = searcher.search(search_query, k=100_000)

    # NOTE(review): this text is written with unsafe_allow_html=True, yet no
    # HTML markup is visible in it here; it is reproduced as found. Restore
    # the surrounding tags if they were lost.
    st.write(
        "\n\n"
        f" {len(search_results):,.0f} repositories\n\n",
        unsafe_allow_html=True,
    )

    for result in search_results:
        _render_repository(json.loads(result.raw))