Spaces:

taishi-i
/

awesome-japanese-nlp-resources-search

Running

File size: 1,648 Bytes

import json
import logging

import streamlit as st
from pyserini.search.lucene import LuceneSearcher

# Search engine: open the Lucene index stored on disk and set the searcher's
# language to "ja".
INDEX_DIR = "indexes/docs"
QUERY_LANGUAGE = "ja"

searcher = LuceneSearcher(INDEX_DIR)
searcher.set_language(QUERY_LANGUAGE)

# UI: page configuration, title, intro text, and the search bar.
PAGE_TITLE = "awesome-japanese-nlp-resources-search"
REPO_LIST_URL = "https://github.com/taishi-i/awesome-japanese-nlp-resources"

st.set_page_config(page_title=PAGE_TITLE, page_icon="😎", layout="centered")


# NOTE(review): these three columns are created but nothing visible in this
# file places content in them.
cola, colb, colc = st.columns([5, 4, 5])

st.header(f"{PAGE_TITLE} 😎")
st.markdown(
    "You can search for open-source software from "
    f"[400+ Japanese NLP repositories]({REPO_LIST_URL})."
)

# Search bar: a wide column for the text box and a narrow one for the button.
col1, col2 = st.columns([9, 1])

# NOTE(review): the label is intentionally left as the empty string here to
# match the existing UI; only the placeholder text is shown to the user.
search_query = col1.text_input(label="", placeholder="Search")

# Presumably a vertical spacer so the button lines up with the text box —
# confirm visually before changing.
col2.write("#")
button_clicked = col2.button("🔎")


# Logger used to report search hits that had to be skipped while rendering.
_logger = logging.getLogger(__name__)

# Run a search whenever the query box is non-empty or the button was pressed,
# then render the hit count followed by one entry per hit.
if search_query or button_clicked:
    # k is the maximum number of hits requested from the index.
    search_results = searcher.search(search_query, k=100_000)

    # NOTE(review): align="light" is not a valid HTML alignment value. It is
    # kept unchanged because the intended alignment cannot be determined from
    # this file.
    st.write(
        '<p align="light" style="color:grey;">'
        f" {len(search_results):,} repositories </p>",
        unsafe_allow_html=True,
    )

    for result in search_results:
        # Parsing lives inside the guarded region so that one malformed or
        # incomplete stored document drops only that entry instead of
        # aborting the whole results page.
        try:
            data_json = json.loads(result.raw)
            description = data_json["description"]
            url = data_json["url"]
            project_name = data_json["project_name"]
            main_topic = data_json["main_topic"]
            # sub_topic is optional: a missing key is treated like None.
            sub_topic = data_json.get("sub_topic")

            st.markdown(f"### [{project_name}]({url})")
            st.markdown(f"{description}")
            if sub_topic is None:
                st.text(f"{main_topic}")
            else:
                st.text(f"{main_topic} / {sub_topic}")
            # Two empty markdown elements act as spacing between entries.
            st.markdown("")
            st.markdown("")
        except Exception:
            # Best-effort rendering is preserved, but failures are logged
            # rather than silently discarded. `except Exception` (unlike a
            # bare `except:`) lets BaseException-derived signals such as
            # KeyboardInterrupt propagate.
            # NOTE(review): Streamlit's stop/rerun control exceptions are
            # believed to be BaseException-derived as well — confirm against
            # the installed version.
            _logger.exception(
                "Skipping search hit %s: could not parse or render it",
                result.docid,
            )