File size: 1,648 Bytes
c5b16f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa55c84
c5b16f9
 
 
fa55c84
 
 
 
c5b16f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cff5397
 
c5b16f9
 
cff5397
c5b16f9
cff5397
 
 
 
 
c5b16f9
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import json
import logging

import streamlit as st
from pyserini.search.lucene import LuceneSearcher

# Search engine
# Location of the Lucene index opened by the searcher, and the language
# code handed to it for query handling.
INDEX_DIR = "indexes/docs"
QUERY_LANGUAGE = "ja"

searcher = LuceneSearcher(INDEX_DIR)
searcher.set_language(QUERY_LANGUAGE)

# UI
# Page-level settings (browser tab title, favicon emoji, centered layout).
st.set_page_config(
    page_title="awesome-japanese-nlp-resources-search",
    page_icon="😎",
    layout="centered",
)


# NOTE(review): these three columns are created but never populated in this
# script; kept as-is so the page layout does not change — confirm whether
# they are still needed.
cola, colb, colc = st.columns([5, 4, 5])

# Title and a short description linking to the upstream resource list.
st.header("awesome-japanese-nlp-resources-search 😎")
st.markdown(
    "You can search for open-source software from [400+ Japanese NLP"
    " repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources)."
)

# Search bar: a wide text input (9 parts) beside a narrow button (1 part).
col1, col2 = st.columns([9, 1])
with col1:
    # The label is empty, so the placeholder is the only visible hint.
    search_query = st.text_input(label="", placeholder="Search")

with col2:
    # Presumably a vertical spacer so the button lines up with the input.
    st.write("#")
    # The label was mojibake ("πŸ”Ž"): the UTF-8 bytes of the magnifying-glass
    # emoji decoded with the wrong codec. Restored to the intended character.
    button_clicked = st.button("🔎")


# Logger used to record hits that are skipped instead of dropping them silently.
logger = logging.getLogger(__name__)

# Run a search when a query has been entered or the search button was pressed.
if search_query or button_clicked:
    search_results = searcher.search(search_query, k=100_000)

    # Hit count, written as raw HTML so it can be coloured grey.
    # NOTE(review): align="light" is not a valid HTML alignment value
    # (possibly "right" was intended) — left unchanged, confirm with the author.
    st.write(
        '<p align="light" style="color:grey;">'
        f" {len(search_results):,.0f} repositories </p>",
        unsafe_allow_html=True,
    )

    for result in search_results:
        # Each hit carries its stored document as a JSON string in `raw`.
        # A record that cannot be parsed or lacks a required field is skipped
        # (and logged) rather than aborting the whole results page.
        try:
            data_json = json.loads(result.raw)
            description = data_json["description"]
            url = data_json["url"]
            project_name = data_json["project_name"]
            main_topic = data_json["main_topic"]
            sub_topic = data_json["sub_topic"]
        except (KeyError, TypeError, ValueError):
            logger.warning("Skipping search hit with malformed payload", exc_info=True)
            continue

        # Rendering stays best-effort, but only ordinary errors are caught:
        # a bare `except:` would also trap BaseException subclasses such as
        # KeyboardInterrupt and SystemExit.
        try:
            st.markdown(f"### [{project_name}]({url})")
            st.markdown(f"{description}")
            if sub_topic is None:
                st.text(f"{main_topic}")
            else:
                st.text(f"{main_topic} / {sub_topic}")
            # Two empty markdown elements add vertical space between results.
            st.markdown("")
            st.markdown("")
        except Exception:
            logger.exception("Failed to render search result %r", project_name)