Spaces:

taishi-i
/

awesome-japanese-nlp-resources-search

Running

Update index files

fa55c84 over 1 year ago

1.65 kB

	import json

	import streamlit as st
	from pyserini.search.lucene import LuceneSearcher

	# Search engine: open the Lucene index stored under INDEX_DIR and switch
	# the searcher to Japanese ("ja") before any query is issued.
	INDEX_DIR = "indexes/docs"
	SEARCH_LANGUAGE = "ja"

	searcher = LuceneSearcher(INDEX_DIR)
	searcher.set_language(SEARCH_LANGUAGE)

	# UI: browser-tab title, favicon emoji and a centered page layout.
	page_settings = {
	    "page_title": "awesome-japanese-nlp-resources-search",
	    "page_icon": "😎",
	    "layout": "centered",
	}
	st.set_page_config(**page_settings)


	# NOTE(review): these three columns are never written to; the call is kept
	# so the page layout stays exactly as it was.
	cola, colb, colc = st.columns([5, 4, 5])

	# Page heading and a one-line description linking to the source list.
	st.header("awesome-japanese-nlp-resources-search 😎")
	intro_text = (
	    "You can search for open-source software from [400+ Japanese NLP"
	    " repositories](https://github.com/taishi-i/awesome-japanese-nlp-resources)."
	)
	st.markdown(intro_text)

	# Search row: a wide text box with a narrow button column beside it.
	input_col, button_col = st.columns([9, 1])
	search_query = input_col.text_input(label="", placeholder="Search")

	# The "#" is written above the button; presumably a spacer that lines the
	# button up with the text box - confirm visually before removing.
	button_col.write("#")
	button_clicked = button_col.button("🔎")


	# Run a search whenever the text box holds a query or the button was pressed,
	# then render the hit count followed by one entry per matching repository.
	if search_query or button_clicked:
	    search_results = searcher.search(search_query, k=100_000)

	    # Hit count with thousands separators.
	    # NOTE(review): align="light" is not a valid HTML alignment value
	    # (possibly meant "right" or "left"); left untouched so the rendered
	    # page does not change - confirm the intended alignment.
	    st.write(
	        '<p align="light" style="color:grey;">'
	        f" {len(search_results):,} repositories </p>",
	        unsafe_allow_html=True,
	    )

	    for result in search_results:
	        # Each hit stores its original document as a JSON string in `raw`.
	        data_json = json.loads(result.raw)
	        description = data_json["description"]
	        url = data_json["url"]
	        project_name = data_json["project_name"]
	        main_topic = data_json["main_topic"]
	        sub_topic = data_json["sub_topic"]

	        # Show "main / sub" only when a sub topic is present.
	        if sub_topic is None:
	            topic_line = f"{main_topic}"
	        else:
	            topic_line = f"{main_topic} / {sub_topic}"

	        try:
	            st.markdown(f"### [{project_name}]({url})")
	            st.markdown(f"{description}")
	            st.text(topic_line)
	            # Two empty markdown blocks act as vertical spacing between entries.
	            st.markdown("")
	            st.markdown("")
	        except Exception:
	            # Best effort: an entry that fails to render is skipped so the
	            # remaining results are still shown. Catching Exception rather
	            # than using a bare `except:` lets KeyboardInterrupt, SystemExit
	            # and other BaseException-derived signals propagate.
	            continue