|
import streamlit as st |
|
import pandas as pd |
|
from sentence_transformers import SentenceTransformer |
|
from forgebox.cosine import CosineSearch |
|
import numpy as np |
|
|
|
from PIL import Image |
|
# Cover picture rendered at the top of the sidebar.
image = Image.open('shiji.png')


# Page title.
st.markdown("""
## 🍻 跨古/现代文搜索: 用白话搜史记
""")

st.sidebar.image(image, use_column_width=True)

# Sidebar blurb with project links.
# Fixes: "Modern Chines" -> "Modern Chinese", and the Hugging Face links now use
# the canonical host ("huggingface.co") instead of "huggingface.co." with a
# stray trailing dot.
# NOTE(review): "classicical" and "morden" in the paths below look like the
# published repository names rather than typos — verify before "correcting".
st.sidebar.markdown("""
Search《Records of the Grand Historian》 with modern Chinese
### References
* Model trained [here, please hit ⭐️](https://github.com/raynardj/yuan)
* [Trained crossed language BERT](https://huggingface.co/raynardj/xlsearch-cross-lang-search-zh-vs-classicical-cn)
### Related projects
* Read more [ancient books(almost all) with a translator](https://huggingface.co/spaces/raynardj/duguwen-classical-chinese-to-morden-translate)
* [Modern Chinese to classical Chinese translator](https://huggingface.co/spaces/raynardj/modern-chinese-to-ancient-translate-wenyanwen)
""")

# Model identifier handed to SentenceTransformer by load_encoder().
TAG = "raynardj/xlsearch-cross-lang-search-zh-vs-classicical-cn"
|
|
|
@st.cache(allow_output_mutation=True)
def load_encoder():
    """Construct the ``SentenceTransformer`` identified by ``TAG``.

    A spinner naming the model is displayed while the constructor runs. The
    function is wrapped by ``st.cache(allow_output_mutation=True)``.

    NOTE(review): newer Streamlit releases deprecate ``st.cache``; confirm the
    pinned Streamlit version before migrating to another caching decorator.

    Returns:
        The ``SentenceTransformer`` instance built from ``TAG``.
    """
    with st.spinner(f"Loading Transformer:{TAG}"):
        return SentenceTransformer(TAG)


# Query encoder, created once at import time; search() calls its encode().
encoder = load_encoder()
|
|
|
@st.cache(allow_output_mutation=True)
def load_book():
    """Read ``grand_historian.csv`` and return its ``sentences`` column.

    A spinner is displayed while the CSV is being read. The function is
    wrapped by ``st.cache(allow_output_mutation=True)``.

    Returns:
        A list holding every value of the ``sentences`` column, in file order.
    """
    with st.spinner("📚 Loading Book..."):
        book = pd.read_csv("grand_historian.csv")
    return list(book.sentences)


# Every sentence of the book; search() indexes into this list.
all_lines = load_book()
|
|
|
@st.cache(allow_output_mutation=True)
def encode_book():
    """Load the array stored in ``vec.npy`` and wrap it in a ``CosineSearch``.

    Nothing is encoded here despite the name: the array is read from disk as
    is. A spinner is displayed while the file is loaded and the index is
    built. The function is wrapped by ``st.cache(allow_output_mutation=True)``.

    Returns:
        The ``CosineSearch`` object constructed from the loaded array.
    """
    with st.spinner(f"Encoding sentences for book《Records of the Grand Historian》"):
        # presumably one row per entry of the sentence list — TODO confirm
        return CosineSearch(np.load('vec.npy'))


# Search index, built once at import time; search() calls it with a query vector.
cosine = encode_book()
|
|
|
def search(text, top_k=5):
    """Look up the book sentences ranked first for ``text``.

    The query is encoded with the module-level ``encoder``, ranked by the
    module-level ``cosine`` index, and the first ``top_k`` positions of that
    ranking are used to pick entries from ``all_lines``.

    Args:
        text: Query string passed to ``encoder.encode``.
        top_k: How many leading entries of the ranking to return. Defaults to
            5, the value that used to be hard-coded.

    Returns:
        A ``pandas.DataFrame`` with a single ``"sentence"`` column, rows in
        the order produced by ``cosine``.
    """
    query_vec = encoder.encode(text)
    ranking = cosine(query_vec)
    # Pick only the rows we need straight from the list. The previous code
    # converted the entire corpus to a NumPy array on every query just to
    # select a handful of entries.
    hits = [all_lines[idx] for idx in ranking[:top_k]]
    return pd.DataFrame({"sentence": hits})
|
|
|
# Search form: a free-text query box plus a submit button.
keyword = st.text_input("用白话搜", "")

# The button is evaluated first (so it is always rendered); a search only runs
# when it was clicked and the query box is non-empty.
if st.button("搜索") and keyword:
    with st.spinner(f"🔍 Searching for {keyword}"):
        st.table(search(keyword))
|
|