imvladikon's picture
Update app.py
95c0484
raw
history blame
No virus
1.56 kB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import streamlit as st
from spacy import displacy
import span_marker
import spacy
import spacy_udpipe
@st.cache_resource
def _load_pipeline():
    """Build the Arabic NER pipeline once and reuse it across script reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the UDPipe model and the SpanMarker component would be set up
    again on each rerun.

    Returns:
        The spaCy-UDPipe pipeline for "ar" with a "span_marker" component
        configured to use "iahlt/span-marker-xlm-roberta-base-ar".
    """
    # Fetch the Arabic UDPipe model before loading it.
    spacy_udpipe.download("ar")
    pipeline = spacy_udpipe.load("ar")
    # Attach the SpanMarker component that supplies the entity spans.
    pipeline.add_pipe(
        "span_marker",
        config={"model": "iahlt/span-marker-xlm-roberta-base-ar"},
    )
    return pipeline


# Module-level handle used by the rest of the script.
nlp = _load_pipeline()
def get_html(html: str):
    """Return *html* wrapped in a bordered, horizontally scrollable <div>.

    Every newline in the input is replaced by a single space before
    wrapping, because line breaks appear to interfere with rendering.
    """
    container = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
    # Splitting on "\n" and re-joining with " " swaps each newline for a space.
    single_line = " ".join(html.split("\n"))
    return container.format(single_line)
def page_init():
    """Render the demo's page header."""
    title = "Named Entity Recognition Demo"
    st.header(title)
if __name__ == '__main__':
    # Header first, then the input widgets, then the result, then the CSS.
    page_init()

    # Empty options dict, handed to displaCy unchanged.
    displacy_options = {}
    # Arabic example text pre-filled in the input box.
    sample_text = "تعلم في جامعة أوكسفورد، جامعة برنستون، جامعة كولومبيا."

    user_text = st.text_area(
        label="Text",
        value=sample_text,
        height=200,
        max_chars=1000,
    )
    clicked = st.button(label="Annotate")

    if not (user_text and clicked):
        # Either the text is empty or the button value is falsy: write an
        # empty string instead of an annotation.
        st.write("")
    else:
        # CSS override for the <mark class="entity"> elements in the output.
        style = "<style>mark.entity { display: inline-block }</style>"
        annotated = nlp(user_text)
        markup = displacy.render(
            annotated,
            manual=False,
            options=displacy_options,
            style="ent",
        )
        st.write(style + get_html(markup), unsafe_allow_html=True)

    # Inject CSS that sets right-to-left direction on textarea elements.
    st.markdown(
        """
<style>
textarea {
direction: rtl;
}
</style>
""",
        unsafe_allow_html=True,
    )