#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import streamlit as st
from spacy import displacy
import span_marker
import spacy
import spacy_udpipe
# Language code handed to spacy_udpipe and the SpanMarker checkpoint that is
# attached to the pipeline for entity recognition.
_UDPIPE_LANGUAGE = "ar"
_SPAN_MARKER_MODEL = "iahlt/span-marker-xlm-roberta-base-ar"

# Runs at import time: download the UDPipe model for the language, load it as
# the base pipeline, then append the "span_marker" component to it.
# NOTE(review): presumably the file-level `import span_marker` is what makes
# the "span_marker" factory available to add_pipe - confirm before removing it.
spacy_udpipe.download(_UDPIPE_LANGUAGE)
nlp = spacy_udpipe.load(_UDPIPE_LANGUAGE)
nlp.add_pipe("span_marker", config={"model": _SPAN_MARKER_MODEL})
def get_html(html: str) -> str:
    """Convert HTML so it can be rendered.

    Args:
        html: Markup to embed; in this file it is the output of
            ``displacy.render``.

    Returns:
        ``html`` with every newline replaced by a single space, substituted
        into the wrapper template.
    """
    # Template the flattened markup is dropped into. ``html`` is passed as the
    # format argument, not used as the template, so braces inside it are safe.
    wrapper = "\n{}\n"
    # Newlines seem to mess with the rendering, so collapse the markup onto a
    # single line before wrapping it.
    flattened = html.replace("\n", " ")
    return wrapper.format(flattened)
def page_init() -> None:
    """Write the demo's header to the Streamlit page."""
    st.header("Named Entity Recognition Demo")
def main() -> None:
    """Render the demo page and annotate the submitted text.

    Shows a text area pre-filled with a sample sentence and an "Annotate"
    button. When the button is pressed and the text is non-empty, the text is
    run through the module-level ``nlp`` pipeline and the entities are drawn
    with displaCy; otherwise an empty string is written.
    """
    page_init()

    displacy_options = {}
    sample_text = "تعلم في جامعة أوكسفورد، جامعة برنستون، جامعة كولومبيا."
    text = st.text_area("Text", sample_text, height=200, max_chars=1000)
    btn = st.button("Annotate")

    if text and btn:
        doc = nlp(text)
        html = displacy.render(
            doc,
            style="ent",
            options=displacy_options,
            manual=False,
        )
        # Prefix slot for inline markup placed ahead of the rendered entities;
        # it is empty, so nothing is prepended.
        style = ""
        # unsafe_allow_html is required for the raw displaCy HTML to be
        # rendered instead of escaped.
        st.write(f"{style}{get_html(html)}", unsafe_allow_html=True)
    else:
        st.write("")

    # Trailing markdown element; its content is only a newline.
    st.markdown(
        "\n",
        unsafe_allow_html=True,
    )


if __name__ == "__main__":
    main()