imvladikon's picture
Update app.py
a3c2037
raw
history blame
No virus
4.61 kB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests
import spacy_udpipe
import streamlit as st
from spacy import displacy
@st.cache_resource
def _load_pipeline():
    """Build the Arabic spaCy-UDPipe pipeline with the SpanMarker NER component.

    Streamlit re-executes this script on every widget interaction, so the
    loader is wrapped in ``st.cache_resource``: the model download check, the
    UDPipe model load and the SpanMarker component setup then run once per
    server process instead of once per rerun.

    Returns:
        The loaded pipeline object with the ``span_marker`` pipe added.
    """
    spacy_udpipe.download("ar")
    pipeline = spacy_udpipe.load("ar")
    pipeline.add_pipe(
        "span_marker",
        config={"model": "iahlt/span-marker-xlm-roberta-base-ar"},
    )
    return pipeline


# Module-level handle used by the script body below.
nlp = _load_pipeline()
# Each row: one highlight colour followed by every label spelling that shares it.
_LABEL_GROUPS = (
    ("#17A2B8", ("ORG", "ORGS", "ORGANIZATION")),
    ("#FA9F42", ("PRODUCT", "COMMERCIAL_ITEM", "DUC")),
    ("#FFC107", ("GPE",)),
    ("#28A745", ("LOC", "LOCATION")),
    ("#0069B4", ("PERSON", "PER", "PERS")),
    ("#FA8B1B", ("TTL", "TITLE")),
    ("#c887fb", ("NORP",)),
    ("#721817", ("FAC",)),
    ("#2B4162", ("EVENT", "EVE")),
    ("#C880B7", ("LAW",)),
    ("#437F97", ("LANGUAGE", "ANG")),
    ("#0B6E4F", ("WORK_OF_ART", "WOA")),
    ("#849324", ("DATE", "TIME", "TIMEX")),
    ("#6C757D", ("MONEY",)),
    ("#FD151B", ("QUANTITY", "ORDINAL", "CARDINAL")),
    ("#F1D302", ("PERCENT",)),
    ("#e7d2e4", ("MISC",)),
    ("#ff8197", ("OTHER",)),
)

# Flat label -> colour mapping passed to displaCy as its "colors" option.
DEFAULT_LABEL_COLORS = {
    label: colour
    for colour, labels in _LABEL_GROUPS
    for label in labels
}
def get_html(html: str) -> str:
    """Wrap an HTML fragment in a bordered, right-to-left container.

    Newlines in the fragment are replaced by spaces (they interfere with the
    rendering), the result is placed inside a styled ``<div>``, and a small
    ``<style>`` rule for ``mark.entity`` is prepended.

    Args:
        html: The HTML fragment to wrap.

    Returns:
        The ``<style>`` element followed by the wrapped fragment.
    """
    # ``direction: rtl`` has to live inside the style attribute; it previously
    # followed the closing quote and was therefore not part of the style.
    wrapper = (
        '<div style="overflow-x: auto; border: 1px solid #e6e9ef; '
        "border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; "
        'direction: rtl">{}</div>'
    )
    style = "<style>mark.entity { display: inline-block }</style>"
    # Newlines seem to mess with the rendering
    flattened = html.replace("\n", " ")
    return f"{style}{wrapper.format(flattened)}"
def page_init() -> None:
    """Render the heading shown at the top of the demo page."""
    heading = "Named Entity Recognition Demo"
    st.header(heading)
@st.cache_data
def get_html_from_server(text: str) -> str:
    """Annotate *text* with the remote NER service and return display HTML.

    The text is sent to the IAHLT NER endpoint, the returned entities are
    rendered with displaCy in manual mode, and the markup is wrapped with
    ``get_html``. The function is decorated with ``st.cache_data``.

    Args:
        text: The text to annotate.

    Returns:
        HTML for the annotated text.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
    """
    base_url = "https://ne-api.iahlt.org/api/arabic/ner/?text={}"
    # Upper bound in seconds; without it a stalled server blocks the app
    # indefinitely.
    request_timeout = 30

    def get_entities(text):
        """Fetch the server answer and normalise its ``ents`` list for displaCy."""
        text = text.strip()
        if text == "":
            return []
        response = requests.get(
            base_url.format(requests.utils.quote(text)),
            timeout=request_timeout,
        )
        # Fail with a clear HTTP error instead of an opaque JSON/KeyError later.
        response.raise_for_status()
        answer = response.json()
        # Keep only real entities ("O" entries are skipped) and rename
        # ``entity_group`` to the ``label`` key used in the rendered spans.
        # Every other key of the answer is handed to displaCy untouched --
        # presumably it includes the ``text`` field; verify against the API.
        answer["ents"] = [
            {
                "start": ent["start"],
                "end": ent["end"],
                "label": ent["entity_group"],
            }
            for ent in answer["ents"]
            if ent["entity_group"] != "O"
        ]
        return answer

    def render_entities(text):
        """Render the normalised server answer as displaCy entity markup."""
        entities = get_entities(text)
        html = displacy.render(
            entities,
            style="ent",
            options={"direction": "rtl", "colors": DEFAULT_LABEL_COLORS},
            manual=True,
        )
        # NOTE(review): this swaps every "ltr" substring, which also rewrites
        # any input text that happens to contain "ltr" -- confirm acceptable.
        return html.replace("ltr", "rtl")

    return get_html(render_entities(text))
# Text pre-filled in the input box.
_SAMPLE_TEXT = """
تمكن البطل الملاكم "محمد عيسى" القناص من الفوز في مباراته ببطولة دبي وذلك بعد انهائه النزال بالضربة القاضية. حيث يواصل البطل محمد عيسى مسيرته بتسلَّق الرُّتَب والألقاب ليصل لملاكمة الاحتراف.
""".strip()

# Page-wide CSS. Font families are written with a space ("Scheherazade New"):
# the "+" form is only the URL encoding used in the stylesheet link and never
# matches a family name.
# NOTE(review): `font-color` is not a CSS property, so it has no effect;
# it is left untouched because switching to `color: black` would change the
# look of the text area (e.g. under a dark theme) -- decide separately.
# NOTE(review): only "Scheherazade New" is requested by the <link>; "David
# Libre" applies only if it is available to the browser some other way.
_PAGE_STYLE = """
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Scheherazade+New">
<style>
.stTextArea textarea {
font-size: 20px;
font-color: black;
font-family: 'Scheherazade New';
direction: rtl;
}
.entities {
font-size: 16px;
font-family: 'David Libre';
direction: rtl;
}
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""

# Layout for the two result panels. Lines are deliberately not indented so the
# markup is never mistaken for an indented Markdown code block.
_RESULTS_TEMPLATE = """
<div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
<div>
<h3>WikiANN-trained model results</h3>
{nemo_html}
</div>
</div>
<div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
<div>
<h3>IAHLT results</h3>
{iahlt_html}
</div>
</div>
"""


def main() -> None:
    """Draw the page and, once "Annotate" is pressed, show both NER results.

    The input text is annotated twice: locally with the ``nlp`` pipeline and
    remotely through ``get_html_from_server``; both renderings are written to
    the page one below the other.
    """
    page_init()
    text = st.text_area("Text", _SAMPLE_TEXT, height=200, max_chars=1000)
    btn = st.button("Annotate")
    st.write(_PAGE_STYLE, unsafe_allow_html=True)

    if not (text and btn):
        # Nothing to annotate yet; emit an empty element as before.
        st.write("")
        return

    # Local pipeline result.
    doc = nlp(text)
    local_markup = displacy.render(
        doc,
        style="ent",
        options={"direction": "rtl", "colors": DEFAULT_LABEL_COLORS},
        manual=False,
    )
    nemo_html = get_html(local_markup)
    # Remote service result.
    iahlt_html = get_html_from_server(text)

    page_html = _RESULTS_TEMPLATE.format(
        nemo_html=nemo_html,
        iahlt_html=iahlt_html,
    )
    st.write(page_html, unsafe_allow_html=True)


if __name__ == '__main__':
    main()