Spaces:
Runtime error
Runtime error
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
import requests | |
import spacy_udpipe | |
import streamlit as st | |
from spacy import displacy | |
# Make sure the Arabic UDPipe model is available locally, then load it as the
# base pipeline used for the locally computed annotations.
spacy_udpipe.download("ar")
nlp = spacy_udpipe.load("ar")

# Append the SpanMarker component, configured with the IAHLT Arabic model.
nlp.add_pipe(
    "span_marker",
    config={"model": "iahlt/span-marker-xlm-roberta-base-ar"},
)
# Each entry pairs a highlight colour with the entity labels that share it.
# Several tag sets are covered at once (e.g. PERSON / PER / PERS), so aliases
# of the same concept are kept together.
_LABEL_COLOR_GROUPS = (
    ("#17A2B8", ("ORG", "ORGS", "ORGANIZATION")),
    ("#FA9F42", ("PRODUCT", "COMMERCIAL_ITEM", "DUC")),
    ("#FFC107", ("GPE",)),
    ("#28A745", ("LOC", "LOCATION")),
    ("#0069B4", ("PERSON", "PER", "PERS")),
    ("#FA8B1B", ("TTL", "TITLE")),
    ("#c887fb", ("NORP",)),
    ("#721817", ("FAC",)),
    ("#2B4162", ("EVENT", "EVE")),
    ("#C880B7", ("LAW",)),
    ("#437F97", ("LANGUAGE", "ANG")),
    ("#0B6E4F", ("WORK_OF_ART", "WOA")),
    ("#849324", ("DATE", "TIME", "TIMEX")),
    ("#6C757D", ("MONEY",)),
    ("#FD151B", ("QUANTITY", "ORDINAL", "CARDINAL")),
    ("#F1D302", ("PERCENT",)),
    ("#e7d2e4", ("MISC",)),
    ("#ff8197", ("OTHER",)),
)

# Flat label -> colour mapping handed to displaCy through its "colors" option.
DEFAULT_LABEL_COLORS = {
    label: color
    for color, labels in _LABEL_COLOR_GROUPS
    for label in labels
}
def get_html(html: str) -> str:
    """Wrap rendered markup in a bordered, scrollable, right-to-left box.

    Args:
        html: Markup to embed (for example the output of ``displacy.render``).

    Returns:
        A ``<style>`` rule for ``mark.entity`` followed by ``html`` wrapped in
        a styled ``<div>``, with every newline in ``html`` replaced by a space.
    """
    # ``direction: rtl`` has to live inside the style attribute's quotes.
    # Previously the attribute was closed right after ``margin-bottom`` and
    # the direction declaration was left dangling outside of it.
    WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; direction: rtl">{}</div>"""
    # Newlines seem to mess with the rendering
    html = html.replace("\n", " ")
    style = "<style>mark.entity { display: inline-block }</style>"
    html = WRAPPER.format(html)
    return f"{style}{html}"
def page_init():
    """Render the page heading of the demo."""
    title = "Named Entity Recognition Demo"
    st.header(title)
def get_html_from_server(text):
    """Annotate ``text`` with the remote IAHLT NER API and return display HTML.

    Args:
        text: Raw text to annotate; surrounding whitespace is stripped before
            it is sent.

    Returns:
        The displaCy entity markup for the server's answer, wrapped by
        ``get_html``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    base_url = "https://ne-api.iahlt.org/api/arabic/ner/"

    def get_entities(text):
        """Return the server's answer with its entities in displaCy's manual format."""
        text = text.strip()
        if text == "":
            return []
        # Hand the text to ``params`` so it is percent-encoded; formatting it
        # straight into the URL broke the query on characters such as
        # '&', '#' or '+'.
        response = requests.get(base_url, params={"text": text}, timeout=30)
        # Surface HTTP failures here rather than as a confusing JSON/KeyError
        # further down.
        response.raise_for_status()
        answer = response.json()
        # Drop "O" (outside) spans and keep only the keys displaCy reads.
        answer["ents"] = [
            {
                "start": ent["start"],
                "end": ent["end"],
                "label": ent["entity_group"],
            }
            for ent in answer["ents"]
            if ent["entity_group"] != "O"
        ]
        return answer

    def render_entities(text):
        """Render the entities found in ``text`` as right-to-left displaCy markup."""
        entities = get_entities(text)
        html = displacy.render(
            entities,
            style="ent",
            options={"direction": "rtl", "colors": DEFAULT_LABEL_COLORS},
            manual=True,
        )
        # Rewrites every "ltr" in the generated markup, which flips the
        # direction displaCy emitted.
        # NOTE(review): this would also alter a literal "ltr" inside the
        # annotated text itself.
        return html.replace("ltr", "rtl")

    return get_html(render_entities(text))
if __name__ == '__main__':
    # Script body: draw the input form and, once "Annotate" is pressed, show
    # the local pipeline's entities above the ones returned by the IAHLT API.
    page_init()

    sample_text = """
تمكن البطل الملاكم "محمد عيسى" القناص من الفوز في مباراته ببطولة دبي وذلك بعد انهائه النزال بالضربة القاضية. حيث يواصل البطل محمد عيسى مسيرته بتسلَّق الرُّتَب والألقاب ليصل لملاكمة الاحتراف.
""".strip()
    text = st.text_area("Text", sample_text, height=200, max_chars=1000)
    btn = st.button("Annotate")

    # The markup below is kept flush-left on purpose: it is rendered through
    # Markdown, where lines indented by four or more spaces can be treated as
    # an indented code block instead of HTML.
    #
    # CSS fixes: ``font-color`` is not a CSS property (``color`` is), and a
    # font-family name uses spaces -- the '+' form only belongs in the Google
    # Fonts URL.
    # NOTE(review): 'David+Libre' is never loaded by the <link> above it and
    # has the same '+' problem; left unchanged until the intended font for
    # the entity view is confirmed.
    style = """
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Scheherazade+New">
<style>
.stTextArea textarea {
font-size: 20px;
color: black;
font-family: 'Scheherazade New';
direction: rtl;
}
.entities {
font-size: 16px;
font-family: 'David+Libre';
direction: rtl;
}
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
    st.write(style, unsafe_allow_html=True)

    if text and btn:
        # Entities predicted locally by the spaCy pipeline.
        doc = nlp(text)
        html = displacy.render(
            doc,
            style="ent",
            options={"direction": "rtl", "colors": DEFAULT_LABEL_COLORS},
            manual=False,
        )
        nemo_html = get_html(html)
        # Entities predicted by the remote IAHLT service for the same text.
        iahlt_html = get_html_from_server(text)
        html = f"""
<div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
<div>
<h3>WikiANN-trained model results</h3>
{nemo_html}
</div>
</div>
<div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
<div>
<h3>IAHLT results</h3>
{iahlt_html}
</div>
</div>
"""
        st.write(html, unsafe_allow_html=True)
    else:
        st.write("")