Spaces:
Runtime error
Runtime error
File size: 4,592 Bytes
95c0484 c407b1b 95c0484 c407b1b 95c0484 c407b1b 95c0484 c407b1b 95c0484 c407b1b 95c0484 0d10901 c407b1b 0d10901 95c0484 c407b1b 95c0484 c407b1b 95c0484 c407b1b 95c0484 c407b1b 69b2a84 c407b1b 909811d c407b1b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests
import spacy_udpipe
import streamlit as st
from spacy import displacy
@st.cache_resource
def _load_pipeline():
    """Build the Arabic pipeline once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the model download/load below would be repeated each
    time the user presses a button.

    Returns:
        The pipeline returned by ``spacy_udpipe.load("ar")`` with a
        ``span_marker`` component added to it.
    """
    spacy_udpipe.download("ar")
    pipeline = spacy_udpipe.load("ar")
    pipeline.add_pipe(
        "span_marker",
        config={"model": "iahlt/span-marker-xlm-roberta-base-ar"},
    )
    return pipeline


# Module-level handle used by the script body below.
nlp = _load_pipeline()
# Each highlight colour paired with every label alias that should use it.
# Group order (and alias order inside a group) fixes the insertion order of
# the resulting mapping.
_COLOR_GROUPS = (
    ("#17A2B8", ("ORG", "ORGS", "ORGANIZATION")),
    ("#FA9F42", ("PRODUCT", "COMMERCIAL_ITEM", "DUC")),
    ("#FFC107", ("GPE",)),
    ("#28A745", ("LOC", "LOCATION")),
    ("#0069B4", ("PERSON", "PER", "PERS")),
    ("#FA8B1B", ("TTL", "TITLE")),
    ("#c887fb", ("NORP",)),
    ("#721817", ("FAC",)),
    ("#2B4162", ("EVENT", "EVE")),
    ("#C880B7", ("LAW",)),
    ("#437F97", ("LANGUAGE", "ANG")),
    ("#0B6E4F", ("WORK_OF_ART", "WOA")),
    ("#849324", ("DATE", "TIME", "TIMEX")),
    ("#6C757D", ("MONEY",)),
    ("#FD151B", ("QUANTITY", "ORDINAL", "CARDINAL")),
    ("#F1D302", ("PERCENT",)),
    ("#e7d2e4", ("MISC",)),
    ("#ff8197", ("OTHER",)),
)

# Entity label -> hex colour, passed to displaCy as the "colors" option.
DEFAULT_LABEL_COLORS = {
    label: color
    for color, labels in _COLOR_GROUPS
    for label in labels
}
def get_html(html: str) -> str:
    """Wrap rendered markup in a bordered, scrollable, right-to-left box.

    Args:
        html: Markup to embed (in this file, the output of
            ``displacy.render``).

    Returns:
        A ``<style>`` rule for ``mark.entity`` followed by the markup wrapped
        in a styled ``<div>``, with every newline replaced by a space.
    """
    # ``direction: rtl`` must sit inside the style attribute; previously the
    # attribute's closing quote came before it, leaving the declaration
    # dangling outside the attribute as malformed markup.
    wrapper = (
        '<div style="overflow-x: auto; border: 1px solid #e6e9ef; '
        'border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; '
        'direction: rtl">{}</div>'
    )
    # Newlines seem to mess with the rendering
    html = html.replace("\n", " ")
    style = "<style>mark.entity { display: inline-block }</style>"
    return f"{style}{wrapper.format(html)}"
def page_init():
    """Render the page heading for the demo."""
    title = "Named Entity Recognition Demo"
    st.header(title)
@st.cache_data
def get_html_from_server(text):
    """Annotate ``text`` with the remote IAHLT NER service and return HTML.

    Args:
        text: Raw text to annotate.

    Returns:
        The displaCy entity markup for the server's answer, wrapped by
        ``get_html``.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    endpoint = "https://ne-api.iahlt.org/api/arabic/ner/"
    # Upper bound (seconds) so a stalled service cannot hang the app forever.
    timeout_seconds = 30

    def get_entities(text):
        """Fetch the server answer and reshape its entities for displaCy."""
        text = text.strip()
        if not text:
            return []
        # Pass the text via ``params`` so requests URL-encodes it; formatting
        # it straight into the query string breaks on '&', '#', '+' or '%'.
        response = requests.get(
            endpoint,
            params={"text": text},
            timeout=timeout_seconds,
        )
        response.raise_for_status()
        answer = response.json()
        # Keep only real entities ("O" marks non-entity spans) and rename
        # "entity_group" to the "label" key used by displaCy's manual format.
        answer["ents"] = [
            {
                "start": ent["start"],
                "end": ent["end"],
                "label": ent["entity_group"],
            }
            for ent in answer["ents"]
            if ent["entity_group"] != "O"
        ]
        # NOTE(review): displaCy's manual mode also needs a "text" key;
        # presumably the server includes it in the answer -- confirm.
        return answer

    def render_entities(text):
        """Render the fetched entities as displaCy markup."""
        entities = get_entities(text)
        html = displacy.render(
            entities,
            style="ent",
            options={"direction": "rtl", "colors": DEFAULT_LABEL_COLORS},
            manual=True,
        )
        # Force right-to-left in the generated markup. This is a plain text
        # substitution, so it also rewrites any literal "ltr" in the input.
        return html.replace("ltr", "rtl")

    return get_html(render_entities(text))
# Script entry point: build the page, read the text, and on "Annotate" show
# the local pipeline's entities next to the remote IAHLT service's entities.
if __name__ == '__main__':
    page_init()

    # Default contents of the input box.
    sample_text = """
تمكن البطل الملاكم "محمد عيسى" القناص من الفوز في مباراته ببطولة دبي وذلك بعد انهائه النزال بالضربة القاضية. حيث يواصل البطل محمد عيسى مسيرته بتسلَّق الرُّتَب والألقاب ليصل لملاكمة الاحتراف.
""".strip()

    text = st.text_area("Text", sample_text, height=200, max_chars=1000)
    btn = st.button("Annotate")

    # Page-wide CSS. Fixes over the previous version:
    #   * ``font-color`` is not a CSS property; the text colour is ``color``.
    #   * the family name is "Scheherazade New" -- the '+' only belongs in
    #     the Google Fonts URL, not in ``font-family``.
    #   * ``.entities`` named 'David+Libre', a font this page never loads;
    #     it now uses the font that the <link> below actually provides.
    # The markup is kept flush-left: Markdown may treat lines indented by
    # four or more spaces as a code block.
    style = """
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Scheherazade+New">
<style>
.stTextArea textarea {
font-size: 20px;
color: black;
font-family: 'Scheherazade New', serif;
direction: rtl;
}
.entities {
font-size: 16px;
font-family: 'Scheherazade New', serif;
direction: rtl;
}
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
    st.write(style, unsafe_allow_html=True)

    if text and btn:
        # Entities from the locally loaded pipeline.
        doc = nlp(text)
        html = displacy.render(
            doc,
            style="ent",
            options={"direction": "rtl", "colors": DEFAULT_LABEL_COLORS},
            manual=False,
        )
        local_html = get_html(html)

        # Entities from the remote service.
        iahlt_html = get_html_from_server(text)

        # One section per result set, stacked vertically.
        html = f"""
<div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
<div>
<h3>WikiANN-trained model results</h3>
{local_html}
</div>
</div>
<div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
<div>
<h3>IAHLT results</h3>
{iahlt_html}
</div>
</div>
"""
        st.write(html, unsafe_allow_html=True)
    else:
        st.write("")
|