#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Streamlit demo that shows Arabic named-entity annotations side by side.

The text entered by the user is annotated twice: once by a local spaCy
pipeline (UDPipe "ar" plus a ``span_marker`` component) and once by the
remote IAHLT NER HTTP API.  Both results are rendered with displaCy.
"""
import logging

import requests
import spacy_udpipe
import streamlit as st
from spacy import displacy

logger = logging.getLogger(__name__)

# Endpoint of the remote NER service; ``{}`` receives the URL-quoted text.
IAHLT_NER_URL = "https://ne-api.iahlt.org/api/arabic/ner/?text={}"
# Upper bound (seconds) for the remote call so a stalled service cannot
# hang the app indefinitely.
REQUEST_TIMEOUT_SECONDS = 30


@st.cache_resource
def _load_pipeline():
    """Build the local spaCy pipeline once and reuse it across reruns.

    Streamlit re-executes the script on every interaction; caching the
    pipeline as a resource avoids reloading the models each time.
    """
    try:
        spacy_udpipe.download("ar")
    except Exception as exc:
        # Best-effort: a previously downloaded model can still be loaded
        # below, so a failed download is logged rather than fatal.
        logger.warning("Could not download the UDPipe 'ar' model: %s", exc)
    pipeline = spacy_udpipe.load("ar")
    pipeline.add_pipe(
        "span_marker",
        config={"model": "iahlt/span-marker-xlm-roberta-base-ar"},
    )
    return pipeline


nlp = _load_pipeline()

# displaCy colour per entity label; aliases of one category share a colour.
DEFAULT_LABEL_COLORS = {
    "ORG": "#17A2B8",
    "ORGS": "#17A2B8",
    "ORGANIZATION": "#17A2B8",
    "PRODUCT": "#FA9F42",
    "COMMERCIAL_ITEM": "#FA9F42",
    "DUC": "#FA9F42",
    "GPE": "#FFC107",
    "LOC": "#28A745",
    "LOCATION": "#28A745",
    "PERSON": "#0069B4",
    "PER": "#0069B4",
    "PERS": "#0069B4",
    "TTL": "#FA8B1B",
    "TITLE": "#FA8B1B",
    "NORP": "#c887fb",
    "FAC": "#721817",
    "EVENT": "#2B4162",
    "EVE": "#2B4162",
    "LAW": "#C880B7",
    "LANGUAGE": "#437F97",
    "ANG": "#437F97",
    "WORK_OF_ART": "#0B6E4F",
    "WOA": "#0B6E4F",
    "DATE": "#849324",
    "TIME": "#849324",
    "TIMEX": "#849324",
    "MONEY": "#6C757D",
    "QUANTITY": "#FD151B",
    "ORDINAL": "#FD151B",
    "CARDINAL": "#FD151B",
    "PERCENT": "#F1D302",
    "MISC": "#e7d2e4",
    "OTHER": "#ff8197",
}

# Options shared by both displaCy renderings.
_DISPLACY_OPTIONS = {"direction": "rtl", "colors": DEFAULT_LABEL_COLORS}


def get_html(html: str) -> str:
    """Convert HTML so it can be rendered.

    Newlines inside *html* are replaced by spaces and the result is placed
    into the wrapper template.

    NOTE(review): the wrapper template and the ``style`` prefix contain no
    markup here -- confirm that nothing was lost from them.
    """
    WRAPPER = """
{}
"""
    # Newlines seem to mess with the rendering
    html = html.replace("\n", " ")
    style = ""
    html = WRAPPER.format(html)
    return f"{style}{html}"


def page_init():
    """Write the page header."""
    st.header("Named Entity Recognition Demo")


def _fetch_entities(text):
    """Query the remote NER API and return its answer in displaCy form.

    Returns an empty list for blank input.  Otherwise returns the decoded
    JSON answer with ``"ents"`` rewritten to ``start``/``end``/``label``
    dicts, dropping entries whose ``entity_group`` is ``"O"``.

    Raises:
        requests.RequestException: on timeout, connection failure or a
            non-success HTTP status.
    """
    text = text.strip()
    if not text:
        return []
    url = IAHLT_NER_URL.format(requests.utils.quote(text))
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError
    # when the service answers with an error page.
    response.raise_for_status()
    answer = response.json()
    answer["ents"] = [
        {
            "start": ent["start"],
            "end": ent["end"],
            "label": ent["entity_group"],
        }
        for ent in answer["ents"]
        if ent["entity_group"] != "O"
    ]
    return answer


def _render_entities(text):
    """Render the remote entities for *text* as displaCy HTML."""
    entities = _fetch_entities(text)
    html = displacy.render(
        entities,
        style="ent",
        options=_DISPLACY_OPTIONS,
        manual=True,
    )
    # Rewrite any remaining "ltr" in the generated markup to "rtl".
    return html.replace("ltr", "rtl")


@st.cache_data
def get_html_from_server(text):
    """Return render-ready HTML with the remote service's entities.

    Results are cached per input text by Streamlit.

    Raises:
        requests.RequestException: if the remote request fails.
    """
    return get_html(_render_entities(text))


def main():
    """Draw the page and, on request, annotate the entered text."""
    page_init()

    sample_text = """ تمكن البطل الملاكم "محمد عيسى" القناص من الفوز في مباراته ببطولة دبي وذلك بعد انهائه النزال بالضربة القاضية. حيث يواصل البطل محمد عيسى مسيرته بتسلَّق الرُّتَب والألقاب ليصل لملاكمة الاحتراف. """.strip()

    text = st.text_area("Text", sample_text, height=200, max_chars=1000)
    btn = st.button("Annotate")
    style = " "
    st.write(style, unsafe_allow_html=True)

    if text and btn:
        doc = nlp(text)
        html = displacy.render(
            doc,
            style="ent",
            options=_DISPLACY_OPTIONS,
            manual=False,
        )
        nemo_html = get_html(html)
        try:
            iahlt_html = get_html_from_server(text)
        except requests.RequestException as exc:
            # Keep the local results visible when the remote service is
            # unreachable or returns an error.
            st.error(f"IAHLT request failed: {exc}")
            iahlt_html = ""
        html = f"""

WikiANN-trained model results

{nemo_html}

IAHLT results

{iahlt_html}
"""
        st.write(html, unsafe_allow_html=True)
    else:
        st.write("")


if __name__ == '__main__':
    main()