Spaces:
Runtime error
Runtime error
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
import requests | |
import spacy_udpipe | |
import streamlit as st | |
from spacy import displacy | |
# model = span_marker.SpanMarkerModel.from_pretrained("iahlt/iahlt-span-marker-alephbert-small-nemo-mt-he") | |
# Streamlit re-executes this script on every widget interaction, so the
# pipeline is built inside a cached factory rather than directly at module
# level. st.cache_resource is only available in newer Streamlit releases; on
# older ones fall back to a plain (uncached) call, which matches the previous
# behaviour.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_pipeline():
    """Build the Hebrew UDPipe pipeline with the SpanMarker NER component.

    Returns:
        The pipeline returned by ``spacy_udpipe.load("he")`` after the
        ``span_marker`` component has been added to it.
    """
    spacy_udpipe.download("he")
    pipeline = spacy_udpipe.load("he")
    pipeline.add_pipe(
        "span_marker",
        config={"model": "iahlt/span-marker-alephbert-small-nemo-mt-he"},
    )
    return pipeline


nlp = _load_pipeline()
def get_html(html: str) -> str:
    """Wrap rendered markup in a bordered, right-to-left container.

    Args:
        html: Markup to embed, e.g. the output of ``displacy.render``.

    Returns:
        A ``<style>`` rule for ``mark.entity`` followed by a ``<div>`` wrapper
        that contains *html* with every newline replaced by a space.
    """
    # "direction: rtl" has to live inside the style attribute; placed after
    # the closing quote it is not part of the inline style at all.
    wrapper = (
        '<div style="overflow-x: auto; border: 1px solid #e6e9ef; '
        "border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; "
        'direction: rtl">{}</div>'
    )
    # Newlines seem to mess with the rendering
    flattened = html.replace("\n", " ")
    style = "<style>mark.entity { display: inline-block }</style>"
    return f"{style}{wrapper.format(flattened)}"
def page_init():
    """Render the page header of the demo."""
    title = "Named Entity Recognition Demo"
    st.header(title)
def get_html_from_server(text):
    """Annotate *text* with the remote NER service and return display HTML.

    Args:
        text: Raw text to annotate.

    Returns:
        The displaCy entity markup built from the server's answer, wrapped by
        ``get_html``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    api_url = "https://ne-api.iahlt.org/api/hebrew/ner/"
    request_timeout = 30  # seconds; without a timeout a stalled server hangs the app

    def get_entities(text):
        """Fetch the server's answer and reshape its entities for displaCy.

        Returns an empty list for blank input; otherwise the decoded JSON
        answer with ``"ents"`` rewritten to ``start``/``end``/``label`` dicts.
        """
        text = text.strip()
        if not text:
            return []
        # Pass the text through ``params`` so requests URL-encodes it;
        # formatting it into the URL by hand breaks on '&', '#', '%', '+'.
        response = requests.get(
            api_url,
            params={"text": text},
            timeout=request_timeout,
        )
        response.raise_for_status()
        answer = response.json()
        # Entries whose group is "O" are skipped (presumably the "outside",
        # i.e. non-entity, tag -- confirm against the API).
        answer["ents"] = [
            {
                "start": ent["start"],
                "end": ent["end"],
                "label": ent["entity_group"],
            }
            for ent in answer["ents"]
            if ent["entity_group"] != "O"
        ]
        return answer

    def render_entities(text):
        """Render the server's entities as displaCy "ent" markup."""
        entities = get_entities(text)
        html = displacy.render(
            entities,
            style="ent",
            options={"direction": "rtl"},
            manual=True,
        )
        # NOTE(review): forces any "ltr" left in the markup to "rtl";
        # presumably the direction option is not applied in manual mode.
        # This also rewrites "ltr" inside the annotated text itself.
        return html.replace("ltr", "rtl")

    return get_html(render_entities(text))
if __name__ == '__main__':
    # Script entry point: draw the input widgets and, once the user presses
    # "Annotate", show the local model's entities next to the remote service's.
    page_init()

    # Default Hebrew sentence offered in the text area.
    sample_text = 'יו"ר ועדת הנוער נתן סלובטיק אמר שהשחקנים של אנחנו לא משתלבים באירופה.'
    text = st.text_area("Text", sample_text, height=200, max_chars=1000)
    btn = st.button("Annotate")

    # Page-wide CSS: right-to-left text in the input box and in the rendered
    # entities, plus hiding Streamlit's menu and footer. The literal is kept
    # flush-left because Markdown treats lines indented by four or more spaces
    # as a code block. The font family is "David Libre"; the "+" belongs only
    # in the Google Fonts URL.
    style = """
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=David+Libre">
<style>
.stTextArea textarea {
font-size: 20px;
color: black;
font-family: 'David Libre';
direction: rtl;
}
.entities {
font-size: 16px;
font-family: 'David Libre';
direction: rtl;
}
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
    st.write(style, unsafe_allow_html=True)

    if text and btn:
        # Entities from the local spaCy pipeline.
        doc = nlp(text)
        html = displacy.render(
            doc,
            style="ent",
            options={"direction": "rtl"},
            manual=False,
        )
        nemo_html = get_html(html)
        # Entities from the remote service.
        iahlt_html = get_html_from_server(text)
        # Flush-left for the same Markdown reason as the CSS above.
        html = f"""
<div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
<div>
<h3>Nemo model results</h3>
{nemo_html}
</div>
</div>
<div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
<div>
<h3>IAHLT results</h3>
{iahlt_html}
</div>
</div>
"""
        st.write(html, unsafe_allow_html=True)
    else:
        # Nothing to annotate yet.
        st.write("")