File size: 1,558 Bytes
95c0484
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
909811d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import streamlit as st
from spacy import displacy

import span_marker
import spacy
import spacy_udpipe


@st.cache_resource
def _load_pipeline():
    """Build the Arabic NER pipeline once and reuse it across script reruns.

    Streamlit re-executes this script from the top on every widget
    interaction. Without caching, the download/load/``add_pipe`` calls
    below would be repeated each time the user presses a button.

    Returns:
        The pipeline loaded by ``spacy_udpipe.load("ar")`` with a
        ``span_marker`` component added, configured with the
        ``iahlt/span-marker-xlm-roberta-base-ar`` model.
    """
    spacy_udpipe.download("ar")
    pipeline = spacy_udpipe.load("ar")
    pipeline.add_pipe(
        "span_marker",
        config={"model": "iahlt/span-marker-xlm-roberta-base-ar"},
    )
    return pipeline


# Module-level name kept so the rest of the script can keep calling nlp(text).
nlp = _load_pipeline()

def get_html(html: str):
    """Wrap an HTML fragment in a bordered, horizontally scrollable ``<div>``.

    Every newline in ``html`` is replaced by a single space before wrapping,
    because newlines seem to interfere with rendering.
    """
    opening = '<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">'
    closing = "</div>"
    # Splitting on "\n" and re-joining with " " swaps each newline for a space.
    flattened = " ".join(html.split("\n"))
    return f"{opening}{flattened}{closing}"

def page_init():
    """Render the page header for the demo."""
    title = "Named Entity Recognition Demo"
    st.header(title)
    

if __name__ == '__main__':
    # Draw the page header before any widgets.
    page_init()

    # Empty options dict handed to displacy.render below.
    render_options = {}

    default_input = "تعلم في جامعة أوكسفورد، جامعة برنستون، جامعة كولومبيا."

    # Input widgets: a text box pre-filled with the sample and a trigger button.
    user_text = st.text_area("Text", default_input, height=200, max_chars=1000)
    annotate_clicked = st.button("Annotate")

    if not (user_text and annotate_clicked):
        # Nothing to annotate on this run: write an empty string instead.
        st.write("")
    else:
        # Run the pipeline and render its entities as HTML markup.
        annotated = nlp(user_text)
        entity_markup = displacy.render(
            annotated,
            style="ent",
            options=render_options,
            manual=False,
        )
        entity_css = "<style>mark.entity { display: inline-block }</style>"
        st.write(entity_css + get_html(entity_markup), unsafe_allow_html=True)

    # Inject CSS that sets the textarea direction to right-to-left.
    rtl_textarea_css = """
        <style>
        textarea {
            direction: rtl;
        }
        </style>
        """
    st.markdown(rtl_textarea_css, unsafe_allow_html=True)