imvladikon committed on
Commit
c407b1b
1 Parent(s): 0d10901

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -26
app.py CHANGED
@@ -1,62 +1,155 @@
1
  #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
 
 
3
  import streamlit as st
4
  from spacy import displacy
5
 
6
- import span_marker
7
- import spacy
8
- import spacy_udpipe
9
-
10
-
11
  spacy_udpipe.download("ar")
12
  nlp = spacy_udpipe.load("ar")
13
  nlp.add_pipe("span_marker",
14
  config={"model": "iahlt/span-marker-xlm-roberta-base-ar"})
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def get_html(html: str):
17
  """Convert HTML so it can be rendered."""
18
- WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
19
  # Newlines seem to mess with the rendering
20
  html = html.replace("\n", " ")
21
- return WRAPPER.format(html)
 
 
 
22
 
23
  def page_init():
24
  st.header("Named Entity Recognition Demo")
25
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  if __name__ == '__main__':
28
  page_init()
29
-
30
- displacy_options = {}
31
 
32
  sample_text = """
33
- أبو عَبد الله مُحَمَّد بن مُوسَى الخَوارِزمي عالم رياضيات وفلك وجغرافيا مسلم. يكنى بأبي جعفر. قيل أنه ولد حوالي 164هـ 781م وقيل أنه توفيَ بعد 232 هـ أي (بعد 847م). يعتبر من أوائل علماء الرياضيات المسلمين حيث ساهمت أعماله بدور كبير في تقدم الرياضيات في عصره.
34
  """.strip()
35
 
36
  text = st.text_area("Text", sample_text, height=200, max_chars=1000)
37
  btn = st.button("Annotate")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  if text and btn:
39
  doc = nlp(text)
40
  html = displacy.render(
41
  doc,
42
  style="ent",
43
- options=displacy_options,
44
  manual=False,
45
  )
46
- style = "<style>mark.entity { display: inline-block }</style>"
47
- st.write(f"{style}{get_html(html)}", unsafe_allow_html=True)
48
- else:
49
- st.write("")
50
 
51
- st.markdown(
52
- """
53
- <style>
54
- textarea {
55
- direction: rtl;
56
- }
57
- </style>
58
- """,
59
- unsafe_allow_html=True,
60
- )
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
 
 
 
1
  #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
+ import requests
4
+ import spacy_udpipe
5
  import streamlit as st
6
  from spacy import displacy
7
 
 
 
 
 
 
@st.cache_resource
def _load_pipeline():
    """Build the local Arabic NER pipeline once and reuse it across reruns.

    Streamlit re-executes the script on every widget interaction; without
    caching, the UDPipe download check, the model load and the SpanMarker
    component setup would be repeated on each rerun.

    Returns:
        The spaCy-UDPipe pipeline for Arabic with the ``span_marker``
        component (model ``iahlt/span-marker-xlm-roberta-base-ar``) added.
    """
    spacy_udpipe.download("ar")
    pipeline = spacy_udpipe.load("ar")
    pipeline.add_pipe(
        "span_marker",
        config={"model": "iahlt/span-marker-xlm-roberta-base-ar"},
    )
    return pipeline


# Module-level handle used by the script body below.
nlp = _load_pipeline()
12
 
13
+
# Entity labels grouped by the highlight colour they share. Several tag-set
# spellings of the same entity type (e.g. PERSON / PER / PERS) map to one colour.
_LABEL_COLOR_GROUPS = (
    ("#17A2B8", ("ORG", "ORGS", "ORGANIZATION")),
    ("#FA9F42", ("PRODUCT", "COMMERCIAL_ITEM", "DUC")),
    ("#FFC107", ("GPE",)),
    ("#28A745", ("LOC", "LOCATION")),
    ("#0069B4", ("PERSON", "PER", "PERS")),
    ("#FA8B1B", ("TTL", "TITLE")),
    ("#c887fb", ("NORP",)),
    ("#721817", ("FAC",)),
    ("#2B4162", ("EVENT", "EVE")),
    ("#C880B7", ("LAW",)),
    ("#437F97", ("LANGUAGE", "ANG")),
    ("#0B6E4F", ("WORK_OF_ART", "WOA")),
    ("#849324", ("DATE", "TIME", "TIMEX")),
    ("#6C757D", ("MONEY",)),
    ("#FD151B", ("QUANTITY", "ORDINAL", "CARDINAL")),
    ("#F1D302", ("PERCENT",)),
    ("#e7d2e4", ("MISC",)),
    ("#ff8197", ("OTHER",)),
)

# Flat label -> hex colour mapping passed to displaCy as the "colors" option.
DEFAULT_LABEL_COLORS = {
    label: color
    for color, labels in _LABEL_COLOR_GROUPS
    for label in labels
}
49
+
def get_html(html: str) -> str:
    """Wrap rendered entity markup in a bordered, right-to-left container.

    Args:
        html: Markup to embed (here, the output of ``displacy.render``).

    Returns:
        A ``<style>`` rule for ``mark.entity`` followed by the markup wrapped
        in a styled ``<div>``, with every newline replaced by a space.
    """
    # ``direction: rtl`` must sit inside the style attribute; previously the
    # attribute's closing quote came before it, so the declaration was dropped.
    wrapper = (
        '<div style="overflow-x: auto; border: 1px solid #e6e9ef; '
        'border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; '
        'direction: rtl">{}</div>'
    )
    style = "<style>mark.entity { display: inline-block }</style>"
    # Newlines seem to mess with the rendering
    flattened = html.replace("\n", " ")
    return f"{style}{wrapper.format(flattened)}"
58
+
59
 
def page_init() -> None:
    """Emit the page heading for the demo."""
    st.header("Named Entity Recognition Demo")
62
+
63
+
@st.cache_data
def get_html_from_server(text):
    """Annotate *text* with the remote IAHLT NER API and return display HTML.

    Args:
        text: Raw user text to annotate.

    Returns:
        HTML produced by ``get_html`` around the displaCy rendering of the
        entities returned by the service.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            response from the service.
    """
    import re  # local import: only needed for the direction rewrite below

    endpoint = "https://ne-api.iahlt.org/api/arabic/ner/"

    def get_entities(raw):
        """Fetch entities for *raw* and reshape them for displaCy manual mode."""
        query = raw.strip()
        if not query:
            return []
        # Pass the text via ``params`` so it is URL-encoded; formatting it
        # straight into the URL breaks on characters such as '&', '#' or '%'.
        response = requests.get(endpoint, params={"text": query}, timeout=30)
        response.raise_for_status()
        answer = response.json()
        # Keep only real entities ("O" marks non-entity spans) and rename
        # ``entity_group`` to the ``label`` key displaCy reads.
        # NOTE(review): assumes the payload also carries the "text" key that
        # displaCy's manual mode needs -- confirm against the API.
        answer["ents"] = [
            {
                "start": ent["start"],
                "end": ent["end"],
                "label": ent["entity_group"],
            }
            for ent in answer["ents"]
            if ent["entity_group"] != "O"
        ]
        return answer

    def render_entities(raw):
        """Render the service's entities for *raw* as right-to-left markup."""
        markup = displacy.render(
            get_entities(raw),
            style="ent",
            options={"direction": "rtl", "colors": DEFAULT_LABEL_COLORS},
            manual=True,
        )
        # Flip only the CSS direction declaration. A blanket
        # ``replace("ltr", "rtl")`` would also corrupt user text that happens
        # to contain "ltr".
        return re.sub(r"direction:\s*ltr", "direction: rtl", markup)

    return get_html(render_entities(text))
95
+
96
 
if __name__ == '__main__':
    # Script body: draw the input widgets, then on "Annotate" show the local
    # pipeline's entities and the remote service's entities one after the other.
    page_init()

    sample_text = """
تمكن البطل الملاكم "محمد عيسى" القناص من الفوز في مباراته ببطولة دبي وذلك بعد انهائه النزال بالضربة القاضية. حيث يواصل البطل محمد عيسى مسيرته بتسلَّق الرُّتَب والألقاب ليصل لملاكمة الاحتراف.
""".strip()

    text = st.text_area("Text", sample_text, height=200, max_chars=1000)
    btn = st.button("Annotate")

    # Page-wide CSS. The markup is kept flush-left inside the literal so that
    # Markdown processing cannot mistake indented lines for a code block.
    # Fixes: ``font-color`` is not a CSS property (``color`` is), and a CSS
    # family name uses a space where the Google Fonts URL uses '+'.
    # NOTE(review): only Scheherazade New is requested by the <link>;
    # 'David Libre' resolves only if the browser already has it.
    style = """
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Scheherazade+New">
<style>
.stTextArea textarea {
    font-size: 20px;
    color: black;
    font-family: 'Scheherazade New';
    direction: rtl;
}
.entities {
    font-size: 16px;
    font-family: 'David Libre';
    direction: rtl;
}
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
    st.write(style, unsafe_allow_html=True)

    if text and btn:
        # Local pipeline (UDPipe tokenisation + SpanMarker component).
        doc = nlp(text)
        local_markup = displacy.render(
            doc,
            style="ent",
            options={"direction": "rtl", "colors": DEFAULT_LABEL_COLORS},
            manual=False,
        )

        nemo_html = get_html(local_markup)
        # Remote IAHLT service, rendered through the same wrapper.
        iahlt_html = get_html_from_server(text)

        results = f"""
<div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
<div>
<h3>Nemo model results</h3>
{nemo_html}
</div>
</div>
<div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
<div>
<h3>IAHLT results</h3>
{iahlt_html}
</div>
</div>
"""
        st.write(results, unsafe_allow_html=True)

    else:
        st.write("")