denisp1 committed on
Commit
8dc1277
•
1 Parent(s): eecfbf7

Create new file

Browse files
Files changed (1) hide show
  1. app.py +193 -0
app.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from multiprocessing import Process
3
+ from annotated_text import annotated_text
4
+ from bs4 import BeautifulSoup
5
+ import pandas as pd
6
+ import torch
7
+ import math
8
+ import re
9
+ import json
10
+ import requests
11
+ import spacy
12
+ import errant
13
+ import time
14
+ import os
15
+
16
def start_server():
    """Download the spaCy English model, then serve ``GrammarTokenize:app`` with uvicorn.

    The commands are handed to the shell one after the other, so the uvicorn
    call is only issued once the download command has returned.
    """
    shell_commands = (
        "python3 -m spacy download en_core_web_sm",
        "uvicorn GrammarTokenize:app --port 8080 --host 0.0.0.0 --workers 2",
    )
    for command in shell_commands:
        os.system(command)
19
+
20
def load_models():
    """Make sure the model server is listening on port 8080 and flag the session.

    If nothing is listening yet, ``start_server`` is launched in a daemon
    process and this function polls the port once per second until it opens.
    If that process exits before the port opens, an error is shown and the
    script run is halted with ``st.stop()`` instead of polling forever.

    On success ``st.session_state['models_loaded']`` is set to ``True``.
    """
    if is_port_in_use(8080):
        st.success("Model server already running...")
    else:
        with st.spinner(text="Loading models, please wait..."):
            proc = Process(target=start_server, args=(), daemon=True)
            proc.start()
            # Check the port first: another session may have brought the
            # server up, in which case our own child exiting is harmless.
            while not is_port_in_use(8080):
                if not proc.is_alive():
                    st.error("Model server failed to start.")
                    st.stop()
                time.sleep(1)
            st.success("Model server started.")
    st.session_state['models_loaded'] = True
31
+
32
def is_port_in_use(port):
    """Return ``True`` if a TCP connection to ``port`` on this machine succeeds.

    Args:
        port: TCP port number to probe.

    Returns:
        ``True`` when something accepts the connection, ``False`` otherwise.
    """
    import socket

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        # 0.0.0.0 is a wildcard *bind* address, not a portable destination
        # (connecting to it fails on some platforms), so probe loopback.
        return probe.connect_ex(('127.0.0.1', port)) == 0
36
+
37
# Give the session flag a default so later reads of
# st.session_state['models_loaded'] never hit a missing key.
if 'models_loaded' not in st.session_state:
    st.session_state['models_loaded'] = False
39
+
40
+
41
def show_highlights(input_text, corrected_sentence):
    """Render the original sentence with its edits shown as coloured annotations.

    ``highlight`` returns the sentence with ``<d>``/``<a>``/``<c>`` markup
    around edited tokens. The string is split on that markup; plain text is
    passed through untouched and each markup piece becomes a
    ``(text, edit type, colour)`` tuple for ``annotated_text``.

    Args:
        input_text: The sentence as typed by the user.
        corrected_sentence: The corrected sentence to compare against.

    Any exception is reported with ``st.error`` and the script run is stopped.
    """
    try:
        marked_up = highlight(input_text, corrected_sentence)
        color_map = {'d': '#faa', 'a': '#afa', 'c': '#fea'}
        pieces = re.split(r'(<[dac]\s.*?<\/[dac]>)', marked_up)
        annotations = []
        for index, piece in enumerate(pieces):
            # With one capturing group, re.split alternates plain text (even
            # indexes) and captured markup (odd indexes). Only markup is
            # parsed, so a stray "<b>" typed by the user stays plain text
            # instead of raising KeyError on the missing attributes.
            if index % 2 == 0:
                annotations.append(piece)
                continue

            tags = BeautifulSoup(piece, 'html.parser').find_all()
            tag = tags[0] if tags else None
            if tag is None or not (tag.has_attr('type') and tag.has_attr('edit')):
                # Looks like edit markup but is really user text.
                annotations.append(piece)
                continue

            if tag.name == 'd':
                # Deletions show the removed text struck through, using the
                # combining long stroke overlay after every character.
                text = '\u0336'.join(tag.text) + '\u0336'
            else:
                text = tag['edit']
            annotations.append((text, tag['type'], color_map[tag.name]))
        annotated_text(*annotations)
    except Exception as e:
        st.error('Some error occured!' + str(e))
        st.stop()
67
+
68
def show_edits(input_text, corrected_sentence):
    """Show the edits between the two sentences as a table indexed by edit type.

    Any exception is reported with ``st.error`` and the script run is stopped.
    """
    column_names = [
        'type',
        'original word',
        'original start',
        'original end',
        'correct word',
        'correct start',
        'correct end',
    ]
    try:
        rows = get_edits(input_text, corrected_sentence)
        st.table(pd.DataFrame(rows, columns=column_names).set_index('type'))
    except Exception:
        st.error('Some error occured!')
        st.stop()
77
+
78
def _edit_markup(tag, edit_type, edit_attr, text):
    """Build one ``<tag type='..' edit='..'>text</tag>`` span with every value HTML-escaped."""
    from html import escape

    return (
        f"<{tag} type='{escape(edit_type)}' edit='{escape(edit_attr)}'>"
        f"{escape(text)}</{tag}>"
    )


def highlight(orig, cor):
    """Return ``orig`` with each edit wrapped in ``<a>``/``<d>``/``<c>`` markup.

    ``<a>`` marks an addition, ``<d>`` a deletion and ``<c>`` a change. Each
    element carries the edit type in ``type`` and the replacement text in
    ``edit``. Attribute values and element text are HTML-escaped, so
    corrections containing quotes or angle brackets (for example "Let's")
    survive a round trip through an HTML parser.

    Args:
        orig: Original sentence; it is split on whitespace.
        cor: Corrected sentence.

    Returns:
        The whitespace-joined tokens of ``orig`` with edit markup inserted.
    """
    edits = _get_edits(orig, cor)
    orig_tokens = orig.split()

    # A set, so an index reported by two edits is deleted only once.
    ignore_indexes = set()

    for edit_type, edit_str_start, edit_spos, edit_epos, edit_str_end, *_ in edits:
        # For an edit spanning several original tokens, the first slot holds
        # the markup and the remaining slots are dropped afterwards.
        ignore_indexes.update(range(edit_spos + 1, edit_epos))

        if edit_str_start == "":
            # Pure insertion: attach it to a neighbouring token, the previous
            # one when it exists and otherwise the next one.
            if edit_spos - 1 >= 0:
                edit_spos -= 1
            else:
                edit_spos += 1
            anchor = orig_tokens[edit_spos]
            if edit_type == "PUNCT":
                edit_attr = edit_str_end
            else:
                edit_attr = anchor + " " + edit_str_end
            markup = _edit_markup('a', edit_type, edit_attr, anchor)
        elif edit_str_end == "":
            markup = _edit_markup('d', edit_type, '', edit_str_start)
        else:
            markup = _edit_markup('c', edit_type, edit_str_end, edit_str_start)
        orig_tokens[edit_spos] = markup

    # Delete from the highest index down so earlier indexes stay valid.
    for index in sorted(ignore_indexes, reverse=True):
        del orig_tokens[index]

    return " ".join(orig_tokens)
121
+
122
+
123
def _get_edits(orig, cor):
    """Align the two sentences with the module-level ``annotator`` and list the edits.

    Returns:
        A list with one tuple per edit: ``(type without its two-character
        prefix, o_str, o_start, o_end, c_str, c_start, c_end)``. The list is
        empty when there are no edits.
    """
    parsed_orig = annotator.parse(orig)
    parsed_cor = annotator.parse(cor)
    merged = annotator.merge(annotator.align(parsed_orig, parsed_cor))

    classified = (annotator.classify(edit) for edit in merged)
    return [
        (e.type[2:], e.o_str, e.o_start, e.o_end, e.c_str, e.c_start, e.c_end)
        for e in classified
    ]
141
+
142
def get_edits(orig, cor):
    """Return the edit tuples between ``orig`` and ``cor`` by delegating to ``_get_edits``."""
    edit_rows = _get_edits(orig, cor)
    return edit_rows
144
+
145
def get_correction(input_text):
    """Fetch the correction for ``input_text`` from the local server and render it.

    The sentence is sent as the ``input_sentence`` query parameter through
    ``params=``, so characters such as ``&``, ``#`` and ``+`` are URL-encoded
    rather than corrupting the query string. The response JSON must carry a
    ``scored_corrected_sentence`` pair of ``(sentence, score)``.

    Args:
        input_text: Sentence to correct.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # The server binds 0.0.0.0 (all interfaces); reach it over loopback,
    # since 0.0.0.0 is not a portable destination address.
    correct_response = requests.get(
        "http://127.0.0.1:8080/correct",
        params={"input_sentence": input_text},
    )
    correct_response.raise_for_status()
    corrected_sentence, _score = correct_response.json()["scored_corrected_sentence"]

    st.markdown('##### Corrected text:')
    st.write('')
    st.success(corrected_sentence)
    with st.expander(label='Show highlights', expanded=True):
        show_highlights(input_text, corrected_sentence)
    with st.expander(label='Show edits'):
        show_edits(input_text, corrected_sentence)
161
+
162
+
163
if __name__ == "__main__":
    # Page header.
    st.title('Grammar Styler')
    st.subheader('Grammar and sentence structure restyler')

    # Bring up (or detect) the model server unless this session already did.
    if not st.session_state['models_loaded']:
        load_models()

    # NOTE(review): imported here rather than at the top of the file,
    # presumably because start_server() is what downloads the package.
    # Confirm before moving it.
    import en_core_web_sm
    nlp = en_core_web_sm.load()
    # `annotator` stays a module-level name because _get_edits reads it as a global.
    annotator = errant.load('en', nlp)

    examples = [
        "I looked at the med cabinet and meds are out. Can you order me more?",
        "Been spendin my whole life jus to her dat song",
        "whatdjya think about dat?",
        "Lets git sum holesome waves and go surfin",
    ]

    st.markdown('##### Try it now:')
    selected_example = st.selectbox(label="Choose an example", options=examples)
    st.write("(or)")
    # The selected example pre-fills the free-text box, which the user may overwrite.
    input_text = st.text_input(label="Bring your own sentence", value=selected_example)

    if input_text.strip():
        get_correction(input_text)