edithram23 committed on
Commit
57bcde6
·
verified ·
1 Parent(s): a9299cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -181,18 +181,18 @@ if uploaded_file is not None:
181
  text = pg.get_text()
182
  sentences = sentence_tokenize(text)
183
  for sent in sentences:
184
- # x = mask_generation(sent)
185
 
186
- # sent_n_q_c=[]
187
- # sent_n = list(set(sent.lower().replace('.',' ').split("\n")))
188
- # for i in sent_n:
189
- # for j in i.split(" "):
190
- # sent_n_q_c+=j.split(',')
191
- # x_q = x.lower().replace('.',' ').split(' ')
192
- # e=[]
193
- # for i in x_q:
194
- # e+=i.split(',')
195
- # t5_words=set(sent_n_q_c).difference(set(e))
196
  entities,words_out = extract_entities(sent)
197
  # print("\nwords_out:",words_out)
198
  # print("\nT5",t5_words)
@@ -202,7 +202,7 @@ if uploaded_file is not None:
202
  new=[]
203
  for w in words_out:
204
  new+=w.split('\n')
205
- # words_out+=t5_words
206
  new+=bert_words
207
  words_out = [i for i in new if len(i)>3]
208
  # print("\nfinal:",words_out)
 
181
  text = pg.get_text()
182
  sentences = sentence_tokenize(text)
183
  for sent in sentences:
184
+ x = mask_generation(sent)
185
 
186
+ sent_n_q_c=[]
187
+ sent_n = list(set(sent.lower().replace('.',' ').split("\n")))
188
+ for i in sent_n:
189
+ for j in i.split(" "):
190
+ sent_n_q_c+=j.split(',')
191
+ x_q = x.lower().replace('.',' ').split(' ')
192
+ e=[]
193
+ for i in x_q:
194
+ e+=i.split(',')
195
+ t5_words=set(sent_n_q_c).difference(set(e))
196
  entities,words_out = extract_entities(sent)
197
  # print("\nwords_out:",words_out)
198
  # print("\nT5",t5_words)
 
202
  new=[]
203
  for w in words_out:
204
  new+=w.split('\n')
205
+ words_out+=t5_words
206
  new+=bert_words
207
  words_out = [i for i in new if len(i)>3]
208
  # print("\nfinal:",words_out)