devve1 committed on
Commit
952b8a3
1 Parent(s): 3b7e1ca

Update ppt_chunker.py

Browse files
Files changed (1) hide show
  1. ppt_chunker.py +5 -5
ppt_chunker.py CHANGED
@@ -21,15 +21,15 @@ def process_chunk(chunk, nlp):
21
  tokens = nlp(current_text)
22
 
23
  try:
24
- next = chunk[1][i+1]
25
  except IndexError:
26
  continue
27
 
28
- if (type(next) is list) and next[1].isupper() and (next[0] == ('Title' or 'NarrativeText' or 'UncategorizedText')):
29
  print(f'TOKEN: {current_text}, {tokens[-1]}, {tokens[-1].pos_}')
30
  print(f'{str(tokens[-1])}')
31
  if (tokens[-1].pos_ in {'SYM', "ADP", 'ADV', 'PART', 'PRON', 'DET', "AUX", 'SCONJ', 'CONJ', "CCONJ"}) or ((tokens[-1].pos_ in {'PROPN', 'NOUN', 'VERB'}) and (str(tokens[-1]) in WRONG_NOUNS)):
32
- chunk[1][i+1][1] = current_text + ' ' + next[1]
33
  marked.append(i)
34
 
35
  for i in marked:
@@ -73,10 +73,10 @@ def ppt_chunk(file_like, model):
73
  else:
74
  if elem.text[-1] in NON_ENDING_PUNCT:
75
  try:
76
- next = elements[i+1]
77
  except:
78
  pass
79
- elements[i+1].text = elem.text + ' ' + next.text
80
  marked.add(elem.text)
81
 
82
  if elem.category == "ListItem":
 
21
  tokens = nlp(current_text)
22
 
23
  try:
24
+ next_ = chunk[1][i+1]
25
  except IndexError:
26
  continue
27
 
28
+ if (type(next_) is list) and next_[1].isupper() and (next_[0] == ('Title' or 'NarrativeText' or 'UncategorizedText')):
29
  print(f'TOKEN: {current_text}, {tokens[-1]}, {tokens[-1].pos_}')
30
  print(f'{str(tokens[-1])}')
31
  if (tokens[-1].pos_ in {'SYM', "ADP", 'ADV', 'PART', 'PRON', 'DET', "AUX", 'SCONJ', 'CONJ', "CCONJ"}) or ((tokens[-1].pos_ in {'PROPN', 'NOUN', 'VERB'}) and (str(tokens[-1]) in WRONG_NOUNS)):
32
+ chunk[1][i+1][1] = current_text + ' ' + next_[1]
33
  marked.append(i)
34
 
35
  for i in marked:
 
73
  else:
74
  if elem.text[-1] in NON_ENDING_PUNCT:
75
  try:
76
+ next_ = elements[i+1]
77
  except:
78
  pass
79
+ elements[i+1].text = elem.text + ' ' + next_.text
80
  marked.add(elem.text)
81
 
82
  if elem.category == "ListItem":