devve1 committed on
Commit
e276989
1 Parent(s): 4ab5553

Update ppt_chunker.py

Browse files
Files changed (1) hide show
  1. ppt_chunker.py +3 -3
ppt_chunker.py CHANGED
@@ -14,7 +14,7 @@ def process_chunk(chunk, nlp):
14
  current = chunk[1][i]
15
  current_text = current[1]
16
 
17
- if (type(current) is tuple) and current_text.isupper() and (current[0] == 'Title'):
18
  tokens = nlp(current_text)
19
 
20
  try:
@@ -22,7 +22,7 @@ def process_chunk(chunk, nlp):
22
  except IndexError:
23
  continue
24
 
25
- if (type(next) is tuple) and next[1].isupper() and (next[0] == 'Title'):
26
  if tokens[-1].pos_ in ["ADP", 'PART', 'PRON', 'DET', "AUX", 'SCONJ', 'CONJ', "CCONJ"]:
27
  chunk[1][i+1][1] = current_text + ' ' + next[1]
28
  marked.append(i)
@@ -53,7 +53,7 @@ def ppt_chunk(file_like, model):
53
  if elem.category == "ListItem":
54
  list_items.append(elem.text)
55
  else:
56
- current_chunk.append((elem.category, elem.text))
57
 
58
  sr = time.time()
59
  for chunk in chunks:
 
14
  current = chunk[1][i]
15
  current_text = current[1]
16
 
17
+ if (type(current) is list) and current_text.isupper() and (current[0] == 'Title'):
18
  tokens = nlp(current_text)
19
 
20
  try:
 
22
  except IndexError:
23
  continue
24
 
25
+ if (type(next) is list) and next[1].isupper() and (next[0] == 'Title'):
26
  if tokens[-1].pos_ in ["ADP", 'PART', 'PRON', 'DET', "AUX", 'SCONJ', 'CONJ', "CCONJ"]:
27
  chunk[1][i+1][1] = current_text + ' ' + next[1]
28
  marked.append(i)
 
53
  if elem.category == "ListItem":
54
  list_items.append(elem.text)
55
  else:
56
+ current_chunk.append([elem.category, elem.text])
57
 
58
  sr = time.time()
59
  for chunk in chunks: