devve1 committed on
Commit
9478533
1 Parent(s): 28de8cf

Update ppt_chunker.py

Browse files
Files changed (1) hide show
  1. ppt_chunker.py +4 -0
ppt_chunker.py CHANGED
@@ -129,8 +129,12 @@ def ppt_chunk(file_like, nlp):
129
  for i, sub_chunk in enumerate(chunk[1]):
130
  if type(sub_chunk) is list:
131
  if sub_chunk[0] == 'Title':
 
 
132
  nb_titles += 1
133
  elif sub_chunk[0] == 'UncategorizedText':
 
 
134
  nb_sub_titles += 1
135
  else:
136
  if (nb_titles <= 1) and (nb_sub_titles <= 1):
 
129
  for i, sub_chunk in enumerate(chunk[1]):
130
  if type(sub_chunk) is list:
131
  if sub_chunk[0] == 'Title':
132
+ if ((i == 0) or (i == 1)) and (metadata_main_title != sub_chunk[1]):
133
+ metadata_main_title = sub_chunk[1]
134
  nb_titles += 1
135
  elif sub_chunk[0] == 'UncategorizedText':
136
+ if (i == 1) and (metadata_sub_title != sub_chunk[1]):
137
+ metadata_sub_title = sub_chunk[1]
138
  nb_sub_titles += 1
139
  else:
140
  if (nb_titles <= 1) and (nb_sub_titles <= 1):