Update ppt_chunker.py
Browse files- ppt_chunker.py +4 -0
ppt_chunker.py
CHANGED
@@ -129,8 +129,12 @@ def ppt_chunk(file_like, nlp):
|
|
129 |
for i, sub_chunk in enumerate(chunk[1]):
|
130 |
if type(sub_chunk) is list:
|
131 |
if sub_chunk[0] == 'Title':
|
|
|
|
|
132 |
nb_titles += 1
|
133 |
elif sub_chunk[0] == 'UncategorizedText':
|
|
|
|
|
134 |
nb_sub_titles += 1
|
135 |
else:
|
136 |
if (nb_titles <= 1) and (nb_sub_titles <= 1):
|
|
|
129 |
for i, sub_chunk in enumerate(chunk[1]):
|
130 |
if type(sub_chunk) is list:
|
131 |
if sub_chunk[0] == 'Title':
|
132 |
+
if ((i == 0) or (i == 1)) and (metadata_main_title != sub_chunk[1]):
|
133 |
+
metadata_main_title = sub_chunk[1]
|
134 |
nb_titles += 1
|
135 |
elif sub_chunk[0] == 'UncategorizedText':
|
136 |
+
if (i == 1) and (metadata_sub_title != sub_chunk[1]):
|
137 |
+
metadata_sub_title = sub_chunk[1]
|
138 |
nb_sub_titles += 1
|
139 |
else:
|
140 |
if (nb_titles <= 1) and (nb_sub_titles <= 1):
|