Update ppt_chunker.py
Browse files
- ppt_chunker.py +6 -4
ppt_chunker.py
CHANGED
@@ -125,6 +125,7 @@ def ppt_chunk(file_like, nlp):
|
|
125 |
nb_titles = 0
|
126 |
nb_sub_titles = 0
|
127 |
metadata_sub_title = ''
|
|
|
128 |
|
129 |
for i, sub_chunk in enumerate(chunk[1]):
|
130 |
if type(sub_chunk) is list:
|
@@ -144,12 +145,13 @@ def ppt_chunk(file_like, nlp):
|
|
144 |
clean_trailing_punctuation(metadata_main_title),
|
145 |
clean_trailing_punctuation(metadata_sub_title)
|
146 |
)
|
|
|
147 |
break
|
148 |
-
|
149 |
-
#if i == len(chunk) - 1:
|
150 |
-
# l = "\n".join([c[1].lower() for c in chunk[1] if type(c) is list])
|
151 |
-
# weakDict.insert(chunk[0], l, metadata_main_title, metadata_sub_title)
|
152 |
|
|
|
|
|
|
|
|
|
153 |
for w in weakDict.all_texts():
|
154 |
print(f'AFTER : {type(w)}, {w}')
|
155 |
|
|
|
125 |
nb_titles = 0
|
126 |
nb_sub_titles = 0
|
127 |
metadata_sub_title = ''
|
128 |
+
condition_met = False
|
129 |
|
130 |
for i, sub_chunk in enumerate(chunk[1]):
|
131 |
if type(sub_chunk) is list:
|
|
|
145 |
clean_trailing_punctuation(metadata_main_title),
|
146 |
clean_trailing_punctuation(metadata_sub_title)
|
147 |
)
|
148 |
+
condition_met = True
|
149 |
break
|
|
|
|
|
|
|
|
|
150 |
|
151 |
+
if not condition_met:
|
152 |
+
cleaned_titles_chunk = "\n".join([c[1].lower() for c in chunk[1] if type(c) is list])
|
153 |
+
weakDict.insert(chunk[0], cleaned_titles_chunk, metadata_main_title, metadata_sub_title)
|
154 |
+
|
155 |
for w in weakDict.all_texts():
|
156 |
print(f'AFTER : {type(w)}, {w}')
|
157 |
|