Spaces: Running on T4
Update ppt_chunker.py
Browse files
Files changed: ppt_chunker.py (+7 -4)
ppt_chunker.py
CHANGED
@@ -130,20 +130,23 @@ def ppt_chunk(file_like, nlp):
|
|
130 |
first_chunk = chunk[1][i-1]
|
131 |
|
132 |
if first_chunk[0] == 'UncategorizedText':
|
133 |
-
|
134 |
-
|
|
|
135 |
try:
|
136 |
ok = chunk[1][i-2]
|
137 |
|
138 |
if ok[0] == 'Title':
|
139 |
-
|
|
|
140 |
weakDict.insert(chunk[0], sub_chunk, metadata_main_title, metadata_sub_title)
|
141 |
break
|
142 |
except IndexError:
|
143 |
weakDict.insert(chunk[0], sub_chunk, metadata_sub_title)
|
144 |
break
|
145 |
elif first_chunk[0] == 'Title':
|
146 |
-
|
|
|
147 |
weakDict.insert(chunk[0], sub_chunk, metadata_main_title)
|
148 |
break
|
149 |
except IndexError:
|
|
|
130 |
first_chunk = chunk[1][i-1]
|
131 |
|
132 |
if first_chunk[0] == 'UncategorizedText':
|
133 |
+
if metadata_sub_title != first_chunk[1]:
|
134 |
+
metadata_sub_title = first_chunk[1]
|
135 |
+
|
136 |
try:
|
137 |
ok = chunk[1][i-2]
|
138 |
|
139 |
if ok[0] == 'Title':
|
140 |
+
if metadata_main_title != ok[1]:
|
141 |
+
metadata_main_title = ok[1]
|
142 |
weakDict.insert(chunk[0], sub_chunk, metadata_main_title, metadata_sub_title)
|
143 |
break
|
144 |
except IndexError:
|
145 |
weakDict.insert(chunk[0], sub_chunk, metadata_sub_title)
|
146 |
break
|
147 |
elif first_chunk[0] == 'Title':
|
148 |
+
if metadata_main_title != first_chunk[1]:
|
149 |
+
metadata_main_title = first_chunk[1]
|
150 |
weakDict.insert(chunk[0], sub_chunk, metadata_main_title)
|
151 |
break
|
152 |
except IndexError:
|