Spaces: Running on T4
Update ppt_chunker.py
Browse files - ppt_chunker.py +2 -2
ppt_chunker.py
CHANGED
@@ -183,9 +183,9 @@ def ppt_chunker(file_like, llm):
|
|
183 |
continue
|
184 |
|
185 |
if current_chunk == '':
|
186 |
-
current_chunk = clean(elem.text, extra_whitespace=True, dashes=True, bullets=True, lowercase=True, trailing_punctuation=True)
|
187 |
else:
|
188 |
-
current_chunk += '\n' + clean(elem.text, extra_whitespace=True, dashes=True, bullets=True, lowercase=True, trailing_punctuation=True)
|
189 |
|
190 |
for chunk in chunks:
|
191 |
print(f' TEXT : {chunk}')
|
|
|
183 |
continue
|
184 |
|
185 |
if current_chunk == '':
|
186 |
+
current_chunk = clean(elem.text, extra_whitespace=True, dashes=True, bullets=True, lowercase=True, trailing_punctuation=True)
|
187 |
else:
|
188 |
+
current_chunk += '\n' + clean(elem.text, extra_whitespace=True, dashes=True, bullets=True, lowercase=True, trailing_punctuation=True)
|
189 |
|
190 |
for chunk in chunks:
|
191 |
print(f' TEXT : {chunk}')
|