Spaces: Running on T4
Update ppt_chunker.py
Browse files
- ppt_chunker.py +10 -2
ppt_chunker.py
CHANGED
@@ -185,6 +185,10 @@ def build_prompt_conv():
|
|
185 |
}
|
186 |
]
|
187 |
|
|
|
|
|
|
|
|
|
188 |
def ppt_chunker(file_like, llm):
|
189 |
import time
|
190 |
|
@@ -204,10 +208,14 @@ def ppt_chunker(file_like, llm):
|
|
204 |
|
205 |
if elem.category == 'Table':
|
206 |
if current_chunk == '':
|
207 |
-
|
|
|
|
|
208 |
current_chunk = elem.text
|
209 |
else:
|
210 |
-
|
|
|
|
|
211 |
current_chunk += '\n' + elem.text
|
212 |
continue
|
213 |
|
|
|
185 |
}
|
186 |
]
|
187 |
|
188 |
+
def multiline_string_to_json(multiline_string: str):
|
189 |
+
for line in multiline_string.splitlines():
|
190 |
+
|
191 |
+
|
192 |
def ppt_chunker(file_like, llm):
|
193 |
import time
|
194 |
|
|
|
208 |
|
209 |
if elem.category == 'Table':
|
210 |
if current_chunk == '':
|
211 |
+
lines = elem.text.split('\n')
|
212 |
+
result = '\n'.join(' '.join(line.split()) for line in lines)
|
213 |
+
print(f'TAB : {result}')
|
214 |
current_chunk = elem.text
|
215 |
else:
|
216 |
+
lines = elem.text.split('\n')
|
217 |
+
result = '\n'.join(' '.join(line.split()) for line in lines)
|
218 |
+
print(f'TAB : {result}')
|
219 |
current_chunk += '\n' + elem.text
|
220 |
continue
|
221 |
|