Spaces:

GameScribes
/

Multipurpose-AI-Agent-Development

Sleeping

devve1 committed on Aug 1

Commit

c213948

•

1 Parent(s): c144ff2

Update ppt_chunker.py

Files changed (1) hide show

ppt_chunker.py CHANGED Viewed

@@ -1,12 +1,22 @@
 from io import StringIO
 from unstructured.cleaners.core import clean
 from unstructured.partition.pptx import partition_pptx
 from ordered_multimap import OrderedMultiIndexMapWeakRef
-def ppt_chunk(file_like):
     elements = partition_pptx(file=file_like)
     for elem in elements:
         elem.text = clean(elem.text, bullets=True)

 from io import StringIO
+import joblib
 from unstructured.cleaners.core import clean
 from unstructured.partition.pptx import partition_pptx
 from ordered_multimap import OrderedMultiIndexMapWeakRef
+def split_and_clean():
+def ppt_chunk(file_like, model):
     elements = partition_pptx(file=file_like)
+    num_cores = joblib.cpu_count()
+    image_content = joblib.Parallel(n_jobs=num_cores, verbose=1)(
+        joblib.delayed(split_and_clean)(i, pdf_bytes, scale) for i in page_indices
+    )
     for elem in elements:
         elem.text = clean(elem.text, bullets=True)