devve1 committed on
Commit
16c09ea
1 Parent(s): a5d303f

Update ppt_chunker.py

Browse files
Files changed (1) hide show
  1. ppt_chunker.py +3 -3
ppt_chunker.py CHANGED
@@ -1,6 +1,6 @@
1
  from io import StringIO
2
  from typing import List
3
- from concurrent.futures import ThreadPoolExecutor
4
 
5
  from unstructured.partition.pptx import partition_pptx
6
  from unstructured.cleaners.core import clean_ordered_bullets, clean_bullets, clean_non_ascii_chars
@@ -85,8 +85,8 @@ def ppt_chunk(file_like, nlp):
85
 
86
  return chunk
87
 
88
- with ThreadPoolExecutor() as executor:
89
- chunks = list(executor.map(process_chunk, chunks))
90
 
91
  er = time.time()
92
  fr = er - sr
 
1
  from io import StringIO
2
  from typing import List
3
+ from multiprocessing import Pool, cpu_count
4
 
5
  from unstructured.partition.pptx import partition_pptx
6
  from unstructured.cleaners.core import clean_ordered_bullets, clean_bullets, clean_non_ascii_chars
 
85
 
86
  return chunk
87
 
88
+ with Pool(cpu_count()) as pool:
89
+ chunks = pool.map(process_chunk, chunks)
90
 
91
  er = time.time()
92
  fr = er - sr