Update ppt_chunker.py
Browse files
- ppt_chunker.py +3 -3
ppt_chunker.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
from io import StringIO
|
2 |
from typing import List
|
3 |
-
from
|
4 |
|
5 |
from unstructured.partition.pptx import partition_pptx
|
6 |
from unstructured.cleaners.core import clean_ordered_bullets, clean_bullets, clean_non_ascii_chars
|
@@ -85,8 +85,8 @@ def ppt_chunk(file_like, nlp):
|
|
85 |
|
86 |
return chunk
|
87 |
|
88 |
-
with
|
89 |
-
chunks =
|
90 |
|
91 |
er = time.time()
|
92 |
fr = er - sr
|
|
|
1 |
from io import StringIO
|
2 |
from typing import List
|
3 |
+
from multiprocessing import Pool, cpu_count
|
4 |
|
5 |
from unstructured.partition.pptx import partition_pptx
|
6 |
from unstructured.cleaners.core import clean_ordered_bullets, clean_bullets, clean_non_ascii_chars
|
|
|
85 |
|
86 |
return chunk
|
87 |
|
88 |
+
with Pool(cpu_count()) as pool:
|
89 |
+
chunks = pool.map(process_chunk, chunks)
|
90 |
|
91 |
er = time.time()
|
92 |
fr = er - sr
|