Spaces:

GameScribes
/

Multipurpose-AI-Agent-Development

Sleeping

devve1 committed on Jul 30

Commit

8d00777

•

1 Parent(s): fa7e026

Update ppt_parser.py

Files changed (1) hide show

ppt_parser.py CHANGED Viewed

@@ -46,28 +46,12 @@ class RAGFlowPptParser(object):
                     texts.append(t)
             return "\n".join(texts)
-    def __call__(self, fnm, from_page, to_page, callback=None):
-        ppt = Presentation(fnm) if isinstance(
-            fnm, str) else Presentation(
-            BytesIO(fnm))
-        txts = defaultdict(set)
-        metadata_main_title: str = ''
-        self.total_page = len(ppt.slides)
-        for i, slide in enumerate(ppt.slides):
-            if i < from_page:
-                continue
-            if i >= to_page:
-                break
-            texts = []
-            text_shapes = [shape for shape in slide.shapes if shape.has_text_frame]
-            if len(text_shapes) == 1 and text_shapes[0].has_text_frame:
-                metadata_main_title = text_shapes[0].text_frame.text
-                continue
-            for shape in sorted(
-                    slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left)):
-                txt = self.__extract(shape)
-                if txt:
-                    texts.append(txt)
-            txts[metadata_main_title].add("\n".join(texts))
-        return txts

                     texts.append(t)
             return "\n".join(texts)
+    def __call__(self, fnm, from_page, to_page):
+        texts = []
+        for shape in sorted(
+            slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left)
+        ):
+            txt = self.__extract(shape)
+            if txt:
+                texts.append(txt)
+        return "\n".join(texts)