Spaces:

GameScribes
/

Multipurpose-AI-Agent-Development

Running on T4

App Files Community

devve1 committed on Jul 30

Commit

230b5de

•

1 Parent(s): 2f6050c

Update ppt_chunker.py

Browse files

Files changed (1) hide show

ppt_chunker.py +2 -26

ppt_chunker.py CHANGED Viewed

@@ -1,34 +1,12 @@
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-import re
-import copy
-import pandas as pd
 from io import StringIO
-from datetime import datetime
-from ppt_parser import RAGFlowPptParser
 from pptx import Presentation
 from ordered_multimap import OrderedMultiIndexMapWeakRef
 def ppt_chunk(file_like, from_page=0, to_page=100000):
-    """
-    The supported file formats are pptx.
-    Every page will be treated as a chunk. And the thumbnail of every page will be stored.
-    PPT file will be parsed by using this method automatically, setting-up for every PPT file is not necessary.
-    """
     weakDict = OrderedMultiIndexMapWeakRef()
-    metadatas = pd.DataFrame([0])
     source_stream = StringIO(file_like.read())
     ppt = Presentation(source_stream)
@@ -37,8 +15,6 @@ def ppt_chunk(file_like, from_page=0, to_page=100000):
     ppt_parser = RAGFlowPptParser()
     metadata_main_title = ''
-    processing_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     for pn, slide in enumerate(ppt.slides):
         if pn < from_page:

 from io import StringIO
 from pptx import Presentation
+from ppt_parser import RAGFlowPptParser
 from ordered_multimap import OrderedMultiIndexMapWeakRef
 def ppt_chunk(file_like, from_page=0, to_page=100000):
     weakDict = OrderedMultiIndexMapWeakRef()
     source_stream = StringIO(file_like.read())
     ppt = Presentation(source_stream)
     ppt_parser = RAGFlowPptParser()
     metadata_main_title = ''
     for pn, slide in enumerate(ppt.slides):
         if pn < from_page: