Spaces:

GameScribes
/

Multipurpose-AI-Agent-Development

Paused

App Files Files Community

Multipurpose-AI-Agent-Development / ppt_parser.py

devve1

Update ppt_parser.py

3de1057 verified about 2 months ago

raw

history blame

No virus

1.96 kB

	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#

	# Modifications :
	# - Author : dev1ous
	# - Date : 07/30/2024
	# - Content : Extract slides with a unique title and add the title as metadata for each slide.
	# Slides with a unique title are assumed to start a new subject for the slides that follow.

	class RAGFlowPptParser(object):
	    """Pull plain text out of presentation shapes.

	    Shapes are handled by duck typing: the code only reads ``shape_type``,
	    ``has_text_frame``, ``text_frame.text``, ``table``, ``shapes``, ``top``
	    and ``left``. Presumably these are python-pptx shape objects; confirm
	    against the caller.
	    """

	    # ``shape_type`` codes this parser branches on. A type-19 shape is read
	    # through ``.table`` and a type-6 shape through its child ``.shapes``;
	    # these values appear to match python-pptx's MSO_SHAPE_TYPE.TABLE and
	    # MSO_SHAPE_TYPE.GROUP.
	    _TABLE_SHAPE_TYPE = 19
	    _GROUP_SHAPE_TYPE = 6

	    def __init__(self):
	        super().__init__()

	    @staticmethod
	    def _reading_order_key(shape):
	        """Return a sort key ordering shapes top-to-bottom, then left-to-right.

	        ``top`` is bucketed by integer division by 10 so that shapes whose
	        tops are nearly equal fall into the same row and are then ordered
	        by ``left``. A missing (``None``) coordinate is treated as 0 so that
	        sorting never fails on shapes without an explicit position.
	        """
	        top = shape.top if shape.top is not None else 0
	        left = shape.left if shape.left is not None else 0
	        return (top // 10, left)

	    def __extract(self, shape):
	        """Return the text carried by ``shape``, or ``None`` if it has none.

	        * Table (``shape_type == 19``): one line per data row (row 0 is used
	          as the header), each cell rendered as ``"<header>: <value>"`` and
	          the cells of a row joined with ``"; "``.
	        * Shape with a text frame: the text frame's text.
	        * Group (``shape_type == 6``): the non-empty texts of the child
	          shapes, extracted recursively in reading order, joined with
	          newlines.
	        * Anything else: ``None``.
	        """
	        if shape.shape_type == self._TABLE_SHAPE_TYPE:
	            tb = shape.table
	            n_cols = len(tb.columns)
	            rows = []
	            for i in range(1, len(tb.rows)):
	                # The truthiness filter on the cell object is kept from the
	                # original; NOTE(review): it looks like it is always true for
	                # real cell objects -- confirm before removing.
	                pairs = [
	                    tb.cell(0, j).text + ": " + tb.cell(i, j).text
	                    for j in range(n_cols)
	                    if tb.cell(i, j)
	                ]
	                rows.append("; ".join(pairs))
	            return "\n".join(rows)

	        if shape.has_text_frame:
	            return shape.text_frame.text

	        if shape.shape_type == self._GROUP_SHAPE_TYPE:
	            # Same None-safe ordering as __call__, so a child without a
	            # position no longer raises TypeError during the sort.
	            ordered = sorted(shape.shapes, key=self._reading_order_key)
	            extracted = (self.__extract(child) for child in ordered)
	            return "\n".join(text for text in extracted if text)

	        return None

	    def __call__(self, fnm, from_page, to_page):
	        """Join the non-empty texts of a slide's shapes in reading order.

	        NOTE(review): as written, ``slide`` is never bound inside this
	        method and ``fnm``, ``from_page`` and ``to_page`` are never read, so
	        calling the parser raises ``NameError``. The code that opens the
	        presentation and selects the slide(s) is not visible here; it must
	        be restored before this method can be used. The logic below is left
	        unchanged rather than guessed at.
	        """
	        texts = []
	        for shape in sorted(slide.shapes, key=self._reading_order_key):
	            txt = self.__extract(shape)
	            if txt:
	                texts.append(txt)
	        return "\n".join(texts)