# Talk2Deck / app.py
import os

import gradio as gr
import torch
from PIL import Image
from pdf2image import convert_from_bytes
from transformers import AutoModel, AutoTokenizer, ViTImageProcessor, ViTModel
# CSS styles
css = """
.button {
    padding: 10px 20px;
    background: #007BFF;
    color: white;
    border: none;
    cursor: pointer;
    font-size: 16px;
    margin: 10px;
}
"""
# Layout sketch with custom-styled rows. NOTE: this list is never passed to
# Gradio; the UI is defined by the gr.Interface at the bottom of the file, so
# the sketch is kept here only as a reference for a future Blocks layout.
# layout = [
#     gr.Row([gr.File(label="Upload PDF", type="binary")]),
#     gr.Row([gr.Button("Generate Insights")]),
#     gr.Row([gr.Textbox("Placeholder for PDF insights", label="Insights")]),
# ]
# Function to get image embeddings using ViT.
# NOTE: the processor and model are reloaded on every call; caching them at
# module level would be noticeably faster for multi-page PDFs.
def get_image_embeddings(image_path, model_name='google/vit-base-patch16-224'):
    image_processor = ViTImageProcessor.from_pretrained(model_name)
    model = ViTModel.from_pretrained(model_name)
    image = Image.open(image_path).convert("RGB")
    inputs = image_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    embeddings = outputs.last_hidden_state.mean(dim=1)  # Mean pooling over patch tokens
    return embeddings
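# Example (illustrative only): embedding a single rendered page.
#     emb = get_image_embeddings("pdf_images/page_1.png")
#     emb.shape  # -> torch.Size([1, 768]) for the ViT-base checkpoint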
# Function to convert a PDF (raw bytes) into one PNG image per page.
# NOTE: pdf2image requires the poppler utilities to be installed on the system.
def pdf_to_images(pdf_bytes, img_dir):
    images = convert_from_bytes(pdf_bytes)
    # Create the output directory if it doesn't exist
    os.makedirs(img_dir, exist_ok=True)
    for i, image in enumerate(images):
        image_path = f"{img_dir}/page_{i + 1}.png"
        image.save(image_path, "PNG")
    print(f"Converted {len(images)} pages to images and saved in {img_dir}")
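# Example (illustrative only; "deck.pdf" is just a sample file name):
#     with open("deck.pdf", "rb") as f:
#         pdf_to_images(f.read(), "pdf_images")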
# Function to get text embeddings using a transformer model
def get_text_embeddings(text, model_name='bert-base-uncased'):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    embeddings = outputs.last_hidden_state.mean(dim=1)  # Mean pooling over tokens
    return embeddings
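# Illustrative helper (not wired into the app): one simple way to relate the
# page and text embeddings above is cosine similarity. Both ViT-base and
# BERT-base produce 768-dimensional vectors, so the shapes line up, but the two
# models embed into different spaces, so treat the score as a rough signal only.
def embedding_similarity(image_embedding, text_embedding):
    # Both inputs are (1, 768) tensors; returns a Python float in [-1, 1].
    return torch.nn.functional.cosine_similarity(image_embedding, text_embedding).item()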
# Function to process an uploaded PDF and generate a response
def process_pdf_and_generate_response(pdf_file):
    try:
        # gr.File(type="binary") passes the upload in as raw bytes
        # Convert the PDF pages to images
        img_dir = "pdf_images"
        pdf_to_images(pdf_file, img_dir)
        # Generate embeddings for each page image
        image_embeddings = []
        for filename in sorted(os.listdir(img_dir)):
            if filename.endswith(".png"):
                image_path = os.path.join(img_dir, filename)
                image_embeddings.append(get_image_embeddings(image_path))
        # Perform some text analysis on the PDF content (replace with your logic)
        pdf_text = "PDF content analysis placeholder"
        text_embeddings = get_text_embeddings(pdf_text)
        # Combine image and text embeddings and generate a response (replace with your logic)
        combined_embeddings = torch.cat([*image_embeddings, text_embeddings], dim=0)
        response = "Response based on the processed PDF"
    except Exception as e:
        response = f"An error occurred: {str(e)}"
    return response
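# A minimal sketch of how the pdf_text placeholder above could be replaced with
# real text extraction, assuming the pypdf package is installed (it is not among
# this app's imports, so the snippet is left commented out):
#
#     from io import BytesIO
#     from pypdf import PdfReader
#
#     reader = PdfReader(BytesIO(pdf_file))
#     pdf_text = "\n".join(page.extract_text() or "" for page in reader.pages)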
iface = gr.Interface(
    fn=process_pdf_and_generate_response,
    inputs=gr.File(label="Upload PDF", type="binary"),  # delivers the upload as raw bytes
    outputs=gr.Textbox(label="Insights", placeholder="Placeholder for PDF insights"),
    title="pdf-chatbot",
    description="Upload a PDF and receive insights based on its content.",
    css=css,
)
if __name__ == "__main__":
iface.launch()