import functools
import io
import os
import tempfile
from io import BytesIO

import gradio as gr
import torch
from pdf2image import convert_from_bytes, convert_from_path
from PIL import Image
from transformers import AutoTokenizer, AutoModel
from transformers import ViTFeatureExtractor, ViTModel


# Custom stylesheet passed to the Gradio interface through its ``css`` argument.
# NOTE(review): the selector targets elements with a ``button`` class; confirm
# that the rendered Gradio widgets actually carry that class.
css = """
.button {
    padding: 10px 20px;
    background: #007BFF;
    color: white;
    border: none;
    cursor: pointer;
    font-size: 16px;
    margin: 10px;
}
"""


# Declarative sketch of the intended page: one inner list per visual row.
#
# ``gr.Row`` is a layout context manager meant to be used as ``with gr.Row():``
# inside a ``gr.Blocks`` context; it does not take child components as a
# positional argument, so the rows are described with plain lists instead.
# NOTE(review): nothing in the visible part of this file reads ``layout``.
layout = [
    [gr.File(label="Upload PDF", type="binary")],
    [gr.Button("Generate Insights")],
    [
        gr.Textbox(
            "Placeholder for PDF insights",
            label="Insights",
            type="text",
        )
    ],
]


@functools.lru_cache(maxsize=4)
def _load_vit(model_name):
    """Load (once per model name) the ViT feature extractor and model."""
    feature_extractor = ViTFeatureExtractor.from_pretrained(model_name)
    model = ViTModel.from_pretrained(model_name)
    model.eval()
    return feature_extractor, model


def get_image_embeddings(image_path, model_name='google/vit-base-patch16-224'):
    """Return a pooled ViT embedding for the image stored at *image_path*.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts (typically a file path).
        model_name: Hugging Face model identifier used for both the feature
            extractor and the model.

    Returns:
        The mean of the model's ``last_hidden_state`` over dimension 1.
        Presumably shaped ``(1, hidden_size)`` for a single image -- TODO
        confirm against the Transformers output documentation. The tensor is
        produced under ``torch.no_grad()`` and therefore carries no autograd
        history.
    """
    # The weights are cached so that embedding every page of a PDF does not
    # reload the model from disk for each page.
    feature_extractor, model = _load_vit(model_name)

    # The context manager closes the underlying file; converting to RGB keeps
    # grayscale / palette / RGBA pages compatible with a 3-channel model input.
    with Image.open(image_path) as image:
        inputs = feature_extractor(images=image.convert("RGB"), return_tensors="pt")

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    return outputs.last_hidden_state.mean(dim=1)


def pdf_to_images(pdf_file, img_dir):
    """Render every page of a PDF to ``page_<n>.png`` files inside *img_dir*.

    Args:
        pdf_file: The PDF to convert. May be a filesystem path, the raw
            document as ``bytes``/``bytearray``, or a binary file-like object
            exposing ``read()`` (for example ``io.BytesIO``).
        img_dir: Output directory; created if it does not exist yet.

    Returns:
        The list of image paths written, in page order. (The function used to
        return ``None``; callers that ignore the result are unaffected.)
    """
    # ``convert_from_path`` only understands filesystem paths, so in-memory
    # documents are routed through ``convert_from_bytes`` instead.
    if isinstance(pdf_file, (bytes, bytearray)):
        images = convert_from_bytes(bytes(pdf_file))
    elif hasattr(pdf_file, "read"):
        images = convert_from_bytes(pdf_file.read())
    else:
        images = convert_from_path(pdf_file)

    os.makedirs(img_dir, exist_ok=True)

    image_paths = []
    for page_number, image in enumerate(images, start=1):
        image_path = os.path.join(img_dir, f"page_{page_number}.png")
        image.save(image_path, "PNG")
        image_paths.append(image_path)

    print(f"Converted {len(images)} pages to images and saved in {img_dir}")
    return image_paths


@functools.lru_cache(maxsize=4)
def _load_text_encoder(model_name):
    """Load (once per model name) the tokenizer and encoder model."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    model.eval()
    return tokenizer, model


def get_text_embeddings(text, model_name='bert-base-uncased'):
    """Return a mean-pooled encoder embedding for *text*.

    Args:
        text: Input passed straight to the tokenizer. Inputs are truncated to
            at most 512 tokens and padded to a common length.
        model_name: Hugging Face model identifier used for both the tokenizer
            and the model.

    Returns:
        The average of ``last_hidden_state`` over dimension 1, counting only
        positions whose attention mask is non-zero so that padding does not
        dilute the result. For an unpadded input this equals the plain mean
        over dimension 1. Presumably one row per input text -- TODO confirm.
        The tensor is produced under ``torch.no_grad()``.
    """
    # Cached so repeated calls do not reload the weights from disk.
    tokenizer, model = _load_text_encoder(model_name)

    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    hidden = outputs.last_hidden_state
    attention_mask = inputs.get("attention_mask")
    if attention_mask is None:
        # No mask available: fall back to the unweighted mean.
        return hidden.mean(dim=1)

    weights = attention_mask.unsqueeze(-1).to(hidden.dtype)
    # Clamp guards against division by zero for an all-padding row.
    token_counts = weights.sum(dim=1).clamp(min=1)
    return (hidden * weights).sum(dim=1) / token_counts


def process_pdf_and_generate_response(pdf_file):
    """Gradio handler: embed the uploaded PDF's pages and return a status text.

    Args:
        pdf_file: The uploaded document. The interface in this file configures
            the upload widget with ``type="binary"``, so this is expected to be
            the raw PDF bytes; a filesystem path is accepted as well.

    Returns:
        The response string shown in the UI. Any failure is reported as
        ``"An error occurred: <details>"`` instead of being raised, so the
        interface always has text to display.
    """
    if not pdf_file:
        # Nothing was uploaded (None or empty payload); skip the heavy work.
        return "An error occurred: no PDF file was provided"

    try:
        # A fresh scratch directory per request keeps pages from earlier
        # uploads from being mixed into this one, and is removed afterwards.
        with tempfile.TemporaryDirectory() as work_dir:
            if isinstance(pdf_file, (bytes, bytearray)):
                # The converter is path based, so spill the bytes to disk.
                pdf_path = os.path.join(work_dir, "upload.pdf")
                with open(pdf_path, "wb") as pdf_handle:
                    pdf_handle.write(pdf_file)
            else:
                pdf_path = pdf_file

            img_dir = os.path.join(work_dir, "pdf_images")
            pdf_to_images(pdf_path, img_dir)

            # os.listdir order is arbitrary; sorting by (length, name) puts
            # page_2.png before page_10.png.
            page_files = sorted(
                (name for name in os.listdir(img_dir) if name.endswith(".png")),
                key=lambda name: (len(name), name),
            )
            image_embeddings = [
                get_image_embeddings(os.path.join(img_dir, name))
                for name in page_files
            ]

        # Placeholder text: no text extraction from the PDF happens yet.
        pdf_text = "PDF content analysis placeholder"
        text_embeddings = get_text_embeddings(pdf_text)

        # Stack page and text embeddings row-wise. This requires both models
        # to share a hidden size. The result is not consumed yet; the response
        # below is still a fixed placeholder.
        combined_embeddings = torch.cat([*image_embeddings, text_embeddings], dim=0)
        response = "Response based on the processed PDF"
    except Exception as e:
        # UI boundary: surface the problem to the user rather than crash.
        response = f"An error occurred: {str(e)}"
    return response


# Single-input / single-output Gradio app: a PDF upload goes in, the text
# returned by ``process_pdf_and_generate_response`` comes out.
iface = gr.Interface(
    title="pdf-chatbot",
    description="Upload a PDF and receive insights based on its content.",
    fn=process_pdf_and_generate_response,
    # ``type="binary"`` asks Gradio to hand the upload to ``fn`` as raw bytes.
    inputs=gr.File(label="Upload PDF", type="binary"),
    outputs=gr.Textbox(
        "Placeholder for PDF insights",
        label="Insights",
        type="text",
    ),
    css=css,
)


if __name__ == "__main__":
    # Start the web server only when run as a script, not when imported.
    iface.launch()