from transformers import pipeline
import fitz  # PyMuPDF
import gradio as gr
import requests
import io
import re
import os
from PIL import Image

# Load the summarization and extractive question-answering pipelines.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

# Hugging Face Inference API token, read back from the environment.
os.environ["HUGGINGFACE_HUB_TOKEN"] = "ctp-hw"
my_key = os.environ["HUGGINGFACE_HUB_TOKEN"]


def extract_text_from_pdf(pdf_file):
    # Gradio may pass either a file path or a file-like object with a .name attribute.
    pdf_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file
    with fitz.open(pdf_path) as pdf:
        text = ""
        for page in pdf:
            text += page.get_text("text")
    # Collapse runs of whitespace into single spaces.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()


def summarize_pdf(pdf_file):
    text = extract_text_from_pdf(pdf_file)
    if len(text) > 1000:
        # Split long documents into 1000-character chunks and summarize each chunk.
        chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
        summary = ""
        for chunk in chunks:
            summary += summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]['summary_text'] + " "
    else:
        summary = summarizer(text, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
    return summary


def answer_question(pdf_file, question):
    text = extract_text_from_pdf(pdf_file)
    answer = qa_model(question=question, context=text)
    return answer['answer']


# Hugging Face Inference API endpoint for Stable Diffusion v1.5.
API_URL = "https://api-inference.huggingface.co/models/stable-diffusion-v1-5/stable-diffusion-v1-5"
headers = {"Authorization": f"Bearer {my_key}"}


def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    response.raise_for_status()  # Fail loudly instead of passing an API error body to PIL.
    return response.content


def summarize_and_qa(pdf_file, question):
    summary = summarize_pdf(pdf_file)
    answer = answer_question(pdf_file, question)
    # Use the extracted answer as the prompt for an illustrative image.
    image_bytes = query({"inputs": answer})
    image = Image.open(io.BytesIO(image_bytes))
    return summary, answer, image


demo = gr.Interface(
    fn=summarize_and_qa,
    inputs=["file", "text"],
    outputs=["textbox", "textbox", "image"],
    title="PDF Summary and Q&A",
    description="Upload a PDF to get a summary and answer questions based on the content. It will also give a picture to help you better understand the content.",
)

if __name__ == "__main__":
    demo.launch()