"""Gradio app that summarizes an uploaded PDF and answers a question about it.

Text is extracted with PyMuPDF (``fitz``), summarized chunk by chunk with a
BART summarization pipeline, and questions are answered with an extractive
BERT question-answering pipeline.
"""

import io  # noqa: F401  (only used by the disabled image-generation code)
import os
import re

import fitz
import gradio as gr
import requests  # noqa: F401  (only used by the disabled image-generation code)
from PIL import Image  # noqa: F401  (only used by the disabled image code)
from transformers import pipeline

# Both models are loaded once at start-up so every request reuses them.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
qa_model = pipeline(
    "question-answering",
    model="deepset/bert-large-uncased-whole-word-masking-squad2",
)

# Number of characters of PDF text handed to the summarizer per call.
SUMMARY_CHUNK_CHARS = 1000


def extract_text_from_pdf(pdf_file):
    """Return the text of all pages of a PDF as one whitespace-normalized string.

    Args:
        pdf_file: Path to the PDF (``str`` or ``os.PathLike``), or an object
            exposing the path as a ``.name`` attribute (some Gradio versions
            appear to pass the upload as a temp-file wrapper rather than a
            plain path -- TODO confirm against the installed version).

    Returns:
        The concatenated page text with every run of whitespace collapsed to
        a single space and leading/trailing whitespace removed.
    """
    # Plain paths are passed through untouched; ``Path.name`` is only the
    # basename, so the ``.name`` fallback must not be applied to them.
    if isinstance(pdf_file, (str, os.PathLike)):
        path = pdf_file
    else:
        path = getattr(pdf_file, "name", pdf_file)

    with fitz.open(path) as pdf:
        raw_text = "".join(page.get_text("text") for page in pdf)
    return re.sub(r"\s+", " ", raw_text).strip()


def summarize(text, chunk_size=SUMMARY_CHUNK_CHARS):
    """Summarize ``text`` by summarizing fixed-size character chunks.

    Args:
        text: The text to summarize.
        chunk_size: Maximum number of characters per summarizer call.

    Returns:
        The chunk summaries joined by single spaces, or ``""`` when ``text``
        is empty or whitespace-only (the model is not called in that case).

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not text or not text.strip():
        return ""

    # A text no longer than ``chunk_size`` simply yields a single chunk, so
    # short and long inputs share the same code path.
    chunks = [
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    ]
    summaries = [
        summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0][
            "summary_text"
        ]
        for chunk in chunks
    ]
    return " ".join(summaries)


# Disabled image-generation helper, kept for reference:
# API_URL = "https://api-inference.huggingface.co/models/deepset/bert-large-uncased-whole-word-masking-squad2"
# headers = {"Authorization": f"Bearer {my_key}"}
# def query(payload):
#     response = requests.post(API_URL, headers=headers, json=payload)
#     return response.content


def answer_question(text, question):
    """Answer ``question`` using ``text`` as the context.

    Args:
        text: Context the answer is extracted from.
        question: The question to answer.

    Returns:
        The ``answer`` field of the question-answering pipeline's response,
        or ``""`` when either the question or the context is blank (the
        model is not called in that case).
    """
    if not (question or "").strip() or not (text or "").strip():
        return ""
    response = qa_model(question=question, context=text)
    return response["answer"]


def summarize_and_qa(pdf_file, question):
    """Summarize an uploaded PDF and answer a question about its content.

    Args:
        pdf_file: The uploaded PDF as delivered by the Gradio ``"file"``
            input, or ``None`` when nothing was uploaded.
        question: The user's question; may be blank if only a summary is
            wanted.

    Returns:
        A ``(summary, answer)`` tuple of strings. When no file was uploaded
        or no text could be extracted, the first element is a short message
        explaining why and the answer is empty.
    """
    if pdf_file is None:
        return "Please upload a PDF file.", ""

    text = extract_text_from_pdf(pdf_file)
    if not text:
        return "No extractable text was found in this PDF.", ""

    summary = summarize(text)
    answer = answer_question(text, question)

    # Disabled image generation, kept for reference:
    # image_bytes = query({"inputs": answer})
    # if image_bytes:
    #     try:
    #         image = Image.open(io.BytesIO(image_bytes))
    #     except Exception as e:
    #         return summary, answer, None
    # else:
    #     image = None

    return summary, answer


demo = gr.Interface(
    fn=summarize_and_qa,
    inputs=["file", "text"],
    outputs=["textbox", "textbox"],
    title="Understand your PDF Better",
    description=(
        "Upload a PDF to get a summary. You can ask any question regarding "
        "the content of the PDF."
    ),
)

if __name__ == "__main__":
    demo.launch(debug=True, share=True)