CTP-HW / app.py
anitap's picture
Update app.py
1f2b1df verified
raw
history blame
2.17 kB
import io
import logging
import os
import re
import textwrap

import fitz
import gradio as gr
import requests
from PIL import Image
from transformers import pipeline
# Load both models once at import time so every request reuses them.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

# Keep a token that the environment already provides (for example a
# deployment secret) and only fall back to the original placeholder when the
# variable is unset. Assigning it unconditionally would overwrite a real token.
os.environ.setdefault("HUGGINGFACE_HUB_TOKEN", "ctp-hw")
my_key = os.environ["HUGGINGFACE_HUB_TOKEN"]
def extract_text_from_pdf(pdf_file):
    """Return the text of a PDF as a single whitespace-normalized string.

    The document is opened with ``fitz.open``, the plain text of every page
    is concatenated in page order, each run of whitespace is replaced by one
    space, and leading/trailing whitespace is removed.
    """
    with fitz.open(pdf_file) as document:
        raw_text = "".join(page.get_text("text") for page in document)
    return re.sub(r'\s+', ' ', raw_text).strip()
def summarize_pdf(pdf_file, chunk_size=1000):
    """Summarize the text of a PDF, chunk by chunk.

    The extracted text is split into pieces of at most ``chunk_size``
    characters, each piece is summarized independently, and the partial
    summaries are joined with single spaces.

    Args:
        pdf_file: The PDF to summarize, passed through to
            ``extract_text_from_pdf``.
        chunk_size: Maximum number of characters per chunk sent to the
            summarizer. Defaults to 1000.

    Returns:
        The combined summary, or an empty string when the PDF contains no
        extractable text.
    """
    text = extract_text_from_pdf(pdf_file)
    # The extracted text has single-space separators only, so wrapping splits
    # on word boundaries instead of cutting words in half at a fixed offset.
    # It yields [] for empty text and [text] when the text fits in one chunk,
    # which covers the empty and short cases without separate branches.
    chunks = textwrap.wrap(text, width=chunk_size)
    partial_summaries = [
        summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
        for chunk in chunks
    ]
    return " ".join(partial_summaries)
def answer_question(pdf_file, question):
    """Answer ``question`` using the text of ``pdf_file`` as context.

    The PDF text is extracted with ``extract_text_from_pdf`` and handed to
    the question-answering pipeline; only the ``'answer'`` field of its
    result is returned.
    """
    return qa_model(
        question=question,
        context=extract_text_from_pdf(pdf_file),
    )['answer']
# Hosted inference endpoint that the app posts prompts to; the response body
# is later opened as an image by the caller.
API_URL = "https://api-inference.huggingface.co/models/stable-diffusion-v1-5/stable-diffusion-v1-5"
headers = {"Authorization": f"Bearer {my_key}"}


def query(payload, timeout=120):
    """POST ``payload`` as JSON to ``API_URL`` and return the raw body.

    Args:
        payload: JSON-serializable request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely.

    Returns:
        The response body as bytes.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    # Fail here with the HTTP status instead of handing an error body to the
    # caller as if it were image data.
    response.raise_for_status()
    return response.content
def summarize_and_qa(pdf_file, question):
    """Summarize a PDF, answer a question about it, and illustrate the answer.

    Args:
        pdf_file: The uploaded PDF.
        question: The question to answer from the PDF's text.

    Returns:
        A ``(summary, answer, image)`` tuple. ``image`` is a PIL image built
        from the bytes returned by ``query`` for the answer text, or ``None``
        when the image could not be produced.
    """
    summary = summarize_pdf(pdf_file)
    answer = answer_question(pdf_file, question)
    try:
        image_bytes = query({
            "inputs": answer,
        })
        image = Image.open(io.BytesIO(image_bytes))
    except (requests.RequestException, OSError):
        # A failed request or a non-image response body (PIL raises an
        # OSError subclass for unreadable data) should not discard the
        # summary and answer that were already computed.
        logging.getLogger(__name__).exception("Image generation failed")
        image = None
    return summary, answer, image
# Build the UI once and bind it to ``demo`` so the guarded launch below has
# something to call. Previously the interface was launched anonymously at
# import time and ``demo`` was never defined.
demo = gr.Interface(
    fn=summarize_and_qa,
    inputs=["file", "text"],
    outputs=["textbox", "textbox", "image"],
    title="PDF Summary and Q&A",
    description="Upload a PDF to get a summary and answer questions based on the content. It will also give a picture to help you better understand the content.",
)

if __name__ == "__main__":
    demo.launch()