|
from transformers import pipeline |
|
|
|
import fitz |
|
import gradio as gr |
|
import requests |
|
import io |
|
import re |
|
import os |
|
from PIL import Image |
|
|
|
# Hugging Face pipelines, built once at import time and reused by every call
# to summarize_pdf() and answer_question().
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)

qa_model = pipeline(
    task="question-answering",
    model="distilbert-base-cased-distilled-squad",
)
|
|
|
# Token sent as the Bearer credential to the hosted Inference API.
# A token already exported in the environment takes precedence; the
# placeholder "ctp-hw" is only a fallback so the variable is always defined.
# (Unconditionally assigning the placeholder would clobber a real token.)
os.environ.setdefault("HUGGINGFACE_HUB_TOKEN", "ctp-hw")
my_key = os.environ["HUGGINGFACE_HUB_TOKEN"]
|
|
|
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as one whitespace-normalised string.

    Args:
        pdf_file: Passed unchanged as the first argument to ``fitz.open``.

    Returns:
        The page texts concatenated in page order, with every run of
        whitespace collapsed to a single space and leading/trailing
        whitespace removed.
    """
    with fitz.open(pdf_file) as document:
        raw_text = "".join(page.get_text("text") for page in document)

    return re.sub(r'\s+', ' ', raw_text).strip()
|
|
|
def summarize_pdf(pdf_file, chunk_size=1000):
    """Summarise the text of a PDF, chunk by chunk.

    The extracted text is cut into consecutive slices of at most
    ``chunk_size`` characters, each slice is summarised independently, and
    the partial summaries are joined with single spaces.

    Args:
        pdf_file: The PDF to summarise; forwarded to ``extract_text_from_pdf``.
        chunk_size: Maximum number of characters handed to the summariser in
            one call. Defaults to 1000.

    Returns:
        The combined summary, or an empty string when the PDF yields no
        text at all.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    text = extract_text_from_pdf(pdf_file)
    if not text:
        # Nothing to summarise; do not feed an empty string to the model.
        return ""

    # A text no longer than chunk_size produces exactly one chunk, so short
    # and long documents share this single code path.
    chunks = [
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    ]
    partial_summaries = [
        summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
        for chunk in chunks
    ]
    # join() avoids the trailing space that "+= ... + ' '" would leave behind.
    return " ".join(partial_summaries)
|
|
|
def answer_question(pdf_file, question):
    """Answer a question using the text of a PDF as context.

    Args:
        pdf_file: The PDF to read; forwarded to ``extract_text_from_pdf``.
        question: The question to ask about the document.

    Returns:
        The ``'answer'`` field of the question-answering result, or an empty
        string when the question is blank or the PDF yields no text.
    """
    # The QA pipeline rejects an empty question or context, so handle both
    # cases here instead of letting the call fail.
    if not question or not question.strip():
        return ""

    text = extract_text_from_pdf(pdf_file)
    if not text:
        return ""

    result = qa_model(question=question, context=text)
    return result['answer']
|
|
|
# Endpoint that query() posts its payload to.
API_URL = "https://api-inference.huggingface.co/models/stable-diffusion-v1-5/stable-diffusion-v1-5"

# Authorisation header sent with each request to API_URL.
headers = dict(Authorization=f"Bearer {my_key}")
|
|
|
def query(payload, timeout=120):
    """POST ``payload`` as JSON to ``API_URL`` and return the raw response body.

    Args:
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` would wait indefinitely.

    Returns:
        The response body as bytes.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error payload is never handed back as if it were valid content.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()
    return response.content
|
|
|
def summarize_and_qa(pdf_file, question):
    """Summarise a PDF, answer a question about it and illustrate the answer.

    Args:
        pdf_file: The uploaded PDF; forwarded to ``summarize_pdf`` and
            ``answer_question``.
        question: The question to ask about the document.

    Returns:
        A ``(summary, answer, image)`` tuple. ``image`` is a PIL image
        generated from the answer text, or ``None`` when there is no answer
        to use as a prompt.

    Raises:
        RuntimeError: If the bytes returned by ``query`` cannot be opened as
            an image; the message includes the start of the response.
    """
    summary = summarize_pdf(pdf_file)
    answer = answer_question(pdf_file, question)

    if not answer:
        # No prompt to send; still return the summary.
        return summary, answer, None

    image_bytes = query({"inputs": answer})
    try:
        image = Image.open(io.BytesIO(image_bytes))
    except OSError as err:
        # Pillow's "cannot identify image file" error is an OSError; surface
        # what the API actually sent back instead of that opaque message.
        detail = image_bytes[:200].decode("utf-8", errors="replace")
        raise RuntimeError(
            f"Image generation failed; the API returned: {detail}"
        ) from err

    return summary, answer, image
|
|
|
# Build the UI once and bind it to ``demo`` so the entry-point guard below has
# something to launch. Constructing the interface has to stay at module level,
# but launching it is reserved for script execution so that importing this
# module does not start a web server.
demo = gr.Interface(
    fn=summarize_and_qa,
    inputs=["file", "text"],
    outputs=["textbox", "textbox", "image"],
    title="PDF Summary and Q&A",
    description="Upload a PDF to get a summary and answer questions based on the content. It will also give a picture to help you better understand the content.",
)


if __name__ == "__main__":
    demo.launch()
|
|