File size: 2,167 Bytes
1f2b1df df0e0ab 1f2b1df df0e0ab |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
from transformers import pipeline
import fitz
import gradio as gr
import requests
import io
import re
import os
from PIL import Image
# Summarization pipeline backed by facebook/bart-large-cnn; built once when
# the module is loaded and reused by summarize_pdf.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
# Question-answering pipeline backed by a SQuAD-distilled DistilBERT
# checkpoint; built once when the module is loaded and reused by
# answer_question.
qa_model = pipeline(
    task="question-answering",
    model="distilbert-base-cased-distilled-squad",
)
# Token used to authorize calls to the hosted inference API.
# A value already present in the environment (for example a configured
# secret) takes precedence; the literal is only a fallback, so a real token
# is never overwritten by the placeholder.
os.environ.setdefault("HUGGINGFACE_HUB_TOKEN", "ctp-hw")
my_key = os.environ["HUGGINGFACE_HUB_TOKEN"]
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as one normalized string.

    Args:
        pdf_file: The document to read; handed unchanged to ``fitz.open``.

    Returns:
        The page texts concatenated in page order, with every run of
        whitespace collapsed to a single space and leading/trailing
        whitespace removed.
    """
    with fitz.open(pdf_file) as document:
        raw_text = "".join(page.get_text("text") for page in document)
    # Collapse newlines, tabs and repeated spaces into single spaces.
    return re.sub(r'\s+', ' ', raw_text).strip()
def summarize_pdf(pdf_file, chunk_size=1000):
    """Summarize the text of a PDF, one fixed-size chunk at a time.

    The extracted text is cut into consecutive pieces of at most
    ``chunk_size`` characters, each piece is summarized on its own, and the
    partial summaries are joined with single spaces.

    Args:
        pdf_file: The PDF to summarize; passed to ``extract_text_from_pdf``.
        chunk_size: Maximum number of characters given to the summarizer
            per call. Defaults to 1000.

    Returns:
        The combined summary, or an empty string when no text could be
        extracted from the PDF.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    text = extract_text_from_pdf(pdf_file)
    # Slicing handles every length uniformly: text no longer than chunk_size
    # yields exactly one chunk, and empty text yields none, so the model is
    # never invoked on an empty string.
    chunks = (
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    )
    partial_summaries = [
        summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
        for chunk in chunks
    ]
    # join() puts separators only between parts, so there is no trailing space.
    return " ".join(partial_summaries)
def answer_question(pdf_file, question):
    """Answer a question using the text of a PDF as context.

    Args:
        pdf_file: The PDF whose text serves as context; passed to
            ``extract_text_from_pdf``.
        question: The question to put to the question-answering pipeline.

    Returns:
        The value stored under ``'answer'`` in the pipeline's result.
    """
    context = extract_text_from_pdf(pdf_file)
    result = qa_model(question=question, context=context)
    return result['answer']
# Hosted inference endpoint that ``query`` posts to.
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "stable-diffusion-v1-5/stable-diffusion-v1-5"
)
# Bearer-token authorization header built from ``my_key``.
headers = {"Authorization": "Bearer {}".format(my_key)}
def query(payload, timeout=120):
    """POST ``payload`` as JSON to ``API_URL`` and return the response body.

    Args:
        payload: JSON-serializable request body.
        timeout: Seconds to wait for the server before giving up; passed to
            ``requests.post``. Pass ``None`` to wait indefinitely.

    Returns:
        The raw response body as bytes.

    Raises:
        requests.HTTPError: If the server responds with a 4xx or 5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.post(
        API_URL,
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Fail here on an error status instead of returning an error body that
    # the caller would then try to decode as an image.
    response.raise_for_status()
    return response.content
def summarize_and_qa(pdf_file, question):
    """Produce the summary, the answer and an illustration for one request.

    Args:
        pdf_file: The PDF handed to ``summarize_pdf`` and ``answer_question``.
        question: The question to answer from the PDF.

    Returns:
        A ``(summary, answer, image)`` tuple; ``image`` is the PIL image
        decoded from the bytes that ``query`` returns for the answer text.
    """
    pdf_summary = summarize_pdf(pdf_file)
    pdf_answer = answer_question(pdf_file, question)
    # The answer text is sent as the "inputs" field of the image request.
    payload = {"inputs": pdf_answer}
    raw_image = query(payload)
    picture = Image.open(io.BytesIO(raw_image))
    return pdf_summary, pdf_answer, picture
# Build the UI once and bind it to ``demo`` so the entry-point guard below
# has something to launch.
demo = gr.Interface(
    fn=summarize_and_qa,
    inputs=["file", "text"],
    outputs=["textbox", "textbox", "image"],
    title="PDF Summary and Q&A",
    description="Upload a PDF to get a summary and answer questions based on the content. It will also give a picture to help you better understand the content.",
)

# Start the server only when run as a script, and only once; importing the
# module no longer launches the app as a side effect.
if __name__ == "__main__":
    demo.launch()
|