|
import os |
|
import gradio as gr |
|
from langchain.document_loaders import OnlinePDFLoader |
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.chat_models import ChatAnthropic |
|
from langchain.prompts import ChatPromptTemplate |
|
from transformers import pipeline |
|
|
|
|
|
# Anthropic credential read from the environment; None when the variable is unset.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")

# Text of the most recently loaded PDF, shared by the handlers in this module.
# An empty string means no PDF has been loaded yet.
pdf_content = ""
|
|
|
def load_pdf(pdf_doc):
    """Load an uploaded PDF and cache its text in the module-level ``pdf_content``.

    Args:
        pdf_doc: The uploaded file, either an object whose ``name`` attribute
            is the file path or a plain path string, or ``None`` when nothing
            was uploaded.

    Returns:
        A ``(status_message, text)`` tuple. ``text`` is the extracted PDF
        text on success and ``""`` when no file was given or loading failed.
    """
    global pdf_content
    if pdf_doc is None:
        return "No PDF uploaded.", ""
    try:
        # Accept both file-like uploads (``.name``) and plain path strings.
        pdf_path = getattr(pdf_doc, "name", pdf_doc)
        documents = OnlinePDFLoader(pdf_path).load()
        # ``load()`` yields Document objects, not strings; joining them
        # directly raises TypeError, so join their ``page_content`` instead.
        pdf_content = " ".join(doc.page_content for doc in documents)
    except Exception as e:
        # Best-effort: report the failure to the caller instead of crashing.
        return f"Error processing PDF: {e}", ""
    return "PDF Loaded Successfully.", pdf_content
|
|
|
# Lazily-built summarization pipeline, shared across calls because
# constructing it is far more expensive than running it.
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    global _summarizer
    if _summarizer is None:
        _summarizer = pipeline("summarization")
    return _summarizer


def chat_with_pdf(question):
    """Answer a question about the loaded PDF and summarize its text.

    Reads the module-level ``pdf_content``. The chat model receives the PDF
    text, the question, and a fixed instruction asking for an 8th-grade-level
    summary; a separate summarization pipeline produces the first output.

    Args:
        question: The user's question about the PDF.

    Returns:
        A ``(summary, chat_response)`` tuple of strings. When no PDF text is
        loaded, returns ``("No PDF uploaded.", "")`` without calling a model.
    """
    if not pdf_content:
        # Nothing to summarize or chat about; avoid calling the models on "".
        return "No PDF uploaded.", ""

    model = ChatAnthropic()
    # Pass the PDF text and question as template *variables*. Embedding them
    # directly as templates makes any "{" or "}" in the text be parsed as a
    # placeholder, which breaks prompt formatting.
    prompt = ChatPromptTemplate.from_messages([
        ("human", "{pdf_content}"),
        ("human", "{question}"),
        ("human", "Give a clear summary of this pdf information at an 8th grade reading level."),
    ])
    chain = prompt | model
    response = chain.invoke({"pdf_content": pdf_content, "question": question or ""})

    # truncation=True keeps long PDFs within the summarizer's input limit
    # instead of failing on over-length input.
    summary = _get_summarizer()(
        pdf_content,
        max_length=1000,
        min_length=30,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]
    return summary, response.content
|
|
|
def gradio_interface(pdf_doc, question):
    """Dispatch a UI event to the loader or the chat handler.

    While the module-level ``pdf_content`` is empty, the upload is loaded via
    ``load_pdf``; once text is present, the question goes to ``chat_with_pdf``.

    Args:
        pdf_doc: The uploaded file (or ``None``), forwarded to ``load_pdf``.
        question: The question text, forwarded to ``chat_with_pdf``.

    Returns:
        The two-item tuple returned by whichever handler ran.
    """
    if pdf_content:
        return chat_with_pdf(question)
    return load_pdf(pdf_doc)
|
|
|
# Build the two-input / two-output UI and start the Gradio server.
gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.components.File(label="Load a pdf", file_types=[".pdf"], type="file"),
        gr.components.Textbox(label="Ask a question about the PDF"),
    ],
    outputs=[
        gr.components.Textbox(label="Summary"),
        gr.components.Textbox(label="Chat Response"),
    ],
    live=True,
    # The title must not include the API key: concatenating it here would show
    # the secret to every visitor and raise TypeError when the variable is unset.
    title="Chat with PDF content using Anthropic",
    description="Upload a .PDF and interactively chat about its content.",
    api_name="chat_with_pdf_3",
).launch()
|
|