cjzhi98's picture
Update app.py
19940b7 verified
raw
history blame contribute delete
No virus
2.25 kB
import gradio as gr
import os
from dataset_loader import load_dataset
from dotenv import load_dotenv
# Read settings from a local .env file into the process environment.
load_dotenv()
# First-run bootstrap: only call load_dataset() when the data directory is
# not already present in the working directory.
# NOTE(review): presumably load_dataset() creates low_partners_chatbot_data/
# (including its agent module) — confirm against dataset_loader.
if not os.path.isdir("low_partners_chatbot_data"):
    load_dataset()
# This import intentionally comes AFTER the check above: the package lives
# inside the directory that may only exist once load_dataset() has run, so it
# must not be moved to the top of the file with the other imports.
from low_partners_chatbot_data.agent import Agent
import gradio as gr
import fitz # PyMuPDF
from PIL import Image
import io
import os
# Module-level Agent instance, created once at start-up; get_answer() calls
# agent.ask() on this same object for every query.
agent = Agent()
def extract_pages_from_multiple_pdfs(pdf_info):
    """Render the requested pages of several PDFs as captioned images.

    Args:
        pdf_info: Mapping of PDF file path to an iterable of 1-based page
            numbers to render. A falsy value (``None`` or an empty mapping)
            yields an empty list.

    Returns:
        A list of ``(image, caption)`` tuples, where ``image`` is a PIL RGB
        image of the page and ``caption`` is
        ``"<pdf file name> - Page <page number>"``. Missing files,
        out-of-range pages and PDFs that raise while being processed are
        reported on stdout and skipped, so the list may contain fewer entries
        than were requested.
    """
    images = []
    if not pdf_info:
        return images

    for pdf_path, page_numbers in pdf_info.items():
        if not os.path.exists(pdf_path):
            print(f"PDF file not found: {pdf_path}")
            continue

        try:
            # The context manager closes the document even when rendering a
            # page raises; previously an exception skipped doc.close() and
            # leaked the open file handle.
            with fitz.open(pdf_path) as doc:
                page_count = len(doc)
                # 2x scale on both axes to increase the rendered resolution.
                zoom = fitz.Matrix(2, 2)
                for page_num in page_numbers:
                    if not 1 <= page_num <= page_count:
                        print(f"Page {page_num} is out of range for {pdf_path}")
                        continue

                    # Callers use 1-based page numbers; PyMuPDF is 0-based.
                    page = doc.load_page(page_num - 1)
                    pix = page.get_pixmap(matrix=zoom)
                    img = Image.frombytes(
                        "RGB", (pix.width, pix.height), pix.samples
                    )
                    images.append(
                        (img, f"{os.path.basename(pdf_path)} - Page {page_num}")
                    )
        except Exception as e:
            # Best-effort boundary: one unreadable PDF must not prevent the
            # remaining documents from being rendered.
            print(f"An error occurred with {pdf_path}: {e}")

    return images
def get_answer(query):
    """Ask the agent a question and gather images of the pages it cites.

    Args:
        query: The question text to forward to ``agent.ask``.

    Returns:
        A ``(answer, images)`` pair. ``answer`` is the first element returned
        by ``agent.ask``. ``images`` is the list produced by
        ``extract_pages_from_multiple_pdfs`` for the second element, or
        ``None`` when that element is empty or no page could be rendered.
    """
    answer, pdf_info = agent.ask(query)
    print(f"Answer: {answer}")
    print(f"PDF Info: {pdf_info}")

    # Nothing cited: there is no gallery content to show.
    if not pdf_info:
        return answer, None

    rendered_pages = extract_pages_from_multiple_pdfs(pdf_info)
    # An empty render result is reported as None rather than an empty list.
    return answer, (rendered_pages if rendered_pages else None)
# Gradio UI wiring: a single free-text question goes in; the textual answer
# and a gallery of the rendered source pages come out.
iface = gr.Interface(
    title="Low & Partners Chatbot",
    description=(
        "This is a chatbot for Low & Partners Law Firm. Ask me anything related to law!\n\n"
        "Note: This chatbot does not implement memory module, ask specific questions and don't expect it to remember previous conversation.\n"
    ),
    fn=get_answer,
    inputs="text",  # The user's question, handed to get_answer as `query`.
    outputs=[
        # The two components receive get_answer's (answer, images) pair in order.
        gr.Textbox(label="Answer"),
        gr.Gallery(label="Related Documents", allow_preview=True, preview=True),
    ],
)

# Start serving the interface as soon as this script runs.
iface.launch()