File size: 2,247 Bytes
4f5f1ec
 
 
 
 
 
 
 
 
 
19940b7
4f5f1ec
 
0eb21ba
 
 
 
 
 
19940b7
 
0eb21ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f5f1ec
0eb21ba
19940b7
0eb21ba
 
 
 
 
4f5f1ec
0eb21ba
 
4f5f1ec
0eb21ba
4f5f1ec
 
 
0eb21ba
 
 
 
 
 
 
 
 
90d71c5
0eb21ba
4f5f1ec
 
 
19940b7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import gradio as gr
import os
from dataset_loader import load_dataset
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment before
# anything below runs (presumably credentials used by load_dataset / Agent —
# TODO confirm which keys are required).
load_dotenv()

# Fetch the chatbot data only when the package directory is not present yet.
if not os.path.isdir("low_partners_chatbot_data"):
    load_dataset()

# NOTE: this import must stay below the check above; the package directory
# may not exist until load_dataset() has run (presumably it creates it).
from low_partners_chatbot_data.agent import Agent


# NOTE(review): gradio and os are already imported at the top of the file, and
# io is not used in the visible code; kept as-is to avoid touching imports.
import gradio as gr
import fitz  # PyMuPDF
from PIL import Image
import io
import os

# Single shared agent instance used by get_answer for every request.
agent = Agent()


def extract_pages_from_multiple_pdfs(pdf_info, zoom=2):
    """Render selected pages of several PDFs to PIL images with captions.

    Args:
        pdf_info: Mapping of PDF file path to an iterable of 1-based page
            numbers to render. ``None`` or an empty mapping yields an empty
            result.
        zoom: Scale factor applied to both axes when rasterising a page.
            The default of 2 doubles the rendering resolution.

    Returns:
        A list of ``(image, caption)`` tuples in iteration order, where the
        caption is ``"<file name> - Page <n>"``. Missing files, out-of-range
        page numbers and PDFs that raise while being processed are reported
        on stdout and skipped; pages rendered before an error are kept.
    """
    images = []
    if not pdf_info:
        return images

    for pdf_path, page_numbers in pdf_info.items():
        if not os.path.exists(pdf_path):
            print(f"PDF file not found: {pdf_path}")
            continue

        # Best-effort by design: one broken PDF must not prevent the
        # remaining documents from being rendered, so errors are only logged.
        try:
            # The context manager closes the document even when rendering a
            # page raises, which a trailing doc.close() would not.
            with fitz.open(pdf_path) as doc:
                page_count = len(doc)
                for page_num in page_numbers:
                    if not 1 <= page_num <= page_count:
                        print(f"Page {page_num} is out of range for {pdf_path}")
                        continue

                    # Callers use 1-based page numbers; PyMuPDF is 0-based.
                    page = doc.load_page(page_num - 1)
                    pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
                    img = Image.frombytes(
                        "RGB", (pix.width, pix.height), pix.samples
                    )
                    images.append(
                        (img, f"{os.path.basename(pdf_path)} - Page {page_num}")
                    )
        except Exception as e:
            print(f"An error occurred with {pdf_path}: {str(e)}")

    return images


def get_answer(query):
    """Ask the agent a question and collect the pages that back its answer.

    Args:
        query: The user's question, forwarded unchanged to ``agent.ask``.

    Returns:
        A ``(answer, images)`` pair. ``images`` is the list produced by
        ``extract_pages_from_multiple_pdfs`` for the PDF info the agent
        returned, or ``None`` when there is nothing to show.
    """
    answer, pdf_info = agent.ask(query)

    # Echo the raw agent output to stdout.
    print(f"Answer: {answer}")
    print(f"PDF Info: {pdf_info}")

    rendered_pages = extract_pages_from_multiple_pdfs(pdf_info) if pdf_info else []

    # An empty result is reported as None rather than an empty list.
    return answer, (rendered_pages if rendered_pages else None)


# Web UI definition: one text query in; the answer text and a gallery of the
# images returned by get_answer out.
iface = gr.Interface(
    fn=get_answer,
    inputs="text",  # Free-text question passed to get_answer as `query`
    outputs=[
        gr.Textbox(label="Answer"),
        gr.Gallery(label="Related Documents", allow_preview=True, preview=True),
    ],
    title="Low & Partners Chatbot",
    description=(
        "This is a chatbot for Low & Partners Law Firm. Ask me anything related to law!\n\n"
        "Note: This chatbot does not implement memory module, ask specific questions and don't expect it to remember previous conversation.\n"
    ),
)

# Launch the interface as soon as this module is executed (no __main__ guard).
iface.launch()