File size: 1,969 Bytes
ddb0d1e
ac61621
ddb0d1e
86d3138
0a55ae4
ddb0d1e
ac61621
 
 
 
 
 
 
d22351a
ddb0d1e
45a0b43
954da24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ddb0d1e
 
45a0b43
 
ddb0d1e
 
 
 
 
bf9d3cd
 
 
ddb0d1e
 
 
ac61621
ddb0d1e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from pprint import pprint
from PyPDF2 import PdfReader
import gradio as gr
from transformers import pipeline
import os

# Function to read PDF file content directly
def read_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Location of the PDF; passed straight to ``PdfReader``.

    Returns:
        One string made of each page's ``extract_text()`` output, joined
        with no separator between pages.
    """
    reader = PdfReader(pdf_path)
    # Join once rather than growing a string with ``+=`` for every page.
    return "".join(page.extract_text() for page in reader.pages)

# Process and retrieve answers
def process_invoice(file, hf_token, questions):
    """Answer comma-separated questions about an uploaded invoice PDF.

    Args:
        file: The uploaded PDF. Either an object exposing the path as
            ``.name`` or the path itself as a string; ``None`` when nothing
            was uploaded.
        hf_token: Hugging Face access token forwarded to ``pipeline``.
        questions: Questions separated by commas. Surrounding whitespace is
            stripped and empty fragments (e.g. from a trailing comma) are
            ignored.

    Returns:
        A dict mapping each stripped question to the pipeline's ``answer``.
        On any failure (missing file, no questions, or an exception while
        reading or querying) a dict of the form ``{"error": message}``.
    """
    # Validate cheap inputs first so we never read the PDF or load the model
    # for a request that cannot produce an answer.
    if file is None:
        return {"error": "No PDF file was provided."}

    stripped = (part.strip() for part in (questions or "").split(','))
    question_list = [q for q in stripped if q]
    if not question_list:
        return {"error": "No questions were provided."}

    # Accept both a temp-file wrapper (path in ``.name``) and a plain path
    # string, since the upload value's type differs between Gradio setups.
    pdf_path = getattr(file, "name", file)

    try:
        # Read the PDF content directly
        print("Reading PDF content...")
        pdf_content = read_pdf(pdf_path)
        print(f"PDF Content: {pdf_content[:500]}...")  # Print first 500 characters for verification

        # Initialize the Hugging Face pipeline
        # NOTE(review): newer transformers releases appear to prefer `token`
        # over `use_auth_token` - confirm against the pinned version.
        # NOTE(review): confirm this instruction-tuned model is usable with
        # the extractive "question-answering" task.
        print("Initializing the Hugging Face pipeline...")
        qa_pipeline = pipeline("question-answering", model="mistralai/Mixtral-8x7B-Instruct-v0.1", use_auth_token=hf_token)

        answers = {}
        for question in question_list:
            print(f"Asking question: {question}")
            result = qa_pipeline(question=question, context=pdf_content)
            # Key by the stripped question so keys match what was asked.
            answers[question] = result['answer']
            print(f"Answer: {result['answer']}")

        return answers
    except Exception as e:
        # UI boundary: report the failure to the caller instead of crashing.
        print(f"Error: {e}")
        return {"error": str(e)}

# Gradio interface
def gradio_interface(file, hf_token, questions):
    """Gradio callback: forward the three UI inputs to ``process_invoice``.

    Returns whatever ``process_invoice`` returns, unchanged.
    """
    return process_invoice(file, hf_token, questions)

# Build the web UI: a PDF upload, a masked token field and a questions box
# feed gradio_interface; its result is rendered as JSON.
interface = gr.Interface(
    title="Invoice Data Extraction",
    description=(
        "Upload an invoice PDF, provide your Hugging Face token, "
        "and get the extracted data based on your questions."
    ),
    fn=gradio_interface,
    inputs=[
        gr.File(label="Upload Invoice (PDF)", file_count="single"),
        gr.Textbox(label="Enter your Hugging Face Token", type="password"),
        gr.Textbox(placeholder="Enter your questions separated by commas", lines=5),
    ],
    outputs="json",
)

# Start the app only when executed as a script, not when imported.
if __name__ == "__main__":
    interface.launch()