File size: 1,581 Bytes
ddb0d1e
86d3138
ac61621
ddb0d1e
 
86d3138
ddb0d1e
ac61621
 
 
 
 
 
 
d22351a
ddb0d1e
45a0b43
ac61621
 
ddb0d1e
86d3138
 
ddb0d1e
 
 
86d3138
 
ddb0d1e
 
 
 
45a0b43
 
ddb0d1e
 
 
 
 
ac61621
ddb0d1e
 
 
 
 
ac61621
ddb0d1e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from pprint import pprint
from getpass import getpass
from PyPDF2 import PdfReader
import gradio as gr
import os
from transformers import pipeline

# Function to read PDF file content directly
def read_pdf(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path`` as one string.

    The pages are visited in the order ``PdfReader`` exposes them and their
    extracted text is concatenated with no separator in between.
    """
    pages = PdfReader(pdf_path).pages
    return "".join(page.extract_text() for page in pages)

# Process and retrieve answers
def process_invoice(file, hf_token, questions):
    """Answer comma-separated questions about an uploaded invoice PDF.

    Args:
        file: The uploaded PDF, either an object exposing its path as
            ``.name`` or a plain path string.
        hf_token: Hugging Face access token forwarded to ``pipeline``. A blank
            value is passed on as ``None``.
        questions: Questions separated by commas. Surrounding whitespace is
            ignored and blank entries are skipped.

    Returns:
        A dict mapping each stripped question to the ``'answer'`` field of the
        pipeline result. Empty when no non-blank question was supplied.

    Raises:
        ValueError: If no text could be extracted from the PDF.
    """
    # Normalise once so the text sent to the model and the key stored in the
    # result are the same string; blank entries (e.g. from a trailing comma)
    # are dropped instead of being sent to the model.
    stripped = (part.strip() for part in (questions or "").split(','))
    cleaned_questions = [question for question in stripped if question]
    if not cleaned_questions:
        # Nothing to ask: skip reading the PDF and loading the model.
        return {}

    # Accept both an upload object (path in ``.name``) and a bare path string.
    pdf_path = getattr(file, "name", file)
    pdf_content = read_pdf(pdf_path)
    if not pdf_content.strip():
        # Fail before the model is loaded; there is no context to search.
        raise ValueError("No extractable text found in the uploaded PDF.")

    # Initialize the Hugging Face pipeline.
    # NOTE(review): the model id is an instruct-tuned Mixtral checkpoint;
    # confirm it is suitable for the "question-answering" task.
    qa_pipeline = pipeline(
        "question-answering",
        model="mistralai/Mixtral-8x7B-Instruct-v0.1",
        token=hf_token or None,
    )

    answers = {}
    for question in cleaned_questions:
        result = qa_pipeline(question=question, context=pdf_content)
        answers[question] = result['answer']

    return answers

# Gradio interface
def gradio_interface(file, hf_token, questions):
    """Gradio callback: hand the three UI inputs to ``process_invoice``.

    The value returned by ``process_invoice`` is passed back unchanged.
    """
    return process_invoice(file, hf_token, questions)

# Build the UI from the top-level component classes: the legacy ``gr.inputs``
# namespace is deprecated in Gradio 3 and no longer exists in Gradio 4.
interface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        # ``type="file"`` is omitted because Gradio 4 no longer accepts it.
        # NOTE(review): the callback reads the upload's path from ``.name``;
        # confirm the installed Gradio version's default upload value has it.
        gr.File(file_count="single", label="Upload Invoice (PDF)"),
        gr.Textbox(type="password", label="Enter your Hugging Face Token"),
        gr.Textbox(lines=5, placeholder="Enter your questions separated by commas"),
    ],
    outputs="json",
    title="Invoice Data Extraction",
    description="Upload an invoice PDF, provide your Hugging Face token, and get the extracted data based on your questions.",
)

# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()