Update app.py
Browse files
app.py
CHANGED
@@ -13,13 +13,18 @@ def read_pdf(pdf_path):
|
|
13 |
return content
|
14 |
|
15 |
# Process and retrieve answers
|
16 |
-
def process_invoice(file,
|
17 |
try:
|
18 |
# Read the PDF content directly
|
19 |
print("Reading PDF content...")
|
20 |
pdf_content = read_pdf(file.name)
|
21 |
print(f"PDF Content: {pdf_content[:500]}...") # Print first 500 characters for verification
|
22 |
|
|
|
|
|
|
|
|
|
|
|
23 |
# Initialize the Hugging Face pipeline
|
24 |
print("Initializing the Hugging Face pipeline...")
|
25 |
qa_pipeline = pipeline("question-answering", model="mistralai/Mixtral-8x7B-Instruct-v0.1", use_auth_token=hf_token)
|
@@ -37,20 +42,19 @@ def process_invoice(file, hf_token, questions):
|
|
37 |
return {"error": str(e)}
|
38 |
|
39 |
# Gradio interface
|
40 |
-
def gradio_interface(file,
|
41 |
-
answers = process_invoice(file,
|
42 |
return answers
|
43 |
|
44 |
interface = gr.Interface(
|
45 |
fn=gradio_interface,
|
46 |
inputs=[
|
47 |
gr.File(file_count="single", label="Upload Invoice (PDF)"),
|
48 |
-
gr.Textbox(type="password", label="Enter your Hugging Face Token"),
|
49 |
gr.Textbox(lines=5, placeholder="Enter your questions separated by commas")
|
50 |
],
|
51 |
outputs="json",
|
52 |
title="Invoice Data Extraction",
|
53 |
-
description="Upload an invoice PDF
|
54 |
)
|
55 |
|
56 |
if __name__ == "__main__":
|
|
|
13 |
return content
|
14 |
|
15 |
# Process and retrieve answers
|
16 |
+
def process_invoice(file, questions):
|
17 |
try:
|
18 |
# Read the PDF content directly
|
19 |
print("Reading PDF content...")
|
20 |
pdf_content = read_pdf(file.name)
|
21 |
print(f"PDF Content: {pdf_content[:500]}...") # Print first 500 characters for verification
|
22 |
|
23 |
+
# Get the Hugging Face token from environment variables
|
24 |
+
hf_token = os.getenv("HF_TOKEN")
|
25 |
+
if not hf_token:
|
26 |
+
raise ValueError("Hugging Face token not found in environment variables.")
|
27 |
+
|
28 |
# Initialize the Hugging Face pipeline
|
29 |
print("Initializing the Hugging Face pipeline...")
|
30 |
qa_pipeline = pipeline("question-answering", model="mistralai/Mixtral-8x7B-Instruct-v0.1", use_auth_token=hf_token)
|
|
|
42 |
return {"error": str(e)}
|
43 |
|
44 |
# Gradio interface
|
45 |
+
def gradio_interface(file, questions):
    """Gradio callback that delegates to ``process_invoice``.

    Args:
        file: The value supplied by the file-upload input.
        questions: The text supplied by the questions textbox.

    Returns:
        Whatever ``process_invoice(file, questions)`` returns, unchanged.
    """
    return process_invoice(file, questions)
|
48 |
|
49 |
# Build the Gradio UI: one PDF upload plus a free-text questions box as
# inputs, with the callback's result rendered as JSON.
interface = gr.Interface(
    title="Invoice Data Extraction",
    description="Upload an invoice PDF and get the extracted data based on your questions.",
    fn=gradio_interface,
    inputs=[
        gr.File(file_count="single", label="Upload Invoice (PDF)"),
        gr.Textbox(lines=5, placeholder="Enter your questions separated by commas"),
    ],
    outputs="json",
)
|
59 |
|
60 |
if __name__ == "__main__":
|