Spaces:

Omkar008
/

Invoice_To_Json

Sleeping

App Files Files Community

Invoice_To_Json / app.py

Omkar008

Update app.py

e503c5d verified 2 months ago

raw

history blame

3.33 kB

	import streamlit as st
	from pdf2image import convert_from_path
	import google.generativeai as genai
	import os
	import imghdr
	import PIL.Image
	import json
	import tempfile
	from dotenv import load_dotenv
	load_dotenv()

	# Gemini client setup. The API key is read from the environment; when the
	# variable is unset the value handed to configure() is None.
	GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
	genai.configure(api_key=GOOGLE_API_KEY)
	# Module-level model handle used by get_invoice_details().
	model = genai.GenerativeModel('gemini-1.5-flash')

	def system_prompt() -> str:
	    """Return the instruction prompt sent to the model with each invoice image.

	    The prompt asks for a plain-text JSON object with exactly the keys
	    ``Customer_Details``, ``Products`` and ``Total Amount``.

	    Returns:
	        The prompt text as a single string.
	    """
	    # Rule 1 names "Total Amount" so that it refers to the same key the first
	    # sentence requires; a different key name there contradicts "exact keys".
	    return """You are a Invoice/Receipt Analysing tool. Analyse the image of the invoice provided and extract information from the following receipt image and return a JSON object with these exact keys:Customer_Details,Products,Total Amount.

	Rules:
	1. For Total Amount, use the highest monetary value in the text.
	2. Customer_Details will consist of another json object consisting the keys Customer_Name,Customer_Address,Email Address,Phone Number,Customer ID,Billing Address,Shipping Address,Account Number,Tax ID/VAT Number,Company Name,Payment Method.
	3. Look carefully for any address labeled as "Bill To" or "Billing Address".Look for this on the left side of the layout.
	4.Look carefully for any address labeled as "Ship To" or "Shipping Address".Look for this on the right side of the layout
	5.Products will consist of another json object consisting of keys Product_name_1,quantity,unit_price.
	6. If any value is not found, return null.
	7. If all values are null, return null.
	Ensure the strictly that output is a valid JSON object containing strictly the above keys, without any explanations.
	Generate a JSON response in the following format without using the ```json block. Ensure the output is properly formatted as plain text JSON.

	"""

	def get_invoice_details(image):
	    """Ask the Gemini model to extract invoice data from one image.

	    Args:
	        image: The image handed to the model together with the prompt from
	            ``system_prompt()``.

	    Returns:
	        The model's reply as text. It is expected to be JSON but is not
	        validated here.
	    """
	    request_parts = [system_prompt(), image]
	    streamed_reply = model.generate_content(request_parts, stream=True)
	    # The request is streamed, so resolve() is called before reading .text
	    # (presumably .text is only complete once the stream has been consumed;
	    # confirm against the SDK documentation).
	    streamed_reply.resolve()
	    return streamed_reply.text

	def process_file(file):
	    """Turn an uploaded image or PDF into a list of images.

	    Args:
	        file: Uploaded file object exposing a MIME ``type`` string and, for
	            PDFs, ``getvalue()`` returning the file's bytes.

	    Returns:
	        A list with the images produced by ``convert_from_path`` for a PDF, or
	        a single-element list for an image upload. Returns ``None`` after
	        showing an error when the MIME type is neither.
	    """
	    if file.type == "application/pdf":
	        # convert_from_path needs a real path, so the upload is spilled to a
	        # temporary file. delete=False lets the file be reopened by path after
	        # it is closed; the finally block guarantees it is removed even when
	        # writing or conversion fails.
	        tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
	        try:
	            with tmp_file:
	                tmp_file.write(file.getvalue())
	            return list(convert_from_path(tmp_file.name))
	        finally:
	            os.unlink(tmp_file.name)

	    if file.type.startswith('image'):
	        return [PIL.Image.open(file)]

	    st.error(f"Unsupported file type: {file.type}")
	    return None

	def _strip_code_fence(text):
	    """Return *text* without a surrounding Markdown code fence, if present."""
	    cleaned = text.strip()
	    if not cleaned.startswith("```"):
	        return cleaned
	    # Drop the opening fence line (``` or ```json) and a trailing ``` if any.
	    _, _, cleaned = cleaned.partition("\n")
	    cleaned = cleaned.rstrip()
	    if cleaned.endswith("```"):
	        cleaned = cleaned[:-3]
	    return cleaned.strip()


	def main():
	    """Run the Streamlit page: upload a file, analyse each page, show JSON.

	    Each image returned by ``process_file`` is displayed, sent to
	    ``get_invoice_details`` and the reply is rendered as JSON. A reply that
	    cannot be parsed is shown verbatim under an error message. A failure while
	    analysing one page is reported and the remaining pages are still processed.
	    """
	    st.title("Invoice Analyzer")

	    uploaded_file = st.file_uploader(
	        "Choose an image or PDF file", type=["jpg", "jpeg", "png", "pdf"]
	    )
	    if uploaded_file is None:
	        return

	    images = process_file(uploaded_file)
	    if not images:
	        return

	    for page_number, img in enumerate(images, start=1):
	        # NOTE(review): newer Streamlit releases reportedly deprecate
	        # use_column_width in favour of use_container_width; confirm against
	        # the Streamlit version this app is pinned to before changing it.
	        st.image(img, caption=f"Page {page_number}", use_column_width=True)

	        try:
	            with st.spinner(f"Analyzing page {page_number}..."):
	                json_output = get_invoice_details(img)
	        except Exception as exc:
	            # UI boundary: report the failure for this page and keep going
	            # instead of aborting the pages that follow.
	            st.error(f"Analysis failed for page {page_number}: {exc}")
	            continue

	        try:
	            # The model may wrap its answer in a ```json fence despite the
	            # prompt; strip it so otherwise valid JSON still parses.
	            parsed_json = json.loads(_strip_code_fence(json_output))
	        except json.JSONDecodeError:
	            st.error(f"Failed to parse JSON for page {page_number}. Raw output:")
	            st.text(json_output)
	        else:
	            st.json(parsed_json)


	if __name__ == "__main__":
	    main()