Update app.py
app.py CHANGED
@@ -20,7 +20,7 @@ def main():
         process_image(uploaded_image)
 
 def process_pdf(uploaded_file):
-    #
+    # Process the uploaded PDF file
     with NamedTemporaryFile(delete=False) as temp_file:
         temp_file.write(uploaded_file.read())
         temp_file_path = temp_file.name
@@ -33,26 +33,48 @@ def process_pdf(uploaded_file):
     for page in pages:
         st.write(page.page_content)
 
-    llm = CTransformers(model="llama-2-7b-chat.ggmlv3.q4_0.bin",model_type="llama",
-        config={'max_new_tokens':128,'temperature':0.01})
-
+    llm = CTransformers(model="llama-2-7b-chat.ggmlv3.q4_0.bin", model_type="llama",
+                        config={'max_new_tokens': 128, 'temperature': 0.01})
+
     template = """Extract invoice number, name of organization, address, date,
-    Qty, Rate ,
-    Output
+    Qty, Rate, Tax, Amount {pages}
+    Output: entity: type
     """
     prompt_template = PromptTemplate(input_variables=["pages"], template=template)
     chain = LLMChain(llm=llm, prompt=prompt_template)
 
     result = chain.run(pages=pages[0].page_content)
-
+
     st.write("Extracted entities:")
     entities = result.strip().split("\n")
     table_data = [line.split(":") for line in entities]
     st.table(table_data)
 
 def process_image(uploaded_image):
-    # Process the uploaded image
-
+    # Process the uploaded image using OCR
+    image = Image.open(uploaded_image)
+    text = pytesseract.image_to_string(image)
+
+    st.write("Extracted text from the image:")
+    st.write(text)
+
+    # Apply entity extraction logic to the extracted text
+    llm = CTransformers(model="llama-2-7b-chat.ggmlv3.q4_0.bin", model_type="llama",
+                        config={'max_new_tokens': 128, 'temperature': 0.01})
+
+    template = """Extract invoice number, name of organization, address, date,
+    Qty, Rate, Tax, Amount {text}
+    Output: entity: type
+    """
+    prompt_template = PromptTemplate(input_variables=["text"], template=template)
+    chain = LLMChain(llm=llm, prompt=prompt_template)
+
+    result = chain.run(text)
+
+    st.write("Extracted entities:")
+    entities = result.strip().split("\n")
+    table_data = [line.split(":") for line in entities]
+    st.table(table_data)
 
 if __name__ == "__main__":
-    main()
+    main()
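The hunks start at line 20, so the file's import block is not shown in this commit. A minimal sketch of the imports the changed code appears to rely on is below; the exact LangChain module paths depend on the (pre-split) LangChain version, and the PDF loader that produces `pages` is an assumption (PyPDFLoader is one common choice) since that part of process_pdf is outside the diff.

# Assumed import block for app.py (not part of this commit's hunks).
import streamlit as st
import pytesseract                              # OCR used in process_image
from PIL import Image                           # opens the uploaded image
from tempfile import NamedTemporaryFile         # temp file for the uploaded PDF
from langchain.llms import CTransformers        # local llama-2 GGML model wrapper
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.document_loaders import PyPDFLoader  # assumed source of `pages`

With those imports in place, the app is started the usual Streamlit way, e.g. `streamlit run app.py`, with the llama-2-7b-chat.ggmlv3.q4_0.bin model file available in the working directory.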