Update app.py
app.py CHANGED
@@ -20,7 +20,7 @@ def main():
         process_image(uploaded_image)
 
 def process_pdf(uploaded_file):
-    #
+    # Process the uploaded PDF file
     with NamedTemporaryFile(delete=False) as temp_file:
         temp_file.write(uploaded_file.read())
         temp_file_path = temp_file.name
@@ -33,26 +33,48 @@ def process_pdf(uploaded_file):
     for page in pages:
         st.write(page.page_content)
 
-    llm = CTransformers(model="llama-2-7b-chat.ggmlv3.q4_0.bin",model_type="llama",
-        config={'max_new_tokens':128,'temperature':0.01})
-
+    llm = CTransformers(model="llama-2-7b-chat.ggmlv3.q4_0.bin", model_type="llama",
+                        config={'max_new_tokens': 128, 'temperature': 0.01})
+
     template = """Extract invoice number, name of organization, address, date,
-    Qty, Rate ,
-    Output
+    Qty, Rate, Tax, Amount {pages}
+    Output: entity: type
     """
     prompt_template = PromptTemplate(input_variables=["pages"], template=template)
     chain = LLMChain(llm=llm, prompt=prompt_template)
 
     result = chain.run(pages=pages[0].page_content)
-
+
     st.write("Extracted entities:")
     entities = result.strip().split("\n")
     table_data = [line.split(":") for line in entities]
     st.table(table_data)
 
 def process_image(uploaded_image):
-    # Process the uploaded image
-
+    # Process the uploaded image using OCR
+    image = Image.open(uploaded_image)
+    text = pytesseract.image_to_string(image)
+
+    st.write("Extracted text from the image:")
+    st.write(text)
+
+    # Apply entity extraction logic to the extracted text
+    llm = CTransformers(model="llama-2-7b-chat.ggmlv3.q4_0.bin", model_type="llama",
+                        config={'max_new_tokens': 128, 'temperature': 0.01})
+
+    template = """Extract invoice number, name of organization, address, date,
+    Qty, Rate, Tax, Amount {text}
+    Output: entity: type
+    """
+    prompt_template = PromptTemplate(input_variables=["text"], template=template)
+    chain = LLMChain(llm=llm, prompt=prompt_template)
+
+    result = chain.run(text)
+
+    st.write("Extracted entities:")
+    entities = result.strip().split("\n")
+    table_data = [line.split(":") for line in entities]
+    st.table(table_data)
 
 if __name__ == "__main__":
-    main()
+    main()
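The hunks start at line 20, so the file's import block is not shown in this commit. A minimal sketch of the imports the changed code appears to rely on is below; the exact LangChain module paths depend on the (pre-split) LangChain version, and the PDF loader that produces `pages` is an assumption (PyPDFLoader is one common choice) since that part of process_pdf is outside the diff.

# Assumed import block for app.py (not part of this commit's hunks).
import streamlit as st
import pytesseract                              # OCR used in process_image
from PIL import Image                           # opens the uploaded image
from tempfile import NamedTemporaryFile         # temp file for the uploaded PDF
from langchain.llms import CTransformers        # local llama-2 GGML model wrapper
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.document_loaders import PyPDFLoader  # assumed source of `pages`

With those imports in place, the app is started the usual Streamlit way, e.g. `streamlit run app.py`, with the llama-2-7b-chat.ggmlv3.q4_0.bin model file available in the working directory.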