yuvaranianandhan24 committed on
Commit
671cbf4
·
verified ·
1 Parent(s): 18df39c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -10
app.py CHANGED
@@ -20,7 +20,7 @@ def main():
20
  process_image(uploaded_image)
21
 
22
  def process_pdf(uploaded_file):
23
- # Save the uploaded file to a temporary location
24
  with NamedTemporaryFile(delete=False) as temp_file:
25
  temp_file.write(uploaded_file.read())
26
  temp_file_path = temp_file.name
@@ -33,26 +33,48 @@ def process_pdf(uploaded_file):
33
  for page in pages:
34
  st.write(page.page_content)
35
 
36
- llm = CTransformers(model="llama-2-7b-chat.ggmlv3.q4_0.bin",model_type="llama",
37
- config={'max_new_tokens':128,'temperature':0.01})
38
-
39
  template = """Extract invoice number, name of organization, address, date,
40
- Qty, Rate ,Tax ,Amount {pages}
41
- Output : entity : type
42
  """
43
  prompt_template = PromptTemplate(input_variables=["pages"], template=template)
44
  chain = LLMChain(llm=llm, prompt=prompt_template)
45
 
46
  result = chain.run(pages=pages[0].page_content)
47
-
48
  st.write("Extracted entities:")
49
  entities = result.strip().split("\n")
50
  table_data = [line.split(":") for line in entities]
51
  st.table(table_data)
52
 
53
  def process_image(uploaded_image):
54
- # Process the uploaded image
55
- st.write("Image processing is not implemented yet.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  if __name__ == "__main__":
58
- main()
 
20
  process_image(uploaded_image)
21
 
22
  def process_pdf(uploaded_file):
23
+ # Process the uploaded PDF file
24
  with NamedTemporaryFile(delete=False) as temp_file:
25
  temp_file.write(uploaded_file.read())
26
  temp_file_path = temp_file.name
 
33
  for page in pages:
34
  st.write(page.page_content)
35
 
36
+ llm = CTransformers(model="llama-2-7b-chat.ggmlv3.q4_0.bin", model_type="llama",
37
+ config={'max_new_tokens': 128, 'temperature': 0.01})
38
+
39
  template = """Extract invoice number, name of organization, address, date,
40
+ Qty, Rate, Tax, Amount {pages}
41
+ Output: entity: type
42
  """
43
  prompt_template = PromptTemplate(input_variables=["pages"], template=template)
44
  chain = LLMChain(llm=llm, prompt=prompt_template)
45
 
46
  result = chain.run(pages=pages[0].page_content)
47
+
48
  st.write("Extracted entities:")
49
  entities = result.strip().split("\n")
50
  table_data = [line.split(":") for line in entities]
51
  st.table(table_data)
52
 
53
  def process_image(uploaded_image):
54
+ # Process the uploaded image using OCR
55
+ image = Image.open(uploaded_image)
56
+ text = pytesseract.image_to_string(image)
57
+
58
+ st.write("Extracted text from the image:")
59
+ st.write(text)
60
+
61
+ # Apply entity extraction logic to the extracted text
62
+ llm = CTransformers(model="llama-2-7b-chat.ggmlv3.q4_0.bin", model_type="llama",
63
+ config={'max_new_tokens': 128, 'temperature': 0.01})
64
+
65
+ template = """Extract invoice number, name of organization, address, date,
66
+ Qty, Rate, Tax, Amount {text}
67
+ Output: entity: type
68
+ """
69
+ prompt_template = PromptTemplate(input_variables=["text"], template=template)
70
+ chain = LLMChain(llm=llm, prompt=prompt_template)
71
+
72
+ result = chain.run(text)
73
+
74
+ st.write("Extracted entities:")
75
+ entities = result.strip().split("\n")
76
+ table_data = [line.split(":") for line in entities]
77
+ st.table(table_data)
78
 
79
  if __name__ == "__main__":
80
+ main()