Arch10 committed on
Commit
a041eb6
·
verified ·
1 Parent(s): aca06b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -28
app.py CHANGED
@@ -1,39 +1,44 @@
1
  import streamlit as st
2
- from transformers import AutoTokenizer, AutoModel
3
- import torch
4
  from PIL import Image
 
5
 
6
- # Load the pre-trained GOT OCR 2.0 model and tokenizer
7
- @st.cache_resource(show_spinner=True)
8
- def load_model():
9
- tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
10
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Check for GPU, fallback to CPU
11
- model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, use_safetensors=True)
12
- model = model.eval().to(device) # Move the model to the appropriate device
13
- return tokenizer, model, device
14
 
15
- # Streamlit interface
16
- st.title("OCR Application using General OCR Theory (GOT) 2.0")
17
- st.write("Upload an image to extract text using the GOT OCR 2.0 model.")
18
 
19
- # File upload handler
20
- uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
21
 
22
- if uploaded_file is not None:
23
- # Display the uploaded image
24
- st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
25
 
26
- # Load model
27
- tokenizer, model, device = load_model()
28
-
29
- # Load the image
30
- image = Image.open(uploaded_file)
31
- image.save("temp_image.png") # Save the uploaded image to a temporary file
32
 
33
  # Perform OCR
34
  with st.spinner("Extracting text..."):
35
- res = model.chat(tokenizer, "temp_image.png", ocr_type='ocr')
 
 
 
 
 
 
 
 
 
 
36
 
37
- # Display the result
38
- st.write("Extracted Text:")
39
- st.text(res)
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import easyocr
 
3
  from PIL import Image
4
+ import re
5
 
6
+ # Initialize EasyOCR Reader
7
+ reader = easyocr.Reader(['en', 'hi'])
 
 
 
 
 
 
8
 
9
+ # Streamlit app title
10
+ st.title("Image Text Extraction and Keyword Search using EasyOCR")
 
11
 
12
+ # File uploader for image input
13
+ uploaded_image = st.file_uploader("Upload an image", type=['png', 'jpg', 'jpeg'])
14
 
15
+ if uploaded_image is not None:
16
+ # Load the uploaded image
17
+ image = Image.open(uploaded_image)
18
 
19
+ # Display the image
20
+ st.image(image, caption='Uploaded Image', use_column_width=True)
 
 
 
 
21
 
22
  # Perform OCR
23
  with st.spinner("Extracting text..."):
24
+ results = reader.readtext(image)
25
+
26
+ # Extract the text
27
+ extracted_text = " ".join([text for (_, text, _) in results])
28
+
29
+ if extracted_text:
30
+ st.success("Extracted Text:")
31
+ st.write(extracted_text)
32
+
33
+ # Keyword search feature
34
+ keyword = st.text_input("Enter a keyword to search in the extracted text:")
35
 
36
+ if keyword:
37
+ # Highlight matches
38
+ highlighted_text = re.sub(f"({keyword})", r"<mark>\1</mark>", extracted_text, flags=re.IGNORECASE)
39
+ st.markdown(f"**Search Results for '{keyword}':**", unsafe_allow_html=True)
40
+ st.markdown(highlighted_text, unsafe_allow_html=True)
41
+ else:
42
+ st.info("Enter a keyword to search.")
43
+ else:
44
+ st.warning("No text detected in the image.")