Spaces:

Divyansh12
/

OCR_Application

Sleeping

App Files Community

Divyansh12 committed on Sep 30, 2024

Commit

d42b279

verified ·

1 Parent(s): 0cc2c1c

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -6

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ st.set_page_config(layout="wide")
 # Load the model and tokenizer only once
 @st.cache_resource
 def load_model(model_name):
-    if model_name == "OCR on CPU":
         tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
         model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id).eval()
     else:
@@ -20,7 +20,7 @@ def load_model(model_name):
     return model, tokenizer
 if "model" not in st.session_state or "tokenizer" not in st.session_state:
-    model, tokenizer = load_model("OCR on CPU")
     st.session_state.update({"model": model, "tokenizer": tokenizer})
 # Function to run the GOT model for multilingual OCR
@@ -41,8 +41,8 @@ def highlight_text(text, search_term):
     return re.sub(re.escape(search_term), lambda m: f'<span style="background-color: red;">{m.group()}</span>', text, flags=re.IGNORECASE) if search_term else text
 # Streamlit App
-st.title("GOT-OCR Multilingual Demo")
-st.write("Upload an image for OCR")
 # Create two columns
 col1, col2 = st.columns(2)
@@ -56,7 +56,7 @@ with col1:
 # Right column - Model selection, options, and displaying extracted text
 with col2:
-    model_option = st.selectbox("Select Model", ["OCR on CPU", "OCR on GPU"])
     if st.button("Run OCR"):
         if uploaded_image:
@@ -73,4 +73,5 @@ with col2:
     # Display the extracted text if it exists in session state
     if "extracted_text" in st.session_state:
         search_term = st.text_input("Enter a word or phrase to highlight:")
-        st.subheader("Extracted Text:")

 # Load the model and tokenizer only once
 @st.cache_resource
 def load_model(model_name):
+    if model_name == "OCR for English or Hindi (CPU)":
         tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
         model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id).eval()
     else:
     return model, tokenizer
 if "model" not in st.session_state or "tokenizer" not in st.session_state:
+    model, tokenizer = load_model("OCR for English or Hindi (CPU)")
     st.session_state.update({"model": model, "tokenizer": tokenizer})
 # Function to run the GOT model for multilingual OCR
     return re.sub(re.escape(search_term), lambda m: f'<span style="background-color: red;">{m.group()}</span>', text, flags=re.IGNORECASE) if search_term else text
 # Streamlit App
+st.title(":blue[Optical Character Recognition Application]")
+st.write("upload image")
 # Create two columns
 col1, col2 = st.columns(2)
 # Right column - Model selection, options, and displaying extracted text
 with col2:
+    model_option = st.selectbox("Select Model", ["OCR for English or Hindi (CPU)", "OCR for English (GPU)"])
     if st.button("Run OCR"):
         if uploaded_image:
     # Display the extracted text if it exists in session state
     if "extracted_text" in st.session_state:
         search_term = st.text_input("Enter a word or phrase to highlight:")
+        st.subheader("Extracted Text:")
+        st.markdown(f'<div style="white-space: pre-wrap;">{highlight_text(st.session_state["extracted_text"], search_term)}</div>', unsafe_allow_html=True)