Divyansh12 committed on
Commit
d42b279
·
verified ·
1 Parent(s): 0cc2c1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -11,7 +11,7 @@ st.set_page_config(layout="wide")
11
  # Load the model and tokenizer only once
12
  @st.cache_resource
13
  def load_model(model_name):
14
- if model_name == "OCR on CPU":
15
  tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
16
  model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id).eval()
17
  else:
@@ -20,7 +20,7 @@ def load_model(model_name):
20
  return model, tokenizer
21
 
22
  if "model" not in st.session_state or "tokenizer" not in st.session_state:
23
- model, tokenizer = load_model("OCR on CPU")
24
  st.session_state.update({"model": model, "tokenizer": tokenizer})
25
 
26
  # Function to run the GOT model for multilingual OCR
@@ -41,8 +41,8 @@ def highlight_text(text, search_term):
41
  return re.sub(re.escape(search_term), lambda m: f'<span style="background-color: red;">{m.group()}</span>', text, flags=re.IGNORECASE) if search_term else text
42
 
43
  # Streamlit App
44
- st.title("GOT-OCR Multilingual Demo")
45
- st.write("Upload an image for OCR")
46
 
47
  # Create two columns
48
  col1, col2 = st.columns(2)
@@ -56,7 +56,7 @@ with col1:
56
 
57
  # Right column - Model selection, options, and displaying extracted text
58
  with col2:
59
- model_option = st.selectbox("Select Model", ["OCR on CPU", "OCR on GPU"])
60
 
61
  if st.button("Run OCR"):
62
  if uploaded_image:
@@ -73,4 +73,5 @@ with col2:
73
  # Display the extracted text if it exists in session state
74
  if "extracted_text" in st.session_state:
75
  search_term = st.text_input("Enter a word or phrase to highlight:")
76
- st.subheader("Extracted Text:")
 
 
11
  # Load the model and tokenizer only once
12
  @st.cache_resource
13
  def load_model(model_name):
14
+ if model_name == "OCR for English or Hindi (CPU)":
15
  tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
16
  model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id).eval()
17
  else:
 
20
  return model, tokenizer
21
 
22
  if "model" not in st.session_state or "tokenizer" not in st.session_state:
23
+ model, tokenizer = load_model("OCR for English or Hindi (CPU)")
24
  st.session_state.update({"model": model, "tokenizer": tokenizer})
25
 
26
  # Function to run the GOT model for multilingual OCR
 
41
  return re.sub(re.escape(search_term), lambda m: f'<span style="background-color: red;">{m.group()}</span>', text, flags=re.IGNORECASE) if search_term else text
42
 
43
  # Streamlit App
44
+ st.title(":blue[Optical Character Recognition Application]")
45
+ st.write("upload image")
46
 
47
  # Create two columns
48
  col1, col2 = st.columns(2)
 
56
 
57
  # Right column - Model selection, options, and displaying extracted text
58
  with col2:
59
+ model_option = st.selectbox("Select Model", ["OCR for English or Hindi (CPU)", "OCR for English (GPU)"])
60
 
61
  if st.button("Run OCR"):
62
  if uploaded_image:
 
73
  # Display the extracted text if it exists in session state
74
  if "extracted_text" in st.session_state:
75
  search_term = st.text_input("Enter a word or phrase to highlight:")
76
+ st.subheader("Extracted Text:")
77
+ st.markdown(f'<div style="white-space: pre-wrap;">{highlight_text(st.session_state["extracted_text"], search_term)}</div>', unsafe_allow_html=True)