Spaces:
Sleeping
Sleeping
Divyansh12
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -11,7 +11,7 @@ st.set_page_config(layout="wide")
|
|
11 |
# Load the model and tokenizer only once
|
12 |
@st.cache_resource
|
13 |
def load_model(model_name):
|
14 |
-
if model_name == "OCR
|
15 |
tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
|
16 |
model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id).eval()
|
17 |
else:
|
@@ -20,7 +20,7 @@ def load_model(model_name):
|
|
20 |
return model, tokenizer
|
21 |
|
22 |
if "model" not in st.session_state or "tokenizer" not in st.session_state:
|
23 |
-
model, tokenizer = load_model("OCR
|
24 |
st.session_state.update({"model": model, "tokenizer": tokenizer})
|
25 |
|
26 |
# Function to run the GOT model for multilingual OCR
|
@@ -41,8 +41,8 @@ def highlight_text(text, search_term):
|
|
41 |
return re.sub(re.escape(search_term), lambda m: f'<span style="background-color: red;">{m.group()}</span>', text, flags=re.IGNORECASE) if search_term else text
|
42 |
|
43 |
# Streamlit App
|
44 |
-
st.title("
|
45 |
-
st.write("
|
46 |
|
47 |
# Create two columns
|
48 |
col1, col2 = st.columns(2)
|
@@ -56,7 +56,7 @@ with col1:
|
|
56 |
|
57 |
# Right column - Model selection, options, and displaying extracted text
|
58 |
with col2:
|
59 |
-
model_option = st.selectbox("Select Model", ["OCR
|
60 |
|
61 |
if st.button("Run OCR"):
|
62 |
if uploaded_image:
|
@@ -73,4 +73,5 @@ with col2:
|
|
73 |
# Display the extracted text if it exists in session state
|
74 |
if "extracted_text" in st.session_state:
|
75 |
search_term = st.text_input("Enter a word or phrase to highlight:")
|
76 |
-
st.subheader("Extracted Text:")
|
|
|
|
11 |
# Load the model and tokenizer only once
|
12 |
@st.cache_resource
|
13 |
def load_model(model_name):
|
14 |
+
if model_name == "OCR for English or Hindi (CPU)":
|
15 |
tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
|
16 |
model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id).eval()
|
17 |
else:
|
|
|
20 |
return model, tokenizer
|
21 |
|
22 |
if "model" not in st.session_state or "tokenizer" not in st.session_state:
|
23 |
+
model, tokenizer = load_model("OCR for English or Hindi (CPU)")
|
24 |
st.session_state.update({"model": model, "tokenizer": tokenizer})
|
25 |
|
26 |
# Function to run the GOT model for multilingual OCR
|
|
|
41 |
return re.sub(re.escape(search_term), lambda m: f'<span style="background-color: red;">{m.group()}</span>', text, flags=re.IGNORECASE) if search_term else text
|
42 |
|
43 |
# Streamlit App
|
44 |
+
st.title(":blue[Optical Character Recognition Application]")
|
45 |
+
st.write("upload image")
|
46 |
|
47 |
# Create two columns
|
48 |
col1, col2 = st.columns(2)
|
|
|
56 |
|
57 |
# Right column - Model selection, options, and displaying extracted text
|
58 |
with col2:
|
59 |
+
model_option = st.selectbox("Select Model", ["OCR for English or Hindi (CPU)", "OCR for English (GPU)"])
|
60 |
|
61 |
if st.button("Run OCR"):
|
62 |
if uploaded_image:
|
|
|
73 |
# Display the extracted text if it exists in session state
|
74 |
if "extracted_text" in st.session_state:
|
75 |
search_term = st.text_input("Enter a word or phrase to highlight:")
|
76 |
+
st.subheader("Extracted Text:")
|
77 |
+
st.markdown(f'<div style="white-space: pre-wrap;">{highlight_text(st.session_state["extracted_text"], search_term)}</div>', unsafe_allow_html=True)
|