Spaces:

Arch10
/

ocr-app

Sleeping

ocr-app / app.py

Update app.py

46b1d1d verified 4 months ago

1.64 kB

	import html
	import re
	from io import BytesIO

	import easyocr
	import requests
	import streamlit as st
	from PIL import Image

	# Seconds to wait on the remote server before abandoning the download;
	# without a timeout a dead host would hang the app indefinitely.
	REQUEST_TIMEOUT_SECONDS = 10


	@st.cache_resource
	def _load_reader():
	    """Build the EasyOCR reader once and reuse it across Streamlit reruns.

	    Streamlit re-executes this script on every widget interaction, so an
	    uncached ``easyocr.Reader`` would be reconstructed each time the user
	    types a URL or a keyword.
	    """
	    return easyocr.Reader(['en', 'hi'])


	def _highlight_keyword(text: str, keyword: str) -> str:
	    """Return *text* as HTML with every match of *keyword* wrapped in <mark>.

	    Matching is case-insensitive and literal: the keyword is regex-escaped,
	    so characters such as ``(`` or ``+`` cannot raise ``re.error`` or change
	    what is matched. Every fragment is HTML-escaped separately, so markup
	    present in the OCR output or the keyword is displayed, not interpreted.
	    """
	    if not keyword:
	        return html.escape(text)
	    pattern = re.compile(f"({re.escape(keyword)})", flags=re.IGNORECASE)
	    # With a single capture group, re.split alternates non-matching and
	    # matching fragments: odd indices are the keyword occurrences.
	    fragments = pattern.split(text)
	    return "".join(
	        f"<mark>{html.escape(fragment)}</mark>" if index % 2
	        else html.escape(fragment)
	        for index, fragment in enumerate(fragments)
	    )


	# Initialize EasyOCR Reader (cached, see _load_reader)
	reader = _load_reader()

	# Streamlit app title
	st.title("Image Text Extraction and Keyword Search using EasyOCR")

	# Input for image URL
	image_url = st.text_input("Enter the image URL:")

	if image_url:
	    try:
	        # Fetch the image from the URL; fail fast on HTTP errors so an
	        # error page is never handed to the image decoder.
	        response = requests.get(image_url, timeout=REQUEST_TIMEOUT_SECONDS)
	        response.raise_for_status()
	        image_bytes = response.content
	        image = Image.open(BytesIO(image_bytes))

	        # Display the image
	        st.image(image, caption='Uploaded Image', use_column_width=True)

	        # Perform OCR on the raw bytes: EasyOCR documents file paths, raw
	        # bytes and NumPy arrays as inputs, not PIL Image objects.
	        with st.spinner("Extracting text..."):
	            results = reader.readtext(image_bytes)

	        # Each result is (bounding_box, text, confidence); keep the text.
	        extracted_text = " ".join(text for (_, text, _) in results)
	    except Exception as exc:  # UI boundary: report the failure, don't crash
	        st.error("Error fetching or processing the image. Please check the URL.")
	        st.caption(f"Details: {type(exc).__name__}: {exc}")
	    else:
	        if extracted_text:
	            st.success("Extracted Text:")
	            st.write(extracted_text)

	            # Keyword search feature
	            keyword = st.text_input("Enter a keyword to search in the extracted text:")

	            if keyword:
	                # Highlight matches; both strings are escaped because the
	                # result is rendered with unsafe_allow_html=True.
	                highlighted_text = _highlight_keyword(extracted_text, keyword)
	                st.markdown(
	                    f"Search Results for '{html.escape(keyword)}':",
	                    unsafe_allow_html=True,
	                )
	                st.markdown(highlighted_text, unsafe_allow_html=True)
	            else:
	                st.info("Enter a keyword to search.")
	        else:
	            st.warning("No text detected in the image.")