File size: 5,378 Bytes
bab94a3 38cd73f 46919f7 bab94a3 38cd73f bab94a3 38cd73f bab94a3 38cd73f 3440b09 38cd73f 3440b09 38cd73f 1c7c08e f1e5612 1c7c08e 38cd73f 3440b09 38cd73f 426234b 38cd73f bab94a3 38cd73f 46919f7 38cd73f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import pandas as pd
import numpy as np
import streamlit as st
import easyocr
import PIL
from PIL import Image, ImageDraw
from captcha.image import ImageCaptcha
import random, string
import utlis
def rectangle(image, result):
    """Draw a blue box around every detection in *result* and display the image.

    Args:
        image: PIL image to annotate. It is modified in place.
        result: Iterable of detections whose first element is a sequence of
            (x, y) corner points (presumably the output of EasyOCR's
            ``readtext`` -- confirm against the caller).
    """
    draw = ImageDraw.Draw(image)
    for res in result:
        corners = res[0]
        # Use the bounding box of *all* corners instead of assuming that
        # corners 0 and 2 are ordered top-left / bottom-right: a rotated
        # quadrilateral can violate that ordering, and Pillow rejects
        # rectangles whose second point lies above/left of the first.
        xs = [point[0] for point in corners]
        ys = [point[1] for point in corners]
        top_left = (min(xs), min(ys))
        bottom_right = (max(xs), max(ys))
        draw.rectangle((top_left, bottom_right), outline="blue", width=2)
    # display image on streamlit
    st.image(image)
# CAPTCHA constants: number of characters in the challenge text and the
# size, in pixels, of the generated challenge image.
length_captcha = 4
width, height = 200, 150
# define the function for the captcha control
def captcha_control():
    """Show a CAPTCHA challenge and record the outcome in ``st.session_state``.

    Session keys used:
        'controllo': set to ``False`` while the challenge is displayed, to
            ``True`` after a correct answer, and removed after a wrong one.
        'Captcha': the expected text. It is kept across reruns so the
            challenge does not change while the user is typing, and removed
            once an answer has been checked so a new one is generated.

    Returns immediately when 'controllo' is already truthy.
    """
    # control if the captcha is correct
    already_verified = (
        'controllo' in st.session_state and st.session_state['controllo']
    )
    if already_verified:
        return

    st.title("Captcha Control on OCR")

    # define the session state for control if the captcha is correct
    st.session_state['controllo'] = False
    col1, col2 = st.columns(2)

    # define the session state for the captcha text because it doesn't change during refreshes
    if 'Captcha' not in st.session_state:
        st.session_state['Captcha'] = ''.join(
            random.choices(string.ascii_uppercase + string.digits, k=length_captcha)
        )
    # NOTE(review): debug output -- this writes the expected answer to stdout.
    print("the captcha is: ", st.session_state['Captcha'])

    # setup the captcha widget: image on the left, answer box on the right
    image = ImageCaptcha(width=width, height=height)
    data = image.generate(st.session_state['Captcha'])
    # Without this call the generated challenge is never shown to the user.
    col1.image(data)
    capta2_text = col2.text_area('Enter captcha text', height=30)

    if st.button("Verify the code"):
        print(capta2_text, st.session_state['Captcha'])
        capta2_text = capta2_text.replace(" ", "")
        if st.session_state['Captcha'].lower() == capta2_text.lower().strip():
            # if the captcha is correct, the controllo session state is set to True
            del st.session_state['Captcha']
            st.session_state['controllo'] = True
        else:
            # if the captcha is wrong, report it and drop both keys so the
            # next run starts over with a freshly generated captcha
            st.error("🚨 Error on Captcha...")
            del st.session_state['Captcha']
            del st.session_state['controllo']
    # otherwise: nothing more to do until the button is clicked
# Page header. In this view both calls sit outside any function definition.
# main title
st.title("Get text from image with Persian and Arabic OCR")
# subtitle (rendered as a level-2 Markdown heading)
st.markdown("## Persian and Arabic OCR :")
#try_again = 0
def main():
    """Let the user upload an image, run Persian/Arabic OCR and show the results.

    Steps: upload -> (optional) resize -> OCR with EasyOCR -> render the text
    returned by ``utlis.get_raw_text`` right-to-left -> table of per-text
    confidence -> draw the detection boxes via ``rectangle``.
    When no file has been uploaded yet, only a prompt is written.
    """
    import html  # local import: only needed to escape the OCR text below

    holder = st.empty()
    # upload image file
    file = holder.file_uploader(label="Upload Here", type=['png', 'jpg', 'jpeg'])

    if file is None:
        st.write("Upload your image")
        return

    # read image with PIL library; convert to RGB so palette / alpha PNGs
    # become a plain 3-channel array for OCR and accept a colour outline.
    image = Image.open(file).convert("RGB")
    w, h = image.size
    if w > 600 or h > 400:
        # The message now states the size that is actually applied below.
        st.write("Due to the slowness of the server, the images were resized to 600x400.")
        image = image.resize((600, 400))
    st.image(image)  # display

    # st.spinner only shows while used as a context manager around the work.
    with st.spinner(text="In progress..."):
        # Persian and Arabic recognition models, CPU only.
        # NOTE(review): the reader is rebuilt on every call of main().
        reader = easyocr.Reader(['fa', 'ar'], gpu=False)  # , model_storage_directory='temp/',user_network_directory='temp/net'
        result = reader.readtext(np.array(image))  # turn image to numpy array

    extracted_text = utlis.get_raw_text(result)
    # The text comes from an uploaded image and is injected into raw HTML,
    # so escape it before rendering with unsafe_allow_html=True.
    st.markdown(
        '<p style="direction:rtl; text-align: right"> '
        + html.escape(extracted_text)
        + ' </p>',
        unsafe_allow_html=True,
    )

    # collect the results in the dictionary (keyed by text, so a repeated
    # text keeps the confidence of its last occurrence):
    textdic_easyocr = {
        pred_text: {'pred_confidence': pred_confidence}
        for _, pred_text, pred_confidence in result
    }

    # create a data frame which shows the predicted text and prediction confidence
    df = pd.DataFrame.from_dict(textdic_easyocr).T
    st.table(df)

    # get boxes on the image
    rectangle(image, result)
if 'controllo' not in st.session_state or st.session_state['controllo'] == False: