"""OCR helper for a custom EasyOCR recognition model ("best_norm_ED").

Expects model files (and the Ubuntu-Regular.ttf font) in ``./models`` and
test images in ``./images``; annotated results go to ``./output``.
"""

import os

import cv2
import easyocr
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm

# Working directories: custom model files + font, input images, annotated output.
models_dir = "./models"
images_dir = "./images"
output_dir = "./output"

dirs = [models_dir, images_dir, output_dir]
# BUGFIX: the original loop body checked/created `output_dir` on every
# iteration instead of `d`, so models_dir and images_dir were never created.
for d in dirs:
    os.makedirs(d, exist_ok=True)


class KZReader:
    """Thin wrapper around an EasyOCR ``Reader`` configured for the custom
    recognition network stored in ``models_dir``."""

    def __init__(self):
        # Loading the model is expensive; this runs only once so the model
        # stays resident in memory for all subsequent readtext() calls.
        self.reader = easyocr.Reader(
            ['en'],
            gpu=True,
            recog_network='best_norm_ED',
            detect_network="craft",
            user_network_directory=models_dir,
            model_storage_directory=models_dir,
        )

    def readtext(self, image, paragraph=True):
        """Run OCR on *image* (file path or numpy array).

        ``paragraph`` groups nearby boxes into paragraphs.  BUGFIX: the
        original referenced an undefined global ``reader`` instead of
        ``self.reader`` (NameError), and ignored the ``paragraph`` argument
        by hard-coding ``paragraph=True``; it now has a backward-compatible
        default and is passed through.

        Returns the list produced by ``easyocr.Reader.readtext``.
        """
        return self.reader.readtext(image=image, paragraph=paragraph)


"""
Upload easy OCR model files with the same name and font file named Ubuntu-Regular.ttf, examples:
best_norm_ED.pth
best_norm_ED.py
best_norm_ED.yaml
Ubuntu-Regular.ttf
to models directory
Upload image files you want to test, examples:
kz_book_simple.jpeg
kz_blur.jpg
kz_book_complex.jpg
to images directory
"""

# NOTE(review): the two triple-quoted sections below are deliberately
# commented-out example code (a standalone batch-annotation script); they are
# kept verbatim as inert string literals, exactly as in the original file.
'''
font_path = models_dir + "/Ubuntu-Regular.ttf"

reader = easyocr.Reader(
    ['en'],
    gpu=True,
    recog_network='best_norm_ED',
    detect_network="craft",
    user_network_directory=models_dir,
    model_storage_directory=models_dir,
)  # this needs to run only once to load the model into memory

image_extensions = (".jpg", ".jpeg", ".png")
'''

'''
for image_name in tqdm(os.listdir(images_dir)):
    if not image_name.lower().endswith(image_extensions):
        print(f'unsupported file {image_name}')
        continue

    image_path = f'{images_dir}/{image_name}'
    print(image_path)

    # Read image as numpy array
    image = cv2.imread(image_path)

    # Rotate the image by 270 degrees
    # image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)

    # Convert the image from BGR to RGB (because OpenCV loads images in BGR format)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    results = reader.readtext(image=image)

    # Load custom font
    font = ImageFont.truetype(font_path, 32)

    # Display the results
    for (bbox, text, prob) in results:
        # Get the bounding box coordinates
        (top_left, top_right, bottom_right, bottom_left) = bbox
        top_left = (int(top_left[0]), int(top_left[1]))
        bottom_right = (int(bottom_right[0]), int(bottom_right[1]))

        # Draw the bounding box on the image
        cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)

        # Convert the OpenCV image to a PIL image, draw the text, then convert back to an OpenCV image
        image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(image_pil)
        draw.text((top_left[0], top_left[1] - 40), text, font=font, fill=(0, 0, 255))
        image = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)

    # Save image
    cv2.imwrite(f'{output_dir}/{image_name}', image)

# reader.readtext(image = image, paragraph=True)
'''