BookRecogntionKZ / kz_ocr_easy.py
ardakshalkar's picture
add files
d7deef5
raw
history blame contribute delete
No virus
2.57 kB
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import os
import easyocr
# Working directories, all relative to the current working directory:
#   models_dir - custom EasyOCR model files and the font used for annotations
#   images_dir - input images to run OCR on
#   output_dir - annotated copies of the input images
models_dir = "./models"
images_dir = "./images"
output_dir = "./output"
dirs = [models_dir, images_dir, output_dir]

# Create every directory that is missing. The check must use the loop variable:
# testing output_dir on each pass would leave models_dir and images_dir
# uncreated. exist_ok=True makes the call a no-op for directories that exist.
for d in dirs:
    os.makedirs(d, exist_ok=True)
"""
Upload easy OCR model files with the same name and font file named Ubuntu-Regular.ttf, examples:
best_norm_ED.pth
best_norm_ED.py
best_norm_ED.yaml
Ubuntu-Regular.ttf
to models directory
Upload image files you want to test, examples:
kz_book_simple.jpeg
kz_blur.jpg
kz_book_complex.jpg
to images directory
"""
# TrueType font used later to draw the recognised text onto the images; it is
# expected to sit in the models directory next to the model files.
font_path = f"{models_dir}/Ubuntu-Regular.ttf"

# Keyword options for the EasyOCR reader. The recognition network name matches
# the base name of the custom model files; both directory options point at the
# models directory.
_reader_options = {
    "gpu": True,
    "recog_network": 'best_norm_ED',
    "detect_network": "craft",
    "user_network_directory": models_dir,
    "model_storage_directory": models_dir,
}

# Build the reader a single time: this loads the model into memory, and the
# same instance is reused for every image.
reader = easyocr.Reader(['en'], **_reader_options)
# File name suffixes (compared case-insensitively) that are treated as images.
image_extensions = (".jpg", ".jpeg", ".png")

# The font is the same for every image, so load it once instead of per image.
font = ImageFont.truetype(font_path, 32)

# Run OCR on every supported image in images_dir and save an annotated copy
# (green bounding boxes, blue recognised text) under the same name in output_dir.
for image_name in tqdm(os.listdir(images_dir)):
    if not image_name.lower().endswith(image_extensions):
        print(f'unsupported file {image_name}')
        continue

    image_path = f'{images_dir}/{image_name}'
    print(image_path)

    # cv2.imread signals failure by returning None rather than raising, so
    # check explicitly; otherwise cvtColor below would crash on a bad file.
    image_bgr = cv2.imread(image_path)
    if image_bgr is None:
        print(f'could not read image {image_path}')
        continue

    # If the pages are scanned sideways, rotate here first, e.g.:
    # image_bgr = cv2.rotate(image_bgr, cv2.ROTATE_90_CLOCKWISE)

    # OpenCV loads images in BGR order; recognition and all drawing below are
    # done on an RGB copy, which is also what PIL expects.
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    results = reader.readtext(image=image_rgb)

    # First pass: draw every bounding box with OpenCV and remember where the
    # label for each box goes. (0, 255, 0) is green in both RGB and BGR order.
    labels = []
    for bbox, text, _prob in results:
        top_left, _top_right, bottom_right, _bottom_left = bbox
        top_left = (int(top_left[0]), int(top_left[1]))
        bottom_right = (int(bottom_right[0]), int(bottom_right[1]))
        cv2.rectangle(image_rgb, top_left, bottom_right, (0, 255, 0), 2)
        labels.append((top_left, text))

    # Second pass: the text is drawn with PIL because it uses the custom
    # TrueType font. One conversion per image is enough for all labels.
    image_pil = Image.fromarray(image_rgb)
    draw = ImageDraw.Draw(image_pil)
    for top_left, text in labels:
        # Place the label 40 px above the top-left corner of its box.
        draw.text((top_left[0], top_left[1] - 40), text, font=font, fill=(0, 0, 255))

    # cv2.imwrite expects BGR data, so convert back exactly once before saving;
    # writing the RGB array directly would swap red and blue in the output.
    annotated_bgr = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
    output_path = f'{output_dir}/{image_name}'
    if not cv2.imwrite(output_path, annotated_bgr):
        print(f'could not write image {output_path}')

# Alternative call kept from the original script for reference:
# reader.readtext(image = image, paragraph=True)