Spaces:

ardakshalkar
/

BookRecogntionKZ

Sleeping

File size: 2,570 Bytes

d7deef5

import os
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import os

import easyocr

# Working directories used by the rest of the script:
#   models_dir - custom EasyOCR model files and the label font
#   images_dir - input images to run OCR on
#   output_dir - annotated copies of the input images
models_dir = "./models"
images_dir = "./images"
output_dir = "./output"
dirs = [models_dir, images_dir, output_dir]
for d in dirs:
    # Create every directory in the list, not just output_dir.
    # exist_ok=True makes this a no-op when the directory is already there.
    os.makedirs(d, exist_ok=True)

"""
Upload the EasyOCR custom model files (all sharing the same base name) and a
font file named Ubuntu-Regular.ttf to the models directory, for example:
best_norm_ED.pth
best_norm_ED.py
best_norm_ED.yaml
Ubuntu-Regular.ttf

Upload the image files you want to test to the images directory, for example:
kz_book_simple.jpeg
kz_blur.jpg
kz_book_complex.jpg
"""

# TrueType font used later to draw the recognised text onto the images.
font_path = f"{models_dir}/Ubuntu-Regular.ttf"

# Both the model storage and the user-network lookup point at models_dir,
# where the 'best_norm_ED' recognition network files are expected to live.
reader_options = dict(
    gpu=True,
    detect_network="craft",
    recog_network='best_norm_ED',
    model_storage_directory=models_dir,
    user_network_directory=models_dir,
)

# Build the reader a single time; it loads the model into memory and is
# reused for every image.
reader = easyocr.Reader(['en'], **reader_options)

# File extensions (lower-case) that are treated as images.
image_extensions = (".jpg", ".jpeg", ".png")

# The label font is the same for every image, so load it once up front.
font = ImageFont.truetype(font_path, 32)

# Run OCR on every supported image in images_dir and write an annotated copy
# (green bounding boxes, blue recognised text) to output_dir under the same name.
for image_name in tqdm(os.listdir(images_dir)):
    if not image_name.lower().endswith(image_extensions):
        print(f'unsupported file {image_name}')
        continue
    image_path = f'{images_dir}/{image_name}'
    print(image_path)

    # cv2.imread reports failure by returning None rather than raising, so a
    # corrupt or unreadable file must be skipped explicitly.
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        print(f'could not read image {image_path}')
        continue

    # To process sideways scans, rotate here first, e.g.
    # bgr_image = cv2.rotate(bgr_image, cv2.ROTATE_90_CLOCKWISE)

    # OpenCV loads images as BGR; from here on the working copy is RGB, which
    # is also the channel order PIL expects.
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    results = reader.readtext(image=rgb_image)

    # First pass: draw every bounding box and remember where its label goes.
    labels = []
    for (bbox, text, prob) in results:
        (top_left, top_right, bottom_right, bottom_left) = bbox
        top_left = (int(top_left[0]), int(top_left[1]))
        bottom_right = (int(bottom_right[0]), int(bottom_right[1]))

        cv2.rectangle(rgb_image, top_left, bottom_right, (0, 255, 0), 2)

        # Place the label 40 px above the box, clamped so that boxes near the
        # top edge still get a visible label.
        labels.append(((top_left[0], max(top_left[1] - 40, 0)), text))

    # Second pass: draw all labels with PIL using the custom TrueType font.
    # The image is converted to PIL once per image instead of once per box.
    image_pil = Image.fromarray(rgb_image)
    draw = ImageDraw.Draw(image_pil)
    for position, text in labels:
        draw.text(position, text, font=font, fill=(0, 0, 255))

    # cv2.imwrite expects BGR, so convert back before saving; writing the RGB
    # array directly would swap the red and blue channels in the output file.
    annotated = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
    if not cv2.imwrite(f'{output_dir}/{image_name}', annotated):
        print(f'could not write image {output_dir}/{image_name}')

    # reader.readtext(image = image, paragraph=True)