# EasyOCR Kazakh text-recognition demo (Hugging Face Space source).
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import os
import easyocr
# Directory layout: model/font files, input images, annotated output images.
models_dir = "./models"
images_dir = "./images"
output_dir = "./output"
dirs = [models_dir, images_dir, output_dir]

# Create every required directory up front.
# Bug fix: the original loop checked and created only output_dir on each
# iteration instead of the current directory d, so models_dir and
# images_dir were never created.
for d in dirs:
    os.makedirs(d, exist_ok=True)
class KZReader:
    """Thin wrapper around an ``easyocr.Reader`` configured for the custom
    'best_norm_ED' recognition network stored in ``models_dir``."""

    def __init__(self):
        # Loading the model is expensive; this needs to run only once to
        # load the model into memory.
        self.reader = easyocr.Reader(
            ['en'],
            gpu=True,
            recog_network='best_norm_ED',
            detect_network="craft",
            user_network_directory=models_dir,
            model_storage_directory=models_dir,
        )

    def readtext(self, image, paragraph=True):
        """Run OCR on ``image`` and return easyocr's result list.

        Bug fixes vs. the original: use ``self.reader`` (the original
        referenced an undefined global ``reader``, a NameError at runtime)
        and forward the ``paragraph`` argument (the original ignored it and
        always passed ``paragraph=True``; the default keeps that behavior
        for existing callers).
        """
        return self.reader.readtext(image=image, paragraph=paragraph)
"""
Upload easy OCR model files with the same name and font file named Ubuntu-Regular.ttf, examples:
best_norm_ED.pth
best_norm_ED.py
best_norm_ED.yaml
Ubuntu-Regular.ttf
to models directory
Upload image files you want to test, examples:
kz_book_simple.jpeg
kz_blur.jpg
kz_book_complex.jpg
to images directory
"""
'''
font_path = models_dir + "/Ubuntu-Regular.ttf"
reader = easyocr.Reader(
['en'],
gpu=True,
recog_network='best_norm_ED',
detect_network="craft",
user_network_directory=models_dir,
model_storage_directory=models_dir,
) # this needs to run only once to load the model into memory
image_extensions = (".jpg", ".jpeg", ".png")
'''
'''
for image_name in tqdm(os.listdir(images_dir)):
if not image_name.lower().endswith(image_extensions):
print(f'unsupported file {image_name}')
continue
image_path = f'{images_dir}/{image_name}'
print(image_path)
# Read image as numpy array
image = cv2.imread(image_path)
# Rotate the image by 270 degrees
# image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
# Convert the image from BGR to RGB (because OpenCV loads images in BGR format)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = reader.readtext(image=image)
# Load custom font
font = ImageFont.truetype(font_path, 32)
# Display the results
for (bbox, text, prob) in results:
# Get the bounding box coordinates
(top_left, top_right, bottom_right, bottom_left) = bbox
top_left = (int(top_left[0]), int(top_left[1]))
bottom_right = (int(bottom_right[0]), int(bottom_right[1]))
# Draw the bounding box on the image
cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)
# Convert the OpenCV image to a PIL image, draw the text, then convert back to an OpenCV image
image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
draw = ImageDraw.Draw(image_pil)
draw.text((top_left[0], top_left[1] - 40), text, font=font, fill=(0, 0, 255))
image = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
# Save image
cv2.imwrite( f'{output_dir}/{image_name}', image)
# reader.readtext(image = image, paragraph=True)
'''