File size: 3,199 Bytes
e4d7281
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import os

import easyocr

models_dir = "./models"
images_dir = "./images"
output_dir = "./output"

# Ensure every working directory exists before the OCR pipeline runs.
# BUG FIX: the original loop tested and created `output_dir` on every
# iteration instead of the loop variable `d`, so `models_dir` and
# `images_dir` were never created.
dirs = [models_dir, images_dir, output_dir]
for d in dirs:
    os.makedirs(d, exist_ok=True)  # idempotent: no error if it already exists


class KZReader:
    """Thin wrapper around an ``easyocr.Reader`` configured with the custom
    'best_norm_ED' recognition network stored in ``models_dir``."""

    def __init__(self):
        # Loading the model is expensive; construct the reader once and
        # reuse it for every readtext() call.
        self.reader = easyocr.Reader(
                            ['en'],
                            gpu=True,
                            recog_network='best_norm_ED',
                            detect_network="craft",
                            user_network_directory=models_dir,
                            model_storage_directory=models_dir,
                        )  # this needs to run only once to load the model into memory

    def readtext(self, image, paragraph=True):
        """Run OCR on ``image`` and return easyocr's result list.

        :param image: image path, bytes, or numpy array accepted by easyocr.
        :param paragraph: whether easyocr should merge results into
            paragraphs. Defaults to True to match the original behavior.

        BUG FIX: the original body referenced the bare global name ``reader``
        (a NameError unless a module-level reader happened to exist) and
        ignored the ``paragraph`` argument, always passing True. It now uses
        ``self.reader`` and forwards ``paragraph``.
        """
        result = self.reader.readtext(image=image, paragraph=paragraph)
        return result

"""
Upload the EasyOCR model files (all sharing the same base name) and a font file named Ubuntu-Regular.ttf, for example:
best_norm_ED.pth
best_norm_ED.py
best_norm_ED.yaml
Ubuntu-Regular.ttf

to models directory

Upload image files you want to test, examples:
kz_book_simple.jpeg
kz_blur.jpg
kz_book_complex.jpg

to images directory
"""

'''
font_path = models_dir + "/Ubuntu-Regular.ttf"

reader = easyocr.Reader(
    ['en'],
    gpu=True,
    recog_network='best_norm_ED',
    detect_network="craft",
    user_network_directory=models_dir,
    model_storage_directory=models_dir,
)  # this needs to run only once to load the model into memory

image_extensions = (".jpg", ".jpeg", ".png")
'''

'''
for image_name in tqdm(os.listdir(images_dir)):
    if not image_name.lower().endswith(image_extensions):
        print(f'unsupported file {image_name}')
        continue
    image_path = f'{images_dir}/{image_name}'
    print(image_path)
    # Read image as numpy array
    image = cv2.imread(image_path)

    # Rotate the image by 270 degrees
    # image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)

    # Convert the image from BGR to RGB (because OpenCV loads images in BGR format)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = reader.readtext(image=image)

    # Load custom font
    font = ImageFont.truetype(font_path, 32)

    # Display the results
    for (bbox, text, prob) in results:
        # Get the bounding box coordinates
        (top_left, top_right, bottom_right, bottom_left) = bbox
        top_left = (int(top_left[0]), int(top_left[1]))
        bottom_right = (int(bottom_right[0]), int(bottom_right[1]))

        # Draw the bounding box on the image
        cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)

        # Convert the OpenCV image to a PIL image, draw the text, then convert back to an OpenCV image
        image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(image_pil)
        draw.text((top_left[0], top_left[1] - 40), text, font=font, fill=(0, 0, 255))
        image = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)

    # Save image
    cv2.imwrite( f'{output_dir}/{image_name}', image)

    # reader.readtext(image = image, paragraph=True)


'''