Spaces:

ardakshalkar
/

KazOCR

Sleeping

App Files Files Community

KazOCR / KZReader.py

ardakshalkar

first commit

e4d7281 about 1 year ago

raw

history blame

3.2 kB

	import os
	import cv2
	import numpy as np
	from PIL import Image, ImageDraw, ImageFont
	from tqdm import tqdm
	import os

	import easyocr

	# Working directories, all relative to the current working directory.
	# models_dir is the directory handed to easyocr.Reader for both the user
	# network definition and the model weights.
	models_dir = "./models"
	images_dir = "./images"
	output_dir = "./output"
	dirs = [models_dir, images_dir, output_dir]
	# Create every working directory up front. The loop must act on its own
	# loop variable: checking/creating output_dir on each pass (as before) left
	# models/ and images/ uncreated. exist_ok=True makes this a no-op when the
	# directory is already there.
	for d in dirs:
	    os.makedirs(d, exist_ok=True)


	class KZReader:
	    """Wrapper around an ``easyocr.Reader`` configured with the custom
	    ``best_norm_ED`` recognition network and the ``craft`` detector.

	    The model files are looked up in (and stored to) ``models_dir``.
	    """

	    def __init__(self):
	        """Create the underlying ``easyocr.Reader`` and keep it on ``self.reader``."""
	        # This needs to run only once to load the model into memory, so the
	        # reader is built here and reused by every readtext() call.
	        self.reader = easyocr.Reader(
	            ['en'],
	            gpu=True,
	            recog_network='best_norm_ED',
	            detect_network="craft",
	            user_network_directory=models_dir,
	            model_storage_directory=models_dir,
	        )

	    def readtext(self, image, paragraph=True):
	        """Run OCR on ``image`` with the reader created in ``__init__``.

	        Args:
	            image: The image to recognise; passed unchanged to
	                ``easyocr.Reader.readtext``.
	            paragraph: Forwarded to easyocr's ``paragraph`` option. Defaults
	                to ``True``, the value that used to be hard-coded here.

	        Returns:
	            Whatever ``easyocr.Reader.readtext`` returns for these arguments.
	        """
	        # Use the instance's reader: there is no module-level ``reader`` in
	        # this file, so the bare name raised NameError on every call.
	        return self.reader.readtext(image=image, paragraph=paragraph)

	"""
	Upload the EasyOCR model files (all sharing the same base name) and a font file named Ubuntu-Regular.ttf, for example:
	best_norm_ED.pth
	best_norm_ED.py
	best_norm_ED.yaml
	Ubuntu-Regular.ttf

	to models directory

	Upload the image files you want to test, for example:
	kz_book_simple.jpeg
	kz_blur.jpg
	kz_book_complex.jpg

	to images directory
	"""

	# Disabled module-level setup. The triple-quoted block below is a bare
	# string expression, so nothing inside it is executed. It holds the font
	# path under models_dir, a module-level easyocr.Reader built with the same
	# arguments as KZReader.__init__, and the tuple of accepted image
	# file extensions.
	'''
	font_path = models_dir + "/Ubuntu-Regular.ttf"

	reader = easyocr.Reader(
	['en'],
	gpu=True,
	recog_network='best_norm_ED',
	detect_network="craft",
	user_network_directory=models_dir,
	model_storage_directory=models_dir,
	) # this needs to run only once to load the model into memory

	image_extensions = (".jpg", ".jpeg", ".png")
	'''

	# Disabled batch script. The triple-quoted block below is a bare string
	# expression, so nothing inside it is executed. As written it walks the
	# files in images_dir, reports and skips names that do not end with one of
	# image_extensions, runs reader.readtext on each remaining image, draws each
	# result's bounding box and recognised text onto the image, and writes the
	# image to output_dir under the same file name.
	# NOTE(review): it depends on `reader`, `font_path` and `image_extensions`,
	# which are only assigned inside another disabled string in this file.
	# NOTE(review): the image is converted BGR->RGB before OCR, then passed
	# through COLOR_BGR2RGB / COLOR_RGB2BGR again while drawing and finally
	# saved with cv2.imwrite -- the channel order looks inconsistent; verify
	# before re-enabling.
	# NOTE(review): the original indentation is not visible here, so whether the
	# save happens once per image or once per detection should be confirmed.
	'''
	for image_name in tqdm(os.listdir(images_dir)):
	if not image_name.lower().endswith(image_extensions):
	print(f'unsupported file {image_name}')
	continue
	image_path = f'{images_dir}/{image_name}'
	print(image_path)
	# Read image as numpy array
	image = cv2.imread(image_path)

	# Rotate the image by 270 degrees
	# image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)

	# Convert the image from BGR to RGB (because OpenCV loads images in BGR format)
	image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
	results = reader.readtext(image=image)

	# Load custom font
	font = ImageFont.truetype(font_path, 32)

	# Display the results
	for (bbox, text, prob) in results:
	# Get the bounding box coordinates
	(top_left, top_right, bottom_right, bottom_left) = bbox
	top_left = (int(top_left[0]), int(top_left[1]))
	bottom_right = (int(bottom_right[0]), int(bottom_right[1]))

	# Draw the bounding box on the image
	cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)

	# Convert the OpenCV image to a PIL image, draw the text, then convert back to an OpenCV image
	image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
	draw = ImageDraw.Draw(image_pil)
	draw.text((top_left[0], top_left[1] - 40), text, font=font, fill=(0, 0, 255))
	image = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)

	# Save image
	cv2.imwrite( f'{output_dir}/{image_name}', image)

	# reader.readtext(image = image, paragraph=True)


	'''