Spaces:

ardakshalkar
/

BookRecogntionKZ

Sleeping

App Files Files Community

BookRecogntionKZ / kz_ocr_easy.py

ardakshalkar

add files

d7deef5 9 months ago

raw

history blame contribute delete

No virus

2.57 kB

	import os
	import cv2
	import numpy as np
	from PIL import Image, ImageDraw, ImageFont
	from tqdm import tqdm
	import os

	import easyocr

	# Working directories, all relative to the current working directory:
	#   models_dir - OCR model files and the font used for annotations
	#   images_dir - input images to run OCR on
	#   output_dir - annotated copies of the input images
	models_dir = "./models"
	images_dir = "./images"
	output_dir = "./output"
	dirs = [models_dir, images_dir, output_dir]
	for d in dirs:
	    # Create each directory in the list (the loop previously tested and
	    # created only output_dir on every iteration, so models/ and images/
	    # were never created). exist_ok=True makes this a no-op when the
	    # directory is already present.
	    os.makedirs(d, exist_ok=True)

	"""
	Upload the EasyOCR model files (all sharing the same base name) together with the font file Ubuntu-Regular.ttf, for example:
	best_norm_ED.pth
	best_norm_ED.py
	best_norm_ED.yaml
	Ubuntu-Regular.ttf

	to models directory

	Upload image files you want to test, examples:
	kz_book_simple.jpeg
	kz_blur.jpg
	kz_book_complex.jpg

	to images directory
	"""

	# TrueType font file expected inside the models directory.
	font_path = f"{models_dir}/Ubuntu-Regular.ttf"

	# Settings for the EasyOCR reader: the custom recognition network
	# 'best_norm_ED' with the CRAFT detector, both the user-network and
	# model-storage locations pointing at the models directory.
	reader_options = dict(
	    gpu=True,
	    recog_network='best_norm_ED',
	    detect_network="craft",
	    user_network_directory=models_dir,
	    model_storage_directory=models_dir,
	)

	# This needs to run only once to load the model into memory.
	reader = easyocr.Reader(['en'], **reader_options)

	# File-name suffixes (compared case-insensitively) that are treated as images.
	image_extensions = (".jpg", ".jpeg", ".png")

	# The font does not depend on the image being processed, so load it once
	# instead of once per image.
	font = ImageFont.truetype(font_path, 32)

	# Run OCR on every supported image in images_dir and write an annotated copy
	# (green bounding boxes, blue recognised text) under the same file name to
	# output_dir.
	for image_name in tqdm(os.listdir(images_dir)):
	    if not image_name.lower().endswith(image_extensions):
	        print(f'unsupported file {image_name}')
	        continue
	    image_path = f'{images_dir}/{image_name}'
	    print(image_path)

	    # Read image as numpy array. cv2.imread signals failure by returning
	    # None rather than raising, so skip files that cannot be decoded.
	    image = cv2.imread(image_path)
	    if image is None:
	        print(f'could not read image {image_path}')
	        continue

	    # Optional: uncomment to rotate the image 90 degrees clockwise before OCR.
	    # image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)

	    # Convert the image from BGR to RGB (because OpenCV loads images in BGR
	    # format). From here on the image stays in RGB order until it is saved.
	    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
	    results = reader.readtext(image=image)

	    # Draw every bounding box first and remember where each label goes.
	    labels = []
	    for bbox, text, _prob in results:
	        # Only two opposite corners are needed for the rectangle.
	        top_left, _top_right, bottom_right, _bottom_left = bbox
	        top_left = (int(top_left[0]), int(top_left[1]))
	        bottom_right = (int(bottom_right[0]), int(bottom_right[1]))

	        # Green is (0, 255, 0) in both RGB and BGR order.
	        cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)

	        # The label is placed 40 pixels above the box's top-left corner.
	        labels.append(((top_left[0], top_left[1] - 40), text))

	    # Text is drawn with PIL so the custom TrueType font can be used. The
	    # array is already RGB, which is what PIL expects, so a single
	    # conversion per image is enough and no channel swap is needed.
	    image_pil = Image.fromarray(image)
	    draw = ImageDraw.Draw(image_pil)
	    for position, text in labels:
	        draw.text(position, text, font=font, fill=(0, 0, 255))

	    # cv2.imwrite expects BGR channel order; convert back exactly once so the
	    # saved file keeps the original colours (previously the RGB array was
	    # written as-is, swapping red and blue in the output).
	    annotated = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)

	    # Save image; cv2.imwrite reports failure through its return value.
	    if not cv2.imwrite(f'{output_dir}/{image_name}', annotated):
	        print(f'could not write image {output_dir}/{image_name}')

	# reader.readtext(image = image, paragraph=True)