Spaces:

ml-debi
/

yolo-license-plate-detection

Runtime error

App Files Files Community

yolo-license-plate-detection / app.py

ml-debi

Update app.py

616da57 11 months ago

raw

history blame contribute delete

7.83 kB

	import cv2
	import numpy as np
	import onnxruntime as ort
	import pytesseract
	from PIL import Image
	import gradio as gr
	import torchvision
	from huggingface_hub import hf_hub_download


	# Human-readable title of the demo.
	app_title = "License Plate Object Detection"
	# Presumably the Hub model id the weights originate from; kept for reference
	# only, since the app loads the local ONNX file below.
	# model = ["ml-debi/yolov8_license_plate_detection"]
	# Location of the ONNX weights, relative to the working directory.
	model_path = "./best.onnx"

	# Sample inputs offered in the UI: one single-element row per image.
	examples = [
	    ["./examples/tesla.jpg"],
	    ["./examples/mazda.jpg"],
	    ["./examples/mercedes.jpg"],
	]


	def build_tesseract_options(psm=7):
	    """Build the Tesseract config string used for license plate OCR.

	    Args:
	        psm: Value placed after ``--psm`` (Tesseract page segmentation mode).

	    Returns:
	        A config string that whitelists the characters A-Z and 0-9 and
	        selects the requested page segmentation mode.
	    """
	    # Plates only contain upper-case letters and digits, so restrict the
	    # recogniser to those characters.
	    allowed_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
	    return f"-c tessedit_char_whitelist={allowed_chars} --psm {psm}"

	# Cropped image processing
	def auto_canny(image, sigma=0.33):
	    """Run Canny edge detection with thresholds derived from the median.

	    Args:
	        image: Single-channel image passed straight to ``cv2.Canny``.
	        sigma: Fractional band around the median pixel intensity; the lower
	            and upper thresholds are ``(1 - sigma)`` and ``(1 + sigma)``
	            times the median, clamped to the range 0..255.

	    Returns:
	        The edge map produced by ``cv2.Canny``.
	    """
	    median_intensity = np.median(image)

	    # Clamp both thresholds so they stay inside the 8-bit intensity range.
	    low_threshold = int(max(0, (1.0 - sigma) * median_intensity))
	    high_threshold = int(min(255, (1.0 + sigma) * median_intensity))

	    return cv2.Canny(image, low_threshold, high_threshold)



	def ocr_image_process(img, sigma, block_size, constant):
	    """Blank out everything in a plate crop except character-like regions.

	    The crop is adaptively thresholded and edge-detected; every external
	    contour whose bounding box has a character-like relative area and aspect
	    ratio is kept, and all remaining pixels are forced to white.

	    Args:
	        img: Plate crop as a PIL image or a 3-channel ``uint8`` NumPy array.
	        sigma: Sigma forwarded to ``auto_canny``; ``None`` means 0.33.
	        block_size: Neighbourhood size for the adaptive threshold; ``None``
	            means 41. OpenCV only accepts odd values greater than 1, so even
	            or too-small values are bumped to the next valid odd size.
	        constant: Constant subtracted by the adaptive threshold; ``None``
	            means 1.

	    Returns:
	        A single-channel NumPy array with non-character areas set to white.
	    """
	    if isinstance(img, Image.Image):
	        img = np.array(img)

	    sigma = 0.33 if sigma is None else float(sigma)
	    constant = 1 if constant is None else int(constant)
	    block_size = 41 if block_size is None else int(block_size)
	    # cv2.adaptiveThreshold rejects even or <3 block sizes; coerce instead of
	    # crashing on a value typed into the UI.
	    block_size = max(3, block_size | 1)

	    # NOTE(review): crops produced via PIL are RGB, so COLOR_BGR2GRAY swaps
	    # the red/blue weights. Left unchanged to keep the preprocessing output
	    # identical -- confirm whether COLOR_RGB2GRAY was intended.
	    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	    thresh_inv = cv2.adaptiveThreshold(
	        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV,
	        block_size, constant)
	    edges = auto_canny(thresh_inv, sigma)
	    contours, _ = cv2.findContours(
	        edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

	    img_area = img.shape[0] * img.shape[1]
	    # Start from an all-white mask; character boxes are punched out in black
	    # so the bitwise OR below keeps the image only inside those boxes.
	    mask = np.ones(img.shape, dtype="uint8") * 255

	    # Drawing filled boxes is order-independent, so contours need no sorting.
	    for contour in contours:
	        x, y, w, h = cv2.boundingRect(contour)
	        roi_ratio = (w * h) / img_area
	        # Keep boxes that cover 1.5%-9% of the crop and are taller than wide
	        # (between 1.2x and 3x), which is what a plate character looks like.
	        if 0.015 <= roi_ratio < 0.09 and 1.2 * w < h <= 3 * w:
	            cv2.rectangle(mask, (x, y), (x + w, y + h), (0, 0, 0), -1)

	    img = cv2.bitwise_or(img, mask)
	    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)


	def get_detections(image_path, size, ort_session):
	    """Load an image and run the ONNX detector on it.

	    Args:
	        image_path: Either a file path (``str``) or a NumPy array holding
	            the image.
	        size: Side length in pixels of the square the image is resized to
	            before inference.
	        ort_session: Session object whose ``run(None, {'images': array})``
	            returns the raw model outputs.

	    Returns:
	        A tuple ``(image, outputs, scale_x, scale_y)`` where ``image`` is the
	        full-resolution RGB PIL image, ``outputs`` is whatever
	        ``ort_session.run`` returned, and the two scales map coordinates in
	        the resized square back to the original image.

	    Raises:
	        ValueError: If ``image_path`` is neither a ``str`` nor a NumPy array.
	    """
	    if isinstance(image_path, str):
	        image = Image.open(image_path)
	    elif isinstance(image_path, np.ndarray):
	        image = Image.fromarray(image_path)
	    else:
	        raise ValueError(
	            "image_path must be a file path (str) or a NumPy array.")

	    # Drop alpha / expand grayscale in memory. This replaces the former
	    # "re-save PNG as JPG" step, which needed an un-imported `os`, wrote a
	    # file next to the input and re-encoded the pixels lossily.
	    if image.mode != "RGB":
	        image = image.convert("RGB")

	    scale_x = image.width / size
	    scale_y = image.height / size
	    resized_image = image.resize((size, size))
	    # ToTensor yields a channels-first float tensor; unsqueeze adds the
	    # batch axis before handing the data to ONNX Runtime as a NumPy array.
	    transform = torchvision.transforms.ToTensor()
	    input_tensor = transform(resized_image).unsqueeze(0)
	    outputs = ort_session.run(None, {'images': input_tensor.numpy()})
	    return image, outputs, scale_x, scale_y


	def non_maximum_supression(outputs, min_confidence):
	    """Pick the single highest-confidence candidate from the model output.

	    Despite the name, no overlap-based suppression happens here: only the
	    top-scoring column of ``outputs[0][0]`` is considered.

	    Args:
	        outputs: Model outputs; ``outputs[0][0]`` is a 2-D array whose row 4
	            holds the confidences and whose columns are the candidates.
	        min_confidence: Score the best candidate must exceed; ``None`` is
	            treated as 0.5.

	    Returns:
	        The best candidate's column, or ``None`` when its confidence does
	        not exceed the threshold.
	    """
	    threshold = 0.5 if min_confidence is None else min_confidence
	    predictions = outputs[0][0]
	    scores = predictions[4]
	    best = np.argmax(scores)
	    if scores[best] > threshold:
	        return predictions[:, best]
	    return None


	def drawings(image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr):
	    """Crop the detected plate, OCR it and draw the detection on the image.

	    Args:
	        image: Full-resolution PIL image the detection refers to.
	        boxes: Five values ``(x, y, w, h, c)``: box centre, box size and
	            confidence, expressed in the resized model-input coordinates.
	        scale_x: Horizontal factor mapping model coordinates to ``image``.
	        scale_y: Vertical factor mapping model coordinates to ``image``.
	        sigma: Forwarded to ``ocr_image_process``.
	        block_size: Forwarded to ``ocr_image_process``.
	        constant: Forwarded to ``ocr_image_process``.
	        ocr: ``"easyocr"`` selects EasyOCR; any other value uses Tesseract.

	    Returns:
	        A tuple ``(annotated, plate_crop, processed_crop, text)``: the image
	        as a NumPy array with the box and confidence drawn on it, the PIL
	        crop of the plate, the preprocessed crop fed to OCR, and the
	        recognised text.
	    """
	    x, y, w, h, c = boxes
	    # Convert centre/size to corner coordinates in original-image pixels.
	    x_min, y_min = (x - w / 2) * scale_x, (y - h / 2) * scale_y
	    x_max, y_max = (x + w / 2) * scale_x, (y + h / 2) * scale_y
	    license_plate_image = image.crop((x_min, y_min, x_max, y_max))
	    processed_cropped_image = ocr_image_process(
	        license_plate_image, sigma, block_size, constant)

	    if ocr == "easyocr":
	        # Imported lazily so the dependency is only needed when selected.
	        import easyocr
	        reader = easyocr.Reader(['en'])
	        result = reader.readtext(processed_cropped_image)
	        # Each result entry carries the recognised string at index 1.
	        license_plate_text = result[0][1].upper() if result else "No result found"
	    else:
	        raw_text = pytesseract.image_to_string(
	            processed_cropped_image,
	            config=build_tesseract_options(7))
	        # Tesseract appends a newline/form feed; strip it so the text box
	        # shows only the plate characters.
	        license_plate_text = raw_text.strip()
	    print(license_plate_text)

	    # Scale the label with the image so it stays readable on large inputs.
	    font_scale = 0.001 * max(image.size)

	    image = cv2.rectangle(
	        np.array(image),
	        (int(x_min), int(y_min)), (int(x_max), int(y_max)),
	        (0, 0, 255), 3)
	    cv2.putText(
	        image, f'Confidence: {c:.2f}', (int(x_min), int(y_min)),
	        cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 1)

	    return image, license_plate_image, processed_cropped_image, license_plate_text


	def yolo_predictions(image_path, size, sigma, block_size, constant, min_confidence, ort_session, ocr):
	    """Run detection, pick the best box and OCR the plate it contains.

	    Args:
	        image_path: File path or NumPy array, as accepted by
	            ``get_detections``.
	        size: Side length of the square model input.
	        sigma: Forwarded to the OCR preprocessing.
	        block_size: Forwarded to the OCR preprocessing.
	        constant: Forwarded to the OCR preprocessing.
	        min_confidence: Threshold handed to ``non_maximum_supression``.
	        ort_session: Inference session used by ``get_detections``.
	        ocr: OCR backend name handed to ``drawings``.

	    Returns:
	        A tuple ``(result_img, license_plate_image, processed_cropped_image,
	        license_plate_text)``. When no candidate exceeds the threshold the
	        unannotated image is returned with ``None`` for both crops and an
	        explanatory message as the text.
	    """
	    image, outputs, scale_x, scale_y = get_detections(
	        image_path, size, ort_session)
	    boxes = non_maximum_supression(outputs, min_confidence)

	    # Nothing confident enough was found: return a well-formed result
	    # instead of letting `drawings` fail while unpacking None.
	    if boxes is None:
	        return np.array(image), None, None, "No license plate detected"

	    return drawings(
	        image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr)


	# Inference sessions keyed by model path, so the ONNX file is loaded once
	# instead of on every request.
	_ORT_SESSIONS = {}


	def predict(image, ocr, sigma, block_size, constant, min_confidence):
	    """Gradio callback: detect the plate in ``image`` and read its text.

	    Args:
	        image: Input image as delivered by the Gradio image component.
	        ocr: Selected OCR backend name.
	        sigma: Sigma for the automatic Canny step.
	        block_size: Block size for the adaptive threshold.
	        constant: Constant for the adaptive threshold.
	        min_confidence: Minimum detection confidence.

	    Returns:
	        A tuple ``(result_img, processed_cropped_image, license_plate_text)``
	        matching the three output components of the interface.
	    """
	    size = 640

	    ort_session = _ORT_SESSIONS.get(model_path)
	    if ort_session is None:
	        ort_session = ort.InferenceSession(model_path)
	        _ORT_SESSIONS[model_path] = ort_session

	    result_img, _, processed_cropped_image, license_plate_text = yolo_predictions(
	        image, size, sigma, block_size, constant, min_confidence, ort_session, ocr)

	    return result_img, processed_cropped_image, license_plate_text


	# Add output license plate text, and add examples and description
	# Input widgets, in the same order as the parameters of `predict`.
	input_components = [
	    "image",
	    gr.Dropdown(choices=['pytesseract', 'easyocr'], value="pytesseract", label='OCR Method'),
	    gr.Slider(minimum=0, maximum=1, step=0.01, value=0.33, label='Sigma for Auto Canny'),
	    gr.Number(value=41, label='Block Size for Adaptive Threshold'),
	    gr.Number(value=1, label='Constant for Adaptive Threshold'),
	    gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label='Minimum Confidence for NMS'),
	]

	# Output widgets, in the same order as the tuple returned by `predict`.
	output_components = [
	    gr.Image(label="Predicted image"),
	    gr.Image(label="Processed license plate image"),
	    gr.Textbox(label="Predicted license plate number"),
	]

	iface = gr.Interface(
	    fn=predict,
	    inputs=input_components,
	    outputs=output_components,
	    examples=examples,
	)
	iface.launch()