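# Page residue from the hosting site, kept as comments so the file parses:
#   author avatar caption: "ml-debi's picture"
#   last commit: "Update app.py" (616da57)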
import cv2
import numpy as np
import onnxruntime as ort
import pytesseract
from PIL import Image
import gradio as gr
import torchvision
from huggingface_hub import hf_hub_download
# Human-readable title of the demo.
app_title = "License Plate Object Detection"

# Presumably the Hub repo the weights originate from; kept for reference only:
#   "ml-debi/yolov8_license_plate_detection"

# Local path of the ONNX detector weights.
model_path = "./best.onnx"

# One row per example; each row holds the value for the first (image) input.
examples = [
    ["./examples/tesla.jpg"],
    ["./examples/mazda.jpg"],
    ["./examples/mercedes.jpg"],
]
def build_tesseract_options(psm=7):
    """Return the Tesseract config string used for plate OCR.

    The string restricts recognition to upper-case letters and digits and
    selects the page segmentation mode given by ``psm``.
    """
    allowed_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    whitelist_flag = "-c tessedit_char_whitelist={}".format(allowed_chars)
    psm_flag = " --psm {}".format(psm)
    return whitelist_flag + psm_flag
# Cropped image processing
def auto_canny(image, sigma=0.33):
    """Run Canny edge detection with thresholds derived from the image median.

    The lower/upper thresholds are ``(1 - sigma)`` and ``(1 + sigma)`` times
    the median pixel value, clamped to ``[0, 255]`` and truncated to int.

    Returns the edge map produced by ``cv2.Canny``.
    """
    median = np.median(image)
    low_threshold = int(max(0, (1.0 - sigma) * median))
    high_threshold = int(min(255, (1.0 + sigma) * median))
    return cv2.Canny(image, low_threshold, high_threshold)
def ocr_image_process(img, sigma, block_size, constant):
    """Prepare a cropped plate image for OCR by whitening everything that
    does not look like a character.

    Steps: grayscale -> inverted adaptive mean threshold -> auto Canny ->
    external contours. Contours whose bounding box covers between 1.5% and
    9% of the image area and is taller than wide (1.2*w < h <= 3*w) are
    kept; every pixel outside those boxes is forced to white.

    Args:
        img: PIL image or NumPy array. Colour input is treated as RGB, which
            is what the PIL crop passed by ``drawings`` provides.
        sigma: sigma for ``auto_canny``; ``None`` means 0.33.
        block_size: neighbourhood size for the adaptive threshold; ``None``
            means 41. Values that are even or below 3 are bumped to the next
            valid odd size instead of letting OpenCV raise.
        constant: constant subtracted by the adaptive threshold; ``None``
            means 1.

    Returns:
        A single-channel image with the kept regions preserved and the rest
        white.
    """
    # One conversion covers both PIL images and arrays (the original went
    # ndarray -> PIL -> ndarray for array input).
    img = np.array(img)
    is_color = img.ndim == 3

    # Already-grayscale (2-D) input cannot go through cvtColor.
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) if is_color else img

    # cv2.adaptiveThreshold only accepts odd block sizes greater than 1; the
    # UI field lets the user type any number, so normalise it here.
    block = 41 if block_size is None else int(block_size)
    block = max(block, 3)
    if block % 2 == 0:
        block += 1
    offset = 1 if constant is None else int(constant)
    canny_sigma = 0.33 if sigma is None else float(sigma)

    thresh_inv = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV,
        block, offset)
    edges = auto_canny(thresh_inv, canny_sigma)
    contours, _ = cv2.findContours(
        edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    img_area = img.shape[0] * img.shape[1]

    # White mask; kept regions are painted black so that the bitwise OR
    # below leaves the original pixels there and turns the rest white.
    mask = np.full(img.shape, 255, dtype="uint8")
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        roi_ratio = (w * h) / img_area
        if 0.015 <= roi_ratio < 0.09 and 1.2 * w < h <= 3 * w:
            cv2.rectangle(mask, (x, y), (x + w, y + h), (0, 0, 0), -1)

    result = cv2.bitwise_or(img, mask)
    if is_color:
        result = cv2.cvtColor(result, cv2.COLOR_RGB2GRAY)
    return result
def get_detections(image_path, size, ort_session):
    """Run the ONNX detector on a single image.

    Args:
        image_path: path to an image file (``str``) or a NumPy array.
        size: side length, in pixels, of the square the image is resized to
            before inference.
        ort_session: object whose ``run(None, {'images': array})`` performs
            the inference.

    Returns:
        ``(image, outputs, scale_x, scale_y)`` where ``image`` is the
        original-size RGB PIL image, ``outputs`` is whatever
        ``ort_session.run`` returned, and ``scale_x`` / ``scale_y`` map
        coordinates in the resized image back to the original image.

    Raises:
        ValueError: if ``image_path`` is neither a ``str`` nor a NumPy array.
    """
    if isinstance(image_path, str):
        # convert() decodes the pixels and returns a new image, so the file
        # handle can be closed right away. Converting in memory replaces the
        # old PNG branch, which wrote a lossy .jpg next to the input and
        # relied on an `os` module that is not imported.
        with Image.open(image_path) as opened:
            image = opened.convert('RGB')
    elif isinstance(image_path, np.ndarray):
        # Normalise grayscale / RGBA arrays to three channels as well.
        image = Image.fromarray(image_path).convert('RGB')
    else:
        raise ValueError(
            "image_path must be a file path (str) or a NumPy array.")

    scale_x = image.width / size
    scale_y = image.height / size
    resized_image = image.resize((size, size))

    transform = torchvision.transforms.ToTensor()
    # unsqueeze(0) adds the leading batch dimension.
    input_tensor = transform(resized_image).unsqueeze(0)
    outputs = ort_session.run(None, {'images': input_tensor.numpy()})
    return image, outputs, scale_x, scale_y
def non_maximum_supression(outputs, min_confidence):
    """Select the single most confident detection from the model output.

    Despite the name, no overlap-based suppression is done: the candidate
    with the highest confidence is returned if it exceeds the threshold.

    Args:
        outputs: model outputs; ``outputs[0][0]`` is indexed as a 2-D array
            with one column per candidate and the confidence in row 4.
        min_confidence: threshold the best confidence must strictly exceed;
            ``None`` means 0.5.

    Returns:
        The winning candidate's column, or ``None`` when there are no
        candidates or none exceeds the threshold.
    """
    if min_confidence is None:
        min_confidence = 0.5

    boxes = outputs[0][0]
    confidences = boxes[4]

    # np.argmax raises ValueError on an empty array.
    if np.size(confidences) == 0:
        return None

    best_index = np.argmax(confidences)
    if confidences[best_index] > min_confidence:
        return boxes[:, best_index]
    return None
def drawings(image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr):
    """Crop the detected plate, OCR it, and annotate the image.

    Args:
        image: PIL image at its original size.
        boxes: ``(x, y, w, h, c)`` for the chosen detection, with ``x, y``
            used as the box centre in resized-image coordinates and ``c``
            the confidence; ``None`` when nothing was detected.
        scale_x, scale_y: factors mapping resized-image coordinates back to
            ``image``.
        sigma, block_size, constant: forwarded to ``ocr_image_process``.
        ocr: ``"easyocr"`` selects EasyOCR; any other value uses Tesseract.

    Returns:
        ``(annotated_image, plate_crop, processed_crop, text)``. When
        ``boxes`` is ``None`` the image is returned unannotated as an array,
        both crops are ``None`` and ``text`` says no plate was detected.
    """
    if boxes is None:
        # Nothing passed the confidence threshold; unpacking None below
        # would raise TypeError, so report the miss instead.
        return np.array(image), None, None, "No license plate detected"

    x, y, w, h, c = boxes
    # Centre/size -> corner coordinates, scaled to the original image.
    x_min, y_min = (x - w / 2) * scale_x, (y - h / 2) * scale_y
    x_max, y_max = (x + w / 2) * scale_x, (y + h / 2) * scale_y

    license_plate_image = image.crop((x_min, y_min, x_max, y_max))
    processed_cropped_image = ocr_image_process(
        license_plate_image, sigma, block_size, constant)

    if ocr == "easyocr":
        # Imported lazily so the dependency is only needed when selected.
        import easyocr
        reader = easyocr.Reader(['en'])
        result = reader.readtext(processed_cropped_image)
        try:
            # Only the text of the first recognised region is used.
            license_plate_text = str.upper(result[0][1])
        except IndexError:
            license_plate_text = "No result found"
    else:
        options = build_tesseract_options(7)
        # Strip surrounding whitespace so only the recognised characters
        # reach the caller.
        license_plate_text = pytesseract.image_to_string(
            processed_cropped_image,
            config=options).strip()
    print(license_plate_text)

    # Scale the label font with the image so it stays legible.
    font_scale = 0.001 * max(image.size)
    top_left = (int(x_min), int(y_min))
    bottom_right = (int(x_max), int(y_max))
    image = cv2.rectangle(np.array(image), top_left, bottom_right, (0, 0, 255), 3)
    cv2.putText(image, f'Confidence: {c:.2f}', top_left,
                cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 1)
    return image, license_plate_image, processed_cropped_image, license_plate_text
def yolo_predictions(image_path, size, sigma, block_size, constant, min_confidence, ort_session, ocr):
    """Run the whole pipeline for one image.

    Detects with ``get_detections``, picks a box with
    ``non_maximum_supression`` (which may yield ``None``; it is forwarded
    unchanged), then hands everything to ``drawings``.

    Returns the four values produced by ``drawings``: annotated image,
    plate crop, processed crop, and recognised text.
    """
    detection = get_detections(image_path, size, ort_session)
    image, outputs, scale_x, scale_y = detection

    best_box = non_maximum_supression(outputs, min_confidence)

    annotated, plate_crop, processed_crop, plate_text = drawings(
        image, best_box, scale_x, scale_y, sigma, block_size, constant, ocr)
    return annotated, plate_crop, processed_crop, plate_text
# ONNX Runtime sessions keyed by model path, so the model file is loaded
# once per process instead of once per request.
_ORT_SESSIONS = {}


def _get_ort_session(path):
    """Return the cached inference session for ``path``, creating it on first use."""
    session = _ORT_SESSIONS.get(path)
    if session is None:
        session = ort.InferenceSession(path)
        _ORT_SESSIONS[path] = session
    return session


def predict(image, ocr, sigma, block_size, constant, min_confidence):
    """Gradio callback: detect the plate in ``image`` and read its text.

    Args:
        image: input image as delivered by the Gradio image component.
        ocr: OCR backend name chosen in the dropdown.
        sigma, block_size, constant: OCR preprocessing parameters.
        min_confidence: minimum detection confidence.

    Returns:
        ``(annotated_image, processed_plate_image, plate_text)``.
    """
    size = 640  # side length the image is resized to before inference
    # NOTE(review): the session is shared across requests; ONNX Runtime
    # documents concurrent run() calls on one session as supported - confirm
    # for the deployed version.
    ort_session = _get_ort_session(model_path)
    result_img, _, processed_cropped_image, license_plate_text = yolo_predictions(
        image, size, sigma, block_size, constant, min_confidence, ort_session, ocr)
    return result_img, processed_cropped_image, license_plate_text
# Gradio UI. The input components are passed to predict() positionally in
# the order listed; the three outputs receive predict()'s return tuple.
# TODO: add a description.
iface = gr.Interface(
    fn=predict,
    # app_title was defined at module level but never shown anywhere.
    title=app_title,
    inputs=[
        "image",
        gr.Dropdown(choices=['pytesseract', 'easyocr'], value="pytesseract", label='OCR Method'),
        gr.Slider(minimum=0, maximum=1, step=0.01, value=0.33, label='Sigma for Auto Canny'),
        gr.Number(value=41, label='Block Size for Adaptive Threshold'),
        gr.Number(value=1, label='Constant for Adaptive Threshold'),
        gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label='Minimum Confidence for NMS'),
    ],
    outputs=[
        gr.Image(label="Predicted image"),
        gr.Image(label="Processed license plate image"),
        gr.Textbox(label="Predicted license plate number"),
    ],
    examples=examples,
)
# Launched at module level (no __main__ guard) to keep the existing
# behaviour of starting the app whenever this file is executed.
iface.launch()