# Spaces:
# Runtime error
# Runtime error
import cv2 | |
import numpy as np | |
import onnxruntime as ort | |
import pytesseract | |
from PIL import Image | |
import gradio as gr | |
import torchvision | |
from huggingface_hub import hf_hub_download | |
# Title string for the app.
app_title = "License Plate Object Detection"
#model = ["ml-debi/yolov8_license_plate_detection"]
# Location of the ONNX detection model file.
model_path = "./best.onnx"
# Each example row supplies a single value: the image path.
examples = [
    ["./examples/tesla.jpg"],
    ["./examples/mazda.jpg"],
    ["./examples/mercedes.jpg"],
]
def build_tesseract_options(psm=7):
    """Build the Tesseract config string used for plate OCR.

    The returned string restricts recognition to upper-case ASCII letters
    and digits and selects the page segmentation mode given by ``psm``.
    """
    whitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    return f"-c tessedit_char_whitelist={whitelist} --psm {psm}"
# Cropped image processing | |
def auto_canny(image, sigma=0.33):
    """Run Canny edge detection with thresholds derived from the image median.

    The lower and upper thresholds are placed ``sigma`` (as a fraction)
    below and above the median pixel intensity, clamped to ``[0, 255]``.
    Returns the edge map produced by ``cv2.Canny``.
    """
    median_intensity = np.median(image)
    low_threshold = int(max(0, (1.0 - sigma) * median_intensity))
    high_threshold = int(min(255, (1.0 + sigma) * median_intensity))
    return cv2.Canny(image, low_threshold, high_threshold)
def _to_gray(img):
    """Return a single-channel version of an RGB(A) or already-gray array."""
    if img.ndim == 2:
        return img
    if img.shape[2] == 4:
        return cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
    # PIL / Gradio arrays are RGB, so use the RGB weights rather than BGR.
    return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)


def ocr_image_process(img, sigma, block_size, constant):
    """Isolate character-like regions of a cropped plate for OCR.

    The crop is adaptively thresholded, edges and external contours are
    extracted, and every contour whose bounding box looks like a single
    character (by share of the crop area and by aspect ratio) is kept.
    All other pixels are painted white.

    Args:
        img: Plate crop as a PIL image or a NumPy array (RGB channel order).
        sigma: Spread passed to ``auto_canny``; ``None`` falls back to 0.33.
        block_size: Neighbourhood size for the adaptive threshold; ``None``
            falls back to 41. OpenCV only accepts odd values greater than 1,
            so smaller values are raised to 3 and even values are bumped to
            the next odd number instead of crashing.
        constant: Value subtracted from the local mean; ``None`` falls
            back to 1.

    Returns:
        The masked crop as a single-channel array.
    """
    # np.array handles both PIL images and arrays, and always copies.
    img = np.array(img)
    gray = _to_gray(img)

    block = 41 if block_size is None else int(block_size)
    block = max(3, block)
    if block % 2 == 0:
        block += 1
    offset = 1 if constant is None else int(constant)

    thresh_inv = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV,
        block, offset)
    edges = auto_canny(thresh_inv, 0.33 if sigma is None else float(sigma))
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    img_area = img.shape[0] * img.shape[1]
    # Start from an all-white mask; character boxes are punched out in black.
    mask = np.full(img.shape, 255, dtype="uint8")
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        area_share = (w * h) / img_area
        if not 0.015 <= area_share < 0.09:
            continue
        # Characters are taller than wide, but not more than 3x.
        if h > 1.2 * w and 3 * w >= h:
            cv2.rectangle(mask, (x, y), (x + w, y + h), (0, 0, 0), -1)

    # OR with the mask: white stays white, black lets the original through.
    masked = cv2.bitwise_or(img, mask)
    return _to_gray(masked)
def get_detections(image_path, size, ort_session):
    """Run the detector on an image and return its raw outputs.

    Args:
        image_path: Either a file path (``str``) or an image held in a
            NumPy array.
        size: Side length, in pixels, of the square model input.
        ort_session: Object whose ``run`` method accepts an ``images`` input.

    Returns:
        ``(image, outputs, scale_x, scale_y)`` where ``image`` is the RGB PIL
        image at its original resolution, ``outputs`` is what
        ``ort_session.run`` returned, and the scale factors map coordinates
        in the resized ``size`` x ``size`` input back to the original image.

    Raises:
        ValueError: If ``image_path`` is neither a ``str`` nor an ndarray.
    """
    if isinstance(image_path, str):
        # Convert in memory instead of writing a .jpg next to the input:
        # this drops any alpha channel or palette for every file type and
        # leaves no file on disk. The handle is closed once pixels are loaded.
        with Image.open(image_path) as opened:
            image = opened.convert('RGB')
    elif isinstance(image_path, np.ndarray):
        # Normalise to three channels so RGBA or gray arrays still fit the model.
        image = Image.fromarray(image_path).convert('RGB')
    else:
        raise ValueError(
            "image_path must be a file path (str) or a NumPy array.")

    scale_x = image.width / size
    scale_y = image.height / size
    resized_image = image.resize((size, size))

    transform = torchvision.transforms.ToTensor()
    input_tensor = transform(resized_image).unsqueeze(0)
    outputs = ort_session.run(None, {'images': input_tensor.numpy()})
    return image, outputs, scale_x, scale_y
def non_maximum_supression(outputs, min_confidence):
    """Pick the single highest-confidence candidate from the model output.

    ``outputs[0][0]`` is indexed as a 2-D array whose row 4 holds the
    confidences and whose columns are candidates. The best column is
    returned when its confidence is strictly greater than
    ``min_confidence`` (0.5 when ``None``); otherwise ``None`` is returned.
    """
    threshold = 0.5 if min_confidence is None else min_confidence
    candidates = outputs[0][0]
    scores = candidates[4]
    best = np.argmax(scores)
    if scores[best] > threshold:
        return candidates[:, best]
    return None
_EASYOCR_READER = None


def _get_easyocr_reader():
    """Return a shared English easyocr reader, creating it on first use."""
    global _EASYOCR_READER
    if _EASYOCR_READER is None:
        import easyocr  # imported lazily: only needed for this OCR option
        _EASYOCR_READER = easyocr.Reader(['en'])
    return _EASYOCR_READER


def drawings(image, boxes, scale_x, scale_y, sigma, block_size, constant, ocr):
    """Crop the detected plate, OCR it, and draw the detection on the image.

    Args:
        image: PIL image at its original resolution.
        boxes: Five values ``(x, y, w, h, c)``: box centre, box size and
            confidence, in the coordinates of the resized model input.
        scale_x, scale_y: Factors that map model-input coordinates back to
            ``image`` coordinates.
        sigma, block_size, constant: Forwarded to ``ocr_image_process``.
        ocr: ``"easyocr"`` selects easyocr; any other value uses pytesseract.

    Returns:
        ``(annotated, plate_crop, processed_crop, text)`` where ``annotated``
        is a NumPy array with the box and confidence drawn on it,
        ``plate_crop`` is the PIL crop, ``processed_crop`` is the output of
        ``ocr_image_process`` and ``text`` is the recognised string.
    """
    x, y, w, h, c = boxes
    # Centre/size -> corner coordinates, rescaled to the original image.
    x_min, y_min = (x - w / 2) * scale_x, (y - h / 2) * scale_y
    x_max, y_max = (x + w / 2) * scale_x, (y + h / 2) * scale_y

    license_plate_image = image.crop((x_min, y_min, x_max, y_max))
    processed_cropped_image = ocr_image_process(license_plate_image, sigma, block_size, constant)

    if ocr == "easyocr":
        # Reuse one reader: building it reloads its models every time.
        result = _get_easyocr_reader().readtext(processed_cropped_image)
        license_plate_text = result[0][1].upper() if result else "No result found"
    else:
        # Tesseract appends a newline/form feed; strip it for display.
        license_plate_text = pytesseract.image_to_string(
            processed_cropped_image,
            config=build_tesseract_options(7)).strip()
    print(license_plate_text)

    # Scale the label with the image so it stays legible on large inputs.
    font_scale = 0.001 * max(image.size)
    top_left = (int(x_min), int(y_min))
    bottom_right = (int(x_max), int(y_max))
    image = cv2.rectangle(np.array(image), top_left, bottom_right, (0, 0, 255), 3)
    cv2.putText(image, f'Confidence: {c:.2f}', top_left, cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 1)
    return image, license_plate_image, processed_cropped_image, license_plate_text
def yolo_predictions(image_path, size, sigma, block_size, constant, min_confidence, ort_session, ocr):
    """Detect a license plate in an image and read its text.

    Runs ``get_detections``, keeps the best candidate via
    ``non_maximum_supression`` and hands it to ``drawings``.

    Returns:
        ``(result_img, license_plate_image, processed_cropped_image,
        license_plate_text)``. When no candidate exceeds ``min_confidence``
        the unannotated image is returned as a NumPy array, both plate
        images are ``None`` and the text says that nothing was detected.
    """
    image, outputs, scale_x, scale_y = get_detections(
        image_path, size, ort_session)
    best_box = non_maximum_supression(outputs, min_confidence)
    if best_box is None:
        # Nothing above the threshold: unpacking None downstream would raise.
        return np.array(image), None, None, "No license plate detected"
    return drawings(
        image, best_box, scale_x, scale_y, sigma, block_size, constant, ocr)
_ORT_SESSIONS = {}


def _get_ort_session(path):
    """Return the inference session for ``path``, loading it only once."""
    session = _ORT_SESSIONS.get(path)
    if session is None:
        session = ort.InferenceSession(path)
        _ORT_SESSIONS[path] = session
    return session


def predict(image, ocr, sigma, block_size, constant, min_confidence):
    """Gradio callback: detect the plate in ``image`` and OCR it.

    Args:
        image: Input image as delivered by the Gradio image component.
        ocr: OCR method selected in the UI.
        sigma, block_size, constant: Preprocessing parameters for the crop.
        min_confidence: Minimum detection confidence.

    Returns:
        ``(result_img, processed_cropped_image, license_plate_text)``.
    """
    size = 640
    # Reuse the session: rebuilding it reloads the model on every request.
    ort_session = _get_ort_session(model_path)
    result_img, _, processed_cropped_image, license_plate_text = yolo_predictions(
        image, size, sigma, block_size, constant, min_confidence, ort_session, ocr)
    return result_img, processed_cropped_image, license_plate_text
# Add output license plate text, and add examples and description
# Inputs, in the order `predict` receives them: image, OCR method, Canny
# sigma, adaptive-threshold block size and constant, minimum confidence.
_input_components = [
    "image",
    gr.Dropdown(choices=['pytesseract', 'easyocr'], value="pytesseract", label='OCR Method'),
    gr.Slider(minimum=0, maximum=1, step=0.01, value=0.33, label='Sigma for Auto Canny'),
    gr.Number(value=41, label='Block Size for Adaptive Threshold'),
    gr.Number(value=1, label='Constant for Adaptive Threshold'),
    gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label='Minimum Confidence for NMS'),
]
# Outputs, in the order `predict` returns them.
_output_components = [
    gr.Image(label="Predicted image"),
    gr.Image(label="Processed license plate image"),
    gr.Textbox(label="Predicted license plate number"),
]
iface = gr.Interface(
    fn=predict,
    inputs=_input_components,
    outputs=_output_components,
    examples=examples,
)
iface.launch()