"""Segmentation, bounding-box detection and contour-profile feature extraction.

The pipeline implemented by :func:`final_features` is:

1. rotate the input so the longest side of the detected object's minimum-area
   rectangle is horizontal (:func:`rotate_image`),
2. detect a bounding box with the fine-tuned YOLO weights (:func:`pred_bbox`),
3. segment the boxed region with BiRefNet (:func:`pred_segmentation`),
4. normalise the largest mask contour to a ``FEATURE_SIZE`` grid
   (:func:`get_kps_from_pil`),
5. sample four directional profiles of that contour
   (:func:`extract_features`).
"""
import math
from functools import lru_cache

import cv2
import imutils
import numpy as np
import torch
from matplotlib import pyplot as plt
from PIL import Image
from sklearn.preprocessing import MinMaxScaler
from torchvision import transforms
from transformers import AutoModelForImageSegmentation
from ultralytics import YOLO

from models.birefnet import BiRefNet
from util.utils import check_state_dict

# NOTE: several of the imports above are not used in this module; they are
# kept because other modules may rely on them being importable from here.

# Side length of the square grid the contour is normalised to; also the
# number of samples in every directional profile.
FEATURE_SIZE = 300

# Weights used for both box detection and mask prediction.
YOLO_WEIGHTS_PATH = 'models/weights/yolo_finetuned.pt'

device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForImageSegmentation.from_pretrained(
    'zhengpeng7/BiRefNet', trust_remote_code=True
)
# NOTE: the model is intentionally left on its default device. Enabling
# ``model.to(device)`` also requires moving ``input_images`` in
# ``pred_segmentation`` to the same device.
# model.to(device)

# Input preprocessing for the segmentation model.
transform_image = transforms.Compose([
    transforms.Resize((1024, 1024)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])


@lru_cache(maxsize=1)
def _load_yolo():
    """Load the fine-tuned YOLO model once and reuse it on later calls.

    NOTE(review): the cached instance is shared by every caller; if this
    module is used from several threads, confirm that sharing is acceptable.
    """
    return YOLO(YOLO_WEIGHTS_PATH)


def pred_segmentation(image, box=(-1, -1, -1, -1)):
    """Segment the region of ``image`` inside ``box``.

    Args:
        image: PIL image to segment.
        box: ``(left, top, right, bottom)`` in image pixels. Any coordinate
            equal to ``-1`` is replaced by the matching full-image bound
            ``(0, 0, width, height)``. The argument is never modified.

    Returns:
        The image produced by ``transforms.ToPILImage`` from a canvas with
        the shape of the model prediction, zero everywhere except inside the
        (rescaled) box, where it holds the sigmoid prediction.
    """
    print('predicting segmentation...')
    width, height = image.size[:2]
    full_frame = (0, 0, width, height)
    # Build a private list so neither the default nor a caller-owned
    # sequence is mutated between calls.
    box = [
        full_frame[index] if value == -1 else value
        for index, value in enumerate(box)
    ]

    image_crop = image.crop(tuple(box))
    input_images = transform_image(image_crop).unsqueeze(0)

    model.eval()
    with torch.no_grad():
        preds = model(input_images)[-1].sigmoid()
    pred = preds[0].squeeze()

    canvas = torch.zeros_like(pred)
    # Map box coordinates from image pixels to canvas pixels: even indices
    # are x values (scaled by width), odd indices are y values (by height).
    scale = (canvas.shape[-1] / width, canvas.shape[-2] / height)
    left, top, right, bottom = (
        int(round(value * scale[index % 2]))
        for index, value in enumerate(box)
    )
    # Resize the prediction for the crop to the box's footprint on the canvas.
    pred = torch.nn.functional.interpolate(
        pred.unsqueeze(0).unsqueeze(0),
        size=(bottom - top, right - left),
        mode='bilinear',
        align_corners=True,
    ).squeeze()
    canvas[top:bottom, left:right] = pred

    return transforms.ToPILImage()(canvas)


def pred_bbox(image):
    """Return the first YOLO detection box as ``[x1, y1, x2, y2]`` ints.

    Args:
        image: RGB image convertible with ``np.array`` to an ``(H, W, 3)``
            array; its channel order is reversed before inference.

    Raises:
        IndexError: if the model returns no boxes.
    """
    print('predicting bounding box...')
    rgb = np.array(image)
    # Reverse the channel axis (RGB -> BGR).
    bgr = rgb[:, :, ::-1].copy()

    results = _load_yolo()(bgr)
    first_box = results[0].boxes.xyxy.cpu().numpy()[0]
    x1, y1, x2, y2 = map(int, first_box)
    return [x1, y1, x2, y2]


def get_kps_from_pil(pil_image):
    """Return the largest external contour of a mask, scaled to the grid.

    Each axis is min-max scaled independently, so the contour is stretched
    to span ``0 .. FEATURE_SIZE - 1`` in both x and y.

    Args:
        pil_image: mask image; any non-zero pixel is treated as foreground
            by ``cv2.findContours``.
            NOTE(review): the mask is not thresholded first - confirm that
            low-confidence pixels should count as foreground.

    Returns:
        ``int32`` array of shape ``(N, 2)`` holding ``(x, y)`` points.

    Raises:
        ValueError: if the mask contains no contour.
    """
    print('converting keypoints...')
    mask = np.array(pil_image)
    contours, _ = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if not contours:
        raise ValueError('no contour found in the segmentation mask')

    largest = max(contours, key=cv2.contourArea)
    # OpenCV contours are (N, 1, 2); flatten to a plain list of (x, y) rows.
    points = np.asarray(largest).reshape(-1, 2)
    scaled = MinMaxScaler().fit_transform(points)
    return np.int32(scaled * (FEATURE_SIZE - 1))


def _directional_profile(contour, scan_axis, pick):
    """Sample one directional profile of ``contour``.

    For every ``level`` in ``range(FEATURE_SIZE)`` the contour is walked in
    order and each crossing of ``point[scan_axis] == level`` is recorded as a
    coordinate on the other axis. ``pick`` (``min`` or ``max``) selects which
    crossing becomes the profile value; ``-1`` marks levels never crossed.

    Args:
        contour: sequence of ``(x, y)`` points.
        scan_axis: ``0`` to sweep over x, ``1`` to sweep over y.
        pick: reducer applied to the list of crossings of one level.

    Returns:
        List of ``FEATURE_SIZE`` values.
    """
    value_axis = 1 - scan_axis
    profile = []
    for level in range(FEATURE_SIZE):
        # side: 0 = undecided, 1 = walk is at/after the level, -1 = before it.
        side = 0
        crossings = []
        previous = None
        for point in contour:
            if previous is None:
                previous = point
            coord = point[scan_axis]
            if side == 0:
                # The first point only fixes the starting side; a point lying
                # exactly on the level counts as being after it.
                side = -1 if coord < level else 1
            elif coord == level:
                # Landing exactly on the level: record the point itself.
                crossings.append(point[value_axis])
                side = -side
            elif (coord > level) != (side > 0):
                # Jumped over the level: record the midpoint of the segment.
                crossings.append(
                    (point[value_axis] + previous[value_axis]) // 2
                )
                side = -side
            previous = point

        if not crossings:
            profile.append(-1)
            continue
        if len(crossings) == 1:
            # A lone crossing is paired with the midpoint of the contour's
            # first and last points, standing in for the closing segment.
            crossings.append(
                (contour[0][value_axis] + contour[-1][value_axis]) // 2
            )
        profile.append(pick(crossings))
    return profile


def get_features_up(contour):
    """Return, for each x level, the largest y at which the contour crosses."""
    return _directional_profile(contour, scan_axis=0, pick=max)


def get_features_down(contour):
    """Return, for each x level, the smallest y at which the contour crosses."""
    return _directional_profile(contour, scan_axis=0, pick=min)


def get_features_right(contour):
    """Return, for each y level, the smallest x at which the contour crosses."""
    return _directional_profile(contour, scan_axis=1, pick=min)


def get_features_left(contour):
    """Return, for each y level, the largest x at which the contour crosses."""
    return _directional_profile(contour, scan_axis=1, pick=max)


def extract_features(contour):
    """Concatenate the down, up, right and left profiles, in that order."""
    print('extracting features...')
    return (
        get_features_down(contour)
        + get_features_up(contour)
        + get_features_right(contour)
        + get_features_left(contour)
    )


def final_features(image):
    """Run the full pipeline on a PIL image.

    Returns:
        ``(kp_image, features)`` where ``kp_image`` is the profile rendering
        from :func:`show_kps` and ``features`` is the list returned by
        :func:`extract_features`.
    """
    image = rotate_image(image)
    pil_image = pred_segmentation(image, pred_bbox(image))
    contour = get_kps_from_pil(pil_image)
    kp_image = show_kps(contour)
    return kp_image, extract_features(contour)


def predict_kps(image):
    """Return the polygon (``masks.xy[0]``) of the first YOLO mask."""
    results = _load_yolo()(image)
    return results[0].masks.xy[0]


def calculate_angle(p1, p2):
    """Return the angle in degrees of the vector from ``p1`` to ``p2``."""
    delta_y = p2[1] - p1[1]
    delta_x = p2[0] - p1[0]
    return math.degrees(math.atan2(delta_y, delta_x))


def calculate_square(img):
    """Return the four int32 corners of the object's minimum-area rectangle.

    Args:
        img: PIL image (or array-like); 3-dimensional input is converted
            from RGB to BGR before mask prediction.
    """
    np_image = np.array(img)
    if np_image.ndim == 3:
        cv_image = cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR)
    else:
        # Two-dimensional (grayscale) input needs no channel conversion.
        cv_image = np_image

    rect = cv2.minAreaRect(predict_kps(cv_image))
    return np.int32(cv2.boxPoints(rect))


def rotate_image(image):
    """Rotate ``image`` by the angle of its bounding rectangle's longest side.

    Only the first three sides of the rectangle are measured; opposite sides
    of a rectangle are equal, so the fourth adds no information.
    """
    square = calculate_square(image)
    side_lengths = [
        np.linalg.norm(start - end)
        for start, end in zip(square[:-1], square[1:])
    ]
    longest = int(np.argmax(side_lengths))
    angle = calculate_angle(square[longest], square[longest + 1])
    return image.rotate(angle)


def show_kps(contour):
    """Draw the four directional profiles on a black square canvas.

    Returns:
        ``uint8`` array of shape ``(FEATURE_SIZE, FEATURE_SIZE)`` with the
        profiles drawn in white (thickness 2).
    """
    canvas = np.zeros((FEATURE_SIZE, FEATURE_SIZE), dtype=np.uint8)
    levels = range(FEATURE_SIZE)
    polylines = (
        zip(get_features_right(contour), levels),
        zip(get_features_left(contour), levels),
        zip(levels, get_features_up(contour)),
        zip(levels, get_features_down(contour)),
    )
    for polyline in polylines:
        # OpenCV needs 32-bit integer points; without an explicit dtype the
        # mix of numpy and Python ints may be promoted to int64.
        points = np.array(list(polyline), dtype=np.int32)
        cv2.drawContours(canvas, [points], -1, (255, 255, 255), 2)
    return canvas