Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,236 Bytes
be186ed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
from typing import Dict
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import random
def scale_boxes(boxes, width, height):
    """Scale the x and y coordinates of each box by ``width`` and ``height``.

    Every box is unpacked as ``(x_min, y_min, x_max, y_max)``. Both x values
    are multiplied by ``width`` and both y values by ``height``.
    Presumably the inputs are normalized to [0, 1] and the result is in
    pixels -- confirm against the caller.

    Args:
        boxes: Iterable of 4-element boxes.
        width: Multiplier applied to ``x_min`` and ``x_max``.
        height: Multiplier applied to ``y_min`` and ``y_max``.

    Returns:
        A new list holding one ``[x_min, y_min, x_max, y_max]`` list per
        input box, in the same order.
    """
    return [
        [left * width, top * height, right * width, bottom * height]
        for left, top, right, bottom in boxes
    ]
def draw_mask(mask, draw, random_color=True):
    """Paint every nonzero element of ``mask`` as one point on ``draw``.

    Args:
        mask: Array-like whose nonzero entries are painted.
        draw: Object exposing ``point(xy, fill=...)``; elsewhere in this
            file a ``PIL.ImageDraw`` instance is passed.
        random_color: When true, the RGB part of the fill colour is drawn
            from ``random.randint(0, 255)``; otherwise a fixed blue
            ``(30, 144, 255)`` is used. The fourth (alpha) component is 153
            in both cases.
    """
    alpha = 153
    if random_color:
        fill = tuple(random.randint(0, 255) for _ in range(3)) + (alpha,)
    else:
        fill = (30, 144, 255, alpha)

    # np.nonzero yields one index array per axis; transposing gives one
    # index vector per nonzero element.
    axis_indices = np.nonzero(mask)
    for position in np.transpose(axis_indices):
        # Reverse the axis order so that, for a 2-D mask, (row, col)
        # becomes the (x, y) order passed to ``draw.point``.
        draw.point(position[::-1], fill=fill)
def bbox_visualization(image_pil: "Image.Image",
                       result: Dict,
                       draw_width: float = 6.0,
                       return_mask=True,
                       font_path: str = "utils/arial.ttf",
                       font_size: int = 30) -> "Image.Image":
    """Draw labelled bounding boxes, and optionally masks, on an image.

    Args:
        image_pil: The image to annotate. A ``numpy.ndarray`` is also
            accepted and is converted with ``Image.fromarray``. When a PIL
            image is passed, boxes and labels are drawn onto it in place.
        result: Detection results. Keys read by this function:
            - ``"boxes"``: iterable of 4-value boxes ``[x0, y0, x1, y1]``;
              each value is truncated with ``int()`` and used directly as
              an image coordinate.
            - ``"labels"``: one label per box. Labels are used as dict keys
              and sorted, so they must be hashable and mutually orderable.
            - ``"masks"`` (optional): iterable of masks convertible with
              ``np.array``. For 3-D arrays only the last channel is used
              (presumably the alpha channel of an RGBA mask -- confirm
              against the producer); 2-D arrays are used as-is.
        draw_width: Outline width of the boxes; truncated to ``int``.
        return_mask: When true and masks are present, the masks are
            composited over the image.
        font_path: Path of the TrueType font used for the labels. A relative
            path is resolved against the current working directory.
        font_size: Font size passed to ``ImageFont.truetype``.

    Returns:
        The annotated image. If masks were composited this is a new RGB
        image; otherwise it is the (possibly converted) input image.

    Raises:
        KeyError: If ``result`` lacks ``"boxes"`` or ``"labels"``.
        OSError: If the font file cannot be loaded.
    """
    boxes = result["boxes"]
    categories = result["labels"]
    masks = result.get("masks")
    if masks is None:
        # Treat an explicit ``"masks": None`` the same as a missing key.
        masks = []

    palette = [
        (177, 214, 144), (255, 162, 76),
        (13, 146, 244), (249, 84, 84), (54, 186, 152),
        (74, 36, 157), (0, 159, 189),
        (80, 118, 135), (188, 90, 148), (119, 205, 255),
    ]

    # NOTE(review): kept only for backward compatibility. This reseeds the
    # global NumPy RNG on every call, yet nothing here draws from it --
    # draw_mask picks its colours with the ``random`` module. Consider
    # removing once callers are confirmed not to rely on it.
    np.random.seed(42)

    # Sorting makes the label -> colour assignment independent of the order
    # in which labels appear; colours repeat after the palette is exhausted.
    category_to_color = {
        category: palette[idx % len(palette)]
        for idx, category in enumerate(sorted(set(categories)))
    }

    font = ImageFont.truetype(font_path, font_size)

    if isinstance(image_pil, np.ndarray):
        image_pil = Image.fromarray(image_pil)
    draw = ImageDraw.Draw(image_pil)
    outline_width = int(draw_width)

    for box, category in zip(boxes, categories):
        x0, y0, x1, y1 = (int(value) for value in box)
        color = category_to_color[category]

        draw.rectangle([x0, y0, x1, y1], outline=color, width=outline_width)

        # Paint a filled background behind the label so the white text
        # stays readable on any image content.
        text = f"{category}"
        if hasattr(draw, "textbbox"):
            label_box = draw.textbbox((x0, y0), text, font)
        else:
            # Older Pillow releases only provide ``textsize``.
            text_w, text_h = draw.textsize(text, font)
            label_box = (x0, y0, x0 + text_w, y0 + text_h)
        draw.rectangle(label_box, fill=color)
        draw.text((x0, y0), text, fill="white", font=font)

    if return_mask and len(masks) > 0:
        # Masks are painted on a transparent overlay and then alpha-blended,
        # so the boxes and labels underneath remain visible.
        overlay = Image.new("RGBA", image_pil.size, color=(0, 0, 0, 0))
        overlay_draw = ImageDraw.Draw(overlay)
        for mask in masks:
            mask_array = np.array(mask)
            if mask_array.ndim == 3:
                mask_array = mask_array[:, :, -1]
            draw_mask(mask_array, overlay_draw)
        image_pil = Image.alpha_composite(
            image_pil.convert("RGBA"), overlay
        ).convert("RGB")

    return image_pil
|