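# AiOS/detrsmpl/data/datasets/pipelines/hybrik_transforms.py
# Web-page residue kept for provenance: author "ttxskk", commit message
# "update", commit d7e58f0, page links "raw" / "history" / "blame",
# file size 33.3 kB.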
import math
import random
import cv2
import mmcv
import numpy as np
from detrsmpl.core.conventions.keypoints_mapping import get_flip_pairs
from detrsmpl.utils.demo_utils import box2cs, xyxy2xywh
from ..builder import PIPELINES
from .transforms import (
_rotate_smpl_pose,
affine_transform,
get_affine_transform,
)
def get_bbox(bbox_xywh, w, h):
    """Convert one xywh box to xyxy format and clip it to the image.

    Args:
        bbox_xywh (numpy.ndarray): a single box holding the 4 values
            (x, y, w, h); it is reshaped to ``(1, 4)`` before conversion.
        w (int): image width
        h (int): image height
    Returns:
        numpy.ndarray: clipped box as (xmin, ymin, xmax, ymax).
    """
    xyxy = bbox_xywh_to_xyxy(bbox_xywh.reshape(1, 4))
    # For a single box the clip helper yields four values, unpacked here.
    left, top, right, bottom = bbox_clip_xyxy(xyxy, w, h)
    return np.array([left, top, right, bottom])
def heatmap2coord(pred_jts,
                  pred_scores,
                  hm_shape,
                  bbox,
                  output_3d=False,
                  mean_bbox_scale=None):
    """Map predicted joint coordinates back to image space.

    Args:
        pred_jts: predicted joints; must expose ``dim()``, ``unsqueeze()``
            and ``cpu().numpy()`` (presumably a torch.Tensor) and be 2-D or
            3-D. A 2-D input gets a leading axis added.
        pred_scores: prediction scores, converted the same way as
            ``pred_jts``.
        hm_shape (tuple): (heatmap width, heatmap height).
        bbox (sequence): box as (xmin, ymin, xmax, ymax).
        output_3d (bool): when True the third coordinate is also filled in.
            Default: False.
        mean_bbox_scale (float, optional): when given, depth is divided by
            ``bbox width / mean_bbox_scale``. Default: None.
    Returns:
        tuple: (preds, pred_scores) as float numpy arrays.
    """
    hm_width, hm_height = hm_shape
    ndims = pred_jts.dim()
    assert ndims in [2, 3], 'Dimensions of input heatmap should be 2 or 3'
    if ndims == 2:
        pred_jts = pred_jts.unsqueeze(0)
        pred_scores = pred_scores.unsqueeze(0)

    coords = pred_jts.cpu().numpy().astype(float)
    pred_scores = pred_scores.cpu().numpy().astype(float)

    # Shift the x / y values by 0.5 and scale them to heatmap pixels.
    coords[:, :, 0] = (coords[:, :, 0] + 0.5) * hm_width
    coords[:, :, 1] = (coords[:, :, 1] + 0.5) * hm_height
    preds = np.zeros_like(coords)

    # Express the bbox as a centre point and a (width, height) scale.
    xmin, ymin, xmax, ymax = bbox
    box_w = xmax - xmin
    box_h = ymax - ymin
    center = np.array([xmin + box_w * 0.5, ymin + box_h * 0.5])
    scale = np.array([box_w, box_h])

    # Transform every joint of every sample back to image coordinates.
    for i, j in np.ndindex(coords.shape[0], coords.shape[1]):
        preds[i, j, 0:2] = transform_preds(coords[i, j, 0:2], center, scale,
                                           [hm_width, hm_height])
        if not output_3d:
            continue
        if mean_bbox_scale is None:
            preds[i, j, 2] = coords[i, j, 2]
        else:
            zscale = scale[0] / mean_bbox_scale
            preds[i, j, 2] = coords[i, j, 2] / zscale
    return preds, pred_scores
def transform_preds(coords, center, scale, output_size):
    """Map heatmap coordinates to image coordinates.

    Args:
        coords (np.ndarray): coordinates whose first two entries are mapped.
        center: bbox centre passed to ``get_affine_transform``.
        scale: bbox scale passed to ``get_affine_transform``.
        output_size: heatmap size passed to ``get_affine_transform``.
    Returns:
        np.ndarray: array shaped like ``coords``; only the first two entries
        are filled, anything beyond them stays zero.
    """
    # inv=1 requests the inverse (heatmap -> image) transform.
    inverse_trans = get_affine_transform(center,
                                         scale,
                                         0,
                                         output_size,
                                         inv=1,
                                         pixel_std=1)
    mapped = np.zeros(coords.shape)
    mapped[0:2] = affine_transform(coords[0:2], inverse_trans)
    return mapped
def bbox_xywh_to_xyxy(xywh):
    """Convert boxes from (x, y, w, h) to (xmin, ymin, xmax, ymax).

    The far corner is ``x + max(w - 1, 0)`` / ``y + max(h - 1, 0)``.

    Args:
        xywh (list, tuple or numpy.ndarray): bbox in format (x, y, w, h).
            A numpy.ndarray is expected to hold several boxes with shape
            `(N, 4)`.
    Returns:
        tuple or numpy.ndarray: a tuple for list/tuple input, an ndarray for
        ndarray input.
    Raises:
        IndexError: if a list/tuple does not have 4 elements or an ndarray's
            size is not a multiple of 4.
        TypeError: if the input is none of the supported types.
    """
    if isinstance(xywh, np.ndarray):
        if xywh.size % 4 != 0:
            raise IndexError(
                'Bounding boxes must have n * 4 elements, given {}'.format(
                    xywh.shape))
        top_left = xywh[:, :2]
        extent = np.maximum(0, xywh[:, 2:4] - 1)
        return np.hstack((top_left, top_left + extent))

    if isinstance(xywh, (tuple, list)):
        if len(xywh) != 4:
            raise IndexError(
                'Bounding boxes must have 4 elements, given {}'.format(
                    len(xywh)))
        x, y = xywh[0], xywh[1]
        span_w = np.maximum(xywh[2] - 1, 0)
        span_h = np.maximum(xywh[3] - 1, 0)
        return (x, y, x + span_w, y + span_h)

    raise TypeError(
        'Expect input xywh a list, tuple or numpy.ndarray, given {}'.format(
            type(xywh)))
def bbox_clip_xyxy(xyxy, width, height):
    """Clip (xmin, ymin, xmax, ymax) boxes to `[0, width - 1]` x
    `[0, height - 1]`.

    Args:
        xyxy (list, tuple or numpy.ndarray): bbox in format (xmin, ymin,
            xmax, ymax). A numpy.ndarray is indexed as shape `(N, 4)`.
        width (int or float): Boundary width.
        height (int or float): Boundary height.
    Returns:
        tuple or numpy.ndarray: a 4-tuple for list/tuple input. For ndarray
        input the four clipped columns are concatenated with ``np.hstack``
        into one 1-D array of length ``4 * N`` (all xmin, then all ymin,
        ...), which is a plain (xmin, ymin, xmax, ymax) vector when N == 1.
    Raises:
        IndexError: if a list/tuple does not have 4 elements or an ndarray's
            size is not a multiple of 4.
        TypeError: if the input is none of the supported types.
    """
    if isinstance(xyxy, (tuple, list)):
        if len(xyxy) != 4:
            raise IndexError(
                'Bounding boxes must have 4 elements, given {}'.format(
                    len(xyxy)))
        left = np.minimum(width - 1, np.maximum(0, xyxy[0]))
        top = np.minimum(height - 1, np.maximum(0, xyxy[1]))
        right = np.minimum(width - 1, np.maximum(0, xyxy[2]))
        bottom = np.minimum(height - 1, np.maximum(0, xyxy[3]))
        return (left, top, right, bottom)

    if isinstance(xyxy, np.ndarray):
        if xyxy.size % 4 != 0:
            raise IndexError(
                'Bounding boxes must have n * 4 elements, given {}'.format(
                    xyxy.shape))
        left = np.minimum(width - 1, np.maximum(0, xyxy[:, 0]))
        top = np.minimum(height - 1, np.maximum(0, xyxy[:, 1]))
        right = np.minimum(width - 1, np.maximum(0, xyxy[:, 2]))
        bottom = np.minimum(height - 1, np.maximum(0, xyxy[:, 3]))
        return np.hstack((left, top, right, bottom))

    raise TypeError(
        'Expect input xywh a list, tuple or numpy.ndarray, given {}'.format(
            type(xyxy)))
def cam2pixel(cam_coord, f, c):
    """Project camera-frame coordinates into the image frame.

    Args:
        cam_coord (np.ndarray): Coordinates in camera frame, indexed as
            (N, 3) with depth in the last column.
        f (list): focal length, fx, fy
        c (list): principal point offset, x0, y0
    Returns:
        img_coord (np.ndarray): (N, 3) array of pixel x, pixel y and the
        unchanged depth.
    """
    depth = cam_coord[:, 2]
    # A small epsilon keeps the division defined when depth is zero.
    safe_depth = depth + 1e-8
    pixel_x = cam_coord[:, 0] / safe_depth * f[0] + c[0]
    pixel_y = cam_coord[:, 1] / safe_depth * f[1] + c[1]
    return np.stack((pixel_x, pixel_y, depth), axis=1)
def get_intrinsic_matrix(f, c, inv=False):
    """Build the camera intrinsic matrix (or its inverse) from f and c.

    Args:
        f (list): focal length, fx, fy
        c (list): principal point offset, x0, y0
        inv (bool): Set True to get the inverse. Default: False.
    Returns:
        np.ndarray: 3x3 float32 intrinsic matrix or its inverse
    """
    matrix = np.zeros((3, 3), dtype=np.float32)
    matrix[0, 0], matrix[0, 2] = f[0], c[0]
    matrix[1, 1], matrix[1, 2] = f[1], c[1]
    matrix[2, 2] = 1
    if not inv:
        return matrix
    return np.linalg.inv(matrix).astype(np.float32)
def aa_to_quat_numpy(axis_angle):
    """Turn axis-angle rotations into quaternions.

    Args:
        axis_angle: Rotations given as a vector in axis angle form,
            as a np.ndarray of shape (..., 3), where the magnitude is
            the angle turned anticlockwise in radians around the
            vector's direction.
    Returns:
        quaternions with real part first, as np.ndarray of shape (..., 4).
    """
    theta = np.linalg.norm(axis_angle, ord=2, axis=-1, keepdims=True)
    half_theta = 0.5 * theta

    tiny = np.abs(theta) < 1e-6
    regular = ~tiny

    # factor == sin(theta / 2) / theta, evaluated without dividing by ~0.
    factor = np.empty_like(theta)
    # Taylor expansion: sin(x/2) ~ x/2 - (x/2)^3/6, so sin(x/2)/x ~ 1/2 -
    # x^2/48 for small x.
    factor[tiny] = 0.5 - (theta[tiny] * theta[tiny]) / 48
    factor[regular] = np.sin(half_theta[regular]) / theta[regular]

    real_part = np.cos(half_theta)
    imag_part = axis_angle * factor
    return np.concatenate([real_part, imag_part], axis=-1)
def flip_thetas(thetas, theta_pairs):
    """Mirror pose parameters and exchange their left/right entries.

    Args:
        thetas (np.ndarray): joints in shape (num_thetas, 3)
        theta_pairs (list): flip pairs for thetas
    Returns:
        thetas_flip (np.ndarray): flipped thetas with shape (num_thetas, 3);
        the input array is left untouched.
    """
    mirrored = thetas.copy()
    # Horizontal reflection: negate the second and third components.
    mirrored[:, [1, 2]] *= -1
    # Exchange the rows of every left/right pair, one pair at a time.
    for pair in theta_pairs:
        first, second = pair[0], pair[1]
        mirrored[[first, second], :] = mirrored[[second, first], :]
    return mirrored
def flip_joints_3d(joints_3d, joints_3d_visible, width, flip_pairs):
    """Flip 3d joints horizontally inside an image of the given width.

    The first column of ``joints_3d`` is mirrored as ``width - x - 1``, the
    rows of every left/right pair are exchanged (for joints and visibility
    alike), and the joints are finally multiplied by the flipped visibility.
    Neither input array is modified.

    Args:
        joints_3d (np.ndarray): joints, indexed as (N, ...) with the x
            coordinate in column 0.
        joints_3d_visible (np.ndarray): visibility with the same number of
            rows as ``joints_3d`` and a shape that broadcasts against it.
        width (int): Image width
        flip_pairs (list): (left, right) row-index pairs to exchange
    Returns:
        joints_3d_flipped (np.ndarray): flipped joints, masked by visibility
        joints_3d_visible_flipped (np.ndarray): flipped visibility
    Raises:
        AssertionError: if the two inputs have a different number of rows.
    """
    assert len(joints_3d) == len(joints_3d_visible)

    # Mirror a copy: writing into ``joints_3d`` itself would leave the
    # caller's array mirrored but not left/right swapped.
    mirrored = joints_3d.copy()
    mirrored[:, 0] = width - mirrored[:, 0] - 1

    joints_3d_flipped = mirrored.copy()
    joints_3d_visible_flipped = joints_3d_visible.copy()

    # Swap left-right parts, always reading from the un-swapped sources.
    for left, right in flip_pairs:
        joints_3d_flipped[left, :] = mirrored[right, :]
        joints_3d_flipped[right, :] = mirrored[left, :]
        joints_3d_visible_flipped[left, :] = joints_3d_visible[right, :]
        joints_3d_visible_flipped[right, :] = joints_3d_visible[left, :]

    joints_3d_flipped = joints_3d_flipped * joints_3d_visible_flipped
    return joints_3d_flipped, joints_3d_visible_flipped
def flip_xyz_joints_3d(joints_3d, flip_pairs):
    """Flip 3d xyz joints about the x axis.

    The x column is negated and the rows of every left/right pair are
    exchanged. The input array is not modified.

    Args:
        joints_3d (np.ndarray): Joints in shape (N, 3)
        flip_pairs (list): (left, right) row-index pairs to exchange
    Returns:
        joints_3d_flipped (np.ndarray): flipped joints with shape (N, 3)
    """
    # Negate x on a copy: writing into ``joints_3d`` itself would leave the
    # caller's array mirrored but not left/right swapped.
    mirrored = joints_3d.copy()
    mirrored[:, 0] = -1 * mirrored[:, 0]

    joints_3d_flipped = mirrored.copy()
    # change left-right parts, always reading from the un-swapped source
    for left, right in flip_pairs:
        joints_3d_flipped[left, :] = mirrored[right, :]
        joints_3d_flipped[right, :] = mirrored[left, :]
    return joints_3d_flipped
def flip_twist(twist_phi, twist_weight, twist_pairs):
    """Flip twist angles and their weights.

    Args:
        twist_phi (np.ndarray): twist in shape (num_twist, 2), holding
            (cos, sin) per row
        twist_weight (np.ndarray): weight in shape (num_twist, 2)
        twist_pairs (list): flip pairs for twist; each index is shifted
            down by one before it is used as a row index
    Returns:
        twist_flip (np.ndarray): flipped twist with shape (num_twist, 2)
        weight_flip (np.ndarray): flipped weights with shape (num_twist, 2)
    """
    flipped_weight = twist_weight.copy()
    flipped_twist = np.zeros_like(twist_phi)
    flipped_twist[:, 0] = twist_phi[:, 0]  # cos is kept
    flipped_twist[:, 1] = -1 * twist_phi[:, 1]  # sin changes sign

    for pair in twist_pairs:
        row_a, row_b = pair[0] - 1, pair[1] - 1
        flipped_twist[[row_a, row_b], :] = flipped_twist[[row_b, row_a], :]
        flipped_weight[[row_a, row_b], :] = flipped_weight[[row_b, row_a], :]
    return flipped_twist, flipped_weight
def _center_scale_to_box(center, scale):
    """Convert a (center, scale) pair into an xyxy bounding box.

    Args:
        center (sequence): box centre as (x, y)
        scale (sequence): box size as (width, height); it is multiplied by
            a pixel_std of 1.0
    Returns:
        list: box as [xmin, ymin, xmax, ymax]
    """
    pixel_std = 1.0
    box_w = scale[0] * pixel_std
    box_h = scale[1] * pixel_std
    left = center[0] - box_w * 0.5
    top = center[1] - box_h * 0.5
    return [left, top, left + box_w, top + box_h]
@PIPELINES.register_module()
class RandomDPG(object):
    """DPG data augmentation: randomly crop or jitter the bounding box.

    Required keys: 'bbox', 'ann_info'
    Modifies key: 'bbox', 'center', 'scale'
    Args:
        dpg_prob (float): Probability of dpg
    """

    def __init__(self, dpg_prob):
        self.dpg_prob = dpg_prob

    def __call__(self, results):
        """Replace the bbox by a random patch or a jittered box."""
        if np.random.rand() > self.dpg_prob:
            return results

        bbox = results['bbox']
        img_w = results['ann_info']['width']
        img_h = results['ann_info']['height']

        patch_scale = random.uniform(0, 1)
        box_w = bbox[2] - bbox[0]
        box_h = bbox[3] - bbox[1]

        if patch_scale > 0.85:
            # Random crop: a patch with the box's aspect ratio, placed at a
            # random offset inside the original box.
            aspect = box_h / box_w
            if box_w < box_h:
                patch_w = patch_scale * box_w
                patch_h = patch_w * aspect
            else:
                patch_h = patch_scale * box_h
                patch_w = patch_h / aspect
            xmin = bbox[0] + random.uniform(0, 1) * (box_w - patch_w)
            ymin = bbox[1] + random.uniform(0, 1) * (box_h - patch_h)
            xmax = xmin + patch_w + 1
            ymax = ymin + patch_h + 1
        else:
            # Random sample: shift each edge by Gaussian noise and keep the
            # result within [1, image size - 3]. The draws happen in the
            # order xmin, ymin, xmax, ymax.
            shifted_left = bbox[0] + np.random.normal(-0.0142,
                                                      0.1158) * box_w
            xmin = max(1, min(shifted_left, img_w - 3))
            shifted_top = bbox[1] + np.random.normal(0.0043, 0.068) * box_h
            ymin = max(1, min(shifted_top, img_h - 3))
            shifted_right = bbox[2] + np.random.normal(0.0154,
                                                       0.1337) * box_w
            xmax = min(max(xmin + 2, shifted_right), img_w - 3)
            shifted_bottom = bbox[3] + np.random.normal(-0.0013,
                                                        0.0711) * box_h
            ymax = min(max(ymin + 2, shifted_bottom), img_h - 3)

        new_bbox = np.array([xmin, ymin, xmax, ymax])
        center, scale = box2cs(xyxy2xywh(new_bbox),
                               aspect_ratio=1.0,
                               bbox_scale_factor=1.0)
        results['bbox'] = new_bbox
        results['center'] = center
        results['scale'] = scale
        return results
@PIPELINES.register_module()
class HybrIKRandomFlip:
    """Data augmentation with random horizontal image flip.

    Required keys: 'img', 'keypoints3d', 'keypoints3d_vis', 'center',
    and 'has_smpl'
    Additional keys required if has_smpl: 'keypoints3d17', 'keypoints3d17_vis',
    'keypoints3d_relative', 'keypoints3d17_relative', 'pose', 'target_twist',
    'target_twist_weight'
    Modifies key: 'img', 'keypoints3d', 'keypoints3d_vis', 'center'
    Additional keys modified if has_smpl: 'keypoints3d17', 'keypoints3d17_vis',
    'keypoints3d_relative', 'keypoints3d17_relative', 'pose', 'target_twist',
    'target_twist_weight'
    Adds key: 'is_flipped'
    Args:
        flip_prob (float): probability of the image being flipped. Default: 0.5
        flip_pairs (list[int]): list of left-right keypoint pairs for flipping
    """

    def __init__(self, flip_prob=0.5, flip_pairs=None):
        assert 0 <= flip_prob <= 1
        self.flip_prob = flip_prob
        self.flip_pairs = flip_pairs

    def __call__(self, results):
        """Perform data augmentation with random image flip."""
        skip_flip = np.random.rand() > self.flip_prob
        results['is_flipped'] = np.array([0 if skip_flip else 1])
        if skip_flip:
            return results

        # Flip every image field ('img' only when 'img_fields' is absent).
        for field in results.get('img_fields', ['img']):
            results[field] = mmcv.imflip(results[field],
                                         direction='horizontal')
        # Image width, read from axis 1 of the 3-axis image array.
        width = results['img'][:, ::-1, :].shape[1]

        # Mirror the bbox centre; the stored object is updated in place.
        center = results['center']
        center[0] = width - 1 - center[0]
        results['center'] = center

        keypoints3d, keypoints3d_vis = flip_joints_3d(
            results['keypoints3d'], results['keypoints3d_vis'], width,
            self.flip_pairs)

        if results['has_smpl']:
            smpl_flip_pairs = get_flip_pairs('smpl')
            pose = flip_thetas(results['pose'], smpl_flip_pairs)

            kp17 = results['keypoints3d17']
            kp17_vis = results['keypoints3d17_vis']
            kp17_relative = results['keypoints3d17_relative']
            kp_relative = results['keypoints3d_relative']

            kp17, kp17_vis = flip_joints_3d(kp17, kp17_vis, width,
                                            self.flip_pairs)
            kp17_relative = flip_xyz_joints_3d(kp17_relative,
                                               self.flip_pairs)
            kp_relative = flip_xyz_joints_3d(kp_relative, self.flip_pairs)

            flipped_twist, flipped_twist_weight = flip_twist(
                results['target_twist'], results['target_twist_weight'],
                smpl_flip_pairs)
            results['target_twist'] = flipped_twist
            results['target_twist_weight'] = flipped_twist_weight

            results['keypoints3d17_relative'] = kp17_relative.astype(
                np.float32)
            results['keypoints3d_relative'] = kp_relative.astype(np.float32)
            results['keypoints3d17'] = kp17.astype(np.float32)
            results['keypoints3d17_vis'] = kp17_vis.astype(np.float32)
            results['pose'] = pose.astype(np.float32)

        results['keypoints3d'] = keypoints3d.astype(np.float32)
        results['keypoints3d_vis'] = keypoints3d_vis.astype(np.float32)
        return results
@PIPELINES.register_module()
class HybrIKAffine:
    """Affine-warp the image into the network input and move the keypoints
    with it.

    Required keys: 'img', 'keypoints3d', 'keypoints3d_vis', 'has_smpl',
    'scale', 'rotation' and 'center'.
    Additional keys required if has_smpl: 'keypoints3d17',
    'keypoints3d17_vis', 'pose'.
    Modifies key: 'img', 'keypoints3d', 'keypoints3d_vis', and if has_smpl
    'pose', 'keypoints3d17', 'keypoints3d17_vis'
    Args:
        img_res: side length of the square output image
    """

    def __init__(self, img_res):
        self.image_size = np.array([img_res, img_res])

    def __call__(self, results):
        """Warp the image and transform the visible keypoints."""
        img = results['img']
        keypoints3d = results['keypoints3d']
        num_joints = len(keypoints3d)
        keypoints3d_vis = results['keypoints3d_vis']
        has_smpl = results['has_smpl']
        center = results['center']
        scale = results['scale']
        rot = results['rotation']

        trans = get_affine_transform(center,
                                     scale,
                                     rot,
                                     self.image_size,
                                     pixel_std=1)
        out_size = (int(self.image_size[0]), int(self.image_size[1]))
        warped = cv2.warpAffine(img, trans, out_size, flags=cv2.INTER_LINEAR)

        # Only joints whose first visibility entry is positive are moved.
        for joint_id in range(num_joints):
            if keypoints3d_vis[joint_id, 0] > 0.0:
                keypoints3d[joint_id, 0:2] = affine_transform(
                    keypoints3d[joint_id, 0:2], trans)

        if has_smpl:
            kp17 = results['keypoints3d17']
            kp17_vis = results['keypoints3d17_vis']
            for joint_id in range(17):
                if kp17_vis[joint_id, 0] > 0.0:
                    kp17[joint_id, 0:2] = affine_transform(
                        kp17[joint_id, 0:2], trans)
            results['keypoints3d17'] = kp17
            results['keypoints3d17_vis'] = kp17_vis

            # Rotate the pose as well; it is flattened for the helper and
            # reshaped to (24, 3) afterwards.
            rotated_pose = _rotate_smpl_pose(results['pose'].reshape(-1), rot)
            results['pose'] = rotated_pose.reshape(24, 3)

        results['img'] = warped.astype(np.float32)
        results['keypoints3d_vis'] = keypoints3d_vis.astype(np.float32)
        results['keypoints3d'] = keypoints3d.astype(np.float32)
        return results
@PIPELINES.register_module()
class RandomOcclusion:
    """Paint a random-noise rectangle into the image as synthetic occlusion.

    The rectangle is sampled once relative to the bounding box; when it does
    not lie inside the image the sample is returned unchanged.

    Required keys: 'bbox', 'ann_info', 'img'
    Modifies key: 'img'
    Args:
        occlusion_prob (float): probability of the image having
            occlusion. Default: 0.5
    """

    def __init__(self, occlusion_prob=0.5):
        self.occlusion_prob = occlusion_prob

    def __call__(self, results):
        """Apply the occlusion with probability ``occlusion_prob``."""
        if np.random.rand() > self.occlusion_prob:
            return results

        xmin, ymin, xmax, ymax = results['bbox']
        img_w = results['ann_info']['width']
        img_h = results['ann_info']['height']
        img = results['img']

        # Occluder area: a random fraction of the bbox area.
        area_min, area_max = 0.0, 0.7
        area_fraction = random.random() * (area_max - area_min) + area_min
        occ_area = area_fraction * (xmax - xmin) * (ymax - ymin)

        # Occluder height/width ratio.
        ratio_min, ratio_max = 0.3, 1 / 0.3
        occ_ratio = random.random() * (ratio_max - ratio_min) + ratio_min

        occ_h = math.sqrt(occ_area * occ_ratio)
        occ_w = math.sqrt(occ_area / occ_ratio)
        occ_x = random.random() * ((xmax - xmin) - occ_w - 1) + xmin
        occ_y = random.random() * ((ymax - ymin) - occ_h - 1) + ymin

        inside_image = (occ_x >= 0 and occ_y >= 0 and occ_x + occ_w < img_w
                        and occ_y + occ_h < img_h)
        if not inside_image:
            return results

        occ_x, occ_y = int(occ_x), int(occ_y)
        occ_w, occ_h = int(occ_w), int(occ_h)
        # Uniform noise in [0, 255) written straight into the image array.
        img[occ_y:occ_y + occ_h,
            occ_x:occ_x + occ_w, :] = np.random.rand(occ_h, occ_w, 3) * 255
        results['img'] = img
        return results
@PIPELINES.register_module()
class GenerateHybrIKTarget:
    """Generate the targets required for training.

    Required keys: 'has_smpl', 'keypoints3d', 'keypoints3d_vis',
    'depth_factor', 'center', 'scale', 'rotation'
    Additional keys if has_smpl: 'pose', 'beta', 'keypoints3d17_vis',
    'keypoints3d_relative', 'keypoints3d17_relative'
    Additional keys if not has_smpl: 'ann_info' (its 'dataset_name')
    Add keys: 'target_uvd_29', 'target_xyz_24', 'target_weight_24',
    'target_weight_29', 'target_xyz_17', 'target_weight_17', 'target_theta',
    'target_beta', 'target_smpl_weight', 'target_theta_weight', 'trans_inv',
    'bbox'
    Args:
        img_res: side length of the square network input
        test_mode (bool): when True, no targets are generated for
            'mpi_inf_3dhp' samples without SMPL (they stay zero)
    """

    def __init__(self, img_res, test_mode):
        self.test_mode = test_mode
        self.image_size = np.array([img_res, img_res])

    def _integral_uvd_target_generator(self,
                                       joints_3d,
                                       num_joints,
                                       patch_height,
                                       patch_width,
                                       depth_factor,
                                       test_mode=False):
        """Build flattened uvd targets and weights.

        ``joints_3d`` is indexed as (num_joints, 3, 2): ``[..., 0]`` holds
        coordinates and ``[..., 1]`` visibility. u and v are divided by the
        patch size and shifted by -0.5, d is divided by ``depth_factor``.
        All three weights of a joint use the visibility of its x coordinate
        and are zeroed when any target component leaves [-0.5, 0.5].
        ``test_mode`` is accepted but not used.
        """
        x_visibility = joints_3d[:, 0, 1]
        weights = np.ones((num_joints, 3), dtype=np.float32)
        for axis in range(3):
            weights[:, axis] = x_visibility

        uvd = np.zeros((num_joints, 3), dtype=np.float32)
        uvd[:, 0] = joints_3d[:, 0, 0] / patch_width - 0.5
        uvd[:, 1] = joints_3d[:, 1, 0] / patch_height - 0.5
        uvd[:, 2] = joints_3d[:, 2, 0] / depth_factor

        # Zero the whole weight row of joints that fall outside the patch.
        for axis in range(3):
            weights[uvd[:, axis] > 0.5] = 0
            weights[uvd[:, axis] < -0.5] = 0

        return uvd.reshape((-1)), weights.reshape((-1))

    def _integral_target_generator(self, joints_3d, num_joints, patch_height,
                                   patch_width, depth_factor):
        """Build flattened uvd targets and weights.

        Performs the same computation as ``_integral_uvd_target_generator``
        (without the unused ``test_mode`` argument).
        """
        x_visibility = joints_3d[:, 0, 1]
        weights = np.ones((num_joints, 3), dtype=np.float32)
        for axis in range(3):
            weights[:, axis] = x_visibility

        uvd = np.zeros((num_joints, 3), dtype=np.float32)
        uvd[:, 0] = joints_3d[:, 0, 0] / patch_width - 0.5
        uvd[:, 1] = joints_3d[:, 1, 0] / patch_height - 0.5
        uvd[:, 2] = joints_3d[:, 2, 0] / depth_factor

        # A joint is dropped as soon as one component leaves [-0.5, 0.5].
        outside = (np.abs(uvd) > 0.5).any(axis=1)
        weights[outside] = 0

        return uvd.reshape((-1)), weights.reshape((-1))

    def _integral_xyz_target_generator(self, joints_3d, joints_3d_vis,
                                       num_joints, depth_factor):
        """Build flattened xyz targets (divided by ``int(depth_factor)``)
        and per-coordinate visibility weights."""
        weights = np.ones((num_joints, 3), dtype=np.float32)
        xyz = np.zeros((num_joints, 3), dtype=np.float32)
        for axis in range(3):
            weights[:, axis] = joints_3d_vis[:, axis]
        for axis in range(3):
            xyz[:, axis] = joints_3d[:, axis] / int(depth_factor)
        return xyz.reshape((-1)), weights.reshape((-1))

    def _integral_target_generator_coco(self, joints_3d, num_joints,
                                        patch_height, patch_width):
        """Build flattened 2D (uv) targets and weights; no range masking is
        applied."""
        x_visibility = joints_3d[:, 0, 1]
        weights = np.ones((num_joints, 2), dtype=np.float32)
        weights[:, 0] = x_visibility
        weights[:, 1] = x_visibility

        uv = np.zeros((num_joints, 2), dtype=np.float32)
        uv[:, 0] = joints_3d[:, 0, 0] / patch_width - 0.5
        uv[:, 1] = joints_3d[:, 1, 0] / patch_height - 0.5
        return uv.reshape((-1)), weights.reshape((-1))

    def __call__(self, results):
        """Add the training targets and their weights to ``results``."""
        has_smpl = results['has_smpl']
        inp_h, inp_w = self.image_size[0], self.image_size[1]
        keypoints3d = results['keypoints3d']
        num_joints = len(keypoints3d)
        keypoints3d_vis = results['keypoints3d_vis']
        depth_factor = results['depth_factor']
        center = results['center']
        scale = results['scale']
        rot = results['rotation']

        # Keys that are generated for every sample.
        results['trans_inv'] = get_affine_transform(
            center, scale, rot, self.image_size, inv=True,
            pixel_std=1).astype(np.float32)
        results['bbox'] = np.array(_center_scale_to_box(center, scale),
                                   dtype=np.float32)

        if has_smpl:
            # Axis-angle pose -> flattened quaternions.
            quaternions = aa_to_quat_numpy(results['pose'])
            results['target_theta'] = quaternions.reshape(24 * 4).astype(
                np.float32)
            results['target_theta_weight'] = np.ones(
                (24, 4)).reshape(24 * 4).astype(np.float32)
            results['target_beta'] = results['beta'].astype(np.float32)
            results['target_smpl_weight'] = np.ones(1).astype(np.float32)

            keypoints3d17_vis = results['keypoints3d17_vis']
            keypoints3d17_relative = results['keypoints3d17_relative']
            joints24_relative_3d = results['keypoints3d_relative'][:24, :]

            # Pack coordinates and visibility into one (29, 3, 2) array.
            gt_joints_29 = np.zeros((29, 3, 2), dtype=np.float32)
            gt_joints_29[:, :, 0] = keypoints3d
            gt_joints_29[:, :, 1] = keypoints3d_vis

            uvd_29, weight_29 = self._integral_uvd_target_generator(
                gt_joints_29, 29, inp_h, inp_w, depth_factor)
            xyz_17, weight_17 = self._integral_xyz_target_generator(
                keypoints3d17_relative, keypoints3d17_vis, 17, depth_factor)
            xyz_24, weight_24 = self._integral_xyz_target_generator(
                joints24_relative_3d, keypoints3d_vis[:24, :], 24,
                depth_factor)

            weight_29 *= keypoints3d_vis.reshape(-1)
            weight_24 *= keypoints3d_vis[:24, :].reshape(-1)
            weight_17 *= keypoints3d17_vis.reshape(-1)

            results['target_uvd_29'] = uvd_29.astype(np.float32)
            results['target_xyz_24'] = xyz_24.astype(np.float32)
            results['target_weight_29'] = weight_29.astype(np.float32)
            results['target_weight_24'] = weight_24.astype(np.float32)
            results['target_xyz_17'] = xyz_17.astype(np.float32)
            results['target_weight_17'] = weight_17.astype(np.float32)
            return results

        # No SMPL annotation: start from all-zero labels and masks.
        uvd_29 = np.zeros((29, 3))
        xyz_24 = np.zeros((24, 3))
        uvd_29_mask = np.zeros((29, 3))
        xyz_17 = np.zeros((17, 3))
        xyz_17_mask = np.zeros((17, 3))

        gt_joints = np.zeros((num_joints, 3, 2), dtype=np.float32)
        gt_joints[:, :, 0] = keypoints3d
        gt_joints[:, :, 1] = keypoints3d_vis

        # Joint rows whose label and mask are zeroed out below.
        mask_idx = [1, 2, 6, 9, 10, 11]
        dataset_name = results['ann_info']['dataset_name']

        if dataset_name == 'coco':
            target, target_weight = self._integral_target_generator_coco(
                gt_joints, num_joints, inp_h, inp_w)
            jts = (target * target_weight).reshape(num_joints, 2)
            jts_mask = target_weight.reshape(num_joints, 2)
            jts[mask_idx] = jts[mask_idx] * 0
            jts_mask[mask_idx] = jts[mask_idx] * 0
            # Append a zero depth column to labels and masks.
            uvd_29 = np.hstack([jts, np.zeros([29, 1])])
            uvd_29_mask = np.hstack([jts_mask, np.zeros([29, 1])])
        elif dataset_name == 'mpi_inf_3dhp':
            if not self.test_mode:
                target, target_weight = self._integral_target_generator(
                    gt_joints, num_joints, inp_h, inp_w, depth_factor)
                target_weight *= keypoints3d_vis.reshape(-1)
                jts = (target * target_weight).reshape(num_joints, 3)
                jts_mask = target_weight.reshape(num_joints, 3)
                jts[mask_idx] = jts[mask_idx] * 0
                jts_mask[mask_idx] = jts[mask_idx] * 0
                uvd_29 = jts
                uvd_29_mask = jts_mask

        uvd_29 = uvd_29.reshape(-1)
        xyz_24 = xyz_24.reshape(-1)
        # The 24-joint weights are the first 24 rows of the uvd mask.
        uvd_24_mask = uvd_29_mask[:24, :].reshape(-1)
        uvd_29_mask = uvd_29_mask.reshape(-1)
        xyz_17 = xyz_17.reshape(-1)
        xyz_17_mask = xyz_17_mask.reshape(-1)

        results['target_uvd_29'] = uvd_29.astype(np.float32)
        results['target_xyz_24'] = xyz_24.astype(np.float32)
        results['target_weight_24'] = uvd_24_mask.astype(np.float32)
        results['target_weight_29'] = uvd_29_mask.astype(np.float32)
        results['target_xyz_17'] = xyz_17.astype(np.float32)
        results['target_weight_17'] = xyz_17_mask.astype(np.float32)
        results['target_theta'] = np.zeros(24 * 4).astype(np.float32)
        results['target_beta'] = np.zeros(10).astype(np.float32)
        results['target_smpl_weight'] = np.zeros(1).astype(np.float32)
        results['target_theta_weight'] = np.zeros(24 * 4).astype(np.float32)
        return results
@PIPELINES.register_module()
class NewKeypointsSelection:
    """Select a subset of keypoints.

    Modifies the keys named in ``maps`` that are present in the results.
    Args:
        maps (list[dict]): each entry provides 'keypoints' (the result keys
            to filter) and 'keypoints_index' (the indices kept along the
            second-to-last axis)
    """

    def __init__(self, maps):
        self.maps = maps

    def __call__(self, results):
        """Perform keypoints selection."""
        for selection in self.maps:
            for key in selection['keypoints']:
                keep = selection['keypoints_index']
                if key not in results:
                    continue
                results[key] = results[key][..., keep, :]
        return results