import numpy as np
import torch
from torch.nn import functional as F

try:
    # Kept only so that ``tgm`` stays importable from this module; every
    # conversion below uses the local ports of the kornia routines instead.
    import torchgeometry as tgm  # noqa: F401
except ImportError:
    tgm = None


def batch_rodrigues(theta):
    """Convert axis-angle representation to rotation matrix.

    Args:
        theta: axis-angle vectors, size = [B, 3]. The L2 norm of each row
            is used as the rotation angle and the normalized row as the
            rotation axis.

    Returns:
        Rotation matrix corresponding to the axis-angle input
        -- size = [B, 3, 3]
    """
    # The small offset keeps the norm strictly positive so that an all-zero
    # input divides cleanly and maps to the identity rotation.
    angle = torch.norm(theta + 1e-8, p=2, dim=1, keepdim=True)
    axis = torch.div(theta, angle)
    half_angle = angle * 0.5
    quat = torch.cat(
        [torch.cos(half_angle), torch.sin(half_angle) * axis], dim=1)
    return quat_to_rotmat(quat)


def quat_to_rotmat(quat):
    """Convert quaternion coefficients to rotation matrix.

    Args:
        quat: size = [B, 4] 4 <===>(w, x, y, z). The input does not need
            to be normalized; it is divided by its L2 norm here.

    Returns:
        Rotation matrix corresponding to the quaternion
        -- size = [B, 3, 3]
    """
    norm_quat = quat / quat.norm(p=2, dim=1, keepdim=True)
    w = norm_quat[:, 0]
    x = norm_quat[:, 1]
    y = norm_quat[:, 2]
    z = norm_quat[:, 3]

    batch_size = quat.size(0)

    w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2)
    wx, wy, wz = w * x, w * y, w * z
    xy, xz, yz = x * y, x * z, y * z

    # Entries are stacked in row-major order and then folded to 3x3.
    rot_mat = torch.stack([
        w2 + x2 - y2 - z2, 2 * xy - 2 * wz, 2 * wy + 2 * xz,
        2 * wz + 2 * xy, w2 - x2 + y2 - z2, 2 * yz - 2 * wx,
        2 * xz - 2 * wy, 2 * wx + 2 * yz, w2 - x2 - y2 + z2
    ], dim=1).view(batch_size, 3, 3)
    return rot_mat


def rot6d_to_rotmat(x):
    """Convert 6D rotation representation to 3x3 rotation matrix.

    Based on Zhou et al., "On the Continuity of Rotation Representations
    in Neural Networks", CVPR 2019

    Args:
        x: (B, 6) batch of 6-D rotation representations, given either as a
            ``torch.Tensor`` or as a ``numpy.ndarray`` (converted to a
            tensor). Any input whose elements can be regrouped as
            (-1, 3, 2) is accepted.

    Returns:
        (B, 3, 3) tensor of rotation matrices whose columns are the
        orthonormalized basis vectors b1, b2, b3.
    """
    if isinstance(x, np.ndarray):
        x = torch.from_numpy(x)
    x = x.reshape(-1, 3, 2)
    a1 = x[:, :, 0]
    a2 = x[:, :, 1]
    # Gram-Schmidt: normalize a1, remove its component from a2, normalize.
    b1 = F.normalize(a1, dim=-1)
    b2 = F.normalize(
        a2 - torch.einsum('bi,bi->b', b1, a2).unsqueeze(-1) * b1, dim=-1)
    # ``dim`` must be explicit: without it torch.cross picks the first
    # dimension of size 3, which is the batch axis when B == 3.
    b3 = torch.cross(b1, b2, dim=-1)
    return torch.stack((b1, b2, b3), dim=-1)


def rot6d_to_axis_angle(x):
    """Convert 6D rotation representation to axis-angle vectors.

    Args:
        x: (B, 6) batch of 6-D rotation representations (see
            :func:`rot6d_to_rotmat`).

    Returns:
        (B, 3) tensor of axis-angle vectors on the same device as ``x``;
        NaN entries are replaced by 0.
    """
    rot_mat = rot6d_to_rotmat(x)
    return rotation_matrix_to_angle_axis(rot_mat).reshape(-1, 3)


def rotation_matrix_to_angle_axis(rotation_matrix):
    """
    This function is borrowed from https://github.com/kornia/kornia

    Convert rotation matrix to Rodrigues vector

    Args:
        rotation_matrix (Tensor): rotation matrix. A (N, 3, 3) input is
            padded with an extra column to (N, 3, 4) before conversion.

    Returns:
        Tensor: Rodrigues vector transformation, with NaN entries
        replaced by 0.

    Shape:
        - Input: :math:`(N, 3, 4)` or :math:`(N, 3, 3)`
        - Output: :math:`(N, 3)`

    Example:
        >>> input = torch.rand(2, 3, 4)  # Nx3x4
        >>> output = rotation_matrix_to_angle_axis(input)  # Nx3
    """
    if rotation_matrix.shape[1:] == (3, 3):
        rot_mat = rotation_matrix.reshape(-1, 3, 3)
        # The padding column matches the input dtype/device so that the
        # concatenation never mixes types.
        hom = torch.tensor([0, 0, 1],
                           dtype=rotation_matrix.dtype,
                           device=rotation_matrix.device)
        hom = hom.reshape(1, 3, 1).expand(rot_mat.shape[0], -1, -1)
        rotation_matrix = torch.cat([rot_mat, hom], dim=-1)

    quaternion = rotation_matrix_to_quaternion(rotation_matrix)
    aa = quaternion_to_angle_axis(quaternion)
    aa[torch.isnan(aa)] = 0.0
    return aa


def quaternion_to_angle_axis(quaternion: torch.Tensor) -> torch.Tensor:
    """
    This function is borrowed from https://github.com/kornia/kornia

    Convert quaternion vector to angle axis of rotation.

    Adapted from ceres C++ library: ceres-solver/include/ceres/rotation.h

    Args:
        quaternion (torch.Tensor): tensor with quaternions, ordered
            (w, x, y, z) along the last dimension.

    Return:
        torch.Tensor: tensor with angle axis of rotation.

    Raises:
        TypeError: if ``quaternion`` is not a tensor.
        ValueError: if the last dimension is not of size 4.

    Shape:
        - Input: :math:`(*, 4)` where `*` means, any number of dimensions
        - Output: :math:`(*, 3)`

    Example:
        >>> quaternion = torch.rand(2, 4)  # Nx4
        >>> angle_axis = quaternion_to_angle_axis(quaternion)  # Nx3
    """
    if not torch.is_tensor(quaternion):
        raise TypeError('Input type is not a torch.Tensor. Got {}'.format(
            type(quaternion)))

    if not quaternion.shape[-1] == 4:
        raise ValueError(
            'Input must be a tensor of shape Nx4 or 4. Got {}'.format(
                quaternion.shape))

    # unpack input and compute conversion
    q1: torch.Tensor = quaternion[..., 1]
    q2: torch.Tensor = quaternion[..., 2]
    q3: torch.Tensor = quaternion[..., 3]
    sin_squared_theta: torch.Tensor = q1 * q1 + q2 * q2 + q3 * q3

    sin_theta: torch.Tensor = torch.sqrt(sin_squared_theta)
    cos_theta: torch.Tensor = quaternion[..., 0]
    # Flip the sign of both arguments when cos < 0 so that the recovered
    # angle stays in (-pi, pi].
    two_theta: torch.Tensor = 2.0 * torch.where(
        cos_theta < 0.0,
        torch.atan2(-sin_theta, -cos_theta),
        torch.atan2(sin_theta, cos_theta))

    # For a zero vector part the ratio is 0/0; fall back to the constant 2.
    k_pos: torch.Tensor = two_theta / sin_theta
    k_neg: torch.Tensor = 2.0 * torch.ones_like(sin_theta)
    k: torch.Tensor = torch.where(sin_squared_theta > 0.0, k_pos, k_neg)

    angle_axis: torch.Tensor = torch.stack([q1 * k, q2 * k, q3 * k], dim=-1)
    return angle_axis


def rotation_matrix_to_quaternion(rotation_matrix, eps=1e-6):
    """
    This function is borrowed from https://github.com/kornia/kornia

    Convert 3x4 rotation matrix to 4d quaternion vector

    This algorithm is based on algorithm described in
    https://github.com/KieranWynn/pyquaternion/blob/master/pyquaternion/quaternion.py#L201

    Args:
        rotation_matrix (Tensor): the rotation matrix to convert. Only the
            leading 3x3 part is read.
        eps (float): threshold below which the (2, 2) diagonal entry is
            treated as non-positive when selecting the branch.

    Return:
        Tensor: the rotation in quaternion, ordered (w, x, y, z)

    Raises:
        TypeError: if ``rotation_matrix`` is not a tensor.
        ValueError: if the input has more than three dimensions or its
            last two dimensions are not (3, 4).

    Shape:
        - Input: :math:`(N, 3, 4)`
        - Output: :math:`(N, 4)`

    Example:
        >>> input = torch.rand(4, 3, 4)  # Nx3x4
        >>> output = rotation_matrix_to_quaternion(input)  # Nx4
    """
    if not torch.is_tensor(rotation_matrix):
        raise TypeError('Input type is not a torch.Tensor. Got {}'.format(
            type(rotation_matrix)))

    if len(rotation_matrix.shape) > 3:
        raise ValueError(
            'Input size must be a three dimensional tensor. Got {}'.format(
                rotation_matrix.shape))
    if not rotation_matrix.shape[-2:] == (3, 4):
        raise ValueError(
            'Input size must be a N x 3 x 4 tensor. Got {}'.format(
                rotation_matrix.shape))

    rmat_t = torch.transpose(rotation_matrix, 1, 2)

    mask_d2 = rmat_t[:, 2, 2] < eps

    mask_d0_d1 = rmat_t[:, 0, 0] > rmat_t[:, 1, 1]
    mask_d0_nd1 = rmat_t[:, 0, 0] < -rmat_t[:, 1, 1]

    # Four candidate quaternions; each row uses the one selected by the
    # masks built from the diagonal entries.
    t0 = 1 + rmat_t[:, 0, 0] - rmat_t[:, 1, 1] - rmat_t[:, 2, 2]
    q0 = torch.stack([
        rmat_t[:, 1, 2] - rmat_t[:, 2, 1], t0,
        rmat_t[:, 0, 1] + rmat_t[:, 1, 0], rmat_t[:, 2, 0] + rmat_t[:, 0, 2]
    ], -1)

    t1 = 1 - rmat_t[:, 0, 0] + rmat_t[:, 1, 1] - rmat_t[:, 2, 2]
    q1 = torch.stack([
        rmat_t[:, 2, 0] - rmat_t[:, 0, 2], rmat_t[:, 0, 1] + rmat_t[:, 1, 0],
        t1, rmat_t[:, 1, 2] + rmat_t[:, 2, 1]
    ], -1)

    t2 = 1 - rmat_t[:, 0, 0] - rmat_t[:, 1, 1] + rmat_t[:, 2, 2]
    q2 = torch.stack([
        rmat_t[:, 0, 1] - rmat_t[:, 1, 0], rmat_t[:, 2, 0] + rmat_t[:, 0, 2],
        rmat_t[:, 1, 2] + rmat_t[:, 2, 1], t2
    ], -1)

    t3 = 1 + rmat_t[:, 0, 0] + rmat_t[:, 1, 1] + rmat_t[:, 2, 2]
    q3 = torch.stack([
        t3, rmat_t[:, 1, 2] - rmat_t[:, 2, 1],
        rmat_t[:, 2, 0] - rmat_t[:, 0, 2], rmat_t[:, 0, 1] - rmat_t[:, 1, 0]
    ], -1)

    # The four masks are mutually exclusive and cover every row.
    mask_c0 = (mask_d2 & mask_d0_d1).view(-1, 1).type_as(q0)
    mask_c1 = (mask_d2 & ~mask_d0_d1).view(-1, 1).type_as(q1)
    mask_c2 = (~mask_d2 & mask_d0_nd1).view(-1, 1).type_as(q2)
    mask_c3 = (~mask_d2 & ~mask_d0_nd1).view(-1, 1).type_as(q3)

    q = q0 * mask_c0 + q1 * mask_c1 + q2 * mask_c2 + q3 * mask_c3
    # (N, 1) denominator broadcasts over the four quaternion components.
    q /= torch.sqrt(t0.unsqueeze(-1) * mask_c0 + t1.unsqueeze(-1) * mask_c1 +
                    t2.unsqueeze(-1) * mask_c2 + t3.unsqueeze(-1) * mask_c3)
    q *= 0.5
    return q


def perspective_projection(points, rotation, translation, focal_length,
                           camera_center):
    """This function computes the perspective projection of a set of points.

    Args:
        points (bs, N, 3): 3D points
        rotation (bs, 3, 3): Camera rotation
        translation (bs, 3): Camera translation
        focal_length (bs,) or scalar: Focal length
        camera_center (bs, 2): Camera center

    Returns:
        (bs, N, 2) projected 2D points.
    """
    batch_size = points.shape[0]
    # Build the intrinsics in the dtype of the points so the final einsum
    # does not mix precisions.
    K = torch.zeros([batch_size, 3, 3],
                    device=points.device,
                    dtype=points.dtype)
    K[:, 0, 0] = focal_length
    K[:, 1, 1] = focal_length
    K[:, 2, 2] = 1.
    K[:, :-1, -1] = camera_center

    # Transform points
    points = torch.einsum('bij,bkj->bki', rotation, points)
    points = points + translation.unsqueeze(1)

    # Apply perspective distortion
    projected_points = points / points[:, :, -1].unsqueeze(-1)

    # Apply camera intrinsics
    projected_points = torch.einsum('bij,bkj->bki', K, projected_points)

    return projected_points[:, :, :-1]


def estimate_translation_np(S,
                            joints_2d,
                            joints_conf,
                            focal_length=5000,
                            img_size=224):
    """Find camera translation that brings 3D joints S closest to 2D the
    corresponding joints_2d.

    Solves a confidence-weighted linear least-squares problem through its
    normal equations, with the optical center at the image center.

    Args:
        S: (J, 3) 3D joint locations
        joints_2d: (J, 2) 2D joint locations
        joints_conf: (J,) per-joint confidence; its square root is used as
            the row weight
        focal_length: focal length, shared by both image axes
        img_size: image side length; the optical center is img_size / 2

    Returns:
        (3,) camera translation vector
    """
    num_joints = S.shape[0]
    # focal length
    f = np.array([focal_length, focal_length])
    # optical center
    center = np.array([img_size / 2., img_size / 2.])

    # Per-coordinate (x, y interleaved) vectors of length 2 * J.
    Z = np.reshape(np.tile(S[:, 2], (2, 1)).T, -1)
    XY = np.reshape(S[:, 0:2], -1)
    OO = np.tile(center, num_joints)
    focal = np.tile(f, num_joints)
    weight2 = np.reshape(np.tile(np.sqrt(joints_conf), (2, 1)).T, -1)
    uv = np.reshape(joints_2d, -1)

    # least squares
    Q = np.array([
        focal * np.tile(np.array([1, 0]), num_joints),
        focal * np.tile(np.array([0, 1]), num_joints),
        OO - uv
    ]).T
    c = (uv - OO) * Z - focal * XY

    # weighted least squares: scale each row by its weight (equivalent to
    # multiplying by a diagonal weight matrix)
    Q = weight2[:, None] * Q
    c = weight2 * c

    # square matrix
    A = np.dot(Q.T, Q)
    b = np.dot(Q.T, c)

    # solution
    trans = np.linalg.solve(A, b)

    return trans


def estimate_translation(S, joints_2d, focal_length=5000., img_size=224.):
    """Find camera translation that brings 3D joints S closest to 2D the
    corresponding joints_2d.

    Args:
        S: (B, 49, 3) 3D joint locations
        joints_2d: (B, 49, 3) 2D joint locations and confidence (last
            channel)
        focal_length: focal length passed to the per-sample solver
        img_size: image side length passed to the per-sample solver

    Returns:
        (B, 3) float32 camera translation vectors on the device of ``S``
    """
    device = S.device
    # Use only joints 25:49 (GT joints). Detach first so that inputs that
    # require grad can still be converted to NumPy.
    S = S[:, 25:, :].detach().cpu().numpy()
    joints_2d = joints_2d[:, 25:, :].detach().cpu().numpy()
    joints_conf = joints_2d[:, :, -1]
    joints_2d = joints_2d[:, :, :-1]
    trans = np.zeros((S.shape[0], 3), dtype=np.float32)
    # Find the translation for each example in the batch
    for i, (S_i, joints_i, conf_i) in enumerate(
            zip(S, joints_2d, joints_conf)):
        trans[i] = estimate_translation_np(S_i,
                                           joints_i,
                                           conf_i,
                                           focal_length=focal_length,
                                           img_size=img_size)
    return torch.from_numpy(trans).to(device)


def project_points(points_3d, camera, focal_length, img_res):
    """Project 3D points to the image plane using weak-perspective camera
    parameters.

    The (scale, translation_x, translation_y) camera is converted to a
    full translation (see :func:`pred_cam_to_transl`) and the points are
    then projected with :func:`perspective_projection` using an identity
    rotation and a zero camera center.

    Notes:
        batch size: B
        point number: N
    Args:
        points_3d (Tensor([B, N, 3])): 3D points.
        camera (Tensor([B, 3])): camera parameters with the
            3 channel as (scale, translation_x, translation_y)
        focal_length: focal length used for the projection.
        img_res: image resolution used to turn the scale into a depth.
    Returns:
        points_2d (Tensor([B, N, 2])): projected 2D points
            in image space.
    """
    batch_size = points_3d.shape[0]
    device = points_3d.device
    cam_t = pred_cam_to_transl(camera, focal_length, img_res)
    camera_center = camera.new_zeros([batch_size, 2])

    rot_t = torch.eye(3, device=device,
                      dtype=points_3d.dtype).unsqueeze(0).expand(
                          batch_size, -1, -1)
    keypoints_2d = perspective_projection(points_3d,
                                          rotation=rot_t,
                                          translation=cam_t,
                                          focal_length=focal_length,
                                          camera_center=camera_center)
    return keypoints_2d


def project_points_new(points_3d, pred_cam, focal_length, camera_center):
    """Project 3D points to the image plane using weak-perspective camera
    parameters and an explicit camera center.

    The camera translation is built as (tx / s, ty / s, 1 / s) and the
    points are projected with :func:`perspective_projection` using an
    identity rotation.

    Notes:
        batch size: B
        point number: N
    Args:
        points_3d (Tensor([B, N, 3])): 3D points.
        pred_cam (Tensor([B, 3])): camera parameters with the
            3 channel as (scale, translation_x, translation_y)
        focal_length: focal length used for the projection.
        camera_center (Tensor([B, 2])): camera center.
    Returns:
        points_2d (Tensor([B, N, 2])): projected 2D points
            in image space.
    """
    batch_size = points_3d.shape[0]
    device = points_3d.device
    # The small offset avoids dividing by an exactly-zero scale.
    s = pred_cam[:, 0] + 1e-9
    tx = pred_cam[:, 1]
    ty = pred_cam[:, 2]
    depth, dx, dy = 1. / s, tx / s, ty / s
    cam_t = torch.stack([dx, dy, depth], 1)

    rot_t = torch.eye(3, device=device,
                      dtype=points_3d.dtype).unsqueeze(0).expand(
                          batch_size, -1, -1)
    keypoints_2d = perspective_projection(points_3d,
                                          rotation=rot_t,
                                          translation=cam_t,
                                          focal_length=focal_length,
                                          camera_center=camera_center)
    return keypoints_2d


def weak_perspective_projection(points, scale, translation):
    """This function computes the weak perspective projection of a set of
    points.

    Args:
        points (bs, N, 3): 3D points
        scale (bs,1): scalar
        translation (bs, 2): point 2D translation

    Returns:
        (bs, N, 2) projected points, ``scale * (xy + translation)``.
    """
    projected_points = scale.view(
        -1, 1, 1) * (points[:, :, :2] + translation.view(-1, 1, 2))

    return projected_points


def estimate_cam_weakperspective(joints3d, joints2d, joints2d_conf,
                                 joints3d_conf, img_size) -> torch.Tensor:
    """Estimate a weak-perspective camera (s, tx, ty) for one sample by
    matching the bounding box of the 3D joints (x, y) to the bounding box
    of the normalized 2D joints.

    Args:
        joints3d: (J, 3) 3D joints.
        joints2d: (J, 2) 2D joints in pixels.
        joints2d_conf: optional confidences; only 2D joints with
            confidence > 0 are used.
        joints3d_conf: optional confidences; only 3D joints with
            confidence > 0 are used.
        img_size: (w, h) of the image.

    Returns:
        Tensor of shape (3,) holding (s, tx, ty).
    """
    w, h = img_size
    if joints2d_conf is not None:
        valid_ids = torch.where(joints2d_conf.view(-1) > 0)[0]
        joints2d = joints2d[valid_ids]
    if joints3d_conf is not None:
        valid_ids = torch.where(joints3d_conf.view(-1) > 0)[0]
        joints3d = joints3d[valid_ids]

    # Bounding box of the 3D joints in the x/y plane.
    x1 = torch.min(joints3d[..., 0])
    x2 = torch.max(joints3d[..., 0])
    y1 = torch.min(joints3d[..., 1])
    y2 = torch.max(joints3d[..., 1])

    # Bounding box of the 2D joints: u is normalized by the width, v is
    # centered on the height and normalized by the longer image side.
    u1 = 2 * torch.min(joints2d[..., 0]) / w - 1
    u2 = 2 * torch.max(joints2d[..., 0]) / w - 1
    v1 = (2 * torch.min(joints2d[..., 1]) - h) / max(w, h)
    v2 = (2 * torch.max(joints2d[..., 1]) - h) / max(w, h)

    # Geometric mean of the per-axis scale estimates.
    sx = (u1 - u2) / (x1 - x2)
    sy = (v1 - v2) / (y1 - y2)
    s = torch.sqrt(sx * sy)

    tx_1 = u1 / s - x1  # u1 = s*(tx_1 + x1)
    ty_1 = v1 / s - y1  # v1 = s*(ty_1 + y1)
    tx_2 = u2 / s - x2  # u2 = s*(tx_2 + x2)
    ty_2 = v2 / s - y2  # v2 = s*(ty_2 + y2)
    tx = (tx_1 + tx_2) / 2
    ty = (ty_1 + ty_2) / 2

    cam = torch.Tensor([s, tx, ty]).view(3)
    return cam


def estimate_cam_weakperspective_batch(joints3d, joints2d, joints2d_conf,
                                       joints3d_conf, img_size):
    """Estimate weak-perspective cameras for a batch by calling
    :func:`estimate_cam_weakperspective` on every sample.

    Args:
        joints3d: (B, J, 3) 3D joints.
        joints2d: (B, J, 2) 2D joints in pixels.
        joints2d_conf: optional per-sample 2D confidences, indexed along
            the batch dimension.
        joints3d_conf: optional per-sample 3D confidences, indexed along
            the batch dimension.
        img_size: per-sample (w, h), indexed along the batch dimension.

    Returns:
        (B, 3) tensor of (s, tx, ty) on the device of ``joints3d``.
    """
    device = joints3d.device
    # The per-sample solver runs on CPU copies detached from the graph.
    joints2d = joints2d.detach().cpu()
    joints3d = joints3d.detach().cpu()

    assert joints2d.ndim == 3  # B, J, 2
    assert joints3d.ndim == 3  # B, J, 3

    cam = torch.zeros(joints3d.shape[0], 3)
    for i in range(joints3d.shape[0]):
        conf2d_i = None
        if joints2d_conf is not None:
            conf2d_i = joints2d_conf[i].detach().cpu()

        conf3d_i = None
        if joints3d_conf is not None:
            conf3d_i = joints3d_conf[i].detach().cpu()

        cam[i] = estimate_cam_weakperspective(joints3d=joints3d[i],
                                              joints2d=joints2d[i],
                                              joints2d_conf=conf2d_i,
                                              joints3d_conf=conf3d_i,
                                              img_size=img_size[i])
    return cam.to(device)


def pred_cam_to_transl(pred_camera, focal_length, img_size):
    """Convert weak-perspective camera parameters to a camera translation.

    Args:
        pred_camera (Tensor([B, 3])): (scale, translation_x, translation_y).
        focal_length: focal length.
        img_size: image size used together with the scale to get depth.

    Returns:
        Tensor([B, 3]): (translation_x, translation_y,
        2 * focal_length / (img_size * scale + 1e-9)).
    """
    pred_cam_t = torch.stack([
        pred_camera[:, 1], pred_camera[:, 2], 2 * focal_length /
        (img_size * pred_camera[:, 0] + 1e-9)
    ],
                             dim=-1)
    return pred_cam_t