import einops
import torch


# --- Intrinsics Transformations ---

def normalize_intrinsics(intrinsics, image_shape):
    '''Normalize an intrinsics matrix given the image shape (height, width)'''
    intrinsics = intrinsics.clone()
    intrinsics[..., 0, :] /= image_shape[1]
    intrinsics[..., 1, :] /= image_shape[0]
    return intrinsics


def unnormalize_intrinsics(intrinsics, image_shape):
    '''Unnormalize an intrinsics matrix given the image shape (height, width)'''
    intrinsics = intrinsics.clone()
    intrinsics[..., 0, :] *= image_shape[1]
    intrinsics[..., 1, :] *= image_shape[0]
    return intrinsics
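
# Example (illustrative sketch, not part of the original module): intrinsics for a
# hypothetical 480x640 image round-trip through normalize/unnormalize. The first two
# rows are scaled by 1/width and 1/height respectively; the last row is untouched.
#
#   K = torch.tensor([[500.0, 0.0, 320.0],
#                     [0.0, 500.0, 240.0],
#                     [0.0, 0.0, 1.0]])
#   K_norm = normalize_intrinsics(K, (480, 640))
#   K_back = unnormalize_intrinsics(K_norm, (480, 640))   # recovers K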

# --- Quaternions, Rotations and Scales ---

def quaternion_to_matrix(quaternions, eps: float = 1e-8):
    '''
    Convert the 4-dimensional quaternions to 3x3 rotation matrices.
    This is adapted from Pytorch3D:
    https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/transforms/rotation_conversions.py
    '''
    # Order changed to match scipy format!
    i, j, k, r = torch.unbind(quaternions, dim=-1)
    two_s = 2 / ((quaternions * quaternions).sum(dim=-1) + eps)
    o = torch.stack(
        (
            1 - two_s * (j * j + k * k),
            two_s * (i * j - k * r),
            two_s * (i * k + j * r),
            two_s * (i * j + k * r),
            1 - two_s * (i * i + k * k),
            two_s * (j * k - i * r),
            two_s * (i * k - j * r),
            two_s * (j * k + i * r),
            1 - two_s * (i * i + j * j),
        ),
        -1,
    )
    return einops.rearrange(o, "... (i j) -> ... i j", i=3, j=3)
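
# Example (illustrative, assuming the scipy-style xyzw ordering noted above): the
# identity quaternion maps to the identity rotation.
#
#   q = torch.tensor([0.0, 0.0, 0.0, 1.0])
#   R = quaternion_to_matrix(q)                           # close to torch.eye(3)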

def build_covariance(scale, rotation_xyzw):
    '''Build the 3x3 covariance matrix from the three-dimensional scale and the
    four-dimensional quaternion'''
    scale = scale.diag_embed()
    rotation = quaternion_to_matrix(rotation_xyzw)
    # Covariance of an anisotropic Gaussian: R S S^T R^T
    return (
        rotation
        @ scale
        @ scale.transpose(-1, -2)
        @ rotation.transpose(-1, -2)
    )
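
# Example (illustrative sketch): with an identity rotation, the covariance of a
# Gaussian with per-axis scales (sx, sy, sz) is simply diag(sx^2, sy^2, sz^2).
#
#   scale = torch.tensor([0.1, 0.2, 0.3])
#   rot_xyzw = torch.tensor([0.0, 0.0, 0.0, 1.0])         # identity quaternion (xyzw)
#   cov = build_covariance(scale, rot_xyzw)               # (3, 3) diagonal matrix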

# --- Projections ---

def homogenize_points(points):
    """Append a '1' along the final dimension of the tensor (i.e. convert xyz -> xyz1)"""
    return torch.cat([points, torch.ones_like(points[..., :1])], dim=-1)

def normalize_homogenous_points(points):
    """Normalize homogeneous points by dividing by the final (homogeneous) coordinate"""
    return points / points[..., -1:]

def pixel_space_to_camera_space(pixel_space_points, depth, intrinsics):
    """
    Convert pixel space points to camera space points.
    Args:
        pixel_space_points (torch.Tensor): Pixel space points with shape (h, w, 2)
        depth (torch.Tensor): Depth map with shape (b, v, h, w, 1)
        intrinsics (torch.Tensor): Camera intrinsics with shape (b, v, 3, 3)
    Returns:
        torch.Tensor: Camera space points with shape (b, v, h, w, 3).
    """
    pixel_space_points = homogenize_points(pixel_space_points)
    camera_space_points = torch.einsum('b v i j , h w j -> b v h w i', intrinsics.inverse(), pixel_space_points)
    camera_space_points = camera_space_points * depth
    return camera_space_points

def camera_space_to_world_space(camera_space_points, c2w):
    """
    Convert camera space points to world space points.
    Args:
        camera_space_points (torch.Tensor): Camera space points with shape (b, v, h, w, 3)
        c2w (torch.Tensor): Camera to world extrinsics matrix with shape (b, v, 4, 4)
    Returns:
        torch.Tensor: World space points with shape (b, v, h, w, 3).
    """
    camera_space_points = homogenize_points(camera_space_points)
    world_space_points = torch.einsum('b v i j , b v h w j -> b v h w i', c2w, camera_space_points)
    return world_space_points[..., :3]

def camera_space_to_pixel_space(camera_space_points, intrinsics):
    """
    Convert camera space points to pixel space points.
    Args:
        camera_space_points (torch.Tensor): Camera space points with shape (b, v1, v2, h, w, 3)
        intrinsics (torch.Tensor): Camera intrinsics with shape (b, v2, 3, 3)
    Returns:
        torch.Tensor: Pixel space points with shape (b, v1, v2, h, w, 2).
    """
    camera_space_points = normalize_homogenous_points(camera_space_points)
    pixel_space_points = torch.einsum('b u i j , b v u h w j -> b v u h w i', intrinsics, camera_space_points)
    return pixel_space_points[..., :2]

def world_space_to_camera_space(world_space_points, c2w):
    """
    Convert world space points to camera space points.
    Args:
        world_space_points (torch.Tensor): World space points with shape (b, v1, h, w, 3)
        c2w (torch.Tensor): Camera to world extrinsics matrix with shape (b, v2, 4, 4)
    Returns:
        torch.Tensor: Camera space points with shape (b, v1, v2, h, w, 3).
    """
    world_space_points = homogenize_points(world_space_points)
    camera_space_points = torch.einsum('b u i j , b v h w j -> b v u h w i', c2w.inverse(), world_space_points)
    return camera_space_points[..., :3]
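
# Example (illustrative sketch with hypothetical tensors): reprojecting world-space
# points from every source view into every target view chains the two helpers above.
#
#   world = torch.rand(1, 2, 4, 4, 3)                     # (b, v1, h, w, 3)
#   c2w = torch.eye(4).expand(1, 2, 4, 4)                 # (b, v2, 4, 4)
#   K = torch.eye(3).expand(1, 2, 3, 3)                   # (b, v2, 3, 3)
#   cam = world_space_to_camera_space(world, c2w)         # (b, v1, v2, h, w, 3)
#   pix = camera_space_to_pixel_space(cam, K)             # (b, v1, v2, h, w, 2)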

def unproject_depth(depth, intrinsics, c2w):
    """
    Turn a depth map into a 3D point cloud in world space.
    Args:
        depth: (b, v, h, w, 1)
        intrinsics: (b, v, 3, 3)
        c2w: (b, v, 4, 4)
    Returns:
        torch.Tensor: World space points with shape (b, v, h, w, 3).
    """
    # Compute pixel coordinates on the image grid
    h, w = depth.shape[-3], depth.shape[-2]
    x_grid, y_grid = torch.meshgrid(
        torch.arange(w, device=depth.device, dtype=torch.float32),
        torch.arange(h, device=depth.device, dtype=torch.float32),
        indexing='xy'
    )  # (h, w), (h, w)
    # Back-project pixels to camera space using the per-view depth and intrinsics
    pixel_space_points = torch.stack((x_grid, y_grid), dim=-1)  # (h, w, 2)
    camera_points = pixel_space_to_camera_space(pixel_space_points, depth, intrinsics)  # (b, v, h, w, 3)
    # Transform the points from camera space to world space with the c2w extrinsics
    world_points = camera_space_to_world_space(camera_points, c2w)  # (b, v, h, w, 3)
    return world_points
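
# Example (illustrative sketch with hypothetical inputs): unprojecting a random depth
# map for a batch of 1 with 2 views of a 4x4 image into a world-space point map.
#
#   depth = torch.rand(1, 2, 4, 4, 1)                     # (b, v, h, w, 1)
#   K = torch.eye(3).expand(1, 2, 3, 3)                   # (b, v, 3, 3)
#   c2w = torch.eye(4).expand(1, 2, 4, 4)                 # (b, v, 4, 4)
#   points = unproject_depth(depth, K, c2w)               # (1, 2, 4, 4, 3)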