#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact  george.drettakis@inria.fr
#

from scene.cameras import Camera
import numpy as np
from utils.general_utils import PILtoTorch
from utils.graphics_utils import fov2focal
import torch
import scipy
# `scipy.interpolate` is used below; import the submodule explicitly so the
# attribute lookup does not depend on SciPy's lazy submodule loading.
import scipy.interpolate
import matplotlib.pyplot as plt

# Set to True after the "large input image" notice has been printed once.
WARNED = False


def _target_resolution(args, orig_w, orig_h, resolution_scale):
    """Compute the (width, height) an input image should be resized to.

    Args:
        args: object exposing ``resolution``.
        orig_w: original image width in pixels.
        orig_h: original image height in pixels.
        resolution_scale: extra down-scaling factor applied on top of
            ``args.resolution``.

    Returns:
        A ``(width, height)`` tuple of ints.
    """
    global WARNED

    if args.resolution in [1, 2, 4, 8]:
        # Explicit integer down-sampling factor.
        divisor = resolution_scale * args.resolution
        return round(orig_w / divisor), round(orig_h / divisor)

    # Otherwise args.resolution should be a type that converts to float.
    if args.resolution == -1:
        if orig_w > 1600:
            if not WARNED:
                print("[ INFO ] Encountered quite large input images (>1.6K pixels width), rescaling to 1.6K.\n "
                      "If this is not desired, please explicitly specify '--resolution/-r' as 1")
                WARNED = True
            global_down = orig_w / 1600
        else:
            global_down = 1
    else:
        # args.resolution is interpreted as the desired output width.
        global_down = orig_w / args.resolution

    scale = float(global_down) * float(resolution_scale)
    return (int(orig_w / scale), int(orig_h / scale))


def _optional_tensor(value):
    """Return ``torch.tensor(value)``, or ``None`` when ``value`` is ``None``."""
    return None if value is None else torch.tensor(value)


def loadCam(args, id, cam_info, resolution_scale):
    """Build a ``Camera`` from a camera-info record.

    The image is resized according to ``args.resolution`` and
    ``resolution_scale``; every optional per-pixel map on ``cam_info`` is
    converted to a tensor when present and passed through as ``None``
    otherwise.

    Args:
        args: object exposing ``resolution`` and ``data_device``.
        id: value forwarded to ``Camera`` as ``uid``.
        cam_info: record exposing ``image`` (with a ``.size`` of
            ``(width, height)``), pose/intrinsics fields and the optional
            ``conf_map``, ``depth_map``, ``dynamic_mask``,
            ``enlarged_dynamic_mask``, ``dyna_avg_map``, ``dyna_max_map`` and
            ``gt_dynamic_mask`` attributes.
        resolution_scale: extra down-scaling factor.

    Returns:
        The constructed ``Camera``.
    """
    orig_w, orig_h = cam_info.image.size
    resolution = _target_resolution(args, orig_w, orig_h, resolution_scale)

    resized_image_rgb = PILtoTorch(cam_info.image, resolution)

    gt_image = resized_image_rgb[:3, ...]
    loaded_mask = None

    # The alpha channel, when present, is the 4th entry along the same axis
    # the RGB slice above indexes (axis 0). The check previously inspected
    # axis 1, so it only fired when that dimension happened to equal 4.
    if resized_image_rgb.shape[0] == 4:
        loaded_mask = resized_image_rgb[3:4, ...]

    return Camera(colmap_id=cam_info.uid, intr=cam_info.intr,
                  R=cam_info.R, T=cam_info.T,
                  original_pose=cam_info.original_pose,
                  FoVx=cam_info.FovX, FoVy=cam_info.FovY,
                  image=gt_image, gt_alpha_mask=loaded_mask,
                  dynamic_mask=_optional_tensor(cam_info.dynamic_mask),
                  enlarged_dynamic_mask=_optional_tensor(cam_info.enlarged_dynamic_mask),
                  dyna_avg_map=_optional_tensor(cam_info.dyna_avg_map),
                  dyna_max_map=_optional_tensor(cam_info.dyna_max_map),
                  gt_dynamic_mask=_optional_tensor(cam_info.gt_dynamic_mask),
                  conf_map=_optional_tensor(cam_info.conf_map),
                  depth_map=_optional_tensor(cam_info.depth_map),
                  image_name=cam_info.image_name, uid=id,
                  data_device=args.data_device)


def cameraList_from_camInfos(cam_infos, resolution_scale, args):
    """Load one ``Camera`` per entry of ``cam_infos``.

    Each camera's ``uid`` is its index in ``cam_infos``.
    """
    return [loadCam(args, id, c, resolution_scale)
            for id, c in enumerate(cam_infos)]


def camera_to_JSON(id, camera : Camera):
    """Serialize a camera into a JSON-compatible dict.

    A 4x4 matrix is assembled from ``camera.R`` (transposed) and
    ``camera.T`` and then inverted; the translation and rotation of the
    inverse are stored as ``position`` and ``rotation``.

    Args:
        id: identifier stored under ``'id'``.
        camera: camera exposing ``R``, ``T``, ``image_name``, ``width``,
            ``height``, ``FovX`` and ``FovY``.

    Returns:
        Dict with keys ``id``, ``img_name``, ``width``, ``height``,
        ``position``, ``rotation``, ``fy`` and ``fx``.
    """
    Rt = np.zeros((4, 4))
    Rt[:3, :3] = camera.R.transpose()
    Rt[:3, 3] = camera.T
    Rt[3, 3] = 1.0

    W2C = np.linalg.inv(Rt)
    pos = W2C[:3, 3]
    rot = W2C[:3, :3]
    serializable_array_2d = [x.tolist() for x in rot]
    camera_entry = {
        'id' : id,
        'img_name' : camera.image_name,
        'width' : camera.width,
        'height' : camera.height,
        'position': pos.tolist(),
        'rotation': serializable_array_2d,
        'fy' : fov2focal(camera.FovY, camera.height),
        'fx' : fov2focal(camera.FovX, camera.width)
    }
    return camera_entry


def transform_poses_pca(poses):
    """Transforms poses so principal components lie on XYZ axes.

    Args:
        poses: a (N, 3, 4) array containing the cameras' camera to world
            transforms.

    Returns:
        A tuple (poses, transform), with the transformed poses and the
        applied camera_to_world transforms.
    """
    t = poses[:, :3, 3]
    t_mean = t.mean(axis=0)
    t = t - t_mean

    eigval, eigvec = np.linalg.eig(t.T @ t)
    # Sort eigenvectors in order of largest to smallest eigenvalue.
    inds = np.argsort(eigval)[::-1]
    eigvec = eigvec[:, inds]
    rot = eigvec.T
    # Keep a proper rotation (determinant +1) by flipping the last axis.
    if np.linalg.det(rot) < 0:
        rot = np.diag(np.array([1, 1, -1])) @ rot

    transform = np.concatenate([rot, rot @ -t_mean[:, None]], -1)
    poses_recentered = unpad_poses(transform @ pad_poses(poses))
    transform = np.concatenate([transform, np.eye(4)[3:]], axis=0)

    # Flip coordinate system if z component of y-axis is negative
    if poses_recentered.mean(axis=0)[2, 1] < 0:
        poses_recentered = np.diag(np.array([1, -1, -1])) @ poses_recentered
        transform = np.diag(np.array([1, -1, -1, 1])) @ transform

    # Just make sure the positions fit in the [-1, 1]^3 cube
    scale_factor = 1. / np.max(np.abs(poses_recentered[:, :3, 3]))
    poses_recentered[:, :3, 3] *= scale_factor
    transform = np.diag(np.array([scale_factor] * 3 + [1])) @ transform
    return poses_recentered, transform


def generate_interpolated_path(poses, n_interp, spline_degree=5,
                               smoothness=.03, rot_weight=.1):
    """Creates a smooth spline path between input keyframe camera poses.

    Spline is calculated with poses in format (position, lookat-point,
    up-point).

    Args:
        poses: (n, 3, 4) array of input pose keyframes.
        n_interp: returned path will have n_interp * (n - 1) total poses.
        spline_degree: polynomial degree of B-spline.
        smoothness: parameter for spline smoothing, 0 forces exact
            interpolation.
        rot_weight: relative weighting of rotation/translation in spline
            solve.

    Returns:
        Array of new camera poses with shape (n_interp * (n - 1), 3, 4).
    """

    def poses_to_points(poses, dist):
        """Converts from pose matrices to (position, lookat, up) format."""
        pos = poses[:, :3, -1]
        lookat = poses[:, :3, -1] - dist * poses[:, :3, 2]
        up = poses[:, :3, -1] + dist * poses[:, :3, 1]
        return np.stack([pos, lookat, up], 1)

    def points_to_poses(points):
        """Converts from (position, lookat, up) format to pose matrices."""
        return np.array([viewmatrix(p - l, u - p, p) for p, l, u in points])

    def interp(points, n, k, s):
        """Runs multidimensional B-spline interpolation on the input points."""
        sh = points.shape
        pts = np.reshape(points, (sh[0], -1))
        # The spline degree cannot exceed (number of keyframes - 1).
        k = min(k, sh[0] - 1)
        tck, _ = scipy.interpolate.splprep(pts.T, k=k, s=s)
        u = np.linspace(0, 1, n, endpoint=False)
        new_points = np.array(scipy.interpolate.splev(u, tck))
        new_points = np.reshape(new_points.T, (n, sh[1], sh[2]))
        return new_points

    ### Additional operation
    # inter_poses = []
    # for pose in poses:
    #     tmp_pose = np.eye(4)
    #     tmp_pose[:3] = np.concatenate([pose.R.T, pose.T[:, None]], 1)
    #     tmp_pose = np.linalg.inv(tmp_pose)
    #     tmp_pose[:, 1:3] *= -1
    #     inter_poses.append(tmp_pose)
    # inter_poses = np.stack(inter_poses, 0)
    # poses, transform = transform_poses_pca(inter_poses)

    points = poses_to_points(poses, dist=rot_weight)
    new_points = interp(points,
                        n_interp * (points.shape[0] - 1),
                        k=spline_degree,
                        s=smoothness)
    return points_to_poses(new_points)


def viewmatrix(lookdir, up, position):
    """Construct lookat view matrix.

    Returns a (3, 4) array whose columns are the three orthonormal axes
    derived from ``lookdir`` and ``up``, followed by ``position``.
    """
    vec2 = normalize(lookdir)
    vec0 = normalize(np.cross(up, vec2))
    vec1 = normalize(np.cross(vec2, vec0))
    m = np.stack([vec0, vec1, vec2, position], axis=1)
    return m


def normalize(x):
    """Normalization helper function: divide ``x`` by its norm."""
    return x / np.linalg.norm(x)


def pad_poses(p):
    """Pad [..., 3, 4] pose matrices with a homogeneous bottom row [0,0,0,1]."""
    bottom = np.broadcast_to([0, 0, 0, 1.], p[..., :1, :4].shape)
    return np.concatenate([p[..., :3, :4], bottom], axis=-2)


def unpad_poses(p):
    """Remove the homogeneous bottom row from [..., 4, 4] pose matrices."""
    return p[..., :3, :4]
def visualizer(camera_poses, colors, save_path="/mnt/data/1.png"):
    """Scatter-plot camera positions in 3D and save the figure to disk.

    For every pose the plotted point is ``inv(R) @ (-t)``, where ``R`` is the
    top-left 3x3 block and ``t`` the first three entries of the last column.
    NOTE(review): this is the camera centre if the poses are world-to-camera
    matrices -- confirm against the caller.

    Args:
        camera_poses: iterable of pose matrices indexable as ``pose[:3, :3]``
            and ``pose[:3, 3]``.
        colors: iterable of colours, paired with ``camera_poses`` via ``zip``
            (extra entries on either side are ignored).
        save_path: destination passed to ``savefig``.

    Returns:
        ``save_path``.
    """
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111, projection="3d")

        for pose, color in zip(camera_poses, colors):
            rotation = pose[:3, :3]
            translation = pose[:3, 3]  # 3D translation component
            camera_positions = np.einsum(
                "...ij,...j->...i", np.linalg.inv(rotation), -translation
            )

            ax.scatter(
                camera_positions[0],
                camera_positions[1],
                camera_positions[2],
                c=color,
                marker="o",
            )

        ax.set_xlabel("X")
        ax.set_ylabel("Y")
        ax.set_zlabel("Z")
        ax.set_title("Camera Poses")

        # Save this specific figure rather than whatever pyplot currently
        # considers the "current" one.
        fig.savefig(save_path)
    finally:
        # Always release the figure, even if plotting or saving raised, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)

    return save_path