|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from scene.cameras import Camera |
|
import numpy as np |
|
from utils.general_utils import PILtoTorch |
|
from utils.graphics_utils import fov2focal |
|
import torch |
|
import scipy |
|
import matplotlib.pyplot as plt |
|
|
|
# One-shot flag: loadCam sets this to True after printing its "large input
# images" rescaling notice, so the notice is printed at most once per process.
WARNED = False
|
|
|
def _optional_tensor(data):
    """Return ``torch.tensor(data)``, or ``None`` when ``data`` is ``None``."""
    return None if data is None else torch.tensor(data)


def loadCam(args, id, cam_info, resolution_scale):
    """Build a ``Camera`` from ``cam_info`` with its image resized.

    The target image size is derived from ``args.resolution``:

    * ``1``, ``2``, ``4`` or ``8``: the original size is divided by
      ``resolution_scale * args.resolution`` and rounded.
    * ``-1``: images wider than 1600 px are scaled down by
      ``orig_w / 1600`` (a notice is printed the first time this happens);
      narrower images are left at their size. ``resolution_scale`` is then
      applied on top.
    * any other value: the down-scale factor is ``orig_w / args.resolution``
      (i.e. the value acts as a target width), again combined with
      ``resolution_scale``.

    Args:
        args: object exposing ``resolution`` and ``data_device``.
        id: value forwarded to ``Camera`` as ``uid``.
        cam_info: record exposing ``image`` (with a ``size`` attribute giving
            ``(width, height)``), pose/intrinsics fields and the optional
            per-view maps (``conf_map``, ``depth_map``, ``dynamic_mask``,
            ``enlarged_dynamic_mask``, ``dyna_avg_map``, ``dyna_max_map``,
            ``gt_dynamic_mask``), any of which may be ``None``.
        resolution_scale: extra down-scale factor applied to the image size.

    Returns:
        The constructed ``Camera``.
    """
    global WARNED

    orig_w, orig_h = cam_info.image.size

    if args.resolution in [1, 2, 4, 8]:
        downscale = resolution_scale * args.resolution
        resolution = round(orig_w / downscale), round(orig_h / downscale)
    else:
        if args.resolution == -1:
            if orig_w > 1600:
                if not WARNED:
                    print("[ INFO ] Encountered quite large input images (>1.6K pixels width), rescaling to 1.6K.\n "
                          "If this is not desired, please explicitly specify '--resolution/-r' as 1")
                    WARNED = True
                global_down = orig_w / 1600
            else:
                global_down = 1
        else:
            global_down = orig_w / args.resolution

        scale = float(global_down) * float(resolution_scale)
        resolution = (int(orig_w / scale), int(orig_h / scale))

    resized_image_rgb = PILtoTorch(cam_info.image, resolution)

    # The slices below pick channels along axis 0, so the "has an alpha
    # channel" test must look at axis 0 as well. The previous check compared
    # shape[1] with 4, which never matched a 4-channel image and left the
    # alpha mask unused.
    gt_image = resized_image_rgb[:3, ...]
    loaded_mask = None
    if resized_image_rgb.shape[0] == 4:
        loaded_mask = resized_image_rgb[3:4, ...]

    # The auxiliary maps are only wrapped as tensors here; they are not
    # resized alongside the image.
    conf_map = _optional_tensor(cam_info.conf_map)
    depth_map = _optional_tensor(cam_info.depth_map)
    dynamic_mask = _optional_tensor(cam_info.dynamic_mask)
    enlarged_dynamic_mask = _optional_tensor(cam_info.enlarged_dynamic_mask)
    dyna_avg_map = _optional_tensor(cam_info.dyna_avg_map)
    dyna_max_map = _optional_tensor(cam_info.dyna_max_map)
    gt_dynamic_mask = _optional_tensor(cam_info.gt_dynamic_mask)

    return Camera(colmap_id=cam_info.uid, intr=cam_info.intr,
                  R=cam_info.R, T=cam_info.T,
                  original_pose=cam_info.original_pose,
                  FoVx=cam_info.FovX, FoVy=cam_info.FovY,
                  image=gt_image, gt_alpha_mask=loaded_mask,
                  dynamic_mask=dynamic_mask, enlarged_dynamic_mask=enlarged_dynamic_mask,
                  dyna_avg_map=dyna_avg_map, dyna_max_map=dyna_max_map,
                  gt_dynamic_mask=gt_dynamic_mask,
                  conf_map=conf_map, depth_map=depth_map,
                  image_name=cam_info.image_name, uid=id, data_device=args.data_device)
|
|
|
|
|
def cameraList_from_camInfos(cam_infos, resolution_scale, args):
    """Create one camera per entry of ``cam_infos`` via ``loadCam``.

    Each camera receives its position in ``cam_infos`` as its id.

    Args:
        cam_infos: iterable of camera-info records accepted by ``loadCam``.
        resolution_scale: down-scale factor forwarded to ``loadCam``.
        args: configuration object forwarded to ``loadCam``.

    Returns:
        A list of the loaded cameras, in input order.
    """
    return [
        loadCam(args, index, info, resolution_scale)
        for index, info in enumerate(cam_infos)
    ]
|
|
|
|
|
def camera_to_JSON(id, camera : Camera):
    """Describe ``camera`` as a dictionary of plain, JSON-friendly values.

    A 4x4 matrix is assembled from ``camera.R`` (transposed) and ``camera.T``
    and then inverted; the translation column of the inverse is reported as
    ``position`` and its upper-left 3x3 block as ``rotation``.
    NOTE(review): presumably the assembled matrix is world-to-camera, making
    the inverse the camera-to-world pose — confirm against ``Camera``.

    Args:
        id: identifier stored under the ``'id'`` key.
        camera: camera exposing ``R``, ``T``, ``image_name``, ``width``,
            ``height``, ``FovX`` and ``FovY``.

    Returns:
        Dict with keys ``id``, ``img_name``, ``width``, ``height``,
        ``position``, ``rotation``, ``fy`` and ``fx``.
    """
    # Start from the identity so the homogeneous bottom row is already set.
    pose = np.eye(4)
    pose[:3, :3] = camera.R.transpose()
    pose[:3, 3] = camera.T

    inverse = np.linalg.inv(pose)

    return {
        'id' : id,
        'img_name' : camera.image_name,
        'width' : camera.width,
        'height' : camera.height,
        'position': inverse[:3, 3].tolist(),
        'rotation': inverse[:3, :3].tolist(),
        'fy' : fov2focal(camera.FovY, camera.height),
        'fx' : fov2focal(camera.FovX, camera.width)
    }
|
|
|
|
|
def transform_poses_pca(poses):
    """Re-express poses in a frame aligned with their principal components.

    The camera positions are centred on their mean, rotated so that the
    eigenvectors of their scatter matrix (sorted by decreasing eigenvalue)
    become the coordinate axes, optionally flipped, and finally scaled so the
    largest absolute position coordinate equals 1.

    Args:
        poses: a (N, 3, 4) array containing the cameras' camera to world
            transforms.

    Returns:
        A tuple ``(poses, transform)``: the transformed (N, 3, 4) poses and
        the 4x4 transform that was applied to obtain them.
    """
    centers = poses[:, :3, 3]
    centroid = centers.mean(axis=0)
    centered = centers - centroid

    # Principal axes = eigenvectors of the 3x3 scatter matrix of the centred
    # positions.
    eigenvalues, eigenvectors = np.linalg.eig(centered.T @ centered)

    # Order the axes by decreasing eigenvalue; rows of ``rot`` are the axes.
    descending = np.argsort(eigenvalues)[::-1]
    rot = eigenvectors[:, descending].T
    if np.linalg.det(rot) < 0:
        # Negate the last axis so the determinant becomes positive.
        rot = np.diag(np.array([1, 1, -1])) @ rot

    # 3x4 [rot | -rot @ centroid]: centre first, then rotate.
    transform = np.concatenate([rot, rot @ -centroid[:, None]], -1)
    recentered = unpad_poses(transform @ pad_poses(poses))
    transform = np.concatenate([transform, np.eye(4)[3:]], axis=0)

    # Flip the y and z axes when entry [2, 1] of the mean pose is negative.
    mean_pose = recentered.mean(axis=0)
    if mean_pose[2, 1] < 0:
        recentered = np.diag(np.array([1, -1, -1])) @ recentered
        transform = np.diag(np.array([1, -1, -1, 1])) @ transform

    # Rescale so that every position coordinate lies within [-1, 1].
    scale_factor = 1. / np.max(np.abs(recentered[:, :3, 3]))
    recentered[:, :3, 3] *= scale_factor
    transform = np.diag(np.array([scale_factor] * 3 + [1])) @ transform

    return recentered, transform
|
|
|
def generate_interpolated_path(poses, n_interp, spline_degree=5,
                               smoothness=.03, rot_weight=.1):
    """Creates a smooth spline path between input keyframe camera poses.

    Spline is calculated with poses in format (position, lookat-point, up-point).

    Args:
        poses: (n, 3, 4) array of input pose keyframes.
        n_interp: returned path will have n_interp * (n - 1) total poses.
        spline_degree: polynomial degree of B-spline. It is lowered to
            ``n - 1`` when fewer keyframes than ``spline_degree + 1`` are given.
        smoothness: parameter for spline smoothing, 0 forces exact interpolation.
        rot_weight: relative weighting of rotation/translation in spline solve.

    Returns:
        Array of new camera poses with shape (n_interp * (n - 1), 3, 4).
    """
    # Import the submodule explicitly: a bare ``import scipy`` is not
    # guaranteed to expose ``scipy.interpolate`` on every SciPy release.
    from scipy.interpolate import splev, splprep

    def poses_to_points(poses, dist):
        """Converts from pose matrices to (position, lookat, up) format."""
        pos = poses[:, :3, -1]
        # Offset the position by ``dist`` along the (negated) third and the
        # second pose columns to obtain the lookat and up points.
        lookat = poses[:, :3, -1] - dist * poses[:, :3, 2]
        up = poses[:, :3, -1] + dist * poses[:, :3, 1]
        return np.stack([pos, lookat, up], 1)

    def points_to_poses(points):
        """Converts from (position, lookat, up) format to pose matrices."""
        return np.array([
            viewmatrix(position - lookat, up - position, position)
            for position, lookat, up in points
        ])

    def interp(points, n, k, s):
        """Runs multidimensional B-spline interpolation on the input points."""
        sh = points.shape
        # Flatten each keyframe's three points into one 9-D sample.
        pts = np.reshape(points, (sh[0], -1))
        # The spline degree cannot exceed (number of samples - 1).
        k = min(k, sh[0] - 1)
        tck, _ = splprep(pts.T, k=k, s=s)
        u = np.linspace(0, 1, n, endpoint=False)
        new_points = np.array(splev(u, tck))
        new_points = np.reshape(new_points.T, (n, sh[1], sh[2]))
        return new_points

    points = poses_to_points(poses, dist=rot_weight)
    new_points = interp(points,
                        n_interp * (points.shape[0] - 1),
                        k=spline_degree,
                        s=smoothness)
    return points_to_poses(new_points)
|
|
|
|
|
def viewmatrix(lookdir, up, position):
    """Assemble a 3x4 look-at matrix from a direction, an up hint and a position.

    Columns 0-2 of the result are unit axes built from ``lookdir`` and ``up``
    via cross products; column 3 is ``position``.
    """
    axis_z = normalize(lookdir)
    axis_x = normalize(np.cross(up, axis_z))
    # Recompute the second axis from the other two instead of using ``up``
    # directly.
    axis_y = normalize(np.cross(axis_z, axis_x))
    columns = [axis_x, axis_y, axis_z, position]
    return np.stack(columns, axis=1)
|
|
|
def normalize(x):
    """Return ``x`` divided by ``np.linalg.norm(x)``."""
    magnitude = np.linalg.norm(x)
    return x / magnitude
|
|
|
def pad_poses(p):
    """Append a homogeneous bottom row [0, 0, 0, 1] to [..., 3, 4] pose matrices.

    Returns an array of shape [..., 4, 4].
    """
    upper = p[..., :3, :4]
    # One [0, 0, 0, 1] row per pose, broadcast over all leading dimensions.
    row_shape = p[..., :1, :4].shape
    homogeneous_row = np.broadcast_to([0, 0, 0, 1.], row_shape)
    return np.concatenate([upper, homogeneous_row], axis=-2)
|
|
|
|
|
def unpad_poses(p):
    """Drop the homogeneous bottom row, turning [..., 4, 4] poses into [..., 3, 4]."""
    top_rows = p[..., :3, :4]
    return top_rows
|
|
|
|
|
def visualizer(camera_poses, colors, save_path="/mnt/data/1.png"):
    """Scatter-plot one 3D point per camera pose and save the figure.

    For every pose the plotted point is ``inv(R) @ (-t)``, where ``R`` is the
    pose's upper-left 3x3 block and ``t`` its fourth column.
    NOTE(review): this is the camera centre if the poses are world-to-camera
    matrices — confirm against the callers.

    Args:
        camera_poses: iterable of pose matrices indexable as ``pose[:3, :3]``
            and ``pose[:3, 3]``.
        colors: iterable of marker colours, paired with ``camera_poses`` by
            position (extra entries in the longer iterable are ignored).
        save_path: file path the figure is written to.

    Returns:
        ``save_path``.
    """
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111, projection="3d")

        for pose, color in zip(camera_poses, colors):
            rotation = pose[:3, :3]
            translation = pose[:3, 3]
            camera_positions = np.einsum(
                "...ij,...j->...i", np.linalg.inv(rotation), -translation
            )

            ax.scatter(
                camera_positions[0],
                camera_positions[1],
                camera_positions[2],
                c=color,
                marker="o",
            )

        ax.set_xlabel("X")
        ax.set_ylabel("Y")
        ax.set_zlabel("Z")
        ax.set_title("Camera Poses")

        # Save this figure explicitly rather than whatever pyplot currently
        # considers the "current" figure.
        fig.savefig(save_path)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    return save_path