Spaces:

xymeow7
/

quasi-physical-sims

Sleeping

File size: 20,244 Bytes

710e818

import torch
import torch.nn.functional as F
import cv2 as cv
import numpy as np
import os
from glob import glob
from icecream import ic
from scipy.spatial.transform import Rotation as Rot
from scipy.spatial.transform import Slerp


# This function is borrowed from IDR: https://github.com/lioryariv/idr
def load_K_Rt_from_P(filename, P=None):
    if P is None:
        lines = open(filename).read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    pose[:3, :3] = R.transpose()
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose

def filter_iamges_via_pixel_values(data_dir):
    images_lis = sorted(glob(os.path.join(data_dir, 'image/*.png'))) ## images lis ##
    n_images = len(images_lis)
    images_np = np.stack([cv.imread(im_name) for im_name in images_lis]) / 255.0
    print(f"images_np: {images_np.shape}")
    # nn_frames x res x res x 3 #
    images_np = 1. - images_np
    has_density_values = (np.sum(images_np, axis=-1) > 0.7).astype(np.float32)
    has_density_values = np.sum(np.sum(has_density_values, axis=-1), axis=-1)
    tot_res_nns = float(images_np.shape[1] * images_np.shape[2])
    has_density_ratio = has_density_values / tot_res_nns ### has density ratio and ratio # 
    print(f"has_density_values: {has_density_values.shape}")
    paried_has_density_ratio_list = [(i_fr, has_density_ratio[i_fr].item()) for i_fr in range(has_density_ratio.shape[0])]
    paried_has_density_ratio_list = sorted(paried_has_density_ratio_list, key=lambda ii: ii[1], reverse=True)
    mid_rnk_value = len(paried_has_density_ratio_list) // 4
    print(f"mid value of the density ratio")
    print(paried_has_density_ratio_list[mid_rnk_value])
    iamge_idx = paried_has_density_ratio_list[mid_rnk_value][0]
    print(f"iamge idx: {images_lis[iamge_idx]}")
    print(paried_has_density_ratio_list[:mid_rnk_value])
    tot_selected_img_idx_list = [ii[0] for ii in paried_has_density_ratio_list[:mid_rnk_value]]
    tot_selected_img_idx_list =sorted(tot_selected_img_idx_list)
    print(len(tot_selected_img_idx_list))
    # print(tot_selected_img_idx_list[54])
    print(tot_selected_img_idx_list)
    
    

class Dataset:
    def __init__(self, conf, time_idx, mode='train'):
        super(Dataset, self).__init__()
        print('Load data: Begin')
        self.device = torch.device('cuda')
        self.conf = conf
        
        self.selected_img_idxes_list = [0, 1, 5, 6, 7, 8, 9, 13, 14, 15, 35, 36, 42, 43, 44, 48, 49, 50, 51, 55, 56, 57, 61, 62, 63, 69, 84, 90, 91, 92, 96, 97]
        # self.selected_img_idxes_list = [0, 1, 5, 6, 7, 8, 9, 12, 13, 14, 15, 20, 21, 22, 23, 26, 27, 28, 29, 35, 36, 37, 40, 41, 70, 71, 79, 82, 83, 84, 85, 92, 93, 96, 97, 98, 99, 105, 106, 107, 110, 111, 112, 113, 118, 119, 120, 121, 124, 125, 133, 134, 135, 139, 174, 175, 176, 177, 180, 188, 189, 190, 191, 194, 195]
        
        self.selected_img_idxes_list = [0, 1, 6, 7, 8, 9, 12, 13, 14, 15, 20, 21, 22, 23, 26, 27, 36, 40, 41, 70, 71, 78, 82, 83, 84, 85, 90, 91, 92, 93, 96, 97]
        
        self.selected_img_idxes_list = [0, 1, 6, 7, 8, 9, 12, 13, 14, 15, 20, 21, 22, 23, 26, 27, 36, 40, 41, 70, 71, 78, 82, 83, 84, 85, 90, 91, 92, 93, 96, 97, 98, 99, 104, 105, 106, 107, 110, 111, 112, 113, 118, 119, 120, 121, 124, 125, 134, 135, 139, 174, 175, 176, 177, 180, 181, 182, 183, 188, 189, 190, 191, 194, 195]
        # selected img idxes list #
        self.selected_img_idxes_list = [0, 1, 6, 7, 8, 9, 12, 13, 14, 20, 21, 22, 23, 26, 27, 70, 78, 83, 84, 85, 91, 92, 93, 96, 97, 98, 99, 105, 106, 107, 110, 111, 112, 113, 119, 120, 121, 124, 125, 175, 176, 181, 182, 188, 189, 190, 191, 194, 195]
        # or the timestep to the dataset instance ## # selected img idxes list #
        self.selected_img_idxes = np.array(self.selected_img_idxes_list).astype(np.int32)
        
        
        
       

        self.data_dir = conf.get_string('data_dir')
        
        self.data_dir = os.path.join(self.data_dir, f"{time_idx}") # the time_idx #
        
        self.render_cameras_name = conf.get_string('render_cameras_name')
        self.object_cameras_name = conf.get_string('object_cameras_name')

        ## camera outside sphere ## 
        self.camera_outside_sphere = conf.get_bool('camera_outside_sphere', default=True)
        self.scale_mat_scale = conf.get_float('scale_mat_scale', default=1.1)

        camera_dict = np.load(os.path.join(self.data_dir, self.render_cameras_name))
        # camera_dict = np.load("/home/xueyi/diffsim/NeuS/public_data/dtu_scan24/cameras_sphere.npz")
        self.camera_dict = camera_dict # rendr camera dict #
        # render camera dict # # number of pixels in the views -> very thin geometry is not useful 
        self.images_lis = sorted(glob(os.path.join(self.data_dir, 'image/*.png')))
        
        # iamges_lis # and the images_lis and the images_lis #
        # self.images_lis = self.images_lis[:1] # totoal views and poses of the camera; # and select cameras for rendering #
        
        self.n_images = len(self.images_lis)
        
        if mode == 'train_from_model_rules':
            self.images_np = cv.imread(self.images_lis[0]) / 256.0
            print(self.images_np.shape)
            self.images_np = np.reshape(self.images_np, (1, self.images_np.shape[0], self.images_np.shape[1], self.images_np.shape[2]))
            self.images_np = [self.images_np for _ in range(len(self.images_lis))]
            self.images_np = np.concatenate(self.images_np, axis=0)
        else:
            presaved_imags_npy_fn = os.path.join(self.data_dir, "processed_images.npy")
            if not os.path.exists(presaved_imags_npy_fn):
                self.images_np = []
                for i_im_idx, im_name in enumerate(self.images_lis):
                    print(f"loading {i_im_idx} / {len(self.images_lis)}")
                    cur_im = cv.imread(im_name) # for im_name in self.images_lis
                    self.images_np.append(cur_im)
                self.images_np = np.stack(self.images_np) / 256.0
                np.save(presaved_imags_npy_fn, self.images_np)
            else:
                print(f"Loading from {presaved_imags_npy_fn}")
                self.images_np = np.load(presaved_imags_npy_fn, allow_pickle=True)

        # self.images_np = np.stack([cv.imread(im_name) for im_name in self.images_lis]) / 256.0
        
        
        # self.selected_img_idxes_list = list(range(self.images_np.shape[0]))
        # self.selected_img_idxes = np.array(self.selected_img_idxes_list).astype(np.int32)
        
        # get 
        self.images_np = self.images_np[self.selected_img_idxes] ## get selected iamges_np #
        
        ### if we deal with the backgound carefully ### ### get 
        # self.images_np = np.stack([cv.imread(im_name) for im_name in self.images_lis]) / 255.0
        # self.images_np = self.images_np[self.selected_img_idxes]
        self.images_np = 1. - self.images_np ### 
        
        
        
        self.masks_lis = sorted(glob(os.path.join(self.data_dir, 'mask/*.png')))
        
        if mode == 'train_from_model_rules':
            self.masks_np = cv.imread(self.masks_lis[0]) / 256.0
            print("masks shape:", self.masks_np.shape)
            self.masks_np = np.reshape(self.masks_np, (1, self.masks_np.shape[0], self.masks_np.shape[1], self.masks_np.shape[2])) # .repeat(len(self.masks_lis), 1, 1)
            self.masks_np = [self.masks_np for _ in range(len(self.masks_lis))]
            self.masks_np = np.concatenate(self.masks_np, axis=0)
        else:
            presaved_masks_npy_fn = os.path.join(self.data_dir, "processed_masks.npy")
            # self.masks_lis = self.masks_lis[:1]
            
            if not os.path.exists(presaved_masks_npy_fn):
                try:
                    self.masks_np = np.stack([cv.imread(im_name) for im_name in self.masks_lis]) / 256.0
                    self.masks_np = self.masks_np[self.selected_img_idxes]
                except:
                    self.masks_np = self.images_np.copy()
                np.save(presaved_masks_npy_fn, self.masks_np)
            else:
                print(f"Loading from {presaved_masks_npy_fn}")
                self.masks_np = np.load(presaved_masks_npy_fn, allow_pickle=True)


        
        


        # world_mat is a projection matrix from world to image
        self.world_mats_np = [camera_dict['world_mat_%d' % idx].astype(np.float32) for idx in range(self.n_images)]

        self.scale_mats_np = []

        # scale_mat: used for coordinate normalization, we assume the scene to render is inside a unit sphere at origin.
        self.scale_mats_np = [camera_dict['scale_mat_%d' % idx].astype(np.float32) for idx in range(self.n_images)]

        self.intrinsics_all = []
        self.pose_all = []

        # for idx, (scale_mat, world_mat) in enumerate(zip(self.scale_mats_np, self.world_mats_np)):
        for idx  in self.selected_img_idxes_list:
            scale_mat = self.scale_mats_np[idx]
            world_mat = self.world_mats_np[idx]
            
            if "hand" in self.data_dir:
                intrinsics = np.eye(4)
                fov = 512. / 2. # * 2
                res = 512.
                intrinsics[:3, :3] = np.array([
                    [fov, 0, 0.5* res], # res #
                    [0, fov, 0.5* res], # res #
                    [0, 0, 1]
                ], dtype=np.float32)
                pose = camera_dict['camera_mat_%d' % idx].astype(np.float32)
            else:
                P = world_mat @ scale_mat
                P = P[:3, :4]
                intrinsics, pose = load_K_Rt_from_P(None, P)
                
            self.intrinsics_all.append(torch.from_numpy(intrinsics).float())
            self.pose_all.append(torch.from_numpy(pose).float())

        ### images, masks, 
        self.images = torch.from_numpy(self.images_np.astype(np.float32)).cpu()  # [n_images, H, W, 3] #
        self.masks  = torch.from_numpy(self.masks_np.astype(np.float32)).cpu()   # [n_images, H, W, 3] #
        self.intrinsics_all = torch.stack(self.intrinsics_all).to(self.device)   # [n_images, 4, 4] # optimize sdf field # rigid model hand
        self.intrinsics_all_inv = torch.inverse(self.intrinsics_all)  # [n_images, 4, 4]
        self.focal = self.intrinsics_all[0][0, 0]
        self.pose_all = torch.stack(self.pose_all).to(self.device)  # [n_images, 4, 4]
        self.H, self.W = self.images.shape[1], self.images.shape[2]
        self.image_pixels = self.H * self.W

        object_bbox_min = np.array([-1.01, -1.01, -1.01, 1.0])
        object_bbox_max = np.array([ 1.01,  1.01,  1.01, 1.0])
        # Object scale mat: region of interest to **extract mesh**
        object_scale_mat = np.load(os.path.join(self.data_dir, self.object_cameras_name))['scale_mat_0']
        object_bbox_min = np.linalg.inv(self.scale_mats_np[0]) @ object_scale_mat @ object_bbox_min[:, None]
        object_bbox_max = np.linalg.inv(self.scale_mats_np[0]) @ object_scale_mat @ object_bbox_max[:, None]
        self.object_bbox_min = object_bbox_min[:3, 0]
        self.object_bbox_max = object_bbox_max[:3, 0]
        
        self.n_images = self.images.size(0)

        print('Load data: End')
        
    def get_rays(H, W, K, c2w, inverse_y, flip_x, flip_y, mode='center'):
        i, j = torch.meshgrid( # meshgrid #
            torch.linspace(0, W-1, W, device=c2w.device),
            torch.linspace(0, H-1, H, device=c2w.device))
        i = i.t().float()
        j = j.t().float()
        if mode == 'lefttop':
            pass
        elif mode == 'center':
            i, j = i+0.5, j+0.5
        elif mode == 'random':
            i = i+torch.rand_like(i)
            j = j+torch.rand_like(j)
        else:
            raise NotImplementedError

        if flip_x:
            i = i.flip((1,))
        if flip_y:
            j = j.flip((0,))
        if inverse_y:
            dirs = torch.stack([(i-K[0][2])/K[0][0], (j-K[1][2])/K[1][1], torch.ones_like(i)], -1)
        else:
            dirs = torch.stack([(i-K[0][2])/K[0][0], -(j-K[1][2])/K[1][1], -torch.ones_like(i)], -1)
        # Rotate ray directions from camera frame to the world frame
        rays_d = torch.sum(dirs[..., np.newaxis, :] * c2w[:3,:3], -1)  # dot product, equals to: [c2w.dot(dir) for dir in dirs]
        # Translate camera frame's origin to the world frame. It is the origin of all rays.
        rays_o = c2w[:3,3].expand(rays_d.shape)
        return rays_o, rays_d

    def gen_rays_at(self, img_idx, resolution_level=1):
        """
        Generate rays at world space from one camera.
        """
        l = resolution_level
        tx = torch.linspace(0, self.W - 1, self.W // l)
        ty = torch.linspace(0, self.H - 1, self.H // l)
        pixels_x, pixels_y = torch.meshgrid(tx, ty)
        
        ##### previous method #####
        # p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1) # W, H, 3
        # # p = torch.stack([pixels_x, pixels_y, -1. * torch.ones_like(pixels_y)], dim=-1) # W, H, 3
        # p = torch.matmul(self.intrinsics_all_inv[img_idx, None, None, :3, :3], p[:, :, :, None]).squeeze()  # W, H, 3
        # rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True)  # W, H, 3
        # rays_v = torch.matmul(self.pose_all[img_idx, None, None, :3, :3], rays_v[:, :, :, None]).squeeze()  # W, H, 3
        # rays_o = self.pose_all[img_idx, None, None, :3, 3].expand(rays_v.shape)  # W, H, 3
        ##### previous method #####
        
        fov = 512.; res = 512.
        K = np.array([
            [fov, 0, 0.5* res],
            [0, fov, 0.5* res],
            [0, 0, 1]
        ], dtype=np.float32)
        K = torch.from_numpy(K).float().cuda()
        
        
        # ### `center` mode ### #
        c2w = self.pose_all[img_idx]
        pixels_x, pixels_y = pixels_x+0.5, pixels_y+0.5
        
        dirs = torch.stack([(pixels_x-K[0][2])/K[0][0], -(pixels_y-K[1][2])/K[1][1], -torch.ones_like(pixels_x)], -1)
        rays_v = torch.sum(dirs[..., np.newaxis, :] * c2w[:3,:3], -1) 
        rays_o = c2w[:3,3].expand(rays_v.shape)
        # dirs = torch.stack([(i-K[0][2])/K[0][0], -(j-K[1][2])/K[1][1], -torch.ones_like(i)], -1)
        
        # p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1) # W, H, 3
        # # p = torch.stack([pixels_x, pixels_y, -1. * torch.ones_like(pixels_y)], dim=-1) # W, H, 3
        # p = torch.matmul(self.intrinsics_all_inv[img_idx, None, None, :3, :3], p[:, :, :, None]).squeeze()  # W, H, 3
        # rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True)  # W, H, 3
        # rays_v = torch.matmul(self.pose_all[img_idx, None, None, :3, :3], rays_v[:, :, :, None]).squeeze()  # W, H, 3
        # rays_o = self.pose_all[img_idx, None, None, :3, 3].expand(rays_v.shape)  # W, H, 3
        return rays_o.transpose(0, 1), rays_v.transpose(0, 1)

    def gen_random_rays_at(self, img_idx, batch_size):
        """
        Generate random rays at world space from one camera.
        """
        img_idx = img_idx.cpu()
        pixels_x = torch.randint(low=0, high=self.W, size=[batch_size]).cpu()
        pixels_y = torch.randint(low=0, high=self.H, size=[batch_size]).cpu()
        
        # print(self.images.device, img_idx.device, pixels_y.device)
        color = self.images[img_idx][(pixels_y, pixels_x)]    # batch_size, 3
        
        mask = self.masks[img_idx][(pixels_y, pixels_x)]      # batch_size, 3
        
        
        ##### previous method #####
        # p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1).float()  # batch_size, 3
        # # p = torch.stack([pixels_x, pixels_y, -1. * torch.ones_like(pixels_y)], dim=-1).float()  # batch_size, 3
        # p = torch.matmul(self.intrinsics_all_inv[img_idx, None, :3, :3], p[:, :, None]).squeeze() # batch_size, 3
        # rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True)    # batch_size, 3
        # rays_v = torch.matmul(self.pose_all[img_idx, None, :3, :3], rays_v[:, :, None]).squeeze()  # batch_size, 3
        # rays_o = self.pose_all[img_idx, None, :3, 3].expand(rays_v.shape) # batch_size, 3
        ##### previous method #####
        
        fov = 512.; res = 512.
        K = np.array([
            [fov, 0, 0.5* res],
            [0, fov, 0.5* res],
            [0, 0, 1]
        ], dtype=np.float32)
        K = torch.from_numpy(K).float().cuda()
        
        
        # ### `center` mode ### #
        c2w = self.pose_all[img_idx]
        
        pixels_x = pixels_x.cuda()
        pixels_y = pixels_y.cuda()
        pixels_x, pixels_y = pixels_x+0.5, pixels_y+0.5
        
        dirs = torch.stack([(pixels_x-K[0][2])/K[0][0], -(pixels_y-K[1][2])/K[1][1], -torch.ones_like(pixels_x)], -1)
        rays_v = torch.sum(dirs[..., np.newaxis, :] * c2w[:3,:3], -1) 
        rays_o = c2w[:3,3].expand(rays_v.shape)
        
        
        return torch.cat([rays_o.cpu(), rays_v.cpu(), color, mask[:, :1]], dim=-1).cuda()    # batch_size, 10

    def gen_rays_between(self, idx_0, idx_1, ratio, resolution_level=1):
        """
        Interpolate pose between two cameras.
        """
        l = resolution_level
        tx = torch.linspace(0, self.W - 1, self.W // l)
        ty = torch.linspace(0, self.H - 1, self.H // l)
        pixels_x, pixels_y = torch.meshgrid(tx, ty)
        p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1)  # W, H, 3
        p = torch.matmul(self.intrinsics_all_inv[0, None, None, :3, :3], p[:, :, :, None]).squeeze()  # W, H, 3
        rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True)  # W, H, 3
        trans = self.pose_all[idx_0, :3, 3] * (1.0 - ratio) + self.pose_all[idx_1, :3, 3] * ratio
        pose_0 = self.pose_all[idx_0].detach().cpu().numpy()
        pose_1 = self.pose_all[idx_1].detach().cpu().numpy()
        pose_0 = np.linalg.inv(pose_0)
        pose_1 = np.linalg.inv(pose_1)
        rot_0 = pose_0[:3, :3]
        rot_1 = pose_1[:3, :3]
        rots = Rot.from_matrix(np.stack([rot_0, rot_1]))
        key_times = [0, 1]
        slerp = Slerp(key_times, rots)
        rot = slerp(ratio)
        pose = np.diag([1.0, 1.0, 1.0, 1.0])
        pose = pose.astype(np.float32)
        pose[:3, :3] = rot.as_matrix()
        pose[:3, 3] = ((1.0 - ratio) * pose_0 + ratio * pose_1)[:3, 3]
        pose = np.linalg.inv(pose)
        rot = torch.from_numpy(pose[:3, :3]).cuda()
        trans = torch.from_numpy(pose[:3, 3]).cuda()
        rays_v = torch.matmul(rot[None, None, :3, :3], rays_v[:, :, :, None]).squeeze()  # W, H, 3
        rays_o = trans[None, None, :3].expand(rays_v.shape)  # W, H, 3
        return rays_o.transpose(0, 1), rays_v.transpose(0, 1)

    def near_far_from_sphere(self, rays_o, rays_d):
        a = torch.sum(rays_d**2, dim=-1, keepdim=True)
        b = 2.0 * torch.sum(rays_o * rays_d, dim=-1, keepdim=True)
        mid = 0.5 * (-b) / a
        near = mid - 1.0
        far = mid + 1.0
        return near, far

    def image_at(self, idx, resolution_level):
        if self.selected_img_idxes_list is not None:
            img = cv.imread(self.images_lis[self.selected_img_idxes_list[idx]])
        else:
            img = cv.imread(self.images_lis[idx])
        return (cv.resize(img, (self.W // resolution_level, self.H // resolution_level))).clip(0, 255)


if __name__=='__main__':
    data_dir = "/data/datasets/genn/diffsim/diffredmax/save_res/goal_optimize_model_hand_sphere_test_obj_type_active_nfr_10_view_divide_0.5_n_views_7_three_planes_False_recon_dvgo_new_Nposes_7_routine_2"
    data_dir = "/data/datasets/genn/diffsim/neus/public_data/hand_test"
    data_dir = "/data2/datasets/diffsim/neus/public_data/hand_test_routine_2"
    data_dir = "/data2/datasets/diffsim/neus/public_data/hand_test_routine_2_light_color"
    filter_iamges_via_pixel_values(data_dir=data_dir)