Spaces:

liruiw
/

GenSim

Runtime error

File size: 44,339 Bytes

8fc2b4e

"""Miscellaneous utilities."""

import cv2
import random
import matplotlib
import matplotlib.pyplot as plt
import meshcat
import meshcat.geometry as g
import meshcat.transformations as mtf

import PIL
import yaml
import numpy as np
from transforms3d import euler

import pybullet as p
import kornia
from omegaconf import OmegaConf

import os
import torch
import torchvision


# -----------------------------------------------------------------------------
# HEIGHTMAP UTILS
# -----------------------------------------------------------------------------

def get_heightmap(points, colors, bounds, pixel_size):
    """Get top-down (z-axis) orthographic heightmap image from 3D pointcloud.
  
    Args:
      points: HxWx3 float array of 3D points in world coordinates.
      colors: HxWx3 uint8 array of values in range 0-255 aligned with points.
      bounds: 3x2 float array of values (rows: X,Y,Z; columns: min,max) defining
        region in 3D space to generate heightmap in world coordinates.
      pixel_size: float defining size of each pixel in meters.
  
    Returns:
      heightmap: HxW float array of height (from lower z-bound) in meters.
      colormap: HxWx3 uint8 array of backprojected color aligned with heightmap.
    """
    width = int(np.round((bounds[0, 1] - bounds[0, 0]) / pixel_size))
    height = int(np.round((bounds[1, 1] - bounds[1, 0]) / pixel_size))
    heightmap = np.zeros((height, width), dtype=np.float32)
    colormap = np.zeros((height, width, colors.shape[-1]), dtype=np.uint8)

    # Filter out 3D points that are outside of the predefined bounds.
    ix = (points[Ellipsis, 0] >= bounds[0, 0]) & (points[Ellipsis, 0] < bounds[0, 1])
    iy = (points[Ellipsis, 1] >= bounds[1, 0]) & (points[Ellipsis, 1] < bounds[1, 1])
    iz = (points[Ellipsis, 2] >= bounds[2, 0]) & (points[Ellipsis, 2] < bounds[2, 1])
    valid = ix & iy & iz
    points = points[valid]
    colors = colors[valid]

    # Sort 3D points by z-value, which works with array assignment to simulate
    # z-buffering for rendering the heightmap image.
    iz = np.argsort(points[:, -1])
    points, colors = points[iz], colors[iz]
    px = np.int32(np.floor((points[:, 0] - bounds[0, 0]) / pixel_size))
    py = np.int32(np.floor((points[:, 1] - bounds[1, 0]) / pixel_size))
    px = np.clip(px, 0, width - 1)
    py = np.clip(py, 0, height - 1)
    heightmap[py, px] = points[:, 2] - bounds[2, 0]
    for c in range(colors.shape[-1]):
        colormap[py, px, c] = colors[:, c]
    return heightmap, colormap


def get_pointcloud(depth, intrinsics):
    """Get 3D pointcloud from perspective depth image.
  
    Args:
      depth: HxW float array of perspective depth in meters.
      intrinsics: 3x3 float array of camera intrinsics matrix.
  
    Returns:
      points: HxWx3 float array of 3D points in camera coordinates.
    """
    height, width = depth.shape
    xlin = np.linspace(0, width - 1, width)
    ylin = np.linspace(0, height - 1, height)
    px, py = np.meshgrid(xlin, ylin)
    px = (px - intrinsics[0, 2]) * (depth / intrinsics[0, 0])
    py = (py - intrinsics[1, 2]) * (depth / intrinsics[1, 1])
    points = np.float32([px, py, depth]).transpose(1, 2, 0)
    return points


def transform_pointcloud(points, transform):
    """Apply rigid transformation to 3D pointcloud.
  
    Args:
      points: HxWx3 float array of 3D points in camera coordinates.
      transform: 4x4 float array representing a rigid transformation matrix.
  
    Returns:
      points: HxWx3 float array of transformed 3D points.
    """
    padding = ((0, 0), (0, 0), (0, 1))
    homogen_points = np.pad(points.copy(), padding,
                            'constant', constant_values=1)
    for i in range(3):
        points[Ellipsis, i] = np.sum(transform[i, :] * homogen_points, axis=-1)
    return points


def reconstruct_heightmaps(color, depth, configs, bounds, pixel_size):
    """Reconstruct top-down heightmap views from multiple 3D pointclouds."""
    heightmaps, colormaps = [], []
    for color, depth, config in zip(color, depth, configs):
        intrinsics = np.array(config['intrinsics']).reshape(3, 3)
        xyz = get_pointcloud(depth, intrinsics)
        position = np.array(config['position']).reshape(3, 1)
        rotation = p.getMatrixFromQuaternion(config['rotation'])
        rotation = np.array(rotation).reshape(3, 3)
        transform = np.eye(4)
        transform[:3, :] = np.hstack((rotation, position))
        xyz = transform_pointcloud(xyz, transform)
        heightmap, colormap = get_heightmap(xyz, color, bounds, pixel_size)
        heightmaps.append(heightmap)
        colormaps.append(colormap)
    return heightmaps, colormaps


def pix_to_xyz(pixel, height, bounds, pixel_size, skip_height=False):
    """Convert from pixel location on heightmap to 3D position."""
    u, v = pixel
    x = bounds[0, 0] + v * pixel_size
    y = bounds[1, 0] + u * pixel_size
    if not skip_height:
        z = bounds[2, 0] + height[u, v]
    else:
        z = 0.0
    return (x, y, z)


def xyz_to_pix(position, bounds, pixel_size):
    """Convert from 3D position to pixel location on heightmap."""
    u = int(np.round((position[1] - bounds[1, 0]) / pixel_size))
    v = int(np.round((position[0] - bounds[0, 0]) / pixel_size))
    return (u, v)


def unproject_vectorized(uv_coordinates, depth_values,
                         intrinsic,
                         distortion):
    """Vectorized version of unproject(), for N points.
  
    Args:
      uv_coordinates: pixel coordinates to unproject of shape (n, 2).
      depth_values: depth values corresponding index-wise to the uv_coordinates of
        shape (n).
      intrinsic: array of shape (3, 3). This is typically the return value of
        intrinsics_to_matrix.
      distortion: camera distortion parameters of shape (5,).
  
    Returns:
      xyz coordinates in camera frame of shape (n, 3).
    """
    cam_mtx = intrinsic  # shape [3, 3]
    cam_dist = np.array(distortion)  # shape [5]

    # shape of points_undistorted is [N, 2] after the squeeze().
    points_undistorted = cv2.undistortPoints(
        uv_coordinates.reshape((-1, 1, 2)), cam_mtx, cam_dist).squeeze()

    x = points_undistorted[:, 0] * depth_values
    y = points_undistorted[:, 1] * depth_values

    xyz = np.vstack((x, y, depth_values)).T
    return xyz


def unproject_depth_vectorized(im_depth, depth_dist,
                               camera_mtx,
                               camera_dist):
    """Unproject depth image into 3D point cloud, using calibration.
  
    Args:
      im_depth: raw depth image, pre-calibration of shape (height, width).
      depth_dist: depth distortion parameters of shape (8,)
      camera_mtx: intrinsics matrix of shape (3, 3). This is typically the return
        value of intrinsics_to_matrix.
      camera_dist: camera distortion parameters shape (5,).
  
    Returns:
      numpy array of shape [3, H*W]. each column is xyz coordinates
    """
    h, w = im_depth.shape

    # shape of each u_map, v_map is [H, W].
    u_map, v_map = np.meshgrid(np.linspace(
        0, w - 1, w), np.linspace(0, h - 1, h))

    adjusted_depth = depth_dist[0] + im_depth * depth_dist[1]

    # shape after stack is [N, 2], where N = H * W.
    uv_coordinates = np.stack((u_map.reshape(-1), v_map.reshape(-1)), axis=-1)

    return unproject_vectorized(uv_coordinates, adjusted_depth.reshape(-1),
                                camera_mtx, camera_dist)


# -----------------------------------------------------------------------------
# MATH UTILS
# -----------------------------------------------------------------------------


def sample_distribution(prob, n_samples=1):
    """Sample data point from a custom distribution."""
    flat_prob = prob.flatten() / np.sum(prob)
    rand_ind = np.random.choice(
        np.arange(len(flat_prob)), n_samples, p=flat_prob, replace=False)
    rand_ind_coords = np.array(np.unravel_index(rand_ind, prob.shape)).T
    return np.int32(rand_ind_coords.squeeze())


# -------------------------------------------------------------------------
# Transformation Helper Functions
# -------------------------------------------------------------------------


def invert(pose):
    return p.invertTransform(pose[0], pose[1])


def multiply(pose0, pose1):
    return p.multiplyTransforms(pose0[0], pose0[1], pose1[0], pose1[1])


def apply(pose, position):
    position = np.float32(position)
    position_shape = position.shape
    position = np.float32(position).reshape(3, -1)
    rotation = np.float32(p.getMatrixFromQuaternion(pose[1])).reshape(3, 3)
    translation = np.float32(pose[0]).reshape(3, 1)
    position = rotation @ position + translation
    return tuple(position.reshape(position_shape))


def eulerXYZ_to_quatXYZW(rotation):  # pylint: disable=invalid-name
    """Abstraction for converting from a 3-parameter rotation to quaterion.
  
    This will help us easily switch which rotation parameterization we use.
    Quaternion should be in xyzw order for pybullet.
  
    Args:
      rotation: a 3-parameter rotation, in xyz order tuple of 3 floats
  
    Returns:
      quaternion, in xyzw order, tuple of 4 floats
    """
    euler_zxy = (rotation[2], rotation[0], rotation[1])
    quaternion_wxyz = euler.euler2quat(*euler_zxy, axes='szxy')
    q = quaternion_wxyz
    quaternion_xyzw = (q[1], q[2], q[3], q[0])
    return quaternion_xyzw


def quatXYZW_to_eulerXYZ(quaternion_xyzw):  # pylint: disable=invalid-name
    """Abstraction for converting from quaternion to a 3-parameter toation.
  
    This will help us easily switch which rotation parameterization we use.
    Quaternion should be in xyzw order for pybullet.
  
    Args:
      quaternion_xyzw: in xyzw order, tuple of 4 floats
  
    Returns:
      rotation: a 3-parameter rotation, in xyz order, tuple of 3 floats
    """
    q = quaternion_xyzw
    quaternion_wxyz = np.array([q[3], q[0], q[1], q[2]])
    euler_zxy = euler.quat2euler(quaternion_wxyz, axes='szxy')
    euler_xyz = (euler_zxy[1], euler_zxy[2], euler_zxy[0])
    return euler_xyz


def apply_transform(transform_to_from, points_from):
    r"""Transforms points (3D) into new frame.
  
    Using transform_to_from notation.
  
    Args:
      transform_to_from: numpy.ndarray of shape [B,4,4], SE3
      points_from: numpy.ndarray of shape [B,3,N]
  
    Returns:
      points_to: numpy.ndarray of shape [B,3,N]
    """
    num_points = points_from.shape[-1]

    # non-batched
    if len(transform_to_from.shape) == 2:
        ones = np.ones((1, num_points))

        # makes these each into homogenous vectors
        points_from = np.vstack((points_from, ones))  # [4,N]
        points_to = transform_to_from @ points_from  # [4,N]
        return points_to[0:3, :]  # [3,N]

    # batched
    else:
        assert len(transform_to_from.shape) == 3
        batch_size = transform_to_from.shape[0]
        zeros = np.ones((batch_size, 1, num_points))
        points_from = np.concatenate((points_from, zeros), axis=1)
        assert points_from.shape[1] == 4
        points_to = transform_to_from @ points_from
        return points_to[:, 0:3, :]


# -----------------------------------------------------------------------------
# IMAGE UTILS
# -----------------------------------------------------------------------------


def preprocess(img, dist='transporter'):
    """Pre-process input (subtract mean, divide by std)."""

    transporter_color_mean = [0.18877631, 0.18877631, 0.18877631]
    transporter_color_std = [0.07276466, 0.07276466, 0.07276466]
    transporter_depth_mean = 0.00509261
    transporter_depth_std = 0.00903967

    franka_color_mean = [0.622291933, 0.628313992, 0.623031488]
    franka_color_std = [0.168154213, 0.17626014, 0.184527364]
    franka_depth_mean = 0.872146842
    franka_depth_std = 0.195743116

    clip_color_mean = [0.48145466, 0.4578275, 0.40821073]
    clip_color_std = [0.26862954, 0.26130258, 0.27577711]

    # choose distribution
    if dist == 'clip':
        color_mean = clip_color_mean
        color_std = clip_color_std
    elif dist == 'mdetr':
        color_mean = [0.485, 0.456, 0.406]
        color_std = [0.229, 0.224, 0.225]
    elif dist == 'franka':
        color_mean = franka_color_mean
        color_std = franka_color_std
    else:
        color_mean = transporter_color_mean
        color_std = transporter_color_std

    if dist == 'franka':
        depth_mean = franka_depth_mean
        depth_std = franka_depth_std
    else:
        depth_mean = transporter_depth_mean
        depth_std = transporter_depth_std

    # convert to pytorch tensor (if required)
    if type(img) == torch.Tensor:
        def cast_shape(stat, img):
            tensor = torch.from_numpy(np.array(stat)).to(device=img.device, dtype=img.dtype)
            tensor = tensor.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
            tensor = tensor.repeat(img.shape[0], 1, img.shape[-2], img.shape[-1])
            return tensor

        color_mean = cast_shape(color_mean, img)
        color_std = cast_shape(color_std, img)
        depth_mean = cast_shape(depth_mean, img)
        depth_std = cast_shape(depth_std, img)

        # normalize
        img = img.clone()
        img[:, :3, :, :] = ((img[:, :3, :, :] / 255 - color_mean) / color_std)
        img[:, 3:, :, :] = ((img[:, 3:, :, :] - depth_mean) / depth_std)
    else:
        # normalize
        img[:, :, :3] = (img[:, :, :3] / 255 - color_mean) / color_std
        img[:, :, 3:] = (img[:, :, 3:] - depth_mean) / depth_std

    # if dist == 'franka' or dist == 'transporter':
    #     print(np.mean(img[:,:3,:,:].detach().cpu().numpy(), axis=(0,2,3)),
    #           np.mean(img[:,3,:,:].detach().cpu().numpy()))

    return img


def deprocess(img):
    color_mean = 0.18877631
    depth_mean = 0.00509261
    color_std = 0.07276466
    depth_std = 0.00903967

    img[:, :, :3] = np.uint8(((img[:, :, :3] * color_std) + color_mean) * 255)
    img[:, :, 3:] = np.uint8(((img[:, :, 3:] * depth_std) + depth_mean) * 255)
    return img


def get_fused_heightmap(obs, configs, bounds, pix_size):
    """Reconstruct orthographic heightmaps with segmentation masks."""
    heightmaps, colormaps = reconstruct_heightmaps(
        obs['color'], obs['depth'], configs, bounds, pix_size)
    colormaps = np.float32(colormaps)
    heightmaps = np.float32(heightmaps)

    # Fuse maps from different views.
    valid = np.sum(colormaps, axis=3) > 0
    repeat = np.sum(valid, axis=0)
    repeat[repeat == 0] = 1
    cmap = np.sum(colormaps, axis=0) / repeat[Ellipsis, None]
    cmap = np.uint8(np.round(cmap))
    hmap = np.max(heightmaps, axis=0)  # Max to handle occlusions.
    return cmap, hmap


def get_image_transform(theta, trans, pivot=(0, 0)):
    """Compute composite 2D rigid transformation matrix."""
    # Get 2D rigid transformation matrix that rotates an image by theta (in
    # radians) around pivot (in pixels) and translates by trans vector (in
    # pixels)
    pivot_t_image = np.array([[1., 0., -pivot[0]], [0., 1., -pivot[1]],
                              [0., 0., 1.]])
    image_t_pivot = np.array([[1., 0., pivot[0]], [0., 1., pivot[1]],
                              [0., 0., 1.]])
    transform = np.array([[np.cos(theta), -np.sin(theta), trans[0]],
                          [np.sin(theta), np.cos(theta), trans[1]], [0., 0., 1.]])
    return np.dot(image_t_pivot, np.dot(transform, pivot_t_image))


def check_transform(image, pixel, transform):
    """Valid transform only if pixel locations are still in FoV after transform."""
    new_pixel = np.flip(
        np.int32(
            np.round(
                np.dot(transform,
                       np.float32([pixel[1], pixel[0],
                                   1.]).reshape(3, 1))))[:2].squeeze())
    valid = np.all(
        new_pixel >= 0
    ) and new_pixel[0] < image.shape[0] and new_pixel[1] < image.shape[1]
    return valid, new_pixel


def get_se3_from_image_transform(theta, trans, pivot, heightmap, bounds,
                                 pixel_size):
    """Calculate SE3 from image transform."""
    position_center = pix_to_xyz(
        np.flip(np.int32(np.round(pivot))),
        heightmap,
        bounds,
        pixel_size,
        skip_height=False)
    new_position_center = pix_to_xyz(
        np.flip(np.int32(np.round(pivot + trans))),
        heightmap,
        bounds,
        pixel_size,
        skip_height=True)
    # Don't look up the z height, it might get augmented out of frame
    new_position_center = (new_position_center[0], new_position_center[1],
                           position_center[2])

    delta_position = np.array(new_position_center) - np.array(position_center)

    t_world_center = np.eye(4)
    t_world_center[0:3, 3] = np.array(position_center)

    t_centernew_center = np.eye(4)
    euler_zxy = (-theta, 0, 0)
    t_centernew_center[0:3, 0:3] = euler.euler2mat(
        *euler_zxy, axes='szxy')[0:3, 0:3]

    t_centernew_center_tonly = np.eye(4)
    t_centernew_center_tonly[0:3, 3] = -delta_position
    t_centernew_center = t_centernew_center @ t_centernew_center_tonly

    t_world_centernew = t_world_center @ np.linalg.inv(t_centernew_center)
    return t_world_center, t_world_centernew


def get_random_image_transform_params(image_size, theta_sigma=60):
    theta = np.random.normal(0, np.deg2rad(theta_sigma))

    trans_sigma = np.min(image_size) / 6
    trans = np.random.normal(0, trans_sigma, size=2)  # [x, y]
    pivot = (image_size[1] / 2, image_size[0] / 2)
    return theta, trans, pivot


def q_mult(q1, q2):
    w1, x1, y1, z1 = q1
    w2, x2, y2, z2 = q2
    w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
    x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
    y = w1 * y2 + y1 * w2 + z1 * x2 - x1 * z2
    z = w1 * z2 + z1 * w2 + x1 * y2 - y1 * x2
    return (w, x, y, z)

def perturb(input_image, pixels, theta_sigma=60, add_noise=False):
    """Data augmentation on images."""
    image_size = input_image.shape[:2]

    # Compute random rigid transform.
    while True:
        theta, trans, pivot = get_random_image_transform_params(image_size, theta_sigma=theta_sigma)
        transform = get_image_transform(theta, trans, pivot)
        transform_params = theta, trans, pivot

        # Ensure pixels remain in the image after transform.
        is_valid = True
        new_pixels = []
        new_rounded_pixels = []
        for pixel in pixels:
            pixel = np.float32([pixel[1], pixel[0], 1.]).reshape(3, 1)

            rounded_pixel = np.int32(np.round(transform @ pixel))[:2].squeeze()
            rounded_pixel = np.flip(rounded_pixel)

            pixel = (transform @ pixel)[:2].squeeze()
            pixel = np.flip(pixel)

            in_fov_rounded = rounded_pixel[0] < image_size[0] and rounded_pixel[
                1] < image_size[1]
            in_fov = pixel[0] < image_size[0] and pixel[1] < image_size[1]

            is_valid = is_valid and np.all(rounded_pixel >= 0) and np.all(
                pixel >= 0) and in_fov_rounded and in_fov

            new_pixels.append(pixel)
            new_rounded_pixels.append(rounded_pixel)
        if is_valid:
            break

    # Apply rigid transform to image and pixel labels.
    input_image = cv2.warpAffine(
        input_image,
        transform[:2, :], (image_size[1], image_size[0]),
        flags=cv2.INTER_LINEAR)

    # Apply noise
    color = np.int32(input_image[:,:,:3])
    depth = np.float32(input_image[:,:,3:])

    if add_noise:
        color += np.int32(np.random.normal(0, 3, image_size + (3,)))
        color = np.uint8(np.clip(color, 0, 255))

        depth += np.float32(np.random.normal(0, 0.003, image_size + (3,)))

    input_image = np.concatenate((color, depth), axis=2)

    # length of 5
    transform_params = np.array([theta, trans[0], trans[1], pivot[0], pivot[1]])
    return input_image, new_pixels, new_rounded_pixels, transform_params


def apply_perturbation(input_image, transform_params):
    '''Apply data augmentation with specific transform params'''
    image_size = input_image.shape[:2]

    # Apply rigid transform to image and pixel labels.
    theta, trans, pivot = transform_params[0], transform_params[1:3], transform_params[3:5]
    transform = get_image_transform(theta, trans, pivot)

    input_image = cv2.warpAffine(
        input_image,
        transform[:2, :], (image_size[1], image_size[0]),
        flags=cv2.INTER_LINEAR)
    return input_image


class ImageRotator:
    """Rotate for n rotations."""
    # Reference: https://kornia.readthedocs.io/en/latest/tutorials/warp_affine.html?highlight=rotate

    def __init__(self, n_rotations):
        self.angles = []
        for i in range(n_rotations):
            theta = i * 2 * 180 / n_rotations
            self.angles.append(theta)

    def __call__(self, x_list, pivot, reverse=False):
        rot_x_list = []
        for i, angle in enumerate(self.angles):
            x = x_list[i]# .unsqueeze(0)
            # create transformation (rotation)
            size = len(x)
            alpha = angle if not reverse else (-1.0 * angle)  # in degrees
            angle = torch.ones(size) * alpha

            # define the rotation center
            if type(pivot) is not torch.Tensor:
                center = torch.FloatTensor(pivot)[...,[1,0]]
                center = center.view(1,-1).repeat((size,1))
            else:
                center = pivot[...,[1,0]].view(1,-1).clone().to(angle.device)
            # center: torch.tensor = torch.ones(size, 2)
            # center[..., 0] = int(pivot[1])
            # center[..., 1] = int(pivot[0])

            # define the scale factor
            scale = torch.ones(size, 2)

            # # compute the transformation matrix
            M  = kornia.geometry.get_rotation_matrix2d(center, angle, scale)
            # x_warped = torchvision.transforms.functional.affine(x.float(), scale=1., 
            #             center=[int(pivot[1]),int(pivot[0])], 
            #             angle=alpha, translate=[0,0], shear=0, 
            #             interpolation= torchvision.transforms.InterpolationMode.BILINEAR)


            # apply the transformation to original image
            # M = M.repeat(len(x), 1, 1)
            _, _, h, w = x.shape
            x_warped = kornia.geometry.transform.warp_affine(x.float(), M.to(x.device), dsize=(h, w))
            x_warped = x_warped
            rot_x_list.append(x_warped)

        return rot_x_list

# KD Tree Utils
# Construct K-D Tree to roughly estimate how many objects can fit inside the box.
class TreeNode:

    def __init__(self, parent, children, bbox):
        self.parent = parent
        self.children = children
        self.bbox = bbox  # min x, min y, min z, max x, max y, max z

def KDTree(node, min_object_dim, margin, bboxes):
    size = node.bbox[3:] - node.bbox[:3]

    # Choose which axis to split.
    split = size > 2 * min_object_dim
    if np.sum(split) == 0:
        bboxes.append(node.bbox)
        return
    split = np.float32(split) / np.sum(split)
    split_axis = np.random.choice(range(len(split)), 1, p=split)[0]

    # Split along chosen axis and create 2 children
    cut_ind = np.random.rand() * \
              (size[split_axis] - 2 * min_object_dim) + \
              node.bbox[split_axis] + min_object_dim
    child1_bbox = node.bbox.copy()
    child1_bbox[3 + split_axis] = cut_ind - margin / 2.
    child2_bbox = node.bbox.copy()
    child2_bbox[split_axis] = cut_ind + margin / 2.
    node.children = [
        TreeNode(node, [], bbox=child1_bbox),
        TreeNode(node, [], bbox=child2_bbox)
    ]
    KDTree(node.children[0], min_object_dim, margin, bboxes)
    KDTree(node.children[1], min_object_dim, margin, bboxes)

# -----------------------------------------------------------------------------
# Shape Name UTILS
# -----------------------------------------------------------------------------
google_seen_obj_shapes = {
            'train': [
                'alarm clock',
                'android toy',
                'black boot with leopard print',
                'black fedora',
                'black razer mouse',
                'black sandal',
                'black shoe with orange stripes',
                'bull figure',
                'butterfinger chocolate',
                'c clamp',
                'can opener',
                'crayon box',
                'dog statue',
                'frypan',
                'green and white striped towel',
                'grey soccer shoe with cleats',
                'hard drive',
                'honey dipper',
                'magnifying glass',
                'mario figure',
                'nintendo 3ds',
                'nintendo cartridge',
                'office depot box',
                'orca plush toy',
                'pepsi gold caffeine free box',
                'pepsi wild cherry box',
                'porcelain cup',
                'purple tape',
                'red and white flashlight',
                'rhino figure',
                'rocket racoon figure',
                'scissors',
                'silver tape',
                'spatula with purple head',
                'spiderman figure',
                'tablet',
                'toy school bus',
            ],
            'val': [
                'ball puzzle',
                'black and blue sneakers',
                'black shoe with green stripes',
                'brown fedora',
                'dinosaur figure',
                'hammer',
                'light brown boot with golden laces',
                'lion figure',
                'pepsi max box',
                'pepsi next box',
                'porcelain salad plate',
                'porcelain spoon',
                'red and white striped towel',
                'red cup',
                'screwdriver',
                'toy train',
                'unicorn toy',
                'white razer mouse',
                'yoshi figure'
            ],
            'test': [
                'ball puzzle',
                'black and blue sneakers',
                'black shoe with green stripes',
                'brown fedora',
                'dinosaur figure',
                'hammer',
                'light brown boot with golden laces',
                'lion figure',
                'pepsi max box',
                'pepsi next box',
                'porcelain salad plate',
                'porcelain spoon',
                'red and white striped towel',
                'red cup',
                'screwdriver',
                'toy train',
                'unicorn toy',
                'white razer mouse',
                'yoshi figure'
            ],
        }

google_unseen_obj_shapes = {
            'train': [
                'alarm clock',
                'android toy',
                'black boot with leopard print',
                'black fedora',
                'black razer mouse',
                'black sandal',
                'black shoe with orange stripes',
                'bull figure',
                'butterfinger chocolate',
                'c clamp',
                'can opener',
                'crayon box',
                'dog statue',
                'frypan',
                'green and white striped towel',
                'grey soccer shoe with cleats',
                'hard drive',
                'honey dipper',
                'magnifying glass',
                'mario figure',
                'nintendo 3ds',
                'nintendo cartridge',
                'office depot box',
                'orca plush toy',
                'pepsi gold caffeine free box',
                'pepsi wild cherry box',
                'porcelain cup',
                'purple tape',
                'red and white flashlight',
                'rhino figure',
                'rocket racoon figure',
                'scissors',
                'silver tape',
                'spatula with purple head',
                'spiderman figure',
                'tablet',
                'toy school bus',
            ],
            'val': [
                'ball puzzle',
                'black and blue sneakers',
                'black shoe with green stripes',
                'brown fedora',
                'dinosaur figure',
                'hammer',
                'light brown boot with golden laces',
                'lion figure',
                'pepsi max box',
                'pepsi next box',
                'porcelain salad plate',
                'porcelain spoon',
                'red and white striped towel',
                'red cup',
                'screwdriver',
                'toy train',
                'unicorn toy',
                'white razer mouse',
                'yoshi figure'
            ],
            'test': [
                'ball puzzle',
                'black and blue sneakers',
                'black shoe with green stripes',
                'brown fedora',
                'dinosaur figure',
                'hammer',
                'light brown boot with golden laces',
                'lion figure',
                'pepsi max box',
                'pepsi next box',
                'porcelain salad plate',
                'porcelain spoon',
                'red and white striped towel',
                'red cup',
                'screwdriver',
                'toy train',
                'unicorn toy',
                'white razer mouse',
                'yoshi figure'
            ],
        }

google_all_shapes = {
            'train': [
                'alarm clock',
                'android toy',
                'ball puzzle',
                'black and blue sneakers',
                'black boot with leopard print',
                'black fedora',
                'black razer mouse',
                'black sandal',
                'black shoe with green stripes',
                'black shoe with orange stripes',
                'brown fedora',
                'bull figure',
                'butterfinger chocolate',
                'c clamp',
                'can opener',
                'crayon box',
                'dinosaur figure',
                'dog statue',
                'frypan',
                'green and white striped towel',
                'grey soccer shoe with cleats',
                'hammer',
                'hard drive',
                'honey dipper',
                'light brown boot with golden laces',
                'lion figure',
                'magnifying glass',
                'mario figure',
                'nintendo 3ds',
                'nintendo cartridge',
                'office depot box',
                'orca plush toy',
                'pepsi gold caffeine free box',
                'pepsi max box',
                'pepsi next box',
                'pepsi wild cherry box',
                'porcelain cup',
                'porcelain salad plate',
                'porcelain spoon',
                'purple tape',
                'red and white flashlight',
                'red and white striped towel',
                'red cup',
                'rhino figure',
                'rocket racoon figure',
                'scissors',
                'screwdriver',
                'silver tape',
                'spatula with purple head',
                'spiderman figure',
                'tablet',
                'toy school bus',
                'toy train',
                'unicorn toy',
                'white razer mouse',
                'yoshi figure',
            ],
            'val': [
                'alarm clock',
                'android toy',
                'ball puzzle',
                'black and blue sneakers',
                'black boot with leopard print',
                'black fedora',
                'black razer mouse',
                'black sandal',
                'black shoe with green stripes',
                'black shoe with orange stripes',
                'brown fedora',
                'bull figure',
                'butterfinger chocolate',
                'c clamp',
                'can opener',
                'crayon box',
                'dinosaur figure',
                'dog statue',
                'frypan',
                'green and white striped towel',
                'grey soccer shoe with cleats',
                'hammer',
                'hard drive',
                'honey dipper',
                'light brown boot with golden laces',
                'lion figure',
                'magnifying glass',
                'mario figure',
                'nintendo 3ds',
                'nintendo cartridge',
                'office depot box',
                'orca plush toy',
                'pepsi gold caffeine free box',
                'pepsi max box',
                'pepsi next box',
                'pepsi wild cherry box',
                'porcelain cup',
                'porcelain salad plate',
                'porcelain spoon',
                'purple tape',
                'red and white flashlight',
                'red and white striped towel',
                'red cup',
                'rhino figure',
                'rocket racoon figure',
                'scissors',
                'screwdriver',
                'silver tape',
                'spatula with purple head',
                'spiderman figure',
                'tablet',
                'toy school bus',
                'toy train',
                'unicorn toy',
                'white razer mouse',
                'yoshi figure',
            ],
            'test': [
                'alarm clock',
                'android toy',
                'ball puzzle',
                'black and blue sneakers',
                'black boot with leopard print',
                'black fedora',
                'black razer mouse',
                'black sandal',
                'black shoe with green stripes',
                'black shoe with orange stripes',
                'brown fedora',
                'bull figure',
                'butterfinger chocolate',
                'c clamp',
                'can opener',
                'crayon box',
                'dinosaur figure',
                'dog statue',
                'frypan',
                'green and white striped towel',
                'grey soccer shoe with cleats',
                'hammer',
                'hard drive',
                'honey dipper',
                'light brown boot with golden laces',
                'lion figure',
                'magnifying glass',
                'mario figure',
                'nintendo 3ds',
                'nintendo cartridge',
                'office depot box',
                'orca plush toy',
                'pepsi gold caffeine free box',
                'pepsi max box',
                'pepsi next box',
                'pepsi wild cherry box',
                'porcelain cup',
                'porcelain salad plate',
                'porcelain spoon',
                'purple tape',
                'red and white flashlight',
                'red and white striped towel',
                'red cup',
                'rhino figure',
                'rocket racoon figure',
                'scissors',
                'screwdriver',
                'silver tape',
                'spatula with purple head',
                'spiderman figure',
                'tablet',
                'toy school bus',
                'toy train',
                'unicorn toy',
                'white razer mouse',
                'yoshi figure',
            ],
        }
assembling_kit_shapes = {
            0: "letter R shape",
            1: "letter A shape",
            2: "triangle",
            3: "square",
            4: "plus",
            5: "letter T shape",
            6: "diamond",
            7: "pentagon",
            8: "rectangle",
            9: "flower",
            10: "star",
            11: "circle",
            12: "letter G shape",
            13: "letter V shape",
            14: "letter E shape",
            15: "letter L shape",
            16: "ring",
            17: "hexagon",
            18: "heart",
            19: "letter M shape",
        }

# -----------------------------------------------------------------------------
# COLOR AND PLOT UTILS
# -----------------------------------------------------------------------------


# Colors (Tableau palette).
COLORS = {
    'blue': [78.0 / 255.0, 121.0 / 255.0, 167.0 / 255.0],
    'red': [255.0 / 255.0, 087.0 / 255.0, 089.0 / 255.0],
    'green': [089.0 / 255.0, 169.0 / 255.0, 079.0 / 255.0],
    'orange': [242.0 / 255.0, 142.0 / 255.0, 043.0 / 255.0],
    'yellow': [237.0 / 255.0, 201.0 / 255.0, 072.0 / 255.0],
    'purple': [176.0 / 255.0, 122.0 / 255.0, 161.0 / 255.0],
    'pink': [255.0 / 255.0, 157.0 / 255.0, 167.0 / 255.0],
    'cyan': [118.0 / 255.0, 183.0 / 255.0, 178.0 / 255.0],
    'brown': [156.0 / 255.0, 117.0 / 255.0, 095.0 / 255.0],
    'white': [255.0 / 255.0, 255.0 / 255.0, 255.0 / 255.0],
    'gray': [186.0 / 255.0, 176.0 / 255.0, 172.0 / 255.0],
    'indigo': [75.0 / 255.0, 0.0 / 255.0, 130.0 / 255.0],
    'violet': [143.0 / 255.0, 0.0 / 255.0, 255.0 / 255.0],
    'black': [0.0 / 255.0, 0.0 / 255.0, 0.0 / 255.0],
    'silver': [192.0 / 255.0, 192.0 / 255.0, 192.0 / 255.0],
    'gold': [255.0 / 255.0, 215.0 / 255.0, 0.0 / 255.0],    

}

COLORS_NAMES = list(COLORS.keys())
TRAIN_COLORS = ['blue', 'red', 'green', 'yellow', 'brown', 'gray', 'cyan']
EVAL_COLORS = ['blue', 'red', 'green', 'orange', 'purple', 'pink', 'white']


def get_colors(mode, n_colors=-1, **kwargs):
    all_color_names = get_colors_names(mode)

    if n_colors == -1:
        all_color_names = all_color_names
    else:
        all_color_names = random.sample(all_color_names, n_colors)
    return [COLORS[cn] for cn in all_color_names], all_color_names

def get_colors_names(mode):
    if mode == 'train':
        return TRAIN_COLORS
    elif mode == 'full':
        return TRAIN_COLORS
    else:
        return TRAIN_COLORS

def get_random_color():
    return get_colors(mode='train', n_colors=1)

def solve_hanoi_all(n_disks):
    # Solve Hanoi sequence with dynamic programming.
    hanoi_steps = []  # [[object index, from rod, to rod], ...]

    def solve_hanoi(n, t0, t1, t2):
        if n == 0:
            hanoi_steps.append([n, t0, t1])
            return
        solve_hanoi(n - 1, t0, t2, t1)
        hanoi_steps.append([n, t0, t1])
        solve_hanoi(n - 1, t2, t1, t0)

    solve_hanoi(n_disks - 1, 0, 2, 1)
    return hanoi_steps

def plot(fname,  # pylint: disable=dangerous-default-value
         title,
         ylabel,
         xlabel,
         data,
         xlim=[-np.inf, 0],
         xticks=None,
         ylim=[np.inf, -np.inf],
         show_std=True):
    """Plot frame data."""
    # Data is a dictionary that maps experiment names to tuples with 3
    # elements: x (size N array) and y (size N array) and y_std (size N array)

    # Get data limits.
    for name, (x, y, _) in data.items():
        del name
        y = np.array(y)
        xlim[0] = max(xlim[0], np.min(x))
        xlim[1] = max(xlim[1], np.max(x))
        ylim[0] = min(ylim[0], np.min(y))
        ylim[1] = max(ylim[1], np.max(y))

    # Draw background.
    plt.title(title, fontsize=14)
    plt.ylim(ylim)
    plt.ylabel(ylabel, fontsize=14)
    plt.yticks(fontsize=14)
    plt.xlim(xlim)
    plt.xlabel(xlabel, fontsize=14)
    plt.grid(True, linestyle='-', color=[0.8, 0.8, 0.8])
    ax = plt.gca()
    for axis in ['top', 'bottom', 'left', 'right']:
        ax.spines[axis].set_color('#000000')
    plt.rcParams.update({'font.size': 14})
    plt.rcParams['mathtext.default'] = 'regular'
    matplotlib.rcParams['pdf.fonttype'] = 42
    matplotlib.rcParams['ps.fonttype'] = 42

    # Draw data.
    color_iter = 0
    for name, (x, y, std) in data.items():
        del name
        x, y, std = np.float32(x), np.float32(y), np.float32(std)
        upper = np.clip(y + std, ylim[0], ylim[1])
        lower = np.clip(y - std, ylim[0], ylim[1])
        color = COLORS[list(COLORS.keys())[color_iter]]
        if show_std:
            plt.fill_between(x, upper, lower, color=color, linewidth=0, alpha=0.3)
        plt.plot(x, y, color=color, linewidth=2, marker='o', alpha=1.)
        color_iter += 1

    if xticks:
        plt.xticks(ticks=range(len(xticks)), labels=xticks, fontsize=14)
    else:
        plt.xticks(fontsize=14)
    plt.legend([name for name, _ in data.items()],
               loc='lower right', fontsize=14)
    plt.tight_layout()
    plt.savefig(fname)
    plt.clf()


# -----------------------------------------------------------------------------
# MESHCAT UTILS
# -----------------------------------------------------------------------------

def create_visualizer(clear=True):
    print('Waiting for meshcat server... have you started a server?')
    vis = meshcat.Visualizer(zmq_url='tcp://127.0.0.1:6000')
    if clear:
        vis.delete()
    return vis


def make_frame(vis, name, h, radius, o=1.0):
    """Add a red-green-blue triad to the Meschat visualizer.
  
    Args:
      vis (MeshCat Visualizer): the visualizer
      name (string): name for this frame (should be unique)
      h (float): height of frame visualization
      radius (float): radius of frame visualization
      o (float): opacity
    """
    vis[name]['x'].set_object(
        g.Cylinder(height=h, radius=radius),
        g.MeshLambertMaterial(color=0xff0000, reflectivity=0.8, opacity=o))
    rotate_x = mtf.rotation_matrix(np.pi / 2.0, [0, 0, 1])
    rotate_x[0, 3] = h / 2
    vis[name]['x'].set_transform(rotate_x)

    vis[name]['y'].set_object(
        g.Cylinder(height=h, radius=radius),
        g.MeshLambertMaterial(color=0x00ff00, reflectivity=0.8, opacity=o))
    rotate_y = mtf.rotation_matrix(np.pi / 2.0, [0, 1, 0])
    rotate_y[1, 3] = h / 2
    vis[name]['y'].set_transform(rotate_y)

    vis[name]['z'].set_object(
        g.Cylinder(height=h, radius=radius),
        g.MeshLambertMaterial(color=0x0000ff, reflectivity=0.8, opacity=o))
    rotate_z = mtf.rotation_matrix(np.pi / 2.0, [1, 0, 0])
    rotate_z[2, 3] = h / 2
    vis[name]['z'].set_transform(rotate_z)


def meshcat_visualize(vis, obs, act, info):
    """Visualize data using meshcat."""

    for key in sorted(info.keys()):
        pose = info[key]
        pick_transform = np.eye(4)
        pick_transform[0:3, 3] = pose[0]
        quaternion_wxyz = np.asarray(
            [pose[1][3], pose[1][0], pose[1][1], pose[1][2]])
        pick_transform[0:3, 0:3] = mtf.quaternion_matrix(quaternion_wxyz)[0:3, 0:3]
        label = 'obj_' + str(key)
        make_frame(vis, label, h=0.05, radius=0.0012, o=1.0)
        vis[label].set_transform(pick_transform)

    for cam_index in range(len(act['camera_config'])):
        verts = unproject_depth_vectorized(
            obs['depth'][cam_index], np.array([0, 1]),
            np.array(act['camera_config'][cam_index]['intrinsics']).reshape(3, 3),
            np.zeros(5))

        # switch from [N,3] to [3,N]
        verts = verts.T

        cam_transform = np.eye(4)
        cam_transform[0:3, 3] = act['camera_config'][cam_index]['position']
        quaternion_xyzw = act['camera_config'][cam_index]['rotation']
        quaternion_wxyz = np.asarray([
            quaternion_xyzw[3], quaternion_xyzw[0], quaternion_xyzw[1],
            quaternion_xyzw[2]
        ])
        cam_transform[0:3, 0:3] = mtf.quaternion_matrix(quaternion_wxyz)[0:3, 0:3]
        verts = apply_transform(cam_transform, verts)

        colors = obs['color'][cam_index].reshape(-1, 3).T / 255.0

        vis['pointclouds/' + str(cam_index)].set_object(
            g.PointCloud(position=verts, color=colors))


# -----------------------------------------------------------------------------
# CONFIG UTILS
# -----------------------------------------------------------------------------

def set_seed(seed, torch=False):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

    if torch:
        import torch
        torch.manual_seed(seed)


def load_cfg(yaml_path):
    with open(yaml_path, 'r') as f:
        data = yaml.safe_load(f)
    return data


def load_hydra_config(config_path):
    return OmegaConf.load(config_path)