Spaces:

ttxskk
/

AiOS

Sleeping

File size: 56,165 Bytes

d7e58f0

import math
from typing import Iterable, List, Optional, Tuple, Union

import numpy as np
import torch
from pytorch3d.renderer import cameras
from pytorch3d.structures import Meshes
from pytorch3d.transforms import Transform3d

from detrsmpl.core.conventions.cameras.convert_convention import (
    convert_camera_matrix,
    convert_ndc_to_screen,
    convert_screen_to_ndc,
    convert_world_view,
)
from detrsmpl.utils.transforms import ee_to_rotmat
from .builder import CAMERAS


class MMCamerasBase(cameras.CamerasBase):
    """Inherited from Pytorch3D CamerasBase and provide some new functions."""
    def __init__(self, **kwargs) -> None:
        """Initialize your cameras with `build_cameras` following:

        1): provide `K`, `R`, `T`, `resolution`/`image_size`, `in_ndc`
            directly.
            `K` should be shape of (N, 3, 3) or (N, 4, 4).
            `R` should be shape of (N, 3, 3).
            `T` should be shape of (N, 3).
        2): if `K` is not provided, will use `get_default_projection_matrix`
            to generate K from camera intrinsic parameters.
            E.g., you can pass `focal_length`, `principal_point` for
            perspective camers.
            If these args are not provided, will use default values.
        3): if `R` is not provided, will use Identity matrix as default.
        4): if `T` is not provided, will use zeros matrix as default.
        5): `convention` means your source parameter camera convention.
            This mainly depends on how you get the matrixs. E.g., you get the
            `K` `R`, `T` by calibration with opencv, you should set
            `convention = opencv`. To figure out your camera convention,
            please see the definition of its extrinsic and intrinsic matrixs.
            For projection and rendering, the matrixs will be converted to
            `pytorch3d` finally since the `transforms3d` called in rendering
            and projection are defined as `pytorch3d` convention.
        6): `image_size` equals `resolution`.
        7): `in_ndc` could be set for 'PerspectiveCameras' and
            'OrthographicCameras', other cameras are fixed for this arg.
            `in_ndc = True` means your projection matrix is defined as `camera
            space to NDC space`. Under this cirecumstance you need to set
            `image_size` or `resolution` (they are equal) when you need to do
            `transform_points_screen`. You can also override resolution
            in `transform_points_screen` function.
            `in_ndc = False` means your projections matrix is defined as
            `cameras space to screen space`. Under this cirecumstance you do
            not need to set `image_size` or `resolution` (they are equal) when
            you need to do `transform_points_screen` since the projection
            matrix is defined as view space to screen space.
        """
        for k in kwargs:
            if isinstance(kwargs.get(k), np.ndarray):
                kwargs.update({k: torch.Tensor(kwargs[k])})
        convention = kwargs.pop('convention', 'pytorch3d').lower()
        in_ndc = kwargs.pop('in_ndc', kwargs.get('_in_ndc'))
        kwargs.update(_in_ndc=in_ndc)
        is_perspective = kwargs.get('_is_perspective')
        kwargs.pop('is_perspective', None)

        image_size = kwargs.get('image_size', kwargs.get('resolution', None))

        if image_size is not None:
            if isinstance(image_size, (int, float)):
                image_size = (image_size, image_size)
            if isinstance(image_size, (tuple, list)):
                image_size = torch.Tensor(image_size)
            if isinstance(image_size, torch.Tensor):
                if image_size.numel() == 1:
                    image_size = image_size.repeat(2)
                image_size = image_size.view(-1, 2)

        if kwargs.get('K') is None:
            focal_length = kwargs.get('focal_length', None)
            if focal_length is not None:
                if not isinstance(focal_length, Iterable):
                    focal_length = [focal_length, focal_length]
                if not torch.is_tensor(focal_length):
                    focal_length = torch.FloatTensor(focal_length).view(-1, 2)
                elif focal_length.numel() == 1:
                    focal_length = focal_length.repeat(2).view(-1, 2)
                kwargs.update(focal_length=focal_length)

            principal_point = kwargs.get('principal_point', None)
            if principal_point is not None:
                if isinstance(principal_point, (tuple, list)):
                    principal_point = torch.FloatTensor(principal_point)
                principal_point = principal_point.view(-1, 2)
                kwargs.update(principal_point=principal_point)

            K = self.get_default_projection_matrix(**kwargs)

            K, _, _ = convert_camera_matrix(K=K,
                                            is_perspective=is_perspective,
                                            convention_src='pytorch3d',
                                            convention_dst='pytorch3d',
                                            in_ndc_src=in_ndc,
                                            in_ndc_dst=in_ndc,
                                            resolution_dst=image_size,
                                            resolution_src=image_size)
            kwargs.update(K=K)

        K, R, T = convert_camera_matrix(K=kwargs.get('K'),
                                        R=kwargs.get('R', None),
                                        T=kwargs.get('T', None),
                                        convention_src=convention,
                                        convention_dst='pytorch3d',
                                        is_perspective=is_perspective,
                                        in_ndc_src=in_ndc,
                                        in_ndc_dst=in_ndc,
                                        resolution_src=image_size,
                                        resolution_dst=image_size)

        if image_size is not None:
            if image_size.shape[0] == 1:
                image_size = image_size.repeat(K.shape[0], 1)
            kwargs.update(image_size=image_size)
            kwargs.update(resolution=image_size)

        kwargs.update(K=K, R=R, T=T)

        super().__init__(**kwargs)

    def get_camera_plane_normals(self, **kwargs) -> torch.Tensor:
        """Get the identity normal vector which stretchs out of the camera
        plane.

        Could pass `R` to override the camera extrinsic rotation matrix.
        Returns:
            torch.Tensor: shape will be (N, 3)
        """
        normals = torch.Tensor([0, 0, 1]).view(1, 3).to(self.device)
        w2v_trans = self.get_world_to_view_transform(**kwargs)
        normals = w2v_trans.inverse().transform_normals(normals)
        return normals.view(-1, 3)

    def compute_depth_of_points(self, points: torch.Tensor) -> torch.Tensor:
        """Compute depth of points to the camera plane.

        Args:
            points ([torch.Tensor]): shape should be (batch_size, ..., 3).

        Returns:
            torch.Tensor: shape will be (batch_size, 1)
        """
        world_to_view_transform = self.get_world_to_view_transform()
        world_to_view_points = world_to_view_transform.transform_points(
            points.to(self.device))
        return world_to_view_points[..., 2:3]

    def compute_normal_of_meshes(self, meshes: Meshes) -> torch.Tensor:
        """Compute normal of meshes in the camera view.

        Args:
            points ([torch.Tensor]): shape should be (batch_size, 3).

        Returns:
            torch.Tensor: shape will be (batch_size, 1)
        """
        world_to_view_transform = self.get_world_to_view_transform()
        world_to_view_normals = world_to_view_transform.transform_normals(
            meshes.verts_normals_padded().to(self.device))
        return world_to_view_normals

    def __repr__(self):
        """Rewrite __repr__

        Returns:
            str: print the information of cameras (N, in_ndc, device).
        """
        main_str = super().__repr__()
        main_str = main_str.split(')')[0]
        main_str += f'N: {self.__len__()}, in_ndc: {self.in_ndc()}, '
        main_str += f'device: {self.device})'
        return main_str

    def get_image_size(self):
        """Returns the image size, if provided, expected in the form of
        (height, width) The image size is used for conversion of projected
        points to screen coordinates."""
        if hasattr(self, 'image_size'):
            image_size = self.image_size
        if hasattr(self, 'resolution'):
            if self.resolution is not None:
                image_size = self.resolution
        else:
            image_size = None

        return image_size

    def __getitem__(
        self, index: Union[slice, int, torch.Tensor, List,
                           Tuple]) -> 'MMCamerasBase':
        """Slice the cameras by batch dim.

        Args:
            index (Union[slice, int, torch.Tensor, List, Tuple]):
            index for slicing.

        Returns:
            MMCamerasBase: sliced cameras.
        """
        if isinstance(index, int):
            index = [index]
        return self.__class__(K=self.K[index],
                              R=self.R[index],
                              T=self.T[index],
                              image_size=self.get_image_size()[index]
                              if self.get_image_size() is not None else None,
                              in_ndc=self.in_ndc(),
                              convention='pytorch3d',
                              device=self.device)

    def extend(self, N) -> 'MMCamerasBase':
        """Create new camera class which contains each input camera N times.

        Args:
            N: number of new copies of each camera.

        Returns:
            MMCamerasBase object.
        """
        return self.__class__(K=self.K.repeat(N, 1, 1),
                              R=self.R.repeat(N, 1, 1),
                              T=self.T.repeat(N, 1),
                              image_size=self.get_image_size(),
                              in_ndc=self.in_ndc(),
                              convention='pytorch3d',
                              device=self.device)

    def extend_(self, N):
        """extend camera inplace."""
        self.K = self.K.repeat(N, 1, 1)
        self.R = self.R.repeat(N, 1, 1)
        self.T = self.T.repeat(N, 1)
        self._N = self._N * N

    @classmethod
    def get_default_projection_matrix(cls, ):
        """Class method. Calculate the projective transformation matrix by
        default parameters.

        Args:
            **kwargs: parameters for the projection can be passed in as keyword
                arguments to override the default values set in `__init__`.

        Return:
            a `torch.Tensor` which represents a batch of projection matrices K
            of shape (N, 4, 4)
        """
        raise NotImplementedError()

    def to_screen_(self, **kwargs) -> 'MMCamerasBase':
        """Convert to screen inplace."""
        if self.in_ndc():
            if self.get_image_size() is None:
                self.image_size = kwargs.get('image_size')
            else:
                self.image_size = self.get_image_size()
            self.K = convert_ndc_to_screen(K=self.K,
                                           resolution=self.image_size,
                                           is_perspective=self._is_perspective)
            self._in_ndc = False
        else:
            print('Redundant operation, already in screen.')

    def to_ndc_(self, **kwargs) -> 'MMCamerasBase':
        """Convert to ndc inplace."""
        if self.in_ndc():
            print('Redundant operation, already in ndc.')
        else:
            if self.get_image_size() is None:
                self.image_size = kwargs.get('image_size')
            else:
                self.image_size = self.get_image_size()
            self.K = convert_screen_to_ndc(K=self.K,
                                           resolution=self.image_size,
                                           is_perspective=self._is_perspective)
            self._in_ndc = True

    def to_screen(self, **kwargs) -> 'MMCamerasBase':
        """Convert to screen."""
        if self.in_ndc():
            if self.get_image_size() is None:
                self.image_size = kwargs.get('image_size')
            else:
                self.image_size = self.get_image_size()

            K = convert_ndc_to_screen(K=self.K,
                                      resolution=self.image_size,
                                      is_perspective=self._is_perspective)
            return self.__class__(K=K,
                                  R=self.R,
                                  T=self.T,
                                  in_ndc=False,
                                  resolution=self.image_size)
        else:
            print('Redundant operation, already in screen.')

    def to_ndc(self, **kwargs) -> 'MMCamerasBase':
        """Convert to ndc."""
        if self.in_ndc():
            print('Redundant operation, already in ndc.')
        else:
            if self.get_image_size() is None:
                self.image_size = kwargs.get('image_size')
            else:
                self.image_size = self.get_image_size()
            K = convert_screen_to_ndc(K=self.K,
                                      resolution=self.image_size,
                                      is_perspective=self._is_perspective)
            return self.__class__(K=K,
                                  R=self.R,
                                  T=self.T,
                                  in_ndc=True,
                                  resolution=self.image_size)

    def detach(self) -> 'MMCamerasBase':
        image_size = self.image_size.detach(
        ) if self.image_size is not None else None
        return self.__class__(K=self.K.detach(),
                              R=self.R.detach(),
                              T=self.T.detach(),
                              in_ndc=self.in_ndc(),
                              device=self.device,
                              resolution=image_size)

    def concat(self, others) -> 'MMCamerasBase':
        if isinstance(others, type(self)):
            others = [others]
        else:
            raise TypeError('Could only concat with same type cameras.')
        return concat_cameras([self] + others)


@CAMERAS.register_module(name=('WeakPerspectiveCameras', 'WeakPerspective',
                               'weakperspective'))
class WeakPerspectiveCameras(MMCamerasBase):
    """Inherited from [Pytorch3D cameras](https://github.com/facebookresearch/
    pytorch3d/blob/main/pytorch3d/renderer/cameras.py) and mimiced the code
    style. And re-inmplemented functions: compute_projection_matrix,
    get_projection_transform, unproject_points, is_perspective, in_ndc for
    render.

    K modified from [VIBE](https://github.com/mkocabas/VIBE/blob/master/
    lib/utils/renderer.py) and changed to opencv convention.
    Original license please see docs/additional_license/md.

    This intrinsic matrix is orthographics indeed, but could serve as
    weakperspective for single smpl mesh.
    """
    def __init__(
        self,
        scale_x: Union[torch.Tensor, float] = 1.0,
        scale_y: Union[torch.Tensor, float] = 1.0,
        transl_x: Union[torch.Tensor, float] = 0.0,
        transl_y: Union[torch.Tensor, float] = 0.0,
        znear: Union[torch.Tensor, float] = -1.0,
        aspect_ratio: Union[torch.Tensor, float] = 1.0,
        K: Optional[torch.Tensor] = None,
        R: Optional[torch.Tensor] = None,
        T: Optional[torch.Tensor] = None,
        device: Union[torch.device, str] = 'cpu',
        convention: str = 'pytorch3d',
        **kwargs,
    ):
        """Initialize. If K is provided, don't need scale_x, scale_y, transl_x,
        transl_y, znear, aspect_ratio.

        Args:
            scale_x (Union[torch.Tensor, float], optional):
                Scale in x direction.
                Defaults to 1.0.
            scale_y (Union[torch.Tensor, float], optional):
                Scale in y direction.
                Defaults to 1.0.
            transl_x (Union[torch.Tensor, float], optional):
                Translation in x direction.
                Defaults to 0.0.
            transl_y (Union[torch.Tensor, float], optional):
                Translation in y direction.
                Defaults to 0.0.
            znear (Union[torch.Tensor, float], optional):
                near clipping plane of the view frustrum.
                Defaults to -1.0.
            aspect_ratio (Union[torch.Tensor, float], optional):
                aspect ratio of the image pixels. 1.0 indicates square pixels.
                Defaults to 1.0.
            K (Optional[torch.Tensor], optional): Intrinsic matrix of shape
                (N, 4, 4). If provided, don't need scale_x, scale_y, transl_x,
                transl_y, znear, aspect_ratio.
                Defaults to None.
            R (Optional[torch.Tensor], optional):
                Rotation matrix of shape (N, 3, 3).
                Defaults to None.
            T (Optional[torch.Tensor], optional):
                Translation matrix of shape (N, 3).
                Defaults to None.
            device (Union[torch.device, str], optional):
                torch device. Defaults to 'cpu'.
        """
        kwargs.update(
            _in_ndc=True,
            _is_perspective=False,
        )
        kwargs.pop('in_ndc', None)
        kwargs.pop('is_perspective', None)
        super().__init__(scale_x=scale_x,
                         scale_y=scale_y,
                         transl_x=transl_x,
                         transl_y=transl_y,
                         znear=znear,
                         aspect_ratio=aspect_ratio,
                         K=K,
                         R=R,
                         T=T,
                         device=device,
                         convention=convention,
                         **kwargs)

    @staticmethod
    def convert_orig_cam_to_matrix(
            orig_cam: torch.Tensor,
            **kwargs) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Compute intrinsic camera matrix from orig_cam parameter of smpl.

        .. code-block:: python

            r > 1::

                K = [[sx*r,   0,    0,   tx*sx*r],
                     [0,     sy,    0,     ty*sy],
                     [0,      0,    1,         0],
                     [0,      0,    0,         1]]

            or r < 1::

                K = [[sx,    0,     0,   tx*sx],
                     [0,   sy/r,    0,  ty*sy/r],
                     [0,     0,     1,      0],
                     [0,     0,     0,      1],]

            rotation matrix: (N, 3, 3)::

                [[1, 0, 0],
                 [0, 1, 0],
                 [0, 0, 1]]

            translation matrix: (N, 3)::

                [0, 0, -znear]

        Args:
            orig_cam (torch.Tensor): shape should be (N, 4).
            znear (Union[torch.Tensor, float], optional):
                near clipping plane of the view frustrum.
                Defaults to 0.0.
            aspect_ratio (Union[torch.Tensor, float], optional):
                aspect ratio of the image pixels. 1.0 indicates square pixels.
                Defaults to 1.0.

        Returns:
            Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
            opencv intrinsic matrix: (N, 4, 4)
        """
        znear = kwargs.get('znear', -1.0)
        aspect_ratio = kwargs.get('aspect_ratio', 1.0)
        _N = orig_cam.shape[0]
        scale_x, scale_y, transl_x, transl_y = orig_cam[:, 0], orig_cam[:, 1],\
            orig_cam[:, 2], orig_cam[:, 3]
        K = torch.zeros((_N, 4, 4), dtype=torch.float32)
        if aspect_ratio >= 1.0:
            K[:, 0, 0] = scale_x * aspect_ratio
            K[:, 1, 1] = scale_y
            K[:, 0, 3] = transl_x * scale_x * aspect_ratio
            K[:, 1, 3] = transl_y * scale_y
        else:
            K[:, 0, 0] = scale_x
            K[:, 1, 1] = scale_y / aspect_ratio
            K[:, 0, 3] = transl_x * scale_x
            K[:, 1, 3] = transl_y * scale_y / aspect_ratio

        K[:, 3, 3] = 1
        K[:, 2, 2] = 1
        R = torch.eye(3, 3)[None].repeat(_N, 1, 1)
        T = torch.zeros(_N, 3)
        T[:, 2] = znear
        return K, R, T

    @staticmethod
    def convert_K_to_orig_cam(
        K: torch.Tensor,
        aspect_ratio: Union[torch.Tensor, float] = 1.0,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Compute intrinsic camera matrix from pred camera parameter of smpl.

        Args:
            K (torch.Tensor):
                opencv orthographics intrinsic matrix: (N, 4, 4)

            .. code-block:: python

                K = [[sx*r,   0,    0,   tx*sx*r],
                     [0,     sy,    0,   ty*sy],
                     [0,     0,     1,       0],
                     [0,     0,     0,       1],]

            aspect_ratio (Union[torch.Tensor, float], optional):
                aspect ratio of the image pixels. 1.0 indicates square pixels.
                Defaults to 1.0.

        Returns:

            orig_cam (torch.Tensor): shape should be (N, 4).
        """
        _N = K.shape[0]
        s_x = K[:, 0, 0] / aspect_ratio
        s_y = K[:, 1, 1] / aspect_ratio
        t_x = K[:, 0, 3] / (aspect_ratio * s_x)
        t_y = K[:, 1, 3] / s_y
        orig_cam = torch.cat([s_x, s_y, t_x, t_y], -1).view(_N, 4)
        return orig_cam

    @classmethod
    def get_default_projection_matrix(cls, **args):
        """Class method. Calculate the projective transformation matrix by
        default parameters.

        Args:
            **kwargs: parameters for the projection can be passed in as keyword
                arguments to override the default values set in `__init__`.

        Return:
            a `torch.Tensor` which represents a batch of projection matrices K
            of shape (N, 4, 4)
        """
        orig_cam = args.get('orig_cam', None)
        scale_x = args.get('scale_x', 1.0)
        scale_y = args.get('scale_y', 1.0)
        transl_x = args.get('transl_x', 0.0)
        transl_y = args.get('transl_y', 0.0)
        aspect_ratio = args.get('aspect_ratio', 1.0)
        batch_size = args.get('batch_size', 1)
        device = args.get('device', 'cpu')

        if orig_cam is not None:
            K, _, _ = cls.convert_orig_cam_to_matrix(orig_cam, **args)
        else:
            K = torch.zeros((1, 4, 4), dtype=torch.float32)

            K[:, 0, 0] = scale_x * aspect_ratio
            K[:, 1, 1] = scale_y
            K[:, 3, 3] = 1
            K[:, 0, 3] = transl_x * scale_x * aspect_ratio
            K[:, 1, 3] = transl_y * scale_y
            K[:, 2, 2] = 1
            K = K.repeat(batch_size, 1, 1).to(device)
        return K

    def compute_projection_matrix(self, scale_x, scale_y, transl_x, transl_y,
                                  aspect_ratio) -> torch.Tensor:
        """Compute the calibration matrix K of shape (N, 4, 4)

        Args:
            scale_x (Union[torch.Tensor, float], optional):
                Scale in x direction.
            scale_y (Union[torch.Tensor, float], optional):
                Scale in y direction.
            transl_x (Union[torch.Tensor, float], optional):
                Translation in x direction.
            transl_y (Union[torch.Tensor, float], optional):
                Translation in y direction.
            aspect_ratio (Union[torch.Tensor, float], optional):
                aspect ratio of the image pixels. 1.0 indicates square pixels.

        Returns:
            torch.FloatTensor of the calibration matrix with shape (N, 4, 4)
        """
        K = torch.zeros((self._N, 4, 4),
                        dtype=torch.float32,
                        device=self.device)

        K[:, 0, 0] = scale_x * aspect_ratio
        K[:, 1, 1] = scale_y
        K[:, 3, 3] = 1
        K[:, 0, 3] = transl_x * scale_x * aspect_ratio
        K[:, 1, 3] = transl_y * scale_y
        K[:, 2, 2] = 1
        return K

    def get_projection_transform(self, **kwargs) -> Transform3d:
        """Calculate the orthographic projection matrix. Use column major
        order.

        Args:
            **kwargs: parameters for the projection can be passed in to
                      override the default values set in __init__.
        Return:
            a Transform3d object which represents a batch of projection
               matrices of shape (N, 4, 4)
        """
        K = kwargs.get('K', self.K)
        if K is not None:
            if K.shape != (self._N, 4, 4):
                msg = f'Expected K to have shape of ({self._N}, 4, 4)'
                raise ValueError(msg)
        else:
            K = self.compute_projection_matrix(
                kwargs.get('scale_x', self.scale_x),
                kwargs.get('scale_y', self.scale_y),
                kwargs.get('transl_x', self.trans_x),
                kwargs.get('transl_y', self.trans_y),
                kwargs.get('aspect_ratio', self.aspect_ratio))

        transform = Transform3d(matrix=K.transpose(1, 2).contiguous(),
                                device=self.device)
        return transform

    def unproject_points(self,
                         xy_depth: torch.Tensor,
                         world_coordinates: bool = True,
                         **kwargs) -> torch.Tensor:
        """Sends points from camera coordinates (NDC or screen) back to camera
        view or world coordinates depending on the `world_coordinates` boolean
        argument of the function."""
        if world_coordinates:
            to_camera_transform = self.get_full_projection_transform(**kwargs)
        else:
            to_camera_transform = self.get_projection_transform(**kwargs)

        unprojection_transform = to_camera_transform.inverse()
        return unprojection_transform.transform_points(xy_depth)

    def is_perspective(self):
        """Boolean of whether is perspective."""
        return False

    def in_ndc(self):
        """Boolean of whether in NDC."""
        return True

    def to_ndc_(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError()

    def to_screen_(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError()

    def to_ndc(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError()

    def to_screen(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError()


@CAMERAS.register_module(name=('PerspectiveCameras', 'perspective',
                               'Perspective'))
class PerspectiveCameras(cameras.PerspectiveCameras, MMCamerasBase):
    """Inherited from Pytorch3D `PerspectiveCameras`."""
    def __init__(
        self,
        focal_length=1.0,
        principal_point=((0.0, 0.0), ),
        R: Optional[torch.Tensor] = None,
        T: Optional[torch.Tensor] = None,
        K: Optional[torch.Tensor] = None,
        device: Union[torch.device, str] = 'cpu',
        in_ndc: bool = True,
        convention: str = 'pytorch3d',
        image_size: Optional[Union[List, Tuple, torch.Tensor]] = None,
        **kwargs,
    ) -> None:
        """
        Args:
            focal_length (float, torch.Tensor, optional):  Defaults to 1.0.
            principal_point (tuple, optional):  Defaults to ((0.0, 0.0), ).
            R (Optional[torch.Tensor], optional):  Defaults to None.
            T (Optional[torch.Tensor], optional):  Defaults to None.
            K (Optional[torch.Tensor], optional):  Defaults to None.
            device (Union[torch.device, str], optional):  Defaults to 'cpu'.
            in_ndc (bool, optional):  Defaults to True.
            convention (str, optional):  Defaults to 'pytorch3d'.
            image_size (Optional[Union[List, Tuple, torch.Tensor]], optional):
                 Defaults to None.

        """
        if image_size is not None:
            kwargs.update({'image_size': image_size})
        kwargs.update(
            _in_ndc=in_ndc,
            _is_perspective=True,
        )
        kwargs.pop('is_perspective', None)
        kwargs.pop('in_ndc', None)

        super(cameras.PerspectiveCameras,
              self).__init__(device=device,
                             focal_length=focal_length,
                             principal_point=principal_point,
                             R=R,
                             T=T,
                             K=K,
                             convention=convention,
                             **kwargs)
        if image_size is not None:
            if (self.image_size < 1).any():  # pyre-ignore
                raise ValueError('Image_size provided has invalid values')
        else:
            self.image_size = None

    def __getitem__(self, index: Union[slice, int, torch.Tensor, List, Tuple]):
        """Slice the cameras by batch dim.

        Args:
            index (Union[slice, int, torch.Tensor, List, Tuple]):
            index for slicing.

        Returns:
            MMCamerasBase: sliced cameras.
        """
        return super(cameras.PerspectiveCameras, self).__getitem__(index)

    @classmethod
    def get_default_projection_matrix(cls, **args) -> torch.Tensor:
        """Class method. Calculate the projective transformation matrix by
        default parameters.

        Args:
            **kwargs: parameters for the projection can be passed in as keyword
                arguments to override the default values set in `__init__`.

        Return:
            a `torch.Tensor` which represents a batch of projection matrices K
            of shape (N, 4, 4)
        """
        batch_size = args.get('batch_size', 1)
        device = args.get('device', 'cpu')
        focal_length = args.get('focal_length')
        principal_point = args.get('principal_point')

        return cameras._get_sfm_calibration_matrix(
            N=batch_size,
            device=device,
            focal_length=focal_length,
            principal_point=principal_point,
            orthographic=False)

    def get_ndc_camera_transform(self, **kwargs) -> Transform3d:
        kwargs.pop('cameras', None)
        return super().get_ndc_camera_transform(**kwargs)

    def transform_points_screen(self,
                                points,
                                eps: Optional[float] = None,
                                **kwargs) -> torch.Tensor:
        kwargs.pop('cameras', None)
        return super().transform_points_screen(points, eps, **kwargs)


@CAMERAS.register_module(name=('FoVPerspectiveCameras', 'FoVPerspective',
                               'fovperspective'))
class FoVPerspectiveCameras(cameras.FoVPerspectiveCameras, MMCamerasBase):
    """Inherited from Pytorch3D `FoVPerspectiveCameras`."""
    def __init__(
        self,
        znear=1.0,
        zfar=100.0,
        aspect_ratio=1.0,
        fov=60.0,
        degrees: bool = True,
        R: Optional[torch.Tensor] = None,
        T: Optional[torch.Tensor] = None,
        K: Optional[torch.Tensor] = None,
        device: Union[torch.device, str] = 'cpu',
        convention: str = 'pytorch3d',
        **kwargs,
    ) -> None:
        """Initialize a camera.

        Args:
            znear (float, optional):  Defaults to 1.0.
            zfar (float, optional):  Defaults to 100.0.
            aspect_ratio (float, optional):  Defaults to 1.0.
            fov (float, optional):  Defaults to 60.0.
            degrees (bool, optional):  Defaults to True.
            R (Optional[torch.Tensor], optional):  Defaults to None.
            T (Optional[torch.Tensor], optional):  Defaults to None.
            K (Optional[torch.Tensor], optional):  Defaults to None.
            device (Union[torch.device, str], optional):  Defaults to 'cpu'.
            convention (str, optional):  Defaults to 'pytorch3d'.
        """
        kwargs.update(
            _in_ndc=True,
            _is_perspective=True,
        )
        kwargs.pop('in_ndc', None)
        kwargs.pop('is_perspective', None)
        super(cameras.FoVPerspectiveCameras, self).__init__(
            device=device,
            znear=znear,
            zfar=zfar,
            aspect_ratio=aspect_ratio,
            fov=fov,
            R=R,
            T=T,
            K=K,
            convention=convention,
            **kwargs,
        )
        self.degrees = degrees

    def __getitem__(self, index: Union[slice, int, torch.Tensor, List, Tuple]):
        """Slice the cameras by batch dim.

        Args:
            index (Union[slice, int, torch.Tensor, List, Tuple]):
            index for slicing.

        Returns:
            MMCamerasBase: sliced cameras.
        """
        return super(cameras.FoVPerspectiveCameras, self).__getitem__(index)

    def get_ndc_camera_transform(self, **kwargs) -> Transform3d:
        kwargs.pop('cameras', None)
        return super().get_ndc_camera_transform(**kwargs)

    def transform_points_screen(self,
                                points,
                                eps: Optional[float] = None,
                                **kwargs) -> torch.Tensor:
        kwargs.pop('cameras', None)
        return super().transform_points_screen(points, eps, **kwargs)

    @classmethod
    def get_default_projection_matrix(cls, **args) -> torch.Tensor:
        """Class method. Calculate the projective transformation matrix by
        default parameters.

        Args:
            **kwargs: parameters for the projection can be passed in as keyword
                arguments to override the default values set in `__init__`.

        Return:
            a `torch.Tensor` which represents a batch of projection matrices K
            of shape (N, 4, 4)
        """
        znear = args.get('znear', 1.0)
        zfar = args.get('zfar', 100.0)
        aspect_ratio = args.get('aspect_ratio', 1.0)
        fov = args.get('fov', 60.0)
        degrees = args.get('degrees', True)
        batch_size = args.get('batch_size', 1)

        K = torch.zeros((1, 4, 4), dtype=torch.float32)
        if degrees:
            fov = (math.pi / 180) * fov

        if not torch.is_tensor(fov):
            fov = torch.tensor(fov)
        tanHalfFov = torch.tan((fov / 2))
        max_y = tanHalfFov * znear
        min_y = -max_y
        max_x = max_y * aspect_ratio
        min_x = -max_x

        z_sign = 1.0

        K[:, 0, 0] = 2.0 * znear / (max_x - min_x)
        K[:, 1, 1] = 2.0 * znear / (max_y - min_y)
        K[:, 0, 2] = (max_x + min_x) / (max_x - min_x)
        K[:, 1, 2] = (max_y + min_y) / (max_y - min_y)
        K[:, 3, 2] = z_sign

        K[:, 2, 2] = z_sign * zfar / (zfar - znear)
        K[:, 2, 3] = -(zfar * znear) / (zfar - znear)
        K = K.repeat(batch_size, 1, 1)
        return K

    def to_ndc_(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError()

    def to_screen_(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError()

    def to_ndc(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError()

    def to_screen(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError()


@CAMERAS.register_module(name=('OrthographicCameras', 'Orthographic',
                               'orthographic'))
class OrthographicCameras(cameras.OrthographicCameras, MMCamerasBase):
    """Inherited from Pytorch3D `OrthographicCameras`."""
    def __init__(
        self,
        focal_length=1.0,
        principal_point=((0.0, 0.0), ),
        R: Optional[torch.Tensor] = None,
        T: Optional[torch.Tensor] = None,
        K: Optional[torch.Tensor] = None,
        device: Union[torch.Tensor, str] = 'cpu',
        in_ndc: bool = True,
        image_size: Optional[torch.Tensor] = None,
        convention: str = 'pytorch3d',
        **kwargs,
    ) -> None:
        """Initialize OrthographicCameras.

        Args:
            focal_length (float, optional):  Defaults to 1.0.
            principal_point (tuple, optional):  Defaults to ((0.0, 0.0), ).
            R (Optional[torch.Tensor], optional):  Defaults to None.
            T (Optional[torch.Tensor], optional):  Defaults to None.
            K (Optional[torch.Tensor], optional):  Defaults to None.
            device (Union[torch.Tensor, str], optional):  Defaults to 'cpu'.
            in_ndc (bool, optional):  Defaults to True.
            image_size (Optional[torch.Tensor], optional):  Defaults to None.
            convention (str, optional):  Defaults to 'pytorch3d'.

        Raises:
            ValueError: [description]
        """
        if image_size is not None:
            kwargs.update({'image_size': image_size})
        kwargs.update(
            _is_perspective=False,
            _in_ndc=in_ndc,
        )
        kwargs.pop('is_perspective', None)
        kwargs.pop('in_ndc', None)
        super(cameras.OrthographicCameras,
              self).__init__(device=device,
                             focal_length=focal_length,
                             principal_point=principal_point,
                             R=R,
                             T=T,
                             K=K,
                             convention=convention,
                             **kwargs)
        if image_size is not None:
            if (self.image_size < 1).any():  # pyre-ignore
                raise ValueError('Image_size provided has invalid values')
        else:
            self.image_size = None

    def get_ndc_camera_transform(self, **kwargs) -> Transform3d:
        kwargs.pop('cameras', None)
        return super().get_ndc_camera_transform(**kwargs)

    def transform_points_screen(self,
                                points,
                                eps: Optional[float] = None,
                                **kwargs) -> torch.Tensor:
        kwargs.pop('cameras', None)
        return super().transform_points_screen(points, eps, **kwargs)

    def __getitem__(self, index: Union[slice, int, torch.Tensor, List, Tuple]):
        """Slice the cameras by batch dim.

        Args:
            index (Union[slice, int, torch.Tensor, List, Tuple]):
            index for slicing.

        Returns:
            MMCamerasBase: sliced cameras.
        """
        return super(cameras.OrthographicCameras, self).__getitem__(index)

    @classmethod
    def get_default_projection_matrix(cls, **args) -> torch.Tensor:
        """Class method. Calculate the projective transformation matrix by
        default parameters.

        .. code-block:: python

            fx = focal_length[:,0]
            fy = focal_length[:,1]
            px = principal_point[:,0]
            py = principal_point[:,1]

            K = [[fx,   0,    0,  px],
                 [0,   fy,    0,  py],
                 [0,    0,    1,   0],
                 [0,    0,    0,   1],]

        Args:
            **kwargs: parameters for the projection can be passed in as keyword
                arguments to override the default values.

        Return:
            a `torch.Tensor` which represents a batch of projection matrices K
            of shape (N, 4, 4)
        """
        batch_size = args.get('batch_size', 1)
        device = args.get('device', 'cpu')
        focal_length = args.get('focal_length')
        principal_point = args.get('principal_point')

        return cameras._get_sfm_calibration_matrix(
            N=batch_size,
            device=device,
            focal_length=focal_length,
            principal_point=principal_point,
            orthographic=True)


@CAMERAS.register_module(name=('FoVOrthographicCameras', 'FoVOrthographic',
                               'fovorthographic'))
class FoVOrthographicCameras(cameras.FoVOrthographicCameras, MMCamerasBase):
    """Inherited from Pytorch3D `FoVOrthographicCameras`."""
    def __init__(
            self,
            znear: Union[torch.Tensor, int, float] = 1.0,
            zfar: Union[torch.Tensor, int, float] = 100.0,
            max_y: Union[torch.Tensor, int, float] = 1.0,
            min_y: Union[torch.Tensor, int, float] = -1.0,
            max_x: Union[torch.Tensor, int, float] = 1.0,
            min_x: Union[torch.Tensor, int, float] = -1.0,
            scale_xyz: Union[Iterable[float],
                             Iterable[int]] = ((1.0, 1.0, 1.0), ),  # (1, 3)
            R: Optional[torch.Tensor] = None,
            T: Optional[torch.Tensor] = None,
            K: Optional[torch.Tensor] = None,
            device: Union[torch.device, str] = 'cpu',
            convention: str = 'pytorch3d',
            **kwargs):
        """reimplemented __init__, add `convention`.

        Args:
            znear (Union[torch.Tensor, int, float], optional):
                Defaults to 1.0.
            zfar (Union[torch.Tensor, int, float], optional):
                Defaults to 100.0.
            max_y (Union[torch.Tensor, int, float], optional):
                Defaults to 1.0.
            min_y (Union[torch.Tensor, int, float], optional):
                Defaults to -1.0.
            max_x (Union[torch.Tensor, int, float], optional):
                Defaults to 1.0.
            min_x (Union[torch.Tensor, int, float], optional):
                Defaults to -1.0.
            scale_xyz (Union[Iterable[float], Iterable[int]], optional):
                Defaults to ((1.0, 1.0, 1.0), ).
            T (Optional[torch.Tensor], optional):  Defaults to None.
            K (Optional[torch.Tensor], optional):  Defaults to None.
            device (Union[torch.device, str], optional):  Defaults to 'cpu'.
            convention (str, optional):  Defaults to 'pytorch3d'.
        """
        kwargs.update(_is_perspective=False, _in_ndc=True)
        kwargs.pop('in_ndc', None)
        kwargs.pop('is_perspective', None)
        super(cameras.FoVOrthographicCameras,
              self).__init__(device=device,
                             znear=znear,
                             zfar=zfar,
                             max_y=max_y,
                             min_y=min_y,
                             max_x=max_x,
                             min_x=min_x,
                             scale_xyz=scale_xyz,
                             R=R,
                             T=T,
                             K=K,
                             convention=convention,
                             **kwargs)

    def __getitem__(self, index: Union[slice, int, torch.Tensor, List, Tuple]):
        """Slice the cameras by batch dim.

        Args:
            index (Union[slice, int, torch.Tensor, List, Tuple]):
            index for slicing.

        Returns:
            MMCamerasBase: sliced cameras.
        """
        return super(cameras.FoVOrthographicCameras, self).__getitem__(index)

    @classmethod
    def get_default_projection_matrix(cls, **args) -> torch.Tensor:
        """Class method. Calculate the projective transformation matrix by
        default parameters.

        .. code-block:: python

            scale_x = 2 / (max_x - min_x)
            scale_y = 2 / (max_y - min_y)
            scale_z = 2 / (far-near)
            mid_x = (max_x + min_x) / (max_x - min_x)
            mix_y = (max_y + min_y) / (max_y - min_y)
            mid_z = (far + near) / (far - near)

            K = [[scale_x,        0,         0,  -mid_x],
                 [0,        scale_y,         0,  -mix_y],
                 [0,              0,  -scale_z,  -mid_z],
                 [0,              0,         0,       1],]

        Args:
            **kwargs: parameters for the projection can be passed in as keyword
                arguments to override the default values.

        Return:
            a `torch.Tensor` which represents a batch of projection matrices K
            of shape (N, 4, 4)
        """
        znear = args.get('znear', 1.0)
        zfar = args.get('zfar', 100.0)
        max_y = args.get('max_y', 1.0)
        min_y = args.get('min_y', -1.0)
        max_x = args.get('max_x', 1.0)
        min_x = args.get('min_x', -1.0)
        scale_xyz = args.get(
            'scale_xyz',
            ((1.0, 1.0, 1.0), ),
        )
        batch_size = args.get('batch_size', 1)

        K = torch.zeros((1, 4, 4), dtype=torch.float32)
        ones = torch.ones((1), dtype=torch.float32)
        z_sign = +1.0

        if not isinstance(scale_xyz, torch.Tensor):
            scale_xyz = torch.Tensor(scale_xyz)
        K[:, 0, 0] = (2.0 / (max_x - min_x)) * scale_xyz[:, 0]
        K[:, 1, 1] = (2.0 / (max_y - min_y)) * scale_xyz[:, 1]
        K[:, 0, 3] = -(max_x + min_x) / (max_x - min_x)
        K[:, 1, 3] = -(max_y + min_y) / (max_y - min_y)
        K[:, 3, 3] = ones

        # NOTE: This maps the z coordinate to the range [0, 1] and replaces the
        # the OpenGL z normalization to [-1, 1]
        K[:, 2, 2] = z_sign * (1.0 / (zfar - znear)) * scale_xyz[:, 2]
        K[:, 2, 3] = -znear / (zfar - znear)
        K = K.repeat(batch_size, 1, 1)
        return K

    def to_ndc_(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError()

    def to_screen_(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError()

    def to_ndc(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError()

    def to_screen(self, **kwargs):
        """Not implemented."""
        raise NotImplementedError()

    def get_ndc_camera_transform(self, **kwargs) -> Transform3d:
        kwargs.pop('cameras', None)
        return super().get_ndc_camera_transform(**kwargs)

    def transform_points_screen(self,
                                points,
                                eps: Optional[float] = None,
                                **kwargs) -> torch.Tensor:
        kwargs.pop('cameras', None)
        return super().transform_points_screen(points, eps, **kwargs)


def concat_cameras(cameras_list: List[MMCamerasBase]) -> MMCamerasBase:
    """Concat a list of cameras of the same type.

    Args:
        cameras_list (List[cameras.CamerasBase]): a list of cameras.

    Returns:
        MMCamerasBase: the returned cameras concated following the batch
            dim.
    """
    K = []
    R = []
    T = []
    is_perspective = cameras_list[0].is_perspective()
    in_ndc = cameras_list[0].in_ndc()
    cam_cls = type(cameras_list[0])
    image_size = cameras_list[0].get_image_size()
    device = cameras_list[0].device
    for cam in cameras_list:
        assert type(cam) is cam_cls
        assert cam.in_ndc() is in_ndc
        assert cam.is_perspective() is is_perspective
        assert cam.device is device
        K.append(cam.K)
        R.append(cam.R)
        T.append(cam.T)
    K = torch.cat(K)
    R = torch.cat(R)
    T = torch.cat(T)
    concated_cameras = cam_cls(K=K,
                               R=R,
                               T=T,
                               device=device,
                               is_perspective=is_perspective,
                               in_ndc=in_ndc,
                               image_size=image_size)
    return concated_cameras


def compute_orbit_cameras(
    K: Union[torch.Tensor, np.ndarray, None] = None,
    elev: float = 0,
    azim: float = 0,
    dist: float = 2.7,
    at: Union[torch.Tensor, List, Tuple] = (0, 0, 0),
    batch_size: int = 1,
    orbit_speed: Union[float, Tuple[float, float]] = 0,
    dist_speed: Optional[float] = 0,
    convention: str = 'pytorch3d',
) -> Union[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Generate a sequence of moving cameras following an orbit.

    Args:
        K (Union[torch.Tensor, np.ndarray, None], optional):
            Intrinsic matrix. Will generate a default K if None.
            Defaults to None.
        elev (float, optional):  This is the angle between the
            vector from the object to the camera, and the horizontal
            plane y = 0 (xz-plane).
            Defaults to 0.
        azim (float, optional): angle in degrees or radians. The vector
            from the object to the camera is projected onto a horizontal
            plane y = 0. azim is the angle between the projected vector and a
            reference vector at (0, 0, 1) on the reference plane (the
            horizontal plane).
            Defaults to 0.
        dist (float, optional): distance of the camera from the object.
            Defaults to 2.7.
        at (Union[torch.Tensor, List, Tuple], optional):
            the position of the object(s) in world coordinates.
            Defaults to (0, 0, 0).
        batch_size (int, optional): number of frames. Defaults to 1.
        orbit_speed (Union[float, Tuple[float, float]], optional):
            degree speed of camera moving along the orbit.
            Could be one or two number. One number for only elev speed,
            two number for both.
            Defaults to 0.
        dist_speed (Optional[float], optional):
            speed of camera moving along the center line.
            Defaults to 0.
        convention (str, optional): Camera convention. Defaults to 'pytorch3d'.

    Returns:
        Union[torch.Tensor, torch.Tensor, torch.Tensor]: computed K, R, T.
    """
    if not isinstance(orbit_speed, Iterable):
        orbit_speed = (orbit_speed, 0.0)
    if not isinstance(at, torch.Tensor):
        at = torch.Tensor(at)
    at = at.view(1, 3)
    if batch_size > 1 and orbit_speed[0] != 0:
        azim = torch.linspace(azim, azim + batch_size * orbit_speed[0],
                              batch_size)
    if batch_size > 1 and orbit_speed[1] != 0:
        elev = torch.linspace(elev, elev + batch_size * orbit_speed[1],
                              batch_size)
    if batch_size > 1 and dist_speed != 0:
        dist = torch.linspace(dist, dist + batch_size * dist_speed, batch_size)

    if convention == 'opencv':
        rotation_compensate = ee_to_rotmat(
            torch.Tensor([math.pi, 0, 0]).view(1, 3))
        at = rotation_compensate.permute(0, 2, 1) @ at.view(-1, 3, 1)
        at = at.view(1, 3)
    R, T = cameras.look_at_view_transform(dist=dist,
                                          elev=elev,
                                          azim=azim,
                                          at=at)
    if K is None:
        K = FoVPerspectiveCameras.get_default_projection_matrix(
            batch_size=batch_size)
    if convention == 'opencv':
        rotation_compensate = ee_to_rotmat(
            torch.Tensor([math.pi, 0, 0]).view(1, 3))
        R = rotation_compensate.permute(0, 2, 1) @ R
    return K, R, T


def compute_direction_cameras(
    K: Union[torch.Tensor, np.ndarray, None] = None,
    at: Union[torch.Tensor, List, Tuple, None] = None,
    eye: Union[torch.Tensor, List, Tuple, None] = None,
    plane: Union[Iterable[torch.Tensor], None] = None,
    dist: float = 1.0,
    batch_size: int = 1,
    dist_speed: float = 0.0,
    z_vec: Union[torch.Tensor, List, Tuple, None] = None,
    y_vec: Union[torch.Tensor, List, Tuple] = (0, 1, 0),
    convention: str = 'pytorch3d',
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Generate a sequence of moving cameras along a direction.
    We need a `z_vec`, `y_vec` to generate `x_vec` so as to get the `R` matrix.
    And we need `eye` as `T` matrix.
    `K` matrix could be set or use default.
    We recommend `y_vec` as default (0, 1, 0), and it will be orthogonal
        decomposed. The `x_vec` will be generated by cross production from
        `y_vec` and `x_vec`.
    You can set `z_vec` by: 1. set `at`, `dist`, `dist_speed`, `plane`,
                            `batch_size` to get `eye`, then get `z_vec`.
                            2. set `at`, `eye` directly and get `z_vec`.
                            3. set `z_vec` directly and:
                                1). set `eye` and `dist`.
                                2). set `at`, `dist`, `dist_speed`,
                                `batch_size` then get `eye`.
        When we have `eye`, `z_vec`, `y_vec`, we will have `R` and `T`.

    Args:
        K (Union[torch.Tensor, np.ndarray, None], optional):
            Intrinsic matrix. Will generate a default K if None.
            Defaults to None.
        at (Union[torch.Tensor, List, Tuple], optional):
            the position of the object(s) in world coordinates.
            Required.
            Defaults to None.
        eye (Union[torch.Tensor, List, Tuple], optional):
            the position of the camera(s) in world coordinates.
            If eye is not None, it will override the camera position derived
            from plane, dist, dist_speed.
            Defaults to None.
        plane (Optional[Iterable[torch.Tensor, List, Tuple]], optional):
            The plane of your z direction normal.
            Should be a tuple or list containing two vectors of shape (N, 3).
            Defaults to None.
        dist (float, optional): distance to at.
            Defaults to 1.0.
        dist_speed (float, optional): distance moving speed.
            Defaults to 1.0.
        batch_size (int, optional): number of frames.
            Defaults to 1.
        z_vec (Union[torch.Tensor, List, Tuple], optional):
            z direction of shape (-1, 3). If z_vec is not None, it will
            override plane, dist, dist_speed.
            Defaults to None.
        y_vec (Union[torch.Tensor, List, Tuple], optional):
            Will only be used when z_vec is used.
            Defaults to (0, 1, 0).
        convention (str, optional): Camera convention.
            Defaults to 'pytorch3d'.

    Returns:
        Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: computed K, R, T.
    """
    def norm_vec(vec):
        return vec / torch.sqrt((vec * vec).sum())

    if z_vec is None:
        assert at is not None
        at = torch.Tensor(at).view(-1, 3)
        if eye is None:
            assert plane is not None
            dist = torch.linspace(dist, dist + batch_size * dist_speed,
                                  batch_size)
            vec1 = torch.Tensor(plane[0]).view(-1, 3)
            norm_vec1 = norm_vec(vec1)
            vec2 = torch.Tensor(plane[1]).view(-1, 3)
            norm_vec2 = norm_vec(vec2)
            norm = torch.cross(norm_vec1, norm_vec2)
            normed_norm = norm_vec(norm)
            eye = at + normed_norm * dist
        else:
            eye = torch.Tensor(eye).view(-1, 3)
            norm = eye - at
            normed_norm = norm_vec(norm)

        z_vec = -normed_norm
    else:
        z_vec = torch.Tensor(z_vec).view(-1, 3)
        z_vec = norm_vec(z_vec)
        if eye is None:
            assert at is not None
            at = torch.Tensor(at).view(-1, 3)
            dist = torch.linspace(dist, dist + batch_size * dist_speed,
                                  batch_size)
            eye = -z_vec * dist + at
        eye = torch.Tensor(eye).view(-1, 3)
        assert eye is not None
        z_vec = norm_vec(z_vec)
        normed_norm = -z_vec

    z_vec = z_vec.view(-1, 3)
    y_vec = torch.Tensor(y_vec).view(-1, 3)

    y_vec = y_vec - torch.bmm(y_vec.view(-1, 1, 3), z_vec.view(-1, 3, 1)).view(
        -1, 1) * z_vec
    y_vec = norm_vec(y_vec)
    x_vec = torch.cross(y_vec, z_vec)
    R = torch.cat(
        [x_vec.view(-1, 3, 1),
         y_vec.view(-1, 3, 1),
         z_vec.view(-1, 3, 1)], 1).view(-1, 3, 3)
    T = eye

    R = R.permute(0, 2, 1)
    _, T = convert_world_view(R=R, T=T)

    if K is None:
        K = FoVPerspectiveCameras.get_default_projection_matrix(
            batch_size=batch_size)
    K, R, T = convert_camera_matrix(K=K,
                                    R=R,
                                    T=T,
                                    is_perspective=True,
                                    convention_src='pytorch3d',
                                    convention_dst=convention)
    return K, R, T