Spaces:

JOY-Huang
/

InstantIR

Running on Zero

App Files Files Community

InstantIR / diffusers /pipelines /shap_e /camera.py

JOY-Huang

add local diffusers

62c110b 18 days ago

raw

history blame

4.94 kB

	# Copyright 2024 Open AI and The HuggingFace Team. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	from dataclasses import dataclass
	from typing import Tuple

	import numpy as np
	import torch


	@dataclass
	class DifferentiableProjectiveCamera:
	"""
	Implements a batch, differentiable, standard pinhole camera
	"""

	origin: torch.Tensor # [batch_size x 3]
	x: torch.Tensor # [batch_size x 3]
	y: torch.Tensor # [batch_size x 3]
	z: torch.Tensor # [batch_size x 3]
	width: int
	height: int
	x_fov: float
	y_fov: float
	shape: Tuple[int]

	def __post_init__(self):
	assert self.x.shape[0] == self.y.shape[0] == self.z.shape[0] == self.origin.shape[0]
	assert self.x.shape[1] == self.y.shape[1] == self.z.shape[1] == self.origin.shape[1] == 3
	assert len(self.x.shape) == len(self.y.shape) == len(self.z.shape) == len(self.origin.shape) == 2

	def resolution(self):
	return torch.from_numpy(np.array([self.width, self.height], dtype=np.float32))

	def fov(self):
	return torch.from_numpy(np.array([self.x_fov, self.y_fov], dtype=np.float32))

	def get_image_coords(self) -> torch.Tensor:
	"""
	:return: coords of shape (width * height, 2)
	"""
	pixel_indices = torch.arange(self.height * self.width)
	coords = torch.stack(
	[
	pixel_indices % self.width,
	torch.div(pixel_indices, self.width, rounding_mode="trunc"),
	],
	axis=1,
	)
	return coords

	@property
	def camera_rays(self):
	batch_size, *inner_shape = self.shape
	inner_batch_size = int(np.prod(inner_shape))

	coords = self.get_image_coords()
	coords = torch.broadcast_to(coords.unsqueeze(0), [batch_size * inner_batch_size, *coords.shape])
	rays = self.get_camera_rays(coords)

	rays = rays.view(batch_size, inner_batch_size * self.height * self.width, 2, 3)

	return rays

	def get_camera_rays(self, coords: torch.Tensor) -> torch.Tensor:
	batch_size, *shape, n_coords = coords.shape
	assert n_coords == 2
	assert batch_size == self.origin.shape[0]

	flat = coords.view(batch_size, -1, 2)

	res = self.resolution()
	fov = self.fov()

	fracs = (flat.float() / (res - 1)) * 2 - 1
	fracs = fracs * torch.tan(fov / 2)

	fracs = fracs.view(batch_size, -1, 2)
	directions = (
	self.z.view(batch_size, 1, 3)
	+ self.x.view(batch_size, 1, 3) * fracs[:, :, :1]
	+ self.y.view(batch_size, 1, 3) * fracs[:, :, 1:]
	)
	directions = directions / directions.norm(dim=-1, keepdim=True)
	rays = torch.stack(
	[
	torch.broadcast_to(self.origin.view(batch_size, 1, 3), [batch_size, directions.shape[1], 3]),
	directions,
	],
	dim=2,
	)
	return rays.view(batch_size, *shape, 2, 3)

	def resize_image(self, width: int, height: int) -> "DifferentiableProjectiveCamera":
	"""
	Creates a new camera for the resized view assuming the aspect ratio does not change.
	"""
	assert width * self.height == height * self.width, "The aspect ratio should not change."
	return DifferentiableProjectiveCamera(
	origin=self.origin,
	x=self.x,
	y=self.y,
	z=self.z,
	width=width,
	height=height,
	x_fov=self.x_fov,
	y_fov=self.y_fov,
	)


	def create_pan_cameras(size: int) -> DifferentiableProjectiveCamera:
	origins = []
	xs = []
	ys = []
	zs = []
	for theta in np.linspace(0, 2 * np.pi, num=20):
	z = np.array([np.sin(theta), np.cos(theta), -0.5])
	z /= np.sqrt(np.sum(z**2))
	origin = -z * 4
	x = np.array([np.cos(theta), -np.sin(theta), 0.0])
	y = np.cross(z, x)
	origins.append(origin)
	xs.append(x)
	ys.append(y)
	zs.append(z)
	return DifferentiableProjectiveCamera(
	origin=torch.from_numpy(np.stack(origins, axis=0)).float(),
	x=torch.from_numpy(np.stack(xs, axis=0)).float(),
	y=torch.from_numpy(np.stack(ys, axis=0)).float(),
	z=torch.from_numpy(np.stack(zs, axis=0)).float(),
	width=size,
	height=size,
	x_fov=0.7,
	y_fov=0.7,
	shape=(1, len(xs)),
	)