Pose3D / app.py
wufeim's picture
init push
deb99f0
raw
history blame
6.24 kB
import BboxTools as bbt
import gradio as gr
import numpy as np
from pytorch3d.renderer import RasterizationSettings, PerspectiveCameras, MeshRasterizer, MeshRenderer, HardPhongShader, BlendParams, camera_position_from_spherical_angles, look_at_rotation, PointLights
from pytorch3d.renderer import TexturesVertex as Textures
from pytorch3d.structures import Meshes
import torch
# Maps each selectable object category (as shown in the UI) to the OFF mesh
# file rendered for it. Every file lives in CAD_selected/ and is named after
# the lower-cased category, so the table is generated from the names alone.
mesh_paths = {
    category: "CAD_selected/" + category.lower() + ".off"
    for category in (
        "Aeroplane",
        "Bicycle",
        "Boat",
        "Bottle",
        "Bus",
        "Car",
        "Chair",
        "Diningtable",
        "Motorbike",
        "Sofa",
        "Train",
        "Tvmonitor",
    )
}
def rotation_theta(theta, device_=None):
    """Build a batch of 3x3 rotation matrices about the z-axis.

    Each returned matrix has the form::

        cos -sin  0
        sin  cos  0
        0    0    1

    Args:
        theta: Rotation angle(s) in radians (fed directly to ``torch.cos`` /
            ``torch.sin``). Either a tensor, which is flattened to a batch of
            ``n`` angles, or any non-tensor value ``torch.as_tensor`` accepts
            (Python int/float, numpy scalar, ...).
        device_: Device for the result. When ``None`` it defaults to
            ``theta.device`` for a tensor and to ``'cpu'`` otherwise.

    Returns:
        Tensor of shape ``(n, 3, 3)``; ``n`` is 1 for a scalar ``theta``.
    """
    if torch.is_tensor(theta):
        if device_ is None:
            device_ = theta.device
    else:
        if device_ is None:
            device_ = 'cpu'
        # Accept ints and numpy scalars as well as floats; an exact
        # ``type(theta) == float`` test sends those down the tensor path,
        # where they fail on ``theta.device``.
        theta = torch.as_tensor(theta, dtype=torch.float32, device=device_)
    theta = theta.reshape(-1, 1, 1)

    # [n, 1, 2]
    cos_sin = torch.cat((torch.cos(theta), torch.sin(theta)), dim=2).to(device_)

    # Row 0 places cos on the diagonal (flat indices 0 and 4); row 1 places
    # -sin / +sin on the off-diagonal (flat indices 1 and 3). The bias adds
    # the constant 1 at index 8. Built in the angle dtype so float64 angles
    # do not hit a dtype mismatch in matmul.
    mul_ = torch.tensor(
        [[1, 0, 0, 0, 1, 0, 0, 0, 0], [0, -1, 0, 1, 0, 0, 0, 0, 0]],
        dtype=cos_sin.dtype,
        device=device_,
    ).view(1, 2, 9)
    bia_ = torch.tensor([0] * 8 + [1], dtype=cos_sin.dtype, device=device_).view(1, 1, 9)

    # [n, 1, 2] @ [1, 2, 9] + [1, 1, 9] => [n, 1, 9] => [n, 3, 3]
    trans = torch.matmul(cos_sin, mul_) + bia_
    return trans.view(-1, 3, 3)
def campos_to_R_T(campos, theta, device='cpu', at=((0, 0, 0),), up=((0, 1, 0), )):
    """Turn camera positions plus an in-plane angle into ``(R, T)``.

    Args:
        campos: Camera position(s); used as a batch of 3-vectors
            (``unsqueeze(2)`` followed by ``bmm`` with the rotation).
        theta: In-plane rotation angle forwarded to ``rotation_theta``.
        device: Device forwarded to ``look_at_rotation`` and
            ``rotation_theta``.
        at: Look-at target forwarded to ``look_at_rotation``.
        up: Up direction forwarded to ``look_at_rotation``.

    Returns:
        Tuple ``(R, T)``: ``R`` is the look-at rotation right-multiplied by
        the in-plane rotation, shape (n, 3, 3); ``T = -R^T @ campos``,
        shape (n, 3).
    """
    look_at = look_at_rotation(campos, at=at, device=device, up=up)
    in_plane = rotation_theta(theta, device_=device)
    R = torch.bmm(look_at, in_plane)

    # Translation that places the camera centre at `campos`.
    campos_column = campos.unsqueeze(2)
    T = -torch.bmm(R.transpose(1, 2), campos_column)[:, :, 0]
    return R, T
def load_off(off_file_name, to_torch=False):
    """Load vertices and triangular faces from an OFF mesh file.

    Expected layout: a header line, then a counts line whose first field is
    the number of vertices, then one vertex per line (three floats), then
    face records of four integers each (a leading vertex count followed by
    three vertex indices). Everything after the vertices is parsed as faces.

    Args:
        off_file_name: Path of the OFF file to read.
        to_torch: When true, return torch tensors instead of numpy arrays.

    Returns:
        Tuple ``(vertices, faces)``: ``vertices`` is float32 with shape
        (n_points, 3); ``faces`` is int32 with shape (n_faces, 3), the
        leading per-face count column being dropped.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the counts line or the numeric data cannot be parsed
            into the expected shapes.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(off_file_name) as file_handle:
        file_list = file_handle.readlines()

    # split() (no argument) tolerates tabs, repeated spaces and leading
    # whitespace on the counts line; split(' ') does not.
    n_points = int(file_list[1].split()[0])

    vertex_text = ''.join(file_list[2:2 + n_points])
    vertices = np.fromstring(vertex_text, dtype=np.float32, sep='\n').reshape((-1, 3))

    face_text = ''.join(file_list[2 + n_points:])
    face_records = np.fromstring(face_text, dtype=np.int32, sep='\n').reshape((-1, 4))
    faces = face_records[:, 1:]  # drop the leading per-face vertex count

    if to_torch:
        return torch.from_numpy(vertices), torch.from_numpy(faces)
    return vertices, faces
def pre_process_mesh_pascal(verts):
    """Remap vertex axes from (x, y, z) to (x, z, -y).

    Args:
        verts: Tensor whose second dimension holds at least the x, y, z
            coordinates (columns 0, 1, 2).

    Returns:
        New tensor with columns ``(x, z, -y)`` along dimension 1.
    """
    x_col, y_col, z_col = verts[:, 0], verts[:, 1], verts[:, 2]
    return torch.stack((x_col, z_col, -y_col), dim=1)
def render(azimuth, elevation, theta, dist, category, unit):
    """Render the CAD mesh of ``category`` under the requested camera pose.

    Args:
        azimuth: Camera azimuth (anything ``float()`` accepts).
        elevation: Camera elevation (anything ``float()`` accepts).
        theta: In-plane rotation of the camera (anything ``float()`` accepts).
        dist: Distance of the camera from the origin.
        category: Key of ``mesh_paths`` selecting the mesh to load.
        unit: ``'Degree'`` to interpret all three angles in degrees; any
            other value means radians.

    Returns:
        The rendered RGB image as a uint8 numpy array.

    Raises:
        ValueError: If a numeric argument cannot be converted to float.
        KeyError: If ``category`` is not a key of ``mesh_paths``.
    """
    azimuth = float(azimuth)
    elevation = float(elevation)
    theta = float(theta)
    dist = float(dist)

    in_degrees = unit == 'Degree'
    if in_degrees:
        # rotation_theta feeds theta straight into cos/sin, i.e. radians.
        # Azimuth/elevation are converted by pytorch3d through `degrees=`,
        # so theta has to be converted here to honour the same unit.
        theta = theta * np.pi / 180.0

    h, w = 256, 256
    render_image_size = max(h, w)
    crop_size = (256, 256)
    device = 'cpu'

    cameras = PerspectiveCameras(focal_length=12.0, device=device)
    raster_settings = RasterizationSettings(
        image_size=render_image_size,
        blur_radius=0.0,
        faces_per_pixel=1,
        bin_size=0
    )
    lights = PointLights(device=device, location=((2.0, 2.0, -2.0),))
    phong_renderer = MeshRenderer(
        rasterizer=MeshRasterizer(
            cameras=cameras,
            raster_settings=raster_settings
        ),
        shader=HardPhongShader(device=device, lights=lights, cameras=cameras)
    )

    # Load the mesh and give every vertex a white colour.
    x3d, xface = load_off(mesh_paths[category])
    verts = torch.from_numpy(x3d).to(device)
    verts = pre_process_mesh_pascal(verts)
    faces = torch.from_numpy(xface).to(device)
    verts_rgb = torch.ones_like(verts)[None]
    textures = Textures(verts_rgb.to(device))
    meshes = Meshes(verts=[verts], faces=[faces], textures=textures)

    # Camera extrinsics from the spherical pose plus in-plane rotation.
    C = camera_position_from_spherical_angles(dist, elevation, azimuth, degrees=in_degrees, device=device)
    R, T = campos_to_R_T(C, theta, device=device)

    image = phong_renderer(meshes_world=meshes.clone(), R=R, T=T)
    image = image[:, ..., :3]  # keep the first three (colour) channels

    # Crop a crop_size window around the centre of the rendered image.
    box_ = bbt.box_by_shape(crop_size, (render_image_size // 2,) * 2)
    bbox = box_.bbox
    image = image[:, bbox[0][0]:bbox[0][1], bbox[1][0]:bbox[1][1], :]
    image = torch.squeeze(image).detach().cpu().numpy()

    # Normalise to [0, 255]; skip the division for an all-zero image so it
    # does not turn into NaNs.
    peak = image.max()
    if peak > 0:
        image = image / peak
    image = np.array(image * 255).astype(np.uint8)

    # Shift the image so that (cx, cy) lands on the image centre, filling the
    # uncovered border by edge replication. With the centre hard-coded to
    # (128, 128) on a 256x256 image the shift is zero.
    cx, cy = (128, 128)
    dx = int(-cx + w/2)
    dy = int(-cy + h/2)
    image_pad = np.pad(image, ((abs(dy), abs(dy)), (abs(dx), abs(dx)), (0, 0)), mode='edge')
    image = image_pad[dy+abs(dy):dy+abs(dy)+image.shape[0], dx+abs(dx):dx+abs(dx)+image.shape[1]]

    return image
# Gradio UI: pose parameters and category selection on the left, the rendered
# image on the right. Clicking "Render" calls render() with the current values.
with gr.Blocks() as demo:
    gr.Markdown('# Visualize object pose')
    gr.Markdown('This app runs on a free HuggingFace Space with no GPU support. Rendering an image generally takes a few seconds.')

    with gr.Row():
        with gr.Column(scale=1):
            # Numeric inputs are plain textboxes; render() converts them
            # with float().
            azimuth_box = gr.Textbox(label="Azimuth", value="45")
            elevation_box = gr.Textbox(label="Elevation", value="15")
            theta_box = gr.Textbox(label="Theta", value="0")
            dist_box = gr.Textbox(label="Distance", value="4")
            # Choices come from mesh_paths so the selector cannot drift from
            # the set of meshes render() is able to load.
            category_radio = gr.Radio(list(mesh_paths), value="Aeroplane")
            unit_radio = gr.Radio(["Degree", "Radian"], value="Degree")
            render_btn = gr.Button("Render")

        with gr.Column(scale=1):
            output = gr.Image(shape=(256, 256))

    render_btn.click(fn=render, inputs=[azimuth_box, elevation_box, theta_box, dist_box, category_radio, unit_radio], outputs=output)

demo.launch()