# File size: 6,238 Bytes
# deb99f0
import BboxTools as bbt
import gradio as gr
import numpy as np
from pytorch3d.renderer import RasterizationSettings, PerspectiveCameras, MeshRasterizer, MeshRenderer, HardPhongShader, BlendParams, camera_position_from_spherical_angles, look_at_rotation, PointLights
from pytorch3d.renderer import TexturesVertex as Textures
from pytorch3d.structures import Meshes
import torch
# Maps each selectable object category (as shown in the UI) to the path of
# the CAD mesh (.off file) that is rendered for it.
mesh_paths = {
    "Aeroplane": "CAD_selected/aeroplane.off",
    "Bicycle": "CAD_selected/bicycle.off",
    "Boat": "CAD_selected/boat.off",
    "Bottle": "CAD_selected/bottle.off",
    "Bus": "CAD_selected/bus.off",
    "Car": "CAD_selected/car.off",
    "Chair": "CAD_selected/chair.off",
    "Diningtable": "CAD_selected/diningtable.off",
    "Motorbike": "CAD_selected/motorbike.off",
    "Sofa": "CAD_selected/sofa.off",
    "Train": "CAD_selected/train.off",
    "Tvmonitor": "CAD_selected/tvmonitor.off",
}
def rotation_theta(theta, device_=None):
    """Build a batch of 3x3 rotation matrices about the z-axis.

    For every angle ``t`` the resulting matrix is::

        cos t  -sin t  0
        sin t   cos t  0
        0       0      1

    Args:
        theta: Rotation angle(s) in radians (they are fed straight into
            ``torch.cos`` / ``torch.sin``). Either a tensor, which is
            flattened to a batch of ``n`` angles, or a plain number /
            sequence of numbers, which is converted to a float32 tensor.
        device_: Device of the returned tensor. Defaults to ``theta.device``
            for tensor input and to ``'cpu'`` otherwise.

    Returns:
        Tensor of shape ``(n, 3, 3)``.
    """
    if torch.is_tensor(theta):
        if device_ is None:
            device_ = theta.device
    else:
        # Accept ints and NumPy scalars as well as floats; the original
        # ``type(theta) == float`` check sent those down the tensor branch,
        # where they crashed on ``theta.device``.
        if device_ is None:
            device_ = 'cpu'
        theta = torch.as_tensor(theta, dtype=torch.float32, device=device_)
    # reshape (not view) so non-contiguous tensors are accepted too.
    theta = theta.reshape(-1, 1, 1)

    # [n, 1, 2]
    cos_sin = torch.cat((torch.cos(theta), torch.sin(theta)), dim=2).to(device_)

    # Coefficients that scatter cos (row 0) and sin (row 1) into the nine
    # entries of a flattened 3x3 matrix; the bias supplies the constant 1 in
    # the bottom-right corner. They follow the dtype of cos_sin so that
    # float64 angles do not hit a dtype mismatch in matmul.
    mul_ = torch.tensor(
        [[1, 0, 0, 0, 1, 0, 0, 0, 0], [0, -1, 0, 1, 0, 0, 0, 0, 0]],
        dtype=cos_sin.dtype,
        device=device_,
    ).view(1, 2, 9)
    bia_ = torch.tensor([0] * 8 + [1], dtype=cos_sin.dtype, device=device_).view(1, 1, 9)

    # [n, 1, 2] @ [1, 2, 9] + [1, 1, 9] => [n, 1, 9] => [n, 3, 3]
    trans = torch.matmul(cos_sin, mul_) + bia_
    return trans.view(-1, 3, 3)
def campos_to_R_T(campos, theta, device='cpu', at=((0, 0, 0),), up=((0, 1, 0), )):
    """Turn camera positions plus an in-plane rotation into ``(R, T)``.

    Args:
        campos: Camera positions; indexed as a ``(n, 3)`` tensor.
        theta: In-plane rotation angle(s), forwarded to ``rotation_theta``.
        device: Device handed to ``look_at_rotation`` and ``rotation_theta``.
        at: Point(s) the camera looks at.
        up: Up direction(s) of the world.

    Returns:
        ``(R, T)`` where ``R`` is the look-at rotation multiplied by the
        in-plane rotation, ``(n, 3, 3)``, and ``T = -R^T @ campos``,
        ``(n, 3)``.
    """
    look_at = look_at_rotation(campos, at=at, device=device, up=up)  # (n, 3, 3)
    in_plane = rotation_theta(theta, device_=device)
    rot = torch.bmm(look_at, in_plane)

    # Treat every camera position as a column vector for the batched product,
    # then drop the trailing singleton axis again.
    campos_col = campos.unsqueeze(2)  # (n, 3, 1)
    trans = -torch.bmm(rot.transpose(1, 2), campos_col)[:, :, 0]  # (n, 3)
    return rot, trans
def load_off(off_file_name, to_torch=False):
    """Load vertices and triangular faces from an OFF mesh file.

    The parser expects this layout: one header line, then a line whose first
    field is the vertex count, then one ``x y z`` line per vertex, and
    finally face records of four integers each. The first integer of every
    face record (the per-face vertex count in the OFF format) is dropped, so
    only triangular faces are supported.

    Args:
        off_file_name: Path of the ``.off`` file.
        to_torch: If true, return torch tensors instead of NumPy arrays.

    Returns:
        ``(vertices, faces)``: float32 values of shape ``(n_points, 3)`` and
        int32 vertex indices of shape ``(n_faces, 3)``.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(off_file_name) as file_handle:
        file_list = file_handle.readlines()

    # split() without an argument tolerates leading/repeated blanks and tabs.
    n_points = int(file_list[1].split()[0])

    # A whitespace separator makes fromstring accept any run of whitespace,
    # so blanks within a line and newlines between lines are both skipped.
    vertex_text = ''.join(file_list[2:2 + n_points])
    vertices = np.fromstring(vertex_text, dtype=np.float32, sep='\n').reshape((-1, 3))

    face_text = ''.join(file_list[2 + n_points:])
    faces = np.fromstring(face_text, dtype=np.int32, sep='\n').reshape((-1, 4))[:, 1:]

    if to_torch:
        return torch.from_numpy(vertices), torch.from_numpy(faces)
    return vertices, faces
def pre_process_mesh_pascal(verts):
    """Re-order the vertex axes from ``(x, y, z)`` to ``(x, z, -y)``.

    Args:
        verts: Tensor whose second axis holds the three coordinates.

    Returns:
        New tensor with columns ``(verts[:, 0], verts[:, 2], -verts[:, 1])``.
    """
    x, y, z = verts[:, 0], verts[:, 1], verts[:, 2]
    return torch.stack((x, z, -y), dim=1)
def render(azimuth, elevation, theta, dist, category, unit):
    """Render the CAD model of ``category`` from the requested viewpoint.

    Args:
        azimuth: Camera azimuth; anything ``float()`` accepts.
        elevation: Camera elevation; anything ``float()`` accepts.
        theta: In-plane rotation of the camera; anything ``float()`` accepts.
        dist: Distance of the camera from the origin; anything ``float()``
            accepts.
        category: Key of ``mesh_paths`` selecting the mesh to render.
        unit: ``'Degree'`` if the three angles are given in degrees; any
            other value means radians.

    Returns:
        The rendered RGB image as a ``uint8`` NumPy array (256 x 256 x 3 is
        expected with the sizes hard-coded below).

    Raises:
        ValueError: If one of the numeric inputs cannot be parsed.
        KeyError: If ``category`` is not a key of ``mesh_paths``.
    """
    azimuth = float(azimuth)
    elevation = float(elevation)
    theta = float(theta)
    dist = float(dist)

    in_degrees = unit == 'Degree'
    if in_degrees:
        # rotation_theta() feeds theta straight into cos/sin, i.e. it expects
        # radians. Azimuth and elevation are converted by pytorch3d through
        # the ``degrees`` flag, so theta has to be converted here; otherwise
        # the unit selector would silently not apply to it. Kept as a plain
        # Python float on purpose.
        theta = float(np.deg2rad(theta))

    h, w = 256, 256
    render_image_size = max(h, w)
    crop_size = (256, 256)
    device = 'cpu'

    cameras = PerspectiveCameras(focal_length=12.0, device=device)
    raster_settings = RasterizationSettings(
        image_size=render_image_size,
        blur_radius=0.0,
        faces_per_pixel=1,
        bin_size=0
    )
    lights = PointLights(device=device, location=((2.0, 2.0, -2.0),))
    phong_renderer = MeshRenderer(
        rasterizer=MeshRasterizer(
            cameras=cameras,
            raster_settings=raster_settings
        ),
        shader=HardPhongShader(device=device, lights=lights, cameras=cameras)
    )

    # Load the mesh and give every vertex a plain white colour.
    x3d, xface = load_off(mesh_paths[category])
    verts = pre_process_mesh_pascal(torch.from_numpy(x3d).to(device))
    faces = torch.from_numpy(xface).to(device)
    verts_rgb = torch.ones_like(verts)[None]
    textures = Textures(verts_rgb.to(device))
    meshes = Meshes(verts=[verts], faces=[faces], textures=textures)

    # Camera pose: position on a sphere around the origin, then in-plane roll.
    C = camera_position_from_spherical_angles(dist, elevation, azimuth, degrees=in_degrees, device=device)
    R, T = campos_to_R_T(C, theta, device=device)

    image = phong_renderer(meshes_world=meshes.clone(), R=R, T=T)
    image = image[:, ..., :3]  # keep the first three channels only

    # Crop a crop_size window centred on the middle of the rendered image.
    box_ = bbt.box_by_shape(crop_size, (render_image_size // 2,) * 2)
    bbox = box_.bbox
    image = image[:, bbox[0][0]:bbox[0][1], bbox[1][0]:bbox[1][1], :]
    image = torch.squeeze(image).detach().cpu().numpy()

    # Normalise to [0, 255]; skip the division for an all-black render so it
    # does not turn into NaNs.
    peak = image.max()
    if peak > 0:
        image = image / peak
    image = np.array(image * 255).astype(np.uint8)

    # Shift the image so that (cx, cy) ends up in the centre, filling the
    # uncovered border by edge replication. With the values below the offset
    # is zero and this leaves the image unchanged.
    cx, cy = (128, 128)
    dx = int(-cx + w/2)
    dy = int(-cy + h/2)
    image_pad = np.pad(image, ((abs(dy), abs(dy)), (abs(dx), abs(dx)), (0, 0)), mode='edge')
    image = image_pad[dy+abs(dy):dy+abs(dy)+image.shape[0], dx+abs(dx):dx+abs(dx)+image.shape[1]]
    return image
# Gradio UI: pose parameters and category selection on the left, the rendered
# image on the right. Clicking "Render" calls render() with the current values.
with gr.Blocks() as demo:
    gr.Markdown('# Visualize object pose')
    gr.Markdown('This app runs on a free HuggingFace Space with no GPU support. Rendering an image generally takes a few seconds.')
    with gr.Row():
        with gr.Column(scale=1):
            azimuth_box = gr.Textbox(label="Azimuth", value="45")
            elevation_box = gr.Textbox(label="Elevation", value="15")
            theta_box = gr.Textbox(label="Theta", value="0")
            dist_box = gr.Textbox(label="Distance", value="4")
            # Offer exactly the categories a mesh exists for, in dict order,
            # so the choices cannot drift away from mesh_paths.
            category_radio = gr.Radio(list(mesh_paths), value="Aeroplane")
            unit_radio = gr.Radio(["Degree", "Radian"], value="Degree")
            render_btn = gr.Button("Render")
        with gr.Column(scale=1):
            output = gr.Image(shape=(256, 256))
    render_btn.click(fn=render, inputs=[azimuth_box, elevation_box, theta_box, dist_box, category_radio, unit_radio], outputs=output)
demo.launch()
|