import BboxTools as bbt
import gradio as gr
import numpy as np
import torch
from pytorch3d.renderer import (
    RasterizationSettings, PerspectiveCameras, MeshRasterizer, MeshRenderer,
    HardPhongShader, BlendParams, camera_position_from_spherical_angles,
    look_at_rotation, PointLights,
)
from pytorch3d.renderer import TexturesVertex as Textures
from pytorch3d.structures import Meshes

mesh_paths = {
    "Aeroplane": "CAD_selected/aeroplane.off",
    "Bicycle": "CAD_selected/bicycle.off",
    "Boat": "CAD_selected/boat.off",
    "Bottle": "CAD_selected/bottle.off",
    "Bus": "CAD_selected/bus.off",
    "Car": "CAD_selected/car.off",
    "Chair": "CAD_selected/chair.off",
    "Diningtable": "CAD_selected/diningtable.off",
    "Motorbike": "CAD_selected/motorbike.off",
    "Sofa": "CAD_selected/sofa.off",
    "Train": "CAD_selected/train.off",
    "Tvmonitor": "CAD_selected/tvmonitor.off",
}


def rotation_theta(theta, device_=None):
    # Build a batch of in-plane (roll) rotation matrices about the camera z-axis:
    # cos -sin 0
    # sin  cos 0
    #  0    0  1
    if type(theta) == float:
        if device_ is None:
            device_ = 'cpu'
        theta = torch.ones((1, 1, 1)).to(device_) * theta
    else:
        if device_ is None:
            device_ = theta.device
        theta = theta.view(-1, 1, 1)

    mul_ = torch.Tensor([[1, 0, 0, 0, 1, 0, 0, 0, 0],
                         [0, -1, 0, 1, 0, 0, 0, 0, 0]]).view(1, 2, 9).to(device_)
    bia_ = torch.Tensor([0] * 8 + [1]).view(1, 1, 9).to(device_)

    # [n, 1, 2]
    cos_sin = torch.cat((torch.cos(theta), torch.sin(theta)), dim=2).to(device_)

    # [n, 1, 2] @ [1, 2, 9] + [1, 1, 9] => [n, 1, 9] => [n, 3, 3]
    trans = torch.matmul(cos_sin, mul_) + bia_
    trans = trans.view(-1, 3, 3)

    return trans


def campos_to_R_T(campos, theta, device='cpu', at=((0, 0, 0),), up=((0, 1, 0),)):
    # Convert a camera position and in-plane rotation into PyTorch3D (R, T).
    R = look_at_rotation(campos, at=at, device=device, up=up)  # (n, 3, 3)
    R = torch.bmm(R, rotation_theta(theta, device_=device))
    T = -torch.bmm(R.transpose(1, 2), campos.unsqueeze(2))[:, :, 0]  # (1, 3)
    return R, T


def load_off(off_file_name, to_torch=False):
    # Parse an OFF mesh file into vertex and face arrays.
    with open(off_file_name) as file_handle:
        file_list = file_handle.readlines()

    n_points = int(file_list[1].split(' ')[0])
    all_strings = ''.join(file_list[2:2 + n_points])
    array_ = np.fromstring(all_strings, dtype=np.float32, sep='\n')

    all_strings = ''.join(file_list[2 + n_points:])
    array_int = np.fromstring(all_strings, dtype=np.int32, sep='\n')

    array_ = array_.reshape((-1, 3))

    if not to_torch:
        return array_, array_int.reshape((-1, 4))[:, 1::]
    else:
        return torch.from_numpy(array_), torch.from_numpy(array_int.reshape((-1, 4))[:, 1::])


def pre_process_mesh_pascal(verts):
    # Swap the y and z axes to match the PASCAL3D+ mesh convention.
    verts = torch.cat((verts[:, 0:1], verts[:, 2:3], -verts[:, 1:2]), dim=1)
    return verts


def render(azimuth, elevation, theta, dist, category, unit):
    azimuth = float(azimuth)
    elevation = float(elevation)
    theta = float(theta)
    dist = float(dist)

    h, w = 256, 256
    render_image_size = max(h, w)
    crop_size = (256, 256)
    device = 'cpu'

    cameras = PerspectiveCameras(focal_length=12.0, device=device)
    raster_settings = RasterizationSettings(
        image_size=render_image_size,
        blur_radius=0.0,
        faces_per_pixel=1,
        bin_size=0
    )
    raster_settings1 = RasterizationSettings(
        image_size=render_image_size // 8,
        blur_radius=0.0,
        faces_per_pixel=1,
        bin_size=0
    )
    rasterizer = MeshRasterizer(
        cameras=cameras,
        raster_settings=raster_settings1
    )
    lights = PointLights(device=device, location=((2.0, 2.0, -2.0),))
    phong_renderer = MeshRenderer(
        rasterizer=MeshRasterizer(
            cameras=cameras,
            raster_settings=raster_settings
        ),
        shader=HardPhongShader(device=device, lights=lights, cameras=cameras)
    )

    # Load the CAD model for the selected category and build a textured mesh.
    x3d, xface = load_off(mesh_paths[category])
    x3d = x3d * 1.0

    verts = torch.from_numpy(x3d).to(device)
    verts = pre_process_mesh_pascal(verts)
    faces = torch.from_numpy(xface).to(device)

    verts_rgb = torch.ones_like(verts)[None]
    # verts_rgb = torch.ones_like(verts)[None] * torch.Tensor(color).view(1, 1, 3).to(verts.device)
    textures = Textures(verts_rgb.to(device))
    meshes = Meshes(verts=[verts], faces=[faces], textures=textures)
    # meshes = Meshes(verts=[verts], faces=[faces])

    # Convert the spherical pose parameters into a camera rotation and translation.
    C = camera_position_from_spherical_angles(dist, elevation, azimuth, degrees=(unit == 'Degree'), device=device)
    R, T = campos_to_R_T(C, theta, device=device)

    image = phong_renderer(meshes_world=meshes.clone(), R=R, T=T)
    image = image[:, ..., :3]

    # Center-crop the rendered image to crop_size and normalize to uint8.
    box_ = bbt.box_by_shape(crop_size, (render_image_size // 2,) * 2)
    bbox = box_.bbox
    image = image[:, bbox[0][0]:bbox[0][1], bbox[1][0]:bbox[1][1], :]
    image = torch.squeeze(image).detach().cpu().numpy()
    image = np.array((image / image.max()) * 255).astype(np.uint8)

    # Shift the principal point to the image center; with cx = cy = 128 and
    # h = w = 256 this is a no-op, kept for generality.
    cx, cy = (128, 128)
    dx = int(-cx + w / 2)
    dy = int(-cy + h / 2)
    image_pad = np.pad(image, ((abs(dy), abs(dy)), (abs(dx), abs(dx)), (0, 0)), mode='edge')
    image = image_pad[dy + abs(dy):dy + abs(dy) + image.shape[0], dx + abs(dx):dx + abs(dx) + image.shape[1]]

    return image


with gr.Blocks() as demo:
    gr.Markdown('# Visualize object pose')
    gr.Markdown('This app runs on a free HuggingFace Space with no GPU support. Rendering an image generally takes a few seconds.')
    with gr.Row():
        with gr.Column(scale=1):
            azimuth_box = gr.Textbox(label="Azimuth", value="45")
            elevation_box = gr.Textbox(label="Elevation", value="15")
            theta_box = gr.Textbox(label="Theta", value="0")
            dist_box = gr.Textbox(label="Distance", value="4")
            category_radio = gr.Radio(["Aeroplane", "Bicycle", "Boat", "Bottle", "Bus", "Car", "Chair", "Diningtable", "Motorbike", "Sofa", "Train", "Tvmonitor"], value="Aeroplane")
            unit_radio = gr.Radio(["Degree", "Radian"], value="Degree")
            render_btn = gr.Button("Render")
        with gr.Column(scale=1):
            output = gr.Image(shape=(256, 256))
    render_btn.click(fn=render, inputs=[azimuth_box, elevation_box, theta_box, dist_box, category_radio, unit_radio], outputs=output)

demo.launch()