cavargas10 committed on
Commit
1ccf97b
verified
1 Parent(s): 44fcbfd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -38
app.py CHANGED
@@ -19,17 +19,16 @@ from safetensors.torch import load_file
19
  from torchvision.transforms import v2
20
  from tqdm import tqdm
21
 
22
- from src.utils.camera_util import (FOV_to_intrinsics, get_circular_camera_poses, get_zero123plus_input_cameras)
 
23
  from src.utils.infer_util import (remove_background, resize_foreground)
24
- from src.utils.mesh_util import save_glb, save_obj, save_texmap
25
  from src.utils.train_util import instantiate_from_config
26
 
27
- # Inicializa un tensor en CUDA y verifica el dispositivo.
28
  zero = torch.Tensor([0]).cuda()
29
- print(zero.device) # Verifica que el dispositivo sea CUDA.
30
- print(zero.device) # Verifica nuevamente que el dispositivo sea CUDA.
31
 
32
- # Función para encontrar el path de CUDA.
33
  def find_cuda():
34
  cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
35
  if cuda_home and os.path.exists(cuda_home):
@@ -42,7 +41,6 @@ def find_cuda():
42
 
43
  return None
44
 
45
- # Función para obtener las cámaras de renderizado.
46
  def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
47
  c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)
48
  if is_flexicubes:
@@ -50,17 +48,18 @@ def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexi
50
  cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1, 1)
51
  else:
52
  extrinsics = c2ws.flatten(-2)
53
- intrinsics = FOV_to_intrinsics(50.0).unsqueeze(0).repeat(M, 1, 1).float().flatten(-2)
 
54
  cameras = torch.cat([extrinsics, intrinsics], dim=-1)
55
  cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1)
56
  return cameras
57
 
58
- # Verifica si la imagen de entrada es nula.
59
  def check_input_image(input_image):
60
  if input_image is None:
61
  raise gr.Error("No image selected!")
62
 
63
- # Preprocesa la imagen de entrada y opcionalmente elimina el fondo.
64
  def preprocess(input_image, do_remove_background):
65
  rembg_session = rembg.new_session() if do_remove_background else None
66
 
@@ -70,28 +69,31 @@ def preprocess(input_image, do_remove_background):
70
 
71
  return input_image
72
 
73
- # Genera vistas múltiples de la imagen de entrada.
74
  @spaces.GPU(duration=20)
75
  def generate_mvs(input_image, sample_steps, sample_seed):
76
  seed_everything(sample_seed)
77
 
78
- print(zero.device) # Verifica que el dispositivo sea CUDA.
79
 
80
- z123_image = pipeline(input_image, num_inference_steps=sample_steps).images[0]
 
81
 
82
  show_image = np.asarray(z123_image, dtype=np.uint8)
83
  show_image = torch.from_numpy(show_image)
84
- show_image = rearrange(show_image, '(n h) (m w) c -> (n m) h w c', n=3, m=2)
85
- show_image = rearrange(show_image, '(n m) h w c -> (n h) (m w) c', n=2, m=3)
 
 
86
  show_image = Image.fromarray(show_image.numpy())
87
 
88
  return z123_image, show_image
89
 
90
- # Convierte imágenes en modelos 3D y genera texturas.
91
  @spaces.GPU
92
  def make3d(images):
93
 
94
- print(zero.device) # Verifica que el dispositivo sea CUDA.
95
 
96
  global model
97
  if IS_FLEXICUBES:
@@ -102,13 +104,16 @@ def make3d(images):
102
  images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float()
103
  images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2)
104
 
105
- input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=4.0).to(device)
106
- render_cameras = get_render_cameras(batch_size=1, radius=2.5, is_flexicubes=IS_FLEXICUBES).to(device)
 
 
107
 
108
  images = images.unsqueeze(0).to(device)
109
- images = v2.functional.resize(images, (320, 320), interpolation=3, antialias=True).clamp(0, 1)
 
110
 
111
- mesh_fpath = tempfile.NamedTemporaryFile(suffix=".obj", delete=False).name
112
  print(mesh_fpath)
113
  mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
114
  mesh_dirname = os.path.dirname(mesh_fpath)
@@ -117,24 +122,38 @@ def make3d(images):
117
 
118
  with torch.no_grad():
119
  planes = model.forward_planes(images, input_cameras)
120
- mesh_out = model.extract_mesh(planes, use_texture_map=True, **infer_config)
121
 
122
- vertices, faces, vertex_colors, texture_map = mesh_out
123
- vertices = vertices[:, [1, 2, 0]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
- save_glb(vertices, faces, vertex_colors, texture_map, mesh_glb_fpath)
126
- save_obj(vertices, faces, vertex_colors, texture_map, mesh_fpath)
127
- save_texmap(texture_map, texmap_fpath)
128
 
129
- print(f"Mesh and texture saved to {mesh_fpath} and {texmap_fpath}")
130
-
131
- return mesh_fpath, mesh_glb_fpath, texmap_fpath
132
-
133
- # Procesa la imagen generada a partir de un prompt de texto.
134
  @spaces.GPU
135
  def process_image(num_images, prompt):
136
 
137
- print(zero.device) # Verifica que el dispositivo sea CUDA.
138
 
139
  global pipe
140
  with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
@@ -148,7 +167,8 @@ def process_image(num_images, prompt):
148
  timesteps=[800]
149
  ).images
150
 
151
- # Configuración inicial del entorno CUDA y carga de configuración del modelo.
 
152
  cuda_path = find_cuda()
153
  config_path = 'configs/instant-mesh-large.yaml'
154
  config = OmegaConf.load(config_path)
@@ -159,7 +179,7 @@ infer_config = config.infer_config
159
  IS_FLEXICUBES = config_name.startswith('instant-mesh')
160
  device = torch.device('cuda')
161
 
162
- # Carga del modelo de difusión.
163
  print('Loading diffusion model ...')
164
  pipeline = DiffusionPipeline.from_pretrained(
165
  "sudo-ai/zero123plus-v1.2",
@@ -177,20 +197,21 @@ pipeline.unet.load_state_dict(state_dict, strict=True)
177
 
178
  pipeline = pipeline.to(device)
179
 
180
- # Carga del modelo de reconstrucción.
181
  print('Loading reconstruction model ...')
182
  model_ckpt_path = hf_hub_download(
183
  repo_id="TencentARC/InstantMesh", filename="instant_mesh_large.ckpt", repo_type="model")
184
  model = instantiate_from_config(model_config)
185
  state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
186
- state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.') and 'source_camera' not in k}
 
187
  model.load_state_dict(state_dict, strict=True)
188
 
189
  model = model.to(device)
190
 
191
  print('Carga Completa!')
192
 
193
- # Interfaz de usuario usando Gradio.
194
  with gr.Blocks() as demo:
195
  with gr.Row(variant="panel"):
196
  with gr.Column():
 
19
  from torchvision.transforms import v2
20
  from tqdm import tqdm
21
 
22
+ from src.utils.camera_util import (FOV_to_intrinsics, get_circular_camera_poses,
23
+ get_zero123plus_input_cameras)
24
  from src.utils.infer_util import (remove_background, resize_foreground)
25
+ from src.utils.mesh_util import save_glb, save_obj, save_obj_with_mtl
26
  from src.utils.train_util import instantiate_from_config
27
 
 
28
  zero = torch.Tensor([0]).cuda()
29
+ print(zero.device) # <-- 'cpu' 🤔
30
+ print(zero.device) # <-- 'cuda:0' 🤗
31
 
 
32
  def find_cuda():
33
  cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
34
  if cuda_home and os.path.exists(cuda_home):
 
41
 
42
  return None
43
 
 
44
  def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
45
  c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)
46
  if is_flexicubes:
 
48
  cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1, 1)
49
  else:
50
  extrinsics = c2ws.flatten(-2)
51
+ intrinsics = FOV_to_intrinsics(50.0).unsqueeze(
52
+ 0).repeat(M, 1, 1).float().flatten(-2)
53
  cameras = torch.cat([extrinsics, intrinsics], dim=-1)
54
  cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1)
55
  return cameras
56
 
57
+
58
  def check_input_image(input_image):
59
  if input_image is None:
60
  raise gr.Error("No image selected!")
61
 
62
+
63
  def preprocess(input_image, do_remove_background):
64
  rembg_session = rembg.new_session() if do_remove_background else None
65
 
 
69
 
70
  return input_image
71
 
72
+
73
  @spaces.GPU(duration=20)
74
  def generate_mvs(input_image, sample_steps, sample_seed):
75
  seed_everything(sample_seed)
76
 
77
+ print(zero.device) # <-- 'cuda:0' 🤗
78
 
79
+ z123_image = pipeline(
80
+ input_image, num_inference_steps=sample_steps).images[0]
81
 
82
  show_image = np.asarray(z123_image, dtype=np.uint8)
83
  show_image = torch.from_numpy(show_image)
84
+ show_image = rearrange(
85
+ show_image, '(n h) (m w) c -> (n m) h w c', n=3, m=2)
86
+ show_image = rearrange(
87
+ show_image, '(n m) h w c -> (n h) (m w) c', n=2, m=3)
88
  show_image = Image.fromarray(show_image.numpy())
89
 
90
  return z123_image, show_image
91
 
92
+
93
  @spaces.GPU
94
  def make3d(images):
95
 
96
+ print(zero.device) # <-- 'cuda:0' 🤗
97
 
98
  global model
99
  if IS_FLEXICUBES:
 
104
  images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float()
105
  images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2)
106
 
107
+ input_cameras = get_zero123plus_input_cameras(
108
+ batch_size=1, radius=4.0).to(device)
109
+ render_cameras = get_render_cameras(
110
+ batch_size=1, radius=2.5, is_flexicubes=IS_FLEXICUBES).to(device)
111
 
112
  images = images.unsqueeze(0).to(device)
113
+ images = v2.functional.resize(
114
+ images, (320, 320), interpolation=3, antialias=True).clamp(0, 1)
115
 
116
+ mesh_fpath = tempfile.NamedTemporaryFile(suffix=f".obj", delete=False).name
117
  print(mesh_fpath)
118
  mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
119
  mesh_dirname = os.path.dirname(mesh_fpath)
 
122
 
123
  with torch.no_grad():
124
  planes = model.forward_planes(images, input_cameras)
 
125
 
126
+ # Extraer el mesh y la textura
127
+ mesh_out = model.extract_mesh(
128
+ planes,
129
+ use_texture_map=True, # Habilitar la generación de texturas
130
+ **infer_config,
131
+ )
132
+
133
+ # Guardar el mesh con o sin textura
134
+ if len(mesh_out) == 5: # Si se genera una textura
135
+ vertices, faces, uvs, mesh_tex_idx, tex_map = mesh_out
136
+ save_obj_with_mtl(
137
+ vertices.data.cpu().numpy(),
138
+ uvs.data.cpu().numpy(),
139
+ faces.data.cpu().numpy(),
140
+ mesh_tex_idx.data.cpu().numpy(),
141
+ tex_map.permute(1, 2, 0).data.cpu().numpy(),
142
+ mesh_fpath,
143
+ )
144
+ else:
145
+ vertices, faces, vertex_colors = mesh_out
146
+ save_obj(vertices, faces, vertex_colors, mesh_fpath)
147
+
148
+ print(f"Mesh saved to {mesh_fpath}")
149
+
150
+ return mesh_fpath, mesh_glb_fpath, texmap_fpath if len(mesh_out) == 5 else None
151
 
 
 
 
152
 
 
 
 
 
 
153
  @spaces.GPU
154
  def process_image(num_images, prompt):
155
 
156
+ print(zero.device) # <-- 'cuda:0' 🤗
157
 
158
  global pipe
159
  with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
 
167
  timesteps=[800]
168
  ).images
169
 
170
+
171
+ # Configuration
172
  cuda_path = find_cuda()
173
  config_path = 'configs/instant-mesh-large.yaml'
174
  config = OmegaConf.load(config_path)
 
179
  IS_FLEXICUBES = config_name.startswith('instant-mesh')
180
  device = torch.device('cuda')
181
 
182
+ # Load diffusion model
183
  print('Loading diffusion model ...')
184
  pipeline = DiffusionPipeline.from_pretrained(
185
  "sudo-ai/zero123plus-v1.2",
 
197
 
198
  pipeline = pipeline.to(device)
199
 
200
+ # Load reconstruction model
201
  print('Loading reconstruction model ...')
202
  model_ckpt_path = hf_hub_download(
203
  repo_id="TencentARC/InstantMesh", filename="instant_mesh_large.ckpt", repo_type="model")
204
  model = instantiate_from_config(model_config)
205
  state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
206
+ state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith(
207
+ 'lrm_generator.') and 'source_camera' not in k}
208
  model.load_state_dict(state_dict, strict=True)
209
 
210
  model = model.to(device)
211
 
212
  print('Carga Completa!')
213
 
214
+ # Gradio UI
215
  with gr.Blocks() as demo:
216
  with gr.Row(variant="panel"):
217
  with gr.Column():