cavargas10 committed on
Commit
a6809ac
·
verified ·
1 Parent(s): afad0d4

Actualización

Browse files
Files changed (1) hide show
  1. app.py +29 -37
app.py CHANGED
@@ -19,16 +19,17 @@ from safetensors.torch import load_file
19
  from torchvision.transforms import v2
20
  from tqdm import tqdm
21
 
22
- from src.utils.camera_util import (FOV_to_intrinsics, get_circular_camera_poses,
23
- get_zero123plus_input_cameras)
24
  from src.utils.infer_util import (remove_background, resize_foreground)
25
  from src.utils.mesh_util import save_glb, save_obj
26
  from src.utils.train_util import instantiate_from_config
27
 
 
28
  zero = torch.Tensor([0]).cuda()
29
- print(zero.device) # <-- 'cpu' 🤔
30
- print(zero.device) # <-- 'cuda:0' 🤗
31
 
 
32
  def find_cuda():
33
  cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
34
  if cuda_home and os.path.exists(cuda_home):
@@ -41,6 +42,7 @@ def find_cuda():
41
 
42
  return None
43
 
 
44
  def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
45
  c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)
46
  if is_flexicubes:
@@ -48,18 +50,17 @@ def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexi
48
  cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1, 1)
49
  else:
50
  extrinsics = c2ws.flatten(-2)
51
- intrinsics = FOV_to_intrinsics(50.0).unsqueeze(
52
- 0).repeat(M, 1, 1).float().flatten(-2)
53
  cameras = torch.cat([extrinsics, intrinsics], dim=-1)
54
  cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1)
55
  return cameras
56
 
57
-
58
  def check_input_image(input_image):
59
  if input_image is None:
60
  raise gr.Error("No image selected!")
61
 
62
-
63
  def preprocess(input_image, do_remove_background):
64
  rembg_session = rembg.new_session() if do_remove_background else None
65
 
@@ -69,31 +70,28 @@ def preprocess(input_image, do_remove_background):
69
 
70
  return input_image
71
 
72
-
73
  @spaces.GPU(duration=20)
74
  def generate_mvs(input_image, sample_steps, sample_seed):
75
  seed_everything(sample_seed)
76
 
77
- print(zero.device) # <-- 'cuda:0' 🤗
78
 
79
- z123_image = pipeline(
80
- input_image, num_inference_steps=sample_steps).images[0]
81
 
82
  show_image = np.asarray(z123_image, dtype=np.uint8)
83
  show_image = torch.from_numpy(show_image)
84
- show_image = rearrange(
85
- show_image, '(n h) (m w) c -> (n m) h w c', n=3, m=2)
86
- show_image = rearrange(
87
- show_image, '(n m) h w c -> (n h) (m w) c', n=2, m=3)
88
  show_image = Image.fromarray(show_image.numpy())
89
 
90
  return z123_image, show_image
91
 
92
-
93
  @spaces.GPU
94
  def make3d(images):
95
 
96
- print(zero.device) # <-- 'cuda:0' 🤗
97
 
98
  global model
99
  if IS_FLEXICUBES:
@@ -104,16 +102,13 @@ def make3d(images):
104
  images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float()
105
  images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2)
106
 
107
- input_cameras = get_zero123plus_input_cameras(
108
- batch_size=1, radius=4.0).to(device)
109
- render_cameras = get_render_cameras(
110
- batch_size=1, radius=2.5, is_flexicubes=IS_FLEXICUBES).to(device)
111
 
112
  images = images.unsqueeze(0).to(device)
113
- images = v2.functional.resize(
114
- images, (320, 320), interpolation=3, antialias=True).clamp(0, 1)
115
 
116
- mesh_fpath = tempfile.NamedTemporaryFile(suffix=f".obj", delete=False).name
117
  print(mesh_fpath)
118
  mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
119
  mesh_dirname = os.path.dirname(mesh_fpath)
@@ -121,8 +116,7 @@ def make3d(images):
121
 
122
  with torch.no_grad():
123
  planes = model.forward_planes(images, input_cameras)
124
- mesh_out = model.extract_mesh(
125
- planes, use_texture_map=False, **infer_config)
126
 
127
  vertices, faces, vertex_colors = mesh_out
128
  vertices = vertices[:, [1, 2, 0]]
@@ -134,11 +128,11 @@ def make3d(images):
134
 
135
  return mesh_fpath, mesh_glb_fpath
136
 
137
-
138
  @spaces.GPU
139
  def process_image(num_images, prompt):
140
 
141
- print(zero.device) # <-- 'cuda:0' 🤗
142
 
143
  global pipe
144
  with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
@@ -152,8 +146,7 @@ def process_image(num_images, prompt):
152
  timesteps=[800]
153
  ).images
154
 
155
-
156
- # Configuration
157
  cuda_path = find_cuda()
158
  config_path = 'configs/instant-mesh-large.yaml'
159
  config = OmegaConf.load(config_path)
@@ -164,7 +157,7 @@ infer_config = config.infer_config
164
  IS_FLEXICUBES = config_name.startswith('instant-mesh')
165
  device = torch.device('cuda')
166
 
167
- # Load diffusion model
168
  print('Loading diffusion model ...')
169
  pipeline = DiffusionPipeline.from_pretrained(
170
  "sudo-ai/zero123plus-v1.2",
@@ -182,21 +175,20 @@ pipeline.unet.load_state_dict(state_dict, strict=True)
182
 
183
  pipeline = pipeline.to(device)
184
 
185
- # Load reconstruction model
186
  print('Loading reconstruction model ...')
187
  model_ckpt_path = hf_hub_download(
188
  repo_id="TencentARC/InstantMesh", filename="instant_mesh_large.ckpt", repo_type="model")
189
  model = instantiate_from_config(model_config)
190
  state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
191
- state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith(
192
- 'lrm_generator.') and 'source_camera' not in k}
193
  model.load_state_dict(state_dict, strict=True)
194
 
195
  model = model.to(device)
196
 
197
  print('Carga Completa!')
198
 
199
- # Gradio UI
200
  with gr.Blocks() as demo:
201
  with gr.Row(variant="panel"):
202
  with gr.Column():
@@ -265,4 +257,4 @@ with gr.Blocks() as demo:
265
  outputs=[output_model_obj, output_model_glb]
266
  )
267
 
268
- demo.launch()
 
19
  from torchvision.transforms import v2
20
  from tqdm import tqdm
21
 
22
+ from src.utils.camera_util import (FOV_to_intrinsics, get_circular_camera_poses, get_zero123plus_input_cameras)
 
23
  from src.utils.infer_util import (remove_background, resize_foreground)
24
  from src.utils.mesh_util import save_glb, save_obj
25
  from src.utils.train_util import instantiate_from_config
26
 
27
+ # Inicializa un tensor en CUDA y verifica el dispositivo.
28
  zero = torch.Tensor([0]).cuda()
29
+ print(zero.device) # Verifica que el dispositivo sea CUDA.
30
+ print(zero.device) # Verifica nuevamente que el dispositivo sea CUDA.
31
 
32
+ # Función para encontrar el path de CUDA.
33
  def find_cuda():
34
  cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
35
  if cuda_home and os.path.exists(cuda_home):
 
42
 
43
  return None
44
 
45
+ # Función para obtener las cámaras de renderizado.
46
  def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
47
  c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)
48
  if is_flexicubes:
 
50
  cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1, 1)
51
  else:
52
  extrinsics = c2ws.flatten(-2)
53
+ intrinsics = FOV_to_intrinsics(50.0).unsqueeze(0).repeat(M, 1, 1).float().flatten(-2)
 
54
  cameras = torch.cat([extrinsics, intrinsics], dim=-1)
55
  cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1)
56
  return cameras
57
 
58
+ # Verifica si la imagen de entrada es nula.
59
  def check_input_image(input_image):
60
  if input_image is None:
61
  raise gr.Error("No image selected!")
62
 
63
+ # Preprocesa la imagen de entrada y opcionalmente elimina el fondo.
64
  def preprocess(input_image, do_remove_background):
65
  rembg_session = rembg.new_session() if do_remove_background else None
66
 
 
70
 
71
  return input_image
72
 
73
+ # Genera vistas múltiples de la imagen de entrada.
74
  @spaces.GPU(duration=20)
75
  def generate_mvs(input_image, sample_steps, sample_seed):
76
  seed_everything(sample_seed)
77
 
78
+ print(zero.device) # Verifica que el dispositivo sea CUDA.
79
 
80
+ z123_image = pipeline(input_image, num_inference_steps=sample_steps).images[0]
 
81
 
82
  show_image = np.asarray(z123_image, dtype=np.uint8)
83
  show_image = torch.from_numpy(show_image)
84
+ show_image = rearrange(show_image, '(n h) (m w) c -> (n m) h w c', n=3, m=2)
85
+ show_image = rearrange(show_image, '(n m) h w c -> (n h) (m w) c', n=2, m=3)
 
 
86
  show_image = Image.fromarray(show_image.numpy())
87
 
88
  return z123_image, show_image
89
 
90
+ # Convierte imágenes en modelos 3D.
91
  @spaces.GPU
92
  def make3d(images):
93
 
94
+ print(zero.device) # Verifica que el dispositivo sea CUDA.
95
 
96
  global model
97
  if IS_FLEXICUBES:
 
102
  images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float()
103
  images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2)
104
 
105
+ input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=4.0).to(device)
106
+ render_cameras = get_render_cameras(batch_size=1, radius=2.5, is_flexicubes=IS_FLEXICUBES).to(device)
 
 
107
 
108
  images = images.unsqueeze(0).to(device)
109
+ images = v2.functional.resize(images, (320, 320), interpolation=3, antialias=True).clamp(0, 1)
 
110
 
111
+ mesh_fpath = tempfile.NamedTemporaryFile(suffix=".obj", delete=False).name
112
  print(mesh_fpath)
113
  mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
114
  mesh_dirname = os.path.dirname(mesh_fpath)
 
116
 
117
  with torch.no_grad():
118
  planes = model.forward_planes(images, input_cameras)
119
+ mesh_out = model.extract_mesh(planes, use_texture_map=False, **infer_config)
 
120
 
121
  vertices, faces, vertex_colors = mesh_out
122
  vertices = vertices[:, [1, 2, 0]]
 
128
 
129
  return mesh_fpath, mesh_glb_fpath
130
 
131
+ # Procesa la imagen generada a partir de un prompt de texto.
132
  @spaces.GPU
133
  def process_image(num_images, prompt):
134
 
135
+ print(zero.device) # Verifica que el dispositivo sea CUDA.
136
 
137
  global pipe
138
  with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
 
146
  timesteps=[800]
147
  ).images
148
 
149
+ # Configuración inicial del entorno CUDA y carga de configuración del modelo.
 
150
  cuda_path = find_cuda()
151
  config_path = 'configs/instant-mesh-large.yaml'
152
  config = OmegaConf.load(config_path)
 
157
  IS_FLEXICUBES = config_name.startswith('instant-mesh')
158
  device = torch.device('cuda')
159
 
160
+ # Carga del modelo de difusión.
161
  print('Loading diffusion model ...')
162
  pipeline = DiffusionPipeline.from_pretrained(
163
  "sudo-ai/zero123plus-v1.2",
 
175
 
176
  pipeline = pipeline.to(device)
177
 
178
+ # Carga del modelo de reconstrucción.
179
  print('Loading reconstruction model ...')
180
  model_ckpt_path = hf_hub_download(
181
  repo_id="TencentARC/InstantMesh", filename="instant_mesh_large.ckpt", repo_type="model")
182
  model = instantiate_from_config(model_config)
183
  state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
184
+ state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.') and 'source_camera' not in k}
 
185
  model.load_state_dict(state_dict, strict=True)
186
 
187
  model = model.to(device)
188
 
189
  print('Carga Completa!')
190
 
191
+ # Interfaz de usuario usando Gradio.
192
  with gr.Blocks() as demo:
193
  with gr.Row(variant="panel"):
194
  with gr.Column():
 
257
  outputs=[output_model_obj, output_model_glb]
258
  )
259
 
260
+ demo.launch()