yamildiego committed
Commit 5b9b0ed · Parent(s): 7abe9d0

Load model from local checkpoints and remove the depth ControlNet

{checkpoints → .checkpoints/ControlNetModel}/config.json RENAMED (file without changes)
{checkpoints → .checkpoints/ControlNetModel}/diffusion_pytorch_model.safetensors RENAMED (file without changes)
{checkpoints → .checkpoints}/ip-adapter.bin RENAMED (file without changes)
checkpoints/.DS_Store DELETED (binary file, 6.15 kB)
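Since the handler now expects the InstantID weights to already be on disk, a missing file would only surface as a load error at startup. Below is a minimal, hypothetical local-first sketch (the `ensure_checkpoints` helper is not part of this commit) that reuses the same repo and filenames as the removed `hf_hub_download` calls and falls back to the Hub only when a file is absent:

import os
from huggingface_hub import hf_hub_download

CHECKPOINT_DIR = "./checkpoints"
INSTANTID_FILES = [
    "ControlNetModel/config.json",
    "ControlNetModel/diffusion_pytorch_model.safetensors",
    "ip-adapter.bin",
]

def ensure_checkpoints():
    # Download only the InstantID files that are not already on disk,
    # mirroring the repo_id/filenames of the removed hf_hub_download calls.
    for filename in INSTANTID_FILES:
        if not os.path.exists(os.path.join(CHECKPOINT_DIR, filename)):
            hf_hub_download(
                repo_id="InstantX/InstantID",
                filename=filename,
                local_dir=CHECKPOINT_DIR,
            )

This keeps cold starts fast when the checkpoints ship with the repo while still working in a fresh environment.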
 
handler.py CHANGED

@@ -1,16 +1,3 @@
-# from typing import List, Any
-# import torch
-# from diffusers import StableCascadePriorPipeline, StableCascadeDecoderPipeline
-
-# # Configure the device the model runs on
-# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-# if device.type != 'cuda':
-#     raise ValueError("Running on a GPU is required")
-
-# # Configure the mixed-precision dtype based on the GPU's capability
-# dtype = torch.bfloat16 if torch.cuda.get_device_capability(device.index)[0] >= 8 else torch.float16
-
-# start_test
 import cv2
 import numpy as np
 
@@ -45,31 +32,31 @@ dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
 
 class EndpointHandler():
     def __init__(self, model_dir):
-        hf_hub_download(repo_id="InstantX/InstantID", filename="ControlNetModel/config.json", local_dir="./checkpoints")
-        hf_hub_download(
-            repo_id="InstantX/InstantID",
-            filename="ControlNetModel/diffusion_pytorch_model.safetensors",
-            local_dir="./checkpoints",
-        )
-        hf_hub_download(repo_id="InstantX/InstantID", filename="ip-adapter.bin", local_dir="./checkpoints")
+        # hf_hub_download(repo_id="InstantX/InstantID", filename="ControlNetModel/config.json", local_dir="./checkpoints")
+        # hf_hub_download(
+        #     repo_id="InstantX/InstantID",
+        #     filename="ControlNetModel/diffusion_pytorch_model.safetensors",
+        #     local_dir="./checkpoints",
+        # )
+        # hf_hub_download(repo_id="InstantX/InstantID", filename="ip-adapter.bin", local_dir="./checkpoints")
 
         print("Model dir: ", model_dir)
         face_adapter = f"./checkpoints/ip-adapter.bin"
         controlnet_path = f"./checkpoints/ControlNetModel"
 
-        transform = Compose([
-            Resize(
-                width=518,
-                height=518,
-                resize_target=False,
-                keep_aspect_ratio=True,
-                ensure_multiple_of=14,
-                resize_method='lower_bound',
-                image_interpolation_method=cv2.INTER_CUBIC,
-            ),
-            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-            PrepareForNet(),
-        ])
+        # transform = Compose([
+        #     Resize(
+        #         width=518,
+        #         height=518,
+        #         resize_target=False,
+        #         keep_aspect_ratio=True,
+        #         ensure_multiple_of=14,
+        #         resize_method='lower_bound',
+        #         image_interpolation_method=cv2.INTER_CUBIC,
+        #     ),
+        #     NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        #     PrepareForNet(),
+        # ])
 
         self.controlnet_identitynet = ControlNetModel.from_pretrained(
             controlnet_path, torch_dtype=dtype
@@ -103,7 +90,7 @@ class EndpointHandler():
         # controlnet-pose/canny/depth
         controlnet_pose_model = "thibaud/controlnet-openpose-sdxl-1.0"
         controlnet_canny_model = "diffusers/controlnet-canny-sdxl-1.0"
-        controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"
+        # controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"
 
         controlnet_pose = ControlNetModel.from_pretrained(
             controlnet_pose_model, torch_dtype=dtype
@@ -111,49 +98,49 @@ class EndpointHandler():
         controlnet_canny = ControlNetModel.from_pretrained(
             controlnet_canny_model, torch_dtype=dtype
         ).to(device)
-        controlnet_depth = ControlNetModel.from_pretrained(
-            controlnet_depth_model, torch_dtype=dtype
-        ).to(device)
+        # controlnet_depth = ControlNetModel.from_pretrained(
+        #     controlnet_depth_model, torch_dtype=dtype
+        # ).to(device)
 
         openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
-        depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(device).eval()
+        # depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(device).eval()
 
         def get_canny_image(image, t1=100, t2=200):
             image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
             edges = cv2.Canny(image, t1, t2)
             return Image.fromarray(edges, "L")
 
-        def get_depth_map(image):
-            image = np.array(image) / 255.0
-            h, w = image.shape[:2]
-            image = transform({'image': image})['image']
-            image = torch.from_numpy(image).unsqueeze(0).to("cuda")
-            with torch.no_grad():
-                depth = depth_anything(image)
-            depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
-            depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
-            depth = depth.cpu().numpy().astype(np.uint8)
-            depth_image = Image.fromarray(depth)
-            return depth_image
+        # def get_depth_map(image):
+        #     image = np.array(image) / 255.0
+        #     h, w = image.shape[:2]
+        #     image = transform({'image': image})['image']
+        #     image = torch.from_numpy(image).unsqueeze(0).to("cuda")
+        #     with torch.no_grad():
+        #         depth = depth_anything(image)
+        #     depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
+        #     depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
+        #     depth = depth.cpu().numpy().astype(np.uint8)
+        #     depth_image = Image.fromarray(depth)
+        #     return depth_image
 
         self.controlnet_map = {
             "pose": controlnet_pose,
             "canny": controlnet_canny,
-            "depth": controlnet_depth,
+            # "depth": controlnet_depth,
         }
 
         self.controlnet_map_fn = {
             "pose": openpose,
             "canny": get_canny_image,
-            "depth": get_depth_map,
+            # "depth": get_depth_map,
         }
 
         self.app = FaceAnalysis(name="buffalo_l", root="./", providers=["CPUExecutionProvider"])
@@ -167,8 +154,9 @@ class EndpointHandler():
         identitynet_strength_ratio = 0.8
         pose_strength = 0.4
         canny_strength = 0.3
-        depth_strength = 0.5
-        controlnet_selection = ["pose", "canny", "depth"]
+        # depth_strength = 0.5
+        controlnet_selection = ["pose", "canny"]
+        # controlnet_selection = ["pose", "canny", "depth"]
 
         face_image_path = "https://i.ibb.co/SKg69dD/kaifu-resize.png"
         pose_image_path = "https://i.ibb.co/ZSrQ8ZJ/pose.jpg"
@@ -279,7 +267,7 @@ class EndpointHandler():
         controlnet_scales = {
             "pose": pose_strength,
             "canny": canny_strength,
-            "depth": depth_strength,
+            # "depth": depth_strength,
        }
         self.pipe.controlnet = MultiControlNetModel(
             [self.controlnet_identitynet]
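The diff ends mid-expression at `[self.controlnet_identitynet]`. For context, here is a hypothetical sketch of how the identity ControlNet and the remaining pose/canny ControlNets are typically combined, following the upstream InstantID demo pattern (the `assemble_controlnets` helper and the exact continuation are assumptions, not part of this commit):

from diffusers.pipelines.controlnet import MultiControlNetModel

def assemble_controlnets(identitynet, controlnet_map, controlnet_scales,
                         identitynet_strength_ratio, selection):
    # The identity ControlNet always comes first; the selected extra
    # ControlNets ("pose", "canny" after this commit) follow, each paired
    # with its own conditioning scale.
    multi = MultiControlNetModel(
        [identitynet] + [controlnet_map[s] for s in selection]
    )
    scales = [float(identitynet_strength_ratio)] + [
        controlnet_scales[s] for s in selection
    ]
    return multi, scales

# Hypothetical usage inside the handler after this commit:
# multi, scales = assemble_controlnets(
#     self.controlnet_identitynet, self.controlnet_map,
#     controlnet_scales, identitynet_strength_ratio, ["pose", "canny"],
# )

With depth removed, `MultiControlNetModel` would receive three ControlNets (identity, pose, canny) and a matching list of three conditioning scales.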