Charles-Elena
/

InstantID

Diffusers

ONNX

Safetensors

Inference Endpoints

Model card Files Files and versions Community

yamildiego committed on Mar 14, 2024

Commit

0feaae9

1 Parent(s): 75c898e

call zero to 100

Browse files

Files changed (1) hide show

handler.py +148 -150

handler.py CHANGED Viewed

@@ -165,160 +165,158 @@ class EndpointHandler():
         self.app.prepare(ctx_id=0, det_size=(640, 640))
     def __call__(self, param):
-        print("Param: ", param)
-        return None
-        # self.pipe.scheduler = diffusers.LCMScheduler.from_config(self.pipe.scheduler.config)
-        # self.pipe.enable_lora()
-        # adapter_strength_ratio = 0.8
-        # identitynet_strength_ratio = 0.8
-        # pose_strength = 0.4
-        # canny_strength = 0.3
-        # depth_strength = 0.5
-        # controlnet_selection = ["pose", "canny", "depth"]
-        # face_image_path = "./kaifu_resize.png"
-        # pose_image_path = "./pose.jpg"
-        # def convert_from_cv2_to_image(img: np.ndarray) -> Image:
-        #     return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
-        # def convert_from_image_to_cv2(img: Image) -> np.ndarray:
-        #     return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
-        # # check if the input is valid
-        # # if face_image_path is None:
-        # #     raise gr.Error(
-        # #         f"Cannot find any input face image! Please upload the face image"
-        # #     )
-        # #  check the prompt
-        # # if prompt is None:
-        # prompt = "a person"
-        # negative_prompt=""
-        # # apply the style template
-        # # prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
-        # face_image = load_image(face_image_path)
-        # face_image = resize_img(face_image, max_side=1024)
-        # face_image_cv2 = convert_from_image_to_cv2(face_image)
-        # height, width, _ = face_image_cv2.shape
-        # # Extract face features
-        # face_info = self.app.get(face_image_cv2)
-        # print("error si no hay face")
-        # # if len(face_info) == 0:
-        # #     raise gr.Error(
-        # #         f"Unable to detect a face in the image. Please upload a different photo with a clear face."
-        # #     )
-        # face_info = sorted(
-        #     face_info,
-        #     key=lambda x: (x["bbox"][2] - x["bbox"][0]) * x["bbox"][3] - x["bbox"][1],
-        # )[
-        #     -1
-        # ]  # only use the maximum face
-        # def resize_img(
-        #     input_image,
-        #     max_side=1280,
-        #     min_side=1024,
-        #     size=None,
-        #     pad_to_max_side=False,
-        #     mode=PIL.Image.BILINEAR,
-        #     base_pixel_number=64,
-        # ):
-        #     w, h = input_image.size
-        #     if size is not None:
-        #         w_resize_new, h_resize_new = size
-        #     else:
-        #         ratio = min_side / min(h, w)
-        #         w, h = round(ratio * w), round(ratio * h)
-        #         ratio = max_side / max(h, w)
-        #         input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
-        #         w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
-        #         h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
-        #     input_image = input_image.resize([w_resize_new, h_resize_new], mode)
-        #     if pad_to_max_side:
-        #         res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
-        #         offset_x = (max_side - w_resize_new) // 2
-        #         offset_y = (max_side - h_resize_new) // 2
-        #         res[
-        #             offset_y : offset_y + h_resize_new, offset_x : offset_x + w_resize_new
-        #         ] = np.array(input_image)
-        #         input_image = Image.fromarray(res)
-        #     return input_image
-        # face_emb = face_info["embedding"]
-        # face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info["kps"])
-        # img_controlnet = face_image
-        # if pose_image_path is not None:
-        #     pose_image = load_image(pose_image_path)
-        #     pose_image = resize_img(pose_image, max_side=1024)
-        #     img_controlnet = pose_image
-        #     pose_image_cv2 = convert_from_image_to_cv2(pose_image)
-        #     face_info = self.app.get(pose_image_cv2)
-        #     # get error if no face is detected
-        #     # if len(face_info) == 0:
-        #     #     raise gr.Error(
-        #     #         f"Cannot find any face in the reference image! Please upload another person image"
-        #     #     )
-        #     face_info = face_info[-1]
-        #     face_kps = draw_kps(pose_image, face_info["kps"])
-        #     width, height = face_kps.size
-        # control_mask = np.zeros([height, width, 3])
-        # x1, y1, x2, y2 = face_info["bbox"]
-        # x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
-        # control_mask[y1:y2, x1:x2] = 255
-        # control_mask = Image.fromarray(control_mask.astype(np.uint8))
-        # if len(controlnet_selection) > 0:
-        #     controlnet_scales = {
-        #         "pose": pose_strength,
-        #         "canny": canny_strength,
-        #         "depth": depth_strength,
-        #     }
-        #     self.pipe.controlnet = MultiControlNetModel(
-        #         [self.controlnet_identitynet]
-        #         + [self.controlnet_map[s] for s in controlnet_selection]
-        #     )
-        #     control_scales = [float(identitynet_strength_ratio)] + [
-        #         controlnet_scales[s] for s in controlnet_selection
-        #     ]
-        #     control_images = [face_kps] + [
-        #         self.controlnet_map_fn[s](img_controlnet).resize((width, height))
-        #         for s in controlnet_selection
-        #     ]
-        # else:
-        #     self.pipe.controlnet = self.controlnet_identitynet
-        #     control_scales = float(identitynet_strength_ratio)
-        #     control_images = face_kps
-        # generator = torch.Generator(device=device.type).manual_seed(3)
-        # print("Start inference...")
-        # self.pipe.set_ip_adapter_scale(adapter_strength_ratio)
-        # images = self.pipe(
-        #     prompt=prompt,
-        #     negative_prompt=negative_prompt,
-        #     image_embeds=face_emb,
-        #     image=control_images,
-        #     control_mask=control_mask,
-        #     controlnet_conditioning_scale=control_scales,
-        #     num_inference_steps=30,
-        #     guidance_scale=7.5,
-        #     height=height,
-        #     width=width,
-        #     generator=generator,
-        # ).images
-        # return images[0]

         self.app.prepare(ctx_id=0, det_size=(640, 640))
     def __call__(self, param):
+        """Run ``self.pipe`` once and return the first generated image.
+
+        ``param`` is accepted but not read yet: the face image, pose image,
+        prompt, ControlNet selection and every strength are the fixed values
+        assigned in the body below.
+
+        Raises:
+            ValueError: if no face is detected in the face image or in the
+                pose image.
+        """
+        def convert_from_cv2_to_image(img: np.ndarray) -> Image.Image:
+            return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+        def convert_from_image_to_cv2(img: Image.Image) -> np.ndarray:
+            return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+        # Defined before its first use: a nested ``def`` makes the name local to
+        # ``__call__``, so calling it above its definition raises
+        # UnboundLocalError.
+        def resize_img(
+            input_image,
+            max_side=1280,
+            min_side=1024,
+            size=None,
+            pad_to_max_side=False,
+            mode=PIL.Image.BILINEAR,
+            base_pixel_number=64,
+        ):
+            """Resize ``input_image``; optionally centre it on a white square.
+
+            With ``size`` given the image is resized to exactly that
+            ``(width, height)``. Otherwise it is scaled by ``min_side`` and
+            ``max_side`` and each side is floored to a multiple of
+            ``base_pixel_number``.
+            """
+            w, h = input_image.size
+            if size is not None:
+                w_resize_new, h_resize_new = size
+            else:
+                ratio = min_side / min(h, w)
+                w, h = round(ratio * w), round(ratio * h)
+                ratio = max_side / max(h, w)
+                input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
+                w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
+                h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
+            input_image = input_image.resize([w_resize_new, h_resize_new], mode)
+            if pad_to_max_side:
+                # Paste the resized image in the middle of a white canvas.
+                res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
+                offset_x = (max_side - w_resize_new) // 2
+                offset_y = (max_side - h_resize_new) // 2
+                res[
+                    offset_y : offset_y + h_resize_new, offset_x : offset_x + w_resize_new
+                ] = np.array(input_image)
+                input_image = Image.fromarray(res)
+            return input_image
+        def face_area(face):
+            # Width times height of the detection box. The previous key,
+            # ``(x2 - x1) * y2 - y1``, lacked the parentheses around the height.
+            left, top, right, bottom = face["bbox"]
+            return (right - left) * (bottom - top)
+        self.pipe.scheduler = diffusers.LCMScheduler.from_config(self.pipe.scheduler.config)
+        self.pipe.enable_lora()
+        # Fixed request settings (see the docstring: ``param`` is not consulted).
+        adapter_strength_ratio = 0.8
+        identitynet_strength_ratio = 0.8
+        pose_strength = 0.4
+        canny_strength = 0.3
+        depth_strength = 0.5
+        controlnet_selection = ["pose", "canny", "depth"]
+        face_image_path = "./kaifu_resize.png"
+        pose_image_path = "./pose.jpg"
+        prompt = "a person"
+        negative_prompt = ""
+        face_image = load_image(face_image_path)
+        face_image = resize_img(face_image, max_side=1024)
+        face_image_cv2 = convert_from_image_to_cv2(face_image)
+        height, width, _ = face_image_cv2.shape
+        # Extract face features; fail with a clear message instead of an
+        # IndexError when nothing is detected.
+        face_info = self.app.get(face_image_cv2)
+        if len(face_info) == 0:
+            raise ValueError(
+                "Unable to detect a face in the image. "
+                "Please upload a different photo with a clear face."
+            )
+        face_info = max(face_info, key=face_area)  # only use the maximum face
+        face_emb = face_info["embedding"]
+        face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info["kps"])
+        img_controlnet = face_image
+        if pose_image_path is not None:
+            pose_image = load_image(pose_image_path)
+            pose_image = resize_img(pose_image, max_side=1024)
+            img_controlnet = pose_image
+            pose_image_cv2 = convert_from_image_to_cv2(pose_image)
+            face_info = self.app.get(pose_image_cv2)
+            if len(face_info) == 0:
+                raise ValueError(
+                    "Cannot find any face in the reference image! "
+                    "Please upload another person image"
+                )
+            # NOTE(review): this takes the last detection, not the largest face
+            # as done for the identity image above - confirm that is intended.
+            face_info = face_info[-1]
+            face_kps = draw_kps(pose_image, face_info["kps"])
+            width, height = face_kps.size
+        # White rectangle over the face box on a black canvas.
+        control_mask = np.zeros([height, width, 3])
+        x1, y1, x2, y2 = face_info["bbox"]
+        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
+        # A negative start/stop would be read by NumPy as an offset from the
+        # end, so clamp in case the box extends past the top or left edge.
+        x1, y1, x2, y2 = max(x1, 0), max(y1, 0), max(x2, 0), max(y2, 0)
+        control_mask[y1:y2, x1:x2] = 255
+        control_mask = Image.fromarray(control_mask.astype(np.uint8))
+        if len(controlnet_selection) > 0:
+            controlnet_scales = {
+                "pose": pose_strength,
+                "canny": canny_strength,
+                "depth": depth_strength,
+            }
+            # The identity net always comes first; scales and images below are
+            # built in the same order.
+            self.pipe.controlnet = MultiControlNetModel(
+                [self.controlnet_identitynet]
+                + [self.controlnet_map[s] for s in controlnet_selection]
+            )
+            control_scales = [float(identitynet_strength_ratio)] + [
+                controlnet_scales[s] for s in controlnet_selection
+            ]
+            control_images = [face_kps] + [
+                self.controlnet_map_fn[s](img_controlnet).resize((width, height))
+                for s in controlnet_selection
+            ]
+        else:
+            self.pipe.controlnet = self.controlnet_identitynet
+            control_scales = float(identitynet_strength_ratio)
+            control_images = face_kps
+        # NOTE(review): ``device`` is not defined in this method - presumably a
+        # module-level ``torch.device``; confirm. The seed is fixed at 3.
+        generator = torch.Generator(device=device.type).manual_seed(3)
+        print("Start inference...")
+        self.pipe.set_ip_adapter_scale(adapter_strength_ratio)
+        images = self.pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            image_embeds=face_emb,
+            image=control_images,
+            control_mask=control_mask,
+            controlnet_conditioning_scale=control_scales,
+            num_inference_steps=30,
+            guidance_scale=7.5,
+            height=height,
+            width=width,
+            generator=generator,
+        ).images
+        return images[0]