mishig (HF staff) committed
Commit 3806189
1 Parent(s): ecd427f

Update app.py

Files changed (1):
  1. app.py +69 -63
app.py CHANGED
@@ -1,16 +1,15 @@
-from controlnet_aux import OpenposeDetector
 from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
 from diffusers import UniPCMultistepScheduler
 import gradio as gr
 import torch
 import base64
 from io import BytesIO
-from PIL import Image
-# live conditioning
-canvas_html = "<pose-canvas id='canvas-root' style='display:flex;max-width: 500px;margin: 0 auto;'></pose-canvas>"
+from PIL import Image, ImageFilter
+
+canvas_html = '<pose-maker/>'
 load_js = """
 async () => {
-const url = "https://huggingface.co/datasets/radames/gradio-components/raw/main/pose-gradio.js"
+const url = "https://huggingface.co/datasets/mishig/gradio-components/raw/main/mannequinAll.js"
 fetch(url)
 .then(res => res.text())
 .then(text => {
@@ -21,22 +20,18 @@ async () => {
   });
 }
 """
+
 get_js_image = """
-async (image_in_img, prompt, image_file_live_opt, live_conditioning) => {
-  const canvasEl = document.getElementById("canvas-root");
-  const data = canvasEl? canvasEl._data : null;
-  return [image_in_img, prompt, image_file_live_opt, data]
+async (canvas, prompt) => {
+  const poseMakerEl = document.querySelector("pose-maker");
+  const imgBase64 = poseMakerEl.captureScreenshot();
+  return [imgBase64, prompt]
 }
 """
 
-# Constants
-low_threshold = 100
-high_threshold = 200
-
 # Models
-pose_model = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
 controlnet = ControlNetModel.from_pretrained(
-    "lllyasviel/sd-controlnet-openpose", torch_dtype=torch.float16
+    "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
 )
 pipe = StableDiffusionControlNetPipeline.from_pretrained(
     "runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16
@@ -54,62 +49,73 @@ pipe.enable_xformers_memory_efficient_attention()
 generator = torch.manual_seed(0)
 
 
-def get_pose(image):
-    return pose_model(image)
-
-
-def generate_images(image, prompt, image_file_live_opt='file', live_conditioning=None):
-    if image is None and 'image' not in live_conditioning:
-        raise gr.Error("Please provide an image")
+def generate_images(canvas, prompt):
     try:
-        if image_file_live_opt == 'file':
-            pose = get_pose(image)
-        elif image_file_live_opt == 'webcam':
-            base64_img = live_conditioning['image']
-            image_data = base64.b64decode(base64_img.split(',')[1])
-            pose = Image.open(BytesIO(image_data)).convert(
-                'RGB').resize((512, 512))
+        base64_img = canvas
+        image_data = base64.b64decode(base64_img.split(',')[1])
+        input_img = Image.open(BytesIO(image_data)).convert(
+            'RGB').resize((512, 512))
+        input_img = input_img.filter(ImageFilter.GaussianBlur(radius=5))
         output = pipe(
             prompt,
-            pose,
+            input_img,
             generator=generator,
-            num_images_per_prompt=3,
+            num_images_per_prompt=2,
             num_inference_steps=20,
         )
         all_outputs = []
-        all_outputs.append(pose)
         for image in output.images:
             all_outputs.append(image)
         return all_outputs
     except Exception as e:
        raise gr.Error(str(e))
 
+def placeholder_fn(axis):
+    pass
 
-def toggle(choice):
-    if choice == "file":
-        return gr.update(visible=True, value=None), gr.update(visible=False, value=None)
-    elif choice == "webcam":
-        return gr.update(visible=False, value=None), gr.update(visible=True, value=canvas_html)
+js_change_rotation_axis = """
+async (axis) => {
+  const poseMakerEl = document.querySelector("pose-maker");
+  poseMakerEl.changeRotationAxis(axis);
+}
+"""
 
+js_pose_template = """
+async (pose) => {
+  const poseMakerEl = document.querySelector("pose-maker");
+  poseMakerEl.setPose(pose);
+}
+"""
 
 with gr.Blocks() as blocks:
-    gr.Markdown("""
-    ## Generate controlled outputs with ControlNet and Stable Diffusion
-    This Space uses pose estimated lines as the additional conditioning
-    [Check out our blog to see how this was done (and train your own controlnet)](https://huggingface.co/blog/train-your-controlnet)
-    """)
+    gr.HTML(
+        """
+        <div style="text-align: center; margin: 0 auto;">
+          <div
+            style="
+              display: inline-flex;
+              align-items: center;
+              gap: 0.8rem;
+              font-size: 1.75rem;
+            "
+          >
+            <h1 style="font-weight: 900; margin-bottom: 7px;margin-top:5px">
+              Pose in 3D &amp; Render with ControlNet (SD-1.5)
+            </h1>
+          </div>
+          <p style="margin-bottom: 10px; font-size: 94%; line-height: 23px;">
+            Using <a href="https://github.com/lllyasviel/ControlNet">ControlNet</a> and <a href="https://boytchev.github.io/mannequin.js/">three.js/mannequin.js</a>
+          </p>
+          <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>
+        </div>
+        """
+    )
     with gr.Row():
-        live_conditioning = gr.JSON(value={}, visible=False)
         with gr.Column():
-            image_file_live_opt = gr.Radio(["file", "webcam"], value="file",
-                                           label="How would you like to upload your image?")
-            image_in_img = gr.Image(source="upload", visible=True, type="pil")
-            canvas = gr.HTML(None, elem_id="canvas_html", visible=False)
-
-            image_file_live_opt.change(fn=toggle,
-                                       inputs=[image_file_live_opt],
-                                       outputs=[image_in_img, canvas],
-                                       queue=False)
+            canvas = gr.HTML(canvas_html, elem_id="canvas_html", visible=True)
+            with gr.Row():
+                rotation_axis = gr.Radio(["x", "y", "z"], value="x", label="Joint rotation axis")
+                pose_template = gr.Radio(["regular", "ballet", "handstand", "split", "kick", "chilling"], value="regular", label="Pose template")
             prompt = gr.Textbox(
                 label="Enter your prompt",
                 max_lines=1,
@@ -118,20 +124,20 @@ with gr.Blocks() as blocks:
             run_button = gr.Button("Generate")
         with gr.Column():
             gallery = gr.Gallery().style(grid=[2], height="auto")
+    rotation_axis.change(fn=placeholder_fn,
+                         inputs=[rotation_axis],
+                         outputs=[],
+                         queue=False,
+                         _js=js_change_rotation_axis)
+    pose_template.change(fn=placeholder_fn,
+                         inputs=[pose_template],
+                         outputs=[],
+                         queue=False,
+                         _js=js_pose_template)
     run_button.click(fn=generate_images,
-                     inputs=[image_in_img, prompt,
-                             image_file_live_opt, live_conditioning],
+                     inputs=[canvas, prompt],
                      outputs=[gallery],
                      _js=get_js_image)
     blocks.load(None, None, None, _js=load_js)
 
-    gr.Examples(fn=generate_images,
-                examples=[
-                    ["./yoga1.jpeg",
-                     "best quality, extremely detailed"]
-                ],
-                inputs=[image_in_img, prompt],
-                outputs=[gallery],
-                cache_examples=True)
-
 blocks.launch(debug=True)
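
A note on the input path this commit introduces, since it bypasses the usual Gradio image upload: get_js_image runs in the browser first, asks the <pose-maker> element for a screenshot, and hands Gradio a base64 data URL in place of the canvas component's value; generate_images then decodes and preprocesses it server-side. The sketch below isolates that decode-and-blur step. It is a minimal illustration, not code from the commit: decode_canvas and the tiny synthetic test image are invented for the example, and only the stdlib plus Pillow are assumed.

import base64
from io import BytesIO

from PIL import Image, ImageFilter

def decode_canvas(data_url):
    # As in app.py: drop the "data:image/png;base64," prefix, decode the
    # payload, force RGB at the 512x512 conditioning size, then blur
    # (radius 5), presumably to smooth the hard-edged mannequin render
    # before it is fed to the depth ControlNet.
    payload = data_url.split(',')[1]
    img = Image.open(BytesIO(base64.b64decode(payload)))
    img = img.convert('RGB').resize((512, 512))
    return img.filter(ImageFilter.GaussianBlur(radius=5))

# Stand-in for poseMakerEl.captureScreenshot(): a 4x4 PNG as a data URL.
buf = BytesIO()
Image.new('RGB', (4, 4), 'red').save(buf, format='PNG')
data_url = 'data:image/png;base64,' + base64.b64encode(buf.getvalue()).decode()
print(decode_canvas(data_url).size)  # (512, 512)

The two new radios use the same bridge in the other direction: Gradio events need a Python callable, so the commit pairs the no-op placeholder_fn with a _js snippet that drives the <pose-maker> element client-side (_js is the Gradio 3.x spelling of this hook; later versions renamed it). Here is a stripped-down sketch of that pattern, with the custom-element call swapped for a console.log so it runs without the component:

import gradio as gr

log_choice_js = """
async (choice) => {
  console.log("radio changed to", choice);
}
"""

def noop(choice):
    # Server-side placeholder; the _js snippet does the real work in-browser.
    pass

with gr.Blocks() as demo:
    axis = gr.Radio(["x", "y", "z"], value="x", label="Joint rotation axis")
    axis.change(fn=noop, inputs=[axis], outputs=[], queue=False, _js=log_choice_js)

demo.launch()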