owiedotch committed on
Commit 49b322e
1 Parent(s): 0582ce0

Update app.py

Files changed (1)
  1. app.py +79 -32
app.py CHANGED
@@ -61,7 +61,6 @@ ckpt = torch.load("weights/real-world_ccsr.ckpt", map_location="cpu")
 load_state_dict(model, ckpt, strict=True)
 model.freeze()
 
-# Check if CUDA is available, otherwise use CPU
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 
@@ -85,27 +84,26 @@ def process(
     vae_encoder_tile_size: int,
     vae_decoder_tile_size: int
 ):
-    print(
-        f"control image shape={control_img.size}\n"
-        f"num_samples={num_samples}, sr_scale={sr_scale}, strength={strength}\n"
-        f"positive_prompt='{positive_prompt}', negative_prompt='{negative_prompt}'\n"
-        f"cdf scale={cfg_scale}, steps={steps}, use_color_fix={use_color_fix}\n"
-        f"seed={seed}\n"
-        f"tile_diffusion={tile_diffusion}, tile_diffusion_size={tile_diffusion_size}, tile_diffusion_stride={tile_diffusion_stride}"
-        f"tile_vae={tile_vae}, vae_encoder_tile_size={vae_encoder_tile_size}, vae_decoder_tile_size={vae_decoder_tile_size}"
-    )
+    print(f"control image shape={control_img.size}\n"
+          f"num_samples={num_samples}, sr_scale={sr_scale}, strength={strength}\n"
+          f"positive_prompt='{positive_prompt}', negative_prompt='{negative_prompt}'\n"
+          f"cfg scale={cfg_scale}, steps={steps}, use_color_fix={use_color_fix}\n"
+          f"seed={seed}\n"
+          f"tile_diffusion={tile_diffusion}, tile_diffusion_size={tile_diffusion_size}, tile_diffusion_stride={tile_diffusion_stride}"
+          f"tile_vae={tile_vae}, vae_encoder_tile_size={vae_encoder_tile_size}, vae_decoder_tile_size={vae_decoder_tile_size}")
+
     pl.seed_everything(seed)
 
-    # Resize lr
+    # Resize input image
     if sr_scale != 1:
         control_img = control_img.resize(
             tuple(math.ceil(x * sr_scale) for x in control_img.size),
             Image.BICUBIC
         )
-
+
     input_size = control_img.size
 
-    # Resize the lr image
+    # Resize the image
     if not tile_diffusion:
         control_img = auto_resize(control_img, 512)
     else:
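A quick sanity check of the pre-resize step in this hunk (a minimal sketch with a stand-in image; `auto_resize` is the app's own helper and is not reproduced here): each axis is scaled by `sr_scale` and ceil-rounded before diffusion.

```python
import math
from PIL import Image

img = Image.new("RGB", (320, 240))  # stand-in for an uploaded control image
sr_scale = 4
upscaled = img.resize(
    tuple(math.ceil(x * sr_scale) for x in img.size),  # per-axis ceil, as in process()
    Image.BICUBIC
)
print(upscaled.size)  # (1280, 960)
```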
@@ -129,39 +127,28 @@ def process(
     shape = (1, 4, height // 8, width // 8)
     x_T = torch.randn(shape, device=device, dtype=torch.float32)
 
-    # Modify the get_learned_conditioning method to handle the attention mask issue
-    def modified_get_learned_conditioning(model, prompt):
-        tokens = model.cond_stage_model.tokenizer.encode(prompt)
-        tokens = torch.LongTensor(tokens).to(model.device).unsqueeze(0)
-        encoder_hidden_states = model.cond_stage_model.transformer(input_ids=tokens).last_hidden_state
-        return encoder_hidden_states
-
-    cond = modified_get_learned_conditioning(model, positive_prompt)
-    uncond = modified_get_learned_conditioning(model, negative_prompt)
-
     if not tile_diffusion and not tile_vae:
         samples = sampler.sample_ccsr(
             steps=steps, t_max=0.6667, t_min=0.3333, shape=shape, cond_img=control,
-            positive_prompt=cond, negative_prompt=uncond, x_T=x_T,
+            positive_prompt=positive_prompt, negative_prompt=negative_prompt, x_T=x_T,
             cfg_scale=cfg_scale,
             color_fix_type="adain" if use_color_fix else "none"
         )
     else:
         if tile_vae:
-            # Note: Tiled VAE is not implemented in this version
-            pass
+            model._init_tiled_vae(encoder_tile_size=vae_encoder_tile_size, decoder_tile_size=vae_decoder_tile_size)
         if tile_diffusion:
             samples = sampler.sample_with_tile_ccsr(
                 tile_size=tile_diffusion_size, tile_stride=tile_diffusion_stride,
                 steps=steps, t_max=0.6667, t_min=0.3333, shape=shape, cond_img=control,
-                positive_prompt=cond, negative_prompt=uncond, x_T=x_T,
+                positive_prompt=positive_prompt, negative_prompt=negative_prompt, x_T=x_T,
                 cfg_scale=cfg_scale,
                 color_fix_type="adain" if use_color_fix else "none"
             )
         else:
             samples = sampler.sample_ccsr(
                 steps=steps, t_max=0.6667, t_min=0.3333, shape=shape, cond_img=control,
-                positive_prompt=cond, negative_prompt=uncond, x_T=x_T,
+                positive_prompt=positive_prompt, negative_prompt=negative_prompt, x_T=x_T,
                 cfg_scale=cfg_scale,
                 color_fix_type="adain" if use_color_fix else "none"
             )
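Two behavioral changes land in this hunk: the sampler now receives the raw prompt strings (the `modified_get_learned_conditioning` workaround is removed, so the sampler's own conditioning path does the text encoding), and `tile_vae` now actually initializes tiled VAE processing via `model._init_tiled_vae` instead of silently passing. The diff does not show `_init_tiled_vae`'s internals; the sketch below is only the general idea behind tiled VAE decoding, processing the latent in overlapping tiles so peak VRAM stays bounded, then averaging the overlaps. Here `vae_decode` is a hypothetical callable mapping a latent tile to an image tile at 8x resolution, and real implementations use feathered blending rather than a plain mean.

```python
import torch

def tiled_decode(vae_decode, z: torch.Tensor, tile: int = 64, overlap: int = 8) -> torch.Tensor:
    """Decode a latent z of shape (B, C, H, W) tile by tile to bound peak memory."""
    B, _, H, W = z.shape
    stride = tile - overlap
    out = weight = None
    for top in range(0, H, stride):
        for left in range(0, W, stride):
            bottom, right = min(top + tile, H), min(left + tile, W)
            img = vae_decode(z[:, :, top:bottom, left:right])  # latent tile -> image tile at 8x
            if out is None:
                out = z.new_zeros(B, img.shape[1], H * 8, W * 8)
                weight = z.new_zeros(1, 1, H * 8, W * 8)
            out[:, :, top * 8:bottom * 8, left * 8:right * 8] += img
            weight[:, :, top * 8:bottom * 8, left * 8:right * 8] += 1
    return out / weight  # plain averaging over the overlap regions
```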
@@ -180,12 +167,31 @@ def update_output_resolution(image, scale):
         return f"Current resolution: {width}x{height}. Output resolution: {int(width*scale)}x{int(height*scale)}"
     return "Upload an image to see the output resolution"
 
+def update_scale_choices(image):
+    if image is not None:
+        width, height = image.size
+        aspect_ratio = width / height
+        common_resolutions = [
+            (1280, 720), (1920, 1080), (2560, 1440), (3840, 2160),  # 16:9
+            (1440, 1440), (2048, 2048), (2560, 2560), (3840, 3840)  # 1:1
+        ]
+        choices = []
+        for w, h in common_resolutions:
+            if abs(w / h - aspect_ratio) < 0.1:  # allow some tolerance for aspect ratio
+                scale = max(w / width, h / height)
+                if scale > 1:
+                    choices.append(f"{w}x{h} ({scale:.2f}x)")
+        choices.append("Custom")
+        return gr.update(choices=choices, value=choices[1] if len(choices) > 1 else "Custom")
+    return gr.update(choices=["Custom"], value="Custom")
+
 # Improved UI design
 css = """
 .container {max-width: 1200px; margin: auto; padding: 20px;}
 .input-image {width: 100%; max-height: 500px; object-fit: contain;}
 .output-gallery {display: flex; flex-wrap: wrap; justify-content: center;}
 .output-image {margin: 10px; max-width: 45%; height: auto;}
+.gr-form {border: 1px solid #e0e0e0; border-radius: 8px; padding: 16px; margin-bottom: 16px;}
 """
 
 with gr.Blocks(css=css) as block:
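For intuition on how `update_scale_choices` populates the new dropdown (a sketch assuming the app's namespace, where `gr.update` returns an update dict): a 1280x720 upload matches only the 16:9 presets strictly larger than itself, since the 1:1 presets fail the aspect-ratio tolerance and the 1280x720 preset yields a scale of exactly 1.

```python
from PIL import Image

img = Image.new("RGB", (1280, 720))  # 16:9 stand-in upload
# Presets with |w/h - 16/9| < 0.1 and scale > 1 survive, so choices become:
# ["1920x1080 (1.50x)", "2560x1440 (2.00x)", "3840x2160 (3.00x)", "Custom"]
print(update_scale_choices(img))
```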
@@ -194,7 +200,20 @@ with gr.Blocks(css=css) as block:
     with gr.Row():
         with gr.Column(scale=1):
             input_image = gr.Image(type="pil", label="Input Image", elem_classes="input-image")
-            sr_scale = gr.Slider(label="SR Scale", minimum=1, maximum=8, value=4, step=0.1, info="Super-resolution scale factor.")
+            sr_scale = gr.Dropdown(
+                label="Output Resolution",
+                choices=["Custom"],
+                value="Custom",
+                interactive=True
+            )
+            custom_scale = gr.Slider(
+                label="Custom Scale",
+                minimum=1,
+                maximum=8,
+                value=4,
+                step=0.1,
+                visible=True
+            )
             output_resolution = gr.Markdown("Upload an image to see the output resolution")
             run_button = gr.Button(value="Run", variant="primary")
@@ -221,15 +240,43 @@ with gr.Blocks(css=css) as block:
     with gr.Row():
         result_gallery = gr.Gallery(label="Output", show_label=False, elem_id="gallery", elem_classes="output-gallery")
 
+    def update_custom_scale(choice):
+        return gr.update(visible=choice == "Custom")
+
+    sr_scale.change(update_custom_scale, inputs=[sr_scale], outputs=[custom_scale])
+
+    def get_scale_value(choice, custom):
+        if choice == "Custom":
+            return custom
+        return float(choice.split()[-1].strip("()x"))
+
     inputs = [
         input_image, num_samples, sr_scale, strength, positive_prompt, negative_prompt,
         cfg_scale, steps, use_color_fix, seed, tile_diffusion, tile_diffusion_size,
         tile_diffusion_stride, tile_vae, vae_encoder_tile_size, vae_decoder_tile_size,
     ]
-    run_button.click(fn=process, inputs=inputs, outputs=[result_gallery])
+    run_button.click(
+        fn=lambda *args: process(*args[:1], args[1], get_scale_value(args[2], args[-1]), *args[3:-1]),
+        inputs=inputs + [custom_scale],
+        outputs=[result_gallery]
+    )
 
-    input_image.change(update_output_resolution, inputs=[input_image, sr_scale], outputs=[output_resolution])
-    sr_scale.change(update_output_resolution, inputs=[input_image, sr_scale], outputs=[output_resolution])
+    input_image.change(
+        update_scale_choices,
+        inputs=[input_image],
+        outputs=[sr_scale]
+    )
+
+    input_image.change(
+        update_output_resolution,
+        inputs=[input_image, sr_scale],
+        outputs=[output_resolution]
+    )
+    sr_scale.change(
+        update_output_resolution,
+        inputs=[input_image, sr_scale],
+        outputs=[output_resolution]
+    )
 
     input_image.change(
         lambda x: gr.update(interactive=x is not None),
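The `run_button.click` lambda splices the slider value into the `process` call: `custom_scale` is appended as the last input, `get_scale_value` resolves the dropdown string to a float (e.g. "1920x1080 (1.50x)" parses to 1.5, while "Custom" falls back to the slider value) at position 2, and the trailing slider value is dropped from the remaining arguments. A more explicit equivalent of that lambda (hypothetical name, same behavior):

```python
def run_process(image, num_samples, scale_choice, *rest_and_custom):
    *rest, custom = rest_and_custom             # custom_scale rides along as the last input
    sr = get_scale_value(scale_choice, custom)  # "1920x1080 (1.50x)" -> 1.5; "Custom" -> slider value
    return process(image, num_samples, sr, *rest)

# run_button.click(fn=run_process, inputs=inputs + [custom_scale], outputs=[result_gallery])
```

Note that `update_output_resolution` is still wired to the dropdown directly, so it receives the choice string rather than a float; routing its `scale` argument through `get_scale_value` in the same way would keep the `int(width*scale)` arithmetic in the resolution preview valid.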
 