Spaces:

Surn
/

HexaGrid

Running on Zero

App Files Files Community

Surn committed 11 days ago

Commit

f97739f

1 Parent(s): ab3ef5d

Add Depth Estimation Back

Browse files

Files changed (3) hide show

app.py +124 -41
utils/constants.py +4 -4
utils/depth_estimation.py +1 -8

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import os
 import numpy as np
 import torch
 from typing import Optional, Union, List, Tuple
 from PIL import Image, ImageFilter
 import cv2
 import utils.constants as constants
@@ -11,12 +12,13 @@ import gradio as gr
 from haishoku.haishoku import Haishoku
 from tempfile import NamedTemporaryFile
-#from pathlib import Path
 import atexit
 import random
-import logging
 #import accelerate
-from transformers import AutoTokenizer
 import gc
 IS_SHARED_SPACE = constants.IS_SHARED_SPACE
@@ -44,7 +46,8 @@ from utils.image_utils import (
     multiply_and_blend_images,
     alpha_composite_with_control,
     crop_and_resize_image,
-    convert_to_rgba_png
 )
 from utils.hex_grid import (
@@ -587,12 +590,9 @@ def generate_ai_image_local (
             image.save(tmp.name, format="PNG")
             constants.temp_files.append(tmp.name)
             print(f"Image saved to {tmp.name}")
-            #release_torch_resources()
-            gc.collect()
             return tmp.name
     except Exception as e:
         print(f"Error generating AI image: {e}")
-        #release_torch_resources()
         gc.collect()
         return None
@@ -651,7 +651,7 @@ def generate_input_image_click(map_option, prompt_textbox_value, negative_prompt
         upscaled_image.save(tmp_upscaled.name, format="PNG")
         constants.temp_files.append(tmp_upscaled.name)
         print(f"Upscaled image saved to {tmp_upscaled.name}")
     # Return the path of the upscaled image
     return tmp_upscaled.name
@@ -686,10 +686,120 @@ def add_border(image, mask_width, mask_height, blank_color):
     print(f"Adding border to image with width: {mask_width}, height: {mask_height}, color: {margin_color}")
     return shrink_and_paste_on_blank(bordered_image_output, mask_width, mask_height, margin_color)
 def getVersions():
     """Return the result of ``versions_html()``; the UI places it in the gr.HTML element with elem_id "versions"."""
     return versions_html()
 generate_input_image_click.zerogpu = True
 #def main(debug=False):
 title = "HexaGrid Creator"
 #description = "Customizable Hexagon Grid Image Generator"
@@ -865,7 +975,7 @@ with gr.Blocks(css_paths="style_20250128.css", title=title, theme='Surn/beeuty')
                             # Add Dropdown for sizing of Images, height and width based on selection. Options are 16x9, 16x10, 4x5, 1x1
                             # The values of height and width are based on common resolutions for each aspect ratio
                             # Default to 16x9, 912x512
-                            image_size_ratio = gr.Dropdown(label="Image Size", choices=["16:9", "16:10", "4:5", "4:3", "2:1","3:2","1:1", "9:16", "10:16", "5:4", "3:4","1:2", "2:3"], value="16:9", elem_classes="solid", type="value", scale=0, interactive=True)
                         with gr.Column():
                             seed_slider = gr.Slider(
                                 label="Seed",
@@ -1012,33 +1122,6 @@ with gr.Blocks(css_paths="style_20250128.css", title=title, theme='Surn/beeuty')
             elem_id="examples")
     with gr.Row():
         gr.HTML(value=getVersions(), visible=True, elem_id="versions")
-    # with gr.Row():
-    #     reinstall_torch = gr.Button("Reinstall Torch", elem_classes="solid small", variant="secondary")
-    #     reinstall_cuda_toolkit = gr.Button("Install CUDA Toolkit", elem_classes="solid small", variant="secondary")
-    #     reinitialize_cuda = gr.Button("Reinitialize CUDA", elem_classes="solid small", variant="secondary")
-    #     torch_release = gr.Button("Release Torch Resources", elem_classes="solid small", variant="secondary")
-    # reinitialize_cuda.click(
-    #     fn=initialize_cuda,
-    #     inputs=[],
-    #     outputs=[]
-    # )
-    # torch_release.click(
-    #     fn=release_torch_resources,
-    #     inputs=[],
-    #     outputs=[]
-    # )
-    # reinstall_torch.click(
-    #     fn=install_torch,
-    #     inputs=[],
-    #     outputs=[]
-    # )
-    # reinstall_cuda_toolkit.click(
-    #     fn=install_cuda_toolkit,
-    #     inputs=[],
-    #     outputs=[]
-    # )
     color_display.select(on_color_display_select,inputs=[color_display], outputs=[selected_row])
     color_display.input(on_input,inputs=[color_display], outputs=[color_display, gr.State(excluded_color_list)])
@@ -1057,11 +1140,11 @@ with gr.Blocks(css_paths="style_20250128.css", title=title, theme='Surn/beeuty')
         inputs=[map_options, prompt_textbox, negative_prompt_textbox, model_textbox, randomize_seed, seed_slider, gr.State(False), gr.State(0.5), image_size_ratio],
         outputs=[input_image], scroll_to_output=True
     )
-    # generate_depth_button.click(
-    #     fn=generate_depth_button_click,
-    #     inputs=[depth_image_source, voxel_size_factor, input_image, output_image, overlay_image, bordered_image_output],
-    #     outputs=[depth_map_output, model_output], scroll_to_output=True
-    # )
     model_textbox.change(
         fn=update_prompt_notes,
         inputs=model_textbox,

 import numpy as np
 import torch
 from typing import Optional, Union, List, Tuple
 from PIL import Image, ImageFilter
 import cv2
 import utils.constants as constants
 from haishoku.haishoku import Haishoku
 from tempfile import NamedTemporaryFile
 import atexit
 import random
 #import accelerate
+from transformers import AutoTokenizer, DPTImageProcessor, DPTForDepthEstimation
+from pathlib import Path
+import logging
+#logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
 import gc
 IS_SHARED_SPACE = constants.IS_SHARED_SPACE
     multiply_and_blend_images,
     alpha_composite_with_control,
     crop_and_resize_image,
+    convert_to_rgba_png,
+    resize_image_with_aspect_ratio
 )
 from utils.hex_grid import (
             image.save(tmp.name, format="PNG")
             constants.temp_files.append(tmp.name)
             print(f"Image saved to {tmp.name}")
             return tmp.name
     except Exception as e:
         print(f"Error generating AI image: {e}")
         gc.collect()
         return None
         upscaled_image.save(tmp_upscaled.name, format="PNG")
         constants.temp_files.append(tmp_upscaled.name)
         print(f"Upscaled image saved to {tmp_upscaled.name}")
+    gc.collect()
     # Return the path of the upscaled image
     return tmp_upscaled.name
     print(f"Adding border to image with width: {mask_width}, height: {mask_height}, color: {margin_color}")
     return shrink_and_paste_on_blank(bordered_image_output, mask_width, mask_height, margin_color)
+################################## DEPTH ESTIMATION ##################################
+#@spaces.GPU()
+def estimate_depth(image):
+    """
+    Estimate a depth map for a PIL image with the "Intel/dpt-large" DPT model.
+
+    Args:
+        image (PIL.Image.Image): Source image. Converted to RGB when it is in
+            any other mode.
+
+    Returns:
+        tuple: ``(depth_pil, output)``. ``depth_pil`` is an 8-bit PIL image with
+        the prediction min-max normalized to 0-255. ``output`` is the raw
+        prediction as a NumPy array, interpolated to the input image's
+        height and width.
+    """
+    # NOTE: the processor and model are loaded on every call (not at import time).
+    image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
+    depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large", ignore_mismatched_sizes=True)
+    # Ensure image is in RGB mode
+    if image.mode != "RGB":
+        image = image.convert("RGB")
+    # Prepare image for the model. The previous explicit resize targeted the
+    # image's own (width, height), i.e. it changed nothing, so it was dropped.
+    encoding = image_processor(image, return_tensors="pt")
+    # Forward pass
+    with torch.no_grad():
+        outputs = depth_model(**encoding)
+        predicted_depth = outputs.predicted_depth
+    # Interpolate to original size
+    prediction = torch.nn.functional.interpolate(
+        predicted_depth.unsqueeze(1),
+        size=(image.height, image.width),
+        mode="bicubic",
+        align_corners=False,
+    ).squeeze()
+    # Convert to depth image
+    output = prediction.cpu().numpy()
+    depth_min = output.min()
+    depth_max = output.max()
+    depth_range = depth_max - depth_min
+    max_val = (2**8) - 1
+    # Normalize and convert to 8-bit image
+    if depth_range > 0:
+        depth_image = max_val * (output - depth_min) / depth_range
+    else:
+        # A perfectly flat (or NaN) prediction would divide by zero and feed
+        # NaN into the uint8 cast; emit an all-zero map instead.
+        depth_image = np.zeros_like(output)
+    depth_image = depth_image.astype("uint8")
+    depth_pil = Image.fromarray(depth_image)
+    return depth_pil, output
+#@spaces.GPU()
+def create_3d_model(rgb_image, depth_array, voxel_size_factor=0.01):
+    """
+    Build a voxel-grid model from an RGB image and a depth array and save it as PLY.
+
+    Args:
+        rgb_image (PIL.Image.Image): Color image; its width and height define
+            the camera intrinsics.
+        depth_array (numpy.ndarray): Depth values, cast to float32 before use.
+        voxel_size_factor (float): Fraction of the point cloud's largest
+            bounding-box extent to use as the voxel size.
+
+    Returns:
+        str: Path of the written ``.ply`` file inside ``temp_models`` under the
+        current working directory.
+    """
+    import open3d as o3d
+    depth_o3d = o3d.geometry.Image(depth_array.astype(np.float32))
+    rgb_o3d = o3d.geometry.Image(np.array(rgb_image))
+    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
+        rgb_o3d,
+        depth_o3d,
+        convert_rgb_to_intensity=False
+    )
+    # Create a point cloud from the RGBD image (unit focal lengths, principal
+    # point at the image center)
+    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
+        rgb_image.width,
+        rgb_image.height,
+        fx=1.0,
+        fy=1.0,
+        cx=rgb_image.width / 2.0,
+        cy=rgb_image.height / 2.0,
+    )
+    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
+        rgbd_image,
+        camera_intrinsic
+    )
+    # Voxel downsample
+    voxel_size = max(pcd.get_max_bound() - pcd.get_min_bound()) * voxel_size_factor
+    voxel_grid = o3d.geometry.VoxelGrid.create_from_point_cloud(pcd, voxel_size=voxel_size)
+    # Save the 3D model to a temporary file. A unique name per call keeps
+    # concurrent requests from overwriting each other's model (the old fixed
+    # "model.ply" was shared by every caller).
+    temp_dir = Path.cwd() / "temp_models"
+    temp_dir.mkdir(exist_ok=True)
+    with NamedTemporaryFile(dir=temp_dir, prefix="model_", suffix=".ply", delete=False) as tmp:
+        model_path = str(tmp.name)
+    # Track the file the same way the other temporary outputs in this file are tracked.
+    constants.temp_files.append(model_path)
+    o3d.io.write_voxel_grid(model_path, voxel_grid)
+    return model_path
+def generate_depth_and_3d(input_image_path, voxel_size_factor):
+    """
+    Produce a depth image and a 3D model file from the image at ``input_image_path``.
+
+    The image is opened as RGB, passed through
+    ``resize_image_with_aspect_ratio(..., 2688, 1680)``, and that resized image
+    feeds both ``estimate_depth`` and ``create_3d_model``.
+
+    Returns:
+        tuple: ``(depth_image, model_path)`` as returned by ``estimate_depth``
+        (first element) and ``create_3d_model``.
+    """
+    source_rgb = Image.open(input_image_path).convert("RGB")
+    working_image = resize_image_with_aspect_ratio(source_rgb, 2688, 1680)
+    depth_preview, raw_depth = estimate_depth(working_image)
+    ply_path = create_3d_model(
+        working_image,
+        raw_depth,
+        voxel_size_factor=voxel_size_factor,
+    )
+    return depth_preview, ply_path
+def generate_depth_button_click(depth_image_source, voxel_size_factor, input_image, output_image, overlay_image, bordered_image_output):
+    """
+    Click handler: pick the image named by ``depth_image_source`` and run depth + 3D generation on it.
+
+    Args:
+        depth_image_source (str): "Input Image", "Output Image" or
+            "Image with Margins"; any other value selects ``overlay_image``.
+        voxel_size_factor (float): Forwarded to ``generate_depth_and_3d``.
+        input_image, output_image, overlay_image, bordered_image_output:
+            Candidate images, passed on to ``generate_depth_and_3d`` as its
+            image path argument.
+
+    Returns:
+        The ``(depth_image, model_path)`` pair from ``generate_depth_and_3d``.
+
+    Raises:
+        gr.Error: If the selected source has no image yet.
+    """
+    named_sources = {
+        "Input Image": input_image,
+        "Output Image": output_image,
+        "Image with Margins": bordered_image_output,
+    }
+    # Anything that is not one of the named sources falls back to the overlay image.
+    image_path = named_sources.get(depth_image_source, overlay_image)
+    if image_path is None:
+        # Without this guard the failure surfaces deep inside Image.open with
+        # an error that tells the user nothing about what to do.
+        raise gr.Error(f"No image is available for the selected depth source: {depth_image_source}")
+    return generate_depth_and_3d(image_path, voxel_size_factor)
 def getVersions():
     """Return the result of ``versions_html()``; the UI places it in the gr.HTML element with elem_id "versions"."""
     return versions_html()
 generate_input_image_click.zerogpu = True
+#generate_depth_button_click.zerogpu = True
 #def main(debug=False):
 title = "HexaGrid Creator"
 #description = "Customizable Hexagon Grid Image Generator"
                             # Dropdown for the image aspect ratio; height and width are based on the selection.
                             # Choices are 16:9, 16:10, 4:5, 4:3, 2:1, 3:2 and 1:1, plus the inverse of each non-square ratio.
                             # The values of height and width are based on common resolutions for each aspect ratio. Default is 16:9 (912x512).
+                            image_size_ratio = gr.Dropdown(label="Image Aspect Ratio", choices=["16:9", "16:10", "4:5", "4:3", "2:1","3:2","1:1", "9:16", "10:16", "5:4", "3:4","1:2", "2:3"], value="16:9", elem_classes="solid", type="value", scale=0, interactive=True)
                         with gr.Column():
                             seed_slider = gr.Slider(
                                 label="Seed",
             elem_id="examples")
     with gr.Row():
         gr.HTML(value=getVersions(), visible=True, elem_id="versions")
     color_display.select(on_color_display_select,inputs=[color_display], outputs=[selected_row])
     color_display.input(on_input,inputs=[color_display], outputs=[color_display, gr.State(excluded_color_list)])
         inputs=[map_options, prompt_textbox, negative_prompt_textbox, model_textbox, randomize_seed, seed_slider, gr.State(False), gr.State(0.5), image_size_ratio],
         outputs=[input_image], scroll_to_output=True
     )
+    generate_depth_button.click(
+        fn=generate_depth_button_click,
+        inputs=[depth_image_source, voxel_size_factor, input_image, output_image, overlay_image, bordered_image_output],
+        outputs=[depth_map_output, model_output], scroll_to_output=True
+    )
     model_textbox.change(
         fn=update_prompt_notes,
         inputs=model_textbox,

utils/constants.py CHANGED Viewed

@@ -44,9 +44,9 @@ TARGET_SIZE = (2688,1536)
 PROMPTS = {
     "BorderBlack": "Top-down view of a hexagon-based alien map with black borders. Features rivers, mountains, volcanoes, and snow at top and bottom. Colors: light blue, green, tan, brown. No reflections or shadows. Partial hexes on edges are black.",
-    "Earth": "Top-down view of a hexagonal world map with rivers, mountains, volcanoes, and snow at top and bottom. Colors: light blue, green, tan, brown. No reflections or shadows. Partial edge hexes are black. Overhead view.",
-    "Map3": "Top-down view of a mystic forest map with lakes, dense forests, magical flora, and hex grids. Designed for tabletop gaming with clarity and strategic elements. Colors: light blue, green, purple, brown. Partial hexes on edges are black.",
-    "Map4": "Top-down view of a medieval battlefield map with lakes, forests, magical fauna, and hex grids. Emphasizes clarity and strategy for tabletop games. Colors: teal, dark green, violet, brown. Partial edge hexes are black. Viewed from above.",
     "Alien Landscape": "Top-down view of a barren alien world map made from hexagon pieces. Features light blue rivers, brown mountains, red volcanoes, and white snow at top and bottom. Colors: light blue, green, tan, brown. Partial hexes on edges are black.",
     "Alien World": "Top-down view of an alien world map built from hexagon pieces. Includes rivers, mountains, volcanoes, and snowy areas. Colors: light blue, green, tan, brown. Partial edge hexes are black. Overhead view.",
     "Mystic Forest": "Top-down view of a mystic forest map with lakes, dense forests, magical flora, and hex grids. Designed for clarity in tabletop gaming. Colors: light blue, green, purple, brown. Partial hexes on edges are black.",
@@ -56,7 +56,7 @@ PROMPTS = {
 NEGATIVE_PROMPTS = {
     "BorderBlack": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
-    "Earth": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, missing realistic map of the Earth, isometric",
     "Map3": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
     "Map4": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
     "Alien Landscape": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",

 PROMPTS = {
     "BorderBlack": "Top-down view of a hexagon-based alien map with black borders. Features rivers, mountains, volcanoes, and snow at top and bottom. Colors: light blue, green, tan, brown. No reflections or shadows. Partial hexes on edges are black.",
+    "Earth": "Top-down view of a world map with rivers, mountains, volcanoes, and snow at top and bottom. Colors: light blue, green, tan, brown. No reflections or shadows. Partial edge hexes are black. Overhead view.",
+    "Beeuty": "Top-down view of a table map with honeycomb_shapes, lakes, dense forests, magical flora, and hex_grids. Map for tabletop gaming with clarity and strategic elements. Colors: yellow, green, purple, brown. Partial hexes on edges are black.",
+    "Scyfi City": "Top-down view of a futuristic urban battlefield map with lakes, forests, ruined buildings, and city streets. Emphasizes clarity and strategy for tabletop games. Colors: teal, dark green, violet, brown. Partial edge hexes are black. Viewed from above.",
     "Alien Landscape": "Top-down view of a barren alien world map made from hexagon pieces. Features light blue rivers, brown mountains, red volcanoes, and white snow at top and bottom. Colors: light blue, green, tan, brown. Partial hexes on edges are black.",
     "Alien World": "Top-down view of an alien world map built from hexagon pieces. Includes rivers, mountains, volcanoes, and snowy areas. Colors: light blue, green, tan, brown. Partial edge hexes are black. Overhead view.",
     "Mystic Forest": "Top-down view of a mystic forest map with lakes, dense forests, magical flora, and hex grids. Designed for clarity in tabletop gaming. Colors: light blue, green, purple, brown. Partial hexes on edges are black.",
 NEGATIVE_PROMPTS = {
     "BorderBlack": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
+    "Earth": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, isometric",
     "Map3": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
     "Map4": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
     "Alien Landscape": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",

utils/depth_estimation.py CHANGED Viewed

@@ -9,14 +9,7 @@ from pathlib import Path
 import logging
 logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
 from utils.image_utils import (
-    change_color,
-    open_image,
-    build_prerendered_images,
-    upscale_image,
-    crop_and_resize_image,
-    resize_image_with_aspect_ratio,
-    show_lut,
-    apply_lut_to_image_path
 )
 # Load models once during module import

 import logging
 logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
 from utils.image_utils import (
+    resize_image_with_aspect_ratio
 )
 # Load models once during module import