Add Depth Estimation Back
Browse files- app.py +124 -41
- utils/constants.py +4 -4
- utils/depth_estimation.py +1 -8
app.py
CHANGED
@@ -3,6 +3,7 @@ import os
|
|
3 |
import numpy as np
|
4 |
import torch
|
5 |
from typing import Optional, Union, List, Tuple
|
|
|
6 |
from PIL import Image, ImageFilter
|
7 |
import cv2
|
8 |
import utils.constants as constants
|
@@ -11,12 +12,13 @@ import gradio as gr
|
|
11 |
from haishoku.haishoku import Haishoku
|
12 |
|
13 |
from tempfile import NamedTemporaryFile
|
14 |
-
#from pathlib import Path
|
15 |
import atexit
|
16 |
import random
|
17 |
-
import logging
|
18 |
#import accelerate
|
19 |
-
from transformers import AutoTokenizer
|
|
|
|
|
|
|
20 |
import gc
|
21 |
|
22 |
IS_SHARED_SPACE = constants.IS_SHARED_SPACE
|
@@ -44,7 +46,8 @@ from utils.image_utils import (
|
|
44 |
multiply_and_blend_images,
|
45 |
alpha_composite_with_control,
|
46 |
crop_and_resize_image,
|
47 |
-
convert_to_rgba_png
|
|
|
48 |
)
|
49 |
|
50 |
from utils.hex_grid import (
|
@@ -587,12 +590,9 @@ def generate_ai_image_local (
|
|
587 |
image.save(tmp.name, format="PNG")
|
588 |
constants.temp_files.append(tmp.name)
|
589 |
print(f"Image saved to {tmp.name}")
|
590 |
-
#release_torch_resources()
|
591 |
-
gc.collect()
|
592 |
return tmp.name
|
593 |
except Exception as e:
|
594 |
print(f"Error generating AI image: {e}")
|
595 |
-
#release_torch_resources()
|
596 |
gc.collect()
|
597 |
return None
|
598 |
|
@@ -651,7 +651,7 @@ def generate_input_image_click(map_option, prompt_textbox_value, negative_prompt
|
|
651 |
upscaled_image.save(tmp_upscaled.name, format="PNG")
|
652 |
constants.temp_files.append(tmp_upscaled.name)
|
653 |
print(f"Upscaled image saved to {tmp_upscaled.name}")
|
654 |
-
|
655 |
# Return the path of the upscaled image
|
656 |
return tmp_upscaled.name
|
657 |
|
@@ -686,10 +686,120 @@ def add_border(image, mask_width, mask_height, blank_color):
|
|
686 |
print(f"Adding border to image with width: {mask_width}, height: {mask_height}, color: {margin_color}")
|
687 |
return shrink_and_paste_on_blank(bordered_image_output, mask_width, mask_height, margin_color)
|
688 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
689 |
def getVersions():
    """Return whatever ``versions_html()`` produces, for display in the UI."""
    html = versions_html()
    return html
|
691 |
|
692 |
generate_input_image_click.zerogpu = True
|
|
|
693 |
#def main(debug=False):
|
694 |
title = "HexaGrid Creator"
|
695 |
#description = "Customizable Hexagon Grid Image Generator"
|
@@ -865,7 +975,7 @@ with gr.Blocks(css_paths="style_20250128.css", title=title, theme='Surn/beeuty')
|
|
865 |
# Add Dropdown for sizing of Images, height and width based on selection. Options are 16x9, 16x10, 4x5, 1x1
|
866 |
# The values of height and width are based on common resolutions for each aspect ratio
|
867 |
# Default to 16x9, 912x512
|
868 |
-
image_size_ratio = gr.Dropdown(label="Image
|
869 |
with gr.Column():
|
870 |
seed_slider = gr.Slider(
|
871 |
label="Seed",
|
@@ -1012,33 +1122,6 @@ with gr.Blocks(css_paths="style_20250128.css", title=title, theme='Surn/beeuty')
|
|
1012 |
elem_id="examples")
|
1013 |
with gr.Row():
|
1014 |
gr.HTML(value=getVersions(), visible=True, elem_id="versions")
|
1015 |
-
# with gr.Row():
|
1016 |
-
# reinstall_torch = gr.Button("Reinstall Torch", elem_classes="solid small", variant="secondary")
|
1017 |
-
# reinstall_cuda_toolkit = gr.Button("Install CUDA Toolkit", elem_classes="solid small", variant="secondary")
|
1018 |
-
# reinitialize_cuda = gr.Button("Reinitialize CUDA", elem_classes="solid small", variant="secondary")
|
1019 |
-
# torch_release = gr.Button("Release Torch Resources", elem_classes="solid small", variant="secondary")
|
1020 |
-
|
1021 |
-
# reinitialize_cuda.click(
|
1022 |
-
# fn=initialize_cuda,
|
1023 |
-
# inputs=[],
|
1024 |
-
# outputs=[]
|
1025 |
-
# )
|
1026 |
-
# torch_release.click(
|
1027 |
-
# fn=release_torch_resources,
|
1028 |
-
# inputs=[],
|
1029 |
-
# outputs=[]
|
1030 |
-
# )
|
1031 |
-
# reinstall_torch.click(
|
1032 |
-
# fn=install_torch,
|
1033 |
-
# inputs=[],
|
1034 |
-
# outputs=[]
|
1035 |
-
# )
|
1036 |
-
|
1037 |
-
# reinstall_cuda_toolkit.click(
|
1038 |
-
# fn=install_cuda_toolkit,
|
1039 |
-
# inputs=[],
|
1040 |
-
# outputs=[]
|
1041 |
-
# )
|
1042 |
|
1043 |
color_display.select(on_color_display_select,inputs=[color_display], outputs=[selected_row])
|
1044 |
color_display.input(on_input,inputs=[color_display], outputs=[color_display, gr.State(excluded_color_list)])
|
@@ -1057,11 +1140,11 @@ with gr.Blocks(css_paths="style_20250128.css", title=title, theme='Surn/beeuty')
|
|
1057 |
inputs=[map_options, prompt_textbox, negative_prompt_textbox, model_textbox, randomize_seed, seed_slider, gr.State(False), gr.State(0.5), image_size_ratio],
|
1058 |
outputs=[input_image], scroll_to_output=True
|
1059 |
)
|
1060 |
-
|
1061 |
-
|
1062 |
-
|
1063 |
-
|
1064 |
-
|
1065 |
model_textbox.change(
|
1066 |
fn=update_prompt_notes,
|
1067 |
inputs=model_textbox,
|
|
|
3 |
import numpy as np
|
4 |
import torch
|
5 |
from typing import Optional, Union, List, Tuple
|
6 |
+
|
7 |
from PIL import Image, ImageFilter
|
8 |
import cv2
|
9 |
import utils.constants as constants
|
|
|
12 |
from haishoku.haishoku import Haishoku
|
13 |
|
14 |
from tempfile import NamedTemporaryFile
|
|
|
15 |
import atexit
|
16 |
import random
|
|
|
17 |
#import accelerate
|
18 |
+
from transformers import AutoTokenizer, DPTImageProcessor, DPTForDepthEstimation
|
19 |
+
from pathlib import Path
|
20 |
+
import logging
|
21 |
+
#logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
|
22 |
import gc
|
23 |
|
24 |
IS_SHARED_SPACE = constants.IS_SHARED_SPACE
|
|
|
46 |
multiply_and_blend_images,
|
47 |
alpha_composite_with_control,
|
48 |
crop_and_resize_image,
|
49 |
+
convert_to_rgba_png,
|
50 |
+
resize_image_with_aspect_ratio
|
51 |
)
|
52 |
|
53 |
from utils.hex_grid import (
|
|
|
590 |
image.save(tmp.name, format="PNG")
|
591 |
constants.temp_files.append(tmp.name)
|
592 |
print(f"Image saved to {tmp.name}")
|
|
|
|
|
593 |
return tmp.name
|
594 |
except Exception as e:
|
595 |
print(f"Error generating AI image: {e}")
|
|
|
596 |
gc.collect()
|
597 |
return None
|
598 |
|
|
|
651 |
upscaled_image.save(tmp_upscaled.name, format="PNG")
|
652 |
constants.temp_files.append(tmp_upscaled.name)
|
653 |
print(f"Upscaled image saved to {tmp_upscaled.name}")
|
654 |
+
gc.collect()
|
655 |
# Return the path of the upscaled image
|
656 |
return tmp_upscaled.name
|
657 |
|
|
|
686 |
print(f"Adding border to image with width: {mask_width}, height: {mask_height}, color: {margin_color}")
|
687 |
return shrink_and_paste_on_blank(bordered_image_output, mask_width, mask_height, margin_color)
|
688 |
|
689 |
+
|
690 |
+
################################## DEPTH ESTIMATION ##################################
|
691 |
+
|
692 |
+
|
693 |
+
#@spaces.GPU()
|
694 |
+
# Loaded (image_processor, depth_model) pairs keyed by checkpoint name, so the
# checkpoint is loaded on the first request only instead of on every call.
_DEPTH_MODEL_CACHE = {}


def _get_depth_models(model_name="Intel/dpt-large"):
    """Return the ``(image_processor, depth_model)`` pair for *model_name*.

    The pair is created with ``from_pretrained`` the first time it is
    requested and reused on later calls.
    """
    if model_name not in _DEPTH_MODEL_CACHE:
        image_processor = DPTImageProcessor.from_pretrained(model_name)
        depth_model = DPTForDepthEstimation.from_pretrained(model_name, ignore_mismatched_sizes=True)
        _DEPTH_MODEL_CACHE[model_name] = (image_processor, depth_model)
    return _DEPTH_MODEL_CACHE[model_name]


def estimate_depth(image):
    """Estimate a depth map for a PIL image with the Intel/dpt-large model.

    Args:
        image: PIL image; converted to RGB when it is in another mode.

    Returns:
        A ``(depth_pil, output)`` tuple: ``depth_pil`` is the prediction
        rescaled to 0-255 as an 8-bit PIL image, ``output`` is the raw
        prediction as a NumPy array interpolated to the input image size.
    """
    image_processor, depth_model = _get_depth_models()

    # Ensure image is in RGB mode
    if image.mode != "RGB":
        image = image.convert("RGB")

    # The processor does its own resizing; the previous explicit resize used
    # the image's own dimensions and therefore only produced a copy.
    encoding = image_processor(image, return_tensors="pt")

    # Forward pass without gradient tracking
    with torch.no_grad():
        outputs = depth_model(**encoding)
        predicted_depth = outputs.predicted_depth

    # Interpolate the prediction back to the input image size
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=(image.height, image.width),
        mode="bicubic",
        align_corners=False,
    ).squeeze()

    output = prediction.cpu().numpy()
    depth_min = output.min()
    depth_max = output.max()
    max_val = (2**8) - 1

    # Normalize to 0-255. A uniform prediction has zero range; dividing by it
    # would yield NaN values, so emit an all-zero depth image instead.
    depth_range = depth_max - depth_min
    if depth_range > 0:
        depth_image = max_val * (output - depth_min) / depth_range
    else:
        depth_image = np.zeros_like(output)
    depth_image = depth_image.astype("uint8")

    depth_pil = Image.fromarray(depth_image)

    return depth_pil, output
|
738 |
+
|
739 |
+
#@spaces.GPU()
|
740 |
+
def create_3d_model(rgb_image, depth_array, voxel_size_factor=0.01):
    """Build an Open3D voxel grid from a colour image and a depth array.

    Args:
        rgb_image: image exposing ``width``/``height`` and convertible with
            ``np.array`` (the caller in this file passes a PIL image).
        depth_array: array with an ``astype`` method; cast to float32.
        voxel_size_factor: fraction of the point cloud's largest
            bounding-box extent used as the voxel size.

    Returns:
        The path, as a string, of ``temp_models/model.ply`` under the current
        working directory. Every call targets this same file name.
    """
    import open3d as o3d

    depth_img = o3d.geometry.Image(depth_array.astype(np.float32))
    color_img = o3d.geometry.Image(np.array(rgb_image))
    rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
        color_img, depth_img, convert_rgb_to_intensity=False
    )

    # Pinhole intrinsics with unit focal lengths and the principal point at
    # the image centre.
    img_w = rgb_image.width
    img_h = rgb_image.height
    intrinsics = o3d.camera.PinholeCameraIntrinsic(
        img_w, img_h, fx=1.0, fy=1.0, cx=img_w / 2.0, cy=img_h / 2.0
    )

    cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, intrinsics)

    # Voxel size scales with the largest side of the cloud's bounding box.
    extent = cloud.get_max_bound() - cloud.get_min_bound()
    voxel_size = max(extent) * voxel_size_factor
    grid = o3d.geometry.VoxelGrid.create_from_point_cloud(cloud, voxel_size=voxel_size)

    # Write the grid into ./temp_models, creating the directory if needed.
    out_dir = Path.cwd() / "temp_models"
    out_dir.mkdir(exist_ok=True)
    out_file = out_dir / "model.ply"
    o3d.io.write_voxel_grid(str(out_file), grid)

    return str(out_file)
|
777 |
+
|
778 |
+
def generate_depth_and_3d(input_image_path, voxel_size_factor):
    """Produce a depth image and a 3D model file from an image on disk.

    Args:
        input_image_path: path (or file object) accepted by ``Image.open``.
        voxel_size_factor: forwarded to ``create_3d_model``.

    Returns:
        ``(depth_image, model_path)``: the depth image from
        ``estimate_depth`` and the path returned by ``create_3d_model``.
    """
    # Image.open keeps the underlying file open; convert() loads the pixels
    # into a new image, so the source can be closed as soon as it returns.
    with Image.open(input_image_path) as source_image:
        image = source_image.convert("RGB")

    resized_image = resize_image_with_aspect_ratio(image, 2688, 1680)
    depth_image, depth_array = estimate_depth(resized_image)
    model_path = create_3d_model(resized_image, depth_array, voxel_size_factor=voxel_size_factor)
    return depth_image, model_path
|
784 |
+
|
785 |
+
def generate_depth_button_click(depth_image_source, voxel_size_factor, input_image, output_image, overlay_image, bordered_image_output):
    """Handle the depth button: pick the selected image and build depth + 3D output.

    Args:
        depth_image_source: name of the image to use ("Input Image",
            "Output Image", "Image with Margins"; anything else selects the
            overlay image).
        voxel_size_factor: forwarded to ``generate_depth_and_3d``.
        input_image, output_image, overlay_image, bordered_image_output:
            candidate images, one of which is passed on as the image path.

    Returns:
        The ``(depth_image, model_path)`` tuple from ``generate_depth_and_3d``.

    Raises:
        gr.Error: if the selected source currently holds no image.
    """
    named_sources = {
        "Input Image": input_image,
        "Output Image": output_image,
        "Image with Margins": bordered_image_output,
    }
    # Any unrecognised selection falls back to the overlay image.
    image_path = named_sources.get(depth_image_source, overlay_image)

    # Without this guard a missing image reaches Image.open as None and fails
    # with an error that tells the user nothing about the cause.
    if image_path is None:
        raise gr.Error(f"No image is available for '{depth_image_source}'. Generate or upload it first.")

    return generate_depth_and_3d(image_path, voxel_size_factor)
|
796 |
+
|
797 |
+
|
798 |
def getVersions():
    """Return whatever ``versions_html()`` produces, for display in the UI."""
    html = versions_html()
    return html
|
800 |
|
801 |
generate_input_image_click.zerogpu = True
|
802 |
+
#generate_depth_button_click.zerogpu = True
|
803 |
#def main(debug=False):
|
804 |
title = "HexaGrid Creator"
|
805 |
#description = "Customizable Hexagon Grid Image Generator"
|
|
|
975 |
# Add Dropdown for sizing of Images, height and width based on selection. Options are 16x9, 16x10, 4x5, 1x1
|
976 |
# The values of height and width are based on common resolutions for each aspect ratio
|
977 |
# Default to 16x9, 912x512
|
978 |
+
image_size_ratio = gr.Dropdown(label="Image Aspect Ratio", choices=["16:9", "16:10", "4:5", "4:3", "2:1","3:2","1:1", "9:16", "10:16", "5:4", "3:4","1:2", "2:3"], value="16:9", elem_classes="solid", type="value", scale=0, interactive=True)
|
979 |
with gr.Column():
|
980 |
seed_slider = gr.Slider(
|
981 |
label="Seed",
|
|
|
1122 |
elem_id="examples")
|
1123 |
with gr.Row():
|
1124 |
gr.HTML(value=getVersions(), visible=True, elem_id="versions")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1125 |
|
1126 |
color_display.select(on_color_display_select,inputs=[color_display], outputs=[selected_row])
|
1127 |
color_display.input(on_input,inputs=[color_display], outputs=[color_display, gr.State(excluded_color_list)])
|
|
|
1140 |
inputs=[map_options, prompt_textbox, negative_prompt_textbox, model_textbox, randomize_seed, seed_slider, gr.State(False), gr.State(0.5), image_size_ratio],
|
1141 |
outputs=[input_image], scroll_to_output=True
|
1142 |
)
|
1143 |
+
generate_depth_button.click(
|
1144 |
+
fn=generate_depth_button_click,
|
1145 |
+
inputs=[depth_image_source, voxel_size_factor, input_image, output_image, overlay_image, bordered_image_output],
|
1146 |
+
outputs=[depth_map_output, model_output], scroll_to_output=True
|
1147 |
+
)
|
1148 |
model_textbox.change(
|
1149 |
fn=update_prompt_notes,
|
1150 |
inputs=model_textbox,
|
utils/constants.py
CHANGED
@@ -44,9 +44,9 @@ TARGET_SIZE = (2688,1536)
|
|
44 |
|
45 |
PROMPTS = {
|
46 |
"BorderBlack": "Top-down view of a hexagon-based alien map with black borders. Features rivers, mountains, volcanoes, and snow at top and bottom. Colors: light blue, green, tan, brown. No reflections or shadows. Partial hexes on edges are black.",
|
47 |
-
"Earth": "Top-down view of a
|
48 |
-
"
|
49 |
-
"
|
50 |
"Alien Landscape": "Top-down view of a barren alien world map made from hexagon pieces. Features light blue rivers, brown mountains, red volcanoes, and white snow at top and bottom. Colors: light blue, green, tan, brown. Partial hexes on edges are black.",
|
51 |
"Alien World": "Top-down view of an alien world map built from hexagon pieces. Includes rivers, mountains, volcanoes, and snowy areas. Colors: light blue, green, tan, brown. Partial edge hexes are black. Overhead view.",
|
52 |
"Mystic Forest": "Top-down view of a mystic forest map with lakes, dense forests, magical flora, and hex grids. Designed for clarity in tabletop gaming. Colors: light blue, green, purple, brown. Partial hexes on edges are black.",
|
@@ -56,7 +56,7 @@ PROMPTS = {
|
|
56 |
|
57 |
NEGATIVE_PROMPTS = {
|
58 |
"BorderBlack": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
|
59 |
-
"Earth": "humans, modern_buildings, vehicles, text, logos, reflections, shadows,
|
60 |
"Map3": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
|
61 |
"Map4": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
|
62 |
"Alien Landscape": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
|
|
|
44 |
|
45 |
PROMPTS = {
|
46 |
"BorderBlack": "Top-down view of a hexagon-based alien map with black borders. Features rivers, mountains, volcanoes, and snow at top and bottom. Colors: light blue, green, tan, brown. No reflections or shadows. Partial hexes on edges are black.",
|
47 |
+
"Earth": "Top-down view of a world map with rivers, mountains, volcanoes, and snow at top and bottom. Colors: light blue, green, tan, brown. No reflections or shadows. Partial edge hexes are black. Overhead view.",
|
48 |
+
"Beeuty": "Top-down view of a table map with honeycomb_shapes, lakes, dense forests, magical flora, and hex_grids. Map for tabletop gaming with clarity and strategic elements. Colors: yellow, green, purple, brown. Partial hexes on edges are black.",
|
49 |
+
"Scyfi City": "Top-down view of a futuristic urban battlefield map with lakes, forests, ruined buildings, and city streets. Emphasizes clarity and strategy for tabletop games. Colors: teal, dark green, violet, brown. Partial edge hexes are black. Viewed from above.",
|
50 |
"Alien Landscape": "Top-down view of a barren alien world map made from hexagon pieces. Features light blue rivers, brown mountains, red volcanoes, and white snow at top and bottom. Colors: light blue, green, tan, brown. Partial hexes on edges are black.",
|
51 |
"Alien World": "Top-down view of an alien world map built from hexagon pieces. Includes rivers, mountains, volcanoes, and snowy areas. Colors: light blue, green, tan, brown. Partial edge hexes are black. Overhead view.",
|
52 |
"Mystic Forest": "Top-down view of a mystic forest map with lakes, dense forests, magical flora, and hex grids. Designed for clarity in tabletop gaming. Colors: light blue, green, purple, brown. Partial hexes on edges are black.",
|
|
|
56 |
|
57 |
NEGATIVE_PROMPTS = {
|
58 |
"BorderBlack": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
|
59 |
+
"Earth": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, isometric",
|
60 |
"Map3": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
|
61 |
"Map4": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
|
62 |
"Alien Landscape": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
|
utils/depth_estimation.py
CHANGED
@@ -9,14 +9,7 @@ from pathlib import Path
|
|
9 |
import logging
|
10 |
logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
|
11 |
from utils.image_utils import (
|
12 |
-
|
13 |
-
open_image,
|
14 |
-
build_prerendered_images,
|
15 |
-
upscale_image,
|
16 |
-
crop_and_resize_image,
|
17 |
-
resize_image_with_aspect_ratio,
|
18 |
-
show_lut,
|
19 |
-
apply_lut_to_image_path
|
20 |
)
|
21 |
|
22 |
# Load models once during module import
|
|
|
9 |
import logging
|
10 |
logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
|
11 |
from utils.image_utils import (
|
12 |
+
resize_image_with_aspect_ratio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
)
|
14 |
|
15 |
# Load models once during module import
|