Surn committed on
Commit
f97739f
·
1 Parent(s): ab3ef5d

Add Depth Estimation Back

Browse files
Files changed (3) hide show
  1. app.py +124 -41
  2. utils/constants.py +4 -4
  3. utils/depth_estimation.py +1 -8
app.py CHANGED
@@ -3,6 +3,7 @@ import os
3
  import numpy as np
4
  import torch
5
  from typing import Optional, Union, List, Tuple
 
6
  from PIL import Image, ImageFilter
7
  import cv2
8
  import utils.constants as constants
@@ -11,12 +12,13 @@ import gradio as gr
11
  from haishoku.haishoku import Haishoku
12
 
13
  from tempfile import NamedTemporaryFile
14
- #from pathlib import Path
15
  import atexit
16
  import random
17
- import logging
18
  #import accelerate
19
- from transformers import AutoTokenizer
 
 
 
20
  import gc
21
 
22
  IS_SHARED_SPACE = constants.IS_SHARED_SPACE
@@ -44,7 +46,8 @@ from utils.image_utils import (
44
  multiply_and_blend_images,
45
  alpha_composite_with_control,
46
  crop_and_resize_image,
47
- convert_to_rgba_png
 
48
  )
49
 
50
  from utils.hex_grid import (
@@ -587,12 +590,9 @@ def generate_ai_image_local (
587
  image.save(tmp.name, format="PNG")
588
  constants.temp_files.append(tmp.name)
589
  print(f"Image saved to {tmp.name}")
590
- #release_torch_resources()
591
- gc.collect()
592
  return tmp.name
593
  except Exception as e:
594
  print(f"Error generating AI image: {e}")
595
- #release_torch_resources()
596
  gc.collect()
597
  return None
598
 
@@ -651,7 +651,7 @@ def generate_input_image_click(map_option, prompt_textbox_value, negative_prompt
651
  upscaled_image.save(tmp_upscaled.name, format="PNG")
652
  constants.temp_files.append(tmp_upscaled.name)
653
  print(f"Upscaled image saved to {tmp_upscaled.name}")
654
-
655
  # Return the path of the upscaled image
656
  return tmp_upscaled.name
657
 
@@ -686,10 +686,120 @@ def add_border(image, mask_width, mask_height, blank_color):
686
  print(f"Adding border to image with width: {mask_width}, height: {mask_height}, color: {margin_color}")
687
  return shrink_and_paste_on_blank(bordered_image_output, mask_width, mask_height, margin_color)
688
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
689
  def getVersions():
690
  return versions_html()
691
 
692
  generate_input_image_click.zerogpu = True
 
693
  #def main(debug=False):
694
  title = "HexaGrid Creator"
695
  #description = "Customizable Hexagon Grid Image Generator"
@@ -865,7 +975,7 @@ with gr.Blocks(css_paths="style_20250128.css", title=title, theme='Surn/beeuty')
865
  # Add Dropdown for sizing of Images, height and width based on selection. Options are 16x9, 16x10, 4x5, 1x1
866
  # The values of height and width are based on common resolutions for each aspect ratio
867
  # Default to 16x9, 912x512
868
- image_size_ratio = gr.Dropdown(label="Image Size", choices=["16:9", "16:10", "4:5", "4:3", "2:1","3:2","1:1", "9:16", "10:16", "5:4", "3:4","1:2", "2:3"], value="16:9", elem_classes="solid", type="value", scale=0, interactive=True)
869
  with gr.Column():
870
  seed_slider = gr.Slider(
871
  label="Seed",
@@ -1012,33 +1122,6 @@ with gr.Blocks(css_paths="style_20250128.css", title=title, theme='Surn/beeuty')
1012
  elem_id="examples")
1013
  with gr.Row():
1014
  gr.HTML(value=getVersions(), visible=True, elem_id="versions")
1015
- # with gr.Row():
1016
- # reinstall_torch = gr.Button("Reinstall Torch", elem_classes="solid small", variant="secondary")
1017
- # reinstall_cuda_toolkit = gr.Button("Install CUDA Toolkit", elem_classes="solid small", variant="secondary")
1018
- # reinitialize_cuda = gr.Button("Reinitialize CUDA", elem_classes="solid small", variant="secondary")
1019
- # torch_release = gr.Button("Release Torch Resources", elem_classes="solid small", variant="secondary")
1020
-
1021
- # reinitialize_cuda.click(
1022
- # fn=initialize_cuda,
1023
- # inputs=[],
1024
- # outputs=[]
1025
- # )
1026
- # torch_release.click(
1027
- # fn=release_torch_resources,
1028
- # inputs=[],
1029
- # outputs=[]
1030
- # )
1031
- # reinstall_torch.click(
1032
- # fn=install_torch,
1033
- # inputs=[],
1034
- # outputs=[]
1035
- # )
1036
-
1037
- # reinstall_cuda_toolkit.click(
1038
- # fn=install_cuda_toolkit,
1039
- # inputs=[],
1040
- # outputs=[]
1041
- # )
1042
 
1043
  color_display.select(on_color_display_select,inputs=[color_display], outputs=[selected_row])
1044
  color_display.input(on_input,inputs=[color_display], outputs=[color_display, gr.State(excluded_color_list)])
@@ -1057,11 +1140,11 @@ with gr.Blocks(css_paths="style_20250128.css", title=title, theme='Surn/beeuty')
1057
  inputs=[map_options, prompt_textbox, negative_prompt_textbox, model_textbox, randomize_seed, seed_slider, gr.State(False), gr.State(0.5), image_size_ratio],
1058
  outputs=[input_image], scroll_to_output=True
1059
  )
1060
- # generate_depth_button.click(
1061
- # fn=generate_depth_button_click,
1062
- # inputs=[depth_image_source, voxel_size_factor, input_image, output_image, overlay_image, bordered_image_output],
1063
- # outputs=[depth_map_output, model_output], scroll_to_output=True
1064
- # )
1065
  model_textbox.change(
1066
  fn=update_prompt_notes,
1067
  inputs=model_textbox,
 
3
  import numpy as np
4
  import torch
5
  from typing import Optional, Union, List, Tuple
6
+
7
  from PIL import Image, ImageFilter
8
  import cv2
9
  import utils.constants as constants
 
12
  from haishoku.haishoku import Haishoku
13
 
14
  from tempfile import NamedTemporaryFile
 
15
  import atexit
16
  import random
 
17
  #import accelerate
18
+ from transformers import AutoTokenizer, DPTImageProcessor, DPTForDepthEstimation
19
+ from pathlib import Path
20
+ import logging
21
+ #logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
22
  import gc
23
 
24
  IS_SHARED_SPACE = constants.IS_SHARED_SPACE
 
46
  multiply_and_blend_images,
47
  alpha_composite_with_control,
48
  crop_and_resize_image,
49
+ convert_to_rgba_png,
50
+ resize_image_with_aspect_ratio
51
  )
52
 
53
  from utils.hex_grid import (
 
590
  image.save(tmp.name, format="PNG")
591
  constants.temp_files.append(tmp.name)
592
  print(f"Image saved to {tmp.name}")
 
 
593
  return tmp.name
594
  except Exception as e:
595
  print(f"Error generating AI image: {e}")
 
596
  gc.collect()
597
  return None
598
 
 
651
  upscaled_image.save(tmp_upscaled.name, format="PNG")
652
  constants.temp_files.append(tmp_upscaled.name)
653
  print(f"Upscaled image saved to {tmp_upscaled.name}")
654
+ gc.collect()
655
  # Return the path of the upscaled image
656
  return tmp_upscaled.name
657
 
 
686
  print(f"Adding border to image with width: {mask_width}, height: {mask_height}, color: {margin_color}")
687
  return shrink_and_paste_on_blank(bordered_image_output, mask_width, mask_height, margin_color)
688
 
689
+
690
+ ################################## DEPTH ESTIMATION ##################################
691
+
692
+
693
+ #@spaces.GPU()
694
def estimate_depth(image):
    """Estimate a depth map for a PIL image with the Intel/dpt-large model.

    Args:
        image: PIL image. It is converted to RGB when in any other mode.

    Returns:
        A ``(depth_pil, output)`` tuple. ``depth_pil`` is the prediction
        min-max normalized to 8 bits and wrapped in a PIL image; ``output``
        is the un-normalized prediction as a NumPy array of shape
        ``(image.height, image.width)``.
    """
    # NOTE: the processor and model are loaded on every call, not once at
    # import time. That keeps nothing resident between calls but repeats the
    # load each time a depth map is requested.
    image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
    depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large", ignore_mismatched_sizes=True)

    # Ensure image is in RGB mode
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Prepare image for the model. The previous explicit resize targeted the
    # image's own (width, height), so it was a no-op and has been dropped.
    encoding = image_processor(image, return_tensors="pt")

    # Forward pass
    with torch.no_grad():
        outputs = depth_model(**encoding)
        predicted_depth = outputs.predicted_depth

    # Interpolate back to the input size. Only the batch and channel axes are
    # squeezed so that a 1-pixel-high or 1-pixel-wide image keeps both of its
    # spatial dimensions.
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=(image.height, image.width),
        mode="bicubic",
        align_corners=False,
    ).squeeze(0).squeeze(0)

    # Convert to depth image
    output = prediction.cpu().numpy()
    depth_min = output.min()
    depth_max = output.max()
    max_val = (2**8) - 1

    # Normalize and convert to 8-bit image. A constant prediction has zero
    # range; emit an all-zero map instead of dividing by zero (which would
    # produce NaNs and an undefined uint8 cast).
    depth_range = depth_max - depth_min
    if depth_range > 0:
        depth_image = max_val * (output - depth_min) / depth_range
    else:
        depth_image = np.zeros_like(output)
    depth_image = depth_image.astype("uint8")

    depth_pil = Image.fromarray(depth_image)

    return depth_pil, output
738
+
739
+ #@spaces.GPU()
740
def create_3d_model(rgb_image, depth_array, voxel_size_factor=0.01):
    """Build a voxel grid from an RGB image plus depth array and save it as PLY.

    Args:
        rgb_image: PIL image supplying the colors; its ``width`` and
            ``height`` also define the camera intrinsics.
        depth_array: NumPy array of depth values; cast to float32 here.
        voxel_size_factor: Voxel edge length as a fraction of the point
            cloud's largest bounding-box extent.

    Returns:
        Path (as a string) of the written ``model.ply`` file.

    Raises:
        RuntimeError: If Open3D reports that the voxel grid was not written.
    """
    # Imported lazily so open3d is only required when a model is requested.
    import open3d as o3d

    depth_o3d = o3d.geometry.Image(depth_array.astype(np.float32))
    rgb_o3d = o3d.geometry.Image(np.array(rgb_image))

    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        rgb_o3d,
        depth_o3d,
        convert_rgb_to_intensity=False
    )

    # Create a point cloud from the RGBD image using a pinhole camera with
    # unit focal lengths and the principal point at the image center.
    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
        rgb_image.width,
        rgb_image.height,
        fx=1.0,
        fy=1.0,
        cx=rgb_image.width / 2.0,
        cy=rgb_image.height / 2.0,
    )

    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
        rgbd_image,
        camera_intrinsic
    )

    # Voxelize: the voxel edge scales with the cloud's largest extent.
    extent = pcd.get_max_bound() - pcd.get_min_bound()
    voxel_size = max(extent) * voxel_size_factor
    voxel_grid = o3d.geometry.VoxelGrid.create_from_point_cloud(pcd, voxel_size=voxel_size)

    # Save the 3D model under ./temp_models.
    # NOTE: the file name is fixed, so every call overwrites the previous model.
    temp_dir = Path.cwd() / "temp_models"
    temp_dir.mkdir(exist_ok=True)
    model_path = temp_dir / "model.ply"

    # write_voxel_grid signals failure through its boolean return value;
    # surface that instead of returning a path to a missing or stale file.
    if not o3d.io.write_voxel_grid(str(model_path), voxel_grid):
        raise RuntimeError(f"Failed to write voxel grid to {model_path}")

    return str(model_path)
777
+
778
def generate_depth_and_3d(input_image_path, voxel_size_factor):
    """Produce a depth map image and a 3D model file for an image on disk.

    Args:
        input_image_path: Path of the image to process.
        voxel_size_factor: Forwarded to ``create_3d_model`` as
            ``voxel_size_factor``.

    Returns:
        A ``(depth_image, model_path)`` tuple: the depth image returned by
        ``estimate_depth`` and the path returned by ``create_3d_model``.

    Raises:
        ValueError: If ``input_image_path`` is ``None`` (no image selected).
    """
    # Fail with a clear message rather than an obscure error from inside PIL.
    if input_image_path is None:
        raise ValueError("No image was provided for depth estimation.")

    # The context manager closes the underlying file; convert() returns an
    # independent in-memory image that stays usable afterwards.
    with Image.open(input_image_path) as source:
        image = source.convert("RGB")

    resized_image = resize_image_with_aspect_ratio(image, 2688, 1680)
    depth_image, depth_array = estimate_depth(resized_image)
    model_path = create_3d_model(resized_image, depth_array, voxel_size_factor=voxel_size_factor)
    return depth_image, model_path
784
+
785
def generate_depth_button_click(depth_image_source, voxel_size_factor, input_image, output_image, overlay_image, bordered_image_output):
    """Run depth estimation and 3D model generation on the selected image.

    ``depth_image_source`` chooses which of the supplied images is used; any
    value other than the three named options falls back to ``overlay_image``.
    Returns whatever ``generate_depth_and_3d`` returns for that image.
    """
    candidates = {
        "Input Image": input_image,
        "Output Image": output_image,
        "Image with Margins": bordered_image_output,
    }
    selected_path = candidates.get(depth_image_source, overlay_image)
    return generate_depth_and_3d(selected_path, voxel_size_factor)
796
+
797
+
798
  def getVersions():
799
  return versions_html()
800
 
801
  generate_input_image_click.zerogpu = True
802
+ #generate_depth_button_click.zerogpu = True
803
  #def main(debug=False):
804
  title = "HexaGrid Creator"
805
  #description = "Customizable Hexagon Grid Image Generator"
 
975
  # Add Dropdown for sizing of Images, height and width based on selection. Options are 16x9, 16x10, 4x5, 1x1
976
  # The values of height and width are based on common resolutions for each aspect ratio
977
  # Default to 16x9, 912x512
978
+ image_size_ratio = gr.Dropdown(label="Image Aspect Ratio", choices=["16:9", "16:10", "4:5", "4:3", "2:1","3:2","1:1", "9:16", "10:16", "5:4", "3:4","1:2", "2:3"], value="16:9", elem_classes="solid", type="value", scale=0, interactive=True)
979
  with gr.Column():
980
  seed_slider = gr.Slider(
981
  label="Seed",
 
1122
  elem_id="examples")
1123
  with gr.Row():
1124
  gr.HTML(value=getVersions(), visible=True, elem_id="versions")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1125
 
1126
  color_display.select(on_color_display_select,inputs=[color_display], outputs=[selected_row])
1127
  color_display.input(on_input,inputs=[color_display], outputs=[color_display, gr.State(excluded_color_list)])
 
1140
  inputs=[map_options, prompt_textbox, negative_prompt_textbox, model_textbox, randomize_seed, seed_slider, gr.State(False), gr.State(0.5), image_size_ratio],
1141
  outputs=[input_image], scroll_to_output=True
1142
  )
1143
+ generate_depth_button.click(
1144
+ fn=generate_depth_button_click,
1145
+ inputs=[depth_image_source, voxel_size_factor, input_image, output_image, overlay_image, bordered_image_output],
1146
+ outputs=[depth_map_output, model_output], scroll_to_output=True
1147
+ )
1148
  model_textbox.change(
1149
  fn=update_prompt_notes,
1150
  inputs=model_textbox,
utils/constants.py CHANGED
@@ -44,9 +44,9 @@ TARGET_SIZE = (2688,1536)
44
 
45
  PROMPTS = {
46
  "BorderBlack": "Top-down view of a hexagon-based alien map with black borders. Features rivers, mountains, volcanoes, and snow at top and bottom. Colors: light blue, green, tan, brown. No reflections or shadows. Partial hexes on edges are black.",
47
- "Earth": "Top-down view of a hexagonal world map with rivers, mountains, volcanoes, and snow at top and bottom. Colors: light blue, green, tan, brown. No reflections or shadows. Partial edge hexes are black. Overhead view.",
48
- "Map3": "Top-down view of a mystic forest map with lakes, dense forests, magical flora, and hex grids. Designed for tabletop gaming with clarity and strategic elements. Colors: light blue, green, purple, brown. Partial hexes on edges are black.",
49
- "Map4": "Top-down view of a medieval battlefield map with lakes, forests, magical fauna, and hex grids. Emphasizes clarity and strategy for tabletop games. Colors: teal, dark green, violet, brown. Partial edge hexes are black. Viewed from above.",
50
  "Alien Landscape": "Top-down view of a barren alien world map made from hexagon pieces. Features light blue rivers, brown mountains, red volcanoes, and white snow at top and bottom. Colors: light blue, green, tan, brown. Partial hexes on edges are black.",
51
  "Alien World": "Top-down view of an alien world map built from hexagon pieces. Includes rivers, mountains, volcanoes, and snowy areas. Colors: light blue, green, tan, brown. Partial edge hexes are black. Overhead view.",
52
  "Mystic Forest": "Top-down view of a mystic forest map with lakes, dense forests, magical flora, and hex grids. Designed for clarity in tabletop gaming. Colors: light blue, green, purple, brown. Partial hexes on edges are black.",
@@ -56,7 +56,7 @@ PROMPTS = {
56
 
57
  NEGATIVE_PROMPTS = {
58
  "BorderBlack": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
59
- "Earth": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, missing realistic map of the Earth, isometric",
60
  "Map3": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
61
  "Map4": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
62
  "Alien Landscape": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
 
44
 
45
  PROMPTS = {
46
  "BorderBlack": "Top-down view of a hexagon-based alien map with black borders. Features rivers, mountains, volcanoes, and snow at top and bottom. Colors: light blue, green, tan, brown. No reflections or shadows. Partial hexes on edges are black.",
47
+ "Earth": "Top-down view of a world map with rivers, mountains, volcanoes, and snow at top and bottom. Colors: light blue, green, tan, brown. No reflections or shadows. Partial edge hexes are black. Overhead view.",
48
+ "Beeuty": "Top-down view of a table map with honeycomb_shapes, lakes, dense forests, magical flora, and hex_grids. Map for tabletop gaming with clarity and strategic elements. Colors: yellow, green, purple, brown. Partial hexes on edges are black.",
49
+ "Scyfi City": "Top-down view of a futuristic urban battlefield map with lakes, forests, ruined buildings, and city streets. Emphasizes clarity and strategy for tabletop games. Colors: teal, dark green, violet, brown. Partial edge hexes are black. Viewed from above.",
50
  "Alien Landscape": "Top-down view of a barren alien world map made from hexagon pieces. Features light blue rivers, brown mountains, red volcanoes, and white snow at top and bottom. Colors: light blue, green, tan, brown. Partial hexes on edges are black.",
51
  "Alien World": "Top-down view of an alien world map built from hexagon pieces. Includes rivers, mountains, volcanoes, and snowy areas. Colors: light blue, green, tan, brown. Partial edge hexes are black. Overhead view.",
52
  "Mystic Forest": "Top-down view of a mystic forest map with lakes, dense forests, magical flora, and hex grids. Designed for clarity in tabletop gaming. Colors: light blue, green, purple, brown. Partial hexes on edges are black.",
 
56
 
57
  NEGATIVE_PROMPTS = {
58
  "BorderBlack": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
59
+ "Earth": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, isometric",
60
  "Map3": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
61
  "Map4": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
62
  "Alien Landscape": "humans, modern_buildings, vehicles, text, logos, reflections, shadows, realistic map of the Earth, isometric",
utils/depth_estimation.py CHANGED
@@ -9,14 +9,7 @@ from pathlib import Path
9
  import logging
10
  logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
11
  from utils.image_utils import (
12
- change_color,
13
- open_image,
14
- build_prerendered_images,
15
- upscale_image,
16
- crop_and_resize_image,
17
- resize_image_with_aspect_ratio,
18
- show_lut,
19
- apply_lut_to_image_path
20
  )
21
 
22
  # Load models once during module import
 
9
  import logging
10
  logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
11
  from utils.image_utils import (
12
+ resize_image_with_aspect_ratio
 
 
 
 
 
 
 
13
  )
14
 
15
  # Load models once during module import