Surn committed on
Commit 6ef117e · 1 Parent(s): 1cb68a6

Merge from Main repository

LUT/BlackWhite.cube ADDED
The diff for this file is too large to render. See raw diff
 
LUT/CineCold.cube ADDED
The diff for this file is too large to render. See raw diff
 
LUT/CineDrama.cube ADDED
The diff for this file is too large to render. See raw diff
 
LUT/CineVibrant.cube ADDED
The diff for this file is too large to render. See raw diff
 
LUT/CineWarm.cube ADDED
The diff for this file is too large to render. See raw diff
 
LUT/Depth_of_Field.cube ADDED
The diff for this file is too large to render. See raw diff
 
LUT/Glow_Highlights.cube CHANGED
The diff for this file is too large to render. See raw diff
 
LUT/RedWhiteBlue.cube ADDED
The diff for this file is too large to render. See raw diff
 
assets/logo.png → LUT/daisy.jpg RENAMED
File without changes
LUT/grayscale.cube CHANGED
The diff for this file is too large to render. See raw diff
 
LUT/scenery01.cube CHANGED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -6,6 +6,7 @@ from tempfile import NamedTemporaryFile
 from pathlib import Path
 import atexit
 import random
+ import spaces
 # Import constants
 import utils.constants as constants
 
@@ -308,16 +309,16 @@ with gr.Blocks(css_paths="style_20250128.css", title="HexaGrid Creator", theme='
 )
 with gr.Column():
 with gr.Accordion("Hex Coloring and Exclusion", open = False):
- with gr.Row():
- with gr.Column():
- color_picker = gr.ColorPicker(label="Pick a color to exclude",value="#505050")
- with gr.Column():
- filter_color = gr.Checkbox(label="Filter Excluded Colors from Sampling", value=False,)
- exclude_color_button = gr.Button("Exclude Color", elem_id="exlude_color_button", elem_classes="solid")
- color_display = gr.DataFrame(label="List of Excluded RGBA Colors", headers=["R", "G", "B", "A"], elem_id="excluded_colors", type="array", value=build_dataframe(excluded_color_list), interactive=True, elem_classes="solid centered")
- selected_row = gr.Number(0, label="Selected Row", visible=False)
- delete_button = gr.Button("Delete Row", elem_id="delete_exclusion_button", elem_classes="solid")
- fill_hex = gr.Checkbox(label="Fill Hex with color from Image", value=True)
+ with gr.Row():
+ with gr.Column():
+ color_picker = gr.ColorPicker(label="Pick a color to exclude",value="#505050")
+ with gr.Column():
+ filter_color = gr.Checkbox(label="Filter Excluded Colors from Sampling", value=False,)
+ exclude_color_button = gr.Button("Exclude Color", elem_id="exlude_color_button", elem_classes="solid")
+ color_display = gr.DataFrame(label="List of Excluded RGBA Colors", headers=["R", "G", "B", "A"], elem_id="excluded_colors", type="array", value=build_dataframe(excluded_color_list), interactive=True, elem_classes="solid centered")
+ selected_row = gr.Number(0, label="Selected Row", visible=False)
+ delete_button = gr.Button("Delete Row", elem_id="delete_exclusion_button", elem_classes="solid")
+ fill_hex = gr.Checkbox(label="Fill Hex with color from Image", value=True)
 with gr.Accordion("Image Filters", open = False):
 with gr.Row():
 with gr.Column():
@@ -468,15 +469,15 @@ with gr.Blocks(css_paths="style_20250128.css", title="HexaGrid Creator", theme='
 ### The custom color list is a comma separated list of hex colors.
 #### Example: "A,2,3,4,5,6,7,8,9,10,J,Q,K", "red,#0000FF,#00FF00,red,#FFFF00,#00FFFF,#FF8000,#FF00FF,#FF0080,#FF8000,#FF0080,lightblue"
 """, elem_id="hex_text_info", visible=False)
- add_hex_text.change(
- fn=lambda x: (
- gr.update(visible=(x == "Custom List")),
- gr.update(visible=(x == "Custom List")),
- gr.update(visible=(x != None))
- ),
- inputs=add_hex_text,
- outputs=[custom_text_list, custom_text_color_list, hex_text_info]
- )
+ add_hex_text.change(
+ fn=lambda x: (
+ gr.update(visible=(x == "Custom List")),
+ gr.update(visible=(x == "Custom List")),
+ gr.update(visible=(x != None))
+ ),
+ inputs=add_hex_text,
+ outputs=[custom_text_list, custom_text_color_list, hex_text_info]
+ )
 with gr.Row():
 hex_size = gr.Number(label="Hexagon Size", value=32, minimum=1, maximum=768)
 border_size = gr.Slider(-5,25,value=0,step=1,label="Border Size")
assets/logo_hex.png DELETED

Git LFS Details

  • SHA256: 9c0f91c488296e7234f829effe6da9d997704fa9b4e95739af7049d8d91db72b
  • Pointer size: 131 Bytes
  • Size of remote file: 547 kB
assets/logo_old.png DELETED

Git LFS Details

  • SHA256: 77dc2f8c3d405d0a11cde6dbca3ab97e5a3bbd23f89872fa94483953c8505799
  • Pointer size: 130 Bytes
  • Size of remote file: 49.9 kB
assets/logo_hex.gif → images/prerendered/grid_1.png RENAMED
File without changes
src/block.py ADDED
@@ -0,0 +1,333 @@
1
+ import torch
2
+ from typing import List, Union, Optional, Dict, Any, Callable
3
+ from diffusers.models.attention_processor import Attention, F
4
+ from .lora_controller import enable_lora
5
+
6
+
7
+ def attn_forward(
8
+ attn: Attention,
9
+ hidden_states: torch.FloatTensor,
10
+ encoder_hidden_states: torch.FloatTensor = None,
11
+ condition_latents: torch.FloatTensor = None,
12
+ attention_mask: Optional[torch.FloatTensor] = None,
13
+ image_rotary_emb: Optional[torch.Tensor] = None,
14
+ cond_rotary_emb: Optional[torch.Tensor] = None,
15
+ model_config: Optional[Dict[str, Any]] = {},
16
+ ) -> torch.FloatTensor:
17
+ batch_size, _, _ = (
18
+ hidden_states.shape
19
+ if encoder_hidden_states is None
20
+ else encoder_hidden_states.shape
21
+ )
22
+
23
+ with enable_lora(
24
+ (attn.to_q, attn.to_k, attn.to_v), model_config.get("latent_lora", False)
25
+ ):
26
+ # `sample` projections.
27
+ query = attn.to_q(hidden_states)
28
+ key = attn.to_k(hidden_states)
29
+ value = attn.to_v(hidden_states)
30
+
31
+ inner_dim = key.shape[-1]
32
+ head_dim = inner_dim // attn.heads
33
+
34
+ query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
35
+ key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
36
+ value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
37
+
38
+ if attn.norm_q is not None:
39
+ query = attn.norm_q(query)
40
+ if attn.norm_k is not None:
41
+ key = attn.norm_k(key)
42
+
43
+ # the attention in FluxSingleTransformerBlock does not use `encoder_hidden_states`
44
+ if encoder_hidden_states is not None:
45
+ # `context` projections.
46
+ encoder_hidden_states_query_proj = attn.add_q_proj(encoder_hidden_states)
47
+ encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states)
48
+ encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states)
49
+
50
+ encoder_hidden_states_query_proj = encoder_hidden_states_query_proj.view(
51
+ batch_size, -1, attn.heads, head_dim
52
+ ).transpose(1, 2)
53
+ encoder_hidden_states_key_proj = encoder_hidden_states_key_proj.view(
54
+ batch_size, -1, attn.heads, head_dim
55
+ ).transpose(1, 2)
56
+ encoder_hidden_states_value_proj = encoder_hidden_states_value_proj.view(
57
+ batch_size, -1, attn.heads, head_dim
58
+ ).transpose(1, 2)
59
+
60
+ if attn.norm_added_q is not None:
61
+ encoder_hidden_states_query_proj = attn.norm_added_q(
62
+ encoder_hidden_states_query_proj
63
+ )
64
+ if attn.norm_added_k is not None:
65
+ encoder_hidden_states_key_proj = attn.norm_added_k(
66
+ encoder_hidden_states_key_proj
67
+ )
68
+
69
+ # attention
70
+ query = torch.cat([encoder_hidden_states_query_proj, query], dim=2)
71
+ key = torch.cat([encoder_hidden_states_key_proj, key], dim=2)
72
+ value = torch.cat([encoder_hidden_states_value_proj, value], dim=2)
73
+
74
+ if image_rotary_emb is not None:
75
+ from diffusers.models.embeddings import apply_rotary_emb
76
+
77
+ query = apply_rotary_emb(query, image_rotary_emb)
78
+ key = apply_rotary_emb(key, image_rotary_emb)
79
+
80
+ if condition_latents is not None:
81
+ cond_query = attn.to_q(condition_latents)
82
+ cond_key = attn.to_k(condition_latents)
83
+ cond_value = attn.to_v(condition_latents)
84
+
85
+ cond_query = cond_query.view(batch_size, -1, attn.heads, head_dim).transpose(
86
+ 1, 2
87
+ )
88
+ cond_key = cond_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
89
+ cond_value = cond_value.view(batch_size, -1, attn.heads, head_dim).transpose(
90
+ 1, 2
91
+ )
92
+ if attn.norm_q is not None:
93
+ cond_query = attn.norm_q(cond_query)
94
+ if attn.norm_k is not None:
95
+ cond_key = attn.norm_k(cond_key)
96
+
97
+ if cond_rotary_emb is not None:
98
+ cond_query = apply_rotary_emb(cond_query, cond_rotary_emb)
99
+ cond_key = apply_rotary_emb(cond_key, cond_rotary_emb)
100
+
101
+ if condition_latents is not None:
102
+ query = torch.cat([query, cond_query], dim=2)
103
+ key = torch.cat([key, cond_key], dim=2)
104
+ value = torch.cat([value, cond_value], dim=2)
105
+
106
+ if not model_config.get("union_cond_attn", True):
107
+ # If we don't want to use the union condition attention, we need to mask the attention
108
+ # between the hidden states and the condition latents
109
+ attention_mask = torch.ones(
110
+ query.shape[2], key.shape[2], device=query.device, dtype=torch.bool
111
+ )
112
+ condition_n = cond_query.shape[2]
113
+ attention_mask[-condition_n:, :-condition_n] = False
114
+ attention_mask[:-condition_n, -condition_n:] = False
115
+ if hasattr(attn, "c_factor"):
116
+ attention_mask = torch.zeros(
117
+ query.shape[2], key.shape[2], device=query.device, dtype=query.dtype
118
+ )
119
+ condition_n = cond_query.shape[2]
120
+ bias = torch.log(attn.c_factor[0])
121
+ attention_mask[-condition_n:, :-condition_n] = bias
122
+ attention_mask[:-condition_n, -condition_n:] = bias
123
+ hidden_states = F.scaled_dot_product_attention(
124
+ query, key, value, dropout_p=0.0, is_causal=False, attn_mask=attention_mask
125
+ )
126
+ hidden_states = hidden_states.transpose(1, 2).reshape(
127
+ batch_size, -1, attn.heads * head_dim
128
+ )
129
+ hidden_states = hidden_states.to(query.dtype)
130
+
131
+ if encoder_hidden_states is not None:
132
+ if condition_latents is not None:
133
+ encoder_hidden_states, hidden_states, condition_latents = (
134
+ hidden_states[:, : encoder_hidden_states.shape[1]],
135
+ hidden_states[
136
+ :, encoder_hidden_states.shape[1] : -condition_latents.shape[1]
137
+ ],
138
+ hidden_states[:, -condition_latents.shape[1] :],
139
+ )
140
+ else:
141
+ encoder_hidden_states, hidden_states = (
142
+ hidden_states[:, : encoder_hidden_states.shape[1]],
143
+ hidden_states[:, encoder_hidden_states.shape[1] :],
144
+ )
145
+
146
+ with enable_lora((attn.to_out[0],), model_config.get("latent_lora", False)):
147
+ # linear proj
148
+ hidden_states = attn.to_out[0](hidden_states)
149
+ # dropout
150
+ hidden_states = attn.to_out[1](hidden_states)
151
+ encoder_hidden_states = attn.to_add_out(encoder_hidden_states)
152
+
153
+ if condition_latents is not None:
154
+ condition_latents = attn.to_out[0](condition_latents)
155
+ condition_latents = attn.to_out[1](condition_latents)
156
+
157
+ return (
158
+ (hidden_states, encoder_hidden_states, condition_latents)
159
+ if condition_latents is not None
160
+ else (hidden_states, encoder_hidden_states)
161
+ )
162
+ elif condition_latents is not None:
163
+ # if there are condition_latents, we need to separate the hidden_states and the condition_latents
164
+ hidden_states, condition_latents = (
165
+ hidden_states[:, : -condition_latents.shape[1]],
166
+ hidden_states[:, -condition_latents.shape[1] :],
167
+ )
168
+ return hidden_states, condition_latents
169
+ else:
170
+ return hidden_states
171
+
172
+
173
+ def block_forward(
174
+ self,
175
+ hidden_states: torch.FloatTensor,
176
+ encoder_hidden_states: torch.FloatTensor,
177
+ condition_latents: torch.FloatTensor,
178
+ temb: torch.FloatTensor,
179
+ cond_temb: torch.FloatTensor,
180
+ cond_rotary_emb=None,
181
+ image_rotary_emb=None,
182
+ model_config: Optional[Dict[str, Any]] = {},
183
+ ):
184
+ use_cond = condition_latents is not None
185
+ with enable_lora((self.norm1.linear,), model_config.get("latent_lora", False)):
186
+ norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(
187
+ hidden_states, emb=temb
188
+ )
189
+
190
+ norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = (
191
+ self.norm1_context(encoder_hidden_states, emb=temb)
192
+ )
193
+
194
+ if use_cond:
195
+ (
196
+ norm_condition_latents,
197
+ cond_gate_msa,
198
+ cond_shift_mlp,
199
+ cond_scale_mlp,
200
+ cond_gate_mlp,
201
+ ) = self.norm1(condition_latents, emb=cond_temb)
202
+
203
+ # Attention.
204
+ result = attn_forward(
205
+ self.attn,
206
+ model_config=model_config,
207
+ hidden_states=norm_hidden_states,
208
+ encoder_hidden_states=norm_encoder_hidden_states,
209
+ condition_latents=norm_condition_latents if use_cond else None,
210
+ image_rotary_emb=image_rotary_emb,
211
+ cond_rotary_emb=cond_rotary_emb if use_cond else None,
212
+ )
213
+ attn_output, context_attn_output = result[:2]
214
+ cond_attn_output = result[2] if use_cond else None
215
+
216
+ # Process attention outputs for the `hidden_states`.
217
+ # 1. hidden_states
218
+ attn_output = gate_msa.unsqueeze(1) * attn_output
219
+ hidden_states = hidden_states + attn_output
220
+ # 2. encoder_hidden_states
221
+ context_attn_output = c_gate_msa.unsqueeze(1) * context_attn_output
222
+ encoder_hidden_states = encoder_hidden_states + context_attn_output
223
+ # 3. condition_latents
224
+ if use_cond:
225
+ cond_attn_output = cond_gate_msa.unsqueeze(1) * cond_attn_output
226
+ condition_latents = condition_latents + cond_attn_output
227
+ if model_config.get("add_cond_attn", False):
228
+ hidden_states += cond_attn_output
229
+
230
+ # LayerNorm + MLP.
231
+ # 1. hidden_states
232
+ norm_hidden_states = self.norm2(hidden_states)
233
+ norm_hidden_states = (
234
+ norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]
235
+ )
236
+ # 2. encoder_hidden_states
237
+ norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states)
238
+ norm_encoder_hidden_states = (
239
+ norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None]
240
+ )
241
+ # 3. condition_latents
242
+ if use_cond:
243
+ norm_condition_latents = self.norm2(condition_latents)
244
+ norm_condition_latents = (
245
+ norm_condition_latents * (1 + cond_scale_mlp[:, None])
246
+ + cond_shift_mlp[:, None]
247
+ )
248
+
249
+ # Feed-forward.
250
+ with enable_lora((self.ff.net[2],), model_config.get("latent_lora", False)):
251
+ # 1. hidden_states
252
+ ff_output = self.ff(norm_hidden_states)
253
+ ff_output = gate_mlp.unsqueeze(1) * ff_output
254
+ # 2. encoder_hidden_states
255
+ context_ff_output = self.ff_context(norm_encoder_hidden_states)
256
+ context_ff_output = c_gate_mlp.unsqueeze(1) * context_ff_output
257
+ # 3. condition_latents
258
+ if use_cond:
259
+ cond_ff_output = self.ff(norm_condition_latents)
260
+ cond_ff_output = cond_gate_mlp.unsqueeze(1) * cond_ff_output
261
+
262
+ # Process feed-forward outputs.
263
+ hidden_states = hidden_states + ff_output
264
+ encoder_hidden_states = encoder_hidden_states + context_ff_output
265
+ if use_cond:
266
+ condition_latents = condition_latents + cond_ff_output
267
+
268
+ # Clip to avoid overflow.
269
+ if encoder_hidden_states.dtype == torch.float16:
270
+ encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504)
271
+
272
+ return encoder_hidden_states, hidden_states, condition_latents if use_cond else None
273
+
274
+
275
+ def single_block_forward(
276
+ self,
277
+ hidden_states: torch.FloatTensor,
278
+ temb: torch.FloatTensor,
279
+ image_rotary_emb=None,
280
+ condition_latents: torch.FloatTensor = None,
281
+ cond_temb: torch.FloatTensor = None,
282
+ cond_rotary_emb=None,
283
+ model_config: Optional[Dict[str, Any]] = {},
284
+ ):
285
+
286
+ using_cond = condition_latents is not None
287
+ residual = hidden_states
288
+ with enable_lora(
289
+ (
290
+ self.norm.linear,
291
+ self.proj_mlp,
292
+ ),
293
+ model_config.get("latent_lora", False),
294
+ ):
295
+ norm_hidden_states, gate = self.norm(hidden_states, emb=temb)
296
+ mlp_hidden_states = self.act_mlp(self.proj_mlp(norm_hidden_states))
297
+ if using_cond:
298
+ residual_cond = condition_latents
299
+ norm_condition_latents, cond_gate = self.norm(condition_latents, emb=cond_temb)
300
+ mlp_cond_hidden_states = self.act_mlp(self.proj_mlp(norm_condition_latents))
301
+
302
+ attn_output = attn_forward(
303
+ self.attn,
304
+ model_config=model_config,
305
+ hidden_states=norm_hidden_states,
306
+ image_rotary_emb=image_rotary_emb,
307
+ **(
308
+ {
309
+ "condition_latents": norm_condition_latents,
310
+ "cond_rotary_emb": cond_rotary_emb if using_cond else None,
311
+ }
312
+ if using_cond
313
+ else {}
314
+ ),
315
+ )
316
+ if using_cond:
317
+ attn_output, cond_attn_output = attn_output
318
+
319
+ with enable_lora((self.proj_out,), model_config.get("latent_lora", False)):
320
+ hidden_states = torch.cat([attn_output, mlp_hidden_states], dim=2)
321
+ gate = gate.unsqueeze(1)
322
+ hidden_states = gate * self.proj_out(hidden_states)
323
+ hidden_states = residual + hidden_states
324
+ if using_cond:
325
+ condition_latents = torch.cat([cond_attn_output, mlp_cond_hidden_states], dim=2)
326
+ cond_gate = cond_gate.unsqueeze(1)
327
+ condition_latents = cond_gate * self.proj_out(condition_latents)
328
+ condition_latents = residual_cond + condition_latents
329
+
330
+ if hidden_states.dtype == torch.float16:
331
+ hidden_states = hidden_states.clip(-65504, 65504)
332
+
333
+ return hidden_states if not using_cond else (hidden_states, condition_latents)
src/condition.py ADDED
@@ -0,0 +1,116 @@
1
+ import torch
2
+ from typing import Optional, Union, List, Tuple
3
+ from diffusers.pipelines import FluxPipeline
4
+ from PIL import Image, ImageFilter
5
+ import numpy as np
6
+ import cv2
7
+
8
+ condition_dict = {
9
+ "depth": 0,
10
+ "canny": 1,
11
+ "subject": 4,
12
+ "coloring": 6,
13
+ "deblurring": 7,
14
+ "fill": 9,
15
+ }
16
+ class Condition(object):
17
+ def __init__(
18
+ self,
19
+ condition_type: str,
20
+ raw_img: Union[Image.Image, torch.Tensor] = None,
21
+ condition: Union[Image.Image, torch.Tensor] = None,
22
+ mask=None,
23
+ ) -> None:
24
+ self.condition_type = condition_type
25
+ assert raw_img is not None or condition is not None
26
+ if raw_img is not None:
27
+ self.condition = self.get_condition(condition_type, raw_img)
28
+ else:
29
+ self.condition = condition
30
+ # TODO: Add mask support
31
+ assert mask is None, "Mask not supported yet"
32
+ def get_condition(
33
+ self, condition_type: str, raw_img: Union[Image.Image, torch.Tensor]
34
+ ) -> Union[Image.Image, torch.Tensor]:
35
+ """
36
+ Returns the condition image.
37
+ """
38
+ if condition_type == "depth":
39
+ from transformers import pipeline
40
+ depth_pipe = pipeline(
41
+ task="depth-estimation",
42
+ model="LiheYoung/depth-anything-small-hf",
43
+ device="cuda",
44
+ )
45
+ source_image = raw_img.convert("RGB")
46
+ condition_img = depth_pipe(source_image)["depth"].convert("RGB")
47
+ return condition_img
48
+ elif condition_type == "canny":
49
+ img = np.array(raw_img)
50
+ edges = cv2.Canny(img, 100, 200)
51
+ edges = Image.fromarray(edges).convert("RGB")
52
+ return edges
53
+ elif condition_type == "subject":
54
+ return raw_img
55
+ elif condition_type == "coloring":
56
+ return raw_img.convert("L").convert("RGB")
57
+ elif condition_type == "deblurring":
58
+ condition_image = (
59
+ raw_img.convert("RGB")
60
+ .filter(ImageFilter.GaussianBlur(10))
61
+ .convert("RGB")
62
+ )
63
+ return condition_image
64
+ elif condition_type == "fill":
65
+ return raw_img.convert("RGB")
66
+ return self.condition
67
+ @property
68
+ def type_id(self) -> int:
69
+ """
70
+ Returns the type id of the condition.
71
+ """
72
+ return condition_dict[self.condition_type]
73
+ @classmethod
74
+ def get_type_id(cls, condition_type: str) -> int:
75
+ """
76
+ Returns the type id of the condition.
77
+ """
78
+ return condition_dict[condition_type]
79
+ def _encode_image(self, pipe: FluxPipeline, cond_img: Image.Image) -> torch.Tensor:
80
+ """
81
+ Encodes an image condition into tokens using the pipeline.
82
+ """
83
+ cond_img = pipe.image_processor.preprocess(cond_img)
84
+ cond_img = cond_img.to(pipe.device).to(pipe.dtype)
85
+ cond_img = pipe.vae.encode(cond_img).latent_dist.sample()
86
+ cond_img = (
87
+ cond_img - pipe.vae.config.shift_factor
88
+ ) * pipe.vae.config.scaling_factor
89
+ cond_tokens = pipe._pack_latents(cond_img, *cond_img.shape)
90
+ cond_ids = pipe._prepare_latent_image_ids(
91
+ cond_img.shape[0],
92
+ cond_img.shape[2]//2,
93
+ cond_img.shape[3]//2,
94
+ pipe.device,
95
+ pipe.dtype,
96
+ )
97
+ return cond_tokens, cond_ids
98
+ def encode(self, pipe: FluxPipeline) -> Tuple[torch.Tensor, torch.Tensor, int]:
99
+ """
100
+ Encodes the condition into tokens, ids and type_id.
101
+ """
102
+ if self.condition_type in [
103
+ "depth",
104
+ "canny",
105
+ "subject",
106
+ "coloring",
107
+ "deblurring",
108
+ "fill",
109
+ ]:
110
+ tokens, ids = self._encode_image(pipe, self.condition)
111
+ else:
112
+ raise NotImplementedError(
113
+ f"Condition type {self.condition_type} not implemented"
114
+ )
115
+ type_id = torch.ones_like(ids[:, :1]) * self.type_id
116
+ return tokens, ids, type_id
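
Not part of the commit: a minimal usage sketch of the new Condition class added above. The reference image path and the pipeline setup are assumptions for illustration; in this repository the encoding step normally happens inside src/generate.py.

import torch
from PIL import Image
from diffusers import FluxPipeline
from src.condition import Condition

# Assumed inputs: any RGB image and an already-downloaded FLUX.1 checkpoint.
ref = Image.open("reference.jpg").convert("RGB")
pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16).to("cuda")

cond = Condition("canny", raw_img=ref)    # builds a Canny edge map via cv2 (type id 1 in condition_dict)
tokens, ids, type_id = cond.encode(pipe)  # packed VAE latents, latent position ids, and the condition type id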
src/generate.py ADDED
@@ -0,0 +1,294 @@
1
+ import torch
2
+ import yaml, os
3
+ from diffusers.pipelines import FluxPipeline
4
+ from typing import List, Union, Optional, Dict, Any, Callable
5
+ from .transformer import tranformer_forward
6
+ from .condition import Condition
7
+
8
+ from diffusers.pipelines.flux.pipeline_flux import (
9
+ FluxPipelineOutput,
10
+ calculate_shift,
11
+ retrieve_timesteps,
12
+ np,
13
+ )
14
+
15
+
16
+ def prepare_params(
17
+ prompt: Union[str, List[str]] = None,
18
+ prompt_2: Optional[Union[str, List[str]]] = None,
19
+ height: Optional[int] = 512,
20
+ width: Optional[int] = 512,
21
+ num_inference_steps: int = 28,
22
+ timesteps: List[int] = None,
23
+ guidance_scale: float = 3.5,
24
+ num_images_per_prompt: Optional[int] = 1,
25
+ generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
26
+ latents: Optional[torch.FloatTensor] = None,
27
+ prompt_embeds: Optional[torch.FloatTensor] = None,
28
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
29
+ output_type: Optional[str] = "pil",
30
+ return_dict: bool = True,
31
+ joint_attention_kwargs: Optional[Dict[str, Any]] = None,
32
+ callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
33
+ callback_on_step_end_tensor_inputs: List[str] = ["latents"],
34
+ max_sequence_length: int = 512,
35
+ **kwargs: dict,
36
+ ):
37
+ return (
38
+ prompt,
39
+ prompt_2,
40
+ height,
41
+ width,
42
+ num_inference_steps,
43
+ timesteps,
44
+ guidance_scale,
45
+ num_images_per_prompt,
46
+ generator,
47
+ latents,
48
+ prompt_embeds,
49
+ pooled_prompt_embeds,
50
+ output_type,
51
+ return_dict,
52
+ joint_attention_kwargs,
53
+ callback_on_step_end,
54
+ callback_on_step_end_tensor_inputs,
55
+ max_sequence_length,
56
+ )
57
+
58
+
59
+ def seed_everything(seed: int = 42):
60
+ torch.backends.cudnn.deterministic = True
61
+ torch.manual_seed(seed)
62
+ np.random.seed(seed)
63
+
64
+
65
+ @torch.no_grad()
66
+ def generate(
67
+ pipeline: FluxPipeline,
68
+ conditions: List[Condition] = None,
69
+ model_config: Optional[Dict[str, Any]] = {},
70
+ condition_scale: float = 1.0,
71
+ **params: dict,
72
+ ):
73
+ # model_config = model_config or get_config(config_path).get("model", {})
74
+ if condition_scale != 1:
75
+ for name, module in pipeline.transformer.named_modules():
76
+ if not name.endswith(".attn"):
77
+ continue
78
+ module.c_factor = torch.ones(1, 1) * condition_scale
79
+
80
+ self = pipeline
81
+ (
82
+ prompt,
83
+ prompt_2,
84
+ height,
85
+ width,
86
+ num_inference_steps,
87
+ timesteps,
88
+ guidance_scale,
89
+ num_images_per_prompt,
90
+ generator,
91
+ latents,
92
+ prompt_embeds,
93
+ pooled_prompt_embeds,
94
+ output_type,
95
+ return_dict,
96
+ joint_attention_kwargs,
97
+ callback_on_step_end,
98
+ callback_on_step_end_tensor_inputs,
99
+ max_sequence_length,
100
+ ) = prepare_params(**params)
101
+
102
+ height = height or self.default_sample_size * self.vae_scale_factor
103
+ width = width or self.default_sample_size * self.vae_scale_factor
104
+
105
+ # 1. Check inputs. Raise error if not correct
106
+ self.check_inputs(
107
+ prompt,
108
+ prompt_2,
109
+ height,
110
+ width,
111
+ prompt_embeds=prompt_embeds,
112
+ pooled_prompt_embeds=pooled_prompt_embeds,
113
+ callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
114
+ max_sequence_length=max_sequence_length,
115
+ )
116
+
117
+ self._guidance_scale = guidance_scale
118
+ self._joint_attention_kwargs = joint_attention_kwargs
119
+ self._interrupt = False
120
+
121
+ # 2. Define call parameters
122
+ if prompt is not None and isinstance(prompt, str):
123
+ batch_size = 1
124
+ elif prompt is not None and isinstance(prompt, list):
125
+ batch_size = len(prompt)
126
+ else:
127
+ batch_size = prompt_embeds.shape[0]
128
+
129
+ device = self._execution_device
130
+
131
+ lora_scale = (
132
+ self.joint_attention_kwargs.get("scale", None)
133
+ if self.joint_attention_kwargs is not None
134
+ else None
135
+ )
136
+ (
137
+ prompt_embeds,
138
+ pooled_prompt_embeds,
139
+ text_ids,
140
+ ) = self.encode_prompt(
141
+ prompt=prompt,
142
+ prompt_2=prompt_2,
143
+ prompt_embeds=prompt_embeds,
144
+ pooled_prompt_embeds=pooled_prompt_embeds,
145
+ device=device,
146
+ num_images_per_prompt=num_images_per_prompt,
147
+ max_sequence_length=max_sequence_length,
148
+ lora_scale=lora_scale,
149
+ )
150
+
151
+ # 4. Prepare latent variables
152
+ num_channels_latents = self.transformer.config.in_channels // 4
153
+ latents, latent_image_ids = self.prepare_latents(
154
+ batch_size * num_images_per_prompt,
155
+ num_channels_latents,
156
+ height,
157
+ width,
158
+ prompt_embeds.dtype,
159
+ device,
160
+ generator,
161
+ latents,
162
+ )
163
+
164
+ # 4.1. Prepare conditions
165
+ condition_latents, condition_ids, condition_type_ids = ([] for _ in range(3))
166
+ use_condition = conditions is not None or []
167
+ if use_condition:
168
+ assert len(conditions) <= 1, "Only one condition is supported for now."
169
+ pipeline.set_adapters(
170
+ {
171
+ 512: "subject_512",
172
+ 1024: "subject_1024",
173
+ }[height]
174
+ )
175
+ for condition in conditions:
176
+ tokens, ids, type_id = condition.encode(self)
177
+ condition_latents.append(tokens) # [batch_size, token_n, token_dim]
178
+ condition_ids.append(ids) # [token_n, id_dim(3)]
179
+ condition_type_ids.append(type_id) # [token_n, 1]
180
+ condition_latents = torch.cat(condition_latents, dim=1)
181
+ condition_ids = torch.cat(condition_ids, dim=0)
182
+ if condition.condition_type == "subject":
183
+ delta = 32 if height == 512 else -32
184
+ # print(f"Condition delta: {delta}")
185
+ condition_ids[:, 2] += delta
186
+
187
+ condition_type_ids = torch.cat(condition_type_ids, dim=0)
188
+
189
+ # 5. Prepare timesteps
190
+ sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps)
191
+ image_seq_len = latents.shape[1]
192
+ mu = calculate_shift(
193
+ image_seq_len,
194
+ self.scheduler.config.base_image_seq_len,
195
+ self.scheduler.config.max_image_seq_len,
196
+ self.scheduler.config.base_shift,
197
+ self.scheduler.config.max_shift,
198
+ )
199
+ timesteps, num_inference_steps = retrieve_timesteps(
200
+ self.scheduler,
201
+ num_inference_steps,
202
+ device,
203
+ timesteps,
204
+ sigmas,
205
+ mu=mu,
206
+ )
207
+ num_warmup_steps = max(
208
+ len(timesteps) - num_inference_steps * self.scheduler.order, 0
209
+ )
210
+ self._num_timesteps = len(timesteps)
211
+
212
+ # 6. Denoising loop
213
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
214
+ for i, t in enumerate(timesteps):
215
+ if self.interrupt:
216
+ continue
217
+
218
+ # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
219
+ timestep = t.expand(latents.shape[0]).to(latents.dtype)
220
+
221
+ # handle guidance
222
+ if self.transformer.config.guidance_embeds:
223
+ guidance = torch.tensor([guidance_scale], device=device)
224
+ guidance = guidance.expand(latents.shape[0])
225
+ else:
226
+ guidance = None
227
+ noise_pred = tranformer_forward(
228
+ self.transformer,
229
+ model_config=model_config,
230
+ # Inputs of the condition (new feature)
231
+ condition_latents=condition_latents if use_condition else None,
232
+ condition_ids=condition_ids if use_condition else None,
233
+ condition_type_ids=condition_type_ids if use_condition else None,
234
+ # Inputs to the original transformer
235
+ hidden_states=latents,
236
+ # YiYi notes: divide it by 1000 for now because we scale it by 1000 in the transforme rmodel (we should not keep it but I want to keep the inputs same for the model for testing)
237
+ timestep=timestep / 1000,
238
+ guidance=guidance,
239
+ pooled_projections=pooled_prompt_embeds,
240
+ encoder_hidden_states=prompt_embeds,
241
+ txt_ids=text_ids,
242
+ img_ids=latent_image_ids,
243
+ joint_attention_kwargs=self.joint_attention_kwargs,
244
+ return_dict=False,
245
+ )[0]
246
+
247
+ # compute the previous noisy sample x_t -> x_t-1
248
+ latents_dtype = latents.dtype
249
+ latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
250
+
251
+ if latents.dtype != latents_dtype:
252
+ if torch.backends.mps.is_available():
253
+ # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
254
+ latents = latents.to(latents_dtype)
255
+
256
+ if callback_on_step_end is not None:
257
+ callback_kwargs = {}
258
+ for k in callback_on_step_end_tensor_inputs:
259
+ callback_kwargs[k] = locals()[k]
260
+ callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
261
+
262
+ latents = callback_outputs.pop("latents", latents)
263
+ prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
264
+
265
+ # call the callback, if provided
266
+ if i == len(timesteps) - 1 or (
267
+ (i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0
268
+ ):
269
+ progress_bar.update()
270
+
271
+ if output_type == "latent":
272
+ image = latents
273
+
274
+ else:
275
+ latents = self._unpack_latents(latents, height, width, self.vae_scale_factor)
276
+ latents = (
277
+ latents / self.vae.config.scaling_factor
278
+ ) + self.vae.config.shift_factor
279
+ image = self.vae.decode(latents, return_dict=False)[0]
280
+ image = self.image_processor.postprocess(image, output_type=output_type)
281
+
282
+ # Offload all models
283
+ self.maybe_free_model_hooks()
284
+
285
+ if condition_scale != 1:
286
+ for name, module in pipeline.transformer.named_modules():
287
+ if not name.endswith(".attn"):
288
+ continue
289
+ del module.c_factor
290
+
291
+ if not return_dict:
292
+ return (image,)
293
+
294
+ return FluxPipelineOutput(images=image)
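
A sketch (not in the commit) of how the generate() helper above is expected to be called end to end. It assumes LoRA adapters named "subject_512"/"subject_1024" have already been loaded onto the pipeline, because the condition branch calls pipeline.set_adapters() with those names; the reference image and output path are placeholders.

import torch
from PIL import Image
from diffusers import FluxPipeline
from src.condition import Condition
from src.generate import generate, seed_everything

pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16).to("cuda")
# Assumed: pipe.load_lora_weights(..., adapter_name="subject_512") was called beforehand.

seed_everything(777)
subject = Image.open("subject.jpg").convert("RGB")  # placeholder reference image
result = generate(
    pipe,
    conditions=[Condition("subject", raw_img=subject)],
    prompt="a hexagon-tiled game board in the style of the subject",
    height=512,
    width=512,
    num_inference_steps=28,
)
result.images[0].save("output.png")  # generate() returns a FluxPipelineOutput by default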
src/lora_controller.py ADDED
@@ -0,0 +1,75 @@
1
+ from peft.tuners.tuners_utils import BaseTunerLayer
2
+ from typing import List, Any, Optional, Type
3
+
4
+
5
+ class enable_lora:
6
+ def __init__(self, lora_modules: List[BaseTunerLayer], activated: bool) -> None:
7
+ self.activated: bool = activated
8
+ if activated:
9
+ return
10
+ self.lora_modules: List[BaseTunerLayer] = [
11
+ each for each in lora_modules if isinstance(each, BaseTunerLayer)
12
+ ]
13
+ self.scales = [
14
+ {
15
+ active_adapter: lora_module.scaling[active_adapter]
16
+ for active_adapter in lora_module.active_adapters
17
+ }
18
+ for lora_module in self.lora_modules
19
+ ]
20
+
21
+ def __enter__(self) -> None:
22
+ if self.activated:
23
+ return
24
+
25
+ for lora_module in self.lora_modules:
26
+ if not isinstance(lora_module, BaseTunerLayer):
27
+ continue
28
+ lora_module.scale_layer(0)
29
+
30
+ def __exit__(
31
+ self,
32
+ exc_type: Optional[Type[BaseException]],
33
+ exc_val: Optional[BaseException],
34
+ exc_tb: Optional[Any],
35
+ ) -> None:
36
+ if self.activated:
37
+ return
38
+ for i, lora_module in enumerate(self.lora_modules):
39
+ if not isinstance(lora_module, BaseTunerLayer):
40
+ continue
41
+ for active_adapter in lora_module.active_adapters:
42
+ lora_module.scaling[active_adapter] = self.scales[i][active_adapter]
43
+
44
+
45
+ class set_lora_scale:
46
+ def __init__(self, lora_modules: List[BaseTunerLayer], scale: float) -> None:
47
+ self.lora_modules: List[BaseTunerLayer] = [
48
+ each for each in lora_modules if isinstance(each, BaseTunerLayer)
49
+ ]
50
+ self.scales = [
51
+ {
52
+ active_adapter: lora_module.scaling[active_adapter]
53
+ for active_adapter in lora_module.active_adapters
54
+ }
55
+ for lora_module in self.lora_modules
56
+ ]
57
+ self.scale = scale
58
+
59
+ def __enter__(self) -> None:
60
+ for lora_module in self.lora_modules:
61
+ if not isinstance(lora_module, BaseTunerLayer):
62
+ continue
63
+ lora_module.scale_layer(self.scale)
64
+
65
+ def __exit__(
66
+ self,
67
+ exc_type: Optional[Type[BaseException]],
68
+ exc_val: Optional[BaseException],
69
+ exc_tb: Optional[Any],
70
+ ) -> None:
71
+ for i, lora_module in enumerate(self.lora_modules):
72
+ if not isinstance(lora_module, BaseTunerLayer):
73
+ continue
74
+ for active_adapter in lora_module.active_adapters:
75
+ lora_module.scaling[active_adapter] = self.scales[i][active_adapter]
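
A short sketch (not in the commit) of the two context managers above, mirroring how src/block.py wraps attention projections with enable_lora. The adapter repo name and the sample tensor are placeholders; a Flux pipeline with at least one LoRA adapter attached is assumed.

import torch
from diffusers import FluxPipeline
from src.lora_controller import enable_lora, set_lora_scale

pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
pipe.load_lora_weights("some/flux-lora")  # placeholder adapter repository
attn = pipe.transformer.transformer_blocks[0].attn
sample = torch.randn(1, 16, attn.to_q.in_features, dtype=torch.bfloat16)

# Zero the LoRA scaling on q/k/v for the duration of the block (restored on exit);
# passing activated=True instead makes the context manager a no-op.
with enable_lora((attn.to_q, attn.to_k, attn.to_v), False):
    q_without_lora = attn.to_q(sample)

# Multiply the current LoRA scaling by 0.5 inside the block, then restore it on exit.
with set_lora_scale((attn.to_q, attn.to_k, attn.to_v), 0.5):
    q_half_lora = attn.to_q(sample)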
src/transformer.py ADDED
@@ -0,0 +1,270 @@
1
+ import torch
2
+ from diffusers.pipelines import FluxPipeline
3
+ from typing import List, Union, Optional, Dict, Any, Callable
4
+ from .block import block_forward, single_block_forward
5
+ from .lora_controller import enable_lora
6
+ from diffusers.models.transformers.transformer_flux import (
7
+ FluxTransformer2DModel,
8
+ Transformer2DModelOutput,
9
+ USE_PEFT_BACKEND,
10
+ is_torch_version,
11
+ scale_lora_layers,
12
+ unscale_lora_layers,
13
+ logger,
14
+ )
15
+ import numpy as np
16
+
17
+
18
+ def prepare_params(
19
+ hidden_states: torch.Tensor,
20
+ encoder_hidden_states: torch.Tensor = None,
21
+ pooled_projections: torch.Tensor = None,
22
+ timestep: torch.LongTensor = None,
23
+ img_ids: torch.Tensor = None,
24
+ txt_ids: torch.Tensor = None,
25
+ guidance: torch.Tensor = None,
26
+ joint_attention_kwargs: Optional[Dict[str, Any]] = None,
27
+ controlnet_block_samples=None,
28
+ controlnet_single_block_samples=None,
29
+ return_dict: bool = True,
30
+ **kwargs: dict,
31
+ ):
32
+ return (
33
+ hidden_states,
34
+ encoder_hidden_states,
35
+ pooled_projections,
36
+ timestep,
37
+ img_ids,
38
+ txt_ids,
39
+ guidance,
40
+ joint_attention_kwargs,
41
+ controlnet_block_samples,
42
+ controlnet_single_block_samples,
43
+ return_dict,
44
+ )
45
+
46
+
47
+ def tranformer_forward(
48
+ transformer: FluxTransformer2DModel,
49
+ condition_latents: torch.Tensor,
50
+ condition_ids: torch.Tensor,
51
+ condition_type_ids: torch.Tensor,
52
+ model_config: Optional[Dict[str, Any]] = {},
53
+ return_conditional_latents: bool = False,
54
+ c_t=0,
55
+ **params: dict,
56
+ ):
57
+ self = transformer
58
+ use_condition = condition_latents is not None
59
+ use_condition_in_single_blocks = model_config.get(
60
+ "use_condition_in_single_blocks", True
61
+ )
62
+ # if return_conditional_latents is True, use_condition and use_condition_in_single_blocks must be True
63
+ assert not return_conditional_latents or (
64
+ use_condition and use_condition_in_single_blocks
65
+ ), "`return_conditional_latents` is True, `use_condition` and `use_condition_in_single_blocks` must be True"
66
+
67
+ (
68
+ hidden_states,
69
+ encoder_hidden_states,
70
+ pooled_projections,
71
+ timestep,
72
+ img_ids,
73
+ txt_ids,
74
+ guidance,
75
+ joint_attention_kwargs,
76
+ controlnet_block_samples,
77
+ controlnet_single_block_samples,
78
+ return_dict,
79
+ ) = prepare_params(**params)
80
+
81
+ if joint_attention_kwargs is not None:
82
+ joint_attention_kwargs = joint_attention_kwargs.copy()
83
+ lora_scale = joint_attention_kwargs.pop("scale", 1.0)
84
+ else:
85
+ lora_scale = 1.0
86
+
87
+ if USE_PEFT_BACKEND:
88
+ # weight the lora layers by setting `lora_scale` for each PEFT layer
89
+ scale_lora_layers(self, lora_scale)
90
+ else:
91
+ if (
92
+ joint_attention_kwargs is not None
93
+ and joint_attention_kwargs.get("scale", None) is not None
94
+ ):
95
+ logger.warning(
96
+ "Passing `scale` via `joint_attention_kwargs` when not using the PEFT backend is ineffective."
97
+ )
98
+ with enable_lora((self.x_embedder,), model_config.get("latent_lora", False)):
99
+ hidden_states = self.x_embedder(hidden_states)
100
+ condition_latents = self.x_embedder(condition_latents) if use_condition else None
101
+
102
+ timestep = timestep.to(hidden_states.dtype) * 1000
103
+ if guidance is not None:
104
+ guidance = guidance.to(hidden_states.dtype) * 1000
105
+ else:
106
+ guidance = None
107
+ temb = (
108
+ self.time_text_embed(timestep, pooled_projections)
109
+ if guidance is None
110
+ else self.time_text_embed(timestep, guidance, pooled_projections)
111
+ )
112
+ cond_temb = (
113
+ self.time_text_embed(torch.ones_like(timestep) * c_t * 1000, pooled_projections)
114
+ if guidance is None
115
+ else self.time_text_embed(
116
+ torch.ones_like(timestep) * c_t * 1000, guidance, pooled_projections
117
+ )
118
+ )
119
+ if hasattr(self, "cond_type_embed") and condition_type_ids is not None:
120
+ cond_type_proj = self.time_text_embed.time_proj(condition_type_ids[0])
121
+ cond_type_emb = self.cond_type_embed(cond_type_proj.to(dtype=cond_temb.dtype))
122
+ cond_temb = cond_temb + cond_type_emb
123
+ encoder_hidden_states = self.context_embedder(encoder_hidden_states)
124
+
125
+ if txt_ids.ndim == 3:
126
+ logger.warning(
127
+ "Passing `txt_ids` 3d torch.Tensor is deprecated."
128
+ "Please remove the batch dimension and pass it as a 2d torch Tensor"
129
+ )
130
+ txt_ids = txt_ids[0]
131
+ if img_ids.ndim == 3:
132
+ logger.warning(
133
+ "Passing `img_ids` 3d torch.Tensor is deprecated."
134
+ "Please remove the batch dimension and pass it as a 2d torch Tensor"
135
+ )
136
+ img_ids = img_ids[0]
137
+
138
+ ids = torch.cat((txt_ids, img_ids), dim=0)
139
+ image_rotary_emb = self.pos_embed(ids)
140
+ if use_condition:
141
+ cond_ids = condition_ids
142
+ cond_rotary_emb = self.pos_embed(cond_ids)
143
+
144
+ # hidden_states = torch.cat([hidden_states, condition_latents], dim=1)
145
+
146
+ for index_block, block in enumerate(self.transformer_blocks):
147
+ if self.training and self.gradient_checkpointing:
148
+
149
+ def create_custom_forward(module, return_dict=None):
150
+ def custom_forward(*inputs):
151
+ if return_dict is not None:
152
+ return module(*inputs, return_dict=return_dict)
153
+ else:
154
+ return module(*inputs)
155
+
156
+ return custom_forward
157
+
158
+ ckpt_kwargs: Dict[str, Any] = (
159
+ {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
160
+ )
161
+ encoder_hidden_states, hidden_states = torch.utils.checkpoint.checkpoint(
162
+ create_custom_forward(block),
163
+ hidden_states,
164
+ encoder_hidden_states,
165
+ temb,
166
+ image_rotary_emb,
167
+ **ckpt_kwargs,
168
+ )
169
+
170
+ else:
171
+ encoder_hidden_states, hidden_states, condition_latents = block_forward(
172
+ block,
173
+ model_config=model_config,
174
+ hidden_states=hidden_states,
175
+ encoder_hidden_states=encoder_hidden_states,
176
+ condition_latents=condition_latents if use_condition else None,
177
+ temb=temb,
178
+ cond_temb=cond_temb if use_condition else None,
179
+ cond_rotary_emb=cond_rotary_emb if use_condition else None,
180
+ image_rotary_emb=image_rotary_emb,
181
+ )
182
+
183
+ # controlnet residual
184
+ if controlnet_block_samples is not None:
185
+ interval_control = len(self.transformer_blocks) / len(
186
+ controlnet_block_samples
187
+ )
188
+ interval_control = int(np.ceil(interval_control))
189
+ hidden_states = (
190
+ hidden_states
191
+ + controlnet_block_samples[index_block // interval_control]
192
+ )
193
+ hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1)
194
+
195
+ for index_block, block in enumerate(self.single_transformer_blocks):
196
+ if self.training and self.gradient_checkpointing:
197
+
198
+ def create_custom_forward(module, return_dict=None):
199
+ def custom_forward(*inputs):
200
+ if return_dict is not None:
201
+ return module(*inputs, return_dict=return_dict)
202
+ else:
203
+ return module(*inputs)
204
+
205
+ return custom_forward
206
+
207
+ ckpt_kwargs: Dict[str, Any] = (
208
+ {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
209
+ )
210
+ hidden_states = torch.utils.checkpoint.checkpoint(
211
+ create_custom_forward(block),
212
+ hidden_states,
213
+ temb,
214
+ image_rotary_emb,
215
+ **ckpt_kwargs,
216
+ )
217
+
218
+ else:
219
+ result = single_block_forward(
220
+ block,
221
+ model_config=model_config,
222
+ hidden_states=hidden_states,
223
+ temb=temb,
224
+ image_rotary_emb=image_rotary_emb,
225
+ **(
226
+ {
227
+ "condition_latents": condition_latents,
228
+ "cond_temb": cond_temb,
229
+ "cond_rotary_emb": cond_rotary_emb,
230
+ }
231
+ if use_condition_in_single_blocks and use_condition
232
+ else {}
233
+ ),
234
+ )
235
+ if use_condition_in_single_blocks and use_condition:
236
+ hidden_states, condition_latents = result
237
+ else:
238
+ hidden_states = result
239
+
240
+ # controlnet residual
241
+ if controlnet_single_block_samples is not None:
242
+ interval_control = len(self.single_transformer_blocks) / len(
243
+ controlnet_single_block_samples
244
+ )
245
+ interval_control = int(np.ceil(interval_control))
246
+ hidden_states[:, encoder_hidden_states.shape[1] :, ...] = (
247
+ hidden_states[:, encoder_hidden_states.shape[1] :, ...]
248
+ + controlnet_single_block_samples[index_block // interval_control]
249
+ )
250
+
251
+ hidden_states = hidden_states[:, encoder_hidden_states.shape[1] :, ...]
252
+
253
+ hidden_states = self.norm_out(hidden_states, temb)
254
+ output = self.proj_out(hidden_states)
255
+ if return_conditional_latents:
256
+ condition_latents = (
257
+ self.norm_out(condition_latents, cond_temb) if use_condition else None
258
+ )
259
+ condition_output = self.proj_out(condition_latents) if use_condition else None
260
+
261
+ if USE_PEFT_BACKEND:
262
+ # remove `lora_scale` from each PEFT layer
263
+ unscale_lora_layers(self, lora_scale)
264
+
265
+ if not return_dict:
266
+ return (
267
+ (output,) if not return_conditional_latents else (output, condition_output)
268
+ )
269
+
270
+ return Transformer2DModelOutput(sample=output)
utils/ai_generator.py CHANGED
@@ -1,7 +1,8 @@
 # utils/ai_generator.py
 
 import os
- import time # Added for implementing delays
+ import time
+ from turtle import width # Added for implementing delays
 import torch
 import random
 from utils.ai_generator_diffusers_flux import generate_ai_image_local
@@ -34,6 +35,9 @@ def generate_ai_image(
 lora_weights=None,
 conditioned_image=None,
 pipeline = "FluxPipeline",
+ width=912,
+ height=512,
+ strength=0.5,
 *args,
 **kwargs
 ):
@@ -51,7 +55,9 @@ def generate_ai_image(
 seed=seed,
 conditioned_image=conditioned_image,
 pipeline_name=pipeline,
- strength=0.5
+ strength=strength,
+ height=height,
+ width=width
 )
 else:
 print("No local GPU available. Sending request to Hugging Face API.")
@@ -59,10 +65,12 @@ def generate_ai_image(
 map_option,
 prompt_textbox_value,
 neg_prompt_textbox_value,
- model
+ model,
+ height=height,
+ width=width
 )
 
- def generate_ai_image_remote(map_option, prompt_textbox_value, neg_prompt_textbox_value, model, height=512, width=896, num_inference_steps=50, guidance_scale=3.5, seed=777):
+ def generate_ai_image_remote(map_option, prompt_textbox_value, neg_prompt_textbox_value, model, height=512, width=912, num_inference_steps=30, guidance_scale=3.5, seed=777):
 max_retries = 3
 retry_delay = 4 # Initial delay in seconds
 
utils/ai_generator_diffusers_flux.py CHANGED
@@ -1,13 +1,13 @@
1
  # utils/ai_generator_diffusers_flux.py
2
  import os
3
  import torch
4
- from diffusers import FluxPipeline,FluxImg2ImgPipeline
5
  import accelerate
6
  import transformers
7
  import safetensors
8
  import xformers
9
  from diffusers.utils import load_image
10
- # from huggingface_hub import hf_hub_download
11
  from PIL import Image
12
  from tempfile import NamedTemporaryFile
13
  from src.condition import Condition
@@ -16,15 +16,14 @@ from utils.image_utils import (
16
  crop_and_resize_image,
17
  )
18
  from utils.version_info import (
19
- versions_html,
20
  get_torch_info,
21
  get_diffusers_version,
22
  get_transformers_version,
23
  get_xformers_version
24
  )
25
- from utils.lora_details import get_trigger_words
26
  from utils.color_utils import detect_color_format
27
- # import utils.misc as misc
28
  from pathlib import Path
29
  import warnings
30
  warnings.filterwarnings("ignore", message=".*Torch was not compiled with flash attention.*")
@@ -93,6 +92,7 @@ def generate_image_from_text(
93
  generate_params = {k: v for k, v in generate_params.items() if v is not None}
94
  result = pipe(**generate_params)
95
  image = result.images[0]
 
96
  return image
97
 
98
  def generate_image_lowmem(
@@ -101,10 +101,10 @@ def generate_image_lowmem(
101
  model_name="black-forest-labs/FLUX.1-dev",
102
  lora_weights=None,
103
  conditioned_image=None,
104
- image_width=1344,
105
  image_height=848,
106
  guidance_scale=3.5,
107
- num_inference_steps=50,
108
  seed=0,
109
  true_cfg_scale=1.0,
110
  pipeline_name="FluxPipeline",
@@ -117,7 +117,7 @@ def generate_image_lowmem(
117
  raise ValueError(f"Unsupported pipeline type '{pipeline_name}'. "
118
  f"Available options: {list(PIPELINE_CLASSES.keys())}")
119
  device = "cuda" if torch.cuda.is_available() else "cpu"
120
- print(f"device:{device}\nmodel_name:{model_name}\n")
121
  print(f"\n {get_torch_info()}\n")
122
  # Disable gradient calculations
123
  with torch.no_grad():
@@ -141,27 +141,59 @@ def generate_image_lowmem(
141
  if pipeline_name == "FluxPipeline":
142
  pipe.enable_vae_tiling()
143
  # Load LoRA weights
 
144
  if lora_weights:
145
  for lora_weight in lora_weights:
146
  lora_configs = constants.LORA_DETAILS.get(lora_weight, [])
 
147
  if lora_configs:
148
  for config in lora_configs:
149
  # Load LoRA weights with optional weight_name and adapter_name
150
- weight_name = config.get("weight_name")
151
- adapter_name = config.get("adapter_name")
152
- if weight_name and adapter_name:
153
- pipe.load_lora_weights(
154
- lora_weight,
155
- weight_name=weight_name,
156
- adapter_name=adapter_name,
157
- use_auth_token=constants.HF_API_TOKEN
158
- )
159
- else:
160
- pipe.load_lora_weights(
161
- lora_weight,
162
- use_auth_token=constants.HF_API_TOKEN
163
- )
164
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  # Apply 'pipe' configurations if present
166
  if 'pipe' in config:
167
  pipe_config = config['pipe']
@@ -174,6 +206,7 @@ def generate_image_lowmem(
174
  print(f"Method {method_name} not found in pipe.")
175
  else:
176
  pipe.load_lora_weights(lora_weight, use_auth_token=constants.HF_API_TOKEN)
 
177
  generator = torch.Generator(device=device).manual_seed(seed)
178
  conditions = []
179
  if conditioned_image is not None:
@@ -194,8 +227,20 @@ def generate_image_lowmem(
194
  "negative_prompt": neg_prompt,
195
  "true_cfg_scale": true_cfg_scale,
196
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  generate_params = {
198
- "prompt": text,
199
  "height": image_height,
200
  "width": image_width,
201
  "guidance_scale": guidance_scale,
@@ -204,6 +249,7 @@ def generate_image_lowmem(
204
  if additional_parameters:
205
  generate_params.update(additional_parameters)
206
  generate_params = {k: v for k, v in generate_params.items() if v is not None}
 
207
  # Generate the image
208
  result = pipe(**generate_params)
209
  image = result.images[0]
@@ -214,6 +260,7 @@ def generate_image_lowmem(
214
  # Delete the pipeline and clear cache
215
  del pipe
216
  torch.cuda.empty_cache()
 
217
  print(torch.cuda.memory_summary(device=None, abbreviated=False))
218
  return image
219
 
@@ -225,8 +272,8 @@ def generate_ai_image_local (
225
  lora_weights=None,
226
  conditioned_image=None,
227
  height=512,
228
- width=896,
229
- num_inference_steps=50,
230
  guidance_scale=3.5,
231
  seed=777,
232
  pipeline_name="FluxPipeline",
@@ -293,4 +340,20 @@ def generate_ai_image_local (
293
  return tmp.name
294
  except Exception as e:
295
  print(f"Error generating AI image: {e}")
296
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # utils/ai_generator_diffusers_flux.py
2
  import os
3
  import torch
4
+ from diffusers import FluxPipeline,FluxImg2ImgPipeline,FluxControlPipeline
5
  import accelerate
6
  import transformers
7
  import safetensors
8
  import xformers
9
  from diffusers.utils import load_image
10
+ from huggingface_hub import hf_hub_download
11
  from PIL import Image
12
  from tempfile import NamedTemporaryFile
13
  from src.condition import Condition
 
16
  crop_and_resize_image,
17
  )
18
  from utils.version_info import (
 
19
  get_torch_info,
20
  get_diffusers_version,
21
  get_transformers_version,
22
  get_xformers_version
23
  )
24
+ from utils.lora_details import get_trigger_words, approximate_token_count, split_prompt_precisely
25
  from utils.color_utils import detect_color_format
26
+ import utils.misc as misc
27
  from pathlib import Path
28
  import warnings
29
  warnings.filterwarnings("ignore", message=".*Torch was not compiled with flash attention.*")
 
92
  generate_params = {k: v for k, v in generate_params.items() if v is not None}
93
  result = pipe(**generate_params)
94
  image = result.images[0]
95
+ pipe.unload_lora_weights()
96
  return image
97
 
98
  def generate_image_lowmem(
 
101
  model_name="black-forest-labs/FLUX.1-dev",
102
  lora_weights=None,
103
  conditioned_image=None,
104
+ image_width=1368,
105
  image_height=848,
106
  guidance_scale=3.5,
107
+ num_inference_steps=30,
108
  seed=0,
109
  true_cfg_scale=1.0,
110
  pipeline_name="FluxPipeline",
 
117
  raise ValueError(f"Unsupported pipeline type '{pipeline_name}'. "
118
  f"Available options: {list(PIPELINE_CLASSES.keys())}")
119
  device = "cuda" if torch.cuda.is_available() else "cpu"
120
+ print(f"device:{device}\nmodel_name:{model_name}\nlora_weights:{lora_weights}\n")
121
  print(f"\n {get_torch_info()}\n")
122
  # Disable gradient calculations
123
  with torch.no_grad():
 
141
  if pipeline_name == "FluxPipeline":
142
  pipe.enable_vae_tiling()
143
  # Load LoRA weights
144
+ # note: does not yet handle multiple LoRA weights with different names, needs .set_adapters(["depth", "hyper-sd"], adapter_weights=[0.85, 0.125])
145
  if lora_weights:
146
  for lora_weight in lora_weights:
147
  lora_configs = constants.LORA_DETAILS.get(lora_weight, [])
148
+ lora_weight_set = False
149
  if lora_configs:
150
  for config in lora_configs:
151
  # Load LoRA weights with optional weight_name and adapter_name
152
+ if 'weight_name' in config:
153
+ weight_name = config.get("weight_name")
154
+ adapter_name = config.get("adapter_name")
155
+ lora_collection = config.get("lora_collection")
156
+ if weight_name and adapter_name and lora_collection and lora_weight_set == False:
157
+ pipe.load_lora_weights(
158
+ lora_collection,
159
+ weight_name=weight_name,
160
+ adapter_name=adapter_name,
161
+ token=constants.HF_API_TOKEN
162
+ )
163
+ lora_weight_set = True
164
+ print(f"\npipe.load_lora_weights({lora_weight}, weight_name={weight_name}, adapter_name={adapter_name}, lora_collection={lora_collection}\n")
165
+ elif weight_name and adapter_name==None and lora_collection and lora_weight_set == False:
166
+ pipe.load_lora_weights(
167
+ lora_collection,
168
+ weight_name=weight_name,
169
+ token=constants.HF_API_TOKEN
170
+ )
171
+ lora_weight_set = True
172
+ print(f"\npipe.load_lora_weights({lora_weight}, weight_name={weight_name}, adapter_name={adapter_name}, lora_collection={lora_collection}\n")
173
+ elif weight_name and adapter_name and lora_weight_set == False:
174
+ pipe.load_lora_weights(
175
+ lora_weight,
176
+ weight_name=weight_name,
177
+ adapter_name=adapter_name,
178
+ token=constants.HF_API_TOKEN
179
+ )
180
+ lora_weight_set = True
181
+ print(f"\npipe.load_lora_weights({lora_weight}, weight_name={weight_name}, adapter_name={adapter_name}\n")
182
+ elif weight_name and adapter_name==None and lora_weight_set == False:
183
+ pipe.load_lora_weights(
184
+ lora_weight,
185
+ weight_name=weight_name,
186
+ token=constants.HF_API_TOKEN
187
+ )
188
+ lora_weight_set = True
189
+ print(f"\npipe.load_lora_weights({lora_weight}, weight_name={weight_name}, adapter_name={adapter_name}\n")
190
+ elif lora_weight_set == False:
191
+ pipe.load_lora_weights(
192
+ lora_weight,
193
+ token=constants.HF_API_TOKEN
194
+ )
195
+ lora_weight_set = True
196
+ print(f"\npipe.load_lora_weights({lora_weight}, weight_name={weight_name}, adapter_name={adapter_name}\n")
197
  # Apply 'pipe' configurations if present
198
  if 'pipe' in config:
199
  pipe_config = config['pipe']
 
206
  print(f"Method {method_name} not found in pipe.")
207
  else:
208
  pipe.load_lora_weights(lora_weight, use_auth_token=constants.HF_API_TOKEN)
209
+ # Set the random seed for reproducibility
210
  generator = torch.Generator(device=device).manual_seed(seed)
211
  conditions = []
212
  if conditioned_image is not None:
 
227
  "negative_prompt": neg_prompt,
228
  "true_cfg_scale": true_cfg_scale,
229
  }
230
+ # handle long prompts by splitting them
231
+ if approximate_token_count(text) > 76:
232
+ prompt, prompt2 = split_prompt_precisely(text)
233
+ prompt_parameters = {
234
+ "prompt" : prompt,
235
+ "prompt_2": prompt2
236
+ }
237
+ else:
238
+ prompt_parameters = {
239
+ "prompt" :text
240
+ }
241
+ additional_parameters.update(prompt_parameters)
242
+ # Combine all parameters
243
  generate_params = {
 
244
  "height": image_height,
245
  "width": image_width,
246
  "guidance_scale": guidance_scale,
 
249
  if additional_parameters:
250
  generate_params.update(additional_parameters)
251
  generate_params = {k: v for k, v in generate_params.items() if v is not None}
252
+ print(f"generate_params: {generate_params}")
253
  # Generate the image
254
  result = pipe(**generate_params)
255
  image = result.images[0]
 
260
  # Delete the pipeline and clear cache
261
  del pipe
262
  torch.cuda.empty_cache()
263
+ torch.cuda.ipc_collect()
264
  print(torch.cuda.memory_summary(device=None, abbreviated=False))
265
  return image
266
 
 
272
  lora_weights=None,
273
  conditioned_image=None,
274
  height=512,
275
+ width=912,
276
+ num_inference_steps=30,
277
  guidance_scale=3.5,
278
  seed=777,
279
  pipeline_name="FluxPipeline",
 
340
  return tmp.name
341
  except Exception as e:
342
  print(f"Error generating AI image: {e}")
343
+ return None
344
+
345
+ # does not work
346
+ def merge_LoRA_weights(model="black-forest-labs/FLUX.1-dev",
347
+ lora_weights="Borcherding/FLUX.1-dev-LoRA-FractalLand-v0.1"):
348
+
349
+ model_suffix = model.split("/")[-1]
350
+ if model_suffix not in lora_weights:
351
+ raise ValueError(f"The model suffix '{model_suffix}' must be in the lora_weights string '{lora_weights}' to proceed.")
352
+
353
+ pipe = FluxPipeline.from_pretrained(model, torch_dtype=torch.bfloat16)
354
+ pipe.load_lora_weights(lora_weights)
355
+ pipe.save_lora_weights(os.getenv("TMPDIR"))
356
+ lora_name = lora_weights.split("/")[-1] + "-merged"
357
+ pipe.save_pretrained(lora_name)
358
+ pipe.unload_lora_weights()
359
+
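The five nearly identical load_lora_weights branches above differ only in which of weight_name, adapter_name, and lora_collection happen to be present in the config. As a hedged sketch (not part of this commit, and assuming the same diffusers keyword API and constants.HF_API_TOKEN), the same behaviour could be expressed by building the keyword set once:

# Hypothetical consolidation of the branch chain above; load_single_lora is not a real helper in this repo.
def load_single_lora(pipe, lora_weight, config, hf_token):
    repo_id = config.get("lora_collection") or lora_weight  # prefer the collection repo when one is given
    kwargs = {"token": hf_token}
    if config.get("weight_name"):
        kwargs["weight_name"] = config["weight_name"]
    if config.get("adapter_name"):
        kwargs["adapter_name"] = config["adapter_name"]
    pipe.load_lora_weights(repo_id, **kwargs)
    print(f"pipe.load_lora_weights({repo_id}, {kwargs})")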
utils/color_utils.py ADDED
@@ -0,0 +1,214 @@
1
+ # utils/color_utils.py
2
+
3
+ from PIL import Image, ImageColor
4
+ import re
5
+ import cairocffi as cairo
6
+ import pangocffi
7
+ import pangocairocffi
8
+
9
+
10
+ def multiply_and_clamp(value, scale, min_value=0, max_value=255):
11
+ return min(max(value * scale, min_value), max_value)
12
+
13
+ # Convert decimal color to hexadecimal color (rgb or rgba)
14
+ def rgb_to_hex(rgb):
15
+ color = "#"
16
+ for i in rgb:
17
+ num = int(i)
18
+ color += str(hex(num))[-2:].replace("x", "0").upper()
19
+ return color
20
+
21
+ def parse_hex_color(hex_color, base = 1):
22
+ """
23
+ Parses a hex color string or tuple into RGBA components.
24
+ By default the components are returned normalized to the 0-1 range;
25
+ pass base=255 to get them in the 0-255 range.
26
+ Accepts colors specified in several formats and converts them into RGBA components
27
+ suitable for use in color calculations, rendering, or manipulation.
28
+
29
+ Supports:
30
+ - #RRGGBBAA
31
+ - #RRGGBB (assumes full opacity)
32
+ - (r, g, b, a) tuple
33
+ """
34
+ if isinstance(hex_color, tuple):
35
+ if len(hex_color) == 4:
36
+ r, g, b, a = hex_color
37
+ elif len(hex_color) == 3:
38
+ r, g, b = hex_color
39
+ a = 1.0 # Full opacity
40
+ else:
41
+ raise ValueError("Tuple must be in the format (r, g, b) or (r, g, b, a)")
42
+ return r / 255.0, g / 255.0, b / 255.0, (a if a <= 1 else a / 255.0)
43
+
44
+ if hex_color.startswith("#"):
45
+ if len(hex_color) == 7:  # "#RRGGBB"
46
+ r = int(hex_color[1:3], 16) / 255.0
47
+ g = int(hex_color[3:5], 16) / 255.0
48
+ b = int(hex_color[5:7], 16) / 255.0
49
+ a = 1.0 # Full opacity
50
+ elif len(hex_color) == 9:  # "#RRGGBBAA"
51
+ r = int(hex_color[1:3], 16) / 255.0
52
+ g = int(hex_color[3:5], 16) / 255.0
53
+ b = int(hex_color[5:7], 16) / 255.0
54
+ a = int(hex_color[7:9], 16) / 255.0
55
+ else:
56
+ try:
57
+ r, g, b, a = ImageColor.getcolor(hex_color, "RGBA")
58
+ r = r / 255
59
+ g = g / 255
60
+ b = b / 255
61
+ a = a / 255
62
+ except:
63
+ raise ValueError("Hex color must be in the format #RRGGBB, #RRGGBBAA, (r, g, b, a), or a common color name")
64
+ return multiply_and_clamp(r, base, max_value=base), multiply_and_clamp(g, base, max_value=base), multiply_and_clamp(b, base, max_value=base), multiply_and_clamp(a, base, max_value=base)
65
+
66
+ # Define a function to convert a hexadecimal color code to an RGB(A) tuple
67
+ def hex_to_rgb(hex):
68
+ if hex.startswith("#"):
69
+ clean_hex = hex.replace('#','')
70
+ # Use a generator expression to convert pairs of hexadecimal digits to integers and create a tuple
71
+ return tuple(int(clean_hex[i:i+2], 16) for i in range(0, len(clean_hex),2))
72
+ else:
73
+ return detect_color_format(hex)
74
+
75
+ def detect_color_format(color):
76
+ """
77
+ Detects if the color is in RGB, RGBA, or hex format,
78
+ and converts it to an RGBA tuple with integer components.
79
+
80
+ Args:
81
+ color (str or tuple): The color to detect.
82
+
83
+ Returns:
84
+ tuple: The color in RGBA format as a tuple of 4 integers.
85
+
86
+ Raises:
87
+ ValueError: If the input color is not in a recognized format.
88
+ """
89
+ # Handle color as a tuple of floats or integers
90
+ if isinstance(color, tuple):
91
+ if len(color) == 3 or len(color) == 4:
92
+ # Ensure all components are numbers
93
+ if all(isinstance(c, (int, float)) for c in color):
94
+ r, g, b = color[:3]
95
+ a = color[3] if len(color) == 4 else 255
96
+ return (
97
+ max(0, min(255, int(round(r)))),
98
+ max(0, min(255, int(round(g)))),
99
+ max(0, min(255, int(round(b)))),
100
+ max(0, min(255, int(round(a * 255)) if a <= 1 else round(a))),
101
+ )
102
+ else:
103
+ raise ValueError(f"Invalid color tuple length: {len(color)}")
104
+ # Handle hex color codes
105
+ if isinstance(color, str):
106
+ color = color.strip()
107
+ # Try to use PIL's ImageColor
108
+ try:
109
+ rgba = ImageColor.getcolor(color, "RGBA")
110
+ return rgba
111
+ except ValueError:
112
+ pass
113
+ # Handle 'rgba(r, g, b, a)' string format
114
+ rgba_match = re.match(r'rgba\(\s*([0-9.]+),\s*([0-9.]+),\s*([0-9.]+),\s*([0-9.]+)\s*\)', color)
115
+ if rgba_match:
116
+ r, g, b, a = map(float, rgba_match.groups())
117
+ return (
118
+ max(0, min(255, int(round(r)))),
119
+ max(0, min(255, int(round(g)))),
120
+ max(0, min(255, int(round(b)))),
121
+ max(0, min(255, int(round(a * 255)) if a <= 1 else round(a))),
122
+ )
123
+ # Handle 'rgb(r, g, b)' string format
124
+ rgb_match = re.match(r'rgb\(\s*([0-9.]+),\s*([0-9.]+),\s*([0-9.]+)\s*\)', color)
125
+ if rgb_match:
126
+ r, g, b = map(float, rgb_match.groups())
127
+ return (
128
+ max(0, min(255, int(round(r)))),
129
+ max(0, min(255, int(round(g)))),
130
+ max(0, min(255, int(round(b)))),
131
+ 255,
132
+ )
133
+
134
+ # If none of the above conversions work, raise an error
135
+ raise ValueError(f"Invalid color format: {color}")
136
+
137
+
138
+ def update_color_opacity(color, opacity):
139
+ """
140
+ Updates the opacity of a color value.
141
+
142
+ Parameters:
143
+ color (tuple): A color represented as an RGB or RGBA tuple.
144
+ opacity (int): An integer between 0 and 255 representing the desired opacity.
145
+
146
+ Returns:
147
+ tuple: The color as an RGBA tuple with the updated opacity.
148
+ """
149
+ # Ensure opacity is within the valid range
150
+ opacity = max(0, min(255, int(opacity)))
151
+
152
+ if len(color) == 3:
153
+ # Color is RGB, add the opacity to make it RGBA
154
+ return color + (opacity,)
155
+ elif len(color) == 4:
156
+ # Color is RGBA, replace the alpha value with the new opacity
157
+ return color[:3] + (opacity,)
158
+ else:
159
+ raise ValueError(f"Invalid color format: {color}. Must be an RGB or RGBA tuple.")
160
+
161
+ def draw_text_with_emojis(image, text, font_color, offset_x, offset_y, font_name, font_size):
162
+ """
163
+ Draws text with emojis directly onto the given PIL image at specified coordinates with the specified color.
164
+ Parameters:
165
+ image (PIL.Image.Image): The RGBA image to draw on.
166
+ text (str): The text to draw, including emojis.
167
+ font_color (tuple): RGBA color tuple for the text (e.g., (255, 0, 0, 255)).
168
+ offset_x (int): The x-coordinate for the text center position.
169
+ offset_y (int): The y-coordinate for the text center position.
170
+ font_name (str): The name of the font family.
171
+ font_size (int): Size of the font.
172
+ Returns:
173
+ None: The function modifies the image in place.
174
+ """
175
+ if image.mode != 'RGBA':
176
+ raise ValueError("Image must be in RGBA mode.")
177
+ # Convert PIL image to a mutable bytearray
178
+ img_data = bytearray(image.tobytes("raw", "BGRA"))
179
+ # Create a Cairo ImageSurface that wraps the image's data
180
+ surface = cairo.ImageSurface.create_for_data(
181
+ img_data,
182
+ cairo.FORMAT_ARGB32,
183
+ image.width,
184
+ image.height,
185
+ image.width * 4
186
+ )
187
+ context = cairo.Context(surface)
188
+ # Create Pango layout
189
+ layout = pangocairocffi.create_layout(context)
190
+ layout._set_text(text)
191
+ # Set font description
192
+ desc = pangocffi.FontDescription()
193
+ desc._set_family(font_name)
194
+ desc._set_size(pangocffi.units_from_double(font_size))
195
+ layout._set_font_description(desc)
196
+ # Set text color
197
+ r, g, b, a = parse_hex_color(font_color)
198
+ context.set_source_rgba(r , g , b , a )
199
+ # Move to the position (top-left corner adjusted to center the text)
200
+ context.move_to(offset_x, offset_y)
201
+ # Render the text
202
+ pangocairocffi.show_layout(context, layout)
203
+ # Flush the surface to ensure all drawing operations are complete
204
+ surface.flush()
205
+ # Convert the modified bytearray back to a PIL Image
206
+ modified_image = Image.frombuffer(
207
+ "RGBA",
208
+ (image.width, image.height),
209
+ bytes(img_data),
210
+ "raw",
211
+ "BGRA", # Cairo stores data in BGRA order
212
+ surface.get_stride(),
213
+ ).convert("RGBA")
214
+ return modified_image
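A short, illustrative usage sketch for the helpers above (values are arbitrary, and the module's cairo/pango dependencies are assumed to be installed). parse_hex_color returns normalized 0-1 components by default, while detect_color_format always returns a 0-255 RGBA tuple:

from utils.color_utils import parse_hex_color, detect_color_format, update_color_opacity, rgb_to_hex

print(parse_hex_color("#FF8000"))                # ~ (1.0, 0.502, 0.0, 1.0)
print(detect_color_format("rgb(255, 128, 0)"))   # (255, 128, 0, 255)
print(update_color_opacity((255, 128, 0), 128))  # (255, 128, 0, 128)
print(rgb_to_hex((255, 128, 0)))                 # "#FF8000"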
utils/constants.py CHANGED
@@ -1,8 +1,8 @@
1
- import os
2
  #Set the environment variables
3
  os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:256,expandable_segments:True"
5
- IS_SHARED_SPACE = "Surn/HexaGridCreator" in os.environ.get('SPACE_ID', '')
6
 
7
  # Set the temporary folder location
8
  os.environ['TEMP'] = r'e:\\TMP'
@@ -292,3 +292,29 @@ lut_folder = "./LUT"
292
  lut_files = [os.path.join(lut_folder, f).replace("\\", "/") for f in os.listdir(lut_folder) if f.endswith(".cube")]
293
 
294
  temp_files = []
 
1
+ import os
2
  #Set the environment variables
3
  os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
4
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:256,expandable_segments:True"
5
+ IS_SHARED_SPACE = "Surn/HexaGrid" in os.environ.get('SPACE_ID', '')
6
 
7
  # Set the temporary folder location
8
  os.environ['TEMP'] = r'e:\\TMP'
 
292
  lut_files = [os.path.join(lut_folder, f).replace("\\", "/") for f in os.listdir(lut_folder) if f.endswith(".cube")]
293
 
294
  temp_files = []
295
+
296
+
297
+ cards = [
298
+ "2♥️", "3♥️", "4♥️", "5♥️", "6♥️", "7♥️", "8♥️", "9♥️", "10♥️", "J♥️", "Q♥️", "K♥️", "A♥️",
299
+ "2♦️", "3♦️", "4♦️", "5♦️", "6♦️", "7♦️", "8♦️", "9♦️", "10♦️", "J♦️", "Q♦️", "K♦️", "A♦️",
300
+ "2♣️", "3♣️", "4♣️", "5♣️", "6♣️", "7♣️", "8♣️", "9♣️", "10♣️", "J♣️", "Q♣️", "K♣️", "A♣️",
301
+ "2♠️", "3♠️", "4♠️", "5♠️", "6♠️", "7♠️", "8♠️", "9♠️", "10♠️", "J♠️", "Q♠️", "K♠️", "A♠️"
302
+ ]
303
+ cards_alternating = [
304
+ "2♥️", "3♥️", "4♥️", "5♥️", "6♥️", "7♥️", "8♥️", "9♥️", "10♥️", "J♥️", "Q♥️", "K♥️", "A♥️",
305
+ "2♣️", "3♣️", "4♣️", "5♣️", "6♣️", "7♣️", "8♣️", "9♣️", "10♣️", "J♣️", "Q♣️", "K♣️", "A♣️",
306
+ "2♦️", "3♦️", "4♦️", "5♦️", "6♦️", "7♦️", "8♦️", "9♦️", "10♦️", "J♦️", "Q♦️", "K♦️", "A♦️",
307
+ "2♠️", "3♠️", "4♠️", "5♠️", "6♠️", "7♠️", "8♠️", "9♠️", "10♠️", "J♠️", "Q♠️", "K♠️", "A♠️"
308
+ ]
309
+ card_colors = [
310
+ "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", # Hearts
311
+ "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", # Diamonds
312
+ "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", # Clubs
313
+ "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000" # Spades
314
+ ]
315
+ card_colors_alternating = [
316
+ "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", # Hearts
317
+ "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", # Clubs
318
+ "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", "#FF0000", # Diamonds
319
+ "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000", "#000000" # Spades
320
+ ]
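Each entry of cards pairs positionally with the same index in card_colors (red for hearts and diamonds, black for clubs and spades), and likewise for the alternating variants. A quick, illustrative sanity check (run from the repository root so the LUT folder scan in constants succeeds):

import utils.constants as constants

assert len(constants.cards) == len(constants.card_colors) == 52
for face, color in zip(constants.cards_alternating, constants.card_colors_alternating):
    print(face, color)  # e.g. "2♥️ #FF0000" for hearts, "2♣️ #000000" for clubs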
utils/depth_estimation.py ADDED
@@ -0,0 +1,121 @@
1
+ # utils/depth_estimation.py
2
+
3
+ import torch
4
+ import numpy as np
5
+ from PIL import Image
6
+ import open3d as o3d
7
+ from transformers import DPTImageProcessor, DPTForDepthEstimation
8
+ from pathlib import Path
9
+ import logging
10
+ logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
11
+ from utils.image_utils import (
12
+ change_color,
13
+ open_image,
14
+ build_prerendered_images,
15
+ upscale_image,
16
+ crop_and_resize_image,
17
+ resize_image_with_aspect_ratio,
18
+ show_lut,
19
+ apply_lut_to_image_path
20
+ )
21
+
22
+ # Load models once during module import
23
+ image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
24
+ depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large", ignore_mismatched_sizes=True)
25
+
26
+ def estimate_depth(image):
27
+ # Ensure image is in RGB mode
28
+ if image.mode != "RGB":
29
+ image = image.convert("RGB")
30
+
31
+ # Resize the image for the model
32
+ image_resized = image.resize(
33
+ (image.width, image.height),
34
+ Image.Resampling.LANCZOS
35
+ )
36
+
37
+ # Prepare image for the model
38
+ encoding = image_processor(image_resized, return_tensors="pt")
39
+
40
+ # Forward pass
41
+ with torch.no_grad():
42
+ outputs = depth_model(**encoding)
43
+ predicted_depth = outputs.predicted_depth
44
+
45
+ # Interpolate to original size
46
+ prediction = torch.nn.functional.interpolate(
47
+ predicted_depth.unsqueeze(1),
48
+ size=(image.height, image.width),
49
+ mode="bicubic",
50
+ align_corners=False,
51
+ ).squeeze()
52
+
53
+ # Convert to depth image
54
+ output = prediction.cpu().numpy()
55
+ depth_min = output.min()
56
+ depth_max = output.max()
57
+ max_val = (2**8) - 1
58
+
59
+ # Normalize and convert to 8-bit image
60
+ depth_image = max_val * (output - depth_min) / (depth_max - depth_min)
61
+ depth_image = depth_image.astype("uint8")
62
+
63
+ depth_pil = Image.fromarray(depth_image)
64
+
65
+ return depth_pil, output
66
+
67
+ def create_3d_model(rgb_image, depth_array, voxel_size_factor=0.01):
68
+ depth_o3d = o3d.geometry.Image(depth_array.astype(np.float32))
69
+ rgb_o3d = o3d.geometry.Image(np.array(rgb_image))
70
+
71
+ rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
72
+ rgb_o3d,
73
+ depth_o3d,
74
+ convert_rgb_to_intensity=False
75
+ )
76
+
77
+ # Create a point cloud from the RGBD image
78
+ camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
79
+ rgb_image.width,
80
+ rgb_image.height,
81
+ fx=1.0,
82
+ fy=1.0,
83
+ cx=rgb_image.width / 2.0,
84
+ cy=rgb_image.height / 2.0,
85
+ )
86
+
87
+ pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
88
+ rgbd_image,
89
+ camera_intrinsic
90
+ )
91
+
92
+ # Voxel downsample
93
+ voxel_size = max(pcd.get_max_bound() - pcd.get_min_bound()) * voxel_size_factor
94
+ voxel_grid = o3d.geometry.VoxelGrid.create_from_point_cloud(pcd, voxel_size=voxel_size)
95
+
96
+ # Save the 3D model to a temporary file
97
+ temp_dir = Path.cwd() / "temp_models"
98
+ temp_dir.mkdir(exist_ok=True)
99
+ model_path = temp_dir / "model.ply"
100
+ o3d.io.write_voxel_grid(str(model_path), voxel_grid)
101
+
102
+ return str(model_path)
103
+
104
+ def generate_depth_and_3d(input_image_path, voxel_size_factor):
105
+ image = Image.open(input_image_path).convert("RGB")
106
+ resized_image = resize_image_with_aspect_ratio(image, 2688, 1680)
107
+ depth_image, depth_array = estimate_depth(resized_image)
108
+ model_path = create_3d_model(resized_image, depth_array, voxel_size_factor=voxel_size_factor)
109
+ return depth_image, model_path
110
+
111
+ def generate_depth_button_click(depth_image_source, voxel_size_factor, input_image, output_image, overlay_image, bordered_image_output):
112
+ if depth_image_source == "Input Image":
113
+ image_path = input_image
114
+ elif depth_image_source == "Output Image":
115
+ image_path = output_image
116
+ elif depth_image_source == "Image with Margins":
117
+ image_path = bordered_image_output
118
+ else:
119
+ image_path = overlay_image
120
+
121
+ return generate_depth_and_3d(image_path, voxel_size_factor)
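generate_depth_and_3d can also be exercised on its own; a minimal sketch, assuming a local image file (the path below is a placeholder) and that the Intel/dpt-large weights can be downloaded:

from utils.depth_estimation import generate_depth_and_3d

depth_image, model_path = generate_depth_and_3d("photo.jpg", voxel_size_factor=0.01)
depth_image.save("photo_depth.png")           # the normalized 8-bit depth map
print("voxel model written to:", model_path)  # a .ply file under ./temp_models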
utils/excluded_colors.py ADDED
@@ -0,0 +1,56 @@
1
+ # utils/excluded_colors.py
2
+ import gradio as gr
3
+
4
+ from utils.color_utils import (
5
+ hex_to_rgb,
6
+ )
7
+ from utils.image_utils import (
8
+ convert_str_to_int_or_zero,
9
+ )
10
+
11
+ excluded_color_list = gr.State([(0,0,0,0),(255,255,255,0)])
12
+
13
+ def add_color(color, excluded_colors_var):
14
+ excluded_colors = excluded_colors_var.value
15
+ # Convert the color from hex to RGBA
16
+ color = hex_to_rgb(color) + (255,)
17
+ if color not in [tuple(lst) for lst in excluded_colors]:
18
+ excluded_colors.append(color)
19
+ excluded_color_lst = [tuple(lst) for lst in excluded_colors]
20
+ else:
21
+ excluded_color_lst = [tuple(lst) for lst in excluded_colors]
22
+ return excluded_color_lst, excluded_color_lst
23
+
24
+ def delete_color(row, excluded_colors_var):
25
+ global excluded_color_list
26
+ excluded_colors = list(excluded_colors_var)
27
+ row_index = convert_str_to_int_or_zero(row)
28
+ print(f"Delete Excluded Color {row_index} of {len(excluded_colors) - 1}")
29
+ if row_index <= len(excluded_colors) - 1:
30
+ del excluded_colors[row_index]
31
+ excluded_color_lst = [tuple(lst) for lst in excluded_colors]
32
+ excluded_color_list = excluded_color_lst
33
+ return excluded_color_lst
34
+ else:
35
+ excluded_color_lst = [tuple(lst) for lst in excluded_color_list]
36
+ print(f"Row index {row_index} not found in the list:{excluded_color_lst}")
37
+ excluded_color_list = excluded_color_lst
38
+ return excluded_color_lst
39
+
40
+ def build_dataframe(excluded_colors_var):
41
+ excluded_colors = [tuple(lst) for lst in excluded_colors_var.value]
42
+ #print(f"input: {excluded_colors}")
43
+ return excluded_colors
44
+
45
+ def on_input(excluded_colors):
46
+ print(f"input: {excluded_colors}")
47
+ excluded_color_lst = [tuple(lst) for lst in excluded_colors]
48
+ print(f"output: {excluded_color_lst}")
49
+ return excluded_color_lst, excluded_color_lst
50
+
51
+ # Event listener for when the user selects a row
52
+ def on_color_display_select(selected_rows, event: gr.SelectData):
53
+ # Get the selected row
54
+ selected_index = event.index[0]
55
+ print(f"Selected row index:{selected_rows[selected_index]}, index: {selected_index}")
56
+ return selected_index
utils/file_utils.py ADDED
@@ -0,0 +1,10 @@
1
+ # file_utils
2
+ import os
3
+ import utils.constants as constants
4
+
5
+ def cleanup_temp_files():
6
+ for file_path in constants.temp_files:
7
+ try:
8
+ os.remove(file_path)
9
+ except Exception as e:
10
+ print(f"Failed to delete temp file {file_path}: {e}")
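cleanup_temp_files only removes whatever paths have been appended to constants.temp_files, so something has to register it; one plausible pattern (an assumption, not shown in this file) is to hook it to interpreter shutdown:

import atexit
import utils.constants as constants
from utils.file_utils import cleanup_temp_files

constants.temp_files.append("/tmp/example_overlay.png")  # illustrative entry only
atexit.register(cleanup_temp_files)  # delete the tracked files when the process exits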
utils/hex_grid.py CHANGED
@@ -193,9 +193,7 @@ def generate_hexagon_grid_with_text(hex_size, border_size, input_image=None, ima
193
  # Prepare the text and color lists
194
  text_list = []
195
  color_list = []
196
- if add_hex_text_option == "Row-Column Coordinates":
197
- pass # Coordinates will be generated dynamically
198
- elif add_hex_text_option == "Playing Cards Sequential":
199
  text_list = constants.cards
200
  color_list = constants.card_colors
201
  elif add_hex_text_option == "Playing Cards Alternate Red and Black":
@@ -204,13 +202,13 @@ def generate_hexagon_grid_with_text(hex_size, border_size, input_image=None, ima
204
  elif add_hex_text_option == "Custom List":
205
  if custom_text_list:
206
  #text_list = [text.strip() for text in custom_text_list.split(",")]
207
- text_list = ast.literal_eval(custom_text_list) if custom_text_list else None
208
  if custom_text_color_list:
209
  #color_list = [color.strip() for color in custom_text_color_list.split(",")]
210
  color_list = ast.literal_eval(custom_text_color_list) if custom_text_color_list else None
211
  else:
212
- text_list = []
213
- color_list = []
214
  hex_index = -1 # Initialize hex index
215
  def draw_hexagon(x, y, color="#FFFFFFFF", rotation=0, outline_color="#12165380", outline_width=0, sides=6):
216
  side_length = (hex_size * 2) / math.sqrt(3)
@@ -277,10 +275,12 @@ def generate_hexagon_grid_with_text(hex_size, border_size, input_image=None, ima
277
  # Determine the text to draw
278
  if add_hex_text_option == "Row-Column Coordinates":
279
  text = f"{col},{row}"
 
 
280
  elif text_list:
281
  text = text_list[hex_index % len(text_list)]
282
  else:
283
- text = ""
284
  # Determine the text color
285
  if color_list:
286
  # Extract the opacity from the border color and add to the color_list
@@ -296,7 +296,7 @@ def generate_hexagon_grid_with_text(hex_size, border_size, input_image=None, ima
296
  text_color = border_color
297
  #text_color = "#{:02x}{:02x}{:02x}{:02x}".format(*text_color)
298
  # Skip if text is empty
299
- if text != "":
300
  print(f"Drawing Text: {text} color: {text_color} size: {font_size}")
301
  # Calculate text size using Pango
302
  # Create a temporary surface to calculate text size
 
193
  # Prepare the text and color lists
194
  text_list = []
195
  color_list = []
196
+ if add_hex_text_option == "Playing Cards Sequential":
 
 
197
  text_list = constants.cards
198
  color_list = constants.card_colors
199
  elif add_hex_text_option == "Playing Cards Alternate Red and Black":
 
202
  elif add_hex_text_option == "Custom List":
203
  if custom_text_list:
204
  #text_list = [text.strip() for text in custom_text_list.split(",")]
205
+ text_list = ast.literal_eval(custom_text_list) if custom_text_list else None
206
  if custom_text_color_list:
207
  #color_list = [color.strip() for color in custom_text_color_list.split(",")]
208
  color_list = ast.literal_eval(custom_text_color_list) if custom_text_color_list else None
209
  else:
210
+ # Coordinates will be generated dynamically
211
+ pass
212
  hex_index = -1 # Initialize hex index
213
  def draw_hexagon(x, y, color="#FFFFFFFF", rotation=0, outline_color="#12165380", outline_width=0, sides=6):
214
  side_length = (hex_size * 2) / math.sqrt(3)
 
275
  # Determine the text to draw
276
  if add_hex_text_option == "Row-Column Coordinates":
277
  text = f"{col},{row}"
278
+ elif add_hex_text_option == "Sequential Numbers":
279
+ text = f"{hex_index}"
280
  elif text_list:
281
  text = text_list[hex_index % len(text_list)]
282
  else:
283
+ text = None
284
  # Determine the text color
285
  if color_list:
286
  # Extract the opacity from the border color and add to the color_list
 
296
  text_color = border_color
297
  #text_color = "#{:02x}{:02x}{:02x}{:02x}".format(*text_color)
298
  # Skip if text is empty
299
+ if text is not None:
300
  print(f"Drawing Text: {text} color: {text_color} size: {font_size}")
301
  # Calculate text size using Pango
302
  # Create a temporary surface to calculate text size
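Because the change above keeps ast.literal_eval (rather than the commented-out split(",") approach), the custom text and color strings must be valid Python literals such as a tuple or list of strings. An illustrative check (the example values are not taken from the app):

import ast

custom_text_list = '("A", "2", "3", "J", "Q", "K")'
custom_text_color_list = '("red", "#0000FF", "#00FF00", "red", "#FFFF00", "#00FFFF")'

print(ast.literal_eval(custom_text_list))        # ('A', '2', '3', 'J', 'Q', 'K')
print(ast.literal_eval(custom_text_color_list))  # ('red', '#0000FF', '#00FF00', 'red', '#FFFF00', '#00FFFF')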
utils/lora_details.py CHANGED
@@ -21,7 +21,7 @@ def upd_prompt_notes(model_textbox_value):
21
  notes = item['notes']
22
  break
23
  else:
24
- notes = "Enter Prompt description of your image"
25
  return gr.update(value=notes)
26
 
27
  def get_trigger_words(model_textbox_value):
@@ -57,3 +57,48 @@ def upd_trigger_words(model_textbox_value):
57
  """
58
  trigger_words = get_trigger_words(model_textbox_value)
59
  return gr.update(value=trigger_words)
 
21
  notes = item['notes']
22
  break
23
  else:
24
+ notes = "Enter a prompt description of your image.\nUsing models without a LoRA may take up to 30 minutes."
25
  return gr.update(value=notes)
26
 
27
  def get_trigger_words(model_textbox_value):
 
57
  """
58
  trigger_words = get_trigger_words(model_textbox_value)
59
  return gr.update(value=trigger_words)
60
+
61
+ def approximate_token_count(prompt):
62
+ """
63
+ Approximates the number of tokens in a prompt based on word count.
64
+
65
+ Parameters:
66
+ prompt (str): The text prompt.
67
+
68
+ Returns:
69
+ int: The approximate number of tokens.
70
+ """
71
+ words = prompt.split()
72
+ # Average tokens per word (can vary based on language and model)
73
+ tokens_per_word = 1.3
74
+ return int(len(words) * tokens_per_word)
75
+
76
+ def split_prompt_by_tokens(prompt, token_number):
77
+ words = prompt.split()
78
+ # Average tokens per word (can vary based on language and model)
79
+ tokens_per_word = 1.3
80
+ return ' '.join(words[:int(token_number / tokens_per_word)]), ' '.join(words[int(token_number / tokens_per_word):])
81
+
82
+ # Split prompt precisely by token count
83
+ import tiktoken
84
+
85
+ def split_prompt_precisely(prompt, max_tokens=77, model="gpt-3.5-turbo"):
86
+ try:
87
+ encoding = tiktoken.encoding_for_model(model)
88
+ except KeyError:
89
+ encoding = tiktoken.get_encoding("cl100k_base")
90
+
91
+ tokens = encoding.encode(prompt)
92
+
93
+ if len(tokens) <= max_tokens:
94
+ return prompt, ""
95
+
96
+ # Find the split point
97
+ split_point = max_tokens
98
+ split_tokens = tokens[:split_point]
99
+ remaining_tokens = tokens[split_point:]
100
+
101
+ split_prompt = encoding.decode(split_tokens)
102
+ remaining_prompt = encoding.decode(remaining_tokens)
103
+
104
+ return split_prompt, remaining_prompt
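These helpers back the long-prompt handling in generate_image_lowmem: approximate_token_count gates the split, and split_prompt_precisely does the actual cut (the 77-token default presumably mirrors the CLIP text-encoder limit, with tiktoken's encoding used only as an approximation). An illustrative run:

from utils.lora_details import approximate_token_count, split_prompt_precisely

prompt = "a hexagonal mosaic of a mountain lake at sunrise, volumetric light, " * 10  # deliberately long
print(approximate_token_count(prompt))  # rough word-count-based estimate

head, tail = split_prompt_precisely(prompt, max_tokens=77)
print(len(head), "characters kept;", len(tail), "characters overflow into prompt_2")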
utils/version_info.py ADDED
@@ -0,0 +1,81 @@
1
+ # utils/version_info.py
2
+
3
+ import subprocess
4
+ import os
5
+ import torch
6
+ import sys
7
+ import gradio as gr
8
+
9
+ git = os.environ.get('GIT', "git")
10
+
11
+ def commit_hash():
12
+ try:
13
+ return subprocess.check_output([git, "rev-parse", "HEAD"], shell=False, encoding='utf8').strip()
14
+ except Exception:
15
+ return "<none>"
16
+
17
+ def get_xformers_version():
18
+ try:
19
+ import xformers
20
+ return xformers.__version__
21
+ except Exception:
22
+ return "<none>"
23
+ def get_transformers_version():
24
+ try:
25
+ import transformers
26
+ return transformers.__version__
27
+ except Exception:
28
+ return "<none>"
29
+
30
+ def get_accelerate_version():
31
+ try:
32
+ import accelerate
33
+ return accelerate.__version__
34
+ except Exception:
35
+ return "<none>"
36
+ def get_safetensors_version():
37
+ try:
38
+ import safetensors
39
+ return safetensors.__version__
40
+ except Exception:
41
+ return "<none>"
42
+ def get_diffusers_version():
43
+ try:
44
+ import diffusers
45
+ return diffusers.__version__
46
+ except Exception:
47
+ return "<none>"
48
+
49
+ def get_torch_info():
50
+ try:
51
+ return [torch.__version__, f"CUDA Version:{torch.version.cuda}", f"Available:{torch.cuda.is_available()}", f"flash attention enabled: {torch.backends.cuda.flash_sdp_enabled()}", f"Capabilities: {torch.cuda.get_device_capability(0)}", f"Device Name: {torch.cuda.get_device_name(0)}"]
52
+ except Exception:
53
+ return "<none>"
54
+
55
+ def versions_html():
56
+ python_version = ".".join([str(x) for x in sys.version_info[0:3]])
57
+ commit = commit_hash()
58
+
59
+ # Define the Toggle Dark Mode link with JavaScript
60
+ toggle_dark_link = '''
61
+ <a href="#" onclick="document.body.classList.toggle('dark'); return false;" style="cursor: pointer; text-decoration: underline; color: #1a0dab;">
62
+ Toggle Dark Mode
63
+ </a>
64
+ '''
65
+
66
+ # version: <a href="https://github.com/Oncorporation/audiocraft/commit/{"" if commit == "<none>" else commit}" target="_blank">{"click" if commit == "<none>" else commit}</a>
67
+ return f"""
68
+ version: <a href="https://github.com/Oncorporation/audiocraft/commit/{"" if commit == "<none>" else commit}" target="_blank">{"click" if commit == "<none>" else commit}</a>
69
+ &#x2000;•&#x2000;
70
+ python: <span title="{sys.version}">{python_version}</span>
71
+ &#x2000;•&#x2000;
72
+ torch: {getattr(torch, '__long_version__',torch.__version__)}
73
+ &#x2000;•&#x2000;
74
+ diffusers: {get_diffusers_version()}
75
+ &#x2000;•&#x2000;
76
+ transformers: {get_transformers_version()}
77
+ &#x2000;•&#x2000;
78
+ gradio: {gr.__version__}
79
+ &#x2000;•&#x2000;
80
+ {toggle_dark_link}
81
+ """
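versions_html returns a single HTML snippet, so a natural way to surface it (an assumption, not shown in this diff) is a gr.HTML component in the app layout:

import gradio as gr
from utils.version_info import versions_html

with gr.Blocks() as demo:
    gr.HTML(versions_html(), elem_id="versions")  # renders the version / commit footer

demo.launch()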