Spaces:

xiaozaa
/

catvton-flux-try-on

Running on Zero

App Files Files Community

asutermo commited on Jan 23

Commit

bf8ca35

1 Parent(s): da1d0d5

nearly working

Browse files

Files changed (2) hide show

cog.yaml +2 -2
predict.py +19 -20

cog.yaml CHANGED Viewed

@@ -18,9 +18,9 @@ build:
     - torch==2.4.0
     - transformers==4.43.3
     - datasets==2.20.0
-    - accelerate==0.26.1
     - jupyter==1.0.0
-    - numpy==1.26.3
     - pillow==10.2.0
     - peft==0.13.2
     - diffusers>=0.32.0

     - torch==2.4.0
     - transformers==4.43.3
     - datasets==2.20.0
+    - accelerate==1.3.0
     - jupyter==1.0.0
+    - numpy==1.26.4
     - pillow==10.2.0
     - peft==0.13.2
     - diffusers>=0.32.0

predict.py CHANGED Viewed

@@ -1,38 +1,37 @@
-import os
-import subprocess
-import time
 from cog import BasePredictor, Input, Path, Secret
-from diffusers.utils import load_image, check_min_version
 from diffusers import FluxFillPipeline
 from diffusers import FluxTransformer2DModel
-import numpy as np
 import torch
 from torchvision import transforms
 class Predictor(BasePredictor):
     def setup(self) -> None:
         """Load part of the model into memory to make running multiple predictions efficient"""
-        self.dtype = torch.bloat16
         self.try_on_transformer = FluxTransformer2DModel.from_pretrained("xiaozaa/catvton-flux-beta",
-            torch_dtype=self.dtype)
         self.try_off_transformer = FluxTransformer2DModel.from_pretrained("xiaozaa/cat-tryoff-flux",
-            torch_dtype=self.dtype)
     def predict(self,
                 hf_token: Secret = Input(description="Hugging Face API token. Create a write token at https://huggingface.co/settings/token. You also need to approve the Flux Dev terms."),
                 image: Path = Input(description="Image file path", default="https://github.com/nftblackmagic/catvton-flux/raw/main/example/person/1.jpg"),
-                mask: Path = Input(description="Mask file path", default="https://github.com/nftblackmagic/catvton-flux/blob/main/example/person/1_mask.png?raw=true"),
                 try_on: bool = Input(True, description="Try on or try off"),
                 garment: Path = Input(description="Garment file path", default="https://github.com/nftblackmagic/catvton-flux/raw/main/example/garment/00035_00.jpg"),
                 num_steps: int = Input(50, description="Number of steps to run the model for"),
                 guidance_scale: float = Input(30, description="Guidance scale for the model"),
                 seed: int = Input(0, description="Seed for the model"),
                 width: int = Input(576, description="Width of the output image"),
-                height: int = Input(768, description="Height of the output image")):
         size = (width, height)
         if try_on:
             self.transformer = self.try_on_transformer
         else:
@@ -41,11 +40,11 @@ class Predictor(BasePredictor):
         self.pipe = FluxFillPipeline.from_pretrained(
             "black-forest-labs/FLUX.1-dev",
             transformer=self.transformer,
-            torch_dtype=self.dtype,
-            token=hf_token
         ).to("cuda")
-        self.pipe.transformer.to(self.dtype)
         transform = transforms.Compose([
             transforms.ToTensor(),
@@ -55,10 +54,6 @@ class Predictor(BasePredictor):
             transforms.ToTensor()
         ])
-        i = load_image(image).convert("RGB").resize(size)
-        m = load_image(mask).convert("RGB").resize(size)
-        g = load_image(garment).convert("RGB").resize(size)
         # Transform images using the new preprocessing
         image_tensor = transform(i)
         mask_tensor = mask_transform(m)[:1]  # Take only first channel
@@ -94,5 +89,9 @@ class Predictor(BasePredictor):
         width = size[0]
         garment_result = result.crop((0, 0, width, size[1]))
         try_result = result.crop((width, 0, width * 2, size[1]))
-        return garment_result, try_result

+from typing import List
 from cog import BasePredictor, Input, Path, Secret
+from diffusers.utils import load_image
 from diffusers import FluxFillPipeline
 from diffusers import FluxTransformer2DModel
 import torch
 from torchvision import transforms
 class Predictor(BasePredictor):
     def setup(self) -> None:
         """Load part of the model into memory to make running multiple predictions efficient"""
         self.try_on_transformer = FluxTransformer2DModel.from_pretrained("xiaozaa/catvton-flux-beta",
+            torch_dtype=torch.bfloat16)
         self.try_off_transformer = FluxTransformer2DModel.from_pretrained("xiaozaa/cat-tryoff-flux",
+            torch_dtype=torch.bfloat16)
     def predict(self,
                 hf_token: Secret = Input(description="Hugging Face API token. Create a write token at https://huggingface.co/settings/token. You also need to approve the Flux Dev terms."),
                 image: Path = Input(description="Image file path", default="https://github.com/nftblackmagic/catvton-flux/raw/main/example/person/1.jpg"),
+                mask: Path = Input(description="Mask file path", default="https://github.com/nftblackmagic/catvton-flux/blob/main/example/person/1_mask.png"),
                 try_on: bool = Input(True, description="Try on or try off"),
                 garment: Path = Input(description="Garment file path", default="https://github.com/nftblackmagic/catvton-flux/raw/main/example/garment/00035_00.jpg"),
                 num_steps: int = Input(50, description="Number of steps to run the model for"),
                 guidance_scale: float = Input(30, description="Guidance scale for the model"),
                 seed: int = Input(0, description="Seed for the model"),
                 width: int = Input(576, description="Width of the output image"),
+                height: int = Input(768, description="Height of the output image"))  -> List[Path]:
         size = (width, height)
+        i = load_image(str(image)).convert("RGB").resize(size)
+        m = load_image(str(mask)).convert("RGB").resize(size)
+        g = load_image(str(garment)).convert("RGB").resize(size)
         if try_on:
             self.transformer = self.try_on_transformer
         else:
         self.pipe = FluxFillPipeline.from_pretrained(
             "black-forest-labs/FLUX.1-dev",
             transformer=self.transformer,
+            torch_dtype=torch.bfloat16,
+            token=hf_token.get_secret_value()
         ).to("cuda")
+        self.pipe.transformer.to(torch.bfloat16)
         transform = transforms.Compose([
             transforms.ToTensor(),
             transforms.ToTensor()
         ])
         # Transform images using the new preprocessing
         image_tensor = transform(i)
         mask_tensor = mask_transform(m)[:1]  # Take only first channel
         width = size[0]
         garment_result = result.crop((0, 0, width, size[1]))
         try_result = result.crop((width, 0, width * 2, size[1]))
+        out_path = "/tmp/try.png"
+        try_result.save(out_path)
+        garm_out_path = "/tmp/garment.png"
+        garment_result.save(garm_out_path)
+        return [Path(out_path), Path(garm_out_path)]