JiantaoLin committed
Commit 52a094c · 1 Parent(s): 0213646
pipeline/kiss3d_wrapper.py CHANGED
@@ -90,7 +90,8 @@ def init_wrapper_from_config(config_path):
 
     # load lora weights
     flux_pipe.load_lora_weights(flux_lora_pth)
-    flux_pipe.to(device=flux_device)
+    # flux_pipe.to(device=flux_device)
+    flux_pipe.enable_model_cpu_offload(device=flux_device)
     # flux_pipe = None
 
     # load redux model
@@ -163,7 +164,7 @@ def init_wrapper_from_config(config_path):
         logger.info('==> Loading LLM ...')
         llm_device = llm_configs.get('device', 'cpu')
         llm, llm_tokenizer = load_llm_model(llm_configs['base_model'])
-        llm.to(llm_device)
+        # llm.to(llm_device)
         # logger.warning(f"GPU memory allocated after load llm model on {llm_device}: {torch.cuda.memory_allocated(device=llm_device) / 1024**3} GB")
     else:
         llm, llm_tokenizer = None, None
@@ -267,11 +268,14 @@ class kiss3d_wrapper(object):
         return caption_text
     # @spaces.GPU
     def get_detailed_prompt(self, prompt, seed=None):
+        self.llm_model.to(self.config['llm']['device'])
         if self.llm_model is not None:
             detailed_prompt = get_llm_response(self.llm_model, self.llm_tokenizer, prompt, seed=seed)
 
             logger.info(f"LLM refined prompt result: \"{detailed_prompt}\"")
             return detailed_prompt
+        self.llm_model.to('cpu')
+        torch.cuda.empty_cache()
         return prompt
 
     def del_llm_model(self):
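For orientation, both changes in this file follow the same memory-saving pattern: keep large models parked on the CPU and occupy the GPU only while they are actually running. Below is a minimal, self-contained sketch of that pattern, not the repository's exact code; the model ID, device string, and the refine_prompt helper are illustrative assumptions.

import torch
from diffusers import FluxPipeline

flux_device = "cuda:0"  # illustrative; mirrors the 'device' entry in the config

# Flux: instead of pipe.to(flux_device), let diffusers stream each sub-module
# to the GPU only for the duration of its forward pass, then move it back.
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
)
pipe.load_lora_weights("./checkpoint/flux_lora/rgb_normal_large.safetensors")
pipe.enable_model_cpu_offload(device=flux_device)

# LLM: move it to the GPU only for a single prompt-refinement call, then park
# it back on the CPU and release the cached GPU memory (hypothetical helper).
def refine_prompt(llm, tokenizer, prompt, device=flux_device):
    llm.to(device)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        output_ids = llm.generate(**inputs, max_new_tokens=256)
    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    llm.to("cpu")
    torch.cuda.empty_cache()
    return text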
pipeline/pipeline_config/default.yaml CHANGED
@@ -1,6 +1,6 @@
 flux:
   base_model: "https://huggingface.co/Comfy-Org/flux1-dev/blob/main/flux1-dev-fp8.safetensors"
-  flux_dtype: 'fp8'
+  flux_dtype: 'bf16'
   lora: "./checkpoint/flux_lora/rgb_normal_large.safetensors"
   controlnet: "InstantX/FLUX.1-dev-Controlnet-Union"
   redux: "black-forest-labs/FLUX.1-Redux-dev"