VanguardAI committed
Commit 2e5cfb3 · verified · Parent: 82043d5

Update app.py

Files changed (1): app.py (+12 -5)
app.py CHANGED
@@ -16,11 +16,12 @@ from langchain_community.llms import OpenAI
 from PIL import Image
 from decord import VideoReader, cpu
 import requests
+from huggingface_hub import hf_hub_download
+from safetensors.torch import load_file
 
 client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 MODEL = 'llama3-groq-70b-8192-tool-use-preview'
 
-# Load MiniCPM-V-2_6 with 4-bit quantization
 text_model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True,
                                        device_map="auto", torch_dtype=torch.bfloat16)
 tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
@@ -28,9 +29,15 @@ tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
 tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1")
 tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-large-v1")
 
-image_model = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet")
-image_pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", unet=image_model, torch_dtype=torch.float16)
-image_pipe.scheduler = EulerDiscreteScheduler.from_pretrained(image_pipe.scheduler.config, timestep_spacing="trailing")
+# Corrected image model and pipeline setup
+base = "stabilityai/stable-diffusion-xl-base-1.0"
+repo = "ByteDance/SDXL-Lightning"
+ckpt = "sdxl_lightning_4step_unet.safetensors"
+
+unet = UNet2DConditionModel.from_config(base, subfolder="unet").to("cuda", torch.float16)
+unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device="cuda"))
+image_pipe = StableDiffusionXLPipeline.from_pretrained(base, unet=unet, torch_dtype=torch.float16, variant="fp16").to("cuda")
+image_pipe.scheduler = EulerDiscreteScheduler.from_config(image_pipe.scheduler.config, timestep_spacing="trailing")
 
 # Initialize voice-only mode
 def play_voice_output(response):
@@ -177,7 +184,7 @@ def initialize_tools():
 def main_interface(user_prompt, image=None, video=None, audio=None, doc=None, voice_only=False):
     text_model.to(device='cuda', dtype=torch.bfloat16)
     tts_model.to("cuda")
-    image_model.to("cuda", torch.float16)
+    unet.to("cuda", torch.float16)
     image_pipe.to("cuda")
     response = handle_input(user_prompt, image=image, video=video, audio=audio, doc=doc)
     if voice_only:
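
Note on the fix (context, not part of the commit): the old code passed a scheduler config object to EulerDiscreteScheduler.from_pretrained, which expects a repo path; from_config is the constructor that accepts an in-memory config, so the new code uses the correct pattern. The replacement UNet is the distilled SDXL-Lightning 4-step checkpoint, which is designed to run with exactly four inference steps and with classifier-free guidance disabled. Below is a minimal usage sketch for the image_pipe defined above, following the settings recommended on the ByteDance/SDXL-Lightning model card; the prompt and output filename are hypothetical:

# Minimal sketch (not from this commit): invoke the Lightning pipeline with
# the settings the 4-step checkpoint was distilled for.
image = image_pipe(
    "a watercolor painting of a lighthouse",  # hypothetical prompt
    num_inference_steps=4,  # step count must match the 4-step checkpoint
    guidance_scale=0,       # Lightning checkpoints are distilled without CFG
).images[0]
image.save("output.png")  # hypothetical output path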