VanguardAI committed
Commit 82043d5 · verified · 1 Parent(s): cc79e1c

Update app.py

Files changed (1)
  1. app.py +8 -10
app.py CHANGED
@@ -4,7 +4,7 @@ import os
 import numpy as np
 from groq import Groq
 import spaces
-from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
+from transformers import AutoModel, AutoTokenizer
 from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
 from parler_tts import ParlerTTSForConditionalGeneration
 import soundfile as sf
@@ -20,8 +20,6 @@ import requests
 client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 MODEL = 'llama3-groq-70b-8192-tool-use-preview'
 
-############### THERE IS AN ERROR IN MINICPM, IT HAS TO BE REPLACED ###############
-
 # Load MiniCPM-V-2_6 with 4-bit quantization
 text_model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True,
 device_map="auto", torch_dtype=torch.bfloat16)
@@ -30,9 +28,9 @@ tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_co
 tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1")
 tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-large-v1")
 
-image_model = UNet2DConditionModel.from_config("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet")
-image_pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", unet=image_model, torch_dtype=torch.float16, variant="fp16")
-image_pipe.scheduler = EulerDiscreteScheduler.from_config(image_pipe.scheduler.config, timestep_spacing="trailing")
+image_model = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet")
+image_pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", unet=image_model, torch_dtype=torch.float16)
+image_pipe.scheduler = EulerDiscreteScheduler.from_pretrained(image_pipe.scheduler.config, timestep_spacing="trailing")
 
 # Initialize voice-only mode
 def play_voice_output(response):
@@ -174,13 +172,13 @@ def initialize_tools():
 }
 ]
 return tools
+
 @spaces.GPU()
-# Gradio Interface
 def main_interface(user_prompt, image=None, video=None, audio=None, doc=None, voice_only=False):
-text_model = text_model.to(device='cuda', dtype=torch.bfloat16)
+text_model.to(device='cuda', dtype=torch.bfloat16)
 tts_model.to("cuda")
 image_model.to("cuda", torch.float16)
-image_pip.to("cuda")
+image_pipe.to("cuda")
 response = handle_input(user_prompt, image=image, video=video, audio=audio, doc=doc)
 if voice_only:
 audio_file = play_voice_output(response)
@@ -205,4 +203,4 @@ with gr.Blocks() as demo:
 outputs=output
 )
 
-demo.launch(inline=False)
+demo.launch(inline=False)
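
Note: the comment kept above the MiniCPM load still says "with 4-bit quantization", but after this commit the BitsAndBytesConfig import is gone and the model is loaded in plain bfloat16. If 4-bit loading is actually intended, a minimal sketch using the standard transformers quantization API (not code from this repo) would be:

    import torch
    from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig

    # 4-bit weights with bfloat16 compute, matching what the comment describes
    bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
    text_model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True,
                                           device_map="auto", quantization_config=bnb_config)
    tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)

Otherwise the comment should probably just say the model is loaded in bfloat16.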
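
Note: the new scheduler line calls EulerDiscreteScheduler.from_pretrained(image_pipe.scheduler.config, ...). In diffusers, from_pretrained expects a repo id or local path, while from_config is the method that takes an existing config object, so this line may still not do what is intended. A minimal sketch of the usual diffusers pattern (stock API, not this repo's code):

    import torch
    from diffusers import StableDiffusionXLPipeline, EulerDiscreteScheduler

    image_pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
        variant="fp16",
    )
    # Rebuild the scheduler from the pipeline's own config, keeping trailing timestep spacing
    image_pipe.scheduler = EulerDiscreteScheduler.from_config(
        image_pipe.scheduler.config, timestep_spacing="trailing"
    )

Loading the UNet separately with UNet2DConditionModel.from_pretrained(..., subfolder="unet") and passing it as unet= is mainly useful when the UNet weights come from a different repository than the rest of the pipeline; with the base SDXL repo it is redundant.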
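
Note on the main_interface change: the old line text_model = text_model.to(device='cuda', dtype=torch.bfloat16) rebinds a module-level name inside the function, so Python treats text_model as a local variable and the right-hand side raises UnboundLocalError before the assignment ever happens. Calling .to() without reassignment, as this commit does, works because nn.Module.to() moves the module in place and returns it. A minimal sketch of the pattern (text_model, tts_model, image_pipe and handle_input are this repo's globals; the decorator is the Hugging Face ZeroGPU helper):

    import spaces
    import torch

    @spaces.GPU()
    def main_interface(user_prompt, image=None, video=None, audio=None, doc=None, voice_only=False):
        # Move the already-loaded global modules onto the GPU for this call.
        # No rebinding, so the names keep referring to the module-level objects.
        text_model.to(device='cuda', dtype=torch.bfloat16)
        tts_model.to("cuda")
        image_pipe.to("cuda")
        return handle_input(user_prompt, image=image, video=video, audio=audio, doc=doc)

The alternative is declaring "global text_model" and keeping the reassignment, but the in-place .to() call is the smaller fix.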