Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,7 @@ import os
|
|
4 |
import numpy as np
|
5 |
from groq import Groq
|
6 |
import spaces
|
7 |
-
from transformers import AutoModel, AutoTokenizer
|
8 |
from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
|
9 |
from parler_tts import ParlerTTSForConditionalGeneration
|
10 |
import soundfile as sf
|
@@ -20,8 +20,6 @@ import requests
|
|
20 |
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
21 |
MODEL = 'llama3-groq-70b-8192-tool-use-preview'
|
22 |
|
23 |
-
############### MINICPM HAS AN ERROR; IT NEEDS TO BE REPLACED ###############
|
24 |
-
|
25 |
# Load MiniCPM-V-2 in bfloat16 (no quantization is configured in this call)
|
26 |
text_model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True,
|
27 |
device_map="auto", torch_dtype=torch.bfloat16)
|
@@ -30,9 +28,9 @@ tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_co
|
|
30 |
tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1")
|
31 |
tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-large-v1")
|
32 |
|
33 |
-
image_model = UNet2DConditionModel.
|
34 |
-
image_pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", unet=image_model, torch_dtype=torch.float16
|
35 |
-
image_pipe.scheduler = EulerDiscreteScheduler.
|
36 |
|
37 |
# Initialize voice-only mode
|
38 |
def play_voice_output(response):
|
@@ -174,13 +172,13 @@ def initialize_tools():
|
|
174 |
}
|
175 |
]
|
176 |
return tools
|
|
|
177 |
@spaces.GPU()
|
178 |
-
# Gradio Interface
|
179 |
def main_interface(user_prompt, image=None, video=None, audio=None, doc=None, voice_only=False):
|
180 |
-
text_model
|
181 |
tts_model.to("cuda")
|
182 |
image_model.to("cuda", torch.float16)
|
183 |
-
|
184 |
response = handle_input(user_prompt, image=image, video=video, audio=audio, doc=doc)
|
185 |
if voice_only:
|
186 |
audio_file = play_voice_output(response)
|
@@ -205,4 +203,4 @@ with gr.Blocks() as demo:
|
|
205 |
outputs=output
|
206 |
)
|
207 |
|
208 |
-
demo.launch(inline=False)
|
|
|
4 |
import numpy as np
|
5 |
from groq import Groq
|
6 |
import spaces
|
7 |
+
from transformers import AutoModel, AutoTokenizer
|
8 |
from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
|
9 |
from parler_tts import ParlerTTSForConditionalGeneration
|
10 |
import soundfile as sf
|
|
|
20 |
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
21 |
MODEL = 'llama3-groq-70b-8192-tool-use-preview'
|
22 |
|
|
|
|
|
23 |
# Load MiniCPM-V-2 in bfloat16 (no quantization is configured in this call)
|
24 |
text_model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True,
|
25 |
device_map="auto", torch_dtype=torch.bfloat16)
|
|
|
28 |
tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1")
|
29 |
tts_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-large-v1")
|
30 |
|
31 |
+
image_model = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet")
|
32 |
+
image_pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", unet=image_model, torch_dtype=torch.float16)
|
33 |
+
# Rebuild the scheduler from the pipeline's existing scheduler config, overriding
# only the timestep spacing. `from_config` is the constructor that accepts a
# config object; `from_pretrained` expects a repo id or local path and would
# treat the config as a (non-existent) model location.
image_pipe.scheduler = EulerDiscreteScheduler.from_config(image_pipe.scheduler.config, timestep_spacing="trailing")
|
34 |
|
35 |
# Initialize voice-only mode
|
36 |
def play_voice_output(response):
|
|
|
172 |
}
|
173 |
]
|
174 |
return tools
|
175 |
+
|
176 |
@spaces.GPU()
|
|
|
177 |
def main_interface(user_prompt, image=None, video=None, audio=None, doc=None, voice_only=False):
|
178 |
+
text_model.to(device='cuda', dtype=torch.bfloat16)
|
179 |
tts_model.to("cuda")
|
180 |
image_model.to("cuda", torch.float16)
|
181 |
+
image_pipe.to("cuda")
|
182 |
response = handle_input(user_prompt, image=image, video=video, audio=audio, doc=doc)
|
183 |
if voice_only:
|
184 |
audio_file = play_voice_output(response)
|
|
|
203 |
outputs=output
|
204 |
)
|
205 |
|
206 |
+
demo.launch(inline=False)
|