yasserrmd committed on
Commit
7d1c060
1 Parent(s): 8f8e68e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import tempfile

import gradio as gr
import soundfile as sf
import spaces  # provides the @spaces.GPU decorator; keep it ahead of torch/diffusers
import torch
from diffusers import StableAudioPipeline
5
+
6
# Fetch the Stable Audio Open checkpoint in half precision and move the
# resulting pipeline onto the CUDA device in a single expression.
pipe = StableAudioPipeline.from_pretrained(
    "stabilityai/stable-audio-open-1.0",
    torch_dtype=torch.float16,
).to("cuda")
9
+
10
+ # Define the function to generate the sound based on a text prompt
11
@spaces.GPU
def generate_sound(prompt, negative_prompt, seed, inference_steps, duration, waveforms):
    """Generate audio for a text prompt and return the path of the WAV file.

    Args:
        prompt: Text describing the sound to generate.
        negative_prompt: Text describing what the sound should not contain.
        seed: Seed for the CUDA random generator, for reproducible output.
        inference_steps: Forwarded to the pipeline as ``num_inference_steps``.
        duration: Forwarded to the pipeline as ``audio_end_in_s`` (seconds).
        waveforms: Forwarded as ``num_waveforms_per_prompt``. Only the first
            generated waveform is written to disk and returned.

    Returns:
        Path of a newly created ``.wav`` file holding the first waveform.
    """
    # UI sliders can deliver numbers as floats, while the seed and the count
    # arguments must be integers, so convert explicitly.
    generator = torch.Generator("cuda").manual_seed(int(seed))

    # Run the audio generation.
    audio = pipe(
        prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(inference_steps),
        audio_end_in_s=float(duration),
        num_waveforms_per_prompt=int(waveforms),
        generator=generator,
    ).audios

    # Transpose the first waveform and convert it to a float32 NumPy array
    # on the CPU before handing it to soundfile.
    output = audio[0].T.float().cpu().numpy()

    # Write to a unique file per call: a single fixed filename would let
    # concurrent requests overwrite each other's result before it is served.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        output_path = wav_file.name
    sf.write(output_path, output, pipe.vae.sampling_rate)

    return output_path
31
+
32
# Define the Gradio interface.
# Components come from the top-level namespace (gr.Textbox / gr.Slider) and
# take their initial value via `value=`; the legacy `gr.inputs.*` classes and
# their `default=` keyword are not available in current Gradio releases.
app = gr.Interface(
    fn=generate_sound,
    inputs=[
        gr.Textbox(label="Text Prompt", placeholder="Describe the sound you'd like to generate..."),
        gr.Textbox(label="Negative Prompt", placeholder="Describe what you don't want in the sound..."),
        gr.Slider(label="Seed", minimum=0, maximum=10000, step=1, value=0),
        gr.Slider(label="Inference Steps", minimum=50, maximum=500, step=10, value=200),
        gr.Slider(label="Duration (seconds)", minimum=1.0, maximum=30.0, step=0.5, value=10.0),
        gr.Slider(label="Number of Waveforms", minimum=1, maximum=5, step=1, value=1),
    ],
    outputs=gr.Audio(label="Generated Sound"),
    # The model produces general audio from text, not synthesized speech.
    title="StableAudio Text-to-Audio Generator",
    description="Generate high-quality audio from text using StableAudio.",
)

app.launch()