Spaces:

yasserrmd
/

stableaudio

Running on Zero

App Files Files Community

yasserrmd committed 12 days ago

Commit

7d1c060

•

1 Parent(s): 8f8e68e

Create app.py

Browse files

Files changed (1) hide show

app.py +48 -0

app.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import tempfile
+
+import gradio as gr
+import soundfile as sf
+import spaces
+import torch
+from diffusers import StableAudioPipeline
+# Load the StableAudio pipeline model
+pipe = StableAudioPipeline.from_pretrained("stabilityai/stable-audio-open-1.0", torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+# Define the function to generate the sound based on a text prompt
+@spaces.GPU
+def generate_sound(prompt, negative_prompt, seed, inference_steps, duration, waveforms):
+    # Set the seed for reproducibility
+    generator = torch.Generator("cuda").manual_seed(seed)
+    # Run the audio generation
+    audio = pipe(
+        prompt,
+        negative_prompt=negative_prompt,
+        num_inference_steps=inference_steps,
+        audio_end_in_s=duration,
+        num_waveforms_per_prompt=waveforms,
+        generator=generator,
+    ).audios
+    # Get the output and save to a file
+    output = audio[0].T.float().cpu().numpy()
+    sf.write("generated_sound.wav", output, pipe.vae.sampling_rate)
+    return "generated_sound.wav"
+# Define the Gradio interface
+app = gr.Interface(
+    fn=generate_sound,
+    inputs=[
+        gr.inputs.Textbox(label="Text Prompt", placeholder="Describe the sound you'd like to generate..."),
+        gr.inputs.Textbox(label="Negative Prompt", placeholder="Describe what you don't want in the sound..."),
+        gr.inputs.Slider(label="Seed", minimum=0, maximum=10000, step=1, default=0),
+        gr.inputs.Slider(label="Inference Steps", minimum=50, maximum=500, step=10, default=200),
+        gr.inputs.Slider(label="Duration (seconds)", minimum=1.0, maximum=30.0, step=0.5, default=10.0),
+        gr.inputs.Slider(label="Number of Waveforms", minimum=1, maximum=5, step=1, default=1)
+    ],
+    outputs=gr.Audio(label="Generated Sound"),
+    title="StableAudio Text-to-Speech Generator",
+    description="Generate high-quality audio from text using StableAudio."
+)
+app.launch()