# Spaces: Running on Zero
import tempfile

import gradio as gr
import soundfile as sf
import torch
from diffusers import StableAudioPipeline
# Load the StableAudio pipeline in half precision and move it to the GPU
pipe = StableAudioPipeline.from_pretrained(
    "stabilityai/stable-audio-open-1.0",
    torch_dtype=torch.float16,
).to("cuda")
# Define the function to generate the sound based on a text prompt
def generate_sound(prompt, negative_prompt, seed, inference_steps, duration, waveforms):
    """Generate audio from a text prompt and return the path of the WAV file.

    Args:
        prompt: Text describing the sound to generate.
        negative_prompt: Text describing what the sound should not contain.
        seed: Seed for the random generator; coerced to ``int``.
        inference_steps: Number of inference steps; coerced to ``int``.
        duration: Forwarded to the pipeline as ``audio_end_in_s``.
        waveforms: Number of waveforms requested per prompt; coerced to
            ``int``. Only the first generated waveform is written to disk.

    Returns:
        Path of a newly created ``.wav`` file holding the first waveform.
    """
    # UI sliders may deliver floats (e.g. 42.0); manual_seed() only accepts
    # integers, and step/waveform counts are integer quantities as well.
    generator = torch.Generator("cuda").manual_seed(int(seed))

    # Run the audio generation
    audio = pipe(
        prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(inference_steps),
        audio_end_in_s=duration,
        num_waveforms_per_prompt=int(waveforms),
        generator=generator,
    ).audios

    # Take the first waveform, transpose it and convert it to a float32 NumPy
    # array on the CPU (presumably channels-first -> frames-first, which is
    # the layout soundfile writes -- confirm against the pipeline output).
    output = audio[0].T.float().cpu().numpy()

    # Write to a unique file per call: a fixed file name would let concurrent
    # or successive requests overwrite each other's result. The handle is
    # closed before writing so soundfile can reopen the path on any platform.
    # NOTE: the file is not deleted here; the caller/UI owns its lifetime.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, output, pipe.vae.sampling_rate)
    return output_path
# Define the Gradio interface.
# Components come from the top-level `gr` namespace (the legacy `gr.inputs`
# namespace and its `default=` keyword are not available in current Gradio
# releases; initial values are passed as `value=`). This also matches the
# `gr.Audio` output component used below.
app = gr.Interface(
    fn=generate_sound,
    inputs=[
        gr.Textbox(label="Text Prompt", placeholder="Describe the sound you'd like to generate..."),
        gr.Textbox(label="Negative Prompt", placeholder="Describe what you don't want in the sound..."),
        gr.Slider(label="Seed", minimum=0, maximum=10000, step=1, value=0),
        gr.Slider(label="Inference Steps", minimum=50, maximum=500, step=10, value=200),
        gr.Slider(label="Duration (seconds)", minimum=1.0, maximum=30.0, step=0.5, value=10.0),
        gr.Slider(label="Number of Waveforms", minimum=1, maximum=5, step=1, value=1),
    ],
    outputs=gr.Audio(label="Generated Sound"),
    # The app generates sounds from text, not speech, so the title says
    # "Text-to-Audio" rather than "Text-to-Speech".
    title="StableAudio Text-to-Audio Generator",
    description="Generate high-quality audio from text using StableAudio.",
)

# Start the web UI
app.launch()