Spaces:

yasserrmd
/

stableaudio

Running on Zero

App Files Files Community

stableaudio / app.py

yasserrmd

Create app.py

7d1c060 verified 13 days ago

raw

history blame

1.89 kB

	# `spaces` provides the `@spaces.GPU` decorator used below; it is imported
	# first so it is loaded before any CUDA work is triggered.
	import spaces
	import torch
	import soundfile as sf
	from diffusers import StableAudioPipeline
	import gradio as gr

	# Build the StableAudio pipeline once at import time: weights are loaded in
	# float16 and the resulting pipeline is moved to the CUDA device.
	pipe = StableAudioPipeline.from_pretrained(
	    "stabilityai/stable-audio-open-1.0",
	    torch_dtype=torch.float16,
	).to("cuda")

	# Define the function to generate the sound based on a text prompt
	@spaces.GPU
	def generate_sound(prompt, negative_prompt, seed, inference_steps, duration, waveforms):
	    """Generate audio from a text prompt and return the path of a WAV file.

	    Args:
	        prompt: Text describing the sound to generate.
	        negative_prompt: Text describing what the sound should not contain.
	        seed: Seed for the CUDA random generator (coerced to ``int``).
	        inference_steps: Number of denoising steps (coerced to ``int``).
	        duration: Value passed as ``audio_end_in_s`` (coerced to ``float``).
	        waveforms: Number of waveforms generated per prompt (coerced to
	            ``int``). Only the first generated waveform is written out.

	    Returns:
	        Path of the WAV file that was written, as a string.
	    """
	    # Local import keeps this block self-contained.
	    import tempfile

	    # UI sliders may deliver floats even when step=1; the seed and the
	    # step/waveform counts have to be real integers.
	    seed = int(seed)
	    inference_steps = int(inference_steps)
	    waveforms = int(waveforms)
	    duration = float(duration)

	    # Set the seed for reproducibility
	    generator = torch.Generator("cuda").manual_seed(seed)

	    # Run the audio generation
	    audio = pipe(
	        prompt,
	        negative_prompt=negative_prompt,
	        num_inference_steps=inference_steps,
	        audio_end_in_s=duration,
	        num_waveforms_per_prompt=waveforms,
	        generator=generator,
	    ).audios

	    # Keep the first waveform, transposed and converted to a float32 NumPy
	    # array on the CPU.
	    # NOTE(review): the transpose presumably turns (channels, samples) into
	    # the (frames, channels) layout soundfile expects - confirm.
	    output = audio[0].T.float().cpu().numpy()

	    # Write to a unique temporary file rather than a fixed name, so that
	    # concurrent requests cannot overwrite each other's result.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        output_path = tmp.name
	    sf.write(output_path, output, pipe.vae.sampling_rate)

	    return output_path

	# Define the Gradio interface.
	# Components come from the top-level `gr` namespace and take their initial
	# value through `value=`; the legacy `gr.inputs.*` classes and their
	# `default=` keyword are no longer available in current Gradio releases.
	# Input order must match the parameter order of `generate_sound`.
	app = gr.Interface(
	    fn=generate_sound,
	    inputs=[
	        gr.Textbox(label="Text Prompt", placeholder="Describe the sound you'd like to generate..."),
	        gr.Textbox(label="Negative Prompt", placeholder="Describe what you don't want in the sound..."),
	        gr.Slider(label="Seed", minimum=0, maximum=10000, step=1, value=0),
	        gr.Slider(label="Inference Steps", minimum=50, maximum=500, step=10, value=200),
	        gr.Slider(label="Duration (seconds)", minimum=1.0, maximum=30.0, step=0.5, value=10.0),
	        gr.Slider(label="Number of Waveforms", minimum=1, maximum=5, step=1, value=1),
	    ],
	    outputs=gr.Audio(label="Generated Sound"),
	    # The app produces general audio from text, not speech.
	    title="StableAudio Text-to-Audio Generator",
	    description="Generate high-quality audio from text using StableAudio.",
	)

	# Start the web server for the interface.
	app.launch()