Spaces:

Healthydater
/

songlab-melody

Running

App Files Files Community

songlab-melody / app.py

ageraustine

mixings with scipy

86b5b34 verified 5 months ago

raw

history blame

6.99 kB

	import streamlit as st
	import requests
	import numpy as np
	import os
	import base64
	from streamlit import session_state as st_state
	import librosa
	import soundfile as sf
	from scipy.signal import butter, sosfilt



	# The inference endpoint URL is mandatory: without it, report the problem
	# in the UI and halt this script run (no default value is substituted).
	API_URL = os.environ.get("API_URL")
	if API_URL is None:
	    st.error("API_URL environment variable is not set.")
	    st.stop()

	# The bearer token used to authenticate against the endpoint is mandatory
	# as well; a missing token is handled the same way.
	BEARER_TOKEN = os.environ.get("BEARER_TOKEN")
	if BEARER_TOKEN is None:
	    st.error("BEARER_TOKEN environment variable is not set.")
	    st.stop()

	# HTTP headers sent with every request to the inference endpoint.
	headers = {
	    "Authorization": f"Bearer {BEARER_TOKEN}",
	    "Content-Type": "application/json",
	}

	# Create every session-state slot this script relies on exactly once;
	# values that already exist are left alone so they survive reruns.
	for state_key in ("audio", "augmented_audio", "vocal_audio"):
	    if state_key not in st_state:
	        st_state[state_key] = None

	# Streamlit app title
	st.title("Songlabai")

	# Optional reference track for the generation request.
	uploaded_file = st.file_uploader(
	    "Upload Music File",
	    type=["mp3", "wav", "ogg", "flac", "aac"],
	)

	# Prompt ingredients chosen by the user.
	genres = [
	    "Pop", "Rock", "Hip Hop", "Jazz", "Blues",
	    "Country", "Classical", "Electronic", "Reggae",
	    "Folk", "R&B", "Metal", "Punk", "Indie",
	    "Dance", "World", "Gospel", "Soul", "Funk",
	    "Ambient", "Techno", "Disco", "House", "Trance",
	    "Dubstep",
	]
	genre = st.selectbox("Select Genre:", genres)

	energy_levels = ["Low", "Medium", "High"]
	energy_level = st.radio("Energy Level:", energy_levels)

	description = st.text_input("Description:", "")

	tempo = st.slider(
	    "Tempo (in bpm):", min_value=40, max_value=100, value=60, step=5
	)

	# Requested length of the generated clip.
	duration = st.slider(
	    "Duration (in seconds):", min_value=15, max_value=90, value=30, step=1
	)

	# ----------------------------------------------------------------------
	# Audio generation and post-processing
	# ----------------------------------------------------------------------
	# Local import: these SciPy routines are only needed by the effects below.
	from scipy.signal import fftconvolve, sosfiltfilt

	# Streamlit re-executes the whole script on every widget interaction, so
	# the sample rate must live in session state next to the audio it belongs
	# to; a plain local would be undefined on every run after generation.
	if 'sample_rate' not in st_state:
	    st_state.sample_rate = None


	def _apply_gain(audio, gain_db):
	    """Scale ``audio`` by ``gain_db`` decibels (0 dB leaves it unchanged)."""
	    return audio * 10.0 ** (gain_db / 20.0)


	def compress(x, threshold, ratio, knee, max_gain):
	    """Apply a sample-wise soft-knee compressor to ``x``.

	    The level of every sample is measured in dBFS; levels above the knee
	    region are reduced according to ``ratio``, levels inside the knee are
	    blended quadratically so the curve has no corner. There is no
	    attack/release smoothing.

	    Args:
	        x: Audio samples (full scale is +/-1.0).
	        threshold: Level in dBFS around which compression sets in.
	        ratio: Compression ratio (>= 1); 1 disables compression.
	        knee: Width of the soft knee in dB, centred on ``threshold``.
	        max_gain: Upper bound, in dB, on the gain reduction applied.

	    Returns:
	        The compressed samples, same shape as ``x``.
	    """
	    # Floor the magnitude so that silent samples do not produce log10(0).
	    level_db = 20.0 * np.log10(np.maximum(np.abs(x), 1e-10))
	    over = level_db - threshold
	    slope = 1.0 / ratio - 1.0  # dB of gain change per dB above threshold
	    half_knee = knee / 2.0

	    # Above the knee: straight line with the compressed slope.
	    gain_db = np.where(over > half_knee, slope * over, 0.0)
	    if knee > 0:
	        # Inside the knee: quadratic blend between "no change" and the line.
	        in_knee = np.abs(over) <= half_knee
	        knee_gain = slope * (over + half_knee) ** 2 / (2.0 * knee)
	        gain_db = np.where(in_knee, knee_gain, gain_db)

	    gain_db = np.maximum(gain_db, -max_gain)
	    return x * 10.0 ** (gain_db / 20.0)


	def _three_band_eq(audio, sample_rate, low_db, mid_db, high_db):
	    """Boost or cut three bands of ``audio`` by the given gains in dB.

	    Bands: below 200 Hz, 200-2000 Hz and above 8 kHz (2-8 kHz is left
	    untouched, as in the original band layout). Each band is extracted
	    with a zero-phase Butterworth filter and added back scaled, so 0 dB
	    on every band returns the input unchanged.
	    """
	    nyquist = sample_rate / 2
	    low_cutoff = 200 / nyquist
	    mid_cutoff = 2000 / nyquist
	    # butter() requires normalised cutoffs strictly below 1.
	    high_cutoff = min(8000 / nyquist, 0.99)

	    bands = (
	        (butter(4, low_cutoff, btype='low', output='sos', analog=False), low_db),
	        (butter(4, [low_cutoff, mid_cutoff], btype='band', output='sos', analog=False), mid_db),
	        (butter(4, high_cutoff, btype='high', output='sos', analog=False), high_db),
	    )

	    result = np.array(audio, dtype=np.float64)
	    for sos, gain_db in bands:
	        if gain_db == 0:
	            continue
	        # Zero-phase filtering keeps the band aligned with the dry signal,
	        # so adding it back yields a clean boost or cut.
	        band = sosfiltfilt(sos, audio, axis=-1)
	        result = result + (10.0 ** (gain_db / 20.0) - 1.0) * band
	    return result


	def _apply_reverb(audio, sample_rate, amount):
	    """Add a synthetic 2.5 s reverb tail to ``audio``, scaled by ``amount``."""
	    n_samples = audio.shape[-1]
	    ir_length = int(sample_rate * 2.5)  # 2.5 seconds
	    t = np.arange(ir_length) / sample_rate
	    # Exponentially decaying noise, down about 60 dB at the end of the
	    # tail. The seed is fixed so repeated runs give the same result.
	    rng = np.random.default_rng(0)
	    ir = rng.standard_normal(ir_length) * np.exp(-6.91 * t / 2.5)
	    ir /= np.sqrt(np.sum(ir ** 2))  # unit energy: wet level is comparable to dry

	    # FFT convolution is O(n log n); np.convolve would be quadratic here.
	    wet = np.apply_along_axis(
	        lambda channel: fftconvolve(channel, ir, mode='full')[:n_samples],
	        -1,
	        audio,
	    )
	    return audio + amount * wet


	def _apply_delay(audio, sample_rate, amount, time_ms):
	    """Mix a single echo, ``time_ms`` late and scaled by ``amount``, into ``audio``."""
	    delay_samples = int(time_ms / 1000 * sample_rate)
	    result = np.array(audio, dtype=np.float64)  # copy, keeps the dry signal intact
	    if 0 < delay_samples < audio.shape[-1]:
	        result[..., delay_samples:] += amount * audio[..., :-delay_samples]
	    return result


	def _apply_effect(effect, *args, **kwargs):
	    """Run ``effect`` on the current working audio and store the result.

	    The working audio is the already post-processed audio if there is one,
	    otherwise the freshly generated audio. Shows a warning and does nothing
	    when no audio has been generated yet.
	    """
	    source = st_state.augmented_audio
	    if source is None:
	        source = st_state.audio
	    if source is None:
	        st.warning("Generate audio before applying post-processing.")
	        return
	    processed = effect(np.asarray(source, dtype=np.float32), *args, **kwargs)
	    st_state.augmented_audio = np.asarray(processed, dtype=np.float32)


	# Generate audio based on the user's prompt
	if st.button("Generate Audio"):
	    prompt = f"{genre} ,{energy_level}, {tempo}, {description}"
	    inputs = {"prompt": prompt, "duration": duration}
	    if uploaded_file is not None:
	        # getvalue() returns the whole upload regardless of read position.
	        audio_bytes = uploaded_file.getvalue()
	        inputs["track"] = base64.b64encode(audio_bytes).decode('utf-8')
	    payload = {"inputs": inputs}

	    st.text("Generating audio...")
	    try:
	        # Generous timeout: generation is slow, but never hang forever.
	        response = requests.post(API_URL, headers=headers, json=payload, timeout=600)
	        response.raise_for_status()
	        result = response.json()[0]
	        generated_audio = np.asarray(result['generated_audio'], dtype=np.float32)
	        generated_rate = int(result['sample_rate'])
	    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as exc:
	        st.error(f"Audio generation failed: {exc}")
	    else:
	        st_state.audio = generated_audio
	        st_state.sample_rate = generated_rate
	        # Effects applied to a previous track must not leak into the new one.
	        st_state.augmented_audio = None

	# Show the generated track on every rerun, not only right after generation.
	if st_state.audio is not None:
	    st.audio(st_state.audio, format="audio/wav", sample_rate=st_state.sample_rate, start_time=0)


	vocal_file = st.file_uploader("Upload Vocal File", type=["mp3", "wav", "ogg", "flac", "aac"])

	# Post-processing options
	st.header("Post-processing Options")

	# Mixing
	# NOTE(review): while the box stays ticked the mix is rebuilt from the raw
	# generated audio on every rerun, which discards previously applied effects.
	mix_vocals = st.checkbox("Mix Vocals")
	if mix_vocals and vocal_file is not None and st_state.audio is not None:
	    # Load the vocal audio at the sample rate of the generated audio
	    vocal_file.seek(0)
	    vocal_audio, _ = librosa.load(vocal_file, sr=st_state.sample_rate, mono=False)

	    # Pad or trim the vocals (along the sample axis) to the generated length
	    vocal_audio = librosa.util.fix_length(vocal_audio, size=st_state.audio.shape[-1])

	    # Mix the vocal audio with the generated audio
	    st_state.augmented_audio = (st_state.audio + vocal_audio) / 2

	# Mastering
	st.subheader("Mastering")

	# Volume Balance (slider value is a gain in dB)
	volume_balance = st.slider("Volume Balance", min_value=-10.0, max_value=10.0, value=0.0, step=0.1)
	if st.button("Apply Volume Balance"):
	    _apply_effect(_apply_gain, volume_balance)

	# Compression
	compression_ratio = st.slider("Compression Ratio", min_value=1.0, max_value=10.0, value=3.0, step=0.1)
	if st.button("Apply Compression"):
	    threshold = -20  # dBFS
	    ratio = compression_ratio
	    knee = 10  # dB
	    max_gain = 20  # dB, cap on the gain reduction
	    _apply_effect(compress, threshold, ratio, knee, max_gain)

	# EQ (slider values are per-band gains in dB)
	eq_low = st.slider("EQ Low", min_value=-10.0, max_value=10.0, value=0.0, step=0.1)
	eq_mid = st.slider("EQ Mid", min_value=-10.0, max_value=10.0, value=0.0, step=0.1)
	eq_high = st.slider("EQ High", min_value=-10.0, max_value=10.0, value=0.0, step=0.1)

	if st.button("Apply EQ"):
	    _apply_effect(_three_band_eq, st_state.sample_rate, eq_low, eq_mid, eq_high)

	# Reverb
	reverb_amount = st.slider("Reverb Amount", min_value=0.0, max_value=1.0, value=0.5, step=0.01)
	if st.button("Apply Reverb"):
	    _apply_effect(_apply_reverb, st_state.sample_rate, reverb_amount)

	# Delay
	delay_amount = st.slider("Delay Amount", min_value=0.0, max_value=1.0, value=0.5, step=0.01)
	delay_time = st.slider("Delay Time (ms)", min_value=10, max_value=500, value=100, step=10)
	if st.button("Apply Delay"):
	    _apply_effect(_apply_delay, st_state.sample_rate, delay_amount, delay_time)

	# Display the final audio
	if st_state.augmented_audio is not None:
	    st.audio(st_state.augmented_audio, format="audio/wav", sample_rate=st_state.sample_rate, start_time=0)

	st.link_button("Download/Save", "https://songlabai.com/subcribe/")