Spaces:
Running
Running
File size: 6,985 Bytes
4bc7c2a a43a1a0 e4beb59 bdaff26 c9ef03f f7b5ba8 86b5b34 e4beb59 bdaff26 e4beb59 bdaff26 f7b5ba8 e4beb59 86b5b34 9e75871 c9ef03f e4beb59 7122272 e4beb59 7122272 c9ef03f e4beb59 c9ef03f f7b5ba8 e4beb59 f7b5ba8 e4beb59 86b5b34 e4beb59 4a13883 86b5b34 e4beb59 86b5b34 375fc9d 86b5b34 38546ed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
import streamlit as st
import requests
import numpy as np
import os
import base64
from streamlit import session_state as st_state
import librosa
import soundfile as sf
from scipy.signal import butter, sosfilt
def _require_env(name):
    """Return environment variable *name*; report the problem and halt the app if it is unset."""
    try:
        return os.environ[name]
    except KeyError:
        st.error(f"{name} environment variable is not set.")
        st.stop()


# Both settings are mandatory: there is no fallback value, the script stops
# with an on-page error as soon as one of them is missing.
API_URL = _require_env("API_URL")
BEARER_TOKEN = _require_env("BEARER_TOKEN")

# Headers sent with every request to the generation endpoint.
headers = {
    "Authorization": f"Bearer {BEARER_TOKEN}",
    "Content-Type": "application/json",
}
# Streamlit app title
st.title("Songlabai")

# Make sure every session-state slot the script reads exists, so later code
# can compare against None instead of probing for the key. Existing values
# (from earlier reruns of the same session) are left untouched.
for _slot in ("audio", "augmented_audio", "vocal_audio"):
    if _slot not in st_state:
        st_state[_slot] = None
# Optional reference track that is sent along with the prompt.
uploaded_file = st.file_uploader(
    "Upload Music File",
    type=["mp3", "wav", "ogg", "flac", "aac"],
)

# Prompt ingredients: genre, energy level, free-text description and tempo.
genres = [
    "Pop", "Rock", "Hip Hop", "Jazz", "Blues",
    "Country", "Classical", "Electronic", "Reggae",
    "Folk", "R&B", "Metal", "Punk", "Indie",
    "Dance", "World", "Gospel", "Soul", "Funk",
    "Ambient", "Techno", "Disco", "House", "Trance",
    "Dubstep",
]
genre = st.selectbox("Select Genre:", options=genres)

energy_levels = ["Low", "Medium", "High"]
energy_level = st.radio("Energy Level:", options=energy_levels)

description = st.text_input("Description:", value="")

tempo = st.slider("Tempo (in bpm):", 40, 100, 60, 5)

# Length of the clip to generate
duration = st.slider("Duration (in seconds):", 15, 90, 30, 1)
# ---------------------------------------------------------------------------
# Signal-processing helpers.
# They are pure functions: no Streamlit calls, no session state, and each one
# returns a NEW array so the generated track in st_state.audio is never
# modified in place. All of them work along the last axis, so both 1-D
# (samples,) and 2-D (channels, samples) audio are accepted.
# ---------------------------------------------------------------------------

# Seconds to wait for the generation endpoint before giving up.
REQUEST_TIMEOUT_S = 600

# Band edges (Hz) of the three-band EQ.
EQ_LOW_HZ = 200.0
EQ_MID_HZ = 2000.0
EQ_HIGH_HZ = 8000.0

# Fixed compressor settings used by the "Apply Compression" button.
COMP_THRESHOLD_DB = -20  # dBFS
COMP_KNEE_DB = 10        # dB
COMP_MAX_REDUCTION_DB = 20  # dB


def db_to_gain(level_db):
    """Convert a level in decibels to a linear amplitude factor."""
    return 10.0 ** (level_db / 20.0)


def apply_gain(x, gain_db):
    """Return *x* scaled by *gain_db* decibels."""
    x = np.asarray(x)
    return (x * db_to_gain(gain_db)).astype(x.dtype, copy=False)


def compress(x, threshold, ratio, knee, max_gain):
    """Apply a static soft-knee compressor to *x*, sample by sample.

    The level of every sample is measured in dB relative to an amplitude of
    1.0, the gain reduction is read off the soft-knee curve, and the sample is
    scaled accordingly. There is no attack/release smoothing.

    Args:
        x: Audio samples as a NumPy array.
        threshold: Level in dB (re. amplitude 1.0) around which reduction starts.
        ratio: Compression ratio (>= 1); 1 leaves the signal untouched.
        knee: Width in dB of the soft knee centred on ``threshold``.
        max_gain: Largest gain reduction, in dB, that may be applied.

    Returns:
        A new array with the same shape and dtype as ``x``.
    """
    x = np.asarray(x)
    # Floor the magnitude so silent samples do not produce log10(0).
    level_db = 20.0 * np.log10(np.maximum(np.abs(x), 1e-10))
    over = level_db - threshold
    slope = 1.0 / ratio - 1.0  # <= 0: dB of gain change per dB above threshold
    if knee > 0:
        # Quadratic blend between "no reduction" and the full-ratio line.
        in_knee = slope * (over + knee / 2.0) ** 2 / (2.0 * knee)
    else:
        in_knee = 0.0
    gain_db = np.where(
        2.0 * over < -knee,
        0.0,
        np.where(2.0 * over > knee, slope * over, in_knee),
    )
    gain_db = np.maximum(gain_db, -max_gain)
    return (x * db_to_gain(gain_db)).astype(x.dtype, copy=False)


def three_band_eq(x, fs, low_db, mid_db, high_db):
    """Boost or cut three frequency bands of *x*.

    Each band is isolated with a 4th-order Butterworth filter and added back
    to the dry signal scaled by ``gain - 1``, so a setting of 0 dB on every
    band returns the input unchanged.

    Args:
        x: Audio samples as a NumPy array.
        fs: Sample rate in Hz.
        low_db: Gain in dB for content below ``EQ_LOW_HZ``.
        mid_db: Gain in dB for content between ``EQ_LOW_HZ`` and ``EQ_MID_HZ``.
        high_db: Gain in dB for content above ``EQ_HIGH_HZ``.

    Returns:
        A new array with the same shape and dtype as ``x``.
    """
    x = np.asarray(x)
    nyquist = fs / 2.0
    # butter() needs 0 < Wn < 1, so keep every edge safely below Nyquist.
    ceiling = 0.9 * nyquist
    low_hz, mid_hz, high_hz = (
        min(edge, ceiling) for edge in (EQ_LOW_HZ, EQ_MID_HZ, EQ_HIGH_HZ)
    )
    bands = (
        (low_db, low_hz / nyquist, "low"),
        (mid_db, [low_hz / nyquist, mid_hz / nyquist], "band"),
        (high_db, high_hz / nyquist, "high"),
    )
    out = x.astype(np.float64)  # astype copies, so x itself is untouched
    for gain_db, cutoff, kind in bands:
        if gain_db == 0:
            continue
        sos = butter(4, cutoff, btype=kind, output="sos", analog=False)
        out += (db_to_gain(gain_db) - 1.0) * sosfilt(sos, x, axis=-1)
    return out.astype(x.dtype, copy=False)


def apply_reverb(x, fs, amount, tail_seconds=2.5, seed=0):
    """Add a synthetic reverb tail to *x*.

    The impulse response is seeded white noise under an exponential envelope
    that falls by 60 dB over ``tail_seconds``; it is normalised to unit energy
    and convolved with the signal via FFT. The wet signal is truncated to the
    input length and mixed in as ``x + amount * wet``.

    Args:
        x: Audio samples as a NumPy array.
        fs: Sample rate in Hz.
        amount: Wet level; 0 returns the dry signal.
        tail_seconds: Length of the impulse response in seconds.
        seed: Seed for the noise generator, so the effect is repeatable.

    Returns:
        A new array with the same shape and dtype as ``x``.
    """
    x = np.asarray(x)
    n_out = x.shape[-1]
    if n_out == 0:
        return x.copy()
    n_tail = max(int(fs * tail_seconds), 1)
    envelope = np.exp(-np.log(1000.0) * np.arange(n_tail) / n_tail)
    ir = np.random.default_rng(seed).standard_normal(n_tail) * envelope
    ir /= np.sqrt(np.sum(ir ** 2))
    # Linear convolution needs n_out + n_tail - 1 points; round up to a power
    # of two so the FFT stays fast whatever the clip length is.
    n_fft = 1 << (n_out + n_tail - 2).bit_length()
    spectrum = np.fft.rfft(x, n_fft, axis=-1) * np.fft.rfft(ir, n_fft)
    wet = np.fft.irfft(spectrum, n_fft, axis=-1)[..., :n_out]
    return (x + amount * wet).astype(x.dtype, copy=False)


def apply_delay(x, fs, delay_ms, amount):
    """Mix a single delayed copy of *x* back into the dry signal.

    Args:
        x: Audio samples as a NumPy array.
        fs: Sample rate in Hz.
        delay_ms: Delay time in milliseconds.
        amount: Level of the delayed copy relative to the dry signal.

    Returns:
        A new array with the same shape and dtype as ``x``.
    """
    x = np.asarray(x)
    shift = int(delay_ms / 1000 * fs)
    out = x.copy()
    # A delay of zero or longer than the clip adds nothing audible.
    if 0 < shift < x.shape[-1]:
        out[..., shift:] += amount * x[..., :-shift]
    return out


def _working_audio():
    """Return the signal effects should process, or None before any generation."""
    if st_state.augmented_audio is not None:
        return st_state.augmented_audio
    return st_state.audio


def _apply_effect(effect, *args):
    """Run *effect* on the working audio and store the result as the augmented track."""
    source = _working_audio()
    if source is None:
        st.warning("Generate audio before applying post-processing.")
        return
    st_state.augmented_audio = effect(source, *args)


# Generate audio based on the user's prompt
if st.button("Generate Audio"):
    prompt = f"{genre} ,{energy_level}, {tempo}, {description}"
    inputs = {"prompt": prompt, "duration": duration}
    if uploaded_file is not None:
        # getvalue() returns the whole upload regardless of the read position.
        inputs["track"] = base64.b64encode(uploaded_file.getvalue()).decode('utf-8')
    payload = {"inputs": inputs}
    st.text("Generating audio...")
    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=REQUEST_TIMEOUT_S
        )
        response.raise_for_status()
        result = response.json()[0]
        generated = np.array(result['generated_audio'], dtype=np.float32)
        generated_rate = int(result['sample_rate'])
    except requests.RequestException as exc:
        st.error(f"Audio generation request failed: {exc}")
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        st.error(f"Unexpected response from the generation API: {exc}")
    else:
        st_state.audio = generated
        # Streamlit reruns the script on every interaction, so the sample rate
        # has to live in session state to be available to the effect buttons.
        st_state.sample_rate = generated_rate
        # Anything derived from the previous track is now stale.
        st_state.augmented_audio = None
        st_state.mix_token = None

# None until the first successful generation in this session.
sample_rate = st_state.get("sample_rate")

if st_state.audio is not None:
    st.audio(st_state.audio, format="audio/wav", sample_rate=sample_rate, start_time=0)

vocal_file = st.file_uploader("Upload Vocal File", type=["mp3", "wav", "ogg", "flac", "aac"])

# Post-processing options
st.header("Post-processing Options")

# Mixing
mix_vocals = st.checkbox("Mix Vocals")
if not mix_vocals:
    # Forget the last mix so ticking the box again mixes afresh.
    st_state.mix_token = None
elif vocal_file is None or st_state.audio is None:
    st.warning("Upload a vocal file and generate audio before mixing.")
else:
    # Mix only once per uploaded file: re-mixing on every rerun would throw
    # away the effects applied since.
    mix_token = (vocal_file.name, vocal_file.size)
    if st_state.get("mix_token") != mix_token:
        vocal_file.seek(0)
        # Load as mono so the vocal broadcasts against 1-D or 2-D audio.
        vocal_audio, _ = librosa.load(vocal_file, sr=sample_rate, mono=True)
        # Pad or trim the vocal to the length of the generated audio
        vocal_audio = librosa.util.fix_length(vocal_audio, size=st_state.audio.shape[-1])
        # Mix the vocal audio with the generated audio
        st_state.augmented_audio = (st_state.audio + vocal_audio) / 2
        st_state.mix_token = mix_token

# Mastering
st.subheader("Mastering")

# Volume Balance
# NOTE(review): st.audio appears to peak-normalise NumPy input when encoding
# it, so a pure gain change may not be audible in the player - confirm against
# the Streamlit version in use.
volume_balance = st.slider("Volume Balance", min_value=-10.0, max_value=10.0, value=0.0, step=0.1)
if st.button("Apply Volume Balance"):
    _apply_effect(apply_gain, volume_balance)

# Compression
compression_ratio = st.slider("Compression Ratio", min_value=1.0, max_value=10.0, value=3.0, step=0.1)
if st.button("Apply Compression"):
    _apply_effect(
        compress,
        COMP_THRESHOLD_DB,
        compression_ratio,
        COMP_KNEE_DB,
        COMP_MAX_REDUCTION_DB,
    )

# EQ
eq_low = st.slider("EQ Low", min_value=-10.0, max_value=10.0, value=0.0, step=0.1)
eq_mid = st.slider("EQ Mid", min_value=-10.0, max_value=10.0, value=0.0, step=0.1)
eq_high = st.slider("EQ High", min_value=-10.0, max_value=10.0, value=0.0, step=0.1)
if st.button("Apply EQ"):
    _apply_effect(three_band_eq, sample_rate, eq_low, eq_mid, eq_high)

# Reverb
reverb_amount = st.slider("Reverb Amount", min_value=0.0, max_value=1.0, value=0.5, step=0.01)
if st.button("Apply Reverb"):
    _apply_effect(apply_reverb, sample_rate, reverb_amount)

# Delay
delay_amount = st.slider("Delay Amount", min_value=0.0, max_value=1.0, value=0.5, step=0.01)
delay_time = st.slider("Delay Time (ms)", min_value=10, max_value=500, value=100, step=10)
if st.button("Apply Delay"):
    _apply_effect(apply_delay, sample_rate, delay_time, delay_amount)

# Display the final audio
if st_state.augmented_audio is not None:
    st.audio(st_state.augmented_audio, format="audio/wav", sample_rate=sample_rate, start_time=0)

st.link_button("Download/Save", "https://songlabai.com/subcribe/")