# Songlabai Streamlit app.
#
# Sends a text prompt (and optionally a reference track) to a remote music
# generation endpoint, then lets the user post-process the returned audio
# (vocal mix, volume, compression, EQ, reverb, delay).
#
# Streamlit re-executes this whole script on every widget interaction, so
# everything that must survive a rerun (audio, sample rate, uploaded vocals)
# lives in ``st.session_state`` rather than in plain module-level variables.
import base64
import io
import os

import librosa
import numpy as np
import requests
import soundfile as sf
import streamlit as st
from scipy.signal import butter, fftconvolve, sosfilt, sosfiltfilt
from streamlit import session_state as st_state

# Both settings are mandatory: without them the app reports the problem and
# stops instead of falling back to a default.
try:
    API_URL = os.environ["API_URL"]
except KeyError:
    st.error("API_URL environment variable is not set.")
    st.stop()

try:
    BEARER_TOKEN = os.environ["BEARER_TOKEN"]
except KeyError:
    st.error("BEARER_TOKEN environment variable is not set.")
    st.stop()

headers = {
    "Authorization": f"Bearer {BEARER_TOKEN}",
    "Content-Type": "application/json",
}

AUDIO_TYPES = ["mp3", "wav", "ogg", "flac", "aac"]
# (connect, read) timeouts in seconds; generation can legitimately be slow.
REQUEST_TIMEOUT = (10, 600)

# EQ band edges in Hz.
EQ_LOW_HZ = 200.0
EQ_MID_HZ = 2000.0
EQ_HIGH_HZ = 8000.0


def _db_to_gain(db):
    """Convert a level change in decibels to a linear amplitude factor."""
    return 10.0 ** (db / 20.0)


def compress(x, threshold, ratio, knee, max_gain):
    """Apply a static soft-knee compressor to linear-amplitude samples.

    The per-sample level is measured in dBFS so that it can be compared with
    ``threshold`` (the original code subtracted a dB threshold from linear
    samples, which amplified instead of compressing).

    Args:
        x: Array of samples in linear amplitude.
        threshold: Level in dBFS above which gain reduction starts.
        ratio: Compression ratio (>= 1); 1 leaves the signal untouched.
        knee: Knee width in dB; 0 or less selects a hard knee.
        max_gain: Upper bound, in dB, on the gain reduction that is applied.

    Returns:
        Array with the same shape as ``x`` holding the compressed samples.
    """
    x = np.asarray(x)
    # Floor the magnitude so silence does not produce log10(0).
    level_db = 20.0 * np.log10(np.maximum(np.abs(x), 1e-10))
    over = level_db - threshold
    slope = 1.0 / ratio - 1.0  # <= 0: dB of gain change per dB over threshold

    gain_db = np.where(over > 0, slope * over, 0.0)
    if knee > 0:
        # Quadratic interpolation inside the knee joins both straight
        # segments continuously at +/- knee / 2 around the threshold.
        in_knee = 2.0 * np.abs(over) <= knee
        soft = slope * (over + knee / 2.0) ** 2 / (2.0 * knee)
        gain_db = np.where(in_knee, soft, gain_db)

    gain_db = np.maximum(gain_db, -max_gain)
    return x * _db_to_gain(gain_db)


def apply_eq(audio, sample_rate, low_db, mid_db, high_db):
    """Boost or cut three frequency bands by the given amounts in dB.

    Each band is isolated with a 4th-order Butterworth filter run forwards
    and backwards (zero phase), and the band is added back scaled by
    ``gain - 1``. A 0 dB setting therefore leaves the signal unchanged.
    Bands whose edges do not fit below the Nyquist frequency are skipped.
    """
    nyquist = sample_rate / 2.0
    bands = (
        (low_db, EQ_LOW_HZ / nyquist, "low"),
        (mid_db, [EQ_LOW_HZ / nyquist, EQ_MID_HZ / nyquist], "band"),
        (high_db, EQ_HIGH_HZ / nyquist, "high"),
    )
    result = np.asarray(audio, dtype=np.float64).copy()
    for gain_db, edges, kind in bands:
        if gain_db == 0 or np.max(edges) >= 1.0:
            continue
        sos = butter(4, edges, btype=kind, output="sos", analog=False)
        result += (_db_to_gain(gain_db) - 1.0) * sosfiltfilt(sos, audio, axis=-1)
    return result


def apply_reverb(audio, sample_rate, amount, decay_seconds=2.5):
    """Add a synthetic reverb tail to ``audio``.

    The impulse response is exponentially decaying noise (about -60 dB after
    ``decay_seconds``), normalised to unit energy. A fixed seed keeps the
    result reproducible between reruns. FFT convolution keeps the cost
    manageable for minute-long clips.
    """
    audio = np.asarray(audio, dtype=np.float64)
    if amount == 0:
        return audio.copy()

    ir_length = max(int(sample_rate * decay_seconds), 1)
    elapsed = np.arange(ir_length) / sample_rate
    rng = np.random.default_rng(0)
    ir = rng.standard_normal(ir_length) * np.exp(-np.log(1000.0) * elapsed / decay_seconds)
    ir /= np.sqrt(np.sum(ir ** 2))

    # fftconvolve needs both operands to have the same number of dimensions.
    ir = ir.reshape((1,) * (audio.ndim - 1) + (-1,))
    n_samples = audio.shape[-1]
    wet = fftconvolve(audio, ir, mode="full", axes=-1)[..., :n_samples]
    return audio + amount * wet


def apply_delay(audio, sample_rate, amount, delay_ms):
    """Add a single echo of ``audio``, ``delay_ms`` later, scaled by ``amount``.

    The dry signal stays in place; the output has the same length as the input.
    """
    audio = np.asarray(audio)
    echoed = audio.copy()
    offset = int(delay_ms / 1000 * sample_rate)
    if 0 < offset < audio.shape[-1]:
        echoed[..., offset:] += amount * audio[..., :-offset]
    return echoed


def mix_with_vocals(audio, vocal_bytes, sample_rate):
    """Decode ``vocal_bytes`` and average it with ``audio``.

    The vocals are decoded to mono at ``sample_rate`` and padded or trimmed
    along the last axis to the length of ``audio``.
    """
    vocals, _ = librosa.load(io.BytesIO(vocal_bytes), sr=sample_rate, mono=True)
    vocals = librosa.util.fix_length(vocals, size=audio.shape[-1])
    return (audio + vocals) / 2


def _working_audio():
    """Return a private copy of the audio that effects should act on.

    Prefers the already-processed audio and falls back to the raw generated
    audio. Returning a copy prevents effects from modifying the generated
    audio in place. Warns and returns None when nothing has been generated.
    """
    source = st_state.augmented_audio
    if source is None:
        source = st_state.audio
    if source is None:
        st.warning("Generate audio before applying post-processing.")
        return None
    return np.array(source, dtype=np.float32, copy=True)


def _store_processed(processed):
    """Save ``processed`` as the current post-processed audio."""
    st_state.augmented_audio = np.asarray(processed, dtype=np.float32)


# Initialize session state variables
for _key in ("audio", "augmented_audio", "vocal_audio", "sample_rate"):
    if _key not in st_state:
        st_state[_key] = None
if "vocals_mixed" not in st_state:
    st_state.vocals_mixed = False

# Streamlit app title
st.title("Songlabai")

uploaded_file = st.file_uploader("Upload Music File", type=AUDIO_TYPES)

genres = [
    "Pop", "Rock", "Hip Hop", "Jazz", "Blues",
    "Country", "Classical", "Electronic", "Reggae",
    "Folk", "R&B", "Metal", "Punk", "Indie",
    "Dance", "World", "Gospel", "Soul", "Funk",
    "Ambient", "Techno", "Disco", "House", "Trance",
    "Dubstep",
]
genre = st.selectbox("Select Genre:", genres)

energy_levels = ["Low", "Medium", "High"]
energy_level = st.radio("Energy Level:", energy_levels)

description = st.text_input("Description:", "")

tempo = st.slider("Tempo (in bpm):", min_value=40, max_value=100, value=60, step=5)

# Duration input
duration = st.slider("Duration (in seconds):", min_value=15, max_value=90, value=30, step=1)

# Generate audio based on the user's prompt
if st.button("Generate Audio"):
    if not (genre and energy_level and description and tempo):
        st.warning("Please fill in all fields, including the description.")
    else:
        prompt = f"{genre} ,{energy_level}, {tempo}, {description}"
        inputs = {"prompt": prompt, "duration": duration}
        if uploaded_file:
            # getvalue() returns the full content regardless of read position.
            inputs["track"] = base64.b64encode(uploaded_file.getvalue()).decode("utf-8")
        payload = {"inputs": inputs}

        st.text("Generating audio...")
        try:
            response = requests.post(
                API_URL, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
            )
            response.raise_for_status()
            result = response.json()[0]
            generated = np.array(result["generated_audio"], dtype=np.float32)
            generated_rate = int(result["sample_rate"])
        except requests.RequestException as err:
            st.error(f"Audio generation request failed: {err}")
        except (ValueError, KeyError, IndexError, TypeError) as err:
            st.error(f"Unexpected response from the audio API: {err}")
        else:
            st_state.audio = generated
            st_state.sample_rate = generated_rate
            # A new clip invalidates anything derived from the previous one.
            st_state.augmented_audio = None
            st_state.vocals_mixed = False

# Shown on every rerun so the player does not vanish after the next click.
if st_state.audio is not None:
    st.audio(st_state.audio, format="audio/wav", sample_rate=st_state.sample_rate, start_time=0)

# Post-processing options
st.header("Post-processing Options")

vocal_file = st.file_uploader("Upload Vocal File", type=AUDIO_TYPES)
if vocal_file:
    vocal_bytes = vocal_file.getvalue()
    if vocal_bytes != st_state.vocal_audio:
        st_state.vocal_audio = vocal_bytes
        st_state.vocals_mixed = False

# Mixing
mix_vocals = st.checkbox("Mix Vocals")
if not mix_vocals:
    st_state.vocals_mixed = False
elif (
    st_state.vocal_audio is not None
    and st_state.audio is not None
    and not st_state.vocals_mixed
):
    # Mix once per tick of the checkbox: redoing it on every rerun would
    # discard the effects applied afterwards.
    try:
        mixed = mix_with_vocals(st_state.audio, st_state.vocal_audio, st_state.sample_rate)
    except Exception as err:  # UI boundary: report any decoding failure
        st.error(f"Could not mix the vocal file: {err}")
    else:
        _store_processed(mixed)
        st_state.vocals_mixed = True

# Mastering
st.subheader("Mastering")

# Volume Balance
# NOTE(review): st.audio may peak-normalise NumPy input, in which case a pure
# gain change is not audible in the in-page player - confirm.
volume_balance = st.slider("Volume Balance", min_value=-10.0, max_value=10.0, value=0.0, step=0.1)
if st.button("Apply Volume Balance"):
    working = _working_audio()
    if working is not None:
        _store_processed(working * _db_to_gain(volume_balance))

# Compression
compression_ratio = st.slider("Compression Ratio", min_value=1.0, max_value=10.0, value=3.0, step=0.1)
if st.button("Apply Compression"):
    working = _working_audio()
    if working is not None:
        threshold = -20  # dBFS
        knee = 10  # dB
        max_gain = 20  # dB
        _store_processed(compress(working, threshold, compression_ratio, knee, max_gain))

# EQ
eq_low = st.slider("EQ Low", min_value=-10.0, max_value=10.0, value=0.0, step=0.1)
eq_mid = st.slider("EQ Mid", min_value=-10.0, max_value=10.0, value=0.0, step=0.1)
eq_high = st.slider("EQ High", min_value=-10.0, max_value=10.0, value=0.0, step=0.1)
if st.button("Apply EQ"):
    working = _working_audio()
    if working is not None:
        _store_processed(apply_eq(working, st_state.sample_rate, eq_low, eq_mid, eq_high))

# Reverb
reverb_amount = st.slider("Reverb Amount", min_value=0.0, max_value=1.0, value=0.5, step=0.01)
if st.button("Apply Reverb"):
    working = _working_audio()
    if working is not None:
        _store_processed(apply_reverb(working, st_state.sample_rate, reverb_amount))

# Delay
delay_amount = st.slider("Delay Amount", min_value=0.0, max_value=1.0, value=0.5, step=0.01)
delay_time = st.slider("Delay Time (ms)", min_value=10, max_value=500, value=100, step=10)
if st.button("Apply Delay"):
    working = _working_audio()
    if working is not None:
        _store_processed(apply_delay(working, st_state.sample_rate, delay_amount, delay_time))

# Display the final audio
if st_state.augmented_audio is not None:
    st.audio(st_state.augmented_audio, format="audio/wav", sample_rate=st_state.sample_rate, start_time=0)

st.link_button("Download/Save", "https://songlabai.com/subcribe/")