"""Songlabai — Streamlit front-end for a music-generation API.

Reads API_URL and BEARER_TOKEN from the environment, sends a text prompt
(plus an optional uploaded backing track, base64-encoded) to the API, and
offers simple post-processing (stereo, reverse, speed, pitch, reverb) on
the generated audio.
"""

import base64
import os

import numpy as np
import requests
import scipy.signal
import streamlit as st
from streamlit import session_state as st_state

# --- Configuration --------------------------------------------------------
# Both values are required; stop the app with a visible error if missing.
try:
    API_URL = os.environ["API_URL"]
except KeyError:
    st.error("API_URL environment variable is not set.")
    st.stop()

try:
    BEARER_TOKEN = os.environ["BEARER_TOKEN"]
except KeyError:
    st.error("BEARER_TOKEN environment variable is not set.")
    st.stop()

headers = {
    "Authorization": f"Bearer {BEARER_TOKEN}",
    "Content-Type": "application/json",
}

# --- Session state ---------------------------------------------------------
# Streamlit reruns this script top-to-bottom on every interaction, so
# anything that must survive a rerun (the generated audio and its sample
# rate) lives in session_state.
if 'audio' not in st_state:
    st_state.audio = None
if 'augmented_audio' not in st_state:
    st_state.augmented_audio = None
if 'sample_rate' not in st_state:
    # BUG FIX: sample_rate was a local inside the generate branch and
    # vanished on the next rerun, breaking the post-processing section.
    st_state.sample_rate = None

# --- UI: generation inputs -------------------------------------------------
st.title("Songlabai")

uploaded_file = st.file_uploader(
    "Upload Music File", type=["mp3", "wav", "ogg", "flac", "aac"]
)

genres = [
    "Pop", "Rock", "Hip Hop", "Jazz", "Blues",
    "Country", "Classical", "Electronic", "Reggae",
    "Folk", "R&B", "Metal", "Punk", "Indie",
    "Dance", "World", "Gospel", "Soul", "Funk",
    "Ambient", "Techno", "Disco", "House", "Trance",
    "Dubstep",
]

genre = st.selectbox("Select Genre:", genres)
energy_levels = ["Low", "Medium", "High"]
energy_level = st.radio("Energy Level:", energy_levels)
description = st.text_input("Description:", "")
tempo = st.slider("Tempo (in bpm):", min_value=40, max_value=100, value=60, step=5)

# Duration input
duration = st.slider("Duration (in seconds):", min_value=15, max_value=90, value=30, step=1)

# --- Generation ------------------------------------------------------------
# Generate audio based on the user's prompt
if st.button("Generate Audio"):
    prompt = f"{genre} ,{energy_level}, {tempo}, {description}"
    payload = {"inputs": {"prompt": prompt, "duration": duration}}
    if uploaded_file:
        # BUG FIX: the original referenced an undefined `audio_bytes`.
        audio_bytes = uploaded_file.read()
        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
        payload = {"inputs": {"prompt": prompt, "duration": duration, "track": audio_base64}}
    st.text("Generating audio...")
    response = requests.post(API_URL, headers=headers, json=payload)
    result = response.json()[0]  # parse once instead of twice
    st_state.audio = np.array(result['generated_audio'], dtype=np.float32)
    st_state.sample_rate = result['sample_rate']
    st.audio(st_state.audio, format="audio/wav",
             sample_rate=st_state.sample_rate, start_time=0)

# --- UI: post-processing controls -------------------------------------------
st.sidebar.title("Post-processing Options")
apply_stereo = st.sidebar.checkbox("Apply Stereo Effect")
reverse = st.sidebar.checkbox("Reverse Audio")
change_speed = st.sidebar.checkbox("Change Speed")
if change_speed:
    speed_factor = st.sidebar.slider("Speed Factor:", min_value=0.1, max_value=2.0, value=1.0, step=0.1)

# Pitch shifting
st.sidebar.title("Pitch Shifting")
pitch_shift = st.sidebar.checkbox("Pitch Shift")
if pitch_shift:
    pitch_semitones = st.sidebar.slider("Pitch (semitones):", min_value=-12, max_value=12, value=0, step=1)

# Reverb
st.sidebar.title("Reverb")
add_reverb = st.sidebar.checkbox("Add Reverb")
if add_reverb:
    reverb_room_scale = st.sidebar.slider("Room Scale:", min_value=0.0, max_value=100.0, value=50.0)
    reverb_damping = st.sidebar.slider("Damping:", min_value=0.0, max_value=100.0, value=50.0)
    reverb_wet_only = st.sidebar.checkbox("Wet Only", value=False)

# --- Apply selected post-processing ------------------------------------------
if apply_stereo or reverse or change_speed or pitch_shift or add_reverb:
    # BUG FIX: the original crashed with AttributeError when no audio had
    # been generated yet (st_state.audio is None).
    if st_state.audio is None:
        st.warning("Generate audio first, then apply post-processing.")
    else:
        st.text("Applying post-processing...")
        sample_rate = st_state.sample_rate
        st_state.augmented_audio = st_state.audio.copy()

        if apply_stereo:
            # Duplicate the mono signal into two identical channels.
            mono = st_state.augmented_audio
            st_state.augmented_audio = np.stack([mono, mono], axis=-1)

        if reverse:
            # BUG FIX: flip the time axis only; a bare np.flip on a stereo
            # (n, 2) array would also swap the left/right channels.
            st_state.augmented_audio = np.flip(st_state.augmented_audio, axis=0)

        if change_speed:
            # BUG FIX: direction was inverted. The original resampled to
            # len * factor and played at the unchanged rate, so factor > 1
            # produced *more* samples and sounded slower. Dividing shortens
            # the clip for factor > 1 (faster), lengthens it for factor < 1.
            new_len = max(1, int(len(st_state.augmented_audio) / speed_factor))
            st_state.augmented_audio = scipy.signal.resample(
                st_state.augmented_audio, new_len)

        if pitch_shift:
            # Crude pitch shift: resample without compensating the playback
            # rate, which shifts pitch (and duration) by the semitone ratio.
            pitch_shift_factor = 2 ** (pitch_semitones / 12)
            new_len = max(1, int(len(st_state.augmented_audio) / pitch_shift_factor))
            st_state.augmented_audio = scipy.signal.resample(
                st_state.augmented_audio, new_len)

        if add_reverb:
            # BUG FIX: scipy.signal.exponential is a *window* function with
            # signature (M, center, tau, sym); the original passed the UI
            # floats/bool positionally and raised. Build a decaying
            # exponential impulse response instead: room scale sets its
            # length (fraction of a second), damping sets how fast it decays.
            ir_len = max(1, int(sample_rate * reverb_room_scale / 100.0))
            tau = max(1.0, ir_len * (1.0 - reverb_damping / 100.0))
            reverb_ir = np.exp(-np.arange(ir_len) / tau).astype(np.float32)

            dry = st_state.augmented_audio
            # BUG FIX: fftconvolve requires matching ndim; convolve each
            # channel separately when the signal is stereo.
            if dry.ndim == 1:
                wet = scipy.signal.fftconvolve(dry, reverb_ir)
            else:
                wet = np.stack(
                    [scipy.signal.fftconvolve(dry[:, ch], reverb_ir)
                     for ch in range(dry.shape[1])],
                    axis=-1,
                )
            # BUG FIX: the "Wet Only" checkbox was read but never used.
            if reverb_wet_only:
                st_state.augmented_audio = wet
            else:
                st_state.augmented_audio = dry + wet[:len(dry)]

        # Play the processed audio
        st.audio(st_state.augmented_audio, format="audio/wav",
                 sample_rate=sample_rate, start_time=0)