Spaces:
Running
Running
import streamlit as st | |
import requests | |
import numpy as np | |
import os | |
import base64 | |
from streamlit import session_state as st_state | |
import librosa | |
import soundfile as sf | |
from scipy.signal import butter, sosfilt | |
def get_api_url_and_bearer_token():
    """
    Reads API_URL and BEARER_TOKEN from the process environment.

    If either variable is missing, an error message is shown through
    Streamlit and the script run is halted with ``st.stop()``.

    Returns:
        A ``(api_url, bearer_token)`` tuple of the two environment values.
    """
    api_url = os.environ.get("API_URL")
    if api_url is None:
        st.error("API_URL environment variable is not set.")
        st.stop()

    bearer_token = os.environ.get("BEARER_TOKEN")
    if bearer_token is None:
        st.error("BEARER_TOKEN environment variable is not set.")
        st.stop()

    return api_url, bearer_token
def initialize_session_state():
    """
    Ensures the audio-related session state slots exist.

    Each of ``audio``, ``augmented_audio`` and ``vocal_audio`` is created
    with the value ``None`` unless it is already present, so values set
    earlier in the session are left alone.
    """
    for slot in ("audio", "augmented_audio", "vocal_audio"):
        if slot not in st_state:
            setattr(st_state, slot, None)
def create_headers(bearer_token):
    """
    Builds the HTTP headers used for API requests.

    Args:
        bearer_token: Token placed after ``Bearer `` in the Authorization
            header.

    Returns:
        A dict with the ``Authorization`` and JSON ``Content-Type`` headers.
    """
    headers = {}
    headers["Authorization"] = "Bearer " + f"{bearer_token}"
    headers["Content-Type"] = "application/json"
    return headers
def upload_and_get_file_bytes():
    """
    Renders a music file uploader and returns the uploaded content.

    Returns:
        The result of ``read()`` on the uploaded file, or ``None`` when the
        uploader yields nothing (or a falsy value).
    """
    accepted_types = ["mp3", "wav", "ogg", "flac", "aac"]
    uploaded_file = st.file_uploader("Upload Music File", type=accepted_types)
    if not uploaded_file:
        return None
    return uploaded_file.read()
def generate_audio(api_url, headers, prompt, duration, audio_bytes=None):
    """
    Generates audio based on user prompt, duration and optional uploaded audio.

    The request payload is ``{"inputs": {"prompt": ..., "duration": ...}}``;
    when ``audio_bytes`` is given it is base64-encoded and added under
    ``inputs["track"]``. The generated audio is rendered with ``st.audio``
    before being returned.

    Args:
        api_url: Endpoint the payload is POSTed to.
        headers: HTTP headers sent with the request.
        prompt: Text prompt forwarded to the endpoint.
        duration: Requested duration forwarded to the endpoint.
        audio_bytes: Optional raw bytes of a conditioning track.

    Returns:
        A ``(generated_audio, sample_rate)`` tuple, where ``generated_audio``
        is a float32 NumPy array built from the response's
        ``generated_audio`` field and ``sample_rate`` is taken from the
        response's ``sample_rate`` field.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
    """
    payload = {"inputs": {"prompt": prompt, "duration": duration}}
    if audio_bytes:
        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
        payload["inputs"]["track"] = audio_base64

    st.text("Generating audio...")
    response = requests.post(api_url, headers=headers, json=payload)
    # Fail with a clear HTTP error instead of an opaque KeyError/TypeError
    # when the endpoint returns an error body rather than a result list.
    response.raise_for_status()

    # Decode the JSON body once; the result is the first element of the list.
    result = response.json()[0]
    generated_audio = np.array(result['generated_audio'], dtype=np.float32)
    sample_rate = result['sample_rate']

    st.audio(generated_audio, format="audio/wav", sample_rate=sample_rate, start_time=0)
    return generated_audio, sample_rate
def mix_vocals(audio, vocal_audio, sample_rate):
    """
    Mixes uploaded vocal audio with the generated audio.

    The vocal track is loaded with librosa at ``sample_rate`` (channels are
    kept, ``mono=False``), padded or trimmed along its last axis to the
    length of ``audio``, and averaged with ``audio``.

    Args:
        audio: Generated audio samples; the size of its last axis is used as
            the target length.
        vocal_audio: Anything ``librosa.load`` accepts as its first argument.
        sample_rate: Sample rate the vocal track is resampled to.

    Returns:
        ``(audio + vocals) / 2`` as computed by NumPy broadcasting.
    """
    vocal_audio, _ = librosa.load(vocal_audio, sr=sample_rate, mono=False)
    # Use the sample axis (last axis) so multi-channel ``audio`` laid out as
    # (channels, samples) is handled; for 1-D audio this equals len(audio).
    target_length = np.shape(audio)[-1]
    # ``size`` must be passed by keyword: it is keyword-only in librosa >= 0.10.
    vocal_audio = librosa.util.fix_length(vocal_audio, size=target_length)
    return (audio + vocal_audio) / 2
def apply_volume_balance(audio, balance):
    """
    Scales the audio by a gain derived from ``balance``.

    Args:
        audio: Samples (scalar or array) to scale.
        balance: Gain exponent input; the linear factor is
            ``10 ** (balance / 20)``.

    Returns:
        ``audio`` multiplied by the linear gain factor.
    """
    linear_gain = 10 ** (balance / 20)
    return audio * linear_gain
def apply_compression(audio, threshold, ratio, knee, max_gain):
    """
    Applies simple soft-knee compression to the audio.

    For each sample ``x`` the amount above ``threshold`` is
    ``over = max(x - threshold, 0)`` and the applied gain is
    ``over / (over + knee) * (1 - 1 / ratio) + 1``, floored at
    ``1 - max_gain``. Samples at or below the threshold get a gain of 1.

    Args:
        audio: Samples to process (NumPy array, scalar or plain sequence).
        threshold: Level above which the gain curve becomes active.
        ratio: Ratio used in the ``1 - 1 / ratio`` term; must be non-zero.
        knee: Softening term added to the denominator of the gain curve.
        max_gain: ``1 - max_gain`` is the lower bound for the gain.

    Returns:
        The samples multiplied element-wise by the computed gain.
    """
    # NOTE(review): for ratio > 1 the gain computed below is >= 1 above the
    # threshold, i.e. loud samples are boosted rather than attenuated, and
    # only positive excursions are affected. Confirm this is intended.
    samples = np.asarray(audio)
    over = np.maximum(samples - threshold, 0)

    # With knee == 0, samples at or below the threshold would evaluate 0 / 0
    # and poison the output with NaN; their knee fraction is defined as 0.
    active = over > 0
    safe_denominator = np.where(active, over + knee, 1)
    knee_fraction = np.where(active, over / safe_denominator, 0)

    gain = knee_fraction * (1 - (1 / ratio)) + 1
    gain = np.maximum(gain, 1 - max_gain)
    return samples * gain