Spaces:
Running
Running
File size: 3,013 Bytes
01fc4a7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import streamlit as st
import requests
import numpy as np
import os
import base64
from streamlit import session_state as st_state
import librosa
import soundfile as sf
from scipy.signal import butter, sosfilt
def get_api_url_and_bearer_token():
    """
    Reads API_URL and BEARER_TOKEN from the environment and returns them as
    the tuple (API_URL, BEARER_TOKEN).

    API_URL is looked up first. When a variable is missing, an error message
    is shown with st.error and st.stop() is called.
    """
    def read_or_stop(variable, message):
        # Look the variable up; on a miss, report it in the UI and stop.
        try:
            return os.environ[variable]
        except KeyError:
            st.error(message)
            st.stop()

    api_url = read_or_stop(
        "API_URL", "API_URL environment variable is not set."
    )
    bearer_token = read_or_stop(
        "BEARER_TOKEN", "BEARER_TOKEN environment variable is not set."
    )
    return api_url, bearer_token
def initialize_session_state():
    """
    Ensures the session state holds 'audio', 'augmented_audio' and
    'vocal_audio' entries, creating each missing one with the value None.
    Entries that already exist are left as they are.
    """
    for key in ('audio', 'augmented_audio', 'vocal_audio'):
        if key not in st_state:
            setattr(st_state, key, None)
def create_headers(bearer_token):
    """
    Builds the request headers: a Bearer "Authorization" entry made from
    bearer_token plus a JSON "Content-Type" entry.
    """
    authorization = f"Bearer {bearer_token}"
    headers = {"Authorization": authorization}
    headers["Content-Type"] = "application/json"
    return headers
def upload_and_get_file_bytes():
    """
    Shows a file uploader for music files and returns the result of read()
    on the uploaded file, or None when the uploader returned nothing.
    """
    accepted_types = ["mp3", "wav", "ogg", "flac", "aac"]
    music_file = st.file_uploader("Upload Music File", type=accepted_types)
    if not music_file:
        return None
    return music_file.read()
def generate_audio(api_url, headers, prompt, duration, audio_bytes=None):
    """
    Requests generated audio from the API, plays it in the app and returns it.

    Args:
        api_url: URL the JSON payload is POSTed to.
        headers: HTTP headers sent with the request.
        prompt: Value sent as inputs["prompt"].
        duration: Value sent as inputs["duration"].
        audio_bytes: Optional bytes of an uploaded track. When truthy they are
            base64-encoded and sent as inputs["track"].

    Returns:
        A tuple (generated_audio, sample_rate). generated_audio is a float32
        numpy array built from the 'generated_audio' field of the first item
        of the JSON response; sample_rate is that item's 'sample_rate' field.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    inputs = {"prompt": prompt, "duration": duration}
    if audio_bytes:
        inputs["track"] = base64.b64encode(audio_bytes).decode('utf-8')
    payload = {"inputs": inputs}

    st.text("Generating audio...")
    response = requests.post(api_url, headers=headers, json=payload)
    # Surface HTTP failures as such; without this an error body only shows up
    # later as a confusing KeyError/TypeError while indexing the JSON.
    response.raise_for_status()

    # Decode the body once and reuse it for both fields.
    result = response.json()[0]
    generated_audio = np.array(result['generated_audio'], dtype=np.float32)
    sample_rate = result['sample_rate']

    st.audio(generated_audio, format="audio/wav", sample_rate=sample_rate, start_time=0)
    return generated_audio, sample_rate
def mix_vocals(audio, vocal_audio, sample_rate):
    """
    Mixes uploaded vocal audio with the generated audio.

    Args:
        audio: Generated audio samples the vocals are mixed into.
        vocal_audio: Source handed to librosa.load (e.g. a path or file-like
            object) containing the vocal recording.
        sample_rate: Sampling rate the vocals are loaded at (passed as sr).

    Returns:
        The element-wise average of audio and the length-adjusted vocals.
    """
    vocals, _ = librosa.load(vocal_audio, sr=sample_rate, mono=False)
    # `size` must be passed by keyword: it is keyword-only in librosa >= 0.10,
    # where the positional form raises TypeError.
    # NOTE(review): len(audio) is the sample count only if audio is 1-D, and
    # mono=False can yield multi-channel vocals - confirm the shapes callers
    # pass in.
    vocals = librosa.util.fix_length(vocals, size=len(audio))
    return (audio + vocals) / 2
def apply_volume_balance(audio, balance):
    """
    Scales the audio by the factor 10 ** (balance / 20) and returns the
    result; a balance of 0 leaves the audio unchanged.
    """
    gain_factor = 10 ** (balance / 20)
    return audio * gain_factor
def apply_compression(audio, threshold, ratio, knee, max_gain):
    """
    Applies the simple soft-knee gain curve to the audio.

    For every sample the amount above threshold is turned into a gain factor
        over / (over + knee) * (1 - 1 / ratio) + 1
    which is floored at 1 - max_gain and multiplied onto the sample.

    Args:
        audio: Samples to process (numpy array or scalar).
        threshold: Level above which a sample counts as "over".
        ratio: Ratio used in the (1 - 1 / ratio) term; must be non-zero.
        knee: Softening term added to the overshoot in the denominator.
        max_gain: The gain factor is never allowed below 1 - max_gain.

    Returns:
        The audio multiplied element-wise by the computed gain factor.
    """
    # NOTE(review): for ratio > 1 and knee >= 0 the factor is always >= 1, so
    # samples above the threshold are scaled up, and negative samples never
    # count as "over" - confirm this is the intended curve.
    over = np.maximum(audio - threshold, 0)

    # With knee == 0 the quotient is 0/0 for every sample at or below the
    # threshold, which used to turn those samples into NaN. A sample with no
    # overshoot gets no knee contribution, so its quotient is defined as 0.
    with np.errstate(divide="ignore", invalid="ignore"):
        knee_fraction = over / (over + knee)
    knee_fraction = np.where(over == 0, 0, knee_fraction)

    gain = knee_fraction * (1 - (1 / ratio)) + 1
    gain = np.maximum(gain, 1 - max_gain)
    return audio * gain
|