Upload app.py
Browse files
app.py
CHANGED
@@ -1,31 +1,181 @@
|
|
1 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
|
|
4 |
|
5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
- Processing may take a few minutes depending on file size
|
|
|
1 |
+
# app.py
|
2 |
+
import gradio as gr
|
3 |
+
import librosa
|
4 |
+
import numpy as np
|
5 |
+
from openvino import runtime as ov
|
6 |
+
import soundfile as sf
|
7 |
+
import warnings
|
8 |
+
import os
|
9 |
+
from pathlib import Path
|
# Silence noisy third-party warnings so the console output stays readable.
# NOTE(review): librosa and its dependencies commonly emit FutureWarning /
# UserWarning during load and feature extraction — confirm these are the
# intended targets before narrowing or broadening the filters.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)
|
def estimate_key(y, sr):
    """Estimate the musical key using chroma features.

    Averages a constant-Q chromagram over time and returns the pitch
    class with the highest mean energy.
    """
    pitch_classes = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    # Mean chroma energy per pitch class across all frames.
    chroma_profile = librosa.feature.chroma_cqt(y=y, sr=sr).mean(axis=1)
    return pitch_classes[int(np.argmax(chroma_profile))]
21 |
|
def classify_instrument(spectral_centroid, rms_energy):
    """Classify instrument type based on spectral characteristics.

    Low spectral centroids map to bass-range sounds, mid centroids to
    percussive sounds, and higher centroids to synth/high-frequency
    content; RMS energy disambiguates within the low and mid bands.
    """
    if spectral_centroid < 500:
        return "bass" if rms_energy > 0.1 else "sub"
    if spectral_centroid < 2000:
        return "drums" if rms_energy > 0.15 else "perc"
    if spectral_centroid < 4000:
        return "synth"
    return "high"
36 |
|
def get_musical_tempo_description(tempo):
    """Convert numerical tempo (BPM) to a short musical description."""
    # Upper-bound (exclusive) -> label, checked in ascending order.
    bands = ((70, "slow"), (100, "chill"), (120, "upbeat"), (140, "energetic"))
    for upper_bound, label in bands:
        if tempo < upper_bound:
            return label
    return "fast"
49 |
|
def generate_prompt(keys, avg_tempo, streams_info, genre="electronic"):
    """Generate a concise, Suno-friendly prompt under 200 characters."""
    # Most frequent detected key; default to C when nothing was analysed.
    dominant_key = max(set(keys), key=keys.count) if keys else "C"

    # Tally how often each instrument type appears across the streams.
    type_tally = {}
    for stream in streams_info:
        type_tally[stream['type']] = type_tally.get(stream['type'], 0) + 1

    # Keep the two most frequent instrument types as headline elements.
    main_elements = sorted(type_tally, key=type_tally.get, reverse=True)[:2]

    prompt = (
        f"{dominant_key} {int(avg_tempo)}bpm "
        f"{get_musical_tempo_description(avg_tempo)} {genre} "
        f"with {' + '.join(main_elements)}, dark atmosphere + reverb"
    )

    # Hard cap for the prompt length limit.
    return prompt if len(prompt) <= 200 else prompt[:197] + "..."
68 |
|
def process_audio(audio_path, genre):
    """Process an uploaded audio file and generate a Suno prompt.

    Loads the audio, runs the OpenVINO source-separation model, analyses
    each separated stream (key, tempo, spectral features) and builds a
    short text prompt from the aggregated results.

    Args:
        audio_path: Filesystem path to the uploaded audio file.
        genre: Genre label to embed in the generated prompt.

    Returns:
        Tuple of (prompt text or error message, status/character-count text).
        Never raises: all failures are reported via the returned strings.
    """
    try:
        # Load audio at its native sample rate.
        y, sr = librosa.load(audio_path, sr=None)
        print(f"Audio loaded: {len(y)} samples, Sample rate: {sr}")

        # Configure OpenVINO model (expected next to this file under models/).
        model_path = os.path.join(os.path.dirname(__file__), "models", "htdemucs_v4.xml")
        core = ov.Core()
        model = core.read_model(model_path)
        compiled_model = core.compile_model(model, "CPU")

        output_node = compiled_model.output(0)
        # NOTE(review): fixed model input shape — assumes the exported
        # htdemucs graph expects (1, 4, 2048, 336); confirm against the model.
        target_shape = (1, 4, 2048, 336)

        # Pad with zeros or truncate so the waveform exactly fills the input.
        total_size = np.prod(target_shape)
        if len(y) < total_size:
            input_data = np.pad(y, (0, total_size - len(y)), mode='constant')
        else:
            input_data = y[:total_size]

        input_data = input_data.reshape(target_shape).astype(np.float32)
        input_tensor = ov.Tensor(input_data)

        outputs = compiled_model([input_tensor])[output_node]
        separated_audios = outputs[0]

        # Per-stream analysis results.
        keys = []
        avg_tempos = []
        streams_info = []

        # Scratch directory for the separated stream WAV files.
        temp_dir = Path("temp_streams")
        temp_dir.mkdir(exist_ok=True)

        try:
            # Process each separated audio stream independently.
            for i in range(separated_audios.shape[0]):
                stream = separated_audios[i].reshape(-1)
                # Initialised up-front so the finally below never touches
                # an unbound name if stream setup fails.
                output_file = None
                try:
                    output_file = temp_dir / f'separated_stream_{i+1}.wav'
                    sf.write(str(output_file), stream, sr)

                    y_s, sr_s = librosa.load(str(output_file), sr=None)

                    # Skip streams shorter than 0.1 s — too little signal
                    # for meaningful tempo/key analysis.
                    if len(y_s) < sr_s * 0.1:
                        continue

                    # Calculate audio features for this stream.
                    tempo_s, _ = librosa.beat.beat_track(y=y_s, sr=sr_s)
                    spectral_centroid_s = np.mean(librosa.feature.spectral_centroid(y=y_s, sr=sr_s))
                    rms_s = np.mean(librosa.feature.rms(y=y_s))
                    key_s = estimate_key(y_s, sr_s)

                    streams_info.append({
                        'type': classify_instrument(spectral_centroid_s, rms_s),
                        'centroid': spectral_centroid_s,
                        'energy': rms_s
                    })

                    keys.append(key_s)
                    avg_tempos.append(tempo_s)

                except Exception as e:
                    # Best-effort: one bad stream should not abort the rest.
                    print(f"Warning: Could not process stream {i+1}: {str(e)}")
                    continue
                finally:
                    # Clean up this stream's temporary file.
                    if output_file is not None and output_file.exists():
                        output_file.unlink()
        finally:
            # Always remove the scratch directory, even when the loop above
            # raised (previously it leaked in that case). Suppress OSError so
            # a leftover file/race cannot mask the real error.
            try:
                temp_dir.rmdir()
            except OSError:
                pass

        if len(avg_tempos) > 0:
            avg_tempo = np.mean(avg_tempos)
            prompt = generate_prompt(keys, avg_tempo, streams_info, genre)
            return prompt, f"Character count: {len(prompt)}"
        else:
            return "Error: No valid audio streams were processed.", "Processing failed"

    except Exception as e:
        # Surface the failure in the UI instead of crashing the Gradio app.
        return f"Error processing the file: {str(e)}", "Processing failed"
156 |
|
# Create Gradio interface
def create_interface():
    """Build and return the Gradio interface for the prompt generator."""
    genre_dropdown = gr.Dropdown(
        choices=["electronic", "ambient", "trap", "synthwave", "house", "techno"],
        label="Select Genre",
        value="electronic",
    )

    return gr.Interface(
        fn=process_audio,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File"),
            genre_dropdown,
        ],
        outputs=[
            gr.Textbox(label="Generated Prompt"),
            gr.Textbox(label="Status"),
        ],
        title="Audio Analysis to Suno Prompt Generator",
        description="Upload an audio file to generate a Suno-compatible prompt based on its musical characteristics.",
        examples=[],
        cache_examples=False,
    )
177 |
|
# Launch the interface
if __name__ == "__main__":
    create_interface().launch()