Luis-Filipe committed on
Commit
4ca50b5
·
verified ·
1 Parent(s): 993615a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +173 -23
app.py CHANGED
@@ -1,31 +1,181 @@
1
- # Audio Analysis to Suno Prompt Generator
 
 
 
 
 
 
 
 
2
 
3
- This Hugging Face Space analyzes audio files and generates Suno-compatible prompts based on their musical characteristics.
 
4
 
5
- ## Features
 
 
 
 
 
 
6
 
7
- - Extracts musical key, tempo, and instrument types from audio
8
- - Uses OpenVINO for audio separation
9
- - Generates concise prompts (<200 characters) suitable for Suno
10
- - Supports multiple genres
11
- - Web interface for easy use
 
 
 
 
 
 
 
 
 
12
 
13
- ## How to Use
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- 1. Upload an audio file (MP3 format recommended)
16
- 2. Select a genre from the dropdown
17
- 3. Click "Submit" to generate a prompt
18
- 4. Copy the generated prompt for use with Suno
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- ## Technical Details
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- The app uses:
23
- - Librosa for audio analysis
24
- - OpenVINO for audio separation
25
- - Gradio for the web interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- ## Notes
28
-
29
- - Maximum file size: 100MB
30
- - Supported formats: MP3, WAV
31
- - Processing may take a few minutes depending on file size
 
1
+ # app.py
2
+ import gradio as gr
3
+ import librosa
4
+ import numpy as np
5
+ from openvino import runtime as ov
6
+ import soundfile as sf
7
+ import warnings
8
+ import os
9
+ from pathlib import Path
10
 
11
+ warnings.filterwarnings('ignore', category=FutureWarning)
12
+ warnings.filterwarnings('ignore', category=UserWarning)
13
 
14
def estimate_key(y, sr):
    """Return the most prominent pitch class of a waveform as a key name.

    Averages a constant-Q chromagram over time and reports the pitch class
    with the highest mean energy (no major/minor distinction).

    Args:
        y: Audio time series (1-D array).
        sr: Sample rate of ``y`` in Hz.

    Returns:
        One of the twelve pitch-class names, e.g. ``"C"`` or ``"F#"``.
    """
    pitch_classes = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    chromagram = librosa.feature.chroma_cqt(y=y, sr=sr)
    mean_energy = chromagram.mean(axis=1)
    return pitch_classes[int(np.argmax(mean_energy))]
21
 
22
def classify_instrument(spectral_centroid, rms_energy):
    """Map spectral centroid and RMS energy to a coarse instrument label.

    Args:
        spectral_centroid: Mean spectral centroid in Hz.
        rms_energy: Mean RMS energy of the stream.

    Returns:
        One of ``"bass"``, ``"sub"``, ``"drums"``, ``"perc"``, ``"synth"``,
        ``"high"``.
    """
    # Work from the brightest band down with guard clauses; within the two
    # lower bands, energy splits tonal vs. quieter material.
    if spectral_centroid >= 4000:
        return "high"
    if spectral_centroid >= 2000:
        return "synth"
    if spectral_centroid >= 500:
        return "drums" if rms_energy > 0.15 else "perc"
    return "bass" if rms_energy > 0.1 else "sub"
36
 
37
def get_musical_tempo_description(tempo):
    """Translate a BPM value into a coarse tempo adjective.

    Args:
        tempo: Tempo in beats per minute.

    Returns:
        ``"slow"`` (< 70), ``"chill"`` (< 100), ``"upbeat"`` (< 120),
        ``"energetic"`` (< 140) or ``"fast"`` (>= 140).
    """
    # Ascending upper bounds; the first band the tempo falls under wins.
    bands = ((70, "slow"), (100, "chill"), (120, "upbeat"), (140, "energetic"))
    for upper_bound, label in bands:
        if tempo < upper_bound:
            return label
    return "fast"
49
 
50
def generate_prompt(keys, avg_tempo, streams_info, genre="electronic"):
    """Generate a concise, Suno-friendly prompt under 200 characters.

    Args:
        keys: Estimated key name per stream; the most common one is used
            (falls back to ``"C"`` when empty).
        avg_tempo: Average tempo in BPM (rendered as an int in the prompt).
        streams_info: Per-stream dicts with at least a ``'type'`` key
            (instrument label from ``classify_instrument``).
        genre: Genre word inserted into the prompt.

    Returns:
        Prompt string, hard-truncated to at most 200 characters.
    """
    from collections import Counter  # stdlib; local import keeps module header unchanged

    # Counter.most_common breaks ties deterministically (first-seen order),
    # unlike max() over a set, whose winner depends on string hash ordering
    # and could differ between runs for the same input.
    most_common_key = Counter(keys).most_common(1)[0][0] if keys else "C"

    # Up to two most frequent instrument labels, most frequent first.
    type_counts = Counter(info['type'] for info in streams_info)
    main_elements = [label for label, _ in type_counts.most_common(2)]

    tempo_desc = get_musical_tempo_description(avg_tempo)

    prompt = f"{most_common_key} {int(avg_tempo)}bpm {tempo_desc} {genre} with {' + '.join(main_elements)}, dark atmosphere + reverb"

    # Suno prompts must stay under 200 characters; truncate with an ellipsis.
    if len(prompt) > 200:
        prompt = prompt[:197] + "..."

    return prompt
68
 
69
def process_audio(audio_path, genre):
    """Separate an audio file into stems, analyze each, and build a Suno prompt.

    Args:
        audio_path: Path to the uploaded audio file (Gradio ``Audio`` input,
            ``type="filepath"``).
        genre: Genre string chosen in the UI, forwarded to ``generate_prompt``.

    Returns:
        A ``(prompt, status)`` tuple of strings. On any failure the first
        element is an error message and the second is ``"Processing failed"``.
    """
    try:
        # Load audio at its native sample rate (sr=None preserves the original).
        y, sr = librosa.load(audio_path, sr=None)
        print(f"Audio loaded: {len(y)} samples, Sample rate: {sr}")

        # Configure OpenVINO model.
        # NOTE(review): expects a pre-converted IR model at models/htdemucs_v4.xml
        # next to this file; a missing file raises and is caught by the outer except.
        model_path = os.path.join(os.path.dirname(__file__), "models", "htdemucs_v4.xml")
        core = ov.Core()
        model = core.read_model(model_path)
        compiled_model = core.compile_model(model, "CPU")

        input_node = compiled_model.input(0)
        output_node = compiled_model.output(0)
        # Fixed input shape fed to the model.
        # NOTE(review): the raw mono waveform is padded/truncated and reshaped
        # directly into this 4-D tensor — presumably the exported model's
        # expected layout; verify against the model conversion script.
        target_shape = (1, 4, 2048, 336)

        # Zero-pad (or truncate) the waveform so its length matches the tensor size.
        total_size = np.prod(target_shape)
        if len(y) < total_size:
            input_data = np.pad(y, (0, total_size - len(y)), mode='constant')
        else:
            input_data = y[:total_size]

        input_data = input_data.reshape(target_shape).astype(np.float32)
        input_tensor = ov.Tensor(input_data)

        # Run inference and index the result by the output node.
        outputs = compiled_model([input_tensor])[output_node]
        separated_audios = outputs[0]  # drop the batch dimension

        # Analysis accumulators — one entry per successfully processed stream.
        keys = []
        avg_tempos = []
        streams_info = []

        # Create temporary directory for separated streams.
        temp_dir = Path("temp_streams")
        temp_dir.mkdir(exist_ok=True)

        # Process each separated audio stream.
        for i in range(separated_audios.shape[0]):
            stream = separated_audios[i].reshape(-1)  # flatten to a 1-D waveform

            try:
                # Round-trip through a WAV file so librosa re-reads the stream.
                output_file = temp_dir / f'separated_stream_{i+1}.wav'
                sf.write(str(output_file), stream, sr)

                y_s, sr_s = librosa.load(str(output_file), sr=None)

                # Skip streams shorter than 100 ms — too little signal to analyze.
                if len(y_s) < sr_s * 0.1:
                    continue

                # Calculate audio features for this stream.
                tempo_s, _ = librosa.beat.beat_track(y=y_s, sr=sr_s)
                spectral_centroid_s = np.mean(librosa.feature.spectral_centroid(y=y_s, sr=sr_s))
                rms_s = np.mean(librosa.feature.rms(y=y_s))
                key_s = estimate_key(y_s, sr_s)

                # Store all information about the stream.
                streams_info.append({
                    'type': classify_instrument(spectral_centroid_s, rms_s),
                    'centroid': spectral_centroid_s,
                    'energy': rms_s
                })

                keys.append(key_s)
                avg_tempos.append(tempo_s)

            except Exception as e:
                # Best-effort: a bad stream is logged and skipped, not fatal.
                print(f"Warning: Could not process stream {i+1}: {str(e)}")
                continue
            finally:
                # Clean up the temporary WAV for this stream.
                if output_file.exists():
                    output_file.unlink()

        # Clean up temporary directory.
        # NOTE(review): rmdir() raises if any file was left behind; the outer
        # except then turns that into an error return.
        temp_dir.rmdir()

        if len(avg_tempos) > 0:
            avg_tempo = np.mean(avg_tempos)
            prompt = generate_prompt(keys, avg_tempo, streams_info, genre)
            return prompt, f"Character count: {len(prompt)}"
        else:
            return "Error: No valid audio streams were processed.", "Processing failed"

    except Exception as e:
        return f"Error processing the file: {str(e)}", "Processing failed"
156
 
157
+ # Create Gradio interface
158
def create_interface():
    """Build and return the Gradio interface for the prompt generator."""
    genres = ["electronic", "ambient", "trap", "synthwave", "house", "techno"]

    # Build the widgets up front, then wire them into the Interface.
    audio_input = gr.Audio(type="filepath", label="Upload Audio File")
    genre_input = gr.Dropdown(choices=genres, label="Select Genre", value="electronic")
    prompt_output = gr.Textbox(label="Generated Prompt")
    status_output = gr.Textbox(label="Status")

    return gr.Interface(
        fn=process_audio,
        inputs=[audio_input, genre_input],
        outputs=[prompt_output, status_output],
        title="Audio Analysis to Suno Prompt Generator",
        description="Upload an audio file to generate a Suno-compatible prompt based on its musical characteristics.",
        examples=[],
        cache_examples=False,
    )
177
 
178
# Script entry point: build the interface and start the Gradio server.
if __name__ == "__main__":
    create_interface().launch()