# app.py
import gradio as gr
import librosa
import numpy as np
from openvino import runtime as ov
import soundfile as sf
import warnings
import os
from pathlib import Path
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)
def estimate_key(y, sr):
"""Estimate the musical key using chroma features."""
chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
chroma_avg = np.mean(chroma, axis=1)
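    # Take the pitch class with the highest average chroma energy as the key
    # (note: this does not distinguish major from minor)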
keys = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
key_index = np.argmax(chroma_avg)
return keys[key_index]
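# Rough heuristic: bucket each separated stream by its spectral centroid and RMS energy.
# The thresholds below are hand-picked rules of thumb, not a trained classifier.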
def classify_instrument(spectral_centroid, rms_energy):
"""Classify instrument type based on spectral characteristics."""
if spectral_centroid < 500:
if rms_energy > 0.1:
return "bass"
return "sub"
elif spectral_centroid < 2000:
if rms_energy > 0.15:
return "drums"
return "perc"
elif spectral_centroid < 4000:
return "synth"
else:
return "high"
def get_musical_tempo_description(tempo):
"""Convert numerical tempo to musical description."""
if tempo < 70:
return "slow"
elif tempo < 100:
return "chill"
elif tempo < 120:
return "upbeat"
elif tempo < 140:
return "energetic"
else:
return "fast"
def generate_prompt(keys, avg_tempo, streams_info, genre="electronic"):
"""Generate a concise, Suno-friendly prompt under 200 characters."""
most_common_key = max(set(keys), key=keys.count) if keys else "C"
instrument_counts = {}
for info in streams_info:
inst_type = info['type']
instrument_counts[inst_type] = instrument_counts.get(inst_type, 0) + 1
    main_elements = [k for k, _ in sorted(instrument_counts.items(), key=lambda x: x[1], reverse=True)[:2]]
tempo_desc = get_musical_tempo_description(avg_tempo)
prompt = f"{most_common_key} {int(avg_tempo)}bpm {tempo_desc} {genre} with {' + '.join(main_elements)}, dark atmosphere + reverb"
if len(prompt) > 200:
prompt = prompt[:197] + "..."
return prompt
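# Example use of generate_prompt (illustrative values):
#   generate_prompt(["A", "A", "C"], 128.0, [{'type': 'bass'}, {'type': 'synth'}], "techno")
#   -> "A 128bpm energetic techno with bass + synth, dark atmosphere + reverb"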
def process_audio(audio_path, genre):
    """Process an uploaded audio file and generate a Suno-style prompt."""
    if audio_path is None:
        return "Error: No audio file provided.", "Processing failed"
    try:
        # Load audio at its native sample rate
        y, sr = librosa.load(audio_path, sr=None)
        print(f"Audio loaded: {len(y)} samples, sample rate: {sr} Hz")
        # Load and compile the OpenVINO model (IR expected at models/htdemucs_v4.xml next to this script)
model_path = os.path.join(os.path.dirname(__file__), "models", "htdemucs_v4.xml")
core = ov.Core()
model = core.read_model(model_path)
compiled_model = core.compile_model(model, "CPU")
input_node = compiled_model.input(0)
output_node = compiled_model.output(0)
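        # NOTE: the fixed shape below is assumed to match the converted model's static input;
        # adjust it if your htdemucs_v4 export expects a different layout.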
target_shape = (1, 4, 2048, 336)
total_size = np.prod(target_shape)
if len(y) < total_size:
input_data = np.pad(y, (0, total_size - len(y)), mode='constant')
else:
input_data = y[:total_size]
input_data = input_data.reshape(target_shape).astype(np.float32)
input_tensor = ov.Tensor(input_data)
outputs = compiled_model([input_tensor])[output_node]
separated_audios = outputs[0]
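        # Each row of the model output is treated as one separated stream
        # (assumption: the converted model emits stems along the first axis of its output)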
# Analysis lists
keys = []
avg_tempos = []
streams_info = []
# Create temporary directory for separated streams
temp_dir = Path("temp_streams")
temp_dir.mkdir(exist_ok=True)
# Process each separated audio stream
for i in range(separated_audios.shape[0]):
stream = separated_audios[i].reshape(-1)
            output_file = temp_dir / f'separated_stream_{i+1}.wav'
            try:
                sf.write(str(output_file), stream, sr)
y_s, sr_s = librosa.load(str(output_file), sr=None)
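                # Skip streams with less than ~0.1 s of audio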
if len(y_s) < sr_s * 0.1:
continue
# Calculate audio features
                tempo_s, _ = librosa.beat.beat_track(y=y_s, sr=sr_s)
                tempo_s = float(np.atleast_1d(tempo_s)[0])  # newer librosa may return tempo as an ndarray
spectral_centroid_s = np.mean(librosa.feature.spectral_centroid(y=y_s, sr=sr_s))
rms_s = np.mean(librosa.feature.rms(y=y_s))
key_s = estimate_key(y_s, sr_s)
# Store all information
streams_info.append({
'type': classify_instrument(spectral_centroid_s, rms_s),
'centroid': spectral_centroid_s,
'energy': rms_s
})
keys.append(key_s)
avg_tempos.append(tempo_s)
except Exception as e:
print(f"Warning: Could not process stream {i+1}: {str(e)}")
continue
finally:
# Clean up temporary file
if output_file.exists():
output_file.unlink()
        # Clean up temporary directory (ignore errors if it is not empty)
        try:
            temp_dir.rmdir()
        except OSError:
            pass
if len(avg_tempos) > 0:
avg_tempo = np.mean(avg_tempos)
prompt = generate_prompt(keys, avg_tempo, streams_info, genre)
return prompt, f"Character count: {len(prompt)}"
else:
return "Error: No valid audio streams were processed.", "Processing failed"
except Exception as e:
return f"Error processing the file: {str(e)}", "Processing failed"
# Create Gradio interface
def create_interface():
genre_choices = ["electronic", "ambient", "trap", "synthwave", "house", "techno"]
iface = gr.Interface(
fn=process_audio,
inputs=[
gr.Audio(type="filepath", label="Upload Audio File"),
gr.Dropdown(choices=genre_choices, label="Select Genre", value="electronic")
],
outputs=[
gr.Textbox(label="Generated Prompt"),
gr.Textbox(label="Status")
],
title="Audio Analysis to Suno Prompt Generator",
description="Upload an audio file to generate a Suno-compatible prompt based on its musical characteristics.",
examples=[],
cache_examples=False
)
return iface
# Launch the interface
if __name__ == "__main__":
iface = create_interface()
iface.launch()