tigorsinaga's picture
Update app.py
66a740d verified
raw
history blame
3.88 kB
import gradio as gr
import librosa
import numpy as np
import tensorflow as tf
import pickle
from pydub import AudioSegment
import os
# === Load model and label encoder ===
# Both files are expected to sit next to app.py in the Space repository.
model = tf.keras.models.load_model('final_model.h5')
# NOTE(review): pickle.load is only safe because label_chord.pkl ships with
# this repo — never load pickles from untrusted sources.
with open('label_chord.pkl', 'rb') as f:
    label_encoder = pickle.load(f)  # sklearn-style encoder; .classes_ maps index -> chord name
# === MP3 to WAV conversion ===
def convert_mp3_to_wav(mp3_path):
    """Convert an MP3 file to WAV and return the new file's path.

    The WAV file is written alongside the input with the same base name.

    Parameters:
        mp3_path: filesystem path to an .mp3 file.

    Returns:
        Path of the exported .wav file.
    """
    sound = AudioSegment.from_mp3(mp3_path)
    # splitext swaps only the final extension; the original
    # mp3_path.replace('.mp3', '.wav') would also rewrite any ".mp3"
    # occurring earlier in the path (e.g. "my.mp3.files/track.mp3").
    wav_path = os.path.splitext(mp3_path)[0] + '.wav'
    sound.export(wav_path, format="wav")
    return wav_path
# === Audio to mel-spectrogram conversion ===
def audio_to_mel_spectrogram(y, sr):
    """Turn a raw waveform into a normalized mel-spectrogram batch.

    Parameters:
        y: 1-D audio samples.
        sr: sample rate in Hz.

    Returns:
        Array of shape (1, 128, 128, 3), values roughly in [0, 1],
        ready to feed to the image-style CNN classifier.
    """
    samples = librosa.util.normalize(y)
    mel = librosa.feature.melspectrogram(
        y=samples, sr=sr, n_fft=2048, hop_length=512, n_mels=128
    )
    mel_db = librosa.power_to_db(mel, ref=np.max)
    # power_to_db yields values in about [-80, 0] dB; shift/scale onto [0, 1].
    mel_db = (mel_db + 80) / 80
    # Resize to a fixed 128x128 so every segment matches the model's input.
    resized = tf.image.resize(mel_db[..., np.newaxis], (128, 128)).numpy()
    # Duplicate the single channel three times to mimic an RGB image.
    rgb_like = np.repeat(resized, 3, axis=-1)
    # Prepend the batch dimension expected by model.predict.
    return np.expand_dims(rgb_like, axis=0)
# === Chord prediction ===
def predict_chords(audio_path):
    """Predict the chord progression of an audio file.

    Splits the recording into 1-second windows, classifies each window with
    the global `model`, collapses consecutive duplicate predictions, and
    returns the sequence as a human-readable string.

    Parameters:
        audio_path: path to a .wav or .mp3 file (Gradio passes a filepath).

    Returns:
        "Predicted Chords: C - G - Am" style summary string.
    """
    # Case-insensitive check so ".MP3" uploads are converted too.
    if audio_path.lower().endswith('.mp3'):
        audio_path = convert_mp3_to_wav(audio_path)
    y, sr = librosa.load(audio_path, sr=22050)
    duration = librosa.get_duration(y=y, sr=sr)
    chords = []
    previous_chord = None
    # ceil instead of int() so a trailing partial second is not discarded;
    # audio_to_mel_spectrogram resizes any segment length to 128x128 anyway.
    for i in range(int(np.ceil(duration))):
        y_segment = y[i * sr:(i + 1) * sr]
        if len(y_segment) == 0:
            continue
        mel_spectrogram = audio_to_mel_spectrogram(y_segment, sr)
        prediction = model.predict(mel_spectrogram)
        predicted_index = int(np.argmax(prediction))
        predicted_chord = label_encoder.classes_[predicted_index]
        # Labels are stored like "C_maj"; strip underscores for display.
        predicted_chord = predicted_chord.replace('_', '')
        # Collapse runs of the same chord into a single entry.
        if predicted_chord != previous_chord:
            chords.append(predicted_chord)
        previous_chord = predicted_chord
    return f"Predicted Chords: {' - '.join(chords)}"
# === Gradio interface assets ===
sample_audio_path = "example.mp3"  # demo clip bundled with the Space

# Plain literal: the original was an f-string with no placeholders (lint F541).
title = "AI Chord Recognition"

description = """
<div class='description'>
Upload an audio file (<strong>.mp3</strong> or <strong>.wav</strong>) and let the AI predict the chord progression.
You can also try my sample audio.
</div>
"""

# Usage tutorial shown in the UI (intentionally kept in Indonesian).
tutorial = """
## Tutorial Penggunaan
1. Masukkan audio kamu atau kamu bisa merekam sendiri.
2. Gunakan file audio sample yang sudah kami sediakan jika butuh.
3. Jalankan dengan memencet tombol **"Predict Chords"**.
4. Enjoy your song!
"""
# === Gradio interface ===
# Fix: the original called gr.Blocks(css=css), but no `css` variable is
# defined anywhere in this file, so the app crashed with NameError on import.
# The stylesheet is already pulled in via the <link> Markdown tag below.
with gr.Blocks() as interface:
    gr.Markdown("<link rel='stylesheet' href='style.css'>")  # external CSS file
    gr.Markdown(f"<h1 style='text-align: center;'>{title}</h1>")
    gr.Markdown("<h3 style='text-align: center;'>Hello! I'm Tigor Neilson Sinaga, with NIM 22.11.4725.<br>Welcome to my AI Chord Recognition project!</h3>")
    # Fix: closing tag was </p>, mismatched with the opening <h3>.
    gr.Markdown(f"<h3 style='text-align: center;'>{description}</h3>")
    gr.Markdown(f"<h3 style='text-align: center;'>{tutorial}</h3>")  # usage tutorial

    audio_input = gr.Audio(type="filepath", label="Upload Audio (MP3/WAV)")
    use_sample_button = gr.Button("Use Sample Audio", size="sm")
    predict_button = gr.Button("Predict Chords")
    output_text = gr.Textbox(label="Predicted Chords", lines=5, placeholder="Chord predictions will appear here...")

    # Wire buttons: sample loader fills the audio widget, predict runs the model.
    use_sample_button.click(fn=lambda: sample_audio_path, inputs=[], outputs=audio_input)
    predict_button.click(fn=predict_chords, inputs=audio_input, outputs=output_text, queue=True)

    gr.Markdown("<p style='text-align: center; font-size: 12px; color: grey;'>This project is still under development and has not yet reached high accuracy.</p>")

interface.launch(share=True)