Spaces:

tigorsinaga
/

AI_CHORD_RECOGNITION

Sleeping

App Files Files Community

AI_CHORD_RECOGNITION / app.py

tigorsinaga

Update app.py

4806e46 verified about 2 months ago

raw

history blame contribute delete

3.88 kB

	import gradio as gr
	import librosa
	import numpy as np
	import tensorflow as tf
	import pickle
	from pydub import AudioSegment
	import os


	# === Load Model and Label Encoder ===
	# Load the Keras model and the label encoder once, at import time, so that
	# every prediction request reuses the same objects.
	model = tf.keras.models.load_model('final_model.h5')
	with open('label_chord.pkl', 'rb') as encoder_file:
	    # NOTE(review): unpickling runs arbitrary code; only ship a trusted label_chord.pkl.
	    label_encoder = pickle.load(encoder_file)

	# === Convert MP3 to WAV ===
	def convert_mp3_to_wav(mp3_path):
	    """Decode an MP3 file and export it as WAV next to the source file.

	    Only the final extension of ``mp3_path`` is swapped for ``.wav``; the
	    rest of the path is left untouched.

	    Args:
	        mp3_path: Path of the MP3 file to decode.

	    Returns:
	        Path of the exported WAV file.
	    """
	    sound = AudioSegment.from_mp3(mp3_path)
	    # Swap only the trailing extension. str.replace('.mp3', '.wav') would also
	    # rewrite '.mp3' inside directory names, and would return the input path
	    # unchanged (so the export overwrites the source) for a suffix like '.MP3'.
	    stem, _ = os.path.splitext(mp3_path)
	    wav_path = stem + '.wav'
	    sound.export(wav_path, format="wav")
	    return wav_path

	# === Convert Audio to Mel Spectrogram ===
	def audio_to_mel_spectrogram(y, sr):
	    """Turn an audio segment into a model-ready mel-spectrogram batch.

	    Args:
	        y: Audio samples of the segment.
	        sr: Sampling rate of ``y``.

	    Returns:
	        Array of shape (1, 128, 128, 3): the dB-scaled mel spectrogram resized
	        to 128x128, copied into three channels, with a leading batch axis.
	    """
	    normalized_audio = librosa.util.normalize(y)
	    mel_power = librosa.feature.melspectrogram(
	        y=normalized_audio, sr=sr, n_fft=2048, hop_length=512, n_mels=128
	    )
	    mel_db = librosa.power_to_db(mel_power, ref=np.max)
	    # Shift and scale towards 0-1; assumes the dB values lie in [-80, 0]
	    # (power_to_db's default dynamic range with ref=np.max) - TODO confirm.
	    mel_scaled = (mel_db + 80) / 80
	    # Add a channel axis so the spectrogram can be resized like an image.
	    mel_image = tf.image.resize(mel_scaled[..., np.newaxis], (128, 128)).numpy()
	    # Repeat the single channel three times, then prepend the batch axis.
	    mel_rgb = np.repeat(mel_image, 3, axis=-1)
	    return mel_rgb[np.newaxis, ...]

	# === Predict Chords ===
	def predict_chords(audio_path):
	    """Predict the chord progression of an audio file.

	    The audio is loaded at 22050 Hz and cut into consecutive one-second
	    windows; each window is classified and runs of identical consecutive
	    chords are collapsed into one entry. A trailing partial second is not
	    analysed.

	    Args:
	        audio_path: Path of the audio file. A path ending in ``.mp3`` is first
	            converted to WAV. May be ``None`` or empty when no audio was
	            provided.

	    Returns:
	        A display string: ``"Predicted Chords: "`` followed by the chords
	        joined with ``" - "``, or a prompt asking for audio when
	        ``audio_path`` is missing.
	    """
	    if not audio_path:
	        # Without this guard a click with no audio fails on None.endswith().
	        return "Please upload or record an audio file first."

	    converted_path = None
	    if audio_path.endswith('.mp3'):
	        converted_path = convert_mp3_to_wav(audio_path)
	        audio_path = converted_path

	    try:
	        y, sr = librosa.load(audio_path, sr=22050)
	    finally:
	        # The samples are in memory now, so the intermediate WAV is no longer
	        # needed; remove it so converted files do not pile up per request.
	        if converted_path is not None:
	            try:
	                os.remove(converted_path)
	            except OSError:
	                # Best-effort cleanup only; never fail a prediction over it.
	                pass

	    # Number of whole seconds; every window below is therefore full length.
	    n_segments = int(librosa.get_duration(y=y, sr=sr))

	    chords = []
	    if n_segments > 0:
	        # Build one batch and call the model once instead of once per second.
	        batch = np.concatenate(
	            [
	                audio_to_mel_spectrogram(y[i * sr:(i + 1) * sr], sr)
	                for i in range(n_segments)
	            ],
	            axis=0,
	        )
	        predictions = model.predict(batch)
	        # Flatten each sample's output before argmax so the index matches what
	        # np.argmax would return for that sample predicted on its own.
	        flat_scores = np.reshape(predictions, (len(predictions), -1))
	        predicted_indices = flat_scores.argmax(axis=1)

	        previous_chord = None
	        for predicted_index in predicted_indices:
	            predicted_chord = label_encoder.classes_[predicted_index]
	            predicted_chord = predicted_chord.replace('_', '')
	            # Keep only chord changes, not every repeated second.
	            if predicted_chord != previous_chord:
	                chords.append(predicted_chord)
	                previous_chord = predicted_chord

	    return f"Predicted Chords: {' - '.join(chords)}"

	# === Gradio Interface ===

	# Path of the bundled sample clip offered by the "Use Sample Audio" button.
	sample_audio_path = "example.mp3"

	# Page heading (a plain string: there is nothing to interpolate).
	title = "AI Chord Recognition"
	# Introductory HTML shown under the heading.
	description = """
	<div class='description'>
	Upload an audio file (<strong>.mp3</strong> or <strong>.wav</strong>) and let the AI predict the chord progression.
	You can also try my sample audio.
	</div>
	"""

	# Usage tutorial
	tutorial = """
	Tutorial: <br>
	1. Upload your own audio or you can record one yourself. <br>
	2. Use the sample audio file we have provided if needed. <br>
	3. Run the prediction by clicking the "Predict Chords" button.<br>
	4. Enjoy your song!
	"""

	# Build the page: headings, audio input, the two buttons, the result box,
	# then the tutorial and a disclaimer.
	with gr.Blocks() as interface:
	    # Include the external CSS file.
	    # NOTE(review): a <link> tag inside Markdown may not be honoured by Gradio;
	    # confirm the stylesheet is actually applied.
	    gr.Markdown("<link rel='stylesheet' href='style.css'>")
	    gr.Markdown(f"<h1 style='text-align: center;'>{title}</h1>")
	    gr.Markdown("<h3 style='text-align: center;'>Hello! I'm Tigor Neilson Sinaga, with NIM 22.11.4725.<br>Welcome to my AI Chord Recognition project!</h3>")
	    # The element is opened with <h3>, so it must be closed with </h3> (was </p>).
	    gr.Markdown(f"<h3 style='text-align: center;'>{description}</h3>")

	    audio_input = gr.Audio(type="filepath", label="Upload Audio (MP3/WAV)")
	    use_sample_button = gr.Button("Use Sample Audio", size="sm")
	    predict_button = gr.Button("Predict Chords")
	    output_text = gr.Textbox(label="Predicted Chords", lines=5, placeholder="Chord predictions will appear here...")

	    # Fill the audio input with the bundled sample clip.
	    use_sample_button.click(fn=lambda: sample_audio_path, inputs=[], outputs=audio_input)
	    # Run the chord prediction on whatever is in the audio input.
	    predict_button.click(fn=predict_chords, inputs=audio_input, outputs=output_text, queue=True)

	    # Add the usage tutorial.
	    gr.Markdown(f"<h3 style='text-align: center;'>{tutorial}</h3>")

	    gr.Markdown("<p style='text-align: center; font-size: 12px; color: grey;'>*This project is still under development and has not yet reached high accuracy.</p>")

	interface.launch(share=True)