tigorsinaga's picture
Update app.py
66a740d verified
raw
history blame
3.88 kB
import gradio as gr
import librosa
import numpy as np
import tensorflow as tf
import pickle
from pydub import AudioSegment
import os
# === Load model and label encoder ===
# Both files are expected to sit next to app.py in the Space repository.
model = tf.keras.models.load_model('final_model.h5')
# NOTE(review): pickle.load is only safe because label_chord.pkl ships with
# this repo — never load pickles from untrusted sources.
with open('label_chord.pkl', 'rb') as f:
    label_encoder = pickle.load(f)  # sklearn-style encoder; .classes_ maps index -> chord name
# === MP3 to WAV conversion ===
def convert_mp3_to_wav(mp3_path):
    """Convert an MP3 file to WAV and return the new file's path.

    The WAV file is written alongside the input with the same base name.

    Parameters:
        mp3_path: filesystem path to an .mp3 file.

    Returns:
        Path of the exported .wav file.
    """
    sound = AudioSegment.from_mp3(mp3_path)
    # splitext swaps only the final extension; the original
    # mp3_path.replace('.mp3', '.wav') would also rewrite any ".mp3"
    # occurring earlier in the path (e.g. "my.mp3.files/track.mp3").
    wav_path = os.path.splitext(mp3_path)[0] + '.wav'
    sound.export(wav_path, format="wav")
    return wav_path
# === Audio to mel-spectrogram conversion ===
def audio_to_mel_spectrogram(y, sr):
    """Turn a raw waveform into a normalized mel-spectrogram batch.

    Parameters:
        y: 1-D audio samples.
        sr: sample rate in Hz.

    Returns:
        Array of shape (1, 128, 128, 3), values roughly in [0, 1],
        ready to feed to the image-style CNN classifier.
    """
    samples = librosa.util.normalize(y)
    mel = librosa.feature.melspectrogram(
        y=samples, sr=sr, n_fft=2048, hop_length=512, n_mels=128
    )
    mel_db = librosa.power_to_db(mel, ref=np.max)
    # power_to_db yields values in about [-80, 0] dB; shift/scale onto [0, 1].
    mel_db = (mel_db + 80) / 80
    # Resize to a fixed 128x128 so every segment matches the model's input.
    resized = tf.image.resize(mel_db[..., np.newaxis], (128, 128)).numpy()
    # Duplicate the single channel three times to mimic an RGB image.
    rgb_like = np.repeat(resized, 3, axis=-1)
    # Prepend the batch dimension expected by model.predict.
    return np.expand_dims(rgb_like, axis=0)
# === Chord prediction ===
def predict_chords(audio_path):
    """Predict the chord progression of an audio file.

    Splits the recording into 1-second windows, classifies each window with
    the global `model`, collapses consecutive duplicate predictions, and
    returns the sequence as a human-readable string.

    Parameters:
        audio_path: path to a .wav or .mp3 file (Gradio passes a filepath).

    Returns:
        "Predicted Chords: C - G - Am" style summary string.
    """
    # Case-insensitive check so ".MP3" uploads are converted too.
    if audio_path.lower().endswith('.mp3'):
        audio_path = convert_mp3_to_wav(audio_path)
    y, sr = librosa.load(audio_path, sr=22050)
    duration = librosa.get_duration(y=y, sr=sr)
    chords = []
    previous_chord = None
    # ceil instead of int() so a trailing partial second is not discarded;
    # audio_to_mel_spectrogram resizes any segment length to 128x128 anyway.
    for i in range(int(np.ceil(duration))):
        y_segment = y[i * sr:(i + 1) * sr]
        if len(y_segment) == 0:
            continue
        mel_spectrogram = audio_to_mel_spectrogram(y_segment, sr)
        prediction = model.predict(mel_spectrogram)
        predicted_index = int(np.argmax(prediction))
        predicted_chord = label_encoder.classes_[predicted_index]
        # Labels are stored like "C_maj"; strip underscores for display.
        predicted_chord = predicted_chord.replace('_', '')
        # Collapse runs of the same chord into a single entry.
        if predicted_chord != previous_chord:
            chords.append(predicted_chord)
        previous_chord = predicted_chord
    return f"Predicted Chords: {' - '.join(chords)}"
# === Gradio interface assets ===
sample_audio_path = "example.mp3"  # demo clip bundled with the Space

# Plain literal: the original was an f-string with no placeholders (lint F541).
title = "AI Chord Recognition"

description = """
<div class='description'>
Upload an audio file (<strong>.mp3</strong> or <strong>.wav</strong>) and let the AI predict the chord progression.
You can also try my sample audio.
</div>
"""

# Usage tutorial shown in the UI (intentionally kept in Indonesian).
tutorial = """
## Tutorial Penggunaan
1. Masukkan audio kamu atau kamu bisa merekam sendiri.
2. Gunakan file audio sample yang sudah kami sediakan jika butuh.
3. Jalankan dengan memencet tombol **"Predict Chords"**.
4. Enjoy your song!
"""
# === Gradio interface ===
# Fix: the original called gr.Blocks(css=css), but no `css` variable is
# defined anywhere in this file, so the app crashed with NameError on import.
# The stylesheet is already pulled in via the <link> Markdown tag below.
with gr.Blocks() as interface:
    gr.Markdown("<link rel='stylesheet' href='style.css'>")  # external CSS file
    gr.Markdown(f"<h1 style='text-align: center;'>{title}</h1>")
    gr.Markdown("<h3 style='text-align: center;'>Hello! I'm Tigor Neilson Sinaga, with NIM 22.11.4725.<br>Welcome to my AI Chord Recognition project!</h3>")
    # Fix: closing tag was </p>, mismatched with the opening <h3>.
    gr.Markdown(f"<h3 style='text-align: center;'>{description}</h3>")
    gr.Markdown(f"<h3 style='text-align: center;'>{tutorial}</h3>")  # usage tutorial

    audio_input = gr.Audio(type="filepath", label="Upload Audio (MP3/WAV)")
    use_sample_button = gr.Button("Use Sample Audio", size="sm")
    predict_button = gr.Button("Predict Chords")
    output_text = gr.Textbox(label="Predicted Chords", lines=5, placeholder="Chord predictions will appear here...")

    # Wire buttons: sample loader fills the audio widget, predict runs the model.
    use_sample_button.click(fn=lambda: sample_audio_path, inputs=[], outputs=audio_input)
    predict_button.click(fn=predict_chords, inputs=audio_input, outputs=output_text, queue=True)

    gr.Markdown("<p style='text-align: center; font-size: 12px; color: grey;'>This project is still under development and has not yet reached high accuracy.</p>")

interface.launch(share=True)