import gradio as gr
import librosa
import numpy as np
import tensorflow as tf
import pickle
from pydub import AudioSegment
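
# Pipeline: load the audio, split it into 1-second segments, convert each
# segment to a 128x128, 3-channel mel spectrogram, classify it with the CNN,
# and collapse consecutive duplicate predictions into a chord progression.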
# === Load Model and Label Encoder ===
model = tf.keras.models.load_model('/content/drive/MyDrive/AI CHORD RECOGNITION/Final/final_model.h5')
with open('/content/drive/MyDrive/AI CHORD RECOGNITION/Final/label_chord.pkl', 'rb') as f:
    label_encoder = pickle.load(f)
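# Note: the Google Drive paths in this file come from the original Colab
# session; adjust them to wherever the model, encoder, and sample audio live.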

# === Convert MP3 to WAV ===
# pydub delegates MP3 decoding to ffmpeg, which must be installed and on PATH.
def convert_mp3_to_wav(mp3_path):
    sound = AudioSegment.from_mp3(mp3_path)
    wav_path = mp3_path.replace('.mp3', '.wav')
    sound.export(wav_path, format="wav")
    return wav_path

# === Convert Audio to Mel Spectrogram ===
def audio_to_mel_spectrogram(y, sr):
    y = librosa.util.normalize(y)
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512, n_mels=128)
    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
    mel_spectrogram_db = (mel_spectrogram_db + 80) / 80  # map the [-80, 0] dB range to [0, 1]
    mel_spectrogram_db = tf.image.resize(mel_spectrogram_db[..., np.newaxis], (128, 128)).numpy()
    mel_spectrogram_db = np.repeat(mel_spectrogram_db, 3, axis=-1)  # grayscale -> 3 channels
    return np.expand_dims(mel_spectrogram_db, axis=0)  # add batch dimension
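# The spectrogram is resized to 128x128 and tiled to 3 channels, presumably to
# match the (128, 128, 3) input shape of an image-style CNN backbone.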

# === Predict Chords ===
def predict_chords(audio_path):
    if audio_path.endswith('.mp3'):
        audio_path = convert_mp3_to_wav(audio_path)
    y, sr = librosa.load(audio_path, sr=22050)
    duration = librosa.get_duration(y=y, sr=sr)
    chords = []
    previous_chord = None
    # Classify the audio in non-overlapping 1-second windows.
    for i in range(int(duration)):
        start_sample = i * sr
        end_sample = (i + 1) * sr
        y_segment = y[start_sample:end_sample]
        if len(y_segment) == 0:
            continue
        mel_spectrogram = audio_to_mel_spectrogram(y_segment, sr)
        prediction = model.predict(mel_spectrogram)
        predicted_index = np.argmax(prediction)
        predicted_chord = label_encoder.classes_[predicted_index]
        predicted_chord = predicted_chord.replace('_', '')
        # Only record chord changes, collapsing consecutive duplicates.
        if predicted_chord != previous_chord:
            chords.append(predicted_chord)
            previous_chord = predicted_chord
    return f"Predicted Chords: {' - '.join(chords)}"

# === Gradio Interface ===
css = """
body {
    font-family: sans-serif;
}
.gradio-container {
    border-radius: 10px;
    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
    padding: 20px;
}
.gr-button {
    background-color: #4CAF50;
    color: white;
    border: none;
    padding: 6px 12px;
    font-size: 14px;
    cursor: pointer;
    border-radius: 5px;
}
.gr-button:hover {
    background-color: #4a894c;
}
.description {
    margin-top: 40px;
    margin-bottom: -10px;
}
"""
sample_audio_path = "/content/drive/MyDrive/AI CHORD RECOGNITION/Final/example.mp3"
title = f"AI Chord Recognition"
description = """
<div class='description'>
Upload an audio file (<strong>.mp3</strong> or <strong>.wav</strong>) and let the AI predict the chord progression.
You can also try my sample audio.
</div>
"""

with gr.Blocks(css=css) as interface:
    gr.Markdown(f"<h1 style='text-align: center;'>{title}</h1>")
    gr.Markdown("<h3 style='text-align: center;'>Hello! I'm Tigor Neilson Sinaga, with NIM 22.11.4725.<br>Welcome to my AI Chord Recognition project!</h3>")
    gr.Markdown(f"<div style='text-align: center;'>{description}</div>")
    audio_input = gr.Audio(type="filepath", label="Upload Audio (MP3/WAV)")
    use_sample_button = gr.Button("Use Sample Audio", size="sm")
    predict_button = gr.Button("Predict Chords")
    output_text = gr.Textbox(label="Predicted Chords", lines=5, placeholder="Chord predictions will appear here...")
    use_sample_button.click(fn=lambda: sample_audio_path, inputs=[], outputs=audio_input)
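    # queue=True routes this event through Gradio's queue, which is intended
    # to keep the long-running prediction loop from hitting request timeouts.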
    predict_button.click(fn=predict_chords, inputs=audio_input, outputs=output_text, queue=True)
gr.Markdown("<p style='text-align: center; font-size: 12px; color: grey;'>This project is still under development and has not yet reached high accuracy.</p>")
interface.launch(share=True)