# Spaces: Sleeping  (appears to be page-status text captured with the file; not part of the program)
# AUTOGENERATED! DO NOT EDIT!

# %% auto 0
# Public names exported by this module.
__all__ = [
    'learn',
    'categories',
    'audio',
    'label',
    'inf',
    'extract_emotion',
    'get_y',
    'classify_audio',
]
import os
import tempfile
from pathlib import Path

# The star import also supplies names used below without an explicit
# import (e.g. ``np`` and ``load_learner``).
from fastai.vision.all import *
import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
def extract_emotion(file_name: str) -> str:
    """
    Return the emotion label encoded in an audio file name.

    The label is the text after the last underscore with the file
    extension removed, e.g. ``"OAF_back_angry.wav"`` -> ``"angry"``.

    Args:
        file_name: Name of the audio file.

    Returns:
        The label portion of the file name.
    """
    # Everything after the last underscore (the whole name if there is none)
    label_with_extension = file_name.rpartition('_')[2]
    # Strip the extension whatever its length instead of assuming 4 characters
    label, _ = os.path.splitext(label_with_extension)
    return label
def get_y(filepath):
    """
    Return the emotion label for the audio file at ``filepath``.

    Only the final path component is passed to ``extract_emotion``;
    ``os.path.basename`` is used so the platform's own path separators
    are honoured rather than a hard-coded "/".
    """
    return extract_emotion(os.path.basename(str(filepath)))
# Load the exported learner from disk.
# NOTE(review): presumably the exported learner refers to `get_y` by name,
# which would be why it is defined above although nothing here calls it — confirm.
learn = load_learner("emotion_model.pkl")
# Class vocabulary of the learner's DataLoaders; zipped with the predicted
# probabilities in classify_audio.
categories = learn.dls.vocab
def classify_audio(audio_file):
    """
    Predict the emotion expressed in an audio file.

    At most the first 20 seconds of audio are loaded, converted to a mel
    spectrogram in decibels (relative to the peak), rendered to a PNG
    image with the axes hidden, and that image is passed to ``learn``.

    Args:
        audio_file: Path of the audio file to classify, as accepted by
            ``librosa.load``.

    Returns:
        dict mapping each entry of ``categories`` to its predicted
        probability as a float.
    """
    # Keep the file's native sampling rate (sr=None); cap the length at 20 s
    sample, sample_rate = librosa.load(audio_file, sr=None, duration=20)

    # Mel spectrogram on a dB scale relative to its maximum
    S = librosa.feature.melspectrogram(y=sample, sr=sample_rate)
    S_DB = librosa.power_to_db(S, ref=np.max)

    fig, ax = plt.subplots()
    try:
        # Same rendering order as before (layout first, then the plot) so
        # the image handed to the model is unchanged
        fig.tight_layout(pad=0)
        librosa.display.specshow(S_DB, sr=sample_rate, x_axis='time',
                                 y_axis='mel', ax=ax)
        # Operate on this figure/axes explicitly instead of pyplot's
        # implicit "current" figure
        ax.axis('off')

        # A private temporary directory gives every call its own image
        # path and removes the file even when prediction fails
        with tempfile.TemporaryDirectory() as tmp_dir:
            temp_img_path = Path(tmp_dir) / "temp_spectogram.png"
            fig.savefig(temp_img_path)
            _, _, probs = learn.predict(temp_img_path)
    finally:
        # pyplot keeps figures alive until closed; release it on every call
        plt.close(fig)

    return dict(zip(categories, map(float, probs)))
# Markdown text shown under the title of the Gradio interface.
description = """
## Welcome to the app that recognizes emotion from the audio! Upload/record your audio (no more than 20 seconds) and see the model prediction of the emotions.
## 7 Emotions the app recognizes: Anger, Disgust, Fear, Happiness, Pleasant Surprise (ps), Sadness, Neutral
"""

# Input component: the handler receives the audio as a file path.
audio = gr.Audio(type="filepath", label="Upload Audio")
# Output component: displays the label -> probability dict from classify_audio.
label = gr.Label()

# Gradio Interface
inf = gr.Interface(
    fn=classify_audio,
    inputs=audio,
    outputs=label,
    title="Emotion Recognition",
    description=description,
)
inf.launch(share=True)