Hetan07 committed on
Commit
9100bbc
1 Parent(s): dc45e5e

Final changes

Browse files
Files changed (3) hide show
  1. app.py +126 -136
  2. audio_splitting.py +5 -20
  3. feature_extraction.py +11 -3
app.py CHANGED
@@ -3,153 +3,143 @@ import joblib
3
  import pandas as pd
4
  import numpy as np
5
  import xgboost
6
- # from sklearn.ensemble import GradientBoostingClassifier
7
- # from tensorflow.keras.models import load_model
8
- import tensorflow
9
- from keras.losses import binary_crossentropy
10
- from keras.optimizers import Adam
11
- from tensorflow import keras
12
- from keras.models import load_model
13
  # Local Imports
14
  import feature_extraction
15
  import audio_splitting
16
 
17
- # Create a Streamlit web app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  st.title("Music Genre Classifier")
19
  st.write("A single-label music genre classifier based and trained on the GTZAN Dataset available for use on "
20
  "Kaggle. All the models have been trained on that dataset.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  # Upload music file
22
- uploaded_file = st.file_uploader("Upload a music file", type=["mp3", "wav"])
 
23
 
 
24
  if uploaded_file is not None:
25
  # User selects a model
26
- all_models = ["K-Nearest Neighbors - (Single Label)", "Logistic Regression - (Single Label)", "Support Vector Machines - (Single Label)",
27
- "Neural Network - (Single Label)",
28
- "XGB Classifier - (Single Label)"]
29
- model_name = st.selectbox("Select a model", all_models)
30
- st.write(f"Predicition of following genres")
31
-
32
- multi_class_names = ["Metal", "Jazz", "Blues", "R&B", "Classical", "Reggae", "Rap & Hip-Hop", "Punk", "Rock",
33
- "Country", "Bebop", "Pop", "Soul", "Dance & Electronic", "Folk"]
34
-
35
- class_names = ["Blues", "Classical", "Country", "Disco", "HipHop",
36
- "Jazz", "Metal", "Pop", "Reggae", "Rock"]
37
-
38
- col1, col2 = st.columns(2)
39
- s = ''
40
- with col1:
41
- for i in class_names[:5]:
42
- s += "- " + i + "\n"
43
- st.markdown(s)
44
-
45
- s = ''
46
-
47
- with col2:
48
- for i in class_names[5:]:
49
- s += "- " + i + "\n"
50
- st.markdown(s)
51
- # st.write(multi_class_names)
52
-
53
- # Load the selected model
54
- if model_name == "K-Nearest Neighbors - (Single Label)":
55
- model = joblib.load("./models/knn.pkl")
56
- elif model_name == "Logistic Regression - (Single Label)":
57
- model = joblib.load("./models/logistic.pkl")
58
- elif model_name == "Support Vector Machines - (Single Label)":
59
- model = joblib.load("./models/svm.pkl")
60
- elif model_name == "Neural Network - (Single Label)":
61
- model = joblib.load("./models/nn.pkl")
62
- elif model_name == "XGB Classifier - (Single Label)":
63
- model = joblib.load("./models/xgb.pkl")
64
- elif model_name == "XGB - (Multi Label)":
65
- model = joblib.load("./models/xgb_mlb.pkl")
66
- elif model_name == "Convolutional Recurrent Neural Network - (Multi Label)":
67
- model = tensorflow.keras.models.load_model("../models/model_crnn1.h5", compile=False)
68
- model.compile(loss=binary_crossentropy,
69
- optimizer=Adam(),
70
- metrics=['accuracy'])
71
- elif model_name == "Neural Network - (Multi Label)":
72
- model = tensorflow.keras.models.load_model("../models/model_nn.h5", compile=False)
73
- model.compile(loss=binary_crossentropy,
74
- optimizer=Adam(),
75
- metrics=['accuracy'])
76
- elif model_name == "Batch Normalization - (Multi Label)":
77
- model = tensorflow.keras.models.load_model("../models/model_bn.h5", compile=False)
78
- model.compile(loss=binary_crossentropy,
79
- optimizer=Adam(),
80
- metrics=['accuracy'])
81
- # class_names = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
82
-
83
- xgb_multi_class_names = ["Rock", "Rap & Hip-Hop", "Soul", "Classical", "Dance & Electronic", "Blues","Jazz",
84
- "Country","Bebop","Folk","Reggae","R&B","Punk","Metal","Pop"]
85
-
86
- xmulti_class_names = ["Metal", "Blues", "Reggae", "Jazz", "Rock", "Folk", "Classical", "Dance & Electronic",
87
- "Punk","Bebop", "Pop", "R&B", "Country", "Rap & Hip-Hop", "Soul"]
88
- class_indices = {i: class_name for i, class_name in enumerate(class_names)}
89
-
90
- features_list,val_list = audio_splitting.split_audio(uploaded_file)
91
- features = feature_extraction.scale(features_list)
92
 
93
- # st.write(features)
94
- # Features Dataframe
95
- df = pd.DataFrame({
96
- "fname": ["Chroma_STFT"],
97
- "Values": val_list
98
- })
99
- st.dataframe(
100
- df,
101
- column_config={
102
- "name": "Features",
103
- "Values": st.column_config.LineChartColumn(
104
- "Graph Values",y_min=0,y_max = 10000
105
- )
106
- }
107
- )
108
-
109
-
110
- # Reshape the features to match the expected shape for prediction
111
- reshaped_features = features.reshape(1, -1)
112
- if model_name == "XGB - (Multi Label)":
113
- # Predict labels for the input features
114
- predicted_indices = model.predict(reshaped_features)
115
- print(predicted_indices)
116
- predicted_labels = []
117
- for i in range(0,len(predicted_indices[0])):
118
- if predicted_indices[0][i]==1.0:
119
- predicted_labels.append(xgb_multi_class_names[i])
120
- if predicted_labels:
121
- st.write(f"Predicted Genres: {', '.join(predicted_labels)}")
122
- else:
123
- st.write("No genres predicted for this input.")
124
- if model_name == "XGB Classifier - (Single Label)":
125
- predicted_indices = model.predict(reshaped_features)
126
- predicted_labels = [class_indices[i] for i in predicted_indices]
127
- st.write(f"Predicted Genre: {predicted_labels[0]}")
128
- elif model_name == "Convolutional Recurrent Neural Network - (Multi Label)"\
129
- or model_name == "Neural Network - (Multi Label)"\
130
- or model_name == "Batch Normalization - (Multi Label)":
131
- predicted_probabilities = model.predict(reshaped_features)
132
-
133
- # Set a threshold for class prediction (e.g., 0.5)
134
- threshold = 0.3
135
- print(predicted_probabilities)
136
- probabilities = []
137
- if model_name == "Convolutional Recurrent Neural Network - (Multi Label)":
138
- predicted_labels = [class_name for i, class_name in enumerate(multi_class_names) if
139
- predicted_probabilities[0][i] >= threshold]
140
- probabilities = [(class_name,predicted_probabilities[0][i]*100) for i, class_name in enumerate(multi_class_names)]
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  else:
143
- predicted_labels = [class_name for i,class_name in enumerate(xmulti_class_names) if
144
- predicted_probabilities[0][i] >= threshold]
145
- probabilities = [(class_name,predicted_probabilities[0][i]*100) for i, class_name in enumerate(xmulti_class_names)]
146
-
147
- if predicted_labels:
148
- st.write(f"All probabilities are:")
149
- st.write(probabilities)
150
- st.write(f"Predicted Genres: {', '.join(predicted_labels)}")
151
- else:
152
- st.write("No genre predicted above the threshold.")
153
- else:
154
- predicted_label = model.predict(features)[0]
155
- st.metric("Predicted Genre:",str(predicted_label))
 
3
  import pandas as pd
4
  import numpy as np
5
  import xgboost
6
+
 
 
 
 
 
 
7
  # Local Imports
8
  import feature_extraction
9
  import audio_splitting
10
 
11
+ st.set_page_config(layout="wide")
12
+ # Vars
13
+ fields_df = ['Chromagram Short-Time Fourier Transform (Chroma-STFT)',
14
+ 'Root Mean Square Energy (RMS)',
15
+ 'Spectral Centroid',
16
+ 'Spectral Bandwidth',
17
+ 'Spectral Rolloff',
18
+ 'Zero Crossing Rate',
19
+ 'Harmony',
20
+ 'Percussion',
21
+ 'Tempo',
22
+ 'Mel-Frequency Cepstral Coefficients (MFCC-1)',
23
+ 'MFCC-2',
24
+ 'MFCC-3',
25
+ 'MFCC-4',
26
+ 'MFCC-5',
27
+ 'MFCC-6',
28
+ 'MFCC-7',
29
+ 'MFCC-8',
30
+ 'MFCC-9',
31
+ 'MFCC-10',
32
+ 'MFCC-11',
33
+ 'MFCC-12',
34
+ 'MFCC-13',
35
+ 'MFCC-14',
36
+ 'MFCC-15',
37
+ 'MFCC-16',
38
+ 'MFCC-17',
39
+ 'MFCC-18',
40
+ 'MFCC-19',
41
+ 'MFCC-20', ]
42
+
43
  st.title("Music Genre Classifier")
44
  st.write("A single-label music genre classifier based and trained on the GTZAN Dataset available for use on "
45
  "Kaggle. All the models have been trained on that dataset.")
46
+
47
+ st.write("Prediction of following genres")
48
+
49
+ class_names = ["Blues", "Classical", "Country", "Disco", "HipHop",
50
+ "Jazz", "Metal", "Pop", "Reggae", "Rock"]
51
+
52
+ class_indices = {i: class_name for i, class_name in enumerate(class_names)}
53
+
54
+ col1, col2 = st.columns(2)
55
+ s = ''
56
+ with col1:
57
+ for i in class_names[:5]:
58
+ s += "- " + i + "\n"
59
+ st.markdown(s)
60
+
61
+ s = ''
62
+
63
+ with col2:
64
+ for i in class_names[5:]:
65
+ s += "- " + i + "\n"
66
+ st.markdown(s)
67
+
68
+ st.divider()
69
  # Upload music file
70
+ st.subheader("Upload a music file")
71
+ uploaded_file = st.file_uploader("Upload a music file", type=["mp3", "wav", "ogg"], label_visibility="collapsed")
72
 
73
+ st.divider()
74
  if uploaded_file is not None:
75
  # User selects a model
76
+ all_models = ["K-Nearest Neighbors",
77
+ "Logistic Regression",
78
+ "Support Vector Machines",
79
+ "Neural Network",
80
+ "XGB Classifier"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
+ features_list, val_list = audio_splitting.split_audio(uploaded_file)
83
+ features = feature_extraction.scale(features_list)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
+ feature_copy = features_list
86
+ feature_copy.insert(19, "-")
87
+ st.header("Feature Extraction")
88
+
89
+ st.write("The given audio sample is processed using the librosa library to get the features extracted used by the "
90
+ "models for genre prediction. Following is the dataframe with each of the feature extracted and "
91
+ "corresponding mean and variance of the feature")
92
+
93
+ col3, col4 = st.columns([0.6,0.4])
94
+ with col3:
95
+
96
+ # Features Dataframe
97
+ df = pd.DataFrame({
98
+ "name": fields_df,
99
+ "Mean": feature_copy[2::2],
100
+ "Variance": feature_copy[3::2]
101
+ })
102
+
103
+ st.dataframe(
104
+ df,
105
+ column_config={
106
+ "name": "Features",
107
+ "Mean": "Mean of Feature",
108
+ "Variance": "Variance of Feature"
109
+ },
110
+ use_container_width=True
111
+ )
112
+
113
+ with col4:
114
+
115
+ col1, col2 = st.columns([0.55, 0.45])
116
+
117
+ col1.subheader("Select a model")
118
+ with col1:
119
+ model_name = st.selectbox("Select a model", all_models, label_visibility="collapsed")
120
+
121
+ # Load the selected model
122
+ if model_name == "K-Nearest Neighbors":
123
+ model = joblib.load("./models/knn.pkl")
124
+ elif model_name == "Logistic Regression":
125
+ model = joblib.load("./models/logistic.pkl")
126
+ elif model_name == "Support Vector Machines":
127
+ model = joblib.load("./models/svm.pkl")
128
+ elif model_name == "Neural Network":
129
+ model = joblib.load("./models/nn.pkl")
130
+ elif model_name == "XGB Classifier":
131
+ model = joblib.load("./models/xgb.pkl")
132
+ col2.subheader("Predicted genre")
133
+
134
+ # Reshape the features to match the expected shape for prediction
135
+ reshaped_features = features.reshape(1, -1)
136
+
137
+ if model_name == "XGB Classifier":
138
+ predicted_indices = model.predict(reshaped_features)
139
+ predicted_labels = [class_indices[i] for i in predicted_indices]
140
+ with col2:
141
+ st.metric("Predicted Genre:", str(predicted_labels[0]), label_visibility="collapsed")
142
  else:
143
+ predicted_label = model.predict(features)[0]
144
+ with col2:
145
+ st.metric("Predicted Genre:", str(predicted_label).capitalize(), label_visibility="collapsed")
 
 
 
 
 
 
 
 
 
 
audio_splitting.py CHANGED
@@ -1,41 +1,26 @@
1
- import pydub
2
- import streamlit
3
  from pydub import AudioSegment
4
  import feature_extraction
5
  import io
6
  def split_audio(uploaded_file):
7
- # Load your audio file
8
- # audio = AudioSegment.from_file("classical.00000.wav", format="wav")
9
  audio = AudioSegment.from_file(uploaded_file)
10
- print("Works")
11
- # Define the duration of each segment in milliseconds (3 seconds)
12
- segment_duration = 3 * 1000 # 3 seconds in milliseconds
13
 
14
- # Check the total duration of the audio
15
  audio_duration = len(audio)
16
 
17
- print("works")
18
  # Check if the audio is shorter than 1 minute and 3 seconds
19
  if audio_duration < 63 * 1000:
20
  # If it's shorter, take audio from 0 to 3 seconds
21
  segment = audio[:segment_duration]
22
  else:
23
  # If it's longer, take audio from 1 minute to 1 minute 3 seconds
24
- start_time = 60 * 1000 # 1 minute in milliseconds
25
  end_time = start_time + segment_duration
26
  segment = audio[start_time:end_time]
 
27
  output_stream = io.BytesIO()
28
  segment.export(output_stream, format="wav")
29
- print("Works")
30
- # Now you can directly use the output_stream for feature extraction
31
- output_stream.seek(0) # Reset the stream position to the beginning
32
 
33
  # Process and extract features from the segment
34
  features = feature_extraction.all_feature_extraction(output_stream)
35
- print(features)
36
- streamlit.write(features)
37
- return features
38
- # output_file = "D:/miniproject/output_segment.wav"
39
-
40
- # Save the segment to a new file
41
- # segment.export(output_file, format="wav")
 
 
 
1
  from pydub import AudioSegment
2
  import feature_extraction
3
  import io
4
  def split_audio(uploaded_file):
 
 
5
  audio = AudioSegment.from_file(uploaded_file)
 
 
 
6
 
7
+ segment_duration = 3 * 1000 # 3 seconds in milliseconds
8
  audio_duration = len(audio)
9
 
 
10
  # Check if the audio is shorter than 1 minute and 3 seconds
11
  if audio_duration < 63 * 1000:
12
  # If it's shorter, take audio from 0 to 3 seconds
13
  segment = audio[:segment_duration]
14
  else:
15
  # If it's longer, take audio from 1 minute to 1 minute 3 seconds
16
+ start_time = 60 * 1000
17
  end_time = start_time + segment_duration
18
  segment = audio[start_time:end_time]
19
+
20
  output_stream = io.BytesIO()
21
  segment.export(output_stream, format="wav")
22
+ output_stream.seek(0)
 
 
23
 
24
  # Process and extract features from the segment
25
  features = feature_extraction.all_feature_extraction(output_stream)
26
+ return features
 
 
 
 
 
 
feature_extraction.py CHANGED
@@ -32,11 +32,17 @@ short_field = Fields[2:]
32
  def all_feature_extraction(audio_path, sample_rate=22050):
33
  data_list = []
34
  val_field = []
35
-
36
- audio_df, sr = librosa.load(audio_path, sr=22050)
 
 
 
 
 
 
37
  data_list.append(audio_path)
38
  data_list.append(len(audio_df))
39
-
40
 
41
  # 1. Chroma STFT
42
  chroma_stft = librosa.feature.chroma_stft(y=audio_df, hop_length=512)
@@ -47,6 +53,8 @@ def all_feature_extraction(audio_path, sample_rate=22050):
47
  data_list.append(chroma_stft_mean)
48
  data_list.append(chroma_stft_var)
49
 
 
 
50
  # 2. RMS
51
  rms = librosa.feature.rms(y=audio_df)
52
  rms_mean = np.mean(rms)
 
32
  def all_feature_extraction(audio_path, sample_rate=22050):
33
  data_list = []
34
  val_field = []
35
+ print(data_list)
36
+ try:
37
+ audio_df, sr = sf.read(audio_path)
38
+ print("Audio loaded successfully.")
39
+ print("Shape of audio data:", audio_df.shape)
40
+ print("Sample rate:", sr)
41
+ except Exception as e:
42
+ print("Error loading audio file:", e)
43
  data_list.append(audio_path)
44
  data_list.append(len(audio_df))
45
+ print(data_list)
46
 
47
  # 1. Chroma STFT
48
  chroma_stft = librosa.feature.chroma_stft(y=audio_df, hop_length=512)
 
53
  data_list.append(chroma_stft_mean)
54
  data_list.append(chroma_stft_var)
55
 
56
+ print(data_list,val_field)
57
+
58
  # 2. RMS
59
  rms = librosa.feature.rms(y=audio_df)
60
  rms_mean = np.mean(rms)