Added the modules and the required models
Browse files- audio_splitting.py +37 -0
- feature_extraction.py +110 -0
- models/bn.pkl +3 -0
- models/cnn.pkl +3 -0
- models/crnn.pkl +3 -0
- models/knn.pkl +3 -0
- models/logistic.pkl +3 -0
- models/model_bn.h5 +3 -0
- models/model_cnn.h5 +3 -0
- models/model_crnn.h5 +3 -0
- models/model_crnn1.h5 +3 -0
- models/model_nn.h5 +3 -0
- models/nn.pkl +3 -0
- models/nn1.pkl +3 -0
- models/scaler.pkl +3 -0
- models/std_scalar +0 -0
- models/std_scaler(1).pkl +3 -0
- models/svm.pkl +3 -0
- models/xgb.pkl +3 -0
- models/xgb_mlb.pkl +3 -0
audio_splitting.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pydub
|
2 |
+
from pydub import AudioSegment
|
3 |
+
import feature_extraction
|
4 |
+
import io
|
5 |
+
def split_audio(uploaded_file):
    """Cut a 3-second WAV segment out of *uploaded_file* and return its features.

    If the audio is shorter than 63 s the segment is taken from 0:00-0:03;
    otherwise from 1:00-1:03 (skipping a possible intro so the segment is
    more representative of the track).

    Parameters
    ----------
    uploaded_file : str or file-like
        Audio source accepted by ``pydub.AudioSegment.from_file``.

    Returns
    -------
    list
        Feature vector produced by ``feature_extraction.all_feature_extraction``.
    """
    audio = AudioSegment.from_file(uploaded_file)

    segment_duration = 3 * 1000  # 3 seconds, in milliseconds
    audio_duration = len(audio)  # pydub reports length in milliseconds

    if audio_duration < 63 * 1000:
        # Too short to start at 1:00 — take the first 3 seconds instead.
        segment = audio[:segment_duration]
    else:
        start_time = 60 * 1000  # 1 minute, in milliseconds
        segment = audio[start_time:start_time + segment_duration]

    # Export the segment in-memory so feature extraction can read it
    # like a regular WAV file, without touching the filesystem.
    output_stream = io.BytesIO()
    segment.export(output_stream, format="wav")
    output_stream.seek(0)  # rewind so readers start at the beginning

    return feature_extraction.all_feature_extraction(output_stream)
|
feature_extraction.py
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import librosa
|
2 |
+
import numpy as np
|
3 |
+
import joblib
|
4 |
+
import soundfile as sf
|
5 |
+
# Pre-fitted feature scaler, loaded once at import time and shared by scale().
# NOTE(review): the path is relative to the current working directory —
# confirm the app is always launched from the project root.
scaler = joblib.load("./models/std_scaler(1).pkl")
|
6 |
+
|
7 |
+
|
8 |
+
def load_audio_from_uploaded_file(uploaded_file):
    """Read an uploaded audio file and return ``(samples, sample_rate)``.

    Delegates to soundfile, which accepts both file paths and
    file-like objects (e.g. an upload stream).
    """
    samples, rate = sf.read(uploaded_file)
    return samples, rate
13 |
+
|
14 |
+
|
15 |
+
# sample_audio,sr = librosa.load(r"classical.00000.wav",sr = 44100)
|
16 |
+
# Column names for the extracted feature vector: file metadata first, then a
# mean/variance pair per spectral feature, tempo, and 20 MFCC mean/var pairs.
_SUMMARY_FIELDS = [
    'name', 'length',
    'chroma_stft_mean', 'chroma_stft_var',
    'rms_mean', 'rms_var',
    'spectral_centroid_mean', 'spectral_centroid_var',
    'spectral_bandwidth_mean', 'spectral_bandwidth_var',
    'rolloff_mean', 'rolloff_var',
    'zero_crossing_rate_mean', 'zero_crossing_rate_var',
    'harmony_mean', 'harmony_var',
    'percussive_mean', 'percussive_var',
    'tempo',
]

# The 40 MFCC columns follow a strict naming pattern, so generate them
# instead of hand-listing: mfcc1_mean, mfcc1_var, ..., mfcc20_var.
Fields = _SUMMARY_FIELDS + [
    f'mfcc{i}_{stat}' for i in range(1, 21) for stat in ('mean', 'var')
]

# Model input columns only: drops the non-numeric 'name' and 'length'.
short_field = Fields[2:]
|
30 |
+
|
31 |
+
|
32 |
+
def all_feature_extraction(audio_path, sample_rate=22050):
    """Extract the full feature vector for one audio file.

    Parameters
    ----------
    audio_path : str or file-like
        Audio source accepted by ``librosa.load``.
    sample_rate : int, optional
        Target sampling rate for loading (default 22050 Hz).

    Returns
    -------
    list
        ``[name, length, <mean/var pairs>, tempo, <20 MFCC mean/var pairs>]``
        in the column order of the module-level ``Fields`` list.
    """
    data_list = []
    # BUG FIX: sample_rate was previously ignored (sr hard-coded to 22050).
    # Honour the parameter; the default keeps the old behaviour for callers.
    audio_df, sr = librosa.load(audio_path, sr=sample_rate)
    data_list.append(audio_path)      # 'name' column
    data_list.append(len(audio_df))   # 'length' in samples

    def _append_mean_var(feature):
        # Each feature contributes its global mean and variance, in order.
        data_list.append(np.mean(feature))
        data_list.append(np.var(feature))

    # 1. Chroma STFT
    _append_mean_var(librosa.feature.chroma_stft(y=audio_df, hop_length=512))
    # 2. RMS energy
    _append_mean_var(librosa.feature.rms(y=audio_df))
    # 3-6. Spectral shape statistics
    _append_mean_var(librosa.feature.spectral_centroid(y=audio_df))
    _append_mean_var(librosa.feature.spectral_bandwidth(y=audio_df))
    _append_mean_var(librosa.feature.spectral_rolloff(y=audio_df))
    _append_mean_var(librosa.feature.zero_crossing_rate(y=audio_df))

    # 7. Harmonic / percussive separation (harmonic stats first, matching Fields)
    harmonic, percussive = librosa.effects.hpss(y=audio_df)
    _append_mean_var(harmonic)
    _append_mean_var(percussive)

    # 8. Tempo — collapse the per-frame estimate to a single scalar
    data_list.append(np.mean(librosa.feature.tempo(y=audio_df)))

    # 9. First 20 MFCCs: mean and variance per coefficient, interleaved
    mfccs = librosa.feature.mfcc(y=audio_df, sr=sr)
    row_means = np.mean(mfccs, axis=1)
    row_vars = np.var(mfccs, axis=1)
    for mean, var in zip(row_means[:20], row_vars[:20]):
        data_list.append(mean)
        data_list.append(var)

    return data_list
|
104 |
+
|
105 |
+
def scale(initial_features):
|
106 |
+
final_features = initial_features[2:]
|
107 |
+
final_features = np.array(final_features)
|
108 |
+
# Apply the loaded scaler to your single data point
|
109 |
+
scaled_data_point = scaler.transform([final_features])
|
110 |
+
return scaled_data_point
|
models/bn.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f123d228b0fc0654571fbfd957e23526cb380a9e909cf00fb2100dd52c573ead
|
3 |
+
size 2555061
|
models/cnn.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59d38f07cf80f7ed39a07f1d1652e38e3263519eb9bea309101502b45819e08a
|
3 |
+
size 182963
|
models/crnn.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04ee711027be2cae7385a07146efcecc056493f730a120dfe450ff4b6bdee2d2
|
3 |
+
size 342493
|
models/knn.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1ba96d036e23d7f20893202259a909569fae16c5004e3522c041f1e47a8eb8e
|
3 |
+
size 3245884
|
models/logistic.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9071b17bf3b8d130b8842b7db22fafe9d2f7895ada07e4ba587c4ba877f276bb
|
3 |
+
size 5783
|
models/model_bn.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:121c293728854ab08cb35a7b8086cdaa12ab7f54fbafc3a35fa26950705d84a8
|
3 |
+
size 2589340
|
models/model_cnn.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95d805c96c78b1c7c0394596860358a0024c3bbffda602e0e8eda33f0137f936
|
3 |
+
size 183464
|
models/model_crnn.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f09ea12931f67460b36814879aaab95d8946ff1b4a2f92190af33c97f029ac8
|
3 |
+
size 347624
|
models/model_crnn1.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55278fc28a3359c295b1a1d4000dfb1c8c61caaf2cfaa622fea938b702d99022
|
3 |
+
size 351720
|
models/model_nn.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b05ae23e3fd3ccc5393667aafec862a4dae1f382bb998cacb65446099ee9cf3
|
3 |
+
size 2515076
|
models/nn.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9bb18ab39efcbdf61b20fb32107fa7a2e380f4a999ef0c2f5a25bc178b5448d
|
3 |
+
size 2726129
|
models/nn1.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6ea85608e89d3b6f9dde8330c92ba7d225e51f5591d50e3bd90231b0d05771c
|
3 |
+
size 2496636
|
models/scaler.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:759ba8b2d1b3c34529392efc7d0729b24f98d4f7677197b516c140ac79802f2c
|
3 |
+
size 3311
|
models/std_scalar
ADDED
Binary file (3.31 kB). View file
|
|
models/std_scaler(1).pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f12102947c64acba9e9b8c6c8b89f455282cbc22a23565bfe2e5c4c224fa4350
|
3 |
+
size 3311
|
models/svm.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f280eacc31d08f200f5e2d85720a06d010223a8f098faf0641ee39948f13f49
|
3 |
+
size 3059947
|
models/xgb.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7aa82bd2754da1bd9775f424ef6007dc6bffcf335492e6869edc58646f1bb52e
|
3 |
+
size 18275161
|
models/xgb_mlb.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db529971c3f3e1d319502be2ec9015c3dc21a45b55bc800e26238295a3d502be
|
3 |
+
size 4584698
|