v0.2.0-beta
- app.py +2 -2
- app_test.ipynb +1 -1
- cqfe_utils.py +14 -9
- pyproject.toml +1 -1
app.py
@@ -4,7 +4,7 @@ cqfe_interface = gr.Interface(fn=cqfe,
                               inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),
                               outputs=[gr.File(type='file', label='F0 Output Files'),
                                        gr.Plot(label='F0 Estimation Plot')],
-                              title="Choral Quartets F0 Extractor (v0.
+                              title="Choral Quartets F0 Extractor (v0.2.0-beta)",
                               description="An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.")
 
-cqfe_interface.launch()
+cqfe_interface.launch()
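For readers outside the repo, a minimal self-contained sketch of the interface this hunk patches may help. `cqfe_stub` and its return values are hypothetical stand-ins for the real `cqfe` pipeline; the Gradio calls mirror the ones visible in the diff (the Gradio 3-era API, where `gr.File(type='file')` is still accepted).

import gradio as gr
import matplotlib.pyplot as plt

def cqfe_stub(audio_path):
    # Hypothetical stand-in: the real cqfe() runs Multi-Pitch Estimation and
    # Voice Assignment, returning exported CSV/HDF5/MIDI paths plus an F0 plot.
    fig, ax = plt.subplots()
    ax.set_title('F0 Estimation Plot (stub)')
    return None, fig

demo = gr.Interface(fn=cqfe_stub,
                    inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),
                    outputs=[gr.File(type='file', label='F0 Output Files'),
                             gr.Plot(label='F0 Estimation Plot')],
                    title="Choral Quartets F0 Extractor (v0.2.0-beta)")

if __name__ == '__main__':
    demo.launch()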
app_test.ipynb
@@ -12,7 +12,7 @@
 "                              inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),\n",
 "                              outputs=[gr.File(type='file', label='F0 Output Files'),\n",
 "                                       gr.Plot(label='F0 Estimation Plot')],\n",
-"                              title=\"Choral Quartets F0 Extractor (v0.
+"                              title=\"Choral Quartets F0 Extractor (v0.2.0-beta)\",\n",
 "                              description=\"An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.\")\n",
 "\n",
 "cqfe_interface.launch()"
cqfe_utils.py
@@ -8,6 +8,8 @@ import pandas as pd
 from scipy.ndimage import gaussian_filter1d
 from cqfe_models import mask_voas_cnn_v2_model, late_deep_cnn_model
 
+SATB_THRESHOLDS = [0.23, 0.17, 0.15, 0.17]
+
 ############################################################
 
 freqscale = librosa.cqt_frequencies(n_bins=360, fmin=32.7, bins_per_octave=60)
@@ -134,9 +136,9 @@ def song_to_midi(sop, alto, ten, bass):
     freq_matrix = bin_matrix_to_freq(bin_matrix)
 
     mid_sop = create_midi(freq_matrix[0], save_to_file=False, program=52, channel=0)
-    mid_alto = create_midi(freq_matrix[1], save_to_file=False, program=
-    mid_ten = create_midi(freq_matrix[2], save_to_file=False, program=
-    mid_bass = create_midi(freq_matrix[3], save_to_file=False, program=
+    mid_alto = create_midi(freq_matrix[1], save_to_file=False, program=52, channel=1)
+    mid_ten = create_midi(freq_matrix[2], save_to_file=False, program=42, channel=2)
+    mid_bass = create_midi(freq_matrix[3], save_to_file=False, program=52, channel=3)
 
     mid_mix = mido.MidiFile()
     mid_mix.ticks_per_beat=mid_sop.ticks_per_beat
@@ -165,11 +167,14 @@ def song_to_dataframe(sop, alto, ten, bass):
 
 ############################################################
 
-def prediction_postproc(input_array, argmax_and_threshold=True,
+def prediction_postproc(input_array, argmax_and_threshold=True,
+                        gaussian_blur=True,
+                        threshold_value=0):
     prediction = np.moveaxis(input_array, 0, 1).reshape(360, -1)
+    thres_reference = deepcopy(prediction)
     if(argmax_and_threshold):
         prediction = np.argmax(prediction, axis=0)
-        prediction = np.array([i if i
+        prediction = np.array([prediction[i] if thres_reference[prediction[i], i] >= threshold_value else 0 for i in np.arange(prediction.size)])
         threshold = np.zeros((360, prediction.shape[0]))
         threshold[prediction, np.arange(prediction.size)] = 1
         prediction = threshold
@@ -273,10 +278,10 @@ def get_va_prediction(model, f0_matrix):
     t_pred_result = np.append(t_pred_result, t_pred, axis=0)
     b_pred_result = np.append(b_pred_result, b_pred, axis=0)
 
-    s_pred_result = prediction_postproc(s_pred_result)[:, :f0_matrix.shape[1]]
-    a_pred_result = prediction_postproc(a_pred_result)[:, :f0_matrix.shape[1]]
-    t_pred_result = prediction_postproc(t_pred_result)[:, :f0_matrix.shape[1]]
-    b_pred_result = prediction_postproc(b_pred_result)[:, :f0_matrix.shape[1]]
+    s_pred_result = prediction_postproc(s_pred_result, threshold_value=SATB_THRESHOLDS[0])[:, :f0_matrix.shape[1]]
+    a_pred_result = prediction_postproc(a_pred_result, threshold_value=SATB_THRESHOLDS[1])[:, :f0_matrix.shape[1]]
+    t_pred_result = prediction_postproc(t_pred_result, threshold_value=SATB_THRESHOLDS[2])[:, :f0_matrix.shape[1]]
+    b_pred_result = prediction_postproc(b_pred_result, threshold_value=SATB_THRESHOLDS[3])[:, :f0_matrix.shape[1]]
 
     return s_pred_result, a_pred_result, t_pred_result, b_pred_result
 
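The song_to_midi hunk above gives each voice its own MIDI channel and an explicit General MIDI program (in mido's 0-indexed numbering, program 52 is Choir Aahs and 42 is Cello). A minimal sketch of that per-voice track layout using mido directly, independent of the repo's create_midi helper; the note content is a placeholder:

import mido

# (name, GM program, channel) per voice, mirroring the hunk's values.
VOICES = [('Soprano', 52, 0), ('Alto', 52, 1), ('Tenor', 42, 2), ('Bass', 52, 3)]

mid_mix = mido.MidiFile()
for name, program, channel in VOICES:
    track = mido.MidiTrack()
    track.append(mido.MetaMessage('track_name', name=name, time=0))
    track.append(mido.Message('program_change', program=program, channel=channel, time=0))
    # One placeholder note per voice; the real file carries the estimated F0 stream.
    track.append(mido.Message('note_on', note=60, velocity=64, channel=channel, time=0))
    track.append(mido.Message('note_off', note=60, velocity=64, channel=channel, time=480))
    mid_mix.tracks.append(track)

mid_mix.save('quartet_stub.mid')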
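The new threshold logic in prediction_postproc keeps a copy of the salience map (thres_reference; the deepcopy call presumes a `from copy import deepcopy` import elsewhere in the file), takes the argmax bin per frame, and demotes any frame whose winning activation falls below the voice's floor in SATB_THRESHOLDS to bin 0. A vectorized sketch of the same idea, with hypothetical names:

import numpy as np

def thresholded_argmax(salience, threshold_value=0.0):
    # salience: (360 bins, T frames) activation map from the voice-assignment model.
    peaks = np.argmax(salience, axis=0)                           # winning bin per frame
    confident = salience[peaks, np.arange(peaks.size)] >= threshold_value
    peaks = np.where(confident, peaks, 0)                         # low-confidence frames -> bin 0
    one_hot = np.zeros_like(salience)
    one_hot[peaks, np.arange(peaks.size)] = 1.0                   # back to one-hot (360, T)
    return one_hot

rng = np.random.default_rng(0)
soprano = thresholded_argmax(rng.random((360, 8)), threshold_value=0.23)
assert soprano.sum(axis=0).max() == 1.0                           # one active bin per frame

Per the new SATB_THRESHOLDS, the soprano prediction needs the highest confidence (0.23) before a frame is kept, while tenor (0.15) is the most permissive.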
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "Choral-Quartets-F0-Extractor"
-version = "0.
+version = "0.2.0-beta"
 description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass)."
 authors = ["André Paiva (Xornotor) <[email protected]>"]
 license = "cc"