Xornotor committed
Commit e857dc9
1 Parent(s): 993f635

v0.2.0-beta

Files changed (4)
  1. app.py +2 -2
  2. app_test.ipynb +1 -1
  3. cqfe_utils.py +14 -9
  4. pyproject.toml +1 -1
app.py CHANGED
@@ -4,7 +4,7 @@ cqfe_interface = gr.Interface(fn=cqfe,
     inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),
     outputs=[gr.File(type='file', label='F0 Output Files'),
              gr.Plot(label='F0 Estimation Plot')],
-    title="Choral Quartets F0 Extractor (v0.1.3-alpha)",
+    title="Choral Quartets F0 Extractor (v0.2.0-beta)",
     description="An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.")
 
-cqfe_interface.launch()
+cqfe_interface.launch()
app_test.ipynb CHANGED
@@ -12,7 +12,7 @@
     "    inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),\n",
     "    outputs=[gr.File(type='file', label='F0 Output Files'),\n",
     "             gr.Plot(label='F0 Estimation Plot')],\n",
-    "    title=\"Choral Quartets F0 Extractor (v0.1.3-alpha)\",\n",
+    "    title=\"Choral Quartets F0 Extractor (v0.2.0-beta)\",\n",
     "    description=\"An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.\")\n",
     "\n",
     "cqfe_interface.launch()"
cqfe_utils.py CHANGED
@@ -8,6 +8,8 @@ import pandas as pd
 from scipy.ndimage import gaussian_filter1d
 from cqfe_models import mask_voas_cnn_v2_model, late_deep_cnn_model
 
+SATB_THRESHOLDS = [0.23, 0.17, 0.15, 0.17]
+
 ############################################################
 
 freqscale = librosa.cqt_frequencies(n_bins=360, fmin=32.7, bins_per_octave=60)
@@ -134,9 +136,9 @@ def song_to_midi(sop, alto, ten, bass):
     freq_matrix = bin_matrix_to_freq(bin_matrix)
 
     mid_sop = create_midi(freq_matrix[0], save_to_file=False, program=52, channel=0)
-    mid_alto = create_midi(freq_matrix[1], save_to_file=False, program=53, channel=1)
-    mid_ten = create_midi(freq_matrix[2], save_to_file=False, program=49, channel=2)
-    mid_bass = create_midi(freq_matrix[3], save_to_file=False, program=50, channel=3)
+    mid_alto = create_midi(freq_matrix[1], save_to_file=False, program=52, channel=1)
+    mid_ten = create_midi(freq_matrix[2], save_to_file=False, program=42, channel=2)
+    mid_bass = create_midi(freq_matrix[3], save_to_file=False, program=52, channel=3)
 
     mid_mix = mido.MidiFile()
     mid_mix.ticks_per_beat=mid_sop.ticks_per_beat
@@ -165,11 +167,14 @@ def song_to_dataframe(sop, alto, ten, bass):
 
 ############################################################
 
-def prediction_postproc(input_array, argmax_and_threshold=True, gaussian_blur=True):
+def prediction_postproc(input_array, argmax_and_threshold=True,
+                        gaussian_blur=True,
+                        threshold_value=0):
     prediction = np.moveaxis(input_array, 0, 1).reshape(360, -1)
+    thres_reference = deepcopy(prediction)
     if(argmax_and_threshold):
         prediction = np.argmax(prediction, axis=0)
-        prediction = np.array([i if i <= 357 else 0 for i in prediction])
+        prediction = np.array([prediction[i] if thres_reference[prediction[i], i] >= threshold_value else 0 for i in np.arange(prediction.size)])
         threshold = np.zeros((360, prediction.shape[0]))
         threshold[prediction, np.arange(prediction.size)] = 1
         prediction = threshold
@@ -273,10 +278,10 @@ def get_va_prediction(model, f0_matrix):
         t_pred_result = np.append(t_pred_result, t_pred, axis=0)
         b_pred_result = np.append(b_pred_result, b_pred, axis=0)
 
-    s_pred_result = prediction_postproc(s_pred_result)[:, :f0_matrix.shape[1]]
-    a_pred_result = prediction_postproc(a_pred_result)[:, :f0_matrix.shape[1]]
-    t_pred_result = prediction_postproc(t_pred_result)[:, :f0_matrix.shape[1]]
-    b_pred_result = prediction_postproc(b_pred_result)[:, :f0_matrix.shape[1]]
+    s_pred_result = prediction_postproc(s_pred_result, threshold_value=SATB_THRESHOLDS[0])[:, :f0_matrix.shape[1]]
+    a_pred_result = prediction_postproc(a_pred_result, threshold_value=SATB_THRESHOLDS[1])[:, :f0_matrix.shape[1]]
+    t_pred_result = prediction_postproc(t_pred_result, threshold_value=SATB_THRESHOLDS[2])[:, :f0_matrix.shape[1]]
+    b_pred_result = prediction_postproc(b_pred_result, threshold_value=SATB_THRESHOLDS[3])[:, :f0_matrix.shape[1]]
 
     return s_pred_result, a_pred_result, t_pred_result, b_pred_result
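A note on the song_to_midi change: MIDI program numbers select General MIDI patches, and assuming the values here are 0-indexed as in raw MIDI messages, the commit moves soprano, alto, and bass onto patch 52 (Choir Aahs) and the tenor onto patch 42 (Cello). A minimal mido sketch of the kind of program change this produces (the track and message values are illustrative, not taken from create_midi):

    import mido

    # Illustrative only: assumes 0-indexed GM patches (52 = Choir Aahs, 42 = Cello).
    mid = mido.MidiFile()
    track = mido.MidiTrack()
    mid.tracks.append(track)
    track.append(mido.Message('program_change', program=42, channel=2, time=0))  # tenor voice
    track.append(mido.Message('note_on', note=60, velocity=64, channel=2, time=0))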
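The main functional change is in prediction_postproc: after the per-frame argmax, a pitch bin is now kept only if the original activation at that bin clears the per-voice threshold_value (supplied from the new SATB_THRESHOLDS list in get_va_prediction); frames below it fall back to bin 0, i.e. are treated as unvoiced. A vectorized NumPy sketch of the same step, using a hypothetical helper name and assuming a (360, T) activation matrix:

    import numpy as np

    def threshold_argmax(activation, threshold_value=0.0):
        # Per-frame argmax over the 360 pitch bins.
        bins = np.argmax(activation, axis=0)
        # Peak activation of each frame's winning bin.
        peaks = activation[bins, np.arange(activation.shape[1])]
        # Frames whose peak is below the threshold fall back to bin 0 (unvoiced).
        bins = np.where(peaks >= threshold_value, bins, 0)
        # Rebuild the one-hot (360, T) matrix, as the diff does with `threshold`.
        one_hot = np.zeros_like(activation)
        one_hot[bins, np.arange(activation.shape[1])] = 1.0
        return one_hot

Per the new constants, the soprano uses the strictest threshold (0.23) and the tenor the loosest (0.15).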
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "Choral-Quartets-F0-Extractor"
-version = "0.1.3-alpha"
+version = "0.2.0-beta"
 description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass)."
 authors = ["André Paiva (Xornotor) <[email protected]>"]
 license = "cc"