Xornotor committed
Commit e857dc9
1 Parent(s): 993f635

v0.2.0-beta

Files changed (4)
  1. app.py +2 -2
  2. app_test.ipynb +1 -1
  3. cqfe_utils.py +14 -9
  4. pyproject.toml +1 -1
app.py CHANGED
@@ -4,7 +4,7 @@ cqfe_interface = gr.Interface(fn=cqfe,
     inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),
     outputs=[gr.File(type='file', label='F0 Output Files'),
              gr.Plot(label='F0 Estimation Plot')],
-    title="Choral Quartets F0 Extractor (v0.1.3-alpha)",
+    title="Choral Quartets F0 Extractor (v0.2.0-beta)",
     description="An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.")
 
-cqfe_interface.launch()
+cqfe_interface.launch()
app_test.ipynb CHANGED
@@ -12,7 +12,7 @@
     "    inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),\n",
     "    outputs=[gr.File(type='file', label='F0 Output Files'),\n",
     "             gr.Plot(label='F0 Estimation Plot')],\n",
-    "    title=\"Choral Quartets F0 Extractor (v0.1.3-alpha)\",\n",
+    "    title=\"Choral Quartets F0 Extractor (v0.2.0-beta)\",\n",
     "    description=\"An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.\")\n",
     "\n",
     "cqfe_interface.launch()"
cqfe_utils.py CHANGED
@@ -8,6 +8,8 @@ import pandas as pd
 from scipy.ndimage import gaussian_filter1d
 from cqfe_models import mask_voas_cnn_v2_model, late_deep_cnn_model
 
+SATB_THRESHOLDS = [0.23, 0.17, 0.15, 0.17]
+
 ############################################################
 
 freqscale = librosa.cqt_frequencies(n_bins=360, fmin=32.7, bins_per_octave=60)
@@ -134,9 +136,9 @@ def song_to_midi(sop, alto, ten, bass):
     freq_matrix = bin_matrix_to_freq(bin_matrix)
 
     mid_sop = create_midi(freq_matrix[0], save_to_file=False, program=52, channel=0)
-    mid_alto = create_midi(freq_matrix[1], save_to_file=False, program=53, channel=1)
-    mid_ten = create_midi(freq_matrix[2], save_to_file=False, program=49, channel=2)
-    mid_bass = create_midi(freq_matrix[3], save_to_file=False, program=50, channel=3)
+    mid_alto = create_midi(freq_matrix[1], save_to_file=False, program=52, channel=1)
+    mid_ten = create_midi(freq_matrix[2], save_to_file=False, program=42, channel=2)
+    mid_bass = create_midi(freq_matrix[3], save_to_file=False, program=52, channel=3)
 
     mid_mix = mido.MidiFile()
     mid_mix.ticks_per_beat=mid_sop.ticks_per_beat
@@ -165,11 +167,14 @@ def song_to_dataframe(sop, alto, ten, bass):
 
 ############################################################
 
-def prediction_postproc(input_array, argmax_and_threshold=True, gaussian_blur=True):
+def prediction_postproc(input_array, argmax_and_threshold=True,
+                        gaussian_blur=True,
+                        threshold_value=0):
     prediction = np.moveaxis(input_array, 0, 1).reshape(360, -1)
+    thres_reference = deepcopy(prediction)
     if(argmax_and_threshold):
         prediction = np.argmax(prediction, axis=0)
-        prediction = np.array([i if i <= 357 else 0 for i in prediction])
+        prediction = np.array([prediction[i] if thres_reference[prediction[i], i] >= threshold_value else 0 for i in np.arange(prediction.size)])
         threshold = np.zeros((360, prediction.shape[0]))
         threshold[prediction, np.arange(prediction.size)] = 1
         prediction = threshold
@@ -273,10 +278,10 @@ def get_va_prediction(model, f0_matrix):
         t_pred_result = np.append(t_pred_result, t_pred, axis=0)
         b_pred_result = np.append(b_pred_result, b_pred, axis=0)
 
-    s_pred_result = prediction_postproc(s_pred_result)[:, :f0_matrix.shape[1]]
-    a_pred_result = prediction_postproc(a_pred_result)[:, :f0_matrix.shape[1]]
-    t_pred_result = prediction_postproc(t_pred_result)[:, :f0_matrix.shape[1]]
-    b_pred_result = prediction_postproc(b_pred_result)[:, :f0_matrix.shape[1]]
+    s_pred_result = prediction_postproc(s_pred_result, threshold_value=SATB_THRESHOLDS[0])[:, :f0_matrix.shape[1]]
+    a_pred_result = prediction_postproc(a_pred_result, threshold_value=SATB_THRESHOLDS[1])[:, :f0_matrix.shape[1]]
+    t_pred_result = prediction_postproc(t_pred_result, threshold_value=SATB_THRESHOLDS[2])[:, :f0_matrix.shape[1]]
+    b_pred_result = prediction_postproc(b_pred_result, threshold_value=SATB_THRESHOLDS[3])[:, :f0_matrix.shape[1]]
 
     return s_pred_result, a_pred_result, t_pred_result, b_pred_result
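A note on the song_to_midi change: MIDI program numbers select General MIDI patches, and assuming the values here are 0-indexed as in raw MIDI messages, the commit moves soprano, alto, and bass onto patch 52 (Choir Aahs) and the tenor onto patch 42 (Cello). A minimal mido sketch of the kind of program change this produces (the track and message values are illustrative, not taken from create_midi):

    import mido

    # Illustrative only: assumes 0-indexed GM patches (52 = Choir Aahs, 42 = Cello).
    mid = mido.MidiFile()
    track = mido.MidiTrack()
    mid.tracks.append(track)
    track.append(mido.Message('program_change', program=42, channel=2, time=0))  # tenor voice
    track.append(mido.Message('note_on', note=60, velocity=64, channel=2, time=0))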
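The main functional change is in prediction_postproc: after the per-frame argmax, a pitch bin is now kept only if the original activation at that bin clears the per-voice threshold_value (supplied from the new SATB_THRESHOLDS list in get_va_prediction); frames below it fall back to bin 0, i.e. are treated as unvoiced. A vectorized NumPy sketch of the same step, using a hypothetical helper name and assuming a (360, T) activation matrix:

    import numpy as np

    def threshold_argmax(activation, threshold_value=0.0):
        # Per-frame argmax over the 360 pitch bins.
        bins = np.argmax(activation, axis=0)
        # Peak activation of each frame's winning bin.
        peaks = activation[bins, np.arange(activation.shape[1])]
        # Frames whose peak is below the threshold fall back to bin 0 (unvoiced).
        bins = np.where(peaks >= threshold_value, bins, 0)
        # Rebuild the one-hot (360, T) matrix, as the diff does with `threshold`.
        one_hot = np.zeros_like(activation)
        one_hot[bins, np.arange(activation.shape[1])] = 1.0
        return one_hot

Per the new constants, the soprano uses the strictest threshold (0.23) and the tenor the loosest (0.15).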
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "Choral-Quartets-F0-Extractor"
-version = "0.1.3-alpha"
+version = "0.2.0-beta"
 description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass)."
 authors = ["André Paiva (Xornotor) <[email protected]>"]
 license = "cc"