Update: CSV, HDF5 and MIDI outputs
Browse files- app.py +7 -7
- app_test.ipynb +9 -39
- cq2m_models.py → cqfe_models.py +0 -0
- cq2m_utils.py → cqfe_utils.py +39 -5
- pyproject.toml +6 -4
- requirements.txt +3 -1
app.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import gradio as gr
|
2 |
-
import
|
3 |
-
|
4 |
-
inputs=gr.Audio(type='filepath', format='wav'),
|
5 |
-
outputs=gr.File(type='file'),
|
6 |
-
title="Choral Quartets
|
7 |
-
description="An application that uses Multi-Pitch Estimation and Voice Assignment to transform
|
8 |
|
9 |
-
|
|
|
1 |
import gradio as gr
|
2 |
+
from cqfe_utils import cqfe
|
3 |
+
# Build the Gradio UI: a single audio upload is handed to `cqfe`, and
# whatever `cqfe` returns is offered back through a file component.
cqfe_interface = gr.Interface(
    fn=cqfe,
    inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),
    outputs=gr.File(type='file', label='F0 Output Files'),
    title="Choral Quartets F0 Extractor",
    description="An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.",
)

# Launched at module level (no __main__ guard), so running or importing
# this file starts the interface.
cqfe_interface.launch()
|
app_test.ipynb
CHANGED
@@ -7,52 +7,22 @@
|
|
7 |
"outputs": [],
|
8 |
"source": [
|
9 |
"import gradio as gr\n",
|
10 |
-
"import
|
11 |
]
|
12 |
},
|
13 |
{
|
14 |
"cell_type": "code",
|
15 |
-
"execution_count":
|
16 |
"metadata": {},
|
17 |
-
"outputs": [
|
18 |
-
{
|
19 |
-
"name": "stdout",
|
20 |
-
"output_type": "stream",
|
21 |
-
"text": [
|
22 |
-
"Running on local URL: http://127.0.0.1:7860\n",
|
23 |
-
"\n",
|
24 |
-
"To create a public link, set `share=True` in `launch()`.\n"
|
25 |
-
]
|
26 |
-
},
|
27 |
-
{
|
28 |
-
"data": {
|
29 |
-
"text/html": [
|
30 |
-
"<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
31 |
-
],
|
32 |
-
"text/plain": [
|
33 |
-
"<IPython.core.display.HTML object>"
|
34 |
-
]
|
35 |
-
},
|
36 |
-
"metadata": {},
|
37 |
-
"output_type": "display_data"
|
38 |
-
},
|
39 |
-
{
|
40 |
-
"data": {
|
41 |
-
"text/plain": []
|
42 |
-
},
|
43 |
-
"execution_count": 2,
|
44 |
-
"metadata": {},
|
45 |
-
"output_type": "execute_result"
|
46 |
-
}
|
47 |
-
],
|
48 |
"source": [
|
49 |
-
"
|
50 |
-
" inputs=gr.Audio(type='filepath', format='wav'),\n",
|
51 |
-
" outputs=gr.File(type='file'),\n",
|
52 |
-
" title=\"Choral Quartets
|
53 |
-
" description=\"An application that uses Multi-Pitch Estimation and Voice Assignment to transform
|
54 |
"\n",
|
55 |
-
"
|
56 |
]
|
57 |
}
|
58 |
],
|
|
|
7 |
"outputs": [],
|
8 |
"source": [
|
9 |
"import gradio as gr\n",
|
10 |
+
"from cqfe_utils import cqfe\n"
|
11 |
]
|
12 |
},
|
13 |
{
|
14 |
"cell_type": "code",
|
15 |
+
"execution_count": null,
|
16 |
"metadata": {},
|
17 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
"source": [
|
19 |
+
"cqfe_interface = gr.Interface(fn=cqfe,\n",
|
20 |
+
" inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),\n",
|
21 |
+
" outputs=gr.File(type='file', label='F0 Output Files'),\n",
|
22 |
+
" title=\"Choral Quartets F0 Extractor\",\n",
|
23 |
+
" description=\"An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.\")\n",
|
24 |
"\n",
|
25 |
+
"cqfe_interface.launch()"
|
26 |
]
|
27 |
}
|
28 |
],
|
cq2m_models.py → cqfe_models.py
RENAMED
File without changes
|
cq2m_utils.py → cqfe_utils.py
RENAMED
@@ -2,9 +2,20 @@ import os
|
|
2 |
import math
|
3 |
import mido
|
4 |
import pumpp
|
|
|
5 |
import numpy as np
|
|
|
6 |
from scipy.ndimage import gaussian_filter1d
|
7 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
############################################################
|
10 |
|
@@ -134,6 +145,28 @@ def song_to_midi(sop, alto, ten, bass):
|
|
134 |
|
135 |
############################################################
|
136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
def prediction_postproc(input_array, argmax_and_threshold=True, gaussian_blur=True):
|
138 |
prediction = np.moveaxis(input_array, 0, 1).reshape(360, -1)
|
139 |
if(argmax_and_threshold):
|
@@ -190,7 +223,7 @@ def get_mpe_prediction(model, audio_file=None):
|
|
190 |
Part of this function is part of deepsalience
|
191 |
"""
|
192 |
|
193 |
-
split_value =
|
194 |
|
195 |
if audio_file is not None:
|
196 |
|
@@ -250,10 +283,11 @@ def get_va_prediction(model, f0_matrix):
|
|
250 |
|
251 |
############################################################
|
252 |
|
253 |
-
def
|
254 |
mpe_pred = get_mpe_prediction(mpe, audiofile)
|
255 |
s_pred, a_pred, t_pred, b_pred = get_va_prediction(va, mpe_pred)
|
256 |
-
|
257 |
-
|
|
|
258 |
|
259 |
############################################################
|
|
|
2 |
import math
|
3 |
import mido
|
4 |
import pumpp
|
5 |
+
import librosa
|
6 |
import numpy as np
|
7 |
+
import pandas as pd
|
8 |
from scipy.ndimage import gaussian_filter1d
|
9 |
+
from cqfe_models import mask_voas_cnn_v2_model, late_deep_cnn_model
|
10 |
+
|
11 |
+
############################################################
|
12 |
+
|
13 |
+
# Lookup table of CQT bin frequencies: 360 bins, 60 bins per octave,
# lowest bin at 32.7 (Hz, per librosa's `cqt_frequencies`).
freqscale = librosa.cqt_frequencies(
    n_bins=360,
    fmin=32.7,
    bins_per_octave=60,
)


def bin_to_freq(bin):
    """Return the entry of ``freqscale`` stored at index ``bin``."""
    return freqscale[bin]


# Element-wise wrapper so whole arrays of bin indices can be converted
# with a single call.
vec_bin_to_freq = np.vectorize(bin_to_freq)
|
19 |
|
20 |
############################################################
|
21 |
|
|
|
145 |
|
146 |
############################################################
|
147 |
|
148 |
+
def song_to_tables(sop, alto, ten, bass, *,
                   savepath_csv='./output.csv',
                   savepath_hdf5='./output.hdf5',
                   frame_period=0.011609977):
    """Write the per-voice F0 tracks to a CSV file and an HDF5 file.

    For every frame (column) of each voice array, the row index with the
    largest value is taken and converted to a frequency through
    ``vec_bin_to_freq``. The four resulting tracks are stored next to a
    time axis in a table with the columns ``Timestep``, ``Soprano``,
    ``Alto``, ``Tenor`` and ``Bass``.

    Args:
        sop, alto, ten, bass: 2-D arrays indexed as (bin, frame). All four
            must have the same number of frames as ``sop``.
            NOTE(review): presumably the voice-assignment outputs with 360
            bins per frame -- confirm against the caller.
        savepath_csv: Destination of the CSV table (overwritten).
        savepath_hdf5: Destination of the HDF5 table (overwritten); the
            table is stored under the key ``'F0'``.
        frame_period: Spacing between consecutive frames on the time axis.
            NOTE(review): the default looks like 256 / 22050 seconds --
            confirm against the feature-extraction hop settings.

    Returns:
        Tuple ``(savepath_csv, savepath_hdf5)``.
    """
    n_frames = sop.shape[1]

    # An integer ramp scaled by the period always has exactly n_frames
    # entries. np.arange with a float step may overshoot by one element,
    # which the previous code had to slice away.
    timescale = np.arange(n_frames) * frame_period

    # Strongest bin per frame, mapped to its frequency, for each voice.
    f0_tracks = [
        vec_bin_to_freq(np.argmax(voice, axis=0))
        for voice in (sop, alto, ten, bass)
    ]

    data = np.array([timescale, *f0_tracks], dtype=np.float32).T
    columns = ['Timestep', 'Soprano', 'Alto', 'Tenor', 'Bass']

    df = pd.DataFrame(data, columns=columns)
    df.to_csv(savepath_csv, mode='w', header=True)
    df.to_hdf(savepath_hdf5, key='F0', mode='w', complevel=9,
              complib='blosc', append=False, format='table')

    return savepath_csv, savepath_hdf5
|
167 |
+
|
168 |
+
############################################################
|
169 |
+
|
170 |
def prediction_postproc(input_array, argmax_and_threshold=True, gaussian_blur=True):
|
171 |
prediction = np.moveaxis(input_array, 0, 1).reshape(360, -1)
|
172 |
if(argmax_and_threshold):
|
|
|
223 |
Part of this function is part of deepsalience
|
224 |
"""
|
225 |
|
226 |
+
split_value = 4000
|
227 |
|
228 |
if audio_file is not None:
|
229 |
|
|
|
283 |
|
284 |
############################################################
|
285 |
|
286 |
+
def cqfe(audiofile, mpe=late_deep_cnn_model(), va=mask_voas_cnn_v2_model()):
    """Run the full extraction pipeline on one audio file.

    The audio is passed through the multi-pitch model, the result is split
    into four voices by the voice-assignment model, and the voices are
    exported through ``song_to_midi`` and ``song_to_tables``.

    Args:
        audiofile: Audio input forwarded to ``get_mpe_prediction``.
        mpe: Multi-pitch estimation model. The default is created once,
            when this function is defined, and reused by every call.
        va: Voice-assignment model. The default is likewise created once
            at definition time.

    Returns:
        A list ``[midi, csv, hdf5]`` holding the value returned by
        ``song_to_midi`` followed by the two paths returned by
        ``song_to_tables``.
    """
    multipitch = get_mpe_prediction(mpe, audiofile)
    soprano, alto, tenor, bass = get_va_prediction(va, multipitch)

    # MIDI is exported first, then the tabular formats, as before.
    midi_result = song_to_midi(soprano, alto, tenor, bass)
    csv_path, hdf5_path = song_to_tables(soprano, alto, tenor, bass)

    return [midi_result, csv_path, hdf5_path]
|
292 |
|
293 |
############################################################
|
pyproject.toml
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
[tool.poetry]
|
2 |
-
name = "
|
3 |
-
version = "0.1.
|
4 |
-
description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform
|
5 |
authors = ["André Paiva (Xornotor) <[email protected]>"]
|
6 |
license = "cc"
|
7 |
readme = "README.md"
|
8 |
-
packages = [{include = "
|
9 |
|
10 |
[tool.poetry.dependencies]
|
11 |
python = "^3.11"
|
@@ -16,6 +16,8 @@ mido
|
|
16 |
pumpp
|
17 |
numpy
|
18 |
scipy
|
|
|
|
|
19 |
|
20 |
|
21 |
[build-system]
|
|
|
1 |
[tool.poetry]
|
2 |
+
name = "Choral-Quartets-F0-Extractor"
|
3 |
+
version = "0.1.1"
|
4 |
+
description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass)."
|
5 |
authors = ["André Paiva (Xornotor) <[email protected]>"]
|
6 |
license = "cc"
|
7 |
readme = "README.md"
|
8 |
+
packages = [{include = "Choral_Quartets_F0_Extractor"}]
|
9 |
|
10 |
[tool.poetry.dependencies]
|
11 |
python = "^3.11"
|
|
|
16 |
pumpp
|
17 |
numpy
|
18 |
scipy
|
19 |
+
pandas
|
20 |
+
librosa
|
21 |
|
22 |
|
23 |
[build-system]
|
requirements.txt
CHANGED
@@ -4,4 +4,6 @@ typing-extensions
|
|
4 |
mido
|
5 |
pumpp
|
6 |
numpy
|
7 |
-
scipy
|
|
|
|
|
|
4 |
mido
|
5 |
pumpp
|
6 |
numpy
|
7 |
+
scipy
|
8 |
+
pandas
|
9 |
+
librosa
|