Plot added
Browse files
- .gitignore +4 -1
- README.md +2 -2
- app.py +3 -2
- app_test.ipynb +65 -13
- cqfe_utils.py +21 -8
- pyproject.toml +2 -1
- requirements.txt +2 -1
.gitignore
CHANGED
@@ -1 +1,4 @@
|
|
1 |
-
__pycache__/*
|
|
|
|
|
|
|
|
1 |
+
__pycache__/*
|
2 |
+
flagged/*
|
3 |
+
input.*
|
4 |
+
output.*
|
README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
---
|
2 |
-
title: Choral Quartets
|
3 |
emoji: 🐠
|
4 |
colorFrom: yellow
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.38.0
|
8 |
app_file: app.py
|
9 |
-
pinned:
|
10 |
license: cc
|
11 |
---
|
12 |
|
|
|
1 |
---
|
2 |
+
title: Choral Quartets F0 Extractor
|
3 |
emoji: 🐠
|
4 |
colorFrom: yellow
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.38.0
|
8 |
app_file: app.py
|
9 |
+
pinned: true
|
10 |
license: cc
|
11 |
---
|
12 |
|
app.py
CHANGED
@@ -2,8 +2,9 @@ import gradio as gr
|
|
2 |
from cqfe_utils import cqfe
|
3 |
cqfe_interface = gr.Interface(fn=cqfe,
|
4 |
inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),
|
5 |
-
outputs=gr.
|
6 |
-
|
|
|
7 |
description="An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.")
|
8 |
|
9 |
cqfe_interface.launch()
|
|
|
2 |
from cqfe_utils import cqfe
|
3 |
cqfe_interface = gr.Interface(fn=cqfe,
|
4 |
inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),
|
5 |
+
outputs=[gr.Plot(label='F0 Estimation Plot'),
|
6 |
+
gr.File(type='file', label='F0 Output Files')],
|
7 |
+
title="Choral Quartets F0 Extractor (v0.1.2)",
|
8 |
description="An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.")
|
9 |
|
10 |
cqfe_interface.launch()
|
app_test.ipynb
CHANGED
@@ -2,24 +2,76 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
"source": [
|
9 |
"import gradio as gr\n",
|
10 |
-
"from cqfe_utils import cqfe\n"
|
11 |
-
]
|
12 |
-
},
|
13 |
-
{
|
14 |
-
"cell_type": "code",
|
15 |
-
"execution_count": null,
|
16 |
-
"metadata": {},
|
17 |
-
"outputs": [],
|
18 |
-
"source": [
|
19 |
"cqfe_interface = gr.Interface(fn=cqfe,\n",
|
20 |
" inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),\n",
|
21 |
-
" outputs=gr.
|
22 |
-
"
|
|
|
23 |
" description=\"An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.\")\n",
|
24 |
"\n",
|
25 |
"cqfe_interface.launch()"
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 2,
|
6 |
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"Running on local URL: http://127.0.0.1:7860\n",
|
13 |
+
"\n",
|
14 |
+
"To create a public link, set `share=True` in `launch()`.\n"
|
15 |
+
]
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"data": {
|
19 |
+
"text/html": [
|
20 |
+
"<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
21 |
+
],
|
22 |
+
"text/plain": [
|
23 |
+
"<IPython.core.display.HTML object>"
|
24 |
+
]
|
25 |
+
},
|
26 |
+
"metadata": {},
|
27 |
+
"output_type": "display_data"
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"data": {
|
31 |
+
"text/plain": []
|
32 |
+
},
|
33 |
+
"execution_count": 2,
|
34 |
+
"metadata": {},
|
35 |
+
"output_type": "execute_result"
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"name": "stderr",
|
39 |
+
"output_type": "stream",
|
40 |
+
"text": [
|
41 |
+
"2023-07-21 21:02:27.573436: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600\n",
|
42 |
+
"2023-07-21 21:02:28.575934: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n"
|
43 |
+
]
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"name": "stdout",
|
47 |
+
"output_type": "stream",
|
48 |
+
"text": [
|
49 |
+
"1/1 [==============================] - 10s 10s/step\n"
|
50 |
+
]
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"name": "stderr",
|
54 |
+
"output_type": "stream",
|
55 |
+
"text": [
|
56 |
+
"2023-07-21 21:02:35.800368: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n"
|
57 |
+
]
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"name": "stdout",
|
61 |
+
"output_type": "stream",
|
62 |
+
"text": [
|
63 |
+
"1/1 [==============================] - 0s 369ms/step\n"
|
64 |
+
]
|
65 |
+
}
|
66 |
+
],
|
67 |
"source": [
|
68 |
"import gradio as gr\n",
|
69 |
+
"from cqfe_utils import cqfe\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
"cqfe_interface = gr.Interface(fn=cqfe,\n",
|
71 |
" inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),\n",
|
72 |
+
" outputs=[gr.Plot(label='F0 Estimation Plot'),\n",
|
73 |
+
" gr.File(type='file', label='F0 Output Files')],\n",
|
74 |
+
" title=\"Choral Quartets F0 Extractor (v0.1.2)\",\n",
|
75 |
" description=\"An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.\")\n",
|
76 |
"\n",
|
77 |
"cqfe_interface.launch()"
|
cqfe_utils.py
CHANGED
@@ -145,10 +145,8 @@ def song_to_midi(sop, alto, ten, bass):
|
|
145 |
|
146 |
############################################################
|
147 |
|
148 |
-
def
|
149 |
|
150 |
-
savepath_csv = './output.csv'
|
151 |
-
savepath_hdf5 = './output.hdf5'
|
152 |
timescale = np.arange(0, 0.011609977 * (sop.shape[1]), 0.011609977)[:sop.shape[1]]
|
153 |
|
154 |
s_argmax = vec_bin_to_freq(np.argmax(sop, axis=0))
|
@@ -160,10 +158,8 @@ def song_to_tables(sop, alto, ten, bass):
|
|
160 |
columns = ['Timestep', 'Soprano', 'Alto', 'Tenor', 'Bass']
|
161 |
|
162 |
df = pd.DataFrame(data, columns=columns)
|
163 |
-
df.to_csv(savepath_csv, mode='w', header=True)
|
164 |
-
df.to_hdf(savepath_hdf5, key='F0', mode='w', complevel=9, complib='blosc', append=False, format='table')
|
165 |
|
166 |
-
return
|
167 |
|
168 |
############################################################
|
169 |
|
@@ -284,10 +280,27 @@ def get_va_prediction(model, f0_matrix):
|
|
284 |
############################################################
|
285 |
|
286 |
def cqfe(audiofile, mpe=late_deep_cnn_model(), va=mask_voas_cnn_v2_model()):
|
|
|
|
|
|
|
|
|
287 |
mpe_pred = get_mpe_prediction(mpe, audiofile)
|
288 |
s_pred, a_pred, t_pred, b_pred = get_va_prediction(va, mpe_pred)
|
|
|
289 |
output_midi = song_to_midi(s_pred, a_pred, t_pred, b_pred)
|
290 |
-
|
291 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
292 |
|
293 |
############################################################
|
|
|
145 |
|
146 |
############################################################
|
147 |
|
148 |
+
def song_to_dataframe(sop, alto, ten, bass):
|
149 |
|
|
|
|
|
150 |
timescale = np.arange(0, 0.011609977 * (sop.shape[1]), 0.011609977)[:sop.shape[1]]
|
151 |
|
152 |
s_argmax = vec_bin_to_freq(np.argmax(sop, axis=0))
|
|
|
158 |
columns = ['Timestep', 'Soprano', 'Alto', 'Tenor', 'Bass']
|
159 |
|
160 |
df = pd.DataFrame(data, columns=columns)
|
|
|
|
|
161 |
|
162 |
+
return df
|
163 |
|
164 |
############################################################
|
165 |
|
|
|
280 |
############################################################
|
281 |
|
282 |
def cqfe(audiofile, mpe=late_deep_cnn_model(), va=mask_voas_cnn_v2_model()):
    """Extract per-voice F0 estimations from an audio file.

    The input is run through ``get_mpe_prediction`` and then
    ``get_va_prediction``, which yields one prediction per voice (soprano,
    alto, tenor, bass). Those predictions are exported as MIDI (via
    ``song_to_midi``), as CSV and as HDF5, and drawn as a single scatter
    plot with one colour per voice.

    Args:
        audiofile: Audio input, forwarded unchanged to
            ``get_mpe_prediction``.
        mpe: Model handed to ``get_mpe_prediction``. NOTE: the default is
            built once, when this ``def`` statement executes, and is then
            shared by every call that does not override it.
        va: Model handed to ``get_va_prediction``. Same default caveat as
            ``mpe``.

    Returns:
        A tuple ``(fig, files)``: ``fig`` is the figure that owns the
        scatter plot and ``files`` is the list
        ``[output_midi, './output.csv', './output.hdf5']``, where
        ``output_midi`` is whatever ``song_to_midi`` returned (presumably
        the MIDI file path -- TODO confirm against ``song_to_midi``).
    """
    # Fixed, overwrite-on-every-call output locations.
    savepath_csv = './output.csv'
    savepath_hdf5 = './output.hdf5'

    mpe_pred = get_mpe_prediction(mpe, audiofile)
    s_pred, a_pred, t_pred, b_pred = get_va_prediction(va, mpe_pred)

    output_midi = song_to_midi(s_pred, a_pred, t_pred, b_pred)

    output_df = song_to_dataframe(s_pred, a_pred, t_pred, b_pred)
    output_df.to_csv(savepath_csv, mode='w', header=True)
    output_df.to_hdf(savepath_hdf5, key='F0', mode='w', complevel=9,
                     complib='blosc', append=False, format='table')

    # Draw every voice on one shared axes. The first call gets ax=None, so
    # pandas creates the axes; later calls reuse it. Drawing order (bass
    # first, soprano last) is kept from the original code.
    voice_colors = (
        ('Bass', '#2f29e3'),
        ('Tenor', '#e36129'),
        ('Alto', '#29e35a'),
        ('Soprano', '#d3d921'),
    )
    ax = None
    for voice, color in voice_colors:
        ax = output_df.plot.scatter(x='Timestep', y=voice, s=1,
                                    color=color, label=voice, ax=ax)

    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Freq (Hz)')
    fig = ax.get_figure()
    fig.set_dpi(200)

    return fig, [output_midi, savepath_csv, savepath_hdf5]
|
305 |
|
306 |
############################################################
|
pyproject.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
[tool.poetry]
|
2 |
name = "Choral-Quartets-F0-Extractor"
|
3 |
-
version = "0.1.
|
4 |
description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass)."
|
5 |
authors = ["André Paiva (Xornotor) <[email protected]>"]
|
6 |
license = "cc"
|
@@ -21,6 +21,7 @@ hdf5plugin
|
|
21 |
h5py
|
22 |
tables
|
23 |
librosa
|
|
|
24 |
|
25 |
|
26 |
[build-system]
|
|
|
1 |
[tool.poetry]
|
2 |
name = "Choral-Quartets-F0-Extractor"
|
3 |
+
version = "0.1.2"
|
4 |
description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass)."
|
5 |
authors = ["André Paiva (Xornotor) <[email protected]>"]
|
6 |
license = "cc"
|
|
|
21 |
h5py
|
22 |
tables
|
23 |
librosa
|
24 |
+
matplotlib
|
25 |
|
26 |
|
27 |
[build-system]
|
requirements.txt
CHANGED
@@ -9,4 +9,5 @@ pandas
|
|
9 |
hdf5plugin
|
10 |
h5py
|
11 |
tables
|
12 |
-
librosa
|
|
|
|
9 |
hdf5plugin
|
10 |
h5py
|
11 |
tables
|
12 |
+
librosa
|
13 |
+
matplotlib
|