v0.1.3-alpha
Browse files- app.py +1 -1
- app_test.ipynb +3 -62
- cqfe_utils.py +39 -36
- pyproject.toml +1 -1
app.py
CHANGED
@@ -4,7 +4,7 @@ cqfe_interface = gr.Interface(fn=cqfe,
|
|
4 |
inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),
|
5 |
outputs=[gr.File(type='file', label='F0 Output Files'),
|
6 |
gr.Plot(label='F0 Estimation Plot')],
|
7 |
-
title="Choral Quartets F0 Extractor (v0.1.
|
8 |
description="An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.")
|
9 |
|
10 |
cqfe_interface.launch()
|
|
|
4 |
inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),
|
5 |
outputs=[gr.File(type='file', label='F0 Output Files'),
|
6 |
gr.Plot(label='F0 Estimation Plot')],
|
7 |
+
title="Choral Quartets F0 Extractor (v0.1.3-alpha)",
|
8 |
description="An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.")
|
9 |
|
10 |
cqfe_interface.launch()
|
app_test.ipynb
CHANGED
@@ -2,68 +2,9 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
-
"outputs": [
|
8 |
-
{
|
9 |
-
"name": "stdout",
|
10 |
-
"output_type": "stream",
|
11 |
-
"text": [
|
12 |
-
"Running on local URL: http://127.0.0.1:7860\n",
|
13 |
-
"\n",
|
14 |
-
"To create a public link, set `share=True` in `launch()`.\n"
|
15 |
-
]
|
16 |
-
},
|
17 |
-
{
|
18 |
-
"data": {
|
19 |
-
"text/html": [
|
20 |
-
"<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
21 |
-
],
|
22 |
-
"text/plain": [
|
23 |
-
"<IPython.core.display.HTML object>"
|
24 |
-
]
|
25 |
-
},
|
26 |
-
"metadata": {},
|
27 |
-
"output_type": "display_data"
|
28 |
-
},
|
29 |
-
{
|
30 |
-
"data": {
|
31 |
-
"text/plain": []
|
32 |
-
},
|
33 |
-
"execution_count": 2,
|
34 |
-
"metadata": {},
|
35 |
-
"output_type": "execute_result"
|
36 |
-
},
|
37 |
-
{
|
38 |
-
"name": "stderr",
|
39 |
-
"output_type": "stream",
|
40 |
-
"text": [
|
41 |
-
"2023-07-21 21:02:27.573436: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600\n",
|
42 |
-
"2023-07-21 21:02:28.575934: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n"
|
43 |
-
]
|
44 |
-
},
|
45 |
-
{
|
46 |
-
"name": "stdout",
|
47 |
-
"output_type": "stream",
|
48 |
-
"text": [
|
49 |
-
"1/1 [==============================] - 10s 10s/step\n"
|
50 |
-
]
|
51 |
-
},
|
52 |
-
{
|
53 |
-
"name": "stderr",
|
54 |
-
"output_type": "stream",
|
55 |
-
"text": [
|
56 |
-
"2023-07-21 21:02:35.800368: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n"
|
57 |
-
]
|
58 |
-
},
|
59 |
-
{
|
60 |
-
"name": "stdout",
|
61 |
-
"output_type": "stream",
|
62 |
-
"text": [
|
63 |
-
"1/1 [==============================] - 0s 369ms/step\n"
|
64 |
-
]
|
65 |
-
}
|
66 |
-
],
|
67 |
"source": [
|
68 |
"import gradio as gr\n",
|
69 |
"from cqfe_utils import cqfe\n",
|
@@ -71,7 +12,7 @@
|
|
71 |
" inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),\n",
|
72 |
" outputs=[gr.File(type='file', label='F0 Output Files'),\n",
|
73 |
" gr.Plot(label='F0 Estimation Plot')],\n",
|
74 |
-
" title=\"Choral Quartets F0 Extractor (v0.1.
|
75 |
" description=\"An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.\")\n",
|
76 |
"\n",
|
77 |
"cqfe_interface.launch()"
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
"metadata": {},
|
7 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
"source": [
|
9 |
"import gradio as gr\n",
|
10 |
"from cqfe_utils import cqfe\n",
|
|
|
12 |
" inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),\n",
|
13 |
" outputs=[gr.File(type='file', label='F0 Output Files'),\n",
|
14 |
" gr.Plot(label='F0 Estimation Plot')],\n",
|
15 |
+
" title=\"Choral Quartets F0 Extractor (v0.1.3-alpha)\",\n",
|
16 |
" description=\"An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.\")\n",
|
17 |
"\n",
|
18 |
"cqfe_interface.launch()"
|
cqfe_utils.py
CHANGED
@@ -43,37 +43,41 @@ def downsample_bins(voice):
|
|
43 |
|
44 |
############################################################
|
45 |
|
46 |
-
def
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
-
|
|
|
|
|
|
|
50 |
# List event = (pitch, velocity, time)
|
51 |
-
T
|
|
|
|
|
52 |
t_last = 0
|
53 |
-
|
54 |
list_event = []
|
55 |
for t in range(T):
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
velocity = 127
|
69 |
-
else:
|
70 |
-
velocity = 0
|
71 |
-
# Time is incremented since last event
|
72 |
-
t_event = t - t_last
|
73 |
-
t_last = t
|
74 |
-
list_event.append((pitch, velocity, t_event))
|
75 |
-
pr_tm1 = pr_t
|
76 |
-
list_event.append((0, 0, T - t_last))
|
77 |
return list_event
|
78 |
# Tempo
|
79 |
microseconds_per_beat = mido.bpm2tempo(tempo)
|
@@ -85,7 +89,7 @@ def create_midi(pr, write_path='./midi_track.mid', ticks_per_beat=58,
|
|
85 |
# Add a new track with the instrument name to the midi file
|
86 |
track = mid.add_track("Voice Aah")
|
87 |
# transform the matrix in a list of (pitch, velocity, time)
|
88 |
-
events =
|
89 |
#print(events)
|
90 |
# Tempo
|
91 |
track.append(mido.MetaMessage('set_tempo', tempo=microseconds_per_beat))
|
@@ -126,18 +130,16 @@ def song_to_midi(sop, alto, ten, bass):
|
|
126 |
|
127 |
savepath = './output.mid'
|
128 |
|
129 |
-
|
130 |
-
|
131 |
-
down_ten = downsample_bins(ten.T)
|
132 |
-
down_bass = downsample_bins(bass.T)
|
133 |
|
134 |
-
mid_sop = create_midi(
|
135 |
-
mid_alto = create_midi(
|
136 |
-
mid_ten = create_midi(
|
137 |
-
mid_bass = create_midi(
|
138 |
|
139 |
mid_mix = mido.MidiFile()
|
140 |
-
mid_mix.ticks_per_beat
|
141 |
mid_mix.tracks = mid_sop.tracks + mid_alto.tracks + mid_ten.tracks + mid_bass.tracks
|
142 |
mid_mix.save(savepath)
|
143 |
|
@@ -246,6 +248,7 @@ def get_mpe_prediction(model, audio_file=None):
|
|
246 |
output_list.append(p)
|
247 |
|
248 |
predicted_output = np.hstack(output_list).astype(np.float32)
|
|
|
249 |
return predicted_output
|
250 |
|
251 |
############################################################
|
|
|
43 |
|
44 |
############################################################
|
45 |
|
46 |
+
def bin_matrix_to_freq(matrix):
|
47 |
+
s_freqs = vec_bin_to_freq(np.argmax(matrix[0], axis=0)).reshape(-1, 1)
|
48 |
+
a_freqs = vec_bin_to_freq(np.argmax(matrix[1], axis=0)).reshape(-1, 1)
|
49 |
+
t_freqs = vec_bin_to_freq(np.argmax(matrix[2], axis=0)).reshape(-1, 1)
|
50 |
+
b_freqs = vec_bin_to_freq(np.argmax(matrix[3], axis=0)).reshape(-1, 1)
|
51 |
+
|
52 |
+
freqs = np.concatenate((s_freqs, a_freqs, t_freqs, b_freqs), axis=1).T
|
53 |
+
return freqs
|
54 |
+
|
55 |
+
############################################################
|
56 |
|
57 |
+
def create_midi(freq, write_path='./midi_track.mid', ticks_per_beat=58,
|
58 |
+
tempo=90, save_to_file=True, program=53, channel=0):
|
59 |
+
|
60 |
+
def freq_to_list(freq):
|
61 |
# List event = (pitch, velocity, time)
|
62 |
+
T = freq.shape[0]
|
63 |
+
#midi_freqs = np.squeeze(midi_freqs)
|
64 |
+
midi_freqs = np.round(69 + 12*np.log2(freq/440)).squeeze().astype('int')
|
65 |
t_last = 0
|
66 |
+
pitch_tm1 = 20
|
67 |
list_event = []
|
68 |
for t in range(T):
|
69 |
+
pitch_t = midi_freqs[t]
|
70 |
+
if (pitch_t != pitch_tm1):
|
71 |
+
velocity = 127
|
72 |
+
if(pitch_t == 24):
|
73 |
+
pitch_t = 0
|
74 |
+
velocity = 0
|
75 |
+
t_event = t - t_last
|
76 |
+
t_last = t
|
77 |
+
list_event.append((pitch_tm1, 0, t_event))
|
78 |
+
list_event.append((pitch_t, velocity, 0))
|
79 |
+
pitch_tm1 = pitch_t
|
80 |
+
list_event.append((pitch_tm1, 0, T - t_last))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
return list_event
|
82 |
# Tempo
|
83 |
microseconds_per_beat = mido.bpm2tempo(tempo)
|
|
|
89 |
# Add a new track with the instrument name to the midi file
|
90 |
track = mid.add_track("Voice Aah")
|
91 |
# transform the matrix in a list of (pitch, velocity, time)
|
92 |
+
events = freq_to_list(freq)
|
93 |
#print(events)
|
94 |
# Tempo
|
95 |
track.append(mido.MetaMessage('set_tempo', tempo=microseconds_per_beat))
|
|
|
130 |
|
131 |
savepath = './output.mid'
|
132 |
|
133 |
+
bin_matrix = np.array([sop, alto, ten, bass])
|
134 |
+
freq_matrix = bin_matrix_to_freq(bin_matrix)
|
|
|
|
|
135 |
|
136 |
+
mid_sop = create_midi(freq_matrix[0], save_to_file=False, program=52, channel=0)
|
137 |
+
mid_alto = create_midi(freq_matrix[1], save_to_file=False, program=53, channel=1)
|
138 |
+
mid_ten = create_midi(freq_matrix[2], save_to_file=False, program=49, channel=2)
|
139 |
+
mid_bass = create_midi(freq_matrix[3], save_to_file=False, program=50, channel=3)
|
140 |
|
141 |
mid_mix = mido.MidiFile()
|
142 |
+
mid_mix.ticks_per_beat=mid_sop.ticks_per_beat
|
143 |
mid_mix.tracks = mid_sop.tracks + mid_alto.tracks + mid_ten.tracks + mid_bass.tracks
|
144 |
mid_mix.save(savepath)
|
145 |
|
|
|
248 |
output_list.append(p)
|
249 |
|
250 |
predicted_output = np.hstack(output_list).astype(np.float32)
|
251 |
+
|
252 |
return predicted_output
|
253 |
|
254 |
############################################################
|
pyproject.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
[tool.poetry]
|
2 |
name = "Choral-Quartets-F0-Extractor"
|
3 |
-
version = "0.1.
|
4 |
description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass)."
|
5 |
authors = ["André Paiva (Xornotor) <[email protected]>"]
|
6 |
license = "cc"
|
|
|
1 |
[tool.poetry]
|
2 |
name = "Choral-Quartets-F0-Extractor"
|
3 |
+
version = "0.1.3-alpha"
|
4 |
description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass)."
|
5 |
authors = ["André Paiva (Xornotor) <[email protected]>"]
|
6 |
license = "cc"
|