Xornotor committed
Commit 993f635
Parent: 3973eb2

v0.1.3-alpha

Files changed (4)
  1. app.py +1 -1
  2. app_test.ipynb +3 -62
  3. cqfe_utils.py +39 -36
  4. pyproject.toml +1 -1
app.py CHANGED
@@ -4,7 +4,7 @@ cqfe_interface = gr.Interface(fn=cqfe,
     inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),
     outputs=[gr.File(type='file', label='F0 Output Files'),
              gr.Plot(label='F0 Estimation Plot')],
-    title="Choral Quartets F0 Extractor (v0.1.2-alpha)",
+    title="Choral Quartets F0 Extractor (v0.1.3-alpha)",
     description="An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.")
 
 cqfe_interface.launch()
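Note: this release bumps the version string by hand in three places (app.py, app_test.ipynb and pyproject.toml). A minimal sketch of single-sourcing it from pyproject.toml instead, assuming Python 3.11+ for the stdlib tomllib module (this helper is not part of the commit):

    # Hypothetical alternative, not in this commit: read the version once from
    # pyproject.toml so the Gradio title cannot drift from the package metadata.
    import tomllib  # stdlib since Python 3.11

    with open("pyproject.toml", "rb") as f:
        version = tomllib.load(f)["tool"]["poetry"]["version"]

    title = f"Choral Quartets F0 Extractor (v{version})"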
app_test.ipynb CHANGED
@@ -2,68 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Running on local URL: http://127.0.0.1:7860\n",
-      "\n",
-      "To create a public link, set `share=True` in `launch()`.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": []
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2023-07-21 21:02:27.573436: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600\n",
-      "2023-07-21 21:02:28.575934: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1/1 [==============================] - 10s 10s/step\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2023-07-21 21:02:35.800368: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1/1 [==============================] - 0s 369ms/step\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import gradio as gr\n",
     "from cqfe_utils import cqfe\n",
@@ -71,7 +12,7 @@
     " inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),\n",
     " outputs=[gr.File(type='file', label='F0 Output Files'),\n",
     " gr.Plot(label='F0 Estimation Plot')],\n",
-    " title=\"Choral Quartets F0 Extractor (v0.1.2-alpha)\",\n",
+    " title=\"Choral Quartets F0 Extractor (v0.1.3-alpha)\",\n",
     " description=\"An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.\")\n",
     "\n",
     "cqfe_interface.launch()"
cqfe_utils.py CHANGED
@@ -43,37 +43,41 @@ def downsample_bins(voice):
 
 ############################################################
 
-def create_midi(pr, write_path='./midi_track.mid', ticks_per_beat=58,
-                tempo=90, save_to_file=True, program=53, channel=0):
+def bin_matrix_to_freq(matrix):
+    s_freqs = vec_bin_to_freq(np.argmax(matrix[0], axis=0)).reshape(-1, 1)
+    a_freqs = vec_bin_to_freq(np.argmax(matrix[1], axis=0)).reshape(-1, 1)
+    t_freqs = vec_bin_to_freq(np.argmax(matrix[2], axis=0)).reshape(-1, 1)
+    b_freqs = vec_bin_to_freq(np.argmax(matrix[3], axis=0)).reshape(-1, 1)
+
+    freqs = np.concatenate((s_freqs, a_freqs, t_freqs, b_freqs), axis=1).T
+    return freqs
+
+############################################################
 
+def create_midi(freq, write_path='./midi_track.mid', ticks_per_beat=58,
+                tempo=90, save_to_file=True, program=53, channel=0):
+
-    def pr_to_list(pr):
+    def freq_to_list(freq):
         # List event = (pitch, velocity, time)
-        T, N = pr.shape
+        T = freq.shape[0]
+        #midi_freqs = np.squeeze(midi_freqs)
+        midi_freqs = np.round(69 + 12*np.log2(freq/440)).squeeze().astype('int')
         t_last = 0
-        pr_tm1 = np.zeros(N)
+        pitch_tm1 = 20
         list_event = []
         for t in range(T):
-            pr_t = pr[t]
-            mask = (pr_t != pr_tm1)
-            if(N == 360): range_step = 5
-            else: range_step = 1
-            if (mask).any():
-                for n in range(0, N):
-                    if mask[n]:
-                        if(N <= 72):
-                            pitch = 25 + n
-                        else:
-                            pitch = 24 + round(n/5)
-                        if int(pr_t[n] * 127) >= 50:
-                            velocity = 127
-                        else:
-                            velocity = 0
-                        # Time is incremented since last event
-                        t_event = t - t_last
-                        t_last = t
-                        list_event.append((pitch, velocity, t_event))
-            pr_tm1 = pr_t
-        list_event.append((0, 0, T - t_last))
+            pitch_t = midi_freqs[t]
+            if (pitch_t != pitch_tm1):
+                velocity = 127
+                if(pitch_t == 24):
+                    pitch_t = 0
+                    velocity = 0
+                t_event = t - t_last
+                t_last = t
+                list_event.append((pitch_tm1, 0, t_event))
+                list_event.append((pitch_t, velocity, 0))
+                pitch_tm1 = pitch_t
+        list_event.append((pitch_tm1, 0, T - t_last))
         return list_event
     # Tempo
     microseconds_per_beat = mido.bpm2tempo(tempo)
@@ -85,7 +89,7 @@ def create_midi(pr, write_path='./midi_track.mid', ticks_per_beat=58,
     # Add a new track with the instrument name to the midi file
     track = mid.add_track("Voice Aah")
     # transform the matrix in a list of (pitch, velocity, time)
-    events = pr_to_list(pr)
+    events = freq_to_list(freq)
     #print(events)
     # Tempo
     track.append(mido.MetaMessage('set_tempo', tempo=microseconds_per_beat))
@@ -126,18 +130,16 @@ def song_to_midi(sop, alto, ten, bass):
 
     savepath = './output.mid'
 
-    down_sop = downsample_bins(sop.T)
-    down_alto = downsample_bins(alto.T)
-    down_ten = downsample_bins(ten.T)
-    down_bass = downsample_bins(bass.T)
+    bin_matrix = np.array([sop, alto, ten, bass])
+    freq_matrix = bin_matrix_to_freq(bin_matrix)
 
-    mid_sop = create_midi(down_sop, save_to_file=False, program=52, channel=0)
-    mid_alto = create_midi(down_alto, save_to_file=False, program=53, channel=1)
-    mid_ten = create_midi(down_ten, save_to_file=False, program=49, channel=2)
-    mid_bass = create_midi(down_bass, save_to_file=False, program=50, channel=3)
+    mid_sop = create_midi(freq_matrix[0], save_to_file=False, program=52, channel=0)
+    mid_alto = create_midi(freq_matrix[1], save_to_file=False, program=53, channel=1)
+    mid_ten = create_midi(freq_matrix[2], save_to_file=False, program=49, channel=2)
+    mid_bass = create_midi(freq_matrix[3], save_to_file=False, program=50, channel=3)
 
     mid_mix = mido.MidiFile()
-    mid_mix.ticks_per_beat = mid_sop.ticks_per_beat
+    mid_mix.ticks_per_beat=mid_sop.ticks_per_beat
     mid_mix.tracks = mid_sop.tracks + mid_alto.tracks + mid_ten.tracks + mid_bass.tracks
     mid_mix.save(savepath)
 
@@ -246,6 +248,7 @@ def get_mpe_prediction(model, audio_file=None):
         output_list.append(p)
 
     predicted_output = np.hstack(output_list).astype(np.float32)
+
     return predicted_output
 
 ############################################################
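The rewritten create_midi now works on per-voice F0 trajectories in Hz rather than on downsampled bin matrices: freq_to_list quantizes each frame with round(69 + 12*log2(f/440)), the standard Hz-to-MIDI mapping (A4 = 440 Hz is note 69, and each octave spans 12 semitones); the pitch_t == 24 branch appears to treat the lowest bin as "no detected F0" and emits silence. A self-contained check of that formula (hz_to_midi is a hypothetical name used only here):

    import numpy as np

    def hz_to_midi(f):
        # Same mapping as freq_to_list above: 440 Hz (A4) -> note 69.
        return int(np.round(69 + 12 * np.log2(f / 440.0)))

    print(hz_to_midi(440.0))   # 69 (A4)
    print(hz_to_midi(261.63))  # 60 (middle C)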
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "Choral-Quartets-F0-Extractor"
-version = "0.1.2-alpha"
+version = "0.1.3-alpha"
 description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass)."
 authors = ["André Paiva (Xornotor) <[email protected]>"]
 license = "cc"