Xornotor committed on
Commit
9c5c602
1 Parent(s): b650d31

Update: CSV, HDF5 and MIDI outputs

Browse files
app.py CHANGED
@@ -1,9 +1,9 @@
1
  import gradio as gr
2
- import cq2m_utils
3
- cq2m_interface = gr.Interface(fn=cq2m_utils.cq2m,
4
- inputs=gr.Audio(type='filepath', format='wav'),
5
- outputs=gr.File(type='file'),
6
- title="Choral Quartets to Midi",
7
- description="An application that uses Multi-Pitch Estimation and Voice Assignment to transform .WAV files with Choral Quartets recordings into MIDI files, with a separate track for each voice.")
8
 
9
- cq2m_interface.launch()
 
1
  import gradio as gr
2
+ from cqfe_utils import cqfe
3
+ cqfe_interface = gr.Interface(fn=cqfe,
4
+ inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),
5
+ outputs=gr.File(type='file', label='F0 Output Files'),
6
+ title="Choral Quartets F0 Extractor",
7
+ description="An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.")
8
 
9
+ cqfe_interface.launch()
app_test.ipynb CHANGED
@@ -7,52 +7,22 @@
7
  "outputs": [],
8
  "source": [
9
  "import gradio as gr\n",
10
- "import cq2m_utils"
11
  ]
12
  },
13
  {
14
  "cell_type": "code",
15
- "execution_count": 2,
16
  "metadata": {},
17
- "outputs": [
18
- {
19
- "name": "stdout",
20
- "output_type": "stream",
21
- "text": [
22
- "Running on local URL: http://127.0.0.1:7860\n",
23
- "\n",
24
- "To create a public link, set `share=True` in `launch()`.\n"
25
- ]
26
- },
27
- {
28
- "data": {
29
- "text/html": [
30
- "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
31
- ],
32
- "text/plain": [
33
- "<IPython.core.display.HTML object>"
34
- ]
35
- },
36
- "metadata": {},
37
- "output_type": "display_data"
38
- },
39
- {
40
- "data": {
41
- "text/plain": []
42
- },
43
- "execution_count": 2,
44
- "metadata": {},
45
- "output_type": "execute_result"
46
- }
47
- ],
48
  "source": [
49
- "cq2m_interface = gr.Interface(fn=cq2m_utils.cq2m,\n",
50
- " inputs=gr.Audio(type='filepath', format='wav'),\n",
51
- " outputs=gr.File(type='file'),\n",
52
- " title=\"Choral Quartets to Midi\",\n",
53
- " description=\"An application that uses Multi-Pitch Estimation and Voice Assignment to transform .WAV files with Choral Quartets recordings into MIDI files, with a separate track for each voice.\")\n",
54
  "\n",
55
- "cq2m_interface.launch()"
56
  ]
57
  }
58
  ],
 
7
  "outputs": [],
8
  "source": [
9
  "import gradio as gr\n",
10
+ "from cqfe_utils import cqfe\n"
11
  ]
12
  },
13
  {
14
  "cell_type": "code",
15
+ "execution_count": null,
16
  "metadata": {},
17
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  "source": [
19
+ "cqfe_interface = gr.Interface(fn=cqfe,\n",
20
+ " inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),\n",
21
+ " outputs=gr.File(type='file', label='F0 Output Files'),\n",
22
+ " title=\"Choral Quartets F0 Extractor\",\n",
23
+ " description=\"An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.\")\n",
24
  "\n",
25
+ "cqfe_interface.launch()"
26
  ]
27
  }
28
  ],
cq2m_models.py → cqfe_models.py RENAMED
File without changes
cq2m_utils.py → cqfe_utils.py RENAMED
@@ -2,9 +2,20 @@ import os
2
  import math
3
  import mido
4
  import pumpp
 
5
  import numpy as np
 
6
  from scipy.ndimage import gaussian_filter1d
7
- from cq2m_models import mask_voas_cnn_model, late_deep_cnn_model
 
 
 
 
 
 
 
 
 
8
 
9
  ############################################################
10
 
@@ -134,6 +145,28 @@ def song_to_midi(sop, alto, ten, bass):
134
 
135
  ############################################################
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  def prediction_postproc(input_array, argmax_and_threshold=True, gaussian_blur=True):
138
  prediction = np.moveaxis(input_array, 0, 1).reshape(360, -1)
139
  if(argmax_and_threshold):
@@ -190,7 +223,7 @@ def get_mpe_prediction(model, audio_file=None):
190
  Part of this function is part of deepsalience
191
  """
192
 
193
- split_value = 2500
194
 
195
  if audio_file is not None:
196
 
@@ -250,10 +283,11 @@ def get_va_prediction(model, f0_matrix):
250
 
251
  ############################################################
252
 
253
- def cq2m(audiofile, mpe=late_deep_cnn_model(), va=mask_voas_cnn_model()):
254
  mpe_pred = get_mpe_prediction(mpe, audiofile)
255
  s_pred, a_pred, t_pred, b_pred = get_va_prediction(va, mpe_pred)
256
- midi = song_to_midi(s_pred, a_pred, t_pred, b_pred)
257
- return midi
 
258
 
259
  ############################################################
 
2
  import math
3
  import mido
4
  import pumpp
5
+ import librosa
6
  import numpy as np
7
+ import pandas as pd
8
  from scipy.ndimage import gaussian_filter1d
9
+ from cqfe_models import mask_voas_cnn_v2_model, late_deep_cnn_model
10
+
11
+ ############################################################
12
+
13
+ freqscale = librosa.cqt_frequencies(n_bins=360, fmin=32.7, bins_per_octave=60)
14
+
15
+ def bin_to_freq(bin):
16
+ return freqscale[bin]
17
+
18
+ vec_bin_to_freq = np.vectorize(bin_to_freq)
19
 
20
  ############################################################
21
 
 
145
 
146
  ############################################################
147
 
148
+ def song_to_tables(sop, alto, ten, bass):
149
+
150
+ savepath_csv = './output.csv'
151
+ savepath_hdf5 = './output.hdf5'
152
+ timescale = np.arange(0, 0.011609977 * (sop.shape[1]), 0.011609977)[:sop.shape[1]]
153
+
154
+ s_argmax = vec_bin_to_freq(np.argmax(sop, axis=0))
155
+ a_argmax = vec_bin_to_freq(np.argmax(alto, axis=0))
156
+ t_argmax = vec_bin_to_freq(np.argmax(ten, axis=0))
157
+ b_argmax = vec_bin_to_freq(np.argmax(bass, axis=0))
158
+
159
+ data = np.array([timescale, s_argmax, a_argmax, t_argmax, b_argmax], dtype=np.float32).T
160
+ columns = ['Timestep', 'Soprano', 'Alto', 'Tenor', 'Bass']
161
+
162
+ df = pd.DataFrame(data, columns=columns)
163
+ df.to_csv(savepath_csv, mode='w', header=True)
164
+ df.to_hdf(savepath_hdf5, key='F0', mode='w', complevel=9, complib='blosc', append=False, format='table')
165
+
166
+ return savepath_csv, savepath_hdf5
167
+
168
+ ############################################################
169
+
170
  def prediction_postproc(input_array, argmax_and_threshold=True, gaussian_blur=True):
171
  prediction = np.moveaxis(input_array, 0, 1).reshape(360, -1)
172
  if(argmax_and_threshold):
 
223
  Part of this function is part of deepsalience
224
  """
225
 
226
+ split_value = 4000
227
 
228
  if audio_file is not None:
229
 
 
283
 
284
  ############################################################
285
 
286
+ def cqfe(audiofile, mpe=late_deep_cnn_model(), va=mask_voas_cnn_v2_model()):
287
  mpe_pred = get_mpe_prediction(mpe, audiofile)
288
  s_pred, a_pred, t_pred, b_pred = get_va_prediction(va, mpe_pred)
289
+ output_midi = song_to_midi(s_pred, a_pred, t_pred, b_pred)
290
+ output_csv, output_hdf5 = song_to_tables(s_pred, a_pred, t_pred, b_pred)
291
+ return [output_midi, output_csv, output_hdf5]
292
 
293
  ############################################################
pyproject.toml CHANGED
@@ -1,11 +1,11 @@
1
  [tool.poetry]
2
- name = "choral-quartets-to-midi"
3
- version = "0.1.0"
4
- description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform .WAV files with Choral Quartets recordings into MIDI files, with a separate track for each voice. Based on Late/DeepCNN by Helena Cuesta and MaskVoasCNN by André Paiva."
5
  authors = ["André Paiva (Xornotor) <[email protected]>"]
6
  license = "cc"
7
  readme = "README.md"
8
- packages = [{include = "choral_quartets_to_midi"}]
9
 
10
  [tool.poetry.dependencies]
11
  python = "^3.11"
@@ -16,6 +16,8 @@ mido
16
  pumpp
17
  numpy
18
  scipy
 
 
19
 
20
 
21
  [build-system]
 
1
  [tool.poetry]
2
+ name = "Choral-Quartets-F0-Extractor"
3
+ version = "0.1.1"
4
+ description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass)."
5
  authors = ["André Paiva (Xornotor) <[email protected]>"]
6
  license = "cc"
7
  readme = "README.md"
8
+ packages = [{include = "Choral_Quartets_F0_Extractor"}]
9
 
10
  [tool.poetry.dependencies]
11
  python = "^3.11"
 
16
  pumpp
17
  numpy
18
  scipy
19
+ pandas
20
+ librosa
21
 
22
 
23
  [build-system]
requirements.txt CHANGED
@@ -4,4 +4,6 @@ typing-extensions
4
  mido
5
  pumpp
6
  numpy
7
- scipy
 
 
 
4
  mido
5
  pumpp
6
  numpy
7
+ scipy
8
+ pandas
9
+ librosa