Spaces:

Xornotor
/

Choral-Quartets-F0-Extractor

Sleeping

App Files Files Community

Xornotor committed on Jul 24, 2023

Commit

993f635

•

1 Parent(s): 3973eb2

v0.1.3-alpha

Browse files

Files changed (4) hide show

app.py +1 -1
app_test.ipynb +3 -62
cqfe_utils.py +39 -36
pyproject.toml +1 -1

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ cqfe_interface = gr.Interface(fn=cqfe,
                               inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),
                               outputs=[gr.File(type='file', label='F0 Output Files'),
                                        gr.Plot(label='F0 Estimation Plot')],
-                              title="Choral Quartets F0 Extractor (v0.1.2-alpha)",
                               description="An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.")
 cqfe_interface.launch()

                               inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),
                               outputs=[gr.File(type='file', label='F0 Output Files'),
                                        gr.Plot(label='F0 Estimation Plot')],
+                              title="Choral Quartets F0 Extractor (v0.1.3-alpha)",
                               description="An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.")
 cqfe_interface.launch()

app_test.ipynb CHANGED Viewed

@@ -2,68 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Running on local URL:  http://127.0.0.1:7860\n",
-      "\n",
-      "To create a public link, set `share=True` in `launch()`.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": []
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2023-07-21 21:02:27.573436: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600\n",
-      "2023-07-21 21:02:28.575934: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1/1 [==============================] - 10s 10s/step\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2023-07-21 21:02:35.800368: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1/1 [==============================] - 0s 369ms/step\n"
-     ]
-    }
-   ],
    "source": [
     "import gradio as gr\n",
     "from cqfe_utils import cqfe\n",
@@ -71,7 +12,7 @@
     "                              inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),\n",
     "                              outputs=[gr.File(type='file', label='F0 Output Files'),\n",
     "                                       gr.Plot(label='F0 Estimation Plot')],\n",
-    "                              title=\"Choral Quartets F0 Extractor (v0.1.2-alpha)\",\n",
     "                              description=\"An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.\")\n",
     "\n",
     "cqfe_interface.launch()"

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
     "import gradio as gr\n",
     "from cqfe_utils import cqfe\n",
     "                              inputs=gr.Audio(type='filepath', format='wav', label='Audio Input File'),\n",
     "                              outputs=[gr.File(type='file', label='F0 Output Files'),\n",
     "                                       gr.Plot(label='F0 Estimation Plot')],\n",
+    "                              title=\"Choral Quartets F0 Extractor (v0.1.3-alpha)\",\n",
     "                              description=\"An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass). The processing may take a few minutes.\")\n",
     "\n",
     "cqfe_interface.launch()"

cqfe_utils.py CHANGED Viewed

@@ -43,37 +43,41 @@ def downsample_bins(voice):
 ############################################################
-def create_midi(pr, write_path='./midi_track.mid', ticks_per_beat=58,
-                tempo=90, save_to_file=True, program=53, channel=0):
-    def pr_to_list(pr):
         # List event = (pitch, velocity, time)
-        T, N = pr.shape
         t_last = 0
-        pr_tm1 = np.zeros(N)
         list_event = []
         for t in range(T):
-            pr_t = pr[t]
-            mask = (pr_t != pr_tm1)
-            if(N == 360): range_step = 5
-            else: range_step = 1
-            if (mask).any():
-                for n in range(0, N):
-                    if mask[n]:
-                        if(N <= 72):
-                            pitch = 25 + n
-                        else:
-                            pitch = 24 + round(n/5)
-                        if int(pr_t[n] * 127) >= 50:
-                            velocity = 127
-                        else:
-                            velocity = 0
-                        # Time is incremented since last event
-                        t_event = t - t_last
-                        t_last = t
-                        list_event.append((pitch, velocity, t_event))
-            pr_tm1 = pr_t
-        list_event.append((0, 0, T - t_last))
         return list_event
     # Tempo
     microseconds_per_beat = mido.bpm2tempo(tempo)
@@ -85,7 +89,7 @@ def create_midi(pr, write_path='./midi_track.mid', ticks_per_beat=58,
     # Add a new track with the instrument name to the midi file
     track = mid.add_track("Voice Aah")
     # transform the matrix in a list of (pitch, velocity, time)
-    events = pr_to_list(pr)
     #print(events)
     # Tempo
     track.append(mido.MetaMessage('set_tempo', tempo=microseconds_per_beat))
@@ -126,18 +130,16 @@ def song_to_midi(sop, alto, ten, bass):
     savepath = './output.mid'
-    down_sop = downsample_bins(sop.T)
-    down_alto = downsample_bins(alto.T)
-    down_ten = downsample_bins(ten.T)
-    down_bass = downsample_bins(bass.T)
-    mid_sop = create_midi(down_sop, save_to_file=False, program=52, channel=0)
-    mid_alto = create_midi(down_alto, save_to_file=False, program=53, channel=1)
-    mid_ten = create_midi(down_ten, save_to_file=False, program=49, channel=2)
-    mid_bass = create_midi(down_bass, save_to_file=False, program=50, channel=3)
     mid_mix = mido.MidiFile()
-    mid_mix.ticks_per_beat = mid_sop.ticks_per_beat
     mid_mix.tracks = mid_sop.tracks + mid_alto.tracks + mid_ten.tracks + mid_bass.tracks
     mid_mix.save(savepath)
@@ -246,6 +248,7 @@ def get_mpe_prediction(model, audio_file=None):
         output_list.append(p)
     predicted_output = np.hstack(output_list).astype(np.float32)
     return predicted_output
 ############################################################

 ############################################################
+def bin_matrix_to_freq(matrix):
+    s_freqs = vec_bin_to_freq(np.argmax(matrix[0], axis=0)).reshape(-1, 1)
+    a_freqs = vec_bin_to_freq(np.argmax(matrix[1], axis=0)).reshape(-1, 1)
+    t_freqs = vec_bin_to_freq(np.argmax(matrix[2], axis=0)).reshape(-1, 1)
+    b_freqs = vec_bin_to_freq(np.argmax(matrix[3], axis=0)).reshape(-1, 1)
+    freqs = np.concatenate((s_freqs, a_freqs, t_freqs, b_freqs), axis=1).T
+    return freqs
+############################################################
+def create_midi(freq, write_path='./midi_track.mid', ticks_per_beat=58,
+                tempo=90, save_to_file=True, program=53, channel=0):
+    def freq_to_list(freq):
         # List event = (pitch, velocity, time)
+        T = freq.shape[0]
+        #midi_freqs = np.squeeze(midi_freqs)
+        midi_freqs = np.round(69 + 12*np.log2(freq/440)).squeeze().astype('int')
         t_last = 0
+        pitch_tm1 = 20
         list_event = []
         for t in range(T):
+            pitch_t = midi_freqs[t]
+            if (pitch_t != pitch_tm1):
+                velocity = 127
+                if(pitch_t == 24):
+                    pitch_t = 0
+                    velocity = 0
+                t_event = t - t_last
+                t_last = t
+                list_event.append((pitch_tm1, 0, t_event))
+                list_event.append((pitch_t, velocity, 0))
+            pitch_tm1 = pitch_t
+        list_event.append((pitch_tm1, 0, T - t_last))
         return list_event
     # Tempo
     microseconds_per_beat = mido.bpm2tempo(tempo)
     # Add a new track with the instrument name to the midi file
     track = mid.add_track("Voice Aah")
     # transform the matrix in a list of (pitch, velocity, time)
+    events = freq_to_list(freq)
     #print(events)
     # Tempo
     track.append(mido.MetaMessage('set_tempo', tempo=microseconds_per_beat))
     savepath = './output.mid'
+    bin_matrix = np.array([sop, alto, ten, bass])
+    freq_matrix = bin_matrix_to_freq(bin_matrix)
+    mid_sop = create_midi(freq_matrix[0], save_to_file=False, program=52, channel=0)
+    mid_alto = create_midi(freq_matrix[1], save_to_file=False, program=53, channel=1)
+    mid_ten = create_midi(freq_matrix[2], save_to_file=False, program=49, channel=2)
+    mid_bass = create_midi(freq_matrix[3], save_to_file=False, program=50, channel=3)
     mid_mix = mido.MidiFile()
+    mid_mix.ticks_per_beat=mid_sop.ticks_per_beat
     mid_mix.tracks = mid_sop.tracks + mid_alto.tracks + mid_ten.tracks + mid_bass.tracks
     mid_mix.save(savepath)
         output_list.append(p)
     predicted_output = np.hstack(output_list).astype(np.float32)
     return predicted_output
 ############################################################

pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "Choral-Quartets-F0-Extractor"
-version = "0.1.2-alpha"
 description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass)."
 authors = ["André Paiva (Xornotor) <[email protected]>"]
 license = "cc"

 [tool.poetry]
 name = "Choral-Quartets-F0-Extractor"
+version = "0.1.3-alpha"
 description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform audio files with Choral Quartets recordings into files (CSV, HDF5 and MIDI) containing F0 estimations for each voice (Soprano, Alto, Tenor and Bass)."
 authors = ["André Paiva (Xornotor) <[email protected]>"]
 license = "cc"