Initial files for application
Browse files- .gitignore +1 -0
- Checkpoints/.gitattributes +35 -0
- Checkpoints/mask_voas.keras +0 -0
- Checkpoints/mask_voas_v2.keras +0 -0
- app_test.ipynb +37 -0
- cq2m_models.py +321 -0
- cq2m_utils.py +257 -0
- pyproject.toml +23 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__/*
|
Checkpoints/.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
Checkpoints/mask_voas.keras
ADDED
Binary file (856 kB). View file
|
|
Checkpoints/mask_voas_v2.keras
ADDED
Binary file (661 kB). View file
|
|
app_test.ipynb
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import gradio as gr\n",
|
10 |
+
"import cq2m_utils\n",
|
11 |
+
"#midi = cq2m_utils.cq2m(audiofile)"
|
12 |
+
]
|
13 |
+
}
|
14 |
+
],
|
15 |
+
"metadata": {
|
16 |
+
"kernelspec": {
|
17 |
+
"display_name": "tf",
|
18 |
+
"language": "python",
|
19 |
+
"name": "python3"
|
20 |
+
},
|
21 |
+
"language_info": {
|
22 |
+
"codemirror_mode": {
|
23 |
+
"name": "ipython",
|
24 |
+
"version": 3
|
25 |
+
},
|
26 |
+
"file_extension": ".py",
|
27 |
+
"mimetype": "text/x-python",
|
28 |
+
"name": "python",
|
29 |
+
"nbconvert_exporter": "python",
|
30 |
+
"pygments_lexer": "ipython3",
|
31 |
+
"version": "3.11.4"
|
32 |
+
},
|
33 |
+
"orig_nbformat": 4
|
34 |
+
},
|
35 |
+
"nbformat": 4,
|
36 |
+
"nbformat_minor": 2
|
37 |
+
}
|
cq2m_models.py
ADDED
@@ -0,0 +1,321 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tensorflow as tf
|
2 |
+
from tensorflow.keras import Model
|
3 |
+
from tensorflow.keras.optimizers import Adam
|
4 |
+
from tensorflow.keras.losses import BinaryCrossentropy, Reduction
|
5 |
+
from tensorflow.keras.layers import Input, Resizing, Conv2D, BatchNormalization, Multiply, Lambda, Concatenate
|
6 |
+
import tensorflow.keras.backend as K
|
7 |
+
|
8 |
+
EPOCHS = 10
|
9 |
+
TRAINING_DTYPE = tf.float16
|
10 |
+
SPLIT_SIZE = 256
|
11 |
+
BATCH_SIZE = 24
|
12 |
+
LEARNING_RATE = 5e-3
|
13 |
+
RESIZING_FILTER = 'bilinear'
|
14 |
+
|
15 |
+
############################################################
|
16 |
+
|
17 |
+
def mask_voas_cnn_model(l_rate = LEARNING_RATE):
    """Build, compile and load the pre-trained MaskVoasCNN voice-assignment model.

    The network takes a (360, SPLIT_SIZE, 1) pitch-salience map, learns a
    low-resolution mask, multiplies it back onto the input, then splits into
    four convolutional branches — one per SATB voice.

    NOTE: the architecture and layer names must stay exactly as written,
    because pre-trained weights are loaded from ./Checkpoints/mask_voas.keras
    at the end of this function.

    Args:
        l_rate: learning rate for the Adam optimizer (default LEARNING_RATE).

    Returns:
        A compiled tf.keras Model with one input and four outputs
        [soprano, alto, tenor, bass], each of shape (batch, 360, SPLIT_SIZE).
    """
    x_in = Input(shape=(360, SPLIT_SIZE, 1))

    # Downscale 4x (freq 360->90, time SPLIT_SIZE -> SPLIT_SIZE/2) so the
    # mask is learned at a coarser resolution.
    x = Resizing(90, int(SPLIT_SIZE/2), RESIZING_FILTER,
                 name="downscale")(x_in)

    x = BatchNormalization()(x)

    x = Conv2D(filters=32, kernel_size=(3, 3), padding="same",
               activation="relu", name="conv1")(x)

    x = BatchNormalization()(x)

    x = Conv2D(filters=32, kernel_size=(3, 3), padding="same",
               activation="relu", name="conv2")(x)

    x = BatchNormalization()(x)

    # Tall (70, 3) kernels span a large frequency range to pick up
    # harmonically related energy.
    x = Conv2D(filters=16, kernel_size=(70, 3), padding="same",
               activation="relu", name="conv_harm_1")(x)

    x = BatchNormalization()(x)

    x = Conv2D(filters=16, kernel_size=(70, 3), padding="same",
               activation="relu", name="conv_harm_2")(x)

    x = BatchNormalization()(x)

    ## "masking" original input with trained data

    # Upscale back to input resolution and apply as a multiplicative mask.
    x = Resizing(360, SPLIT_SIZE, RESIZING_FILTER,
                 name="upscale")(x)

    x = Multiply(name="multiply_mask")([x, x_in])

    ## start four branches now

    ## branch 1
    x1a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv1a")(x)

    x1a = BatchNormalization()(x1a)

    x1b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv1b")(x1a)

    ## branch 2
    x2a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv2a")(x)

    x2a = BatchNormalization()(x2a)

    x2b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv2b")(x2a)

    ## branch 3
    x3a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv3a")(x)

    x3a = BatchNormalization()(x3a)

    x3b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv3b")(x3a)

    ## branch 4
    x4a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv4a")(x)

    x4a = BatchNormalization()(x4a)

    x4b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv4b")(x4a)

    # Per-voice 1x1 sigmoid heads; squeeze drops the trailing channel axis
    # so each output is (batch, 360, SPLIT_SIZE).
    y1 = Conv2D(filters=1, kernel_size=1, name='conv_soprano',
                padding='same', activation='sigmoid')(x1b)
    y1 = tf.squeeze(y1, axis=-1, name='sop')

    y2 = Conv2D(filters=1, kernel_size=1, name='conv_alto',
                padding='same', activation='sigmoid')(x2b)
    y2 = tf.squeeze(y2, axis=-1, name='alt')

    y3 = Conv2D(filters=1, kernel_size=1, name='conv_tenor',
                padding='same', activation='sigmoid')(x3b)
    y3 = tf.squeeze(y3, axis=-1, name='ten')

    y4 = Conv2D(filters=1, kernel_size=1, name='conv_bass',
                padding='same', activation='sigmoid')(x4b)
    y4 = tf.squeeze(y4, axis=-1, name='bas')

    out = [y1, y2, y3, y4]

    model = Model(inputs=x_in, outputs=out, name='MaskVoasCNN')

    model.compile(optimizer=Adam(learning_rate=l_rate),
                  loss=BinaryCrossentropy(reduction=Reduction.SUM_OVER_BATCH_SIZE))

    # Pre-trained weights; requires the checkpoint to be present at this
    # relative path (run from the repository root).
    model.load_weights('./Checkpoints/mask_voas.keras')

    return model
118 |
+
|
119 |
+
############################################################
|
120 |
+
|
121 |
+
def mask_voas_cnn_v2_model(l_rate = LEARNING_RATE):
    """Build, compile and load the pre-trained MaskVoasCNN v2 model.

    Variant of mask_voas_cnn_model with three differences: shorter (48, 3)
    harmonic kernels, an extra 1x1 sigmoid layer applied before the mask
    multiplication, and an extra BatchNormalization after masking.

    NOTE: the architecture and layer names must stay exactly as written,
    because pre-trained weights are loaded from
    ./Checkpoints/mask_voas_v2.keras at the end of this function.

    Args:
        l_rate: learning rate for the Adam optimizer (default LEARNING_RATE).

    Returns:
        A compiled tf.keras Model with one input and four outputs
        [soprano, alto, tenor, bass], each of shape (batch, 360, SPLIT_SIZE).
    """
    x_in = Input(shape=(360, SPLIT_SIZE, 1))

    # Learn the mask at 1/4 resolution, as in v1.
    x = Resizing(90, int(SPLIT_SIZE/2), RESIZING_FILTER,
                 name="downscale")(x_in)

    x = BatchNormalization()(x)

    x = Conv2D(filters=32, kernel_size=(3, 3), padding="same",
               activation="relu", name="conv1")(x)

    x = BatchNormalization()(x)

    x = Conv2D(filters=32, kernel_size=(3, 3), padding="same",
               activation="relu", name="conv2")(x)

    x = BatchNormalization()(x)

    # v2 uses (48, 3) harmonic kernels instead of v1's (70, 3).
    x = Conv2D(filters=16, kernel_size=(48, 3), padding="same",
               activation="relu", name="conv_harm_1")(x)

    x = BatchNormalization()(x)

    x = Conv2D(filters=16, kernel_size=(48, 3), padding="same",
               activation="relu", name="conv_harm_2")(x)

    x = BatchNormalization()(x)

    # Sigmoid squashes mask values into [0, 1] before it is applied.
    x = Conv2D(filters=16, kernel_size=1, padding="same",
               activation="sigmoid", name="conv_sigmoid_before_mask")(x)

    ## "masking" original input with trained data

    x = Resizing(360, SPLIT_SIZE, RESIZING_FILTER,
                 name="upscale")(x)

    x = Multiply(name="multiply_mask")([x, x_in])

    x = BatchNormalization()(x)

    ## start four branches now

    ## branch 1
    x1a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv1a")(x)

    x1a = BatchNormalization()(x1a)

    x1b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv1b")(x1a)

    ## branch 2
    x2a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv2a")(x)

    x2a = BatchNormalization()(x2a)

    x2b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv2b")(x2a)

    ## branch 3
    x3a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv3a")(x)

    x3a = BatchNormalization()(x3a)

    x3b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv3b")(x3a)

    ## branch 4
    x4a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv4a")(x)

    x4a = BatchNormalization()(x4a)

    x4b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
                 activation="relu", name="conv4b")(x4a)

    # Per-voice 1x1 sigmoid heads; squeeze drops the trailing channel axis.
    y1 = Conv2D(filters=1, kernel_size=1, name='conv_soprano',
                padding='same', activation='sigmoid')(x1b)
    y1 = tf.squeeze(y1, axis=-1, name='sop')

    y2 = Conv2D(filters=1, kernel_size=1, name='conv_alto',
                padding='same', activation='sigmoid')(x2b)
    y2 = tf.squeeze(y2, axis=-1, name='alt')

    y3 = Conv2D(filters=1, kernel_size=1, name='conv_tenor',
                padding='same', activation='sigmoid')(x3b)
    y3 = tf.squeeze(y3, axis=-1, name='ten')

    y4 = Conv2D(filters=1, kernel_size=1, name='conv_bass',
                padding='same', activation='sigmoid')(x4b)
    y4 = tf.squeeze(y4, axis=-1, name='bas')

    out = [y1, y2, y3, y4]

    model = Model(inputs=x_in, outputs=out, name='MaskVoasCNNv2')

    model.compile(optimizer=Adam(learning_rate=l_rate),
                  loss=BinaryCrossentropy(reduction=Reduction.SUM_OVER_BATCH_SIZE))

    # Pre-trained weights; requires the checkpoint to be present at this
    # relative path (run from the repository root).
    model.load_weights('./Checkpoints/mask_voas_v2.keras')

    return model
227 |
+
|
228 |
+
############################################################
|
229 |
+
|
230 |
+
def __base_model(x_in, let):
    """Shared convolutional trunk used by late_deep_cnn_model's two branches.

    Four 5x5 conv blocks followed by two tall (70, 3) "harmonic" conv blocks,
    each with BatchNormalization. The first parameter was renamed from
    ``input`` (which shadowed the builtin) to ``x_in``; both call sites pass
    it positionally, so callers are unaffected.

    Args:
        x_in: Keras tensor, the branch input.
        let: string suffix (e.g. 'a'/'b') appended to layer names so the two
            branches get unique layer names.

    Returns:
        Tuple (features, x_in): the final feature map and the (unchanged)
        branch input tensor.
    """
    b1 = BatchNormalization()(x_in)

    # conv1
    y1 = Conv2D(16, (5, 5), padding='same', activation='relu', name='conv1{}'.format(let))(b1)
    y1a = BatchNormalization()(y1)

    # conv2
    y2 = Conv2D(32, (5, 5), padding='same', activation='relu', name='conv2{}'.format(let))(y1a)
    y2a = BatchNormalization()(y2)

    # conv3
    y3 = Conv2D(32, (5, 5), padding='same', activation='relu', name='conv3{}'.format(let))(y2a)
    y3a = BatchNormalization()(y3)

    # conv4 layer
    y4 = Conv2D(32, (5, 5), padding='same', activation='relu', name='conv4{}'.format(let))(y3a)
    y4a = BatchNormalization()(y4)

    # conv5 layer, harm1: tall kernel spanning 70 frequency bins
    y5 = Conv2D(32, (70, 3), padding='same', activation='relu', name='harm1{}'.format(let))(y4a)
    y5a = BatchNormalization()(y5)

    # conv6 layer, harm2
    y6 = Conv2D(32, (70, 3), padding='same', activation='relu', name='harm2{}'.format(let))(y5a)
    y6a = BatchNormalization()(y6)

    return y6a, x_in
259 |
+
|
260 |
+
|
261 |
+
def late_deep_cnn_model():
    '''Late/Deep

    Build and compile the Late/Deep multi-pitch-estimation CNN.

    Two identical convolutional trunks (see __base_model) process the HCQT
    magnitudes and the HCQT phase differentials separately; their features
    are concatenated ("late" fusion) and reduced to a single pitch-salience
    map by further conv layers.

    Returns:
        A compiled tf.keras Model taking [hcqt, dphase] inputs (each
        (batch, freq, time, 5)) and producing a (batch, freq, time)
        salience map.

    NOTE(review): weights are loaded from './Checkpoints/exp3multif0.pkl',
    which is NOT among the committed checkpoint files (only
    mask_voas*.keras are) — confirm this file is provided separately
    before calling.
    '''

    input_shape_1 = (None, None, 5) # HCQT input shape
    input_shape_2 = (None, None, 5) # phase differentials input shape

    inputs1 = Input(shape=input_shape_1)
    inputs2 = Input(shape=input_shape_2)

    # Two weight-independent trunks; 'a'/'b' suffixes keep layer names unique.
    y6a, _ = __base_model(inputs1, 'a')
    y6b, _ = __base_model(inputs2, 'b')

    # concatenate features
    y6c = Concatenate()([y6a, y6b])

    # conv7 layer
    y7 = Conv2D(64, (3, 3), padding='same', activation='relu', name='conv7')(y6c)
    y7a = BatchNormalization()(y7)

    # conv8 layer
    y8 = Conv2D(64, (3, 3), padding='same', activation='relu', name='conv8')(y7a)
    y8a = BatchNormalization()(y8)

    # Full-height (360, 1) kernel aggregates across the whole frequency axis.
    y9 = Conv2D(8, (360, 1), padding='same', activation='relu', name='distribution')(y8a)
    y9a = BatchNormalization()(y9)

    # 1x1 sigmoid head; Lambda squeeze drops the channel axis.
    y10 = Conv2D(1, (1, 1), padding='same', activation='sigmoid', name='squishy')(y9a)
    predictions = Lambda(lambda x: K.squeeze(x, axis=3))(y10)

    model = Model(inputs=[inputs1, inputs2], outputs=predictions)

    model.compile(
        loss=__bkld, metrics=['mse', __soft_binary_accuracy],
        optimizer='adam'
    )

    model.load_weights('./Checkpoints/exp3multif0.pkl')

    return model
301 |
+
|
302 |
+
############################################################
|
303 |
+
|
304 |
+
def __bkld(y_true, y_pred):
    """Brian's KL Divergence implementation.

    Elementwise binary cross-entropy between probability maps, averaged
    twice over the trailing axis. Both tensors are clipped away from 0/1
    to keep the logs finite.
    """
    eps = K.epsilon()
    t = K.clip(y_true, eps, 1.0 - eps)
    p = K.clip(y_pred, eps, 1.0 - eps)
    pointwise = -1.0 * t * K.log(p) - (1.0 - t) * K.log(1.0 - p)
    return K.mean(K.mean(pointwise, axis=-1), axis=-1)
312 |
+
|
313 |
+
############################################################
|
314 |
+
|
315 |
+
def __soft_binary_accuracy(y_true, y_pred):
    """Binary accuracy that works when inputs are probabilities.

    Rounds both tensors to {0, 1}, compares elementwise, and averages
    twice over the trailing axis.
    """
    matches = K.equal(K.round(y_true), K.round(y_pred))
    return K.mean(K.mean(matches, axis=-1), axis=-1)
320 |
+
|
321 |
+
############################################################
|
cq2m_utils.py
ADDED
@@ -0,0 +1,257 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import math
|
3 |
+
import mido
|
4 |
+
import pumpp
|
5 |
+
import numpy as np
|
6 |
+
from scipy.ndimage import gaussian_filter1d
|
7 |
+
from cq2m_models import mask_voas_cnn_model, late_deep_cnn_model
|
8 |
+
|
9 |
+
############################################################
|
10 |
+
|
11 |
+
def downsample_bins(voice):
    """Collapse a 360-bin salience map (5 bins per semitone) to 69 semitone
    columns and one-hot the strongest column per frame.

    The five within-semitone phases are summed; phases 0-2 keep semitone
    rows 1..69 while phases 3-4 keep rows 0..68 (matching the original
    alignment). Column 0 is always forced to zero, so frames whose argmax
    lands there come out silent.

    Args:
        voice: array of shape (frames, 360).

    Returns:
        One-hot array of shape (frames, 69).
    """
    phases = [np.array(voice.T[k::5]).T for k in range(5)]
    aligned = [p.T[1:70].T for p in phases[:3]] + [p.T[0:69].T for p in phases[3:]]

    summed = aligned[0] + aligned[1] + aligned[2] + aligned[3] + aligned[4]
    best = np.argmax(summed, axis=1)

    onehot = np.zeros(summed.shape)
    onehot[np.arange(best.size), best] = 1
    onehot[:, 0] = 0

    return onehot
32 |
+
|
33 |
+
############################################################
|
34 |
+
|
35 |
+
def create_midi(pr, write_path='./midi_track.mid', ticks_per_beat=58,
                tempo=90, save_to_file=True, program=53, channel=0):
    """Convert a piano-roll matrix into a single-track MIDI file.

    Args:
        pr: piano roll of shape (T, N), values in [0, 1]. When N <= 72 each
            column is one semitone (pitch = 25 + n); otherwise the roll is
            assumed to have 5 bins per semitone (pitch = 24 + round(n/5)).
        write_path: output path used when save_to_file is True.
        ticks_per_beat: MIDI time resolution.
        tempo: beats per minute.
        save_to_file: when True, the file is written to write_path.
        program: General MIDI program (Choir Aahs = 53, Voice Oohs = 54,
            Synth Choir = 55).
        channel: MIDI channel for all events of this track.

    Returns:
        The mido.MidiFile object (saved to disk only if save_to_file).

    Note: the original body computed an unused ``range_step`` local on every
    frame; it has been removed (no behavioral change).
    """

    def pr_to_list(pr):
        # List event = (pitch, velocity, time); time is in ticks elapsed
        # since the previous event (MIDI delta time).
        T, N = pr.shape
        t_last = 0
        pr_tm1 = np.zeros(N)
        list_event = []
        for t in range(T):
            pr_t = pr[t]
            mask = (pr_t != pr_tm1)  # columns whose state changed this frame
            if (mask).any():
                for n in range(0, N):
                    if mask[n]:
                        if(N <= 72):
                            pitch = 25 + n
                        else:
                            pitch = 24 + round(n/5)
                        # Binarize velocity: strong enough -> full volume,
                        # otherwise treat as a note-off.
                        if int(pr_t[n] * 127) >= 50:
                            velocity = 127
                        else:
                            velocity = 0
                        # Time is incremented since last event
                        t_event = t - t_last
                        t_last = t
                        list_event.append((pitch, velocity, t_event))
            pr_tm1 = pr_t
        # Trailing sentinel keeps the final delta time in the track.
        list_event.append((0, 0, T - t_last))
        return list_event

    # Tempo
    microseconds_per_beat = mido.bpm2tempo(tempo)
    # Write a pianoroll in a midi file
    mid = mido.MidiFile()
    mid.ticks_per_beat = ticks_per_beat

    # Add a new track with the instrument name to the midi file
    track = mid.add_track("Voice Aah")
    # transform the matrix in a list of (pitch, velocity, time)
    events = pr_to_list(pr)
    # Tempo
    track.append(mido.MetaMessage('set_tempo', tempo=microseconds_per_beat))
    track.append(mido.MetaMessage('channel_prefix', channel=channel))
    # Add the program_change
    #Choir Aahs = 53, Voice Oohs (or Doos) = 54, Synch Choir = 55
    track.append(mido.Message('program_change', program=program, channel=channel))

    # This list is required to shut down
    # notes that are on, intensity modified, then off only 1 time
    # Example :
    # (60,20,0)
    # (60,40,10)
    # (60,0,15)
    notes_on_list = []
    # Write events in the midi file
    for event in events:
        pitch, velocity, time = event
        if velocity == 0:
            # Note-off: emit it and clear the bookkeeping entry if present.
            track.append(mido.Message('note_off', note=pitch, velocity=0, time=time, channel=channel))
            if(pitch in notes_on_list):
                notes_on_list.remove(pitch)
        else:
            # Re-triggered note: close the previous instance first so the
            # new note_on lands at the same tick (time reset to 0).
            if pitch in notes_on_list:
                track.append(mido.Message('note_off', note=pitch, velocity=0, time=time, channel=channel))
                notes_on_list.remove(pitch)
                time = 0
            track.append(mido.Message('note_on', note=pitch, velocity=velocity, time=time, channel=channel))
            notes_on_list.append(pitch)
    if save_to_file:
        mid.save(write_path)
    return mid
111 |
+
|
112 |
+
############################################################
|
113 |
+
|
114 |
+
def song_to_midi(sop, alto, ten, bass):
    """Render four SATB salience matrices (each (360, frames)) into one
    multi-track MIDI file saved at ./result.mid.

    Each voice is downsampled to semitone resolution, converted to its own
    track (distinct General MIDI program and channel), and the four tracks
    are merged into a single mido.MidiFile, which is returned.
    """
    voices = (sop, alto, ten, bass)
    programs = (52, 53, 49, 50)

    per_voice_mids = []
    for channel, (voice, program) in enumerate(zip(voices, programs)):
        semitone_roll = downsample_bins(voice.T)
        per_voice_mids.append(
            create_midi(semitone_roll, save_to_file=False,
                        program=program, channel=channel))

    mid_mix = mido.MidiFile()
    mid_mix.ticks_per_beat = per_voice_mids[0].ticks_per_beat
    mid_mix.tracks = (per_voice_mids[0].tracks + per_voice_mids[1].tracks
                      + per_voice_mids[2].tracks + per_voice_mids[3].tracks)
    mid_mix.save('./result.mid')

    return mid_mix
132 |
+
|
133 |
+
############################################################
|
134 |
+
|
135 |
+
def prediction_postproc(input_array, argmax_and_threshold=True, gaussian_blur=True):
    """Post-process stacked voice-assignment predictions into a (360, T) map.

    Args:
        input_array: array of shape (windows, 360, frames); windows are laid
            out back-to-back along the time axis.
        argmax_and_threshold: when True, keep only the strongest bin per
            frame as a one-hot column (bins above 357 are treated as
            "no voice" and mapped to bin 0).
        gaussian_blur: when True, blur along the frequency axis (sigma=1,
            wrap mode) and min-max normalize to [0, 1].

    Returns:
        Array of shape (360, windows * frames).

    Fix vs. original: min-max normalization divided by zero (producing NaNs)
    when the blurred map was constant, e.g. for an all-zero input with
    argmax_and_threshold=False; a guard now leaves such input unchanged
    (shifted by its min). Normal inputs behave identically.
    """
    prediction = np.moveaxis(input_array, 0, 1).reshape(360, -1)
    if argmax_and_threshold:
        prediction = np.argmax(prediction, axis=0)
        # Bins 358-359 are out of the usable range; treat as "no voice".
        prediction = np.array([i if i <= 357 else 0 for i in prediction])
        threshold = np.zeros((360, prediction.shape[0]))
        threshold[prediction, np.arange(prediction.size)] = 1
        prediction = threshold
    if gaussian_blur:
        prediction = np.array(gaussian_filter1d(prediction, 1, axis=0, mode='wrap'))
        lo = np.min(prediction)
        hi = np.max(prediction)
        if hi > lo:
            prediction = (prediction - lo) / (hi - lo)
        else:
            # Constant map: nothing to normalize; avoid 0/0 -> NaN.
            prediction = prediction - lo
    return prediction
147 |
+
|
148 |
+
############################################################
|
149 |
+
|
150 |
+
def get_hcqt_params():
    """Return the fixed HCQT feature-extraction configuration.

    Returns:
        Tuple (bins_per_octave, n_octaves, harmonics, sr, fmin, hop_length,
        over_sample): 60 bins/octave over 6 octaves, harmonics 1-5,
        22050 Hz sample rate, fmin of C1 (32.7 Hz), hop of 256 samples,
        5x oversampling per semitone.
    """
    return 60, 6, [1, 2, 3, 4, 5], 22050, 32.7, 256, 5
161 |
+
|
162 |
+
############################################################
|
163 |
+
|
164 |
+
def create_pump_object():
    """Build a pumpp Pump that extracts HCQT magnitudes plus phase
    differentials ('dphase' feature), configured via get_hcqt_params().
    """
    (bins_per_octave, n_octaves, harmonics,
     sr, f_min, hop_length, over_sample) = get_hcqt_params()

    phase_diff_extractor = pumpp.feature.HCQTPhaseDiff(
        name='dphase',
        sr=sr,
        hop_length=hop_length,
        fmin=f_min,
        n_octaves=n_octaves,
        over_sample=over_sample,
        harmonics=harmonics,
        log=True,
    )

    return pumpp.Pump(phase_diff_extractor)
175 |
+
|
176 |
+
############################################################
|
177 |
+
|
178 |
+
def compute_pump_features(pump, audio_fpath):
    """Run the pump's feature extraction on a single audio file.

    Args:
        pump: a callable pump object (see create_pump_object).
        audio_fpath: path to the audio file, forwarded as ``audio_f``.

    Returns:
        The pump's feature dictionary.
    """
    return pump(audio_f=audio_fpath)
183 |
+
|
184 |
+
############################################################
|
185 |
+
|
186 |
+
def get_mpe_prediction(model, audio_file=None):
    """Generate output from a model given an input numpy file.
    Part of this function is part of deepsalience

    Extracts HCQT magnitude and phase-differential features from the audio
    file, feeds them to the model in chunks of up to 2500 frames, and
    stacks the per-chunk salience maps back together along time.

    Raises:
        ValueError: if audio_file is None.
    """

    split_value = 2500

    if audio_file is None:
        raise ValueError("One audio_file must be specified")

    pump = create_pump_object()
    features = compute_pump_features(pump, audio_file)
    input_hcqt = features['dphase/mag'][0]
    input_dphase = features['dphase/dphase'][0]

    # (time, freq, harm) -> (1, freq, harm, time)
    input_hcqt = input_hcqt.transpose(1, 2, 0)[np.newaxis, :, :, :]
    input_dphase = input_dphase.transpose(1, 2, 0)[np.newaxis, :, :, :]

    n_t = input_hcqt.shape[3]
    chunk_outputs = []

    # Predict in time chunks to bound memory use; each chunk is reordered
    # to (batch, freq, time, harm) before being fed to the model.
    for start in range(0, n_t, split_value):
        stop = start + split_value
        hcqt_chunk = np.transpose(input_hcqt[:, :, :, start:stop], (0, 1, 3, 2))
        dphase_chunk = np.transpose(input_dphase[:, :, :, start:stop], (0, 1, 3, 2))
        chunk_outputs.append(model.predict([hcqt_chunk, dphase_chunk])[0, :, :])

    return np.hstack(chunk_outputs).astype(np.float32)
219 |
+
|
220 |
+
############################################################
|
221 |
+
|
222 |
+
def get_va_prediction(model, f0_matrix):
    """Run the voice-assignment model over a (360, T) multi-pitch matrix.

    The time axis is zero-padded and split into 256-frame windows, the
    model is invoked in batches of 24 windows, and each voice's windows
    are re-assembled and post-processed back to (360, T).

    Returns:
        Tuple (soprano, alto, tenor, bass) of (360, T) arrays.
    """
    n_frames = f0_matrix.shape[1]
    full_windows = n_frames // 256
    pad_width = 256 - (n_frames - full_windows * 256)

    padded = np.concatenate((np.copy(f0_matrix), np.zeros((360, pad_width))), axis=1)
    windows = np.reshape(padded, (360, -1, 256, 1)).transpose((1, 0, 2, 3))
    n_batches = math.ceil(windows.shape[0] / 24)

    # One accumulator per voice: soprano, alto, tenor, bass.
    voice_preds = [np.zeros((0, 360, 256)) for _ in range(4)]

    for b in range(n_batches):
        batch = windows[b * 24:(b + 1) * 24]
        for voice_idx, pred in enumerate(model.predict(batch)):
            voice_preds[voice_idx] = np.append(voice_preds[voice_idx], pred, axis=0)

    # Post-process each voice and trim the padding off the time axis.
    s_out, a_out, t_out, b_out = (
        prediction_postproc(p)[:, :n_frames] for p in voice_preds)

    return s_out, a_out, t_out, b_out
248 |
+
|
249 |
+
############################################################
|
250 |
+
|
251 |
+
def cq2m(audiofile, mpe=None, va=None):
    """Transcribe a choral-quartet audio file into a four-track MIDI file.

    Pipeline: multi-pitch estimation -> voice assignment -> MIDI rendering
    (song_to_midi also writes ./result.mid as a side effect).

    Args:
        audiofile: path to the input audio file.
        mpe: optional pre-built multi-pitch-estimation model; when None,
            late_deep_cnn_model() is built on first use.
        va: optional pre-built voice-assignment model; when None,
            mask_voas_cnn_model() is built on first use.

    Returns:
        The mido.MidiFile produced by song_to_midi().

    Fix vs. original: the models were built as *default argument values*
    (``mpe=late_deep_cnn_model()``), so both networks were constructed and
    their checkpoints loaded at import time — even if cq2m was never
    called, and failing the whole import if a checkpoint was missing.
    Defaults are now None and the models are created lazily.
    """
    if mpe is None:
        mpe = late_deep_cnn_model()
    if va is None:
        va = mask_voas_cnn_model()
    mpe_pred = get_mpe_prediction(mpe, audiofile)
    s_pred, a_pred, t_pred, b_pred = get_va_prediction(va, mpe_pred)
    midi = song_to_midi(s_pred, a_pred, t_pred, b_pred)
    return midi
256 |
+
|
257 |
+
############################################################
|
pyproject.toml
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "choral-quartets-to-midi"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "An application that uses Multi-Pitch Estimation and Voice Assignment to transform .WAV files with Choral Quartets recordings into MIDI files, with a separate track for each voice. Based on Late/DeepCNN by Helena Cuesta and MaskVoasCNN by André Paiva."
|
5 |
+
authors = ["André Paiva (Xornotor) <[email protected]>"]
|
6 |
+
license = "cc"
|
7 |
+
readme = "README.md"
|
8 |
+
packages = [{include = "choral_quartets_to_midi"}]
|
9 |
+
|
10 |
+
[tool.poetry.dependencies]
|
11 |
+
python = "^3.11"
|
12 |
+
tensorflow = "2.13.0"
|
13 |
+
gradio = "3.37.0"
|
14 |
+
typing-extensions = "4.5.0"
|
15 |
+
mido = "1.2.10"
|
16 |
+
pumpp = "0.6.0"
|
17 |
+
numpy = "1.24.3"
|
18 |
+
scipy = "1.11.1"
|
19 |
+
|
20 |
+
|
21 |
+
[build-system]
|
22 |
+
requires = ["poetry-core"]
|
23 |
+
build-backend = "poetry.core.masonry.api"
|