Xornotor committed
Commit 2b8a8b8
1 Parent(s): e87882e

Initial files for application

.gitignore ADDED
@@ -0,0 +1 @@
+ __pycache__/*
Checkpoints/.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Checkpoints/mask_voas.keras ADDED
Binary file (856 kB)
 
Checkpoints/mask_voas_v2.keras ADDED
Binary file (661 kB)
 
app_test.ipynb ADDED
@@ -0,0 +1,37 @@
+ {
+   "cells": [
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {},
+       "outputs": [],
+       "source": [
+         "import gradio as gr\n",
+         "import cq2m_utils\n",
+         "#midi = cq2m_utils.cq2m(audiofile)"
+       ]
+     }
+   ],
+   "metadata": {
+     "kernelspec": {
+       "display_name": "tf",
+       "language": "python",
+       "name": "python3"
+     },
+     "language_info": {
+       "codemirror_mode": {
+         "name": "ipython",
+         "version": 3
+       },
+       "file_extension": ".py",
+       "mimetype": "text/x-python",
+       "name": "python",
+       "nbconvert_exporter": "python",
+       "pygments_lexer": "ipython3",
+       "version": "3.11.4"
+     },
+     "orig_nbformat": 4
+   },
+   "nbformat": 4,
+   "nbformat_minor": 2
+ }
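
A minimal sketch of how the commented-out call above could be wired into a Gradio app (illustrative only: it assumes cq2m_utils.cq2m accepts an audio file path and that ./result.mid is written as a side effect, as cq2m_utils.py below suggests; the interface labels are made up):

import gradio as gr
import cq2m_utils

def transcribe(audio_path):
    # cq2m runs multi-pitch estimation + voice assignment and
    # saves ./result.mid as a side effect (see song_to_midi below)
    cq2m_utils.cq2m(audio_path)
    return "./result.mid"

demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Choral quartet recording (.wav)"),
    outputs=gr.File(label="MIDI result"),
)

if __name__ == "__main__":
    demo.launch()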
cq2m_models.py ADDED
@@ -0,0 +1,321 @@
+ import tensorflow as tf
+ from tensorflow.keras import Model
+ from tensorflow.keras.optimizers import Adam
+ from tensorflow.keras.losses import BinaryCrossentropy, Reduction
+ from tensorflow.keras.layers import Input, Resizing, Conv2D, BatchNormalization, Multiply, Lambda, Concatenate
+ import tensorflow.keras.backend as K
+
+ EPOCHS = 10
+ TRAINING_DTYPE = tf.float16
+ SPLIT_SIZE = 256
+ BATCH_SIZE = 24
+ LEARNING_RATE = 5e-3
+ RESIZING_FILTER = 'bilinear'
+
+ ############################################################
+
+ def mask_voas_cnn_model(l_rate = LEARNING_RATE):
+     x_in = Input(shape=(360, SPLIT_SIZE, 1))
+
+     x = Resizing(90, int(SPLIT_SIZE/2), RESIZING_FILTER,
+                  name="downscale")(x_in)
+
+     x = BatchNormalization()(x)
+
+     x = Conv2D(filters=32, kernel_size=(3, 3), padding="same",
+                activation="relu", name="conv1")(x)
+
+     x = BatchNormalization()(x)
+
+     x = Conv2D(filters=32, kernel_size=(3, 3), padding="same",
+                activation="relu", name="conv2")(x)
+
+     x = BatchNormalization()(x)
+
+     x = Conv2D(filters=16, kernel_size=(70, 3), padding="same",
+                activation="relu", name="conv_harm_1")(x)
+
+     x = BatchNormalization()(x)
+
+     x = Conv2D(filters=16, kernel_size=(70, 3), padding="same",
+                activation="relu", name="conv_harm_2")(x)
+
+     x = BatchNormalization()(x)
+
+     ## "mask" the original input with the learned salience map
+
+     x = Resizing(360, SPLIT_SIZE, RESIZING_FILTER,
+                  name="upscale")(x)
+
+     x = Multiply(name="multiply_mask")([x, x_in])
+
+     ## four voice-specific branches from here on
+
+     ## branch 1 (soprano)
+     x1a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv1a")(x)
+
+     x1a = BatchNormalization()(x1a)
+
+     x1b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv1b")(x1a)
+
+     ## branch 2 (alto)
+     x2a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv2a")(x)
+
+     x2a = BatchNormalization()(x2a)
+
+     x2b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv2b")(x2a)
+
+     ## branch 3 (tenor)
+     x3a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv3a")(x)
+
+     x3a = BatchNormalization()(x3a)
+
+     x3b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv3b")(x3a)
+
+     ## branch 4 (bass)
+     x4a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv4a")(x)
+
+     x4a = BatchNormalization()(x4a)
+
+     x4b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv4b")(x4a)
+
+     ## 1x1 convolutions collapse each branch to a single-channel salience map
+     y1 = Conv2D(filters=1, kernel_size=1, name='conv_soprano',
+                 padding='same', activation='sigmoid')(x1b)
+     y1 = tf.squeeze(y1, axis=-1, name='sop')
+
+     y2 = Conv2D(filters=1, kernel_size=1, name='conv_alto',
+                 padding='same', activation='sigmoid')(x2b)
+     y2 = tf.squeeze(y2, axis=-1, name='alt')
+
+     y3 = Conv2D(filters=1, kernel_size=1, name='conv_tenor',
+                 padding='same', activation='sigmoid')(x3b)
+     y3 = tf.squeeze(y3, axis=-1, name='ten')
+
+     y4 = Conv2D(filters=1, kernel_size=1, name='conv_bass',
+                 padding='same', activation='sigmoid')(x4b)
+     y4 = tf.squeeze(y4, axis=-1, name='bas')
+
+     out = [y1, y2, y3, y4]
+
+     model = Model(inputs=x_in, outputs=out, name='MaskVoasCNN')
+
+     model.compile(optimizer=Adam(learning_rate=l_rate),
+                   loss=BinaryCrossentropy(reduction=Reduction.SUM_OVER_BATCH_SIZE))
+
+     model.load_weights('./Checkpoints/mask_voas.keras')
+
+     return model
+
+ ############################################################
+
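
For orientation, a smoke test of the model above (a sketch; it assumes the mask_voas.keras checkpoint is present, since the constructor loads weights eagerly): one (360, 256, 1) input patch yields four (1, 360, 256) voice salience maps, ordered soprano, alto, tenor, bass.

import numpy as np
from cq2m_models import mask_voas_cnn_model

model = mask_voas_cnn_model()
patch = np.zeros((1, 360, 256, 1), dtype=np.float32)  # one dummy CQT patch
sop, alto, ten, bas = model.predict(patch)
print(sop.shape)  # (1, 360, 256) after the channel squeeze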
+ def mask_voas_cnn_v2_model(l_rate = LEARNING_RATE):
+     x_in = Input(shape=(360, SPLIT_SIZE, 1))
+
+     x = Resizing(90, int(SPLIT_SIZE/2), RESIZING_FILTER,
+                  name="downscale")(x_in)
+
+     x = BatchNormalization()(x)
+
+     x = Conv2D(filters=32, kernel_size=(3, 3), padding="same",
+                activation="relu", name="conv1")(x)
+
+     x = BatchNormalization()(x)
+
+     x = Conv2D(filters=32, kernel_size=(3, 3), padding="same",
+                activation="relu", name="conv2")(x)
+
+     x = BatchNormalization()(x)
+
+     x = Conv2D(filters=16, kernel_size=(48, 3), padding="same",
+                activation="relu", name="conv_harm_1")(x)
+
+     x = BatchNormalization()(x)
+
+     x = Conv2D(filters=16, kernel_size=(48, 3), padding="same",
+                activation="relu", name="conv_harm_2")(x)
+
+     x = BatchNormalization()(x)
+
+     x = Conv2D(filters=16, kernel_size=1, padding="same",
+                activation="sigmoid", name="conv_sigmoid_before_mask")(x)
+
+     ## "mask" the original input with the learned salience map
+
+     x = Resizing(360, SPLIT_SIZE, RESIZING_FILTER,
+                  name="upscale")(x)
+
+     x = Multiply(name="multiply_mask")([x, x_in])
+
+     x = BatchNormalization()(x)
+
+     ## four voice-specific branches from here on
+
+     ## branch 1 (soprano)
+     x1a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv1a")(x)
+
+     x1a = BatchNormalization()(x1a)
+
+     x1b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv1b")(x1a)
+
+     ## branch 2 (alto)
+     x2a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv2a")(x)
+
+     x2a = BatchNormalization()(x2a)
+
+     x2b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv2b")(x2a)
+
+     ## branch 3 (tenor)
+     x3a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv3a")(x)
+
+     x3a = BatchNormalization()(x3a)
+
+     x3b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv3b")(x3a)
+
+     ## branch 4 (bass)
+     x4a = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv4a")(x)
+
+     x4a = BatchNormalization()(x4a)
+
+     x4b = Conv2D(filters=16, kernel_size=(3, 3), padding="same",
+                  activation="relu", name="conv4b")(x4a)
+
+     ## 1x1 convolutions collapse each branch to a single-channel salience map
+     y1 = Conv2D(filters=1, kernel_size=1, name='conv_soprano',
+                 padding='same', activation='sigmoid')(x1b)
+     y1 = tf.squeeze(y1, axis=-1, name='sop')
+
+     y2 = Conv2D(filters=1, kernel_size=1, name='conv_alto',
+                 padding='same', activation='sigmoid')(x2b)
+     y2 = tf.squeeze(y2, axis=-1, name='alt')
+
+     y3 = Conv2D(filters=1, kernel_size=1, name='conv_tenor',
+                 padding='same', activation='sigmoid')(x3b)
+     y3 = tf.squeeze(y3, axis=-1, name='ten')
+
+     y4 = Conv2D(filters=1, kernel_size=1, name='conv_bass',
+                 padding='same', activation='sigmoid')(x4b)
+     y4 = tf.squeeze(y4, axis=-1, name='bas')
+
+     out = [y1, y2, y3, y4]
+
+     model = Model(inputs=x_in, outputs=out, name='MaskVoasCNNv2')
+
+     model.compile(optimizer=Adam(learning_rate=l_rate),
+                   loss=BinaryCrossentropy(reduction=Reduction.SUM_OVER_BATCH_SIZE))
+
+     model.load_weights('./Checkpoints/mask_voas_v2.keras')
+
+     return model
+
+ ############################################################
+
+ def __base_model(input, let):
+
+     b1 = BatchNormalization()(input)
+
+     # conv1
+     y1 = Conv2D(16, (5, 5), padding='same', activation='relu', name='conv1{}'.format(let))(b1)
+     y1a = BatchNormalization()(y1)
+
+     # conv2
+     y2 = Conv2D(32, (5, 5), padding='same', activation='relu', name='conv2{}'.format(let))(y1a)
+     y2a = BatchNormalization()(y2)
+
+     # conv3
+     y3 = Conv2D(32, (5, 5), padding='same', activation='relu', name='conv3{}'.format(let))(y2a)
+     y3a = BatchNormalization()(y3)
+
+     # conv4 layer
+     y4 = Conv2D(32, (5, 5), padding='same', activation='relu', name='conv4{}'.format(let))(y3a)
+     y4a = BatchNormalization()(y4)
+
+     # conv5 layer, harm1: tall kernels span stacks of harmonics
+     y5 = Conv2D(32, (70, 3), padding='same', activation='relu', name='harm1{}'.format(let))(y4a)
+     y5a = BatchNormalization()(y5)
+
+     # conv6 layer, harm2
+     y6 = Conv2D(32, (70, 3), padding='same', activation='relu', name='harm2{}'.format(let))(y5a)
+     y6a = BatchNormalization()(y6)
+
+     return y6a, input
+
+
+ def late_deep_cnn_model():
+     '''Late/Deep CNN for multi-pitch estimation.
+     '''
+
+     input_shape_1 = (None, None, 5)  # HCQT input shape
+     input_shape_2 = (None, None, 5)  # phase differentials input shape
+
+     inputs1 = Input(shape=input_shape_1)
+     inputs2 = Input(shape=input_shape_2)
+
+     y6a, _ = __base_model(inputs1, 'a')
+     y6b, _ = __base_model(inputs2, 'b')
+
+     # concatenate features (late fusion of the two branches)
+     y6c = Concatenate()([y6a, y6b])
+
+     # conv7 layer
+     y7 = Conv2D(64, (3, 3), padding='same', activation='relu', name='conv7')(y6c)
+     y7a = BatchNormalization()(y7)
+
+     # conv8 layer
+     y8 = Conv2D(64, (3, 3), padding='same', activation='relu', name='conv8')(y7a)
+     y8a = BatchNormalization()(y8)
+
+     y9 = Conv2D(8, (360, 1), padding='same', activation='relu', name='distribution')(y8a)
+     y9a = BatchNormalization()(y9)
+
+     y10 = Conv2D(1, (1, 1), padding='same', activation='sigmoid', name='squishy')(y9a)
+     predictions = Lambda(lambda x: K.squeeze(x, axis=3))(y10)
+
+     model = Model(inputs=[inputs1, inputs2], outputs=predictions)
+
+     model.compile(
+         loss=__bkld, metrics=['mse', __soft_binary_accuracy],
+         optimizer='adam'
+     )
+
+     model.load_weights('./Checkpoints/exp3multif0.pkl')
+
+     return model
+
+ ############################################################
+
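
A shape sketch for the late-fusion model above (illustrative; it assumes the exp3multif0 checkpoint is available): both branches consume (batch, freq, time, 5) tensors, one for HCQT magnitudes and one for phase differentials, and the fused head emits a single multi-pitch salience map.

import numpy as np
from cq2m_models import late_deep_cnn_model

model = late_deep_cnn_model()
hcqt = np.zeros((1, 360, 128, 5), dtype=np.float32)    # magnitude branch
dphase = np.zeros((1, 360, 128, 5), dtype=np.float32)  # phase-differential branch
salience = model.predict([hcqt, dphase])
print(salience.shape)  # (1, 360, 128)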
+ def __bkld(y_true, y_pred):
+     """Brian's KL divergence implementation (elementwise binary cross-entropy).
+     """
+     y_true = K.clip(y_true, K.epsilon(), 1.0 - K.epsilon())
+     y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
+     return K.mean(K.mean(
+         -1.0*y_true * K.log(y_pred) - (1.0 - y_true) * K.log(1.0 - y_pred),
+         axis=-1), axis=-1)
+
+ ############################################################
+
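
Despite the name, the expression above is elementwise binary cross-entropy, which differs from the KL divergence between Bernoulli distributions only by the entropy of y_true (a constant with respect to the prediction). A NumPy sanity check with illustrative values:

import numpy as np

eps = 1e-7
y_true = np.clip(np.array([0.0, 1.0, 0.3]), eps, 1 - eps)
y_pred = np.clip(np.array([0.1, 0.8, 0.3]), eps, 1 - eps)
bce = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
print(bce.mean())  # matches __bkld on the same inputs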
+ def __soft_binary_accuracy(y_true, y_pred):
+     """Binary accuracy that works when inputs are probabilities.
+     """
+     return K.mean(K.mean(
+         K.equal(K.round(y_true), K.round(y_pred)), axis=-1), axis=-1)
+
+ ############################################################
cq2m_utils.py ADDED
@@ -0,0 +1,257 @@
+ import os
+ import math
+ import mido
+ import pumpp
+ import numpy as np
+ from scipy.ndimage import gaussian_filter1d
+ from cq2m_models import mask_voas_cnn_model, late_deep_cnn_model
+
+ ############################################################
+
+ def downsample_bins(voice):
+     # Regroup the 360 fifth-of-semitone bins into 5 interleaved sub-grids
+     voice_0 = np.array(voice.T[0::5]).T
+     voice_1 = np.array(voice.T[1::5]).T
+     voice_2 = np.array(voice.T[2::5]).T
+     voice_3 = np.array(voice.T[3::5]).T
+     voice_4 = np.array(voice.T[4::5]).T
+
+     # Align the sub-grids on a common 69-bin semitone axis
+     voice_0 = voice_0.T[1:70].T
+     voice_1 = voice_1.T[1:70].T
+     voice_2 = voice_2.T[1:70].T
+     voice_3 = voice_3.T[0:69].T
+     voice_4 = voice_4.T[0:69].T
+
+     # Sum the sub-grids and keep only the strongest bin per frame
+     voice_sums = voice_0 + voice_1 + voice_2 + voice_3 + voice_4
+     voice_argmax = np.argmax(voice_sums, axis=1)
+     threshold = np.zeros(voice_sums.shape)
+     threshold[np.arange(voice_argmax.size), voice_argmax] = 1
+     threshold[:, 0] = 0    # bin 0 doubles as "no voicing"
+     voice_sums = threshold
+
+     return voice_sums
+
+ ############################################################
+
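
A quick shape check for the function above (illustrative): a (frames, 360) salience map with a single active bin collapses to a (frames, 69) one-hot semitone roll.

import numpy as np
from cq2m_utils import downsample_bins

sal = np.zeros((100, 360))   # 100 frames, 360 fifth-of-semitone bins
sal[:, 200] = 1.0            # one sustained pitch
roll = downsample_bins(sal)
print(roll.shape)            # (100, 69)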
+ def create_midi(pr, write_path='./midi_track.mid', ticks_per_beat=58,
+                 tempo=90, save_to_file=True, program=53, channel=0):
+
+     def pr_to_list(pr):
+         # List event = (pitch, velocity, time)
+         T, N = pr.shape
+         t_last = 0
+         pr_tm1 = np.zeros(N)
+         list_event = []
+         for t in range(T):
+             pr_t = pr[t]
+             mask = (pr_t != pr_tm1)
+             if mask.any():
+                 for n in range(0, N):
+                     if mask[n]:
+                         # 69-bin rolls have semitone resolution; 360-bin rolls
+                         # carry five bins per semitone
+                         if N <= 72:
+                             pitch = 25 + n
+                         else:
+                             pitch = 24 + round(n/5)
+                         if int(pr_t[n] * 127) >= 50:
+                             velocity = 127
+                         else:
+                             velocity = 0
+                         # Time is incremented since last event
+                         t_event = t - t_last
+                         t_last = t
+                         list_event.append((pitch, velocity, t_event))
+             pr_tm1 = pr_t
+         list_event.append((0, 0, T - t_last))
+         return list_event
+
+     # Tempo
+     microseconds_per_beat = mido.bpm2tempo(tempo)
+     # Write a piano roll to a MIDI file
+     mid = mido.MidiFile()
+     mid.ticks_per_beat = ticks_per_beat
+
+     # Add a new track with the instrument name to the MIDI file
+     track = mid.add_track("Voice Aah")
+     # Transform the matrix into a list of (pitch, velocity, time)
+     events = pr_to_list(pr)
+     # Tempo
+     track.append(mido.MetaMessage('set_tempo', tempo=microseconds_per_beat))
+     track.append(mido.MetaMessage('channel_prefix', channel=channel))
+     # Add the program_change
+     # Choir Aahs = 53, Voice Oohs (or Doos) = 54, Synth Choir = 55
+     track.append(mido.Message('program_change', program=program, channel=channel))
+
+     # This list ensures that notes which are turned on, have their intensity
+     # modified, and are then turned off get shut down exactly once.
+     # Example:
+     #   (60, 20, 0)
+     #   (60, 40, 10)
+     #   (60, 0, 15)
+     notes_on_list = []
+     # Write events to the MIDI file
+     for event in events:
+         pitch, velocity, time = event
+         if velocity == 0:
+             track.append(mido.Message('note_off', note=pitch, velocity=0, time=time, channel=channel))
+             if pitch in notes_on_list:
+                 notes_on_list.remove(pitch)
+         else:
+             if pitch in notes_on_list:
+                 track.append(mido.Message('note_off', note=pitch, velocity=0, time=time, channel=channel))
+                 notes_on_list.remove(pitch)
+                 time = 0
+             track.append(mido.Message('note_on', note=pitch, velocity=velocity, time=time, channel=channel))
+             notes_on_list.append(pitch)
+     if save_to_file:
+         mid.save(write_path)
+     return mid
+
+ ############################################################
+
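
A minimal usage sketch for create_midi (illustrative values): a (frames, 69) one-hot roll, like the ones downsample_bins produces, can be written straight to disk.

import numpy as np
from cq2m_utils import create_midi

roll = np.zeros((100, 69))
roll[10:40, 40] = 1.0   # one sustained note
create_midi(roll, write_path='./example.mid', program=53, channel=0)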
+ def song_to_midi(sop, alto, ten, bass):
+
+     down_sop = downsample_bins(sop.T)
+     down_alto = downsample_bins(alto.T)
+     down_ten = downsample_bins(ten.T)
+     down_bass = downsample_bins(bass.T)
+
+     mid_sop = create_midi(down_sop, save_to_file=False, program=52, channel=0)
+     mid_alto = create_midi(down_alto, save_to_file=False, program=53, channel=1)
+     mid_ten = create_midi(down_ten, save_to_file=False, program=49, channel=2)
+     mid_bass = create_midi(down_bass, save_to_file=False, program=50, channel=3)
+
+     mid_mix = mido.MidiFile()
+     mid_mix.ticks_per_beat = mid_sop.ticks_per_beat
+     mid_mix.tracks = mid_sop.tracks + mid_alto.tracks + mid_ten.tracks + mid_bass.tracks
+     mid_mix.save('./result.mid')
+
+     return mid_mix
+
+ ############################################################
+
+ def prediction_postproc(input_array, argmax_and_threshold=True, gaussian_blur=True):
+     prediction = np.moveaxis(input_array, 0, 1).reshape(360, -1)
+     if argmax_and_threshold:
+         # Keep only the strongest bin per frame, zeroing out-of-range top bins
+         prediction = np.argmax(prediction, axis=0)
+         prediction = np.array([i if i <= 357 else 0 for i in prediction])
+         threshold = np.zeros((360, prediction.shape[0]))
+         threshold[prediction, np.arange(prediction.size)] = 1
+         prediction = threshold
+     if gaussian_blur:
+         # Smooth along the frequency axis, then renormalize to [0, 1]
+         prediction = np.array(gaussian_filter1d(prediction, 1, axis=0, mode='wrap'))
+         prediction = (prediction - np.min(prediction))/(np.max(prediction)-np.min(prediction))
+     return prediction
+
+ ############################################################
+
+ def get_hcqt_params():
+
+     bins_per_octave = 60
+     n_octaves = 6
+     over_sample = 5
+     harmonics = [1, 2, 3, 4, 5]
+     sr = 22050
+     fmin = 32.7
+     hop_length = 256
+
+     return bins_per_octave, n_octaves, harmonics, sr, fmin, hop_length, over_sample
+
+ ############################################################
+
+ def create_pump_object():
+
+     (bins_per_octave, n_octaves, harmonics,
+      sr, f_min, hop_length, over_sample) = get_hcqt_params()
+
+     p_phdif = pumpp.feature.HCQTPhaseDiff(name='dphase', sr=sr, hop_length=hop_length,
+                                           fmin=f_min, n_octaves=n_octaves,
+                                           over_sample=over_sample, harmonics=harmonics,
+                                           log=True)
+
+     pump = pumpp.Pump(p_phdif)
+
+     return pump
+
+ ############################################################
+
+ def compute_pump_features(pump, audio_fpath):
+
+     data = pump(audio_f=audio_fpath)
+
+     return data
+
+ ############################################################
+
+ def get_mpe_prediction(model, audio_file=None):
+     """Generate multi-pitch output from a model given an input audio file.
+     Parts of this function are adapted from deepsalience.
+     """
+
+     split_value = 2500    # frames per chunk, to keep predictions within memory
+
+     if audio_file is not None:
+         pump = create_pump_object()
+         features = compute_pump_features(pump, audio_file)
+         input_hcqt = features['dphase/mag'][0]
+         input_dphase = features['dphase/dphase'][0]
+     else:
+         raise ValueError("An audio_file must be specified")
+
+     input_hcqt = input_hcqt.transpose(1, 2, 0)[np.newaxis, :, :, :]
+     input_dphase = input_dphase.transpose(1, 2, 0)[np.newaxis, :, :, :]
+
+     n_t = input_hcqt.shape[3]
+     t_slices = list(np.arange(0, n_t, split_value))
+     output_list = []
+
+     # Predict in chunks of split_value frames and stitch the outputs together
+     for t in t_slices:
+         p = model.predict([np.transpose(input_hcqt[:, :, :, t:t+split_value], (0, 1, 3, 2)),
+                            np.transpose(input_dphase[:, :, :, t:t+split_value], (0, 1, 3, 2))]
+                           )[0, :, :]
+
+         output_list.append(p)
+
+     predicted_output = np.hstack(output_list).astype(np.float32)
+     return predicted_output
+
+ ############################################################
+
+ def get_va_prediction(model, f0_matrix):
+     # Zero-pad the salience map so the time axis splits into 256-frame patches
+     splits = f0_matrix.shape[1]//256
+     splits_diff = 256 - (f0_matrix.shape[1] - splits * 256)
+     fill = np.zeros((360, splits_diff))
+     mix_filled = np.concatenate((np.copy(f0_matrix), fill), axis=1)
+     mix_filled = np.reshape(mix_filled, (360, -1, 256, 1)).transpose((1, 0, 2, 3))
+     batches = math.ceil(mix_filled.shape[0]/24)    # 24 patches per batch
+
+     s_pred_result = np.zeros((0, 360, 256))
+     a_pred_result = np.zeros((0, 360, 256))
+     t_pred_result = np.zeros((0, 360, 256))
+     b_pred_result = np.zeros((0, 360, 256))
+
+     for i in range(batches):
+         s_pred, a_pred, t_pred, b_pred = model.predict(mix_filled[i*24:(i+1)*24])
+         s_pred_result = np.append(s_pred_result, s_pred, axis=0)
+         a_pred_result = np.append(a_pred_result, a_pred, axis=0)
+         t_pred_result = np.append(t_pred_result, t_pred, axis=0)
+         b_pred_result = np.append(b_pred_result, b_pred, axis=0)
+
+     # Post-process and trim the zero-padding back off
+     s_pred_result = prediction_postproc(s_pred_result)[:, :f0_matrix.shape[1]]
+     a_pred_result = prediction_postproc(a_pred_result)[:, :f0_matrix.shape[1]]
+     t_pred_result = prediction_postproc(t_pred_result)[:, :f0_matrix.shape[1]]
+     b_pred_result = prediction_postproc(b_pred_result)[:, :f0_matrix.shape[1]]
+
+     return s_pred_result, a_pred_result, t_pred_result, b_pred_result
+
+ ############################################################
+
+ def cq2m(audiofile, mpe=None, va=None):
+     # Build the default models lazily: default arguments are evaluated once at
+     # import time, which would load both networks even when callers pass their own.
+     if mpe is None:
+         mpe = late_deep_cnn_model()
+     if va is None:
+         va = mask_voas_cnn_model()
+     mpe_pred = get_mpe_prediction(mpe, audiofile)
+     s_pred, a_pred, t_pred, b_pred = get_va_prediction(va, mpe_pred)
+     midi = song_to_midi(s_pred, a_pred, t_pred, b_pred)
+     return midi
+
+ ############################################################
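
End to end, the pipeline reduces to a single call (sketch; 'song.wav' is a placeholder path, and ./result.mid is the fixed path song_to_midi writes):

from cq2m_utils import cq2m

midi = cq2m('song.wav')      # MPE -> voice assignment -> 4-track MIDI
midi.save('./quartet.mid')   # song_to_midi has already written ./result.mid too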
pyproject.toml ADDED
@@ -0,0 +1,23 @@
+ [tool.poetry]
+ name = "choral-quartets-to-midi"
+ version = "0.1.0"
+ description = "An application that uses multi-pitch estimation and voice assignment to transcribe .WAV recordings of choral quartets into MIDI files, with a separate track for each voice. Based on Late/Deep CNN by Helena Cuesta and MaskVoasCNN by André Paiva."
+ authors = ["André Paiva (Xornotor) <[email protected]>"]
+ license = "cc"
+ readme = "README.md"
+ packages = [{include = "choral_quartets_to_midi"}]
+
+ [tool.poetry.dependencies]
+ python = "^3.11"
+ tensorflow = "2.13.0"
+ gradio = "3.37.0"
+ typing-extensions = "4.5.0"
+ mido = "1.2.10"
+ pumpp = "0.6.0"
+ numpy = "1.24.3"
+ scipy = "1.11.1"
+
+ [build-system]
+ requires = ["poetry-core"]
+ build-backend = "poetry.core.masonry.api"