from typing import Callable, Optional

import librosa
import numpy as np
import torch
from sklearn.base import BaseEstimator, TransformerMixin


def _split_into_windows(X: np.ndarray, windows_number: int) -> np.ndarray:
    """Cut the last axis of *X* into ``windows_number`` equal, contiguous windows.

    Trailing samples that do not fill a whole window are dropped.

    Args:
        X: Array whose last axis is the one to split.
        windows_number: Number of windows to produce.

    Returns:
        An array of shape ``(*X.shape[:-1], windows_number, window_size)`` with
        ``window_size = X.shape[-1] // windows_number``. It may share memory
        with *X*.

    Raises:
        ValueError: If ``windows_number`` is not positive, or if the last axis
            is shorter than ``windows_number`` (the windows would be empty and
            every statistic computed on them would be NaN).
    """
    if windows_number < 1:
        raise ValueError(
            f"windows_number must be a positive integer, got {windows_number!r}."
        )
    *leading_shape, size = X.shape
    window_size = size // windows_number
    if window_size == 0:
        raise ValueError(
            f"Cannot split {size} samples into {windows_number} windows: "
            "the last axis must hold at least windows_number samples."
        )
    clipped = X[..., : windows_number * window_size]
    return clipped.reshape((*leading_shape, windows_number, window_size))


class _FitlessTransformer(BaseEstimator, TransformerMixin):
    """Shared base for the transformers below, whose ``fit`` learns nothing."""

    def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None):
        """Do nothing and return ``self`` (kept for the scikit-learn API)."""
        return self

    def fit_transform(self, X: np.ndarray, y: Optional[np.ndarray] = None):
        """Call ``fit`` and then return ``transform(X, y)``."""
        self.fit(X, y)
        return self.transform(X, y)


class ReductionTransformer(_FitlessTransformer):
    """Reduce each window of the last axis to the output of a statistic.

    Args:
        windows_number: Number of windows the last axis is split into.
        statistique: Callable applied to every 1-D window through
            ``np.apply_along_axis``.
    """

    def __init__(
        self,
        windows_number: int = 300,
        statistique: Callable[[np.ndarray], np.ndarray] = np.mean,
    ):
        self.windows_number = windows_number
        self.statistique = statistique

    def transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndarray:
        """Apply ``statistique`` to each window of the last axis of *X*.

        Raises:
            ValueError: If the last axis is shorter than ``windows_number``.
        """
        # Work on a copy so a statistic that mutates its argument in place can
        # never touch the caller's data.
        windows = _split_into_windows(np.array(X, subok=True), self.windows_number)
        return np.apply_along_axis(self.statistique, axis=-1, arr=windows)

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Not supported: an arbitrary statistic cannot be inverted."""
        raise NotImplementedError


class MeanTransformer(_FitlessTransformer):
    """Replace each window of the last axis by its mean.

    Args:
        windows_number: Number of windows the last axis is split into.
    """

    def __init__(self, windows_number: int = 300):
        self.windows_number = windows_number
        # Window length seen by the last ``transform`` call; 0 means that
        # ``transform`` has not run yet. ``inverse_transform`` relies on it.
        self.windows_size = 0

    def transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndarray:
        """Return the per-window means, shape ``(*X.shape[:-1], windows_number)``.

        Also records the window length in ``self.windows_size``.

        Raises:
            ValueError: If the last axis is shorter than ``windows_number``.
        """
        windows = _split_into_windows(np.asanyarray(X), self.windows_number)
        self.windows_size = windows.shape[-1]
        return np.mean(windows, axis=-1)

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Linearly interpolate window means back to the clipped signal length.

        Each mean is placed at the first sample of its window and the values in
        between are filled with ``np.interp``; samples after the last window
        start repeat the last mean (``np.interp`` clamps outside its range).

        Args:
            X: Window means along the last axis (length ``windows_number``).
                Leading axes, if any, are processed independently.

        Returns:
            Array whose last axis has ``windows_size * windows_number`` samples.

        Raises:
            ValueError: If ``transform`` has not been called yet, or if the
                last axis of *X* does not hold ``windows_number`` values.
        """
        if self.windows_size < 1:
            raise ValueError(
                "The window size is unknown: call transform() before "
                "inverse_transform()."
            )
        original_size = self.windows_size * self.windows_number
        sample_positions = np.arange(original_size)
        window_starts = np.arange(0, original_size, self.windows_size)
        # np.interp only accepts 1-D data, so apply it along the last axis to
        # support the same N-D inputs as transform().
        return np.apply_along_axis(
            lambda means: np.interp(sample_positions, window_starts, means),
            -1,
            np.asarray(X),
        )


class StdTransformer(_FitlessTransformer):
    """Replace each window of the last axis by its standard deviation.

    Args:
        windows_number: Number of windows the last axis is split into.
    """

    def __init__(self, windows_number: int = 300):
        self.windows_number = windows_number

    def transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndarray:
        """Return the per-window standard deviations (``np.std`` defaults).

        Raises:
            ValueError: If the last axis is shorter than ``windows_number``.
        """
        windows = _split_into_windows(np.asanyarray(X), self.windows_number)
        return np.std(windows, axis=-1)

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Not supported: the signal cannot be rebuilt from its deviations."""
        raise NotImplementedError


class MfccTransformer(_FitlessTransformer):
    """Compute MFCCs with ``librosa.feature.mfcc``.

    Args:
        sr: Sampling rate passed to librosa.
        N_MFCC: Number of MFCC coefficients to keep.
        hop_length: Hop length passed to librosa.
        reshape_output: If true, ``transform`` flattens everything after the
            first axis of the librosa output.
        n_mels: Number of mel bands used both when computing and when
            inverting the MFCCs. 128 is librosa's default, so the forward
            output is unchanged for existing callers.
    """

    def __init__(
        self,
        sr: int = 22050,
        N_MFCC: int = 12,
        hop_length: int = 1024,
        reshape_output: bool = True,
        n_mels: int = 128,
    ):
        self.sr = sr
        self.N_MFCC = N_MFCC
        self.hop_length = hop_length
        self.reshape_output = reshape_output
        self.n_mels = n_mels

    def reshape(self, X: np.ndarray) -> np.ndarray:
        """Return a copy of *X* reshaped to ``(X.shape[0], -1, N_MFCC)``.

        NOTE(review): librosa documents MFCC output as ``(..., n_mfcc, frames)``,
        so applying this to the flattened output of ``transform`` does not
        restore that layout - confirm the intended axis order with callers.
        """
        X_ = X.copy()
        batch_size, *_ = X_.shape
        return X_.reshape(batch_size, -1, self.N_MFCC)

    def transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndarray:
        """Compute the MFCCs of *X*.

        Returns:
            The librosa output, reshaped to ``(X.shape[0], -1)`` when
            ``reshape_output`` is true.
        """
        signal = np.asarray(X)
        batch_size, *_ = signal.shape
        mfcc = librosa.feature.mfcc(
            y=signal,
            sr=self.sr,
            hop_length=self.hop_length,
            n_mfcc=self.N_MFCC,
            n_mels=self.n_mels,
        )
        if self.reshape_output:
            mfcc = mfcc.reshape(batch_size, -1)
        return mfcc

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Approximate the audio behind *X* with librosa's ``mfcc_to_audio``.

        The mel resolution, sampling rate and hop length of the forward pass
        are forwarded so the inversion matches ``transform``. *X* is passed to
        librosa as is; output flattened by ``reshape_output=True`` has to be
        restored to librosa's MFCC layout by the caller first.
        """
        return librosa.feature.inverse.mfcc_to_audio(
            mfcc=X,
            n_mels=self.n_mels,
            sr=self.sr,
            hop_length=self.hop_length,
        )


class MelTransformer(_FitlessTransformer):
    """Compute mel spectrograms with ``librosa.feature.melspectrogram``.

    Args:
        sr: Sampling rate passed to librosa.
        N_MEL: Number of mel bands.
        hop_length: Hop length passed to librosa.
        reshape_output: If true, ``transform`` flattens everything after the
            first axis of the librosa output.
    """

    def __init__(
        self,
        sr: int = 22050,
        N_MEL: int = 12,
        hop_length: int = 1024,
        reshape_output: bool = True,
    ):
        self.sr = sr
        self.N_MEL = N_MEL
        self.hop_length = hop_length
        self.reshape_output = reshape_output

    def reshape(self, X: np.ndarray) -> np.ndarray:
        """Return a copy of *X* reshaped to ``(X.shape[0], -1, N_MEL)``.

        NOTE(review): librosa documents mel spectrograms as
        ``(..., n_mels, frames)``, so applying this to the flattened output of
        ``transform`` does not restore that layout - confirm the intended axis
        order with callers.
        """
        X_ = X.copy()
        batch_size, *_ = X_.shape
        return X_.reshape(batch_size, -1, self.N_MEL)

    def transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndarray:
        """Compute the mel spectrogram of *X*.

        Returns:
            The librosa output, reshaped to ``(X.shape[0], -1)`` when
            ``reshape_output`` is true.
        """
        batch_size, *_ = X.shape
        mel = librosa.feature.melspectrogram(
            y=X,
            sr=self.sr,
            hop_length=self.hop_length,
            n_mels=self.N_MEL,
        )
        if self.reshape_output:
            mel = mel.reshape(batch_size, -1)
        return mel

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Approximate the audio behind *X* with librosa's ``mel_to_audio``.

        *X* is passed to librosa as is; output flattened by
        ``reshape_output=True`` has to be restored to librosa's mel layout by
        the caller first.
        """
        return librosa.feature.inverse.mel_to_audio(
            M=X,
            sr=self.sr,
            hop_length=self.hop_length,
        )


class TorchTransform(_FitlessTransformer):
    """Convert arrays to torch tensors with an extra axis at position 1."""

    def __init__(self):
        pass

    def transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> torch.Tensor:
        """Return ``torch.tensor(X)`` with a singleton axis inserted at index 1."""
        return torch.tensor(X).unsqueeze(dim=1)

    def inverse_transform(self, X: torch.Tensor) -> np.ndarray:
        """Drop axis 1 (when it has size 1) and return a NumPy copy.

        The tensor is detached and moved to the CPU first, so tensors that
        require grad or live on another device are accepted as well.
        """
        return np.array(X.squeeze(dim=1).detach().cpu())


class ShuffleTransformer(_FitlessTransformer):
    """Randomly overwrite a fraction of the rows with other rows.

    Args:
        p: Fraction of rows (first axis) to overwrite.
    """

    def __init__(self, p: float = 0.005):
        self.p = p

    def transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndarray:
        """Return a copy of *X* where ``int(p * len(X))`` rows are overwritten.

        Target and source row indices are both drawn with replacement from
        NumPy's global random state. Objects exposing ``copy`` or ``clone`` are
        duplicated first; anything else is modified in place.

        NOTE(review): despite the "swap" naming, source rows are copied onto
        target rows and nothing is written back, so some rows end up
        duplicated and others lost - confirm this is the intended behaviour.
        """
        n_samples = X.shape[0]
        n_moved = int(self.p * n_samples)
        # Draw targets before sources to keep seeded runs reproducible.
        will_swap = np.random.choice(n_samples, n_moved)
        will_swap_with = np.random.choice(n_samples, n_moved)
        if hasattr(X, "copy"):
            X_ = X.copy()
        elif hasattr(X, "clone"):
            X_ = X.clone()
        else:
            X_ = X
        X_[will_swap, ...] = X_[will_swap_with, ...]
        return X_

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Not supported: the random overwrite discards information."""
        raise NotImplementedError