|
|
|
|
|
|
|
|
|
|
|
"""STFT-based Loss modules.""" |
|
|
|
import torch |
|
import torch.nn.functional as F |
|
|
|
try:
    # Kept only so ``LooseVersion`` stays importable from this module on
    # interpreters that still ship distutils; it is no longer used below.
    from distutils.version import LooseVersion
except ImportError:
    # distutils was removed from the standard library in Python 3.12 (PEP 632).
    LooseVersion = None


def _leading_release(version, width=2):
    """Return the first ``width`` numeric components of a dotted version string.

    Non-digit suffixes inside a component (e.g. ``"0a0+git1234"``) are ignored,
    and a component without leading digits counts as 0.
    """
    release = []
    for piece in str(version).split(".")[:width]:
        digits = ""
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        release.append(int(digits) if digits else 0)
    return tuple(release)


# True when the installed PyTorch is 1.7 or newer, i.e. when ``torch.stft``
# accepts the ``return_complex`` keyword.
is_pytorch_17plus = _leading_release(torch.__version__) >= (1, 7)
|
|
|
|
|
def stft(x, fft_size, hop_size, win_length, window):
    """Perform STFT and convert to magnitude spectrogram.

    Args:
        x (Tensor): Input signal tensor (B, T).
        fft_size (int): FFT size.
        hop_size (int): Hop size.
        win_length (int): Window length.
        window (Tensor): Window tensor forwarded to ``torch.stft`` (for example
            the ``window`` buffer that ``STFTLoss`` builds).

    Returns:
        Tensor: Magnitude spectrogram (B, #frames, fft_size // 2 + 1).

    """
    if is_pytorch_17plus:
        # ``return_complex=False`` is deprecated since PyTorch 2.0. Ask for the
        # complex result and view it as a trailing (real, imag) pair, which
        # reproduces the deprecated output layout.
        x_stft = torch.view_as_real(
            torch.stft(
                x, fft_size, hop_size, win_length, window, return_complex=True
            )
        )
    else:
        # Before 1.7 there is no ``return_complex`` keyword and the result
        # already carries the trailing (real, imag) axis.
        x_stft = torch.stft(x, fft_size, hop_size, win_length, window)
    real = x_stft[..., 0]
    imag = x_stft[..., 1]

    # Clamp before the square root so the magnitude never reaches zero; this
    # keeps the gradient of sqrt finite and a later log() well defined.
    power = torch.clamp(real**2 + imag**2, min=1e-7)
    # torch.stft puts frequency before frames; swap to (B, #frames, #freq_bins).
    return torch.sqrt(power).transpose(2, 1)
|
|
|
|
|
class SpectralConvergenceLoss(torch.nn.Module):
    """Spectral convergence loss.

    Relates the Frobenius norm of the magnitude error to the Frobenius norm
    of the reference magnitude.
    """

    def __init__(self):
        """Set up the module; it holds no parameters or state of its own."""
        super().__init__()

    def forward(self, x_mag, y_mag):
        """Compute the spectral convergence between two magnitude spectrograms.

        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal
                (B, #frames, #freq_bins).
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal
                (B, #frames, #freq_bins).

        Returns:
            Tensor: Spectral convergence loss value.

        """
        error_norm = torch.norm(y_mag - x_mag, p="fro")
        reference_norm = torch.norm(y_mag, p="fro")
        return error_norm / reference_norm
|
|
|
|
|
class LogSTFTMagnitudeLoss(torch.nn.Module):
    """Log STFT magnitude loss.

    Mean absolute difference between the natural logarithms of two magnitude
    spectrograms.
    """

    def __init__(self):
        """Initialize the log STFT magnitude loss module."""
        super().__init__()

    def forward(self, x_mag, y_mag):
        """Compute the L1 distance between log-magnitudes.

        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal
                (B, #frames, #freq_bins).
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal
                (B, #frames, #freq_bins).

        Returns:
            Tensor: Log STFT magnitude loss value.

        """
        log_reference = torch.log(y_mag)
        log_predicted = torch.log(x_mag)
        return F.l1_loss(log_reference, log_predicted)
|
|
|
|
|
class STFTLoss(torch.nn.Module):
    """STFT loss module.

    Combines the spectral convergence loss and the log STFT magnitude loss at
    a single STFT resolution, optionally restricted to the upper half of the
    frequency bins.
    """

    def __init__(
        self, fft_size=1024, shift_size=120, win_length=600, window="hann_window",
        band="full"
    ):
        """Initialize STFT loss module.

        Args:
            fft_size (int): FFT size.
            shift_size (int): Hop size between successive frames.
            win_length (int): Window length.
            window (str): Name of a window factory on ``torch`` (looked up with
                ``getattr(torch, window)`` and called with ``win_length``).
            band (str): ``"full"`` to compare every frequency bin, ``"high"``
                to compare only the upper half of the frequency bins.

        """
        super(STFTLoss, self).__init__()
        self.fft_size = fft_size
        self.shift_size = shift_size
        self.win_length = win_length
        self.band = band

        self.spectral_convergence_loss = SpectralConvergenceLoss()
        self.log_stft_magnitude_loss = LogSTFTMagnitudeLoss()

        # Registered as a buffer so the window follows the module across
        # ``.to(device)`` / ``.cuda()`` calls without being a trainable parameter.
        self.register_buffer("window", getattr(torch, window)(win_length))

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).

        Returns:
            Tensor: Spectral convergence loss value.
            Tensor: Log STFT magnitude loss value.

        Raises:
            NotImplementedError: If ``band`` is neither ``"full"`` nor ``"high"``.

        """
        x_mag = stft(x, self.fft_size, self.shift_size, self.win_length, self.window)
        y_mag = stft(y, self.fft_size, self.shift_size, self.win_length, self.window)

        if self.band == "high":
            # ``stft`` returns (B, #frames, #freq_bins), so the band split has to
            # act on the LAST axis. Slicing axis 1 would discard the first half
            # of the time frames instead of the low-frequency bins.
            freq_mask_ind = x_mag.shape[-1] // 2
            x_mag = x_mag[..., freq_mask_ind:]
            y_mag = y_mag[..., freq_mask_ind:]
        elif self.band != "full":
            raise NotImplementedError(
                "Unsupported band {!r}; expected 'full' or 'high'.".format(self.band)
            )

        sc_loss = self.spectral_convergence_loss(x_mag, y_mag)
        mag_loss = self.log_stft_magnitude_loss(x_mag, y_mag)

        return sc_loss, mag_loss
|
|
|
|
|
class MultiResolutionSTFTLoss(torch.nn.Module):
    """Multi resolution STFT loss module.

    Averages the spectral convergence and log STFT magnitude losses of several
    ``STFTLoss`` instances, one per (FFT size, hop size, window length) triple.
    """

    def __init__(
        self, fft_sizes=(1024, 2048, 512), hop_sizes=(120, 240, 50), win_lengths=(600, 1200, 240),
        window="hann_window", sc_lambda=0.1, mag_lambda=0.1, band="full"
    ):
        """Initialize Multi resolution STFT loss module.

        Args:
            fft_sizes (list): List of FFT sizes.
            hop_sizes (list): List of hop sizes.
            win_lengths (list): List of window lengths.
            window (str): Window function type.
            sc_lambda (float): Weight applied to the spectral convergence loss.
            mag_lambda (float): Weight applied to the log STFT magnitude loss.
            band (str): high-band or full-band loss

        Raises:
            ValueError: If the three size sequences differ in length.

        """
        super(MultiResolutionSTFTLoss, self).__init__()
        self.sc_lambda = sc_lambda
        self.mag_lambda = mag_lambda

        # Raise explicitly instead of asserting: asserts vanish under ``-O`` and
        # ``zip`` would then silently drop the unmatched resolutions.
        if not (len(fft_sizes) == len(hop_sizes) == len(win_lengths)):
            raise ValueError(
                "fft_sizes, hop_sizes and win_lengths must have the same length, "
                "got {}, {} and {}.".format(
                    len(fft_sizes), len(hop_sizes), len(win_lengths)
                )
            )
        self.stft_losses = torch.nn.ModuleList(
            [
                STFTLoss(fs, ss, wl, window, band)
                for fs, ss, wl in zip(fft_sizes, hop_sizes, win_lengths)
            ]
        )

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal (B, T) or (B, #subband, T).
            y (Tensor): Groundtruth signal (B, T) or (B, #subband, T).

        Returns:
            Tensor: Multi resolution spectral convergence loss value, scaled by
                ``sc_lambda`` and averaged over the resolutions.
            Tensor: Multi resolution log STFT magnitude loss value, scaled by
                ``mag_lambda`` and averaged over the resolutions.

        """
        if x.dim() == 3:
            # Fold the sub-band axis into the batch axis. ``reshape`` is used
            # instead of ``view`` so non-contiguous inputs are accepted too.
            x = x.reshape(-1, x.size(2))
            y = y.reshape(-1, y.size(2))

        sc_loss = 0.0
        mag_loss = 0.0
        for stft_loss in self.stft_losses:
            sc_l, mag_l = stft_loss(x, y)
            sc_loss = sc_loss + sc_l
            mag_loss = mag_loss + mag_l

        num_resolutions = len(self.stft_losses)
        sc_loss = sc_loss * self.sc_lambda / num_resolutions
        mag_loss = mag_loss * self.mag_lambda / num_resolutions

        return sc_loss, mag_loss