---
# Model component configuration.
# Each top-level section names a class (`class_path`) together with the
# keyword arguments listed under `init_args`.
# NOTE(review): presumably `init_args` is passed to the constructor of
# `class_path` by the config loader — confirm against the consumer.

backbone:
  class_path: vocos.models.VocosBackbone
  init_args:
    # NOTE(review): presumably must equal the feature dimension produced by
    # the upstream feature extractor (not visible here) — confirm.
    input_channels: 1024
    dim: 512
    # 3 x dim
    intermediate_dim: 1536
    num_layers: 8

head:
  class_path: vocos.heads.ISTFTHead
  init_args:
    # Same value as backbone.init_args.dim above.
    dim: 512
    # 4 x hop_length
    n_fft: 3528
    hop_length: 882
    padding: same