# Copyright 3D-Speaker (https://github.com/alibaba-damo-academy/3D-Speaker). All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)

import torch
import torch.nn as nn


class AFF(nn.Module):
    """Fuse two feature maps with a learned, element-wise gate.

    NOTE(review): "AFF" presumably stands for attentional feature fusion;
    the name is not expanded anywhere in this file -- confirm.

    The two inputs are concatenated along the channel axis and passed
    through a 1x1-conv bottleneck (``local_att``).  The result is squashed
    to the open interval (0, 2) and used as the weight for ``x``; ``ds_y``
    receives the complementary weight, so the two weights always sum to 2.

    Args:
        channels: Number of channels of each input and of the output.
        r: Reduction ratio; the bottleneck has ``channels // r`` channels.
    """

    def __init__(self, channels: int = 64, r: int = 4) -> None:
        super().__init__()
        inter_channels = int(channels // r)

        # The attribute name and the layer order are kept as-is so that
        # parameter names in existing state dicts remain
        # ``local_att.<index>.*``.
        self.local_att = nn.Sequential(
            # Input is the channel-wise concatenation of both feature maps,
            # hence ``channels * 2`` input channels.
            nn.Conv2d(channels * 2, inter_channels, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(inter_channels),
            nn.SiLU(inplace=True),
            nn.Conv2d(inter_channels, channels, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(channels),
        )

    def forward(self, x: torch.Tensor, ds_y: torch.Tensor) -> torch.Tensor:
        """Return the gated combination of ``x`` and ``ds_y``.

        Args:
            x: First feature map, laid out as (batch, channels, H, W) as
                required by the 2-D conv / batch-norm layers.
            ds_y: Second feature map; it must be concatenable with ``x``
                along dim 1 and broadcastable against the gate.

        Returns:
            ``x * w + ds_y * (2 - w)`` where
            ``w = 1 + tanh(local_att(cat(x, ds_y)))``.
        """
        stacked = torch.cat((x, ds_y), dim=1)
        # tanh is in (-1, 1), so the gate lies in (0, 2) and equals 1 when
        # the attention branch outputs zero (both inputs then pass through
        # with weight 1).
        gate = 1.0 + torch.tanh(self.local_att(stacked))
        return x * gate + ds_y * (2.0 - gate)