# Copyright (c) OpenMMLab. All rights reserved.
import os
import platform

import numpy as np
import pytest
import torch
import torch.distributed as dist
import torch.nn as nn

if platform.system() == 'Windows':
    import regex as re
else:
    import re

class TestSyncBN:

    def dist_init(self):
        rank = int(os.environ['SLURM_PROCID'])
        world_size = int(os.environ['SLURM_NTASKS'])
        local_rank = int(os.environ['SLURM_LOCALID'])
        node_list = str(os.environ['SLURM_NODELIST'])

        # Parse the IP-like digits of the Slurm node list to build the
        # master address.
        node_parts = re.findall('[0-9]+', node_list)
        os.environ['MASTER_ADDR'] = (f'{node_parts[1]}.{node_parts[2]}' +
                                     f'.{node_parts[3]}.{node_parts[4]}')
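        # Illustration (assumed node-name format, not from the original
        # test): a SLURM_NODELIST such as 'SH-IDC1-10-5-38-190' gives
        # node_parts == ['1', '10', '5', '38', '190'], so MASTER_ADDR
        # becomes '10.5.38.190'.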
        os.environ['MASTER_PORT'] = '12341'
        os.environ['WORLD_SIZE'] = str(world_size)
        os.environ['RANK'] = str(rank)

        dist.init_process_group('nccl')
        torch.cuda.set_device(local_rank)

    def _test_syncbn_train(self, size=1, half=False):
        if 'SLURM_NTASKS' not in os.environ or int(
                os.environ['SLURM_NTASKS']) != 4:
            print('must be run under Slurm with 4 processes!\n'
                  'srun -p test --gres=gpu:4 -n4')
            return
        else:
            print('Running syncbn test')
        from mmcv.ops import SyncBatchNorm

        assert size in (1, 2, 4)
        if not dist.is_initialized():
            self.dist_init()
        rank = dist.get_rank()

        torch.manual_seed(9)
        torch.cuda.manual_seed(9)

        self.x = torch.rand(16, 3, 2, 3).cuda()
        self.y_bp = torch.rand(16, 3, 2, 3).cuda()
        if half:
            self.x = self.x.half()
            self.y_bp = self.y_bp.half()
        dist.broadcast(self.x, src=0)
        dist.broadcast(self.y_bp, src=0)
        torch.cuda.synchronize()

        # Build process groups of the requested size: four groups of one,
        # two groups of two, or the whole world.
        if size == 1:
            groups = [None, None, None, None]
            groups[0] = dist.new_group([0])
            groups[1] = dist.new_group([1])
            groups[2] = dist.new_group([2])
            groups[3] = dist.new_group([3])
            group = groups[rank]
        elif size == 2:
            groups = [None, None, None, None]
            groups[0] = groups[1] = dist.new_group([0, 1])
            groups[2] = groups[3] = dist.new_group([2, 3])
            group = groups[rank]
        elif size == 4:
            group = dist.group.WORLD

        # SyncBatchNorm under test, one instance per rank, sharing `group`.
        syncbn = SyncBatchNorm(3, group=group).cuda()
        syncbn.weight.data[0] = 0.2
        syncbn.weight.data[1] = 0.5
        syncbn.weight.data[2] = 0.7
        syncbn.train()

        # Reference BatchNorm with identical weights, run on the samples of
        # the whole group at once.
        bn = nn.BatchNorm2d(3).cuda()
        bn.weight.data[0] = 0.2
        bn.weight.data[1] = 0.5
        bn.weight.data[2] = 0.7
        bn.train()

        # Each rank feeds its own 4-sample slice through SyncBatchNorm.
        sx = self.x[rank * 4:rank * 4 + 4]
        sx.requires_grad_()
        sy = syncbn(sx)
        sy.backward(self.y_bp[rank * 4:rank * 4 + 4])

        smean = syncbn.running_mean
        svar = syncbn.running_var
        sx_grad = sx.grad
        sw_grad = syncbn.weight.grad
        sb_grad = syncbn.bias.grad

        # The reference BatchNorm sees the samples of every rank in its
        # group as a single batch.
        if size == 1:
            x = self.x[rank * 4:rank * 4 + 4]
            y_bp = self.y_bp[rank * 4:rank * 4 + 4]
        elif size == 2:
            x = self.x[rank // 2 * 8:rank // 2 * 8 + 8]
            y_bp = self.y_bp[rank // 2 * 8:rank // 2 * 8 + 8]
        elif size == 4:
            x = self.x
            y_bp = self.y_bp
        x.requires_grad_()
        y = bn(x)
        y.backward(y_bp)

        # Keep only the slice of the reference output that corresponds to
        # this rank.
        if size == 2:
            y = y[rank % 2 * 4:rank % 2 * 4 + 4]
        elif size == 4:
            y = y[rank * 4:rank * 4 + 4]

        mean = bn.running_mean
        var = bn.running_var
        if size == 1:
            x_grad = x.grad
            w_grad = bn.weight.grad
            b_grad = bn.bias.grad
        elif size == 2:
            # Scale the reference parameter gradients by the group size so
            # they match the per-rank SyncBatchNorm gradients.
            x_grad = x.grad[rank % 2 * 4:rank % 2 * 4 + 4]
            w_grad = bn.weight.grad / 2
            b_grad = bn.bias.grad / 2
        elif size == 4:
            x_grad = x.grad[rank * 4:rank * 4 + 4]
            w_grad = bn.weight.grad / 4
            b_grad = bn.bias.grad / 4

        assert np.allclose(mean.data.cpu().numpy(),
                           smean.data.cpu().numpy(), 1e-3)
        assert np.allclose(var.data.cpu().numpy(),
                           svar.data.cpu().numpy(), 1e-3)
        assert np.allclose(y.data.cpu().numpy(), sy.data.cpu().numpy(), 1e-3)
        assert np.allclose(w_grad.data.cpu().numpy(),
                           sw_grad.data.cpu().numpy(), 1e-3)
        assert np.allclose(b_grad.data.cpu().numpy(),
                           sb_grad.data.cpu().numpy(), 1e-3)
        assert np.allclose(x_grad.data.cpu().numpy(),
                           sx_grad.data.cpu().numpy(), 1e-2)

    def _test_syncbn_empty_train(self, size=1, half=False):
        if 'SLURM_NTASKS' not in os.environ or int(
                os.environ['SLURM_NTASKS']) != 4:
            print('must be run under Slurm with 4 processes!\n'
                  'srun -p test --gres=gpu:4 -n4')
            return
        else:
            print('Running syncbn test')
        from mmcv.ops import SyncBatchNorm

        assert size in (1, 2, 4)
        if not dist.is_initialized():
            self.dist_init()
        rank = dist.get_rank()

        torch.manual_seed(9)
        torch.cuda.manual_seed(9)

        # Empty inputs: the batch dimension is 0 on every rank.
        self.x = torch.rand(0, 3, 2, 3).cuda()
        self.y_bp = torch.rand(0, 3, 2, 3).cuda()
        if half:
            self.x = self.x.half()
            self.y_bp = self.y_bp.half()
        dist.broadcast(self.x, src=0)
        dist.broadcast(self.y_bp, src=0)
        torch.cuda.synchronize()

        if size == 1:
            groups = [None, None, None, None]
            groups[0] = dist.new_group([0])
            groups[1] = dist.new_group([1])
            groups[2] = dist.new_group([2])
            groups[3] = dist.new_group([3])
            group = groups[rank]
        elif size == 2:
            groups = [None, None, None, None]
            groups[0] = groups[1] = dist.new_group([0, 1])
            groups[2] = groups[3] = dist.new_group([2, 3])
            group = groups[rank]
        elif size == 4:
            group = dist.group.WORLD

        # Use stats_mode='N' so that empty per-rank batches are handled;
        # this is what the empty-input case exercises.
        syncbn = SyncBatchNorm(3, group=group, stats_mode='N').cuda()
        syncbn.weight.data[0] = 0.2
        syncbn.weight.data[1] = 0.5
        syncbn.weight.data[2] = 0.7
        syncbn.train()

        bn = nn.BatchNorm2d(3).cuda()
        bn.weight.data[0] = 0.2
        bn.weight.data[1] = 0.5
        bn.weight.data[2] = 0.7
        bn.train()

        sx = self.x[rank * 4:rank * 4 + 4]
        sx.requires_grad_()
        sy = syncbn(sx)
        sy.backward(self.y_bp[rank * 4:rank * 4 + 4])

        smean = syncbn.running_mean
        svar = syncbn.running_var
        sx_grad = sx.grad
        sw_grad = syncbn.weight.grad
        sb_grad = syncbn.bias.grad

        if size == 1:
            x = self.x[rank * 4:rank * 4 + 4]
            y_bp = self.y_bp[rank * 4:rank * 4 + 4]
        elif size == 2:
            x = self.x[rank // 2 * 8:rank // 2 * 8 + 8]
            y_bp = self.y_bp[rank // 2 * 8:rank // 2 * 8 + 8]
        elif size == 4:
            x = self.x
            y_bp = self.y_bp
        x.requires_grad_()
        y = bn(x)
        y.backward(y_bp)

        if size == 2:
            y = y[rank % 2 * 4:rank % 2 * 4 + 4]
        elif size == 4:
            y = y[rank * 4:rank * 4 + 4]

        mean = bn.running_mean
        var = bn.running_var
        if size == 1:
            x_grad = x.grad
            w_grad = bn.weight.grad
            b_grad = bn.bias.grad
        elif size == 2:
            x_grad = x.grad[rank % 2 * 4:rank % 2 * 4 + 4]
            w_grad = bn.weight.grad / 2
            b_grad = bn.bias.grad / 2
        elif size == 4:
            x_grad = x.grad[rank * 4:rank * 4 + 4]
            w_grad = bn.weight.grad / 4
            b_grad = bn.bias.grad / 4

        assert np.allclose(mean.data.cpu().numpy(),
                           smean.data.cpu().numpy(), 1e-3)
        assert np.allclose(var.data.cpu().numpy(),
                           svar.data.cpu().numpy(), 1e-3)
        assert np.allclose(y.data.cpu().numpy(), sy.data.cpu().numpy(), 1e-3)
        assert np.allclose(w_grad.data.cpu().numpy(),
                           sw_grad.data.cpu().numpy(), 1e-3)
        assert np.allclose(b_grad.data.cpu().numpy(),
                           sb_grad.data.cpu().numpy(), 1e-3)
        assert np.allclose(x_grad.data.cpu().numpy(),
                           sx_grad.data.cpu().numpy(), 1e-2)

        # 'stats_mode' only allows 'default' and 'N'
        with pytest.raises(AssertionError):
            SyncBatchNorm(3, group=group, stats_mode='X')

    def test_syncbn_1(self):
        self._test_syncbn_train(size=1)

    def test_syncbn_2(self):
        self._test_syncbn_train(size=2)

    def test_syncbn_4(self):
        self._test_syncbn_train(size=4)

    def test_syncbn_1_half(self):
        self._test_syncbn_train(size=1, half=True)

    def test_syncbn_2_half(self):
        self._test_syncbn_train(size=2, half=True)

    def test_syncbn_4_half(self):
        self._test_syncbn_train(size=4, half=True)

    def test_syncbn_empty_1(self):
        self._test_syncbn_empty_train(size=1)

    def test_syncbn_empty_2(self):
        self._test_syncbn_empty_train(size=2)

    def test_syncbn_empty_4(self):
        self._test_syncbn_empty_train(size=4)

    def test_syncbn_empty_1_half(self):
        self._test_syncbn_empty_train(size=1, half=True)

    def test_syncbn_empty_2_half(self):
        self._test_syncbn_empty_train(size=2, half=True)

    def test_syncbn_empty_4_half(self):
        self._test_syncbn_empty_train(size=4, half=True)
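

# Usage sketch (an assumption based on the hint printed by the tests, not
# part of the original file): the tests only do real work when launched as
# 4 Slurm tasks on 4 GPUs, e.g.
#
#   srun -p test --gres=gpu:4 -n4 python -m pytest <path/to/this/file>
#
# Outside such an environment each test prints the hint and returns early.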