import normflows as nf
import numpy as np
import torch
import torch.nn as nn
from einops import rearrange, repeat

def build_flows(
    latent_size, num_flows=4, num_blocks=2, hidden_units=128, context_size=64
):
    """Build a conditional normalizing flow: coupled rational-quadratic spline
    layers interleaved with LU-decomposed linear permutations, over a trainable
    diagonal Gaussian base distribution."""
    # Define flows
    flows = []
    for i in range(num_flows):
        flows += [
            nf.flows.CoupledRationalQuadraticSpline(
                latent_size,
                num_blocks=num_blocks,
                num_hidden_channels=hidden_units,
                num_context_channels=context_size,
            )
        ]
        flows += [nf.flows.LULinearPermute(latent_size)]

    # Set base distribution
    q0 = nf.distributions.DiagGaussian(latent_size, trainable=True)

    # Construct flow model
    model = nf.ConditionalNormalizingFlow(q0, flows)

    return model
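
# Illustrative sketch (not part of the original file): evaluating the
# conditional flow returned above. latent_size=5 and context_size=64 are
# assumptions chosen only for this example.
def _example_build_flows():
    flow = build_flows(latent_size=5, context_size=64)
    z = torch.randn(8, 5)     # one 5-dim feature vector per sample
    ctx = torch.randn(8, 64)  # one conditioning vector per sample
    return flow.log_prob(z, context=ctx)  # (8,) log densities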

def get_emb(sin_inp):
    """
    Gets a base embedding for one dimension with sin and cos intertwined
    """
    emb = torch.stack((sin_inp.sin(), sin_inp.cos()), dim=-1)
    return torch.flatten(emb, -2, -1)
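
# Illustrative sketch (assumed toy values): for a (positions x frequencies)
# phase matrix, get_emb interleaves sin and cos along the last axis,
# doubling its size.
def _example_get_emb():
    phases = torch.arange(4).float().unsqueeze(1) * torch.ones(1, 3)  # (4, 3)
    emb = get_emb(phases)
    assert emb.shape == (4, 6)  # sin/cos interleaved per frequency
    return emb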

class PositionalEncoding2D(nn.Module):
    def __init__(self, channels):
        """
        :param channels: The last dimension of the tensor you want to apply pos emb to.
        """
        super(PositionalEncoding2D, self).__init__()
        self.org_channels = channels
        channels = int(np.ceil(channels / 4) * 2)
        self.channels = channels
        inv_freq = 1.0 / (10000 ** (torch.arange(0, channels, 2).float() / channels))
        self.register_buffer("inv_freq", inv_freq)
        self.register_buffer("cached_penc", None, persistent=False)

    def forward(self, tensor):
        """
        :param tensor: A 4d tensor of size (batch_size, ch, x, y)
        :return: Positional encoding of size (x, y, self.channels * 2),
            shared across the batch and cached until the spatial size changes.
        """
        if len(tensor.shape) != 4:
            raise RuntimeError("The input tensor has to be 4d!")

        # Reuse the cached encoding if the spatial dimensions match
        if (
            self.cached_penc is not None
            and self.cached_penc.shape[:2] == tensor.shape[2:4]
        ):
            return self.cached_penc

        self.cached_penc = None
        batch_size, orig_ch, x, y = tensor.shape
        pos_x = torch.arange(x, device=tensor.device, dtype=self.inv_freq.dtype)
        pos_y = torch.arange(y, device=tensor.device, dtype=self.inv_freq.dtype)
        sin_inp_x = torch.einsum("i,j->ij", pos_x, self.inv_freq)
        sin_inp_y = torch.einsum("i,j->ij", pos_y, self.inv_freq)
        emb_x = get_emb(sin_inp_x).unsqueeze(1)
        emb_y = get_emb(sin_inp_y)
        emb = torch.zeros(
            (x, y, self.channels * 2),
            device=tensor.device,
            dtype=tensor.dtype,
        )
        emb[:, :, : self.channels] = emb_x
        emb[:, :, self.channels : 2 * self.channels] = emb_y

        self.cached_penc = emb
        return self.cached_penc
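
# Illustrative sketch (not from the original file): query the 2D positional
# encoder with a channels-first feature map; it returns one (H, W, C) grid
# that PatchFlow later flattens into per-patch context vectors. The shapes
# below are assumptions chosen only for this example.
def _example_positional_encoding():
    penc = PositionalEncoding2D(channels=128)
    grid = penc(torch.zeros(2, 5, 16, 16))  # batch of 2, 5 channels, 16x16
    assert grid.shape == (16, 16, 128)
    return grid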

class SpatialNormer(nn.Module):
    def __init__(
        self,
        in_channels,  # channels will be the number of sigma scales in the input
        kernel_size=3,
        stride=2,
        padding=1,
    ):
        """
        Computes the L2 norm of each local patch, separately for every sigma scale.
        Note that the convolution collapses the image-channel dimension
        (the kernel depth must equal the number of image channels), so
        (b, num_sigmas, c, h, w) -> (b, num_sigmas, new_h, new_w)
        """
        super().__init__()
        self.conv = nn.Conv3d(
            in_channels,
            in_channels,
            kernel_size,
            # This is the real trick that ensures each
            # sigma dimension is normed separately
            groups=in_channels,
            stride=(1, stride, stride),
            padding=(0, padding, padding),
            bias=False,
        )
        self.conv.weight.data.fill_(1)  # all-ones weights
        self.conv.weight.requires_grad = False  # freeze weights

    def forward(self, x):
        # Sum of squares over each (c x k x k) window, then square root = patch L2 norm
        return self.conv(x.square()).pow_(0.5).squeeze(2)
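
# Illustrative sketch (assumed shapes): pool per-scale score maps into patch
# L2 norms. Here 4 sigma scales over 3-channel 32x32 maps become a
# (1, 4, 16, 16) tensor; kernel_size must match the image-channel count (3).
def _example_spatial_normer():
    pooler = SpatialNormer(in_channels=4, kernel_size=3)
    scores = torch.randn(1, 4, 3, 32, 32)  # (b, num_sigmas, c, h, w)
    pooled = pooler(scores)
    assert pooled.shape == (1, 4, 16, 16)
    return pooled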

class PatchFlow(torch.nn.Module):
    def __init__(
        self,
        input_size,
        patch_size=3,
        context_embedding_size=128,
        num_blocks=2,
        hidden_units=128,
    ):
        super().__init__()
        num_sigmas, c, h, w = input_size
        self.local_pooler = SpatialNormer(
            in_channels=num_sigmas, kernel_size=patch_size
        )
        self.flow = build_flows(
            latent_size=num_sigmas,
            num_blocks=num_blocks,
            hidden_units=hidden_units,
            context_size=context_embedding_size,
        )
        self.position_encoding = PositionalEncoding2D(channels=context_embedding_size)

        # Cache the positional encodings for the pooled spatial resolution
        _, _, ctx_h, ctx_w = self.local_pooler(
            torch.empty((1, num_sigmas, c, h, w))
        ).shape
        self.position_encoding(torch.empty(1, 1, ctx_h, ctx_w))
        assert self.position_encoding.cached_penc.shape[-1] == context_embedding_size

    def init_weights(self):
        # Initialize weights with Xavier
        linear_modules = list(
            filter(lambda m: isinstance(m, nn.Linear), self.flow.modules())
        )
        total = len(linear_modules)
        for idx, m in enumerate(linear_modules):
            # Last layer gets init w/ zeros
            if idx == total - 1:
                nn.init.zeros_(m.weight.data)
            else:
                nn.init.xavier_uniform_(m.weight.data)
            if m.bias is not None:
                nn.init.zeros_(m.bias.data)

    def forward(self, x, chunk_size=32):
        b, s, c, h, w = x.shape
        x_norm = self.local_pooler(x)
        _, _, new_h, new_w = x_norm.shape
        context = self.position_encoding(x_norm)

        # Flatten the spatial grid: patches x channels
        local_ctx = rearrange(context, "h w c -> (h w) c")
        patches = rearrange(x_norm, "b c h w -> (h w) b c")

        # Process patches in chunks to bound peak memory
        nchunks = (patches.shape[0] + chunk_size - 1) // chunk_size
        patches = patches.chunk(nchunks, dim=0)
        ctx_chunks = local_ctx.chunk(nchunks, dim=0)
        patch_logpx = []

        for p, ctx in zip(patches, ctx_chunks):
            # num patches in chunk (<= chunk_size)
            n = p.shape[0]
            ctx = repeat(ctx, "n c -> (n b) c", b=b)
            p = rearrange(p, "n b c -> (n b) c")
            # Compute log densities for each patch
            logpx = self.flow.log_prob(p, context=ctx)
            logpx = rearrange(logpx, "(n b) -> n b", n=n, b=b)
            patch_logpx.append(logpx)

        # Convert back to image
        logpx = torch.cat(patch_logpx, dim=0)
        logpx = rearrange(logpx, "(h w) b -> b 1 h w", b=b, h=new_h, w=new_w)
        return logpx.contiguous()

    @staticmethod
    def stochastic_step(
        scores, x_batch, flow_model, opt=None, train=False, n_patches=32, device="cpu"
    ):
        if train:
            flow_model.train()
            opt.zero_grad(set_to_none=True)
        else:
            flow_model.eval()

        patches, context = PatchFlow.get_random_patches(
            scores, x_batch, flow_model, n_patches
        )
        patch_feature = patches.to(device)
        context_vector = context.to(device)
        patch_feature = rearrange(patch_feature, "n b c -> (n b) c")
        context_vector = rearrange(context_vector, "n b c -> (n b) c")

        # Negative log-likelihood of the patch features under the conditional flow
        z, ldj = flow_model.flow.inverse_and_log_det(
            patch_feature,
            context=context_vector,
        )
        loss = -torch.mean(flow_model.flow.q0.log_prob(z) + ldj)
        loss *= n_patches

        if train:
            loss.backward()
            opt.step()

        return loss.item() / n_patches

    @staticmethod
    def get_random_patches(scores, x_batch, flow_model, n_patches):
        b = scores.shape[0]
        h = flow_model.local_pooler(scores)
        patches = rearrange(h, "b c h w -> (h w) b c")
        context = flow_model.position_encoding(h)
        context = rearrange(context, "h w c -> (h w) c")
        context = repeat(context, "n c -> n b c", b=b)

        # Conserve GPU memory
        patches = patches.cpu()
        context = context.cpu()

        # Sample a random subset of patch positions
        total_patches = patches.shape[0]
        shuffled_idx = torch.randperm(total_patches)
        rand_idx_batch = shuffled_idx[:n_patches]

        return patches[rand_idx_batch], context[rand_idx_batch]
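
# Illustrative end-to-end sketch (assumed shapes, not part of the original
# module): score the patches of a batch of multi-scale score maps, then run
# one stochastic training step. "scores" stands in for per-sigma score-network
# outputs of shape (batch, num_sigmas, channels, height, width).
def _example_patchflow():
    input_size = (4, 3, 32, 32)  # (num_sigmas, c, h, w)
    model = PatchFlow(input_size)
    model.init_weights()

    scores = torch.randn(2, *input_size)   # stand-in multi-scale scores
    x_images = torch.randn(2, 3, 32, 32)   # image batch (unused by the local-context path)

    # Dense per-patch log-likelihood map: (2, 1, 16, 16)
    with torch.no_grad():
        logpx = model(scores)

    # One stochastic training step on a random subset of patches
    opt = torch.optim.Adam(model.flow.parameters(), lr=1e-4)
    loss = PatchFlow.stochastic_step(
        scores, x_images, model, opt=opt, train=True, n_patches=16
    )
    return logpx, loss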