Spaces:

ahsanMah
/

localizing-anomalies

Runtime error

App Files Files Community

ahsanMah committed on Jun 20, 2024

Commit

b1602ac

1 Parent(s): 06232ec

+ porting in msma files

Browse files

+ adding flow model utils

Files changed (4) hide show

app.py +1 -1
dataset.py +269 -0
flowutils.py +263 -0
scorer.py → msma.py +203 -53

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import torch
-from scorer import build_model, config_presets
 @cache

 import numpy as np
 import torch
+from msma import build_model, config_presets
 @cache

dataset.py ADDED Viewed

	@@ -0,0 +1,269 @@

+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# This work is licensed under a Creative Commons
+# Attribution-NonCommercial-ShareAlike 4.0 International License.
+# You should have received a copy of the license along with this
+# work. If not, see http://creativecommons.org/licenses/by-nc-sa/4.0/
+"""Streaming images and labels from datasets created with dataset_tool.py."""
+import json
+import os
+import zipfile
+import numpy as np
+import PIL.Image
+import torch
+import dnnlib
+try:
+    import pyspng
+except ImportError:
+    pyspng = None
+# ----------------------------------------------------------------------------
+# Abstract base class for datasets.
+class Dataset(torch.utils.data.Dataset):
+    def __init__(
+        self,
+        name,  # Name of the dataset.
+        raw_shape,  # Shape of the raw image data (NCHW).
+        use_labels=True,  # Enable conditioning labels? False = label dimension is zero.
+        max_size=None,  # Artificially limit the size of the dataset. None = no limit. Applied before xflip.
+        xflip=False,  # Artificially double the size of the dataset via x-flips. Applied after max_size.
+        random_seed=0,  # Random seed to use when applying max_size.
+        cache=False,  # Cache images in CPU memory?
+    ):
+        self._name = name
+        self._raw_shape = list(raw_shape)
+        self._use_labels = use_labels
+        self._cache = cache
+        self._cached_images = dict()  # {raw_idx: np.ndarray, ...}
+        self._raw_labels = None
+        self._label_shape = None
+        # Apply max_size.
+        self._raw_idx = np.arange(self._raw_shape[0], dtype=np.int64)
+        if (max_size is not None) and (self._raw_idx.size > max_size):
+            np.random.RandomState(random_seed % (1 << 31)).shuffle(self._raw_idx)
+            self._raw_idx = np.sort(self._raw_idx[:max_size])
+        # Apply xflip.
+        self._xflip = np.zeros(self._raw_idx.size, dtype=np.uint8)
+        if xflip:
+            self._raw_idx = np.tile(self._raw_idx, 2)
+            self._xflip = np.concatenate([self._xflip, np.ones_like(self._xflip)])
+    def _get_raw_labels(self):
+        if self._raw_labels is None:
+            self._raw_labels = self._load_raw_labels() if self._use_labels else None
+            if self._raw_labels is None:
+                self._raw_labels = np.zeros([self._raw_shape[0], 0], dtype=np.float32)
+            assert isinstance(self._raw_labels, np.ndarray)
+            assert self._raw_labels.shape[0] == self._raw_shape[0]
+            assert self._raw_labels.dtype in [np.float32, np.int64]
+            if self._raw_labels.dtype == np.int64:
+                assert self._raw_labels.ndim == 1
+                assert np.all(self._raw_labels >= 0)
+        return self._raw_labels
+    def close(self):  # to be overridden by subclass
+        pass
+    def _load_raw_image(self, raw_idx):  # to be overridden by subclass
+        raise NotImplementedError
+    def _load_raw_labels(self):  # to be overridden by subclass
+        raise NotImplementedError
+    def __getstate__(self):
+        return dict(self.__dict__, _raw_labels=None)
+    def __del__(self):
+        try:
+            self.close()
+        except:
+            pass
+    def __len__(self):
+        return self._raw_idx.size
+    def __getitem__(self, idx):
+        raw_idx = self._raw_idx[idx]
+        image = self._cached_images.get(raw_idx, None)
+        if image is None:
+            image = self._load_raw_image(raw_idx)
+            if self._cache:
+                self._cached_images[raw_idx] = image
+        assert isinstance(image, np.ndarray)
+        assert list(image.shape) == self._raw_shape[1:]
+        if self._xflip[idx]:
+            assert image.ndim == 3  # CHW
+            image = image[:, :, ::-1]
+        return image.copy(), self.get_label(idx)
+    def get_label(self, idx):
+        label = self._get_raw_labels()[self._raw_idx[idx]]
+        if label.dtype == np.int64:
+            onehot = np.zeros(self.label_shape, dtype=np.float32)
+            onehot[label] = 1
+            label = onehot
+        return label.copy()
+    def get_details(self, idx):
+        d = dnnlib.EasyDict()
+        d.raw_idx = int(self._raw_idx[idx])
+        d.xflip = int(self._xflip[idx]) != 0
+        d.raw_label = self._get_raw_labels()[d.raw_idx].copy()
+        return d
+    @property
+    def name(self):
+        return self._name
+    @property
+    def image_shape(self):  # [CHW]
+        return list(self._raw_shape[1:])
+    @property
+    def num_channels(self):
+        assert len(self.image_shape) == 3  # CHW
+        return self.image_shape[0]
+    @property
+    def resolution(self):
+        assert len(self.image_shape) == 3  # CHW
+        assert self.image_shape[1] == self.image_shape[2]
+        return self.image_shape[1]
+    @property
+    def label_shape(self):
+        if self._label_shape is None:
+            raw_labels = self._get_raw_labels()
+            if raw_labels.dtype == np.int64:
+                self._label_shape = [int(np.max(raw_labels)) + 1]
+            else:
+                self._label_shape = raw_labels.shape[1:]
+        return list(self._label_shape)
+    @property
+    def label_dim(self):
+        assert len(self.label_shape) == 1
+        return self.label_shape[0]
+    @property
+    def has_labels(self):
+        return any(x != 0 for x in self.label_shape)
+    @property
+    def has_onehot_labels(self):
+        return self._get_raw_labels().dtype == np.int64
+# ----------------------------------------------------------------------------
+# Dataset subclass that loads images recursively from the specified directory
+# or ZIP file.
+class ImageFolderDataset(Dataset):
+    def __init__(
+        self,
+        path,  # Path to directory or zip.
+        resolution=None,  # Ensure specific resolution, None = anything goes.
+        **super_kwargs,  # Additional arguments for the Dataset base class.
+    ):
+        self._path = path
+        self._zipfile = None
+        if os.path.isdir(self._path):
+            self._type = "dir"
+            self._all_fnames = {
+                os.path.relpath(os.path.join(root, fname), start=self._path)
+                for root, _dirs, files in os.walk(self._path)
+                for fname in files
+            }
+        elif self._file_ext(self._path) == ".zip":
+            self._type = "zip"
+            self._all_fnames = set(self._get_zipfile().namelist())
+        else:
+            raise IOError("Path must point to a directory or zip")
+        PIL.Image.init()
+        supported_ext = PIL.Image.EXTENSION.keys() | {".npy"}
+        self._image_fnames = sorted(
+            fname
+            for fname in self._all_fnames
+            if self._file_ext(fname) in supported_ext
+        )
+        if len(self._image_fnames) == 0:
+            raise IOError("No image files found in the specified path")
+        name = os.path.splitext(os.path.basename(self._path))[0]
+        raw_shape = [len(self._image_fnames)] + list(self._load_raw_image(0).shape)
+        if resolution is not None and (
+            raw_shape[2] != resolution or raw_shape[3] != resolution
+        ):
+            raise IOError("Image files do not match the specified resolution")
+        super().__init__(name=name, raw_shape=raw_shape, **super_kwargs)
+    @staticmethod
+    def _file_ext(fname):
+        return os.path.splitext(fname)[1].lower()
+    def _get_zipfile(self):
+        assert self._type == "zip"
+        if self._zipfile is None:
+            self._zipfile = zipfile.ZipFile(self._path)
+        return self._zipfile
+    def _open_file(self, fname):
+        if self._type == "dir":
+            return open(os.path.join(self._path, fname), "rb")
+        if self._type == "zip":
+            return self._get_zipfile().open(fname, "r")
+        return None
+    def close(self):
+        try:
+            if self._zipfile is not None:
+                self._zipfile.close()
+        finally:
+            self._zipfile = None
+    def __getstate__(self):
+        return dict(super().__getstate__(), _zipfile=None)
+    def _load_raw_image(self, raw_idx):
+        fname = self._image_fnames[raw_idx]
+        ext = self._file_ext(fname)
+        with self._open_file(fname) as f:
+            if ext == ".npy":
+                image = np.load(f)
+                image = image.reshape(-1, *image.shape[-2:])
+            elif ext == ".png" and pyspng is not None:
+                image = pyspng.load(f.read())
+                image = image.reshape(*image.shape[:2], -1).transpose(2, 0, 1)
+            else:
+                image = np.array(PIL.Image.open(f))
+                image = image.reshape(*image.shape[:2], -1).transpose(2, 0, 1)
+        return image
+    def _load_raw_labels(self):
+        fname = "dataset.json"
+        if fname not in self._all_fnames:
+            return None
+        with self._open_file(fname) as f:
+            labels = json.load(f)["labels"]
+        if labels is None:
+            return None
+        labels = dict(labels)
+        labels = [labels[fname.replace("\\", "/")] for fname in self._image_fnames]
+        labels = np.array(labels)
+        labels = labels.astype({1: np.int64, 2: np.float32}[labels.ndim])
+        return labels
+# ----------------------------------------------------------------------------

flowutils.py ADDED Viewed

	@@ -0,0 +1,263 @@

+import pdb
+import normflows as nf
+import numpy as np
+import torch
+import torch.nn as nn
+from einops import rearrange, repeat
+def build_flows(
+    latent_size, num_flows=4, num_blocks=2, hidden_units=128, context_size=64
+):
+    # Define flows
+    flows = []
+    for i in range(num_flows):
+        flows += [
+            nf.flows.CoupledRationalQuadraticSpline(
+                latent_size,
+                num_blocks=num_blocks,
+                num_hidden_channels=hidden_units,
+                num_context_channels=context_size,
+            )
+        ]
+        flows += [nf.flows.LULinearPermute(latent_size)]
+    # Set base distribution
+    q0 = nf.distributions.DiagGaussian(latent_size, trainable=True)
+    # Construct flow model
+    model = nf.ConditionalNormalizingFlow(q0, flows)
+    return model
+def get_emb(sin_inp):
+    """
+    Gets a base embedding for one dimension with sin and cos intertwined
+    """
+    emb = torch.stack((sin_inp.sin(), sin_inp.cos()), dim=-1)
+    return torch.flatten(emb, -2, -1)
+class PositionalEncoding2D(nn.Module):
+    def __init__(self, channels):
+        """
+        :param channels: The last dimension of the tensor you want to apply pos emb to.
+        """
+        super(PositionalEncoding2D, self).__init__()
+        self.org_channels = channels
+        channels = int(np.ceil(channels / 4) * 2)
+        self.channels = channels
+        inv_freq = 1.0 / (10000 ** (torch.arange(0, channels, 2).float() / channels))
+        self.register_buffer("inv_freq", inv_freq)
+        self.register_buffer("cached_penc", None, persistent=False)
+    def forward(self, tensor):
+        """
+        :param tensor: A 4d tensor of size (batch_size, x, y, ch)
+        :return: Positional Encoding Matrix of size (batch_size, x, y, ch)
+        """
+        if len(tensor.shape) != 4:
+            raise RuntimeError("The input tensor has to be 4d!")
+        if (
+            self.cached_penc is not None
+            and self.cached_penc.shape[:2] == tensor.shape[1:3]
+        ):
+            return self.cached_penc
+        self.cached_penc = None
+        batch_size, orig_ch, x, y = tensor.shape
+        pos_x = torch.arange(x, device=tensor.device, dtype=self.inv_freq.dtype)
+        pos_y = torch.arange(y, device=tensor.device, dtype=self.inv_freq.dtype)
+        sin_inp_x = torch.einsum("i,j->ij", pos_x, self.inv_freq)
+        sin_inp_y = torch.einsum("i,j->ij", pos_y, self.inv_freq)
+        emb_x = get_emb(sin_inp_x).unsqueeze(1)
+        emb_y = get_emb(sin_inp_y)
+        emb = torch.zeros(
+            (x, y, self.channels * 2),
+            device=tensor.device,
+            dtype=tensor.dtype,
+        )
+        emb[:, :, : self.channels] = emb_x
+        emb[:, :, self.channels : 2 * self.channels] = emb_y
+        self.cached_penc = emb
+        return self.cached_penc
+class SpatialNormer(nn.Module):
+    def __init__(
+        self,
+        in_channels,  # channels will be number of sigma scales in input
+        kernel_size=3,
+        stride=2,
+        padding=1,
+    ):
+        """
+        Note that the convolution will reduce the channel dimension
+        So (b, num_sigmas, c, h, w) -> (b, num_sigmas, new_h , new_w)
+        """
+        super().__init__()
+        self.conv = nn.Conv3d(
+            in_channels,
+            in_channels,
+            kernel_size,
+            # This is the real trick that ensures each
+            # sigma dimension is normed separately
+            groups=in_channels,
+            stride=(1, stride, stride),
+            padding=(0, padding, padding),
+            bias=False,
+        )
+        self.conv.weight.data.fill_(1)  # all ones weights
+        self.conv.weight.requires_grad = False  # freeze weights
+    @torch.no_grad()
+    def forward(self, x):
+        return self.conv(x.square()).pow_(0.5).squeeze(2)
+class PatchFlow(torch.nn.Module):
+    def __init__(
+        self,
+        input_size,
+        patch_size=3,
+        context_embedding_size=128,
+        num_blocks=2,
+        hidden_units=128,
+    ):
+        super().__init__()
+        num_sigmas, c, h, w = input_size
+        self.local_pooler = SpatialNormer(
+            in_channels=num_sigmas, kernel_size=patch_size
+        )
+        self.flow = build_flows(
+            latent_size=num_sigmas, context_size=context_embedding_size
+        )
+        self.position_encoding = PositionalEncoding2D(channels=context_embedding_size)
+        # caching pos encs
+        _, _, ctx_h, ctw_w = self.local_pooler(
+            torch.empty((1, num_sigmas, c, h, w))
+        ).shape
+        self.position_encoding(torch.empty(1, 1, ctx_h, ctw_w))
+        assert self.position_encoding.cached_penc.shape[-1] == context_embedding_size
+    def init_weights(self):
+        # Initialize weights with Xavier
+        linear_modules = list(
+            filter(lambda m: isinstance(m, nn.Linear), self.flow.modules())
+        )
+        total = len(linear_modules)
+        for idx, m in enumerate(linear_modules):
+            # Last layer gets init w/ zeros
+            if idx == total - 1:
+                nn.init.zeros_(m.weight.data)
+            else:
+                nn.init.xavier_uniform_(m.weight.data)
+            if m.bias is not None:
+                nn.init.zeros_(m.bias.data)
+    def forward(self, x, chunk_size=32):
+        b, s, c, h, w = x.shape
+        x_norm = self.local_pooler(x)
+        _, _, new_h, new_w = x_norm.shape
+        context = self.position_encoding(x_norm)
+        # (Patches * batch) x channels
+        local_ctx = rearrange(context, "h w c -> (h w) c")
+        patches = rearrange(x_norm, "b c h w -> (h w) b c")
+        nchunks = (patches.shape[0] + chunk_size - 1) // chunk_size
+        patches = patches.chunk(nchunks, dim=0)
+        ctx_chunks = local_ctx.chunk(nchunks, dim=0)
+        patch_logpx = []
+        # gc = repeat(global_ctx, "b c -> (n b) c", n=self.patch_batch_size)
+        for p, ctx in zip(patches, ctx_chunks):
+            # num patches in chunk (<= chunk_size)
+            n = p.shape[0]
+            ctx = repeat(ctx, "n c -> (n b) c", b=b)
+            p = rearrange(p, "n b c -> (n b) c")
+            # Compute log densities for each patch
+            logpx = self.flow.log_prob(p, context=ctx)
+            logpx = rearrange(logpx, "(n b) -> n b", n=n, b=b)
+            patch_logpx.append(logpx)
+            # del ctx, p
+        # print(p[:4], ctx[:4], logpx)
+        # Convert back to image
+        logpx = torch.cat(patch_logpx, dim=0)
+        logpx = rearrange(logpx, "(h w) b -> b 1 h w", b=b, h=new_h, w=new_w)
+        return logpx.contiguous()
+    @staticmethod
+    def stochastic_step(
+        scores, x_batch, flow_model, opt=None, train=False, n_patches=32, device="cpu"
+    ):
+        if train:
+            flow_model.train()
+            opt.zero_grad(set_to_none=True)
+        else:
+            flow_model.eval()
+        patches, context = PatchFlow.get_random_patches(
+            scores, x_batch, flow_model, n_patches
+        )
+        patch_feature = patches.to(device)
+        context_vector = context.to(device)
+        patch_feature = rearrange(patch_feature, "n b c -> (n b) c")
+        context_vector = rearrange(context_vector, "n b c -> (n b) c")
+        # global_pooled_image = flow_model.global_pooler(x_batch)
+        # global_context = flow_model.global_attention(global_pooled_image)
+        # gctx = repeat(global_context, "b c -> (n b) c", n=n_patches)
+        # # Concatenate global context to local context
+        # context_vector = torch.cat([context_vector, gctx], dim=1)
+        z, ldj = flow_model.flow.inverse_and_log_det(
+            patch_feature,
+            context=context_vector,
+        )
+        loss = -torch.mean(flow_model.flow.q0.log_prob(z) + ldj)
+        loss *= n_patches
+        if train:
+            loss.backward()
+            opt.step()
+        return loss.item() / n_patches
+    @staticmethod
+    def get_random_patches(scores, x_batch, flow_model, n_patches):
+        b = scores.shape[0]
+        h = flow_model.local_pooler(scores)
+        patches = rearrange(h, "b c h w -> (h w) b c")
+        context = flow_model.position_encoding(h)
+        context = rearrange(context, "h w c -> (h w) c")
+        context = repeat(context, "n c -> n b c", b=b)
+        # conserve gpu memory
+        patches = patches.cpu()
+        context = context.cpu()
+        # Get random patches
+        total_patches = patches.shape[0]
+        shuffled_idx = torch.randperm(total_patches)
+        rand_idx_batch = shuffled_idx[:n_patches]
+        return patches[rand_idx_batch], context[rand_idx_batch]

scorer.py → msma.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import os
 import pickle
 from pickle import dump, load
 import numpy as np
@@ -9,9 +10,12 @@ from sklearn.mixture import GaussianMixture
 from sklearn.model_selection import GridSearchCV
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
 from tqdm import tqdm
 import dnnlib
 model_root = "https://nvlabs-fi-cdn.nvidia.com/edm2/posthoc-reconstructions"
@@ -22,6 +26,17 @@ config_presets = {
 }
 class EDMScorer(torch.nn.Module):
     def __init__(
         self,
@@ -41,6 +56,7 @@ class EDMScorer(torch.nn.Module):
         self.sigma_max = sigma_max
         self.sigma_data = sigma_data
         self.net = net.eval()
         # Adjust noise levels based on how far we want to accumulate
         self.sigma_min = 1e-1
@@ -63,7 +79,7 @@ class EDMScorer(torch.nn.Module):
         x,
         force_fp32=False,
     ):
-        x = x.to(torch.float32)
         batch_scores = []
         for sigma in self.sigma_steps:
@@ -76,6 +92,29 @@ class EDMScorer(torch.nn.Module):
         return batch_scores
 def build_model(preset="edm2-img64-s-fid", device="cpu"):
     netpath = config_presets[preset]
     with dnnlib.util.open_url(netpath, verbose=1) as f:
@@ -85,41 +124,45 @@ def build_model(preset="edm2-img64-s-fid", device="cpu"):
     return model
-def train_gmm(score_path, outdir):
-    def quantile_scorer(gmm, X, y=None):
-        return np.quantile(gmm.score_samples(X), 0.1)
-    X = torch.load(score_path)
-    gm = GaussianMixture(init_params="kmeans", covariance_type="full", max_iter=100000)
-    clf = Pipeline([("scaler", StandardScaler()), ("GMM", gm)])
-    clf.fit(X)
-    inlier_nll = -clf.score_samples(X)
-    param_grid = dict(
-        GMM__n_components=range(2, 11, 2),
-    )
-    grid = GridSearchCV(
-        estimator=clf,
-        param_grid=param_grid,
-        cv=10,
-        n_jobs=2,
-        verbose=1,
-        scoring=quantile_scorer,
     )
-    grid_result = grid.fit(X)
-    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
-    print("-----" * 15)
-    means = grid_result.cv_results_["mean_test_score"]
-    stds = grid_result.cv_results_["std_test_score"]
-    params = grid_result.cv_results_["params"]
-    for mean, stdev, param in zip(means, stds, params):
-        print("%f (%f) with: %r" % (mean, stdev, param))
-    clf = grid.best_estimator_
     os.makedirs(outdir, exist_ok=True)
     with open(f"{outdir}/refscores.npz", "wb") as f:
@@ -134,26 +177,14 @@ def compute_gmm_likelihood(x_score, gmmdir):
         clf = load(f)
         nll = -clf.score_samples(x_score)
-    with np.load(f"{gmmdir}/refscores.npz", "wb") as f:
         ref_nll = f["arr_0"]
         percentile = (ref_nll < nll).mean()
     return nll, percentile
-def test_runner(device="cpu"):
-    # f = "doge.jpg"
-    f = "goldfish.JPEG"
-    image = (PIL.Image.open(f)).resize((64, 64), PIL.Image.Resampling.LANCZOS)
-    image = np.array(image)
-    image = image.reshape(*image.shape[:2], -1).transpose(2, 0, 1)
-    x = torch.from_numpy(image).unsqueeze(0).to(device)
-    model = build_model(device=device)
-    scores = model(x)
-    return scores
-def runner(preset, dataset_path, device="cpu"):
     dsobj = ImageFolderDataset(path=dataset_path, resolution=64)
     refimg, reflabel = dsobj[0]
     print(refimg.shape, refimg.dtype, reflabel)
@@ -178,19 +209,138 @@ def runner(preset, dataset_path, device="cpu"):
     print(f"Computed score norms for {score_norms.shape[0]} samples")
 if __name__ == "__main__":
     device = "cuda" if torch.cuda.is_available() else "cpu"
     preset = "edm2-img64-s-fid"
-    # runner(
     #     preset=preset,
     #     dataset_path="/GROND_STOR/amahmood/datasets/img64/",
     #     device="cuda",
     # )
-    train_gmm(
-        f"out/msma/{preset}_imagenette_score_norms.pt", outdir=f"out/msma/{preset}"
-    )
-    s = test_runner(device=device)
-    s = s.square().sum(dim=(2, 3, 4)) ** 0.5
-    s = s.to("cpu").numpy()
-    nll, pct = compute_gmm_likelihood(s, gmmdir=f"out/msma/{preset}")
-    print(f"Anomaly score for image: {nll[0]:.3f} @ {pct*100:.2f} percentile")

 import os
 import pickle
+from functools import partial
 from pickle import dump, load
 import numpy as np
 from sklearn.model_selection import GridSearchCV
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
+from torch.utils.data import Subset
 from tqdm import tqdm
 import dnnlib
+from dataset import ImageFolderDataset
+from flowutils import PatchFlow
 model_root = "https://nvlabs-fi-cdn.nvidia.com/edm2/posthoc-reconstructions"
 }
+class StandardRGBEncoder:
+    def __init__(self):
+        super().__init__()
+    def encode(self, x):  # raw pixels => final pixels
+        return x.to(torch.float32) / 127.5 - 1
+    def decode(self, x):  # final latents => raw pixels
+        return (x.to(torch.float32) * 127.5 + 128).clip(0, 255).to(torch.uint8)
 class EDMScorer(torch.nn.Module):
     def __init__(
         self,
         self.sigma_max = sigma_max
         self.sigma_data = sigma_data
         self.net = net.eval()
+        self.encoder = StandardRGBEncoder()
         # Adjust noise levels based on how far we want to accumulate
         self.sigma_min = 1e-1
         x,
         force_fp32=False,
     ):
+        x = self.encoder.encode(x).to(torch.float32)
         batch_scores = []
         for sigma in self.sigma_steps:
         return batch_scores
+class ScoreFlow(torch.nn.Module):
+    def __init__(
+        self,
+        scorenet,
+        vectorize=False,
+        device="cpu",
+    ):
+        super().__init__()
+        h = w = scorenet.net.img_resolution
+        c = scorenet.net.img_channels
+        num_sigmas = len(scorenet.sigma_steps)
+        self.flow = PatchFlow((num_sigmas, c, h, w))
+        self.flow = self.flow.to(device)
+        self.scorenet = scorenet.to(device).requires_grad_(False)
+        self.flow.init_weights()
+    def forward(self, x, **score_kwargs):
+        x_scores = self.scorenet(x, **score_kwargs)
+        return self.flow(x_scores)
 def build_model(preset="edm2-img64-s-fid", device="cpu"):
     netpath = config_presets[preset]
     with dnnlib.util.open_url(netpath, verbose=1) as f:
     return model
+def quantile_scorer(gmm, X, y=None):
+    return np.quantile(gmm.score_samples(X), 0.1)
+def train_gmm(score_path, outdir, grid_search=False):
+    X = torch.load(score_path)
+    gm = GaussianMixture(
+        n_components=7, init_params="kmeans", covariance_type="full", max_iter=100000
     )
+    if grid_search:
+        clf = Pipeline([("scaler", StandardScaler()), ("GMM", gm)])
+        param_grid = dict(
+            GMM__n_components=range(2, 11, 1),
+        )
+        grid = GridSearchCV(
+            estimator=clf,
+            param_grid=param_grid,
+            cv=5,
+            n_jobs=2,
+            verbose=1,
+            scoring=quantile_scorer,
+        )
+        grid_result = grid.fit(X)
+        print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
+        print("-----" * 15)
+        means = grid_result.cv_results_["mean_test_score"]
+        stds = grid_result.cv_results_["std_test_score"]
+        params = grid_result.cv_results_["params"]
+        for mean, stdev, param in zip(means, stds, params):
+            print("%f (%f) with: %r" % (mean, stdev, param))
+        clf = grid.best_estimator_
+    clf.fit(X)
+    inlier_nll = -clf.score_samples(X)
     os.makedirs(outdir, exist_ok=True)
     with open(f"{outdir}/refscores.npz", "wb") as f:
         clf = load(f)
         nll = -clf.score_samples(x_score)
+    with np.load(f"{gmmdir}/refscores.npz", "rb") as f:
         ref_nll = f["arr_0"]
         percentile = (ref_nll < nll).mean()
     return nll, percentile
+def cache_score_norms(preset, dataset_path, device="cpu"):
     dsobj = ImageFolderDataset(path=dataset_path, resolution=64)
     refimg, reflabel = dsobj[0]
     print(refimg.shape, refimg.dtype, reflabel)
     print(f"Computed score norms for {score_norms.shape[0]} samples")
+def train_flow(dataset_path, preset, device="cuda"):
+    dsobj = ImageFolderDataset(path=dataset_path, resolution=64)
+    refimg, reflabel = dsobj[0]
+    print(f"Loaded {len(dsobj)} samples from {dataset_path}")
+    # Subset of training dataset
+    val_ratio = 0.1
+    train_len = int((1 - val_ratio) * len(dsobj))
+    val_len = len(dsobj) - train_len
+    print(
+        f"Generating train/test split with ratio={val_ratio} -> {train_len}/{val_len}..."
+    )
+    train_ds = Subset(dsobj, range(train_len))
+    val_ds = Subset(dsobj, range(train_len, train_len + val_len))
+    trainiter = torch.utils.data.DataLoader(
+        train_ds, batch_size=48, num_workers=4, prefetch_factor=2
+    )
+    testiter = torch.utils.data.DataLoader(
+        val_ds, batch_size=48, num_workers=4, prefetch_factor=2
+    )
+    model = ScoreFlow(build_model(preset=preset), device=device)
+    opt = torch.optim.AdamW(model.flow.parameters(), lr=3e-4, weight_decay=1e-5)
+    train_step = partial(
+        PatchFlow.stochastic_step,
+        flow_model=model.flow,
+        opt=opt,
+        train=True,
+        n_patches=64,
+        device=device,
+    )
+    eval_step = partial(
+        PatchFlow.stochastic_step,
+        flow_model=model.flow,
+        train=False,
+        n_patches=128,
+        device=device,
+    )
+    pbar = tqdm(trainiter, desc="Train Loss: ? - Val Loss: ?")
+    step = 0
+    for x, _ in tqdm(trainiter):
+        x = x.to(device)
+        scores = model.scorenet(x)
+        if step == 0:
+            with torch.inference_mode():
+                val_loss = eval_step(scores, x)
+        train_loss = train_step(scores, x)
+        if (step + 1) % 10 == 0:
+            with torch.inference_mode():
+                val_loss = 0.0
+                for i, (x, _) in enumerate(testiter):
+                    x = x.to(device)
+                    scores = model.scorenet(x)
+                    val_loss += eval_step(scores, x)
+                    break
+                val_loss /= i + 1
+        pbar.set_description(
+            f"Step: {step:d} - Train: {train_loss:.3f} - Val: {val_loss:.3f}"
+        )
+        step += 1
+    torch.save(model.flow.state_dict(), f"out/msma/{preset}/flow.pt")
+@torch.inference_mode
+def test_runner(device="cpu"):
+    # f = "doge.jpg"
+    f = "goldfish.JPEG"
+    image = (PIL.Image.open(f)).resize((64, 64), PIL.Image.Resampling.LANCZOS)
+    image = np.array(image)
+    image = image.reshape(*image.shape[:2], -1).transpose(2, 0, 1)
+    x = torch.from_numpy(image).unsqueeze(0).to(device)
+    model = build_model(device=device)
+    scores = model(x)
+    return scores
+def test_flow_runner(device="cpu", load_weights=None):
+    f = "doge.jpg"
+    # f = "goldfish.JPEG"
+    image = (PIL.Image.open(f)).resize((64, 64), PIL.Image.Resampling.LANCZOS)
+    image = np.array(image)
+    image = image.reshape(*image.shape[:2], -1).transpose(2, 0, 1)
+    x = torch.from_numpy(image).unsqueeze(0).to(device)
+    model = build_model(device=device)
+    score_flow = ScoreFlow(scorenet=model, device=device)
+    if load_weights is not None:
+        score_flow.flow.load_state_dict(torch.load(load_weights))
+    heatmap = score_flow(x)
+    print(heatmap.shape)
+    heatmap = score_flow(x).detach().cpu().numpy()
+    heatmap = (heatmap - heatmap.min()) / (heatmap.max() - heatmap.min()) * 255
+    im = PIL.Image.fromarray(heatmap[0, 0])
+    im.convert("RGB").save(
+        "heatmap.png",
+    )
+    return
 if __name__ == "__main__":
     device = "cuda" if torch.cuda.is_available() else "cpu"
     preset = "edm2-img64-s-fid"
+    imagenette_path = "/GROND_STOR/amahmood/datasets/img64/"
+    train_flow(imagenette_path, preset, device)
+    test_flow_runner("cuda", f"out/msma/{preset}/flow.pt")
+    # cache_score_norms(
     #     preset=preset,
     #     dataset_path="/GROND_STOR/amahmood/datasets/img64/",
     #     device="cuda",
     # )
+    # train_gmm(
+    #     f"out/msma/{preset}_imagenette_score_norms.pt", outdir=f"out/msma/{preset}"
+    # )
+    # s = test_runner(device=device)
+    # s = s.square().sum(dim=(2, 3, 4)) ** 0.5
+    # s = s.to("cpu").numpy()
+    # nll, pct = compute_gmm_likelihood(s, gmmdir=f"out/msma/{preset}/")
+    # print(f"Anomaly score for image: {nll[0]:.3f} @ {pct*100:.2f} percentile")