Upload 16 files
Browse files- AllExperimentsSerial.sh +33 -0
- LICENSE +21 -0
- README.md +249 -0
- __init__.py +8 -0
- __main__.py +561 -0
- ai.py +1064 -0
- components.py +951 -0
- convert.py +144 -0
- goals.py +529 -0
- helpers.py +489 -0
- losses.py +60 -0
- media/overview.png +0 -0
- media/resnetTinyFewCombo.png +0 -0
- models.py +120 -0
- requirements.txt +6 -0
- scheduling.py +120 -0
AllExperimentsSerial.sh
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Baseline
|
2 |
+
python . -D CIFAR10 -n ResNetTiny -d "LinMix(a=Point(), b=Box(w=Lin(0,0.031373,150,10)), bw=Lin(0,0.5,150,10))" --batch-size 50 --width 0.031373 --lr 0.001 --normalize-layer True --clip-norm False --lr-multistep $1
|
3 |
+
# InSamp
|
4 |
+
python . -D CIFAR10 -n ResNetTiny -d "LinMix(a=Point(), b=InSamp(Lin(0,1,150,10)), bw=Lin(0,0.5, 150,10))" --batch-size 50 --width 0.031373 --lr 0.001 --normalize-layer True --clip-norm False --lr-multistep $1
|
5 |
+
# InSampLPA
|
6 |
+
python . -D CIFAR10 -n ResNetTiny -d "LinMix(a=Point(), b=InSamp(Lin(0,1,150,20), w=Lin(0,0.031373, 150, 20)), bw=Lin(0,0.5, 150, 20))" --batch-size 50 --width 0.031373 --lr 0.001 --normalize-layer True --clip-norm False --lr-multistep $1
|
7 |
+
# Adv_{1}InSampLPA
|
8 |
+
python . -D CIFAR10 -n ResNetTiny -d "LinMix(a=IFGSM(w=Lin(0,0.031373,20,20), k=1), b=InSamp(Lin(0,1,150,10), w=Lin(0,0.031373,150,10)), bw=Lin(0,0.5,150,10))" --batch-size 50 --width 0.031373 --lr 0.001 --normalize-layer True --clip-norm False --lr-multistep $1
|
9 |
+
# Adv_{3}InSampLPA
|
10 |
+
python . -D CIFAR10 -n ResNetTiny -d "LinMix(a=IFGSM(w=Lin(0,0.031373,20,20), k=3), b=InSamp(Lin(0,1,150,10), w=Lin(0,0.031373,150,10)), bw=Lin(0,0.5,150,10))" --batch-size 50 --width 0.031373 --lr 0.001 --normalize-layer True --clip-norm False --lr-multistep $1
|
11 |
+
|
12 |
+
|
13 |
+
# Baseline
|
14 |
+
python . -D CIFAR10 -n ResNetTiny_FewCombo -d "LinMix(a=Point(), b=Box(w=Lin(0,0.031373,150,10)), bw=Lin(0,0.5,150,10))" --batch-size 50 --width 0.031373 --lr 0.001 --normalize-layer True --clip-norm False --lr-multistep $1
|
15 |
+
# InSamp
|
16 |
+
python . -D CIFAR10 -n ResNetTiny_FewCombo -d "LinMix(a=Point(), b=InSamp(Lin(0,1,150,10)), bw=Lin(0,0.5, 150,10))" --batch-size 50 --width 0.031373 --lr 0.001 --normalize-layer True --clip-norm False --lr-multistep $1
|
17 |
+
# InSampLPA
|
18 |
+
python . -D CIFAR10 -n ResNetTiny_FewCombo -d "LinMix(a=Point(), b=InSamp(Lin(0,1,150,20), w=Lin(0,0.031373, 150, 20)), bw=Lin(0,0.5, 150, 20))" --batch-size 50 --width 0.031373 --lr 0.001 --normalize-layer True --clip-norm False --lr-multistep $1
|
19 |
+
# Adv_{1}InSampLPA
|
20 |
+
python . -D CIFAR10 -n ResNetTiny_FewCombo -d "LinMix(a=IFGSM(w=Lin(0,0.031373,20,20), k=1), b=InSamp(Lin(0,1,150,10), w=Lin(0,0.031373,150,10)), bw=Lin(0,0.5,150,10))" --batch-size 50 --width 0.031373 --lr 0.001 --normalize-layer True --clip-norm False --lr-multistep $1
|
21 |
+
# Adv_{3}InSampLPA
|
22 |
+
python . -D CIFAR10 -n ResNetTiny_FewCombo -d "LinMix(a=IFGSM(w=Lin(0,0.031373,20,20), k=3), b=InSamp(Lin(0,1,150,10), w=Lin(0,0.031373,150,10)), bw=Lin(0,0.5,150,10))" --batch-size 50 --width 0.031373 --lr 0.001 --normalize-layer True --clip-norm False --lr-multistep $1
|
23 |
+
|
24 |
+
# Adv_{1}InSampLPA
|
25 |
+
python . -D CIFAR10 -n ResNetTiny_ManyFixed -d "LinMix(a=IFGSM(w=Lin(0,0.031373,20,20), k=1), b=InSamp(Lin(0,1,150,10), w=Lin(0,0.031373,150,10)), bw=Lin(0,0.5,150,10))" --batch-size 50 --width 0.031373 --lr 0.001 --normalize-layer True --clip-norm False --lr-multistep $1
|
26 |
+
|
27 |
+
# InSamp_{18}
|
28 |
+
python . -D CIFAR10 -n SkipNet18 -d "LinMix(a=Point(), b=InSamp(Lin(0,1,200,40)), bw=Lin(0,0.5,200,40))" -t "MI_FGSM(k=20,r=2)" --batch-size 100 --save-freq 2 --width 0.031373 --lr 0.1 --normalize-layer True --clip-norm False --lr-multistep --sgd --custom-schedule "[10,20,250,300,350]" $1
|
29 |
+
# Adv_{5}InSamp_{18}
|
30 |
+
python . -D CIFAR10 -n SkipNet18 -d "LinMix(a=IFGSM(w=Lin(0,0.031373,20,20)), b=InSamp(Lin(0,1,200,40)), bw=Lin(0,0.5,200,40))" -t "MI_FGSM(k=20,r=2)" --batch-size 100 --width 0.031373 --lr 0.1 --normalize-layer True --clip-norm False --lr-multistep --sgd --custom-schedule "[10,20,250,300,350]" $1
|
31 |
+
# InSamp_{18} Combo
|
32 |
+
python . -D CIFAR10 -n SkipNet18_Combo -d "LinMix(b=InSamp(Lin(0,1,200,40)), bw=Lin(0,0.5, 200, 40))" --batch-size 100 --width 0.031373 --lr 0.1 --normalize-layer True --clip-norm False --sgd --lr-multistep --custom-schedule "[10,20,250,300,350]" $1
|
33 |
+
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2018 SRI Lab, ETH Zurich
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
ADDED
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
DiffAI v3 <a href="https://www.sri.inf.ethz.ch/"><img width="100" alt="portfolio_view" align="right" src="http://safeai.ethz.ch/img/sri-logo.svg"></a>
|
2 |
+
=============================================================================================================
|
3 |
+
|
4 |
+

|
5 |
+
|
6 |
+
|
7 |
+
|
8 |
+
DiffAI is a system for training neural networks to be provably robust and for proving that they are robust.
|
9 |
+
|
10 |
+
Background
|
11 |
+
----------
|
12 |
+
|
13 |
+
By now, it is well known that otherwise working networks can be tricked by clever attacks. For example [Goodfellow et al.](https://arxiv.org/abs/1412.6572) demonstrated a network with high classification accuracy which classified one image of a panda correctly, and a seemingly identical attack picture
|
14 |
+
incorrectly. Many defenses against this type of attack have been produced, but very few produce networks for which *provably* verifying the safety of a prediction is feasible.
|
15 |
+
|
16 |
+
Abstract Interpretation is a technique for verifying properties of programs by soundly overapproximating their behavior. When applied to neural networks, an infinite set (a ball) of possible inputs is passed to an approximating "abstract" network
|
17 |
+
to produce a superset of the possible outputs from the actual network. Provided an appropriate representation for these sets, demonstrating that the network classifies everything in the ball correctly becomes a simple task. The method used to represent these sets is the abstract domain, and the specific approximations are the abstract transformers.
|
18 |
+
|
19 |
+
In DiffAI, the entire abstract interpretation process is programmed using PyTorch so that it is differentiable and can be run on the GPU,
|
20 |
+
and a loss function is crafted so that low values correspond to inputs which can be proved safe (robust).
|
21 |
+
|
22 |
+
What's New in v3?
|
23 |
+
----------------
|
24 |
+
|
25 |
+
* Abstract Networks: one can now customize the handling of the domains on a per-layer basis.
|
26 |
+
* Training DSL: A DSL has been exposed to allow for custom training regimens with complex parameter scheduling.
|
27 |
+
* Cross Loss: The box goal now uses the cross entropy style loss by default as suggested by [Gowal et al. 2019](https://arxiv.org/abs/1810.12715)
|
28 |
+
* Conversion to ONNX: We can now export to the ONNX format, and can export the abstract network itself to ONNX (so that one can run abstract analysis or training using TensorFlow for example).
|
29 |
+
|
30 |
+
Requirements
|
31 |
+
------------
|
32 |
+
|
33 |
+
python 3.6.7, and virtualenv, torch 0.4.1.
|
34 |
+
|
35 |
+
Recommended Setup
|
36 |
+
-----------------
|
37 |
+
|
38 |
+
```
|
39 |
+
$ git clone https://github.com/eth-sri/DiffAI.git
|
40 |
+
$ cd DiffAI
|
41 |
+
$ virtualenv pytorch --python python3.6
|
42 |
+
$ source pytorch/bin/activate
|
43 |
+
(pytorch) $ pip install -r requirements.txt
|
44 |
+
```
|
45 |
+
|
46 |
+
Note: you need to activate your virtualenv every time you start a new shell.
|
47 |
+
|
48 |
+
Getting Started
|
49 |
+
---------------
|
50 |
+
|
51 |
+
DiffAI can be run as a standalone program. To see a list of arguments, type
|
52 |
+
|
53 |
+
```
|
54 |
+
(pytorch) $ python . --help
|
55 |
+
```
|
56 |
+
|
57 |
+
At the minimum, DiffAI expects at least one domain to train with and one domain to test with, and a network with which to test. For example, to train with the Box domain, baseline training (Point) and test against the FGSM attack and the ZSwitch domain with a simple feed forward network on the MNIST dataset (default, if none provided), you would type:
|
58 |
+
|
59 |
+
```
|
60 |
+
(pytorch) $ python . -d "Point()" -d "Box()" -t "PGD()" -t "ZSwitch()" -n ffnn
|
61 |
+
```
|
62 |
+
|
63 |
+
Unless otherwise specified by "--out", the output is logged to the folder "out/".
|
64 |
+
In the folder corresponding to the experiment that has been run, one can find the saved configuration options in
|
65 |
+
"config.txt", and a pickled net which is saved every 10 epochs (provided that testing is set to happen every 10th epoch).
|
66 |
+
|
67 |
+
To load a saved model, use "--test" as per the example:
|
68 |
+
|
69 |
+
```
|
70 |
+
(pytorch) $ alias test-diffai="python . -d Point --epochs 1 --dont-write --test-freq 1"
|
71 |
+
(pytorch) $ test-diffai -t Box --update-test-net-name convBig --test PATHTOSAVED_CONVBIG.pynet --width 0.1 --test-size 500 --test-batch-size 500
|
72 |
+
```
|
73 |
+
|
74 |
+
Note that "--update-test-net-name" will create a new model based on convBig and try to use the weights in the pickled PATHTOSAVED_CONVBIG.pynet to initialize that model's weights. This is not always necessary, but is useful when the code for a model changes (in components) but does not affect the number or usage of weights, or when loading a model pickled by a cuda process into a cpu process.
|
75 |
+
|
76 |
+
The default specification type is the L_infinity Ball specified explicitly by "--spec boxSpec",
|
77 |
+
which uses an epsilon specified by "--width"
|
78 |
+
|
79 |
+
The default specification type is the L_infinity Ball specified explicitly by "--spec boxSpec",
|
80 |
+
which uses an epsilon specified by "--width"
|
81 |
+
|
82 |
+
Abstract Networks
|
83 |
+
-----------------
|
84 |
+
|
85 |
+

|
86 |
+
|
87 |
+
A crucial point of DiffAI v3 is that how a network is trained and abstracted should be part of the network description itself. In this release, we provide layers that allow one to alter how the abstraction works,
|
88 |
+
in addition to providing a script for converting an abstract network to ONNX so that the abstract analysis might be run in TensorFlow.
|
89 |
+
Below is a list of the abstract layers that we have included.
|
90 |
+
|
91 |
+
* CorrMaxPool3D
|
92 |
+
* CorrMaxPool2D
|
93 |
+
* CorrFix
|
94 |
+
* CorrMaxK
|
95 |
+
* CorrRand
|
96 |
+
* DecorrRand
|
97 |
+
* DecorrMin
|
98 |
+
* DeepLoss
|
99 |
+
* ToZono
|
100 |
+
* ToHZono
|
101 |
+
* Concretize
|
102 |
+
* CorrelateAll
|
103 |
+
|
104 |
+
Training Domain DSL
|
105 |
+
-------------------
|
106 |
+
|
107 |
+
In DiffAI v3, a dsl has been provided to specify arbitrary training domains. In particular, it is now possible to train on combinations of attacks and abstract domains on specifications defined by attacks. Specifying training domains is possible in the command line using ```-d "DOMAIN_INITIALIZATION"```. The possible combinations are the classes listed in domains.py. The same syntax is also supported for testing domains, to allow for testing robustness with different epsilon-sized attacks and specifications.
|
108 |
+
|
109 |
+
Listed below are a few examples:
|
110 |
+
|
111 |
+
* ```-t "IFGSM(k=4, w=0.1)" -t "ZNIPS(w=0.3)" ``` Will first test with the PGD attack with an epsilon=w=0.1 and, the number of iterations k=4 and step size set to w/k. It will also test with the zonotope domain using the transformer specified in our [NIPS 2018 paper](https://www.sri.inf.ethz.ch/publications/singh2018effective) with an epsilon=w=0.3.
|
112 |
+
|
113 |
+
* ```-t "PGD(r=3,k=16,restart=2, w=0.1)"``` tests on points found using PGD with a step size of r*w/k and two restarts, and an attack-generated specification.
|
114 |
+
|
115 |
+
* ```-d Point()``` is standard non-defensive training.
|
116 |
+
|
117 |
+
* ```-d "LinMix(a=IFGSM(), b=Box(), aw=1, bw=0.1)"``` trains on points produced by pgd with the default parameters listed in domains.py, and points produced using the box domain. The loss is combined linearly using the weights aw and bw and scaled by 1/(aw + bw). The epsilon used for both is the ambient epsilon specified with "--width".
|
118 |
+
|
119 |
+
* ```-d "DList((IFGSM(w=0.1),1), (Box(w=0.01),0.1), (Box(w=0.1),0.01))"``` is a generalization of the Mix domain allowing for training with arbitrarily many domains at once weighted by the given values (the resulting loss is scaled by the inverse of the sum of weights).
|
120 |
+
|
121 |
+
* ```-d "AdvDom(a=IFGSM(), b=Box())"``` trains using the Box domain, but constructs specifications as L∞ balls containing the PGD attack image and the original image "o".
|
122 |
+
|
123 |
+
* ```-d "BiAdv(a=IFGSM(), b=Box())"``` is similar, but creates specifications between the pgd attack image "a" and "o - (a - o)".
|
124 |
+
|
125 |
+
One domain we have found particularly useful for training is ```Mix(a=PGD(r=3,k=16,restart=2, w=0.1), b=BiAdv(a=IFGSM(k=5, w=0.05)), bw=0.1)```.
|
126 |
+
|
127 |
+
While the above domains are all deterministic (up to gpu error and shuffling orders), we have also implemented nondeterministic training domains:
|
128 |
+
|
129 |
+
* ```-d "Coin(a=IFGSM(), b=Box(), aw=1, bw=0.1)"``` is like Mix, but chooses which domain to train a batch with by the probabilities determined by aw / (aw + bw) and bw / (aw + bw).
|
130 |
+
|
131 |
+
* ```-d "DProb((IFGSM(w=0.1),1), (Box(w=0.01),0.1), (Box(w=0.1),0.01))"``` is to Coin what DList is to Mix.
|
132 |
+
|
133 |
+
* ```-d AdvDom(a=IFGSM(), b=DList((PointB(),1), (PointA(), 1), (Box(), 0.2)))``` can be used to share attack images between multiple training types. Here an attack image "m" is found using PGD, then both the original image "o" and the attack image "m" are passed to DList which trains using three different ways: PointA trains with "o", PointB trains with "m", and Box trains on the box produced between them. This can also be used with Mix.
|
134 |
+
|
135 |
+
* ```-d Normal(w=0.3)``` trains using images sampled from a normal distribution around the provided image using standard deviation w.
|
136 |
+
|
137 |
+
* ```-d NormalAdv(a=IFGSM(), w=0.3)``` trains using PGD (but this could be an abstract domain) where perturbations are constrained to a box determined by a normal distribution around the original image with standard deviation w.
|
138 |
+
|
139 |
+
* ```-d InSamp(0.2, w=0.1)``` uses Inclusion sampling as defined in the ArXiv paper.
|
140 |
+
|
141 |
+
There are more domains implemented than listed here, and of course more interesting combinations are possible. Please look carefully at domains.py for default values and further options.
|
142 |
+
|
143 |
+
|
144 |
+
Parameter Scheduling DSL
|
145 |
+
------------------------
|
146 |
+
|
147 |
+
In place of many constants, you can use the following scheduling devices.
|
148 |
+
|
149 |
+
* ```Lin(s,e,t,i)``` Linearly interpolates between s and e over t epochs, using s for the first i epochs.
|
150 |
+
|
151 |
+
* ```Until(t,a,b)``` Uses a for the first t epochs, then switches to using b (telling b the current epoch starting from 0 at epoch t).
|
152 |
+
|
153 |
+
Suggested Training
|
154 |
+
------------------
|
155 |
+
|
156 |
+
```LinMix(a=IFGSM(k=2), b=InSamp(Lin(0,1,150,10)), bw = Lin(0,0.5,150,10))``` is a training goal that appears to work particularly well for CIFAR10 networks.
|
157 |
+
|
158 |
+
Contents
|
159 |
+
--------
|
160 |
+
|
161 |
+
* components.py: A high level neural network library for composable layers and operations
|
162 |
+
* goals.py: The DSL for specifying training losses and domains, and attacks which can be used as a drop in replacement for pytorch tensors in any model built with components from components.py
|
163 |
+
* scheduling.py: The DSL for specifying parameter scheduling.
|
164 |
+
* models.py: A repository of models to train with which are used in the paper.
|
165 |
+
* convert.py: A utility for converting a model with a training or testing domain (goal) into an ONNX network. This is useful for exporting DiffAI abstractions to TensorFlow.
|
166 |
+
* \_\_main\_\_.py: The entry point to run the experiments.
|
167 |
+
* helpers.py: Assorted helper functions. Does some monkeypatching, so you might want to be careful importing our library into your project.
|
168 |
+
* AllExperimentsSerial.sh: A script which runs the training experiments from the 2019 ArXiv paper from table 4 and 5 and figure 5.
|
169 |
+
|
170 |
+
Notes
|
171 |
+
-----
|
172 |
+
|
173 |
+
Not all of the datasets listed in the help message are supported. Supported datasets are:
|
174 |
+
|
175 |
+
* CIFAR10
|
176 |
+
* CIFAR100
|
177 |
+
* MNIST
|
178 |
+
* SVHN
|
179 |
+
* FashionMNIST
|
180 |
+
|
181 |
+
Unsupported datasets will not necessarily throw errors.
|
182 |
+
|
183 |
+
Reproducing Results
|
184 |
+
-------------------
|
185 |
+
|
186 |
+
[Download Defended Networks](https://www.dropbox.com/sh/66obogmvih79e3k/AACe-tkKGvIK0Z--2tk2alZaa?dl=0)
|
187 |
+
|
188 |
+
All training runs from the paper can be reproduced as by the following command, in the same order as Table 6 in the appendix.
|
189 |
+
|
190 |
+
```
|
191 |
+
./AllExperimentsSerial.sh "-t MI_FGSM(k=20,r=2) -t HBox --test-size 10000 --test-batch-size 200 --test-freq 400 --save-freq 1 --epochs 420 --out all_experiments --write-first True --test-first False"
|
192 |
+
```
|
193 |
+
|
194 |
+
The training schemes can be written as follows (the names differ slightly from the presentation in the paper):
|
195 |
+
|
196 |
+
* Baseline: LinMix(a=Point(), b=Box(w=Lin(0,0.031373,150,10)), bw=Lin(0,0.5,150,10))
|
197 |
+
* InSamp: LinMix(a=Point(), b=InSamp(Lin(0,1,150,10)), bw=Lin(0,0.5, 150,10))
|
198 |
+
* InSampLPA: LinMix(a=Point(), b=InSamp(Lin(0,1,150,20), w=Lin(0,0.031373, 150, 20)), bw=Lin(0,0.5, 150, 20))
|
199 |
+
* Adv_{1}ISLPA: LinMix(a=IFGSM(w=Lin(0,0.031373,20,20), k=1), b=InSamp(Lin(0,1,150,10), w=Lin(0,0.031373,150,10)), bw=Lin(0,0.5,150,10))
|
200 |
+
* Adv_{3}ISLPA: LinMix(a=IFGSM(w=Lin(0,0.031373,20,20), k=3), b=InSamp(Lin(0,1,150,10), w=Lin(0,0.031373,150,10)), bw=Lin(0,0.5,150,10))
|
201 |
+
* Baseline_{18}: LinMix(a=Point(), b=InSamp(Lin(0,1,200,40)), bw=Lin(0,0.5,200,40))
|
202 |
+
* InSamp_{18}: LinMix(a=IFGSM(w=Lin(0,0.031373,20,20)), b=InSamp(Lin(0,1,200,40)), bw=Lin(0,0.5,200,40))
|
203 |
+
* Adv_{5}IS_{18}: LinMix(b=InSamp(Lin(0,1,200,40)), bw=Lin(0,0.5, 200, 40))
|
204 |
+
* BiAdv_L: LinMix(a=IFGSM(k=2), b=BiAdv(a=IFGSM(k=3, w=Lin(0,0.031373, 150, 30)), b=Box()), bw=Lin(0,0.6, 200, 30))
|
205 |
+
|
206 |
+
To test a saved network as in the paper, use the following command:
|
207 |
+
|
208 |
+
```
|
209 |
+
python . -D CIFAR10 -n ResNetLarge_LargeCombo -d Point --width 0.031373 --normalize-layer True --clip-norm False -t 'MI_FGSM(k=20,r=2)' -t HBox --test-size 10000 --test-batch-size 200 --epochs 1 --test NAMEOFSAVEDNET.pynet
|
210 |
+
```
|
211 |
+
|
212 |
+
About
|
213 |
+
-----
|
214 |
+
|
215 |
+
* DiffAI is now on version 3.0.
|
216 |
+
* This repository contains the code used for the experiments in the [2019 ArXiV Paper](https://arxiv.org/abs/1903.12519).
|
217 |
+
* To reproduce the experiments from the 2018 ICML paper [Differentiable Abstract Interpretation for Provably Robust Neural Networks](https://files.sri.inf.ethz.ch/website/papers/icml18-diffai.pdf), one must download the [source code for Version 1.0](https://github.com/eth-sri/diffai/releases/tag/v1.0)
|
218 |
+
* Further information and related projects can be found at [the SafeAI Project](http://safeai.ethz.ch/)
|
219 |
+
* [High level slides](https://files.sri.inf.ethz.ch/website/slides/mirman2018differentiable.pdf)
|
220 |
+
|
221 |
+
Citing This Framework
|
222 |
+
---------------------
|
223 |
+
|
224 |
+
```
|
225 |
+
@inproceedings{
|
226 |
+
title={Differentiable Abstract Interpretation for Provably Robust Neural Networks},
|
227 |
+
author={Mirman, Matthew and Gehr, Timon and Vechev, Martin},
|
228 |
+
booktitle={International Conference on Machine Learning (ICML)},
|
229 |
+
year={2018},
|
230 |
+
url={https://www.icml.cc/Conferences/2018/Schedule?showEvent=2477},
|
231 |
+
}
|
232 |
+
```
|
233 |
+
|
234 |
+
Contributors
|
235 |
+
------------
|
236 |
+
|
237 |
+
* [Matthew Mirman](https://www.mirman.com) - [email protected]
|
238 |
+
* [Gagandeep Singh](https://www.sri.inf.ethz.ch/people/gagandeep) - [email protected]
|
239 |
+
* [Timon Gehr](https://www.sri.inf.ethz.ch/tg.php) - [email protected]
|
240 |
+
* Marc Fischer - [email protected]
|
241 |
+
* [Martin Vechev](https://www.sri.inf.ethz.ch/vechev.php) - [email protected]
|
242 |
+
|
243 |
+
|
244 |
+
|
245 |
+
License and Copyright
|
246 |
+
---------------------
|
247 |
+
|
248 |
+
* Copyright (c) 2018 [Secure, Reliable, and Intelligent Systems Lab (SRI), ETH Zurich](https://www.sri.inf.ethz.ch/)
|
249 |
+
* Licensed under the [MIT License](https://opensource.org/licenses/MIT)
|
__init__.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import sys

# Absolute directory containing this package, resolved through ~ expansion and
# any symlinks, so sibling modules (helpers, goals, ...) import correctly no
# matter which working directory the package is invoked from.
SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__))))
# BUGFIX: removed stray debug `print(SCRIPT_DIR)` — importing the package
# should not write to stdout.
sys.path.append(SCRIPT_DIR)
__main__.py
ADDED
@@ -0,0 +1,561 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import future
|
2 |
+
import builtins
|
3 |
+
import past
|
4 |
+
import six
|
5 |
+
import copy
|
6 |
+
|
7 |
+
from timeit import default_timer as timer
|
8 |
+
from datetime import datetime
|
9 |
+
import argparse
|
10 |
+
import torch
|
11 |
+
import torch.nn as nn
|
12 |
+
import torch.nn.functional as F
|
13 |
+
import torch.optim as optim
|
14 |
+
from torchvision import datasets
|
15 |
+
from torch.utils.data import Dataset
|
16 |
+
import decimal
|
17 |
+
import torch.onnx
|
18 |
+
|
19 |
+
|
20 |
+
import inspect
|
21 |
+
from inspect import getargspec
|
22 |
+
import os
|
23 |
+
import helpers as h
|
24 |
+
from helpers import Timer
|
25 |
+
import copy
|
26 |
+
import random
|
27 |
+
|
28 |
+
from components import *
|
29 |
+
import models
|
30 |
+
|
31 |
+
import goals
|
32 |
+
import scheduling
|
33 |
+
|
34 |
+
from goals import *
|
35 |
+
from scheduling import *
|
36 |
+
|
37 |
+
import math
|
38 |
+
|
39 |
+
import warnings
|
40 |
+
from torch.serialization import SourceChangeWarning
|
41 |
+
|
42 |
+
POINT_DOMAINS = [m for m in h.getMethods(goals) if issubclass(m, goals.Point)]
|
43 |
+
SYMETRIC_DOMAINS = [goals.Box] + POINT_DOMAINS
|
44 |
+
|
45 |
+
|
46 |
+
datasets.Imagenet12 = None
|
47 |
+
|
48 |
+
class Top(nn.Module):
|
49 |
+
def __init__(self, args, net, ty = Point):
|
50 |
+
super(Top, self).__init__()
|
51 |
+
self.net = net
|
52 |
+
self.ty = ty
|
53 |
+
self.w = args.width
|
54 |
+
self.global_num = 0
|
55 |
+
self.getSpec = getattr(self, args.spec)
|
56 |
+
self.sub_batch_size = args.sub_batch_size
|
57 |
+
self.curve_width = args.curve_width
|
58 |
+
self.regularize = args.regularize
|
59 |
+
|
60 |
+
|
61 |
+
self.speedCount = 0
|
62 |
+
self.speed = 0.0
|
63 |
+
|
64 |
+
def addSpeed(self, s):
|
65 |
+
self.speed = (s + self.speed * self.speedCount) / (self.speedCount + 1)
|
66 |
+
self.speedCount += 1
|
67 |
+
|
68 |
+
def forward(self, x):
|
69 |
+
return self.net(x)
|
70 |
+
|
71 |
+
def clip_norm(self):
|
72 |
+
self.net.clip_norm()
|
73 |
+
|
74 |
+
def boxSpec(self, x, target, **kargs):
|
75 |
+
return [(self.ty.box(x, w = self.w, model=self, target=target, untargeted=True, **kargs).to_dtype(), target)]
|
76 |
+
|
77 |
+
def curveSpec(self, x, target, **kargs):
|
78 |
+
if self.ty.__class__ in SYMETRIC_DOMAINS:
|
79 |
+
return self.boxSpec(x,target, **kargs)
|
80 |
+
|
81 |
+
|
82 |
+
batch_size = x.size()[0]
|
83 |
+
|
84 |
+
newTargs = [ None for i in range(batch_size) ]
|
85 |
+
newSpecs = [ None for i in range(batch_size) ]
|
86 |
+
bestSpecs = [ None for i in range(batch_size) ]
|
87 |
+
|
88 |
+
for i in range(batch_size):
|
89 |
+
newTarg = target[i]
|
90 |
+
newTargs[i] = newTarg
|
91 |
+
newSpec = x[i]
|
92 |
+
|
93 |
+
best_x = newSpec
|
94 |
+
best_dist = float("inf")
|
95 |
+
for j in range(batch_size):
|
96 |
+
potTarg = target[j]
|
97 |
+
potSpec = x[j]
|
98 |
+
if (not newTarg.data.equal(potTarg.data)) or i == j:
|
99 |
+
continue
|
100 |
+
curr_dist = (newSpec - potSpec).norm(1).item() # must experiment with the type of norm here
|
101 |
+
if curr_dist <= best_dist:
|
102 |
+
best_x = potSpec
|
103 |
+
|
104 |
+
newSpecs[i] = newSpec
|
105 |
+
bestSpecs[i] = best_x
|
106 |
+
|
107 |
+
new_batch_size = self.sub_batch_size
|
108 |
+
batchedTargs = h.chunks(newTargs, new_batch_size)
|
109 |
+
batchedSpecs = h.chunks(newSpecs, new_batch_size)
|
110 |
+
batchedBest = h.chunks(bestSpecs, new_batch_size)
|
111 |
+
|
112 |
+
def batch(t,s,b):
|
113 |
+
t = h.lten(t)
|
114 |
+
s = torch.stack(s)
|
115 |
+
b = torch.stack(b)
|
116 |
+
|
117 |
+
if h.use_cuda:
|
118 |
+
t.cuda()
|
119 |
+
s.cuda()
|
120 |
+
b.cuda()
|
121 |
+
|
122 |
+
m = self.ty.line(s, b, w = self.curve_width, **kargs)
|
123 |
+
return (m , t)
|
124 |
+
|
125 |
+
return [batch(t,s,b) for t,s,b in zip(batchedTargs, batchedSpecs, batchedBest)]
|
126 |
+
|
127 |
+
|
128 |
+
def regLoss(self):
|
129 |
+
if self.regularize is None or self.regularize <= 0.0:
|
130 |
+
return 0
|
131 |
+
reg_loss = 0
|
132 |
+
r = self.net.regularize(2)
|
133 |
+
return self.regularize * r
|
134 |
+
|
135 |
+
def aiLoss(self, dom, target, **args):
|
136 |
+
r = self(dom)
|
137 |
+
return self.regLoss() + r.loss(target = target, **args)
|
138 |
+
|
139 |
+
def printNet(self, f):
    """Write a human-readable description of the wrapped net to file-like f."""
    self.net.printNet(f)
|
141 |
+
|
142 |
+
|
143 |
+
# Training settings
# Command-line interface for DiffAI training/testing; every flag below becomes
# an attribute of `args` and is also forwarded to models via `vargs`.
parser = argparse.ArgumentParser(description='PyTorch DiffAI Example', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--batch-size', type=int, default=10, metavar='N', help='input batch size for training')
parser.add_argument('--test-first', type=h.str2bool, nargs='?', const=True, default=True, help='test first')
parser.add_argument('--test-freq', type=int, default=1, metavar='N', help='number of epochs to skip before testing')
parser.add_argument('--test-batch-size', type=int, default=10, metavar='N', help='input batch size for testing')
parser.add_argument('--sub-batch-size', type=int, default=3, metavar='N', help='input batch size for curve specs')

parser.add_argument('--custom-schedule', type=str, default="", metavar='net', help='Learning rate scheduling for lr-multistep. Defaults to [200,250,300] for CIFAR10 and [15,25] for everything else.')

parser.add_argument('--test', type=str, default=None, metavar='net', help='Saved net to use, in addition to any other nets you specify with -n')
parser.add_argument('--update-test-net', type=h.str2bool, nargs='?', const=True, default=False, help="should update test net")

# Optimizer / export options.
parser.add_argument('--sgd',type=h.str2bool, nargs='?', const=True, default=False, help="use sgd instead of adam")
parser.add_argument('--onyx', type=h.str2bool, nargs='?', const=True, default=False, help="should output onyx")
parser.add_argument('--save-dot-net', type=h.str2bool, nargs='?', const=True, default=False, help="should output in .net")
parser.add_argument('--update-test-net-name', type=str, choices = h.getMethodNames(models), default=None, help="update test net name")

parser.add_argument('--normalize-layer', type=h.str2bool, nargs='?', const=True, default=True, help="should include a training set specific normalization layer")
parser.add_argument('--clip-norm', type=h.str2bool, nargs='?', const=True, default=False, help="should clip the normal and use normal decomposition for weights")

parser.add_argument('--epochs', type=int, default=1000, metavar='N', help='number of epochs to train')
parser.add_argument('--log-freq', type=int, default=10, metavar='N', help='The frequency with which log statistics are printed')
parser.add_argument('--save-freq', type=int, default=1, metavar='N', help='The frequency with which nets and images are saved, in terms of number of test passes')
parser.add_argument('--number-save-images', type=int, default=0, metavar='N', help='The number of images to save. Should be smaller than test-size.')

parser.add_argument('--lr', type=float, default=0.001, metavar='LR', help='learning rate')
parser.add_argument('--lr-multistep', type=h.str2bool, nargs='?', const=True, default=False, help='learning rate multistep scheduling')

# ReduceLROnPlateau parameters (used when --lr-multistep is off).
parser.add_argument('--threshold', type=float, default=-0.01, metavar='TH', help='threshold for lr schedule')
parser.add_argument('--patience', type=int, default=0, metavar='PT', help='patience for lr schedule')
parser.add_argument('--factor', type=float, default=0.5, metavar='R', help='reduction multiplier for lr schedule')
parser.add_argument('--max-norm', type=float, default=10000, metavar='MN', help='the maximum norm allowed in weight distribution')


parser.add_argument('--curve-width', type=float, default=None, metavar='CW', help='the width of the curve spec')

parser.add_argument('--width', type=float, default=0.01, metavar='CW', help='the width of either the line or box')
# Spec builders are discovered by reflection on Top: any 3-arg method ending in "Spec".
# NOTE(review): inspect.getargspec is deprecated (removed in Python 3.11);
# getfullargspec would be the modern equivalent.
parser.add_argument('--spec', choices = [ x for x in dir(Top) if x[-4:] == "Spec" and len(getargspec(getattr(Top, x)).args) == 3]
                    , default="boxSpec", help='picks which spec builder function to use for training')


parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed')
parser.add_argument("--use-schedule", type=h.str2bool, nargs='?',
                    const=True, default=False,
                    help="activate learning rate schedule")

# Domains are parsed by the custom h.SubAct action into nested value lists.
parser.add_argument('-d', '--domain', sub_choices = None, action = h.SubAct
                    , default=[], help='picks which abstract goals to use for training', required=True)

parser.add_argument('-t', '--test-domain', sub_choices = None, action = h.SubAct
                    , default=[], help='picks which abstract goals to use for testing. Examples include ' + str(goals), required=True)

parser.add_argument('-n', '--net', choices = h.getMethodNames(models), action = 'append'
                    , default=[], help='picks which net to use for training') # one net for now

parser.add_argument('-D', '--dataset', choices = [n for (n,k) in inspect.getmembers(datasets, inspect.isclass) if issubclass(k, Dataset)]
                    , default="MNIST", help='picks which dataset to use.')

parser.add_argument('-o', '--out', default="out", help='picks which net to use for training')
parser.add_argument('--dont-write', type=h.str2bool, nargs='?', const=True, default=False, help='dont write anywhere if this flag is on')
parser.add_argument('--write-first', type=h.str2bool, nargs='?', const=True, default=False, help='write the initial net. Useful for comparing algorithms, a pain for testing.')
parser.add_argument('--test-size', type=int, default=2000, help='number of examples to test with')

parser.add_argument('-r', '--regularize', type=float, default=None, help='use regularization')


args = parser.parse_args()

# Column widths for aligned log output.
largest_domain = max([len(h.catStrs(d)) for d in (args.domain)] )
largest_test_domain = max([len(h.catStrs(d)) for d in (args.test_domain)] )

# Log roughly log_freq times per 50k examples.
args.log_interval = int(50000 / (args.batch_size * args.log_freq))

h.max_c_for_norm = args.max_norm

if h.use_cuda:
    torch.cuda.manual_seed(1 + args.seed)
else:
    torch.manual_seed(args.seed)

train_loader = h.loadDataset(args.dataset, args.batch_size, True, False)
test_loader = h.loadDataset(args.dataset, args.test_batch_size, False, False)

input_dims = train_loader.dataset[0][0].size()
# Infer the class count from the labels; SVHN stores them under 'labels'.
# NOTE(review): 'train_labels' is deprecated in recent torchvision — confirm version.
num_classes = int(max(getattr(train_loader.dataset, 'train_labels' if args.dataset != "SVHN" else 'labels'))) + 1

print("input_dims: ", input_dims)
print("Num classes: ", num_classes)

vargs = vars(args)

# Global counter driving the time-based (Lin/scheduling) spec parameters.
total_batches_seen = 0
|
236 |
+
|
237 |
+
def train(epoch, models):
    """Run one training epoch over train_loader for every model in `models`.

    `time` (fractional epochs seen) is threaded through to the spec builders
    so scheduled parameters (e.g. Lin widths) can anneal per batch.
    """
    global total_batches_seen

    for model in models:
        model.train()

    for batch_idx, (data, target) in enumerate(train_loader):
        total_batches_seen += 1
        time = float(total_batches_seen) / len(train_loader)
        if h.use_cuda:
            data, target = data.cuda(), target.cuda()

        for model in models:
            model.global_num += data.size()[0]

            timer = Timer("train a sample from " + model.name + " with " + model.ty.name, data.size()[0], False)
            lossy = 0
            with timer:
                # One optimizer step per sub-spec produced by getSpec.
                for s in model.getSpec(data.to_dtype(),target, time = time):
                    model.optimizer.zero_grad()
                    loss = model.aiLoss(*s, time = time, **vargs).mean(dim=0)
                    lossy += loss.detach().item()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
                    # Replace NaN gradients with small random noise instead of
                    # aborting; certified training can produce transient NaNs.
                    for p in model.parameters():
                        if p is not None and torch.isnan(p).any():
                            print("Such nan in vals")
                        if p is not None and p.grad is not None and torch.isnan(p.grad).any():
                            print("Such nan in postmagic")
                            stdv = 1 / math.sqrt(h.product(p.data.shape))
                            p.grad = torch.where(torch.isnan(p.grad), torch.normal(mean=h.zeros(p.grad.shape), std=stdv), p.grad)

                    model.optimizer.step()

                    # Same NaN patching for the weights after the step.
                    for p in model.parameters():
                        if p is not None and torch.isnan(p).any():
                            print("Such nan in vals after grad")
                            stdv = 1 / math.sqrt(h.product(p.data.shape))
                            p.data = torch.where(torch.isnan(p.data), torch.normal(mean=h.zeros(p.data.shape), std=stdv), p.data)

                    if args.clip_norm:
                        model.clip_norm()
                    # After clipping a NaN is unrecoverable — fail loudly.
                    for p in model.parameters():
                        if p is not None and torch.isnan(p).any():
                            raise Exception("Such nan in vals after clip")

            model.addSpeed(timer.getUnitTime())

            if batch_idx % args.log_interval == 0:
                print(('Train Epoch {:12} {:'+ str(largest_domain) +'}: {:3} [{:7}/{} ({:.0f}%)] \tAvg sec/ex {:1.8f}\tLoss: {:.6f}').format(
                    model.name, model.ty.name,
                    epoch,
                    batch_idx * len(data), len(train_loader.dataset), 100. * batch_idx / len(train_loader),
                    model.speed,
                    lossy))
|
292 |
+
|
293 |
+
|
294 |
+
# Number of completed test passes; drives checkpoint/image-saving cadence.
num_tests = 0
def test(models, epoch, f = None):
    """Evaluate every model: plain accuracy plus, per test domain, the
    fraction of examples proved robust.  Also steps LR schedules, saves
    checkpoints, and (on the first pass) dumps sample images."""
    global num_tests
    num_tests += 1
    class MStat:
        # Per-model accumulator: accuracy counter plus one Stat per test domain.
        def __init__(self, model):
            model.eval()
            self.model = model
            self.correct = 0
            class Stat:
                # Per-domain accumulator of width/safety/proof counts and time.
                def __init__(self, d, dnm):
                    self.domain = d
                    self.name = dnm
                    self.width = 0
                    self.max_eps = None
                    self.safe = 0
                    self.proved = 0
                    self.time = 0
            self.domains = [ Stat(h.parseValues(d, goals), h.catStrs(d)) for d in args.test_domain ]
    model_stats = [ MStat(m) for m in models ]

    num_its = 0
    saved_data_target = []
    for data, target in test_loader:
        if num_its >= args.test_size:
            break

        # Keep the first pass's examples around for image dumping below.
        if num_tests == 1:
            saved_data_target += list(zip(list(data), list(target)))

        num_its += data.size()[0]
        if h.use_cuda:
            data, target = data.cuda().to_dtype(), target.cuda()

        for m in model_stats:

            with torch.no_grad():
                pred = m.model(data).vanillaTensorPart().max(1, keepdim=True)[1] # get the index of the max log-probability
                m.correct += pred.eq(target.data.view_as(pred)).sum()

            for stat in m.domains:
                timer = Timer(shouldPrint = False)
                with timer:
                    def calcData(data, target):
                        # Verify one (sub-)batch under this domain's perturbation box.
                        box = stat.domain.box(data, w = m.model.w, model=m.model, untargeted = True, target=target).to_dtype()
                        with torch.no_grad():
                            bs = m.model(box)
                            org = m.model(data).vanillaTensorPart().max(1,keepdim=True)[1]
                            stat.width += bs.diameter().sum().item() # sum up batch loss
                            # proved: stable w.r.t. the model's own prediction;
                            # safe: provably classifies as the true label.
                            stat.proved += bs.isSafe(org).sum().item()
                            stat.safe += bs.isSafe(target).sum().item()
                            # stat.max_eps += 0 # TODO: calculate max_eps

                    # Large non-symmetric domains are verified one example at a
                    # time to bound memory.
                    if m.model.net.neuronCount() < 5000 or stat.domain in SYMETRIC_DOMAINS:
                        calcData(data, target)
                    else:
                        for d,t in zip(data, target):
                            calcData(d.unsqueeze(0),t.unsqueeze(0))
                stat.time += timer.getUnitTime()

    l = num_its # len(test_loader.dataset)
    for m in model_stats:
        if args.lr_multistep:
            m.model.lrschedule.step()

        pr_corr = float(m.correct) / float(l)
        if args.use_schedule:
            # Plateau scheduler keyed on error rate.
            m.model.lrschedule.step(1 - pr_corr)

        h.printBoth(('Test: {:12} trained with {:'+ str(largest_domain) +'} - Avg sec/ex {:1.12f}, Accuracy: {}/{} ({:3.1f}%)').format(
            m.model.name, m.model.ty.name,
            m.model.speed,
            m.correct, l, 100. * pr_corr), f = f)

        model_stat_rec = ""
        for stat in m.domains:
            pr_safe = stat.safe / l
            pr_proved = stat.proved / l
            pr_corr_given_proved = pr_safe / pr_proved if pr_proved > 0 else 0.0
            h.printBoth(("\t{:" + str(largest_test_domain)+"} - Width: {:<36.16f} Pr[Proved]={:<1.3f} Pr[Corr and Proved]={:<1.3f} Pr[Corr|Proved]={:<1.3f} {}Time = {:<7.5f}" ).format(
                stat.name,
                stat.width / l,
                pr_proved,
                pr_safe, pr_corr_given_proved,
                "AvgMaxEps: {:1.10f} ".format(stat.max_eps / l) if stat.max_eps is not None else "",
                stat.time), f = f)
            model_stat_rec += "{}_{:1.3f}_{:1.3f}_{:1.3f}__".format(stat.name, pr_proved, pr_safe, pr_corr_given_proved)
        # Sanitize the domain name for use in a filename.
        prepedname = m.model.ty.name.replace(" ", "_").replace(",", "").replace("(", "_").replace(")", "_").replace("=", "_")
        net_file = os.path.join(out_dir, m.model.name +"__" +prepedname + "_checkpoint_"+str(epoch)+"_with_{:1.3f}".format(pr_corr))

        h.printBoth("\tSaving netfile: {}\n".format(net_file + ".pynet"), f = f)

        if (num_tests % args.save_freq == 1 or args.save_freq == 1) and not args.dont_write and (num_tests > 1 or args.write_first):
            print("Actually Saving")
            torch.save(m.model.net, net_file + ".pynet")
            if args.save_dot_net:
                with h.mopen(args.dont_write, net_file + ".net", "w") as f2:
                    m.model.net.printNet(f2)
                    f2.close()
            if args.onyx:
                # Export an ONNX copy with the normalization layer stripped.
                nn = copy.deepcopy(m.model.net)
                nn.remove_norm()
                torch.onnx.export(nn, h.zeros([1] + list(input_dims)), net_file + ".onyx",
                                  verbose=False, input_names=["actual_input"] + ["param"+str(i) for i in range(len(list(nn.parameters())))], output_names=["output"])


    if num_tests == 1 and not args.dont_write:
        # First test pass only: dump the first number_save_images examples as
        # plain-text .img/.class files for external verifiers.
        img_dir = os.path.join(out_dir, "images")
        if not os.path.exists(img_dir):
            os.makedirs(img_dir)
        for img_num,(img,target) in zip(range(args.number_save_images), saved_data_target[:args.number_save_images]):
            sz = ""
            for s in img.size():
                sz += str(s) + "x"
            sz = sz[:-1]

            img_file = os.path.join(img_dir, args.dataset + "_" + sz + "_"+ str(img_num))
            if img_num == 0:
                print("Saving image to: ", img_file + ".img")
            with open(img_file + ".img", "w") as imgfile:
                flatimg = img.view(h.product(img.size()))
                for t in flatimg.cpu():
                    print(decimal.Decimal(float(t)).__format__("f"), file=imgfile)
            with open(img_file + ".class" , "w") as imgfile:
                print(int(target.item()), file=imgfile)
|
419 |
+
|
420 |
+
def createModel(net, domain, domain_name):
    """Build a Top model around a fresh copy of `net` trained with `domain`.

    `net` is a (reference_net, constructor) pair: the constructor builds a new
    instance and the reference net's weights are copied into it, so models for
    different domains start from identical parameters.
    """
    net_weights, net_create = net
    domain.name = domain_name

    net = net_create()
    m = {}
    for (k,v) in net_weights.state_dict().items():
        m[k] = v.to_dtype()
    net.load_state_dict(m)

    model = Top(args, net, domain)
    if args.clip_norm:
        model.clip_norm()
    if h.use_cuda:
        model.cuda()
    if args.sgd:
        model.optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    else:
        model.optimizer = optim.Adam(model.parameters(), lr=args.lr)

    if args.lr_multistep:
        # HACK: eval() on a user-supplied CLI string — only safe because the
        # schedule comes from the local operator; do not expose to untrusted input.
        model.lrschedule = optim.lr_scheduler.MultiStepLR(
            model.optimizer,
            gamma = 0.1,
            milestones = eval(args.custom_schedule) if args.custom_schedule != "" else ([200, 250, 300] if args.dataset == "CIFAR10" else [15, 25]))
    else:
        model.lrschedule = optim.lr_scheduler.ReduceLROnPlateau(
            model.optimizer,
            'min',
            patience=args.patience,
            threshold= args.threshold,
            min_lr=0.000001,
            factor=args.factor,
            verbose=True)

    net.name = net_create.__name__
    model.name = net_create.__name__

    return model
|
459 |
+
|
460 |
+
# Unique, timestamped output directory encoding dataset/nets/spec/width.
out_dir = os.path.join(args.out, args.dataset, str(args.net)[1:-1].replace(", ","_").replace("'",""),
                       args.spec, "width_"+str(args.width), h.file_timestamp() )

print("Saving to:", out_dir)

if not os.path.exists(out_dir) and not args.dont_write:
    os.makedirs(out_dir)

# Record the full configuration (also echoed to stdout via printBoth).
print("Starting Training with:")
with h.mopen(args.dont_write, os.path.join(out_dir, "config.txt"), "w") as f:
    for k in sorted(vars(args)):
        h.printBoth("\t"+k+": "+str(getattr(args,k)), f = f)
    print("")
|
473 |
+
|
474 |
+
def buildNet(n):
    """Instantiate net-constructor `n` for num_classes outputs, optionally
    prepend a dataset-specific normalization layer, and infer layer shapes
    from input_dims."""
    n = n(num_classes)
    if args.normalize_layer:
        # Per-channel mean/std are the standard published statistics for each dataset.
        if args.dataset in ["MNIST"]:
            n = Seq(Normalize([0.1307], [0.3081] ), n)
        elif args.dataset in ["CIFAR10", "CIFAR100"]:
            n = Seq(Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]), n)
        elif args.dataset in ["SVHN"]:
            n = Seq(Normalize([0.5,0.5,0.5], [0.2, 0.2, 0.2]), n)
        elif args.dataset in ["Imagenet12"]:
            n = Seq(Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225]), n)
    n = n.infer(input_dims)
    if args.clip_norm:
        n.clip_norm()
    return n
|
489 |
+
|
490 |
+
# Net selection: either reload a saved net (--test), possibly re-templated onto
# a named architecture, or build fresh nets from the -n list.
if not args.test is None:

    test_name = None

    def loadedNet():
        # Thunk producing a fresh net: from the named architecture once
        # test_name is set, otherwise by unpickling the saved file.
        if test_name is not None:
            n = getattr(models,test_name)
            n = buildNet(n)
            if args.clip_norm:
                n.clip_norm()
            return n
        else:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", SourceChangeWarning)
                return torch.load(args.test)

    net = loadedNet().double() if h.dtype == torch.float64 else loadedNet().float()


    if args.update_test_net_name is not None:
        test_name = args.update_test_net_name
    elif args.update_test_net and '__name__' in dir(net):
        test_name = net.__name__

    if test_name is not None:
        loadedNet.__name__ = test_name

    nets = [ (net, loadedNet) ]

elif args.net == []:
    raise Exception("Need to specify at least one net with either -n or --test")
else:
    nets = []

for n in args.net:
    m = getattr(models,n)
    # Bind m via a default-argument-style closure so each thunk keeps its own net.
    net_create = (lambda m: lambda: buildNet(m))(m) # why doesn't python do scoping right? This is a thunk. It is bad.
    net_create.__name__ = n
    net = buildNet(m)
    net.__name__ = n
    nets += [ (net, net_create) ]

    print("Name: ", net_create.__name__)
    print("Number of Neurons (relus): ", net.neuronCount())
    print("Number of Parameters: ", sum([h.product(s.size()) for s in net.parameters()]))
    print("Depth (relu layers): ", net.depth())
    print()
    net.showNet()
    print()


# One model per (net, training domain) pair.
if args.domain == []:
    models = [ createModel(net, goals.Box(args.width), "Box") for net in nets]
else:
    models = h.flat([[createModel(net, h.parseValues(d, goals, scheduling), h.catStrs(d)) for net in nets] for d in args.domain])


# Main loop: test (per test_freq, optionally before the first epoch) then train.
with h.mopen(args.dont_write, os.path.join(out_dir, "log.txt"), "w") as f:
    startTime = timer()
    for epoch in range(1, args.epochs + 1):
        if f is not None:
            f.flush()
        if (epoch - 1) % args.test_freq == 0 and (epoch > 1 or args.test_first):
            with Timer("test all models before epoch "+str(epoch), 1):
                test(models, epoch, f)
            if f is not None:
                f.flush()
        h.printBoth("Elapsed-Time: {:.2f}s\n".format(timer() - startTime), f = f)
        if args.epochs <= args.test_freq:
            break
        with Timer("train all models in epoch", 1, f = f):
            train(epoch, models)
|
ai.py
ADDED
@@ -0,0 +1,1064 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import future
|
2 |
+
import builtins
|
3 |
+
import past
|
4 |
+
import six
|
5 |
+
|
6 |
+
import torch
|
7 |
+
import torch.nn as nn
|
8 |
+
import torch.nn.functional as F
|
9 |
+
import torch.optim as optim
|
10 |
+
import torch.autograd
|
11 |
+
|
12 |
+
from functools import reduce
|
13 |
+
|
14 |
+
try:
|
15 |
+
from . import helpers as h
|
16 |
+
except:
|
17 |
+
import helpers as h
|
18 |
+
|
19 |
+
|
20 |
+
|
21 |
+
def catNonNullErrors(op, ref_errs=None): # the way of things is ugly
    """Lift binary `op` over two error-term tensors (error terms stacked on
    dim 0) that may carry different numbers of error terms.

    The shared prefix of error terms is combined directly; each tensor's
    surplus terms are combined against zeros so no error term is dropped.
    `ref_errs`, when given, fixes the shared-prefix length instead of
    min(len(er1), len(er2)).
    """
    def doop(er1, er2):
        erS, erL = (er1, er2)
        sS, sL = (erS.size()[0], erL.size()[0])

        if sS == sL: # TODO: here we know we used transformers on either side which didnt introduce new error terms (this is a hack for hybrid zonotopes and doesn't work with adaptive error term adding).
            return op(erS, erL)

        if ref_errs is not None:
            sz = ref_errs.size()[0]
        else:
            sz = min(sS, sL)

        p1 = op(erS[:sz], erL[:sz])
        erSrem = erS[sz:]
        # BUG FIX: was `erS[sz:]`, which silently discarded erL's surplus
        # error terms (and produced a wrongly-sized p3).
        erLrem = erL[sz:]
        p2 = op(erSrem, h.zeros(erSrem.shape))
        p3 = op(h.zeros(erLrem.shape), erLrem)
        return torch.cat((p1, p2, p3), dim=0)
    return doop
|
41 |
+
|
42 |
+
def creluBoxy(dom):
    """Abstract ReLU transformer: crossing-zero neurons collapse to a box
    term (center ub/2, radius ub/2); others keep their zonotope form."""
    if dom.errors is None:
        if dom.beta is None:
            # Concrete point: ordinary ReLU.
            return dom.new(F.relu(dom.head), None, None)
        # Pure box (beta only): interval ReLU, re-centered.
        er = dom.beta
        mx = F.relu(dom.head + er)
        mn = F.relu(dom.head - er)
        return dom.new((mn + mx) / 2, (mx - mn) / 2 , None)

    aber = torch.abs(dom.errors)

    # Total deviation radius per neuron: sum of |error terms| (+ beta).
    sm = torch.sum(aber, 0)

    if not dom.beta is None:
        sm += dom.beta

    mx = dom.head + sm
    mn = dom.head - sm

    # A neuron needs the box approximation only when its bounds straddle 0.
    should_box = mn.lt(0) * mx.gt(0)
    gtz = dom.head.gt(0).to_dtype()
    mx /= 2  # in-place: from here `mx` is ub/2, used as both new center and radius
    newhead = h.ifThenElse(should_box, mx, gtz * dom.head)
    newbeta = h.ifThenElse(should_box, mx, gtz * (dom.beta if not dom.beta is None else 0))
    # Boxed neurons lose their correlated error terms; stably-negative ones zero out.
    newerr = (1 - should_box.to_dtype()) * gtz * dom.errors

    return dom.new(newhead, newbeta , newerr)
|
69 |
+
|
70 |
+
|
71 |
+
def creluBoxySound(dom):
    """Like creluBoxy, but pads the box radius by 2e-6 so the result stays
    sound under floating-point rounding."""
    if dom.errors is None:
        if dom.beta is None:
            return dom.new(F.relu(dom.head), None, None)
        er = dom.beta
        mx = F.relu(dom.head + er)
        mn = F.relu(dom.head - er)
        return dom.new((mn + mx) / 2, (mx - mn) / 2 + 2e-6 , None)

    aber = torch.abs(dom.errors)

    # Total deviation radius per neuron.
    sm = torch.sum(aber, 0)

    if not dom.beta is None:
        sm += dom.beta

    mx = dom.head + sm
    mn = dom.head - sm

    should_box = mn.lt(0) * mx.gt(0)
    gtz = dom.head.gt(0).to_dtype()
    mx /= 2  # in-place: `mx` is ub/2 from here on
    newhead = h.ifThenElse(should_box, mx, gtz * dom.head)
    # Soundness slack added to the radius of boxed neurons.
    newbeta = h.ifThenElse(should_box, mx + 2e-6, gtz * (dom.beta if not dom.beta is None else 0))
    newerr = (1 - should_box.to_dtype()) * gtz * dom.errors

    return dom.new(newhead, newbeta, newerr)
|
98 |
+
|
99 |
+
|
100 |
+
def creluSwitch(dom):
    """Abstract ReLU that picks, per crossing neuron, the cheaper of two
    approximations: a box (when |lb| > ub) or a shifted zonotope."""
    if dom.errors is None:
        if dom.beta is None:
            return dom.new(F.relu(dom.head), None, None)
        er = dom.beta
        mx = F.relu(dom.head + er)
        mn = F.relu(dom.head - er)
        return dom.new((mn + mx) / 2, (mx - mn) / 2 , None)

    aber = torch.abs(dom.errors)

    sm = torch.sum(aber, 0)

    if not dom.beta is None:
        sm += dom.beta

    mn = dom.head - sm
    mx = sm
    mx += dom.head  # in-place: mx is now the upper bound

    should_box = mn.lt(0) * mx.gt(0)
    gtz = dom.head.gt(0)

    mn.neg_()  # in-place: mn is now -lb = |lb| for crossing neurons
    should_boxer = mn.gt(mx)  # |lb| > ub: full box is tighter than shifting

    mn /= 2  # in-place: mn is now |lb|/2, the shift applied in the zonotope branch
    newhead = h.ifThenElse(should_box, h.ifThenElse(should_boxer, mx / 2, dom.head + mn ), gtz.to_dtype() * dom.head)
    zbet = dom.beta if not dom.beta is None else 0
    newbeta = h.ifThenElse(should_box, h.ifThenElse(should_boxer, mx / 2, mn + zbet), gtz.to_dtype() * zbet)
    # Keep error terms only where we did not fall back to a pure box.
    newerr = h.ifThenElseL(should_box, 1 - should_boxer, gtz).to_dtype() * dom.errors

    return dom.new(newhead, newbeta , newerr)
|
133 |
+
|
134 |
+
def creluSmooth(dom):
    """Abstract ReLU that blends the shift approximation and the box
    approximation smoothly, weighted by t = |lb| / (ub + |lb|)."""
    if dom.errors is None:
        if dom.beta is None:
            return dom.new(F.relu(dom.head), None, None)
        er = dom.beta
        mx = F.relu(dom.head + er)
        mn = F.relu(dom.head - er)
        return dom.new((mn + mx) / 2, (mx - mn) / 2 , None)

    aber = torch.abs(dom.errors)

    sm = torch.sum(aber, 0)

    if not dom.beta is None:
        sm += dom.beta

    mn = dom.head - sm
    mx = sm
    mx += dom.head  # in-place: mx is now the upper bound


    nmn = F.relu(-1 * mn)  # |lb| where the lower bound is negative, else 0

    # "S" branch: shift the center up by |lb|/2 and widen beta accordingly.
    zbet = (dom.beta if not dom.beta is None else 0)
    newheadS = dom.head + nmn / 2
    newbetaS = zbet + nmn / 2
    newerrS = dom.errors

    mmx = F.relu(mx)  # clamped upper bound

    # "B" branch: plain box with center = radius = ub/2.
    newheadB = mmx / 2
    newbetaB = newheadB
    newerrB = 0

    # Blend weight; eps avoids 0/0 for stably-positive neurons.
    eps = 0.0001
    t = nmn / (mmx + nmn + eps) # mn.lt(0).to_dtype() * F.sigmoid(nmn - nmx)

    # Stably-negative neurons (ub <= 0) are zeroed outright.
    shouldnt_zero = mx.gt(0).to_dtype()

    newhead = shouldnt_zero * ( (1 - t) * newheadS + t * newheadB)
    newbeta = shouldnt_zero * ( (1 - t) * newbetaS + t * newbetaB)
    newerr = shouldnt_zero * ( (1 - t) * newerrS + t * newerrB)

    return dom.new(newhead, newbeta , newerr)
|
178 |
+
|
179 |
+
|
180 |
+
def creluNIPS(dom):
    """Abstract ReLU using the minimal-area linear relaxation
    (slope lam = ub/(ub-lb), offset mu = -lam*lb/2) for crossing neurons."""
    if dom.errors is None:
        if dom.beta is None:
            return dom.new(F.relu(dom.head), None, None)
        er = dom.beta
        mx = F.relu(dom.head + er)
        mn = F.relu(dom.head - er)
        return dom.new((mn + mx) / 2, (mx - mn) / 2 , None)

    sm = torch.sum(torch.abs(dom.errors), 0)

    if not dom.beta is None:
        sm += dom.beta

    mn = dom.head - sm
    mx = dom.head + sm

    mngz = mn >= 0.0  # stably non-negative: identity

    zs = h.zeros(dom.head.shape)

    diff = mx - mn

    # Slope of the relaxation; guarded against ub<=0 and zero-width intervals.
    lam = torch.where((mx > 0) & (diff > 0.0), mx / diff, zs)
    mu = lam * mn * (-0.5)

    betaz = zs if dom.beta is None else dom.beta

    newhead = torch.where(mngz, dom.head , lam * dom.head + mu)
    # Fold zero-width neurons into the "keep as-is" mask.
    # NOTE(review): `+=` on a comparison-result tensor acts as logical-or on
    # old torch (uint8 masks); newer torch returns bool tensors where this may
    # error — confirm against the pinned torch version.
    mngz += diff <= 0.0
    newbeta = torch.where(mngz, betaz , lam * betaz + mu ) # mu is always positive on this side
    newerr = torch.where(mngz, dom.errors, lam * dom.errors )
    return dom.new(newhead, newbeta, newerr)
|
213 |
+
|
214 |
+
|
215 |
+
|
216 |
+
|
217 |
+
class MaxTypes:
    """Strategies for turning an abstract element into a tensor used as its
    'max' proxy (e.g. when comparing abstract outputs)."""

    @staticmethod
    def ub(x):
        """The element's upper bound."""
        return x.ub()

    @staticmethod
    def only_beta(x):
        """The box radius alone; a zero tensor shaped like the head when absent."""
        if x.beta is None:
            return x.head * 0
        return x.beta

    @staticmethod
    def head_beta(x):
        """Center plus box radius (an upper bound ignoring error terms)."""
        return MaxTypes.only_beta(x) + x.head
|
230 |
+
|
231 |
+
class HybridZonotope:
    """Hybrid zonotope abstract domain.

    An element represents the set
        { head + diag(beta) * u + errors^T e : u in [-1,1]^d, e in [-1,1]^k }
    where:
      head   -- center point, shape [batch, *dims]
      beta   -- per-coordinate box radii (uncorrelated noise), same shape as
                head, or None
      errors -- correlated error-term coefficients stacked on dim 0, shape
                [k, batch, *dims], or None
    """

    def isSafe(self, target):
        """1 per batch element whose `target` logit provably dominates all others."""
        od, _ = torch.min(h.preDomRes(self, target).lb(), 1)
        return od.gt(0.0).long()

    def isPoint(self):
        # An abstract element always denotes a set, never a single point.
        return False

    def labels(self):
        """All labels that could still attain the maximum score, argmax first."""
        target = torch.max(self.ub(), 1)[1]
        l = list(h.preDomRes(self, target).lb()[0])
        return [target.item()] + [i for i, v in zip(range(len(l)), l) if v <= 0]

    def relu(self):
        return self.customRelu(self)

    def __init__(self, head, beta, errors, customRelu=creluBoxy, **kargs):
        self.head = head      # center
        self.errors = errors  # [k, *head.shape] correlated terms, or None
        self.beta = beta      # box radii (>= 0), or None
        self.customRelu = creluBoxy if customRelu is None else customRelu

    def new(self, *args, customRelu=None, **kargs):
        """Build a same-class element, inheriting customRelu unless overridden."""
        return self.__class__(*args, **kargs, customRelu=self.customRelu if customRelu is None else customRelu).checkSizes()

    def zono_to_hybrid(self, *args, **kargs):  # we are already a hybrid zono.
        return self.new(self.head, self.beta, self.errors, **kargs)

    def hybrid_to_zono(self, *args, correlate=True, customRelu=None, **kargs):
        """Convert to a pure Zonotope, optionally promoting beta to error terms."""
        beta = self.beta
        errors = self.errors
        if correlate and beta is not None:
            batches = beta.shape[0]
            num_elem = h.product(beta.shape[1:])
            ei = h.getEi(batches, num_elem)

            if len(beta.shape) > 2:
                ei = ei.contiguous().view(num_elem, *beta.shape)
            err = ei * beta
            errors = torch.cat((err, errors), dim=0) if errors is not None else err
            beta = None

        # BUG FIX: the original passed customRelu=None precisely when a custom
        # relu WAS supplied (branches were swapped), silently discarding it.
        return Zonotope(self.head, beta,
                        errors if errors is not None else (self.beta * 0).unsqueeze(0),
                        customRelu=self.customRelu if customRelu is None else customRelu)

    def abstractApplyLeaf(self, foo, *args, **kargs):
        """Dispatch a named abstract operation on self by attribute name."""
        return getattr(self, foo)(*args, **kargs)

    def decorrelate(self, cc_indx_batch_err):  # keep these errors
        """Keep only the selected error terms per batch element; fold the rest into beta."""
        if self.errors is None:
            return self

        batch_size = self.head.shape[0]
        num_error_terms = self.errors.shape[0]

        beta = h.zeros(self.head.shape).to_dtype() if self.beta is None else self.beta
        errors = h.zeros([0] + list(self.head.shape)).to_dtype() if self.errors is None else self.errors

        inds_i = torch.arange(self.head.shape[0], device=h.device).unsqueeze(1).long()
        # Work batch-major so [batch, error] index pairs address terms directly.
        errors = errors.to_dtype().permute(1, 0, *list(range(len(self.errors.shape)))[2:])

        sm = errors.clone()
        sm[inds_i, cc_indx_batch_err] = 0  # zero the kept terms; the rest get boxed

        beta = beta.to_dtype() + sm.abs().sum(dim=1)

        errors = errors[inds_i, cc_indx_batch_err]
        errors = errors.permute(1, 0, *list(range(len(self.errors.shape)))[2:]).contiguous()
        return self.new(self.head, beta, errors)

    def dummyDecorrelate(self, num_decorrelate):
        """Fast paths: nothing to drop, or drop everything into beta; None otherwise."""
        if num_decorrelate == 0 or self.errors is None:
            return self
        elif num_decorrelate >= self.errors.shape[0]:
            beta = self.beta
            if self.errors is not None:
                errs = self.errors.abs().sum(dim=0)
                if beta is None:
                    beta = errs
                else:
                    beta += errs
            return self.new(self.head, beta, None)
        return None

    def stochasticDecorrelate(self, num_decorrelate, choices=None, num_to_keep=False):
        """Decorrelate a uniformly random subset of error terms."""
        dummy = self.dummyDecorrelate(num_decorrelate)
        if dummy is not None:
            return dummy
        num_error_terms = self.errors.shape[0]
        batch_size = self.head.shape[0]

        ucc_mask = h.ones([batch_size, self.errors.shape[0]]).long()
        cc_indx_batch_err = h.cudify(torch.multinomial(ucc_mask.to_dtype(), num_decorrelate if num_to_keep else num_error_terms - num_decorrelate, replacement=False)) if choices is None else choices
        return self.decorrelate(cc_indx_batch_err)

    def decorrelateMin(self, num_decorrelate, num_to_keep=False):
        """Keep the error terms with the largest total magnitude; box the rest."""
        dummy = self.dummyDecorrelate(num_decorrelate)
        if dummy is not None:
            return dummy

        num_error_terms = self.errors.shape[0]
        batch_size = self.head.shape[0]

        error_sum_b_e = self.errors.abs().view(self.errors.shape[0], batch_size, -1).sum(dim=2).permute(1, 0)
        cc_indx_batch_err = error_sum_b_e.topk(num_decorrelate if num_to_keep else num_error_terms - num_decorrelate)[1]
        return self.decorrelate(cc_indx_batch_err)

    def correlate(self, cc_indx_batch_beta):  # given in terms of the flattened matrix.
        """Promote the selected beta components into fresh correlated error terms."""
        num_correlate = h.product(cc_indx_batch_beta.shape[1:])

        beta = h.zeros(self.head.shape).to_dtype() if self.beta is None else self.beta
        errors = h.zeros([0] + list(self.head.shape)).to_dtype() if self.errors is None else self.errors

        batch_size = beta.shape[0]
        new_errors = h.zeros([num_correlate] + list(self.head.shape)).to_dtype()

        inds_i = torch.arange(batch_size, device=h.device).unsqueeze(1).long()

        nc = torch.arange(num_correlate, device=h.device).unsqueeze(1).long()

        # One fresh error term per selected coordinate, set to that beta value.
        new_errors = new_errors.permute(1, 0, *list(range(len(new_errors.shape)))[2:]).contiguous().view(batch_size, num_correlate, -1)
        new_errors[inds_i, nc.unsqueeze(0).expand([batch_size] + list(nc.shape)).squeeze(2), cc_indx_batch_beta] = beta.view(batch_size, -1)[inds_i, cc_indx_batch_beta]

        new_errors = new_errors.permute(1, 0, *list(range(len(new_errors.shape)))[2:]).contiguous().view(num_correlate, batch_size, *beta.shape[1:])
        errors = torch.cat((errors, new_errors), dim=0)

        # The promoted components no longer contribute to the box term.
        beta.view(batch_size, -1)[inds_i, cc_indx_batch_beta] = 0

        return self.new(self.head, beta, errors)

    def stochasticCorrelate(self, num_correlate, choices=None):
        """Correlate a uniformly random subset of coordinates."""
        if num_correlate == 0:
            return self

        domshape = self.head.shape
        batch_size = domshape[0]
        num_pixs = h.product(domshape[1:])
        num_correlate = min(num_correlate, num_pixs)
        ucc_mask = h.ones([batch_size, num_pixs]).long()

        cc_indx_batch_beta = h.cudify(torch.multinomial(ucc_mask.to_dtype(), num_correlate, replacement=False)) if choices is None else choices
        return self.correlate(cc_indx_batch_beta)

    def correlateMaxK(self, num_correlate):
        """Correlate the coordinates with the largest concrete upper bound."""
        if num_correlate == 0:
            return self

        domshape = self.head.shape
        batch_size = domshape[0]
        num_pixs = h.product(domshape[1:])
        num_correlate = min(num_correlate, num_pixs)

        concrete_max_image = self.ub().view(batch_size, -1)

        cc_indx_batch_beta = concrete_max_image.topk(num_correlate)[1]
        return self.correlate(cc_indx_batch_beta)

    def correlateMaxPool(self, *args, max_type=MaxTypes.ub, max_pool=F.max_pool2d, **kargs):
        """Correlate the coordinates a max-pool would select on max_type(self)."""
        domshape = self.head.shape
        batch_size = domshape[0]
        num_pixs = h.product(domshape[1:])

        concrete_max_image = max_type(self)

        cc_indx_batch_beta = max_pool(concrete_max_image, *args, return_indices=True, **kargs)[1].view(batch_size, -1)

        return self.correlate(cc_indx_batch_beta)

    def checkSizes(self):
        """Validate shape/NaN invariants; normalizes negative beta via abs()."""
        if self.errors is not None:
            if not self.errors.size()[1:] == self.head.size():
                raise Exception("Such bad sizes on error:", self.errors.shape, " head:", self.head.shape)
            if torch.isnan(self.errors).any():
                raise Exception("Such nan in errors")
        if self.beta is not None:
            if not self.beta.size() == self.head.size():
                raise Exception("Such bad sizes on beta")
            # BUG FIX: this error message previously said "errors".
            if torch.isnan(self.beta).any():
                raise Exception("Such nan in beta")
            if self.beta.lt(0.0).any():
                self.beta = self.beta.abs()

        return self

    def __mul__(self, flt):
        # Scaling flips error signs but beta must stay non-negative: use abs(flt).
        return self.new(self.head * flt, None if self.beta is None else self.beta * abs(flt), None if self.errors is None else self.errors * flt)

    def __truediv__(self, flt):
        flt = 1. / flt
        return self.new(self.head * flt, None if self.beta is None else self.beta * abs(flt), None if self.errors is None else self.errors * flt)

    def __add__(self, other):
        if isinstance(other, HybridZonotope):
            return self.new(self.head + other.head, h.msum(self.beta, other.beta, lambda a, b: a + b), h.msum(self.errors, other.errors, catNonNullErrors(lambda a, b: a + b)))
        else:
            # other has to be a standard variable or tensor
            return self.new(self.head + other, self.beta, self.errors)

    def addPar(self, a, b):
        """Add two elements, aligning their error terms against self's (parallel branches)."""
        return self.new(a.head + b.head, h.msum(a.beta, b.beta, lambda a, b: a + b), h.msum(a.errors, b.errors, catNonNullErrors(lambda a, b: a + b, self.errors)))

    def __sub__(self, other):
        if isinstance(other, HybridZonotope):
            return self.new(self.head - other.head
                            , h.msum(self.beta, other.beta, lambda a, b: a + b)
                            , h.msum(self.errors, None if other.errors is None else -other.errors, catNonNullErrors(lambda a, b: a + b)))
        else:
            # other has to be a standard variable or tensor
            return self.new(self.head - other, self.beta, self.errors)

    def bmm(self, other):
        """Batched matrix multiply; beta uses |other| to stay a sound radius."""
        hd = self.head.bmm(other)
        bet = None if self.beta is None else self.beta.bmm(other.abs())

        if self.errors is None:
            er = None
        else:
            er = self.errors.matmul(other)
        return self.new(hd, bet, er)

    def getBeta(self):
        """beta, with zeros standing in when there is no box term."""
        return self.head * 0 if self.beta is None else self.beta

    def getErrors(self):
        """errors, with a single all-zero term standing in when there are none.

        BUG FIX: the original tested `self.beta is None`, so it returned zeros
        even when error terms existed, and returned None when beta existed but
        errors did not. Mirror getBeta by testing `self.errors`.
        """
        return (self.head * 0).unsqueeze(0) if self.errors is None else self.errors

    def merge(self, other, ref=None):  # the vast majority of the time ref should be none here. Not for parallel computation with powerset
        """Over-approximate the union of self and other."""
        s_beta = self.getBeta()  # so that beta is never none

        sbox_u = self.head + s_beta
        sbox_l = self.head - s_beta
        o_u = other.ub()
        o_l = other.lb()
        o_in_s = (o_u <= sbox_u) & (o_l >= sbox_l)

        s_err_mx = self.errors.abs().sum(dim=0)

        if not isinstance(other, HybridZonotope):
            new_head = (self.head + other.center()) / 2
            new_beta = torch.max(sbox_u + s_err_mx, o_u) - new_head
            return self.new(torch.where(o_in_s, self.head, new_head), torch.where(o_in_s, self.beta, new_beta), o_in_s.float() * self.errors)

        # TODO: could be more efficient if one of these doesn't have beta or errors but thats okay for now.
        s_u = sbox_u + s_err_mx
        s_l = sbox_l - s_err_mx

        # NOTE(review): obox_u/obox_l look like radii (bound minus/plus head)
        # rather than bounds, and obox_l ADDS the head; the s_in_o containment
        # test built from them looks suspect — verify against its intent.
        obox_u = o_u - other.head
        obox_l = o_l + other.head

        s_in_o = (s_u <= obox_u) & (s_l >= obox_l)

        # TODO: could theoretically still do something better when one is contained partially in the other
        new_head = (self.head + other.center()) / 2
        new_beta = torch.max(sbox_u + self.getErrors().abs().sum(dim=0), o_u) - new_head

        return self.new(torch.where(o_in_s, self.head, torch.where(s_in_o, other.head, new_head))
                        , torch.where(o_in_s, s_beta, torch.where(s_in_o, other.getBeta(), new_beta))
                        , h.msum(o_in_s.float() * self.errors, s_in_o.float() * other.errors, catNonNullErrors(lambda a, b: a + b, ref_errs=ref.errors if ref is not None else ref)))  # these are both zero otherwise

    def conv(self, conv, weight, bias=None, **kargs):
        """Apply a convolution-like linear op to all parts.

        The bias is added only to the head; beta is convolved with |weight| to
        remain a sound radius; error terms are convolved with a flattened
        leading dim and reshaped back.
        """
        errs = self.errors  # local renamed from `h` to stop shadowing the helpers module
        inter = errs if errs is None else errs.view(-1, *errs.size()[2:])
        hd = conv(self.head, weight, bias=bias, **kargs)
        res = errs if errs is None else conv(inter, weight, bias=None, **kargs)

        return self.new(hd
                        , None if self.beta is None else conv(self.beta, weight.abs(), bias=None, **kargs)
                        , errs if errs is None else res.view(errs.size()[0], errs.size()[1], *res.size()[1:]))

    def conv1d(self, *args, **kargs):
        return self.conv(lambda x, *args, **kargs: x.conv1d(*args, **kargs), *args, **kargs)

    def conv2d(self, *args, **kargs):
        return self.conv(lambda x, *args, **kargs: x.conv2d(*args, **kargs), *args, **kargs)

    def conv3d(self, *args, **kargs):
        return self.conv(lambda x, *args, **kargs: x.conv3d(*args, **kargs), *args, **kargs)

    def conv_transpose1d(self, *args, **kargs):
        return self.conv(lambda x, *args, **kargs: x.conv_transpose1d(*args, **kargs), *args, **kargs)

    def conv_transpose2d(self, *args, **kargs):
        return self.conv(lambda x, *args, **kargs: x.conv_transpose2d(*args, **kargs), *args, **kargs)

    def conv_transpose3d(self, *args, **kargs):
        return self.conv(lambda x, *args, **kargs: x.conv_transpose3d(*args, **kargs), *args, **kargs)

    def matmul(self, other):
        return self.new(self.head.matmul(other), None if self.beta is None else self.beta.matmul(other.abs()), None if self.errors is None else self.errors.matmul(other))

    def unsqueeze(self, i):
        # errors carry an extra leading (error-term) dim, so shift the axis by one.
        return self.new(self.head.unsqueeze(i), None if self.beta is None else self.beta.unsqueeze(i), None if self.errors is None else self.errors.unsqueeze(i + 1))

    def squeeze(self, dim):
        return self.new(self.head.squeeze(dim),
                        None if self.beta is None else self.beta.squeeze(dim),
                        None if self.errors is None else self.errors.squeeze(dim + 1 if dim >= 0 else dim))

    def double(self):
        return self.new(self.head.double(), self.beta.double() if self.beta is not None else None, self.errors.double() if self.errors is not None else None)

    def float(self):
        return self.new(self.head.float(), self.beta.float() if self.beta is not None else None, self.errors.float() if self.errors is not None else None)

    def to_dtype(self):
        return self.new(self.head.to_dtype(), self.beta.to_dtype() if self.beta is not None else None, self.errors.to_dtype() if self.errors is not None else None)

    def sum(self, dim=1):
        return self.new(torch.sum(self.head, dim=dim), None if self.beta is None else torch.sum(self.beta, dim=dim), None if self.errors is None else torch.sum(self.errors, dim=dim + 1 if dim >= 0 else dim))

    def view(self, *newshape):
        return self.new(self.head.view(*newshape),
                        None if self.beta is None else self.beta.view(*newshape),
                        None if self.errors is None else self.errors.view(self.errors.size()[0], *newshape))

    def gather(self, dim, index):
        return self.new(self.head.gather(dim, index),
                        None if self.beta is None else self.beta.gather(dim, index),
                        None if self.errors is None else self.errors.gather(dim + 1, index.expand([self.errors.size()[0]] + list(index.size()))))

    def concretize(self):
        """Collapse all noise terms into a pure box around the center."""
        if self.errors is None:
            return self

        return self.new(self.head, torch.sum(self.concreteErrors().abs(), 0), None)  # maybe make a box?

    def cat(self, other, dim=0):
        return self.new(self.head.cat(other.head, dim=dim),
                        h.msum(other.beta, self.beta, lambda a, b: a.cat(b, dim=dim)),
                        h.msum(self.errors, other.errors, catNonNullErrors(lambda a, b: a.cat(b, dim + 1))))

    def split(self, split_size, dim=0):
        """Split along dim into a tuple of elements (errors split on dim+1)."""
        heads = list(self.head.split(split_size, dim))
        betas = list(self.beta.split(split_size, dim)) if self.beta is not None else None
        errorss = list(self.errors.split(split_size, dim + 1)) if self.errors is not None else None

        def makeFromI(i):
            return self.new(heads[i],
                            None if betas is None else betas[i],
                            None if errorss is None else errorss[i])
        return tuple(makeFromI(i) for i in range(len(heads)))

    def concreteErrors(self):
        """All noise coefficients: beta (as one extra term) stacked atop errors."""
        if self.beta is None and self.errors is None:
            raise Exception("shouldn't have both beta and errors be none")
        if self.errors is None:
            return self.beta.unsqueeze(0)
        if self.beta is None:
            return self.errors
        return torch.cat([self.beta.unsqueeze(0), self.errors], dim=0)

    def applyMonotone(self, foo, *args, **kargs):
        """Sound transfer of a monotone scalar function via the concrete interval."""
        if self.beta is None and self.errors is None:
            return self.new(foo(self.head), None, None)

        beta = self.concreteErrors().abs().sum(dim=0)

        tp = foo(self.head + beta, *args, **kargs)
        bt = foo(self.head - beta, *args, **kargs)

        new_hybrid = self.new((tp + bt) / 2, (tp - bt) / 2, None)

        if self.errors is not None:
            # Restore as many correlated terms as the input had.
            return new_hybrid.correlateMaxK(self.errors.shape[0])
        return new_hybrid

    def avg_pool2d(self, *args, **kargs):
        nhead = F.avg_pool2d(self.head, *args, **kargs)
        return self.new(nhead,
                        None if self.beta is None else F.avg_pool2d(self.beta, *args, **kargs),
                        None if self.errors is None else F.avg_pool2d(self.errors.view(-1, *self.head.shape[1:]), *args, **kargs).view(-1, *nhead.shape))

    def adaptive_avg_pool2d(self, *args, **kargs):
        nhead = F.adaptive_avg_pool2d(self.head, *args, **kargs)
        return self.new(nhead,
                        None if self.beta is None else F.adaptive_avg_pool2d(self.beta, *args, **kargs),
                        None if self.errors is None else F.adaptive_avg_pool2d(self.errors.view(-1, *self.head.shape[1:]), *args, **kargs).view(-1, *nhead.shape))

    def elu(self):
        return self.applyMonotone(F.elu)

    def selu(self):
        return self.applyMonotone(F.selu)

    def sigm(self):
        return self.applyMonotone(F.sigmoid)

    def softplus(self):
        """Sound softplus: linearize at the center, bound the Taylor remainder."""
        if self.errors is None:
            if self.beta is None:
                return self.new(F.softplus(self.head), None, None)
            tp = F.softplus(self.head + self.beta)
            bt = F.softplus(self.head - self.beta)
            return self.new((tp + bt) / 2, (tp - bt) / 2, None)

        errors = self.concreteErrors()
        o = h.ones(self.head.size())

        def sp(hd):
            return F.softplus(hd)  # torch.log(o + torch.exp(hd)) # not very stable

        def spp(hd):  # first derivative of softplus
            ehd = torch.exp(hd)
            return ehd.div(ehd + o)

        def sppp(hd):  # second derivative of softplus
            ehd = torch.exp(hd)
            md = ehd + o
            return ehd.div(md.mul(md))

        fa = sp(self.head)
        fpa = spp(self.head)

        a = self.head

        k = torch.sum(errors.abs(), 0)

        def evalG(r):
            return r.mul(r).mul(sppp(a + r))

        # Bound |f(a+r) - f(a) - f'(a) r| over r in [-k, k] (also check +-a
        # when 0 lies strictly inside the interval).
        m = torch.max(evalG(h.zeros(k.size())), torch.max(evalG(k), evalG(-k)))
        m = h.ifThenElse(a.abs().lt(k), torch.max(m, torch.max(evalG(a), evalG(-a))), m)
        m /= 2

        return self.new(fa, m if self.beta is None else m + self.beta.mul(fpa), None if self.errors is None else self.errors.mul(fpa))

    def center(self):
        return self.head

    def vanillaTensorPart(self):
        return self.head

    def lb(self):
        """Concrete elementwise lower bound."""
        return self.head - self.concreteErrors().abs().sum(dim=0)

    def ub(self):
        """Concrete elementwise upper bound."""
        return self.head + self.concreteErrors().abs().sum(dim=0)

    def size(self):
        return self.head.size()

    def diameter(self):
        abal = torch.abs(self.concreteErrors()).transpose(0, 1)
        return abal.sum(1).sum(1)  # perimeter

    def loss(self, target, **args):
        """Softplus hinge on the worst-case margin violation against target."""
        r = -h.preDomRes(self, target).lb()
        return F.softplus(r.max(1)[0])

    def deep_loss(self, act=F.relu, *args, **kargs):
        """Regularizer penalizing overlap among the sorted bound intervals."""
        batch_size = self.head.shape[0]
        inds = torch.arange(batch_size, device=h.device).unsqueeze(1).long()

        def dl(l, u):
            ls, lsi = torch.sort(l, dim=1)
            ls_u = u[inds, lsi]

            def slidingMax(a):  # using maxpool
                k = a.shape[1]
                ml = a.min(dim=1)[0].unsqueeze(1)

                # Left-pad with zeros (after shifting to be non-negative) so a
                # width-k max-pool yields the running prefix maximum.
                inp = torch.cat((h.zeros([batch_size, k]), a - ml), dim=1)
                mpl = F.max_pool1d(inp.unsqueeze(1), kernel_size=k, stride=1, padding=0, return_indices=False).squeeze(1)
                return mpl[:, :-1] + ml

            return act(slidingMax(ls_u) - ls).sum(dim=1)

        l = self.lb().view(batch_size, -1)
        u = self.ub().view(batch_size, -1)
        return (dl(l, u) + dl(-u, -l)) / (2 * l.shape[1])  # make it easier to regularize against
class Zonotope(HybridZonotope):
    """Pure zonotope domain: after every transformer, any box term (beta) is
    promoted into explicit one-hot error terms so beta stays None."""

    def applySuper(self, ret):
        """Promote ret.beta into fresh error terms and clear it, in place."""
        batches = ret.head.size()[0]
        num_elem = h.product(ret.head.size()[1:])
        ei = h.getEi(batches, num_elem)  # one-hot basis, one term per coordinate

        if len(ret.head.size()) > 2:
            ei = ei.contiguous().view(num_elem, *ret.head.size())

        ret.errors = torch.cat( (ret.errors, ei * ret.beta) ) if not ret.beta is None else ret.errors
        ret.beta = None
        return ret.checkSizes()

    def zono_to_hybrid(self, *args, customRelu = None, **kargs): # we are already a hybrid zono.
        # Rebuild as a HybridZonotope, keeping the current relu unless overridden.
        return HybridZonotope(self.head, self.beta, self.errors, customRelu = self.customRelu if customRelu is None else customRelu)

    def hybrid_to_zono(self, *args, **kargs):
        # Already a zonotope; just rebuild with the same parts.
        return self.new(self.head, self.beta, self.errors, **kargs)

    def applyMonotone(self, *args, **kargs):
        return self.applySuper(super(Zonotope,self).applyMonotone(*args, **kargs))

    def softplus(self):
        return self.applySuper(super(Zonotope,self).softplus())

    def relu(self):
        return self.applySuper(super(Zonotope,self).relu())

    def splitRelu(self, *args, **kargs):
        # NOTE(review): splitRelu is not defined in the visible part of
        # HybridZonotope; presumably provided elsewhere — confirm.
        return [self.applySuper(a) for a in super(Zonotope, self).splitRelu(*args, **kargs)]
def mysign(x):
    """Elementwise sign of x, except that 0 maps to +1 rather than 0."""
    zero_mask = x.eq(0).to_dtype()
    signs = x.sign().to_dtype()
    return signs + zero_mask
def mulIfEq(grad, out, target):
    """0/1 mask shaped like grad: 1 wherever argmax(out) equals target.

    (Despite the name, this returns only the mask; the caller multiplies.)
    """
    pred = out.max(1, keepdim=True)[1]
    match = pred.eq(target.view_as(pred)).to_dtype()
    broadcast_shape = [-1] + [1] * (len(grad.size()) - 1)
    return match.view(broadcast_shape).expand_as(grad)
def stdLoss(out, target):
    """Per-sample (unreduced) cross-entropy, handling the torch 0.x API rename."""
    legacy = torch.__version__[0] == "0"
    kwargs = {"reduce": False} if legacy else {"reduction": "none"}
    return F.cross_entropy(out, target, **kwargs)
class ListDomain(object):
    """Product domain: a list of abstract elements advanced in lockstep.

    Every transformer delegates elementwise to the contained domains in
    `self.al` and rewraps the results in a new ListDomain.
    """

    def __init__(self, al, *args, **kargs):
        self.al = list(al)

    def new(self, *args, **kargs):
        return self.__class__(*args, **kargs)

    def isSafe(self, *args, **kargs):
        # BUG FIX: `raise "..."` (raising a string) is itself a TypeError in
        # Python 3; raise a real exception carrying the intended message.
        raise NotImplementedError("Domain Not Suitable For Testing")

    def labels(self):
        # BUG FIX: same string-raise problem as isSafe.
        raise NotImplementedError("Domain Not Suitable For Testing")

    def isPoint(self):
        return all(a.isPoint() for a in self.al)

    def __mul__(self, flt):
        return self.new(a.__mul__(flt) for a in self.al)

    def __truediv__(self, flt):
        return self.new(a.__truediv__(flt) for a in self.al)

    def __add__(self, other):
        if isinstance(other, ListDomain):
            return self.new(a.__add__(o) for a, o in zip(self.al, other.al))
        else:
            return self.new(a.__add__(other) for a in self.al)

    def merge(self, other, ref=None):
        if ref is None:
            return self.new(a.merge(o) for a, o in zip(self.al, other.al))
        return self.new(a.merge(o, ref=r) for a, o, r in zip(self.al, other.al, ref.al))

    def addPar(self, a, b):
        return self.new(s.addPar(av, bv) for s, av, bv in zip(self.al, a.al, b.al))

    def __sub__(self, other):
        if isinstance(other, ListDomain):
            return self.new(a.__sub__(o) for a, o in zip(self.al, other.al))
        else:
            return self.new(a.__sub__(other) for a in self.al)

    def abstractApplyLeaf(self, *args, **kargs):
        return self.new(a.abstractApplyLeaf(*args, **kargs) for a in self.al)

    def bmm(self, other):
        return self.new(a.bmm(other) for a in self.al)

    def matmul(self, other):
        return self.new(a.matmul(other) for a in self.al)

    def conv(self, *args, **kargs):
        return self.new(a.conv(*args, **kargs) for a in self.al)

    def conv1d(self, *args, **kargs):
        return self.new(a.conv1d(*args, **kargs) for a in self.al)

    def conv2d(self, *args, **kargs):
        return self.new(a.conv2d(*args, **kargs) for a in self.al)

    def conv3d(self, *args, **kargs):
        return self.new(a.conv3d(*args, **kargs) for a in self.al)

    def max_pool2d(self, *args, **kargs):
        return self.new(a.max_pool2d(*args, **kargs) for a in self.al)

    def avg_pool2d(self, *args, **kargs):
        return self.new(a.avg_pool2d(*args, **kargs) for a in self.al)

    def adaptive_avg_pool2d(self, *args, **kargs):
        return self.new(a.adaptive_avg_pool2d(*args, **kargs) for a in self.al)

    def unsqueeze(self, *args, **kargs):
        return self.new(a.unsqueeze(*args, **kargs) for a in self.al)

    def squeeze(self, *args, **kargs):
        return self.new(a.squeeze(*args, **kargs) for a in self.al)

    def view(self, *args, **kargs):
        return self.new(a.view(*args, **kargs) for a in self.al)

    def gather(self, *args, **kargs):
        return self.new(a.gather(*args, **kargs) for a in self.al)

    def sum(self, *args, **kargs):
        return self.new(a.sum(*args, **kargs) for a in self.al)

    def double(self):
        return self.new(a.double() for a in self.al)

    def float(self):
        return self.new(a.float() for a in self.al)

    def to_dtype(self):
        return self.new(a.to_dtype() for a in self.al)

    def vanillaTensorPart(self):
        # The first element's concrete tensor stands in for the whole product.
        return self.al[0].vanillaTensorPart()

    def center(self):
        return self.new(a.center() for a in self.al)

    def ub(self):
        return self.new(a.ub() for a in self.al)

    def lb(self):
        return self.new(a.lb() for a in self.al)

    def relu(self):
        return self.new(a.relu() for a in self.al)

    def splitRelu(self, *args, **kargs):
        return self.new(a.splitRelu(*args, **kargs) for a in self.al)

    def softplus(self):
        return self.new(a.softplus() for a in self.al)

    def elu(self):
        return self.new(a.elu() for a in self.al)

    def selu(self):
        return self.new(a.selu() for a in self.al)

    def sigm(self):
        return self.new(a.sigm() for a in self.al)

    def cat(self, other, *args, **kargs):
        return self.new(a.cat(o, *args, **kargs) for a, o in zip(self.al, other.al))

    def split(self, *args, **kargs):
        # BUG FIX: the original zipped a SINGLE generator (`zip(gen)`), which
        # yields 1-tuples and so built one ListDomain per contained element
        # instead of one per split chunk. zip(*) transposes, so chunk i pairs
        # the i-th piece of every contained domain.
        return [self.new(z) for z in zip(*(a.split(*args, **kargs) for a in self.al))]

    def size(self):
        return self.al[0].size()

    def loss(self, *args, **kargs):
        return sum(a.loss(*args, **kargs) for a in self.al)

    def deep_loss(self, *args, **kargs):
        return sum(a.deep_loss(*args, **kargs) for a in self.al)

    def checkSizes(self):
        for a in self.al:
            a.checkSizes()
        return self
class TaggedDomain(object):
|
918 |
+
|
919 |
+
|
920 |
+
def __init__(self, a, tag = None):
|
921 |
+
self.tag = tag
|
922 |
+
self.a = a
|
923 |
+
|
924 |
+
def isSafe(self,*args,**kargs):
|
925 |
+
return self.a.isSafe(*args, **kargs)
|
926 |
+
|
927 |
+
def isPoint(self):
|
928 |
+
return self.a.isPoint()
|
929 |
+
|
930 |
+
def labels(self):
|
931 |
+
raise "Domain Not Suitable For Testing"
|
932 |
+
|
933 |
+
def __mul__(self, flt):
|
934 |
+
return TaggedDomain(self.a.__mul__(flt), self.tag)
|
935 |
+
|
936 |
+
def __truediv__(self, flt):
|
937 |
+
return TaggedDomain(self.a.__truediv__(flt), self.tag)
|
938 |
+
|
939 |
+
def __add__(self, other):
|
940 |
+
if isinstance(other, TaggedDomain):
|
941 |
+
return TaggedDomain(self.a.__add__(other.a), self.tag)
|
942 |
+
else:
|
943 |
+
return TaggedDomain(self.a.__add__(other), self.tag)
|
944 |
+
|
945 |
+
    # --- Delegation methods ---------------------------------------------
    # Each method unwraps the underlying abstract element (self.a), applies
    # the corresponding operation, and re-wraps the result with the same
    # tag, so a TaggedDomain is transparent to the network layers.

    def addPar(self, a,b):
        # a and b are TaggedDomain wrappers of two parallel branch results.
        return TaggedDomain(self.a.addPar(a.a, b.a), self.tag)

    def __sub__(self, other):
        # Subtract either another TaggedDomain's payload or a raw value.
        if isinstance(other, TaggedDomain):
            return TaggedDomain(self.a.__sub__(other.a), self.tag)
        else:
            return TaggedDomain(self.a.__sub__(other), self.tag)

    def bmm(self, other):
        return TaggedDomain(self.a.bmm(other), self.tag)

    def matmul(self, other):
        return TaggedDomain(self.a.matmul(other), self.tag)

    # Convolution / pooling wrappers.
    def conv(self, *args, **kargs):
        return TaggedDomain(self.a.conv(*args, **kargs) , self.tag)

    def conv1d(self, *args, **kargs):
        return TaggedDomain(self.a.conv1d(*args, **kargs), self.tag)

    def conv2d(self, *args, **kargs):
        return TaggedDomain(self.a.conv2d(*args, **kargs), self.tag)

    def conv3d(self, *args, **kargs):
        return TaggedDomain(self.a.conv3d(*args, **kargs), self.tag)

    def max_pool2d(self, *args, **kargs):
        return TaggedDomain(self.a.max_pool2d(*args, **kargs), self.tag)

    def avg_pool2d(self, *args, **kargs):
        return TaggedDomain(self.a.avg_pool2d(*args, **kargs), self.tag)

    def adaptive_avg_pool2d(self, *args, **kargs):
        return TaggedDomain(self.a.adaptive_avg_pool2d(*args, **kargs), self.tag)

    # Shape manipulation wrappers.
    def unsqueeze(self, *args, **kargs):
        return TaggedDomain(self.a.unsqueeze(*args, **kargs), self.tag)

    def squeeze(self, *args, **kargs):
        return TaggedDomain(self.a.squeeze(*args, **kargs), self.tag)

    def abstractApplyLeaf(self, *args, **kargs):
        return TaggedDomain(self.a.abstractApplyLeaf(*args, **kargs), self.tag)

    def view(self, *args, **kargs):
        return TaggedDomain(self.a.view(*args, **kargs), self.tag)

    def gather(self, *args, **kargs):
        return TaggedDomain(self.a.gather(*args, **kargs), self.tag)

    def sum(self, *args, **kargs):
        return TaggedDomain(self.a.sum(*args,**kargs), self.tag)

    # Dtype conversions.
    def double(self):
        return TaggedDomain(self.a.double(), self.tag)

    def float(self):
        return TaggedDomain(self.a.float(), self.tag)

    def to_dtype(self):
        return TaggedDomain(self.a.to_dtype(), self.tag)

    def vanillaTensorPart(self):
        # NOTE: intentionally returns the raw tensor part, not a TaggedDomain.
        return self.a.vanillaTensorPart()

    def center(self):
        return TaggedDomain(self.a.center(), self.tag)

    def ub(self):
        return TaggedDomain(self.a.ub(), self.tag)

    def lb(self):
        return TaggedDomain(self.a.lb(), self.tag)

    # Nonlinearities.
    def relu(self):
        return TaggedDomain(self.a.relu(), self.tag)

    def splitRelu(self, *args, **kargs):
        return TaggedDomain(self.a.splitRelu(*args, **kargs), self.tag)

    def diameter(self):
        # Plain value (width of the abstract element); no re-wrapping.
        return self.a.diameter()

    def softplus(self):
        return TaggedDomain(self.a.softplus(), self.tag)

    def elu(self):
        return TaggedDomain(self.a.elu(), self.tag)

    def selu(self):
        return TaggedDomain(self.a.selu(), self.tag)

    def sigm(self):
        return TaggedDomain(self.a.sigm(), self.tag)

    def cat(self, other, *args, **kargs):
        # assumes `other` is also a TaggedDomain — TODO confirm with callers
        return TaggedDomain(self.a.cat(other.a, *args, **kargs), self.tag)

    def split(self, *args, **kargs):
        # Splitting yields one TaggedDomain per piece, all sharing the tag.
        return [TaggedDomain(z, self.tag) for z in self.a.split(*args, **kargs)]

    def size(self):

        return self.a.size()

    def loss(self, *args, **kargs):
        # Loss is delegated to the tag, applied to the unwrapped payload.
        return self.tag.loss(self.a, *args, **kargs)

    def deep_loss(self, *args, **kargs):
        return self.a.deep_loss(*args, **kargs)

    def checkSizes(self):
        self.a.checkSizes()
        return self

    def merge(self, other, ref = None):
        # `ref`, when given, is unwrapped the same way as `other`.
        return TaggedDomain(self.a.merge(other.a, ref = None if ref is None else ref.a), self.tag)
|
components.py
ADDED
@@ -0,0 +1,951 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn.functional as F
|
3 |
+
import torch.nn as nn
|
4 |
+
from torch.distributions import multinomial, categorical
|
5 |
+
import torch.optim as optim
|
6 |
+
|
7 |
+
import math
|
8 |
+
|
9 |
+
try:
|
10 |
+
from . import helpers as h
|
11 |
+
from . import ai
|
12 |
+
from . import scheduling as S
|
13 |
+
except:
|
14 |
+
import helpers as h
|
15 |
+
import ai
|
16 |
+
import scheduling as S
|
17 |
+
|
18 |
+
import math
|
19 |
+
import abc
|
20 |
+
|
21 |
+
from torch.nn.modules.conv import _ConvNd
|
22 |
+
from enum import Enum
|
23 |
+
|
24 |
+
|
25 |
+
class InferModule(nn.Module):
    """Base class for shape-inferring network modules.

    Construction is lazy: ``__init__`` only records the constructor
    arguments; the real ``nn.Module`` setup and parameter creation happen
    in :meth:`infer`, once the input shape is known.  Subclasses implement
    ``init(in_shape, ...) -> out_shape``.
    """

    def __init__(self, *args, normal = False, ibp_init = False, **kwargs):
        # Deliberately does NOT call nn.Module.__init__ yet; that happens
        # in infer() so that parameters are registered after shape inference.
        self.args = args
        self.kwargs = kwargs
        self.infered = False
        self.normal = normal        # normal-distribution init for weights
        self.ibp_init = ibp_init    # orthogonal init (IBP-style) for weights

    def infer(self, in_shape, global_args = None):
        """Infer shapes and materialize parameters. Stateful and idempotent."""

        if self.infered:
            return self
        self.infered = True

        super(InferModule, self).__init__()
        self.inShape = list(in_shape)
        out_shape = self.init(list(in_shape), *self.args, global_args = global_args, **self.kwargs)
        if out_shape is None:
            # BUG FIX: the original executed `raise "..."`, which is itself a
            # TypeError in Python 3 (and it checked only after list() had
            # already been applied to the value).
            raise ValueError("init() should return the output shape")
        if isinstance(out_shape, int):
            # Accept a bare int (e.g. a flattened size) as a 1-D shape.
            out_shape = [out_shape]
        self.outShape = list(out_shape)

        self.reset_parameters()
        return self

    def reset_parameters(self):
        """Initialize weight/bias in place; no-op for parameter-free modules."""
        if not hasattr(self,'weight') or self.weight is None:
            return
        # Fan-in based uniform bound.
        n = h.product(self.weight.size()) / self.outShape[0]
        stdv = 1 / math.sqrt(n)

        if self.ibp_init:
            torch.nn.init.orthogonal_(self.weight.data)
        elif self.normal:
            self.weight.data.normal_(0, stdv)
            self.weight.data.clamp_(-1, 1)
        else:
            self.weight.data.uniform_(-stdv, stdv)

        if self.bias is not None:
            if self.ibp_init:
                self.bias.data.zero_()
            elif self.normal:
                self.bias.data.normal_(0, stdv)
                self.bias.data.clamp_(-1, 1)
            else:
                self.bias.data.uniform_(-stdv, stdv)

    def clip_norm(self):
        """Clamp the weight-norm magnitude; switches to weight_norm lazily."""
        if not hasattr(self, "weight"):
            return
        if not hasattr(self,"weight_g"):
            # torch 0.x required dim=None for whole-tensor weight norm.
            if torch.__version__[0] == "0":
                nn.utils.weight_norm(self, dim=None)
            else:
                nn.utils.weight_norm(self)

        self.weight_g.data.clamp_(-h.max_c_for_norm, h.max_c_for_norm)

        if torch.__version__[0] != "0":
            self.weight_v.data.clamp_(-h.max_c_for_norm * 10000,h.max_c_for_norm * 10000)
            if hasattr(self, "bias"):
                self.bias.data.clamp_(-h.max_c_for_norm * 10000, h.max_c_for_norm * 10000)

    def regularize(self, p):
        """Return a p-norm regularization term over this module's parameters."""
        reg = 0
        if torch.__version__[0] == "0":
            for param in self.parameters():
                reg += param.norm(p)
        else:
            if hasattr(self, "weight_g"):
                reg += self.weight_g.norm().sum()
                reg += self.weight_v.norm().sum()
            elif hasattr(self, "weight"):
                reg += self.weight.norm().sum()

            if hasattr(self, "bias"):
                reg += self.bias.view(-1).norm(p=p).sum()

        return reg

    def remove_norm(self):
        """Undo clip_norm's weight-norm reparameterization, if present."""
        if hasattr(self,"weight_g"):
            torch.nn.utils.remove_weight_norm(self)

    def showNet(self, t = ""):
        print(t + self.__class__.__name__)

    def printNet(self, f):
        print(self.__class__.__name__, file=f)

    @abc.abstractmethod
    def forward(self, *args, **kargs):
        pass

    def __call__(self, *args, onyx=False, **kargs):
        # onyx=True bypasses nn.Module's hook machinery — presumably for
        # ONNX export; verify against convert.py before relying on it.
        if onyx:
            return self.forward(*args, onyx=onyx, **kargs)
        else:
            return super(InferModule, self).__call__(*args, **kargs)

    @abc.abstractmethod
    def neuronCount(self):
        pass

    def depth(self):
        return 0
def getShapeConv(in_shape, conv_shape, stride = 1, padding = 0):
    """Output shape (C, H, W) of a 2-D convolution.

    in_shape:   (channels, height, width) of the input.
    conv_shape: (out_channels, kernel_h, kernel_w[, ...]).
    """
    _, in_h, in_w = in_shape
    out_chan, k_h, k_w = conv_shape[:3]

    def out_dim(size, kernel):
        # Standard conv arithmetic: floor((2p + n - k) / s) + 1.
        return 1 + int((2 * padding + size - kernel) / stride)

    return (out_chan, out_dim(in_h, k_h), out_dim(in_w, k_w))
def getShapeConvTranspose(in_shape, conv_shape, stride = 1, padding = 0, out_padding=0):
    """Output shape (C, H, W) of a 2-D transposed convolution.

    Inverse of the conv shape formula: (n - 1) * s - 2p + k + out_padding.
    """
    _, in_h, in_w = in_shape
    out_chan, k_h, k_w = conv_shape[:3]

    def out_dim(size, kernel):
        return (size - 1) * stride - 2 * padding + kernel + out_padding

    return (out_chan, out_dim(in_h, k_h), out_dim(in_w, k_w))
class Linear(InferModule):
    """Fully connected (affine) layer; flattens its input first."""

    def init(self, in_shape, out_shape, **kargs):
        self.in_neurons = h.product(in_shape)
        if isinstance(out_shape, int):
            out_shape = [out_shape]
        self.out_neurons = h.product(out_shape)

        # Stored as (in, out): forward computes x @ W, not x @ W.T.
        self.weight = torch.nn.Parameter(torch.Tensor(self.in_neurons, self.out_neurons))
        self.bias = torch.nn.Parameter(torch.Tensor(self.out_neurons))

        return out_shape

    def forward(self, x, **kargs):
        s = x.size()
        x = x.view(s[0], h.product(s[1:]))
        return (x.matmul(self.weight) + self.bias).view(s[0], *self.outShape)

    def neuronCount(self):
        # Affine layers contribute no (nonlinear) neurons to the count.
        return 0

    def showNet(self, t = ""):
        print(t + "Linear out=" + str(self.out_neurons))

    def printNet(self, f):
        # BUG FIX: the header previously went to stdout (missing file=f),
        # unlike every other printNet in this file, so serialized nets
        # lacked their "Linear(...)" header line.
        print("Linear(" + str(self.out_neurons) + ")", file = f)

        print(h.printListsNumpy(list(self.weight.transpose(1,0).data)), file= f)
        print(h.printNumpy(self.bias), file= f)
class Activation(InferModule):
    """Element-wise activation layer, selected by name at construction."""

    def init(self, in_shape, global_args = None, activation = "ReLU", **kargs):
        # Store the index so forward() is a cheap table lookup.
        self.activation = [ "ReLU","Sigmoid", "Tanh", "Softplus", "ELU", "SELU"].index(activation)
        self.activation_name = activation
        return in_shape

    def regularize(self, p):
        return 0

    def forward(self, x, **kargs):
        ops = (
            lambda v: v.relu(),
            lambda v: v.sigmoid(),
            lambda v: v.tanh(),
            lambda v: v.softplus(),
            lambda v: v.elu(),
            lambda v: v.selu(),
        )
        return ops[self.activation](x)

    def neuronCount(self):
        # Every output element is a nonlinear neuron.
        return h.product(self.outShape)

    def depth(self):
        return 1

    def showNet(self, t = ""):
        print(t + self.activation_name)

    def printNet(self, f):
        pass
class ReLU(Activation):
    # Convenience subclass: Activation's default activation is already "ReLU".
    pass
def activation(*args, batch_norm = False, **kargs):
    """Build an Activation layer, optionally preceded by a BatchNorm."""
    act = Activation(*args, **kargs)
    if batch_norm:
        return Seq(BatchNorm(), act)
    return act
class Identity(InferModule): # for feigning model equivelence when removing an op
    """No-op module: passes input through and contributes nothing else."""

    def init(self, in_shape, global_args = None, **kargs):
        return in_shape

    def forward(self, x, **kargs):
        return x

    def neuronCount(self):
        return 0

    def printNet(self, f):
        pass

    def regularize(self, p):
        return 0

    def showNet(self, *args, **kargs):
        pass
class Dropout(InferModule):
    # Dropout whose probability can follow a schedule (via scheduling.Const),
    # applied by building an explicit mask so it also works on abstract domains.

    def init(self, in_shape, p=0.5, use_2d = False, alpha_dropout = False, **kargs):
        # p may be a scalar or a schedule object; Const.initConst normalizes it.
        self.p = S.Const.initConst(p)
        self.use_2d = use_2d
        self.alpha_dropout = alpha_dropout
        return in_shape

    def forward(self, x, time = 0, **kargs):
        if self.training:
            with torch.no_grad():
                # Resolve the (possibly scheduled) dropout probability.
                p = self.p.getVal(time = time)
                # Build the mask on a ones tensor so x itself (which may be an
                # abstract element, not a plain tensor) is never sampled from.
                mask = (F.dropout2d if self.use_2d else F.dropout)(h.ones(x.size()),p=p, training=True)
            if self.alpha_dropout:
                with torch.no_grad():
                    keep_prob = 1 - p
                    # Negative saturation value used by alpha dropout (SELU).
                    alpha = -1.7580993408473766
                    # Affine correction keeping mean/variance — mirrors
                    # F.alpha_dropout's transform; verify against torch docs.
                    a = math.pow(keep_prob + alpha * alpha * keep_prob * (1 - keep_prob), -0.5)
                    b = -a * alpha * (1 - keep_prob)
                    mask = mask * a
                return x * mask + b
            else:
                return x * mask
        else:
            # Eval mode: dropout is a no-op.
            return x

    def neuronCount(self):
        return 0

    def showNet(self, t = ""):
        print(t + "Dropout p=" + str(self.p))

    def printNet(self, f):
        print("Dropout(" + str(self.p) + ")" )
class PrintActivation(Identity):
    # Identity at runtime; only records an activation name so that printNet
    # can emit it when serializing the network.

    def init(self, in_shape, global_args = None, activation = "ReLU", **kargs):
        self.activation = activation
        return in_shape

    def printNet(self, f):
        print(self.activation, file = f)
class PrintReLU(PrintActivation):
    # PrintActivation with its default name ("ReLU").
    pass
class Conv2D(InferModule):
    """2-D convolution layer (weights laid out as (out, in, kH, kW))."""

    def init(self, in_shape, out_channels, kernel_size, stride = 1, global_args = None, bias=True, padding = 0, activation = "ReLU", **kargs):
        self.prev = in_shape
        self.in_channels = in_shape[0]
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.activation = activation
        self.use_softplus = h.default(global_args, 'use_softplus', False)

        w_shape = (self.out_channels, self.in_channels, kernel_size, kernel_size)
        self.weight = torch.nn.Parameter(torch.Tensor(*w_shape))
        self.bias = torch.nn.Parameter(torch.Tensor(w_shape[0])) if bias else None

        return getShapeConv(in_shape, (out_channels, kernel_size, kernel_size), stride, padding)

    def forward(self, input, **kargs):
        return input.conv2d(self.weight, bias=self.bias, stride=self.stride, padding = self.padding )

    def printNet(self, f): # only complete if we've forwardt stride=1
        print("Conv2D", file = f)
        sz = list(self.prev)
        print(self.activation + ", filters={}, kernel_size={}, input_shape={}, stride={}, padding={}".format(self.out_channels, [self.kernel_size, self.kernel_size], list(reversed(sz)), [self.stride, self.stride], self.padding ), file = f)
        print(h.printListsNumpy([[list(p) for p in l ] for l in self.weight.permute(2,3,1,0).data]) , file= f)
        # A missing bias is serialized as zeros so the format stays fixed.
        print(h.printNumpy(self.bias if self.bias is not None else h.dten(self.out_channels)), file= f)

    def showNet(self, t = ""):
        sz = list(self.prev)
        print(t + "Conv2D, filters={}, kernel_size={}, input_shape={}, stride={}, padding={}".format(self.out_channels, [self.kernel_size, self.kernel_size], list(reversed(sz)), [self.stride, self.stride], self.padding ))

    def neuronCount(self):
        return 0
class ConvTranspose2D(InferModule):
    """2-D transposed convolution (weights laid out as (in, out, kH, kW))."""

    def init(self, in_shape, out_channels, kernel_size, stride = 1, global_args = None, bias=True, padding = 0, out_padding=0, activation = "ReLU", **kargs):
        self.prev = in_shape
        self.in_channels = in_shape[0]
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.out_padding = out_padding
        self.activation = activation
        self.use_softplus = h.default(global_args, 'use_softplus', False)

        weights_shape = (self.in_channels, self.out_channels, kernel_size, kernel_size)
        self.weight = torch.nn.Parameter(torch.Tensor(*weights_shape))
        if bias:
            # BUG FIX: bias must have out_channels entries. The original used
            # weights_shape[0] == in_channels, which conv_transpose2d rejects
            # whenever in_channels != out_channels.
            self.bias = torch.nn.Parameter(torch.Tensor(self.out_channels))
        else:
            self.bias = None # h.zeros(weights_shape[0])

        return getShapeConvTranspose(in_shape, (out_channels, kernel_size, kernel_size), stride, padding, out_padding)

    def forward(self, input, **kargs):
        return input.conv_transpose2d(self.weight, bias=self.bias, stride=self.stride, padding = self.padding, output_padding=self.out_padding)

    def printNet(self, f): # only complete if we've forwardt stride=1
        print("ConvTranspose2D", file = f)
        # BUG FIX: kernel_size is an int; list(self.kernel_size) raised.
        print(self.activation + ", filters={}, kernel_size={}, input_shape={}".format(self.out_channels, [self.kernel_size, self.kernel_size], list(self.prev) ), file = f)
        print(h.printListsNumpy([[list(p) for p in l ] for l in self.weight.permute(2,3,1,0).data]) , file= f)
        # Guard against bias=None (mirrors Conv2D.printNet).
        print(h.printNumpy(self.bias if self.bias is not None else h.dten(self.out_channels)), file= f)

    def neuronCount(self):
        return 0
class MaxPool2D(InferModule):
    """2-D max pooling; stride defaults to the kernel size."""

    def init(self, in_shape, kernel_size, stride = None, **kargs):
        self.prev = in_shape
        self.kernel_size = kernel_size
        self.stride = kernel_size if stride is None else stride
        # BUG FIX: the original passed the raw `stride` argument (possibly
        # None) to getShapeConv, crashing whenever stride was defaulted.
        return getShapeConv(in_shape, (in_shape[0], kernel_size, kernel_size), self.stride)

    def forward(self, x, **kargs):
        return x.max_pool2d(self.kernel_size, self.stride)

    def printNet(self, f):
        # BUG FIX: stride/kernel_size are ints (list() on them raised) and
        # self.shape never existed; use the stored configuration instead.
        print("MaxPool2D stride={}, kernel_size={}, input_shape={}".format(
            [self.stride, self.stride], [self.kernel_size, self.kernel_size],
            list(self.prev[1:] + self.prev[:1])), file = f)

    def neuronCount(self):
        # Max pooling is a nonlinear op: count its outputs.
        return h.product(self.outShape)
class AvgPool2D(InferModule):
    """2-D average pooling with fixed padding=1; stride defaults to kernel."""

    def init(self, in_shape, kernel_size, stride = None, **kargs):
        self.prev = in_shape
        self.kernel_size = kernel_size
        self.stride = kernel_size if stride is None else stride
        # padding=1 must mirror the padding used in forward().
        return getShapeConv(in_shape, (in_shape[0], kernel_size, kernel_size), self.stride, padding = 1)

    def forward(self, x, **kargs):
        # 1x1 spatial input: pooling would only pick up padding; pass through.
        if h.product(x.size()[2:]) == 1:
            return x
        return x.avg_pool2d(kernel_size = self.kernel_size, stride = self.stride, padding = 1)

    def printNet(self, f):
        # BUG FIX: stride/kernel_size are ints (list() on them raised) and
        # self.shape never existed; use the stored configuration instead.
        print("AvgPool2D stride={}, kernel_size={}, input_shape={}".format(
            [self.stride, self.stride], [self.kernel_size, self.kernel_size],
            list(self.prev[1:] + self.prev[:1])), file = f)

    def neuronCount(self):
        return h.product(self.outShape)
class AdaptiveAvgPool2D(InferModule):
    """Adaptive average pooling to a fixed spatial output size."""

    def init(self, in_shape, out_shape, **kargs):
        self.prev = in_shape
        self.out_shape = list(out_shape)
        # Channels are preserved; only the spatial dims change.
        return [in_shape[0]] + self.out_shape

    def forward(self, x, **kargs):
        return x.adaptive_avg_pool2d(self.out_shape)

    def printNet(self, f):
        print("AdaptiveAvgPool2D out_Shape={} input_shape={}".format(list(self.out_shape), list(self.prev[1:]+self.prev[:1]) ), file = f)

    def neuronCount(self):
        return h.product(self.outShape)
class Normalize(InferModule):
    """Channel-wise input normalization: (x - mean) / std."""

    def init(self, in_shape, mean, std, **kargs):
        self.mean_v = mean
        self.std_v = std
        self.mean = h.dten(mean)
        # Store the reciprocal so forward() multiplies instead of dividing.
        self.std = 1 / h.dten(std)
        return in_shape

    def forward(self, x, **kargs):
        per_sample = x.size()[1:]
        mu = self.mean.view(self.mean.shape[0], 1, 1).expand(*per_sample)
        inv_sigma = self.std.view(self.std.shape[0], 1, 1).expand(*per_sample)
        return (x - mu) * inv_sigma

    def neuronCount(self):
        return 0

    def printNet(self, f):
        print("Normalize mean={} std={}".format(self.mean_v, self.std_v), file = f)

    def showNet(self, t = ""):
        print(t + "Normalize mean={} std={}".format(self.mean_v, self.std_v))
class Flatten(InferModule):
    """Flatten all non-batch dimensions into one."""

    def init(self, in_shape, **kargs):
        return h.product(in_shape)

    def forward(self, x, **kargs):
        size = x.size()
        flat = h.product(size[1:])
        return x.view(size[0], flat)

    def neuronCount(self):
        return 0
class BatchNorm(InferModule):
    # Batch normalization over the full per-sample shape (gamma/beta have the
    # same shape as one input element), written to also work on abstract
    # domain elements via vanillaTensorPart().

    def init(self, in_shape, track_running_stats = True, momentum = 0.1, eps=1e-5, **kargs):
        self.gamma = torch.nn.Parameter(torch.Tensor(*in_shape))  # scale
        self.beta = torch.nn.Parameter(torch.Tensor(*in_shape))   # shift
        self.eps = eps
        self.track_running_stats = track_running_stats
        self.momentum = momentum

        # Running statistics are created lazily on the first training batch.
        self.running_mean = None
        self.running_var = None

        self.num_batches_tracked = 0
        return in_shape

    def reset_parameters(self):
        # Identity transform at initialization.
        self.gamma.data.fill_(1)
        self.beta.data.zero_()

    def forward(self, x, **kargs):
        exponential_average_factor = 0.0
        if self.training and self.track_running_stats:
            # TODO: if statement only here to tell the jit to skip emitting this when it is None
            if self.num_batches_tracked is not None:
                self.num_batches_tracked += 1
                if self.momentum is None:  # use cumulative moving average
                    exponential_average_factor = 1.0 / float(self.num_batches_tracked)
                else:  # use exponential moving average
                    exponential_average_factor = self.momentum

        # Statistics come from the concrete (tensor) part of x, detached so
        # batch-norm statistics do not receive gradients.
        new_mean = x.vanillaTensorPart().detach().mean(dim=0)
        new_var = x.vanillaTensorPart().detach().var(dim=0, unbiased=False)
        # new_var * 0 is NaN iff new_var has NaN/inf; bail out untouched.
        if torch.isnan(new_var * 0).any():
            return x
        if self.training:
            self.running_mean = (1 - exponential_average_factor) * self.running_mean + exponential_average_factor * new_mean if self.running_mean is not None else new_mean
            if self.running_var is None:
                self.running_var = new_var
            else:
                q = (1 - exponential_average_factor) * self.running_var
                r = exponential_average_factor * new_var
                self.running_var = q + r

        # When tracking, normalize with the running statistics (in training
        # too — note this differs from torch's nn.BatchNorm, which uses
        # batch statistics during training).
        if self.track_running_stats and self.running_mean is not None and self.running_var is not None:
            new_mean = self.running_mean
            new_var = self.running_var

        diver = 1 / (new_var + self.eps).sqrt()

        if torch.isnan(diver).any():
            print("Really shouldn't happen ever")
            return x
        else:
            out = (x - new_mean) * diver * self.gamma + self.beta
            return out

    def neuronCount(self):
        return 0
class Unflatten2d(InferModule):
    """Reshape a flat vector into a (C, w, w) square image."""

    def init(self, in_shape, w, **kargs):
        self.w = w
        # Channel count is whatever remains after the w*w spatial grid.
        self.outChan = int(h.product(in_shape) / (w * w))
        return (self.outChan, self.w, self.w)

    def forward(self, x, **kargs):
        batch = x.size()[0]
        return x.view(batch, self.outChan, self.w, self.w)

    def neuronCount(self):
        return 0
class View(InferModule):
    """Reshape the non-batch dimensions to a fixed target shape."""

    def init(self, in_shape, out_shape, **kargs):
        # The reshape must preserve the element count.
        assert h.product(in_shape) == h.product(out_shape)
        return out_shape

    def forward(self, x, **kargs):
        batch = x.size()[0]
        return x.view(batch, *self.outShape)

    def neuronCount(self):
        return 0
class Seq(InferModule):
    """Sequential composition of InferModule layers."""

    def init(self, in_shape, *layers, **kargs):
        self.layers = layers
        # nn.Sequential registers the children so their parameters are found.
        self.net = nn.Sequential(*layers)
        self.prev = in_shape
        shape = in_shape
        for layer in layers:
            shape = layer.infer(shape, **kargs).outShape
        return shape

    def forward(self, x, **kargs):

        out = x
        for layer in self.layers:
            out = layer(out, **kargs)
        return out

    def clip_norm(self):
        for layer in self.layers:
            layer.clip_norm()

    def regularize(self, p):
        total = 0
        for layer in self.layers:
            total += layer.regularize(p)
        return total

    def remove_norm(self):
        for layer in self.layers:
            layer.remove_norm()

    def printNet(self, f):
        for layer in self.layers:
            layer.printNet(f)

    def showNet(self, *args, **kargs):
        for layer in self.layers:
            layer.showNet(*args, **kargs)

    def neuronCount(self):
        return sum(layer.neuronCount() for layer in self.layers)

    def depth(self):
        return sum(layer.depth() for layer in self.layers)
def FFNN(layers, last_lin = False, last_zono = False, **kargs):
    """Feed-forward net: one (print, linear, activation) stage per width.

    last_lin leaves the final layer purely affine; last_zono additionally
    correlates the abstract element before that final affine layer.
    """
    hidden = layers
    tail = []
    if last_lin:
        hidden = layers[:-1]
        tail = [PrintActivation(activation = "Affine"), Linear(layers[-1],**kargs)]
        if last_zono:
            tail = [CorrelateAll(only_train=False)] + tail

    stages = [Seq(PrintActivation(**kargs), Linear(s, **kargs), activation(**kargs)) for s in hidden]
    return Seq(*(stages + tail))
def Conv(*args, **kargs):
    # Convenience wrapper: Conv2D followed by an activation layer.
    return Seq(Conv2D(*args, **kargs), activation(**kargs))
def ConvTranspose(*args, **kargs):
    # Convenience wrapper: ConvTranspose2D followed by an activation layer.
    return Seq(ConvTranspose2D(*args, **kargs), activation(**kargs))
# Short alias for MaxPool2D, used by LeNet-style layer-spec tuples.
MP = MaxPool2D
def LeNet(conv_layers, ly = [], bias = True, normal=False, **kargs):
    """LeNet-style builder: conv/pool specs followed by an optional FFNN.

    Each spec is an InferModule (kept as-is), a ("pool", ...) tuple
    (MaxPool2D), or a (channels, kernel[, ...], stride) tuple (Conv).
    """
    def transfer(tp):
        if isinstance(tp, InferModule):
            return tp
        if isinstance(tp[0], str):
            return MaxPool2D(*tp[1:])
        stride = tp[-1] if len(tp) == 4 else 1
        return Conv(out_channels = tp[0], kernel_size = tp[1], stride = stride, bias=bias, normal=normal, **kargs)

    conv = [transfer(s) for s in conv_layers]
    if len(ly) > 0:
        return Seq(*conv, FFNN(ly, **kargs, bias=bias))
    return Seq(*conv)
def InvLeNet(ly, w, conv_layers, bias = True, normal=False, **kargs):
    """Inverse LeNet: FFNN -> unflatten to (C, w, w) -> transposed convs."""
    def transfer(tp):
        # tp = (out_channels, kernel_size, stride, padding, out_padding)
        return ConvTranspose(out_channels = tp[0], kernel_size = tp[1], stride = tp[2], padding = tp[3], out_padding = tp[4], bias=False, normal=normal)

    deconvs = [transfer(s) for s in conv_layers]
    return Seq(FFNN(ly, bias=bias), Unflatten2d(w), *deconvs)
class FromByteImg(InferModule):
    """Convert byte-valued images (0..255) to floats in [0, 1)."""

    def init(self, in_shape, **kargs):
        return in_shape

    def forward(self, x, **kargs):
        return x.to_dtype() / 256.

    def neuronCount(self):
        return 0
class Skip(InferModule):
    """Run two subnetworks on the same input; concatenate outputs on dim 1."""

    def init(self, in_shape, net1, net2, **kargs):
        self.net1 = net1.infer(in_shape, **kargs)
        self.net2 = net2.infer(in_shape, **kargs)
        # Branches must agree on every dimension except channels.
        assert net1.outShape[1:] == net2.outShape[1:]
        return [net1.outShape[0] + net2.outShape[0]] + net1.outShape[1:]

    def forward(self, x, **kargs):
        left = self.net1(x, **kargs)
        right = self.net2(x, **kargs)
        return left.cat(right, dim=1)

    def regularize(self, p):
        return self.net1.regularize(p) + self.net2.regularize(p)

    def clip_norm(self):
        for net in (self.net1, self.net2):
            net.clip_norm()

    def remove_norm(self):
        for net in (self.net1, self.net2):
            net.remove_norm()

    def neuronCount(self):
        return self.net1.neuronCount() + self.net2.neuronCount()

    def printNet(self, f):
        print("SkipNet1", file=f)
        self.net1.printNet(f)
        print("SkipNet2", file=f)
        self.net2.printNet(f)
        print("SkipCat dim=1", file=f)

    def showNet(self, t = ""):
        print(t+"SkipNet1")
        self.net1.showNet(" " + t)
        print(t+"SkipNet2")
        self.net2.showNet(" " + t)
        print(t+"SkipCat dim=1")
class ParSum(InferModule):
    """Run two subnetworks in parallel and sum their (same-shape) outputs."""

    def init(self, in_shape, net1, net2, **kargs):
        self.net1 = net1.infer(in_shape, **kargs)
        self.net2 = net2.infer(in_shape, **kargs)
        assert net1.outShape == net2.outShape
        return net1.outShape

    def forward(self, x, **kargs):
        left = self.net1(x, **kargs)
        right = self.net2(x, **kargs)
        # Summation is delegated to the abstract element via addPar so the
        # combined branches remain sound in the abstract domain.
        return x.addPar(left, right)

    def clip_norm(self):
        for net in (self.net1, self.net2):
            net.clip_norm()

    def remove_norm(self):
        for net in (self.net1, self.net2):
            net.remove_norm()

    def neuronCount(self):
        return self.net1.neuronCount() + self.net2.neuronCount()

    def depth(self):
        # Parallel branches overlap: depth is the deeper of the two.
        return max(self.net1.depth(), self.net2.depth())

    def printNet(self, f):
        print("ParNet1", file=f)
        self.net1.printNet(f)
        print("ParNet2", file=f)
        self.net2.printNet(f)
        print("ParCat dim=1", file=f)

    def showNet(self, t = ""):
        print(t + "ParNet1")
        self.net1.showNet(" " + t)
        print(t + "ParNet2")
        self.net2.showNet(" " + t)
        print(t + "ParSum")
class ToZono(Identity):
    """Switch the abstract element to a zonotope ('hybrid_to_zono').

    With only_train=True the conversion is applied only in training mode.
    """

    def init(self, in_shape, customRelu = None, only_train = False, **kargs):
        self.customRelu = customRelu
        self.only_train = only_train
        return in_shape

    def forward(self, x, **kargs):
        if self.only_train and not self.training:
            return x
        return self.abstract_forward(x, **kargs)

    def abstract_forward(self, x, **kargs):
        return x.abstractApplyLeaf('hybrid_to_zono', customRelu = self.customRelu)

    def showNet(self, t = ""):
        print(t + self.__class__.__name__ + " only_train=" + str(self.only_train))
class CorrelateAll(ToZono):
    """Like ToZono, but asks the conversion to correlate all dimensions."""

    def abstract_forward(self, x, **kargs):
        return x.abstractApplyLeaf('hybrid_to_zono',correlate=True, customRelu = self.customRelu)
|
703 |
+
|
704 |
+
class ToHZono(ToZono):
    """Inverse direction of ToZono: converts a zonotope to a hybrid element."""

    def abstract_forward(self, x, **kargs):
        return x.abstractApplyLeaf('zono_to_hybrid',customRelu = self.customRelu)
|
707 |
+
|
708 |
+
class Concretize(ToZono):
    """Concretizes the abstract element (collapses it to its concretization).

    Note: unlike ToZono, only_train defaults to True here, so by default
    concretization happens only during training.
    """

    def init(self, in_shape, only_train = True, **kargs):
        self.only_train = only_train
        return in_shape

    def abstract_forward(self, x, **kargs):
        return x.abstractApplyLeaf('concretize')
|
715 |
+
|
716 |
+
# stochastic correlation
|
717 |
+
# stochastic correlation
class CorrRand(Concretize):
    """Randomly correlates num_correlate dimensions of the abstract element."""

    def init(self, in_shape, num_correlate, only_train = True, **kargs):
        self.only_train = only_train
        self.num_correlate = num_correlate
        return in_shape

    def abstract_forward(self, x):
        return x.abstractApplyLeaf("stochasticCorrelate", self.num_correlate)

    def showNet(self, t = ""):
        print(t + self.__class__.__name__ + " only_train=" + str(self.only_train) + " num_correlate="+ str(self.num_correlate))
|
728 |
+
|
729 |
+
class CorrMaxK(CorrRand):
    """Correlates the num_correlate dimensions chosen by correlateMaxK
    (instead of randomly, as in CorrRand)."""

    def abstract_forward(self, x):
        return x.abstractApplyLeaf("correlateMaxK", self.num_correlate)
|
732 |
+
|
733 |
+
|
734 |
+
class CorrMaxPool2D(Concretize):
    """Correlates dimensions selected by a 2D max-pool over the element.

    kernel_size is used for both the pooling kernel and its stride.
    max_type selects the abstract-max variant (see ai.MaxTypes).
    """

    def init(self,in_shape, kernel_size, only_train = True, max_type = ai.MaxTypes.head_beta, **kargs):
        self.only_train = only_train
        self.kernel_size = kernel_size
        self.max_type = max_type
        return in_shape

    def abstract_forward(self, x):
        return x.abstractApplyLeaf("correlateMaxPool", kernel_size = self.kernel_size, stride = self.kernel_size, max_type = self.max_type)

    def showNet(self, t = ""):
        print(t + self.__class__.__name__ + " only_train=" + str(self.only_train) + " kernel_size="+ str(self.kernel_size) + " max_type=" +str(self.max_type))
|
746 |
+
|
747 |
+
class CorrMaxPool3D(Concretize):
    """Correlates dimensions selected by a 3D max-pool over the element.

    kernel_size is used for both the pooling kernel and its stride.
    max_type selects the abstract-max variant (see ai.MaxTypes).
    """

    def init(self,in_shape, kernel_size, only_train = True, max_type = ai.MaxTypes.only_beta, **kargs):
        self.only_train = only_train
        self.kernel_size = kernel_size
        self.max_type = max_type
        return in_shape

    def abstract_forward(self, x):
        # Same as CorrMaxPool2D but pools with F.max_pool3d.
        return x.abstractApplyLeaf("correlateMaxPool", kernel_size = self.kernel_size, stride = self.kernel_size, max_type = self.max_type, max_pool = F.max_pool3d)

    def showNet(self, t = ""):
        # BUG FIX: max_type was concatenated without str(), raising TypeError
        # for non-string max_type values (CorrMaxPool2D already used str()).
        print(t + self.__class__.__name__ + " only_train=" + str(self.only_train) + " kernel_size="+ str(self.kernel_size) + " max_type=" + str(self.max_type))
|
759 |
+
|
760 |
+
class CorrFix(Concretize):
    """Correlates a fixed, evenly spaced set of k flattened indices."""

    def init(self,in_shape, k, only_train = True, **kargs):
        self.k = k
        self.only_train = only_train
        return in_shape

    def abstract_forward(self, x):
        sz = x.size()
        """
        # for more control in the future
        indxs_1 = torch.arange(start = 0, end = sz[1], step = math.ceil(sz[1] / self.dims[1]) )
        indxs_2 = torch.arange(start = 0, end = sz[2], step = math.ceil(sz[2] / self.dims[2]) )
        indxs_3 = torch.arange(start = 0, end = sz[3], step = math.ceil(sz[3] / self.dims[3]) )

        indxs = torch.stack(torch.meshgrid((indxs_1,indxs_2,indxs_3)), dim=3).view(-1,3)
        """
        # Pick ~k evenly spaced positions over the flattened non-batch dims,
        # then broadcast the same index set over the batch dimension.
        szm = h.product(sz[1:])
        indxs = torch.arange(start = 0, end = szm, step = math.ceil(szm / self.k))
        indxs = indxs.unsqueeze(0).expand(sz[0], indxs.size()[0])

        return x.abstractApplyLeaf("correlate", indxs)

    def showNet(self, t = ""):
        print(t + self.__class__.__name__ + " only_train=" + str(self.only_train) + " k="+ str(self.k))
|
785 |
+
|
786 |
+
|
787 |
+
class DecorrRand(Concretize):
    """Randomly decorrelates num_decorrelate dimensions of the element."""

    def init(self, in_shape, num_decorrelate, only_train = True, **kargs):
        self.only_train = only_train
        self.num_decorrelate = num_decorrelate
        return in_shape

    def abstract_forward(self, x):
        return x.abstractApplyLeaf("stochasticDecorrelate", self.num_decorrelate)
|
795 |
+
|
796 |
+
class DecorrMin(Concretize):
    """Decorrelates the num_decorrelate least significant dimensions
    (as chosen by decorrelateMin); optionally keeps num_to_keep instead."""

    def init(self, in_shape, num_decorrelate, only_train = True, num_to_keep = False, **kargs):
        self.only_train = only_train
        self.num_decorrelate = num_decorrelate
        self.num_to_keep = num_to_keep
        return in_shape

    def abstract_forward(self, x):
        return x.abstractApplyLeaf("decorrelateMin", self.num_decorrelate, num_to_keep = self.num_to_keep)

    def showNet(self, t = ""):
        print(t + self.__class__.__name__ + " only_train=" + str(self.only_train) + " k="+ str(self.num_decorrelate) + " num_to_keep=" + str(self.num_to_keep) )
|
809 |
+
|
810 |
+
class DeepLoss(ToZono):
    """Attaches an auxiliary 'deep loss' to the abstract element at this layer.

    The element is wrapped in a TaggedDomain carrying an MLoss object; when
    the final loss is computed, bw (scheduled, in [0,1]) blends the
    downstream loss with this layer's deep_loss term.
    """

    def init(self, in_shape, bw = 0.01, act = F.relu, **kargs): # weight must be between 0 and 1
        # Deep loss is a training-only construct.
        self.only_train = True
        self.bw = S.Const.initConst(bw)
        self.act = act
        return in_shape

    def abstract_forward(self, x, **kargs):
        # Points carry no abstract width, so there is nothing to penalize.
        if x.isPoint():
            return x
        return ai.TaggedDomain(x, self.MLoss(self, x))

    class MLoss():
        # Closure-like helper pairing this layer (obj) with the abstract
        # element (x) captured at forward time.
        def __init__(self, obj, x):
            self.obj = obj
            self.x = x

        def loss(self, a, *args, lr = 1, time = 0, **kargs):
            # Blend: (1 - bw) * downstream loss + bw * deep loss at x.
            bw = self.obj.bw.getVal(time = time)
            pre_loss = a.loss(*args, time = time, **kargs, lr = lr * (1 - bw))
            if bw <= 0.0:
                return pre_loss
            return (1 - bw) * pre_loss + bw * self.x.deep_loss(act = self.obj.act)

    def showNet(self, t = ""):
        print(t + self.__class__.__name__ + " only_train=" + str(self.only_train) + " bw="+ str(self.bw) + " act=" + str(self.act) )
|
836 |
+
|
837 |
+
class IdentLoss(DeepLoss):
    """DeepLoss variant that contributes nothing: passes x through untouched."""

    def abstract_forward(self, x, **kargs):
        return x
|
840 |
+
|
841 |
+
def SkipNet(net1, net2, ffnn, **kargs):
    """Two parallel branches joined by Skip, followed by an FFNN head."""
    return Seq(Skip(net1,net2), FFNN(ffnn, **kargs))
|
843 |
+
|
844 |
+
def WideBlock(out_filters, downsample=False, k=3, bias=False, **kargs):
    """Wide-ResNet style residual block built from ParSum.

    downsample=True halves the spatial resolution (stride 2) in both the
    skip path and the first conv of the main path.
    """
    if not downsample:
        k_first = 3
        skip_stride = 1
        k_skip = 1
    else:
        k_first = 4
        skip_stride = 2
        k_skip = 2

    # conv2d280(input)
    blockA = Conv2D(out_filters, kernel_size=k_skip, stride=skip_stride, padding=0, bias=bias, normal=True, **kargs)

    # conv2d282(relu(conv2d278(input)))
    blockB = Seq( Conv(out_filters, kernel_size = k_first, stride = skip_stride, padding = 1, bias=bias, normal=True, **kargs)
                , Conv2D(out_filters, kernel_size = k, stride = 1, padding = 1, bias=bias, normal=True, **kargs))
    return Seq(ParSum(blockA, blockB), activation(**kargs))
|
861 |
+
|
862 |
+
|
863 |
+
|
864 |
+
def BasicBlock(in_planes, planes, stride=1, bias = False, skip_net = False, **kargs):
    """ResNet basic block: two 3x3 convs plus a residual connection.

    When the shape changes (stride != 1 or channel count differs) the
    residual path is a 1x1 conv; otherwise it is the identity, unless
    skip_net suppresses the residual entirely.
    """
    block = Seq( Conv(planes, kernel_size = 3, stride = stride, padding = 1, bias=bias, normal=True, **kargs)
               , Conv2D(planes, kernel_size = 3, stride = 1, padding = 1, bias=bias, normal=True, **kargs))

    if stride != 1 or in_planes != planes:
        block = ParSum(block, Conv2D(planes, kernel_size=1, stride=stride, bias=bias, normal=True, **kargs))
    elif not skip_net:
        block = ParSum(block, Identity())
    return Seq(block, activation(**kargs))
|
873 |
+
|
874 |
+
# https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py
|
875 |
+
# https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py
def ResNet(blocksList, extra = [], bias = False, **kargs):
    """Builds a ResNet from a list of per-stage block counts.

    blocksList: number of BasicBlocks per stage; channel width doubles
        each stage starting from 64.
    extra: list of (module, index) pairs; module is appended after the
        BasicBlock at that index.
    """
    layers = []
    in_planes = 64
    planes = 64
    stride = 0
    for num_blocks in blocksList:
        # First two stages use stride 1 then 2; later stages keep stride 2.
        if stride < 2:
            stride += 1

        # Only the first block of each stage downsamples.
        strides = [stride] + [1]*(num_blocks-1)
        for stride in strides:
            layers.append(BasicBlock(in_planes, planes, stride, bias = bias, **kargs))
            in_planes = planes
        planes *= 2

    print("RESlayers: ", len(layers))
    for e,l in extra:
        layers[l] = Seq(layers[l], e)

    return Seq(Conv(64, kernel_size=3, stride=1, padding = 1, bias=bias, normal=True, printShape=True),
               *layers)
|
897 |
+
|
898 |
+
|
899 |
+
|
900 |
+
def DenseNet(growthRate, depth, reduction, num_classes, bottleneck = True):
    """Builds a DenseNet: three dense blocks separated by transitions.

    growthRate: channels added by each dense layer.
    depth: total depth; (depth - 4) // 3 layers per dense block
        (halved again when bottleneck layers are used).
    reduction: channel compression factor at each transition.
    """

    def Bottleneck(growthRate):
        # 1x1 bottleneck conv expanding to 4*growthRate, then 3x3 conv,
        # concatenated with the input via Skip.
        interChannels = 4*growthRate

        n = Seq( ReLU(),
                 Conv2D(interChannels, kernel_size=1, bias=True, ibp_init = True),
                 ReLU(),
                 Conv2D(growthRate, kernel_size=3, padding=1, bias=True, ibp_init = True)
               )

        return Skip(Identity(), n)

    def SingleLayer(growthRate):
        # Plain dense layer: one 3x3 conv, concatenated with the input.
        n = Seq( ReLU(),
                 Conv2D(growthRate, kernel_size=3, padding=1, bias=True, ibp_init = True))
        return Skip(Identity(), n)

    def Transition(nOutChannels):
        # Compress channels with a 1x1 conv and halve resolution.
        return Seq( ReLU(),
                    Conv2D(nOutChannels, kernel_size = 1, bias = True, ibp_init = True),
                    AvgPool2D(kernel_size=2))

    def make_dense(growthRate, nDenseBlocks, bottleneck):
        return Seq(*[Bottleneck(growthRate) if bottleneck else SingleLayer(growthRate) for i in range(nDenseBlocks)])

    nDenseBlocks = (depth-4) // 3
    if bottleneck:
        nDenseBlocks //= 2

    nChannels = 2*growthRate
    conv1 = Conv2D(nChannels, kernel_size=3, padding=1, bias=True, ibp_init = True)
    dense1 = make_dense(growthRate, nDenseBlocks, bottleneck)
    nChannels += nDenseBlocks * growthRate
    nOutChannels = int(math.floor(nChannels*reduction))
    trans1 = Transition(nOutChannels)

    nChannels = nOutChannels
    dense2 = make_dense(growthRate, nDenseBlocks, bottleneck)
    nChannels += nDenseBlocks*growthRate
    nOutChannels = int(math.floor(nChannels*reduction))
    trans2 = Transition(nOutChannels)

    nChannels = nOutChannels
    dense3 = make_dense(growthRate, nDenseBlocks, bottleneck)

    return Seq(conv1, dense1, trans1, dense2, trans2, dense3,
               ReLU(),
               AvgPool2D(kernel_size=8),
               CorrelateAll(only_train=False, ignore_point = True),
               Linear(num_classes, ibp_init = True))
|
951 |
+
|
convert.py
ADDED
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import future
|
2 |
+
import builtins
|
3 |
+
import past
|
4 |
+
import six
|
5 |
+
|
6 |
+
from timeit import default_timer as timer
|
7 |
+
from datetime import datetime
|
8 |
+
import argparse
|
9 |
+
import torch
|
10 |
+
import torch.nn as nn
|
11 |
+
import torch.nn.functional as F
|
12 |
+
import torch.optim as optim
|
13 |
+
from torchvision import datasets, transforms, utils
|
14 |
+
from torch.utils.data import Dataset
|
15 |
+
|
16 |
+
import inspect
|
17 |
+
from inspect import getargspec
|
18 |
+
import os
|
19 |
+
import helpers as h
|
20 |
+
from helpers import Timer
|
21 |
+
import copy
|
22 |
+
import random
|
23 |
+
from itertools import count
|
24 |
+
|
25 |
+
from components import *
|
26 |
+
import models
|
27 |
+
|
28 |
+
import goals
|
29 |
+
from goals import *
|
30 |
+
import math
|
31 |
+
|
32 |
+
from torch.serialization import SourceChangeWarning
|
33 |
+
import warnings
|
34 |
+
|
35 |
+
|
36 |
+
parser = argparse.ArgumentParser(description='Convert a pickled PyTorch DiffAI net to an abstract onyx net which returns the interval concretization around the final logits. The first dimension of the output is the natural center, the second dimension is the lb, the third is the ub', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
37 |
+
parser.add_argument('-n', '--net', type=str, default=None, metavar='N', help='Saved and pickled net to use, in pynet format', required=True)
|
38 |
+
parser.add_argument('-d', '--domain', type=str, default="Point()", help='picks which abstract goals to use for testing. Uses box. Doesn\'t use time, so don\'t use Lin. Unless point, should specify a width w.')
|
39 |
+
parser.add_argument('-b', '--batch-size', type=int, default=1, help='The batch size to export. Not sure this matters.')
|
40 |
+
|
41 |
+
parser.add_argument('-o', '--out', type=str, default="convert_out/", metavar='F', help='Where to save the net.')
|
42 |
+
|
43 |
+
parser.add_argument('--update-net', type=h.str2bool, nargs='?', const=True, default=False, help="should update test net")
|
44 |
+
parser.add_argument('--net-name', type=str, choices = h.getMethodNames(models), default=None, help="update test net name")
|
45 |
+
|
46 |
+
parser.add_argument('--save-name', type=str, default=None, help="name to save the net with. Defaults to <domain>___<netfile-.pynet>.onyx")
|
47 |
+
|
48 |
+
parser.add_argument('-D', '--dataset', choices = [n for (n,k) in inspect.getmembers(datasets, inspect.isclass) if issubclass(k, Dataset)]
|
49 |
+
, default="MNIST", help='picks which dataset to use.')
|
50 |
+
|
51 |
+
parser.add_argument('--map-to-cpu', type=h.str2bool, nargs='?', const=True, default=False, help="map cuda operations in save back to cpu; enables to run on a computer without a GPU")
|
52 |
+
|
53 |
+
parser.add_argument('--tf-input', type=h.str2bool, nargs='?', const=True, default=False, help="change the shape of the input data from batch-channels-height-width (standard in pytroch) to batch-height-width-channels (standard in tf)")
|
54 |
+
|
55 |
+
args = parser.parse_args()
|
56 |
+
|
57 |
+
out_dir = args.out
|
58 |
+
|
59 |
+
if not os.path.exists(out_dir):
|
60 |
+
os.makedirs(out_dir)
|
61 |
+
|
62 |
+
with warnings.catch_warnings(record=True) as w:
|
63 |
+
warnings.simplefilter("always", SourceChangeWarning)
|
64 |
+
if args.map_to_cpu:
|
65 |
+
net = torch.load(args.net, map_location='cpu')
|
66 |
+
else:
|
67 |
+
net = torch.load(args.net)
|
68 |
+
|
69 |
+
net_name = None
|
70 |
+
|
71 |
+
if args.net_name is not None:
|
72 |
+
net_name = args.net_name
|
73 |
+
elif args.update_net and 'name' in dir(net):
|
74 |
+
net_name = net.name
|
75 |
+
|
76 |
+
|
77 |
+
def buildNet(n, input_dims, num_classes):
    """Instantiate model constructor n, prepend the per-dataset input
    normalization, infer shapes, and clip norms.

    n: a model constructor from models.py taking num_classes.
    input_dims: input shape used for shape inference.
    num_classes: number of output classes.
    """
    n = n(num_classes)
    # BUG FIX: the SVHN / Imagenet12 branches referenced the undefined
    # name `dataset` (NameError); they must read args.dataset like the
    # branches above.
    if args.dataset in ["MNIST"]:
        n = Seq(Normalize([0.1307], [0.3081] ), n)
    elif args.dataset in ["CIFAR10", "CIFAR100"]:
        n = Seq(Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]), n)
    elif args.dataset in ["SVHN"]:
        n = Seq(Normalize([0.5,0.5,0.5], [0.2, 0.2, 0.2]), n)
    elif args.dataset in ["Imagenet12"]:
        n = Seq(Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225]), n)

    n = n.infer(input_dims)
    n.clip_norm()
    return n
|
91 |
+
|
92 |
+
|
93 |
+
if net_name is not None:
|
94 |
+
n = getattr(models,net_name)
|
95 |
+
n = buildNet(n, net.inShape, net.outShape)
|
96 |
+
n.load_state_dict(net.state_dict())
|
97 |
+
net = n
|
98 |
+
|
99 |
+
net = net.to(h.device)
|
100 |
+
net.remove_norm()
|
101 |
+
|
102 |
+
domain = eval(args.domain)
|
103 |
+
|
104 |
+
if args.save_name is None:
|
105 |
+
save_name = h.prepareDomainNameForFile(args.domain) + "___" + os.path.basename(args.net)[:-6] + ".onyx"
|
106 |
+
else:
|
107 |
+
save_name = args.save_name
|
108 |
+
|
109 |
+
def abstractNet(inpt):
    """Run the loaded net abstractly on inpt and stack [center, lb, ub].

    Reads the script globals `args`, `domain`, and `net`. Returns a tensor
    whose second dimension holds the natural center, the lower bound, and
    the upper bound of the output logits, in that order.
    """
    if args.tf_input:
        # Convert TF layout (NHWC) to the PyTorch layout (NCHW).
        inpt = inpt.permute(0, 3, 1, 2)
    dom = domain.box(inpt, w = None)
    o = net(dom, onyx=True).unsqueeze(1)

    out = torch.cat([o.vanillaTensorPart(), o.lb().vanillaTensorPart(), o.ub().vanillaTensorPart()], dim=1)
    return out
|
117 |
+
|
118 |
+
input_shape = [args.batch_size] + list(net.inShape)
|
119 |
+
if args.tf_input:
|
120 |
+
input_shape = [args.batch_size] + list(net.inShape)[1:] + [net.inShape[0]]
|
121 |
+
dummy = h.zeros(input_shape)
|
122 |
+
|
123 |
+
abstractNet(dummy)
|
124 |
+
|
125 |
+
class AbstractNet(nn.Module):
    """nn.Module wrapper around the abstract forward function so it can be
    traced and exported (e.g. via torch.onnx.export).

    Holds a reference to the underlying net (so its parameters are visible)
    and, when the domain carries a sub-network, records it as netDom.
    """

    def __init__(self, domain, net, abstractNet):
        super(AbstractNet, self).__init__()
        self.net = net
        self.abstractNet = abstractNet
        # Keep the domain's embedded network, if any, reachable from this
        # module.
        dom_net = getattr(domain, "net", None)
        if dom_net is not None:
            self.netDom = dom_net

    def forward(self, inpt):
        # All real work happens in the wrapped abstract forward function.
        return self.abstractNet(inpt)
|
135 |
+
|
136 |
+
absNet = AbstractNet(domain, net, abstractNet)
|
137 |
+
|
138 |
+
out_path = os.path.join(out_dir, save_name)
|
139 |
+
print("Saving:", out_path)
|
140 |
+
|
141 |
+
param_list = ["param"+str(i) for i in range(len(list(absNet.parameters())))]
|
142 |
+
|
143 |
+
torch.onnx.export(absNet, dummy, out_path, verbose=False, input_names=["actual_input"] + param_list, output_names=["output"])
|
144 |
+
|
goals.py
ADDED
@@ -0,0 +1,529 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import future
|
2 |
+
import builtins
|
3 |
+
import past
|
4 |
+
import six
|
5 |
+
|
6 |
+
import torch
|
7 |
+
import torch.nn as nn
|
8 |
+
import torch.nn.functional as F
|
9 |
+
import torch.optim as optim
|
10 |
+
import torch.autograd
|
11 |
+
import components as comp
|
12 |
+
from torch.distributions import multinomial, categorical
|
13 |
+
|
14 |
+
import math
|
15 |
+
import numpy as np
|
16 |
+
|
17 |
+
try:
|
18 |
+
from . import helpers as h
|
19 |
+
from . import ai
|
20 |
+
from . import scheduling as S
|
21 |
+
except:
|
22 |
+
import helpers as h
|
23 |
+
import ai
|
24 |
+
import scheduling as S
|
25 |
+
|
26 |
+
|
27 |
+
|
28 |
+
class WrapDom(object):
    """Base for goal objects that wrap another domain `a` and re-wrap its
    results in self.Domain (defined by subclasses).

    NOTE(review): `a` may be given as a string and is eval'd — this is the
    file's established (trusted-input) configuration mechanism.
    """

    def __init__(self, a):
        self.a = eval(a) if type(a) is str else a

    def box(self, *args, **kargs):
        return self.Domain(self.a.box(*args, **kargs))

    def boxBetween(self, *args, **kargs):
        return self.Domain(self.a.boxBetween(*args, **kargs))

    def line(self, *args, **kargs):
        return self.Domain(self.a.line(*args, **kargs))
|
40 |
+
|
41 |
+
class DList(object):
    """A weighted list of domains combined into one ai.ListDomain.

    Each sub-domain's loss is scaled by its (schedulable) weight; weights
    are normalized to sum to 1 via getDiv.
    """

    Domain = ai.ListDomain

    class MLoss():
        # Scales a tagged sub-domain's loss by its normalized weight aw.
        def __init__(self, aw):
            self.aw = aw
        def loss(self, dom, *args, lr = 1, **kargs):
            if self.aw <= 0.0:
                return 0
            return self.aw * dom.loss(*args, lr = lr * self.aw, **kargs)

    def __init__(self, *al):
        # Default mixture: mostly standard training with a little Box.
        if len(al) == 0:
            al = [("Point()", 1.0), ("Box()", 0.1)]

        self.al = [(eval(a) if type(a) is str else a, S.Const.initConst(aw)) for a,aw in al]

    def getDiv(self, **kargs):
        # Normalizing constant so the (scheduled) weights sum to 1.
        return 1.0 / sum(aw.getVal(**kargs) for _,aw in self.al)

    def box(self, *args, **kargs):
        m = self.getDiv(**kargs)
        return self.Domain(ai.TaggedDomain(a.box(*args, **kargs), DList.MLoss(aw.getVal(**kargs) * m)) for a,aw in self.al)

    def boxBetween(self, *args, **kargs):
        m = self.getDiv(**kargs)
        return self.Domain(ai.TaggedDomain(a.boxBetween(*args, **kargs), DList.MLoss(aw.getVal(**kargs) * m)) for a,aw in self.al)

    def line(self, *args, **kargs):
        m = self.getDiv(**kargs)
        return self.Domain(ai.TaggedDomain(a.line(*args, **kargs), DList.MLoss(aw.getVal(**kargs) * m)) for a,aw in self.al)

    def __str__(self):
        return "DList(%s)" % h.sumStr("("+str(a)+","+str(w)+")" for a,w in self.al)
|
75 |
+
|
76 |
+
class Mix(DList):
    """Two-domain DList with independent constant weights."""

    def __init__(self, a="Point()", b="Box()", aw = 1.0, bw = 0.1):
        super(Mix, self).__init__((a,aw), (b,bw))
|
79 |
+
|
80 |
+
class LinMix(DList):
    """Two-domain DList where a's weight is the complement (1 - bw) of b's
    scheduled weight, so the pair always sums to 1."""

    def __init__(self, a="Point()", b="Box()", bw = 0.1):
        super(LinMix, self).__init__((a,S.Complement(bw)), (b,bw))
|
83 |
+
|
84 |
+
class DProb(object):
    """Chooses one domain at random per call according to fixed probabilities
    (as opposed to DList, which combines all domains every call)."""

    def __init__(self, *doms):
        if len(doms) == 0:
            doms = [("Point()", 0.8), ("Box()", 0.2)]
        # Normalize the given weights into a probability distribution.
        div = 1.0 / sum(float(aw) for _,aw in doms)
        self.domains = [eval(a) if type(a) is str else a for a,_ in doms]
        self.probs = [ div * float(aw) for _,aw in doms]

    def chooseDom(self):
        return self.domains[np.random.choice(len(self.domains), p = self.probs)] if len(self.domains) > 1 else self.domains[0]

    def box(self, *args, **kargs):
        domain = self.chooseDom()
        return domain.box(*args, **kargs)

    def line(self, *args, **kargs):
        domain = self.chooseDom()
        return domain.line(*args, **kargs)

    def __str__(self):
        return "DProb(%s)" % h.sumStr("("+str(a)+","+str(w)+")" for a,w in zip(self.domains, self.probs))
|
105 |
+
|
106 |
+
class Coin(DProb):
    """Two-domain DProb: a with probability ap, b with probability bp."""

    def __init__(self, a="Point()", b="Box()", ap = 0.8, bp = 0.2):
        super(Coin, self).__init__((a,ap), (b,bp))
|
109 |
+
|
110 |
+
class Point(object):
    """The trivial goal: no perturbation, the input itself (standard training).

    Serves as the base class for the attack/sampling goals below.
    """

    Domain = h.dten

    def __init__(self, **kargs):
        pass

    def box(self, original, *args, **kargs):
        # A point "box" is just the original input.
        return original

    def line(self, original, other, *args, **kargs):
        # Midpoint of the segment between the two inputs.
        return (original + other) / 2

    def boxBetween(self, o1, o2, *args, **kargs):
        # Midpoint of the box spanned by the two inputs.
        return (o1 + o2) / 2

    def __str__(self):
        return "Point()"
|
126 |
+
|
127 |
+
class PointA(Point):
    """Point variant whose boxBetween picks the first endpoint."""

    def boxBetween(self, o1, o2, *args, **kargs):
        return o1

    def __str__(self):
        return "PointA()"
|
133 |
+
|
134 |
+
class PointB(Point):
    """Point variant whose boxBetween picks the second endpoint."""

    def boxBetween(self, o1, o2, *args, **kargs):
        return o2

    def __str__(self):
        return "PointB()"
|
140 |
+
|
141 |
+
|
142 |
+
class NormalPoint(Point):
    """Gaussian-noise goal: perturbs the input with w-scaled normal noise.

    w (or the constructor override) multiplies a standard normal sample,
    so it acts as the noise standard deviation, not the variance.
    """

    def __init__(self, w = None, **kargs):
        # If set, overrides the w passed to box().
        self.epsilon = w

    def box(self, original, w, *args, **kargs):
        """Return original + N(0, 1) * w (self.epsilon takes precedence over w)."""
        if not self.epsilon is None:
            w = self.epsilon

        inter = torch.randn_like(original, device = h.device) * w
        return original + inter

    def __str__(self):
        return "NormalPoint(%s)" % ("" if self.epsilon is None else str(self.epsilon))
|
156 |
+
|
157 |
+
|
158 |
+
|
159 |
+
class MI_FGSM(Point):
    """Momentum Iterative FGSM adversarial attack (Dong et al.).

    w: attack radius (schedulable); r: total step budget relative to w;
    k: number of iterations; mu: momentum decay; should_end: stop updating
    examples that are already misclassified; restart: number of random
    restarts spread over the k iterations.
    """

    def __init__(self, w = None, r = 20.0, k = 100, mu = 0.8, should_end = True, restart = None, searchable=False,**kargs):
        self.epsilon = S.Const.initConst(w)
        self.k = k
        self.mu = mu
        self.r = float(r)
        self.should_end = should_end
        self.restart = restart
        self.searchable = searchable

    def box(self, original, model, target = None, untargeted = False, **kargs):
        # With no explicit target, run untargeted against the model's own
        # current prediction.
        if target is None:
            untargeted = True
            with torch.no_grad():
                target = model(original).max(1)[1]
        return self.attack(model, original, untargeted, target, **kargs)

    def attack(self, model, xo, untargeted, target, w, loss_function=ai.stdLoss, **kargs):
        """Iteratively perturb xo within the L-inf ball of radius w."""
        w = self.epsilon.getVal(c = w, **kargs)

        x = nn.Parameter(xo.clone(), requires_grad=True)
        gradorg = h.zeros(x.shape)          # momentum accumulator
        is_eq = 1                           # per-example mask: 1 while still correctly classified

        w = h.ones(x.shape) * w
        for i in range(self.k):
            # Optional random restart for examples not yet misclassified.
            if self.restart is not None and i % int(self.k / self.restart) == 0:
                x = is_eq * (torch.rand_like(xo) * w + xo) + (1 - is_eq) * x
                x = nn.Parameter(x, requires_grad = True)

            model.optimizer.zero_grad()

            out = model(x).vanillaTensorPart()
            loss = loss_function(out, target)

            loss.sum().backward(retain_graph=True)
            with torch.no_grad():
                # Momentum update on the L1-normalized gradient.
                oth = x.grad / torch.norm(x.grad, p=1)
                gradorg *= self.mu
                gradorg += oth
                grad = (self.r * w / self.k) * ai.mysign(gradorg)
                if self.should_end:
                    # Freeze examples that the model already misclassifies.
                    is_eq = ai.mulIfEq(grad, out, target)
                x = (x + grad * is_eq) if untargeted else (x - grad * is_eq)

                # Project back onto the L-inf ball around xo.
                x = xo + torch.min(torch.max(x - xo, -w),w)
                x.requires_grad_()

        model.optimizer.zero_grad()

        return x

    def boxBetween(self, o1, o2, model, target, *args, **kargs):
        # BUG FIX: this method was defined twice -- the first definition
        # passed arguments to attack() in the wrong order, and the second
        # (which shadowed it) did `raise "..."`, which is a TypeError in
        # Python 3.  Keep a single definition raising a real exception.
        raise NotImplementedError("boxBetween is not yet supported by MI_FGSM")

    def __str__(self):
        # BUG FIX: the mu field was mislabeled as "r=".
        return "MI_FGSM(%s)" % (("" if self.epsilon is None else "w="+str(self.epsilon)+",")
                                + ("" if self.k == 5 else "k="+str(self.k)+",")
                                + ("" if self.r == 5.0 else "r="+str(self.r)+",")
                                + ("" if self.mu == 0.8 else "mu="+str(self.mu)+",")
                                + ("" if self.should_end else "should_end=False"))
|
225 |
+
|
226 |
+
|
227 |
+
class PGD(MI_FGSM):
    """Projected Gradient Descent: MI_FGSM with zero momentum."""

    def __init__(self, r = 5.0, k = 5, **kargs):
        super(PGD,self).__init__(r=r, k = k, mu = 0, **kargs)

    def __str__(self):
        # Only non-default settings are printed.
        return "PGD(%s)" % (("" if self.epsilon is None else "w="+str(self.epsilon)+",")
                            + ("" if self.k == 5 else "k="+str(self.k)+",")
                            + ("" if self.r == 5.0 else "r="+str(self.r)+",")
                            + ("" if self.should_end else "should_end=False"))
|
236 |
+
|
237 |
+
class IFGSM(PGD):
    """Iterative FGSM: PGD with step budget r fixed to 1."""

    def __init__(self, k = 5, **kargs):
        super(IFGSM, self).__init__(r = 1, k=k, **kargs)

    def __str__(self):
        return "IFGSM(%s)" % (("" if self.epsilon is None else "w="+str(self.epsilon)+",")
                              + ("" if self.k == 5 else "k="+str(self.k)+",")
                              + ("" if self.should_end else "should_end=False"))
|
246 |
+
|
247 |
+
class NormalAdv(Point):
    """Runs attack `a` with a randomly rescaled width: epsilon is multiplied
    by a single sample from a standard normal per batch."""

    def __init__(self, a="IFGSM()", w = None):
        self.a = (eval(a) if type(a) is str else a)
        self.epsilon = S.Const.initConst(w)

    def box(self, original, w, *args, **kargs):
        epsilon = self.epsilon.getVal(c = w, shape = original.shape[:1], **kargs)
        assert (0 <= h.dten(epsilon)).all()
        # NOTE(review): a single scalar draw ([0]) scales the whole batch.
        epsilon = torch.randn(original.size()[0:1], device = h.device)[0] * epsilon
        return self.a.box(original, w = epsilon, *args, **kargs)

    def __str__(self):
        return "NormalAdv(%s)" % ( str(self.a) + ("" if self.epsilon is None else ",w="+str(self.epsilon)))
|
260 |
+
|
261 |
+
|
262 |
+
class InclusionSample(Point):
    """Samples a random center inside the width-w ball, then applies domain
    `a` with the remaining width w*sub around that center, so the result
    is included in the original ball.

    sub: fraction (schedulable, in [0,1]) of w given to the inner domain.
    normal: sample the center offset from a normal instead of uniform.
    """

    def __init__(self, sub, a="Box()", normal = False, w = None, **kargs):
        self.sub = S.Const.initConst(sub) # sub is the fraction of w to use.
        self.w = S.Const.initConst(w)
        self.normal = normal
        self.a = (eval(a) if type(a) is str else a)

    def box(self, original, w, *args, **kargs):
        w = self.w.getVal(c = w, shape = original.shape[:1], **kargs)
        sub = self.sub.getVal(c = 1, shape = original.shape[:1], **kargs)

        assert (0 <= h.dten(w)).all()
        assert (h.dten(sub) <= 1).all()
        assert (0 <= h.dten(sub)).all()
        if self.normal:
            inter = torch.randn_like(original, device = h.device)
        else:
            # Uniform in [-1, 1].
            inter = (torch.rand_like(original, device = h.device) * 2 - 1)

        # Shift the center by at most (1 - sub) * w so that the inner
        # sub-ball stays inside the original ball.
        inter = inter * w * (1 - sub)

        return self.a.box(original + inter, w = w * sub, *args, **kargs)

    def boxBetween(self, o1, o2, *args, **kargs):
        w = (o2 - o1).abs()
        return self.box( (o2 + o1)/2 , w = w, *args, **kargs)

    def __str__(self):
        # BUG FIX: previously read self.epsilon, which this class never
        # sets (AttributeError); the width attribute is self.w.
        return "InclusionSample(%s, %s)" % (str(self.sub), str(self.a) + ("" if self.w is None else ",w="+str(self.w)))
|
291 |
+
|
292 |
+
InSamp = InclusionSample
|
293 |
+
|
294 |
+
|
295 |
+
class AdvInclusion(InclusionSample):
    """Like InclusionSample, but the center is found by attack `a` inside
    the (1 - sub) * w portion of the ball, before applying domain `b`
    with the remaining w * sub width."""

    def __init__(self, sub, a="IFGSM()", b="Box()", w = None, **kargs):
        self.sub = S.Const.initConst(sub) # sub is the fraction of w to use.
        self.w = S.Const.initConst(w)
        self.a = (eval(a) if type(a) is str else a)
        self.b = (eval(b) if type(b) is str else b)

    def box(self, original, w, *args, **kargs):
        w = self.w.getVal(c = w, shape = original.shape, **kargs)
        sub = self.sub.getVal(c = 1, shape = original.shape, **kargs)

        assert (0 <= h.dten(w)).all()
        assert (h.dten(sub) <= 1).all()
        assert (0 <= h.dten(sub)).all()

        # With zero width there is nothing for the attack to explore.
        if h.dten(w).sum().item() <= 0.0:
            inter = original
        else:
            inter = self.a.box(original, w = w * (1 - sub), *args, **kargs)

        return self.b.box(inter, w = w * sub, *args, **kargs)

    def __str__(self):
        # BUG FIX: previously read self.epsilon, which this class never
        # sets (AttributeError); the width attribute is self.w.
        return "AdvInclusion(%s, %s, %s)" % (str(self.sub), str(self.a), str(self.b) + ("" if self.w is None else ",w="+str(self.w)))
|
319 |
+
|
320 |
+
|
321 |
+
class AdvDom(Point):
    """Attack with `a` (default IFGSM), then build the abstract domain `b`
    (default Box) spanning from the original point to the attack's upper bound.
    """

    def __init__(self, a="IFGSM()", b="Box()"):
        self.a = (eval(a) if type(a) is str else a)
        self.b = (eval(b) if type(b) is str else b)

    def box(self, original, *args, **kargs):
        adv = self.a.box(original, *args, **kargs)
        return self.b.boxBetween(original, adv.ub(), *args, **kargs)

    def boxBetween(self, o1, o2, *args, **kargs):
        original = (o1 + o2) / 2
        adv = self.a.boxBetween(o1, o2, *args, **kargs)
        return self.b.boxBetween(original, adv.ub(), *args, **kargs)

    def __str__(self):
        # BUG FIX: __init__ never sets self.width (and does not call super),
        # so the unconditional attribute access crashed; getattr keeps the old
        # output format when a subclass/base does provide `width`.
        width = getattr(self, "width", None)
        return "AdvDom(%s)" % (("" if width is None else "width="+str(width)+",")
                               + str(self.a) + "," + str(self.b))
|
338 |
+
|
339 |
+
|
340 |
+
|
341 |
+
class BiAdv(AdvDom):
    """AdvDom variant that symmetrizes the adversarial displacement: the
    resulting domain `b` extends the attack's offset in both directions
    around the center point.
    """

    def box(self, original, **kargs):
        attacked = self.a.box(original, **kargs)
        radius = (attacked.ub() - original).abs()
        return self.b.boxBetween(original - radius, original + radius, **kargs)

    def boxBetween(self, o1, o2, *args, **kargs):
        center = (o1 + o2) / 2
        attacked = self.a.boxBetween(o1, o2, *args, **kargs)
        radius = (attacked.ub() - center).abs()
        return self.b.boxBetween(center - radius, center + radius, *args, **kargs)

    def __str__(self):
        # Reuse AdvDom's rendering, swapping the class name prefix.
        return "BiAdv" + AdvDom.__str__(self)[6:]
|
355 |
+
|
356 |
+
|
357 |
+
class HBox(object):
    """Hybrid-zonotope abstract domain builder plus its training loss.

    Instances act as domain factories: box/boxBetween/line produce
    TaggedDomain-wrapped HybridZonotope values; loss() combines the
    domain's classification loss with an optional width penalty.
    """

    # Underlying abstract-interpretation domain class (subclasses override).
    Domain = ai.HybridZonotope

    def domain(self, *args, **kargs):
        # Tag the concrete domain value with this builder so losses can find it.
        return ai.TaggedDomain(self.Domain(*args, **kargs), self)

    def __init__(self, w = None, tot_weight = 1, width_weight = 0, pow_loss = None, log_loss = False, searchable = True, cross_loss = True, **kargs):
        # w: optional width schedule; tot_weight/width_weight: loss mixing weights.
        self.w = S.Const.initConst(w)
        self.tot_weight = S.Const.initConst(tot_weight)
        self.width_weight = S.Const.initConst(width_weight)
        self.pow_loss = pow_loss        # optional exponent applied to the loss
        self.searchable = searchable    # whether schedulers may search over this domain
        self.log_loss = log_loss        # apply log(1 + loss) smoothing
        self.cross_loss = cross_loss    # use worst-case bounds in cross entropy

    def __str__(self):
        return "HBox(%s)" % ("" if self.w is None else "w="+str(self.w))

    def boxBetween(self, o1, o2, *args, **kargs):
        """Zonotope spanning the box [min(o1,o2), max(o1,o2)], one error term per element."""
        batches = o1.size()[0]
        num_elem = h.product(o1.size()[1:])
        ei = h.getEi(batches, num_elem)

        # Reshape the unit error directions back to the input's spatial shape.
        if len(o1.size()) > 2:
            ei = ei.contiguous().view(num_elem, *o1.size())

        return self.domain((o1 + o2) / 2, None, ei * (o2 - o1).abs() / 2).checkSizes()

    def box(self, original, w, **kargs):
        """
        This version of it is slow, but keeps correlation down the line.
        """
        radius = self.w.getVal(c = w, **kargs)

        batches = original.size()[0]
        num_elem = h.product(original.size()[1:])
        ei = h.getEi(batches,num_elem)

        if len(original.size()) > 2:
            ei = ei.contiguous().view(num_elem, *original.size())

        return self.domain(original, None, ei * radius).checkSizes()

    def line(self, o1, o2, **kargs):
        """Domain around the segment from o1 to o2, optionally widened by w per element."""
        w = self.w.getVal(c = 0, **kargs)

        # First error term is the segment direction itself.
        ln = ((o2 - o1) / 2).unsqueeze(0)
        if not w is None and w > 0.0:
            batches = o1.size()[0]
            num_elem = h.product(o1.size()[1:])
            ei = h.getEi(batches,num_elem)
            if len(o1.size()) > 2:
                ei = ei.contiguous().view(num_elem, *o1.size())
            ln = torch.cat([ln, ei * w])
        return self.domain((o1 + o2) / 2, None, ln ).checkSizes()

    def loss(self, dom, target, *args, **kargs):
        """Weighted combination of the (worst-case) classification loss and the domain width."""
        width_weight = self.width_weight.getVal(**kargs)
        tot_weight = self.tot_weight.getVal(**kargs)

        if self.cross_loss:
            # Worst-case logits: upper bounds everywhere, lower bound at the target class.
            r = dom.ub()
            inds = torch.arange(r.shape[0], device=h.device, dtype=h.ltype)
            r[inds,target] = dom.lb()[inds,target]
            # `loss` here is the cross-entropy method cursed onto tensors in helpers.
            tot = r.loss(target, *args, **kargs)
        else:
            tot = dom.loss(target, *args, **kargs)

        if self.log_loss:
            tot = (tot + 1).log()
        if self.pow_loss is not None and self.pow_loss > 0 and self.pow_loss != 1:
            tot = tot.pow(self.pow_loss)

        ls = tot * tot_weight
        if width_weight > 0:
            ls += dom.diameter() * width_weight

        # Normalize so the result is a convex combination of the two parts.
        return ls / (width_weight + tot_weight)
|
435 |
+
|
436 |
+
class Box(HBox):
    """Pure interval domain: per-element widths (beta), no correlated error terms."""

    def __str__(self):
        suffix = "" if self.w is None else "w=" + str(self.w)
        return "Box(" + suffix + ")"

    def box(self, original, w, **kargs):
        """
        This version of it takes advantage of betas being uncorrelated.
        Unfortunately they stay uncorrelated forever.
        Counterintuitively, tests show more accuracy - this is because the other box
        creates lots of 0 errors which get accounted for by the calcultion of the newhead in relu
        which is apparently worse than not accounting for errors.
        """
        eps = self.w.getVal(c = w, **kargs)
        beta = h.ones(original.size()) * eps
        return self.domain(original, beta, None).checkSizes()

    def line(self, o1, o2, **kargs):
        eps = self.w.getVal(c = 0, **kargs)
        center = (o1 + o2) / 2
        # Half the segment length per element, widened by the extra eps.
        beta = ((o2 - o1) / 2).abs() + h.ones(o2.size()) * eps
        return self.domain(center, beta, None).checkSizes()

    def boxBetween(self, o1, o2, *args, **kargs):
        # For intervals a box between two points is exactly the line construction.
        return self.line(o1, o2, **kargs)
|
457 |
+
|
458 |
+
class ZBox(HBox):
    """Pure zonotope domain (no interval component)."""

    def Domain(self, *args, **kargs):
        return ai.Zonotope(*args, **kargs)

    def __str__(self):
        suffix = "" if self.w is None else "w=" + str(self.w)
        return "ZBox(" + suffix + ")"
|
465 |
+
|
466 |
+
class HSwitch(HBox):
    """HybridZonotope with the 'switch' custom ReLU transformer."""

    def Domain(self, *args, **kargs):
        return ai.HybridZonotope(*args, customRelu = ai.creluSwitch, **kargs)

    def __str__(self):
        suffix = "" if self.w is None else "w=" + str(self.w)
        return "HSwitch(" + suffix + ")"
|
472 |
+
|
473 |
+
class ZSwitch(ZBox):
    """Zonotope with the 'switch' custom ReLU transformer."""

    def Domain(self, *args, **kargs):
        return ai.Zonotope(*args, customRelu = ai.creluSwitch, **kargs)

    def __str__(self):
        suffix = "" if self.w is None else "w=" + str(self.w)
        return "ZSwitch(" + suffix + ")"
|
479 |
+
|
480 |
+
|
481 |
+
class ZNIPS(ZBox):
    """Zonotope with the NIPS-style custom ReLU transformer."""

    def __str__(self):
        # BUG FIX: copy-paste from ZSwitch made this report "ZSwitch(...)",
        # which mislabels logs and result files; report the real class name.
        return "ZNIPS(%s)" % ("" if self.w is None else "w="+str(self.w))

    def Domain(self, *args, **kargs):
        return ai.Zonotope(*args, customRelu = ai.creluNIPS, **kargs)
|
488 |
+
|
489 |
+
class HSmooth(HBox):
    """HybridZonotope with the 'smooth' custom ReLU transformer."""

    def Domain(self, *args, **kargs):
        return ai.HybridZonotope(*args, customRelu = ai.creluSmooth, **kargs)

    def __str__(self):
        suffix = "" if self.w is None else "w=" + str(self.w)
        return "HSmooth(" + suffix + ")"
|
495 |
+
|
496 |
+
class HNIPS(HBox):
    """HybridZonotope with the NIPS-style custom ReLU transformer."""

    def __str__(self):
        # BUG FIX: copy-paste from HSmooth made this report "HSmooth(...)",
        # which mislabels logs and result files; report the real class name.
        return "HNIPS(%s)" % ("" if self.w is None else "w="+str(self.w))

    def Domain(self, *args, **kargs):
        return ai.HybridZonotope(*args, customRelu = ai.creluNIPS, **kargs)
|
502 |
+
|
503 |
+
class ZSmooth(ZBox):
    """Zonotope with the 'smooth' custom ReLU transformer."""

    def Domain(self, *args, **kargs):
        return ai.Zonotope(*args, customRelu = ai.creluSmooth, **kargs)

    def __str__(self):
        suffix = "" if self.w is None else "w=" + str(self.w)
        return "ZSmooth(" + suffix + ")"
|
509 |
+
|
510 |
+
|
511 |
+
|
512 |
+
|
513 |
+
|
514 |
+
# stochastic correlation
|
515 |
+
# stochastic correlation
class HRand(WrapDom):
    """Wraps a Box sample and then stochastically correlates `num_correlated`
    dimensions, re-interpreting the result in the configured abstract domain.
    """

    # domain must be an ai style domain like hybrid zonotope.
    def __init__(self, num_correlated, a = "HSwitch()", **kargs):
        super(HRand, self).__init__(Box())
        self.num_correlated = num_correlated
        self.dom = eval(a) if type(a) is str else a

    def Domain(self, d):
        with torch.no_grad():
            out = d.abstractApplyLeaf('stochasticCorrelate', self.num_correlated)
            out = self.dom.Domain(out.head, out.beta, out.errors)
            return out

    def __str__(self):
        # BUG FIX: previously printed self.a, but the configured domain is stored
        # as self.dom (self.a, if set by WrapDom, is the inner Box wrapper).
        return "HRand(%s, domain = %s)" % (str(self.num_correlated), str(self.dom))
|
helpers.py
ADDED
@@ -0,0 +1,489 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import future
|
2 |
+
import builtins
|
3 |
+
import past
|
4 |
+
import six
|
5 |
+
import inspect
|
6 |
+
import os
|
7 |
+
import torch
|
8 |
+
import torch.nn as nn
|
9 |
+
import torch.nn.functional as F
|
10 |
+
import torch.optim as optim
|
11 |
+
import numpy as np
|
12 |
+
import argparse
|
13 |
+
import decimal
|
14 |
+
import PIL
|
15 |
+
from torchvision import datasets, transforms
|
16 |
+
from datetime import datetime
|
17 |
+
|
18 |
+
from forbiddenfruit import curse
|
19 |
+
#from torch.autograd import Variable
|
20 |
+
|
21 |
+
from timeit import default_timer as timer
|
22 |
+
|
23 |
+
class Timer:
    """Context manager that times a with-block and (optionally) prints the
    average time per unit of work on exit.

    activity: label used in the printed message.
    units: divisor for the elapsed time (e.g. number of batches processed).
    shouldPrint: when True, print the summary on exit (also to file f if given).
    """

    def __init__(self, activity = None, units = 1, shouldPrint = True, f = None):
        self.activity = activity
        self.units = units
        self.shouldPrint = shouldPrint
        self.f = f

    def __enter__(self):
        self.start = timer()
        return self

    def getUnitTime(self):
        elapsed = self.end - self.start
        return elapsed / self.units

    def __str__(self):
        return "Avg time to " + self.activity + ": " + str(self.getUnitTime())

    def __exit__(self, *args):
        self.end = timer()
        if self.shouldPrint:
            printBoth(self, f = self.f)
|
42 |
+
|
43 |
+
def cudify(x):
    """Move *x* to the GPU (non-blocking) when CUDA is in use; otherwise return it unchanged.

    BUG FIX: the original used ``x.cuda(async=True)``. ``async`` became a reserved
    keyword in Python 3.7 (the kwarg was renamed ``non_blocking`` in PyTorch 0.4),
    so the old form is a SyntaxError on any modern interpreter.
    """
    if use_cuda:
        return x.cuda(non_blocking=True)
    return x
|
47 |
+
|
48 |
+
def pyval(a, **kargs):
    """Lift a python scalar into a 1-element tensor via the module's dten factory."""
    wrapped = [a]
    return dten(wrapped, **kargs)
|
50 |
+
|
51 |
+
def ifThenElse(cond, a, b):
    """Branchless per-element select: a where cond is 1, b where cond is 0.

    ``to_dtype`` is a method this module curses onto tensors (see variable());
    it casts cond to the module-wide float dtype before blending.
    Assumes cond contains only 0/1 values — TODO confirm against callers.
    """
    cond = cond.to_dtype()
    return cond * a + (1 - cond) * b
|
54 |
+
|
55 |
+
def ifThenElseL(cond, a, b):
    """Branchless select like ifThenElse, but without casting cond first."""
    not_cond = 1 - cond
    return cond * a + not_cond * b
|
57 |
+
|
58 |
+
def product(it):
    """Multiply together the non-negative entries of *it*; an int passes through.

    Negative entries (e.g. inferred dims such as -1) are skipped rather than
    multiplied, so the result is always the product of the known sizes.
    """
    if isinstance(it, int):
        return it
    result = 1
    for dim in it:
        if dim >= 0:
            result *= dim
    return result
|
66 |
+
|
67 |
+
def getEi(batches, num_elem):
    """Stack of per-element unit error directions, shaped (num_elem, batches, num_elem)."""
    identity = eye(num_elem)
    return identity.expand(batches, num_elem, num_elem).permute(1, 0, 2)
|
69 |
+
|
70 |
+
def one_hot(batch,d):
    """One-hot encode an index batch as a sparse (bs, d) float tensor on the active device.

    NOTE(review): relies on the legacy ``torch.sparse.FloatTensor`` constructor,
    which is deprecated in modern PyTorch — confirm against the pinned torch version.
    """
    bs = batch.size()[0]
    indexes = [ list(range(bs)), batch]
    values = [ 1 for _ in range(bs) ]
    return cudify(torch.sparse.FloatTensor(ltenCPU(indexes), ftenCPU(values), torch.Size([bs,d])))
|
75 |
+
|
76 |
+
def seye(n, m = None):
    """Sparse identity-like (n, m) matrix with ones on the leading diagonal.

    NOTE(review): the values are built with dtenCPU (float dtype) but stored in a
    ``torch.sparse.ByteTensor`` — this dtype mix looks inconsistent; confirm it
    against the pinned torch version before relying on it.
    """
    if m is None:
        m = n
    # Diagonal length is the smaller of the two dimensions.
    mn = n if n < m else m
    indexes = [[ i for i in range(mn) ], [ i for i in range(mn) ] ]
    values = [1 for i in range(mn) ]
    return cudify(torch.sparse.ByteTensor(ltenCPU(indexes), dtenCPU(values), torch.Size([n,m])))
|
83 |
+
|
84 |
+
# Module-wide tensor dtypes: float payloads, int64 indices, uint8 masks.
dtype = torch.float32
ftype = torch.float32
ltype = torch.int64
btype = torch.uint8

torch.set_default_dtype(dtype)

cpu = torch.device("cpu")

# Use non-blocking host->device copies everywhere.
cuda_async = True

# CPU-pinned tensor factories, one per dtype.
ftenCPU = lambda *args, **kargs: torch.tensor(*args, dtype=ftype, device=cpu, **kargs)
dtenCPU = lambda *args, **kargs: torch.tensor(*args, dtype=dtype, device=cpu, **kargs)
ltenCPU = lambda *args, **kargs: torch.tensor(*args, dtype=ltype, device=cpu, **kargs)
btenCPU = lambda *args, **kargs: torch.tensor(*args, dtype=btype, device=cpu, **kargs)

# Select device once at import time; NOCUDA env var forces CPU.
if torch.cuda.is_available() and not 'NOCUDA' in os.environ:
    print("using cuda")
    device = torch.device("cuda")
    # Device-resident tensor factories mirroring the CPU ones above.
    ften = lambda *args, **kargs: torch.tensor(*args, dtype=ftype, device=device, **kargs).cuda(non_blocking=cuda_async)
    dten = lambda *args, **kargs: torch.tensor(*args, dtype=dtype, device=device, **kargs).cuda(non_blocking=cuda_async)
    lten = lambda *args, **kargs: torch.tensor(*args, dtype=ltype, device=device, **kargs).cuda(non_blocking=cuda_async)
    bten = lambda *args, **kargs: torch.tensor(*args, dtype=btype, device=device, **kargs).cuda(non_blocking=cuda_async)
    ones = lambda *args, **cargs: torch.ones(*args, **cargs).cuda(non_blocking=cuda_async)
    zeros = lambda *args, **cargs: torch.zeros(*args, **cargs).cuda(non_blocking=cuda_async)
    eye = lambda *args, **cargs: torch.eye(*args, **cargs).cuda(non_blocking=cuda_async)
    use_cuda = True
    print("set up cuda")
else:
    print("not using cuda")
    # CPU fallbacks: the same names resolve to plain torch constructors.
    ften = ftenCPU
    dten = dtenCPU
    lten = ltenCPU
    bten = btenCPU
    ones = torch.ones
    zeros = torch.zeros
    eye = torch.eye
    use_cuda = False
    device = cpu
|
123 |
+
|
124 |
+
def smoothmax(x, alpha, dim = 0):
    """Differentiable soft maximum: softmax(alpha * x)-weighted sum of x.

    NOTE(review): the final reduction is over ``dim + 1`` while the softmax is
    over ``dim`` — confirm against callers whether this offset is intentional
    (e.g. an extra leading axis) or a latent bug.
    """
    return x.mul(F.softmax(x * alpha, dim)).sum(dim + 1)
|
126 |
+
|
127 |
+
|
128 |
+
def str2bool(v):
    """argparse type: parse a human-friendly boolean string (case-insensitive)."""
    lowered = v.lower()
    if lowered in ('yes', 'true', 't', 'y', '1'):
        return True
    if lowered in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
|
135 |
+
|
136 |
+
|
137 |
+
|
138 |
+
def flat(lst):
    """Concatenate one level of nesting: [[a, b], [c]] -> [a, b, c]."""
    flattened = []
    for sub in lst:
        flattened.extend(sub)
    return flattened
|
143 |
+
|
144 |
+
|
145 |
+
def printBoth(*st, f = None):
    """Echo *st* to stdout and, when an open file *f* is given, to it as well."""
    print(*st)
    if f is not None:
        print(*st, file=f)
|
149 |
+
|
150 |
+
|
151 |
+
def hasMethod(cl, mt):
    """True when *cl* exposes a callable attribute named *mt*."""
    attr = getattr(cl, mt, None)
    return callable(attr)
|
153 |
+
|
154 |
+
def getMethodNames(Foo):
    """Names of *Foo*'s public callable attributes (dunder names excluded)."""
    names = []
    for attr in dir(Foo):
        if attr.startswith("__"):
            continue
        if callable(getattr(Foo, attr)):
            names.append(attr)
    return names
|
156 |
+
|
157 |
+
def getMethods(Foo):
    """Resolved public callables of *Foo* (objects, not just their names)."""
    return [getattr(Foo, name) for name in getMethodNames(Foo)]
|
159 |
+
|
160 |
+
max_c_for_norm = 10000
|
161 |
+
|
162 |
+
def numel(arr):
    """Total element count of *arr* from its size (negative dims skipped by product)."""
    shape = arr.size()
    return product(shape)
|
164 |
+
|
165 |
+
def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    total = len(l)
    start = 0
    while start < total:
        yield l[start:start + n]
        start += n
|
169 |
+
|
170 |
+
|
171 |
+
def loadDataset(dataset, batch_size, train, transform = True):
    """Build a shuffling DataLoader for a named torchvision dataset.

    dataset: torchvision dataset class name (or "Imagenet12" for a local ImageFolder).
    batch_size: batch size for the loader.
    train: select the training split (and training-time augmentation).
    transform: when True, also normalize with per-dataset statistics.

    Raises Exception for dataset names with no known split-argument convention.
    """
    # Each dataset family names its train/test split argument differently.
    oargs = {}
    if dataset in ["MNIST", "CIFAR10", "CIFAR100", "FashionMNIST", "PhotoTour"]:
        oargs['train'] = train
    elif dataset in ["STL10", "SVHN"]:
        oargs['split'] = 'train' if train else 'test'
    elif dataset in ["LSUN"]:
        oargs['classes'] = 'train' if train else 'test'
    elif dataset in ["Imagenet12"]:
        pass
    else:
        raise Exception(dataset + " is not yet supported")

    if dataset in ["MNIST"]:
        transformer = transforms.Compose([ transforms.ToTensor()]
                                         + ([transforms.Normalize((0.1307,), (0.3081,))] if transform else []))
    elif dataset in ["CIFAR10", "CIFAR100"]:
        # Train-time augmentation: small random translation + horizontal flip.
        transformer = transforms.Compose(([ transforms.RandomAffine(0, (0.125, 0.125), resample=PIL.Image.BICUBIC),
                                            transforms.RandomHorizontalFlip(),
                                          ] if train else [])
                                         + [transforms.ToTensor()]
                                         + ([transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))] if transform else []))
    elif dataset in ["SVHN"]:
        transformer = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5,0.5,0.5), (0.2,0.2,0.2))])
    else:
        transformer = transforms.ToTensor()

    if dataset in ["Imagenet12"]:
        # https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md#download-the-imagenet-dataset
        # BUG FIX: `normalize` was previously undefined here (NameError) and the
        # pipeline lacked ToTensor(); use the standard ImageNet statistics.
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        train_set = datasets.ImageFolder(
            '../data/Imagenet12/train' if train else '../data/Imagenet12/val',
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))
    else:
        train_set = getattr(datasets, dataset)('../data', download=True, transform=transformer, **oargs)
    return torch.utils.data.DataLoader(
        train_set
        , batch_size=batch_size
        , shuffle=True,
        **({'num_workers': 1, 'pin_memory': True} if use_cuda else {}))
|
219 |
+
|
220 |
+
|
221 |
+
def variable(Pt):
    """Monkey-patch (via forbiddenfruit.curse) a set of convenience methods onto
    the tensor class *Pt*, so plain tensors expose the same protocol as the
    project's abstract-domain objects (lb/ub/center/loss/diameter/...).
    """
    class Point:
        # Container for the methods to be cursed onto Pt; never instantiated.

        def isSafe(self,target):
            pred = self.max(1, keepdim=True)[1] # get the index of the max log-probability
            return pred.eq(target.data.view_as(pred))

        def isPoint(self):
            return True

        def labels(self):
            return [self[0].max(1)[1]] # get the index of the max log-probability

        def softplus(self):
            return F.softplus(self)

        def elu(self):
            return F.elu(self)

        def selu(self):
            return F.selu(self)

        def sigm(self):
            return F.sigmoid(self)

        def conv3d(self, *args, **kargs):
            return F.conv3d(self, *args, **kargs)
        def conv2d(self, *args, **kargs):
            return F.conv2d(self, *args, **kargs)
        def conv1d(self, *args, **kargs):
            return F.conv1d(self, *args, **kargs)

        def conv_transpose3d(self, *args, **kargs):
            return F.conv_transpose3d(self, *args, **kargs)
        def conv_transpose2d(self, *args, **kargs):
            return F.conv_transpose2d(self, *args, **kargs)
        def conv_transpose1d(self, *args, **kargs):
            return F.conv_transpose1d(self, *args, **kargs)

        def max_pool2d(self, *args, **kargs):
            return F.max_pool2d(self, *args, **kargs)

        def avg_pool2d(self, *args, **kargs):
            return F.avg_pool2d(self, *args, **kargs)

        def adaptive_avg_pool2d(self, *args, **kargs):
            return F.adaptive_avg_pool2d(self, *args, **kargs)

        def cat(self, other, dim = 0, **kargs):
            return torch.cat((self, other), dim = dim, **kargs)

        def addPar(self, a, b):
            return a + b

        # Abstract-domain protocol: a concrete point is its own leaf/bounds.
        def abstractApplyLeaf(self, foo, *args, **kargs):
            return self

        def diameter(self):
            # A point has zero width.
            return pyval(0)

        def to_dtype(self):
            return self.type(dtype=dtype, non_blocking=cuda_async)

        def loss(self, target, **kargs):
            # Per-sample cross entropy; kwarg name differs across torch versions.
            if torch.__version__[0] == "0":
                return F.cross_entropy(self, target, reduce = False)
            else:
                return F.cross_entropy(self, target, reduction='none')

        def deep_loss(self, *args, **kargs):
            return 0

        def merge(self, *args, **kargs):
            return self

        def splitRelu(self, *args, **kargs):
            return self

        def lb(self):
            return self
        def vanillaTensorPart(self):
            return self
        def center(self):
            return self
        def ub(self):
            return self

        def cudify(self, cuda_async = True):
            return self.cuda(non_blocking=cuda_async) if use_cuda else self

        def log_softmax(self, *args, dim = 1, **kargs):
            return F.log_softmax(self, *args,dim = dim, **kargs)

    # Legacy torch-0.x shim; dead on torch >= 1.0 since the version check fails.
    if torch.__version__[0] == "0" and torch.__version__ != "0.4.1":
        Point.log_softmax = log_softmax

    # NOTE(review): the following def/if pair duplicates the shim above verbatim
    # in the original source — presumably a copy-paste remnant; confirm before
    # removing, since removal is behavior-neutral on modern torch.
    def log_softmax(self, *args, dim = 1, **kargs):
        return F.log_softmax(self, *args,dim = dim, **kargs)

    if torch.__version__[0] == "0" and torch.__version__ != "0.4.1":
        Point.log_softmax = log_softmax

    # Curse every public method of Point onto the target tensor class.
    for nm in getMethodNames(Point):
        curse(Pt, nm, getattr(Point, nm))
|
326 |
+
|
327 |
+
# Patch the Point convenience methods onto every tensor flavor used in the project.
# NOTE(review): accessing torch.cuda.DoubleTensor / torch.cuda.FloatTensor may
# raise on CUDA-less builds of newer torch — confirm against the pinned version.
variable(torch.autograd.Variable)
variable(torch.cuda.DoubleTensor)
variable(torch.DoubleTensor)
variable(torch.cuda.FloatTensor)
variable(torch.FloatTensor)
variable(torch.ByteTensor)
variable(torch.Tensor)
|
334 |
+
|
335 |
+
|
336 |
+
def default(dic, nm, d):
    """dic[nm] when dic is a mapping containing nm; otherwise the fallback d."""
    if dic is None:
        return d
    if nm in dic:
        return dic[nm]
    return d
|
340 |
+
|
341 |
+
|
342 |
+
|
343 |
+
|
344 |
+
def softmaxBatchNP(x, epsilon, subtract = False):
    """Compute softmax values for each sets of scores in x."""
    # Work in float64 so exp() is less likely to over/underflow.
    x = x.astype(np.float64)
    # epsilon acts as a softmax temperature when provided.
    ex = x / epsilon if epsilon is not None else x
    if subtract:
        # Standard max-subtraction trick for numerical stability.
        ex -= ex.max(axis=1)[:,np.newaxis]
    e_x = np.exp(ex)
    sm = (e_x / e_x.sum(axis=1)[:,np.newaxis])
    am = np.argmax(x, axis=1)
    # Rows whose softmax came out non-finite get patched below.
    bads = np.logical_not(np.isfinite(sm.sum(axis = 1)))

    if epsilon is None:
        # Degenerate rows collapse to a one-hot on the argmax.
        sm[bads] = 0
        sm[bads, am[bads]] = 1
    else:
        # Degenerate rows become an epsilon-smoothed one-hot on the argmax.
        # NOTE(review): this rescales the local `epsilon` in place; confirm the
        # (d-1)/d scaling is the intended smoothing scheme.
        epsilon *= (x.shape[1] - 1) / x.shape[1]
        sm[bads] = epsilon / (x.shape[1] - 1)
        sm[bads, am[bads]] = 1 - epsilon

    # Renormalize so every row sums to exactly 1.
    sm /= sm.sum(axis=1)[:,np.newaxis]
    return sm
|
365 |
+
|
366 |
+
|
367 |
+
def cadd(a,b):
    """Add a and b after concatenating and re-splitting them along dim 0.

    NOTE(review): for plain tensors this is just a + b (``cat`` is cursed onto
    tensors by variable()); the round-trip presumably aligns internal parts of
    abstract-domain objects — confirm against the domain classes.
    """
    both = a.cat(b)
    a, b = both.split(a.size()[0])
    return a + b
|
371 |
+
|
372 |
+
def msum(a, b, l):
    """Combine a and b with l(a, b); a None on either side yields the other."""
    if a is None:
        return b
    if b is None:
        return a
    return l(a, b)
|
378 |
+
|
379 |
+
class SubAct(argparse.Action):
    """argparse action that accumulates each occurrence's token list onto the
    destination, optionally validating the first token against the public
    method names of *sub_choices*.
    """

    def __init__(self, sub_choices, *args, **kargs):
        super(SubAct, self).__init__(*args, nargs='+', **kargs)
        self.sub_choices = sub_choices
        if sub_choices is None:
            self.sub_choices_names = None
        else:
            self.sub_choices_names = getMethodNames(sub_choices)

    def __call__(self, parser, namespace, values, option_string=None):
        allowed = self.sub_choices_names
        if allowed is not None and values[0] not in allowed:
            msg = 'invalid choice: %r (choose from %s)' % (values[0], allowed)
            raise argparse.ArgumentError(self, msg)

        collected = getattr(namespace, self.dest)
        setattr(namespace, self.dest, collected + [values])
|
392 |
+
|
393 |
+
def catLists(val):
    """Recursively flatten arbitrarily nested lists; a non-list becomes [val]."""
    if not isinstance(val, list):
        return [val]
    flattened = []
    for item in val:
        flattened.extend(catLists(item))
    return flattened
|
400 |
+
|
401 |
+
def sumStr(val):
    """Concatenate an iterable of strings into one string."""
    return "".join(val)
|
406 |
+
|
407 |
+
def catStrs(val):
    """Render ["f", "a", "b"] as "f(a, b)"; a single-element list stays bare."""
    if len(val) > 1:
        return val[0] + "(" + ", ".join(val[1:]) + ")"
    return val[0]
|
418 |
+
|
419 |
+
def printNumpy(x):
    """Render a 1-D tensor as "[v1, v2, ...]" using non-scientific decimal formatting."""
    rendered = [decimal.Decimal(float(v)).__format__("f") + ", "
                for v in x.data.cpu().numpy()]
    return "[" + "".join(rendered)[:-2] + "]"
|
421 |
+
|
422 |
+
def printStrList(x):
    """Render an iterable of strings as "[a, b, c]"."""
    return "[" + ", ".join(x) + "]"
|
424 |
+
|
425 |
+
def printListsNumpy(val):
    """Recursively render nested lists of tensors via printNumpy/printStrList."""
    if not isinstance(val, list):
        return printNumpy(val)
    return printStrList(printListsNumpy(item) for item in val)
|
429 |
+
|
430 |
+
def parseValues(values, methods, *others):
    """Turn a command-line token list into an object.

    A single token is evaluated directly (with the attributes of `methods` and
    every namespace in `others` in scope); classes get instantiated. Otherwise
    the first token names a method of `methods` and the remaining tokens become
    positional args or k=v keyword args.

    SECURITY: uses eval() on command-line strings — only safe for trusted,
    local invocations.
    """
    if len(values) == 1 and values[0]:
        # Merge the __dict__ namespaces so e.g. domain names resolve in the eval.
        x = eval(values[0], dict(pair for l in ([methods] + list(others)) for pair in l.__dict__.items()) )

        return x() if inspect.isclass(x) else x
    args = []
    kargs = {}
    for arg in values[1:]:
        if '=' in arg:
            k = arg.split('=')[0]
            v = arg[len(k)+1:]
            try:
                kargs[k] = eval(v)
            except:
                # Fall back to the raw string when the value isn't a Python literal.
                kargs[k] = v
        else:
            args += [eval(arg)]
    return getattr(methods, values[0])(*args, **kargs)
|
448 |
+
|
449 |
+
def preDomRes(outDom, target): # TODO: make faster again by keeping sparse tensors sparse
    """Per-class margin residues (target score minus each other class's score).

    The + t at the end adds 1 in the target column so a min() over classes is
    driven by the non-target margins. Safety corresponds to all entries > 0.
    """
    # Dense one-hot (batch, classes) encoding of the targets.
    t = one_hot(target.long(), outDom.size()[1]).to_dense().to_dtype()
    # Outer product: 1 only at (target, target).
    tmat = t.unsqueeze(2).matmul(t.unsqueeze(1))

    tl = t.unsqueeze(2).expand(-1, -1, tmat.size()[1])

    # Identity with the target diagonal entry zeroed out.
    inv_t = eye(tmat.size()[1]).expand(tmat.size()[0], -1, -1)
    inv_t = inv_t - tmat

    tl = tl.bmm(inv_t)

    # fst: target score broadcast into the non-target columns; snd: the other scores.
    fst = outDom.unsqueeze(1).matmul(tl).squeeze(1)
    snd = outDom.unsqueeze(1).matmul(inv_t).squeeze(1)

    return (fst - snd) + t
|
464 |
+
|
465 |
+
def mopen(shouldnt, *args, **kargs):
    """Maybe-open: when *shouldnt* is truthy, return a no-op context manager
    (yielding None) instead of actually opening a file.
    """
    if not shouldnt:
        return open(*args, **kargs)
    import contextlib
    return contextlib.suppress()
|
470 |
+
|
471 |
+
def file_timestamp():
    """Current timestamp with ':' and spaces stripped, safe for use in filenames."""
    stamp = str(datetime.now())
    return stamp.replace(":", "").replace(" ", "")
|
473 |
+
|
474 |
+
def prepareDomainNameForFile(s):
    """Sanitize a domain spec string for use in a file name: spaces, parens and
    '=' become underscores, commas are dropped.
    """
    table = str.maketrans({" ": "_", "(": "_", ")": "_", "=": "_", ",": ""})
    return s.translate(table)
|
476 |
+
|
477 |
+
# delimited only
|
478 |
+
# delimited only
def callCC(foo):
    """Call *foo* with an escape continuation: invoking the continuation with a
    value aborts *foo* immediately and makes callCC return that value; otherwise
    callCC returns foo's normal result.
    """
    class _Escape(BaseException):
        def __init__(self, payload):
            self.payload = payload

    def escape(value):
        raise _Escape(value)

    try:
        return foo(escape)
    except _Escape as e:
        return e.payload
|
losses.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This source file is part of DiffAI
|
2 |
+
# Copyright (c) 2018 Secure, Reliable, and Intelligent Systems Lab (SRI), ETH Zurich
|
3 |
+
# This software is distributed under the MIT License: https://opensource.org/licenses/MIT
|
4 |
+
# SPDX-License-Identifier: MIT
|
5 |
+
# For more information see https://github.com/eth-sri/diffai
|
6 |
+
|
7 |
+
# THE SOFTWARE IS PROVIDED "AS-IS" WITHOUT ANY WARRANTY OF ANY KIND, EITHER
|
8 |
+
# EXPRESS, IMPLIED OR STATUTORY, INCLUDING BUT NOT LIMITED TO ANY WARRANTY
|
9 |
+
# THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS OR BE ERROR-FREE AND ANY
|
10 |
+
# IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
|
11 |
+
# TITLE, OR NON-INFRINGEMENT. IN NO EVENT SHALL ETH ZURICH BE LIABLE FOR ANY
|
12 |
+
# DAMAGES, INCLUDING BUT NOT LIMITED TO DIRECT, INDIRECT,
|
13 |
+
# SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM, OR IN
|
14 |
+
# ANY WAY CONNECTED WITH THIS SOFTWARE (WHETHER OR NOT BASED UPON WARRANTY,
|
15 |
+
# CONTRACT, TORT OR OTHERWISE).
|
16 |
+
|
17 |
+
import torch
|
18 |
+
import torch.nn as nn
|
19 |
+
import torch.nn.functional as F
|
20 |
+
import torch.optim as optim
|
21 |
+
|
22 |
+
import helpers as h
|
23 |
+
import domains
|
24 |
+
from domains import *
|
25 |
+
import math
|
26 |
+
|
27 |
+
|
28 |
+
# Domains that represent a single concrete point (they expose an `attack`
# method) plus the raw tensor classes themselves.
POINT_DOMAINS = [m for m in h.getMethods(domains) if h.hasMethod(m, "attack")] + [ torch.FloatTensor, torch.Tensor, torch.cuda.FloatTensor ]
# Domains symmetric around their center: the interval Box plus all point domains.
SYMETRIC_DOMAINS = [domains.Box] + POINT_DOMAINS
|
30 |
+
|
31 |
+
def domRes(outDom, target, **args): # TODO: make faster again by keeping sparse tensors sparse
    """Lower bounds on the per-class margins (target score minus other scores)
    for an abstract output domain; the + t pins the target column to 1 so a
    min() over classes is driven by the real margins.
    """
    # Dense one-hot (batch, classes) encoding of the targets.
    t = h.one_hot(target.data.long(), outDom.size()[1]).to_dense()
    # Outer product: 1 only at (target, target).
    tmat = t.unsqueeze(2).matmul(t.unsqueeze(1))

    tl = t.unsqueeze(2).expand(-1, -1, tmat.size()[1])

    # Identity with the target diagonal entry zeroed out.
    inv_t = h.eye(tmat.size()[1]).expand(tmat.size()[0], -1, -1)
    inv_t = inv_t - tmat

    tl = tl.bmm(inv_t)

    # fst: target score broadcast to non-target columns; snd: the other scores.
    fst = outDom.bmm(tl)
    snd = outDom.bmm(inv_t)
    diff = fst - snd
    # Sound lower bound of the margins from the abstract domain.
    return diff.lb() + t
|
46 |
+
|
47 |
+
def isSafeDom(outDom, target, **args):
    """1 when every class margin lower bound is positive (verified safe), else 0."""
    worst, _ = torch.min(domRes(outDom, target, **args), 1)
    return worst.gt(0.0).long().item()
|
50 |
+
|
51 |
+
|
52 |
+
def isSafeBox(target, net, inp, eps, dom):
|
53 |
+
atarg = target.argmax(1)[0].unsqueeze(0)
|
54 |
+
if hasattr(dom, "attack"):
|
55 |
+
x = dom.attack(net, eps, inp, target)
|
56 |
+
pred = net(x).argmax(1)[0].unsqueeze(0) # get the index of the max log-probability
|
57 |
+
return pred.item() == atarg.item()
|
58 |
+
else:
|
59 |
+
outDom = net(dom.box(inp, eps))
|
60 |
+
return isSafeDom(outDom, atarg)
|
media/overview.png
ADDED
![]() |
media/resnetTinyFewCombo.png
ADDED
![]() |
models.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
try:
|
2 |
+
from . import components as n
|
3 |
+
from . import ai
|
4 |
+
from . import scheduling as S
|
5 |
+
except:
|
6 |
+
import components as n
|
7 |
+
import scheduling as S
|
8 |
+
import ai
|
9 |
+
|
10 |
+
############# Previously Known Models. Not guaranteed to have the same performance as previous papers.
|
11 |
+
|
12 |
+
def FFNN(c, **kargs):
    """Fully connected net: five 100-unit hidden layers feeding `c` outputs."""
    widths = [100] * 5 + [c]
    return n.FFNN(widths, last_lin=True, last_zono=True, **kargs)
|
14 |
+
|
15 |
+
def ConvSmall(c, **kargs):
    """Small two-conv LeNet-style network with a single 100-unit dense layer."""
    # conv specs presumably (filters, kh, kw, stride) -- confirm against n.LeNet
    convs = [(16, 4, 4, 2), (32, 4, 4, 2)]
    return n.LeNet(convs, [100, c], last_lin=True, last_zono=True, **kargs)
|
17 |
+
|
18 |
+
def ConvMed(c, **kargs):
    """ConvSmall topology, but with padding = 1 on the conv layers."""
    convs = [(16, 4, 4, 2), (32, 4, 4, 2)]
    return n.LeNet(convs, [100, c], padding=1, last_lin=True, last_zono=True, **kargs)
|
20 |
+
|
21 |
+
def ConvBig(c, **kargs):
    """Four-conv network followed by two 512-unit dense layers."""
    convs = [(32, 3, 3, 1), (32, 4, 4, 2), (64, 3, 3, 1), (64, 4, 4, 2)]
    return n.LeNet(convs, [512, 512, c], padding=1, last_lin=True, last_zono=True, **kargs)
|
23 |
+
|
24 |
+
def ConvLargeIBP(c, **kargs):
    """Five-conv network with IBP-style weight initialization and biases."""
    convs = [(64, 3, 3, 1), (64, 3, 3, 1), (128, 3, 3, 2), (128, 3, 3, 1), (128, 3, 3, 1)]
    return n.LeNet(convs, [200, c], padding=1, ibp_init=True, bias=True,
                   last_lin=True, last_zono=True, **kargs)
|
26 |
+
|
27 |
+
def ResNetWong(c, **kargs):
    """Wide-block residual network with a 1000-unit dense head."""
    # keep construction order (conv, blocks, head) identical so parameter
    # initialization consumes the RNG in the same sequence
    trunk = [n.Conv(16, 3, padding=1, bias=False),
             n.WideBlock(16), n.WideBlock(16),
             n.WideBlock(32, True), n.WideBlock(64, True)]
    trunk.append(n.FFNN([1000, c], ibp_init=True, bias=True,
                        last_lin=True, last_zono=True, **kargs))
    return n.Seq(*trunk)
|
29 |
+
|
30 |
+
def TruncatedVGG(c, **kargs):
    """Four-conv VGG-style prefix with a single 512-unit dense layer."""
    convs = [(64, 3, 3, 1), (64, 3, 3, 1), (128, 3, 3, 2), (128, 3, 3, 1)]
    return n.LeNet(convs, [512, c], padding=1, ibp_init=True, bias=True,
                   last_lin=True, last_zono=True, **kargs)
|
32 |
+
|
33 |
+
|
34 |
+
############# New Models
|
35 |
+
|
36 |
+
def ResNetTiny(c, **kargs):  # resnetWide also used by mixtrain and scaling provable adversarial defenses
    """Small wide-block residual net: one conv, five WideBlocks, 500-unit head."""
    def block(width, bias=True, **kw):
        # wide residual block without batch norm, IBP-friendly init
        return n.WideBlock(width, False, bias=bias, ibp_init=True, batch_norm=False, **kw)
    layers = [n.Conv(16, 3, padding=1, bias=True, ibp_init=True)]
    layers.extend(block(w) for w in (16, 32, 32, 32, 32))
    layers.append(n.FFNN([500, c], bias=True, last_lin=True,
                         ibp_init=True, last_zono=True, **kargs))
    return n.Seq(*layers)
|
46 |
+
|
47 |
+
def ResNetTiny_FewCombo(c, **kargs):  # resnetWide also used by mixtrain and scaling provable adversarial defenses
    """ResNetTiny with a few correlation/decorrelation combo layers inserted.

    Fix: drop the unused locals `dl` (n.DeepLoss), `cm2d` (n.CorrMaxPool2D)
    and `cm3d` (n.CorrMaxPool3D) -- they were assigned but never referenced.
    """
    def wb(c, bias = True, **kargs):
        # wide residual block without batch norm, IBP-friendly init
        return n.WideBlock(c, False, bias=bias, ibp_init=True, batch_norm = False, **kargs)
    cmk = n.CorrMaxK
    dec = lambda x: n.DecorrMin(x, num_to_keep = True)
    return n.Seq(cmk(32),
                 n.Conv(16, 3, padding=1, bias=True, ibp_init = True), dec(8),
                 wb(16), dec(4),
                 wb(32), n.Concretize(),
                 wb(32),
                 wb(32),
                 wb(32), cmk(10),
                 n.FFNN([500, c], bias=True, last_lin=True, ibp_init = True, last_zono = True, **kargs))
|
63 |
+
|
64 |
+
|
65 |
+
def ResNetTiny_ManyFixed(c, **kargs):  # resnetWide also used by mixtrain and scaling provable adversarial defenses
    """ResNetTiny variant with CorrFix/DecorrMin pairs after most stages."""
    def block(width, bias=True, **kw):
        return n.WideBlock(width, False, bias=bias, ibp_init=True, batch_norm=False, **kw)
    fix = n.CorrFix
    keep = lambda k: n.DecorrMin(k, num_to_keep=True)
    return n.Seq(n.CorrMaxK(32),
                 n.Conv(16, 3, padding=1, bias=True, ibp_init=True), fix(16), keep(16),
                 block(16), fix(8), keep(8),
                 block(32), fix(8), keep(8),
                 block(32), fix(4), keep(4),
                 block(32), n.Concretize(),
                 block(32),
                 n.FFNN([500, c], bias=True, last_lin=True, ibp_init=True, last_zono=True, **kargs))
|
78 |
+
|
79 |
+
def SkipNet18(c, **kargs):
    """18-layer skip-connection ResNet with a 512-512-c dense head."""
    backbone = n.ResNet([2, 2, 2, 2], bias=True, ibp_init=True, skip_net=True)
    head = n.FFNN([512, 512, c], bias=True, last_lin=True, last_zono=True,
                  ibp_init=True, **kargs)
    return n.Seq(backbone, head)
|
81 |
+
|
82 |
+
def SkipNet18_Combo(c, **kargs):
    """SkipNet18 with CorrFix/DecorrMin/DeepLoss layers injected at stages 2-4."""
    # (layer, stage) pairs handed to n.ResNet's `extra` hook, in the same
    # construction order as before
    extras = [
        (n.CorrFix(20), 2), (n.DecorrMin(10, num_to_keep=True), 2),
        (n.CorrFix(10), 3), (n.DecorrMin(5, num_to_keep=True), 3),
        (n.DeepLoss(S.Until(90, S.Lin(0, 0.2, 50, 40), 0)), 3),
        (n.CorrFix(5), 4), (n.DecorrMin(2, num_to_keep=True), 4),
    ]
    backbone = n.ResNet([2, 2, 2, 2], extra=extras, bias=True, ibp_init=True, skip_net=True)
    head = n.FFNN([512, 512, c], bias=True, last_lin=True, last_zono=True,
                  ibp_init=True, **kargs)
    return n.Seq(backbone, head)
|
89 |
+
|
90 |
+
def ResNet18(c, **kargs):
    """Standard 18-layer ResNet backbone with a 512-512-c dense head."""
    backbone = n.ResNet([2, 2, 2, 2], bias=True, ibp_init=True)
    head = n.FFNN([512, 512, c], bias=True, last_lin=True, last_zono=True,
                  ibp_init=True, **kargs)
    return n.Seq(backbone, head)
|
92 |
+
|
93 |
+
|
94 |
+
def ResNetLarge_LargeCombo(c, **kargs):  # resnetWide also used by mixtrain and scaling provable adversarial defenses
    """Large wide-block residual net with correlation/decorrelation layers
    and two scheduled DeepLoss taps.

    Fix: drop the unused locals `cm2d` (n.CorrMaxPool2D) and `cm3d`
    (n.CorrMaxPool3D) -- assigned but never referenced.
    """
    def wb(c, bias = True, **kargs):
        # wide residual block without batch norm, IBP-friendly init
        return n.WideBlock(c, False, bias=bias, ibp_init=True, batch_norm = False, **kargs)
    dl = n.DeepLoss
    cmk = n.CorrMaxK
    dec = lambda x: n.DecorrMin(x, num_to_keep = True)
    # NOTE(review): unlike the other models in this file, the final FFNN
    # omits last_zono = True -- confirm whether that is intentional.
    return n.Seq(n.Conv(16, 3, padding=1, bias=True, ibp_init = True), cmk(4),
                 wb(16), cmk(4), dec(4),
                 wb(32), cmk(4), dec(4),
                 wb(32), dl(S.Until(1, 0, S.Lin(0.5, 0, 50, 3))),
                 wb(32), cmk(4), dec(4),
                 wb(64), cmk(4), dec(2),
                 wb(64), dl(S.Until(24, S.Lin(0, 0.1, 20, 4), S.Lin(0.1, 0, 50))),
                 wb(64),
                 n.FFNN([1000, c], bias=True, last_lin=True, ibp_init = True, **kargs))
|
111 |
+
|
112 |
+
|
113 |
+
|
114 |
+
def ResNet34(c, **kargs):
    """Standard 34-layer ResNet backbone with a 512-512-c dense head."""
    backbone = n.ResNet([3, 4, 6, 3], bias=True, ibp_init=True)
    head = n.FFNN([512, 512, c], bias=True, last_lin=True, last_zono=True,
                  ibp_init=True, **kargs)
    return n.Seq(backbone, head)
|
116 |
+
|
117 |
+
|
118 |
+
def DenseNet100(c, **kwargs):
    """Depth-100 DenseNet-BC (growth rate 12, compression 0.5).

    NOTE(review): `kwargs` is accepted but not forwarded to n.DenseNet --
    confirm that is intentional.
    """
    return n.DenseNet(growthRate=12, depth=100,
                      reduction=0.5, bottleneck=True,
                      num_classes=c)
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
numpy
|
2 |
+
six
|
3 |
+
future
|
4 |
+
forbiddenfruit
|
5 |
+
torch==0.4.1
|
6 |
+
torchvision==0.2.1
|
scheduling.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
import torch.nn as nn
import math

# Resolve the helpers import both as a package module and as a flat script.
# A bare `except:` previously swallowed *every* exception (including
# KeyboardInterrupt); catch only the errors a failed relative import raises.
try:
    from . import helpers as h
except (ImportError, ValueError, SystemError):
    import helpers as h
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
class Const():
    """Base schedule: a fixed value queried via getVal(time=..., ...)."""
    def __init__(self, c):
        # store as float; None means "defer to the caller-supplied default"
        self.c = c if c is None else float(c)

    def getVal(self, c = None, **kargs):
        """Return the stored constant, or the fallback `c` when unset."""
        return self.c if self.c is not None else c

    def __str__(self):
        return str(self.c)

    def initConst(x):
        # invoked as Const.initConst(x) (no self): coerce a raw number into
        # a Const, passing existing Const instances through unchanged
        return x if isinstance(x, Const) else Const(x)
|
24 |
+
|
25 |
+
class Lin(Const):
    """Linear ramp schedule: moves from `start` to `end` over `steps` time
    units beginning at time `initial`, clamped at both ends.

    When `quant` is true, time is floored to an integer before interpolating.
    """
    def __init__(self, start, end, steps, initial = 0, quant = False):
        self.start = float(start)
        self.end = float(end)
        self.steps = float(steps)
        self.initial = float(initial)
        self.quant = quant

    def getVal(self, time = 0, **kargs):
        if self.quant:
            time = math.floor(time)
        # interpolation fraction, clamped to [0, 1]
        frac = max(0, min(1, float(time - self.initial) / self.steps))
        return (self.end - self.start) * frac + self.start

    def __str__(self):
        # Fix: the template used %-style placeholders but was rendered with
        # str.format(), which left the "%s" markers unsubstituted. Use
        # %-interpolation, consistent with the other schedules in this file.
        return "Lin(%s,%s,%s,%s, quant=%s)" % (str(self.start), str(self.end), str(self.steps), str(self.initial), str(self.quant))
|
40 |
+
|
41 |
+
class Until(Const):
    """Piecewise schedule: follow `a` while time < thresh, then follow `b`
    with the clock re-based to zero at the switch point."""
    def __init__(self, thresh, a, b):
        self.thresh = thresh
        self.a = Const.initConst(a)
        self.b = Const.initConst(b)

    def getVal(self, *args, time = 0, **kargs):
        if time < self.thresh:
            return self.a.getVal(*args, time = time, **kargs)
        return self.b.getVal(*args, time = time - self.thresh, **kargs)

    def __str__(self):
        return "Until(%s, %s, %s)" % (str(self.thresh), str(self.a), str(self.b))
|
52 |
+
|
53 |
+
class Scale(Const):  # use with mix when aw = 1, and 0 <= c < 1
    """Odds-style mixing weight: maps c in [0, 1) to c / (1 - c)."""
    def __init__(self, c):
        self.c = Const.initConst(c)

    def getVal(self, *args, **kargs):
        value = self.c.getVal(*args, **kargs)
        if value == 0:
            return 0
        assert value >= 0
        assert value < 1
        return value / (1 - value)

    def __str__(self):
        return "Scale(%s)" % str(self.c)
|
67 |
+
|
68 |
+
def MixLin(*args, **kargs):
    """Convenience: a Lin ramp wrapped in Scale for use as a mixing weight."""
    ramp = Lin(*args, **kargs)
    return Scale(ramp)
|
70 |
+
|
71 |
+
class Normal(Const):
    """Random schedule: |N(0, 1)| noise of the requested shape scaled by `c`."""
    def __init__(self, c):
        self.c = Const.initConst(c)

    def getVal(self, *args, shape = [1], **kargs):
        scale = self.c.getVal(*args, shape = shape, **kargs)
        noise = torch.randn(shape, device = h.device).abs()
        return noise * scale

    def __str__(self):
        return "Normal(%s)" % str(self.c)
|
81 |
+
|
82 |
+
class Clip(Const):
    """Clamp schedule `c` into [l, u]; handles both floats and tensors."""
    def __init__(self, c, l, u):
        self.c = Const.initConst(c)
        self.l = Const.initConst(l)
        self.u = Const.initConst(u)

    def getVal(self, *args, **kargs):
        val = self.c.getVal(*args, **kargs)
        lo = self.l.getVal(*args, **kargs)
        hi = self.u.getVal(*args, **kargs)
        if not isinstance(val, float):
            return val.clamp(lo, hi)  # tensor path
        return min(max(val, lo), hi)

    def __str__(self):
        return "Clip(%s, %s, %s)" % (str(self.c), str(self.l), str(self.u))
|
99 |
+
|
100 |
+
class Fun(Const):
    """Schedule whose value is produced by an arbitrary user-supplied callable."""
    def __init__(self, foo):
        # foo: callable invoked with getVal's full argument list on each query
        self.foo = foo
    def getVal(self, *args, **kargs):
        return self.foo(*args, **kargs)

    def __str__(self):
        return "Fun(...)"
|
108 |
+
|
109 |
+
class Complement(Const):  # use with mix when aw = 1, and 0 <= c < 1
    """Schedule returning 1 - c for c in [0, 1]."""
    def __init__(self, c):
        self.c = Const.initConst(c)

    def getVal(self, *args, **kargs):
        value = self.c.getVal(*args, **kargs)
        assert value >= 0
        assert value <= 1
        return 1 - value

    def __str__(self):
        return "Complement(%s)" % str(self.c)
|