g30rv17ys committed
Commit fd4b932 · verified · 1 Parent(s): 5b6ac0e

Add files using upload-large-folder tool

This view is limited to 50 files because it contains too many changes.
Files changed (50):
  1. .github/ISSUE_TEMPLATE/bug_report.md +38 -0
  2. .github/ISSUE_TEMPLATE/feature_request.md +20 -0
  3. .gitmodules +6 -0
  4. convert_model.py +56 -0
  5. expman/expman/__init__.py +7 -0
  6. expman/expman/__main__.py +58 -0
  7. expman/expman/__pycache__/__init__.cpython-311.pyc +0 -0
  8. expman/expman/__pycache__/exp_group.cpython-311.pyc +0 -0
  9. expman/expman/__pycache__/experiment.cpython-311.pyc +0 -0
  10. expman/expman/exp_group.py +96 -0
  11. expman/expman/experiment.py +233 -0
  12. losses.py +18 -0
  13. matlab/Meye.m +310 -0
  14. matlab/README.md +57 -0
  15. matlab/example.m +211 -0
  16. models/deeplab.py +78 -0
  17. models/deeplab/README.md +380 -0
  18. models/deeplab/assets/2007_000346_inference.png +0 -0
  19. models/deeplab/assets/confusion_matrix.png +0 -0
  20. models/deeplab/assets/dog_inference.png +0 -0
  21. models/deeplab/assets/mIOU.png +0 -0
  22. models/deeplab/common/callbacks.py +32 -0
  23. models/deeplab/common/data_utils.py +523 -0
  24. models/deeplab/common/model_utils.py +168 -0
  25. models/deeplab/common/utils.py +343 -0
  26. models/deeplab/configs/ade20k_classes.txt +150 -0
  27. models/deeplab/configs/cityscapes_classes.txt +33 -0
  28. models/deeplab/configs/coco_classes.txt +80 -0
  29. models/deeplab/configs/voc_classes.txt +20 -0
  30. models/deeplab/deeplab.py +297 -0
  31. models/deeplab/deeplabv3p/data.py +161 -0
  32. models/deeplab/deeplabv3p/loss.py +74 -0
  33. models/deeplab/deeplabv3p/metrics.py +46 -0
  34. models/deeplab/deeplabv3p/model.py +96 -0
  35. models/deeplab/deeplabv3p/models/__pycache__/deeplabv3p_mobilenetv3.cpython-311.pyc +0 -0
  36. models/deeplab/deeplabv3p/models/__pycache__/layers.cpython-311.pyc +0 -0
  37. models/deeplab/deeplabv3p/models/deeplabv3p_mobilenetv2.py +349 -0
  38. models/deeplab/deeplabv3p/models/deeplabv3p_mobilenetv3.py +912 -0
  39. models/deeplab/deeplabv3p/models/deeplabv3p_peleenet.py +428 -0
  40. models/deeplab/deeplabv3p/models/deeplabv3p_resnet50.py +408 -0
  41. models/deeplab/deeplabv3p/models/deeplabv3p_xception.py +239 -0
  42. models/deeplab/deeplabv3p/models/layers.py +311 -0
  43. models/deeplab/deeplabv3p/postprocess_np.py +30 -0
  44. models/deeplab/eval.py +565 -0
  45. models/deeplab/example/2007_000039.jpg +0 -0
  46. models/deeplab/example/2007_000039.png +0 -0
  47. models/deeplab/example/2007_000346.jpg +0 -0
  48. models/deeplab/example/2007_000346.png +0 -0
  49. models/deeplab/example/air.jpg +0 -0
  50. models/deeplab/example/car.jpg +0 -0
.github/ISSUE_TEMPLATE/bug_report.md ADDED
@@ -0,0 +1,38 @@
+ ---
+ name: Bug report
+ about: Create a report to help us improve
+ title: ''
+ labels: bug
+ assignees: ''
+
+ ---
+
+ **Describe the bug**
+ A clear and concise description of what the bug is.
+
+ **To Reproduce**
+ Steps to reproduce the behavior:
+ 1. Go to '...'
+ 2. Click on '....'
+ 3. Scroll down to '....'
+ 4. See error
+
+ **Expected behavior**
+ A clear and concise description of what you expected to happen.
+
+ **Screenshots**
+ If applicable, add screenshots to help explain your problem.
+
+ **Desktop (please complete the following information):**
+ - OS: [e.g. iOS]
+ - Browser [e.g. chrome, safari]
+ - Version [e.g. 22]
+
+ **Smartphone (please complete the following information):**
+ - Device: [e.g. iPhone6]
+ - OS: [e.g. iOS8.1]
+ - Browser [e.g. stock browser, safari]
+ - Version [e.g. 22]
+
+ **Additional context**
+ Add any other context about the problem here.
.github/ISSUE_TEMPLATE/feature_request.md ADDED
@@ -0,0 +1,20 @@
+ ---
+ name: Feature request
+ about: Suggest an idea for this project
+ title: ''
+ labels: enhancement
+ assignees: ''
+
+ ---
+
+ **Is your feature request related to a problem? Please describe.**
+ A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+ **Describe the solution you'd like**
+ A clear and concise description of what you want to happen.
+
+ **Describe alternatives you've considered**
+ A clear and concise description of any alternative solutions or features you've considered.
+
+ **Additional context**
+ Add any other context or screenshots about the feature request here.
.gitmodules ADDED
@@ -0,0 +1,6 @@
+ [submodule "expman"]
+     path = expman
+     url = https://github.com/fabiocarrara/expman
+ [submodule "models/deeplab"]
+     path = models/deeplab
+     url = https://github.com/david8862/tf-keras-deeplabv3p-model-set
convert_model.py ADDED
@@ -0,0 +1,56 @@
+ import tensorflow as tf
+ from tensorflow.keras import backend as K
+ from adabelief_tf import AdaBeliefOptimizer
+
+ def iou_coef(y_true, y_pred):
+     y_true = tf.cast(y_true, tf.float32)
+     y_pred = tf.cast(y_pred, tf.float32)
+     intersection = K.sum(K.abs(y_true * y_pred), axis=[1, 2, 3])
+     union = K.sum(y_true, axis=[1, 2, 3]) + K.sum(y_pred, axis=[1, 2, 3]) - intersection
+     return K.mean((intersection + 1e-6) / (union + 1e-6))
+
+ def dice_coef(y_true, y_pred):
+     y_true = tf.cast(y_true, tf.float32)
+     y_pred = tf.cast(y_pred, tf.float32)
+     intersection = K.sum(K.abs(y_true * y_pred), axis=[1, 2, 3])
+     return K.mean((2. * intersection + 1e-6) / (K.sum(y_true, axis=[1, 2, 3]) + K.sum(y_pred, axis=[1, 2, 3]) + 1e-6))
+
+ def boundary_loss(y_true, y_pred):
+     y_true = tf.cast(y_true, tf.float32)
+     y_pred = tf.cast(y_pred, tf.float32)
+     dy_true, dx_true = tf.image.image_gradients(y_true)
+     dy_pred, dx_pred = tf.image.image_gradients(y_pred)
+     loss = tf.reduce_mean(tf.abs(dy_pred - dy_true) + tf.abs(dx_pred - dx_true))
+     return loss * 0.5
+
+ def enhanced_binary_crossentropy(y_true, y_pred):
+     y_true = tf.cast(y_true, tf.float32)
+     y_pred = tf.cast(y_pred, tf.float32)
+     bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)
+     boundary = boundary_loss(y_true, y_pred)
+     return bce + boundary
+
+ def hard_swish(x):
+     return x * tf.nn.relu6(x + 3) * (1. / 6.)
+
+ # Path to your current .keras model
+ keras_path = 'runs/b32_c-conv_d-|root|meye|data|NN_human_mouse_eyes|_g1.5_l0.001_num_c1_num_f16_num_s5_r128_se23_sp-random_up-relu_us0/best_model.keras'
+
+ # Load the model with custom objects
+ custom_objects = {
+     'AdaBeliefOptimizer': AdaBeliefOptimizer,
+     'iou_coef': iou_coef,
+     'dice_coef': dice_coef,
+     'hard_swish': hard_swish,
+     'enhanced_binary_crossentropy': enhanced_binary_crossentropy,
+     'boundary_loss': boundary_loss
+ }
+
+ print("Loading model from:", keras_path)
+ model = tf.keras.models.load_model(keras_path, custom_objects=custom_objects)
+
+ # Save as .h5
+ h5_path = keras_path.replace('.keras', '.h5')
+ print("Saving model to:", h5_path)
+ model.save(h5_path, save_format='h5')
+ print("Conversion complete!")
expman/expman/__init__.py ADDED
@@ -0,0 +1,7 @@
+ from .experiment import Experiment, exp_filter, use_hash_naming
+ from .exp_group import ExpGroup
+
+ abbreviate = Experiment.abbreviate
+ from_dir = Experiment.from_dir
+ gather = ExpGroup.gather
+ is_exp_dir = Experiment.is_exp_dir
expman/expman/__main__.py ADDED
@@ -0,0 +1,58 @@
+ import argparse
+
+ from .exp_group import ExpGroup
+
+
+ def add_param(args):
+     exps = ExpGroup.gather(args.run)
+     for exp in exps:
+         exp.add_parameter(args.param, args.value)
+
+
+ def mv_param(args):
+     exps = ExpGroup.gather(args.run)
+     for exp in exps:
+         exp.rename_parameter(args.param, args.new_param)
+
+
+ def rm_param(args):
+     exps = ExpGroup.gather(args.run)
+     for exp in exps:
+         exp.remove_parameter(args.param)
+
+
+ def command_line():
+     def guess(value):
+         """ try to guess a python type for the passed string parameter """
+         try:
+             result = eval(value)
+         except (NameError, ValueError):
+             result = value
+         return result
+
+     parser = argparse.ArgumentParser(description='Experiment Manager Utilities')
+     subparsers = parser.add_subparsers(dest='command')
+     subparsers.required = True
+
+     parser_add = subparsers.add_parser('add-param')
+     parser_add.add_argument('run', default='runs/')
+     parser_add.add_argument('param', help='new param name')
+     parser_add.add_argument('value', type=guess, help='new param value')
+     parser_add.set_defaults(func=add_param)
+
+     parser_rm = subparsers.add_parser('rm-param')
+     parser_rm.add_argument('run', default='runs/')
+     parser_rm.add_argument('param', help='param to remove')
+     parser_rm.set_defaults(func=rm_param)
+
+     parser_mv = subparsers.add_parser('mv-param')
+     parser_mv.add_argument('run', default='runs/')
+     parser_mv.add_argument('param', help='param to rename')
+     parser_mv.add_argument('new_param', help='new param name')
+     parser_mv.set_defaults(func=mv_param)
+
+     args = parser.parse_args()
+     args.func(args)
+
+
+ command_line()
expman/expman/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (448 Bytes).
 
expman/expman/__pycache__/exp_group.cpython-311.pyc ADDED
Binary file (6.82 kB).
 
expman/expman/__pycache__/experiment.cpython-311.pyc ADDED
Binary file (16.9 kB).
 
expman/expman/exp_group.py ADDED
@@ -0,0 +1,96 @@
+ import os
+ import pandas as pd
+
+ from glob import glob
+ from .experiment import Experiment
+
+
+ class ExpGroup:
+     @classmethod
+     def gather(cls, root='runs/'):
+         if Experiment.is_exp_dir(root):
+             exps = (root,)
+         else:
+             exps = glob(os.path.join(root, '*'))
+             exps = filter(Experiment.is_exp_dir, exps)
+
+         exps = map(Experiment.from_dir, exps)
+         exps = filter(lambda x: x.existing, exps)
+         exps = tuple(exps)
+         return cls(exps)
+
+     def __init__(self, experiments=()):
+         assert isinstance(experiments, (list, tuple)), "'experiments' must be a list or tuple"
+         self.experiments = experiments
+
+     @staticmethod
+     def _collect_one(exp_id, exp, csv=None, index_col=None):
+         params = exp.params.to_frame().transpose().infer_objects()  # as DataFrame
+         params['exp_id'] = exp_id
+
+         if csv is None:
+             return params
+
+         csv_path = exp.path_to(csv)
+         if os.path.exists(csv_path):
+             stuff = pd.read_csv(csv_path, index_col=index_col)
+         else:  # try globbing
+             csv_files = os.path.join(exp.path, csv)
+             csv_files = list(glob(csv_files))
+             if len(csv_files) == 0:
+                 return pd.DataFrame()
+
+             stuff = map(lambda x: pd.read_csv(x, index_col=index_col, float_precision='round_trip'), csv_files)
+             stuff = pd.concat(stuff, ignore_index=True)
+
+         stuff['exp_id'] = exp_id
+         return pd.merge(params, stuff, on='exp_id')
+
+     def collect(self, csv=None, index_col=None, prefix=''):
+         results = [self._collect_one(exp_id, exp, csv=csv, index_col=index_col) for exp_id, exp in enumerate(self.experiments)]
+         results = pd.concat(results, ignore_index=True, sort=False)
+
+         if len(results):
+             # build minimal exp_name
+             exp_name = ''
+             params = results.loc[:, :'exp_id'].drop('exp_id', axis=1)
+             if len(params) > 1:
+                 varying_params = params.loc[:, params.nunique() > 1]
+                 exp_name = varying_params.apply(Experiment.abbreviate, axis=1)
+             idx = results.columns.get_loc('exp_id') + 1
+             results.insert(idx, 'exp_name', prefix + exp_name)
+
+         return results
+
+     def filter(self, filters):
+         if isinstance(filters, str):
+             filters = filters.split(',')
+             filters = map(lambda x: x.split('='), filters)
+             filters = {k: v for k, v in filters}
+
+         def __filter_exp(e):
+             for param, value in filters.items():
+                 try:
+                     p = e.params[param]
+                     ptype = type(p)
+                     if p != ptype(value):
+                         return False
+                 except:
+                     return False
+
+             return True
+
+         filtered_exps = filter(__filter_exp, self.experiments)
+         filtered_exps = tuple(filtered_exps)
+         return ExpGroup(filtered_exps)
+
+     def items(self, short_names=True, prefix=''):
+         if short_names:
+             params = self.collect(prefix=prefix)
+             exp_names = params['exp_name'].values
+             return zip(exp_names, self.experiments)
+
+         return self.experiments
+
+     def __iter__(self):
+         return iter(self.experiments)
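For context, the typical way to use `ExpGroup` with `Experiment` (both added in this commit) is to gather every run directory under `runs/` and join the per-run parameters with each run's CSV logs. A minimal usage sketch follows; the `log.csv` filename and the `batch_size` parameter are illustrative assumptions, not names fixed by this commit.

```python
# Hypothetical usage of the expman package added above.
import expman

# Gather every experiment directory under runs/ (each must contain params.json)
exps = expman.gather('runs/')

# One row of parameters per experiment
params = exps.collect()

# Parameters joined with each run's CSV log ('log.csv' is an assumed filename)
logs = exps.collect('log.csv')

# Keep only runs with a given parameter value, then iterate them by short name
for name, exp in exps.filter('batch_size=32').items():
    print(name, exp.path_to('ckpt/best.h5'))
```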
expman/expman/experiment.py ADDED
@@ -0,0 +1,233 @@
+
+ import argparse
+ import ast
+ import os
+ import hashlib
+ import shutil
+ import numbers
+ from glob import glob
+ from io import StringIO
+
+ import numpy as np
+ import pandas as pd
+
+
+ hash_naming = False
+
+ def use_hash_naming(use_hashes=True):
+     global hash_naming
+     assert isinstance(use_hashes, bool), "Value must be a boolean."
+     hash_naming = use_hashes
+
+ def _guessed_cast(x):
+     try:
+         return ast.literal_eval(x)
+     except:
+         return x
+
+ def exp_filter(string):
+     if '=' not in string:
+         raise argparse.ArgumentTypeError(
+             'Filter {} is not in format <param1>=<value1>[, <param2>=<value2>[, ...]]'.format(string))
+     filters = string.split(',')
+     filters = map(lambda x: x.split('='), filters)
+     filters = {k: _guessed_cast(v) for k, v in filters}
+     return filters
+
+
+ class Experiment:
+
+     PARAM_FILENAME = 'params.json'
+
+     @staticmethod
+     def _abbr(name, value, params):
+
+         def prefix_len(a, b):
+             return len(os.path.commonprefix((a, b)))
+
+         prefix = [name[:prefix_len(p, name) + 1] for p in params.keys() if p != name]
+         prefix = max(prefix, key=len) if len(prefix) > 0 else name
+
+         sep = ''
+         if isinstance(value, str):
+             sep = '-'
+         elif isinstance(value, numbers.Number):
+             value = '{:g}'.format(value)
+             sep = '-' if prefix[-1].isdigit() else ''
+         elif isinstance(value, (list, tuple)):
+             value = map(str, value)
+             value = map(lambda v: v.replace(os.sep, '|'), value)
+             value = ','.join(list(value))
+             sep = '-'
+
+         return prefix, sep, value
+
+     @classmethod
+     def abbreviate(cls, params):
+         if isinstance(params, pd.DataFrame):
+             params = params.iloc[0]
+         params = params.replace({np.nan: None})
+
+         if hash_naming:
+             exp_name = hashlib.md5(str(sorted(params.items())).encode()).hexdigest()
+         else:
+             abbrev_params = {k: '{}{}{}'.format(*cls._abbr(k, v, params)) for k, v in params.items()}
+             abbrev = sorted(abbrev_params.values())
+             exp_name = '_'.join(abbrev)
+
+         return exp_name
+
+     @classmethod
+     def from_dir(cls, exp_dir):
+         root = os.path.dirname(exp_dir.rstrip('/'))
+         params = os.path.join(exp_dir, cls.PARAM_FILENAME)
+
+         assert os.path.exists(exp_dir), "Experiment directory not found: '{}'".format(exp_dir)
+         assert os.path.exists(params), "Empty run directory found: '{}'".format(params)
+
+         params = cls._read_params(params)
+         exp = cls(params, root=root, create=False)
+         return exp
+
+     @classmethod
+     def is_exp_dir(cls, exp_dir):
+         if os.path.isdir(exp_dir):
+             params = os.path.join(exp_dir, cls.PARAM_FILENAME)
+             if os.path.exists(params):
+                 return True
+
+         return False
+
+     @classmethod
+     def update_exp_dir(cls, exp_dir):
+         exp_dir = exp_dir.rstrip('/')
+         root = os.path.dirname(exp_dir)
+         name = os.path.basename(exp_dir)
+         params = os.path.join(exp_dir, cls.PARAM_FILENAME)
+
+         assert os.path.exists(exp_dir), "Experiment directory not found: '{}'".format(exp_dir)
+         assert os.path.exists(params), "Empty run directory found: '{}'".format(params)
+
+         params = cls._read_params(params)
+         new_name = cls.abbreviate(params)
+
+         if name != new_name:
+             new_exp_dir = os.path.join(root, new_name)
+             assert not os.path.exists(new_exp_dir), \
+                 "Destination experiment directory already exists: '{}'".format(new_exp_dir)
+
+             print('Renaming:\n {} into\n {}'.format(exp_dir, new_exp_dir))
+             shutil.move(exp_dir, new_exp_dir)
+
+     def __init__(self, params, root='runs/', ignore=(), create=True):
+         # relative dir containing this run
+         self.root = root
+         # params to be ignored in the run naming
+         self.ignore = ignore
+         # parameters of this run
+         if isinstance(params, argparse.Namespace):
+             params = vars(params)
+
+         def _sanitize(v):
+             return tuple(v) if isinstance(v, list) else v
+
+         params = {k: _sanitize(v) for k, v in params.items() if k not in self.ignore}
+         self.params = pd.Series(params, name='params')
+
+         # whether to create the run directory if not exists
+         self.create = create
+
+         self.name = self.abbreviate(self.params)
+         self.path = os.path.join(self.root, self.name)
+         self.existing = os.path.exists(self.path)
+         self.found = self.existing
+
+         if not self.existing:
+             if self.create:
+                 os.makedirs(self.path)
+                 self.write_params()
+                 self.existing = True
+             else:
+                 print("Run directory '{}' not found, but not created.".format(self.path))
+
+         else:
+             param_fname = self.path_to(self.PARAM_FILENAME)
+             assert os.path.exists(param_fname), "Empty run, parameters not found: '{}'".format(param_fname)
+             self.params = self._read_params(param_fname)
+
+
+     def __str__(self):
+         s = StringIO()
+         print('Experiment Dir: {}'.format(self.path), file=s)
+         print('Params:', file=s)
+
+         # Set display options differently
+         with pd.option_context('display.max_rows', None,
+                                'display.max_columns', None,
+                                'display.width', None):
+             print(self.params.to_string(), file=s)
+
+         return s.getvalue()
+
+     def __repr__(self):
+         return self.__str__()
+
+     def path_to(self, path):
+         path = os.path.join(self.path, path)
+         return path
+
+     def add_parameter(self, key, value):
+         assert key not in self.params, "Parameter already exists: '{}'".format(key)
+         self.params[key] = value
+         self._update_run_dir()
+         self.write_params()
+
+     def rename_parameter(self, key, new_key):
+         assert key in self.params, "Cannot rename non-existent parameter: '{}'".format(key)
+         assert new_key not in self.params, "Destination name for parameter exists: '{}'".format(new_key)
+
+         self.params[new_key] = self.params[key]
+         del self.params[key]
+
+         self._update_run_dir()
+         self.write_params()
+
+     def remove_parameter(self, key):
+         assert key in self.params, "Cannot remove non-existent parameter: '{}'".format(key)
+         del self.params[key]
+         self._update_run_dir()
+         self.write_params()
+
+     def _update_run_dir(self):
+         old_run_dir = self.path
+         if self.existing:
+             self.name = self.abbreviate(self.params)
+             self.path = os.path.join(self.root, self.name)
+             assert not os.path.exists(self.path), "Cannot rename run, new name exists: '{}'".format(self.path)
+             shutil.move(old_run_dir, self.path)
+
+     @staticmethod
+     def _read_params(path):
+         # read json to pd.Series
+         params = pd.read_json(path, typ='series')
+         # transform lists to tuples (for hashability)
+         params = params.apply(lambda x: tuple(x) if isinstance(x, list) else x)
+         return params
+
+     def write_params(self):
+         # write Series as json
+         self.params.to_json(self.path_to(self.PARAM_FILENAME))
+
+ def test():
+     parser = argparse.ArgumentParser(description='Experiment Manager Test')
+     parser.add_argument('-e', '--epochs', type=int, default=70)
+     parser.add_argument('-b', '--batch-size', type=int, default=64)
+     parser.add_argument('-m', '--model', choices=('1d-conv', 'paper'), default='1d-conv')
+     parser.add_argument('-s', '--seed', type=int, default=23)
+     parser.add_argument('--no-cuda', action='store_true')
+     parser.set_defaults(no_cuda=False)
+     args = parser.parse_args()
+
+     run = Experiment(args, root='prova', ignore=['no_cuda'])
+     print(run)
+     print(run.path_to('ckpt/best.h5'))
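The naming scheme in `abbreviate` is what produces run directories like the long `b32_c-conv_..._us0` path referenced in `convert_model.py`: each parameter name is shortened to its shortest unambiguous prefix, combined with its value, and the pieces are sorted and joined with underscores, or replaced by an MD5 hash when `use_hash_naming(True)` is set. A small sketch of both modes; the parameter names below are made up for illustration and the printed strings are indicative only.

```python
# Illustrative only: parameter names are invented to show the naming scheme.
import pandas as pd
import expman

params = pd.Series({'batch_size': 32, 'lr': 0.001, 'model': 'conv', 'seed': 23})

# Prefix-abbreviated, sorted, underscore-joined name (default mode),
# e.g. something like 'b32_l0.001_m-conv_s23'
print(expman.abbreviate(params))

# Hash-based naming instead of readable abbreviations (32-char md5 hex digest)
expman.use_hash_naming(True)
print(expman.abbreviate(params))
```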
losses.py ADDED
@@ -0,0 +1,18 @@
+ import tensorflow as tf
+ from tensorflow.keras import backend as K
+
+ def boundary_loss(y_true, y_pred):
+     """Additional loss focusing on boundaries"""
+     # Compute gradients
+     dy_true, dx_true = tf.image.image_gradients(y_true)
+     dy_pred, dx_pred = tf.image.image_gradients(y_pred)
+
+     # Compute boundary loss
+     loss = tf.reduce_mean(tf.abs(dy_pred - dy_true) + tf.abs(dx_pred - dx_true))
+     return loss * 0.5  # weight factor
+
+ def enhanced_binary_crossentropy(y_true, y_pred):
+     """Combine standard BCE with boundary loss"""
+     bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)
+     boundary = boundary_loss(y_true, y_pred)
+     return bce + boundary
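These losses are plain `(y_true, y_pred)` functions, so they can be passed directly to `Model.compile`; that is also why they must appear in `custom_objects` when a saved model is reloaded, as done in `convert_model.py`. A short sketch of the intended wiring follows; the toy model, optimizer and metric choice are illustrative assumptions, not taken from this commit.

```python
# Illustrative wiring of the losses above into a Keras segmentation model.
import tensorflow as tf
from losses import boundary_loss, enhanced_binary_crossentropy

# Tiny stand-in model with a 128x128x1 mask output (shape assumed from this repo)
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(128, 128, 1)),
    tf.keras.layers.Conv2D(1, 3, padding='same', activation='sigmoid'),
])

# BCE + boundary term as the training loss; the boundary term alone tracked as a metric
model.compile(optimizer='adam',
              loss=enhanced_binary_crossentropy,
              metrics=[boundary_loss])
```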
matlab/Meye.m ADDED
@@ -0,0 +1,310 @@
+ classdef Meye
+
+     properties (Access=private)
+         model
+     end
+
+
+     methods
+
+         % CONSTRUCTOR
+         %------------------------------------------------------------------
+         function self = Meye(modelPath)
+             % Class constructor
+             arguments
+                 modelPath char {mustBeText}
+             end
+
+             % Change the current directory to the directory where the
+             % original class is, so that the package with the custom layers
+             % is created there
+             classPath = getClassPath(self);
+             oldFolder = cd(classPath);
+             % Import the model saved as ONNX
+             self.model = importONNXNetwork(modelPath, ...
+                 'GenerateCustomLayers',true, ...
+                 'PackageName','customLayers_meye',...
+                 'InputDataFormats', 'BSSC',...
+                 'OutputDataFormats',{'BSSC','BC'});
+
+             % Manually change the "nearest" option to "linear" inside of
+             % the automatically generated custom layers. This is necessary
+             % due to the fact that MATLAB still does not support the proper
+             % translation between ONNX layers and DLtoolbox layers
+             self.nearest2Linear([classPath filesep '+customLayers_meye'])
+
+             % Go back to the old current folder
+             cd(oldFolder)
+         end
+
+
+         % PREDICTION OF SINGLE IMAGES
+         %------------------------------------------------------------------
+         function [pupilMask, eyeProb, blinkProb] = predictImage(self, inputImage, options)
+             % Predicts pupil location on a single image
+             arguments
+                 self
+                 inputImage
+                 options.roiPos = []
+                 options.threshold = []
+             end
+
+             roiPos = options.roiPos;
+
+             % Convert the image to grayscale if RGB
+             if size(inputImage,3) > 1
+                 inputImage = im2gray(inputImage);
+             end
+
+             % Crop the frame to the desired ROI
+             if ~isempty(roiPos)
+                 crop = inputImage(roiPos(2):roiPos(2)+roiPos(4)-1,...
+                     roiPos(1):roiPos(1)+roiPos(3)-1);
+             else
+                 crop = inputImage;
+             end
+
+             % Preprocessing
+             img = double(imresize(crop,[128 128]));
+             img = img / max(img,[],'all');
+
+             % Do the prediction
+             [rawMask, info] = predict(self.model, img);
+             eyeProb = info(1);
+             blinkProb = info(2);
+
+             % Reinsert the cropped prediction in the frame
+             if ~isempty(roiPos)
+                 pupilMask = zeros(size(inputImage));
+                 pupilMask(roiPos(2):roiPos(2)+roiPos(4)-1,...
+                     roiPos(1):roiPos(1)+roiPos(3)-1) = imresize(rawMask, [roiPos(4), roiPos(3)],"bilinear");
+             else
+                 pupilMask = imresize(rawMask,size(inputImage),"bilinear");
+             end
+
+             % Apply a threshold to the image if requested
+             if ~isempty(options.threshold)
+                 pupilMask = pupilMask > options.threshold;
+             end
+
+         end
+
+
+         % PREDICT A MOVIE AND GET A TABLE WITH THE RESULTS
+         %------------------------------------------------------------------
+         function tab = predictMovie(self, moviePath, options)
+             % Predict an entire video file and returns a results Table
+             %
+             % tab = predictMovie(moviePath, name-value)
+             %
+             % INPUT(S)
+             % - moviePath: (char/string) Full path of a video file.
+             % - name-value pairs
+             %   - roiPos: [x,y,width,height] 4-elements vector defining a
+             %     rectangle containing the eye. Works best if width and
+             %     height are similar. If empty, a prediction will be done on
+             %     a full frame (Default: []).
+             %   - threshold: [0-1] The pupil prediction is binarized based
+             %     on a threshold value to measure pupil size. (Default: 0.4)
+             %
+             % OUTPUT(S)
+             % - tab: a MATLAB table containing data of the analyzed video
+
+             arguments
+                 self
+                 moviePath char {mustBeText}
+                 options.roiPos double = []
+                 options.threshold = 0.4;
+             end
+
+             % Initialize a video reader
+             v = VideoReader(moviePath);
+             totFrames = v.NumFrames;
+
+             % Initialize Variables
+             frameN = zeros(totFrames,1,'double');
+             frameTime = zeros(totFrames,1,'double');
+             binaryMask = cell(totFrames,1);
+             pupilArea = zeros(totFrames,1,'double');
+             isEye = zeros(totFrames,1,'double');
+             isBlink = zeros(totFrames,1,'double');
+
+             tic
+             for i = 1:totFrames
+                 % Progress report
+                 if toc>10
+                     fprintf('%.1f%% - Processing frame (%u/%u)\n', (i/totFrames)*100 , i, totFrames)
+                     tic
+                 end
+
+                 % Read a frame and make its prediction
+                 frame = read(v, i, 'native');
+                 [pupilMask, eyeProb, blinkProb] = self.predictImage(frame, roiPos=options.roiPos,...
+                     threshold=options.threshold);
+
+                 % Save results for this frame
+                 frameN(i) = i;
+                 frameTime(i) = v.CurrentTime;
+                 binaryMask{i} = pupilMask > options.threshold;
+                 pupilArea(i) = sum(binaryMask{i},"all");
+                 isEye(i) = eyeProb;
+                 isBlink(i) = blinkProb;
+             end
+             % Save all the results in a final table
+             tab = table(frameN,frameTime,binaryMask,pupilArea,isEye,isBlink);
+         end
+
+
+
+         % PREVIEW OF A PREDICTED MOVIE
+         %------------------------------------------------------------------
+         function predictMovie_Preview(self, moviePath, options)
+             % Displays a live-preview of prediction for a video file
+
+             arguments
+                 self
+                 moviePath char {mustBeText}
+                 options.roiPos double = []
+                 options.threshold double = []
+             end
+             roiPos = options.roiPos;
+
+
+             % Initialize a video reader
+             v = VideoReader(moviePath);
+             % Initialize images to show
+             blankImg = zeros(v.Height, v.Width, 'uint8');
+             cyanColor = cat(3, blankImg, blankImg+255, blankImg+255);
+             pupilTransparency = blankImg;
+
+             % Create a figure for the preview
+             figHandle = figure(...
+                 'Name','MEYE video preview',...
+                 'NumberTitle','off',...
+                 'ToolBar','none',...
+                 'MenuBar','none', ...
+                 'Color',[.1, .1, .1]);
+
+             ax = axes('Parent',figHandle,...
+                 'Units','normalized',...
+                 'Position',[0 0 1 .94]);
+
+             imHandle = imshow(blankImg,'Parent',ax);
+             hold on
+             cyanHandle = imshow(cyanColor,'Parent',ax);
+             cyanHandle.AlphaData = pupilTransparency;
+             rect = rectangle('LineWidth',1.5, 'LineStyle','-.','EdgeColor',[1,0,0],...
+                 'Parent',ax,'Position',[0,0,0,0]);
+             hold off
+             title(ax,'MEYE Video Preview', 'Color',[1,1,1])
+
+             % Movie-Showing loop
+             while exist("figHandle","var") && ishandle(figHandle) && hasFrame(v)
+                 try
+                     tic
+                     frame = readFrame(v);
+
+                     % Actually do the prediction
+                     [pupilMask, eyeProb, blinkProb] = self.predictImage(frame, roiPos=roiPos,...
+                         threshold=options.threshold);
+
+                     % Update graphic elements
+                     imHandle.CData = frame;
+                     cyanHandle.AlphaData = imresize(pupilMask, [v.Height, v.Width]);
+                     if ~isempty(roiPos)
+                         rect.Position = roiPos;
+                     end
+                     titStr = sprintf('Eye: %.2f%% - Blink:%.2f%% - FPS:%.1f',...
+                         eyeProb*100, blinkProb*100, 1/toc);
+                     ax.Title.String = titStr;
+                     drawnow
+                 catch ME
+                     warning(ME.message)
+                     close(figHandle)
+                 end
+             end
+             disp('Stop preview.')
+         end
+
+
+     end
+
+
+     %------------------------------------------------------------------
+     %------------------------------------------------------------------
+     % INTERNAL FUNCTIONS
+     %------------------------------------------------------------------
+     %------------------------------------------------------------------
+     methods(Access=private)
+         %------------------------------------------------------------------
+         function path = getClassPath(~)
+             % Returns the full path of where the class file is
+
+             fullPath = mfilename('fullpath');
+             [path,~,~] = fileparts(fullPath);
+         end
+
+         %------------------------------------------------------------------
+         function [fplist,fnlist] = listfiles(~, folderpath, token)
+             listing = dir(folderpath);
+             index = 0;
+             fplist = {};
+             fnlist = {};
+             for i = 1:size(listing,1)
+                 s = listing(i).name;
+                 if contains(s,token)
+                     index = index+1;
+                     fplist{index} = [folderpath filesep s];
+                     fnlist{index} = s;
+                 end
+             end
+         end
+
+         % nearest2Linear
+         %------------------------------------------------------------------
+         function nearest2Linear(self, inputPath)
+             fP = self.listfiles(inputPath, 'Shape_To_Upsample');
+
+             foundFileToChange = false;
+             beforePatter = '"half_pixel", "nearest",';
+             afterPattern = '"half_pixel", "linear",';
+             for i = 1:length(fP)
+
+                 % Get the content of the file
+                 fID = fopen(fP{i}, 'r');
+                 f = fread(fID,'*char')';
+                 fclose(fID);
+
+                 % Send a verbose warning the first time we are manually
+                 % correcting the upsampling layers bug
+                 if ~foundFileToChange && contains(f,beforePatter)
+                     foundFileToChange = true;
+                     msg = ['This is a message from MEYE developers.\n' ...
+                         'In the current release of the Deep Learning Toolbox ' ...
+                         'MATLAB does not translate well all the layers in the ' ...
+                         'ONNX network to native MATLAB layers. In particular the ' ...
+                         'automatically generated custom layers that have to do ' ...
+                         'with UPSAMPLING are generated with the ''nearest'' instead of ' ...
+                         'the ''linear'' mode.\nWe automatically correct for this bug when you ' ...
+                         'instantiate a Meye object (hence this warning).\nEverything should work fine, ' ...
+                         'and we hope that in future MATLAB releases this hack won''t be ' ...
+                         'needed anymore.\n' ...
+                         'If you find bugs or performance issues, please let us know ' ...
+                         'with an issue ' ...
+                         '<a href="matlab: web(''https://github.com/fabiocarrara/meye/issues'')">HERE.</a>'];
+                     warning(sprintf(msg))
+                 end
+
+                 % Replace the 'nearest' option with 'linear'
+                 newF = strrep(f, beforePatter, afterPattern);
+
+                 % Save the file back in its original location
+                 fID = fopen(fP{i}, 'w');
+                 fprintf(fID,'%s',newF);
+                 fclose(fID);
+             end
+         end
+     end
+ end
+
+
matlab/README.md ADDED
@@ -0,0 +1,57 @@
+ # MEYE pupillometry on MATLAB
+
+ > Try MEYE on a standalone [Web-App](https://www.pupillometry.it/)
+
+ > Learn more on the original [MEYE repo](https://github.com/fabiocarrara/meye)
+
+ > Label your own dataset with [pLabeler](https://github.com/LeonardoLupori/pLabeler)
+
+ Starting from MATLAB version 2021b, MEYE is also available for use on MATLAB!
+
+ Here's a brief tutorial on how to use it in your own experiments.
+
+ ## What do you need?
+
+ - [MATLAB 2021b](https://it.mathworks.com/products/matlab.html) or later
+ - [MATLAB Image Processing Toolbox](https://it.mathworks.com/products/image.html)
+ - [MATLAB Deep Learning Toolbox](https://it.mathworks.com/products/deep-learning.html)
+   An additional _support package_ of this toolbox has to be downloaded manually from the Add-On explorer in MATLAB:
+   - _Deep Learning Toolbox™ Converter for ONNX Model Format_
+   ![image](https://user-images.githubusercontent.com/39329654/152327789-dde0af9b-d531-40be-b1a0-5ba17c508a13.png)
+ - A MEYE model in [ONNX](https://onnx.ai/) format. You can download our latest model [here](https://github.com/fabiocarrara/meye/releases).
+   ![onnxModel](https://user-images.githubusercontent.com/39329654/152552616-1b800398-5794-4f51-b4ed-2e3339cb2d0d.png)
+
+
+ ## Quick start!
+
+ ```matlab
+ % Create an instance of Meye
+ meye = Meye('path/to/model.onnx');
+
+ % Example 1
+ % Make predictions on a single Image
+ %
+ % Load an image for which you want to predict the pupil
+ img = imread('path/to/img.tif');
+ % Make a prediction on a frame
+ [pupil, isEye, isBlink] = meye.predictImage(img);
+
+ % Example 2
+ % Make predictions on a video file and preview the results
+ %
+ meye.predictMovie_Preview('path/to/video');
+ ```
+
+ ## Examples
+
+ Inside the file [example.m](example.m) you can find 5 extensively commented examples of some use cases for MEYE on MATLAB.
+ These examples require you to download example data from [here](https://drive.google.com/drive/folders/1BG6O5BEkwXkNKC_1XuB3H9wbx3DeNWwF?usp=sharing). To run the examples successfully, make sure that the downloaded files are in the same folder as the `example.m` file.
+
+ # Known issues
+
+ ## Small issue with _Upsample_ layers
+ When [importing](https://it.mathworks.com/help/deeplearning/ref/importonnxnetwork.html) an ONNX network, MATLAB tries to translate all the layers of the network from ONNX Operators to built-in MATLAB layers (see [here](https://it.mathworks.com/help/deeplearning/ref/importonnxnetwork.html#mw_dc6cd14c-e8d0-4370-af81-96626a888d9c)).
+ This operation is not successful for all the layers, and MATLAB tries to overcome errors by automatically generating custom layers to replace the ones that it wasn't able to translate. These _custom_ layers are stored in a folder as MATLAB `.m` class files.
+ We found a small bug in the way MATLAB translates `Upsample` layers while importing the MEYE network. In particular, the automatically generated custom layers perform the upsample with the `nearest` interpolation method, while the `linear` method should be used for best results.
+ For now, we solved this bug by automatically replacing the `nearest` method with the `linear` one in all the custom generated layers. This restores optimal performance with no additional computational cost, but it's a bit hacky.
+ We hope that in future releases MATLAB's process of translation to its own built-in layers will be smoother and this trick will not be needed anymore.
matlab/example.m ADDED
@@ -0,0 +1,211 @@
+ %% Download all the example material
+ %
+ % 1 - Download the latest MEYE model in ONNX format
+ % -------------------------------------------------------------------------
+ % Download the .onnx file from the assets here:
+ % https://github.com/fabiocarrara/meye/releases
+
+ % EXAMPLE data can be found in this folder:
+ % https://drive.google.com/drive/folders/1BG6O5BEkwXkNKC_1XuB3H9wbx3DeNWwF?usp=sharing
+ %
+ % 2 - Download an example image of a simple mouse eye from:
+ % https://drive.google.com/file/d/1hcWcC1cAmzY4r-SIWDIgUY0-gpbmetUL/view?usp=sharing
+ %
+ % 3 - Download an example of a large image here:
+ % https://drive.google.com/file/d/16QixvUMtojqfrcy4WXlYJ7CP3K8vrz_C/view?usp=sharing
+ %
+ % 4 - Download an example pupillometry video here:
+ % https://drive.google.com/file/d/1TYj80dzIR1ZjpEvfefH_akhbUjwpvJta/view?usp=sharing
+
+
+ %% EXAMPLE 1
+ % -------------------------------------------------------------------------
+ % Predict the pupil from a simple image of an eye
+
+ % Clean up the workspace
+ clearvars, clc
+
+ % Change these values according to the filenames of the MEYE model and the
+ % simple pupil image
+ MODEL_NAME = 'meye_20220124.onnx';
+ IMAGE_NAME = 'pupilImage_simple.png';
+
+
+ % Initialize a MEYE object
+ meye = Meye(MODEL_NAME);
+
+ % Load the simple image
+ img = imread(IMAGE_NAME);
+
+ % Predict a single image
+ [pupilMask, eyeProb, blinkProb] = meye.predictImage(img);
+
+ % Plot the results of the prediction
+ subplot(1,3,1)
+ imshow(img)
+ title('Original Image')
+
+ subplot(1,3,2)
+ imagesc(pupilMask)
+ title(sprintf('Prediction (Eye:%.2f%% - Blink:%.2f%%)',eyeProb*100,blinkProb*100))
+ axis off, axis image
+
+ subplot(1,3,3)
+ imshowpair(img, pupilMask)
+ title('Merge')
+
+
+ %% EXAMPLE 2
+ % -------------------------------------------------------------------------
+ % Binarize the pupil prediction and get the pupil size in pixels
+
+ % Clean up the workspace
+ clearvars, close all, clc
+
+ % Change these values according to the filenames of the MEYE model and the
+ % simple pupil image
+ MODEL_NAME = 'meye_20220124.onnx';
+ IMAGE_NAME = 'pupilImage_simple.png';
+
+
+ % Initialize a MEYE object
+ meye = Meye(MODEL_NAME);
+
+ % Load the simple image
+ img = imread(IMAGE_NAME);
+
+ % Predict a single image
+ % You can automatically binarize the prediction by passing the "threshold"
+ % optional argument. This number can be between 0 and 1. If omitted, the
+ % function returns a raw probability map instead of a binarized image
+ pupilBinaryMask = meye.predictImage(img, 'threshold', 0.4);
+
+ imshowpair(img, pupilBinaryMask)
+ title(sprintf('Pupil Size: %u px', sum(pupilBinaryMask,'all')))
+
+
+ %% EXAMPLE 3
+ % -------------------------------------------------------------------------
+ % Predict the pupil on a large image where the eye is a small portion of
+ % the image
+
+ % Clean up the workspace
+ clearvars, close all, clc
+
+ % Change these values according to the filenames of the MEYE model and the
+ % simple pupil image
+ MODEL_NAME = 'meye_20220124.onnx';
+ IMAGE_NAME = 'pupilImage_large.png';
+
+
+ % Initialize a MEYE object
+ meye = Meye(MODEL_NAME);
+
+ % Load the simple image
+ img = imread(IMAGE_NAME);
+
+ % Predict the image
+ pupilMask = meye.predictImage(img);
+
+ % As you can see from this image, the prediction is not perfect. This is
+ % because MEYE was trained on images that tightly contained the eye.
+ subplot(1,2,1)
+ imshowpair(img, pupilMask)
+ title('Total image prediction (low-quality)')
+
+ % In order to solve this issue it is possible to restrict the prediction to
+ % a rectangular Region of Interest (ROI) in the image. This is done simply
+ % by passing the optional argument "roiPos" to the predictImage function.
+ % The roiPos is a 4-elements vector containing X, Y, width, height of a
+ % rectangular shape. Note that X and Y are the coordinates of the top left
+ % corner of the ROI
+
+ ROI = [90,90,200,200];
+ pupilMask = meye.predictImage(img, 'roiPos', ROI);
+
+ % Plot the results with the ROI and see the difference between the 2 methods
+ subplot(1,2,2)
+ imshowpair(img, pupilMask)
+ rectangle('Position',ROI, 'LineStyle','-.','EdgeColor',[1,0,0])
+ title('ROI prediction (high quality)')
+ linkaxes
+ set(gcf,'Position',[300,600,1000,320])
+
+
+ %% EXAMPLE 4
+ % -------------------------------------------------------------------------
+ % Show a preview of the prediction of an entire pupillometry video.
+ %
+ % As you saw, you can adjust a few parameters for the prediction.
+ % If you want to get a quick preview of how your pre-recorded video will be
+ % processed, you can use the method predictMovie_Preview.
+ % Here you can play around with different ROI positions and threshold
+ % values and see the results before analyzing the whole video.
+
+ % Clean up the workspace
+ clearvars, close all, clc
+
+ % Change these values according to the filenames of the MEYE model and the
+ % example video
+ MODEL_NAME = 'meye_20220124.onnx';
+ VIDEO_NAME = 'mouse_example.mp4';
+
+ % Initialize a MEYE object
+ meye = Meye(MODEL_NAME);
+
+ % Try to play around moving or resizing the ROI to see how the performance changes
+ ROI = [70, 60, 200, 200];
+
+ % Change the threshold value to binarize the pupil prediction.
+ % Use [] to see the raw probability map. Use a number in the range [0:1] to binarize it
+ threshold = 0.4;
+
+ meye.predictMovie_Preview(VIDEO_NAME,"roiPos", ROI,"threshold",threshold);
+
+
+
+ %% EXAMPLE 5
+ % Predict the entire video and get the results table
+
+ % Clean up the workspace
+ clearvars, close all, clc
+
+ % Change these values according to the filenames of the MEYE model and the
+ % example video
+ MODEL_NAME = 'meye_20220124.onnx';
+ VIDEO_NAME = 'mouse_example.mp4';
+
+ % Initialize a MEYE object
+ meye = Meye(MODEL_NAME);
+
+ % Try to play around moving or resizing the ROI to see how the performance changes
+ ROI = [70, 60, 200, 200];
+
+ % Change the threshold value to binarize the pupil prediction.
+ % Use [] to see the raw probability map. Use a number in the range [0:1] to binarize it
+ threshold = 0.4;
+
+ % Predict the whole movie and save results in a table
+ T = meye.predictMovie(VIDEO_NAME, "roiPos", ROI, "threshold", threshold);
+
+ % Show some of the values in the table
+ disp(head(T))
+
+ % Plot some of the results
+ subplot 311
+ plot(T.frameTime,T.isEye, 'LineWidth', 2)
+ title('Eye Probability')
+ ylabel('Probability'),
+ xlim([T.frameTime(1) T.frameTime(end)])
+
+ subplot 312
+ plot(T.frameTime,T.isBlink, 'LineWidth', 2)
+ title('Blink Probability')
+ ylabel('Probability')
+ xlim([T.frameTime(1) T.frameTime(end)])
+
+ subplot 313
+ plot(T.frameTime,T.pupilArea, 'LineWidth', 2)
+ title('Pupil Size')
+ xlabel('Time (s)'), ylabel('Pupil Area (px)')
+ xlim([T.frameTime(1) T.frameTime(end)])
models/deeplab.py ADDED
@@ -0,0 +1,78 @@
+ import sys
+ sys.path += ['models/deeplab']
+
+ import tensorflow as tf
+
+ from tensorflow.keras import backend as K
+ from tensorflow.keras import layers as L
+ from tensorflow.keras.models import Model, Sequential
+
+ from deeplabv3p.models.deeplabv3p_resnet50 import Deeplabv3pResNet50
+ from deeplabv3p.models.deeplabv3p_mobilenetv3 import Deeplabv3pMobileNetV3Small, Deeplabv3pLiteMobileNetV3Small, Deeplabv3pMobileNetV3Large, Deeplabv3pLiteMobileNetV3Large
+ from deeplabv3p.models.deeplabv3p_xception import Deeplabv3pXception
+ from deeplabv3p.models.deeplabv3p_peleenet import Deeplabv3pPeleeNet, Deeplabv3pLitePeleeNet
+
+ AVAILABLE_BACKBONES = {
+     'resnet50': Deeplabv3pResNet50,
+     'xception': Deeplabv3pXception,
+     'mobilenetv3-large': Deeplabv3pMobileNetV3Large,
+     'lite-mobilenetv3-large': Deeplabv3pLiteMobileNetV3Large,
+     'mobilenetv3-small': Deeplabv3pMobileNetV3Small,
+     'lite-mobilenetv3-small': Deeplabv3pLiteMobileNetV3Small,
+     'peleenet': Deeplabv3pPeleeNet,
+     'lite-peleenet': Deeplabv3pLitePeleeNet,
+ }
+
+ AVAILABLE_PRETRAINED_WEIGHTS = {
+     'resnet50': 'imagenet',
+     'xception': None,  # 'pascalvoc', # needs fix in upstream
+     'mobilenetv3-large': 'imagenet',
+     'lite-mobilenetv3-large': 'imagenet',
+     'mobilenetv3-small': 'imagenet',
+     'lite-mobilenetv3-small': 'imagenet',
+     'peleenet': 'imagenet',
+     'lite-peleenet': 'imagenet',
+ }
+
+ def build_model(input_shape, output_shape, config):
+
+     assert input_shape[:2] == output_shape[:2], "Only same input-output HW shapes are supported."
+     num_classes = output_shape[2]
+
+     # the backbone expects RGB images in order to use pretrained weights
+     needs_rgb_conversion = input_shape[2] != 3
+     backbone_input_shape = (input_shape[:2] + (3,)) if needs_rgb_conversion else input_shape
+     backbone_name = config.get('backbone', 'resnet50')
+     weights = config.get('weights', AVAILABLE_PRETRAINED_WEIGHTS[backbone_name])
+     backbone_fn = AVAILABLE_BACKBONES[backbone_name]
+     backbone, backbone_len = backbone_fn(input_shape=backbone_input_shape, num_classes=num_classes, weights=weights, OS=8)
+
+     # segmentation mask
+     out_mask = backbone.get_layer('pred_resize').output
+     out_mask = L.Activation('sigmoid', name='mask')(out_mask)
+
+     # metadata tags (is_eye and is_blink)
+     middle = backbone.get_layer('image_pooling').output
+     middle = L.Flatten()(middle)
+     out_tags = L.Dense(2, activation='sigmoid', name='tags')(middle)
+
+     model = Model(inputs=backbone.input, outputs=[out_mask, out_tags])
+
+     if needs_rgb_conversion:
+         gray_input = L.Input(shape=input_shape)
+         rgb_input = L.Lambda(lambda x: K.tile(x, (1, 1, 1, 3)), name='gray2rgb')(gray_input)  # we assume BHWC
+         out_mask, out_tags = model(rgb_input)
+
+         # rename outputs
+         out_mask = L.Lambda(lambda x: x, name='mask')(out_mask)
+         out_tags = L.Lambda(lambda x: x, name='tags')(out_tags)
+         model = Model(inputs=gray_input, outputs=[out_mask, out_tags])
+
+     return model
+
+
+ if __name__ == "__main__":
+     shape = (128, 128, 1)
+     model = build_model(shape, shape, {'weights': None})  # , 'backbone': 'lite-mobilenetv3-small'})
+     model.summary()
+     import pdb; pdb.set_trace()
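The `config` dict is the only switch between variants: `backbone` picks a key from `AVAILABLE_BACKBONES` and `weights` overrides the default pretrained weights. The sketch below is a hypothetical continuation of the `__main__` block above showing how the two-headed model could be compiled; the per-output losses and loss weights are assumptions for illustration, not settings taken from this commit (in training, the `enhanced_binary_crossentropy` from `losses.py` could be swapped in for the mask head).

```python
# Hypothetical continuation of the __main__ block above.
# Loss choices and loss_weights are illustrative assumptions.
model = build_model(shape, shape, {'backbone': 'lite-mobilenetv3-small', 'weights': None})
model.compile(
    optimizer='adam',
    loss={'mask': 'binary_crossentropy',   # pupil probability map head
          'tags': 'binary_crossentropy'},  # is_eye / is_blink head
    loss_weights={'mask': 1.0, 'tags': 0.1},
)
model.summary()
```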
models/deeplab/README.md ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # TF Keras DeepLab v3+ Modelset
2
+
3
+ ## Introduction
4
+
5
+ An end-to-end semantic segmentation pipeline with DeepLabv3+ models. Implement with tf.keras, including data collection/annotation, model training/tuning, model evaluation and on device deployment. Support different backbones and different head architecture:
6
+
7
+ #### Backbone
8
+ - [x] Xception
9
+ - [x] ResNet50
10
+ - [x] MobileNetV2
11
+ - [x] MobilenetV3(Large/Small)
12
+ - [x] PeleeNet ([paper](https://arxiv.org/abs/1804.06882))
13
+
14
+ #### Head
15
+ - [x] ASPP
16
+ - [x] ASPP Lite(Only Global Pooling + 1x1 Conv)
17
+ - [x] Decoder
18
+ - [x] Different Output Stride(8/16/32)
19
+
20
+ #### Loss
21
+ - [x] Categorical Cross Entropy Loss
22
+ - [x] Balanced Class Weighted Cross Entropy Loss
23
+ - [x] Adaptive Class Weighted Cross Entropy Loss
24
+ - [x] Focal Loss
25
+
26
+ #### Postprocess
27
+ - [x] Numpy CRF (Conditional Random Fields) postprocess implementation
28
+
29
+
30
+ #### Train tech
31
+ - [x] Transfer training from Imagenet/PascalVOC
32
+ - [x] Dynamic learning rate decay (Cosine/Exponential/Polynomial/PiecewiseConstant)
33
+ - [x] Weights Average policy for optimizer (EMA/SWA/Lookahead, valid for TF-2.x with tfa)
34
+ - [x] GridMask data augmentation ([paper](https://arxiv.org/abs/2001.04086))
35
+ - [x] Multi-GPU training with SyncBatchNorm support (valid for TF-2.2 and later)
36
+
37
+ #### On-device deployment
38
+ - [x] Tensorflow-Lite Float32/UInt8 model inference
39
+ - [x] MNN Float32/UInt8 model inference
40
+
41
+
42
+ ## Quick Start
43
+
44
+ 1. Install requirements on Ubuntu 16.04/18.04:
45
+
46
+ ```
47
+ # pip install -r requirements.txt
48
+ ```
49
+
50
+ 2. Download Deeplabv3+ PascalVOC pretrained weights. It's provided by [keras-deeplab-v3-plus](https://github.com/bonlime/keras-deeplab-v3-plus) and imported from [original TF checkpoint](https://github.com/tensorflow/models/tree/master/research/deeplab)
51
+ 3. Run Deeplab segmentation on your image or video.
52
+
53
+ ```
54
+ # wget -O weights/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5 https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.1/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5
55
+ # python deeplab.py --model_type=xception --weights_path=weights/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5 --classes_path=configs/voc_classes.txt --output_stride=16 --image
56
+ # python deeplab.py --model_type=xception --weights_path=weights/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5 --classes_path=configs/voc_classes.txt --output_stride=16 --input=<your video file>
57
+
58
+ ```
59
+
60
+ Image segment sample:
61
+
62
+ <p align="center">
63
+ <img src="assets/dog_inference.png">
64
+ </p>
65
+
66
+
67
+
68
+ ## Guide of train/evaluate/demo
69
+
70
+ ### Train
71
+
72
+ 1. Prepare dataset
73
+ 1. PascalVOC2012 & SBD (VOC2012 train_aug) semantic segmentation dataset
74
+ * Run a simple script to download, convert & merge PascalVOC 2012 and SBD:
75
+
76
+ ```
77
+ # pushd tools/dataset_converter/voc_augment/
78
+ # ./dataset_prepare.sh
79
+ # popd
80
+
81
+ ```
82
+ Dataset images & labels will be placed at `VOC2012/`
83
+
84
+ 2. MS COCO 2017 segmentation dataset
85
+ * Run a simple script to download COCO2017 dataset, and convert annotated instance mask to PNG format semantic segmentation label image:
86
+
87
+ ```
88
+ # pushd tools/dataset_converter/mscoco2017/
89
+ # ./dataset_prepare.sh
90
+ # popd
91
+
92
+ ```
93
+ You can dig into related script for details. Dataset images & labels will be placed at `mscoco2017/`
94
+
95
+ 3. ADE20K semantic segmentation dataset
96
+ * Run a simple script to download, merge & convert ADE20K dataset:
97
+
98
+ ```
99
+ # pushd tools/dataset_converter/ade20k/
100
+ # ./dataset_prepare.sh
101
+ # popd
102
+
103
+ ```
104
+ Dataset images & labels will be placed at `ADEChallengeData2016/`
105
+
106
+ 4. Cityscapes semantic segmentation dataset
107
+ * Download the Cityscapes dataset package from `https://www.cityscapes-dataset.com/` (need registration) and put to `tools/dataset_converter/cityscapes/`. Then run a simple script to merge & convert:
108
+
109
+ ```
110
+ # pushd tools/dataset_converter/cityscapes/
111
+ # ./dataset_prepare.sh
112
+ # popd
113
+
114
+ ```
115
+ Dataset images & labels will be placed at `Cityscapes/`
116
+
117
+ 5. Customized semantic segmentation dataset
118
+ * Collecting target JPG format images and place at `<dataset_path>/images`
119
+ * Generate semantic segmentation label image. You can use [labelme](https://github.com/wkentaro/labelme) to annotate your image with polygonal segmentation mask and save to a json file. Then run [json_to_dataset.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/tools/dataset_converter/labelme/json_to_dataset.py) to convert json annotations to PascalVOC style PNG format label images:
120
+ ```
121
+ # cd tools/dataset_converter/labelme && python json_to_dataset.py -h
122
+ usage: json_to_dataset.py [-h] --json_file_path JSON_FILE_PATH
123
+ [--classes_path CLASSES_PATH] --png_label_path
124
+ PNG_LABEL_PATH
125
+
126
+ convert labelme json label to voc png label
127
+
128
+ optional arguments:
129
+ -h, --help show this help message and exit
130
+ --json_file_path JSON_FILE_PATH
131
+ path to labelme annotated json label files
132
+ --classes_path CLASSES_PATH
133
+ path to class definitions,
134
+ default=../../../configs/voc_classes.txt
135
+ --png_label_path PNG_LABEL_PATH
136
+ output path of converted png label images
137
+ ```
138
+
139
+ For class names file format, refer to [voc_classes.txt](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/configs/voc_classes.txt) (not including background class, which would be added as index 0 in code by default).
140
+
141
+ * Place the PNG label images at `<dataset_path>/labels`
142
+ * Create PascalVOC style dataset split (train/val/test) txt files. One line for a image and only include image base name, like:
143
+ ```
144
+ 2007_000033
145
+ 2007_000042
146
+ 2007_000061
147
+ ...
148
+ ```
149
+
150
+ You can put these dataset files together at `<dataset_path>` to create an independent dataset directory
151
+
152
+
153
+ 2. [train.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/train.py)
154
+ ```
155
+ # python train.py -h
156
+ usage: train.py [-h] [--model_type MODEL_TYPE] [--weights_path WEIGHTS_PATH]
157
+ [--model_input_shape MODEL_INPUT_SHAPE]
158
+ [--output_stride {8,16,32}] [--dataset_path DATASET_PATH]
159
+ [--dataset_file DATASET_FILE]
160
+ [--val_dataset_file VAL_DATASET_FILE] [--val_split VAL_SPLIT]
161
+ [--classes_path CLASSES_PATH] [--batch_size BATCH_SIZE]
162
+ [--optimizer {adam,rmsprop,sgd}] [--loss {crossentropy,focal}]
163
+ [--weighted_type {None,adaptive,balanced}]
164
+ [--learning_rate LEARNING_RATE]
165
+ [--average_type {None,ema,swa,lookahead}]
166
+ [--decay_type {None,cosine,exponential,polynomial,piecewise_constant}]
167
+ [--transfer_epoch TRANSFER_EPOCH] [--freeze_level {0,1,2}]
168
+ [--init_epoch INIT_EPOCH] [--total_epoch TOTAL_EPOCH]
169
+ [--gpu_num GPU_NUM] [--model_pruning] [--eval_online]
170
+ [--eval_epoch_interval EVAL_EPOCH_INTERVAL]
171
+ [--save_eval_checkpoint]
172
+
173
+ optional arguments:
174
+ -h, --help show this help message and exit
175
+ --model_type MODEL_TYPE
176
+ DeepLabv3+ model type:
177
+ mobilenetv2/mobilenetv2_lite/resnet50,
178
+ default=mobilenetv2_lite
179
+ --weights_path WEIGHTS_PATH
180
+ Pretrained model/weights file for fine tune
181
+ --model_input_shape MODEL_INPUT_SHAPE
182
+ model image input shape as <height>x<width>,
183
+ default=512x512
184
+ --output_stride {8,16,32}
185
+ model output stride, default=16
186
+ --dataset_path DATASET_PATH
187
+ dataset path containing images and label png file,
188
+ default=VOC2012/
189
+ --dataset_file DATASET_FILE
190
+ train samples txt file,
191
+ default=VOC2012/ImageSets/Segmentation/trainval.txt
192
+ --val_dataset_file VAL_DATASET_FILE
193
+ val samples txt file, default=None
194
+ --val_split VAL_SPLIT
195
+ validation data persentage in dataset if no val
196
+ dataset provide, default=0.1
197
+ --classes_path CLASSES_PATH
198
+ path to class definitions,
199
+ default=configs/voc_classes.txt
200
+ --batch_size BATCH_SIZE
201
+ batch size for training, default=16
202
+ --optimizer {adam,rmsprop,sgd}
203
+ optimizer for training (adam/rmsprop/sgd), default=sgd
204
+ --loss {crossentropy,focal}
205
+ loss type for training (crossentropy/focal),
206
+ default=crossentropy
207
+ --weighted_type {None,adaptive,balanced}
208
+ class balance weighted type, default=None
209
+ --learning_rate LEARNING_RATE
210
+ Initial learning rate, default=0.01
211
+ --average_type {None,ema,swa,lookahead}
212
+ weights average type, default=None
213
+ --decay_type {None,cosine,exponential,polynomial,piecewise_constant}
214
+ Learning rate decay type, default=None
215
+ --transfer_epoch TRANSFER_EPOCH
216
+ Transfer training stage epochs, default=5
217
+ --freeze_level {0,1,2}
218
+ Freeze level of the model in transfer training stage.
219
+ 0:NA/1:backbone/2:only open prediction layer
220
+ --init_epoch INIT_EPOCH
221
+ initial training epochs for fine tune training,
222
+ default=0
223
+ --total_epoch TOTAL_EPOCH
224
+ total training epochs, default=150
225
+ --gpu_num GPU_NUM Number of GPU to use, default=1
226
+ --model_pruning Use model pruning for optimization, only for TF 1.x
227
+ --eval_online Whether to do evaluation on validation dataset during
228
+ training
229
+ --eval_epoch_interval EVAL_EPOCH_INTERVAL
230
+ Number of iteration(epochs) interval to do evaluation,
231
+ default=10
232
+ --save_eval_checkpoint
233
+ Whether to save checkpoint with best evaluation result
234
+ ```
235
+
236
+ Following is a reference command for training the mobilenetv2 lite model on the PascalVOC2012 & SBD dataset:
237
+ ```
238
+ # python train.py --model_type=mobilenetv2_lite --output_stride=16 --dataset_path=VOC2012/ --dataset_file=VOC2012/ImageSets/Segmentation/train.txt --val_dataset_file=VOC2012/ImageSets/Segmentation/val.txt --batch_size=16 --freeze_level=1 --transfer_epoch=5 --total_epoch=150 --eval_online --eval_epoch_interval=1 --save_eval_checkpoint --weighted_type=adaptive
239
+ ```
240
+
241
+ Checkpoints generated during training can be found at `logs/000/`. Choose the best one as the final result.
242
+
243
+ You can also use TensorBoard to monitor the loss trend during training:
244
+ ```
245
+ # tensorboard --logdir=logs/000
246
+ ```
247
+
248
+ Multi-GPU usage: pass `--gpu_num N` to use N GPUs. Training uses [tf.distribute.MirroredStrategy](https://www.tensorflow.org/guide/distributed_training#mirroredstrategy) to support a multi-GPU environment.
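+
+ Below is only a minimal, hypothetical sketch of the MirroredStrategy pattern; the tiny Sequential model is just a stand-in for the DeepLabv3+ model that train.py actually builds:
+ ```
+ import tensorflow as tf
+
+ strategy = tf.distribute.MirroredStrategy()
+ print('Number of replicas in sync:', strategy.num_replicas_in_sync)
+
+ with strategy.scope():
+     # model creation & compile must happen inside the strategy scope
+     model = tf.keras.Sequential([
+         tf.keras.layers.Conv2D(8, 3, activation='relu', input_shape=(512, 512, 3)),
+         tf.keras.layers.Conv2D(21, 1, activation='softmax'),
+     ])
+     model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy')
+
+ # model.fit(...) will then split each global batch across the visible GPUs
+ ```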
249
+
250
+
251
+ ### Model dump
252
+ It is better to dump out an inference model from the training checkpoint for evaluation or demo. The following command does that:
253
+
254
+ ```
255
+ # python deeplab.py --model_type=mobilenetv2_lite --weights_path=logs/000/<checkpoint>.h5 --classes_path=configs/voc_classes.txt --model_input_shape=512x512 --output_stride=16 --dump_model --output_model_file=model.h5
256
+ ```
257
+
258
+ Change model_type, input shape & output stride to get a different inference model. If "--model_pruning" was added for training, you also need to use "--pruning_model" here to dump out the pruned model.
259
+
260
+ NOTE: One trained model can be dumped out with different input shapes & output strides (of course with different accuracy performance).
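+
+ If you want to load a dumped .h5 model directly in your own script, the repo's custom layers/functions need to be registered. A hypothetical snippet, assuming it runs from this repo's root directory (where `common/utils.py` lives):
+ ```
+ from tensorflow.keras.models import load_model
+ from common.utils import get_custom_objects
+
+ # register custom layers/functions ('normalize', 'img_resize', 'hard_swish', ...) before loading
+ model = load_model('model.h5', custom_objects=get_custom_objects(), compile=False)
+ print(model.input_shape, model.output_shape)
+ ```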
261
+
262
+
263
+ ### Evaluation
264
+ Use [eval.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/eval.py) to evaluate the inference model with your test data. It will calculate the following metrics:
265
+
266
+ * mIOU
267
+ * FWIOU (Frequency Weighted IOU)
268
+ * PA (Pixel Accuracy)
269
+ * MPA (Mean Pixel Accuracy)
270
+
271
+ It will also draw a confusion matrix chart and per-class IOU results under the "result" dir, and optionally save all the segmentation result images & predicted PNG labels for checking. The metric definitions are sketched below.
272
+
273
+ ```
274
+ # python eval.py --model_path=model.h5 --dataset_path=VOC2012/ --dataset_file=VOC2012/ImageSets/Segmentation/val.txt --classes_path=configs/voc_classes.txt --model_input_shape=512x512 --save_result
275
+ ```
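+
+ These metrics follow the standard definitions computed from the per-class confusion matrix; a minimal NumPy sketch (not the exact eval.py implementation) is:
+ ```
+ import numpy as np
+
+ def segmentation_metrics(confusion):
+     """confusion[i, j] = pixel count of ground-truth class i predicted as class j."""
+     diag = np.diag(confusion).astype(np.float64)
+     gt = confusion.sum(axis=1).astype(np.float64)    # ground-truth pixels per class
+     pred = confusion.sum(axis=0).astype(np.float64)  # predicted pixels per class
+     total = confusion.sum()
+
+     with np.errstate(divide='ignore', invalid='ignore'):
+         iou = diag / (gt + pred - diag)          # per-class IOU
+         pa = diag.sum() / total                  # PA: Pixel Accuracy
+         mpa = np.nanmean(diag / gt)              # MPA: Mean Pixel Accuracy
+         miou = np.nanmean(iou)                   # mIOU: mean IOU over classes
+         fwiou = np.nansum((gt / total) * iou)    # FWIOU: Frequency Weighted IOU
+     return pa, mpa, miou, fwiou
+ ```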
276
+
277
+ If you enable the "--eval_online" option in train.py, evaluation on the validation dataset will be executed during training, but this adds time to the training process.
278
+
279
+
280
+ Following is a sample result of the MobilenetV2_Lite model trained on the VOC2012+SBD dataset:
281
+ <p align="center">
282
+ <img src="assets/mIOU.png">
283
+ <img src="assets/confusion_matrix.png">
284
+ </p>
285
+
286
+
287
+ Some experiments on the VOC2012+SBD dataset and comparison:
288
+
289
+ | Model type | InputSize | Output Stride | TrainSet | TestSet | mIOU | FLOPS | Param | Speed | Ps |
290
+ | ----- | ------ | ------ | ------ | ----- | ----- | ----- | ----- | ----- | ----- |
291
+ | [ResNet50](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.1/deeplabv3p_resnet50_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 73.71% | 73.95G | 26.72M | 38ms | Keras on Titan XP |
292
+ | [MobileNetV3Large](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.1/deeplabv3p_mobilenetv3large_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 72.33% | 9.52G | 3.51M | 29ms | Keras on Titan XP |
293
+ | [PeleeNet Lite](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.2/deeplabv3p_peleenet_lite_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 68.23% | 7.64G | 2.59M | 37.8ms | Keras on Titan XP |
294
+ | [MobileNetV2 Lite](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.0/deeplabv3p_mobilenetv2_lite_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 67.83% | 5.24G | 2.11M | 23ms | Keras on Titan XP |
295
+ | [MobileNetV3Small Lite](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.1/deeplabv3p_mobilenetv3small_lite_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 64.81% | 1.36G | 1.06M | 20ms | Keras on Titan XP |
296
+
297
+ **NOTE**: If you meet any model loading problem with these pretrained weights due to h5 format compatibility issues, try to run "Model dump" with them again to regenerate the inference model.
298
+
299
+
300
+ ### Demo
301
+ 1. [deeplab.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/deeplab.py)
302
+ > * Demo script for trained model
303
+
304
+ image inference mode
305
+ ```
306
+ # python deeplab.py --model_type=mobilenetv2_lite --weights_path=model.h5 --classes_path=configs/voc_classes.txt --model_input_shape=512x512 --output_stride=16 --image
307
+ ```
308
+ video inference mode
309
+ ```
310
+ # python deeplab.py --model_type=mobilenetv2_lite --weights_path=model.h5 --classes_path=configs/voc_classes.txt --model_input_shape=512x512 --output_stride=16 --input=test.mp4
311
+ ```
312
+ For video inference mode, you can use "input=0" to capture live video from a web camera and "output=<video name>" to dump the inference result to another video.
313
+
314
+ ### Tensorflow model convert
315
+ Use [keras_to_tensorflow.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/tools/model_converter/keras_to_tensorflow.py) to convert the tf.keras .h5 model to a TensorFlow frozen pb model:
316
+ ```
317
+ # python keras_to_tensorflow.py
318
+ --input_model="path/to/keras/model.h5"
319
+ --output_model="path/to/save/model.pb"
320
+ ```
321
+
322
+ ### ONNX model convert
323
+ Use [keras_to_onnx.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/tools/model_converter/keras_to_onnx.py) to convert the tf.keras .h5 model to an ONNX model:
324
+ ```
325
+ ### need to set environment TF_KERAS=1 for tf.keras model
326
+ # export TF_KERAS=1
327
+ # python keras_to_onnx.py
328
+ --keras_model_file="path/to/keras/model.h5"
329
+ --output_file="path/to/save/model.onnx"
330
+ --op_set=11
331
+ ```
332
+
333
+ You can also use [eval.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/eval.py) to evaluate the pb & onnx inference models.
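+
+ As a hypothetical quick sanity check of the converted ONNX model (assuming a 1x512x512x3 float32 input; real evaluation should still go through eval.py):
+ ```
+ import numpy as np
+ import onnxruntime as ort
+
+ session = ort.InferenceSession('model.onnx')
+ input_name = session.get_inputs()[0].name
+
+ dummy = np.random.rand(1, 512, 512, 3).astype(np.float32)
+ outputs = session.run(None, {input_name: dummy})
+ print([o.shape for o in outputs])  # expect a (1, H, W, num_classes) score map
+ ```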
334
+
335
+ ### Inference model deployment
336
+ See [on-device inference](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/inference) for TFLite & MNN model deployment
337
+
338
+
339
+ ### TODO
340
+ - [ ] support model pruning
341
+ - [ ] support SubPixel predict layer
342
+ - [ ] support Quantization aware training
343
+
344
+
345
+ ## Some issues to know
346
+ 1. The test environment is
347
+ - Ubuntu 16.04/18.04
348
+ - Python 3.6.8
349
+ - tensorflow 2.0.0/tensorflow 1.15.0
350
+ - tf.keras 2.2.4-tf
351
+
352
+ 2. ImageNet pretrained weights for the backbone are automatically loaded (if available) when training, so it is recommended to freeze the backbone layers for several epochs in the transfer training stage.
353
+
354
+ 3. The training strategy is for reference only. Adjust it according to your dataset and your target, and add further strategies if needed.
355
+
356
+
357
+ ## Contribution guidelines
358
+ New features, improvements and any other kind of contributions are warmly welcome via pull request :)
359
+
360
+
361
+ # Citation
362
+ Please cite tf-keras-deeplabv3p-model-set in your publications if it helps your research:
363
+ ```
364
+ @article{Keras-segmentation-deeplab-v3.1,
365
+ Author = {Jenia Golbstein},
366
+ Year = {2019}
367
+ }
368
+ @article{pytorch-deeplab-xception,
369
+ Author = {jfzhang95},
370
+ Year = {2019}
371
+ }
372
+
373
+ @article{focal_loss,
374
+ title={Focal Loss for Dense Object Detection},
375
+ author={Tsung-Yi Lin and Priya Goyal and Ross Girshick and Kaiming He and Piotr Dollár},
376
+ journal = {arXiv},
377
+ year={2017}
378
+ }
379
+
380
+ ```
models/deeplab/assets/2007_000346_inference.png ADDED
models/deeplab/assets/confusion_matrix.png ADDED
models/deeplab/assets/dog_inference.png ADDED
models/deeplab/assets/mIOU.png ADDED
models/deeplab/common/callbacks.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding=utf-8 -*-
3
+ """custom model callbacks."""
4
+ import os, sys, random, tempfile
5
+ import numpy as np
6
+ from tensorflow_model_optimization.sparsity import keras as sparsity
7
+ from tensorflow.keras.callbacks import Callback
8
+
9
+ from eval import eval_mIOU
10
+
11
+
12
+ class EvalCallBack(Callback):
13
+ def __init__(self, dataset_path, dataset, class_names, model_input_shape, model_pruning, log_dir, eval_epoch_interval=10, save_eval_checkpoint=False):
14
+ self.dataset_path = dataset_path
15
+ self.dataset = dataset
16
+ self.class_names = class_names
17
+ self.model_input_shape = model_input_shape
18
+ self.model_pruning = model_pruning
19
+ self.log_dir = log_dir
20
+ self.eval_epoch_interval = eval_epoch_interval
21
+ self.save_eval_checkpoint = save_eval_checkpoint
22
+ self.best_mIOU = 0.0
23
+
24
+ def on_epoch_end(self, epoch, logs=None):
25
+ if (epoch+1) % self.eval_epoch_interval == 0:
26
+ # Do eval every eval_epoch_interval epochs
27
+ mIOU = eval_mIOU(self.model, 'H5', self.dataset_path, self.dataset, self.class_names, self.model_input_shape, do_crf=False, save_result=False, show_background=True)
28
+
29
+ if self.save_eval_checkpoint and mIOU > self.best_mIOU:
30
+ # Save best mIOU value and model checkpoint
31
+ self.best_mIOU = mIOU
32
+ self.model.save(os.path.join(self.log_dir, 'ep{epoch:03d}-loss{loss:.3f}-Jaccard{Jaccard:.3f}-val_loss{val_loss:.3f}-val_Jaccard{val_Jaccard:.3f}-mIOU{mIOU:.3f}.h5'.format(epoch=(epoch+1), loss=logs.get('loss'), Jaccard=logs.get('Jaccard'), val_loss=logs.get('val_loss'), val_Jaccard=logs.get('val_Jaccard'), mIOU=mIOU)))
models/deeplab/common/data_utils.py ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding=utf-8 -*-
3
+ """Data process utility functions."""
4
+ import numpy as np
5
+ import random
6
+ import math
7
+ import cv2
8
+ from PIL import Image, ImageEnhance
9
+
10
+ def rand(a=0, b=1):
11
+ return np.random.rand()*(b-a) + a
12
+
13
+
14
+ def random_horizontal_flip(image, label, prob=.5):
15
+ """
16
+ Random horizontal flip for image & label
17
+
18
+ # Arguments
19
+ image: origin image for horizontal flip
20
+ numpy array containing image data
21
+ label: origin label for horizontal flip
22
+ numpy array containing segment label mask
23
+ prob: probability for random flip,
24
+ scalar to control the flip probability.
25
+
26
+ # Returns
27
+ image: adjusted numpy array image.
28
+ label: adjusted numpy array label mask
29
+ """
30
+ flip = rand() < prob
31
+ if flip:
32
+ image = cv2.flip(image, 1)
33
+ label = cv2.flip(label, 1)
34
+
35
+ return image, label
36
+
37
+
38
+ def random_vertical_flip(image, label, prob=.5):
39
+ """
40
+ Random vertical flip for image & label
41
+
42
+ # Arguments
43
+ image: origin image for vertical flip
44
+ numpy array containing image data
45
+ label: origin label for vertical flip
46
+ numpy array containing segment label mask
47
+ prob: probability for random flip,
48
+ scalar to control the flip probability.
49
+
50
+ # Returns
51
+ image: adjusted numpy array image.
52
+ label: adjusted numpy array label mask
53
+ """
54
+ flip = rand() < prob
55
+ if flip:
56
+ image = cv2.flip(image, 0)
57
+ label = cv2.flip(label, 0)
58
+
59
+ return image, label
60
+
61
+
62
+ #def random_brightness(image, jitter=.3):
63
+ #"""
64
+ #Random adjust brightness for image
65
+
66
+ ## Arguments
67
+ #image: origin image for brightness change
68
+ #numpy array containing image data
69
+ #jitter: jitter range for random brightness,
70
+ #scalar to control the random brightness level.
71
+
72
+ ## Returns
73
+ #new_image: adjusted numpy array image.
74
+ #"""
75
+ #factor = 1.0 + random.gauss(mu=0.0, sigma=jitter)
76
+ #if random.randint(0,1) and abs(factor) > 0.1:
77
+ #factor = 1.0/factor
78
+ #table = np.array([((i / 255.0) ** factor) * 255 for i in np.arange(0, 256)]).astype(np.uint8)
79
+ #new_image = cv2.LUT(image, table)
80
+
81
+ #return new_image
82
+
83
+ def random_brightness(image, jitter=.5):
84
+ """
85
+ Random adjust brightness for image
86
+
87
+ # Arguments
88
+ image: origin image for brightness change
89
+ numpy array containing image data
90
+ jitter: jitter range for random brightness,
91
+ scalar to control the random brightness level.
92
+
93
+ # Returns
94
+ image: adjusted numpy array image.
95
+ """
96
+ img = Image.fromarray(image)
97
+ enh_bri = ImageEnhance.Brightness(img)
98
+ brightness = rand(jitter, 1/jitter)
99
+ new_img = enh_bri.enhance(brightness)
100
+ image = np.asarray(new_img)
101
+
102
+ return image
103
+
104
+
105
+ def random_blur(image, prob=.5, size=5):
106
+ """
107
+ Random add gaussian blur to image
108
+
109
+ # Arguments
110
+ image: origin image for blur
111
+ numpy array containing image data
112
+ prob: probability for blur,
113
+ scalar to control the blur probability.
114
+ size: kernel size for gaussian blur,
115
+ scalar to control the filter size.
116
+
117
+ # Returns
118
+ image: adjusted numpy array image.
119
+ """
120
+ blur = rand() < prob
121
+ if blur:
122
+ image = cv2.GaussianBlur(image, (size, size), 0)
123
+
124
+ return image
125
+
126
+
127
+ def random_histeq(image, size=8, prob=.2):
128
+ """
129
+ Random apply "Contrast Limited Adaptive Histogram Equalization"
130
+ to image
131
+
132
+ # Arguments
133
+ image: origin image for histeq
134
+ numpy array containing image data
135
+ size: grid size for CLAHE,
136
+ scalar to control the grid size.
137
+ prob: probability for histeq,
138
+ scalar to control the histeq probability.
139
+
140
+ # Returns
141
+ image: adjusted numpy array image.
142
+ """
143
+ histeq = rand() < prob
144
+ if histeq:
145
+ clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(size, size))
146
+ img_yuv = cv2.cvtColor(image, cv2.COLOR_BGR2YUV)
147
+ img_yuv[:,:,0] = clahe.apply(img_yuv[:,:,0])
148
+ image = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR) # to BGR
149
+ return image
150
+
151
+
152
+ def random_grayscale(image, prob=.2):
153
+ """
154
+ Random convert image to grayscale
155
+
156
+ # Arguments
157
+ image: origin image for grayscale convert
158
+ numpy array containing image data
159
+ prob: probability for grayscale convert,
160
+ scalar to control the convert probability.
161
+
162
+ # Returns
163
+ image: adjusted numpy array image.
164
+ """
165
+ convert = rand() < prob
166
+ if convert:
167
+ #convert to grayscale first, and then
168
+ #back to 3 channels fake BGR
169
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
170
+ image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
171
+
172
+ return image
173
+
174
+
175
+ def random_chroma(image, jitter=.5):
176
+ """
177
+ Random adjust chroma (color level) for image
178
+
179
+ # Arguments
180
+ image: origin image for chroma change
181
+ numpy array containing image data
182
+ jitter: jitter range for random chroma,
183
+ scalar to control the random color level.
184
+
185
+ # Returns
186
+ image: adjusted numpy array image.
187
+ """
188
+ img = Image.fromarray(image)
189
+ enh_col = ImageEnhance.Color(img)
190
+ color = rand(jitter, 1/jitter)
191
+ new_img = enh_col.enhance(color)
192
+ image = np.asarray(new_img)
193
+
194
+ return image
195
+
196
+
197
+ def random_contrast(image, jitter=.5):
198
+ """
199
+ Random adjust contrast for image
200
+
201
+ # Arguments
202
+ image: origin image for contrast change
203
+ numpy array containing image data
204
+ jitter: jitter range for random contrast,
205
+ scalar to control the random contrast level.
206
+
207
+ # Returns
208
+ image: adjusted numpy array image.
209
+ """
210
+ img = Image.fromarray(image)
211
+ enh_con = ImageEnhance.Contrast(img)
212
+ contrast = rand(jitter, 1/jitter)
213
+ new_img = enh_con.enhance(contrast)
214
+ image = np.asarray(new_img)
215
+
216
+ return image
217
+
218
+
219
+ def random_sharpness(image, jitter=.5):
220
+ """
221
+ Random adjust sharpness for image
222
+
223
+ # Arguments
224
+ image: origin image for sharpness change
225
+ numpy array containing image data
226
+ jitter: jitter range for random sharpness,
227
+ scalar to control the random sharpness level.
228
+
229
+ # Returns
230
+ image: adjusted numpy array image.
231
+ """
232
+ img = Image.fromarray(image)
233
+ enh_sha = ImageEnhance.Sharpness(img)
234
+ sharpness = rand(jitter, 1/jitter)
235
+ new_img = enh_sha.enhance(sharpness)
236
+ image = np.asarray(new_img)
237
+
238
+ return image
239
+
240
+
241
+ def random_zoom_rotate(image, label, rotate_range=30, zoom_range=0.2, prob=0.3):
242
+ """
243
+ Random do zoom & rotate for image & label
244
+
245
+ # Arguments
246
+ image: origin image for zoom & rotate
247
+ numpy array containing image data
248
+ label: origin label for zoom & rotate
249
+ numpy array containing segment label mask
250
+ prob: probability for random flip,
251
+ scalar to control the flip probability.
252
+
253
+ # Returns
254
+ image: adjusted numpy array image.
255
+ label: adjusted numpy array label mask
256
+ """
257
+ if rotate_range:
258
+ angle = random.gauss(mu=0.0, sigma=rotate_range)
259
+ else:
260
+ angle = 0.0
261
+
262
+ if zoom_range:
263
+ scale = random.gauss(mu=1.0, sigma=zoom_range)
264
+ else:
265
+ scale = 1.0
266
+
267
+ warpAffine = rand() < prob
268
+ if warpAffine and (rotate_range or zoom_range):
269
+ M = cv2.getRotationMatrix2D((image.shape[1]//2, image.shape[0]//2), angle, scale)
270
+ image = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]), flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=0)
271
+ label = cv2.warpAffine(label, M, (label.shape[1], label.shape[0]), flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=0)
272
+
273
+ return image, label
274
+
275
+
276
+ class Grid(object):
277
+ def __init__(self, d1, d2, rotate=360, ratio=0.5, mode=1, prob=1.):
278
+ self.d1 = d1
279
+ self.d2 = d2
280
+ self.rotate = rotate
281
+ self.ratio = ratio
282
+ self.mode=mode
283
+ self.st_prob = self.prob = prob
284
+
285
+ def set_prob(self, epoch, max_epoch):
286
+ self.prob = self.st_prob * min(1, epoch / max_epoch)
287
+
288
+ def __call__(self, img, label):
289
+ h = img.shape[0]
290
+ w = img.shape[1]
291
+
292
+ if np.random.rand() > self.prob:
293
+ return img, label
294
+
295
+ # 1.5 * h, 1.5 * w works fine with the squared images
296
+ # But with rectangular input, the mask might not be able to recover back to the input image shape
297
+ # A square mask with edge length equal to the diagnoal of the input image
298
+ # will be able to cover all the image spot after the rotation. This is also the minimum square.
299
+ hh = math.ceil((math.sqrt(h*h + w*w)))
300
+
301
+ d = np.random.randint(self.d1, self.d2)
302
+ #d = self.d
303
+
304
+ # maybe use ceil? but i guess no big difference
305
+ self.l = math.ceil(d*self.ratio)
306
+
307
+ mask = np.ones((hh, hh), np.float32)
308
+ st_h = np.random.randint(d)
309
+ st_w = np.random.randint(d)
310
+ for i in range(-1, hh//d+1):
311
+ s = d*i + st_h
312
+ t = s+self.l
313
+ s = max(min(s, hh), 0)
314
+ t = max(min(t, hh), 0)
315
+ mask[s:t,:] *= 0
316
+ for i in range(-1, hh//d+1):
317
+ s = d*i + st_w
318
+ t = s+self.l
319
+ s = max(min(s, hh), 0)
320
+ t = max(min(t, hh), 0)
321
+ mask[:,s:t] *= 0
322
+ r = np.random.randint(self.rotate)
323
+ mask = Image.fromarray(np.uint8(mask))
324
+ mask = mask.rotate(r)
325
+ mask = np.asarray(mask)
326
+ mask = mask[(hh-h)//2:(hh-h)//2+h, (hh-w)//2:(hh-w)//2+w]
327
+
328
+ if self.mode == 1:
329
+ mask = 1-mask
330
+
331
+ #mask = mask.expand_as(img)
332
+ img = img * np.expand_dims(mask, -1)
333
+ label = label * mask
334
+
335
+ return img, label
336
+
337
+
338
+ def random_gridmask(image, label, prob=0.2):
339
+ """
340
+ Random do GridMask augment for image & label
341
+
342
+ reference:
343
+ https://arxiv.org/abs/2001.04086
344
+ https://github.com/Jia-Research-Lab/GridMask/blob/master/imagenet_grid/utils/grid.py
345
+
346
+ # Arguments
347
+ image: origin image for GridMask
348
+ numpy array containing image data
349
+ label: origin label for zoom & rotate
350
+ numpy array containing segment label mask
351
+ prob: probability for GridMask,
352
+ scalar to control the GridMask probability.
353
+
354
+ # Returns
355
+ image: adjusted numpy array image.
356
+ label: adjusted numpy array label mask
357
+ """
358
+ grid = Grid(d1=image.shape[1]//7, d2=image.shape[1]//3, rotate=360, ratio=0.5, prob=prob)
359
+ image, label = grid(image, label)
360
+
361
+ return image, label
362
+
363
+
364
+ def random_crop(image, label, crop_shape, prob=.1):
365
+ """
366
+ Random crop a specific size area from image
367
+ and label
368
+
369
+ # Arguments
370
+ image: origin image for vertical flip
371
+ numpy array containing image data
372
+ label: origin label for vertical flip
373
+ numpy array containing segment label mask
374
+ crop_shape: target crop shape,
375
+ list or tuple in (width, height).
376
+ prob: probability for crop,
377
+ scalar to control the crop probability.
378
+
379
+ # Returns
380
+ image: croped numpy array image.
381
+ label: croped numpy array label mask
382
+ """
383
+ # check if the image and label are same shape
384
+ if (image.shape[0] != label.shape[0]) or (image.shape[1] != label.shape[1]):
385
+ raise Exception('Image and label must have the same dimensions!')
386
+
387
+ crop = rand() < prob
388
+ if crop:
389
+ if (crop_shape[0] < image.shape[1]) and (crop_shape[1] < image.shape[0]):
390
+ x = random.randrange(image.shape[1]-crop_shape[0])
391
+ y = random.randrange(image.shape[0]-crop_shape[1])
392
+
393
+ image = image[y:y+crop_shape[1], x:x+crop_shape[0], :]
394
+ label = label[y:y+crop_shape[1], x:x+crop_shape[0]]
395
+ else:
396
+ image = cv2.resize(image, crop_shape)
397
+ label = cv2.resize(label, crop_shape, interpolation = cv2.INTER_NEAREST)
398
+
399
+ return image, label
400
+
401
+
402
+
403
+ def normalize_image(image):
404
+ """
405
+ normalize image array from 0 ~ 255
406
+ to -1.0 ~ 1.0
407
+
408
+ # Arguments
409
+ image: origin input image
410
+ numpy image array with dtype=float, 0.0 ~ 255.0
411
+
412
+ # Returns
413
+ image: numpy image array with dtype=float, -1.0 ~ 1.0
414
+ """
415
+ image = image.astype(np.float32) / 127.5 - 1
416
+
417
+ return image
418
+
419
+
420
+ def denormalize_image(image):
421
+ """
422
+ Denormalize image array from -1.0 ~ 1.0
423
+ to 0 ~ 255
424
+
425
+ # Arguments
426
+ image: normalized image array with dtype=float, -1.0 ~ 1.0
427
+
428
+ # Returns
429
+ image: numpy image array with dtype=uint8, 0 ~ 255
430
+ """
431
+ image = (image * 127.5 + 127.5).astype(np.uint8)
432
+
433
+ return image
434
+
435
+
436
+ def preprocess_image(image, model_image_size):
437
+ """
438
+ Prepare model input image data with
439
+ resize, normalize and dim expansion
440
+
441
+ # Arguments
442
+ image: origin input image
443
+ PIL Image object containing image data
444
+ model_image_size: model input image size
445
+ tuple of format (height, width).
446
+
447
+ # Returns
448
+ image_data: numpy array of image data for model input.
449
+ """
450
+ resized_image = image.resize(model_image_size, Image.BICUBIC)
451
+ image_data = np.asarray(resized_image).astype('float32')
452
+ #image_data = normalize_image(image_data)
453
+ image_data = np.expand_dims(image_data, 0)
454
+ return image_data
455
+
456
+
457
+ def mask_resize(mask, target_size):
458
+ """
459
+ Resize predict segmentation mask array to target size
460
+ with bilinear interpolation
461
+
462
+ # Arguments
463
+ mask: predict mask array to be resize
464
+ uint8 numpy array with shape (height, width, 1)
465
+ target_size: target image size,
466
+ tuple of format (width, height).
467
+
468
+ # Returns
469
+ resize_mask: resized mask array.
470
+
471
+ """
472
+ dst_w, dst_h = target_size # dest width & height
473
+ src_h, src_w = mask.shape[:2] # src width & height
474
+
475
+ if src_h == dst_h and src_w == dst_w:
476
+ return mask.copy()
477
+
478
+ scale_x = float(src_w) / dst_w # resize scale for width
479
+ scale_y = float(src_h) / dst_h # resize scale for height
480
+
481
+ # create & go through the target image array
482
+ resize_mask = np.zeros((dst_h, dst_w), dtype=np.uint8)
483
+ for dst_y in range(dst_h):
484
+ for dst_x in range(dst_w):
485
+ # mapping dest point back to src point
486
+ src_x = (dst_x + 0.5) * scale_x - 0.5
487
+ src_y = (dst_y + 0.5) * scale_y - 0.5
488
+ # calculate round point in src image
489
+ src_x_0 = int(np.floor(src_x))
490
+ src_y_0 = int(np.floor(src_y))
491
+ src_x_1 = min(src_x_0 + 1, src_w - 1)
492
+ src_y_1 = min(src_y_0 + 1, src_h - 1)
493
+
494
+ # Bilinear interpolation
495
+ value0 = (src_x_1 - src_x) * mask[src_y_0, src_x_0] + (src_x - src_x_0) * mask[src_y_0, src_x_1]
496
+ value1 = (src_x_1 - src_x) * mask[src_y_1, src_x_0] + (src_x - src_x_0) * mask[src_y_1, src_x_1]
497
+ resize_mask[dst_y, dst_x] = int((src_y_1 - src_y) * value0 + (src_y - src_y_0) * value1)
498
+
499
+ return resize_mask
500
+
501
+
502
+ def mask_resize_fast(mask, target_size):
503
+ """
504
+ Use cv2 to do a quick resize on predict
505
+ segmentation mask array to target size
506
+
507
+ # Arguments
508
+ mask: predict mask array to be resize
509
+ uint8 numpy array with shape (height, width, 1)
510
+ target_size: target image size,
511
+ tuple of format (width, height).
512
+
513
+ # Returns
514
+ resize_mask: resized mask array.
515
+
516
+ """
517
+ mask = cv2.merge([mask, mask, mask]).astype('uint8')
518
+ #resize_mask = cv2.resize(mask, target_size, cv2.INTER_AREA)
519
+ resize_mask = cv2.resize(mask, target_size, cv2.INTER_NEAREST)
520
+ (resize_mask, _, _) = cv2.split(np.array(resize_mask))
521
+
522
+ return resize_mask
523
+
models/deeplab/common/model_utils.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding=utf-8 -*-
3
+ """Model utility functions."""
4
+ from tensorflow.keras.optimizers import Adam, RMSprop, SGD
5
+ from tensorflow.keras.optimizers.schedules import ExponentialDecay, PolynomialDecay, PiecewiseConstantDecay
6
+ from tensorflow.keras.experimental import CosineDecay
7
+ from tensorflow_model_optimization.sparsity import keras as sparsity
8
+
9
+
10
+ def get_pruning_model(model, begin_step, end_step):
11
+ import tensorflow as tf
12
+ if tf.__version__.startswith('2'):
13
+ # model pruning API is not supported in TF 2.0 yet
14
+ raise Exception('model pruning is not fully supported in TF 2.x, Please switch env to TF 1.x for this feature')
15
+
16
+ pruning_params = {
17
+ 'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=0.0,
18
+ final_sparsity=0.7,
19
+ begin_step=begin_step,
20
+ end_step=end_step,
21
+ frequency=100)
22
+ }
23
+
24
+ pruning_model = sparsity.prune_low_magnitude(model, **pruning_params)
25
+ return pruning_model
26
+
27
+
28
+ # some global value for lr scheduler
29
+ # need to update to CLI option in main()
30
+ #lr_base = 1e-3
31
+ #total_epochs = 250
32
+
33
+ #def learning_rate_scheduler(epoch, curr_lr, mode='cosine_decay'):
34
+ #lr_power = 0.9
35
+ #lr = curr_lr
36
+
37
+ ## adam default lr
38
+ #if mode is 'adam':
39
+ #lr = 0.001
40
+
41
+ ## original lr scheduler
42
+ #if mode is 'power_decay':
43
+ #lr = lr_base * ((1 - float(epoch) / total_epochs) ** lr_power)
44
+
45
+ ## exponential decay policy
46
+ #if mode is 'exp_decay':
47
+ #lr = (float(lr_base) ** float(lr_power)) ** float(epoch + 1)
48
+
49
+ ## cosine decay policy, including warmup and hold stage
50
+ #if mode is 'cosine_decay':
51
+ ##warmup & hold hyperparams, adjust for your training
52
+ #warmup_epochs = 0
53
+ #hold_base_rate_epochs = 0
54
+ #warmup_lr = lr_base * 0.01
55
+ #lr = 0.5 * lr_base * (1 + np.cos(
56
+ #np.pi * float(epoch - warmup_epochs - hold_base_rate_epochs) /
57
+ #float(total_epochs - warmup_epochs - hold_base_rate_epochs)))
58
+
59
+ #if hold_base_rate_epochs > 0 and epoch < warmup_epochs + hold_base_rate_epochs:
60
+ #lr = lr_base
61
+
62
+ #if warmup_epochs > 0 and epoch < warmup_epochs:
63
+ #if lr_base < warmup_lr:
64
+ #raise ValueError('learning_rate_base must be larger or equal to '
65
+ #'warmup_learning_rate.')
66
+ #slope = (lr_base - warmup_lr) / float(warmup_epochs)
67
+ #warmup_rate = slope * float(epoch) + warmup_lr
68
+ #lr = warmup_rate
69
+
70
+ #if mode is 'progressive_drops':
71
+ ## drops as progression proceeds, good for sgd
72
+ #if epoch > 0.9 * total_epochs:
73
+ #lr = 0.0001
74
+ #elif epoch > 0.75 * total_epochs:
75
+ #lr = 0.001
76
+ #elif epoch > 0.5 * total_epochs:
77
+ #lr = 0.01
78
+ #else:
79
+ #lr = 0.1
80
+
81
+ #print('learning_rate change to: {}'.format(lr))
82
+ #return lr
83
+
84
+
85
+ def get_lr_scheduler(learning_rate, decay_type, decay_steps):
86
+ if decay_type:
87
+ decay_type = decay_type.lower()
88
+
89
+ if decay_type == None:
90
+ lr_scheduler = learning_rate
91
+ elif decay_type == 'cosine':
92
+ lr_scheduler = CosineDecay(initial_learning_rate=learning_rate, decay_steps=decay_steps, alpha=0.2) # use 0.2*learning_rate as final minimum learning rate
93
+ elif decay_type == 'exponential':
94
+ lr_scheduler = ExponentialDecay(initial_learning_rate=learning_rate, decay_steps=decay_steps, decay_rate=0.9)
95
+ elif decay_type == 'polynomial':
96
+ lr_scheduler = PolynomialDecay(initial_learning_rate=learning_rate, decay_steps=decay_steps, end_learning_rate=learning_rate/100)
97
+ elif decay_type == 'piecewise_constant':
98
+ #apply a piecewise constant lr scheduler, including warmup stage
99
+ boundaries = [500, int(decay_steps*0.9), decay_steps]
100
+ values = [0.001, learning_rate, learning_rate/10., learning_rate/100.]
101
+ lr_scheduler = PiecewiseConstantDecay(boundaries=boundaries, values=values)
102
+ else:
103
+ raise ValueError('Unsupported lr decay type')
104
+
105
+ return lr_scheduler
106
+
107
+
108
+ def get_optimizer(optim_type, learning_rate, average_type=None, decay_type='cosine', decay_steps=100000):
109
+ optim_type = optim_type.lower()
110
+
111
+ lr_scheduler = get_lr_scheduler(learning_rate, decay_type, decay_steps)
112
+
113
+ if optim_type == 'adam':
114
+ optimizer = Adam(learning_rate=lr_scheduler, epsilon=1e-7, amsgrad=False)
115
+ elif optim_type == 'rmsprop':
116
+ optimizer = RMSprop(learning_rate=lr_scheduler, rho=0.9, momentum=0.0, centered=False)
117
+ elif optim_type == 'sgd':
118
+ optimizer = SGD(learning_rate=lr_scheduler, momentum=0.9, nesterov=False)
119
+ else:
120
+ raise ValueError('Unsupported optimizer type')
121
+
122
+ if average_type:
123
+ optimizer = get_averaged_optimizer(average_type, optimizer)
124
+
125
+ return optimizer
126
+
127
+
128
+ def get_averaged_optimizer(average_type, optimizer):
129
+ """
130
+ Apply weights average mechanism in optimizer. Need tensorflow-addons
131
+ which request TF 2.x and have following compatibility table:
132
+ -------------------------------------------------------------
133
+ | Tensorflow Addons | Tensorflow | Python |
134
+ -------------------------------------------------------------
135
+ | tfa-nightly | 2.3, 2.4 | 3.6, 3.7, 3.8 |
136
+ -------------------------------------------------------------
137
+ | tensorflow-addons-0.12.0 | 2.3, 2.4 | 3.6, 3.7, 3.8 |
138
+ -------------------------------------------------------------
139
+ | tensorflow-addons-0.11.2 | 2.2, 2.3 | 3.5, 3.6, 3.7, 3.8 |
140
+ -------------------------------------------------------------
141
+ | tensorflow-addons-0.10.0 | 2.2 | 3.5, 3.6, 3.7, 3.8 |
142
+ -------------------------------------------------------------
143
+ | tensorflow-addons-0.9.1 | 2.1, 2.2 | 3.5, 3.6, 3.7 |
144
+ -------------------------------------------------------------
145
+ | tensorflow-addons-0.8.3 | 2.1 | 3.5, 3.6, 3.7 |
146
+ -------------------------------------------------------------
147
+ | tensorflow-addons-0.7.1 | 2.1 | 2.7, 3.5, 3.6, 3.7 |
148
+ -------------------------------------------------------------
149
+ | tensorflow-addons-0.6.0 | 2.0 | 2.7, 3.5, 3.6, 3.7 |
150
+ -------------------------------------------------------------
151
+ """
152
+ import tensorflow_addons as tfa
153
+
154
+ average_type = average_type.lower()
155
+
156
+ if average_type == None:
157
+ averaged_optimizer = optimizer
158
+ elif average_type == 'ema':
159
+ averaged_optimizer = tfa.optimizers.MovingAverage(optimizer, average_decay=0.99)
160
+ elif average_type == 'swa':
161
+ averaged_optimizer = tfa.optimizers.SWA(optimizer, start_averaging=0, average_period=10)
162
+ elif average_type == 'lookahead':
163
+ averaged_optimizer = tfa.optimizers.Lookahead(optimizer, sync_period=6, slow_step_size=0.5)
164
+ else:
165
+ raise ValueError('Unsupported average type')
166
+
167
+ return averaged_optimizer
168
+
models/deeplab/common/utils.py ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding=utf-8 -*-
3
+ """Miscellaneous utility functions."""
4
+
5
+ import os
6
+ import numpy as np
7
+ import copy
8
+ from tqdm import tqdm
9
+ from PIL import Image
10
+ import matplotlib.pyplot as plt
11
+ from matplotlib import gridspec
12
+
13
+ from deeplabv3p.models.layers import normalize, img_resize
14
+ from deeplabv3p.models.deeplabv3p_mobilenetv3 import hard_sigmoid, hard_swish
15
+ import tensorflow as tf
16
+
17
+
18
+ def optimize_tf_gpu(tf, K):
19
+ if tf.__version__.startswith('2'):
20
+ gpus = tf.config.experimental.list_physical_devices('GPU')
21
+ if gpus:
22
+ try:
23
+ # Currently, memory growth needs to be the same across GPUs
24
+ for gpu in gpus:
25
+ tf.config.experimental.set_virtual_device_configuration(gpu, [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10000)])
26
+ #tf.config.experimental.set_memory_growth(gpu, True)
27
+ except RuntimeError as e:
28
+ # Memory growth must be set before GPUs have been initialized
29
+ print(e)
30
+ else:
31
+ config = tf.ConfigProto()
32
+ config.gpu_options.allow_growth=True #dynamic alloc GPU resource
33
+ config.gpu_options.per_process_gpu_memory_fraction = 0.9 #GPU memory threshold 0.9
34
+ session = tf.Session(config=config)
35
+
36
+ # set session
37
+ K.set_session(session)
38
+
39
+
40
+ def get_custom_objects():
41
+ '''
42
+ form up a custom_objects dict so that the customized
43
+ layer/function call could be correctly parsed when keras
44
+ .h5 model is loading or converting
45
+ '''
46
+ custom_objects_dict = {
47
+ 'tf': tf,
48
+ 'normalize': normalize,
49
+ 'img_resize': img_resize,
50
+ 'hard_sigmoid': hard_sigmoid,
51
+ 'hard_swish': hard_swish,
52
+ }
53
+ return custom_objects_dict
54
+
55
+ """
56
+ def calculate_weigths_labels(dataset_generator, num_classes, save_path=None):
57
+ '''
58
+ calculate a static segment classes (including background) weights
59
+ coefficient based on class pixel
60
+ '''
61
+ # Initialize class count list array
62
+ class_counts = np.zeros((num_classes,))
63
+
64
+ # collecting class pixel count
65
+ pbar = tqdm(total=len(dataset_generator), desc='Calculating classes weights')
66
+ for n, (_, y) in enumerate(dataset_generator):
67
+ mask = (y >= 0) & (y < num_classes)
68
+ labels = y[mask].astype(np.uint8)
69
+ count_l = np.bincount(labels, minlength=num_classes)
70
+ class_counts += count_l
71
+ pbar.update(1)
72
+ pbar.close()
73
+ # sum() to get total valid pixel count
74
+ total_count = np.sum(class_counts)
75
+ # get class weights with 1/(log(1.02+(class_count/total_count)))
76
+ class_weights = []
77
+ for class_count in class_counts:
78
+ class_weight = 1 / (np.log(1.02 + (class_count / total_count)))
79
+ class_weights.append(class_weight)
80
+
81
+ class_weights = np.array(class_weights)
82
+ # save class weights array to file for reloading next time
83
+ if save_path:
84
+ classes_weights_path = os.path.join(save_path, 'classes_weights.npy')
85
+ np.save(classes_weights_path, class_weights)
86
+
87
+ return class_weights
88
+ """
89
+
90
+
91
+ def calculate_weigths_labels(dataset_generator, num_classes, save_path=None):
92
+ '''
93
+ calculate a static segment classes (including background) weights
94
+ coefficient based on class pixel
95
+ '''
96
+ # Initialize class count list array
97
+ class_counts = np.zeros((num_classes,))
98
+
99
+ # collecting class pixel count
100
+ pbar = tqdm(total=len(dataset_generator), desc='Calculating classes weights')
101
+ for n, (_, y) in enumerate(dataset_generator):
102
+ mask = (y >= 0) & (y < num_classes)
103
+ labels = y[mask].astype(np.uint8)
104
+ count_l = np.bincount(labels, minlength=num_classes)
105
+ class_counts += count_l
106
+ pbar.update(1)
107
+ pbar.close()
108
+ # sum() to get total valid pixel count
109
+ total_count = np.sum(class_counts)
110
+
111
+ #
112
+ # use following formula to calculate balanced class weights:
113
+ # class_weights = sample_count / (num_classes * np.bincount(labels))
114
+ #
115
+ # which is same as
116
+ # class_weight.compute_class_weight('balanced', class_list, y)
117
+ #
118
+ class_weights = total_count / (num_classes * class_counts)
119
+ class_weights = np.array(class_weights)
120
+ # save class weights array to file for reloading next time
121
+ if save_path:
122
+ classes_weights_path = os.path.join(save_path, 'classes_weights.txt')
123
+ save_class_weights(classes_weights_path, class_weights)
124
+
125
+ return class_weights
126
+
127
+
128
+ def save_class_weights(save_path, class_weights):
129
+ '''
130
+ save class weights array with shape (num_classes,)
131
+ '''
132
+ weights_file = open(save_path, 'w')
133
+ for class_weight in list(class_weights):
134
+ weights_file.write(str(class_weight))
135
+ weights_file.write('\n')
136
+ weights_file.close()
137
+
138
+
139
+ def load_class_weights(classes_weights_path):
140
+ '''
141
+ load saved class weights txt file and convert
142
+ to numpy array with shape (num_classes,)
143
+ '''
144
+ with open(classes_weights_path) as f:
145
+ classes_weights = f.readlines()
146
+ classes_weights = [float(c.strip()) for c in classes_weights]
147
+
148
+ return np.array(classes_weights)
149
+
150
+
151
+ def get_classes(classes_path):
152
+ '''loads the classes'''
153
+ with open(classes_path) as f:
154
+ class_names = f.readlines()
155
+ class_names = [c.strip() for c in class_names]
156
+ return class_names
157
+
158
+
159
+ def get_data_list(data_list_file, shuffle=True):
160
+ with open(data_list_file) as f:
161
+ lines = f.readlines()
162
+ lines = [line.strip() for line in lines]
163
+
164
+ if shuffle:
165
+ np.random.seed(10101)
166
+ np.random.shuffle(lines)
167
+ np.random.seed(None)
168
+
169
+ return lines
170
+
171
+
172
+ def figure_to_image(figure):
173
+ '''
174
+ Convert a Matplotlib figure to a Pillow image with RGBA channels
175
+
176
+ # Arguments
177
+ figure: matplotlib figure
178
+ usually create with plt.figure()
179
+
180
+ # Returns
181
+ image: numpy array image
182
+ '''
183
+ # draw the renderer
184
+ figure.canvas.draw()
185
+
186
+ # Get the RGBA buffer from the figure
187
+ w, h = figure.canvas.get_width_height()
188
+ buf = np.fromstring(figure.canvas.tostring_argb(), dtype=np.uint8)
189
+ buf.shape = (w, h, 4)
190
+
191
+ # canvas.tostring_argb give pixmap in ARGB mode. Roll the ALPHA channel to have it in RGBA mode
192
+ buf = np.roll(buf, 3, axis=2)
193
+ image = Image.frombytes("RGBA", (w, h), buf.tostring())
194
+ # Convert RGBA to RGB
195
+ image = np.asarray(image)[..., :3]
196
+ return image
197
+
198
+
199
+ def create_pascal_label_colormap():
200
+ """
201
+ create label colormap with PASCAL VOC segmentation dataset definition
202
+
203
+ # Returns
204
+ colormap: Colormap array for visualizing segmentation
205
+ """
206
+ colormap = np.zeros((256, 3), dtype=int)
207
+ index = np.arange(256, dtype=int)
208
+
209
+ for shift in reversed(range(8)):
210
+ for channel in range(3):
211
+ colormap[:, channel] |= ((index >> channel) & 1) << shift
212
+ index >>= 3
213
+
214
+ return colormap
215
+
216
+
217
+ def label_to_color_image(label):
218
+ """
219
+ mapping the segmentation label to color indexing array
220
+
221
+ # Arguments
222
+ label: 2D uint8 numpy array, with segmentation label
223
+
224
+ # Returns
225
+ result: A 2D array with floating type. The element of the array
226
+ is the color indexed by the corresponding element in the input label
227
+ to the PascalVOC color map.
228
+
229
+ Raises:
230
+ ValueError: If label is not of rank 2 or its value is larger than color
231
+ map maximum entry.
232
+ """
233
+ if label.ndim != 2:
234
+ raise ValueError('Expect 2-D input label')
235
+
236
+ colormap = create_pascal_label_colormap()
237
+
238
+ if np.max(label) >= len(colormap):
239
+ raise ValueError('label value too large.')
240
+
241
+ return colormap[label]
242
+
243
+
244
+ def visualize_segmentation(image, mask, gt_mask=None, class_names=None, overlay=0.7, ignore_count_threshold=100, title=None, gt_title=None):
245
+ """
246
+ Visualize segmentation mask on input image, using PascalVOC
247
+ Segmentation color map
248
+
249
+ # Arguments
250
+ image: image array
251
+ numpy array for input image
252
+ mask: predict mask array
253
+ 2D numpy array for predict segmentation mask
254
+ gt_mask: ground truth mask array
255
+ 2D numpy array for gt segmentation mask
256
+ class_names: label class definition
257
+ list of label class names
258
+ ignore_count_threshold: threshold to filter label
259
+ integer scalar to filter the label value with small count
260
+ title: predict segmentation title
261
+ title string for predict segmentation result plot
262
+ gt_title: ground truth segmentation title
263
+ title string for ground truth segmentation plot
264
+
265
+ # Returns
266
+ img: A numpy image with segmentation result
267
+ """
268
+ if (gt_mask is not None) and (class_names is not None):
269
+ grid_spec = gridspec.GridSpec(1, 3, width_ratios=[6, 6, 1])
270
+ figsize = (15, 10)
271
+ elif (gt_mask is not None) and (class_names is None):
272
+ grid_spec = gridspec.GridSpec(1, 2, width_ratios=[6, 6])
273
+ figsize = (15, 10)
274
+ elif (gt_mask is None) and (class_names is not None):
275
+ grid_spec = gridspec.GridSpec(1, 2, width_ratios=[6, 1])
276
+ figsize = (10, 10)
277
+ else:
278
+ grid_spec = [111]
279
+ figsize = (10, 10)
280
+
281
+ figure = plt.figure(figsize=figsize)
282
+
283
+ # convert mask array to color mapped image
284
+ mask_image = label_to_color_image(mask).astype(np.uint8)
285
+ # show segmentation result image
286
+ plt.subplot(grid_spec[0])
287
+ plt.imshow(image)
288
+ plt.imshow(mask_image, alpha=overlay)
289
+ plt.axis('off')
290
+ # add plt title, optional
291
+ if title:
292
+ plt.title(title)
293
+
294
+ if gt_mask is not None:
295
+ # reset invalid label value as 0(background)
296
+ filtered_gt_mask = copy.deepcopy(gt_mask)
297
+ filtered_gt_mask[filtered_gt_mask>len(class_names)-1] = 0
298
+ # convert gt mask array to color mapped image
299
+ gt_mask_image = label_to_color_image(filtered_gt_mask).astype(np.uint8)
300
+ # show gt segmentation image
301
+ plt.subplot(grid_spec[1])
302
+ plt.imshow(image)
303
+ plt.imshow(gt_mask_image, alpha=overlay)
304
+ plt.axis('off')
305
+ # add plt title, optional
306
+ if gt_title:
307
+ plt.title(gt_title)
308
+
309
+ # if class name list is provided, plot a legend graph of
310
+ # classes color map
311
+ if class_names:
312
+ classes_index = np.arange(len(class_names)).reshape(len(class_names), 1)
313
+ classes_color_map = label_to_color_image(classes_index)
314
+
315
+ labels, count= np.unique(mask, return_counts=True)
316
+ # filter some corner pixel labels, may be caused by mask resize
317
+ labels = np.array([labels[i] for i in range(len(labels)) if count[i] > ignore_count_threshold])
318
+
319
+ if gt_mask is not None:
320
+ gt_labels, gt_count= np.unique(filtered_gt_mask, return_counts=True)
321
+ # filter some corner pixel labels, may be caused by mask resize
322
+ gt_labels = np.array([gt_labels[i] for i in range(len(gt_labels)) if gt_count[i] > ignore_count_threshold])
323
+
324
+ # merge labels & gt labels
325
+ labels = list(set(list(labels)+list(gt_labels)))
326
+ labels.sort()
327
+ labels = np.array(labels)
328
+
329
+ ax = plt.subplot(grid_spec[-1])
330
+ plt.imshow(classes_color_map[labels].astype(np.uint8), interpolation='nearest')
331
+
332
+ # adjust subplot display
333
+ ax.yaxis.tick_right()
334
+ plt.yticks(range(len(labels)), np.asarray(class_names)[labels])
335
+ plt.xticks([], [])
336
+ ax.tick_params(width=0.0)
337
+ plt.grid('off')
338
+
339
+ # convert plt to numpy image
340
+ img = figure_to_image(figure)
341
+ plt.close("all")
342
+ return img
343
+
models/deeplab/configs/ade20k_classes.txt ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wall
2
+ building
3
+ sky
4
+ floor
5
+ tree
6
+ ceiling
7
+ road
8
+ bed
9
+ window
10
+ grass
11
+ cabinet
12
+ sidewalk
13
+ person
14
+ ground
15
+ door
16
+ table
17
+ mountain
18
+ plant
19
+ curtain
20
+ chair
21
+ car
22
+ water
23
+ picture
24
+ sofa
25
+ shelf
26
+ house
27
+ sea
28
+ mirror
29
+ carpet
30
+ field
31
+ armchair
32
+ seat
33
+ fence
34
+ desk
35
+ rock
36
+ closet
37
+ lamp
38
+ bathtub
39
+ railing
40
+ cushion
41
+ base
42
+ box
43
+ column
44
+ signboard
45
+ chest of drawers
46
+ counter
47
+ sand
48
+ sink
49
+ skyscraper
50
+ fireplace
51
+ refrigerator
52
+ grandstand
53
+ path
54
+ stairs, steps
55
+ runway
56
+ showcase
57
+ billiard table
58
+ pillow
59
+ screen door
60
+ stairway
61
+ river
62
+ bridge
63
+ bookcase
64
+ blind, screen
65
+ coffee table
66
+ toilet
67
+ flower
68
+ book
69
+ hill
70
+ bench
71
+ countertop
72
+ stove
73
+ palm tree
74
+ kitchen island
75
+ computer
76
+ swivel chair
77
+ boat
78
+ bar
79
+ arcade machine
80
+ hovel
81
+ bus
82
+ towel
83
+ light
84
+ truck
85
+ tower
86
+ chandelier
87
+ sunshade
88
+ streetlight
89
+ booth
90
+ television
91
+ aeroplane
92
+ dirt track
93
+ clothes
94
+ pole
95
+ land
96
+ handrail
97
+ escalator
98
+ ottoman
99
+ bottle
100
+ buffet
101
+ poster
102
+ stage
103
+ van
104
+ ship
105
+ fountain
106
+ conveyer belt
107
+ canopy
108
+ washing machine
109
+ toy
110
+ swimming pool
111
+ stool
112
+ barrel
113
+ basket
114
+ waterfall
115
+ tent
116
+ bag
117
+ motorbike
118
+ cradle
119
+ oven
120
+ ball
121
+ solid food
122
+ stair
123
+ tank
124
+ brand
125
+ microwave
126
+ flowerpot
127
+ animal
128
+ bicycle
129
+ lake
130
+ dishwasher
131
+ silver screen
132
+ blanket
133
+ sculpture
134
+ exhaust hood
135
+ sconce
136
+ vase
137
+ traffic light
138
+ tray
139
+ dustbin
140
+ fan
141
+ wharf
142
+ crt screen
143
+ plate
144
+ monitor
145
+ notice board
146
+ shower
147
+ radiator
148
+ glass
149
+ clock
150
+ flag
models/deeplab/configs/cityscapes_classes.txt ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ego vehicle
2
+ rectification border
3
+ out of roi
4
+ static
5
+ dynamic
6
+ ground
7
+ road
8
+ sidewalk
9
+ parking
10
+ rail track
11
+ building
12
+ wall
13
+ fence
14
+ guard rail
15
+ bridge
16
+ tunnel
17
+ pole
18
+ polegroup
19
+ traffic light
20
+ traffic sign
21
+ vegetation
22
+ terrain
23
+ sky
24
+ person
25
+ rider
26
+ car
27
+ truck
28
+ bus
29
+ caravan
30
+ trailer
31
+ train
32
+ motorcycle
33
+ bicycle
models/deeplab/configs/coco_classes.txt ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ person
2
+ bicycle
3
+ car
4
+ motorbike
5
+ aeroplane
6
+ bus
7
+ train
8
+ truck
9
+ boat
10
+ traffic light
11
+ fire hydrant
12
+ stop sign
13
+ parking meter
14
+ bench
15
+ bird
16
+ cat
17
+ dog
18
+ horse
19
+ sheep
20
+ cow
21
+ elephant
22
+ bear
23
+ zebra
24
+ giraffe
25
+ backpack
26
+ umbrella
27
+ handbag
28
+ tie
29
+ suitcase
30
+ frisbee
31
+ skis
32
+ snowboard
33
+ sports ball
34
+ kite
35
+ baseball bat
36
+ baseball glove
37
+ skateboard
38
+ surfboard
39
+ tennis racket
40
+ bottle
41
+ wine glass
42
+ cup
43
+ fork
44
+ knife
45
+ spoon
46
+ bowl
47
+ banana
48
+ apple
49
+ sandwich
50
+ orange
51
+ broccoli
52
+ carrot
53
+ hot dog
54
+ pizza
55
+ donut
56
+ cake
57
+ chair
58
+ sofa
59
+ pottedplant
60
+ bed
61
+ diningtable
62
+ toilet
63
+ tvmonitor
64
+ laptop
65
+ mouse
66
+ remote
67
+ keyboard
68
+ cell phone
69
+ microwave
70
+ oven
71
+ toaster
72
+ sink
73
+ refrigerator
74
+ book
75
+ clock
76
+ vase
77
+ scissors
78
+ teddy bear
79
+ hair drier
80
+ toothbrush
models/deeplab/configs/voc_classes.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aeroplane
2
+ bicycle
3
+ bird
4
+ boat
5
+ bottle
6
+ bus
7
+ car
8
+ cat
9
+ chair
10
+ cow
11
+ diningtable
12
+ dog
13
+ horse
14
+ motorbike
15
+ person
16
+ pottedplant
17
+ sheep
18
+ sofa
19
+ train
20
+ tvmonitor
models/deeplab/deeplab.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #! /usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Run a Deeplabv3plus semantic segmentation model on test images.
5
+ """
6
+
7
+ import colorsys
8
+ import os, sys, argparse
9
+ import numpy as np
10
+ import cv2
11
+ from PIL import Image
12
+ import matplotlib.pyplot as plt
13
+ import time
14
+ from timeit import default_timer as timer
15
+ import tensorflow as tf
16
+ from tensorflow.keras import backend as K
17
+ from tensorflow.keras.models import Model, load_model
18
+ from tensorflow.keras.utils import multi_gpu_model
19
+ #from tensorflow_model_optimization.sparsity import keras as sparsity
20
+
21
+ from deeplabv3p.model import get_deeplabv3p_model
22
+ from deeplabv3p.postprocess_np import crf_postprocess
23
+ from common.utils import get_classes, optimize_tf_gpu, visualize_segmentation
24
+ from common.data_utils import preprocess_image, mask_resize, mask_resize_fast
25
+
26
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
27
+ os.environ['CUDA_VISIBLE_DEVICES'] = '0'
28
+
29
+ #tf.enable_eager_execution()
30
+ optimize_tf_gpu(tf, K)
31
+
32
+ default_config = {
33
+ "model_type": 'mobilenetv2lite',
34
+ "classes_path": os.path.join('configs', 'voc_classes.txt'),
35
+ "model_input_shape" : (512, 512),
36
+ "output_stride": 16,
37
+ "weights_path": os.path.join('weights', 'mobilenetv2_original.h5'),
38
+ "do_crf": False,
39
+ "pruning_model": False,
40
+ "gpu_num" : 1,
41
+ }
42
+
43
+
44
+ class DeepLab(object):
45
+ _defaults = default_config
46
+
47
+ @classmethod
48
+ def get_defaults(cls, n):
49
+ if n in cls._defaults:
50
+ return cls._defaults[n]
51
+ else:
52
+ return "Unrecognized attribute name '" + n + "'"
53
+
54
+ def __init__(self, **kwargs):
55
+ super(DeepLab, self).__init__()
56
+ self.__dict__.update(self._defaults) # set up default values
57
+ self.__dict__.update(kwargs) # and update with user overrides
58
+ self.class_names = get_classes(self.classes_path)
59
+ K.set_learning_phase(0)
60
+ self.deeplab_model = self._generate_model()
61
+
62
+ def _generate_model(self):
63
+ '''to generate the bounding boxes'''
64
+ weights_path = os.path.expanduser(self.weights_path)
65
+ assert weights_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
66
+
67
+ #add one more class for background
68
+ self.class_names = ['background'] + self.class_names
69
+ num_classes = len(self.class_names)
70
+ assert len(self.class_names) < 254, 'PNG image label only support less than 254 classes.'
71
+
72
+ # Load model, or construct model and load weights.
73
+ try:
74
+ deeplab_model = get_deeplabv3p_model(self.model_type, num_classes, model_input_shape=self.model_input_shape, output_stride=self.output_stride, freeze_level=0, weights_path=weights_path, training=False)
75
+ deeplab_model.summary()
76
+ except Exception as e:
77
+ print(repr(e))
78
+ if self.gpu_num>=2:
79
+ deeplab_model = multi_gpu_model(deeplab_model, gpus=self.gpu_num)
80
+
81
+ return deeplab_model
82
+
83
+
84
+ def segment_image(self, image):
85
+ image_data = preprocess_image(image, self.model_input_shape)
86
+ # original image shape, in (height, width) format
87
+ image_shape = tuple(reversed(image.size))
88
+
89
+ start = time.time()
90
+ out_mask = self.predict(image_data, image_shape)
91
+ end = time.time()
92
+ print("Inference time: {:.8f}s".format(end - start))
93
+
94
+ # show segmentation result
95
+ image_array = visualize_segmentation(np.array(image), out_mask, class_names=self.class_names, ignore_count_threshold=500)
96
+ return Image.fromarray(image_array)
97
+
98
+
99
+ def predict(self, image_data, image_shape):
100
+ prediction = self.deeplab_model.predict([image_data])
101
+ # reshape prediction to mask array
102
+ mask = np.argmax(prediction, -1)[0].reshape(self.model_input_shape)
103
+
104
+ # add CRF postprocess if needed
105
+ if self.do_crf:
106
+ image = image_data[0].astype('uint8')
107
+ mask = crf_postprocess(image, mask, zero_unsure=False)
108
+
109
+ # resize mask back to original image size
110
+ mask = mask_resize_fast(mask, tuple(reversed(image_shape)))
111
+
112
+ return mask
113
+
114
+
115
+ def dump_model_file(self, output_model_file):
116
+ self.deeplab_model.save(output_model_file)
117
+
118
+ def dump_saved_model(self, saved_model_path):
119
+ model = self.deeplab_model
120
+ os.makedirs(saved_model_path, exist_ok=True)
121
+
122
+ tf.keras.experimental.export_saved_model(model, saved_model_path)
123
+ print('export inference model to %s' % str(saved_model_path))
124
+
125
+
126
+ def segment_video(deeplab, video_path, output_path=""):
127
+ import cv2
128
+ vid = cv2.VideoCapture(0 if video_path == '0' else video_path)
129
+ if not vid.isOpened():
130
+ raise IOError("Couldn't open webcam or video")
131
+
132
+ # here we encode the video to MPEG-4 for better compatibility, you can use ffmpeg later
133
+ # to convert it to x264 to reduce file size:
134
+ # ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4
135
+ #
136
+ #video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if video_path == '0' else int(vid.get(cv2.CAP_PROP_FOURCC))
137
+ video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if video_path == '0' else cv2.VideoWriter_fourcc(*"mp4v")
138
+ video_fps = vid.get(cv2.CAP_PROP_FPS)
139
+ video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
140
+ int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
141
+ isOutput = True if output_path != "" else False
142
+ if isOutput:
143
+ print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
144
+ out = cv2.VideoWriter(output_path, video_FourCC, (5. if video_path == '0' else video_fps), video_size)
145
+ accum_time = 0
146
+ curr_fps = 0
147
+ fps = "FPS: ??"
148
+ prev_time = timer()
149
+ while True:
150
+ return_value, frame = vid.read()
+ if not return_value:
+ break
151
+ image = Image.fromarray(frame)
152
+ image = deeplab.segment_image(image)
153
+ result = np.asarray(image)
154
+ curr_time = timer()
155
+ exec_time = curr_time - prev_time
156
+ prev_time = curr_time
157
+ accum_time = accum_time + exec_time
158
+ curr_fps = curr_fps + 1
159
+ if accum_time > 1:
160
+ accum_time = accum_time - 1
161
+ fps = "FPS: " + str(curr_fps)
162
+ curr_fps = 0
163
+ cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
164
+ fontScale=0.50, color=(255, 0, 0), thickness=2)
165
+ cv2.namedWindow("result", cv2.WINDOW_NORMAL)
166
+ cv2.imshow("result", result)
167
+ if isOutput:
168
+ out.write(result)
169
+ if cv2.waitKey(1) & 0xFF == ord('q'):
170
+ break
171
+ # Release everything if job is finished
172
+ vid.release()
173
+ if isOutput:
174
+ out.release()
175
+ cv2.destroyAllWindows()
176
+
177
+
178
+ def segment_img(deeplab):
179
+ while True:
180
+ img = input('Input image filename:')
181
+ try:
182
+ image = Image.open(img)
183
+ except:
184
+ print('Open Error! Try again!')
185
+ continue
186
+ else:
187
+ r_image = deeplab.segment_image(image)
188
+ r_image.show()
189
+
190
+
191
+ if __name__ == '__main__':
192
+ # class DeepLab defines the default value, so suppress any default here
193
+ parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description='demo or dump out Deeplab h5 model')
194
+ '''
195
+ Command line options
196
+ '''
197
+ parser.add_argument(
198
+ '--model_type', type=str,
199
+ help='Deeplabv3p model type: mobilenetv2/xception, default ' + DeepLab.get_defaults("model_type")
200
+ )
201
+
202
+ parser.add_argument(
203
+ '--weights_path', type=str,
204
+ help='path to model weight file, default ' + DeepLab.get_defaults("weights_path")
205
+ )
206
+
207
+ parser.add_argument(
208
+ '--classes_path', type=str,
209
+ help='path to class definitions, default ' + DeepLab.get_defaults("classes_path")
210
+ )
211
+
212
+ parser.add_argument(
213
+ '--model_input_shape', type=str,
214
+ help='model input size as <height>x<width>, default ' +
215
+ str(DeepLab.get_defaults("model_input_shape")[0])+'x'+str(DeepLab.get_defaults("model_input_shape")[1]),
216
+ default=str(DeepLab.get_defaults("model_input_shape")[0])+'x'+str(DeepLab.get_defaults("model_input_shape")[1])
217
+ )
218
+
219
+ parser.add_argument(
220
+ '--output_stride', type=int, choices=[8, 16, 32],
221
+ help='model output stride, default ' + str(DeepLab.get_defaults("output_stride"))
222
+ )
223
+
224
+ parser.add_argument(
225
+ '--do_crf', default=False, action="store_true",
226
+ help='whether to add CRF postprocess for model output, default ' + str(DeepLab.get_defaults("do_crf"))
227
+ )
228
+
229
+ #parser.add_argument(
230
+ #'--pruning_model', default=False, action="store_true",
231
+ #help='Whether to be a pruning model/weights file')
232
+
233
+ parser.add_argument(
234
+ '--gpu_num', type=int,
235
+ help='Number of GPU to use, default ' + str(DeepLab.get_defaults("gpu_num"))
236
+ )
237
+ parser.add_argument(
238
+ '--image', default=False, action="store_true",
239
+ help='Image inference mode, will ignore all positional arguments'
240
+ )
241
+ '''
242
+ Command line positional arguments -- for video detection mode
243
+ '''
244
+ parser.add_argument(
245
+ "--input", nargs='?', type=str,required=False,default='./path2your_video',
246
+ help = "Video input path"
247
+ )
248
+
249
+ parser.add_argument(
250
+ "--output", nargs='?', type=str, default="",
251
+ help = "[Optional] Video output path"
252
+ )
253
+ '''
254
+ Command line positional arguments -- for model dump
255
+ '''
256
+ parser.add_argument(
257
+ '--dump_model', default=False, action="store_true",
258
+ help='Dump out training model to inference model'
259
+ )
260
+
261
+ parser.add_argument(
262
+ '--output_model_file', type=str,
263
+ help='output inference model file'
264
+ )
265
+
266
+ args = parser.parse_args()
267
+ # param parse
268
+ if args.model_input_shape:
269
+ height, width = args.model_input_shape.split('x')
270
+ args.model_input_shape = (int(height), int(width))
271
+
272
+ # get wrapped inference object
273
+ deeplab = DeepLab(**vars(args))
274
+
275
+ if args.dump_model:
276
+ """
277
+ Dump out training model to inference model
278
+ """
279
+ if not args.output_model_file:
280
+ raise ValueError('output model file is not specified')
281
+
282
+ print('Dumping out training model to inference model')
283
+ deeplab.dump_model_file(args.output_model_file)
284
+ sys.exit()
285
+
286
+ if args.image:
287
+ """
288
+ Image segmentation mode, disregard any remaining command line arguments
289
+ """
290
+ print("Image segmentation mode")
291
+ if "input" in args:
292
+ print(" Ignoring remaining command line arguments: " + args.input + "," + args.output)
293
+ segment_img(deeplab)
294
+ elif "input" in args:
295
+ segment_video(deeplab, args.input, args.output)
296
+ else:
297
+ print("Must specify at least video_input_path. See usage with --help.")
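A minimal usage sketch for the wrapper above, assuming it is run from `models/deeplab/` with real weight and class files in place; the paths below are placeholders, not files shipped with this commit:

```python
from PIL import Image
from deeplab import DeepLab  # the inference wrapper defined above

# Placeholder paths: point these at your own trained weights and class list.
deeplab = DeepLab(model_type='mobilenetv2_lite',
                  weights_path='weights/mobilenetv2_original.h5',
                  classes_path='configs/voc_classes.txt',
                  model_input_shape=(512, 512))

image = Image.open('example/2007_000346.jpg')
result = deeplab.segment_image(image)  # PIL image with the color-coded overlay
result.save('2007_000346_segmented.jpg')
```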
models/deeplab/deeplabv3p/data.py ADDED
@@ -0,0 +1,161 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ import os, glob, time
4
+ import random
5
+ import numpy as np
6
+ import cv2
7
+ from PIL import Image
8
+ from sklearn.utils import class_weight
9
+ from tensorflow.keras.utils import Sequence
10
+
11
+ from common.data_utils import random_horizontal_flip, random_vertical_flip, random_brightness, random_grayscale, random_chroma, random_contrast, random_sharpness, random_blur, random_zoom_rotate, random_gridmask, random_crop, random_histeq
12
+
13
+
14
+ class SegmentationGenerator(Sequence):
15
+ def __init__(self, dataset_path, data_list,
16
+ batch_size=1,
17
+ num_classes=21,
18
+ target_size=(512, 512),
19
+ weighted_type=None,
20
+ is_eval=False,
21
+ augment=True):
22
+ # get real path for dataset
23
+ dataset_realpath = os.path.realpath(dataset_path)
24
+ self.image_path_list = [os.path.join(dataset_realpath, 'images', image_id.strip()+'.jpg') for image_id in data_list]
25
+ self.label_path_list = [os.path.join(dataset_realpath, 'labels', image_id.strip()+'.png') for image_id in data_list]
26
+ # initialize random seed
27
+ np.random.seed(int(time.time()))
28
+
29
+ self.num_classes = num_classes
30
+ self.batch_size = batch_size
31
+ self.target_size = target_size
32
+ self.weighted_type = weighted_type
33
+ self.augment = augment
34
+ self.is_eval = is_eval
35
+
36
+ # Preallocate memory
37
+ self.X = np.zeros((batch_size, target_size[1], target_size[0], 3), dtype='float32')
38
+ self.Y = np.zeros((batch_size, target_size[1]*target_size[0], 1), dtype='float32')
39
+ self.PIXEL_WEIGHTS = np.zeros((batch_size, target_size[1]*target_size[0]), dtype='float32')
40
+
41
+ def get_batch_image_path(self, i):
42
+ return self.image_path_list[i*self.batch_size:(i+1)*self.batch_size]
43
+
44
+ def get_batch_label_path(self, i):
45
+ return self.label_path_list[i*self.batch_size:(i+1)*self.batch_size]
46
+
47
+ def get_weighted_type(self):
48
+ return self.weighted_type
49
+
50
+ def __len__(self):
51
+ return len(self.image_path_list) // self.batch_size
52
+
53
+ def __getitem__(self, i):
54
+
55
+ for n, (image_path, label_path) in enumerate(zip(self.image_path_list[i*self.batch_size:(i+1)*self.batch_size],
56
+ self.label_path_list[i*self.batch_size:(i+1)*self.batch_size])):
57
+
58
+ # Load image and label array
59
+ image = cv2.imread(image_path, cv2.IMREAD_COLOR) # cv2.IMREAD_COLOR/cv2.IMREAD_GRAYSCALE/cv2.IMREAD_UNCHANGED
60
+ label = np.array(Image.open(label_path))
61
+
62
+ # we reset all invalid label values to 0 (background) in training,
63
+ # but to 255 (invalid) in eval
64
+ if self.is_eval:
65
+ label[label>(self.num_classes-1)] = 255
66
+ else:
67
+ label[label>(self.num_classes-1)] = 0
68
+
69
+ # Do augmentation
70
+ if self.augment:
71
+ # random horizontal flip image
72
+ image, label = random_horizontal_flip(image, label)
73
+
74
+ # random vertical flip image
75
+ image, label = random_vertical_flip(image, label)
76
+
77
+ # random zoom & rotate image
78
+ image, label = random_zoom_rotate(image, label)
79
+
80
+ # random add gridmask augment for image and label
81
+ image, label = random_gridmask(image, label)
82
+
83
+ # random adjust brightness
84
+ image = random_brightness(image)
85
+
86
+ # random adjust color level
87
+ image = random_chroma(image)
88
+
89
+ # random adjust contrast
90
+ image = random_contrast(image)
91
+
92
+ # random adjust sharpness
93
+ image = random_sharpness(image)
94
+
95
+ # random convert image to grayscale
96
+ image = random_grayscale(image)
97
+
98
+ # random do gaussian blur to image
99
+ image = random_blur(image)
100
+
101
+ # random crop image & label
102
+ image, label = random_crop(image, label, self.target_size)
103
+
104
+ # random do histogram equalization using CLAHE
105
+ image = random_histeq(image)
106
+
107
+
108
+ # Resize image & label mask to model input shape
109
+ image = cv2.resize(image, self.target_size)
110
+ label = cv2.resize(label, self.target_size, interpolation = cv2.INTER_NEAREST)
111
+
112
+ label = label.astype('int32')
113
+ y = label.flatten()
114
+
115
+ # we reset all invalid label values to 0 (background) in training,
116
+ # but to 255 (invalid) in eval
117
+ if self.is_eval:
118
+ y[y>(self.num_classes-1)] = 255
119
+ else:
120
+ y[y>(self.num_classes-1)] = 0
121
+
122
+ # append input image and label array
123
+ self.X[n] = image
124
+ self.Y[n] = np.expand_dims(y, -1)
125
+
126
+ ###########################################################################
127
+ #
128
+ # generating adaptive pixels weights array, for unbalanced classes training
129
+ #
130
+ ###########################################################################
131
+
132
+ # Create adaptive pixels weights for all classes on one image,
133
+ # according to pixel number of classes
134
+ class_list = np.unique(y)
135
+ if len(class_list):
136
+ class_weights = class_weight.compute_class_weight('balanced', classes=class_list, y=y)
137
+ class_weights = {class_id : weight for class_id , weight in zip(class_list, class_weights)}
138
+ # class_weights dict would look like:
139
+ # {
140
+ # 0: 0.5997304983036035,
141
+ # 12: 2.842871240958237,
142
+ # 15: 1.0195474451419193
143
+ # }
144
+ for class_id in class_list:
145
+ np.putmask(self.PIXEL_WEIGHTS[n], y==class_id, class_weights[class_id])
146
+
147
+ # A trick of the Keras data generator: the last item yielded
148
+ # from a generator can be a sample weights array
149
+ sample_weight_dict = {'pred_mask' : self.PIXEL_WEIGHTS}
150
+
151
+ if self.weighted_type == 'adaptive':
152
+ return self.X, self.Y, sample_weight_dict
153
+ else:
154
+ return self.X, self.Y
155
+
156
+ def on_epoch_end(self):
157
+ # Shuffle dataset for next epoch
158
+ c = list(zip(self.image_path_list, self.label_path_list))
159
+ random.shuffle(c)
160
+ self.image_path_list, self.label_path_list = zip(*c)
161
+
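A training-side sketch of how `SegmentationGenerator` might be wired up, assuming a hypothetical dataset root containing `images/*.jpg`, `labels/*.png` and a `train.txt` id list (none of these paths come from this commit):

```python
import os
from deeplabv3p.data import SegmentationGenerator

dataset_path = 'VOC2012'  # hypothetical dataset root
with open(os.path.join(dataset_path, 'train.txt')) as f:
    train_ids = f.read().splitlines()

train_gen = SegmentationGenerator(dataset_path, train_ids,
                                  batch_size=8,
                                  num_classes=21,
                                  target_size=(512, 512),
                                  weighted_type=None,
                                  augment=True)

x_batch, y_batch = train_gen[0]      # one preprocessed batch
# model.fit(train_gen, epochs=50)    # with a model whose output is (512*512, 21)
```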
models/deeplab/deeplabv3p/loss.py ADDED
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ import numpy as np
4
+ import tensorflow as tf
5
+ import tensorflow.keras.backend as K
6
+
7
+
8
+ def sparse_crossentropy_ignoring_last_label(y_true, y_pred):
9
+ num_classes = K.shape(y_pred)[-1]
10
+ y_true = K.one_hot(tf.cast(y_true[..., 0], tf.int32), num_classes+1)[..., :-1]
11
+ return K.categorical_crossentropy(y_true, y_pred)
12
+
13
+ def sparse_crossentropy(y_true, y_pred):
14
+ num_classes = K.shape(y_pred)[-1]
15
+ y_true = K.one_hot(tf.cast(y_true[..., 0], tf.int32), num_classes)
16
+ return K.categorical_crossentropy(y_true, y_pred)
17
+
18
+
19
+ def softmax_focal_loss(y_true, y_pred, gamma=2.0, alpha=0.25, from_logits=False):
20
+ """
21
+ Compute softmax focal loss.
22
+ Reference Paper:
23
+ "Focal Loss for Dense Object Detection"
24
+ https://arxiv.org/abs/1708.02002
25
+
26
+ # Arguments
27
+ y_true: Ground truth targets,
28
+ tensor of shape (?, num_pixel, num_classes).
29
+ y_pred: Predicted logits,
30
+ tensor of shape (?, num_pixel, num_classes).
31
+ gamma: exponent of the modulating factor (1 - p_t) ^ gamma.
32
+ alpha: optional alpha weighting factor to balance positives vs negatives.
33
+
34
+ # Returns
35
+ softmax_focal_loss: Softmax focal loss, tensor of shape (?, num_pixel).
36
+ """
37
+ if from_logits:
38
+ y_pred = K.softmax(y_pred)
39
+
40
+ # Clip the prediction value to prevent NaN's and Inf's
41
+ #epsilon = K.epsilon()
42
+ #y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
43
+ y_pred = K.maximum(K.minimum(y_pred, 1 - 1e-15), 1e-15)
44
+
45
+ # Calculate Cross Entropy
46
+ cross_entropy = -y_true * K.log(y_pred)
47
+
48
+ # Calculate Focal Loss
49
+ softmax_focal_loss = K.mean(alpha * K.pow(1 - y_pred, gamma) * cross_entropy, axis=-1)
50
+ return softmax_focal_loss
51
+
52
+
53
+ class WeightedSparseCategoricalCrossEntropy(object):
54
+ def __init__(self, weights, from_logits=False):
55
+ self.weights = np.array(weights).astype('float32')
56
+ self.from_logits = from_logits
57
+ self.__name__ = 'weighted_sparse_categorical_crossentropy'
58
+
59
+ def __call__(self, y_true, y_pred):
60
+ return self.weighted_sparse_categorical_crossentropy(y_true, y_pred)
61
+
62
+ def weighted_sparse_categorical_crossentropy(self, y_true, y_pred):
63
+ num_classes = len(self.weights)
64
+ y_true = K.one_hot(tf.cast(y_true[..., 0], tf.int32), num_classes)
65
+ if self.from_logits:
66
+ y_pred = K.softmax(y_pred)
67
+
68
+ log_pred = K.log(y_pred)
69
+ unweighted_losses = -K.sum(y_true*log_pred, axis=-1)
70
+
71
+ weights = K.sum(K.constant(self.weights) * y_true, axis=-1)
72
+ weighted_losses = unweighted_losses * weights
73
+ return weighted_losses
74
+
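A compile-time sketch for the losses above; the per-class weight values are illustrative only:

```python
from deeplabv3p.loss import (sparse_crossentropy_ignoring_last_label,
                             WeightedSparseCategoricalCrossEntropy)

# Sparse CE that one-hots with an extra class slot and drops it, so pixels
# labelled exactly `num_classes` contribute nothing to the loss.
plain_loss = sparse_crossentropy_ignoring_last_label

# Fixed per-class weighting (21 classes here; the weights are placeholders).
weighted_loss = WeightedSparseCategoricalCrossEntropy(weights=[1.0] * 21)

# model.compile(optimizer='adam', loss=weighted_loss,
#               metrics=['sparse_categorical_accuracy'])
```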
models/deeplab/deeplabv3p/metrics.py ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ import os
4
+ import numpy as np
5
+
6
+ from tensorflow.keras import backend as K
7
+ import tensorflow as tf
8
+
9
+
10
+ def mIOU(gt, preds):
11
+ ulabels = np.unique(gt)
12
+ iou = np.zeros(len(ulabels))
13
+ for k, u in enumerate(ulabels):
14
+ inter = (gt == u) & (preds==u)
15
+ union = (gt == u) | (preds==u)
16
+ iou[k] = inter.sum()/union.sum()
17
+ return np.round(iou.mean(), 2)
18
+
19
+
20
+ def sparse_accuracy_ignoring_last_label(y_true, y_pred):
21
+ nb_classes = y_pred.shape.as_list()[-1]
22
+ y_pred = K.reshape(y_pred, (-1, nb_classes))
23
+ y_true = tf.cast(K.flatten(y_true), tf.int64)
24
+ legal_labels = ~K.equal(y_true, nb_classes)
25
+ return K.sum(tf.cast(legal_labels & K.equal(y_true,
26
+ K.argmax(y_pred, axis=-1)), tf.float32)) / K.sum(tf.cast(legal_labels, tf.float32))
27
+
28
+
29
+
30
+ def Jaccard(y_true, y_pred):
31
+ nb_classes = y_pred.shape.as_list()[-1]
32
+ iou = []
33
+ pred_pixels = K.argmax(y_pred, axis=-1)
34
+ for i in range(0, nb_classes+1):
35
+ true_labels = K.equal(y_true[:,:,0], i)
36
+ pred_labels = K.equal(pred_pixels, i)
37
+ inter = tf.cast(true_labels & pred_labels, tf.int32)
38
+ union = tf.cast(true_labels | pred_labels, tf.int32)
39
+ legal_batches = K.sum(tf.cast(true_labels, tf.int32), axis=1)>0
40
+ ious = K.sum(inter, axis=1)/K.sum(union, axis=1)
41
+ iou.append(K.mean(ious[legal_batches]))
42
+ iou = tf.stack(iou)
43
+ legal_labels = ~tf.math.is_nan(iou)
44
+ iou = iou[legal_labels]
45
+ return K.mean(iou)
46
+
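A quick numpy sanity check for the `mIOU` helper above, using toy masks whose values are class ids:

```python
import numpy as np
from deeplabv3p.metrics import mIOU

gt    = np.array([[0, 0, 1],
                  [1, 2, 2]])
preds = np.array([[0, 0, 1],
                  [1, 2, 0]])

# Per-class IoU over the labels present in gt: 2/3, 1.0 and 0.5,
# so the rounded mean printed here is 0.72.
print(mIOU(gt, preds))
```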
models/deeplab/deeplabv3p/model.py ADDED
@@ -0,0 +1,96 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ create deeplabv3p models
5
+ """
6
+ from functools import partial
7
+ from tensorflow.keras.layers import Conv2D, Reshape, Activation, Softmax, Lambda, Input
8
+ from tensorflow.keras.models import Model
9
+
10
+ from deeplabv3p.models.deeplabv3p_xception import Deeplabv3pXception
11
+ from deeplabv3p.models.deeplabv3p_mobilenetv2 import Deeplabv3pMobileNetV2, Deeplabv3pLiteMobileNetV2
12
+ from deeplabv3p.models.deeplabv3p_mobilenetv3 import Deeplabv3pMobileNetV3Large, Deeplabv3pLiteMobileNetV3Large, Deeplabv3pMobileNetV3Small, Deeplabv3pLiteMobileNetV3Small
13
+ from deeplabv3p.models.deeplabv3p_peleenet import Deeplabv3pPeleeNet, Deeplabv3pLitePeleeNet
14
+ from deeplabv3p.models.deeplabv3p_resnet50 import Deeplabv3pResNet50
15
+ from deeplabv3p.models.layers import DeeplabConv2D, Subpixel, img_resize
16
+
17
+ #
18
+ # A map of model type to construction function for DeepLabv3+
19
+ #
20
+ deeplab_model_map = {
21
+ 'mobilenetv2': partial(Deeplabv3pMobileNetV2, alpha=1.0),
22
+ 'mobilenetv2_lite': partial(Deeplabv3pLiteMobileNetV2, alpha=1.0),
23
+
24
+ 'mobilenetv3large': partial(Deeplabv3pMobileNetV3Large, alpha=1.0),
25
+ 'mobilenetv3large_lite': partial(Deeplabv3pLiteMobileNetV3Large, alpha=1.0),
26
+
27
+ 'mobilenetv3small': partial(Deeplabv3pMobileNetV3Small, alpha=1.0),
28
+ 'mobilenetv3small_lite': partial(Deeplabv3pLiteMobileNetV3Small, alpha=1.0),
29
+
30
+ 'peleenet': Deeplabv3pPeleeNet,
31
+ 'peleenet_lite': Deeplabv3pLitePeleeNet,
32
+
33
+ 'xception': Deeplabv3pXception,
34
+ 'resnet50': Deeplabv3pResNet50,
35
+ }
36
+
37
+
38
+ def get_deeplabv3p_model(model_type, num_classes, model_input_shape, output_stride, freeze_level=0, weights_path=None, training=True, use_subpixel=False):
39
+ # check if model type is valid
40
+ if model_type not in deeplab_model_map.keys():
41
+ raise ValueError('Unsupported model type: {}'.format(model_type))
42
+
43
+ model_function = deeplab_model_map[model_type]
44
+
45
+ input_tensor = Input(shape=model_input_shape + (3,), name='image_input')
46
+ model, backbone_len = model_function(input_tensor=input_tensor,
47
+ input_shape=model_input_shape + (3,),
48
+ #weights='imagenet',
49
+ num_classes=21,
50
+ OS=output_stride)
51
+
52
+ base_model = Model(model.input, model.layers[-5].output)
53
+ print('backbone layers number: {}'.format(backbone_len))
54
+
55
+ if use_subpixel:
56
+ if model_type == 'xception':
57
+ scale = 4
58
+ else:
59
+ scale = 8
60
+ x = Subpixel(num_classes, 1, scale, padding='same')(base_model.output)
61
+ else:
62
+ x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='conv_upsample')(base_model.output)
63
+ x = Lambda(img_resize, arguments={'size': (model_input_shape[0], model_input_shape[1])}, name='pred_resize')(x)
64
+
65
+ # for training model, we need to flatten mask to calculate loss
66
+ if training:
67
+ x = Reshape((model_input_shape[0]*model_input_shape[1], num_classes)) (x)
68
+
69
+ x = Softmax(name='pred_mask')(x)
70
+ model = Model(base_model.input, x, name='deeplabv3p_'+model_type)
71
+
72
+ #if use_subpixel:
73
+ # Do ICNR
74
+ #for layer in model.layers:
75
+ #if type(layer) == Subpixel:
76
+ #c, b = layer.get_weights()
77
+ #w = icnr_weights(scale=scale, shape=c.shape)
78
+ #layer.set_weights([w, b])
79
+
80
+ if weights_path:
81
+ model.load_weights(weights_path, by_name=False)#, skip_mismatch=True)
82
+ print('Load weights {}.'.format(weights_path))
83
+
84
+ if freeze_level in [1, 2]:
85
+ # Freeze the backbone part or freeze all but final feature map & input layers.
86
+ num = (backbone_len, len(base_model.layers))[freeze_level-1]
87
+ for i in range(num): model.layers[i].trainable = False
88
+ print('Freeze the first {} layers of total {} layers.'.format(num, len(model.layers)))
89
+ elif freeze_level == 0:
90
+ # Unfreeze all layers.
91
+ for i in range(len(model.layers)):
92
+ model.layers[i].trainable= True
93
+ print('Unfreeze all of the layers.')
94
+
95
+ return model
96
+
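A construction sketch for the factory above; `weights_path=None` leaves the segmentation head randomly initialized, while the backbone helpers still fetch their ImageNet weights as coded:

```python
from deeplabv3p.model import get_deeplabv3p_model

model = get_deeplabv3p_model(model_type='mobilenetv3large_lite',
                             num_classes=21,
                             model_input_shape=(512, 512),
                             output_stride=16,
                             freeze_level=0,
                             weights_path=None,
                             training=False)  # inference mode: mask is not flattened
model.summary()
```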
models/deeplab/deeplabv3p/models/__pycache__/deeplabv3p_mobilenetv3.cpython-311.pyc ADDED
Binary file (35.4 kB).
 
models/deeplab/deeplabv3p/models/__pycache__/layers.cpython-311.pyc ADDED
Binary file (16.6 kB).
 
models/deeplab/deeplabv3p/models/deeplabv3p_mobilenetv2.py ADDED
@@ -0,0 +1,349 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """ Deeplabv3+ MobileNetV2 model for Keras.
4
+
5
+ # Reference Paper:
6
+ - [Encoder-Decoder with Atrous Separable Convolution
7
+ for Semantic Image Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
8
+ - [Inverted Residuals and Linear Bottlenecks: Mobile Networks for
9
+ Classification, Detection and Segmentation](https://arxiv.org/abs/1801.04381)
10
+ """
11
+ from tensorflow.keras.models import Model
12
+ from tensorflow.keras.activations import relu
13
+ from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, ZeroPadding2D, Lambda, AveragePooling2D, Input, Concatenate, Add, Reshape, BatchNormalization, Dropout, ReLU, Softmax
14
+ from tensorflow.keras.utils import get_source_inputs, get_file
15
+ #from tensorflow.keras import backend as K
16
+
17
+ from deeplabv3p.models.layers import DeeplabConv2D, DeeplabDepthwiseConv2D, CustomBatchNormalization, ASPP_block, ASPP_Lite_block, Decoder_block, normalize, img_resize
18
+
19
+ BACKBONE_WEIGHT_PATH = ('https://github.com/JonathanCMitchell/mobilenet_v2_keras/'
20
+ 'releases/download/v1.1/')
21
+
22
+ WEIGHTS_PATH_MOBILE = "https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.1/deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5"
23
+
24
+
25
+ def _make_divisible(v, divisor, min_value=None):
26
+ if min_value is None:
27
+ min_value = divisor
28
+ new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
29
+ # Make sure that round down does not go down by more than 10%.
30
+ if new_v < 0.9 * v:
31
+ new_v += divisor
32
+ return new_v
33
+
34
+
35
+ def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id, skip_connection, rate=1):
36
+ #in_channels = inputs._keras_shape[-1]
37
+ in_channels = inputs.shape.as_list()[-1]
38
+ pointwise_conv_filters = int(filters * alpha)
39
+ pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
40
+ x = inputs
41
+ prefix = 'expanded_conv_{}_'.format(block_id)
42
+ if block_id:
43
+ # Expand
44
+ x = DeeplabConv2D(expansion * in_channels, kernel_size=1, padding='same',
45
+ use_bias=False, activation=None,
46
+ name=prefix + 'expand')(x)
47
+ x = CustomBatchNormalization(epsilon=1e-3, momentum=0.999,
48
+ name=prefix + 'expand_BN')(x)
49
+ x = ReLU(max_value=6.)(x)
50
+ else:
51
+ prefix = 'expanded_conv_'
52
+ # Depthwise
53
+ x = DeeplabDepthwiseConv2D(kernel_size=3, strides=stride, activation=None,
54
+ use_bias=False, padding='same', dilation_rate=(rate, rate),
55
+ name=prefix + 'depthwise')(x)
56
+ x = CustomBatchNormalization(epsilon=1e-3, momentum=0.999,
57
+ name=prefix + 'depthwise_BN')(x)
58
+ x = ReLU(max_value=6., name=prefix + 'depthwise_relu')(x)
59
+
60
+ x = DeeplabConv2D(pointwise_filters,
61
+ kernel_size=1, padding='same', use_bias=False, activation=None,
62
+ name=prefix + 'project')(x)
63
+ x = CustomBatchNormalization(epsilon=1e-3, momentum=0.999,
64
+ name=prefix + 'project_BN')(x)
65
+
66
+ if skip_connection:
67
+ return Add(name=prefix + 'add')([inputs, x])
68
+ # if in_channels == pointwise_filters and stride == 1:
69
+ # return Add(name='res_connect_' + str(block_id))([inputs, x])
70
+
71
+ return x
72
+
73
+
74
+ def MobileNetV2_body(input_tensor, OS, alpha, weights='imagenet'):
75
+ """
76
+ Modified MobileNetV2 feature extractor body
77
+ with specified output stride and skip level feature
78
+ """
79
+ if OS == 8:
80
+ origin_os16_stride = 1
81
+ origin_os16_block_rate = 2
82
+ origin_os32_stride = 1
83
+ origin_os32_block_rate = 4
84
+ elif OS == 16:
85
+ origin_os16_stride = 2
86
+ origin_os16_block_rate = 1
87
+ origin_os32_stride = 1
88
+ origin_os32_block_rate = 2
89
+ elif OS == 32:
90
+ origin_os16_stride = 2
91
+ origin_os16_block_rate = 1
92
+ origin_os32_stride = 2
93
+ origin_os32_block_rate = 1
94
+ else:
95
+ raise ValueError('invalid output stride', OS)
96
+
97
+ first_block_filters = _make_divisible(32 * alpha, 8)
98
+ x = DeeplabConv2D(first_block_filters,
99
+ kernel_size=3,
100
+ strides=(2, 2), padding='same',
101
+ use_bias=False, name='Conv')(input_tensor)
102
+ x = CustomBatchNormalization(
103
+ epsilon=1e-3, momentum=0.999, name='Conv_BN')(x)
104
+ x = ReLU(6.)(x)
105
+
106
+ x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1,
107
+ expansion=1, block_id=0, skip_connection=False)
108
+
109
+ x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2,
110
+ expansion=6, block_id=1, skip_connection=False)
111
+ x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1,
112
+ expansion=6, block_id=2, skip_connection=True)
113
+ # skip level feature, with output stride = 4
114
+ skip = x
115
+
116
+ x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2,
117
+ expansion=6, block_id=3, skip_connection=False)
118
+ x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
119
+ expansion=6, block_id=4, skip_connection=True)
120
+ x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
121
+ expansion=6, block_id=5, skip_connection=True)
122
+
123
+ # original output stride changes to 16 from here, so we start to control block stride and dilation rate
124
+ x = _inverted_res_block(x, filters=64, alpha=alpha, stride=origin_os16_stride, # origin: stride=2!
125
+ expansion=6, block_id=6, skip_connection=False)
126
+ x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=origin_os16_block_rate,
127
+ expansion=6, block_id=7, skip_connection=True)
128
+ x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=origin_os16_block_rate,
129
+ expansion=6, block_id=8, skip_connection=True)
130
+ x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=origin_os16_block_rate,
131
+ expansion=6, block_id=9, skip_connection=True)
132
+
133
+ x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=origin_os16_block_rate,
134
+ expansion=6, block_id=10, skip_connection=False)
135
+ x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=origin_os16_block_rate,
136
+ expansion=6, block_id=11, skip_connection=True)
137
+ x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=origin_os16_block_rate,
138
+ expansion=6, block_id=12, skip_connection=True)
139
+
140
+ # original output stride changes to 32 from here
141
+ x = _inverted_res_block(x, filters=160, alpha=alpha, stride=origin_os32_stride, rate=origin_os16_block_rate, # origin: stride=2!
142
+ expansion=6, block_id=13, skip_connection=False)
143
+ x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=origin_os32_block_rate,
144
+ expansion=6, block_id=14, skip_connection=True)
145
+ x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=origin_os32_block_rate,
146
+ expansion=6, block_id=15, skip_connection=True)
147
+
148
+ x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, rate=origin_os32_block_rate,
149
+ expansion=6, block_id=16, skip_connection=False)
150
+ # end of feature extractor
151
+
152
+ # expand the model structure to MobileNetV2 no top, so
153
+ # that we can load official imagenet pretrained weights
154
+
155
+ # no alpha applied to last conv as stated in the paper:
156
+ # if the width multiplier is greater than 1 we
157
+ # increase the number of output channels
158
+ if alpha > 1.0:
159
+ last_block_filters = _make_divisible(1280 * alpha, 8)
160
+ else:
161
+ last_block_filters = 1280
162
+
163
+ y = DeeplabConv2D(last_block_filters,
164
+ kernel_size=1,
165
+ use_bias=False,
166
+ name='Conv_1')(x)
167
+ y = CustomBatchNormalization(epsilon=1e-3,
168
+ momentum=0.999,
169
+ name='Conv_1_bn')(y)
170
+ y = ReLU(6., name='out_relu')(y)
171
+
172
+ # Ensure that the model takes into account
173
+ # any potential predecessors of `input_tensor`.
174
+ if input_tensor is not None:
175
+ inputs = get_source_inputs(input_tensor)
176
+ #else:
177
+ #inputs = img_input
178
+
179
+ # hardcode rows=224
180
+ rows = 224
181
+
182
+ model = Model(inputs, y, name='mobilenetv2_%0.2f_%s' % (alpha, rows))
183
+ # Load weights.
184
+ if weights == 'imagenet':
185
+ model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
186
+ str(alpha) + '_' + str(rows) + '_no_top' + '.h5')
187
+ weight_path = BACKBONE_WEIGHT_PATH + model_name
188
+ weights_path = get_file(
189
+ model_name, weight_path, cache_subdir='models')
190
+
191
+ model.load_weights(weights_path)
192
+
193
+ backbone_len = len(model.layers) - 3
194
+ # need to return feature map and skip connection,
195
+ # not the whole "no top" model
196
+ return x, skip, backbone_len
197
+
198
+
199
+ def Deeplabv3pMobileNetV2(input_shape=(512, 512, 3),
200
+ alpha=1.0,
201
+ weights='imagenet',
202
+ input_tensor=None,
203
+ num_classes=21,
204
+ OS=8):
205
+ """ Instantiates the Deeplabv3+ MobileNetV2 architecture
206
+ # Arguments
207
+ input_shape: shape of input image. format HxWxC
208
+ PASCAL VOC model was trained on (512,512,3) images
209
+ alpha: controls the width of the MobileNetV2 network. This is known as the
210
+ width multiplier in the MobileNetV2 paper.
211
+ - If `alpha` < 1.0, proportionally decreases the number
212
+ of filters in each layer.
213
+ - If `alpha` > 1.0, proportionally increases the number
214
+ of filters in each layer.
215
+ - If `alpha` = 1, default number of filters from the paper
216
+ are used at each layer.
217
+ Used only for mobilenetv2 backbone
218
+ weights: pretrained weights type
219
+ - imagenet: pre-trained on Imagenet
220
+ - None : random initialization
221
+ input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
222
+ to use as image input for the model.
223
+ num_classes: number of desired classes.
224
+ OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
225
+
226
+ # Returns
227
+ A Keras model instance.
228
+ """
229
+
230
+ if not (weights in {'imagenet', None}):
231
+ raise ValueError('The `weights` argument should be either '
232
+ '`imagenet` (pre-trained on Imagenet) or '
233
+ '`None` (random initialization)')
234
+ if input_tensor is None:
235
+ img_input = Input(shape=input_shape, name='image_input')
236
+ else:
237
+ img_input = input_tensor
238
+
239
+ # normalize input image
240
+ img_norm = Lambda(normalize, name='input_normalize')(img_input)
241
+
242
+ # backbone body for feature extract
243
+ x, skip_feature, backbone_len = MobileNetV2_body(img_norm, OS, alpha, weights=weights)
244
+
245
+ # ASPP block
246
+ x = ASPP_block(x, OS)
247
+
248
+ # Deeplabv3+ decoder for feature projection
249
+ x = Decoder_block(x, skip_feature)
250
+
251
+ # Final prediction conv block
252
+ x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
253
+ x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
254
+ x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
255
+ x = Softmax(name='Predictions/Softmax')(x)
256
+
257
+
258
+ # Ensure that the model takes into account
259
+ # any potential predecessors of `input_tensor`.
260
+ #if input_tensor is not None:
261
+ #inputs = get_source_inputs(input_tensor)
262
+ #else:
263
+ #inputs = img_input
264
+
265
+ model = Model(img_input, x, name='deeplabv3p_mobilenetv2')
266
+
267
+ return model, backbone_len
268
+
269
+
270
+ def Deeplabv3pLiteMobileNetV2(input_shape=(512, 512, 3),
271
+ alpha=1.0,
272
+ weights='imagenet',
273
+ input_tensor=None,
274
+ num_classes=21,
275
+ OS=8):
276
+ """ Instantiates the Deeplabv3+ MobileNetV2Lite architecture
277
+ # Arguments
278
+ input_shape: shape of input image. format HxWxC
279
+ PASCAL VOC model was trained on (512,512,3) images
280
+ alpha: controls the width of the MobileNetV2 network. This is known as the
281
+ width multiplier in the MobileNetV2 paper.
282
+ - If `alpha` < 1.0, proportionally decreases the number
283
+ of filters in each layer.
284
+ - If `alpha` > 1.0, proportionally increases the number
285
+ of filters in each layer.
286
+ - If `alpha` = 1, default number of filters from the paper
287
+ are used at each layer.
288
+ Used only for mobilenetv2 backbone
289
+ weights: pretrained weights type
290
+ - pascalvoc : pre-trained on PASCAL VOC
291
+ - imagenet: pre-trained on Imagenet
292
+ - None : random initialization
293
+ input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
294
+ to use as image input for the model.
295
+ num_classes: number of desired classes.
296
+ OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
297
+
298
+ # Returns
299
+ A Keras model instance.
300
+ # Raises
301
+ RuntimeError: If attempting to run this model with a
302
+ backend that does not support separable convolutions.
303
+ ValueError: in case of invalid argument for `weights` or `backbone`
304
+ """
305
+
306
+ if not (weights in {'pascalvoc', 'imagenet', None}):
307
+ raise ValueError('The `weights` argument should be either '
308
+ '`pascalvoc` (pre-trained on PASCAL VOC) '
309
+ '`imagenet` (pre-trained on Imagenet) or '
310
+ '`None` (random initialization)')
311
+
312
+ if input_tensor is None:
313
+ img_input = Input(shape=input_shape, name='image_input')
314
+ else:
315
+ img_input = input_tensor
316
+
317
+ # normalize input image
318
+ img_norm = Lambda(normalize, name='input_normalize')(img_input)
319
+
320
+ # backbone body for feature extract
321
+ x, _, backbone_len = MobileNetV2_body(img_norm, OS, alpha, weights=weights)
322
+
323
+ # use ASPP Lite block & no decode block
324
+ x = ASPP_Lite_block(x)
325
+
326
+ # Final prediction conv block
327
+ x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
328
+ x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
329
+ x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
330
+ x = Softmax(name='Predictions/Softmax')(x)
331
+
332
+
333
+ # Ensure that the model takes into account
334
+ # any potential predecessors of `input_tensor`.
335
+ #if input_tensor is not None:
336
+ #inputs = get_source_inputs(input_tensor)
337
+ #else:
338
+ #inputs = img_input
339
+
340
+ model = Model(img_input, x, name='deeplabv3p_mobilenetv2_lite')
341
+
342
+ # load weights
343
+ if weights == 'pascalvoc':
344
+ weights_path = get_file('deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5',
345
+ WEIGHTS_PATH_MOBILE,
346
+ cache_subdir='models')
347
+ model.load_weights(weights_path, by_name=True)
348
+ return model, backbone_len
349
+
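A quick smoke-test sketch that builds the Lite variant defined above without any pretrained weights:

```python
from tensorflow.keras.layers import Input
from deeplabv3p.models.deeplabv3p_mobilenetv2 import Deeplabv3pLiteMobileNetV2

input_tensor = Input(shape=(512, 512, 3), name='image_input')
model, backbone_len = Deeplabv3pLiteMobileNetV2(input_shape=(512, 512, 3),
                                                alpha=1.0,
                                                weights=None,
                                                input_tensor=input_tensor,
                                                num_classes=21,
                                                OS=16)
print(model.output_shape)              # (None, 512*512, 21) after the final Softmax
print('backbone layers:', backbone_len)
```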
models/deeplab/deeplabv3p/models/deeplabv3p_mobilenetv3.py ADDED
@@ -0,0 +1,912 @@
1
+
2
+ #!/usr/bin/env python3
3
+ # -*- coding: utf-8 -*-
4
+ """ Deeplabv3+ MobileNetV3(Large/Small) model for Keras.
5
+
6
+ # Reference Paper:
7
+ - [Encoder-Decoder with Atrous Separable Convolution
8
+ for Semantic Image Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
9
+ - [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244.pdf) (ICCV 2019)
10
+ """
11
+ import os, sys
12
+ import warnings
13
+
14
+ from keras_applications.imagenet_utils import _obtain_input_shape
15
+ from keras_applications.imagenet_utils import preprocess_input as _preprocess_input
16
+ from tensorflow.keras.utils import get_source_inputs, get_file
17
+ from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, Dense, GlobalAveragePooling2D, GlobalMaxPooling2D, Flatten, Softmax, Dropout, ZeroPadding2D
18
+ from tensorflow.keras.layers import BatchNormalization, Add, Multiply, Reshape
19
+ from tensorflow.keras.layers import Input, Activation, ReLU, Reshape, Lambda
20
+ from tensorflow.keras.models import Model
21
+ from tensorflow.keras import backend as K
22
+
23
+ sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..'))
24
+ from deeplabv3p.models.layers import DeeplabConv2D, DeeplabDepthwiseConv2D, CustomBatchNormalization, ASPP_block, ASPP_Lite_block, Decoder_block, normalize, img_resize
25
+
26
+
27
+ BASE_WEIGHT_PATH = ('https://github.com/DrSlink/mobilenet_v3_keras/'
28
+ 'releases/download/v1.0/')
29
+ WEIGHTS_HASHES = {
30
+ 'large_224_0.75_float': (
31
+ '765b44a33ad4005b3ac83185abf1d0eb',
32
+ 'c256439950195a46c97ede7c294261c6'),
33
+ 'large_224_1.0_float': (
34
+ '59e551e166be033d707958cf9e29a6a7',
35
+ '12c0a8442d84beebe8552addf0dcb950'),
36
+ 'large_minimalistic_224_1.0_float': (
37
+ '675e7b876c45c57e9e63e6d90a36599c',
38
+ 'c1cddbcde6e26b60bdce8e6e2c7cae54'),
39
+ 'small_224_0.75_float': (
40
+ 'cb65d4e5be93758266aa0a7f2c6708b7',
41
+ 'c944bb457ad52d1594392200b48b4ddb'),
42
+ 'small_224_1.0_float': (
43
+ '8768d4c2e7dee89b9d02b2d03d65d862',
44
+ '5bec671f47565ab30e540c257bba8591'),
45
+ 'small_minimalistic_224_1.0_float': (
46
+ '99cd97fb2fcdad2bf028eb838de69e37',
47
+ '1efbf7e822e03f250f45faa3c6bbe156'),
48
+ }
49
+
50
+
51
+ def correct_pad(backend, inputs, kernel_size):
52
+ """Returns a tuple for zero-padding for 2D convolution with downsampling.
53
+ # Arguments
54
+ input_size: An integer or tuple/list of 2 integers.
55
+ kernel_size: An integer or tuple/list of 2 integers.
56
+ # Returns
57
+ A tuple.
58
+ """
59
+ img_dim = 2 if backend.image_data_format() == 'channels_first' else 1
60
+ input_size = backend.int_shape(inputs)[img_dim:(img_dim + 2)]
61
+
62
+ if isinstance(kernel_size, int):
63
+ kernel_size = (kernel_size, kernel_size)
64
+
65
+ if input_size[0] is None:
66
+ adjust = (1, 1)
67
+ else:
68
+ adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
69
+
70
+ correct = (kernel_size[0] // 2, kernel_size[1] // 2)
71
+
72
+ return ((correct[0] - adjust[0], correct[0]),
73
+ (correct[1] - adjust[1], correct[1]))
74
+
75
+
76
+ def preprocess_input(x):
77
+ """
78
+ "mode" option description in preprocess_input
79
+ mode: One of "caffe", "tf" or "torch".
80
+ - caffe: will convert the images from RGB to BGR,
81
+ then will zero-center each color channel with
82
+ respect to the ImageNet dataset,
83
+ without scaling.
84
+ - tf: will scale pixels between -1 and 1,
85
+ sample-wise.
86
+ - torch: will scale pixels between 0 and 1 and then
87
+ will normalize each channel with respect to the
88
+ ImageNet dataset.
89
+ """
90
+ x = _preprocess_input(x, mode='tf', backend=K)
91
+ #x /= 255.
92
+ #mean = [0.485, 0.456, 0.406]
93
+ #std = [0.229, 0.224, 0.225]
94
+
95
+ #x[..., 0] -= mean[0]
96
+ #x[..., 1] -= mean[1]
97
+ #x[..., 2] -= mean[2]
98
+ #if std is not None:
99
+ #x[..., 0] /= std[0]
100
+ #x[..., 1] /= std[1]
101
+ #x[..., 2] /= std[2]
102
+
103
+ return x
104
+
105
+
106
+ def relu(x):
107
+ return ReLU()(x)
108
+
109
+
110
+ def hard_sigmoid(x):
111
+ return ReLU(6.)(x + 3.) * (1. / 6.)
112
+
113
+
114
+ def hard_swish(x):
115
+ return Multiply()([Activation(hard_sigmoid)(x), x])
116
+
117
+
118
+ # This function is taken from the original tf repo.
119
+ # It ensures that all layers have a channel number that is divisible by 8
120
+ # It can be seen here:
121
+ # https://github.com/tensorflow/models/blob/master/research/
122
+ # slim/nets/mobilenet/mobilenet.py
123
+
124
+ def _depth(v, divisor=8, min_value=None):
125
+ if min_value is None:
126
+ min_value = divisor
127
+ new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
128
+ # Make sure that round down does not go down by more than 10%.
129
+ if new_v < 0.9 * v:
130
+ new_v += divisor
131
+ return new_v
132
+
133
+
134
+ def _se_block(inputs, filters, se_ratio, prefix):
135
+ x = GlobalAveragePooling2D(name=prefix + 'squeeze_excite/AvgPool')(inputs)
136
+ if K.image_data_format() == 'channels_first':
137
+ x = Reshape((filters, 1, 1))(x)
138
+ else:
139
+ x = Reshape((1, 1, filters))(x)
140
+ x = DeeplabConv2D(_depth(filters * se_ratio),
141
+ kernel_size=1,
142
+ padding='same',
143
+ name=prefix + 'squeeze_excite/Conv')(x)
144
+ x = ReLU(name=prefix + 'squeeze_excite/Relu')(x)
145
+ x = DeeplabConv2D(filters,
146
+ kernel_size=1,
147
+ padding='same',
148
+ name=prefix + 'squeeze_excite/Conv_1')(x)
149
+ x = Activation(hard_sigmoid)(x)
150
+ #if K.backend() == 'theano':
151
+ ## For the Theano backend, we have to explicitly make
152
+ ## the excitation weights broadcastable.
153
+ #x = Lambda(
154
+ #lambda br: K.pattern_broadcast(br, [True, True, True, False]),
155
+ #output_shape=lambda input_shape: input_shape,
156
+ #name=prefix + 'squeeze_excite/broadcast')(x)
157
+ x = Multiply(name=prefix + 'squeeze_excite/Mul')([inputs, x])
158
+ return x
159
+
160
+
161
+ def _inverted_res_block(x, expansion, filters, kernel_size, stride,
162
+ se_ratio, activation, block_id, skip_connection=False, rate=1):
163
+ channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
164
+ shortcut = x
165
+ prefix = 'expanded_conv/'
166
+ infilters = K.int_shape(x)[channel_axis]
167
+ if block_id:
168
+ # Expand
169
+ prefix = 'expanded_conv_{}/'.format(block_id)
170
+ x = DeeplabConv2D(_depth(infilters * expansion),
171
+ kernel_size=1,
172
+ padding='same',
173
+ use_bias=False,
174
+ name=prefix + 'expand')(x)
175
+ x = CustomBatchNormalization(axis=channel_axis,
176
+ epsilon=1e-3,
177
+ momentum=0.999,
178
+ name=prefix + 'expand/BatchNorm')(x)
179
+ x = Activation(activation)(x)
180
+
181
+ #if stride == 2:
182
+ #x = ZeroPadding2D(padding=correct_pad(K, x, kernel_size),
183
+ #name=prefix + 'depthwise/pad')(x)
184
+ x = DeeplabDepthwiseConv2D(kernel_size,
185
+ strides=stride,
186
+ padding='same',# if stride == 1 else 'valid',
187
+ dilation_rate=(rate, rate),
188
+ use_bias=False,
189
+ name=prefix + 'depthwise/Conv')(x)
190
+ x = CustomBatchNormalization(axis=channel_axis,
191
+ epsilon=1e-3,
192
+ momentum=0.999,
193
+ name=prefix + 'depthwise/BatchNorm')(x)
194
+ x = Activation(activation)(x)
195
+
196
+ if se_ratio:
197
+ x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix)
198
+
199
+ x = DeeplabConv2D(filters,
200
+ kernel_size=1,
201
+ padding='same',
202
+ use_bias=False,
203
+ name=prefix + 'project')(x)
204
+ x = CustomBatchNormalization(axis=channel_axis,
205
+ epsilon=1e-3,
206
+ momentum=0.999,
207
+ name=prefix + 'project/BatchNorm')(x)
208
+
209
+ #if stride == 1 and infilters == filters:
210
+ #x = Add(name=prefix + 'Add')([shortcut, x])
211
+ if skip_connection:
212
+ x = Add(name=prefix + 'Add')([shortcut, x])
213
+ return x
214
+
215
+
216
+ def MobileNetV3(stack_fn,
217
+ last_point_ch,
218
+ input_shape=None,
219
+ alpha=1.0,
220
+ model_type='large',
221
+ minimalistic=False,
222
+ include_top=True,
223
+ weights='imagenet',
224
+ input_tensor=None,
225
+ classes=1000,
226
+ pooling=None,
227
+ dropout_rate=0.2,
228
+ **kwargs):
229
+ """Instantiates the MobileNetV3 architecture.
230
+ # Arguments
231
+ stack_fn: a function that returns output tensor for the
232
+ stacked residual blocks.
233
+ last_point_ch: number channels at the last layer (before top)
234
+ input_shape: optional shape tuple, to be specified if you would
235
+ like to use a model with an input img resolution that is not
236
+ (224, 224, 3).
237
+ It should have exactly 3 inputs channels (224, 224, 3).
238
+ You can also omit this option if you would like
239
+ to infer input_shape from an input_tensor.
240
+ If you choose to include both input_tensor and input_shape then
241
+ input_shape will be used if they match, if the shapes
242
+ do not match then we will throw an error.
243
+ E.g. `(160, 160, 3)` would be one valid value.
244
+ alpha: controls the width of the network. This is known as the
245
+ depth multiplier in the MobileNetV3 paper, but the name is kept for
246
+ consistency with MobileNetV1 in Keras.
247
+ - If `alpha` < 1.0, proportionally decreases the number
248
+ of filters in each layer.
249
+ - If `alpha` > 1.0, proportionally increases the number
250
+ of filters in each layer.
251
+ - If `alpha` = 1, default number of filters from the paper
252
+ are used at each layer.
253
+ model_type: MobileNetV3 is defined as two models: large and small. These
254
+ models are targeted at high and low resource use cases respectively.
255
+ minimalistic: In addition to large and small models this module also contains
256
+ so-called minimalistic models, these models have the same per-layer
257
+ dimensions characteristic as MobilenetV3 however, they don't utilize any
258
+ of the advanced blocks (squeeze-and-excite units, hard-swish, and 5x5
259
+ convolutions). While these models are less efficient on CPU, they are
260
+ much more performant on GPU/DSP.
261
+ include_top: whether to include the fully-connected
262
+ layer at the top of the network.
263
+ weights: one of `None` (random initialization),
264
+ 'imagenet' (pre-training on ImageNet),
265
+ or the path to the weights file to be loaded.
266
+ input_tensor: optional Keras tensor (i.e. output of
267
+ `layers.Input()`)
268
+ to use as image input for the model.
269
+ classes: optional number of classes to classify images
270
+ into, only to be specified if `include_top` is True, and
271
+ if no `weights` argument is specified.
272
+ pooling: optional pooling mode for feature extraction
273
+ when `include_top` is `False`.
274
+ - `None` means that the output of the model will be
275
+ the 4D tensor output of the
276
+ last convolutional layer.
277
+ - `avg` means that global average pooling
278
+ will be applied to the output of the
279
+ last convolutional layer, and thus
280
+ the output of the model will be a 2D tensor.
281
+ - `max` means that global max pooling will
282
+ be applied.
283
+ dropout_rate: fraction of the input units to drop on the last layer
284
+ # Returns
285
+ A Keras model instance.
286
+ # Raises
287
+ ValueError: in case of invalid model type, argument for `weights`,
288
+ or invalid input shape when weights='imagenet'
289
+ """
290
+
291
+ if not (weights in {'imagenet', None} or os.path.exists(weights)):
292
+ raise ValueError('The `weights` argument should be either '
293
+ '`None` (random initialization), `imagenet` '
294
+ '(pre-training on ImageNet), '
295
+ 'or the path to the weights file to be loaded.')
296
+
297
+ if weights == 'imagenet' and include_top and classes != 1000:
298
+ raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
299
+ 'as true, `classes` should be 1000')
300
+
301
+ # Determine proper input shape
302
+ input_shape = _obtain_input_shape(input_shape,
303
+ default_size=224,
304
+ min_size=32,
305
+ data_format=K.image_data_format(),
306
+ require_flatten=include_top,
307
+ weights=weights)
308
+
309
+ # If input_shape and input_tensor are both None, use the standard shape
310
+ if input_shape is None and input_tensor is None:
311
+ input_shape = (None, None, 3)
312
+
313
+ if K.image_data_format() == 'channels_last':
314
+ row_axis, col_axis = (0, 1)
315
+ else:
316
+ row_axis, col_axis = (1, 2)
317
+ rows = input_shape[row_axis]
318
+ cols = input_shape[col_axis]
319
+ if rows and cols and (rows < 32 or cols < 32):
320
+ raise ValueError('Input size must be at least 32x32; got `input_shape=' +
321
+ str(input_shape) + '`')
322
+ if weights == 'imagenet':
323
+ if minimalistic is False and alpha not in [0.75, 1.0] \
324
+ or minimalistic is True and alpha != 1.0:
325
+ raise ValueError('If imagenet weights are being loaded, '
326
+ 'alpha can be one of `0.75`, `1.0` for non minimalistic'
327
+ ' or `1.0` for minimalistic only.')
328
+
329
+ if rows != cols or rows != 224:
330
+ warnings.warn('`input_shape` is undefined or non-square, '
331
+ 'or `rows` is not 224.'
332
+ ' Weights for input shape (224, 224) will be'
333
+ ' loaded as the default.')
334
+
335
+ if input_tensor is None:
336
+ img_input = Input(shape=input_shape)
337
+ else:
338
+ #if not K.is_keras_tensor(input_tensor):
339
+ #img_input = Input(tensor=input_tensor, shape=input_shape)
340
+ #else:
341
+ #img_input = input_tensor
342
+ img_input = input_tensor
343
+
344
+ channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
345
+
346
+ if minimalistic:
347
+ kernel = 3
348
+ activation = relu
349
+ se_ratio = None
350
+ else:
351
+ kernel = 5
352
+ activation = hard_swish
353
+ se_ratio = 0.25
354
+
355
+ x = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
356
+ name='Conv_pad')(img_input)
357
+ x = DeeplabConv2D(16,
358
+ kernel_size=3,
359
+ strides=(2, 2),
360
+ padding='valid',
361
+ use_bias=False,
362
+ name='Conv')(x)
363
+ x = CustomBatchNormalization(axis=channel_axis,
364
+ epsilon=1e-3,
365
+ momentum=0.999,
366
+ name='Conv/BatchNorm')(x)
367
+ x = Activation(activation)(x)
368
+
369
+ x, skip_feature = stack_fn(x, kernel, activation, se_ratio)
370
+ # keep the end of the feature extractor as the final feature map
371
+ final_feature = x
372
+
373
+ last_conv_ch = _depth(K.int_shape(x)[channel_axis] * 6)
374
+
375
+ # if the width multiplier is greater than 1 we
376
+ # increase the number of output channels
377
+ if alpha > 1.0:
378
+ last_point_ch = _depth(last_point_ch * alpha)
379
+
380
+ x = DeeplabConv2D(last_conv_ch,
381
+ kernel_size=1,
382
+ padding='same',
383
+ use_bias=False,
384
+ name='Conv_1')(x)
385
+ x = CustomBatchNormalization(axis=channel_axis,
386
+ epsilon=1e-3,
387
+ momentum=0.999,
388
+ name='Conv_1/BatchNorm')(x)
389
+ x = Activation(activation)(x)
390
+
391
+ if include_top:
392
+ x = GlobalAveragePooling2D()(x)
393
+ if channel_axis == 1:
394
+ x = Reshape((last_conv_ch, 1, 1))(x)
395
+ else:
396
+ x = Reshape((1, 1, last_conv_ch))(x)
397
+ x = DeeplabConv2D(last_point_ch,
398
+ kernel_size=1,
399
+ padding='same',
400
+ name='Conv_2')(x)
401
+ x = Activation(activation)(x)
402
+ if dropout_rate > 0:
403
+ x = Dropout(dropout_rate)(x)
404
+ x = DeeplabConv2D(classes,
405
+ kernel_size=1,
406
+ padding='same',
407
+ name='Logits')(x)
408
+ x = Flatten()(x)
409
+ x = Softmax(name='Predictions/Softmax')(x)
410
+ else:
411
+ if pooling == 'avg':
412
+ x = GlobalAveragePooling2D(name='avg_pool')(x)
413
+ elif pooling == 'max':
414
+ x = GlobalMaxPooling2D(name='max_pool')(x)
415
+ # Ensure that the model takes into account
416
+ # any potential predecessors of `input_tensor`.
417
+ if input_tensor is not None:
418
+ inputs = get_source_inputs(input_tensor)
419
+ else:
420
+ inputs = img_input
421
+
422
+ # Create model.
423
+ model = Model(inputs, x, name='MobilenetV3' + model_type)
424
+
425
+ # Load weights.
426
+ if weights == 'imagenet':
427
+ model_name = "{}{}_224_{}_float".format(
428
+ model_type, '_minimalistic' if minimalistic else '', str(alpha))
429
+ if include_top:
430
+ file_name = 'weights_mobilenet_v3_' + model_name + '.h5'
431
+ file_hash = WEIGHTS_HASHES[model_name][0]
432
+ else:
433
+ file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5'
434
+ file_hash = WEIGHTS_HASHES[model_name][1]
435
+ weights_path = get_file(file_name,
436
+ BASE_WEIGHT_PATH + file_name,
437
+ cache_subdir='models',
438
+ file_hash=file_hash)
439
+ model.load_weights(weights_path)
440
+ elif weights is not None:
441
+ model.load_weights(weights)
442
+
443
+ #return model
444
+ return final_feature, skip_feature, len(model.layers) - 3
445
+
446
+
447
+
448
+ def MobileNetV3Small(input_shape=None,
449
+ alpha=1.0,
450
+ OS=8,
451
+ minimalistic=False,
452
+ include_top=True,
453
+ weights='imagenet',
454
+ input_tensor=None,
455
+ classes=1000,
456
+ pooling=None,
457
+ dropout_rate=0.2,
458
+ **kwargs):
459
+ """
460
+ Modified MobileNetV3Small feature extractor body
461
+ with specified output stride and skip level feature
462
+ """
463
+ if OS == 8:
464
+ origin_os16_stride = 1
465
+ origin_os16_block_rate = 2
466
+ origin_os32_stride = 1
467
+ origin_os32_block_rate = 4
468
+ elif OS == 16:
469
+ origin_os16_stride = 2
470
+ origin_os16_block_rate = 1
471
+ origin_os32_stride = 1
472
+ origin_os32_block_rate = 2
473
+ elif OS == 32:
474
+ origin_os16_stride = 2
475
+ origin_os16_block_rate = 1
476
+ origin_os32_stride = 2
477
+ origin_os32_block_rate = 1
478
+ else:
479
+ raise ValueError('invalid output stride', OS)
480
+
481
+ def stack_fn(x, kernel, activation, se_ratio):
482
+ def depth(d):
483
+ return _depth(d * alpha)
484
+
485
+ x = _inverted_res_block(x, expansion=1, filters=depth(16), kernel_size=3,
486
+ stride=2, se_ratio=se_ratio, activation=relu, block_id=0, skip_connection=False)
487
+ # skip level feature, with output stride = 4
488
+ skip = x
489
+
490
+ x = _inverted_res_block(x, expansion=72. / 16, filters=depth(24), kernel_size=3,
491
+ stride=2, se_ratio=None, activation=relu, block_id=1, skip_connection=False)
492
+ x = _inverted_res_block(x, expansion=88. / 24, filters=depth(24), kernel_size=3,
493
+ stride=1, se_ratio=None, activation=relu, block_id=2, skip_connection=True)
494
+
495
+ # original output stride changes to 16 from here, so we start to control block stride and dilation rate
496
+ x = _inverted_res_block(x, expansion=4, filters=depth(40), kernel_size=kernel,
497
+ stride=origin_os16_stride, se_ratio=se_ratio, activation=activation, block_id=3, skip_connection=False) # origin: stride=2!
498
+ x = _inverted_res_block(x, expansion=6, filters=depth(40), kernel_size=kernel,
499
+ stride=1, se_ratio=se_ratio, activation=activation, block_id=4, skip_connection=True, rate=origin_os16_block_rate)
500
+ x = _inverted_res_block(x, expansion=6, filters=depth(40), kernel_size=kernel,
501
+ stride=1, se_ratio=se_ratio, activation=activation, block_id=5, skip_connection=True, rate=origin_os16_block_rate)
502
+ x = _inverted_res_block(x, expansion=3, filters=depth(48), kernel_size=kernel,
503
+ stride=1, se_ratio=se_ratio, activation=activation, block_id=6, skip_connection=False, rate=origin_os16_block_rate)
504
+ x = _inverted_res_block(x, expansion=3, filters=depth(48), kernel_size=kernel,
505
+ stride=1, se_ratio=se_ratio, activation=activation, block_id=7, skip_connection=True, rate=origin_os16_block_rate)
506
+ # original output stride changes to 32 from here
507
+ x = _inverted_res_block(x, expansion=6, filters=depth(96), kernel_size=kernel,
508
+ stride=origin_os32_stride, se_ratio=se_ratio, activation=activation, block_id=8, skip_connection=False, rate=origin_os16_block_rate) # origin: stride=2!
509
+ x = _inverted_res_block(x, expansion=6, filters=depth(96), kernel_size=kernel,
510
+ stride=1, se_ratio=se_ratio, activation=activation, block_id=9, skip_connection=True, rate=origin_os32_block_rate)
511
+ x = _inverted_res_block(x, expansion=6, filters=depth(96), kernel_size=kernel,
512
+ stride=1, se_ratio=se_ratio, activation=activation, block_id=10, skip_connection=True, rate=origin_os32_block_rate)
513
+ return x, skip
514
+
515
+ return MobileNetV3(stack_fn,
516
+ 1024,
517
+ input_shape,
518
+ alpha,
519
+ 'small',
520
+ minimalistic,
521
+ include_top,
522
+ weights,
523
+ input_tensor,
524
+ classes,
525
+ pooling,
526
+ dropout_rate,
527
+ **kwargs)
528
+
529
+
530
+ def MobileNetV3Large(input_shape=None,
531
+ alpha=1.0,
532
+ OS=8,
533
+ minimalistic=False,
534
+ include_top=True,
535
+ weights='imagenet',
536
+ input_tensor=None,
537
+ classes=1000,
538
+ pooling=None,
539
+ dropout_rate=0.2,
540
+ **kwargs):
541
+ """
542
+ Modified MobileNetV3Large feature extractor body
543
+ with specified output stride and skip level feature
544
+ """
545
+ if OS == 8:
546
+ origin_os16_stride = 1
547
+ origin_os16_block_rate = 2
548
+ origin_os32_stride = 1
549
+ origin_os32_block_rate = 4
550
+ elif OS == 16:
551
+ origin_os16_stride = 2
552
+ origin_os16_block_rate = 1
553
+ origin_os32_stride = 1
554
+ origin_os32_block_rate = 2
555
+ elif OS == 32:
556
+ origin_os16_stride = 2
557
+ origin_os16_block_rate = 1
558
+ origin_os32_stride = 2
559
+ origin_os32_block_rate = 1
560
+ else:
561
+ raise ValueError('invalid output stride', OS)
562
+
563
+ def stack_fn(x, kernel, activation, se_ratio):
564
+ def depth(d):
565
+ return _depth(d * alpha)
566
+ x = _inverted_res_block(x, expansion=1, filters=depth(16), kernel_size=3,
567
+ stride=1, se_ratio=None, activation=relu, block_id=0, skip_connection=True)
568
+ x = _inverted_res_block(x, expansion=4, filters=depth(24), kernel_size=3,
569
+ stride=2, se_ratio=None, activation=relu, block_id=1, skip_connection=False)
570
+ x = _inverted_res_block(x, expansion=3, filters=depth(24), kernel_size=3,
571
+ stride=1, se_ratio=None, activation=relu, block_id=2, skip_connection=True)
572
+ # skip level feature, with output stride = 4
573
+ skip = x
574
+
575
+ x = _inverted_res_block(x, expansion=3, filters=depth(40), kernel_size=kernel,
576
+ stride=2, se_ratio=se_ratio, activation=relu, block_id=3, skip_connection=False)
577
+ x = _inverted_res_block(x, expansion=3, filters=depth(40), kernel_size=kernel,
578
+ stride=1, se_ratio=se_ratio, activation=relu, block_id=4, skip_connection=True)
579
+ x = _inverted_res_block(x, expansion=3, filters=depth(40), kernel_size=kernel,
580
+ stride=1, se_ratio=se_ratio, activation=relu, block_id=5, skip_connection=True)
581
+
582
+ # original output stride changes to 16 from here, so we start to control block stride and dilation rate
583
+ x = _inverted_res_block(x, expansion=6, filters=depth(80), kernel_size=3,
584
+ stride=origin_os16_stride, se_ratio=None, activation=activation, block_id=6, skip_connection=False) # origin: stride=2!
585
+ x = _inverted_res_block(x, expansion=2.5, filters=depth(80), kernel_size=3,
586
+ stride=1, se_ratio=None, activation=activation, block_id=7, skip_connection=True, rate=origin_os16_block_rate)
587
+ x = _inverted_res_block(x, expansion=2.3, filters=depth(80), kernel_size=3,
588
+ stride=1, se_ratio=None, activation=activation, block_id=8, skip_connection=True, rate=origin_os16_block_rate)
589
+ x = _inverted_res_block(x, expansion=2.3, filters=depth(80), kernel_size=3,
590
+ stride=1, se_ratio=None, activation=activation, block_id=9, skip_connection=True, rate=origin_os16_block_rate)
591
+ x = _inverted_res_block(x, expansion=6, filters=depth(112), kernel_size=3,
592
+ stride=1, se_ratio=se_ratio, activation=activation, block_id=10, skip_connection=False, rate=origin_os16_block_rate)
593
+ x = _inverted_res_block(x, expansion=6, filters=depth(112), kernel_size=3,
594
+ stride=1, se_ratio=se_ratio, activation=activation, block_id=11, skip_connection=True, rate=origin_os16_block_rate)
595
+ # original output stride changes to 32 from here
596
+ x = _inverted_res_block(x, expansion=6, filters=depth(160), kernel_size=kernel,
597
+ stride=origin_os32_stride, se_ratio=se_ratio,
598
+ activation=activation, block_id=12, skip_connection=False, rate=origin_os16_block_rate) # origin: stride=2!
599
+ x = _inverted_res_block(x, expansion=6, filters=depth(160), kernel_size=kernel,
600
+ stride=1, se_ratio=se_ratio,
601
+ activation=activation, block_id=13, skip_connection=True, rate=origin_os32_block_rate)
602
+ x = _inverted_res_block(x, expansion=6, filters=depth(160), kernel_size=kernel,
603
+ stride=1, se_ratio=se_ratio,
604
+ activation=activation, block_id=14, skip_connection=True, rate=origin_os32_block_rate)
605
+ return x, skip
606
+
607
+ return MobileNetV3(stack_fn,
608
+ 1280,
609
+ input_shape,
610
+ alpha,
611
+ 'large',
612
+ minimalistic,
613
+ include_top,
614
+ weights,
615
+ input_tensor,
616
+ classes,
617
+ pooling,
618
+ dropout_rate,
619
+ **kwargs)
620
+
621
+
622
+ setattr(MobileNetV3Small, '__doc__', MobileNetV3.__doc__)
623
+ setattr(MobileNetV3Large, '__doc__', MobileNetV3.__doc__)
624
+
625
+
626
+
627
+ def Deeplabv3pMobileNetV3Large(input_shape=(512, 512, 3),
628
+ alpha=1.0,
629
+ weights='imagenet',
630
+ input_tensor=None,
631
+ num_classes=21,
632
+ OS=8):
633
+ """ Instantiates the Deeplabv3+ MobileNetV3Large architecture
634
+ # Arguments
635
+ input_shape: shape of input image. format HxWxC
636
+ PASCAL VOC model was trained on (512,512,3) images
637
+ alpha: controls the width of the MobileNetV3Large network. This is known as the
638
+ width multiplier in the MobileNetV3Large paper.
639
+ - If `alpha` < 1.0, proportionally decreases the number
640
+ of filters in each layer.
641
+ - If `alpha` > 1.0, proportionally increases the number
642
+ of filters in each layer.
643
+ - If `alpha` = 1, default number of filters from the paper
644
+ are used at each layer.
645
+ weights: pretrained weights type
646
+ - imagenet: pre-trained on Imagenet
647
+ - None : random initialization
648
+ input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
649
+ to use as image input for the model.
650
+ num_classes: number of desired classes.
651
+ OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
652
+
653
+ # Returns
654
+ A Keras model instance.
655
+ """
656
+
657
+ if not (weights in {'imagenet', None}):
658
+ raise ValueError('The `weights` argument should be either '
659
+ '`imagenet` (pre-trained on Imagenet) or '
660
+ '`None` (random initialization)')
661
+
662
+ if input_tensor is None:
663
+ img_input = Input(shape=input_shape, name='image_input')
664
+ else:
665
+ img_input = input_tensor
666
+
667
+ # normalize input image
668
+ img_norm = Lambda(normalize, name='input_normalize')(img_input)
669
+
670
+ # backbone body for feature extract
671
+ x, skip_feature, backbone_len = MobileNetV3Large(include_top=False, input_tensor=img_norm, weights=weights, OS=OS, alpha=alpha)
672
+
673
+ # ASPP block
674
+ x = ASPP_block(x, OS)
675
+
676
+ # Deeplabv3+ decoder for feature projection
677
+ x = Decoder_block(x, skip_feature)
678
+
679
+ # Final prediction conv block
680
+ x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
681
+ x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
682
+ x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
683
+ x = Softmax(name='Predictions/Softmax')(x)
684
+
685
+ # Ensure that the model takes into account
686
+ # any potential predecessors of `input_tensor`.
687
+ #if input_tensor is not None:
688
+ #inputs = get_source_inputs(input_tensor)
689
+ #else:
690
+ #inputs = img_input
691
+ model = Model(img_input, x, name='deeplabv3p_mobilenetv3large')
692
+
693
+ return model, backbone_len
694
+
695
+
696
+ def Deeplabv3pLiteMobileNetV3Large(input_shape=(512, 512, 3),
697
+ alpha=1.0,
698
+ weights='imagenet',
699
+ input_tensor=None,
700
+ num_classes=21,
701
+ OS=8):
702
+ """ Instantiates the Deeplabv3+ MobileNetV3LargeLite architecture
703
+ # Arguments
704
+ input_shape: shape of input image. format HxWxC
705
+ PASCAL VOC model was trained on (512,512,3) images
706
+ alpha: controls the width of the MobileNetV3Large network. This is known as the
707
+ width multiplier in the MobileNetV3Large paper.
708
+ - If `alpha` < 1.0, proportionally decreases the number
709
+ of filters in each layer.
710
+ - If `alpha` > 1.0, proportionally increases the number
711
+ of filters in each layer.
712
+ - If `alpha` = 1, default number of filters from the paper
713
+ are used at each layer.
714
+ weights: pretrained weights type
715
+ - imagenet: pre-trained on Imagenet
716
+ - None : random initialization
717
+ input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
718
+ to use as image input for the model.
719
+ num_classes: number of desired classes.
720
+ OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
721
+
722
+ # Returns
723
+ A Keras model instance.
724
+ # Raises
725
+ RuntimeError: If attempting to run this model with a
726
+ backend that does not support separable convolutions.
727
+ ValueError: in case of invalid argument for `weights` or `backbone`
728
+ """
729
+ if not (weights in {'imagenet', None}):
730
+ raise ValueError('The `weights` argument should be either '
731
+ '`imagenet` (pre-trained on Imagenet) or '
732
+ '`None` (random initialization)')
733
+
734
+ if input_tensor is None:
735
+ img_input = Input(shape=input_shape, name='image_input')
736
+ else:
737
+ img_input = input_tensor
738
+
739
+ # normalize input image
740
+ img_norm = Lambda(normalize, name='input_normalize')(img_input)
741
+
742
+ # backbone body for feature extract
743
+ x, _, backbone_len = MobileNetV3Large(include_top=False, input_tensor=img_norm, weights=weights, OS=OS, alpha=alpha)
744
+
745
+ # use ASPP Lite block & no decoder block
746
+ x = ASPP_Lite_block(x)
747
+
748
+ # Final prediction conv block
749
+ x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
750
+ x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
751
+ x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
752
+ x = Softmax(name='Predictions/Softmax')(x)
753
+
754
+ # Ensure that the model takes into account
755
+ # any potential predecessors of `input_tensor`.
756
+ #if input_tensor is not None:
757
+ #inputs = get_source_inputs(input_tensor)
758
+ #else:
759
+ #inputs = img_input
760
+ model = Model(img_input, x, name='deeplabv3p_mobilenetv3large_lite')
761
+
762
+ return model, backbone_len
763
+
764
+
765
+
766
+ def Deeplabv3pMobileNetV3Small(input_shape=(512, 512, 3),
767
+ alpha=1.0,
768
+ weights='imagenet',
769
+ input_tensor=None,
770
+ num_classes=21,
771
+ OS=8):
772
+ """ Instantiates the Deeplabv3+ MobileNetV3Small architecture
773
+ # Arguments
774
+ input_shape: shape of input image. format HxWxC
775
+ PASCAL VOC model was trained on (512,512,3) images
776
+ alpha: controls the width of the MobileNetV3Small network. This is known as the
777
+ width multiplier in the MobileNetV3Small paper.
778
+ - If `alpha` < 1.0, proportionally decreases the number
779
+ of filters in each layer.
780
+ - If `alpha` > 1.0, proportionally increases the number
781
+ of filters in each layer.
782
+ - If `alpha` = 1, default number of filters from the paper
783
+ are used at each layer.
784
+ weights: pretrained weights type
785
+ - imagenet: pre-trained on Imagenet
786
+ - None : random initialization
787
+ input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
788
+ to use as image input for the model.
789
+ num_classes: number of desired classes
790
+ OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
791
+
792
+ # Returns
793
+ A Keras model instance.
794
+ """
795
+ if not (weights in {'imagenet', None}):
796
+ raise ValueError('The `weights` argument should be either '
797
+ '`imagenet` (pre-trained on Imagenet) or '
798
+ '`None` (random initialization)')
799
+
800
+ if input_tensor is None:
801
+ img_input = Input(shape=input_shape, name='image_input')
802
+ else:
803
+ img_input = input_tensor
804
+
805
+ # normalize input image
806
+ img_norm = Lambda(normalize, name='input_normalize')(img_input)
807
+
808
+ # backbone body for feature extract
809
+ x, skip_feature, backbone_len = MobileNetV3Small(include_top=False, input_tensor=img_norm, weights=weights, OS=OS, alpha=alpha)
810
+
811
+ # ASPP block
812
+ x = ASPP_block(x, OS)
813
+
814
+ # Deeplabv3+ decoder for feature projection
815
+ x = Decoder_block(x, skip_feature)
816
+
817
+ # Final prediction conv block
818
+ x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
819
+ x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
820
+ x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
821
+ x = Softmax(name='Predictions/Softmax')(x)
822
+
823
+ # Ensure that the model takes into account
824
+ # any potential predecessors of `input_tensor`.
825
+ #if input_tensor is not None:
826
+ #inputs = get_source_inputs(input_tensor)
827
+ #else:
828
+ #inputs = img_input
829
+ model = Model(img_input, x, name='deeplabv3p_mobilenetv3small')
830
+
831
+ return model, backbone_len
832
+
833
+
834
+
835
+ def Deeplabv3pLiteMobileNetV3Small(input_shape=(512, 512, 3),
836
+ alpha=1.0,
837
+ weights='imagenet',
838
+ input_tensor=None,
839
+ num_classes=21,
840
+ OS=8):
841
+ """ Instantiates the Deeplabv3+ MobileNetV3SmallLite architecture
842
+ # Arguments
843
+ input_shape: shape of input image. format HxWxC
844
+ PASCAL VOC model was trained on (512,512,3) images
845
+ alpha: controls the width of the MobileNetV3Small network. This is known as the
846
+ width multiplier in the MobileNetV3Small paper.
847
+ - If `alpha` < 1.0, proportionally decreases the number
848
+ of filters in each layer.
849
+ - If `alpha` > 1.0, proportionally increases the number
850
+ of filters in each layer.
851
+ - If `alpha` = 1, default number of filters from the paper
852
+ are used at each layer.
853
+ weights: pretrained weights type
854
+ - imagenet: pre-trained on Imagenet
855
+ - None : random initialization
856
+ input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
857
+ to use as image input for the model.
858
+ num_classes: number of desired classes.
859
+ OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
860
+
861
+ # Returns
862
+ A Keras model instance.
863
+ # Raises
864
+ RuntimeError: If attempting to run this model with a
865
+ backend that does not support separable convolutions.
866
+ ValueError: in case of invalid argument for `weights` or `backbone`
867
+ """
868
+ if not (weights in {'imagenet', None}):
869
+ raise ValueError('The `weights` argument should be either '
870
+ '`imagenet` (pre-trained on Imagenet) or '
871
+ '`None` (random initialization)')
872
+
873
+ if input_tensor is None:
874
+ img_input = Input(shape=input_shape, name='image_input')
875
+ else:
876
+ img_input = input_tensor
877
+
878
+ # normalize input image
879
+ img_norm = Lambda(normalize, name='input_normalize')(img_input)
880
+
881
+ # backbone body for feature extract
882
+ x, _, backbone_len = MobileNetV3Small(include_top=False, input_tensor=img_norm, weights=weights, OS=OS, alpha=alpha)
883
+
884
+ # use ASPP Lite block & no decoder block
885
+ x = ASPP_Lite_block(x)
886
+
887
+ # Final prediction conv block
888
+ x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
889
+ x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
890
+ x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
891
+ x = Softmax(name='Predictions/Softmax')(x)
892
+
893
+ # Ensure that the model takes into account
894
+ # any potential predecessors of `input_tensor`.
895
+ #if input_tensor is not None:
896
+ #inputs = get_source_inputs(input_tensor)
897
+ #else:
898
+ #inputs = img_input
899
+ model = Model(img_input, x, name='deeplabv3p_mobilenetv3small_lite')
900
+
901
+ return model, backbone_len
902
+
903
+
904
+
905
+ if __name__ == '__main__':
906
+ input_tensor = Input(shape=(512, 512, 3), name='image_input')
907
+ model, backbone_len = Deeplabv3pMobileNetV3Small(input_tensor=input_tensor,
908
+ alpha=1.0,
909
+ weights=None,
910
+ num_classes=21,
911
+ OS=8)
912
+ model.summary()
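The four `Deeplabv3pMobileNetV3*` constructors above all return a `(model, backbone_len)` pair rather than a bare model, so a caller can freeze the ImageNet-pretrained backbone and train only the ASPP/decoder head. Below is a minimal usage sketch, assuming the `deeplabv3p` package is importable; the optimizer and loss are placeholder choices for illustration, not the repository's actual training configuration.

```python
from tensorflow.keras.optimizers import Adam
from deeplabv3p.models.deeplabv3p_mobilenetv3 import Deeplabv3pMobileNetV3Large

# Build the model; weights=None avoids the ImageNet download for this sketch.
model, backbone_len = Deeplabv3pMobileNetV3Large(input_shape=(512, 512, 3),
                                                 weights=None,
                                                 num_classes=21,
                                                 OS=16)

# backbone_len marks where the feature extractor ends, so the first
# backbone_len layers can be frozen for head-only fine-tuning.
for layer in model.layers[:backbone_len]:
    layer.trainable = False

# Placeholder training setup (an assumption, not the repo's training script):
# the model output is (H*W, num_classes) softmax, so labels must be
# flattened one-hot maps of the same shape.
model.compile(optimizer=Adam(learning_rate=1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()
```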
models/deeplab/deeplabv3p/models/deeplabv3p_peleenet.py ADDED
@@ -0,0 +1,428 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """ Deeplabv3+ PeleeNet model for Keras.
4
+
5
+ # Reference Paper:
6
+ - [Densely Connected Convolutional Networks](https://arxiv.org/pdf/1608.06993.pdf)
7
+ - [Pelee: A Real-Time Object Detection System on Mobile Devices](https://arxiv.org/abs/1804.06882)
8
+ """
9
+ import os, sys
10
+ import warnings
11
+
12
+ from keras_applications.imagenet_utils import _obtain_input_shape
13
+ from keras_applications.imagenet_utils import preprocess_input as _preprocess_input
14
+ from tensorflow.keras.utils import get_source_inputs, get_file
15
+ from tensorflow.keras.models import Model
16
+ from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, ReLU, \
17
+ MaxPooling2D, Concatenate, AveragePooling2D, Flatten, Dropout, Dense, GlobalAveragePooling2D, GlobalMaxPooling2D, Softmax, Reshape, Lambda
18
+ from tensorflow.keras import backend as K
19
+
20
+ sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..'))
21
+ from deeplabv3p.models.layers import DeeplabConv2D, CustomBatchNormalization, ASPP_block, ASPP_Lite_block, Decoder_block, normalize, img_resize
22
+
23
+
24
+ BASE_WEIGHT_PATH = (
25
+ 'https://github.com/david8862/tf-keras-image-classifier/'
26
+ 'releases/download/v1.0.0/')
27
+
28
+
29
+ def preprocess_input(x):
30
+ """
31
+ "mode" option description in preprocess_input
32
+ mode: One of "caffe", "tf" or "torch".
33
+ - caffe: will convert the images from RGB to BGR,
34
+ then will zero-center each color channel with
35
+ respect to the ImageNet dataset,
36
+ without scaling.
37
+ - tf: will scale pixels between -1 and 1,
38
+ sample-wise.
39
+ - torch: will scale pixels between 0 and 1 and then
40
+ will normalize each channel with respect to the
41
+ ImageNet dataset.
42
+ """
43
+ #x = _preprocess_input(x, mode='tf', backend=K)
44
+ x /= 255.
45
+ mean = [0.485, 0.456, 0.406]
46
+ std = [0.229, 0.224, 0.225]
47
+
48
+ x[..., 0] -= mean[0]
49
+ x[..., 1] -= mean[1]
50
+ x[..., 2] -= mean[2]
51
+ if std is not None:
52
+ x[..., 0] /= std[0]
53
+ x[..., 1] /= std[1]
54
+ x[..., 2] /= std[2]
55
+
56
+ return x
57
+
58
+
59
+ def dense_graph(x, growth_rate, bottleneck_width, name=''):
60
+ growth_rate = int(growth_rate / 2)
61
+ inter_channel = int(growth_rate * bottleneck_width / 4) * 4
62
+
63
+ num_input_features = K.int_shape(x)[-1]
64
+
65
+ if inter_channel > num_input_features / 2:
66
+ inter_channel = int(num_input_features / 8) * 4
67
+ print('adjust inter_channel to ', inter_channel)
68
+
69
+ branch1 = basic_conv2d_graph(
70
+ x, inter_channel, kernel_size=1, strides=1, padding='valid', name=name + '_branch1a')
71
+ branch1 = basic_conv2d_graph(
72
+ branch1, growth_rate, kernel_size=3, strides=1, padding='same', name=name + '_branch1b')
73
+
74
+ branch2 = basic_conv2d_graph(
75
+ x, inter_channel, kernel_size=1, strides=1, padding='valid', name=name + '_branch2a')
76
+ branch2 = basic_conv2d_graph(
77
+ branch2, growth_rate, kernel_size=3, strides=1, padding='same', name=name + '_branch2b')
78
+ branch2 = basic_conv2d_graph(
79
+ branch2, growth_rate, kernel_size=3, strides=1, padding='same', name=name + '_branch2c')
80
+
81
+ out = Concatenate(axis=-1)([x, branch1, branch2])
82
+
83
+ return out
84
+
85
+
86
+ def dense_block_graph(x, num_layers, bn_size, growth_rate, name=''):
87
+ for i in range(num_layers):
88
+ x = dense_graph(x, growth_rate, bn_size, name=name + '_denselayer{}'.format(i + 1))
89
+
90
+ return x
91
+
92
+
93
+ def stem_block_graph(x, num_init_features, name=''):
94
+ num_stem_features = int(num_init_features / 2)
95
+
96
+ out = basic_conv2d_graph(x, num_init_features, kernel_size=3, strides=2, padding='same', name=name + '_stem1')
97
+
98
+ branch2 = basic_conv2d_graph(
99
+ out, num_stem_features, kernel_size=1, strides=1, padding='valid', name=name + '_stem2a')
100
+ branch2 = basic_conv2d_graph(
101
+ branch2, num_init_features, kernel_size=3, strides=2, padding='same', name=name + '_stem2b')
102
+
103
+ branch1 = MaxPooling2D(pool_size=2, strides=2)(out)
104
+
105
+ out = Concatenate(axis=-1)([branch1, branch2])
106
+
107
+ out = basic_conv2d_graph(out, num_init_features, kernel_size=1, strides=1, padding='valid', name=name + '_stem3')
108
+
109
+ return out
110
+
111
+
112
+ def basic_conv2d_graph(x, out_channels, kernel_size, strides, padding, activation=True, name=''):
113
+ x = DeeplabConv2D(
114
+ out_channels, kernel_size=kernel_size, strides=strides,
115
+ padding=padding, use_bias=False, name=name + '_conv')(x)
116
+ x = CustomBatchNormalization(name=name + '_norm')(x)
117
+ if activation:
118
+ x = ReLU()(x)
119
+
120
+ return x
121
+
122
+
123
+ def PeleeNet(input_shape=None,
124
+ OS=8,
125
+ growth_rate=32,
126
+ block_config=[3, 4, 8, 6],
127
+ num_init_features=32,
128
+ bottleneck_width=[1, 2, 4, 4],
129
+ include_top=True,
130
+ weights='imagenet',
131
+ input_tensor=None,
132
+ pooling=None,
133
+ dropout_rate=0.05,
134
+ classes=1000,
135
+ **kwargs):
136
+ """Instantiates the PeleeNet architecture.
137
+
138
+ # Arguments
139
+ input_shape: optional shape tuple, to be specified if you would
140
+ like to use a model with an input img resolution that is not
141
+ (224, 224, 3).
142
+ It should have exactly 3 inputs channels (224, 224, 3).
143
+ You can also omit this option if you would like
144
+ to infer input_shape from an input_tensor.
145
+ If you choose to include both input_tensor and input_shape then
146
+ input_shape will be used if they match, if the shapes
147
+ do not match then we will throw an error.
148
+ E.g. `(160, 160, 3)` would be one valid value.
149
+ include_top: whether to include the fully-connected
150
+ layer at the top of the network.
151
+ weights: one of `None` (random initialization),
152
+ 'imagenet' (pre-training on ImageNet),
153
+ or the path to the weights file to be loaded.
154
+ input_tensor: optional Keras tensor (i.e. output of
155
+ `layers.Input()`)
156
+ to use as image input for the model.
157
+ pooling: Optional pooling mode for feature extraction
158
+ when `include_top` is `False`.
159
+ - `None` means that the output of the model
160
+ will be the 4D tensor output of the
161
+ last convolutional block.
162
+ - `avg` means that global average pooling
163
+ will be applied to the output of the
164
+ last convolutional block, and thus
165
+ the output of the model will be a
166
+ 2D tensor.
167
+ - `max` means that global max pooling will
168
+ be applied.
169
+ classes: optional number of classes to classify images
170
+ into, only to be specified if `include_top` is True, and
171
+ if no `weights` argument is specified.
172
+
173
+ # Returns
174
+ A Keras model instance.
175
+
176
+ # Raises
177
+ ValueError: in case of invalid argument for `weights`,
178
+ or invalid input shape or invalid alpha, rows when
179
+ weights='imagenet'
180
+ """
181
+
182
+ if not (weights in {'imagenet', None} or os.path.exists(weights)):
183
+ raise ValueError('The `weights` argument should be either '
184
+ '`None` (random initialization), `imagenet` '
185
+ '(pre-training on ImageNet), '
186
+ 'or the path to the weights file to be loaded.')
187
+
188
+ if weights == 'imagenet' and include_top and classes != 1000:
189
+ raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
190
+ 'as true, `classes` should be 1000')
191
+
192
+ input_shape = _obtain_input_shape(input_shape,
193
+ default_size=224,
194
+ min_size=32,
195
+ data_format=K.image_data_format(),
196
+ require_flatten=include_top,
197
+ weights=weights)
198
+
199
+ # If both input_shape and input_tensor are None, use a default shape
200
+ if input_shape is None and input_tensor is None:
201
+ input_shape = (None, None, 3)
202
+
203
+ if input_tensor is None:
204
+ img_input = Input(shape=input_shape)
205
+ else:
206
+ #if not K.is_keras_tensor(input_tensor):
207
+ #img_input = Input(tensor=input_tensor, shape=input_shape)
208
+ #else:
209
+ #img_input = input_tensor
210
+ img_input = input_tensor
211
+
212
+ if type(growth_rate) is list:
213
+ growth_rates = growth_rate
214
+ assert len(growth_rates) == 4, 'The growth rate must be the list and the size must be 4'
215
+ else:
216
+ growth_rates = [growth_rate] * 4
217
+
218
+ if type(bottleneck_width) is list:
219
+ bottleneck_widths = bottleneck_width
220
+ assert len(bottleneck_widths) == 4, 'The bottleneck width must be the list and the size must be 4'
221
+ else:
222
+ bottleneck_widths = [bottleneck_width] * 4
223
+
224
+ features = stem_block_graph(img_input, num_init_features, name='bbn_features_stemblock')
225
+ num_features = num_init_features
226
+ for i, num_layers in enumerate(block_config):
227
+ features = dense_block_graph(
228
+ features, num_layers=num_layers, bn_size=bottleneck_widths[i],
229
+ growth_rate=growth_rates[i], name='bbn_features_denseblock{}'.format(i + 1))
230
+
231
+ num_features = num_features + num_layers * growth_rates[i]
232
+ features = basic_conv2d_graph(
233
+ features, num_features, kernel_size=1, strides=1,
234
+ padding='valid', name='bbn_features_transition{}'.format(i + 1))
235
+
236
+ #if i != len(block_config) - 1:
237
+ #features = AveragePooling2D(pool_size=2, strides=2)(features)
238
+
239
+ # skip level feature, with output stride = 4
240
+ if i == 0:
241
+ skip = features
242
+
243
+ # apply stride pooling according to OS
244
+ if OS == 8 and i < 1:
245
+ features = AveragePooling2D(pool_size=2, strides=2)(features)
246
+ elif OS == 16 and i < 2:
247
+ features = AveragePooling2D(pool_size=2, strides=2)(features)
248
+ elif OS == 32 and i != len(block_config) - 1:
249
+ features = AveragePooling2D(pool_size=2, strides=2)(features)
250
+
251
+ features_shape = K.int_shape(features)
252
+
253
+ if include_top:
254
+ x = GlobalAveragePooling2D()(features)
255
+ if dropout_rate > 0:
256
+ x = Dropout(dropout_rate)(x)
257
+ x = Dense(classes, activation='softmax',
258
+ use_bias=True, name='Logits')(x)
259
+ else:
260
+ if pooling == 'avg':
261
+ x = GlobalAveragePooling2D()(features)
262
+ elif pooling == 'max':
263
+ x = GlobalMaxPooling2D()(features)
264
+ else:
265
+ x = features
266
+
267
+ # Ensure that the model takes into account
268
+ # any potential predecessors of `input_tensor`.
269
+ if input_tensor is not None:
270
+ inputs = get_source_inputs(input_tensor)
271
+ else:
272
+ inputs = img_input
273
+
274
+ # Create model.
275
+ model = Model(inputs, x, name='peleenet')
276
+
277
+ # Load weights.
278
+ if weights == 'imagenet':
279
+ if include_top:
280
+ file_name = 'peleenet_weights_tf_dim_ordering_tf_kernels_224.h5'
281
+ weight_path = BASE_WEIGHT_PATH + file_name
282
+ else:
283
+ file_name = 'peleenet_weights_tf_dim_ordering_tf_kernels_224_no_top.h5'
284
+ weight_path = BASE_WEIGHT_PATH + file_name
285
+
286
+ weights_path = get_file(file_name, weight_path, cache_subdir='models')
287
+ model.load_weights(weights_path)
288
+ elif weights is not None:
289
+ model.load_weights(weights)
290
+
291
+ backbone_len = len(model.layers)
292
+ # need to return feature map and skip connection,
293
+ # not the whole "no top" model
294
+ return x, skip, backbone_len
295
+ #return model
296
+
297
+
298
+ def Deeplabv3pPeleeNet(input_shape=(512, 512, 3),
299
+ weights='imagenet',
300
+ input_tensor=None,
301
+ num_classes=21,
302
+ OS=8):
303
+ """ Instantiates the Deeplabv3+ PeleeNet architecture
304
+ # Arguments
305
+ input_shape: shape of input image. format HxWxC
306
+ PASCAL VOC model was trained on (512,512,3) images
307
+ weights: pretrained weights type
308
+ - imagenet: pre-trained on Imagenet
309
+ - None : random initialization
310
+ input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
311
+ to use as image input for the model.
312
+ num_classes: number of desired classes.
313
+ OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
314
+
315
+ # Returns
316
+ A Keras model instance.
317
+ """
318
+
319
+ if not (weights in {'imagenet', None}):
320
+ raise ValueError('The `weights` argument should be either '
321
+ '`imagenet` (pre-trained on Imagenet) or '
322
+ '`None` (random initialization)')
323
+ if input_tensor is None:
324
+ img_input = Input(shape=input_shape, name='image_input')
325
+ else:
326
+ img_input = input_tensor
327
+
328
+ # normalize input image
329
+ img_norm = Lambda(normalize, name='input_normalize')(img_input)
330
+
331
+ # backbone body for feature extract
332
+ x, skip_feature, backbone_len = PeleeNet(include_top=False, pooling=None, input_tensor=img_norm, weights=weights, OS=OS)
333
+
334
+ # ASPP block
335
+ x = ASPP_block(x, OS)
336
+
337
+ # Deeplabv3+ decoder for feature projection
338
+ x = Decoder_block(x, skip_feature)
339
+
340
+ # Final prediction conv block
341
+ x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
342
+ x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
343
+ x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
344
+ x = Softmax(name='Predictions/Softmax')(x)
345
+
346
+
347
+ # Ensure that the model takes into account
348
+ # any potential predecessors of `input_tensor`.
349
+ #if input_tensor is not None:
350
+ #inputs = get_source_inputs(input_tensor)
351
+ #else:
352
+ #inputs = img_input
353
+
354
+ model = Model(img_input, x, name='deeplabv3p_peleenet')
355
+
356
+ return model, backbone_len
357
+
358
+
359
+ def Deeplabv3pLitePeleeNet(input_shape=(512, 512, 3),
360
+ weights='imagenet',
361
+ input_tensor=None,
362
+ num_classes=21,
363
+ OS=8):
364
+ """ Instantiates the Deeplabv3+ MobileNetV2Lite architecture
365
+ # Arguments
366
+ input_shape: shape of input image. format HxWxC
367
+ PASCAL VOC model was trained on (512,512,3) images
368
+ weights: pretrained weights type
369
+ - imagenet: pre-trained on Imagenet
370
+ - None : random initialization
371
+ input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
372
+ to use as image input for the model.
373
+ num_classes: number of desired classes.
374
+ OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
375
+
376
+ # Returns
377
+ A Keras model instance.
378
+ # Raises
379
+ RuntimeError: If attempting to run this model with a
380
+ backend that does not support separable convolutions.
381
+ ValueError: in case of invalid argument for `weights` or `backbone`
382
+ """
383
+
384
+ if not (weights in {'imagenet', None}):
385
+ raise ValueError('The `weights` argument should be either '
386
+ '`imagenet` (pre-trained on Imagenet) or '
387
+ '`None` (random initialization)')
388
+
389
+ if input_tensor is None:
390
+ img_input = Input(shape=input_shape, name='image_input')
391
+ else:
392
+ img_input = input_tensor
393
+
394
+ # normalize input image
395
+ img_norm = Lambda(normalize, name='input_normalize')(img_input)
396
+
397
+ # backbone body for feature extract
398
+ x, _, backbone_len = PeleeNet(include_top=False, pooling=None, input_tensor=img_norm, weights=weights, OS=OS)
399
+
400
+ # use ASPP Lite block & no decoder block
401
+ x = ASPP_Lite_block(x)
402
+
403
+ # Final prediction conv block
404
+ x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
405
+ x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
406
+ x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
407
+ x = Softmax(name='Predictions/Softmax')(x)
408
+
409
+
410
+ # Ensure that the model takes into account
411
+ # any potential predecessors of `input_tensor`.
412
+ #if input_tensor is not None:
413
+ #inputs = get_source_inputs(input_tensor)
414
+ #else:
415
+ #inputs = img_input
416
+
417
+ model = Model(img_input, x, name='deeplabv3p_peleenet_lite')
418
+
419
+ return model, backbone_len
420
+
421
+
422
+ if __name__ == '__main__':
423
+ input_tensor = Input(shape=(512, 512, 3), name='image_input')
424
+ model, backbone_len = Deeplabv3pLitePeleeNet(input_tensor=input_tensor,
425
+ weights=None,
426
+ num_classes=21,
427
+ OS=8)
428
+ model.summary()
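The PeleeNet backbone above follows the same output-stride (OS) convention as the MobileNetV3 and ResNet50 backbones in these files: once the nominal downsampling would exceed the requested OS, later stride-2 stages are replaced by stride-1 blocks with dilated convolutions, so the final feature map keeps a resolution of input_size / OS. A small illustrative sketch of what that ratio means (my own helper, not part of the repository):

```python
def feature_map_size(input_size: int, output_stride: int) -> int:
    """Spatial size of the backbone's final feature map for a square input."""
    if output_stride not in (8, 16, 32):
        raise ValueError('invalid output stride', output_stride)
    # Later stride-2 stages are swapped for stride-1 + dilation, so the
    # overall downsampling factor is exactly the requested output stride.
    return input_size // output_stride

for os_value in (8, 16, 32):
    print(os_value, feature_map_size(512, os_value))  # 8 -> 64, 16 -> 32, 32 -> 16
```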
models/deeplab/deeplabv3p/models/deeplabv3p_resnet50.py ADDED
@@ -0,0 +1,408 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """ Deeplabv3+ ResNet50 model for Keras.
4
+
5
+ # Reference:
6
+ - [Encoder-Decoder with Atrous Separable Convolution
7
+ for Semantic Image Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
8
+ - [Deep Residual Learning for Image Recognition](
9
+ https://arxiv.org/abs/1512.03385) (CVPR 2016 Best Paper Award)
10
+ """
11
+ import os
12
+ import warnings
13
+ from keras_applications.imagenet_utils import _obtain_input_shape
14
+ from tensorflow.keras.models import Model
15
+ from tensorflow.keras.activations import relu
16
+ from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, ZeroPadding2D, Lambda, AveragePooling2D, MaxPooling2D, GlobalAveragePooling2D, GlobalMaxPooling2D, Input, Dense, Concatenate, add, Reshape, BatchNormalization, Dropout, ReLU, Softmax
17
+ from tensorflow.keras.utils import get_source_inputs, get_file
18
+ from tensorflow.keras import backend as K
19
+
20
+ from deeplabv3p.models.layers import DeeplabConv2D, DeeplabDepthwiseConv2D, CustomBatchNormalization, ASPP_block, ASPP_Lite_block, Decoder_block, normalize, img_resize
21
+
22
+
23
+ WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/'
24
+ 'releases/download/v0.2/'
25
+ 'resnet50_weights_tf_dim_ordering_tf_kernels.h5')
26
+ WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/'
27
+ 'releases/download/v0.2/'
28
+ 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5')
29
+
30
+
31
+ def identity_block(input_tensor, kernel_size, filters, stage, block, rate=1):
32
+ """The identity block is the block that has no conv layer at shortcut.
33
+
34
+ # Arguments
35
+ input_tensor: input tensor
36
+ kernel_size: default 3, the kernel size of
37
+ middle conv layer at main path
38
+ filters: list of integers, the filters of 3 conv layer at main path
39
+ stage: integer, current stage label, used for generating layer names
40
+ block: 'a','b'..., current block label, used for generating layer names
41
+
42
+ # Returns
43
+ Output tensor for the block.
44
+ """
45
+ filters1, filters2, filters3 = filters
46
+ if K.image_data_format() == 'channels_last':
47
+ bn_axis = 3
48
+ else:
49
+ bn_axis = 1
50
+ conv_name_base = 'res' + str(stage) + block + '_branch'
51
+ bn_name_base = 'bn' + str(stage) + block + '_branch'
52
+
53
+ x = DeeplabConv2D(filters1, (1, 1),
54
+ kernel_initializer='he_normal',
55
+ dilation_rate=(rate, rate),
56
+ name=conv_name_base + '2a')(input_tensor)
57
+ x = CustomBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
58
+ x = ReLU()(x)
59
+
60
+ x = DeeplabConv2D(filters2, kernel_size,
61
+ padding='same',
62
+ kernel_initializer='he_normal',
63
+ dilation_rate=(rate, rate),
64
+ name=conv_name_base + '2b')(x)
65
+ x = CustomBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
66
+ x = ReLU()(x)
67
+
68
+ x = DeeplabConv2D(filters3, (1, 1),
69
+ kernel_initializer='he_normal',
70
+ dilation_rate=(rate, rate),
71
+ name=conv_name_base + '2c')(x)
72
+ x = CustomBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
73
+
74
+ x = add([x, input_tensor])
75
+ x = ReLU()(x)
76
+ return x
77
+
78
+
79
+ def conv_block(input_tensor,
80
+ kernel_size,
81
+ filters,
82
+ stage,
83
+ block,
84
+ strides=(2, 2),
85
+ rate=1):
86
+ """A block that has a conv layer at shortcut.
87
+
88
+ # Arguments
89
+ input_tensor: input tensor
90
+ kernel_size: default 3, the kernel size of
91
+ middle conv layer at main path
92
+ filters: list of integers, the filters of 3 conv layer at main path
93
+ stage: integer, current stage label, used for generating layer names
94
+ block: 'a','b'..., current block label, used for generating layer names
95
+ strides: Strides for the first conv layer in the block.
96
+
97
+ # Returns
98
+ Output tensor for the block.
99
+
100
+ Note that from stage 3,
101
+ the first conv layer at main path is with strides=(2, 2)
102
+ And the shortcut should have strides=(2, 2) as well
103
+ """
104
+ filters1, filters2, filters3 = filters
105
+ if K.image_data_format() == 'channels_last':
106
+ bn_axis = 3
107
+ else:
108
+ bn_axis = 1
109
+ conv_name_base = 'res' + str(stage) + block + '_branch'
110
+ bn_name_base = 'bn' + str(stage) + block + '_branch'
111
+
112
+ x = DeeplabConv2D(filters1, (1, 1), strides=strides,
113
+ kernel_initializer='he_normal',
114
+ dilation_rate=(rate, rate),
115
+ name=conv_name_base + '2a')(input_tensor)
116
+ x = CustomBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
117
+ x = ReLU()(x)
118
+
119
+ x = DeeplabConv2D(filters2, kernel_size, padding='same',
120
+ kernel_initializer='he_normal',
121
+ dilation_rate=(rate, rate),
122
+ name=conv_name_base + '2b')(x)
123
+ x = CustomBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
124
+ x = ReLU()(x)
125
+
126
+ x = DeeplabConv2D(filters3, (1, 1),
127
+ kernel_initializer='he_normal',
128
+ dilation_rate=(rate, rate),
129
+ name=conv_name_base + '2c')(x)
130
+ x = CustomBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
131
+
132
+ shortcut = DeeplabConv2D(filters3, (1, 1), strides=strides,
133
+ kernel_initializer='he_normal',
134
+ dilation_rate=(rate, rate),
135
+ name=conv_name_base + '1')(input_tensor)
136
+ shortcut = CustomBatchNormalization(
137
+ axis=bn_axis, name=bn_name_base + '1')(shortcut)
138
+
139
+ x = add([x, shortcut])
140
+ x = ReLU()(x)
141
+ return x
142
+
143
+
144
+ def ResNet50(include_top=True,
145
+ OS=8,
146
+ weights='imagenet',
147
+ input_tensor=None,
148
+ input_shape=None,
149
+ pooling=None,
150
+ classes=1000,
151
+ **kwargs):
152
+ """Instantiates the ResNet50 architecture.
153
+
154
+ Optionally loads weights pre-trained on ImageNet.
155
+ Note that the data format convention used by the model is
156
+ the one specified in your Keras config at `~/.keras/keras.json`.
157
+
158
+ # Arguments
159
+ include_top: whether to include the fully-connected
160
+ layer at the top of the network.
161
+ weights: one of `None` (random initialization),
162
+ 'imagenet' (pre-training on ImageNet),
163
+ or the path to the weights file to be loaded.
164
+ input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
165
+ to use as image input for the model.
166
+ input_shape: optional shape tuple, only to be specified
167
+ if `include_top` is False (otherwise the input shape
168
+ has to be `(224, 224, 3)` (with `channels_last` data format)
169
+ or `(3, 224, 224)` (with `channels_first` data format).
170
+ It should have exactly 3 input channels,
171
+ and width and height should be no smaller than 32.
172
+ E.g. `(200, 200, 3)` would be one valid value.
173
+ pooling: Optional pooling mode for feature extraction
174
+ when `include_top` is `False`.
175
+ - `None` means that the output of the model will be
176
+ the 4D tensor output of the
177
+ last convolutional block.
178
+ - `avg` means that global average pooling
179
+ will be applied to the output of the
180
+ last convolutional block, and thus
181
+ the output of the model will be a 2D tensor.
182
+ - `max` means that global max pooling will
183
+ be applied.
184
+ classes: optional number of classes to classify images
185
+ into, only to be specified if `include_top` is True, and
186
+ if no `weights` argument is specified.
187
+
188
+ # Returns
189
+ A Keras model instance.
190
+
191
+ # Raises
192
+ ValueError: in case of invalid argument for `weights`,
193
+ or invalid input shape.
194
+ """
195
+
196
+ """
197
+ Modified ResNet50 feature extractor body
198
+ with specified output stride and skip level feature
199
+ """
200
+ if OS == 8:
201
+ origin_os16_stride = (1, 1)
202
+ origin_os16_block_rate = 2
203
+ origin_os32_stride = (1, 1)
204
+ origin_os32_block_rate = 4
205
+ elif OS == 16:
206
+ origin_os16_stride = (2, 2)
207
+ origin_os16_block_rate = 1
208
+ origin_os32_stride = (1, 1)
209
+ origin_os32_block_rate = 2
210
+ elif OS == 32:
211
+ origin_os16_stride = (2, 2)
212
+ origin_os16_block_rate = 1
213
+ origin_os32_stride = (2, 2)
214
+ origin_os32_block_rate = 1
215
+ else:
216
+ raise ValueError('invalid output stride', OS)
217
+
218
+ if not (weights in {'imagenet', None} or os.path.exists(weights)):
219
+ raise ValueError('The `weights` argument should be either '
220
+ '`None` (random initialization), `imagenet` '
221
+ '(pre-training on ImageNet), '
222
+ 'or the path to the weights file to be loaded.')
223
+
224
+ if weights == 'imagenet' and include_top and classes != 1000:
225
+ raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
226
+ ' as true, `classes` should be 1000')
227
+
228
+ # Determine proper input shape
229
+ input_shape = _obtain_input_shape(input_shape,
230
+ default_size=224,
231
+ min_size=32,
232
+ data_format=K.image_data_format(),
233
+ require_flatten=include_top,
234
+ weights=weights)
235
+
236
+ if input_tensor is None:
237
+ img_input = Input(shape=input_shape)
238
+ else:
239
+ #if not backend.is_keras_tensor(input_tensor):
240
+ #img_input = Input(tensor=input_tensor, shape=input_shape)
241
+ #else:
242
+ #img_input = input_tensor
243
+ img_input = input_tensor
244
+
245
+ if K.image_data_format() == 'channels_last':
246
+ bn_axis = 3
247
+ else:
248
+ bn_axis = 1
249
+
250
+ x = ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
251
+ x = DeeplabConv2D(64, (7, 7),
252
+ strides=(2, 2),
253
+ padding='valid',
254
+ kernel_initializer='he_normal',
255
+ name='conv1')(x)
256
+ x = CustomBatchNormalization(axis=bn_axis, name='bn_conv1')(x)
257
+ x = ReLU()(x)
258
+ x = ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
259
+ x = MaxPooling2D((3, 3), strides=(2, 2))(x)
260
+ x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
261
+ x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
262
+ x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
263
+ # skip level feature, with output stride = 4
264
+ skip = x
265
+
266
+ x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
267
+ x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
268
+ x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
269
+ x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
270
+
271
+ # original output stride changes to 16 from here, so we start to control block stride and dilation rate
272
+ x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', strides=origin_os16_stride) # origin: stride=(2, 2)
273
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b', rate=origin_os16_block_rate)
274
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c', rate=origin_os16_block_rate)
275
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d', rate=origin_os16_block_rate)
276
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e', rate=origin_os16_block_rate)
277
+ x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f', rate=origin_os16_block_rate)
278
+
279
+ # original output stride changes to 32 from here
280
+ x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a', strides=origin_os32_stride, rate=origin_os16_block_rate) # origin: stride=(2, 2)
281
+ x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b', rate=origin_os32_block_rate)
282
+ x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c', rate=origin_os32_block_rate)
283
+
284
+ if include_top:
285
+ x = GlobalAveragePooling2D(name='avg_pool')(x)
286
+ x = Dense(classes, activation='softmax', name='fc1000')(x)
287
+ else:
288
+ if pooling == 'avg':
289
+ x = GlobalAveragePooling2D()(x)
290
+ elif pooling == 'max':
291
+ x = GlobalMaxPooling2D()(x)
292
+ else:
293
+ warnings.warn('The output shape of `ResNet50(include_top=False)` '
294
+ 'has been changed since Keras 2.2.0.')
295
+
296
+ # Ensure that the model takes into account
297
+ # any potential predecessors of `input_tensor`.
298
+ if input_tensor is not None:
299
+ inputs = get_source_inputs(input_tensor)
300
+ else:
301
+ inputs = img_input
302
+ # Create model.
303
+ model = Model(inputs, x, name='resnet50')
304
+
305
+ # Load weights.
306
+ if weights == 'imagenet':
307
+ if include_top:
308
+ weights_path = get_file(
309
+ 'resnet50_weights_tf_dim_ordering_tf_kernels.h5',
310
+ WEIGHTS_PATH,
311
+ cache_subdir='models',
312
+ md5_hash='a7b3fe01876f51b976af0dea6bc144eb')
313
+ else:
314
+ weights_path = get_file(
315
+ 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
316
+ WEIGHTS_PATH_NO_TOP,
317
+ cache_subdir='models',
318
+ md5_hash='a268eb855778b3df3c7506639542a6af')
319
+ model.load_weights(weights_path)
320
+ elif weights is not None:
321
+ model.load_weights(weights)
322
+
323
+ backbone_len = len(model.layers)
324
+ # need to return feature map and skip connection,
325
+ # not the whole "no top" model
326
+ return x, skip, backbone_len
327
+ #return model
328
+
329
+
330
+
331
+ def Deeplabv3pResNet50(input_shape=(512, 512, 3),
332
+ weights='imagenet',
333
+ input_tensor=None,
334
+ num_classes=21,
335
+ OS=8):
336
+ """ Instantiates the Deeplabv3+ MobileNetV3Large architecture
337
+ # Arguments
338
+ input_shape: shape of input image. format HxWxC
339
+ PASCAL VOC model was trained on (512,512,3) images
340
+ weights: pretrained weights type
341
+ - imagenet: pre-trained on Imagenet
342
+ - None : random initialization
343
+ input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
344
+ to use as image input for the model.
345
+ num_classes: number of desired classes.
346
+ OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
347
+
348
+ # Returns
349
+ A Keras model instance.
350
+ """
351
+ if not (weights in {'imagenet', None}):
352
+ raise ValueError('The `weights` argument should be either '
353
+ '`imagenet` (pre-trained on Imagenet) or '
354
+ '`None` (random initialization)')
355
+
356
+ if input_tensor is None:
357
+ img_input = Input(shape=input_shape, name='image_input')
358
+ else:
359
+ img_input = input_tensor
360
+
361
+ # normalize input image
362
+ img_norm = Lambda(normalize, name='input_normalize')(img_input)
363
+
364
+ # backbone body for feature extract
365
+ x, skip_feature, backbone_len = ResNet50(include_top=False, input_tensor=img_norm, weights=weights, OS=OS)
366
+
367
+ # ASPP block
368
+ x = ASPP_block(x, OS)
369
+
370
+ # Deeplabv3+ decoder for feature projection
371
+ x = Decoder_block(x, skip_feature)
372
+
373
+ # Final prediction conv block
374
+ x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
375
+ x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
376
+ x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
377
+ x = Softmax(name='Predictions/Softmax')(x)
378
+
379
+ # Ensure that the model takes into account
380
+ # any potential predecessors of `input_tensor`.
381
+ #if input_tensor is not None:
382
+ #inputs = get_source_inputs(input_tensor)
383
+ #else:
384
+ #inputs = img_input
385
+ model = Model(img_input, x, name='deeplabv3p_resnet50')
386
+
387
+ return model, backbone_len
388
+
389
+
390
+
391
+
392
+ if __name__ == '__main__':
393
+ input_tensor = Input(shape=(224, 224, 3), name='image_input')
394
+ #model = ResNet50(include_top=False, input_shape=(512, 512, 3), weights='imagenet')
395
+ model = ResNet50(include_top=True, input_tensor=input_tensor, weights='imagenet')
396
+ model.summary()
397
+
398
+ import numpy as np
399
+ from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
400
+ from keras_preprocessing import image
401
+
402
+ img = image.load_img('../../examples/dog.jpg', target_size=(224, 224))
403
+ x = image.img_to_array(img)
404
+ x = np.expand_dims(x, axis=0)
405
+ x = preprocess_input(x)
406
+
407
+ preds = model.predict(x)
408
+ print('Predicted:', decode_predictions(preds))
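Every `Deeplabv3p*` constructor in these files ends with the same prediction head: a 1x1 `logits_semantic` conv, a bilinear resize back to the input resolution, a `Reshape((H*W, num_classes))` and a `Softmax`. A prediction therefore has shape `(batch, H*W, num_classes)`, and recovering a per-pixel label map is just a reshape plus argmax. A post-processing sketch (my own illustrative helper, not the repository's post-processing code):

```python
import numpy as np

def prediction_to_label_map(pred, height=512, width=512):
    """pred: one image's output of shape (height*width, num_classes)."""
    pred = pred.reshape((height, width, -1))
    # Class index with the highest softmax score per pixel.
    return np.argmax(pred, axis=-1).astype(np.uint8)

# usage (assuming `model` comes from one of the constructors above):
# label_map = prediction_to_label_map(model.predict(batch)[0])
```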
models/deeplab/deeplabv3p/models/deeplabv3p_xception.py ADDED
@@ -0,0 +1,239 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """ Deeplabv3+ Xception model for Keras.
4
+ On Pascal VOC, original model gets to 84.56% mIOU
5
+
6
+ Reference Paper:
7
+ - [Encoder-Decoder with Atrous Separable Convolution
8
+ for Semantic Image Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
9
+ - [Xception: Deep Learning with Depthwise Separable Convolutions]
10
+ (https://arxiv.org/abs/1610.02357)
11
+ """
12
+ import numpy as np
13
+ import tensorflow as tf
14
+
15
+ from tensorflow.keras.models import Model
16
+ from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, ZeroPadding2D, Lambda, AveragePooling2D, Input, Concatenate, Add, Reshape, BatchNormalization, Dropout, ReLU, Softmax, add
17
+ from tensorflow.keras.utils import get_source_inputs, get_file
18
+ #from tensorflow.keras import backend as K
19
+
20
+ from deeplabv3p.models.layers import DeeplabConv2D, DeeplabDepthwiseConv2D, CustomBatchNormalization, SepConv_BN, ASPP_block, Decoder_block, normalize, img_resize
21
+
22
+ WEIGHTS_PATH_X = "https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.1/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5"
23
+
24
+
25
+ def _conv2d_same(x, filters, prefix, stride=1, kernel_size=3, rate=1):
26
+ """Implements right 'same' padding for even kernel sizes
27
+ Without this there is a 1 pixel drift when stride = 2
28
+ Args:
29
+ x: input tensor
30
+ filters: num of filters in pointwise convolution
31
+ prefix: prefix before name
32
+ stride: stride at depthwise conv
33
+ kernel_size: kernel size for depthwise convolution
34
+ rate: atrous rate for depthwise convolution
35
+ """
36
+ if stride == 1:
37
+ return DeeplabConv2D(filters,
38
+ (kernel_size, kernel_size),
39
+ strides=(stride, stride),
40
+ padding='same', use_bias=False,
41
+ dilation_rate=(rate, rate),
42
+ name=prefix)(x)
43
+ else:
44
+ kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
45
+ pad_total = kernel_size_effective - 1
46
+ pad_beg = pad_total // 2
47
+ pad_end = pad_total - pad_beg
48
+ x = ZeroPadding2D((pad_beg, pad_end))(x)
49
+ return DeeplabConv2D(filters,
50
+ (kernel_size, kernel_size),
51
+ strides=(stride, stride),
52
+ padding='valid', use_bias=False,
53
+ dilation_rate=(rate, rate),
54
+ name=prefix)(x)
55
+
56
+
57
+ def _xception_block(inputs, depth_list, prefix, skip_connection_type, stride,
58
+ rate=1, depth_activation=False, return_skip=False):
59
+ """ Basic building block of modified Xception network
60
+ Args:
61
+ inputs: input tensor
62
+ depth_list: number of filters in each SepConv layer. len(depth_list) == 3
63
+ prefix: prefix before name
64
+ skip_connection_type: one of {'conv','sum','none'}
65
+ stride: stride at last depthwise conv
66
+ rate: atrous rate for depthwise convolution
67
+ depth_activation: flag to use activation between depthwise & pointwise convs
68
+ return_skip: flag to return additional tensor after 2 SepConvs for decoder
69
+ """
70
+ residual = inputs
71
+ for i in range(3):
72
+ residual = SepConv_BN(residual,
73
+ depth_list[i],
74
+ prefix + '_separable_conv{}'.format(i + 1),
75
+ stride=stride if i == 2 else 1,
76
+ rate=rate,
77
+ depth_activation=depth_activation)
78
+ if i == 1:
79
+ skip = residual
80
+ if skip_connection_type == 'conv':
81
+ shortcut = _conv2d_same(inputs, depth_list[-1], prefix + '_shortcut',
82
+ kernel_size=1,
83
+ stride=stride)
84
+ shortcut = CustomBatchNormalization(name=prefix + '_shortcut_BN')(shortcut)
85
+ outputs = add([residual, shortcut])
86
+ elif skip_connection_type == 'sum':
87
+ outputs = add([residual, inputs])
88
+ elif skip_connection_type == 'none':
89
+ outputs = residual
90
+ if return_skip:
91
+ return outputs, skip
92
+ else:
93
+ return outputs
94
+
95
+
96
+ def Xception_body(input_tensor, OS):
97
+ """
98
+ Modified Aligned Xception feature extractor body
99
+ with specified output stride and skip level feature
100
+ """
101
+ if OS == 8:
102
+ origin_os16_stride = 1
103
+ origin_os16_block_rate = 2
104
+ origin_os32_stride = 1
105
+ origin_os32_block_rate = 4
106
+ elif OS == 16:
107
+ origin_os16_stride = 2
108
+ origin_os16_block_rate = 1
109
+ origin_os32_stride = 1
110
+ origin_os32_block_rate = 2
111
+ elif OS == 32:
112
+ origin_os16_stride = 2
113
+ origin_os16_block_rate = 1
114
+ origin_os32_stride = 2
115
+ origin_os32_block_rate = 1
116
+ else:
117
+ raise ValueError('invalid output stride', OS)
118
+
119
+ x = DeeplabConv2D(32, (3, 3), strides=(2, 2),
120
+ name='entry_flow_conv1_1', use_bias=False, padding='same')(input_tensor)
121
+
122
+ x = CustomBatchNormalization(name='entry_flow_conv1_1_BN')(x)
123
+ x = ReLU()(x)
124
+
125
+ x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
126
+ x = CustomBatchNormalization(name='entry_flow_conv1_2_BN')(x)
127
+ x = ReLU()(x)
128
+
129
+ x = _xception_block(x, [128, 128, 128], 'entry_flow_block1',
130
+ skip_connection_type='conv', stride=2,
131
+ depth_activation=False)
132
+ # skip level feature, with output stride = 4
133
+ x, skip = _xception_block(x, [256, 256, 256], 'entry_flow_block2',
134
+ skip_connection_type='conv', stride=2,
135
+ depth_activation=False, return_skip=True)
136
+
137
+ # original output stride changes to 16 from here, so we start to control block stride and dilation rate
138
+ x = _xception_block(x, [728, 728, 728], 'entry_flow_block3',
139
+ skip_connection_type='conv', stride=origin_os16_stride,
140
+ depth_activation=False)
141
+ for i in range(16):
142
+ x = _xception_block(x, [728, 728, 728], 'middle_flow_unit_{}'.format(i + 1),
143
+ skip_connection_type='sum', stride=1, rate=origin_os16_block_rate,
144
+ depth_activation=False)
145
+
146
+ # original output stride changes to 32 from here
147
+ x = _xception_block(x, [728, 1024, 1024], 'exit_flow_block1',
148
+ skip_connection_type='conv', stride=origin_os32_stride, rate=origin_os16_block_rate,
149
+ depth_activation=False)
150
+ x = _xception_block(x, [1536, 1536, 2048], 'exit_flow_block2',
151
+ skip_connection_type='none', stride=1, rate=origin_os32_block_rate,
152
+ depth_activation=True)
153
+ # end of feature extractor
154
+
155
+ # Ensure that the model takes into account
156
+ # any potential predecessors of `input_tensor`.
157
+ if input_tensor is not None:
158
+ inputs = get_source_inputs(input_tensor)
159
+ #else:
160
+ #inputs = img_input
161
+
162
+ backbone_len = len(Model(inputs, x).layers)
163
+ return x, skip, backbone_len
164
+
165
+
166
+
167
+ def Deeplabv3pXception(input_shape=(512, 512, 3),
168
+ weights='pascalvoc',
169
+ input_tensor=None,
170
+ num_classes=21,
171
+ OS=16):
172
+ """ Instantiates the Deeplabv3+ architecture
173
+ Optionally loads weights pre-trained
174
+ on PASCAL VOC. This model is available for TensorFlow only,
175
+ and can only be used with inputs following the TensorFlow
176
+ data format `(height, width, channels)`.
177
+ # Arguments
178
+ input_shape: shape of input image. format HxWxC
179
+ PASCAL VOC model was trained on (512,512,3) images
180
+ weights: pretrained weights type
181
+ - pascalvoc : pre-trained on PASCAL VOC
182
+ - None : random initialization
183
+ input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
184
+ to use as image input for the model.
185
+ num_classes: number of desired classes.
186
+ OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}
187
+ # Returns
188
+ A Keras model instance.
189
+ # Raises
190
+ RuntimeError: If attempting to run this model with a
191
+ backend that does not support separable convolutions.
192
+ ValueError: in case of invalid argument for `weights` or `backbone`
193
+ """
194
+
195
+ if not (weights in {'pascalvoc', None}):
196
+ raise ValueError('The `weights` argument should be either '
197
+ '`None` (random initialization) or `pascalvoc` '
198
+ '(pre-trained on PASCAL VOC)')
199
+
200
+ if input_tensor is None:
201
+ img_input = Input(shape=input_shape, name='image_input')
202
+ else:
203
+ img_input = input_tensor
204
+
205
+ # normalize input image
206
+ img_norm = Lambda(normalize, name='input_normalize')(img_input)
207
+
208
+ # backbone body for feature extract
209
+ x, skip_feature, backbone_len = Xception_body(img_norm, OS)
210
+
211
+ # ASPP block
212
+ x = ASPP_block(x, OS)
213
+
214
+ # Deeplabv3+ decoder for feature projection
215
+ x = Decoder_block(x, skip_feature)
216
+
217
+ # Final prediction conv block
218
+ x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
219
+ x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
220
+ x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
221
+ x = Softmax(name='Predictions/Softmax')(x)
222
+
223
+ # Ensure that the model takes into account
224
+ # any potential predecessors of `input_tensor`.
225
+ #if input_tensor is not None:
226
+ #inputs = get_source_inputs(input_tensor)
227
+ #else:
228
+ #inputs = img_input
229
+
230
+ model = Model(img_input, x, name='deeplabv3p_xception')
231
+
232
+ # load weights
233
+ if weights == 'pascalvoc':
234
+ weights_path = get_file('deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
235
+ WEIGHTS_PATH_X,
236
+ cache_subdir='models')
237
+ model.load_weights(weights_path, by_name=True)
238
+ return model, backbone_len
239
+
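For reference, a minimal usage sketch for the builder above (it assumes the repository's models/deeplab directory is on the Python path so that the deeplabv3p package imports; the argument values simply mirror the defaults documented in the docstring):

from deeplabv3p.models.deeplabv3p_xception import Deeplabv3pXception

# build the Xception-backbone Deeplabv3+ with PASCAL VOC pretrained weights;
# the builder returns the Keras model together with the backbone layer count
model, backbone_len = Deeplabv3pXception(input_shape=(512, 512, 3),
                                         weights='pascalvoc',
                                         num_classes=21,
                                         OS=16)
model.summary()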
models/deeplab/deeplabv3p/models/layers.py ADDED
@@ -0,0 +1,311 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ from __future__ import division
4
+
5
+ from functools import wraps
6
+
7
+ from tensorflow.keras import backend as K
8
+ from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, SeparableConv2D, ZeroPadding2D, Lambda, AveragePooling2D, Concatenate, BatchNormalization, Dropout, ReLU
9
+ from tensorflow.keras.regularizers import l2
10
+ import tensorflow as tf
11
+
12
+ L2_FACTOR = 2e-5
13
+
14
+ @wraps(Conv2D)
15
+ def DeeplabConv2D(*args, **kwargs):
16
+ """Wrapper to set Deeplab parameters for Conv2D."""
17
+ deeplab_conv_kwargs = {'kernel_regularizer': l2(L2_FACTOR)}
18
+ deeplab_conv_kwargs['bias_regularizer'] = l2(L2_FACTOR)
19
+ #deeplab_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
20
+ deeplab_conv_kwargs.update(kwargs)
21
+ return Conv2D(*args, **deeplab_conv_kwargs)
22
+
23
+
24
+ @wraps(DepthwiseConv2D)
25
+ def DeeplabDepthwiseConv2D(*args, **kwargs):
26
+ """Wrapper to set Deeplab parameters for DepthwiseConv2D."""
27
+ deeplab_conv_kwargs = {'kernel_regularizer': l2(L2_FACTOR)}
28
+ deeplab_conv_kwargs['bias_regularizer'] = l2(L2_FACTOR)
29
+ #deeplab_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
30
+ deeplab_conv_kwargs.update(kwargs)
31
+ return DepthwiseConv2D(*args, **deeplab_conv_kwargs)
32
+
33
+
34
+ @wraps(SeparableConv2D)
35
+ def DeeplabSeparableConv2D(*args, **kwargs):
36
+ """Wrapper to set Deeplab parameters for SeparableConv2D."""
37
+ deeplab_conv_kwargs = {'kernel_regularizer': l2(L2_FACTOR)}
38
+ deeplab_conv_kwargs['bias_regularizer'] = l2(L2_FACTOR)
39
+ #deeplab_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
40
+ deeplab_conv_kwargs.update(kwargs)
41
+ return SeparableConv2D(*args, **deeplab_conv_kwargs)
42
+
43
+
44
+ def normalize(x):
45
+ return x/127.5 - 1
46
+
47
+
48
+ def img_resize(x, size, mode='bilinear'):
49
+ if mode == 'bilinear':
50
+ return tf.image.resize(x, size=size, method='bilinear')
51
+ elif mode == 'nearest':
52
+ return tf.image.resize(x, size=size, method='nearest')
53
+ else:
54
+ raise ValueError('invalid resize mode', mode)
55
+
56
+
57
+ def CustomBatchNormalization(*args, **kwargs):
58
+ if tf.__version__ >= '2.2':
59
+ from tensorflow.keras.layers.experimental import SyncBatchNormalization
60
+ BatchNorm = SyncBatchNormalization
61
+ else:
62
+ BatchNorm = BatchNormalization
63
+
64
+ return BatchNorm(*args, **kwargs)
65
+
66
+
67
+
68
+ def SepConv_BN(x, filters, prefix, stride=1, kernel_size=3, rate=1, depth_activation=False, epsilon=1e-3):
69
+ """ SepConv with BN between depthwise & pointwise. Optionally add activation after BN
70
+ Implements right "same" padding for even kernel sizes
71
+ Args:
72
+ x: input tensor
73
+ filters: num of filters in pointwise convolution
74
+ prefix: prefix before name
75
+ stride: stride at depthwise conv
76
+ kernel_size: kernel size for depthwise convolution
77
+ rate: atrous rate for depthwise convolution
78
+ depth_activation: flag to use activation between depthwise & pointwise convs
79
+ epsilon: epsilon to use in BN layer
80
+ """
81
+
82
+ if stride == 1:
83
+ depth_padding = 'same'
84
+ else:
85
+ kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
86
+ pad_total = kernel_size_effective - 1
87
+ pad_beg = pad_total // 2
88
+ pad_end = pad_total - pad_beg
89
+ x = ZeroPadding2D((pad_beg, pad_end))(x)
90
+ depth_padding = 'valid'
91
+
92
+ if not depth_activation:
93
+ x = ReLU()(x)
94
+ x = DeeplabDepthwiseConv2D((kernel_size, kernel_size), strides=(stride, stride), dilation_rate=(rate, rate),
95
+ padding=depth_padding, use_bias=False, name=prefix + '_depthwise')(x)
96
+ x = CustomBatchNormalization(name=prefix + '_depthwise_BN', epsilon=epsilon)(x)
97
+ if depth_activation:
98
+ x = ReLU()(x)
99
+ x = DeeplabConv2D(filters, (1, 1), padding='same',
100
+ use_bias=False, name=prefix + '_pointwise')(x)
101
+ x = CustomBatchNormalization(name=prefix + '_pointwise_BN', epsilon=epsilon)(x)
102
+ if depth_activation:
103
+ x = ReLU()(x)
104
+
105
+ return x
106
+
107
+
108
+ def ASPP_block(x, OS):
109
+ """
110
+ branching for Atrous Spatial Pyramid Pooling
111
+ """
112
+ if OS == 8:
113
+ atrous_rates = (12, 24, 36)
114
+ elif OS == 16:
115
+ atrous_rates = (6, 12, 18)
116
+ elif OS == 32:
117
+ # unofficial hyperparameters for OS=32, experimental only
118
+ atrous_rates = (3, 6, 9)
119
+ else:
120
+ raise ValueError('invalid output stride', OS)
121
+
122
+ # feature map shape, (batch, height, width, channel)
123
+ feature_shape = x.shape.as_list()
124
+
125
+ # Image Feature branch
126
+ b4 = AveragePooling2D(pool_size=(feature_shape[1], feature_shape[2]))(x)
127
+
128
+ b4 = DeeplabConv2D(256, (1, 1), padding='same',
129
+ use_bias=False, name='image_pooling')(b4)
130
+ b4 = CustomBatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
131
+ b4 = ReLU()(b4)
132
+ b4 = Lambda(img_resize, arguments={'size': (feature_shape[1], feature_shape[2]), 'mode': 'bilinear'}, name='aspp_resize')(b4)
133
+
134
+ # simple 1x1
135
+ b0 = DeeplabConv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
136
+ b0 = CustomBatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
137
+ b0 = ReLU(name='aspp0_activation')(b0)
138
+
139
+ # rate = 6 (12)
140
+ b1 = SepConv_BN(x, 256, 'aspp1',
141
+ rate=atrous_rates[0], depth_activation=True, epsilon=1e-5)
142
+ # rate = 12 (24)
143
+ b2 = SepConv_BN(x, 256, 'aspp2',
144
+ rate=atrous_rates[1], depth_activation=True, epsilon=1e-5)
145
+ # rate = 18 (36)
146
+ b3 = SepConv_BN(x, 256, 'aspp3',
147
+ rate=atrous_rates[2], depth_activation=True, epsilon=1e-5)
148
+ # concatenate ASPP branches & project
149
+ x = Concatenate()([b4, b0, b1, b2, b3])
150
+
151
+ x = DeeplabConv2D(256, (1, 1), padding='same',
152
+ use_bias=False, name='concat_projection')(x)
153
+ x = CustomBatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
154
+ x = ReLU()(x)
155
+ x = Dropout(0.5)(x)
156
+
157
+ return x
158
+
159
+
160
+ def ASPP_Lite_block(x):
161
+ """
162
+ a simplified version of Deeplab ASPP block, which
163
+ only has the global pooling & simple 1x1 conv branches
164
+ """
165
+ # feature map shape, (batch, height, width, channel)
166
+ feature_shape = x.shape.as_list()
167
+
168
+ # Image Feature branch
169
+ b4 = AveragePooling2D(pool_size=(feature_shape[1], feature_shape[2]))(x)
170
+
171
+ b4 = DeeplabConv2D(256, (1, 1), padding='same',
172
+ use_bias=False, name='image_pooling')(b4)
173
+ b4 = CustomBatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
174
+ b4 = ReLU()(b4)
175
+ b4 = Lambda(img_resize, arguments={'size': (feature_shape[1], feature_shape[2]), 'mode': 'bilinear'}, name='aspp_resize')(b4)
176
+
177
+ # simple 1x1 conv
178
+ b0 = DeeplabConv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
179
+ b0 = CustomBatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
180
+ b0 = ReLU(name='aspp0_activation')(b0)
181
+
182
+ # only 2 branches
183
+ x = Concatenate()([b4, b0])
184
+ x = DeeplabConv2D(256, (1, 1), padding='same',
185
+ use_bias=False, name='concat_projection')(x)
186
+ x = CustomBatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
187
+ x = ReLU()(x)
188
+ x = Dropout(0.5)(x)
189
+
190
+ return x
191
+
192
+
193
+ def Decoder_block(x, skip_feature):
194
+ """
195
+ DeepLab v.3+ decoder
196
+ Feature projection x4 (x2) block
197
+ """
198
+ # skip feature shape, (batch, height, width, channel)
199
+ skip_shape = skip_feature.shape.as_list()
200
+
201
+ x = Lambda(img_resize, arguments={'size': (skip_shape[1], skip_shape[2]), 'mode': 'bilinear'}, name='decoder_resize')(x)
202
+
203
+ skip_feature = DeeplabConv2D(48, (1, 1), padding='same',
204
+ use_bias=False, name='feature_projection0')(skip_feature)
205
+ skip_feature = CustomBatchNormalization(
206
+ name='feature_projection0_BN', epsilon=1e-5)(skip_feature)
207
+ skip_feature = ReLU()(skip_feature)
208
+ x = Concatenate()([x, skip_feature])
209
+ x = SepConv_BN(x, 256, 'decoder_conv0',
210
+ depth_activation=True, epsilon=1e-5)
211
+ x = SepConv_BN(x, 256, 'decoder_conv1',
212
+ depth_activation=True, epsilon=1e-5)
213
+ return x
214
+
215
+
216
+
217
+ #def icnr_weights(init = tf.glorot_normal_initializer(), scale=2, shape=[3,3,32,4], dtype = tf.float32):
218
+ #sess = tf.Session()
219
+ #return sess.run(ICNR(init, scale=scale)(shape=shape, dtype=dtype))
220
+
221
+ class ICNR:
222
+ """ICNR initializer for checkerboard artifact free sub pixel convolution
223
+ Ref:
224
+ [1] Andrew Aitken et al. Checkerboard artifact free sub-pixel convolution
225
+ https://arxiv.org/pdf/1707.02937.pdf)
226
+ Args:
227
+ initializer: initializer used for sub kernels (orthogonal, glorot uniform, etc.)
228
+ scale: scale factor of sub pixel convolution
229
+ """
230
+
231
+ def __init__(self, initializer, scale=1):
232
+ self.scale = scale
233
+ self.initializer = initializer
234
+
235
+ def __call__(self, shape, dtype, partition_info=None):
236
+ shape = list(shape)
237
+ if self.scale == 1:
238
+ return self.initializer(shape)
239
+
240
+ new_shape = shape[:3] + [shape[3] // (self.scale ** 2)]
241
+ x = self.initializer(new_shape, dtype, partition_info)
242
+ x = tf.transpose(x, perm=[2, 0, 1, 3])
243
+ x = tf.image.resize(x, size=(shape[0] * self.scale, shape[1] * self.scale), method='nearest')
244
+ x = tf.nn.space_to_depth(x, block_size=self.scale)
245
+ x = tf.transpose(x, perm=[1, 2, 0, 3])
246
+
247
+ return x
248
+
249
+ class Subpixel(Conv2D):
250
+ def __init__(self,
251
+ filters,
252
+ kernel_size,
253
+ r,
254
+ padding='valid',
255
+ data_format=None,
256
+ strides=(1,1),
257
+ activation=None,
258
+ use_bias=True,
259
+ kernel_initializer='glorot_uniform',
260
+ bias_initializer='zeros',
261
+ kernel_regularizer=None,
262
+ bias_regularizer=None,
263
+ activity_regularizer=None,
264
+ kernel_constraint=None,
265
+ bias_constraint=None,
266
+ **kwargs):
267
+ super(Subpixel, self).__init__(
268
+ filters=r*r*filters,
269
+ kernel_size=kernel_size,
270
+ strides=strides,
271
+ padding=padding,
272
+ data_format=data_format,
273
+ activation=activation,
274
+ use_bias=use_bias,
275
+ kernel_initializer=kernel_initializer,
276
+ bias_initializer=bias_initializer,
277
+ kernel_regularizer=kernel_regularizer,
278
+ bias_regularizer=bias_regularizer,
279
+ activity_regularizer=activity_regularizer,
280
+ kernel_constraint=kernel_constraint,
281
+ bias_constraint=bias_constraint,
282
+ **kwargs)
283
+ self.r = r
284
+
285
+ def _phase_shift(self, I):
286
+ r = self.r
287
+ bsize, a, b, c = I.get_shape().as_list()
288
+ bsize = K.shape(I)[0] # Handling Dimension(None) type for undefined batch dim
289
+ X = K.reshape(I, [bsize, a, b, int(c/(r*r)),r, r]) # bsize, a, b, c/(r*r), r, r
290
+ X = K.permute_dimensions(X, (0, 1, 2, 5, 4, 3)) # bsize, a, b, r, r, c/(r*r)
291
+ #Keras backend does not support tf.split, so in future versions this could be nicer
292
+ X = [X[:,i,:,:,:,:] for i in range(a)] # a, [bsize, b, r, r, c/(r*r)
293
+ X = K.concatenate(X, 2) # bsize, b, a*r, r, c/(r*r)
294
+ X = [X[:,i,:,:,:] for i in range(b)] # b, [bsize, r, r, c/(r*r)
295
+ X = K.concatenate(X, 2) # bsize, a*r, b*r, c/(r*r)
296
+ return X
297
+
298
+ def call(self, inputs):
299
+ return self._phase_shift(super(Subpixel, self).call(inputs))
300
+
301
+ def compute_output_shape(self, input_shape):
302
+ unshifted = super(Subpixel, self).compute_output_shape(input_shape)
303
+ return (unshifted[0], self.r*unshifted[1], self.r*unshifted[2], int(unshifted[3]/(self.r*self.r)))
304
+
305
+ def get_config(self):
306
+ config = super(Conv2D, self).get_config()
307
+ config.pop('rank')
308
+ config.pop('dilation_rate')
309
+ config['filters'] = int(config['filters'] / (self.r * self.r))
310
+ config['r'] = self.r
311
+ return config
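As a quick illustration of how the helpers above compose, here is a small sketch that wires ASPP_block and Decoder_block into a standalone Keras model. The shapes are hypothetical and roughly correspond to a 512x512 input at output stride 16 with an output-stride-4 skip feature, as produced by the Xception backbone above; it assumes this module is importable as deeplabv3p.models.layers:

from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from deeplabv3p.models.layers import ASPP_block, Decoder_block

# hypothetical encoder output (output stride 16) and low-level skip feature (output stride 4)
feature = Input(shape=(32, 32, 2048), name='encoder_feature')
skip = Input(shape=(128, 128, 256), name='skip_feature')

x = ASPP_block(feature, OS=16)   # atrous rates (6, 12, 18) plus the image pooling branch
x = Decoder_block(x, skip)       # upsample x4 and fuse with the projected skip feature
decoder = Model([feature, skip], x, name='deeplab_decoder_demo')
decoder.summary()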
models/deeplab/deeplabv3p/postprocess_np.py ADDED
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ import numpy as np
4
+
5
+ import pydensecrf.densecrf as dcrf
6
+ from pydensecrf.utils import unary_from_labels
7
+
8
+
9
+ # Fully connected CRF post processing function
10
+ def crf_postprocess(im, mask, zero_unsure=True):
11
+ colors, labels = np.unique(mask, return_inverse=True)
12
+ image_size = mask.shape[:2]
13
+ n_labels = len(set(labels.flat))
14
+ d = dcrf.DenseCRF2D(image_size[1], image_size[0], n_labels) # width, height, nlabels
15
+ U = unary_from_labels(labels, n_labels, gt_prob=.7, zero_unsure=zero_unsure)
16
+ d.setUnaryEnergy(U)
17
+ # This adds the color-independent term, features are the locations only.
18
+ d.addPairwiseGaussian(sxy=(3,3), compat=3)
19
+ # This adds the color-dependent term, i.e. features are (x,y,r,g,b).
20
+ # im is an image-array, e.g. im.dtype == np.uint8 and im.shape == (640,480,3)
21
+ d.addPairwiseBilateral(sxy=80, srgb=13, rgbim=im.astype('uint8'), compat=10)
22
+ Q = d.inference(5) # 5 - num of iterations
23
+ MAP = np.argmax(Q, axis=0).reshape(image_size)
24
+ unique_map = np.unique(MAP)
25
+ result = np.copy(MAP)
26
+ for u in unique_map: # get original labels back
27
+ np.putmask(result, MAP == u, colors[u])
28
+ return result
29
+ # MAP = crf_postprocess(frame, labels.astype('int32'), zero_unsure=False)
30
+
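A minimal usage sketch for the CRF refinement above (it assumes pydensecrf is installed; the frame and mask below are random placeholders standing in for a real RGB image and the model's predicted label mask):

import numpy as np
from deeplabv3p.postprocess_np import crf_postprocess

# placeholder inputs: an RGB frame and a hard per-pixel label mask from the model
frame = np.random.randint(0, 255, size=(512, 512, 3), dtype=np.uint8)
pred_mask = np.random.randint(0, 21, size=(512, 512)).astype('int32')

# refine the label mask with a fully connected CRF (5 mean-field iterations inside)
refined_mask = crf_postprocess(frame, pred_mask, zero_unsure=False)
print(refined_mask.shape)  # (512, 512)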
models/deeplab/eval.py ADDED
@@ -0,0 +1,565 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Calculate mIOU for Deeplabv3p model on validation dataset
5
+ """
6
+ import os, argparse, time
7
+ import numpy as np
8
+ from PIL import Image
9
+ import matplotlib.pyplot as plt
10
+ import copy
11
+ import itertools
12
+ from tqdm import tqdm
13
+ from collections import OrderedDict
14
+ import operator
15
+ from labelme.utils import lblsave as label_save
16
+
17
+ from tensorflow.keras.models import load_model
18
+ import tensorflow.keras.backend as K
19
+ import tensorflow as tf
20
+ import MNN
21
+ import onnxruntime
22
+
23
+ from common.utils import get_data_list, get_classes, get_custom_objects, optimize_tf_gpu, visualize_segmentation
24
+ from deeplabv3p.data import SegmentationGenerator
25
+ from deeplabv3p.metrics import mIOU
26
+ from deeplabv3p.postprocess_np import crf_postprocess
27
+
28
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
29
+
30
+ optimize_tf_gpu(tf, K)
31
+
32
+
33
+ def deeplab_predict_keras(model, image_data):
34
+ prediction = model.predict(image_data)
35
+ prediction = np.argmax(prediction, axis=-1)
36
+ return prediction[0]
37
+
38
+
39
+ def deeplab_predict_onnx(model, image_data):
40
+ input_tensors = []
41
+ for i, input_tensor in enumerate(model.get_inputs()):
42
+ input_tensors.append(input_tensor)
43
+ # assume only 1 input tensor for image
44
+ assert len(input_tensors) == 1, 'invalid input tensor number.'
45
+
46
+ feed = {input_tensors[0].name: image_data}
47
+ prediction = model.run(None, feed)
48
+
49
+ prediction = np.argmax(prediction, axis=-1)
50
+ return prediction[0]
51
+
52
+
53
+ def deeplab_predict_pb(model, image_data):
54
+ # NOTE: a TF 1.x frozen pb graph needs explicit input/output tensor names,
55
+ # so we hardcode them here and fetch the tensors from the imported graph
56
+ output_tensor_name = 'graph/pred_mask/Softmax:0'
57
+
58
+ # assume only 1 input tensor for image
59
+ input_tensor_name = 'graph/image_input:0'
60
+
61
+ # get input/output tensors
62
+ image_input = model.get_tensor_by_name(input_tensor_name)
63
+ output_tensor = model.get_tensor_by_name(output_tensor_name)
64
+
65
+ with tf.compat.v1.Session(graph=model) as sess:
66
+ prediction = sess.run(output_tensor, feed_dict={
67
+ image_input: image_data
68
+ })
69
+ prediction = np.argmax(prediction, axis=-1)
70
+ return prediction[0]
71
+
72
+
73
+ def deeplab_predict_tflite(interpreter, image_data):
74
+ input_details = interpreter.get_input_details()
75
+ output_details = interpreter.get_output_details()
76
+
77
+ interpreter.set_tensor(input_details[0]['index'], image_data)
78
+ interpreter.invoke()
79
+
80
+ prediction = []
81
+ for output_detail in output_details:
82
+ output_data = interpreter.get_tensor(output_detail['index'])
83
+ prediction.append(output_data)
84
+
85
+ prediction = np.argmax(prediction[0], axis=-1)
86
+ return prediction[0]
87
+
88
+
89
+ def deeplab_predict_mnn(interpreter, session, image_data):
90
+ from functools import reduce
91
+ from operator import mul
92
+
93
+ # assume only 1 input tensor for image
94
+ input_tensor = interpreter.getSessionInput(session)
95
+ # get input shape
96
+ input_shape = input_tensor.getShape()
97
+
98
+ # use a temp tensor to copy data
99
+ # TODO: currently MNN python binding have mem leak when creating MNN.Tensor
100
+ # from numpy array, only from tuple is good. So we convert input image to tuple
101
+ input_elementsize = reduce(mul, input_shape)
102
+ tmp_input = MNN.Tensor(input_shape, input_tensor.getDataType(),\
103
+ tuple(image_data.reshape(input_elementsize, -1)), input_tensor.getDimensionType())
104
+
105
+ input_tensor.copyFrom(tmp_input)
106
+ interpreter.runSession(session)
107
+
108
+ prediction = []
109
+ # we only handle single output model
110
+ output_tensor = interpreter.getSessionOutput(session)
111
+ output_shape = output_tensor.getShape()
112
+
113
+ assert output_tensor.getDataType() == MNN.Halide_Type_Float
114
+
115
+ # copy output tensor to host, for further postprocess
116
+ output_elementsize = reduce(mul, output_shape)
117
+ tmp_output = MNN.Tensor(output_shape, output_tensor.getDataType(),\
118
+ tuple(np.zeros(output_shape, dtype=float).reshape(output_elementsize, -1)), output_tensor.getDimensionType())
119
+
120
+ output_tensor.copyToHostTensor(tmp_output)
121
+ #tmp_output.printTensorData()
122
+
123
+ output_data = np.array(tmp_output.getData(), dtype=float).reshape(output_shape)
124
+ # our postprocess code based on TF channel last format, so if the output format
125
+ # doesn't match, we need to transpose
126
+ if output_tensor.getDimensionType() == MNN.Tensor_DimensionType_Caffe:
127
+ output_data = output_data.transpose((0,2,3,1))
128
+ elif output_tensor.getDimensionType() == MNN.Tensor_DimensionType_Caffe_C4:
129
+ raise ValueError('unsupported output tensor dimension type')
130
+
131
+ prediction.append(output_data)
132
+ prediction = np.argmax(prediction[0], axis=-1)
133
+ return prediction[0]
134
+
135
+
136
+ def plot_confusion_matrix(cm, classes, mIOU, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
137
+ if normalize:
138
+ cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
139
+ trained_classes = classes
140
+ plt.figure()
141
+ plt.imshow(cm, interpolation='nearest', cmap=cmap)
142
+ plt.title(title,fontsize=11)
143
+ tick_marks = np.arange(len(classes))
144
+ plt.xticks(np.arange(len(trained_classes)), classes, rotation=90,fontsize=9)
145
+ plt.yticks(tick_marks, classes,fontsize=9)
146
+ thresh = cm.max() / 2.
147
+ for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
148
+ plt.text(j, i, np.round(cm[i, j],2), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black", fontsize=7)
149
+ plt.tight_layout()
150
+ plt.ylabel('True label',fontsize=9)
151
+ plt.xlabel('Predicted label',fontsize=9)
152
+
153
+ plt.title('Mean IOU: '+ str(np.round(mIOU*100, 2)))
154
+ output_path = os.path.join('result','confusion_matrix.png')
155
+ os.makedirs('result', exist_ok=True)
156
+ plt.savefig(output_path)
157
+ #plt.show()
158
+ return
159
+
160
+
161
+ def adjust_axes(r, t, fig, axes):
162
+ """
163
+ Plot - adjust axes
164
+ """
165
+ # get text width for re-scaling
166
+ bb = t.get_window_extent(renderer=r)
167
+ text_width_inches = bb.width / fig.dpi
168
+ # get axis width in inches
169
+ current_fig_width = fig.get_figwidth()
170
+ new_fig_width = current_fig_width + text_width_inches
171
+ proportion = new_fig_width / current_fig_width
172
+ # get axis limit
173
+ x_lim = axes.get_xlim()
174
+ axes.set_xlim([x_lim[0], x_lim[1]*proportion])
175
+
176
+
177
+ def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color, true_p_bar):
178
+ """
179
+ Draw plot using Matplotlib
180
+ """
181
+ # sort the dictionary by decreasing value, into a list of tuples
182
+ sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1))
183
+ # unpacking the list of tuples into two lists
184
+ sorted_keys, sorted_values = zip(*sorted_dic_by_value)
185
+ #
186
+ if true_p_bar != "":
187
+ """
188
+ Special case to draw in (green=true predictions) & (red=false predictions)
189
+ """
190
+ fp_sorted = []
191
+ tp_sorted = []
192
+ for key in sorted_keys:
193
+ fp_sorted.append(dictionary[key] - true_p_bar[key])
194
+ tp_sorted.append(true_p_bar[key])
195
+ plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Predictions')
196
+ plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Predictions', left=fp_sorted)
197
+ # add legend
198
+ plt.legend(loc='lower right')
199
+ """
200
+ Write number on side of bar
201
+ """
202
+ fig = plt.gcf() # gcf - get current figure
203
+ axes = plt.gca()
204
+ r = fig.canvas.get_renderer()
205
+ for i, val in enumerate(sorted_values):
206
+ fp_val = fp_sorted[i]
207
+ tp_val = tp_sorted[i]
208
+ fp_str_val = " " + str(fp_val)
209
+ tp_str_val = fp_str_val + " " + str(tp_val)
210
+ # trick to paint multicolor with offset:
211
+ # first paint everything and then repaint the first number
212
+ t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold')
213
+ plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold')
214
+ if i == (len(sorted_values)-1): # largest bar
215
+ adjust_axes(r, t, fig, axes)
216
+ else:
217
+ plt.barh(range(n_classes), sorted_values, color=plot_color)
218
+ """
219
+ Write number on side of bar
220
+ """
221
+ fig = plt.gcf() # gcf - get current figure
222
+ axes = plt.gca()
223
+ r = fig.canvas.get_renderer()
224
+ for i, val in enumerate(sorted_values):
225
+ str_val = " " + str(val) # add a space before
226
+ if val < 1.0:
227
+ str_val = " {0:.2f}".format(val)
228
+ t = plt.text(val, i, str_val, color=plot_color, va='center', fontweight='bold')
229
+ # re-set axes to show number inside the figure
230
+ if i == (len(sorted_values)-1): # largest bar
231
+ adjust_axes(r, t, fig, axes)
232
+ # set window title
233
+ fig.canvas.set_window_title(window_title)
234
+ # write classes in y axis
235
+ tick_font_size = 12
236
+ plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size)
237
+ """
238
+ Re-scale height accordingly
239
+ """
240
+ init_height = fig.get_figheight()
241
+ # compute the matrix height in points and inches
242
+ dpi = fig.dpi
243
+ height_pt = n_classes * (tick_font_size * 1.4) # 1.4 (some spacing)
244
+ height_in = height_pt / dpi
245
+ # compute the required figure height
246
+ top_margin = 0.15 # in percentage of the figure height
247
+ bottom_margin = 0.05 # in percentage of the figure height
248
+ figure_height = height_in / (1 - top_margin - bottom_margin)
249
+ # set new height
250
+ if figure_height > init_height:
251
+ fig.set_figheight(figure_height)
252
+
253
+ # set plot title
254
+ plt.title(plot_title, fontsize=14)
255
+ # set axis titles
256
+ # plt.xlabel('classes')
257
+ plt.xlabel(x_label, fontsize='large')
258
+ # adjust size of window
259
+ fig.tight_layout()
260
+ # save the plot
261
+ fig.savefig(output_path)
262
+ # show image
263
+ if to_show:
264
+ plt.show()
265
+ # close the plot
266
+ plt.close()
267
+
268
+
269
+ def plot_mIOU_result(IOUs, mIOU, num_classes):
270
+ '''
271
+ Draw mIOU plot (Show IOU's of all classes in decreasing order)
272
+ '''
273
+ window_title = "mIOU"
274
+ plot_title = "mIOU: {0:.3f}%".format(mIOU*100)
275
+ x_label = "Intersection Over Union"
276
+ output_path = os.path.join('result','mIOU.png')
277
+ os.makedirs('result', exist_ok=True)
278
+ draw_plot_func(IOUs, num_classes, window_title, plot_title, x_label, output_path, to_show=False, plot_color='royalblue', true_p_bar='')
279
+
280
+
281
+ def save_seg_result(image, pred_mask, gt_mask, image_id, class_names):
282
+ # save predict mask as PNG image
283
+ mask_dir = os.path.join('result','predict_mask')
284
+ os.makedirs(mask_dir, exist_ok=True)
285
+ label_save(os.path.join(mask_dir, str(image_id)+'.png'), pred_mask)
286
+
287
+ # visualize segmentation result
288
+ title_str = 'Predict Segmentation\nmIOU: '+str(mIOU(pred_mask, gt_mask))
289
+ gt_title_str = 'GT Segmentation'
290
+ image_array = visualize_segmentation(image, pred_mask, gt_mask, class_names=class_names, title=title_str, gt_title=gt_title_str, ignore_count_threshold=1)
291
+
292
+ # save result as JPG
293
+ result_dir = os.path.join('result','segmentation')
294
+ os.makedirs(result_dir, exist_ok=True)
295
+ result_file = os.path.join(result_dir, str(image_id)+'.jpg')
296
+ Image.fromarray(image_array).save(result_file)
297
+
298
+
299
+ def generate_matrix(gt_mask, pre_mask, num_classes):
300
+ valid = (gt_mask >= 0) & (gt_mask < num_classes)
301
+ label = num_classes * gt_mask[valid].astype('int') + pre_mask[valid]
302
+ count = np.bincount(label, minlength=num_classes**2)
303
+ confusion_matrix = count.reshape(num_classes, num_classes)
304
+ return confusion_matrix
305
+
306
+
307
+ def eval_mIOU(model, model_format, dataset_path, dataset, class_names, model_input_shape, do_crf=False, save_result=False, show_background=False):
308
+ num_classes = len(class_names)
309
+
310
+ #prepare eval dataset generator
311
+ eval_generator = SegmentationGenerator(dataset_path, dataset,
312
+ 1, #batch_size
313
+ num_classes,
314
+ target_size=model_input_shape[::-1],
315
+ weighted_type=None,
316
+ is_eval=True,
317
+ augment=False)
318
+
319
+ if model_format == 'MNN':
320
+ #MNN inference engine need create session
321
+ session = model.createSession()
322
+
323
+ # confusion matrix for all classes
324
+ confusion_matrix = np.zeros((num_classes, num_classes), dtype=float)
325
+
326
+ # get model prediction result
327
+ pbar = tqdm(total=len(eval_generator), desc='Eval model')
328
+ for n, (image_data, y_true) in enumerate(eval_generator):
329
+
330
+ # support of tflite model
331
+ if model_format == 'TFLITE':
332
+ y_pred = deeplab_predict_tflite(model, image_data)
333
+ # support of MNN model
334
+ elif model_format == 'MNN':
335
+ y_pred = deeplab_predict_mnn(model, session, image_data)
336
+ # support of TF 1.x frozen pb model
337
+ elif model_format == 'PB':
338
+ y_pred = deeplab_predict_pb(model, image_data)
339
+ # support of ONNX model
340
+ elif model_format == 'ONNX':
341
+ y_pred = deeplab_predict_onnx(model, image_data)
342
+ # normal keras h5 model
343
+ elif model_format == 'H5':
344
+ y_pred = deeplab_predict_keras(model, image_data)
345
+ else:
346
+ raise ValueError('invalid model format')
347
+
348
+ image = image_data[0].astype('uint8')
349
+ pred_mask = y_pred.reshape(model_input_shape)
350
+ gt_mask = y_true.reshape(model_input_shape).astype('int')
351
+
352
+ # add CRF postprocess
353
+ if do_crf:
354
+ pred_mask = crf_postprocess(image, pred_mask, zero_unsure=False)
355
+
356
+ # save segmentation result image
357
+ if save_result:
358
+ # get eval image name to save corresponding result
359
+ image_list = eval_generator.get_batch_image_path(n)
360
+ assert len(image_list) == 1, 'incorrect image batch'
361
+ image_id = os.path.splitext(os.path.basename(image_list[0]))[0]
362
+
363
+ save_seg_result(image, pred_mask, gt_mask, image_id, class_names)
364
+
365
+ # update confusion matrix
366
+ pred_mask = pred_mask.astype('int')
367
+ gt_mask = gt_mask.astype('int')
368
+ confusion_matrix += generate_matrix(gt_mask, pred_mask, num_classes)
369
+
370
+ # compare prediction result with label
371
+ # to update confusion matrix
372
+ #flat_pred = np.ravel(pred_mask).astype('int')
373
+ #flat_label = np.ravel(gt_mask).astype('int')
374
+ #for p, l in zip(flat_pred, flat_label):
375
+ #if l == num_classes or l == 255:
376
+ #continue
377
+ #if l < num_classes and p < num_classes:
378
+ #confusion_matrix[l, p] += 1
379
+ #else:
380
+ #print('Invalid entry encountered, skipping! Label: ', l,
381
+ #' Prediction: ', p)
382
+
383
+ pbar.update(1)
384
+ pbar.close()
385
+
386
+ # calculate Pixel accuracy
387
+ PixelAcc = np.diag(confusion_matrix).sum() / confusion_matrix.sum()
388
+
389
+ # calculate Class accuracy
390
+ ClassAcc = np.diag(confusion_matrix) / confusion_matrix.sum(axis=1)
391
+ mClassAcc = np.nanmean(ClassAcc)
392
+
393
+ # calculate mIoU
394
+ I = np.diag(confusion_matrix)
395
+ U = np.sum(confusion_matrix, axis=0) + np.sum(confusion_matrix, axis=1) - I
396
+ IoU = I/U
397
+ #mIoU = np.nanmean(IoU)
398
+
399
+ # calculate FW (Frequency Weighted) IoU
400
+ Freq = np.sum(confusion_matrix, axis=1) / np.sum(confusion_matrix)
401
+ FWIoU = (Freq[Freq > 0] * IoU[Freq > 0]).sum()
402
+
403
+ # calculate Dice Coefficient
404
+ DiceCoef = 2*I / (U+I)
405
+
406
+ # collect IOU/ClassAcc/Dice/Freq for every class
407
+ IOUs, CLASS_ACCs, DICEs, FREQs = {}, {}, {}, {}
408
+ for i,(class_name, iou, class_acc, dice, freq) in enumerate(zip(class_names, IoU, ClassAcc, DiceCoef, Freq)):
409
+ IOUs[class_name] = iou
410
+ CLASS_ACCs[class_name] = class_acc
411
+ DICEs[class_name] = dice
412
+ FREQs[class_name] = freq
413
+
414
+ if not show_background:
415
+ # get rid of background class info
416
+ display_class_names = copy.deepcopy(class_names)
417
+ display_class_names.remove('background')
418
+ display_confusion_matrix = copy.deepcopy(confusion_matrix[1:, 1:])
419
+ IOUs.pop('background')
420
+ num_classes = num_classes - 1
421
+ else:
422
+ display_class_names = class_names
423
+ display_confusion_matrix = confusion_matrix
424
+
425
+ #sort IoU result by value, in descending order
426
+ IOUs = OrderedDict(sorted(IOUs.items(), key=operator.itemgetter(1), reverse=True))
427
+
428
+ #calculate mIOU from final IOU dict
429
+ mIoU = np.nanmean(list(IOUs.values()))
430
+
431
+ #show result
432
+ print('\nevaluation summary')
433
+ for class_name, iou in IOUs.items():
434
+ print('%s: IoU %.4f, Freq %.4f, ClassAcc %.4f, Dice %.4f' % (class_name, iou, FREQs[class_name], CLASS_ACCs[class_name], DICEs[class_name]))
435
+ print('mIoU=%.3f' % (mIoU*100))
436
+ print('FWIoU=%.3f' % (FWIoU*100))
437
+ print('PixelAcc=%.3f' % (PixelAcc*100))
438
+ print('mClassAcc=%.3f' % (mClassAcc*100))
439
+
440
+
441
+ # Plot mIOU & confusion matrix
442
+ plot_mIOU_result(IOUs, mIoU, num_classes)
443
+ plot_confusion_matrix(display_confusion_matrix, display_class_names, mIoU, normalize=True)
444
+
445
+ return mIoU
446
+
447
+
448
+
449
+ #load TF 1.x frozen pb graph
450
+ def load_graph(model_path):
451
+ # We parse the graph_def file
452
+ with tf.io.gfile.GFile(model_path, "rb") as f:
453
+ graph_def = tf.compat.v1.GraphDef()
454
+ graph_def.ParseFromString(f.read())
455
+
456
+ # We load the graph_def in the default graph
457
+ with tf.Graph().as_default() as graph:
458
+ tf.import_graph_def(
459
+ graph_def,
460
+ input_map=None,
461
+ return_elements=None,
462
+ name="graph",
463
+ op_dict=None,
464
+ producer_op_list=None
465
+ )
466
+ return graph
467
+
468
+
469
+ def load_eval_model(model_path):
470
+ # support of tflite model
471
+ if model_path.endswith('.tflite'):
472
+ from tensorflow.lite.python import interpreter as interpreter_wrapper
473
+ model = interpreter_wrapper.Interpreter(model_path=model_path)
474
+ model.allocate_tensors()
475
+ model_format = 'TFLITE'
476
+
477
+ # support of MNN model
478
+ elif model_path.endswith('.mnn'):
479
+ model = MNN.Interpreter(model_path)
480
+ model_format = 'MNN'
481
+
482
+ # support of TF 1.x frozen pb model
483
+ elif model_path.endswith('.pb'):
484
+ model = load_graph(model_path)
485
+ model_format = 'PB'
486
+
487
+ # support of ONNX model
488
+ elif model_path.endswith('.onnx'):
489
+ model = onnxruntime.InferenceSession(model_path)
490
+ model_format = 'ONNX'
491
+
492
+ # normal keras h5 model
493
+ elif model_path.endswith('.h5'):
494
+ custom_object_dict = get_custom_objects()
495
+
496
+ model = load_model(model_path, compile=False, custom_objects=custom_object_dict)
497
+ model_format = 'H5'
498
+ K.set_learning_phase(0)
499
+ else:
500
+ raise ValueError('invalid model file')
501
+
502
+ return model, model_format
503
+
504
+
505
+ def main():
506
+ parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description='evaluate Deeplab model (h5/pb/tflite/mnn) with test dataset')
507
+ '''
508
+ Command line options
509
+ '''
510
+ parser.add_argument(
511
+ '--model_path', type=str, required=True,
512
+ help='path to model file')
513
+
514
+ parser.add_argument(
515
+ '--dataset_path', type=str, required=True,
516
+ help='dataset path containing images and label png file')
517
+
518
+ parser.add_argument(
519
+ '--dataset_file', type=str, required=True,
520
+ help='eval samples txt file')
521
+
522
+ parser.add_argument(
523
+ '--classes_path', type=str, required=False, default='configs/voc_classes.txt',
524
+ help='path to class definitions, default=%(default)s')
525
+
526
+ parser.add_argument(
527
+ '--model_input_shape', type=str,
528
+ help='model image input size as <height>x<width>, default=%(default)s', default='512x512')
529
+
530
+ parser.add_argument(
531
+ '--do_crf', action="store_true",
532
+ help='whether to add CRF postprocess for model output', default=False)
533
+
534
+ parser.add_argument(
535
+ '--show_background', default=False, action="store_true",
536
+ help='Show background evaluation info')
537
+
538
+ parser.add_argument(
539
+ '--save_result', default=False, action="store_true",
540
+ help='Save the segmentation result image in result/segmentation dir')
541
+
542
+ args = parser.parse_args()
543
+
544
+ # param parse
545
+ height, width = args.model_input_shape.split('x')
546
+ model_input_shape = (int(height), int(width))
547
+
548
+ # add background class to match model & GT
549
+ class_names = get_classes(args.classes_path)
550
+ assert len(class_names) < 254, 'PNG label images only support fewer than 254 classes.'
551
+ class_names = ['background'] + class_names
552
+
553
+ model, model_format = load_eval_model(args.model_path)
554
+
555
+ # get dataset list
556
+ dataset = get_data_list(args.dataset_file)
557
+
558
+ start = time.time()
559
+ eval_mIOU(model, model_format, args.dataset_path, dataset, class_names, model_input_shape, args.do_crf, args.save_result, args.show_background)
560
+ end = time.time()
561
+ print("Evaluation time cost: {:.6f}s".format(end - start))
562
+
563
+
564
+ if __name__ == '__main__':
565
+ main()
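The script above is normally driven from the command line through the argparse options defined in main(), e.g. `python eval.py --model_path=<model.h5> --dataset_path=<voc_dir> --dataset_file=<val.txt> --model_input_shape=512x512 --save_result`. For programmatic use, a rough sketch along the lines of main() could look like the following (paths are placeholders, and the optional MNN/onnxruntime backends imported at the top of the file must be installed):

from eval import load_eval_model, eval_mIOU
from common.utils import get_data_list, get_classes

# placeholder paths for a trained Keras model and a PASCAL-VOC-style dataset
model, model_format = load_eval_model('trained_deeplab.h5')
class_names = ['background'] + get_classes('configs/voc_classes.txt')
dataset = get_data_list('VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt')

mIoU = eval_mIOU(model, model_format, 'VOCdevkit/VOC2012', dataset, class_names,
                 model_input_shape=(512, 512), save_result=True)
print('mIoU: %.3f' % (mIoU * 100))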
models/deeplab/example/2007_000039.jpg ADDED
models/deeplab/example/2007_000039.png ADDED
models/deeplab/example/2007_000346.jpg ADDED
models/deeplab/example/2007_000346.png ADDED
models/deeplab/example/air.jpg ADDED
models/deeplab/example/car.jpg ADDED