Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes.
- .github/ISSUE_TEMPLATE/bug_report.md +38 -0
- .github/ISSUE_TEMPLATE/feature_request.md +20 -0
- .gitmodules +6 -0
- convert_model.py +56 -0
- expman/expman/__init__.py +7 -0
- expman/expman/__main__.py +58 -0
- expman/expman/__pycache__/__init__.cpython-311.pyc +0 -0
- expman/expman/__pycache__/exp_group.cpython-311.pyc +0 -0
- expman/expman/__pycache__/experiment.cpython-311.pyc +0 -0
- expman/expman/exp_group.py +96 -0
- expman/expman/experiment.py +233 -0
- losses.py +18 -0
- matlab/Meye.m +310 -0
- matlab/README.md +57 -0
- matlab/example.m +211 -0
- models/deeplab.py +78 -0
- models/deeplab/README.md +380 -0
- models/deeplab/assets/2007_000346_inference.png +0 -0
- models/deeplab/assets/confusion_matrix.png +0 -0
- models/deeplab/assets/dog_inference.png +0 -0
- models/deeplab/assets/mIOU.png +0 -0
- models/deeplab/common/callbacks.py +32 -0
- models/deeplab/common/data_utils.py +523 -0
- models/deeplab/common/model_utils.py +168 -0
- models/deeplab/common/utils.py +343 -0
- models/deeplab/configs/ade20k_classes.txt +150 -0
- models/deeplab/configs/cityscapes_classes.txt +33 -0
- models/deeplab/configs/coco_classes.txt +80 -0
- models/deeplab/configs/voc_classes.txt +20 -0
- models/deeplab/deeplab.py +297 -0
- models/deeplab/deeplabv3p/data.py +161 -0
- models/deeplab/deeplabv3p/loss.py +74 -0
- models/deeplab/deeplabv3p/metrics.py +46 -0
- models/deeplab/deeplabv3p/model.py +96 -0
- models/deeplab/deeplabv3p/models/__pycache__/deeplabv3p_mobilenetv3.cpython-311.pyc +0 -0
- models/deeplab/deeplabv3p/models/__pycache__/layers.cpython-311.pyc +0 -0
- models/deeplab/deeplabv3p/models/deeplabv3p_mobilenetv2.py +349 -0
- models/deeplab/deeplabv3p/models/deeplabv3p_mobilenetv3.py +912 -0
- models/deeplab/deeplabv3p/models/deeplabv3p_peleenet.py +428 -0
- models/deeplab/deeplabv3p/models/deeplabv3p_resnet50.py +408 -0
- models/deeplab/deeplabv3p/models/deeplabv3p_xception.py +239 -0
- models/deeplab/deeplabv3p/models/layers.py +311 -0
- models/deeplab/deeplabv3p/postprocess_np.py +30 -0
- models/deeplab/eval.py +565 -0
- models/deeplab/example/2007_000039.jpg +0 -0
- models/deeplab/example/2007_000039.png +0 -0
- models/deeplab/example/2007_000346.jpg +0 -0
- models/deeplab/example/2007_000346.png +0 -0
- models/deeplab/example/air.jpg +0 -0
- models/deeplab/example/car.jpg +0 -0
.github/ISSUE_TEMPLATE/bug_report.md
ADDED
@@ -0,0 +1,38 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: bug
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, add screenshots to help explain your problem.

**Desktop (please complete the following information):**
- OS: [e.g. iOS]
- Browser [e.g. chrome, safari]
- Version [e.g. 22]

**Smartphone (please complete the following information):**
- Device: [e.g. iPhone6]
- OS: [e.g. iOS8.1]
- Browser [e.g. stock browser, safari]
- Version [e.g. 22]

**Additional context**
Add any other context about the problem here.
.github/ISSUE_TEMPLATE/feature_request.md
ADDED
@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: enhancement
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.
.gitmodules
ADDED
@@ -0,0 +1,6 @@
[submodule "expman"]
	path = expman
	url = https://github.com/fabiocarrara/expman
[submodule "models/deeplab"]
	path = models/deeplab
	url = https://github.com/david8862/tf-keras-deeplabv3p-model-set
convert_model.py
ADDED
@@ -0,0 +1,56 @@
import tensorflow as tf
from tensorflow.keras import backend as K
from adabelief_tf import AdaBeliefOptimizer

def iou_coef(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    intersection = K.sum(K.abs(y_true * y_pred), axis=[1, 2, 3])
    union = K.sum(y_true, axis=[1, 2, 3]) + K.sum(y_pred, axis=[1, 2, 3]) - intersection
    return K.mean((intersection + 1e-6) / (union + 1e-6))

def dice_coef(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    intersection = K.sum(K.abs(y_true * y_pred), axis=[1, 2, 3])
    return K.mean((2. * intersection + 1e-6) / (K.sum(y_true, axis=[1, 2, 3]) + K.sum(y_pred, axis=[1, 2, 3]) + 1e-6))

def boundary_loss(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    dy_true, dx_true = tf.image.image_gradients(y_true)
    dy_pred, dx_pred = tf.image.image_gradients(y_pred)
    loss = tf.reduce_mean(tf.abs(dy_pred - dy_true) + tf.abs(dx_pred - dx_true))
    return loss * 0.5

def enhanced_binary_crossentropy(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)
    boundary = boundary_loss(y_true, y_pred)
    return bce + boundary

def hard_swish(x):
    return x * tf.nn.relu6(x + 3) * (1. / 6.)

# Path to your current .keras model
keras_path = 'runs/b32_c-conv_d-|root|meye|data|NN_human_mouse_eyes|_g1.5_l0.001_num_c1_num_f16_num_s5_r128_se23_sp-random_up-relu_us0/best_model.keras'

# Load the model with custom objects
custom_objects = {
    'AdaBeliefOptimizer': AdaBeliefOptimizer,
    'iou_coef': iou_coef,
    'dice_coef': dice_coef,
    'hard_swish': hard_swish,
    'enhanced_binary_crossentropy': enhanced_binary_crossentropy,
    'boundary_loss': boundary_loss
}

print("Loading model from:", keras_path)
model = tf.keras.models.load_model(keras_path, custom_objects=custom_objects)

# Save as .h5
h5_path = keras_path.replace('.keras', '.h5')
print("Saving model to:", h5_path)
model.save(h5_path, save_format='h5')
print("Conversion complete!")
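As a quick sanity check after running the script above, the converted `.h5` file can be reloaded with the same `custom_objects` mapping and compared to the original model on a dummy frame. This is a minimal sketch, not part of the committed files; it assumes the variables `model`, `custom_objects`, and `h5_path` from `convert_model.py` are still in scope and that the network takes 128x128 single-channel inputs with multiple outputs.

```python
import numpy as np
import tensorflow as tf

# Reload the converted model; compile=False skips optimizer state, which is
# enough for an inference-only comparison (assumes the same custom_objects dict).
restored = tf.keras.models.load_model(h5_path, custom_objects=custom_objects, compile=False)

# Compare the original and converted models on a random frame (assumed input shape).
dummy = np.random.rand(1, 128, 128, 1).astype('float32')
for original_out, restored_out in zip(model.predict(dummy), restored.predict(dummy)):
    print('max abs difference:', np.abs(original_out - restored_out).max())
```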
expman/expman/__init__.py
ADDED
@@ -0,0 +1,7 @@
from .experiment import Experiment, exp_filter, use_hash_naming
from .exp_group import ExpGroup

abbreviate = Experiment.abbreviate
from_dir = Experiment.from_dir
gather = ExpGroup.gather
is_exp_dir = Experiment.is_exp_dir
expman/expman/__main__.py
ADDED
@@ -0,0 +1,58 @@
import argparse

from .exp_group import ExpGroup


def add_param(args):
    exps = ExpGroup.gather(args.run)
    for exp in exps:
        exp.add_parameter(args.param, args.value)


def mv_param(args):
    exps = ExpGroup.gather(args.run)
    for exp in exps:
        exp.rename_parameter(args.param, args.new_param)


def rm_param(args):
    exps = ExpGroup.gather(args.run)
    for exp in exps:
        exp.remove_parameter(args.param)


def command_line():
    def guess(value):
        """ try to guess a python type for the passed string parameter """
        try:
            result = eval(value)
        except (NameError, ValueError):
            result = value
        return result

    parser = argparse.ArgumentParser(description='Experiment Manager Utilities')
    subparsers = parser.add_subparsers(dest='command')
    subparsers.required = True

    parser_add = subparsers.add_parser('add-param')
    parser_add.add_argument('run', default='runs/')
    parser_add.add_argument('param', help='new param name')
    parser_add.add_argument('value', type=guess, help='new param value')
    parser_add.set_defaults(func=add_param)

    parser_rm = subparsers.add_parser('rm-param')
    parser_rm.add_argument('run', default='runs/')
    parser_rm.add_argument('param', help='param to remove')
    parser_rm.set_defaults(func=rm_param)

    parser_mv = subparsers.add_parser('mv-param')
    parser_mv.add_argument('run', default='runs/')
    parser_mv.add_argument('param', help='param to rename')
    parser_mv.add_argument('new_param', help='new param name')
    parser_mv.set_defaults(func=mv_param)

    args = parser.parse_args()
    args.func(args)


command_line()
expman/expman/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (448 Bytes).
expman/expman/__pycache__/exp_group.cpython-311.pyc
ADDED
Binary file (6.82 kB).
expman/expman/__pycache__/experiment.cpython-311.pyc
ADDED
Binary file (16.9 kB).
expman/expman/exp_group.py
ADDED
@@ -0,0 +1,96 @@
import os
import pandas as pd

from glob import glob
from .experiment import Experiment


class ExpGroup:
    @classmethod
    def gather(cls, root='runs/'):
        if Experiment.is_exp_dir(root):
            exps = (root,)
        else:
            exps = glob(os.path.join(root, '*'))
            exps = filter(Experiment.is_exp_dir, exps)

        exps = map(Experiment.from_dir, exps)
        exps = filter(lambda x: x.existing, exps)
        exps = tuple(exps)
        return cls(exps)

    def __init__(self, experiments=()):
        assert isinstance(experiments, (list, tuple)), "'experiments' must be a list or tuple"
        self.experiments = experiments

    @staticmethod
    def _collect_one(exp_id, exp, csv=None, index_col=None):
        params = exp.params.to_frame().transpose().infer_objects()  # as DataFrame
        params['exp_id'] = exp_id

        if csv is None:
            return params

        csv_path = exp.path_to(csv)
        if os.path.exists(csv_path):
            stuff = pd.read_csv(csv_path, index_col=index_col)
        else:  # try globbing
            csv_files = os.path.join(exp.path, csv)
            csv_files = list(glob(csv_files))
            if len(csv_files) == 0:
                return pd.DataFrame()

            stuff = map(lambda x: pd.read_csv(x, index_col=index_col, float_precision='round_trip'), csv_files)
            stuff = pd.concat(stuff, ignore_index=True)

        stuff['exp_id'] = exp_id
        return pd.merge(params, stuff, on='exp_id')

    def collect(self, csv=None, index_col=None, prefix=''):
        results = [self._collect_one(exp_id, exp, csv=csv, index_col=index_col) for exp_id, exp in enumerate(self.experiments)]
        results = pd.concat(results, ignore_index=True, sort=False)

        if len(results):
            # build minimal exp_name
            exp_name = ''
            params = results.loc[:, :'exp_id'].drop('exp_id', axis=1)
            if len(params) > 1:
                varying_params = params.loc[:, params.nunique() > 1]
                exp_name = varying_params.apply(Experiment.abbreviate, axis=1)
            idx = results.columns.get_loc('exp_id') + 1
            results.insert(idx, 'exp_name', prefix + exp_name)

        return results

    def filter(self, filters):
        if isinstance(filters, str):
            # parse '<param1>=<value1>,<param2>=<value2>,...' into a dict
            filters = filters.split(',')
            filters = map(lambda x: x.split('='), filters)
            filters = {k: v for k, v in filters}

        def __filter_exp(e):
            for param, value in filters.items():
                try:
                    p = e.params[param]
                    ptype = type(p)
                    if p != ptype(value):
                        return False
                except:
                    return False

            return True

        filtered_exps = filter(__filter_exp, self.experiments)
        filtered_exps = tuple(filtered_exps)
        return ExpGroup(filtered_exps)

    def items(self, short_names=True, prefix=''):
        if short_names:
            params = self.collect(prefix=prefix)
            exp_names = params['exp_name'].values
            return zip(exp_names, self.experiments)

        return self.experiments

    def __iter__(self):
        return iter(self.experiments)
expman/expman/experiment.py
ADDED
@@ -0,0 +1,233 @@
import argparse
import ast
import os
import hashlib
import shutil
import numbers
from glob import glob
from io import StringIO

import numpy as np
import pandas as pd


hash_naming = False

def use_hash_naming(use_hashes=True):
    global hash_naming
    assert isinstance(use_hashes, bool), "Value must be a boolean."
    hash_naming = use_hashes

def _guessed_cast(x):
    try:
        return ast.literal_eval(x)
    except:
        return x

def exp_filter(string):
    if '=' not in string:
        raise argparse.ArgumentTypeError(
            'Filter {} is not in format <param1>=<value1>[, <param2>=<value2>[, ...]]'.format(string))
    filters = string.split(',')
    filters = map(lambda x: x.split('='), filters)
    filters = {k: _guessed_cast(v) for k, v in filters}
    return filters


class Experiment:

    PARAM_FILENAME = 'params.json'

    @staticmethod
    def _abbr(name, value, params):

        def prefix_len(a, b):
            return len(os.path.commonprefix((a, b)))

        prefix = [name[:prefix_len(p, name) + 1] for p in params.keys() if p != name]
        prefix = max(prefix, key=len) if len(prefix) > 0 else name

        sep = ''
        if isinstance(value, str):
            sep = '-'
        elif isinstance(value, numbers.Number):
            value = '{:g}'.format(value)
            sep = '-' if prefix[-1].isdigit() else ''
        elif isinstance(value, (list, tuple)):
            value = map(str, value)
            value = map(lambda v: v.replace(os.sep, '|'), value)
            value = ','.join(list(value))
            sep = '-'

        return prefix, sep, value

    @classmethod
    def abbreviate(cls, params):
        if isinstance(params, pd.DataFrame):
            params = params.iloc[0]
        params = params.replace({np.nan: None})

        if hash_naming:
            exp_name = hashlib.md5(str(sorted(params.items())).encode()).hexdigest()
        else:
            abbrev_params = {k: '{}{}{}'.format(*cls._abbr(k, v, params)) for k, v in params.items()}
            abbrev = sorted(abbrev_params.values())
            exp_name = '_'.join(abbrev)

        return exp_name

    @classmethod
    def from_dir(cls, exp_dir):
        root = os.path.dirname(exp_dir.rstrip('/'))
        params = os.path.join(exp_dir, cls.PARAM_FILENAME)

        assert os.path.exists(exp_dir), "Experiment directory not found: '{}'".format(exp_dir)
        assert os.path.exists(params), "Empty run directory found: '{}'".format(params)

        params = cls._read_params(params)
        exp = cls(params, root=root, create=False)
        return exp

    @classmethod
    def is_exp_dir(cls, exp_dir):
        if os.path.isdir(exp_dir):
            params = os.path.join(exp_dir, cls.PARAM_FILENAME)
            if os.path.exists(params):
                return True

        return False

    @classmethod
    def update_exp_dir(cls, exp_dir):
        exp_dir = exp_dir.rstrip('/')
        root = os.path.dirname(exp_dir)
        name = os.path.basename(exp_dir)
        params = os.path.join(exp_dir, cls.PARAM_FILENAME)

        assert os.path.exists(exp_dir), "Experiment directory not found: '{}'".format(exp_dir)
        assert os.path.exists(params), "Empty run directory found: '{}'".format(params)

        params = cls._read_params(params)
        new_name = cls.abbreviate(params)

        if name != new_name:
            new_exp_dir = os.path.join(root, new_name)
            assert not os.path.exists(new_exp_dir), \
                "Destination experiment directory already exists: '{}'".format(new_exp_dir)

            print('Renaming:\n  {} into\n  {}'.format(exp_dir, new_exp_dir))
            shutil.move(exp_dir, new_exp_dir)

    def __init__(self, params, root='runs/', ignore=(), create=True):
        # relative dir containing this run
        self.root = root
        # params to be ignored in the run naming
        self.ignore = ignore
        # parameters of this run
        if isinstance(params, argparse.Namespace):
            params = vars(params)

        def _sanitize(v):
            return tuple(v) if isinstance(v, list) else v

        params = {k: _sanitize(v) for k, v in params.items() if k not in self.ignore}
        self.params = pd.Series(params, name='params')

        # whether to create the run directory if not exists
        self.create = create

        self.name = self.abbreviate(self.params)
        self.path = os.path.join(self.root, self.name)
        self.existing = os.path.exists(self.path)
        self.found = self.existing

        if not self.existing:
            if self.create:
                os.makedirs(self.path)
                self.write_params()
                self.existing = True
            else:
                print("Run directory '{}' not found, but not created.".format(self.path))

        else:
            param_fname = self.path_to(self.PARAM_FILENAME)
            assert os.path.exists(param_fname), "Empty run, parameters not found: '{}'".format(param_fname)
            self.params = self._read_params(param_fname)

    def __str__(self):
        s = StringIO()
        print('Experiment Dir: {}'.format(self.path), file=s)
        print('Params:', file=s)

        # Set display options differently
        with pd.option_context('display.max_rows', None,
                               'display.max_columns', None,
                               'display.width', None):
            print(self.params.to_string(), file=s)

        return s.getvalue()

    def __repr__(self):
        return self.__str__()

    def path_to(self, path):
        path = os.path.join(self.path, path)
        return path

    def add_parameter(self, key, value):
        assert key not in self.params, "Parameter already exists: '{}'".format(key)
        self.params[key] = value
        self._update_run_dir()
        self.write_params()

    def rename_parameter(self, key, new_key):
        assert key in self.params, "Cannot rename non-existent parameter: '{}'".format(key)
        assert new_key not in self.params, "Destination name for parameter exists: '{}'".format(new_key)

        self.params[new_key] = self.params[key]
        del self.params[key]

        self._update_run_dir()
        self.write_params()

    def remove_parameter(self, key):
        assert key in self.params, "Cannot remove non-existent parameter: '{}'".format(key)
        del self.params[key]
        self._update_run_dir()
        self.write_params()

    def _update_run_dir(self):
        old_run_dir = self.path
        if self.existing:
            self.name = self.abbreviate(self.params)
            self.path = os.path.join(self.root, self.name)
            assert not os.path.exists(self.path), "Cannot rename run, new name exists: '{}'".format(self.path)
            shutil.move(old_run_dir, self.path)

    @staticmethod
    def _read_params(path):
        # read json to pd.Series
        params = pd.read_json(path, typ='series')
        # transform lists to tuples (for hashability)
        params = params.apply(lambda x: tuple(x) if isinstance(x, list) else x)
        return params

    def write_params(self):
        # write Series as json
        self.params.to_json(self.path_to(self.PARAM_FILENAME))

def test():
    parser = argparse.ArgumentParser(description='Experiment Manager Test')
    parser.add_argument('-e', '--epochs', type=int, default=70)
    parser.add_argument('-b', '--batch-size', type=int, default=64)
    parser.add_argument('-m', '--model', choices=('1d-conv', 'paper'), default='1d-conv')
    parser.add_argument('-s', '--seed', type=int, default=23)
    parser.add_argument('--no-cuda', action='store_true')
    parser.set_defaults(no_cuda=False)
    args = parser.parse_args()

    run = Experiment(args, root='prova', ignore=['no_cuda'])
    print(run)
    print(run.path_to('ckpt/best.h5'))
losses.py
ADDED
@@ -0,0 +1,18 @@
import tensorflow as tf
from tensorflow.keras import backend as K

def boundary_loss(y_true, y_pred):
    """Additional loss focusing on boundaries"""
    # Compute gradients
    dy_true, dx_true = tf.image.image_gradients(y_true)
    dy_pred, dx_pred = tf.image.image_gradients(y_pred)

    # Compute boundary loss
    loss = tf.reduce_mean(tf.abs(dy_pred - dy_true) + tf.abs(dx_pred - dx_true))
    return loss * 0.5  # weight factor

def enhanced_binary_crossentropy(y_true, y_pred):
    """Combine standard BCE with boundary loss"""
    bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)
    boundary = boundary_loss(y_true, y_pred)
    return bce + boundary
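The docstrings above spell out the idea: `enhanced_binary_crossentropy` adds an image-gradient boundary term on top of standard BCE. As a minimal usage sketch (the toy model below is hypothetical and not part of the repository), the combined loss can be passed straight to `model.compile` like any other Keras loss:

```python
import tensorflow as tf
from losses import enhanced_binary_crossentropy

# Hypothetical single-output segmentation model emitting a sigmoid mask of shape (B, H, W, 1).
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(128, 128, 1)),
    tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'),
    tf.keras.layers.Conv2D(1, 1, activation='sigmoid'),
])

# boundary_loss relies on tf.image.image_gradients, so y_true and y_pred must be
# 4-D (batch, height, width, channels) float tensors.
model.compile(optimizer='adam', loss=enhanced_binary_crossentropy)
```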
matlab/Meye.m
ADDED
@@ -0,0 +1,310 @@
classdef Meye

    properties (Access=private)
        model
    end


    methods

        % CONSTRUCTOR
        %------------------------------------------------------------------
        function self = Meye(modelPath)
            % Class constructor
            arguments
                modelPath char {mustBeText}
            end

            % Change the current directory to the directory where the
            % original class is, so that the package with the custom layers
            % is created there
            classPath = getClassPath(self);
            oldFolder = cd(classPath);
            % Import the model saved as ONNX
            self.model = importONNXNetwork(modelPath, ...
                'GenerateCustomLayers',true, ...
                'PackageName','customLayers_meye',...
                'InputDataFormats', 'BSSC',...
                'OutputDataFormats',{'BSSC','BC'});

            % Manually change the "nearest" option to "linear" inside of
            % the automatically generated custom layers. This is necessary
            % due to the fact that MATLAB still does not support the proper
            % translation between ONNX layers and DLtoolbox layers
            self.nearest2Linear([classPath filesep '+customLayers_meye'])

            % Go back to the old current folder
            cd(oldFolder)
        end


        % PREDICTION OF SINGLE IMAGES
        %------------------------------------------------------------------
        function [pupilMask, eyeProb, blinkProb] = predictImage(self, inputImage, options)
            % Predicts pupil location on a single image
            arguments
                self
                inputImage
                options.roiPos = []
                options.threshold = []
            end

            roiPos = options.roiPos;

            % Convert the image to grayscale if RGB
            if size(inputImage,3) > 1
                inputImage = im2gray(inputImage);
            end

            % Crop the frame to the desired ROI
            if ~isempty(roiPos)
                crop = inputImage(roiPos(2):roiPos(2)+roiPos(4)-1,...
                    roiPos(1):roiPos(1)+roiPos(3)-1);
            else
                crop = inputImage;
            end

            % Preprocessing
            img = double(imresize(crop,[128 128]));
            img = img / max(img,[],'all');

            % Do the prediction
            [rawMask, info] = predict(self.model, img);
            eyeProb = info(1);
            blinkProb = info(2);

            % Reinsert the cropped prediction in the frame
            if ~isempty(roiPos)
                pupilMask = zeros(size(inputImage));
                pupilMask(roiPos(2):roiPos(2)+roiPos(4)-1,...
                    roiPos(1):roiPos(1)+roiPos(3)-1) = imresize(rawMask, [roiPos(4), roiPos(3)],"bilinear");
            else
                pupilMask = imresize(rawMask,size(inputImage),"bilinear");
            end

            % Apply a threshold to the image if requested
            if ~isempty(options.threshold)
                pupilMask = pupilMask > options.threshold;
            end

        end


        % PREDICT A MOVIE AND GET A TABLE WITH THE RESULTS
        %------------------------------------------------------------------
        function tab = predictMovie(self, moviePath, options)
            % Predict an entire video file and returns a results Table
            %
            % tab = predictMovie(moviePath, name-value)
            %
            % INPUT(S)
            %   - moviePath: (char/string) Full path of a video file.
            %   - name-value pairs
            %       - roiPos: [x,y,width,height] 4-element vector defining a
            %         rectangle containing the eye. Works best if width and
            %         height are similar. If empty, a prediction will be done on
            %         a full frame (Default: []).
            %       - threshold: [0-1] The pupil prediction is binarized based
            %         on a threshold value to measure pupil size. (Default: 0.4)
            %
            % OUTPUT(S)
            %   - tab: a MATLAB table containing data of the analyzed video

            arguments
                self
                moviePath char {mustBeText}
                options.roiPos double = []
                options.threshold = 0.4;
            end

            % Initialize a video reader
            v = VideoReader(moviePath);
            totFrames = v.NumFrames;

            % Initialize Variables
            frameN = zeros(totFrames,1,'double');
            frameTime = zeros(totFrames,1,'double');
            binaryMask = cell(totFrames,1);
            pupilArea = zeros(totFrames,1,'double');
            isEye = zeros(totFrames,1,'double');
            isBlink = zeros(totFrames,1,'double');

            tic
            for i = 1:totFrames
                % Progress report
                if toc>10
                    fprintf('%.1f%% - Processing frame (%u/%u)\n', (i/totFrames)*100 , i, totFrames)
                    tic
                end

                % Read a frame and make its prediction
                frame = read(v, i, 'native');
                [pupilMask, eyeProb, blinkProb] = self.predictImage(frame, roiPos=options.roiPos,...
                    threshold=options.threshold);

                % Save results for this frame
                frameN(i) = i;
                frameTime(i) = v.CurrentTime;
                binaryMask{i} = pupilMask > options.threshold;
                pupilArea(i) = sum(binaryMask{i},"all");
                isEye(i) = eyeProb;
                isBlink(i) = blinkProb;
            end
            % Save all the results in a final table
            tab = table(frameN,frameTime,binaryMask,pupilArea,isEye,isBlink);
        end



        % PREVIEW OF A PREDICTED MOVIE
        %------------------------------------------------------------------
        function predictMovie_Preview(self, moviePath, options)
            % Displays a live-preview of prediction for a video file

            arguments
                self
                moviePath char {mustBeText}
                options.roiPos double = []
                options.threshold double = []
            end
            roiPos = options.roiPos;


            % Initialize a video reader
            v = VideoReader(moviePath);
            % Initialize images to show
            blankImg = zeros(v.Height, v.Width, 'uint8');
            cyanColor = cat(3, blankImg, blankImg+255, blankImg+255);
            pupilTransparency = blankImg;

            % Create a figure for the preview
            figHandle = figure(...
                'Name','MEYE video preview',...
                'NumberTitle','off',...
                'ToolBar','none',...
                'MenuBar','none', ...
                'Color',[.1, .1, .1]);

            ax = axes('Parent',figHandle,...
                'Units','normalized',...
                'Position',[0 0 1 .94]);

            imHandle = imshow(blankImg,'Parent',ax);
            hold on
            cyanHandle = imshow(cyanColor,'Parent',ax);
            cyanHandle.AlphaData = pupilTransparency;
            rect = rectangle('LineWidth',1.5, 'LineStyle','-.','EdgeColor',[1,0,0],...
                'Parent',ax,'Position',[0,0,0,0]);
            hold off
            title(ax,'MEYE Video Preview', 'Color',[1,1,1])

            % Movie-Showing loop
            while exist("figHandle","var") && ishandle(figHandle) && hasFrame(v)
                try
                    tic
                    frame = readFrame(v);

                    % Actually do the prediction
                    [pupilMask, eyeProb, blinkProb] = self.predictImage(frame, roiPos=roiPos,...
                        threshold=options.threshold);

                    % Update graphic elements
                    imHandle.CData = frame;
                    cyanHandle.AlphaData = imresize(pupilMask, [v.Height, v.Width]);
                    if ~isempty(roiPos)
                        rect.Position = roiPos;
                    end
                    titStr = sprintf('Eye: %.2f%% - Blink:%.2f%% - FPS:%.1f',...
                        eyeProb*100, blinkProb*100, 1/toc);
                    ax.Title.String = titStr;
                    drawnow
                catch ME
                    warning(ME.message)
                    close(figHandle)
                end
            end
            disp('Stop preview.')
        end


    end


    %------------------------------------------------------------------
    %------------------------------------------------------------------
    % INTERNAL FUNCTIONS
    %------------------------------------------------------------------
    %------------------------------------------------------------------
    methods(Access=private)
        %------------------------------------------------------------------
        function path = getClassPath(~)
            % Returns the full path of where the class file is

            fullPath = mfilename('fullpath');
            [path,~,~] = fileparts(fullPath);
        end

        %------------------------------------------------------------------
        function [fplist,fnlist] = listfiles(~, folderpath, token)
            listing = dir(folderpath);
            index = 0;
            fplist = {};
            fnlist = {};
            for i = 1:size(listing,1)
                s = listing(i).name;
                if contains(s,token)
                    index = index+1;
                    fplist{index} = [folderpath filesep s];
                    fnlist{index} = s;
                end
            end
        end

        % nearest2Linear
        %------------------------------------------------------------------
        function nearest2Linear(self, inputPath)
            fP = self.listfiles(inputPath, 'Shape_To_Upsample');

            foundFileToChange = false;
            beforePattern = '"half_pixel", "nearest",';
            afterPattern = '"half_pixel", "linear",';
            for i = 1:length(fP)

                % Get the content of the file
                fID = fopen(fP{i}, 'r');
                f = fread(fID,'*char')';
                fclose(fID);

                % Send a verbose warning the first time we are manually
                % correcting the upsampling layers bug
                if ~foundFileToChange && contains(f,beforePattern)
                    foundFileToChange = true;
                    msg = ['This is a message from MEYE developers.\n' ...
                        'In the current release of the Deep Learning Toolbox ' ...
                        'MATLAB does not translate well all the layers in the ' ...
                        'ONNX network to native MATLAB layers. In particular the ' ...
                        'automatically generated custom layers that have to do ' ...
                        'with UPSAMPLING are generated with the ''nearest'' instead of ' ...
                        'the ''linear'' mode.\nWe automatically correct for this bug when you ' ...
                        'instantiate a Meye object (hence this warning).\nEverything should work fine, ' ...
                        'and we hope that in future MATLAB releases this hack wont be ' ...
                        'needed anymore.\n' ...
                        'If you find bugs or performance issues, please let us know ' ...
                        'with an issue ' ...
                        '<a href="matlab: web(''https://github.com/fabiocarrara/meye/issues'')">HERE.</a>'];
                    warning(sprintf(msg))
                end

                % Replace the 'nearest' option with 'linear'
                newF = strrep(f, beforePattern, afterPattern);

                % Save the file back in its original location
                fID = fopen(fP{i}, 'w');
                fprintf(fID,'%s',newF);
                fclose(fID);
            end
        end
    end
end
matlab/README.md
ADDED
@@ -0,0 +1,57 @@
# MEYE pupillometry on MATLAB

> Try MEYE on a standalone [Web-App](https://www.pupillometry.it/)

> Learn more on the original [MEYE repo](https://github.com/fabiocarrara/meye)

> Label your own dataset with [pLabeler](https://github.com/LeonardoLupori/pLabeler)

Starting from MATLAB version 2021b, MEYE is also available for use on MATLAB!

Here's a brief tutorial on how to use it in your own experiments.

## What do you need?

- [MATLAB 2021b](https://it.mathworks.com/products/matlab.html) or later
- [MATLAB Image Processing Toolbox](https://it.mathworks.com/products/image.html)
- [MATLAB Deep Learning Toolbox](https://it.mathworks.com/products/deep-learning.html)
  An additional _support package_ of this toolbox has to be downloaded manually from the Add-On explorer in MATLAB:
  - _Deep Learning Toolbox™ Converter for ONNX Model Format_
  
- A MEYE model in [ONNX](https://onnx.ai/) format. You can download our latest model [here](https://github.com/fabiocarrara/meye/releases).
  

## Quick start!

```matlab
% Create an instance of Meye
meye = Meye('path/to/model.onnx');

% Example 1
% Make predictions on a single Image
%
% Load an image for which you want to predict the pupil
img = imread('path/to/img.tif');
% Make a prediction on a frame
[pupil, isEye, isBlink] = meye.predictImage(img);

% Example 2
% Make predictions on a video file and preview the results
%
meye.predictMovie_Preview('path/to/video');
```

## Examples

Inside the file [example.m](example.m) you can find 5 extensively commented examples of some use cases for MEYE on MATLAB.
These examples require you to download example data from [here](https://drive.google.com/drive/folders/1BG6O5BEkwXkNKC_1XuB3H9wbx3DeNWwF?usp=sharing). To run the examples successfully, make sure that the downloaded files are in the same folder as the `example.m` file.

# Known issues

## Small issue with _Upsample_ layers
When [importing](https://it.mathworks.com/help/deeplearning/ref/importonnxnetwork.html) an ONNX network, MATLAB tries to translate all the layers of the network from ONNX operators to built-in MATLAB layers (see [here](https://it.mathworks.com/help/deeplearning/ref/importonnxnetwork.html#mw_dc6cd14c-e8d0-4370-af81-96626a888d9c)).
This operation is not successful for all the layers, and MATLAB overcomes errors by automatically generating custom layers to replace the ones it was not able to translate. These _custom_ layers are stored in a folder as MATLAB `.m` class files.
We found a small bug in the way MATLAB translates `Upsample` layers while importing the MEYE network. In particular, the automatically generated custom layers perform the upsample with the `nearest` interpolation method, while the `linear` method should be used for best results.
For now, we solved this bug by automatically replacing the `nearest` method with the `linear` one in all the automatically generated layers. This restores optimal performance with no additional computational cost, but it's a bit hacky.
We hope that in future releases MATLAB's translation to its own built-in layers will be smoother and this trick will not be needed anymore.
matlab/example.m
ADDED
@@ -0,0 +1,211 @@
%% Download all the example material
%
% 1 - Download the latest MEYE model in ONNX format
% -------------------------------------------------------------------------
% Download the .onnx file from the assets here:
% https://github.com/fabiocarrara/meye/releases

% EXAMPLE data can be found in this folder:
% https://drive.google.com/drive/folders/1BG6O5BEkwXkNKC_1XuB3H9wbx3DeNWwF?usp=sharing
%
% 2 - Download an example image of a simple mouse eye from:
% https://drive.google.com/file/d/1hcWcC1cAmzY4r-SIWDIgUY0-gpbmetUL/view?usp=sharing
%
% 3 - Download an example of a large image here:
% https://drive.google.com/file/d/16QixvUMtojqfrcy4WXlYJ7CP3K8vrz_C/view?usp=sharing
%
% 4 - Download an example pupillometry video here:
% https://drive.google.com/file/d/1TYj80dzIR1ZjpEvfefH_akhbUjwpvJta/view?usp=sharing


%% EXAMPLE 1
% -------------------------------------------------------------------------
% Predict the pupil from a simple image of an eye

% Clean up the workspace
clearvars, clc

% Change these values according to the filenames of the MEYE model and the
% simple pupil image
MODEL_NAME = 'meye_20220124.onnx';
IMAGE_NAME = 'pupilImage_simple.png';


% Initialize a MEYE object
meye = Meye(MODEL_NAME);

% Load the simple image
img = imread(IMAGE_NAME);

% Predict a single image
[pupilMask, eyeProb, blinkProb] = meye.predictImage(img);

% Plot the results of the prediction
subplot(1,3,1)
imshow(img)
title('Original Image')

subplot(1,3,2)
imagesc(pupilMask)
title(sprintf('Prediction (Eye:%.2f%% - Blink:%.2f%%)',eyeProb*100,blinkProb*100))
axis off, axis image

subplot(1,3,3)
imshowpair(img, pupilMask)
title('Merge')


%% EXAMPLE 2
% -------------------------------------------------------------------------
% Binarize the pupil prediction and get the pupil size in pixels

% Clean up the workspace
clearvars, close all, clc

% Change these values according to the filenames of the MEYE model and the
% simple pupil image
MODEL_NAME = 'meye_20220124.onnx';
IMAGE_NAME = 'pupilImage_simple.png';


% Initialize a MEYE object
meye = Meye(MODEL_NAME);

% Load the simple image
img = imread(IMAGE_NAME);

% Predict a single image
% You can automatically binarize the prediction by passing the "threshold"
% optional argument. This number can be between 0 and 1. If omitted, the
% function returns a raw probability map instead of a binarized image
pupilBinaryMask = meye.predictImage(img, 'threshold', 0.4);

imshowpair(img, pupilBinaryMask)
title(sprintf('Pupil Size: %u px', sum(pupilBinaryMask,'all')))


%% EXAMPLE 3
% -------------------------------------------------------------------------
% Predict the pupil on a large image where the eye is a small portion of
% the image

% Clean up the workspace
clearvars, close all, clc

% Change these values according to the filenames of the MEYE model and the
% simple pupil image
MODEL_NAME = 'meye_20220124.onnx';
IMAGE_NAME = 'pupilImage_large.png';


% Initialize a MEYE object
meye = Meye(MODEL_NAME);

% Load the simple image
img = imread(IMAGE_NAME);

% Predict the image
pupilMask = meye.predictImage(img);

% As you can see from this image, the prediction is not perfect. This is
% because MEYE was trained on images that tightly contained the eye.
subplot(1,2,1)
imshowpair(img, pupilMask)
title('Total image prediction (low quality)')

% In order to solve this issue it is possible to restrict the prediction to
% a rectangular Region of Interest (ROI) in the image. This is done simply
% by passing the optional argument "roiPos" to the predictImage function.
% The roiPos is a 4-element vector containing X, Y, width, height of a
% rectangular shape. Note that X and Y are the coordinates of the top left
% corner of the ROI

ROI = [90,90,200,200];
pupilMask = meye.predictImage(img, 'roiPos', ROI);

% Plot the results with the ROI and see the difference between the 2 methods
subplot(1,2,2)
imshowpair(img, pupilMask)
rectangle('Position',ROI, 'LineStyle','-.','EdgeColor',[1,0,0])
title('ROI prediction (high quality)')
linkaxes
set(gcf,'Position',[300,600,1000,320])


%% EXAMPLE 4
% -------------------------------------------------------------------------
% Show a preview of the prediction of an entire pupillometry video.
%
% As you saw, you can adjust a few parameters for the prediction.
% If you want to get a quick preview of how your pre-recorded video will be
% processed, you can use the method predictMovie_Preview.
% Here you can play around with different ROI positions and threshold
% values and see what the results are before analyzing the whole video.

% Clean up the workspace
clearvars, close all, clc

% Change these values according to the filenames of the MEYE model and the
% example video
MODEL_NAME = 'meye_20220124.onnx';
VIDEO_NAME = 'mouse_example.mp4';

% Initialize a MEYE object
meye = Meye(MODEL_NAME);

% Try to play around moving or resizing the ROI to see how the performance changes
ROI = [70, 60, 200, 200];

% Change the threshold value to binarize the pupil prediction.
% Use [] to see the raw probability map. Use a number in the range [0:1] to binarize it
threshold = 0.4;

meye.predictMovie_Preview(VIDEO_NAME,"roiPos", ROI,"threshold",threshold);



%% EXAMPLE 5
% Predict the entire video and get the results table

% Clean up the workspace
clearvars, close all, clc

% Change these values according to the filenames of the MEYE model and the
% example video
MODEL_NAME = 'meye_20220124.onnx';
VIDEO_NAME = 'mouse_example.mp4';

% Initialize a MEYE object
meye = Meye(MODEL_NAME);

% Try to play around moving or resizing the ROI to see how the performance changes
ROI = [70, 60, 200, 200];

% Change the threshold value to binarize the pupil prediction.
% Use [] to see the raw probability map. Use a number in the range [0:1] to binarize it
threshold = 0.4;

% Predict the whole movie and save results in a table
T = meye.predictMovie(VIDEO_NAME, "roiPos", ROI, "threshold", threshold);

% Show some of the values in the table
disp(head(T))

% Plot some of the results
subplot 311
plot(T.frameTime,T.isEye, 'LineWidth', 2)
title('Eye Probability')
ylabel('Probability'),
xlim([T.frameTime(1) T.frameTime(end)])

subplot 312
plot(T.frameTime,T.isBlink, 'LineWidth', 2)
title('Blink Probability')
ylabel('Probability')
xlim([T.frameTime(1) T.frameTime(end)])

subplot 313
plot(T.frameTime,T.pupilArea, 'LineWidth', 2)
title('Pupil Size')
xlabel('Time (s)'), ylabel('Pupil Area (px)')
xlim([T.frameTime(1) T.frameTime(end)])
models/deeplab.py
ADDED
@@ -0,0 +1,78 @@
import sys
sys.path += ['models/deeplab']

import tensorflow as tf

from tensorflow.keras import backend as K
from tensorflow.keras import layers as L
from tensorflow.keras.models import Model, Sequential

from deeplabv3p.models.deeplabv3p_resnet50 import Deeplabv3pResNet50
from deeplabv3p.models.deeplabv3p_mobilenetv3 import Deeplabv3pMobileNetV3Small, Deeplabv3pLiteMobileNetV3Small, Deeplabv3pMobileNetV3Large, Deeplabv3pLiteMobileNetV3Large
from deeplabv3p.models.deeplabv3p_xception import Deeplabv3pXception
from deeplabv3p.models.deeplabv3p_peleenet import Deeplabv3pPeleeNet, Deeplabv3pLitePeleeNet

AVAILABLE_BACKBONES = {
    'resnet50': Deeplabv3pResNet50,
    'xception': Deeplabv3pXception,
    'mobilenetv3-large': Deeplabv3pMobileNetV3Large,
    'lite-mobilenetv3-large': Deeplabv3pLiteMobileNetV3Large,
    'mobilenetv3-small': Deeplabv3pMobileNetV3Small,
    'lite-mobilenetv3-small': Deeplabv3pLiteMobileNetV3Small,
    'peleenet': Deeplabv3pPeleeNet,
    'lite-peleenet': Deeplabv3pLitePeleeNet,
}

AVAILABLE_PRETRAINED_WEIGHTS = {
    'resnet50': 'imagenet',
    'xception': None,  # 'pascalvoc', # needs fix in upstream
    'mobilenetv3-large': 'imagenet',
    'lite-mobilenetv3-large': 'imagenet',
    'mobilenetv3-small': 'imagenet',
    'lite-mobilenetv3-small': 'imagenet',
    'peleenet': 'imagenet',
    'lite-peleenet': 'imagenet',
}

def build_model(input_shape, output_shape, config):

    assert input_shape[:2] == output_shape[:2], "Only same input-output HW shapes are supported."
    num_classes = output_shape[2]

    # the backbone expects RGB images in order to use pretrained weights
    needs_rgb_conversion = input_shape[2] != 3
    backbone_input_shape = (input_shape[:2] + (3,)) if needs_rgb_conversion else input_shape
    backbone_name = config.get('backbone', 'resnet50')
    weights = config.get('weights', AVAILABLE_PRETRAINED_WEIGHTS[backbone_name])
    backbone_fn = AVAILABLE_BACKBONES[backbone_name]
    backbone, backbone_len = backbone_fn(input_shape=backbone_input_shape, num_classes=num_classes, weights=weights, OS=8)

    # segmentation mask
    out_mask = backbone.get_layer('pred_resize').output
    out_mask = L.Activation('sigmoid', name='mask')(out_mask)

    # metadata tags (is_eye and is_blink)
    middle = backbone.get_layer('image_pooling').output
    middle = L.Flatten()(middle)
    out_tags = L.Dense(2, activation='sigmoid', name='tags')(middle)

    model = Model(inputs=backbone.input, outputs=[out_mask, out_tags])

    if needs_rgb_conversion:
        gray_input = L.Input(shape=input_shape)
        rgb_input = L.Lambda(lambda x: K.tile(x, (1, 1, 1, 3)), name='gray2rgb')(gray_input)  # we assume BHWC
        out_mask, out_tags = model(rgb_input)

        # rename outputs
        out_mask = L.Lambda(lambda x: x, name='mask')(out_mask)
        out_tags = L.Lambda(lambda x: x, name='tags')(out_tags)
        model = Model(inputs=gray_input, outputs=[out_mask, out_tags])

    return model


if __name__ == "__main__":
    shape = (128, 128, 1)
    model = build_model(shape, shape, {'weights': None})  # , 'backbone': 'lite-mobilenetv3-small'})
    model.summary()
    import pdb; pdb.set_trace()
models/deeplab/README.md
ADDED
@@ -0,0 +1,380 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# TF Keras DeepLab v3+ Modelset
|
2 |
+
|
3 |
+
## Introduction
|
4 |
+
|
5 |
+
An end-to-end semantic segmentation pipeline with DeepLabv3+ models. Implement with tf.keras, including data collection/annotation, model training/tuning, model evaluation and on device deployment. Support different backbones and different head architecture:
|
6 |
+
|
7 |
+
#### Backbone
|
8 |
+
- [x] Xception
|
9 |
+
- [x] ResNet50
|
10 |
+
- [x] MobileNetV2
|
11 |
+
- [x] MobilenetV3(Large/Small)
|
12 |
+
- [x] PeleeNet ([paper](https://arxiv.org/abs/1804.06882))
|
13 |
+
|
14 |
+
#### Head
|
15 |
+
- [x] ASPP
|
16 |
+
- [x] ASPP Lite(Only Global Pooling + 1x1 Conv)
|
17 |
+
- [x] Decoder
|
18 |
+
- [x] Different Output Stride(8/16/32)
|
19 |
+
|
20 |
+
#### Loss
|
21 |
+
- [x] Categorical Cross Entropy Loss
|
22 |
+
- [x] Balanced Class Weighted Cross Entropy Loss
|
23 |
+
- [x] Adaptive Class Weighted Cross Entropy Loss
|
24 |
+
- [x] Focal Loss
|
25 |
+
|
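For reference, the focal loss idea for dense segmentation can be sketched as below. This is a minimal illustration following the standard formulation from the Focal Loss paper, not necessarily the exact code in `deeplabv3p/loss.py`; `gamma` and `alpha` are common default values.

```python
from tensorflow.keras import backend as K

def categorical_focal_loss(gamma=2.0, alpha=0.25):
    """Sketch of focal loss for per-pixel softmax predictions.

    y_true: one-hot labels, shape (batch, height, width, num_classes)
    y_pred: softmax probabilities, same shape
    """
    def loss(y_true, y_pred):
        # clip to avoid log(0)
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        # per-pixel, per-class cross entropy
        cross_entropy = -y_true * K.log(y_pred)
        # down-weight easy (well-classified) pixels by (1 - p)^gamma
        modulating_factor = alpha * K.pow(1.0 - y_pred, gamma)
        return K.sum(modulating_factor * cross_entropy, axis=-1)
    return loss
```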
26 |
+
#### Postprocess
|
27 |
+
- [x] Numpy CRF (Conditional Random Fields) postprocess implementation
|
28 |
+
|
29 |
+
|
30 |
+
#### Train tech
|
31 |
+
- [x] Transfer training from Imagenet/PascalVOC
|
32 |
+
- [x] Dynamic learning rate decay (Cosine/Exponential/Polynomial/PiecewiseConstant)
|
33 |
+
- [x] Weights Average policy for optimizer (EMA/SWA/Lookahead, valid for TF-2.x with tfa)
|
34 |
+
- [x] GridMask data augmentation ([paper](https://arxiv.org/abs/2001.04086))
|
35 |
+
- [x] Multi-GPU training with SyncBatchNorm support (valid for TF-2.2 and later)
|
36 |
+
|
37 |
+
#### On-device deployment
|
38 |
+
- [x] Tensorflow-Lite Float32/UInt8 model inference
|
39 |
+
- [x] MNN Float32/UInt8 model inference
|
40 |
+
|
41 |
+
|
42 |
+
## Quick Start
|
43 |
+
|
44 |
+
1. Install requirements on Ubuntu 16.04/18.04:
|
45 |
+
|
46 |
+
```
|
47 |
+
# pip install -r requirements.txt
|
48 |
+
```
|
49 |
+
|
50 |
+
2. Download the Deeplabv3+ PascalVOC pretrained weights. They are provided by [keras-deeplab-v3-plus](https://github.com/bonlime/keras-deeplab-v3-plus) and imported from the [original TF checkpoint](https://github.com/tensorflow/models/tree/master/research/deeplab)
|
51 |
+
3. Run Deeplab segmentation on your image or video.
|
52 |
+
|
53 |
+
```
|
54 |
+
# wget -O weights/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5 https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.1/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5
|
55 |
+
# python deeplab.py --model_type=xception --weights_path=weights/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5 --classes_path=configs/voc_classes.txt --output_stride=16 --image
|
56 |
+
# python deeplab.py --model_type=xception --weights_path=weights/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5 --classes_path=configs/voc_classes.txt --output_stride=16 --input=<your video file>
|
57 |
+
|
58 |
+
```
|
59 |
+
|
60 |
+
Image segmentation sample:
|
61 |
+
|
62 |
+
<p align="center">
|
63 |
+
<img src="assets/dog_inference.png">
|
64 |
+
</p>
|
65 |
+
|
66 |
+
|
67 |
+
|
68 |
+
## Guide of train/evaluate/demo
|
69 |
+
|
70 |
+
### Train
|
71 |
+
|
72 |
+
1. Prepare dataset
|
73 |
+
1. PascalVOC2012 & SBD (VOC2012 train_aug) semantic segmentation dataset
|
74 |
+
* Run a simple script to download, convert & merge PascalVOC 2012 and SBD:
|
75 |
+
|
76 |
+
```
|
77 |
+
# pushd tools/dataset_converter/voc_augment/
|
78 |
+
# ./dataset_prepare.sh
|
79 |
+
# popd
|
80 |
+
|
81 |
+
```
|
82 |
+
Dataset images & labels will be placed at `VOC2012/`
|
83 |
+
|
84 |
+
2. MS COCO 2017 segmentation dataset
|
85 |
+
    * Run a simple script to download the COCO2017 dataset and convert the annotated instance masks to PNG format semantic segmentation label images:
|
86 |
+
|
87 |
+
```
|
88 |
+
# pushd tools/dataset_converter/mscoco2017/
|
89 |
+
# ./dataset_prepare.sh
|
90 |
+
# popd
|
91 |
+
|
92 |
+
```
|
93 |
+
    You can dig into the related scripts for details. Dataset images & labels will be placed at `mscoco2017/`
|
94 |
+
|
95 |
+
3. ADE20K semantic segmentation dataset
|
96 |
+
* Run a simple script to download, merge & convert ADE20K dataset:
|
97 |
+
|
98 |
+
```
|
99 |
+
# pushd tools/dataset_converter/ade20k/
|
100 |
+
# ./dataset_prepare.sh
|
101 |
+
# popd
|
102 |
+
|
103 |
+
```
|
104 |
+
Dataset images & labels will be placed at `ADEChallengeData2016/`
|
105 |
+
|
106 |
+
4. Cityscapes semantic segmentation dataset
|
107 |
+
    * Download the Cityscapes dataset package from `https://www.cityscapes-dataset.com/` (registration required) and put it under `tools/dataset_converter/cityscapes/`. Then run a simple script to merge & convert:
|
108 |
+
|
109 |
+
```
|
110 |
+
# pushd tools/dataset_converter/cityscapes/
|
111 |
+
# ./dataset_prepare.sh
|
112 |
+
# popd
|
113 |
+
|
114 |
+
```
|
115 |
+
Dataset images & labels will be placed at `Cityscapes/`
|
116 |
+
|
117 |
+
5. Customized semantic segmentation dataset
|
118 |
+
    * Collect target JPG format images and place them at `<dataset_path>/images`
|
119 |
+
    * Generate semantic segmentation label images. You can use [labelme](https://github.com/wkentaro/labelme) to annotate your images with polygonal segmentation masks and save them to json files. Then run [json_to_dataset.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/tools/dataset_converter/labelme/json_to_dataset.py) to convert the json annotations to PascalVOC style PNG format label images:
|
120 |
+
```
|
121 |
+
# cd tools/dataset_converter/labelme && python json_to_dataset.py -h
|
122 |
+
usage: json_to_dataset.py [-h] --json_file_path JSON_FILE_PATH
|
123 |
+
[--classes_path CLASSES_PATH] --png_label_path
|
124 |
+
PNG_LABEL_PATH
|
125 |
+
|
126 |
+
convert labelme json label to voc png label
|
127 |
+
|
128 |
+
optional arguments:
|
129 |
+
-h, --help show this help message and exit
|
130 |
+
--json_file_path JSON_FILE_PATH
|
131 |
+
path to labelme annotated json label files
|
132 |
+
--classes_path CLASSES_PATH
|
133 |
+
path to class definitions,
|
134 |
+
default=../../../configs/voc_classes.txt
|
135 |
+
--png_label_path PNG_LABEL_PATH
|
136 |
+
output path of converted png label images
|
137 |
+
```
|
138 |
+
|
139 |
+
    For the class names file format, refer to [voc_classes.txt](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/configs/voc_classes.txt) (do not include the background class; it is added as index 0 in code by default).
|
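    The file itself is just one class name per line; for illustration, the first entries of a PascalVOC style classes file look like:

    ```
    aeroplane
    bicycle
    bird
    boat
    ...
    ```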
140 |
+
|
141 |
+
* Place the PNG label images at `<dataset_path>/labels`
|
142 |
+
    * Create PascalVOC style dataset split (train/val/test) txt files. Each line holds a single image base name (no path or extension), like:
|
143 |
+
```
|
144 |
+
2007_000033
|
145 |
+
2007_000042
|
146 |
+
2007_000061
|
147 |
+
...
|
148 |
+
```
|
149 |
+
|
150 |
+
    You can put these dataset files together under `<dataset_path>` to create an independent dataset directory. A minimal sketch for generating such split files follows.
|
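    The following sketch generates train/val split files from an images directory (the paths and 90/10 split ratio are just examples):

    ```python
    import os
    import random

    dataset_path = 'my_dataset'  # example path; use your own <dataset_path>
    image_dir = os.path.join(dataset_path, 'images')

    # collect base names (without extension) of all JPG images
    names = [os.path.splitext(f)[0] for f in os.listdir(image_dir) if f.lower().endswith('.jpg')]
    random.shuffle(names)

    split = int(len(names) * 0.9)  # 90% train / 10% val
    for list_name, subset in [('train.txt', names[:split]), ('val.txt', names[split:])]:
        with open(os.path.join(dataset_path, list_name), 'w') as f:
            f.write('\n'.join(subset) + '\n')
    ```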
151 |
+
|
152 |
+
|
153 |
+
2. [train.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/train.py)
|
154 |
+
```
|
155 |
+
# python train.py -h
|
156 |
+
usage: train.py [-h] [--model_type MODEL_TYPE] [--weights_path WEIGHTS_PATH]
|
157 |
+
[--model_input_shape MODEL_INPUT_SHAPE]
|
158 |
+
[--output_stride {8,16,32}] [--dataset_path DATASET_PATH]
|
159 |
+
[--dataset_file DATASET_FILE]
|
160 |
+
[--val_dataset_file VAL_DATASET_FILE] [--val_split VAL_SPLIT]
|
161 |
+
[--classes_path CLASSES_PATH] [--batch_size BATCH_SIZE]
|
162 |
+
[--optimizer {adam,rmsprop,sgd}] [--loss {crossentropy,focal}]
|
163 |
+
[--weighted_type {None,adaptive,balanced}]
|
164 |
+
[--learning_rate LEARNING_RATE]
|
165 |
+
[--average_type {None,ema,swa,lookahead}]
|
166 |
+
[--decay_type {None,cosine,exponential,polynomial,piecewise_constant}]
|
167 |
+
[--transfer_epoch TRANSFER_EPOCH] [--freeze_level {0,1,2}]
|
168 |
+
[--init_epoch INIT_EPOCH] [--total_epoch TOTAL_EPOCH]
|
169 |
+
[--gpu_num GPU_NUM] [--model_pruning] [--eval_online]
|
170 |
+
[--eval_epoch_interval EVAL_EPOCH_INTERVAL]
|
171 |
+
[--save_eval_checkpoint]
|
172 |
+
|
173 |
+
optional arguments:
|
174 |
+
-h, --help show this help message and exit
|
175 |
+
--model_type MODEL_TYPE
|
176 |
+
DeepLabv3+ model type:
|
177 |
+
mobilenetv2/mobilenetv2_lite/resnet50,
|
178 |
+
default=mobilenetv2_lite
|
179 |
+
--weights_path WEIGHTS_PATH
|
180 |
+
Pretrained model/weights file for fine tune
|
181 |
+
--model_input_shape MODEL_INPUT_SHAPE
|
182 |
+
model image input shape as <height>x<width>,
|
183 |
+
default=512x512
|
184 |
+
--output_stride {8,16,32}
|
185 |
+
model output stride, default=16
|
186 |
+
--dataset_path DATASET_PATH
|
187 |
+
dataset path containing images and label png file,
|
188 |
+
default=VOC2012/
|
189 |
+
--dataset_file DATASET_FILE
|
190 |
+
train samples txt file,
|
191 |
+
default=VOC2012/ImageSets/Segmentation/trainval.txt
|
192 |
+
--val_dataset_file VAL_DATASET_FILE
|
193 |
+
val samples txt file, default=None
|
194 |
+
--val_split VAL_SPLIT
|
195 |
+
validation data persentage in dataset if no val
|
196 |
+
dataset provide, default=0.1
|
197 |
+
--classes_path CLASSES_PATH
|
198 |
+
path to class definitions,
|
199 |
+
default=configs/voc_classes.txt
|
200 |
+
--batch_size BATCH_SIZE
|
201 |
+
batch size for training, default=16
|
202 |
+
--optimizer {adam,rmsprop,sgd}
|
203 |
+
optimizer for training (adam/rmsprop/sgd), default=sgd
|
204 |
+
--loss {crossentropy,focal}
|
205 |
+
loss type for training (crossentropy/focal),
|
206 |
+
default=crossentropy
|
207 |
+
--weighted_type {None,adaptive,balanced}
|
208 |
+
class balance weighted type, default=None
|
209 |
+
--learning_rate LEARNING_RATE
|
210 |
+
Initial learning rate, default=0.01
|
211 |
+
--average_type {None,ema,swa,lookahead}
|
212 |
+
weights average type, default=None
|
213 |
+
--decay_type {None,cosine,exponential,polynomial,piecewise_constant}
|
214 |
+
Learning rate decay type, default=None
|
215 |
+
--transfer_epoch TRANSFER_EPOCH
|
216 |
+
Transfer training stage epochs, default=5
|
217 |
+
--freeze_level {0,1,2}
|
218 |
+
Freeze level of the model in transfer training stage.
|
219 |
+
0:NA/1:backbone/2:only open prediction layer
|
220 |
+
--init_epoch INIT_EPOCH
|
221 |
+
initial training epochs for fine tune training,
|
222 |
+
default=0
|
223 |
+
--total_epoch TOTAL_EPOCH
|
224 |
+
total training epochs, default=150
|
225 |
+
--gpu_num GPU_NUM Number of GPU to use, default=1
|
226 |
+
--model_pruning Use model pruning for optimization, only for TF 1.x
|
227 |
+
--eval_online Whether to do evaluation on validation dataset during
|
228 |
+
training
|
229 |
+
--eval_epoch_interval EVAL_EPOCH_INTERVAL
|
230 |
+
Number of iteration(epochs) interval to do evaluation,
|
231 |
+
default=10
|
232 |
+
--save_eval_checkpoint
|
233 |
+
Whether to save checkpoint with best evaluation result
|
234 |
+
```
|
235 |
+
|
236 |
+
Following is a reference command for training a mobilenetv2 lite model on the PascalVOC2012 & SBD dataset:
|
237 |
+
```
|
238 |
+
# python train.py --model_type=mobilenetv2_lite --output_stride=16 --dataset_path=VOC2012/ --dataset_file=VOC2012/ImageSets/Segmentation/train.txt --val_dataset_file=VOC2012/ImageSets/Segmentation/val.txt --batch_size=16 --freeze_level=1 --transfer_epoch=5 --total_epoch=150 --eval_online --eval_epoch_interval=1 --save_eval_checkpoint --weighted_type=adaptive
|
239 |
+
```
|
240 |
+
|
241 |
+
Checkpoints during training can be found at `logs/000/`. Choose the best one as the final model.
|
242 |
+
|
243 |
+
You can also use Tensorboard to monitor the loss trend during training:
|
244 |
+
```
|
245 |
+
# tensorboard --logdir=logs/000
|
246 |
+
```
|
247 |
+
|
248 |
+
Multi-GPU usage: pass `--gpu_num N` to use N GPUs. It uses [tf.distribute.MirroredStrategy](https://www.tensorflow.org/guide/distributed_training#mirroredstrategy) to support multi-GPU environments, roughly as sketched below.
|
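A rough sketch of what that looks like in tf.keras (illustrative only, not the exact code in train.py; the tiny model below is a placeholder):

```python
import tensorflow as tf

# MirroredStrategy replicates the model on every visible GPU and
# splits each training batch across the replicas
strategy = tf.distribute.MirroredStrategy()
print('Number of replicas:', strategy.num_replicas_in_sync)

with strategy.scope():
    # model creation and compile must happen inside the strategy scope
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(8, 3, padding='same', activation='relu', input_shape=(512, 512, 3)),
        tf.keras.layers.Conv2D(21, 1, activation='softmax'),
    ])
    model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy')

# model.fit(...) is then called as usual
```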
249 |
+
|
250 |
+
|
251 |
+
### Model dump
|
252 |
+
It's better to dump out an inference model from the training checkpoint for evaluation or demo. The following command does that:
|
253 |
+
|
254 |
+
```
|
255 |
+
# python deeplab.py --model_type=mobilenetv2_lite --weights_path=logs/000/<checkpoint>.h5 --classes_path=configs/voc_classes.txt --model_input_shape=512x512 --output_stride=16 --dump_model --output_model_file=model.h5
|
256 |
+
```
|
257 |
+
|
258 |
+
Change model_type, input shape & output stride to get a different inference model. If "--model_pruning" was used in training, you also need to add "--pruning_model" here to dump out the pruned model.
|
259 |
+
|
260 |
+
NOTE: One trained model can be dumped out with different input shapes & output strides (of course with different accuracy).
|
261 |
+
|
262 |
+
|
263 |
+
### Evaluation
|
264 |
+
Use [eval.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/eval.py) to evaluate the inference model on your test data. It will calculate the following metrics (a short numpy sketch of how they derive from the confusion matrix follows the list):
|
265 |
+
|
266 |
+
* mIOU
|
267 |
+
* FWIOU (Frequency Weighted IOU)
|
268 |
+
* PA (Pixel Accuracy)
|
269 |
+
* MPA (Mean Pixel Accuracy)
|
270 |
+
|
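All of these metrics derive from the per-class confusion matrix; a minimal numpy sketch (not the repo's eval.py code) of the relations:

```python
import numpy as np

def segmentation_metrics(confusion):
    """confusion: (num_classes, num_classes) matrix, rows = ground truth, cols = prediction."""
    tp = np.diag(confusion).astype(np.float64)
    gt_count = confusion.sum(axis=1).astype(np.float64)    # pixels per ground-truth class
    pred_count = confusion.sum(axis=0).astype(np.float64)  # pixels per predicted class

    per_class_iou = tp / (gt_count + pred_count - tp + 1e-10)
    miou = per_class_iou.mean()                             # mIOU
    freq = gt_count / gt_count.sum()
    fwiou = (freq * per_class_iou).sum()                    # FWIOU
    pa = tp.sum() / confusion.sum()                         # PA
    mpa = (tp / (gt_count + 1e-10)).mean()                  # MPA
    return miou, fwiou, pa, mpa
```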
271 |
+
It will also draw a confusion matrix chart and per-class IOU results under the "result" dir, and optionally save all segmentation result images & predicted PNG labels for checking.
|
272 |
+
|
273 |
+
```
|
274 |
+
# python eval.py --model_path=model.h5 --dataset_path=VOC2012/ --dataset_file=VOC2012/ImageSets/Segmentation/val.txt --classes_path=configs/voc_classes.txt --model_input_shape=512x512 --save_result
|
275 |
+
```
|
276 |
+
|
277 |
+
If you enable the "--eval_online" option in train.py, evaluation on the validation dataset will be executed during training, but that adds time to the training process.
|
278 |
+
|
279 |
+
|
280 |
+
Following is a sample result from a MobilenetV2_Lite model trained on the VOC2012+SBD dataset:
|
281 |
+
<p align="center">
|
282 |
+
<img src="assets/mIOU.png">
|
283 |
+
<img src="assets/confusion_matrix.png">
|
284 |
+
</p>
|
285 |
+
|
286 |
+
|
287 |
+
Some experiments on the VOC2012+SBD dataset and a comparison:
|
288 |
+
|
289 |
+
| Model type | InputSize | Output Stride | TrainSet | TestSet | mIOU | FLOPS | Param | Speed | Ps |
|
290 |
+
| ----- | ------ | ------ | ------ | ----- | ----- | ----- | ----- | ----- | ----- |
|
291 |
+
| [ResNet50](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.1/deeplabv3p_resnet50_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 73.71% | 73.95G | 26.72M | 38ms | Keras on Titan XP |
|
292 |
+
| [MobileNetV3Large](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.1/deeplabv3p_mobilenetv3large_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 72.33% | 9.52G | 3.51M | 29ms | Keras on Titan XP |
|
293 |
+
| [PeleeNet Lite](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.2/deeplabv3p_peleenet_lite_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 68.23% | 7.64G | 2.59M | 37.8ms | Keras on Titan XP |
|
294 |
+
| [MobileNetV2 Lite](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.0/deeplabv3p_mobilenetv2_lite_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 67.83% | 5.24G | 2.11M | 23ms | Keras on Titan XP |
|
295 |
+
| [MobileNetV3Small Lite](https://github.com/david8862/tf-keras-deeplabv3p-model-set/releases/download/1.0.1/deeplabv3p_mobilenetv3small_lite_512_os16_voc.tar.gz) | 512x512 | 16 | VOC12&SBD train | VOC12&SBD val | 64.81% | 1.36G | 1.06M | 20ms | Keras on Titan XP |
|
296 |
+
|
297 |
+
**NOTE**: If you meet any model loading problem with these pretrained weights due to h5 format compatibility issues, try running "Model dump" again to regenerate the inference model.
|
298 |
+
|
299 |
+
|
300 |
+
### Demo
|
301 |
+
1. [deeplab.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/deeplab.py)
|
302 |
+
> * Demo script for trained model
|
303 |
+
|
304 |
+
image inference mode
|
305 |
+
```
|
306 |
+
# python deeplab.py --model_type=mobilenetv2_lite --weights_path=model.h5 --classes_path=configs/voc_classes.txt --model_input_shape=512x512 --output_stride=16 --image
|
307 |
+
```
|
308 |
+
video inference mode
|
309 |
+
```
|
310 |
+
# python deeplab.py --model_type=mobilenetv2_lite --weights_path=model.h5 --classes_path=configs/voc_classes.txt --model_input_shape=512x512 --output_stride=16 --input=test.mp4
|
311 |
+
```
|
312 |
+
For video inference mode, you can use "input=0" to capture live video from a web camera and "output=<video name>" to dump the inference result to another video.
|
313 |
+
|
314 |
+
### Tensorflow model convert
|
315 |
+
Use [keras_to_tensorflow.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/tools/model_converter/keras_to_tensorflow.py) to convert the tf.keras .h5 model to a tensorflow frozen pb model:
|
316 |
+
```
|
317 |
+
# python keras_to_tensorflow.py
|
318 |
+
--input_model="path/to/keras/model.h5"
|
319 |
+
--output_model="path/to/save/model.pb"
|
320 |
+
```
|
321 |
+
|
322 |
+
### ONNX model convert
|
323 |
+
Use [keras_to_onnx.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/tools/model_converter/keras_to_onnx.py) to convert the tf.keras .h5 model to an ONNX model:
|
324 |
+
```
|
325 |
+
### need to set environment TF_KERAS=1 for tf.keras model
|
326 |
+
# export TF_KERAS=1
|
327 |
+
# python keras_to_onnx.py
|
328 |
+
--keras_model_file="path/to/keras/model.h5"
|
329 |
+
--output_file="path/to/save/model.onnx"
|
330 |
+
--op_set=11
|
331 |
+
```
|
332 |
+
|
333 |
+
You can also use [eval.py](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/eval.py) to evaluate the pb & onnx inference models.
|
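As a rough sketch of consuming the exported ONNX model with onnxruntime (the input layout, shape and preprocessing below are assumptions; check your exported graph):

```python
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession('model.onnx')
input_name = session.get_inputs()[0].name

# dummy NHWC float input; replace with a real preprocessed image batch
image = np.random.rand(1, 512, 512, 3).astype(np.float32)
outputs = session.run(None, {input_name: image})
pred_mask = np.argmax(outputs[0], axis=-1)  # per-pixel class ids
print(pred_mask.shape)
```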
334 |
+
|
335 |
+
### Inference model deployment
|
336 |
+
See [on-device inference](https://github.com/david8862/tf-keras-deeplabv3p-model-set/blob/master/inference) for TFLite & MNN model deployment
|
337 |
+
|
338 |
+
|
339 |
+
### TODO
|
340 |
+
- [ ] support model pruning
|
341 |
+
- [ ] support SubPixel predict layer
|
342 |
+
- [ ] support Quantization aware training
|
343 |
+
|
344 |
+
|
345 |
+
## Some issues to know
|
346 |
+
1. The test environment is
|
347 |
+
- Ubuntu 16.04/18.04
|
348 |
+
- Python 3.6.8
|
349 |
+
- tensorflow 2.0.0/tensorflow 1.15.0
|
350 |
+
- tf.keras 2.2.4-tf
|
351 |
+
|
352 |
+
2. Imagenet pretrained weights for the backbone are automatically loaded (if available) when training, so it's recommended to freeze the backbone layers for several epochs in the transfer training stage.
|
353 |
+
|
354 |
+
3. The training strategy is for reference only. Adjust it according to your dataset and your target, and add further strategies if needed.
|
355 |
+
|
356 |
+
|
357 |
+
## Contribution guidelines
|
358 |
+
New features, improvements and any other kind of contributions are warmly welcome via pull request :)
|
359 |
+
|
360 |
+
|
361 |
+
# Citation
|
362 |
+
Please cite tf-keras-deeplabv3p-model-set in your publications if it helps your research:
|
363 |
+
```
|
364 |
+
@article{Keras-segmentation-deeplab-v3.1,
|
365 |
+
Author = {Jenia Golbstein},
|
366 |
+
Year = {2019}
|
367 |
+
}
|
368 |
+
@article{pytorch-deeplab-xception,
|
369 |
+
Author = {jfzhang95},
|
370 |
+
Year = {2019}
|
371 |
+
}
|
372 |
+
|
373 |
+
@article{focal_loss,
|
374 |
+
title={Focal Loss for Dense Object Detection},
|
375 |
+
author={Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He, Piotr Dollár},
|
376 |
+
journal = {arXiv},
|
377 |
+
year={2017}
|
378 |
+
}
|
379 |
+
|
380 |
+
```
|
models/deeplab/assets/2007_000346_inference.png
ADDED
models/deeplab/assets/confusion_matrix.png
ADDED
models/deeplab/assets/dog_inference.png
ADDED
models/deeplab/assets/mIOU.png
ADDED
models/deeplab/common/callbacks.py
ADDED
@@ -0,0 +1,32 @@
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding=utf-8 -*-
|
3 |
+
"""custom model callbacks."""
|
4 |
+
import os, sys, random, tempfile
|
5 |
+
import numpy as np
|
6 |
+
from tensorflow_model_optimization.sparsity import keras as sparsity
|
7 |
+
from tensorflow.keras.callbacks import Callback
|
8 |
+
|
9 |
+
from eval import eval_mIOU
|
10 |
+
|
11 |
+
|
12 |
+
class EvalCallBack(Callback):
|
13 |
+
def __init__(self, dataset_path, dataset, class_names, model_input_shape, model_pruning, log_dir, eval_epoch_interval=10, save_eval_checkpoint=False):
|
14 |
+
self.dataset_path = dataset_path
|
15 |
+
self.dataset = dataset
|
16 |
+
self.class_names = class_names
|
17 |
+
self.model_input_shape = model_input_shape
|
18 |
+
self.model_pruning = model_pruning
|
19 |
+
self.log_dir = log_dir
|
20 |
+
self.eval_epoch_interval = eval_epoch_interval
|
21 |
+
self.save_eval_checkpoint = save_eval_checkpoint
|
22 |
+
self.best_mIOU = 0.0
|
23 |
+
|
24 |
+
def on_epoch_end(self, epoch, logs=None):
|
25 |
+
if (epoch+1) % self.eval_epoch_interval == 0:
|
26 |
+
# Do eval every eval_epoch_interval epochs
|
27 |
+
mIOU = eval_mIOU(self.model, 'H5', self.dataset_path, self.dataset, self.class_names, self.model_input_shape, do_crf=False, save_result=False, show_background=True)
|
28 |
+
|
29 |
+
if self.save_eval_checkpoint and mIOU > self.best_mIOU:
|
30 |
+
# Save best mIOU value and model checkpoint
|
31 |
+
self.best_mIOU = mIOU
|
32 |
+
self.model.save(os.path.join(self.log_dir, 'ep{epoch:03d}-loss{loss:.3f}-Jaccard{Jaccard:.3f}-val_loss{val_loss:.3f}-val_Jaccard{val_Jaccard:.3f}-mIOU{mIOU:.3f}.h5'.format(epoch=(epoch+1), loss=logs.get('loss'), Jaccard=logs.get('Jaccard'), val_loss=logs.get('val_loss'), val_Jaccard=logs.get('val_Jaccard'), mIOU=mIOU)))
|
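A hedged usage sketch for wiring this callback into training (the argument values are illustrative; `model`, `train_generator`, `val_dataset` and `class_names` would come from the repo's own train.py / data utilities):

```python
# illustrative wiring only, mirroring the __init__ signature above
eval_callback = EvalCallBack(dataset_path='VOC2012/',
                             dataset=val_dataset,            # sample id list used by eval_mIOU
                             class_names=class_names,
                             model_input_shape=(512, 512),
                             model_pruning=False,
                             log_dir='logs/000',
                             eval_epoch_interval=1,
                             save_eval_checkpoint=True)

model.fit(train_generator, epochs=150, callbacks=[eval_callback])
```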
models/deeplab/common/data_utils.py
ADDED
@@ -0,0 +1,523 @@
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding=utf-8 -*-
|
3 |
+
"""Data process utility functions."""
|
4 |
+
import numpy as np
|
5 |
+
import random
|
6 |
+
import math
|
7 |
+
import cv2
|
8 |
+
from PIL import Image, ImageEnhance
|
9 |
+
|
10 |
+
def rand(a=0, b=1):
|
11 |
+
return np.random.rand()*(b-a) + a
|
12 |
+
|
13 |
+
|
14 |
+
def random_horizontal_flip(image, label, prob=.5):
|
15 |
+
"""
|
16 |
+
Random horizontal flip for image & label
|
17 |
+
|
18 |
+
# Arguments
|
19 |
+
image: origin image for horizontal flip
|
20 |
+
numpy array containing image data
|
21 |
+
label: origin label for horizontal flip
|
22 |
+
numpy array containing segment label mask
|
23 |
+
prob: probability for random flip,
|
24 |
+
scalar to control the flip probability.
|
25 |
+
|
26 |
+
# Returns
|
27 |
+
image: adjusted numpy array image.
|
28 |
+
label: adjusted numpy array label mask
|
29 |
+
"""
|
30 |
+
flip = rand() < prob
|
31 |
+
if flip:
|
32 |
+
image = cv2.flip(image, 1)
|
33 |
+
label = cv2.flip(label, 1)
|
34 |
+
|
35 |
+
return image, label
|
36 |
+
|
37 |
+
|
38 |
+
def random_vertical_flip(image, label, prob=.5):
|
39 |
+
"""
|
40 |
+
Random vertical flip for image & label
|
41 |
+
|
42 |
+
# Arguments
|
43 |
+
image: origin image for vertical flip
|
44 |
+
numpy array containing image data
|
45 |
+
label: origin label for vertical flip
|
46 |
+
numpy array containing segment label mask
|
47 |
+
prob: probability for random flip,
|
48 |
+
scalar to control the flip probability.
|
49 |
+
|
50 |
+
# Returns
|
51 |
+
image: adjusted numpy array image.
|
52 |
+
label: adjusted numpy array label mask
|
53 |
+
"""
|
54 |
+
flip = rand() < prob
|
55 |
+
if flip:
|
56 |
+
image = cv2.flip(image, 0)
|
57 |
+
label = cv2.flip(label, 0)
|
58 |
+
|
59 |
+
return image, label
|
60 |
+
|
61 |
+
|
62 |
+
#def random_brightness(image, jitter=.3):
|
63 |
+
#"""
|
64 |
+
#Random adjust brightness for image
|
65 |
+
|
66 |
+
## Arguments
|
67 |
+
#image: origin image for brightness change
|
68 |
+
#numpy array containing image data
|
69 |
+
#jitter: jitter range for random brightness,
|
70 |
+
#scalar to control the random brightness level.
|
71 |
+
|
72 |
+
## Returns
|
73 |
+
#new_image: adjusted numpy array image.
|
74 |
+
#"""
|
75 |
+
#factor = 1.0 + random.gauss(mu=0.0, sigma=jitter)
|
76 |
+
#if random.randint(0,1) and abs(factor) > 0.1:
|
77 |
+
#factor = 1.0/factor
|
78 |
+
#table = np.array([((i / 255.0) ** factor) * 255 for i in np.arange(0, 256)]).astype(np.uint8)
|
79 |
+
#new_image = cv2.LUT(image, table)
|
80 |
+
|
81 |
+
#return new_image
|
82 |
+
|
83 |
+
def random_brightness(image, jitter=.5):
|
84 |
+
"""
|
85 |
+
Random adjust brightness for image
|
86 |
+
|
87 |
+
# Arguments
|
88 |
+
image: origin image for brightness change
|
89 |
+
numpy array containing image data
|
90 |
+
jitter: jitter range for random brightness,
|
91 |
+
scalar to control the random brightness level.
|
92 |
+
|
93 |
+
# Returns
|
94 |
+
image: adjusted numpy array image.
|
95 |
+
"""
|
96 |
+
img = Image.fromarray(image)
|
97 |
+
enh_bri = ImageEnhance.Brightness(img)
|
98 |
+
brightness = rand(jitter, 1/jitter)
|
99 |
+
new_img = enh_bri.enhance(brightness)
|
100 |
+
image = np.asarray(new_img)
|
101 |
+
|
102 |
+
return image
|
103 |
+
|
104 |
+
|
105 |
+
def random_blur(image, prob=.5, size=5):
|
106 |
+
"""
|
107 |
+
Random add gaussian blur to image
|
108 |
+
|
109 |
+
# Arguments
|
110 |
+
image: origin image for blur
|
111 |
+
numpy array containing image data
|
112 |
+
prob: probability for blur,
|
113 |
+
scalar to control the blur probability.
|
114 |
+
size: kernel size for gaussian blur,
|
115 |
+
scalar to control the filter size.
|
116 |
+
|
117 |
+
# Returns
|
118 |
+
image: adjusted numpy array image.
|
119 |
+
"""
|
120 |
+
blur = rand() < prob
|
121 |
+
if blur:
|
122 |
+
image = cv2.GaussianBlur(image, (size, size), 0)
|
123 |
+
|
124 |
+
return image
|
125 |
+
|
126 |
+
|
127 |
+
def random_histeq(image, size=8, prob=.2):
|
128 |
+
"""
|
129 |
+
Random apply "Contrast Limited Adaptive Histogram Equalization"
|
130 |
+
to image
|
131 |
+
|
132 |
+
# Arguments
|
133 |
+
image: origin image for histeq
|
134 |
+
numpy array containing image data
|
135 |
+
size: grid size for CLAHE,
|
136 |
+
scalar to control the grid size.
|
137 |
+
prob: probability for histeq,
|
138 |
+
scalar to control the histeq probability.
|
139 |
+
|
140 |
+
# Returns
|
141 |
+
image: adjusted numpy array image.
|
142 |
+
"""
|
143 |
+
histeq = rand() < prob
|
144 |
+
if histeq:
|
145 |
+
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(size, size))
|
146 |
+
img_yuv = cv2.cvtColor(image, cv2.COLOR_BGR2YUV)
|
147 |
+
img_yuv[:,:,0] = clahe.apply(img_yuv[:,:,0])
|
148 |
+
image = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR) # to BGR
|
149 |
+
return image
|
150 |
+
|
151 |
+
|
152 |
+
def random_grayscale(image, prob=.2):
|
153 |
+
"""
|
154 |
+
Random convert image to grayscale
|
155 |
+
|
156 |
+
# Arguments
|
157 |
+
image: origin image for grayscale convert
|
158 |
+
numpy array containing image data
|
159 |
+
prob: probability for grayscale convert,
|
160 |
+
scalar to control the convert probability.
|
161 |
+
|
162 |
+
# Returns
|
163 |
+
image: adjusted numpy array image.
|
164 |
+
"""
|
165 |
+
convert = rand() < prob
|
166 |
+
if convert:
|
167 |
+
#convert to grayscale first, and then
|
168 |
+
#back to 3 channels fake BGR
|
169 |
+
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
170 |
+
image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
|
171 |
+
|
172 |
+
return image
|
173 |
+
|
174 |
+
|
175 |
+
def random_chroma(image, jitter=.5):
|
176 |
+
"""
|
177 |
+
Random adjust chroma (color level) for image
|
178 |
+
|
179 |
+
# Arguments
|
180 |
+
image: origin image for chroma change
|
181 |
+
numpy array containing image data
|
182 |
+
jitter: jitter range for random chroma,
|
183 |
+
scalar to control the random color level.
|
184 |
+
|
185 |
+
# Returns
|
186 |
+
image: adjusted numpy array image.
|
187 |
+
"""
|
188 |
+
img = Image.fromarray(image)
|
189 |
+
enh_col = ImageEnhance.Color(img)
|
190 |
+
color = rand(jitter, 1/jitter)
|
191 |
+
new_img = enh_col.enhance(color)
|
192 |
+
image = np.asarray(new_img)
|
193 |
+
|
194 |
+
return image
|
195 |
+
|
196 |
+
|
197 |
+
def random_contrast(image, jitter=.5):
|
198 |
+
"""
|
199 |
+
Random adjust contrast for image
|
200 |
+
|
201 |
+
# Arguments
|
202 |
+
image: origin image for contrast change
|
203 |
+
numpy array containing image data
|
204 |
+
jitter: jitter range for random contrast,
|
205 |
+
scalar to control the random contrast level.
|
206 |
+
|
207 |
+
# Returns
|
208 |
+
image: adjusted numpy array image.
|
209 |
+
"""
|
210 |
+
img = Image.fromarray(image)
|
211 |
+
enh_con = ImageEnhance.Contrast(img)
|
212 |
+
contrast = rand(jitter, 1/jitter)
|
213 |
+
new_img = enh_con.enhance(contrast)
|
214 |
+
image = np.asarray(new_img)
|
215 |
+
|
216 |
+
return image
|
217 |
+
|
218 |
+
|
219 |
+
def random_sharpness(image, jitter=.5):
|
220 |
+
"""
|
221 |
+
Random adjust sharpness for image
|
222 |
+
|
223 |
+
# Arguments
|
224 |
+
image: origin image for sharpness change
|
225 |
+
numpy array containing image data
|
226 |
+
jitter: jitter range for random sharpness,
|
227 |
+
scalar to control the random sharpness level.
|
228 |
+
|
229 |
+
# Returns
|
230 |
+
image: adjusted numpy array image.
|
231 |
+
"""
|
232 |
+
img = Image.fromarray(image)
|
233 |
+
enh_sha = ImageEnhance.Sharpness(img)
|
234 |
+
sharpness = rand(jitter, 1/jitter)
|
235 |
+
new_img = enh_sha.enhance(sharpness)
|
236 |
+
image = np.asarray(new_img)
|
237 |
+
|
238 |
+
return image
|
239 |
+
|
240 |
+
|
241 |
+
def random_zoom_rotate(image, label, rotate_range=30, zoom_range=0.2, prob=0.3):
|
242 |
+
"""
|
243 |
+
Random do zoom & rotate for image & label
|
244 |
+
|
245 |
+
# Arguments
|
246 |
+
image: origin image for zoom & rotate
|
247 |
+
numpy array containing image data
|
248 |
+
label: origin label for zoom & rotate
|
249 |
+
numpy array containing segment label mask
|
250 |
+
prob: probability for random flip,
|
251 |
+
scalar to control the flip probability.
|
252 |
+
|
253 |
+
# Returns
|
254 |
+
image: adjusted numpy array image.
|
255 |
+
label: adjusted numpy array label mask
|
256 |
+
"""
|
257 |
+
if rotate_range:
|
258 |
+
angle = random.gauss(mu=0.0, sigma=rotate_range)
|
259 |
+
else:
|
260 |
+
angle = 0.0
|
261 |
+
|
262 |
+
if zoom_range:
|
263 |
+
scale = random.gauss(mu=1.0, sigma=zoom_range)
|
264 |
+
else:
|
265 |
+
scale = 1.0
|
266 |
+
|
267 |
+
warpAffine = rand() < prob
|
268 |
+
if warpAffine and (rotate_range or zoom_range):
|
269 |
+
M = cv2.getRotationMatrix2D((image.shape[1]//2, image.shape[0]//2), angle, scale)
|
270 |
+
image = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]), flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=0)
|
271 |
+
label = cv2.warpAffine(label, M, (label.shape[1], label.shape[0]), flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=0)
|
272 |
+
|
273 |
+
return image, label
|
274 |
+
|
275 |
+
|
276 |
+
class Grid(object):
|
277 |
+
def __init__(self, d1, d2, rotate=360, ratio=0.5, mode=1, prob=1.):
|
278 |
+
self.d1 = d1
|
279 |
+
self.d2 = d2
|
280 |
+
self.rotate = rotate
|
281 |
+
self.ratio = ratio
|
282 |
+
self.mode=mode
|
283 |
+
self.st_prob = self.prob = prob
|
284 |
+
|
285 |
+
def set_prob(self, epoch, max_epoch):
|
286 |
+
self.prob = self.st_prob * min(1, epoch / max_epoch)
|
287 |
+
|
288 |
+
def __call__(self, img, label):
|
289 |
+
h = img.shape[0]
|
290 |
+
w = img.shape[1]
|
291 |
+
|
292 |
+
if np.random.rand() > self.prob:
|
293 |
+
return img, label
|
294 |
+
|
295 |
+
# 1.5 * h, 1.5 * w works fine with the squared images
|
296 |
+
# But with rectangular input, the mask might not be able to recover back to the input image shape
|
297 |
+
        # A square mask with edge length equal to the diagonal of the input image
|
298 |
+
# will be able to cover all the image spot after the rotation. This is also the minimum square.
|
299 |
+
hh = math.ceil((math.sqrt(h*h + w*w)))
|
300 |
+
|
301 |
+
d = np.random.randint(self.d1, self.d2)
|
302 |
+
#d = self.d
|
303 |
+
|
304 |
+
# maybe use ceil? but i guess no big difference
|
305 |
+
self.l = math.ceil(d*self.ratio)
|
306 |
+
|
307 |
+
mask = np.ones((hh, hh), np.float32)
|
308 |
+
st_h = np.random.randint(d)
|
309 |
+
st_w = np.random.randint(d)
|
310 |
+
for i in range(-1, hh//d+1):
|
311 |
+
s = d*i + st_h
|
312 |
+
t = s+self.l
|
313 |
+
s = max(min(s, hh), 0)
|
314 |
+
t = max(min(t, hh), 0)
|
315 |
+
mask[s:t,:] *= 0
|
316 |
+
for i in range(-1, hh//d+1):
|
317 |
+
s = d*i + st_w
|
318 |
+
t = s+self.l
|
319 |
+
s = max(min(s, hh), 0)
|
320 |
+
t = max(min(t, hh), 0)
|
321 |
+
mask[:,s:t] *= 0
|
322 |
+
r = np.random.randint(self.rotate)
|
323 |
+
mask = Image.fromarray(np.uint8(mask))
|
324 |
+
mask = mask.rotate(r)
|
325 |
+
mask = np.asarray(mask)
|
326 |
+
mask = mask[(hh-h)//2:(hh-h)//2+h, (hh-w)//2:(hh-w)//2+w]
|
327 |
+
|
328 |
+
if self.mode == 1:
|
329 |
+
mask = 1-mask
|
330 |
+
|
331 |
+
#mask = mask.expand_as(img)
|
332 |
+
img = img * np.expand_dims(mask, -1)
|
333 |
+
label = label * mask
|
334 |
+
|
335 |
+
return img, label
|
336 |
+
|
337 |
+
|
338 |
+
def random_gridmask(image, label, prob=0.2):
|
339 |
+
"""
|
340 |
+
Random do GridMask augment for image & label
|
341 |
+
|
342 |
+
reference:
|
343 |
+
https://arxiv.org/abs/2001.04086
|
344 |
+
https://github.com/Jia-Research-Lab/GridMask/blob/master/imagenet_grid/utils/grid.py
|
345 |
+
|
346 |
+
# Arguments
|
347 |
+
image: origin image for GridMask
|
348 |
+
numpy array containing image data
|
349 |
+
label: origin label for zoom & rotate
|
350 |
+
numpy array containing segment label mask
|
351 |
+
prob: probability for GridMask,
|
352 |
+
scalar to control the GridMask probability.
|
353 |
+
|
354 |
+
# Returns
|
355 |
+
image: adjusted numpy array image.
|
356 |
+
label: adjusted numpy array label mask
|
357 |
+
"""
|
358 |
+
grid = Grid(d1=image.shape[1]//7, d2=image.shape[1]//3, rotate=360, ratio=0.5, prob=prob)
|
359 |
+
image, label = grid(image, label)
|
360 |
+
|
361 |
+
return image, label
|
362 |
+
|
363 |
+
|
364 |
+
def random_crop(image, label, crop_shape, prob=.1):
|
365 |
+
"""
|
366 |
+
Random crop a specific size area from image
|
367 |
+
and label
|
368 |
+
|
369 |
+
# Arguments
|
370 |
+
image: origin image for vertical flip
|
371 |
+
numpy array containing image data
|
372 |
+
label: origin label for vertical flip
|
373 |
+
numpy array containing segment label mask
|
374 |
+
crop_shape: target crop shape,
|
375 |
+
list or tuple in (width, height).
|
376 |
+
prob: probability for crop,
|
377 |
+
scalar to control the crop probability.
|
378 |
+
|
379 |
+
# Returns
|
380 |
+
        image: cropped numpy array image.
|
381 |
+
        label: cropped numpy array label mask
|
382 |
+
"""
|
383 |
+
# check if the image and label are same shape
|
384 |
+
if (image.shape[0] != label.shape[0]) or (image.shape[1] != label.shape[1]):
|
385 |
+
raise Exception('Image and label must have the same dimensions!')
|
386 |
+
|
387 |
+
crop = rand() < prob
|
388 |
+
if crop:
|
389 |
+
if (crop_shape[0] < image.shape[1]) and (crop_shape[1] < image.shape[0]):
|
390 |
+
x = random.randrange(image.shape[1]-crop_shape[0])
|
391 |
+
y = random.randrange(image.shape[0]-crop_shape[1])
|
392 |
+
|
393 |
+
image = image[y:y+crop_shape[1], x:x+crop_shape[0], :]
|
394 |
+
label = label[y:y+crop_shape[1], x:x+crop_shape[0]]
|
395 |
+
else:
|
396 |
+
image = cv2.resize(image, crop_shape)
|
397 |
+
label = cv2.resize(label, crop_shape, interpolation = cv2.INTER_NEAREST)
|
398 |
+
|
399 |
+
return image, label
|
400 |
+
|
401 |
+
|
402 |
+
|
403 |
+
def normalize_image(image):
|
404 |
+
"""
|
405 |
+
normalize image array from 0 ~ 255
|
406 |
+
to -1.0 ~ 1.0
|
407 |
+
|
408 |
+
# Arguments
|
409 |
+
image: origin input image
|
410 |
+
numpy image array with dtype=float, 0.0 ~ 255.0
|
411 |
+
|
412 |
+
# Returns
|
413 |
+
image: numpy image array with dtype=float, -1.0 ~ 1.0
|
414 |
+
"""
|
415 |
+
image = image.astype(np.float32) / 127.5 - 1
|
416 |
+
|
417 |
+
return image
|
418 |
+
|
419 |
+
|
420 |
+
def denormalize_image(image):
|
421 |
+
"""
|
422 |
+
Denormalize image array from -1.0 ~ 1.0
|
423 |
+
to 0 ~ 255
|
424 |
+
|
425 |
+
# Arguments
|
426 |
+
image: normalized image array with dtype=float, -1.0 ~ 1.0
|
427 |
+
|
428 |
+
# Returns
|
429 |
+
image: numpy image array with dtype=uint8, 0 ~ 255
|
430 |
+
"""
|
431 |
+
image = (image * 127.5 + 127.5).astype(np.uint8)
|
432 |
+
|
433 |
+
return image
|
434 |
+
|
435 |
+
|
436 |
+
def preprocess_image(image, model_image_size):
|
437 |
+
"""
|
438 |
+
Prepare model input image data with
|
439 |
+
resize, normalize and dim expansion
|
440 |
+
|
441 |
+
# Arguments
|
442 |
+
image: origin input image
|
443 |
+
PIL Image object containing image data
|
444 |
+
model_image_size: model input image size
|
445 |
+
tuple of format (height, width).
|
446 |
+
|
447 |
+
# Returns
|
448 |
+
image_data: numpy array of image data for model input.
|
449 |
+
"""
|
450 |
+
resized_image = image.resize(model_image_size, Image.BICUBIC)
|
451 |
+
image_data = np.asarray(resized_image).astype('float32')
|
452 |
+
#image_data = normalize_image(image_data)
|
453 |
+
image_data = np.expand_dims(image_data, 0)
|
454 |
+
return image_data
|
455 |
+
|
456 |
+
|
457 |
+
def mask_resize(mask, target_size):
|
458 |
+
"""
|
459 |
+
Resize predict segmentation mask array to target size
|
460 |
+
with bilinear interpolation
|
461 |
+
|
462 |
+
# Arguments
|
463 |
+
mask: predict mask array to be resize
|
464 |
+
uint8 numpy array with shape (height, width, 1)
|
465 |
+
target_size: target image size,
|
466 |
+
tuple of format (width, height).
|
467 |
+
|
468 |
+
# Returns
|
469 |
+
resize_mask: resized mask array.
|
470 |
+
|
471 |
+
"""
|
472 |
+
dst_w, dst_h = target_size # dest width & height
|
473 |
+
src_h, src_w = mask.shape[:2] # src width & height
|
474 |
+
|
475 |
+
if src_h == dst_h and src_w == dst_w:
|
476 |
+
return mask.copy()
|
477 |
+
|
478 |
+
scale_x = float(src_w) / dst_w # resize scale for width
|
479 |
+
scale_y = float(src_h) / dst_h # resize scale for height
|
480 |
+
|
481 |
+
# create & go through the target image array
|
482 |
+
resize_mask = np.zeros((dst_h, dst_w), dtype=np.uint8)
|
483 |
+
for dst_y in range(dst_h):
|
484 |
+
for dst_x in range(dst_w):
|
485 |
+
# mapping dest point back to src point
|
486 |
+
src_x = (dst_x + 0.5) * scale_x - 0.5
|
487 |
+
src_y = (dst_y + 0.5) * scale_y - 0.5
|
488 |
+
# calculate round point in src image
|
489 |
+
src_x_0 = int(np.floor(src_x))
|
490 |
+
src_y_0 = int(np.floor(src_y))
|
491 |
+
src_x_1 = min(src_x_0 + 1, src_w - 1)
|
492 |
+
src_y_1 = min(src_y_0 + 1, src_h - 1)
|
493 |
+
|
494 |
+
# Bilinear interpolation
|
495 |
+
value0 = (src_x_1 - src_x) * mask[src_y_0, src_x_0] + (src_x - src_x_0) * mask[src_y_0, src_x_1]
|
496 |
+
value1 = (src_x_1 - src_x) * mask[src_y_1, src_x_0] + (src_x - src_x_0) * mask[src_y_1, src_x_1]
|
497 |
+
resize_mask[dst_y, dst_x] = int((src_y_1 - src_y) * value0 + (src_y - src_y_0) * value1)
|
498 |
+
|
499 |
+
return resize_mask
|
500 |
+
|
501 |
+
|
502 |
+
def mask_resize_fast(mask, target_size):
|
503 |
+
"""
|
504 |
+
Use cv2 to do a quick resize on predict
|
505 |
+
segmentation mask array to target size
|
506 |
+
|
507 |
+
# Arguments
|
508 |
+
mask: predict mask array to be resize
|
509 |
+
uint8 numpy array with shape (height, width, 1)
|
510 |
+
target_size: target image size,
|
511 |
+
tuple of format (width, height).
|
512 |
+
|
513 |
+
# Returns
|
514 |
+
resize_mask: resized mask array.
|
515 |
+
|
516 |
+
"""
|
517 |
+
mask = cv2.merge([mask, mask, mask]).astype('uint8')
|
518 |
+
#resize_mask = cv2.resize(mask, target_size, cv2.INTER_AREA)
|
519 |
+
resize_mask = cv2.resize(mask, target_size, cv2.INTER_NEAREST)
|
520 |
+
(resize_mask, _, _) = cv2.split(np.array(resize_mask))
|
521 |
+
|
522 |
+
return resize_mask
|
523 |
+
|
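A minimal sketch of chaining these augmentations on an image/label pair (file names, order and probabilities are illustrative, not the exact pipeline used by the data generator):

```python
import cv2

# image: BGR uint8 array; label: uint8 mask of class ids with the same height/width
image = cv2.imread('example.jpg')
label = cv2.imread('example.png', cv2.IMREAD_GRAYSCALE)

image, label = random_horizontal_flip(image, label, prob=0.5)
image = random_brightness(image, jitter=0.5)
image = random_blur(image, prob=0.5)
image, label = random_zoom_rotate(image, label, rotate_range=30, zoom_range=0.2, prob=0.3)
image, label = random_crop(image, label, crop_shape=(512, 512), prob=0.1)
```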
models/deeplab/common/model_utils.py
ADDED
@@ -0,0 +1,168 @@
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding=utf-8 -*-
|
3 |
+
"""Model utility functions."""
|
4 |
+
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
|
5 |
+
from tensorflow.keras.optimizers.schedules import ExponentialDecay, PolynomialDecay, PiecewiseConstantDecay
|
6 |
+
from tensorflow.keras.experimental import CosineDecay
|
7 |
+
from tensorflow_model_optimization.sparsity import keras as sparsity
|
8 |
+
|
9 |
+
|
10 |
+
def get_pruning_model(model, begin_step, end_step):
|
11 |
+
import tensorflow as tf
|
12 |
+
if tf.__version__.startswith('2'):
|
13 |
+
# model pruning API is not supported in TF 2.0 yet
|
14 |
+
raise Exception('model pruning is not fully supported in TF 2.x, Please switch env to TF 1.x for this feature')
|
15 |
+
|
16 |
+
pruning_params = {
|
17 |
+
'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=0.0,
|
18 |
+
final_sparsity=0.7,
|
19 |
+
begin_step=begin_step,
|
20 |
+
end_step=end_step,
|
21 |
+
frequency=100)
|
22 |
+
}
|
23 |
+
|
24 |
+
pruning_model = sparsity.prune_low_magnitude(model, **pruning_params)
|
25 |
+
return pruning_model
|
26 |
+
|
27 |
+
|
28 |
+
# some global value for lr scheduler
|
29 |
+
# need to update to CLI option in main()
|
30 |
+
#lr_base = 1e-3
|
31 |
+
#total_epochs = 250
|
32 |
+
|
33 |
+
#def learning_rate_scheduler(epoch, curr_lr, mode='cosine_decay'):
|
34 |
+
#lr_power = 0.9
|
35 |
+
#lr = curr_lr
|
36 |
+
|
37 |
+
## adam default lr
|
38 |
+
#if mode is 'adam':
|
39 |
+
#lr = 0.001
|
40 |
+
|
41 |
+
## original lr scheduler
|
42 |
+
#if mode is 'power_decay':
|
43 |
+
#lr = lr_base * ((1 - float(epoch) / total_epochs) ** lr_power)
|
44 |
+
|
45 |
+
## exponential decay policy
|
46 |
+
#if mode is 'exp_decay':
|
47 |
+
#lr = (float(lr_base) ** float(lr_power)) ** float(epoch + 1)
|
48 |
+
|
49 |
+
## cosine decay policy, including warmup and hold stage
|
50 |
+
#if mode is 'cosine_decay':
|
51 |
+
##warmup & hold hyperparams, adjust for your training
|
52 |
+
#warmup_epochs = 0
|
53 |
+
#hold_base_rate_epochs = 0
|
54 |
+
#warmup_lr = lr_base * 0.01
|
55 |
+
#lr = 0.5 * lr_base * (1 + np.cos(
|
56 |
+
#np.pi * float(epoch - warmup_epochs - hold_base_rate_epochs) /
|
57 |
+
#float(total_epochs - warmup_epochs - hold_base_rate_epochs)))
|
58 |
+
|
59 |
+
#if hold_base_rate_epochs > 0 and epoch < warmup_epochs + hold_base_rate_epochs:
|
60 |
+
#lr = lr_base
|
61 |
+
|
62 |
+
#if warmup_epochs > 0 and epoch < warmup_epochs:
|
63 |
+
#if lr_base < warmup_lr:
|
64 |
+
#raise ValueError('learning_rate_base must be larger or equal to '
|
65 |
+
#'warmup_learning_rate.')
|
66 |
+
#slope = (lr_base - warmup_lr) / float(warmup_epochs)
|
67 |
+
#warmup_rate = slope * float(epoch) + warmup_lr
|
68 |
+
#lr = warmup_rate
|
69 |
+
|
70 |
+
#if mode is 'progressive_drops':
|
71 |
+
## drops as progression proceeds, good for sgd
|
72 |
+
#if epoch > 0.9 * total_epochs:
|
73 |
+
#lr = 0.0001
|
74 |
+
#elif epoch > 0.75 * total_epochs:
|
75 |
+
#lr = 0.001
|
76 |
+
#elif epoch > 0.5 * total_epochs:
|
77 |
+
#lr = 0.01
|
78 |
+
#else:
|
79 |
+
#lr = 0.1
|
80 |
+
|
81 |
+
#print('learning_rate change to: {}'.format(lr))
|
82 |
+
#return lr
|
83 |
+
|
84 |
+
|
85 |
+
def get_lr_scheduler(learning_rate, decay_type, decay_steps):
|
86 |
+
if decay_type:
|
87 |
+
decay_type = decay_type.lower()
|
88 |
+
|
89 |
+
if decay_type == None:
|
90 |
+
lr_scheduler = learning_rate
|
91 |
+
elif decay_type == 'cosine':
|
92 |
+
lr_scheduler = CosineDecay(initial_learning_rate=learning_rate, decay_steps=decay_steps, alpha=0.2) # use 0.2*learning_rate as final minimum learning rate
|
93 |
+
elif decay_type == 'exponential':
|
94 |
+
lr_scheduler = ExponentialDecay(initial_learning_rate=learning_rate, decay_steps=decay_steps, decay_rate=0.9)
|
95 |
+
elif decay_type == 'polynomial':
|
96 |
+
lr_scheduler = PolynomialDecay(initial_learning_rate=learning_rate, decay_steps=decay_steps, end_learning_rate=learning_rate/100)
|
97 |
+
elif decay_type == 'piecewise_constant':
|
98 |
+
#apply a piecewise constant lr scheduler, including warmup stage
|
99 |
+
boundaries = [500, int(decay_steps*0.9), decay_steps]
|
100 |
+
values = [0.001, learning_rate, learning_rate/10., learning_rate/100.]
|
101 |
+
lr_scheduler = PiecewiseConstantDecay(boundaries=boundaries, values=values)
|
102 |
+
else:
|
103 |
+
raise ValueError('Unsupported lr decay type')
|
104 |
+
|
105 |
+
return lr_scheduler
|
106 |
+
|
107 |
+
|
108 |
+
def get_optimizer(optim_type, learning_rate, average_type=None, decay_type='cosine', decay_steps=100000):
|
109 |
+
optim_type = optim_type.lower()
|
110 |
+
|
111 |
+
lr_scheduler = get_lr_scheduler(learning_rate, decay_type, decay_steps)
|
112 |
+
|
113 |
+
if optim_type == 'adam':
|
114 |
+
optimizer = Adam(learning_rate=lr_scheduler, epsilon=1e-7, amsgrad=False)
|
115 |
+
elif optim_type == 'rmsprop':
|
116 |
+
optimizer = RMSprop(learning_rate=lr_scheduler, rho=0.9, momentum=0.0, centered=False)
|
117 |
+
elif optim_type == 'sgd':
|
118 |
+
optimizer = SGD(learning_rate=lr_scheduler, momentum=0.9, nesterov=False)
|
119 |
+
else:
|
120 |
+
raise ValueError('Unsupported optimizer type')
|
121 |
+
|
122 |
+
if average_type:
|
123 |
+
optimizer = get_averaged_optimizer(average_type, optimizer)
|
124 |
+
|
125 |
+
return optimizer
|
126 |
+
|
127 |
+
|
128 |
+
def get_averaged_optimizer(average_type, optimizer):
|
129 |
+
"""
|
130 |
+
Apply weights average mechanism in optimizer. Need tensorflow-addons
|
131 |
+
which request TF 2.x and have following compatibility table:
|
132 |
+
-------------------------------------------------------------
|
133 |
+
| Tensorflow Addons | Tensorflow | Python |
|
134 |
+
-------------------------------------------------------------
|
135 |
+
| tfa-nightly | 2.3, 2.4 | 3.6, 3.7, 3.8 |
|
136 |
+
-------------------------------------------------------------
|
137 |
+
| tensorflow-addons-0.12.0 | 2.3, 2.4 | 3.6, 3.7, 3.8 |
|
138 |
+
-------------------------------------------------------------
|
139 |
+
| tensorflow-addons-0.11.2 | 2.2, 2.3 | 3.5, 3.6, 3.7, 3.8 |
|
140 |
+
-------------------------------------------------------------
|
141 |
+
| tensorflow-addons-0.10.0 | 2.2 | 3.5, 3.6, 3.7, 3.8 |
|
142 |
+
-------------------------------------------------------------
|
143 |
+
| tensorflow-addons-0.9.1 | 2.1, 2.2 | 3.5, 3.6, 3.7 |
|
144 |
+
-------------------------------------------------------------
|
145 |
+
| tensorflow-addons-0.8.3 | 2.1 | 3.5, 3.6, 3.7 |
|
146 |
+
-------------------------------------------------------------
|
147 |
+
| tensorflow-addons-0.7.1 | 2.1 | 2.7, 3.5, 3.6, 3.7 |
|
148 |
+
-------------------------------------------------------------
|
149 |
+
| tensorflow-addons-0.6.0 | 2.0 | 2.7, 3.5, 3.6, 3.7 |
|
150 |
+
-------------------------------------------------------------
|
151 |
+
"""
|
152 |
+
import tensorflow_addons as tfa
|
153 |
+
|
154 |
+
average_type = average_type.lower()
|
155 |
+
|
156 |
+
if average_type == None:
|
157 |
+
averaged_optimizer = optimizer
|
158 |
+
elif average_type == 'ema':
|
159 |
+
averaged_optimizer = tfa.optimizers.MovingAverage(optimizer, average_decay=0.99)
|
160 |
+
elif average_type == 'swa':
|
161 |
+
averaged_optimizer = tfa.optimizers.SWA(optimizer, start_averaging=0, average_period=10)
|
162 |
+
elif average_type == 'lookahead':
|
163 |
+
averaged_optimizer = tfa.optimizers.Lookahead(optimizer, sync_period=6, slow_step_size=0.5)
|
164 |
+
else:
|
165 |
+
raise ValueError('Unsupported average type')
|
166 |
+
|
167 |
+
return averaged_optimizer
|
168 |
+
|
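A quick usage sketch of the helpers above (the numbers are illustrative; `decay_steps` is normally derived from dataset size, batch size and total epochs):

```python
steps_per_epoch = 1000   # illustrative
total_epochs = 150
decay_steps = steps_per_epoch * total_epochs

optimizer = get_optimizer('sgd', learning_rate=0.01,
                          average_type=None,
                          decay_type='cosine',
                          decay_steps=decay_steps)
```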
models/deeplab/common/utils.py
ADDED
@@ -0,0 +1,343 @@
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding=utf-8 -*-
|
3 |
+
"""Miscellaneous utility functions."""
|
4 |
+
|
5 |
+
import os
|
6 |
+
import numpy as np
|
7 |
+
import copy
|
8 |
+
from tqdm import tqdm
|
9 |
+
from PIL import Image
|
10 |
+
import matplotlib.pyplot as plt
|
11 |
+
from matplotlib import gridspec
|
12 |
+
|
13 |
+
from deeplabv3p.models.layers import normalize, img_resize
|
14 |
+
from deeplabv3p.models.deeplabv3p_mobilenetv3 import hard_sigmoid, hard_swish
|
15 |
+
import tensorflow as tf
|
16 |
+
|
17 |
+
|
18 |
+
def optimize_tf_gpu(tf, K):
|
19 |
+
if tf.__version__.startswith('2'):
|
20 |
+
gpus = tf.config.experimental.list_physical_devices('GPU')
|
21 |
+
if gpus:
|
22 |
+
try:
|
23 |
+
# Currently, memory growth needs to be the same across GPUs
|
24 |
+
for gpu in gpus:
|
25 |
+
tf.config.experimental.set_virtual_device_configuration(gpu, [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10000)])
|
26 |
+
#tf.config.experimental.set_memory_growth(gpu, True)
|
27 |
+
except RuntimeError as e:
|
28 |
+
# Memory growth must be set before GPUs have been initialized
|
29 |
+
print(e)
|
30 |
+
else:
|
31 |
+
config = tf.ConfigProto()
|
32 |
+
config.gpu_options.allow_growth=True #dynamic alloc GPU resource
|
33 |
+
config.gpu_options.per_process_gpu_memory_fraction = 0.9 #GPU memory threshold 0.3
|
34 |
+
session = tf.Session(config=config)
|
35 |
+
|
36 |
+
# set session
|
37 |
+
K.set_session(session)
|
38 |
+
|
39 |
+
|
40 |
+
def get_custom_objects():
|
41 |
+
'''
|
42 |
+
form up a custom_objects dict so that the customized
|
43 |
+
layer/function call could be correctly parsed when keras
|
44 |
+
.h5 model is loading or converting
|
45 |
+
'''
|
46 |
+
custom_objects_dict = {
|
47 |
+
'tf': tf,
|
48 |
+
'normalize': normalize,
|
49 |
+
'img_resize': img_resize,
|
50 |
+
'hard_sigmoid': hard_sigmoid,
|
51 |
+
'hard_swish': hard_swish,
|
52 |
+
}
|
53 |
+
return custom_objects_dict
|
54 |
+
|
55 |
+
"""
|
56 |
+
def calculate_weigths_labels(dataset_generator, num_classes, save_path=None):
|
57 |
+
'''
|
58 |
+
calculate a static segment classes (including background) weights
|
59 |
+
coefficient based on class pixel
|
60 |
+
'''
|
61 |
+
# Initialize class count list array
|
62 |
+
class_counts = np.zeros((num_classes,))
|
63 |
+
|
64 |
+
# collecting class pixel count
|
65 |
+
pbar = tqdm(total=len(dataset_generator), desc='Calculating classes weights')
|
66 |
+
for n, (_, y) in enumerate(dataset_generator):
|
67 |
+
mask = (y >= 0) & (y < num_classes)
|
68 |
+
labels = y[mask].astype(np.uint8)
|
69 |
+
count_l = np.bincount(labels, minlength=num_classes)
|
70 |
+
class_counts += count_l
|
71 |
+
pbar.update(1)
|
72 |
+
pbar.close()
|
73 |
+
# sum() to get total valid pixel count
|
74 |
+
total_count = np.sum(class_counts)
|
75 |
+
# get class weights with 1/(log(1.02+(class_count/total_count)))
|
76 |
+
class_weights = []
|
77 |
+
for class_count in class_counts:
|
78 |
+
class_weight = 1 / (np.log(1.02 + (class_count / total_count)))
|
79 |
+
class_weights.append(class_weight)
|
80 |
+
|
81 |
+
class_weights = np.array(class_weights)
|
82 |
+
# save class weights array to file for reloading next time
|
83 |
+
if save_path:
|
84 |
+
classes_weights_path = os.path.join(save_path, 'classes_weights.npy')
|
85 |
+
np.save(classes_weights_path, class_weights)
|
86 |
+
|
87 |
+
return class_weights
|
88 |
+
"""
|
89 |
+
|
90 |
+
|
91 |
+
def calculate_weigths_labels(dataset_generator, num_classes, save_path=None):
|
92 |
+
'''
|
93 |
+
calculate a static segment classes (including background) weights
|
94 |
+
coefficient based on class pixel
|
95 |
+
'''
|
96 |
+
# Initialize class count list array
|
97 |
+
class_counts = np.zeros((num_classes,))
|
98 |
+
|
99 |
+
# collecting class pixel count
|
100 |
+
pbar = tqdm(total=len(dataset_generator), desc='Calculating classes weights')
|
101 |
+
for n, (_, y) in enumerate(dataset_generator):
|
102 |
+
mask = (y >= 0) & (y < num_classes)
|
103 |
+
labels = y[mask].astype(np.uint8)
|
104 |
+
count_l = np.bincount(labels, minlength=num_classes)
|
105 |
+
class_counts += count_l
|
106 |
+
pbar.update(1)
|
107 |
+
pbar.close()
|
108 |
+
# sum() to get total valid pixel count
|
109 |
+
total_count = np.sum(class_counts)
|
110 |
+
|
111 |
+
#
|
112 |
+
# use following formula to calculate balanced class weights:
|
113 |
+
# class_weights = sample_count / (num_classes * np.bincount(labels))
|
114 |
+
#
|
115 |
+
# which is same as
|
116 |
+
# class_weight.compute_class_weight('balanced', class_list, y)
|
117 |
+
#
|
118 |
+
class_weights = total_count / (num_classes * class_counts)
|
119 |
+
class_weights = np.array(class_weights)
|
120 |
+
# save class weights array to file for reloading next time
|
121 |
+
if save_path:
|
122 |
+
classes_weights_path = os.path.join(save_path, 'classes_weights.txt')
|
123 |
+
save_class_weights(classes_weights_path, class_weights)
|
124 |
+
|
125 |
+
return class_weights
|
126 |
+
|
127 |
+
|
def save_class_weights(save_path, class_weights):
    '''
    save class weights array with shape (num_classes,)
    '''
    weights_file = open(save_path, 'w')
    for class_weight in list(class_weights):
        weights_file.write(str(class_weight))
        weights_file.write('\n')
    weights_file.close()


def load_class_weights(classes_weights_path):
    '''
    load saved class weights txt file and convert
    to numpy array with shape (num_classes,)
    '''
    with open(classes_weights_path) as f:
        classes_weights = f.readlines()
    classes_weights = [float(c.strip()) for c in classes_weights]

    return np.array(classes_weights)


def get_classes(classes_path):
    '''loads the classes'''
    with open(classes_path) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names


def get_data_list(data_list_file, shuffle=True):
    with open(data_list_file) as f:
        lines = f.readlines()
    lines = [line.strip() for line in lines]

    if shuffle:
        np.random.seed(10101)
        np.random.shuffle(lines)
        np.random.seed(None)

    return lines


def figure_to_image(figure):
    '''
    Convert a Matplotlib figure to a Pillow image with RGBA channels

    # Arguments
        figure: matplotlib figure
            usually create with plt.figure()

    # Returns
        image: numpy array image
    '''
    # draw the renderer
    figure.canvas.draw()

    # Get the RGBA buffer from the figure
    w, h = figure.canvas.get_width_height()
    buf = np.fromstring(figure.canvas.tostring_argb(), dtype=np.uint8)
    buf.shape = (w, h, 4)

    # canvas.tostring_argb give pixmap in ARGB mode. Roll the ALPHA channel to have it in RGBA mode
    buf = np.roll(buf, 3, axis=2)
    image = Image.frombytes("RGBA", (w, h), buf.tostring())
    # Convert RGBA to RGB
    image = np.asarray(image)[..., :3]
    return image


def create_pascal_label_colormap():
    """
    create label colormap with PASCAL VOC segmentation dataset definition

    # Returns
        colormap: Colormap array for visualizing segmentation
    """
    colormap = np.zeros((256, 3), dtype=int)
    index = np.arange(256, dtype=int)

    for shift in reversed(range(8)):
        for channel in range(3):
            colormap[:, channel] |= ((index >> channel) & 1) << shift
        index >>= 3

    return colormap


def label_to_color_image(label):
    """
    mapping the segmentation label to color indexing array

    # Arguments
        label: 2D uint8 numpy array, with segmentation label

    # Returns
        result: A 2D array with floating type. The element of the array
        is the color indexed by the corresponding element in the input label
        to the PascalVOC color map.

    Raises:
        ValueError: If label is not of rank 2 or its value is larger than color
        map maximum entry.
    """
    if label.ndim != 2:
        raise ValueError('Expect 2-D input label')

    colormap = create_pascal_label_colormap()

    if np.max(label) >= len(colormap):
        raise ValueError('label value too large.')

    return colormap[label]

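# --------------------------------------------------------------------------
# Illustration only, not part of common/utils.py: what the bit-interleaving
# in create_pascal_label_colormap() actually produces for the first labels
# (the standard PASCAL VOC palette).
colormap = create_pascal_label_colormap()
print(colormap[:4])
# [[  0   0   0]   -> label 0: background (black)
#  [128   0   0]   -> label 1: dark red
#  [  0 128   0]   -> label 2: dark green
#  [128 128   0]]  -> label 3: dark yellow
# --------------------------------------------------------------------------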
def visualize_segmentation(image, mask, gt_mask=None, class_names=None, overlay=0.7, ignore_count_threshold=100, title=None, gt_title=None):
    """
    Visualize segmentation mask on input image, using PascalVOC
    Segmentation color map

    # Arguments
        image: image array
            numpy array for input image
        mask: predict mask array
            2D numpy array for predict segmentation mask
        gt_mask: ground truth mask array
            2D numpy array for gt segmentation mask
        class_names: label class definition
            list of label class names
        ignore_count_threshold: threshold to filter label
            integer scalar to filter the label value with small count
        title: predict segmentation title
            title string for predict segmentation result plot
        gt_title: ground truth segmentation title
            title string for ground truth segmentation plot

    # Returns
        img: A numpy image with segmentation result
    """
    if (gt_mask is not None) and (class_names is not None):
        grid_spec = gridspec.GridSpec(1, 3, width_ratios=[6, 6, 1])
        figsize = (15, 10)
    elif (gt_mask is not None) and (class_names is None):
        grid_spec = gridspec.GridSpec(1, 2, width_ratios=[6, 6])
        figsize = (15, 10)
    elif (gt_mask is None) and (class_names is not None):
        grid_spec = gridspec.GridSpec(1, 2, width_ratios=[6, 1])
        figsize = (10, 10)
    else:
        grid_spec = [111]
        figsize = (10, 10)

    figure = plt.figure(figsize=figsize)

    # convert mask array to color mapped image
    mask_image = label_to_color_image(mask).astype(np.uint8)
    # show segmentation result image
    plt.subplot(grid_spec[0])
    plt.imshow(image)
    plt.imshow(mask_image, alpha=overlay)
    plt.axis('off')
    # add plt title, optional
    if title:
        plt.title(title)

    if gt_mask is not None:
        # reset invalid label value as 0(background)
        filtered_gt_mask = copy.deepcopy(gt_mask)
        filtered_gt_mask[filtered_gt_mask>len(class_names)-1] = 0
        # convert gt mask array to color mapped image
        gt_mask_image = label_to_color_image(filtered_gt_mask).astype(np.uint8)
        # show gt segmentation image
        plt.subplot(grid_spec[1])
        plt.imshow(image)
        plt.imshow(gt_mask_image, alpha=overlay)
        plt.axis('off')
        # add plt title, optional
        if gt_title:
            plt.title(gt_title)

    # if class name list is provided, plot a legend graph of
    # classes color map
    if class_names:
        classes_index = np.arange(len(class_names)).reshape(len(class_names), 1)
        classes_color_map = label_to_color_image(classes_index)

        labels, count = np.unique(mask, return_counts=True)
        # filter some corner pixel labels, may be caused by mask resize
        labels = np.array([labels[i] for i in range(len(labels)) if count[i] > ignore_count_threshold])

        if gt_mask is not None:
            gt_labels, gt_count = np.unique(filtered_gt_mask, return_counts=True)
            # filter some corner pixel labels, may be caused by mask resize
            gt_labels = np.array([gt_labels[i] for i in range(len(gt_labels)) if gt_count[i] > ignore_count_threshold])

            # merge labels & gt labels
            labels = list(set(list(labels)+list(gt_labels)))
            labels.sort()
            labels = np.array(labels)

        ax = plt.subplot(grid_spec[-1])
        plt.imshow(classes_color_map[labels].astype(np.uint8), interpolation='nearest')

        # adjust subplot display
        ax.yaxis.tick_right()
        plt.yticks(range(len(labels)), np.asarray(class_names)[labels])
        plt.xticks([], [])
        ax.tick_params(width=0.0)
        plt.grid('off')

    # convert plt to numpy image
    img = figure_to_image(figure)
    plt.close("all")
    return img

models/deeplab/configs/ade20k_classes.txt
ADDED
@@ -0,0 +1,150 @@
1 |
+
wall
|
2 |
+
building
|
3 |
+
sky
|
4 |
+
floor
|
5 |
+
tree
|
6 |
+
ceiling
|
7 |
+
road
|
8 |
+
bed
|
9 |
+
window
|
10 |
+
grass
|
11 |
+
cabinet
|
12 |
+
sidewalk
|
13 |
+
person
|
14 |
+
ground
|
15 |
+
door
|
16 |
+
table
|
17 |
+
mountain
|
18 |
+
plant
|
19 |
+
curtain
|
20 |
+
chair
|
21 |
+
car
|
22 |
+
water
|
23 |
+
picture
|
24 |
+
sofa
|
25 |
+
shelf
|
26 |
+
house
|
27 |
+
sea
|
28 |
+
mirror
|
29 |
+
carpet
|
30 |
+
field
|
31 |
+
armchair
|
32 |
+
seat
|
33 |
+
fence
|
34 |
+
desk
|
35 |
+
rock
|
36 |
+
closet
|
37 |
+
lamp
|
38 |
+
bathtub
|
39 |
+
railing
|
40 |
+
cushion
|
41 |
+
base
|
42 |
+
box
|
43 |
+
column
|
44 |
+
signboard
|
45 |
+
chest of drawers
|
46 |
+
counter
|
47 |
+
sand
|
48 |
+
sink
|
49 |
+
skyscraper
|
50 |
+
fireplace
|
51 |
+
refrigerator
|
52 |
+
grandstand
|
53 |
+
path
|
54 |
+
stairs, steps
|
55 |
+
runway
|
56 |
+
showcase
|
57 |
+
billiard table
|
58 |
+
pillow
|
59 |
+
screen door
|
60 |
+
stairway
|
61 |
+
river
|
62 |
+
bridge
|
63 |
+
bookcase
|
64 |
+
blind, screen
|
65 |
+
coffee table
|
66 |
+
toilet
|
67 |
+
flower
|
68 |
+
book
|
69 |
+
hill
|
70 |
+
bench
|
71 |
+
countertop
|
72 |
+
stove
|
73 |
+
palm tree
|
74 |
+
kitchen island
|
75 |
+
computer
|
76 |
+
swivel chair
|
77 |
+
boat
|
78 |
+
bar
|
79 |
+
arcade machine
|
80 |
+
hovel
|
81 |
+
bus
|
82 |
+
towel
|
83 |
+
light
|
84 |
+
truck
|
85 |
+
tower
|
86 |
+
chandelier
|
87 |
+
sunshade
|
88 |
+
streetlight
|
89 |
+
booth
|
90 |
+
television
|
91 |
+
aeroplane
|
92 |
+
dirt track
|
93 |
+
clothes
|
94 |
+
pole
|
95 |
+
land
|
96 |
+
handrail
|
97 |
+
escalator
|
98 |
+
ottoman
|
99 |
+
bottle
|
100 |
+
buffet
|
101 |
+
poster
|
102 |
+
stage
|
103 |
+
van
|
104 |
+
ship
|
105 |
+
fountain
|
106 |
+
conveyer belt
|
107 |
+
canopy
|
108 |
+
washing machine
|
109 |
+
toy
|
110 |
+
swimming pool
|
111 |
+
stool
|
112 |
+
barrel
|
113 |
+
basket
|
114 |
+
waterfall
|
115 |
+
tent
|
116 |
+
bag
|
117 |
+
motorbike
|
118 |
+
cradle
|
119 |
+
oven
|
120 |
+
ball
|
121 |
+
solid food
|
122 |
+
stair
|
123 |
+
tank
|
124 |
+
brand
|
125 |
+
microwave
|
126 |
+
flowerpot
|
127 |
+
animal
|
128 |
+
bicycle
|
129 |
+
lake
|
130 |
+
dishwasher
|
131 |
+
silver screen
|
132 |
+
blanket
|
133 |
+
sculpture
|
134 |
+
exhaust hood
|
135 |
+
sconce
|
136 |
+
vase
|
137 |
+
traffic light
|
138 |
+
tray
|
139 |
+
dustbin
|
140 |
+
fan
|
141 |
+
wharf
|
142 |
+
crt screen
|
143 |
+
plate
|
144 |
+
monitor
|
145 |
+
notice board
|
146 |
+
shower
|
147 |
+
radiator
|
148 |
+
glass
|
149 |
+
clock
|
150 |
+
flag
|
models/deeplab/configs/cityscapes_classes.txt
ADDED
@@ -0,0 +1,33 @@
ego vehicle
rectification border
out of roi
static
dynamic
ground
road
sidewalk
parking
rail track
building
wall
fence
guard rail
bridge
tunnel
pole
polegroup
traffic light
traffic sign
vegetation
terrain
sky
person
rider
car
truck
bus
caravan
trailer
train
motorcycle
bicycle
models/deeplab/configs/coco_classes.txt
ADDED
@@ -0,0 +1,80 @@
1 |
+
person
|
2 |
+
bicycle
|
3 |
+
car
|
4 |
+
motorbike
|
5 |
+
aeroplane
|
6 |
+
bus
|
7 |
+
train
|
8 |
+
truck
|
9 |
+
boat
|
10 |
+
traffic light
|
11 |
+
fire hydrant
|
12 |
+
stop sign
|
13 |
+
parking meter
|
14 |
+
bench
|
15 |
+
bird
|
16 |
+
cat
|
17 |
+
dog
|
18 |
+
horse
|
19 |
+
sheep
|
20 |
+
cow
|
21 |
+
elephant
|
22 |
+
bear
|
23 |
+
zebra
|
24 |
+
giraffe
|
25 |
+
backpack
|
26 |
+
umbrella
|
27 |
+
handbag
|
28 |
+
tie
|
29 |
+
suitcase
|
30 |
+
frisbee
|
31 |
+
skis
|
32 |
+
snowboard
|
33 |
+
sports ball
|
34 |
+
kite
|
35 |
+
baseball bat
|
36 |
+
baseball glove
|
37 |
+
skateboard
|
38 |
+
surfboard
|
39 |
+
tennis racket
|
40 |
+
bottle
|
41 |
+
wine glass
|
42 |
+
cup
|
43 |
+
fork
|
44 |
+
knife
|
45 |
+
spoon
|
46 |
+
bowl
|
47 |
+
banana
|
48 |
+
apple
|
49 |
+
sandwich
|
50 |
+
orange
|
51 |
+
broccoli
|
52 |
+
carrot
|
53 |
+
hot dog
|
54 |
+
pizza
|
55 |
+
donut
|
56 |
+
cake
|
57 |
+
chair
|
58 |
+
sofa
|
59 |
+
pottedplant
|
60 |
+
bed
|
61 |
+
diningtable
|
62 |
+
toilet
|
63 |
+
tvmonitor
|
64 |
+
laptop
|
65 |
+
mouse
|
66 |
+
remote
|
67 |
+
keyboard
|
68 |
+
cell phone
|
69 |
+
microwave
|
70 |
+
oven
|
71 |
+
toaster
|
72 |
+
sink
|
73 |
+
refrigerator
|
74 |
+
book
|
75 |
+
clock
|
76 |
+
vase
|
77 |
+
scissors
|
78 |
+
teddy bear
|
79 |
+
hair drier
|
80 |
+
toothbrush
|
models/deeplab/configs/voc_classes.txt
ADDED
@@ -0,0 +1,20 @@
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
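These per-dataset class files are plain one-name-per-line lists, read back with `get_classes()` from `common/utils.py` above. A small usage sketch (illustration only; it assumes the working directory is `models/deeplab` and mirrors how `deeplab.py` below prepends a `background` entry):

class_names = get_classes('configs/voc_classes.txt')
print(len(class_names))                     # 20
class_names = ['background'] + class_names
print(len(class_names), class_names[:2])    # 21 ['background', 'aeroplane']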
models/deeplab/deeplab.py
ADDED
@@ -0,0 +1,297 @@
1 |
+
#! /usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
"""
|
4 |
+
Run a Deeplabv3plus semantic segmentation model on test images.
|
5 |
+
"""
|
6 |
+
|
7 |
+
import colorsys
|
8 |
+
import os, sys, argparse
|
9 |
+
import numpy as np
|
10 |
+
import cv2
|
11 |
+
from PIL import Image
|
12 |
+
import matplotlib.pyplot as plt
|
13 |
+
import time
|
14 |
+
from timeit import default_timer as timer
|
15 |
+
import tensorflow as tf
|
16 |
+
from tensorflow.keras import backend as K
|
17 |
+
from tensorflow.keras.models import Model, load_model
|
18 |
+
from tensorflow.keras.utils import multi_gpu_model
|
19 |
+
#from tensorflow_model_optimization.sparsity import keras as sparsity
|
20 |
+
|
21 |
+
from deeplabv3p.model import get_deeplabv3p_model
|
22 |
+
from deeplabv3p.postprocess_np import crf_postprocess
|
23 |
+
from common.utils import get_classes, optimize_tf_gpu, visualize_segmentation
|
24 |
+
from common.data_utils import preprocess_image, mask_resize, mask_resize_fast
|
25 |
+
|
26 |
+
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
|
27 |
+
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
|
28 |
+
|
29 |
+
#tf.enable_eager_execution()
|
30 |
+
optimize_tf_gpu(tf, K)
|
31 |
+
|
32 |
+
default_config = {
|
33 |
+
"model_type": 'mobilenetv2lite',
|
34 |
+
"classes_path": os.path.join('configs', 'voc_classes.txt'),
|
35 |
+
"model_input_shape" : (512, 512),
|
36 |
+
"output_stride": 16,
|
37 |
+
"weights_path": os.path.join('weights', 'mobilenetv2_original.h5'),
|
38 |
+
"do_crf": False,
|
39 |
+
"pruning_model": False,
|
40 |
+
"gpu_num" : 1,
|
41 |
+
}
|
42 |
+
|
43 |
+
|
44 |
+
class DeepLab(object):
|
45 |
+
_defaults = default_config
|
46 |
+
|
47 |
+
@classmethod
|
48 |
+
def get_defaults(cls, n):
|
49 |
+
if n in cls._defaults:
|
50 |
+
return cls._defaults[n]
|
51 |
+
else:
|
52 |
+
return "Unrecognized attribute name '" + n + "'"
|
53 |
+
|
54 |
+
def __init__(self, **kwargs):
|
55 |
+
super(DeepLab, self).__init__()
|
56 |
+
self.__dict__.update(self._defaults) # set up default values
|
57 |
+
self.__dict__.update(kwargs) # and update with user overrides
|
58 |
+
self.class_names = get_classes(self.classes_path)
|
59 |
+
K.set_learning_phase(0)
|
60 |
+
self.deeplab_model = self._generate_model()
|
61 |
+
|
62 |
+
def _generate_model(self):
|
63 |
+
'''to generate the bounding boxes'''
|
64 |
+
weights_path = os.path.expanduser(self.weights_path)
|
65 |
+
assert weights_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
|
66 |
+
|
67 |
+
#add one more class for background
|
68 |
+
self.class_names = ['background'] + self.class_names
|
69 |
+
num_classes = len(self.class_names)
|
70 |
+
assert len(self.class_names) < 254, 'PNG image label only support less than 254 classes.'
|
71 |
+
|
72 |
+
# Load model, or construct model and load weights.
|
73 |
+
try:
|
74 |
+
deeplab_model = get_deeplabv3p_model(self.model_type, num_classes, model_input_shape=self.model_input_shape, output_stride=self.output_stride, freeze_level=0, weights_path=weights_path, training=False)
|
75 |
+
deeplab_model.summary()
|
76 |
+
except Exception as e:
|
77 |
+
print(repr(e))
|
78 |
+
if self.gpu_num>=2:
|
79 |
+
deeplab_model = multi_gpu_model(deeplab_model, gpus=self.gpu_num)
|
80 |
+
|
81 |
+
return deeplab_model
|
82 |
+
|
83 |
+
|
84 |
+
def segment_image(self, image):
|
85 |
+
image_data = preprocess_image(image, self.model_input_shape)
|
86 |
+
# origin image shape, in (height, width) format
|
87 |
+
image_shape = tuple(reversed(image.size))
|
88 |
+
|
89 |
+
start = time.time()
|
90 |
+
out_mask = self.predict(image_data, image_shape)
|
91 |
+
end = time.time()
|
92 |
+
print("Inference time: {:.8f}s".format(end - start))
|
93 |
+
|
94 |
+
# show segmentation result
|
95 |
+
image_array = visualize_segmentation(np.array(image), out_mask, class_names=self.class_names, ignore_count_threshold=500)
|
96 |
+
return Image.fromarray(image_array)
|
97 |
+
|
98 |
+
|
99 |
+
def predict(self, image_data, image_shape):
|
100 |
+
prediction = self.deeplab_model.predict([image_data])
|
101 |
+
# reshape prediction to mask array
|
102 |
+
mask = np.argmax(prediction, -1)[0].reshape(self.model_input_shape)
|
103 |
+
|
104 |
+
# add CRF postprocess if need
|
105 |
+
if self.do_crf:
|
106 |
+
image = image_data[0].astype('uint8')
|
107 |
+
mask = crf_postprocess(image, mask, zero_unsure=False)
|
108 |
+
|
109 |
+
# resize mask back to origin image size
|
110 |
+
mask = mask_resize_fast(mask, tuple(reversed(image_shape)))
|
111 |
+
|
112 |
+
return mask
|
113 |
+
|
114 |
+
|
115 |
+
def dump_model_file(self, output_model_file):
|
116 |
+
self.deeplab_model.save(output_model_file)
|
117 |
+
|
118 |
+
def dump_saved_model(self, saved_model_path):
|
119 |
+
model = self.deeplab_model
|
120 |
+
os.makedirs(saved_model_path, exist_ok=True)
|
121 |
+
|
122 |
+
tf.keras.experimental.export_saved_model(model, saved_model_path)
|
123 |
+
print('export inference model to %s' % str(saved_model_path))
|
124 |
+
|
125 |
+
|
126 |
+
def segment_video(deeplab, video_path, output_path=""):
|
127 |
+
import cv2
|
128 |
+
vid = cv2.VideoCapture(0 if video_path == '0' else video_path)
|
129 |
+
if not vid.isOpened():
|
130 |
+
raise IOError("Couldn't open webcam or video")
|
131 |
+
|
132 |
+
# here we encode the video to MPEG-4 for better compatibility, you can use ffmpeg later
|
133 |
+
# to convert it to x264 to reduce file size:
|
134 |
+
# ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4
|
135 |
+
#
|
136 |
+
#video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if video_path == '0' else int(vid.get(cv2.CAP_PROP_FOURCC))
|
137 |
+
video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if video_path == '0' else cv2.VideoWriter_fourcc(*"mp4v")
|
138 |
+
video_fps = vid.get(cv2.CAP_PROP_FPS)
|
139 |
+
video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
|
140 |
+
int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
|
141 |
+
isOutput = True if output_path != "" else False
|
142 |
+
if isOutput:
|
143 |
+
print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
|
144 |
+
out = cv2.VideoWriter(output_path, video_FourCC, (5. if video_path == '0' else video_fps), video_size)
|
145 |
+
accum_time = 0
|
146 |
+
curr_fps = 0
|
147 |
+
fps = "FPS: ??"
|
148 |
+
prev_time = timer()
|
149 |
+
while True:
|
150 |
+
return_value, frame = vid.read()
|
151 |
+
image = Image.fromarray(frame)
|
152 |
+
image = deeplab.segment_image(image)
|
153 |
+
result = np.asarray(image)
|
154 |
+
curr_time = timer()
|
155 |
+
exec_time = curr_time - prev_time
|
156 |
+
prev_time = curr_time
|
157 |
+
accum_time = accum_time + exec_time
|
158 |
+
curr_fps = curr_fps + 1
|
159 |
+
if accum_time > 1:
|
160 |
+
accum_time = accum_time - 1
|
161 |
+
fps = "FPS: " + str(curr_fps)
|
162 |
+
curr_fps = 0
|
163 |
+
cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
|
164 |
+
fontScale=0.50, color=(255, 0, 0), thickness=2)
|
165 |
+
cv2.namedWindow("result", cv2.WINDOW_NORMAL)
|
166 |
+
cv2.imshow("result", result)
|
167 |
+
if isOutput:
|
168 |
+
out.write(result)
|
169 |
+
if cv2.waitKey(1) & 0xFF == ord('q'):
|
170 |
+
break
|
171 |
+
# Release everything if job is finished
|
172 |
+
vid.release()
|
173 |
+
if isOutput:
|
174 |
+
out.release()
|
175 |
+
cv2.destroyAllWindows()
|
176 |
+
|
177 |
+
|
178 |
+
def segment_img(deeplab):
|
179 |
+
while True:
|
180 |
+
img = input('Input image filename:')
|
181 |
+
try:
|
182 |
+
image = Image.open(img)
|
183 |
+
except:
|
184 |
+
print('Open Error! Try again!')
|
185 |
+
continue
|
186 |
+
else:
|
187 |
+
r_image = deeplab.segment_image(image)
|
188 |
+
r_image.show()
|
189 |
+
|
190 |
+
|
191 |
+
if __name__ == '__main__':
|
192 |
+
# class DeepLab defines the default value, so suppress any default here
|
193 |
+
parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description='demo or dump out Deeplab h5 model')
|
194 |
+
'''
|
195 |
+
Command line options
|
196 |
+
'''
|
197 |
+
parser.add_argument(
|
198 |
+
'--model_type', type=str,
|
199 |
+
help='Deeplabv3p model type: mobilenetv2/xception, default ' + DeepLab.get_defaults("model_type")
|
200 |
+
)
|
201 |
+
|
202 |
+
parser.add_argument(
|
203 |
+
'--weights_path', type=str,
|
204 |
+
help='path to model weight file, default ' + DeepLab.get_defaults("weights_path")
|
205 |
+
)
|
206 |
+
|
207 |
+
parser.add_argument(
|
208 |
+
'--classes_path', type=str,
|
209 |
+
help='path to class definitions, default ' + DeepLab.get_defaults("classes_path")
|
210 |
+
)
|
211 |
+
|
212 |
+
parser.add_argument(
|
213 |
+
'--model_input_shape', type=str,
|
214 |
+
help='model input size as <height>x<width>, default ' +
|
215 |
+
str(DeepLab.get_defaults("model_input_shape")[0])+'x'+str(DeepLab.get_defaults("model_input_shape")[1]),
|
216 |
+
default=str(DeepLab.get_defaults("model_input_shape")[0])+'x'+str(DeepLab.get_defaults("model_input_shape")[1])
|
217 |
+
)
|
218 |
+
|
219 |
+
parser.add_argument(
|
220 |
+
'--output_stride', type=int, choices=[8, 16, 32],
|
221 |
+
help='model output stride, default ' + str(DeepLab.get_defaults("output_stride"))
|
222 |
+
)
|
223 |
+
|
224 |
+
parser.add_argument(
|
225 |
+
'--do_crf', default=False, action="store_true",
|
226 |
+
help='whether to add CRF postprocess for model output, default ' + str(DeepLab.get_defaults("do_crf"))
|
227 |
+
)
|
228 |
+
|
229 |
+
#parser.add_argument(
|
230 |
+
#'--pruning_model', default=False, action="store_true",
|
231 |
+
#help='Whether to be a pruning model/weights file')
|
232 |
+
|
233 |
+
parser.add_argument(
|
234 |
+
'--gpu_num', type=int,
|
235 |
+
help='Number of GPU to use, default ' + str(DeepLab.get_defaults("gpu_num"))
|
236 |
+
)
|
237 |
+
parser.add_argument(
|
238 |
+
'--image', default=False, action="store_true",
|
239 |
+
help='Image inference mode, will ignore all positional arguments'
|
240 |
+
)
|
241 |
+
'''
|
242 |
+
Command line positional arguments -- for video detection mode
|
243 |
+
'''
|
244 |
+
parser.add_argument(
|
245 |
+
"--input", nargs='?', type=str,required=False,default='./path2your_video',
|
246 |
+
help = "Video input path"
|
247 |
+
)
|
248 |
+
|
249 |
+
parser.add_argument(
|
250 |
+
"--output", nargs='?', type=str, default="",
|
251 |
+
help = "[Optional] Video output path"
|
252 |
+
)
|
253 |
+
'''
|
254 |
+
Command line positional arguments -- for model dump
|
255 |
+
'''
|
256 |
+
parser.add_argument(
|
257 |
+
'--dump_model', default=False, action="store_true",
|
258 |
+
help='Dump out training model to inference model'
|
259 |
+
)
|
260 |
+
|
261 |
+
parser.add_argument(
|
262 |
+
'--output_model_file', type=str,
|
263 |
+
help='output inference model file'
|
264 |
+
)
|
265 |
+
|
266 |
+
args = parser.parse_args()
|
267 |
+
# param parse
|
268 |
+
if args.model_input_shape:
|
269 |
+
height, width = args.model_input_shape.split('x')
|
270 |
+
args.model_input_shape = (int(height), int(width))
|
271 |
+
|
272 |
+
# get wrapped inference object
|
273 |
+
deeplab = DeepLab(**vars(args))
|
274 |
+
|
275 |
+
if args.dump_model:
|
276 |
+
"""
|
277 |
+
Dump out training model to inference model
|
278 |
+
"""
|
279 |
+
if not args.output_model_file:
|
280 |
+
raise ValueError('output model file is not specified')
|
281 |
+
|
282 |
+
print('Dumping out training model to inference model')
|
283 |
+
deeplab.dump_model_file(args.output_model_file)
|
284 |
+
sys.exit()
|
285 |
+
|
286 |
+
if args.image:
|
287 |
+
"""
|
288 |
+
Image segmentation mode, disregard any remaining command line arguments
|
289 |
+
"""
|
290 |
+
print("Image segmentation mode")
|
291 |
+
if "input" in args:
|
292 |
+
print(" Ignoring remaining command line arguments: " + args.input + "," + args.output)
|
293 |
+
segment_img(deeplab)
|
294 |
+
elif "input" in args:
|
295 |
+
segment_video(deeplab, args.input, args.output)
|
296 |
+
else:
|
297 |
+
print("Must specify at least video_input_path. See usage with --help.")
|
models/deeplab/deeplabv3p/data.py
ADDED
@@ -0,0 +1,161 @@
1 |
+
#!/usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import os, glob, time
|
4 |
+
import random
|
5 |
+
import numpy as np
|
6 |
+
import cv2
|
7 |
+
from PIL import Image
|
8 |
+
from sklearn.utils import class_weight
|
9 |
+
from tensorflow.keras.utils import Sequence
|
10 |
+
|
11 |
+
from common.data_utils import random_horizontal_flip, random_vertical_flip, random_brightness, random_grayscale, random_chroma, random_contrast, random_sharpness, random_blur, random_zoom_rotate, random_gridmask, random_crop, random_histeq
|
12 |
+
|
13 |
+
|
14 |
+
class SegmentationGenerator(Sequence):
|
15 |
+
def __init__(self, dataset_path, data_list,
|
16 |
+
batch_size=1,
|
17 |
+
num_classes=21,
|
18 |
+
target_size=(512, 512),
|
19 |
+
weighted_type=None,
|
20 |
+
is_eval=False,
|
21 |
+
augment=True):
|
22 |
+
# get real path for dataset
|
23 |
+
dataset_realpath = os.path.realpath(dataset_path)
|
24 |
+
self.image_path_list = [os.path.join(dataset_realpath, 'images', image_id.strip()+'.jpg') for image_id in data_list]
|
25 |
+
self.label_path_list = [os.path.join(dataset_realpath, 'labels', image_id.strip()+'.png') for image_id in data_list]
|
26 |
+
# initialize random seed
|
27 |
+
np.random.seed(int(time.time()))
|
28 |
+
|
29 |
+
self.num_classes = num_classes
|
30 |
+
self.batch_size = batch_size
|
31 |
+
self.target_size = target_size
|
32 |
+
self.weighted_type = weighted_type
|
33 |
+
self.augment = augment
|
34 |
+
self.is_eval = is_eval
|
35 |
+
|
36 |
+
# Preallocate memory
|
37 |
+
self.X = np.zeros((batch_size, target_size[1], target_size[0], 3), dtype='float32')
|
38 |
+
self.Y = np.zeros((batch_size, target_size[1]*target_size[0], 1), dtype='float32')
|
39 |
+
self.PIXEL_WEIGHTS = np.zeros((batch_size, target_size[1]*target_size[0]), dtype='float32')
|
40 |
+
|
41 |
+
def get_batch_image_path(self, i):
|
42 |
+
return self.image_path_list[i*self.batch_size:(i+1)*self.batch_size]
|
43 |
+
|
44 |
+
def get_batch_label_path(self, i):
|
45 |
+
return self.label_path_list[i*self.batch_size:(i+1)*self.batch_size]
|
46 |
+
|
47 |
+
def get_weighted_type(self):
|
48 |
+
return self.weighted_type
|
49 |
+
|
50 |
+
def __len__(self):
|
51 |
+
return len(self.image_path_list) // self.batch_size
|
52 |
+
|
53 |
+
def __getitem__(self, i):
|
54 |
+
|
55 |
+
for n, (image_path, label_path) in enumerate(zip(self.image_path_list[i*self.batch_size:(i+1)*self.batch_size],
|
56 |
+
self.label_path_list[i*self.batch_size:(i+1)*self.batch_size])):
|
57 |
+
|
58 |
+
# Load image and label array
|
59 |
+
image = cv2.imread(image_path, cv2.IMREAD_COLOR) # cv2.IMREAD_COLOR/cv2.IMREAD_GRAYSCALE/cv2.IMREAD_UNCHANGED
|
60 |
+
label = np.array(Image.open(label_path))
|
61 |
+
|
62 |
+
# we reset all the invalid label value as 0(background) in training,
|
63 |
+
# but as 255(invalid) in eval
|
64 |
+
if self.is_eval:
|
65 |
+
label[label>(self.num_classes-1)] = 255
|
66 |
+
else:
|
67 |
+
label[label>(self.num_classes-1)] = 0
|
68 |
+
|
69 |
+
# Do augmentation
|
70 |
+
if self.augment:
|
71 |
+
# random horizontal flip image
|
72 |
+
image, label = random_horizontal_flip(image, label)
|
73 |
+
|
74 |
+
# random vertical flip image
|
75 |
+
image, label = random_vertical_flip(image, label)
|
76 |
+
|
77 |
+
# random zoom & rotate image
|
78 |
+
image, label = random_zoom_rotate(image, label)
|
79 |
+
|
80 |
+
# random add gridmask augment for image and label
|
81 |
+
image, label = random_gridmask(image, label)
|
82 |
+
|
83 |
+
# random adjust brightness
|
84 |
+
image = random_brightness(image)
|
85 |
+
|
86 |
+
# random adjust color level
|
87 |
+
image = random_chroma(image)
|
88 |
+
|
89 |
+
# random adjust contrast
|
90 |
+
image = random_contrast(image)
|
91 |
+
|
92 |
+
# random adjust sharpness
|
93 |
+
image = random_sharpness(image)
|
94 |
+
|
95 |
+
# random convert image to grayscale
|
96 |
+
image = random_grayscale(image)
|
97 |
+
|
98 |
+
# random do gaussian blur to image
|
99 |
+
image = random_blur(image)
|
100 |
+
|
101 |
+
# random crop image & label
|
102 |
+
image, label = random_crop(image, label, self.target_size)
|
103 |
+
|
104 |
+
# random do histogram equalization using CLAHE
|
105 |
+
image = random_histeq(image)
|
106 |
+
|
107 |
+
|
108 |
+
# Resize image & label mask to model input shape
|
109 |
+
image = cv2.resize(image, self.target_size)
|
110 |
+
label = cv2.resize(label, self.target_size, interpolation = cv2.INTER_NEAREST)
|
111 |
+
|
112 |
+
label = label.astype('int32')
|
113 |
+
y = label.flatten()
|
114 |
+
|
115 |
+
# we reset all the invalid label value as 0(background) in training,
|
116 |
+
# but as 255(invalid) in eval
|
117 |
+
if self.is_eval:
|
118 |
+
y[y>(self.num_classes-1)] = 255
|
119 |
+
else:
|
120 |
+
y[y>(self.num_classes-1)] = 0
|
121 |
+
|
122 |
+
# append input image and label array
|
123 |
+
self.X[n] = image
|
124 |
+
self.Y[n] = np.expand_dims(y, -1)
|
125 |
+
|
126 |
+
###########################################################################
|
127 |
+
#
|
128 |
+
# generating adaptive pixels weights array, for unbalanced classes training
|
129 |
+
#
|
130 |
+
###########################################################################
|
131 |
+
|
132 |
+
# Create adaptive pixels weights for all classes on one image,
|
133 |
+
# according to pixel number of classes
|
134 |
+
class_list = np.unique(y)
|
135 |
+
if len(class_list):
|
136 |
+
class_weights = class_weight.compute_class_weight('balanced', class_list, y)
|
137 |
+
class_weights = {class_id : weight for class_id , weight in zip(class_list, class_weights)}
|
138 |
+
# class_weigts dict would be like:
|
139 |
+
# {
|
140 |
+
# 0: 0.5997304983036035,
|
141 |
+
# 12: 2.842871240958237,
|
142 |
+
# 15: 1.0195474451419193
|
143 |
+
# }
|
144 |
+
for class_id in class_list:
|
145 |
+
np.putmask(self.PIXEL_WEIGHTS[n], y==class_id, class_weights[class_id])
|
146 |
+
|
147 |
+
# A trick of keras data generator: the last item yield
|
148 |
+
# from a generator could be a sample weights array
|
149 |
+
sample_weight_dict = {'pred_mask' : self.PIXEL_WEIGHTS}
|
150 |
+
|
151 |
+
if self.weighted_type == 'adaptive':
|
152 |
+
return self.X, self.Y, sample_weight_dict
|
153 |
+
else:
|
154 |
+
return self.X, self.Y
|
155 |
+
|
156 |
+
def on_epoch_end(self):
|
157 |
+
# Shuffle dataset for next epoch
|
158 |
+
c = list(zip(self.image_path_list, self.label_path_list))
|
159 |
+
random.shuffle(c)
|
160 |
+
self.image_path_list, self.label_path_list = zip(*c)
|
161 |
+
|
models/deeplab/deeplabv3p/loss.py
ADDED
@@ -0,0 +1,74 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K


def sparse_crossentropy_ignoring_last_label(y_true, y_pred):
    num_classes = K.shape(y_pred)[-1]
    y_true = K.one_hot(tf.cast(y_true[..., 0], tf.int32), num_classes+1)[..., :-1]
    return K.categorical_crossentropy(y_true, y_pred)

def sparse_crossentropy(y_true, y_pred):
    num_classes = K.shape(y_pred)[-1]
    y_true = K.one_hot(tf.cast(y_true[..., 0], tf.int32), num_classes)
    return K.categorical_crossentropy(y_true, y_pred)


def softmax_focal_loss(y_true, y_pred, gamma=2.0, alpha=0.25, from_logits=False):
    """
    Compute softmax focal loss.
    Reference Paper:
        "Focal Loss for Dense Object Detection"
        https://arxiv.org/abs/1708.02002

    # Arguments
        y_true: Ground truth targets,
            tensor of shape (?, num_pixel, num_classes).
        y_pred: Predicted logits,
            tensor of shape (?, num_pixel, num_classes).
        gamma: exponent of the modulating factor (1 - p_t) ^ gamma.
        alpha: optional alpha weighting factor to balance positives vs negatives.

    # Returns
        softmax_focal_loss: Softmax focal loss, tensor of shape (?, num_pixel).
    """
    if from_logits:
        y_pred = K.softmax(y_pred)

    # Clip the prediction value to prevent NaN's and Inf's
    #epsilon = K.epsilon()
    #y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
    y_pred = K.maximum(K.minimum(y_pred, 1 - 1e-15), 1e-15)

    # Calculate Cross Entropy
    cross_entropy = -y_true * K.log(y_pred)

    # Calculate Focal Loss
    softmax_focal_loss = K.mean(alpha * K.pow(1 - y_pred, gamma) * cross_entropy, axis=-1)
    return softmax_focal_loss


class WeightedSparseCategoricalCrossEntropy(object):
    def __init__(self, weights, from_logits=False):
        self.weights = np.array(weights).astype('float32')
        self.from_logits = from_logits
        self.__name__ = 'weighted_sparse_categorical_crossentropy'

    def __call__(self, y_true, y_pred):
        return self.weighted_sparse_categorical_crossentropy(y_true, y_pred)

    def weighted_sparse_categorical_crossentropy(self, y_true, y_pred):
        num_classes = len(self.weights)
        y_true = K.one_hot(tf.cast(y_true[..., 0], tf.int32), num_classes)
        if self.from_logits:
            y_pred = K.softmax(y_pred)

        log_pred = K.log(y_pred)
        unweighted_losses = -K.sum(y_true*log_pred, axis=-1)

        weights = K.sum(K.constant(self.weights) * y_true, axis=-1)
        weighted_losses = unweighted_losses * weights
        return weighted_losses
models/deeplab/deeplabv3p/metrics.py
ADDED
@@ -0,0 +1,46 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import numpy as np

from tensorflow.keras import backend as K
import tensorflow as tf


def mIOU(gt, preds):
    ulabels = np.unique(gt)
    iou = np.zeros(len(ulabels))
    for k, u in enumerate(ulabels):
        inter = (gt == u) & (preds == u)
        union = (gt == u) | (preds == u)
        iou[k] = inter.sum()/union.sum()
    return np.round(iou.mean(), 2)


def sparse_accuracy_ignoring_last_label(y_true, y_pred):
    nb_classes = y_pred.shape.as_list()[-1]
    y_pred = K.reshape(y_pred, (-1, nb_classes))
    y_true = tf.cast(K.flatten(y_true), tf.int64)
    legal_labels = ~K.equal(y_true, nb_classes)
    return K.sum(tf.cast(legal_labels & K.equal(y_true,
                 K.argmax(y_pred, axis=-1)), tf.float32)) / K.sum(tf.cast(legal_labels, tf.float32))


def Jaccard(y_true, y_pred):
    nb_classes = y_pred.shape.as_list()[-1]
    iou = []
    pred_pixels = K.argmax(y_pred, axis=-1)
    for i in range(0, nb_classes+1):
        true_labels = K.equal(y_true[:,:,0], i)
        pred_labels = K.equal(pred_pixels, i)
        inter = tf.cast(true_labels & pred_labels, tf.int32)
        union = tf.cast(true_labels | pred_labels, tf.int32)
        legal_batches = K.sum(tf.cast(true_labels, tf.int32), axis=1)>0
        ious = K.sum(inter, axis=1)/K.sum(union, axis=1)
        iou.append(K.mean(ious[legal_batches]))
    iou = tf.stack(iou)
    legal_labels = ~tf.math.is_nan(iou)
    iou = iou[legal_labels]
    return K.mean(iou)
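A toy check of the numpy `mIOU()` helper above (illustration only; `Jaccard()` is its graph-mode counterpart used as a Keras metric):

import numpy as np

gt    = np.array([[0, 0, 1, 1],
                  [0, 0, 1, 1]])
preds = np.array([[0, 0, 0, 0],
                  [0, 0, 1, 1]])

# class 0: inter=4, union=6 -> 0.667 ; class 1: inter=2, union=4 -> 0.5
print(mIOU(gt, preds))   # 0.58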
models/deeplab/deeplabv3p/model.py
ADDED
@@ -0,0 +1,96 @@
1 |
+
#!/usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
"""
|
4 |
+
create deeplabv3p models
|
5 |
+
"""
|
6 |
+
from functools import partial
|
7 |
+
from tensorflow.keras.layers import Conv2D, Reshape, Activation, Softmax, Lambda, Input
|
8 |
+
from tensorflow.keras.models import Model
|
9 |
+
|
10 |
+
from deeplabv3p.models.deeplabv3p_xception import Deeplabv3pXception
|
11 |
+
from deeplabv3p.models.deeplabv3p_mobilenetv2 import Deeplabv3pMobileNetV2, Deeplabv3pLiteMobileNetV2
|
12 |
+
from deeplabv3p.models.deeplabv3p_mobilenetv3 import Deeplabv3pMobileNetV3Large, Deeplabv3pLiteMobileNetV3Large, Deeplabv3pMobileNetV3Small, Deeplabv3pLiteMobileNetV3Small
|
13 |
+
from deeplabv3p.models.deeplabv3p_peleenet import Deeplabv3pPeleeNet, Deeplabv3pLitePeleeNet
|
14 |
+
from deeplabv3p.models.deeplabv3p_resnet50 import Deeplabv3pResNet50
|
15 |
+
from deeplabv3p.models.layers import DeeplabConv2D, Subpixel, img_resize
|
16 |
+
|
17 |
+
#
|
18 |
+
# A map of model type to construction function for DeepLabv3+
|
19 |
+
#
|
20 |
+
deeplab_model_map = {
|
21 |
+
'mobilenetv2': partial(Deeplabv3pMobileNetV2, alpha=1.0),
|
22 |
+
'mobilenetv2_lite': partial(Deeplabv3pLiteMobileNetV2, alpha=1.0),
|
23 |
+
|
24 |
+
'mobilenetv3large': partial(Deeplabv3pMobileNetV3Large, alpha=1.0),
|
25 |
+
'mobilenetv3large_lite': partial(Deeplabv3pLiteMobileNetV3Large, alpha=1.0),
|
26 |
+
|
27 |
+
'mobilenetv3small': partial(Deeplabv3pMobileNetV3Small, alpha=1.0),
|
28 |
+
'mobilenetv3small_lite': partial(Deeplabv3pLiteMobileNetV3Small, alpha=1.0),
|
29 |
+
|
30 |
+
'peleenet': Deeplabv3pPeleeNet,
|
31 |
+
'peleenet_lite': Deeplabv3pLitePeleeNet,
|
32 |
+
|
33 |
+
'xception': Deeplabv3pXception,
|
34 |
+
'resnet50': Deeplabv3pResNet50,
|
35 |
+
}
|
36 |
+
|
37 |
+
|
38 |
+
def get_deeplabv3p_model(model_type, num_classes, model_input_shape, output_stride, freeze_level=0, weights_path=None, training=True, use_subpixel=False):
|
39 |
+
# check if model type is valid
|
40 |
+
if model_type not in deeplab_model_map.keys():
|
41 |
+
raise ValueError('This model type is not supported now')
|
42 |
+
|
43 |
+
model_function = deeplab_model_map[model_type]
|
44 |
+
|
45 |
+
input_tensor = Input(shape=model_input_shape + (3,), name='image_input')
|
46 |
+
model, backbone_len = model_function(input_tensor=input_tensor,
|
47 |
+
input_shape=model_input_shape + (3,),
|
48 |
+
#weights='imagenet',
|
49 |
+
num_classes=21,
|
50 |
+
OS=output_stride)
|
51 |
+
|
52 |
+
base_model = Model(model.input, model.layers[-5].output)
|
53 |
+
print('backbone layers number: {}'.format(backbone_len))
|
54 |
+
|
55 |
+
if use_subpixel:
|
56 |
+
if model_type == 'xception':
|
57 |
+
scale = 4
|
58 |
+
else:
|
59 |
+
scale = 8
|
60 |
+
x = Subpixel(num_classes, 1, scale, padding='same')(base_model.output)
|
61 |
+
else:
|
62 |
+
x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='conv_upsample')(base_model.output)
|
63 |
+
x = Lambda(img_resize, arguments={'size': (model_input_shape[0], model_input_shape[1])}, name='pred_resize')(x)
|
64 |
+
|
65 |
+
# for training model, we need to flatten mask to calculate loss
|
66 |
+
if training:
|
67 |
+
x = Reshape((model_input_shape[0]*model_input_shape[1], num_classes)) (x)
|
68 |
+
|
69 |
+
x = Softmax(name='pred_mask')(x)
|
70 |
+
model = Model(base_model.input, x, name='deeplabv3p_'+model_type)
|
71 |
+
|
72 |
+
#if use_subpixel:
|
73 |
+
# Do ICNR
|
74 |
+
#for layer in model.layers:
|
75 |
+
#if type(layer) == Subpixel:
|
76 |
+
#c, b = layer.get_weights()
|
77 |
+
#w = icnr_weights(scale=scale, shape=c.shape)
|
78 |
+
#layer.set_weights([w, b])
|
79 |
+
|
80 |
+
if weights_path:
|
81 |
+
model.load_weights(weights_path, by_name=False)#, skip_mismatch=True)
|
82 |
+
print('Load weights {}.'.format(weights_path))
|
83 |
+
|
84 |
+
if freeze_level in [1, 2]:
|
85 |
+
# Freeze the backbone part or freeze all but final feature map & input layers.
|
86 |
+
num = (backbone_len, len(base_model.layers))[freeze_level-1]
|
87 |
+
for i in range(num): model.layers[i].trainable = False
|
88 |
+
print('Freeze the first {} layers of total {} layers.'.format(num, len(model.layers)))
|
89 |
+
elif freeze_level == 0:
|
90 |
+
# Unfreeze all layers.
|
91 |
+
for i in range(len(model.layers)):
|
92 |
+
model.layers[i].trainable= True
|
93 |
+
print('Unfreeze all of the layers.')
|
94 |
+
|
95 |
+
return model
|
96 |
+
|
models/deeplab/deeplabv3p/models/__pycache__/deeplabv3p_mobilenetv3.cpython-311.pyc
ADDED
Binary file (35.4 kB)
models/deeplab/deeplabv3p/models/__pycache__/layers.cpython-311.pyc
ADDED
Binary file (16.6 kB)
models/deeplab/deeplabv3p/models/deeplabv3p_mobilenetv2.py
ADDED
@@ -0,0 +1,349 @@
1 |
+
#!/usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
""" Deeplabv3+ MobileNetV2 model for Keras.
|
4 |
+
|
5 |
+
# Reference Paper:
|
6 |
+
- [Encoder-Decoder with Atrous Separable Convolution
|
7 |
+
for Semantic Image Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
|
8 |
+
- [Inverted Residuals and Linear Bottlenecks: Mobile Networks for
|
9 |
+
Classification, Detection and Segmentation](https://arxiv.org/abs/1801.04381)
|
10 |
+
"""
|
11 |
+
from tensorflow.keras.models import Model
|
12 |
+
from tensorflow.keras.activations import relu
|
13 |
+
from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, ZeroPadding2D, Lambda, AveragePooling2D, Input, Concatenate, Add, Reshape, BatchNormalization, Dropout, ReLU, Softmax
|
14 |
+
from tensorflow.keras.utils import get_source_inputs, get_file
|
15 |
+
#from tensorflow.keras import backend as K
|
16 |
+
|
17 |
+
from deeplabv3p.models.layers import DeeplabConv2D, DeeplabDepthwiseConv2D, CustomBatchNormalization, ASPP_block, ASPP_Lite_block, Decoder_block, normalize, img_resize
|
18 |
+
|
19 |
+
BACKBONE_WEIGHT_PATH = ('https://github.com/JonathanCMitchell/mobilenet_v2_keras/'
|
20 |
+
'releases/download/v1.1/')
|
21 |
+
|
22 |
+
WEIGHTS_PATH_MOBILE = "https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.1/deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5"
|
23 |
+
|
24 |
+
|
25 |
+
def _make_divisible(v, divisor, min_value=None):
|
26 |
+
if min_value is None:
|
27 |
+
min_value = divisor
|
28 |
+
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
29 |
+
# Make sure that round down does not go down by more than 10%.
|
30 |
+
if new_v < 0.9 * v:
|
31 |
+
new_v += divisor
|
32 |
+
return new_v
|
33 |
+
|
34 |
+
|
35 |
+
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id, skip_connection, rate=1):
|
36 |
+
#in_channels = inputs._keras_shape[-1]
|
37 |
+
in_channels = inputs.shape.as_list()[-1]
|
38 |
+
pointwise_conv_filters = int(filters * alpha)
|
39 |
+
pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
|
40 |
+
x = inputs
|
41 |
+
prefix = 'expanded_conv_{}_'.format(block_id)
|
42 |
+
if block_id:
|
43 |
+
# Expand
|
44 |
+
x = DeeplabConv2D(expansion * in_channels, kernel_size=1, padding='same',
|
45 |
+
use_bias=False, activation=None,
|
46 |
+
name=prefix + 'expand')(x)
|
47 |
+
x = CustomBatchNormalization(epsilon=1e-3, momentum=0.999,
|
48 |
+
name=prefix + 'expand_BN')(x)
|
49 |
+
x = ReLU(max_value=6.)(x)
|
50 |
+
else:
|
51 |
+
prefix = 'expanded_conv_'
|
52 |
+
# Depthwise
|
53 |
+
x = DeeplabDepthwiseConv2D(kernel_size=3, strides=stride, activation=None,
|
54 |
+
use_bias=False, padding='same', dilation_rate=(rate, rate),
|
55 |
+
name=prefix + 'depthwise')(x)
|
56 |
+
x = CustomBatchNormalization(epsilon=1e-3, momentum=0.999,
|
57 |
+
name=prefix + 'depthwise_BN')(x)
|
58 |
+
x = ReLU(max_value=6., name=prefix + 'depthwise_relu')(x)
|
59 |
+
|
60 |
+
x = DeeplabConv2D(pointwise_filters,
|
61 |
+
kernel_size=1, padding='same', use_bias=False, activation=None,
|
62 |
+
name=prefix + 'project')(x)
|
63 |
+
x = CustomBatchNormalization(epsilon=1e-3, momentum=0.999,
|
64 |
+
name=prefix + 'project_BN')(x)
|
65 |
+
|
66 |
+
if skip_connection:
|
67 |
+
return Add(name=prefix + 'add')([inputs, x])
|
68 |
+
# if in_channels == pointwise_filters and stride == 1:
|
69 |
+
# return Add(name='res_connect_' + str(block_id))([inputs, x])
|
70 |
+
|
71 |
+
return x
|
72 |
+
|
73 |
+
|
74 |
+
def MobileNetV2_body(input_tensor, OS, alpha, weights='imagenet'):
|
75 |
+
"""
|
76 |
+
Modified MobileNetV2 feature extractor body
|
77 |
+
with specified output stride and skip level feature
|
78 |
+
"""
|
79 |
+
if OS == 8:
|
80 |
+
origin_os16_stride = 1
|
81 |
+
origin_os16_block_rate = 2
|
82 |
+
origin_os32_stride = 1
|
83 |
+
origin_os32_block_rate = 4
|
84 |
+
elif OS == 16:
|
85 |
+
origin_os16_stride = 2
|
86 |
+
origin_os16_block_rate = 1
|
87 |
+
origin_os32_stride = 1
|
88 |
+
origin_os32_block_rate = 2
|
89 |
+
elif OS == 32:
|
90 |
+
origin_os16_stride = 2
|
91 |
+
origin_os16_block_rate = 1
|
92 |
+
origin_os32_stride = 2
|
93 |
+
origin_os32_block_rate = 1
|
94 |
+
else:
|
95 |
+
raise ValueError('invalid output stride', OS)
|
96 |
+
|
97 |
+
first_block_filters = _make_divisible(32 * alpha, 8)
|
98 |
+
x = DeeplabConv2D(first_block_filters,
|
99 |
+
kernel_size=3,
|
100 |
+
strides=(2, 2), padding='same',
|
101 |
+
use_bias=False, name='Conv')(input_tensor)
|
102 |
+
x = CustomBatchNormalization(
|
103 |
+
epsilon=1e-3, momentum=0.999, name='Conv_BN')(x)
|
104 |
+
x = ReLU(6.)(x)
|
105 |
+
|
106 |
+
x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1,
|
107 |
+
expansion=1, block_id=0, skip_connection=False)
|
108 |
+
|
109 |
+
x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2,
|
110 |
+
expansion=6, block_id=1, skip_connection=False)
|
111 |
+
x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1,
|
112 |
+
expansion=6, block_id=2, skip_connection=True)
|
113 |
+
# skip level feature, with output stride = 4
|
114 |
+
skip = x
|
115 |
+
|
116 |
+
x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2,
|
117 |
+
expansion=6, block_id=3, skip_connection=False)
|
118 |
+
x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
|
119 |
+
expansion=6, block_id=4, skip_connection=True)
|
120 |
+
x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
|
121 |
+
expansion=6, block_id=5, skip_connection=True)
|
122 |
+
|
123 |
+
# original output stride changes to 16 from here, so we start to control block stride and dilation rate
|
124 |
+
x = _inverted_res_block(x, filters=64, alpha=alpha, stride=origin_os16_stride, # origin: stride=2!
|
125 |
+
expansion=6, block_id=6, skip_connection=False)
|
126 |
+
x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=origin_os16_block_rate,
|
127 |
+
expansion=6, block_id=7, skip_connection=True)
|
128 |
+
x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=origin_os16_block_rate,
|
129 |
+
expansion=6, block_id=8, skip_connection=True)
|
130 |
+
x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=origin_os16_block_rate,
|
131 |
+
expansion=6, block_id=9, skip_connection=True)
|
132 |
+
|
133 |
+
x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=origin_os16_block_rate,
|
134 |
+
expansion=6, block_id=10, skip_connection=False)
|
135 |
+
x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=origin_os16_block_rate,
|
136 |
+
expansion=6, block_id=11, skip_connection=True)
|
137 |
+
x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=origin_os16_block_rate,
|
138 |
+
expansion=6, block_id=12, skip_connection=True)
|
139 |
+
|
140 |
+
# original output stride changes to 32 from here
|
141 |
+
x = _inverted_res_block(x, filters=160, alpha=alpha, stride=origin_os32_stride, rate=origin_os16_block_rate, # origin: stride=2!
|
142 |
+
expansion=6, block_id=13, skip_connection=False)
|
143 |
+
x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=origin_os32_block_rate,
|
144 |
+
expansion=6, block_id=14, skip_connection=True)
|
145 |
+
x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=origin_os32_block_rate,
|
146 |
+
expansion=6, block_id=15, skip_connection=True)
|
147 |
+
|
148 |
+
x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, rate=origin_os32_block_rate,
|
149 |
+
expansion=6, block_id=16, skip_connection=False)
|
150 |
+
# end of feature extractor
|
151 |
+
|
152 |
+
# expand the model structure to MobileNetV2 no top, so
|
153 |
+
# that we can load official imagenet pretrained weights
|
154 |
+
|
155 |
+
# no alpha applied to last conv as stated in the paper:
|
156 |
+
# if the width multiplier is greater than 1 we
|
157 |
+
# increase the number of output channels
|
158 |
+
if alpha > 1.0:
|
159 |
+
last_block_filters = _make_divisible(1280 * alpha, 8)
|
160 |
+
else:
|
161 |
+
last_block_filters = 1280
|
162 |
+
|
163 |
+
y = DeeplabConv2D(last_block_filters,
|
164 |
+
kernel_size=1,
|
165 |
+
use_bias=False,
|
166 |
+
name='Conv_1')(x)
|
167 |
+
y = CustomBatchNormalization(epsilon=1e-3,
|
168 |
+
momentum=0.999,
|
169 |
+
name='Conv_1_bn')(y)
|
170 |
+
y = ReLU(6., name='out_relu')(y)
|
171 |
+
|
172 |
+
# Ensure that the model takes into account
|
173 |
+
# any potential predecessors of `input_tensor`.
|
174 |
+
if input_tensor is not None:
|
175 |
+
inputs = get_source_inputs(input_tensor)
|
176 |
+
#else:
|
177 |
+
#inputs = img_input
|
178 |
+
|
179 |
+
# hardcode row=224
|
180 |
+
rows = 224
|
181 |
+
|
182 |
+
model = Model(inputs, y, name='mobilenetv2_%0.2f_%s' % (alpha, rows))
|
183 |
+
# Load weights.
|
184 |
+
if weights == 'imagenet':
|
185 |
+
model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
|
186 |
+
str(alpha) + '_' + str(rows) + '_no_top' + '.h5')
|
187 |
+
weight_path = BACKBONE_WEIGHT_PATH + model_name
|
188 |
+
weights_path = get_file(
|
189 |
+
model_name, weight_path, cache_subdir='models')
|
190 |
+
|
191 |
+
model.load_weights(weights_path)
|
192 |
+
|
193 |
+
backbone_len = len(model.layers) - 3
|
194 |
+
# need to return feature map and skip connection,
|
195 |
+
# not the whole "no top" model
|
196 |
+
return x, skip, backbone_len
|
197 |
+
|
198 |
+
|
199 |
+
def Deeplabv3pMobileNetV2(input_shape=(512, 512, 3),
|
200 |
+
alpha=1.0,
|
201 |
+
weights='imagenet',
|
202 |
+
input_tensor=None,
|
203 |
+
num_classes=21,
|
204 |
+
OS=8):
|
205 |
+
""" Instantiates the Deeplabv3+ MobileNetV2 architecture
|
206 |
+
# Arguments
|
207 |
+
input_shape: shape of input image. format HxWxC
|
208 |
+
PASCAL VOC model was trained on (512,512,3) images
|
209 |
+
alpha: controls the width of the MobileNetV2 network. This is known as the
|
210 |
+
width multiplier in the MobileNetV2 paper.
|
211 |
+
- If `alpha` < 1.0, proportionally decreases the number
|
212 |
+
of filters in each layer.
|
213 |
+
- If `alpha` > 1.0, proportionally increases the number
|
214 |
+
of filters in each layer.
|
215 |
+
- If `alpha` = 1, default number of filters from the paper
|
216 |
+
are used at each layer.
|
217 |
+
Used only for mobilenetv2 backbone
|
218 |
+
weights: pretrained weights type
|
219 |
+
- imagenet: pre-trained on Imagenet
|
220 |
+
- None : random initialization
|
221 |
+
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
222 |
+
to use as image input for the model.
|
223 |
+
num_classes: number of desired classes.
|
224 |
+
OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
|
225 |
+
|
226 |
+
# Returns
|
227 |
+
A Keras model instance.
|
228 |
+
"""
|
229 |
+
|
230 |
+
if not (weights in {'imagenet', None}):
|
231 |
+
raise ValueError('The `weights` argument should be either '
|
232 |
+
'`imagenet` (pre-trained on Imagenet) or '
|
233 |
+
'`None` (random initialization)')
|
234 |
+
if input_tensor is None:
|
235 |
+
img_input = Input(shape=input_shape, name='image_input')
|
236 |
+
else:
|
237 |
+
img_input = input_tensor
|
238 |
+
|
239 |
+
# normalize input image
|
240 |
+
img_norm = Lambda(normalize, name='input_normalize')(img_input)
|
241 |
+
|
242 |
+
# backbone body for feature extract
|
243 |
+
x, skip_feature, backbone_len = MobileNetV2_body(img_norm, OS, alpha, weights=weights)
|
244 |
+
|
245 |
+
# ASPP block
|
246 |
+
x = ASPP_block(x, OS)
|
247 |
+
|
248 |
+
# Deeplabv3+ decoder for feature projection
|
249 |
+
x = Decoder_block(x, skip_feature)
|
250 |
+
|
251 |
+
# Final prediction conv block
|
252 |
+
x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
|
253 |
+
x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
|
254 |
+
x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
|
255 |
+
x = Softmax(name='Predictions/Softmax')(x)
|
256 |
+
|
257 |
+
|
258 |
+
# Ensure that the model takes into account
|
259 |
+
# any potential predecessors of `input_tensor`.
|
260 |
+
#if input_tensor is not None:
|
261 |
+
#inputs = get_source_inputs(input_tensor)
|
262 |
+
#else:
|
263 |
+
#inputs = img_input
|
264 |
+
|
265 |
+
model = Model(img_input, x, name='deeplabv3p_mobilenetv2')
|
266 |
+
|
267 |
+
return model, backbone_len
|
268 |
+
|
269 |
+
|
270 |
+
def Deeplabv3pLiteMobileNetV2(input_shape=(512, 512, 3),
|
271 |
+
alpha=1.0,
|
272 |
+
weights='imagenet',
|
273 |
+
input_tensor=None,
|
274 |
+
num_classes=21,
|
275 |
+
OS=8):
|
276 |
+
""" Instantiates the Deeplabv3+ MobileNetV2Lite architecture
|
277 |
+
# Arguments
|
278 |
+
input_shape: shape of input image. format HxWxC
|
279 |
+
PASCAL VOC model was trained on (512,512,3) images
|
280 |
+
alpha: controls the width of the MobileNetV2 network. This is known as the
|
281 |
+
width multiplier in the MobileNetV2 paper.
|
282 |
+
- If `alpha` < 1.0, proportionally decreases the number
|
283 |
+
of filters in each layer.
|
284 |
+
- If `alpha` > 1.0, proportionally increases the number
|
285 |
+
of filters in each layer.
|
286 |
+
- If `alpha` = 1, default number of filters from the paper
|
287 |
+
are used at each layer.
|
288 |
+
Used only for mobilenetv2 backbone
|
289 |
+
weights: pretrained weights type
|
290 |
+
- pascalvoc : pre-trained on PASCAL VOC
|
291 |
+
- imagenet: pre-trained on Imagenet
|
292 |
+
- None : random initialization
|
293 |
+
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
294 |
+
to use as image input for the model.
|
295 |
+
num_classes: number of desired classes.
|
296 |
+
OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
|
297 |
+
|
298 |
+
# Returns
|
299 |
+
A Keras model instance.
|
300 |
+
# Raises
|
301 |
+
RuntimeError: If attempting to run this model with a
|
302 |
+
backend that does not support separable convolutions.
|
303 |
+
ValueError: in case of invalid argument for `weights` or `backbone`
|
304 |
+
"""
|
305 |
+
|
306 |
+
if not (weights in {'pascalvoc', 'imagenet', None}):
|
307 |
+
raise ValueError('The `weights` argument should be either '
|
308 |
+
'`pascalvoc` (pre-trained on PASCAL VOC) '
|
309 |
+
'`imagenet` (pre-trained on Imagenet) or '
|
310 |
+
'`None` (random initialization)')
|
311 |
+
|
312 |
+
if input_tensor is None:
|
313 |
+
img_input = Input(shape=input_shape, name='image_input')
|
314 |
+
else:
|
315 |
+
img_input = input_tensor
|
316 |
+
|
317 |
+
# normalize input image
|
318 |
+
img_norm = Lambda(normalize, name='input_normalize')(img_input)
|
319 |
+
|
320 |
+
# backbone body for feature extract
|
321 |
+
x, _, backbone_len = MobileNetV2_body(img_norm, OS, alpha, weights=weights)
|
322 |
+
|
323 |
+
# use ASPP Lite block & no decode block
|
324 |
+
x = ASPP_Lite_block(x)
|
325 |
+
|
326 |
+
# Final prediction conv block
|
327 |
+
x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
|
328 |
+
x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
|
329 |
+
x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
|
330 |
+
x = Softmax(name='Predictions/Softmax')(x)
|
331 |
+
|
332 |
+
|
333 |
+
# Ensure that the model takes into account
|
334 |
+
# any potential predecessors of `input_tensor`.
|
335 |
+
#if input_tensor is not None:
|
336 |
+
#inputs = get_source_inputs(input_tensor)
|
337 |
+
#else:
|
338 |
+
#inputs = img_input
|
339 |
+
|
340 |
+
model = Model(img_input, x, name='deeplabv3p_mobilenetv2_lite')
|
341 |
+
|
342 |
+
# load weights
|
343 |
+
if weights == 'pascalvoc':
|
344 |
+
weights_path = get_file('deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5',
|
345 |
+
WEIGHTS_PATH_MOBILE,
|
346 |
+
cache_subdir='models')
|
347 |
+
model.load_weights(weights_path, by_name=True)
|
348 |
+
return model, backbone_len
|
349 |
+
|
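A minimal usage sketch for the constructors above (an illustration only: it assumes the repo root is on the Python path and skips the ImageNet weight download by passing weights=None):

from tensorflow.keras.layers import Input
from deeplabv3p.models.deeplabv3p_mobilenetv2 import Deeplabv3pMobileNetV2

# build a 21-class PASCAL-VOC style model with output stride 16
input_tensor = Input(shape=(512, 512, 3), name='image_input')
model, backbone_len = Deeplabv3pMobileNetV2(input_tensor=input_tensor,
                                            alpha=1.0,
                                            weights=None,      # no ImageNet download
                                            num_classes=21,
                                            OS=16)
# the output is flattened to (batch, 512*512, 21) by the final Reshape/Softmax
model.summary()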
models/deeplab/deeplabv3p/models/deeplabv3p_mobilenetv3.py
ADDED
@@ -0,0 +1,912 @@
1 |
+
|
2 |
+
#!/usr/bin/env python3
|
3 |
+
# -*- coding: utf-8 -*-
|
4 |
+
""" Deeplabv3+ MobileNetV3(Large/Small) model for Keras.
|
5 |
+
|
6 |
+
# Reference Paper:
|
7 |
+
- [Encoder-Decoder with Atrous Separable Convolution
|
8 |
+
for Semantic Image Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
|
9 |
+
- [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244.pdf) (ICCV 2019)
|
10 |
+
"""
|
11 |
+
import os, sys
|
12 |
+
import warnings
|
13 |
+
|
14 |
+
from keras_applications.imagenet_utils import _obtain_input_shape
|
15 |
+
from keras_applications.imagenet_utils import preprocess_input as _preprocess_input
|
16 |
+
from tensorflow.keras.utils import get_source_inputs, get_file
|
17 |
+
from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, Dense, GlobalAveragePooling2D, GlobalMaxPooling2D, Flatten, Softmax, Dropout, ZeroPadding2D
|
18 |
+
from tensorflow.keras.layers import BatchNormalization, Add, Multiply, Reshape
|
19 |
+
from tensorflow.keras.layers import Input, Activation, ReLU, Reshape, Lambda
|
20 |
+
from tensorflow.keras.models import Model
|
21 |
+
from tensorflow.keras import backend as K
|
22 |
+
|
23 |
+
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..'))
|
24 |
+
from deeplabv3p.models.layers import DeeplabConv2D, DeeplabDepthwiseConv2D, CustomBatchNormalization, ASPP_block, ASPP_Lite_block, Decoder_block, normalize, img_resize
|
25 |
+
|
26 |
+
|
27 |
+
BASE_WEIGHT_PATH = ('https://github.com/DrSlink/mobilenet_v3_keras/'
|
28 |
+
'releases/download/v1.0/')
|
29 |
+
WEIGHTS_HASHES = {
|
30 |
+
'large_224_0.75_float': (
|
31 |
+
'765b44a33ad4005b3ac83185abf1d0eb',
|
32 |
+
'c256439950195a46c97ede7c294261c6'),
|
33 |
+
'large_224_1.0_float': (
|
34 |
+
'59e551e166be033d707958cf9e29a6a7',
|
35 |
+
'12c0a8442d84beebe8552addf0dcb950'),
|
36 |
+
'large_minimalistic_224_1.0_float': (
|
37 |
+
'675e7b876c45c57e9e63e6d90a36599c',
|
38 |
+
'c1cddbcde6e26b60bdce8e6e2c7cae54'),
|
39 |
+
'small_224_0.75_float': (
|
40 |
+
'cb65d4e5be93758266aa0a7f2c6708b7',
|
41 |
+
'c944bb457ad52d1594392200b48b4ddb'),
|
42 |
+
'small_224_1.0_float': (
|
43 |
+
'8768d4c2e7dee89b9d02b2d03d65d862',
|
44 |
+
'5bec671f47565ab30e540c257bba8591'),
|
45 |
+
'small_minimalistic_224_1.0_float': (
|
46 |
+
'99cd97fb2fcdad2bf028eb838de69e37',
|
47 |
+
'1efbf7e822e03f250f45faa3c6bbe156'),
|
48 |
+
}
|
49 |
+
|
50 |
+
|
51 |
+
def correct_pad(backend, inputs, kernel_size):
|
52 |
+
"""Returns a tuple for zero-padding for 2D convolution with downsampling.
|
53 |
+
# Arguments
|
54 |
+
inputs: input tensor (its spatial size determines the padding).
|
55 |
+
kernel_size: An integer or tuple/list of 2 integers.
|
56 |
+
# Returns
|
57 |
+
A tuple.
|
58 |
+
"""
|
59 |
+
img_dim = 2 if backend.image_data_format() == 'channels_first' else 1
|
60 |
+
input_size = backend.int_shape(inputs)[img_dim:(img_dim + 2)]
|
61 |
+
|
62 |
+
if isinstance(kernel_size, int):
|
63 |
+
kernel_size = (kernel_size, kernel_size)
|
64 |
+
|
65 |
+
if input_size[0] is None:
|
66 |
+
adjust = (1, 1)
|
67 |
+
else:
|
68 |
+
adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)
|
69 |
+
|
70 |
+
correct = (kernel_size[0] // 2, kernel_size[1] // 2)
|
71 |
+
|
72 |
+
return ((correct[0] - adjust[0], correct[0]),
|
73 |
+
(correct[1] - adjust[1], correct[1]))
|
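A quick illustration of what correct_pad computes (a sketch, assuming a channels_last 224x224 input; values checked by hand):

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input

inputs = Input(shape=(224, 224, 3))
print(correct_pad(K, inputs, 3))   # ((0, 1), (0, 1)) for an even-sized input
# Padding 224 -> 225 and then applying a kernel-3, stride-2 'valid' conv yields
# 112x112, matching what a 'same'-padded stride-2 conv would produce.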
74 |
+
|
75 |
+
|
76 |
+
def preprocess_input(x):
|
77 |
+
"""
|
78 |
+
"mode" option description in preprocess_input
|
79 |
+
mode: One of "caffe", "tf" or "torch".
|
80 |
+
- caffe: will convert the images from RGB to BGR,
|
81 |
+
then will zero-center each color channel with
|
82 |
+
respect to the ImageNet dataset,
|
83 |
+
without scaling.
|
84 |
+
- tf: will scale pixels between -1 and 1,
|
85 |
+
sample-wise.
|
86 |
+
- torch: will scale pixels between 0 and 1 and then
|
87 |
+
will normalize each channel with respect to the
|
88 |
+
ImageNet dataset.
|
89 |
+
"""
|
90 |
+
x = _preprocess_input(x, mode='tf', backend=K)
|
91 |
+
#x /= 255.
|
92 |
+
#mean = [0.485, 0.456, 0.406]
|
93 |
+
#std = [0.229, 0.224, 0.225]
|
94 |
+
|
95 |
+
#x[..., 0] -= mean[0]
|
96 |
+
#x[..., 1] -= mean[1]
|
97 |
+
#x[..., 2] -= mean[2]
|
98 |
+
#if std is not None:
|
99 |
+
#x[..., 0] /= std[0]
|
100 |
+
#x[..., 1] /= std[1]
|
101 |
+
#x[..., 2] /= std[2]
|
102 |
+
|
103 |
+
return x
|
104 |
+
|
105 |
+
|
106 |
+
def relu(x):
|
107 |
+
return ReLU()(x)
|
108 |
+
|
109 |
+
|
110 |
+
def hard_sigmoid(x):
|
111 |
+
return ReLU(6.)(x + 3.) * (1. / 6.)
|
112 |
+
|
113 |
+
|
114 |
+
def hard_swish(x):
|
115 |
+
return Multiply()([Activation(hard_sigmoid)(x), x])
|
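A small numeric check of the two activations above (a sketch, assuming TF2 eager execution):

import tensorflow as tf

x = tf.constant([-4.0, -1.0, 0.0, 1.0, 4.0])
print(hard_sigmoid(x).numpy())   # approx [0.0, 0.333, 0.5, 0.667, 1.0]
print(hard_swish(x).numpy())     # approx [0.0, -0.333, 0.0, 0.667, 4.0]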
116 |
+
|
117 |
+
|
118 |
+
# This function is taken from the original tf repo.
|
119 |
+
# It ensures that all layers have a channel number that is divisible by 8
|
120 |
+
# It can be seen here:
|
121 |
+
# https://github.com/tensorflow/models/blob/master/research/
|
122 |
+
# slim/nets/mobilenet/mobilenet.py
|
123 |
+
|
124 |
+
def _depth(v, divisor=8, min_value=None):
|
125 |
+
if min_value is None:
|
126 |
+
min_value = divisor
|
127 |
+
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
128 |
+
# Make sure that round down does not go down by more than 10%.
|
129 |
+
if new_v < 0.9 * v:
|
130 |
+
new_v += divisor
|
131 |
+
return new_v
|
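Worked examples of the channel-rounding rule (plain arithmetic, verifiable by hand):

print(_depth(64 * 0.75))   # 48 -- already a multiple of 8
print(_depth(12))          # 16 -- rounded to a multiple of 8
print(_depth(19))          # 24 -- 16 would lose more than 10%, so bump up by 8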
132 |
+
|
133 |
+
|
134 |
+
def _se_block(inputs, filters, se_ratio, prefix):
|
135 |
+
x = GlobalAveragePooling2D(name=prefix + 'squeeze_excite/AvgPool')(inputs)
|
136 |
+
if K.image_data_format() == 'channels_first':
|
137 |
+
x = Reshape((filters, 1, 1))(x)
|
138 |
+
else:
|
139 |
+
x = Reshape((1, 1, filters))(x)
|
140 |
+
x = DeeplabConv2D(_depth(filters * se_ratio),
|
141 |
+
kernel_size=1,
|
142 |
+
padding='same',
|
143 |
+
name=prefix + 'squeeze_excite/Conv')(x)
|
144 |
+
x = ReLU(name=prefix + 'squeeze_excite/Relu')(x)
|
145 |
+
x = DeeplabConv2D(filters,
|
146 |
+
kernel_size=1,
|
147 |
+
padding='same',
|
148 |
+
name=prefix + 'squeeze_excite/Conv_1')(x)
|
149 |
+
x = Activation(hard_sigmoid)(x)
|
150 |
+
#if K.backend() == 'theano':
|
151 |
+
## For the Theano backend, we have to explicitly make
|
152 |
+
## the excitation weights broadcastable.
|
153 |
+
#x = Lambda(
|
154 |
+
#lambda br: K.pattern_broadcast(br, [True, True, True, False]),
|
155 |
+
#output_shape=lambda input_shape: input_shape,
|
156 |
+
#name=prefix + 'squeeze_excite/broadcast')(x)
|
157 |
+
x = Multiply(name=prefix + 'squeeze_excite/Mul')([inputs, x])
|
158 |
+
return x
|
159 |
+
|
160 |
+
|
161 |
+
def _inverted_res_block(x, expansion, filters, kernel_size, stride,
|
162 |
+
se_ratio, activation, block_id, skip_connection=False, rate=1):
|
163 |
+
channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
|
164 |
+
shortcut = x
|
165 |
+
prefix = 'expanded_conv/'
|
166 |
+
infilters = K.int_shape(x)[channel_axis]
|
167 |
+
if block_id:
|
168 |
+
# Expand
|
169 |
+
prefix = 'expanded_conv_{}/'.format(block_id)
|
170 |
+
x = DeeplabConv2D(_depth(infilters * expansion),
|
171 |
+
kernel_size=1,
|
172 |
+
padding='same',
|
173 |
+
use_bias=False,
|
174 |
+
name=prefix + 'expand')(x)
|
175 |
+
x = CustomBatchNormalization(axis=channel_axis,
|
176 |
+
epsilon=1e-3,
|
177 |
+
momentum=0.999,
|
178 |
+
name=prefix + 'expand/BatchNorm')(x)
|
179 |
+
x = Activation(activation)(x)
|
180 |
+
|
181 |
+
#if stride == 2:
|
182 |
+
#x = ZeroPadding2D(padding=correct_pad(K, x, kernel_size),
|
183 |
+
#name=prefix + 'depthwise/pad')(x)
|
184 |
+
x = DeeplabDepthwiseConv2D(kernel_size,
|
185 |
+
strides=stride,
|
186 |
+
padding='same',# if stride == 1 else 'valid',
|
187 |
+
dilation_rate=(rate, rate),
|
188 |
+
use_bias=False,
|
189 |
+
name=prefix + 'depthwise/Conv')(x)
|
190 |
+
x = CustomBatchNormalization(axis=channel_axis,
|
191 |
+
epsilon=1e-3,
|
192 |
+
momentum=0.999,
|
193 |
+
name=prefix + 'depthwise/BatchNorm')(x)
|
194 |
+
x = Activation(activation)(x)
|
195 |
+
|
196 |
+
if se_ratio:
|
197 |
+
x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix)
|
198 |
+
|
199 |
+
x = DeeplabConv2D(filters,
|
200 |
+
kernel_size=1,
|
201 |
+
padding='same',
|
202 |
+
use_bias=False,
|
203 |
+
name=prefix + 'project')(x)
|
204 |
+
x = CustomBatchNormalization(axis=channel_axis,
|
205 |
+
epsilon=1e-3,
|
206 |
+
momentum=0.999,
|
207 |
+
name=prefix + 'project/BatchNorm')(x)
|
208 |
+
|
209 |
+
#if stride == 1 and infilters == filters:
|
210 |
+
#x = Add(name=prefix + 'Add')([shortcut, x])
|
211 |
+
if skip_connection:
|
212 |
+
x = Add(name=prefix + 'Add')([shortcut, x])
|
213 |
+
return x
|
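A functional-API sketch of a single inverted residual block on a 64x64x16 feature map (illustrative values, not taken from any of the stacks below):

feat = Input(shape=(64, 64, 16))
out = _inverted_res_block(feat, expansion=4, filters=16, kernel_size=3,
                          stride=1, se_ratio=0.25, activation=hard_swish,
                          block_id=1, skip_connection=True, rate=2)
# out keeps shape (None, 64, 64, 16): stride 1 and matching channel counts make
# the residual Add valid, while rate=2 gives the depthwise conv an atrous view.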
214 |
+
|
215 |
+
|
216 |
+
def MobileNetV3(stack_fn,
|
217 |
+
last_point_ch,
|
218 |
+
input_shape=None,
|
219 |
+
alpha=1.0,
|
220 |
+
model_type='large',
|
221 |
+
minimalistic=False,
|
222 |
+
include_top=True,
|
223 |
+
weights='imagenet',
|
224 |
+
input_tensor=None,
|
225 |
+
classes=1000,
|
226 |
+
pooling=None,
|
227 |
+
dropout_rate=0.2,
|
228 |
+
**kwargs):
|
229 |
+
"""Instantiates the MobileNetV3 architecture.
|
230 |
+
# Arguments
|
231 |
+
stack_fn: a function that returns output tensor for the
|
232 |
+
stacked residual blocks.
|
233 |
+
last_point_ch: number channels at the last layer (before top)
|
234 |
+
input_shape: optional shape tuple, to be specified if you would
|
235 |
+
like to use a model with an input img resolution that is not
|
236 |
+
(224, 224, 3).
|
237 |
+
It should have exactly 3 input channels, e.g. (224, 224, 3).
|
238 |
+
You can also omit this option if you would like
|
239 |
+
to infer input_shape from an input_tensor.
|
240 |
+
If you choose to include both input_tensor and input_shape then
|
241 |
+
input_shape will be used if they match, if the shapes
|
242 |
+
do not match then we will throw an error.
|
243 |
+
E.g. `(160, 160, 3)` would be one valid value.
|
244 |
+
alpha: controls the width of the network. This is known as the
|
245 |
+
depth multiplier in the MobileNetV3 paper, but the name is kept for
|
246 |
+
consistency with MobileNetV1 in Keras.
|
247 |
+
- If `alpha` < 1.0, proportionally decreases the number
|
248 |
+
of filters in each layer.
|
249 |
+
- If `alpha` > 1.0, proportionally increases the number
|
250 |
+
of filters in each layer.
|
251 |
+
- If `alpha` = 1, default number of filters from the paper
|
252 |
+
are used at each layer.
|
253 |
+
model_type: MobileNetV3 is defined as two models: large and small. These
|
254 |
+
models are targeted at high and low resource use cases respectively.
|
255 |
+
minimalistic: In addition to large and small models this module also contains
|
256 |
+
so-called minimalistic models, these models have the same per-layer
|
257 |
+
dimension characteristics as MobileNetV3; however, they don't utilize any
|
258 |
+
of the advanced blocks (squeeze-and-excite units, hard-swish, and 5x5
|
259 |
+
convolutions). While these models are less efficient on CPU, they are
|
260 |
+
much more performant on GPU/DSP.
|
261 |
+
include_top: whether to include the fully-connected
|
262 |
+
layer at the top of the network.
|
263 |
+
weights: one of `None` (random initialization),
|
264 |
+
'imagenet' (pre-training on ImageNet),
|
265 |
+
or the path to the weights file to be loaded.
|
266 |
+
input_tensor: optional Keras tensor (i.e. output of
|
267 |
+
`layers.Input()`)
|
268 |
+
to use as image input for the model.
|
269 |
+
classes: optional number of classes to classify images
|
270 |
+
into, only to be specified if `include_top` is True, and
|
271 |
+
if no `weights` argument is specified.
|
272 |
+
pooling: optional pooling mode for feature extraction
|
273 |
+
when `include_top` is `False`.
|
274 |
+
- `None` means that the output of the model will be
|
275 |
+
the 4D tensor output of the
|
276 |
+
last convolutional layer.
|
277 |
+
- `avg` means that global average pooling
|
278 |
+
will be applied to the output of the
|
279 |
+
last convolutional layer, and thus
|
280 |
+
the output of the model will be a 2D tensor.
|
281 |
+
- `max` means that global max pooling will
|
282 |
+
be applied.
|
283 |
+
dropout_rate: fraction of the input units to drop on the last layer
|
284 |
+
# Returns
|
285 |
+
A Keras model instance.
|
286 |
+
# Raises
|
287 |
+
ValueError: in case of invalid model type, argument for `weights`,
|
288 |
+
or invalid input shape when weights='imagenet'
|
289 |
+
"""
|
290 |
+
|
291 |
+
if not (weights in {'imagenet', None} or os.path.exists(weights)):
|
292 |
+
raise ValueError('The `weights` argument should be either '
|
293 |
+
'`None` (random initialization), `imagenet` '
|
294 |
+
'(pre-training on ImageNet), '
|
295 |
+
'or the path to the weights file to be loaded.')
|
296 |
+
|
297 |
+
if weights == 'imagenet' and include_top and classes != 1000:
|
298 |
+
raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
|
299 |
+
'as true, `classes` should be 1000')
|
300 |
+
|
301 |
+
# Determine proper input shape
|
302 |
+
input_shape = _obtain_input_shape(input_shape,
|
303 |
+
default_size=224,
|
304 |
+
min_size=32,
|
305 |
+
data_format=K.image_data_format(),
|
306 |
+
require_flatten=include_top,
|
307 |
+
weights=weights)
|
308 |
+
|
309 |
+
# If input_shape is None and input_tensor is None, use the standard shape
|
310 |
+
if input_shape is None and input_tensor is None:
|
311 |
+
input_shape = (None, None, 3)
|
312 |
+
|
313 |
+
if K.image_data_format() == 'channels_last':
|
314 |
+
row_axis, col_axis = (0, 1)
|
315 |
+
else:
|
316 |
+
row_axis, col_axis = (1, 2)
|
317 |
+
rows = input_shape[row_axis]
|
318 |
+
cols = input_shape[col_axis]
|
319 |
+
if rows and cols and (rows < 32 or cols < 32):
|
320 |
+
raise ValueError('Input size must be at least 32x32; got `input_shape=' +
|
321 |
+
str(input_shape) + '`')
|
322 |
+
if weights == 'imagenet':
|
323 |
+
if minimalistic is False and alpha not in [0.75, 1.0] \
|
324 |
+
or minimalistic is True and alpha != 1.0:
|
325 |
+
raise ValueError('If imagenet weights are being loaded, '
|
326 |
+
'alpha can be one of `0.75`, `1.0` for non minimalistic'
|
327 |
+
' or `1.0` for minimalistic only.')
|
328 |
+
|
329 |
+
if rows != cols or rows != 224:
|
330 |
+
warnings.warn('`input_shape` is undefined or non-square, '
|
331 |
+
'or `rows` is not 224.'
|
332 |
+
' Weights for input shape (224, 224) will be'
|
333 |
+
' loaded as the default.')
|
334 |
+
|
335 |
+
if input_tensor is None:
|
336 |
+
img_input = Input(shape=input_shape)
|
337 |
+
else:
|
338 |
+
#if not K.is_keras_tensor(input_tensor):
|
339 |
+
#img_input = Input(tensor=input_tensor, shape=input_shape)
|
340 |
+
#else:
|
341 |
+
#img_input = input_tensor
|
342 |
+
img_input = input_tensor
|
343 |
+
|
344 |
+
channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
|
345 |
+
|
346 |
+
if minimalistic:
|
347 |
+
kernel = 3
|
348 |
+
activation = relu
|
349 |
+
se_ratio = None
|
350 |
+
else:
|
351 |
+
kernel = 5
|
352 |
+
activation = hard_swish
|
353 |
+
se_ratio = 0.25
|
354 |
+
|
355 |
+
x = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
|
356 |
+
name='Conv_pad')(img_input)
|
357 |
+
x = DeeplabConv2D(16,
|
358 |
+
kernel_size=3,
|
359 |
+
strides=(2, 2),
|
360 |
+
padding='valid',
|
361 |
+
use_bias=False,
|
362 |
+
name='Conv')(x)
|
363 |
+
x = CustomBatchNormalization(axis=channel_axis,
|
364 |
+
epsilon=1e-3,
|
365 |
+
momentum=0.999,
|
366 |
+
name='Conv/BatchNorm')(x)
|
367 |
+
x = Activation(activation)(x)
|
368 |
+
|
369 |
+
x, skip_feature = stack_fn(x, kernel, activation, se_ratio)
|
370 |
+
# keep the end of the feature extractor as the final feature map
|
371 |
+
final_feature = x
|
372 |
+
|
373 |
+
last_conv_ch = _depth(K.int_shape(x)[channel_axis] * 6)
|
374 |
+
|
375 |
+
# if the width multiplier is greater than 1 we
|
376 |
+
# increase the number of output channels
|
377 |
+
if alpha > 1.0:
|
378 |
+
last_point_ch = _depth(last_point_ch * alpha)
|
379 |
+
|
380 |
+
x = DeeplabConv2D(last_conv_ch,
|
381 |
+
kernel_size=1,
|
382 |
+
padding='same',
|
383 |
+
use_bias=False,
|
384 |
+
name='Conv_1')(x)
|
385 |
+
x = CustomBatchNormalization(axis=channel_axis,
|
386 |
+
epsilon=1e-3,
|
387 |
+
momentum=0.999,
|
388 |
+
name='Conv_1/BatchNorm')(x)
|
389 |
+
x = Activation(activation)(x)
|
390 |
+
|
391 |
+
if include_top:
|
392 |
+
x = GlobalAveragePooling2D()(x)
|
393 |
+
if channel_axis == 1:
|
394 |
+
x = Reshape((last_conv_ch, 1, 1))(x)
|
395 |
+
else:
|
396 |
+
x = Reshape((1, 1, last_conv_ch))(x)
|
397 |
+
x = DeeplabConv2D(last_point_ch,
|
398 |
+
kernel_size=1,
|
399 |
+
padding='same',
|
400 |
+
name='Conv_2')(x)
|
401 |
+
x = Activation(activation)(x)
|
402 |
+
if dropout_rate > 0:
|
403 |
+
x = Dropout(dropout_rate)(x)
|
404 |
+
x = DeeplabConv2D(classes,
|
405 |
+
kernel_size=1,
|
406 |
+
padding='same',
|
407 |
+
name='Logits')(x)
|
408 |
+
x = Flatten()(x)
|
409 |
+
x = Softmax(name='Predictions/Softmax')(x)
|
410 |
+
else:
|
411 |
+
if pooling == 'avg':
|
412 |
+
x = GlobalAveragePooling2D(name='avg_pool')(x)
|
413 |
+
elif pooling == 'max':
|
414 |
+
x = GlobalMaxPooling2D(name='max_pool')(x)
|
415 |
+
# Ensure that the model takes into account
|
416 |
+
# any potential predecessors of `input_tensor`.
|
417 |
+
if input_tensor is not None:
|
418 |
+
inputs = get_source_inputs(input_tensor)
|
419 |
+
else:
|
420 |
+
inputs = img_input
|
421 |
+
|
422 |
+
# Create model.
|
423 |
+
model = Model(inputs, x, name='MobilenetV3' + model_type)
|
424 |
+
|
425 |
+
# Load weights.
|
426 |
+
if weights == 'imagenet':
|
427 |
+
model_name = "{}{}_224_{}_float".format(
|
428 |
+
model_type, '_minimalistic' if minimalistic else '', str(alpha))
|
429 |
+
if include_top:
|
430 |
+
file_name = 'weights_mobilenet_v3_' + model_name + '.h5'
|
431 |
+
file_hash = WEIGHTS_HASHES[model_name][0]
|
432 |
+
else:
|
433 |
+
file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5'
|
434 |
+
file_hash = WEIGHTS_HASHES[model_name][1]
|
435 |
+
weights_path = get_file(file_name,
|
436 |
+
BASE_WEIGHT_PATH + file_name,
|
437 |
+
cache_subdir='models',
|
438 |
+
file_hash=file_hash)
|
439 |
+
model.load_weights(weights_path)
|
440 |
+
elif weights is not None:
|
441 |
+
model.load_weights(weights)
|
442 |
+
|
443 |
+
#return model
|
444 |
+
return final_feature, skip_feature, len(model.layers) - 3
|
445 |
+
|
446 |
+
|
447 |
+
|
448 |
+
def MobileNetV3Small(input_shape=None,
|
449 |
+
alpha=1.0,
|
450 |
+
OS=8,
|
451 |
+
minimalistic=False,
|
452 |
+
include_top=True,
|
453 |
+
weights='imagenet',
|
454 |
+
input_tensor=None,
|
455 |
+
classes=1000,
|
456 |
+
pooling=None,
|
457 |
+
dropout_rate=0.2,
|
458 |
+
**kwargs):
|
459 |
+
"""
|
460 |
+
Modified MobileNetV3Small feature extractor body
|
461 |
+
with specified output stride and skip level feature
|
462 |
+
"""
|
463 |
+
if OS == 8:
|
464 |
+
origin_os16_stride = 1
|
465 |
+
origin_os16_block_rate = 2
|
466 |
+
origin_os32_stride = 1
|
467 |
+
origin_os32_block_rate = 4
|
468 |
+
elif OS == 16:
|
469 |
+
origin_os16_stride = 2
|
470 |
+
origin_os16_block_rate = 1
|
471 |
+
origin_os32_stride = 1
|
472 |
+
origin_os32_block_rate = 2
|
473 |
+
elif OS == 32:
|
474 |
+
origin_os16_stride = 2
|
475 |
+
origin_os16_block_rate = 1
|
476 |
+
origin_os32_stride = 2
|
477 |
+
origin_os32_block_rate = 1
|
478 |
+
else:
|
479 |
+
raise ValueError('invalid output stride', OS)
|
480 |
+
|
481 |
+
def stack_fn(x, kernel, activation, se_ratio):
|
482 |
+
def depth(d):
|
483 |
+
return _depth(d * alpha)
|
484 |
+
|
485 |
+
x = _inverted_res_block(x, expansion=1, filters=depth(16), kernel_size=3,
|
486 |
+
stride=2, se_ratio=se_ratio, activation=relu, block_id=0, skip_connection=False)
|
487 |
+
# skip level feature, with output stride = 4
|
488 |
+
skip = x
|
489 |
+
|
490 |
+
x = _inverted_res_block(x, expansion=72. / 16, filters=depth(24), kernel_size=3,
|
491 |
+
stride=2, se_ratio=None, activation=relu, block_id=1, skip_connection=False)
|
492 |
+
x = _inverted_res_block(x, expansion=88. / 24, filters=depth(24), kernel_size=3,
|
493 |
+
stride=1, se_ratio=None, activation=relu, block_id=2, skip_connection=True)
|
494 |
+
|
495 |
+
# original output stride changes to 16 from here, so we start to control block stride and dilation rate
|
496 |
+
x = _inverted_res_block(x, expansion=4, filters=depth(40), kernel_size=kernel,
|
497 |
+
stride=origin_os16_stride, se_ratio=se_ratio, activation=activation, block_id=3, skip_connection=False) # origin: stride=2!
|
498 |
+
x = _inverted_res_block(x, expansion=6, filters=depth(40), kernel_size=kernel,
|
499 |
+
stride=1, se_ratio=se_ratio, activation=activation, block_id=4, skip_connection=True, rate=origin_os16_block_rate)
|
500 |
+
x = _inverted_res_block(x, expansion=6, filters=depth(40), kernel_size=kernel,
|
501 |
+
stride=1, se_ratio=se_ratio, activation=activation, block_id=5, skip_connection=True, rate=origin_os16_block_rate)
|
502 |
+
x = _inverted_res_block(x, expansion=3, filters=depth(48), kernel_size=kernel,
|
503 |
+
stride=1, se_ratio=se_ratio, activation=activation, block_id=6, skip_connection=False, rate=origin_os16_block_rate)
|
504 |
+
x = _inverted_res_block(x, expansion=3, filters=depth(48), kernel_size=kernel,
|
505 |
+
stride=1, se_ratio=se_ratio, activation=activation, block_id=7, skip_connection=True, rate=origin_os16_block_rate)
|
506 |
+
# original output stride changes to 32 from here
|
507 |
+
x = _inverted_res_block(x, expansion=6, filters=depth(96), kernel_size=kernel,
|
508 |
+
stride=origin_os32_stride, se_ratio=se_ratio, activation=activation, block_id=8, skip_connection=False, rate=origin_os16_block_rate) # origin: stride=2!
|
509 |
+
x = _inverted_res_block(x, expansion=6, filters=depth(96), kernel_size=kernel,
|
510 |
+
stride=1, se_ratio=se_ratio, activation=activation, block_id=9, skip_connection=True, rate=origin_os32_block_rate)
|
511 |
+
x = _inverted_res_block(x, expansion=6, filters=depth(96), kernel_size=kernel,
|
512 |
+
stride=1, se_ratio=se_ratio, activation=activation, block_id=10, skip_connection=True, rate=origin_os32_block_rate)
|
513 |
+
return x, skip
|
514 |
+
|
515 |
+
return MobileNetV3(stack_fn,
|
516 |
+
1024,
|
517 |
+
input_shape,
|
518 |
+
alpha,
|
519 |
+
'small',
|
520 |
+
minimalistic,
|
521 |
+
include_top,
|
522 |
+
weights,
|
523 |
+
input_tensor,
|
524 |
+
classes,
|
525 |
+
pooling,
|
526 |
+
dropout_rate,
|
527 |
+
**kwargs)
|
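The OS argument handled above boils down to this mapping (a summary of the if/elif branch, not new behaviour):

os_config = {
    #  OS: (os16_stride, os16_rate, os32_stride, os32_rate)
    8:  (1, 2, 1, 4),   # remove both downsamplings, compensate with atrous rates 2 and 4
    16: (2, 1, 1, 2),   # keep the OS16 downsampling, dilate only the last stage
    32: (2, 1, 2, 1),   # original MobileNetV3 strides, no dilation
}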
528 |
+
|
529 |
+
|
530 |
+
def MobileNetV3Large(input_shape=None,
|
531 |
+
alpha=1.0,
|
532 |
+
OS=8,
|
533 |
+
minimalistic=False,
|
534 |
+
include_top=True,
|
535 |
+
weights='imagenet',
|
536 |
+
input_tensor=None,
|
537 |
+
classes=1000,
|
538 |
+
pooling=None,
|
539 |
+
dropout_rate=0.2,
|
540 |
+
**kwargs):
|
541 |
+
"""
|
542 |
+
Modified MobileNetV3Large feature extractor body
|
543 |
+
with specified output stride and skip level feature
|
544 |
+
"""
|
545 |
+
if OS == 8:
|
546 |
+
origin_os16_stride = 1
|
547 |
+
origin_os16_block_rate = 2
|
548 |
+
origin_os32_stride = 1
|
549 |
+
origin_os32_block_rate = 4
|
550 |
+
elif OS == 16:
|
551 |
+
origin_os16_stride = 2
|
552 |
+
origin_os16_block_rate = 1
|
553 |
+
origin_os32_stride = 1
|
554 |
+
origin_os32_block_rate = 2
|
555 |
+
elif OS == 32:
|
556 |
+
origin_os16_stride = 2
|
557 |
+
origin_os16_block_rate = 1
|
558 |
+
origin_os32_stride = 2
|
559 |
+
origin_os32_block_rate = 1
|
560 |
+
else:
|
561 |
+
raise ValueError('invalid output stride', OS)
|
562 |
+
|
563 |
+
def stack_fn(x, kernel, activation, se_ratio):
|
564 |
+
def depth(d):
|
565 |
+
return _depth(d * alpha)
|
566 |
+
x = _inverted_res_block(x, expansion=1, filters=depth(16), kernel_size=3,
|
567 |
+
stride=1, se_ratio=None, activation=relu, block_id=0, skip_connection=True)
|
568 |
+
x = _inverted_res_block(x, expansion=4, filters=depth(24), kernel_size=3,
|
569 |
+
stride=2, se_ratio=None, activation=relu, block_id=1, skip_connection=False)
|
570 |
+
x = _inverted_res_block(x, expansion=3, filters=depth(24), kernel_size=3,
|
571 |
+
stride=1, se_ratio=None, activation=relu, block_id=2, skip_connection=True)
|
572 |
+
# skip level feature, with output stride = 4
|
573 |
+
skip = x
|
574 |
+
|
575 |
+
x = _inverted_res_block(x, expansion=3, filters=depth(40), kernel_size=kernel,
|
576 |
+
stride=2, se_ratio=se_ratio, activation=relu, block_id=3, skip_connection=False)
|
577 |
+
x = _inverted_res_block(x, expansion=3, filters=depth(40), kernel_size=kernel,
|
578 |
+
stride=1, se_ratio=se_ratio, activation=relu, block_id=4, skip_connection=True)
|
579 |
+
x = _inverted_res_block(x, expansion=3, filters=depth(40), kernel_size=kernel,
|
580 |
+
stride=1, se_ratio=se_ratio, activation=relu, block_id=5, skip_connection=True)
|
581 |
+
|
582 |
+
# original output stride changes to 16 from here, so we start to control block stride and dilation rate
|
583 |
+
x = _inverted_res_block(x, expansion=6, filters=depth(80), kernel_size=3,
|
584 |
+
stride=origin_os16_stride, se_ratio=None, activation=activation, block_id=6, skip_connection=False) # origin: stride=2!
|
585 |
+
x = _inverted_res_block(x, expansion=2.5, filters=depth(80), kernel_size=3,
|
586 |
+
stride=1, se_ratio=None, activation=activation, block_id=7, skip_connection=True, rate=origin_os16_block_rate)
|
587 |
+
x = _inverted_res_block(x, expansion=2.3, filters=depth(80), kernel_size=3,
|
588 |
+
stride=1, se_ratio=None, activation=activation, block_id=8, skip_connection=True, rate=origin_os16_block_rate)
|
589 |
+
x = _inverted_res_block(x, expansion=2.3, filters=depth(80), kernel_size=3,
|
590 |
+
stride=1, se_ratio=None, activation=activation, block_id=9, skip_connection=True, rate=origin_os16_block_rate)
|
591 |
+
x = _inverted_res_block(x, expansion=6, filters=depth(112), kernel_size=3,
|
592 |
+
stride=1, se_ratio=se_ratio, activation=activation, block_id=10, skip_connection=False, rate=origin_os16_block_rate)
|
593 |
+
x = _inverted_res_block(x, expansion=6, filters=depth(112), kernel_size=3,
|
594 |
+
stride=1, se_ratio=se_ratio, activation=activation, block_id=11, skip_connection=True, rate=origin_os16_block_rate)
|
595 |
+
# original output stride changes to 32 from here
|
596 |
+
x = _inverted_res_block(x, expansion=6, filters=depth(160), kernel_size=kernel,
|
597 |
+
stride=origin_os32_stride, se_ratio=se_ratio,
|
598 |
+
activation=activation, block_id=12, skip_connection=False, rate=origin_os16_block_rate) # origin: stride=2!
|
599 |
+
x = _inverted_res_block(x, expansion=6, filters=depth(160), kernel_size=kernel,
|
600 |
+
stride=1, se_ratio=se_ratio,
|
601 |
+
activation=activation, block_id=13, skip_connection=True, rate=origin_os32_block_rate)
|
602 |
+
x = _inverted_res_block(x, expansion=6, filters=depth(160), kernel_size=kernel,
|
603 |
+
stride=1, se_ratio=se_ratio,
|
604 |
+
activation=activation, block_id=14, skip_connection=True, rate=origin_os32_block_rate)
|
605 |
+
return x, skip
|
606 |
+
|
607 |
+
return MobileNetV3(stack_fn,
|
608 |
+
1280,
|
609 |
+
input_shape,
|
610 |
+
alpha,
|
611 |
+
'large',
|
612 |
+
minimalistic,
|
613 |
+
include_top,
|
614 |
+
weights,
|
615 |
+
input_tensor,
|
616 |
+
classes,
|
617 |
+
pooling,
|
618 |
+
dropout_rate,
|
619 |
+
**kwargs)
|
620 |
+
|
621 |
+
|
622 |
+
setattr(MobileNetV3Small, '__doc__', MobileNetV3.__doc__)
|
623 |
+
setattr(MobileNetV3Large, '__doc__', MobileNetV3.__doc__)
|
624 |
+
|
625 |
+
|
626 |
+
|
627 |
+
def Deeplabv3pMobileNetV3Large(input_shape=(512, 512, 3),
|
628 |
+
alpha=1.0,
|
629 |
+
weights='imagenet',
|
630 |
+
input_tensor=None,
|
631 |
+
num_classes=21,
|
632 |
+
OS=8):
|
633 |
+
""" Instantiates the Deeplabv3+ MobileNetV3Large architecture
|
634 |
+
# Arguments
|
635 |
+
input_shape: shape of input image. format HxWxC
|
636 |
+
PASCAL VOC model was trained on (512,512,3) images
|
637 |
+
alpha: controls the width of the MobileNetV3Large network. This is known as the
|
638 |
+
width multiplier in the MobileNetV3 paper.
|
639 |
+
- If `alpha` < 1.0, proportionally decreases the number
|
640 |
+
of filters in each layer.
|
641 |
+
- If `alpha` > 1.0, proportionally increases the number
|
642 |
+
of filters in each layer.
|
643 |
+
- If `alpha` = 1, default number of filters from the paper
|
644 |
+
are used at each layer.
|
645 |
+
weights: pretrained weights type
|
646 |
+
- imagenet: pre-trained on Imagenet
|
647 |
+
- None : random initialization
|
648 |
+
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
649 |
+
to use as image input for the model.
|
650 |
+
num_classes: number of desired classes.
|
651 |
+
OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
|
652 |
+
|
653 |
+
# Returns
|
654 |
+
A Keras model instance.
|
655 |
+
"""
|
656 |
+
|
657 |
+
if not (weights in {'imagenet', None}):
|
658 |
+
raise ValueError('The `weights` argument should be either '
|
659 |
+
'`imagenet` (pre-trained on Imagenet) or '
|
660 |
+
'`None` (random initialization)')
|
661 |
+
|
662 |
+
if input_tensor is None:
|
663 |
+
img_input = Input(shape=input_shape, name='image_input')
|
664 |
+
else:
|
665 |
+
img_input = input_tensor
|
666 |
+
|
667 |
+
# normalize input image
|
668 |
+
img_norm = Lambda(normalize, name='input_normalize')(img_input)
|
669 |
+
|
670 |
+
# backbone body for feature extract
|
671 |
+
x, skip_feature, backbone_len = MobileNetV3Large(include_top=False, input_tensor=img_norm, weights=weights, OS=OS, alpha=alpha)
|
672 |
+
|
673 |
+
# ASPP block
|
674 |
+
x = ASPP_block(x, OS)
|
675 |
+
|
676 |
+
# Deeplabv3+ decoder for feature projection
|
677 |
+
x = Decoder_block(x, skip_feature)
|
678 |
+
|
679 |
+
# Final prediction conv block
|
680 |
+
x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
|
681 |
+
x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
|
682 |
+
x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
|
683 |
+
x = Softmax(name='Predictions/Softmax')(x)
|
684 |
+
|
685 |
+
# Ensure that the model takes into account
|
686 |
+
# any potential predecessors of `input_tensor`.
|
687 |
+
#if input_tensor is not None:
|
688 |
+
#inputs = get_source_inputs(input_tensor)
|
689 |
+
#else:
|
690 |
+
#inputs = img_input
|
691 |
+
model = Model(img_input, x, name='deeplabv3p_mobilenetv3large')
|
692 |
+
|
693 |
+
return model, backbone_len
|
694 |
+
|
695 |
+
|
696 |
+
def Deeplabv3pLiteMobileNetV3Large(input_shape=(512, 512, 3),
|
697 |
+
alpha=1.0,
|
698 |
+
weights='imagenet',
|
699 |
+
input_tensor=None,
|
700 |
+
num_classes=21,
|
701 |
+
OS=8):
|
702 |
+
""" Instantiates the Deeplabv3+ MobileNetV3LargeLite architecture
|
703 |
+
# Arguments
|
704 |
+
input_shape: shape of input image. format HxWxC
|
705 |
+
PASCAL VOC model was trained on (512,512,3) images
|
706 |
+
alpha: controls the width of the MobileNetV3Large network. This is known as the
|
707 |
+
width multiplier in the MobileNetV3 paper.
|
708 |
+
- If `alpha` < 1.0, proportionally decreases the number
|
709 |
+
of filters in each layer.
|
710 |
+
- If `alpha` > 1.0, proportionally increases the number
|
711 |
+
of filters in each layer.
|
712 |
+
- If `alpha` = 1, default number of filters from the paper
|
713 |
+
are used at each layer.
|
714 |
+
weights: pretrained weights type
|
715 |
+
- imagenet: pre-trained on Imagenet
|
716 |
+
- None : random initialization
|
717 |
+
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
718 |
+
to use as image input for the model.
|
719 |
+
num_classes: number of desired classes.
|
720 |
+
OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
|
721 |
+
|
722 |
+
# Returns
|
723 |
+
A Keras model instance.
|
724 |
+
# Raises
|
725 |
+
RuntimeError: If attempting to run this model with a
|
726 |
+
backend that does not support separable convolutions.
|
727 |
+
ValueError: in case of invalid argument for `weights` or `backbone`
|
728 |
+
"""
|
729 |
+
if not (weights in {'imagenet', None}):
|
730 |
+
raise ValueError('The `weights` argument should be either '
|
731 |
+
'`imagenet` (pre-trained on Imagenet) or '
|
732 |
+
'`None` (random initialization)')
|
733 |
+
|
734 |
+
if input_tensor is None:
|
735 |
+
img_input = Input(shape=input_shape, name='image_input')
|
736 |
+
else:
|
737 |
+
img_input = input_tensor
|
738 |
+
|
739 |
+
# normalize input image
|
740 |
+
img_norm = Lambda(normalize, name='input_normalize')(img_input)
|
741 |
+
|
742 |
+
# backbone body for feature extract
|
743 |
+
x, _, backbone_len = MobileNetV3Large(include_top=False, input_tensor=img_norm, weights=weights, OS=OS, alpha=alpha)
|
744 |
+
|
745 |
+
# use ASPP Lite block & no decode block
|
746 |
+
x = ASPP_Lite_block(x)
|
747 |
+
|
748 |
+
# Final prediction conv block
|
749 |
+
x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
|
750 |
+
x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
|
751 |
+
x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
|
752 |
+
x = Softmax(name='Predictions/Softmax')(x)
|
753 |
+
|
754 |
+
# Ensure that the model takes into account
|
755 |
+
# any potential predecessors of `input_tensor`.
|
756 |
+
#if input_tensor is not None:
|
757 |
+
#inputs = get_source_inputs(input_tensor)
|
758 |
+
#else:
|
759 |
+
#inputs = img_input
|
760 |
+
model = Model(img_input, x, name='deeplabv3p_mobilenetv3large_lite')
|
761 |
+
|
762 |
+
return model, backbone_len
|
763 |
+
|
764 |
+
|
765 |
+
|
766 |
+
def Deeplabv3pMobileNetV3Small(input_shape=(512, 512, 3),
|
767 |
+
alpha=1.0,
|
768 |
+
weights='imagenet',
|
769 |
+
input_tensor=None,
|
770 |
+
num_classes=21,
|
771 |
+
OS=8):
|
772 |
+
""" Instantiates the Deeplabv3+ MobileNetV3Small architecture
|
773 |
+
# Arguments
|
774 |
+
input_shape: shape of input image. format HxWxC
|
775 |
+
PASCAL VOC model was trained on (512,512,3) images
|
776 |
+
alpha: controls the width of the MobileNetV3Small network. This is known as the
|
777 |
+
width multiplier in the MobileNetV3 paper.
|
778 |
+
- If `alpha` < 1.0, proportionally decreases the number
|
779 |
+
of filters in each layer.
|
780 |
+
- If `alpha` > 1.0, proportionally increases the number
|
781 |
+
of filters in each layer.
|
782 |
+
- If `alpha` = 1, default number of filters from the paper
|
783 |
+
are used at each layer.
|
784 |
+
weights: pretrained weights type
|
785 |
+
- imagenet: pre-trained on Imagenet
|
786 |
+
- None : random initialization
|
787 |
+
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
788 |
+
to use as image input for the model.
|
789 |
+
num_classes: number of desired classes
|
790 |
+
OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
|
791 |
+
|
792 |
+
# Returns
|
793 |
+
A Keras model instance.
|
794 |
+
"""
|
795 |
+
if not (weights in {'imagenet', None}):
|
796 |
+
raise ValueError('The `weights` argument should be either '
|
797 |
+
'`imagenet` (pre-trained on Imagenet) or '
|
798 |
+
'`None` (random initialization)')
|
799 |
+
|
800 |
+
if input_tensor is None:
|
801 |
+
img_input = Input(shape=input_shape, name='image_input')
|
802 |
+
else:
|
803 |
+
img_input = input_tensor
|
804 |
+
|
805 |
+
# normalize input image
|
806 |
+
img_norm = Lambda(normalize, name='input_normalize')(img_input)
|
807 |
+
|
808 |
+
# backbone body for feature extract
|
809 |
+
x, skip_feature, backbone_len = MobileNetV3Small(include_top=False, input_tensor=img_norm, weights=weights, OS=OS, alpha=alpha)
|
810 |
+
|
811 |
+
# ASPP block
|
812 |
+
x = ASPP_block(x, OS)
|
813 |
+
|
814 |
+
# Deeplabv3+ decoder for feature projection
|
815 |
+
x = Decoder_block(x, skip_feature)
|
816 |
+
|
817 |
+
# Final prediction conv block
|
818 |
+
x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
|
819 |
+
x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
|
820 |
+
x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
|
821 |
+
x = Softmax(name='Predictions/Softmax')(x)
|
822 |
+
|
823 |
+
# Ensure that the model takes into account
|
824 |
+
# any potential predecessors of `input_tensor`.
|
825 |
+
#if input_tensor is not None:
|
826 |
+
#inputs = get_source_inputs(input_tensor)
|
827 |
+
#else:
|
828 |
+
#inputs = img_input
|
829 |
+
model = Model(img_input, x, name='deeplabv3p_mobilenetv3small')
|
830 |
+
|
831 |
+
return model, backbone_len
|
832 |
+
|
833 |
+
|
834 |
+
|
835 |
+
def Deeplabv3pLiteMobileNetV3Small(input_shape=(512, 512, 3),
|
836 |
+
alpha=1.0,
|
837 |
+
weights='imagenet',
|
838 |
+
input_tensor=None,
|
839 |
+
num_classes=21,
|
840 |
+
OS=8):
|
841 |
+
""" Instantiates the Deeplabv3+ MobileNetV3SmallLite architecture
|
842 |
+
# Arguments
|
843 |
+
input_shape: shape of input image. format HxWxC
|
844 |
+
PASCAL VOC model was trained on (512,512,3) images
|
845 |
+
alpha: controls the width of the MobileNetV3Small network. This is known as the
|
846 |
+
width multiplier in the MobileNetV3 paper.
|
847 |
+
- If `alpha` < 1.0, proportionally decreases the number
|
848 |
+
of filters in each layer.
|
849 |
+
- If `alpha` > 1.0, proportionally increases the number
|
850 |
+
of filters in each layer.
|
851 |
+
- If `alpha` = 1, default number of filters from the paper
|
852 |
+
are used at each layer.
|
853 |
+
weights: pretrained weights type
|
854 |
+
- imagenet: pre-trained on Imagenet
|
855 |
+
- None : random initialization
|
856 |
+
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
857 |
+
to use as image input for the model.
|
858 |
+
num_classes: number of desired classes.
|
859 |
+
OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
|
860 |
+
|
861 |
+
# Returns
|
862 |
+
A Keras model instance.
|
863 |
+
# Raises
|
864 |
+
RuntimeError: If attempting to run this model with a
|
865 |
+
backend that does not support separable convolutions.
|
866 |
+
ValueError: in case of invalid argument for `weights` or `backbone`
|
867 |
+
"""
|
868 |
+
if not (weights in {'imagenet', None}):
|
869 |
+
raise ValueError('The `weights` argument should be either '
|
870 |
+
'`imagenet` (pre-trained on Imagenet) or '
|
871 |
+
'`None` (random initialization)')
|
872 |
+
|
873 |
+
if input_tensor is None:
|
874 |
+
img_input = Input(shape=input_shape, name='image_input')
|
875 |
+
else:
|
876 |
+
img_input = input_tensor
|
877 |
+
|
878 |
+
# normalize input image
|
879 |
+
img_norm = Lambda(normalize, name='input_normalize')(img_input)
|
880 |
+
|
881 |
+
# backbone body for feature extract
|
882 |
+
x, _, backbone_len = MobileNetV3Small(include_top=False, input_tensor=img_norm, weights=weights, OS=OS, alpha=alpha)
|
883 |
+
|
884 |
+
# use ASPP Lite block & no decode block
|
885 |
+
x = ASPP_Lite_block(x)
|
886 |
+
|
887 |
+
# Final prediction conv block
|
888 |
+
x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
|
889 |
+
x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
|
890 |
+
x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
|
891 |
+
x = Softmax(name='Predictions/Softmax')(x)
|
892 |
+
|
893 |
+
# Ensure that the model takes into account
|
894 |
+
# any potential predecessors of `input_tensor`.
|
895 |
+
#if input_tensor is not None:
|
896 |
+
#inputs = get_source_inputs(input_tensor)
|
897 |
+
#else:
|
898 |
+
#inputs = img_input
|
899 |
+
model = Model(img_input, x, name='deeplabv3p_mobilenetv3small_lite')
|
900 |
+
|
901 |
+
return model, backbone_len
|
902 |
+
|
903 |
+
|
904 |
+
|
905 |
+
if __name__ == '__main__':
|
906 |
+
input_tensor = Input(shape=(512, 512, 3), name='image_input')
|
907 |
+
model, backbone_len = Deeplabv3pMobileNetV3Small(input_tensor=input_tensor,
|
908 |
+
alpha=1.0,
|
909 |
+
weights=None,
|
910 |
+
num_classes=21,
|
911 |
+
OS=8)
|
912 |
+
model.summary()
|
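Continuing the `__main__` example, a minimal inference sketch (assumption: a dummy batch stands in for a real 512x512 RGB image; normalization happens inside the model via the 'input_normalize' Lambda):

import numpy as np

dummy = np.random.randint(0, 255, size=(1, 512, 512, 3)).astype('float32')
pred = model.predict(dummy)                          # shape (1, 512*512, 21)
mask = np.argmax(pred, axis=-1).reshape(512, 512)    # per-pixel class ids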
models/deeplab/deeplabv3p/models/deeplabv3p_peleenet.py
ADDED
@@ -0,0 +1,428 @@
1 |
+
#!/usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
""" Deeplabv3+ PeleeNet model for Keras.
|
4 |
+
|
5 |
+
# Reference Paper:
|
6 |
+
- [Densely Connected Convolutional Networks](https://arxiv.org/pdf/1608.06993.pdf)
|
7 |
+
- [Pelee: A Real-Time Object Detection System on Mobile Devices](https://arxiv.org/abs/1804.06882)
|
8 |
+
"""
|
9 |
+
import os, sys
|
10 |
+
import warnings
|
11 |
+
|
12 |
+
from keras_applications.imagenet_utils import _obtain_input_shape
|
13 |
+
from keras_applications.imagenet_utils import preprocess_input as _preprocess_input
|
14 |
+
from tensorflow.keras.utils import get_source_inputs, get_file
|
15 |
+
from tensorflow.keras.models import Model
|
16 |
+
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, ReLU, \
|
17 |
+
MaxPooling2D, Concatenate, AveragePooling2D, Flatten, Dropout, Dense, GlobalAveragePooling2D, GlobalMaxPooling2D, Softmax, Reshape, Lambda
|
18 |
+
from tensorflow.keras import backend as K
|
19 |
+
|
20 |
+
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..'))
|
21 |
+
from deeplabv3p.models.layers import DeeplabConv2D, CustomBatchNormalization, ASPP_block, ASPP_Lite_block, Decoder_block, normalize, img_resize
|
22 |
+
|
23 |
+
|
24 |
+
BASE_WEIGHT_PATH = (
|
25 |
+
'https://github.com/david8862/tf-keras-image-classifier/'
|
26 |
+
'releases/download/v1.0.0/')
|
27 |
+
|
28 |
+
|
29 |
+
def preprocess_input(x):
|
30 |
+
"""
|
31 |
+
"mode" option description in preprocess_input
|
32 |
+
mode: One of "caffe", "tf" or "torch".
|
33 |
+
- caffe: will convert the images from RGB to BGR,
|
34 |
+
then will zero-center each color channel with
|
35 |
+
respect to the ImageNet dataset,
|
36 |
+
without scaling.
|
37 |
+
- tf: will scale pixels between -1 and 1,
|
38 |
+
sample-wise.
|
39 |
+
- torch: will scale pixels between 0 and 1 and then
|
40 |
+
will normalize each channel with respect to the
|
41 |
+
ImageNet dataset.
|
42 |
+
"""
|
43 |
+
#x = _preprocess_input(x, mode='tf', backend=K)
|
44 |
+
x /= 255.
|
45 |
+
mean = [0.485, 0.456, 0.406]
|
46 |
+
std = [0.229, 0.224, 0.225]
|
47 |
+
|
48 |
+
x[..., 0] -= mean[0]
|
49 |
+
x[..., 1] -= mean[1]
|
50 |
+
x[..., 2] -= mean[2]
|
51 |
+
if std is not None:
|
52 |
+
x[..., 0] /= std[0]
|
53 |
+
x[..., 1] /= std[1]
|
54 |
+
x[..., 2] /= std[2]
|
55 |
+
|
56 |
+
return x
|
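The torch-style normalization above, applied to a constant image (a sketch; the numbers are straightforward to verify):

import numpy as np

img = np.full((2, 2, 3), 128.0, dtype=np.float32)    # mid-grey RGB patch
out = preprocess_input(img)
# each channel becomes (128/255 - mean) / std, e.g. R: (0.502 - 0.485) / 0.229 ≈ 0.074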
57 |
+
|
58 |
+
|
59 |
+
def dense_graph(x, growth_rate, bottleneck_width, name=''):
|
60 |
+
growth_rate = int(growth_rate / 2)
|
61 |
+
inter_channel = int(growth_rate * bottleneck_width / 4) * 4
|
62 |
+
|
63 |
+
num_input_features = K.int_shape(x)[-1]
|
64 |
+
|
65 |
+
if inter_channel > num_input_features / 2:
|
66 |
+
inter_channel = int(num_input_features / 8) * 4
|
67 |
+
print('adjust inter_channel to ', inter_channel)
|
68 |
+
|
69 |
+
branch1 = basic_conv2d_graph(
|
70 |
+
x, inter_channel, kernel_size=1, strides=1, padding='valid', name=name + '_branch1a')
|
71 |
+
branch1 = basic_conv2d_graph(
|
72 |
+
branch1, growth_rate, kernel_size=3, strides=1, padding='same', name=name + '_branch1b')
|
73 |
+
|
74 |
+
branch2 = basic_conv2d_graph(
|
75 |
+
x, inter_channel, kernel_size=1, strides=1, padding='valid', name=name + '_branch2a')
|
76 |
+
branch2 = basic_conv2d_graph(
|
77 |
+
branch2, growth_rate, kernel_size=3, strides=1, padding='same', name=name + '_branch2b')
|
78 |
+
branch2 = basic_conv2d_graph(
|
79 |
+
branch2, growth_rate, kernel_size=3, strides=1, padding='same', name=name + '_branch2c')
|
80 |
+
|
81 |
+
out = Concatenate(axis=-1)([x, branch1, branch2])
|
82 |
+
|
83 |
+
return out
|
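Channel bookkeeping for the two-branch dense layer above (a sketch with illustrative sizes):

feat = Input(shape=(56, 56, 64))
out = dense_graph(feat, growth_rate=32, bottleneck_width=1, name='demo')
# growth_rate is halved to 16 per branch, so the concat adds 16 + 16 channels:
# K.int_shape(out) -> (None, 56, 56, 96)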
84 |
+
|
85 |
+
|
86 |
+
def dense_block_graph(x, num_layers, bn_size, growth_rate, name=''):
|
87 |
+
for i in range(num_layers):
|
88 |
+
x = dense_graph(x, growth_rate, bn_size, name=name + '_denselayer{}'.format(i + 1))
|
89 |
+
|
90 |
+
return x
|
91 |
+
|
92 |
+
|
93 |
+
def stem_block_graph(x, num_init_features, name=''):
|
94 |
+
num_stem_features = int(num_init_features / 2)
|
95 |
+
|
96 |
+
out = basic_conv2d_graph(x, num_init_features, kernel_size=3, strides=2, padding='same', name=name + '_stem1')
|
97 |
+
|
98 |
+
branch2 = basic_conv2d_graph(
|
99 |
+
out, num_stem_features, kernel_size=1, strides=1, padding='valid', name=name + '_stem2a')
|
100 |
+
branch2 = basic_conv2d_graph(
|
101 |
+
branch2, num_init_features, kernel_size=3, strides=2, padding='same', name=name + '_stem2b')
|
102 |
+
|
103 |
+
branch1 = MaxPooling2D(pool_size=2, strides=2)(out)
|
104 |
+
|
105 |
+
out = Concatenate(axis=-1)([branch1, branch2])
|
106 |
+
|
107 |
+
out = basic_conv2d_graph(out, num_init_features, kernel_size=1, strides=1, padding='valid', name=name + '_stem3')
|
108 |
+
|
109 |
+
return out
|
110 |
+
|
111 |
+
|
112 |
+
def basic_conv2d_graph(x, out_channels, kernel_size, strides, padding, activation=True, name=''):
|
113 |
+
x = DeeplabConv2D(
|
114 |
+
out_channels, kernel_size=kernel_size, strides=strides,
|
115 |
+
padding=padding, use_bias=False, name=name + '_conv')(x)
|
116 |
+
x = CustomBatchNormalization(name=name + '_norm')(x)
|
117 |
+
if activation:
|
118 |
+
x = ReLU()(x)
|
119 |
+
|
120 |
+
return x
|
121 |
+
|
122 |
+
|
123 |
+
def PeleeNet(input_shape=None,
|
124 |
+
OS=8,
|
125 |
+
growth_rate=32,
|
126 |
+
block_config=[3, 4, 8, 6],
|
127 |
+
num_init_features=32,
|
128 |
+
bottleneck_width=[1, 2, 4, 4],
|
129 |
+
include_top=True,
|
130 |
+
weights='imagenet',
|
131 |
+
input_tensor=None,
|
132 |
+
pooling=None,
|
133 |
+
dropout_rate=0.05,
|
134 |
+
classes=1000,
|
135 |
+
**kwargs):
|
136 |
+
"""Instantiates the PeleeNet architecture.
|
137 |
+
|
138 |
+
# Arguments
|
139 |
+
input_shape: optional shape tuple, to be specified if you would
|
140 |
+
like to use a model with an input img resolution that is not
|
141 |
+
(224, 224, 3).
|
142 |
+
It should have exactly 3 input channels, e.g. (224, 224, 3).
|
143 |
+
You can also omit this option if you would like
|
144 |
+
to infer input_shape from an input_tensor.
|
145 |
+
If you choose to include both input_tensor and input_shape then
|
146 |
+
input_shape will be used if they match, if the shapes
|
147 |
+
do not match then we will throw an error.
|
148 |
+
E.g. `(160, 160, 3)` would be one valid value.
|
149 |
+
include_top: whether to include the fully-connected
|
150 |
+
layer at the top of the network.
|
151 |
+
weights: one of `None` (random initialization),
|
152 |
+
'imagenet' (pre-training on ImageNet),
|
153 |
+
or the path to the weights file to be loaded.
|
154 |
+
input_tensor: optional Keras tensor (i.e. output of
|
155 |
+
`layers.Input()`)
|
156 |
+
to use as image input for the model.
|
157 |
+
pooling: Optional pooling mode for feature extraction
|
158 |
+
when `include_top` is `False`.
|
159 |
+
- `None` means that the output of the model
|
160 |
+
will be the 4D tensor output of the
|
161 |
+
last convolutional block.
|
162 |
+
- `avg` means that global average pooling
|
163 |
+
will be applied to the output of the
|
164 |
+
last convolutional block, and thus
|
165 |
+
the output of the model will be a
|
166 |
+
2D tensor.
|
167 |
+
- `max` means that global max pooling will
|
168 |
+
be applied.
|
169 |
+
classes: optional number of classes to classify images
|
170 |
+
into, only to be specified if `include_top` is True, and
|
171 |
+
if no `weights` argument is specified.
|
172 |
+
|
173 |
+
# Returns
|
174 |
+
A Keras model instance.
|
175 |
+
|
176 |
+
# Raises
|
177 |
+
ValueError: in case of invalid argument for `weights`,
|
178 |
+
or invalid input shape or invalid alpha, rows when
|
179 |
+
weights='imagenet'
|
180 |
+
"""
|
181 |
+
|
182 |
+
if not (weights in {'imagenet', None} or os.path.exists(weights)):
|
183 |
+
raise ValueError('The `weights` argument should be either '
|
184 |
+
'`None` (random initialization), `imagenet` '
|
185 |
+
'(pre-training on ImageNet), '
|
186 |
+
'or the path to the weights file to be loaded.')
|
187 |
+
|
188 |
+
if weights == 'imagenet' and include_top and classes != 1000:
|
189 |
+
raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
|
190 |
+
'as true, `classes` should be 1000')
|
191 |
+
|
192 |
+
input_shape = _obtain_input_shape(input_shape,
|
193 |
+
default_size=224,
|
194 |
+
min_size=32,
|
195 |
+
data_format=K.image_data_format(),
|
196 |
+
require_flatten=include_top,
|
197 |
+
weights=weights)
|
198 |
+
|
199 |
+
# If input_shape is None and input_tensor is None using standard shape
|
200 |
+
if input_shape is None and input_tensor is None:
|
201 |
+
input_shape = (None, None, 3)
|
202 |
+
|
203 |
+
if input_tensor is None:
|
204 |
+
img_input = Input(shape=input_shape)
|
205 |
+
else:
|
206 |
+
#if not K.is_keras_tensor(input_tensor):
|
207 |
+
#img_input = Input(tensor=input_tensor, shape=input_shape)
|
208 |
+
#else:
|
209 |
+
#img_input = input_tensor
|
210 |
+
img_input = input_tensor
|
211 |
+
|
212 |
+
if type(growth_rate) is list:
|
213 |
+
growth_rates = growth_rate
|
214 |
+
assert len(growth_rates) == 4, 'The growth rate must be the list and the size must be 4'
|
215 |
+
else:
|
216 |
+
growth_rates = [growth_rate] * 4
|
217 |
+
|
218 |
+
if type(bottleneck_width) is list:
|
219 |
+
bottleneck_widths = bottleneck_width
|
220 |
+
assert len(bottleneck_widths) == 4, 'The bottleneck width must be the list and the size must be 4'
|
221 |
+
else:
|
222 |
+
bottleneck_widths = [bottleneck_width] * 4
|
223 |
+
|
224 |
+
features = stem_block_graph(img_input, num_init_features, name='bbn_features_stemblock')
|
225 |
+
num_features = num_init_features
|
226 |
+
for i, num_layers in enumerate(block_config):
|
227 |
+
features = dense_block_graph(
|
228 |
+
features, num_layers=num_layers, bn_size=bottleneck_widths[i],
|
229 |
+
growth_rate=growth_rates[i], name='bbn_features_denseblock{}'.format(i + 1))
|
230 |
+
|
231 |
+
num_features = num_features + num_layers * growth_rates[i]
|
232 |
+
features = basic_conv2d_graph(
|
233 |
+
features, num_features, kernel_size=1, strides=1,
|
234 |
+
padding='valid', name='bbn_features_transition{}'.format(i + 1))
|
235 |
+
|
236 |
+
#if i != len(block_config) - 1:
|
237 |
+
#features = AveragePooling2D(pool_size=2, strides=2)(features)
|
238 |
+
|
239 |
+
# skip level feature, with output stride = 4
|
240 |
+
if i == 0:
|
241 |
+
skip = features
|
242 |
+
|
243 |
+
# apply stride pooling according to OS
|
244 |
+
if OS == 8 and i < 1:
|
245 |
+
features = AveragePooling2D(pool_size=2, strides=2)(features)
|
246 |
+
elif OS == 16 and i < 2:
|
247 |
+
features = AveragePooling2D(pool_size=2, strides=2)(features)
|
248 |
+
elif OS == 32 and i != len(block_config) - 1:
|
249 |
+
features = AveragePooling2D(pool_size=2, strides=2)(features)
|
250 |
+
|
251 |
+
features_shape = K.int_shape(features)
|
252 |
+
|
253 |
+
if include_top:
|
254 |
+
x = GlobalAveragePooling2D()(features)
|
255 |
+
if dropout_rate > 0:
|
256 |
+
x = Dropout(dropout_rate)(x)
|
257 |
+
x = Dense(classes, activation='softmax',
|
258 |
+
use_bias=True, name='Logits')(x)
|
259 |
+
else:
|
260 |
+
if pooling == 'avg':
|
261 |
+
x = GlobalAveragePooling2D()(features)
|
262 |
+
elif pooling == 'max':
|
263 |
+
x = GlobalMaxPooling2D()(features)
|
264 |
+
else:
|
265 |
+
x = features
|
266 |
+
|
267 |
+
# Ensure that the model takes into account
|
268 |
+
# any potential predecessors of `input_tensor`.
|
269 |
+
if input_tensor is not None:
|
270 |
+
inputs = get_source_inputs(input_tensor)
|
271 |
+
else:
|
272 |
+
inputs = img_input
|
273 |
+
|
274 |
+
# Create model.
|
275 |
+
model = Model(inputs, x, name='peleenet')
|
276 |
+
|
277 |
+
# Load weights.
|
278 |
+
if weights == 'imagenet':
|
279 |
+
if include_top:
|
280 |
+
file_name = 'peleenet_weights_tf_dim_ordering_tf_kernels_224.h5'
|
281 |
+
weight_path = BASE_WEIGHT_PATH + file_name
|
282 |
+
else:
|
283 |
+
file_name = 'peleenet_weights_tf_dim_ordering_tf_kernels_224_no_top.h5'
|
284 |
+
weight_path = BASE_WEIGHT_PATH + file_name
|
285 |
+
|
286 |
+
weights_path = get_file(file_name, weight_path, cache_subdir='models')
|
287 |
+
model.load_weights(weights_path)
|
288 |
+
elif weights is not None:
|
289 |
+
model.load_weights(weights)
|
290 |
+
|
291 |
+
backbone_len = len(model.layers)
|
292 |
+
# need to return feature map and skip connection,
|
293 |
+
# not the whole "no top" model
|
294 |
+
return x, skip, backbone_len
|
295 |
+
#return model
|
296 |
+
|
297 |
+
|
298 |
+
def Deeplabv3pPeleeNet(input_shape=(512, 512, 3),
|
299 |
+
weights='imagenet',
|
300 |
+
input_tensor=None,
|
301 |
+
num_classes=21,
|
302 |
+
OS=8):
|
303 |
+
""" Instantiates the Deeplabv3+ PeleeNet architecture
|
304 |
+
# Arguments
|
305 |
+
input_shape: shape of input image. format HxWxC
|
306 |
+
PASCAL VOC model was trained on (512,512,3) images
|
307 |
+
weights: pretrained weights type
|
308 |
+
- imagenet: pre-trained on Imagenet
|
309 |
+
- None : random initialization
|
310 |
+
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
311 |
+
to use as image input for the model.
|
312 |
+
num_classes: number of desired classes.
|
313 |
+
OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
|
314 |
+
|
315 |
+
# Returns
|
316 |
+
A Keras model instance.
|
317 |
+
"""
|
318 |
+
|
319 |
+
if not (weights in {'imagenet', None}):
|
320 |
+
raise ValueError('The `weights` argument should be either '
|
321 |
+
'`imagenet` (pre-trained on Imagenet) or '
|
322 |
+
'`None` (random initialization)')
|
323 |
+
if input_tensor is None:
|
324 |
+
img_input = Input(shape=input_shape, name='image_input')
|
325 |
+
else:
|
326 |
+
img_input = input_tensor
|
327 |
+
|
328 |
+
# normalize input image
|
329 |
+
img_norm = Lambda(normalize, name='input_normalize')(img_input)
|
330 |
+
|
331 |
+
# backbone body for feature extract
|
332 |
+
x, skip_feature, backbone_len = PeleeNet(include_top=False, pooling=None, input_tensor=img_norm, weights=weights, OS=OS)
|
333 |
+
|
334 |
+
# ASPP block
|
335 |
+
x = ASPP_block(x, OS)
|
336 |
+
|
337 |
+
# Deeplabv3+ decoder for feature projection
|
338 |
+
x = Decoder_block(x, skip_feature)
|
339 |
+
|
340 |
+
# Final prediction conv block
|
341 |
+
x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
|
342 |
+
x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
|
343 |
+
x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
|
344 |
+
x = Softmax(name='Predictions/Softmax')(x)
|
345 |
+
|
346 |
+
|
347 |
+
# Ensure that the model takes into account
|
348 |
+
# any potential predecessors of `input_tensor`.
|
349 |
+
#if input_tensor is not None:
|
350 |
+
#inputs = get_source_inputs(input_tensor)
|
351 |
+
#else:
|
352 |
+
#inputs = img_input
|
353 |
+
|
354 |
+
model = Model(img_input, x, name='deeplabv3p_peleenet')
|
355 |
+
|
356 |
+
return model, backbone_len
|
357 |
+
|
358 |
+
|
359 |
+
def Deeplabv3pLitePeleeNet(input_shape=(512, 512, 3),
|
360 |
+
weights='imagenet',
|
361 |
+
input_tensor=None,
|
362 |
+
num_classes=21,
|
363 |
+
OS=8):
|
364 |
+
""" Instantiates the Deeplabv3+ MobileNetV2Lite architecture
|
365 |
+
# Arguments
|
366 |
+
input_shape: shape of input image. format HxWxC
|
367 |
+
PASCAL VOC model was trained on (512,512,3) images
|
368 |
+
weights: pretrained weights type
|
369 |
+
- imagenet: pre-trained on Imagenet
|
370 |
+
- None : random initialization
|
371 |
+
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
|
372 |
+
to use as image input for the model.
|
373 |
+
num_classes: number of desired classes.
|
374 |
+
OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.
|
375 |
+
|
376 |
+
# Returns
|
377 |
+
A Keras model instance.
|
378 |
+
# Raises
|
379 |
+
RuntimeError: If attempting to run this model with a
|
380 |
+
backend that does not support separable convolutions.
|
381 |
+
ValueError: in case of invalid argument for `weights` or `backbone`
|
382 |
+
"""
|
383 |
+
|
384 |
+
if not (weights in {'imagenet', None}):
|
385 |
+
raise ValueError('The `weights` argument should be either '
|
386 |
+
'`imagenet` (pre-trained on Imagenet) or '
|
387 |
+
'`None` (random initialization)')
|
388 |
+
|
389 |
+
if input_tensor is None:
|
390 |
+
img_input = Input(shape=input_shape, name='image_input')
|
391 |
+
else:
|
392 |
+
img_input = input_tensor
|
393 |
+
|
394 |
+
# normalize input image
|
395 |
+
img_norm = Lambda(normalize, name='input_normalize')(img_input)
|
396 |
+
|
397 |
+
# backbone body for feature extract
|
398 |
+
x, _, backbone_len = PeleeNet(include_top=False, pooling=None, input_tensor=img_norm, weights=weights, OS=OS)
|
399 |
+
|
400 |
+
# use ASPP Lite block & no decode block
|
401 |
+
x = ASPP_Lite_block(x)
|
402 |
+
|
403 |
+
# Final prediction conv block
|
404 |
+
x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
|
405 |
+
x = Lambda(img_resize, arguments={'size': (input_shape[0],input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
|
406 |
+
x = Reshape((input_shape[0]*input_shape[1], num_classes)) (x)
|
407 |
+
x = Softmax(name='Predictions/Softmax')(x)
|
408 |
+
|
409 |
+
|
410 |
+
# Ensure that the model takes into account
|
411 |
+
# any potential predecessors of `input_tensor`.
|
412 |
+
#if input_tensor is not None:
|
413 |
+
#inputs = get_source_inputs(input_tensor)
|
414 |
+
#else:
|
415 |
+
#inputs = img_input
|
416 |
+
|
417 |
+
model = Model(img_input, x, name='deeplabv3p_peleenet_lite')
|
418 |
+
|
419 |
+
return model, backbone_len
|
420 |
+
|
421 |
+
|
422 |
+
if __name__ == '__main__':
|
423 |
+
input_tensor = Input(shape=(512, 512, 3), name='image_input')
|
424 |
+
model, backbone_len = Deeplabv3pLitePeleeNet(input_tensor=input_tensor,
|
425 |
+
weights=None,
|
426 |
+
num_classes=21,
|
427 |
+
OS=8)
|
428 |
+
model.summary()
|
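Note on using the heads above: both Deeplabv3pPeleeNet and Deeplabv3pLitePeleeNet end with a Reshape to (H*W, num_classes) followed by a Softmax, so a caller has to argmax and reshape the flat scores to recover a label mask. A minimal, hedged inference sketch (the module import path and the 512x512 / 21-class settings are assumptions for illustration, not part of this file):

import numpy as np
from deeplabv3p.models.deeplabv3p_peleenet import Deeplabv3pPeleeNet

# build with random weights; backbone_len could be used to freeze backbone layers during training
model, backbone_len = Deeplabv3pPeleeNet(input_shape=(512, 512, 3), weights=None, num_classes=21)

# dummy input just to show the output contract: (1, 512*512, 21) softmax scores
img = np.zeros((1, 512, 512, 3), dtype=np.float32)
scores = model.predict(img)
mask = np.argmax(scores, axis=-1).reshape(512, 512)  # per-pixel class ids
print(mask.shape)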
models/deeplab/deeplabv3p/models/deeplabv3p_resnet50.py
ADDED
@@ -0,0 +1,408 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Deeplabv3+ ResNet50 model for Keras.

# Reference:
- [Encoder-Decoder with Atrous Separable Convolution
    for Semantic Image Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
- [Deep Residual Learning for Image Recognition](
    https://arxiv.org/abs/1512.03385) (CVPR 2016 Best Paper Award)
"""
import os
import warnings
from keras_applications.imagenet_utils import _obtain_input_shape
from tensorflow.keras.models import Model
from tensorflow.keras.activations import relu
from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, ZeroPadding2D, Lambda, AveragePooling2D, MaxPooling2D, GlobalAveragePooling2D, GlobalMaxPooling2D, Input, Dense, Concatenate, add, Reshape, BatchNormalization, Dropout, ReLU, Softmax
from tensorflow.keras.utils import get_source_inputs, get_file
from tensorflow.keras import backend as K

from deeplabv3p.models.layers import DeeplabConv2D, DeeplabDepthwiseConv2D, CustomBatchNormalization, ASPP_block, ASPP_Lite_block, Decoder_block, normalize, img_resize


WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/'
                'releases/download/v0.2/'
                'resnet50_weights_tf_dim_ordering_tf_kernels.h5')
WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/'
                       'releases/download/v0.2/'
                       'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5')


def identity_block(input_tensor, kernel_size, filters, stage, block, rate=1):
    """The identity block is the block that has no conv layer at shortcut.

    # Arguments
        input_tensor: input tensor
        kernel_size: default 3, the kernel size of
            middle conv layer at main path
        filters: list of integers, the filters of 3 conv layer at main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names

    # Returns
        Output tensor for the block.
    """
    filters1, filters2, filters3 = filters
    if K.image_data_format() == 'channels_last':
        bn_axis = 3
    else:
        bn_axis = 1
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = DeeplabConv2D(filters1, (1, 1),
                      kernel_initializer='he_normal',
                      dilation_rate=(rate, rate),
                      name=conv_name_base + '2a')(input_tensor)
    x = CustomBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = ReLU()(x)

    x = DeeplabConv2D(filters2, kernel_size,
                      padding='same',
                      kernel_initializer='he_normal',
                      dilation_rate=(rate, rate),
                      name=conv_name_base + '2b')(x)
    x = CustomBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = ReLU()(x)

    x = DeeplabConv2D(filters3, (1, 1),
                      kernel_initializer='he_normal',
                      dilation_rate=(rate, rate),
                      name=conv_name_base + '2c')(x)
    x = CustomBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

    x = add([x, input_tensor])
    x = ReLU()(x)
    return x


def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(2, 2),
               rate=1):
    """A block that has a conv layer at shortcut.

    # Arguments
        input_tensor: input tensor
        kernel_size: default 3, the kernel size of
            middle conv layer at main path
        filters: list of integers, the filters of 3 conv layer at main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names
        strides: strides for the first conv layer in the block.

    # Returns
        Output tensor for the block.

    Note that from stage 3,
    the first conv layer at main path is with strides=(2, 2)
    and the shortcut should have strides=(2, 2) as well.
    """
    filters1, filters2, filters3 = filters
    if K.image_data_format() == 'channels_last':
        bn_axis = 3
    else:
        bn_axis = 1
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = DeeplabConv2D(filters1, (1, 1), strides=strides,
                      kernel_initializer='he_normal',
                      dilation_rate=(rate, rate),
                      name=conv_name_base + '2a')(input_tensor)
    x = CustomBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = ReLU()(x)

    x = DeeplabConv2D(filters2, kernel_size, padding='same',
                      kernel_initializer='he_normal',
                      dilation_rate=(rate, rate),
                      name=conv_name_base + '2b')(x)
    x = CustomBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = ReLU()(x)

    x = DeeplabConv2D(filters3, (1, 1),
                      kernel_initializer='he_normal',
                      dilation_rate=(rate, rate),
                      name=conv_name_base + '2c')(x)
    x = CustomBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

    shortcut = DeeplabConv2D(filters3, (1, 1), strides=strides,
                             kernel_initializer='he_normal',
                             dilation_rate=(rate, rate),
                             name=conv_name_base + '1')(input_tensor)
    shortcut = CustomBatchNormalization(
        axis=bn_axis, name=bn_name_base + '1')(shortcut)

    x = add([x, shortcut])
    x = ReLU()(x)
    return x


def ResNet50(include_top=True,
             OS=8,
             weights='imagenet',
             input_tensor=None,
             input_shape=None,
             pooling=None,
             classes=1000,
             **kwargs):
    """Instantiates the ResNet50 architecture.

    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one specified in your Keras config at `~/.keras/keras.json`.

    # Arguments
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
            'imagenet' (pre-training on ImageNet),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 32.
            E.g. `(200, 200, 3)` would be one valid value.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """

    """
    Modified ResNet50 feature extractor body
    with specified output stride and skip level feature
    """
    if OS == 8:
        origin_os16_stride = (1, 1)
        origin_os16_block_rate = 2
        origin_os32_stride = (1, 1)
        origin_os32_block_rate = 4
    elif OS == 16:
        origin_os16_stride = (2, 2)
        origin_os16_block_rate = 1
        origin_os32_stride = (1, 1)
        origin_os32_block_rate = 2
    elif OS == 32:
        origin_os16_stride = (2, 2)
        origin_os16_block_rate = 1
        origin_os32_stride = (2, 2)
        origin_os32_block_rate = 1
    else:
        raise ValueError('invalid output stride', OS)

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
                         ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        #if not backend.is_keras_tensor(input_tensor):
            #img_input = Input(tensor=input_tensor, shape=input_shape)
        #else:
            #img_input = input_tensor
        img_input = input_tensor

    if K.image_data_format() == 'channels_last':
        bn_axis = 3
    else:
        bn_axis = 1

    x = ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
    x = DeeplabConv2D(64, (7, 7),
                      strides=(2, 2),
                      padding='valid',
                      kernel_initializer='he_normal',
                      name='conv1')(x)
    x = CustomBatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = ReLU()(x)
    x = ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
    # skip level feature, with output stride = 4
    skip = x

    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

    # original output stride changes to 16 from here, so we start to control block stride and dilation rate
    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', strides=origin_os16_stride)  # origin: stride=(2, 2)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b', rate=origin_os16_block_rate)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c', rate=origin_os16_block_rate)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d', rate=origin_os16_block_rate)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e', rate=origin_os16_block_rate)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f', rate=origin_os16_block_rate)

    # original output stride changes to 32 from here
    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a', strides=origin_os32_stride, rate=origin_os16_block_rate)  # origin: stride=(2, 2)
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b', rate=origin_os32_block_rate)
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c', rate=origin_os32_block_rate)

    if include_top:
        x = GlobalAveragePooling2D(name='avg_pool')(x)
        x = Dense(classes, activation='softmax', name='fc1000')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)
        else:
            warnings.warn('The output shape of `ResNet50(include_top=False)` '
                          'has been changed since Keras 2.2.0.')

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input
    # Create model.
    model = Model(inputs, x, name='resnet50')

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            weights_path = get_file(
                'resnet50_weights_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH,
                cache_subdir='models',
                md5_hash='a7b3fe01876f51b976af0dea6bc144eb')
        else:
            weights_path = get_file(
                'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
                WEIGHTS_PATH_NO_TOP,
                cache_subdir='models',
                md5_hash='a268eb855778b3df3c7506639542a6af')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    backbone_len = len(model.layers)
    # need to return feature map and skip connection,
    # not the whole "no top" model
    return x, skip, backbone_len
    #return model


def Deeplabv3pResNet50(input_shape=(512, 512, 3),
                       weights='imagenet',
                       input_tensor=None,
                       num_classes=21,
                       OS=8):
    """ Instantiates the Deeplabv3+ ResNet50 architecture
    # Arguments
        input_shape: shape of input image. format HxWxC
            PASCAL VOC model was trained on (512,512,3) images
        weights: pretrained weights type
            - imagenet: pre-trained on Imagenet
            - None : random initialization
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        num_classes: number of desired classes.
        OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}.

    # Returns
        A Keras model instance.
    """
    if not (weights in {'imagenet', None}):
        raise ValueError('The `weights` argument should be either '
                         '`imagenet` (pre-trained on Imagenet) or '
                         '`None` (random initialization)')

    if input_tensor is None:
        img_input = Input(shape=input_shape, name='image_input')
    else:
        img_input = input_tensor

    # normalize input image
    img_norm = Lambda(normalize, name='input_normalize')(img_input)

    # backbone body for feature extract
    x, skip_feature, backbone_len = ResNet50(include_top=False, input_tensor=img_norm, weights=weights, OS=OS)

    # ASPP block
    x = ASPP_block(x, OS)

    # Deeplabv3+ decoder for feature projection
    x = Decoder_block(x, skip_feature)

    # Final prediction conv block
    x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
    x = Lambda(img_resize, arguments={'size': (input_shape[0], input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
    x = Reshape((input_shape[0]*input_shape[1], num_classes))(x)
    x = Softmax(name='Predictions/Softmax')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    #if input_tensor is not None:
        #inputs = get_source_inputs(input_tensor)
    #else:
        #inputs = img_input
    model = Model(img_input, x, name='deeplabv3p_resnet50')

    return model, backbone_len


if __name__ == '__main__':
    input_tensor = Input(shape=(224, 224, 3), name='image_input')
    #model = ResNet50(include_top=False, input_shape=(512, 512, 3), weights='imagenet')
    model = ResNet50(include_top=True, input_tensor=input_tensor, weights='imagenet')
    model.summary()

    import numpy as np
    from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
    from keras_preprocessing import image

    img = image.load_img('../../examples/dog.jpg', target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)

    preds = model.predict(x)
    print('Predicted:', decode_predictions(preds))
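The OS argument above decides where ResNet50 stops striding and switches to dilated convolutions: with OS=8 both stage 4 and stage 5 keep stride 1 and use dilation rates 2 and 4, so spatial resolution only drops by 8x overall. A hedged sketch to check the resulting tensor shapes (the import path is assumed from the package layout; expected shapes are derived from the code, not measured):

from tensorflow.keras.layers import Input
from deeplabv3p.models.deeplabv3p_resnet50 import ResNet50

img_input = Input(shape=(512, 512, 3))
# include_top=False returns (feature_map, skip_feature, backbone_len), not a Model
x, skip, backbone_len = ResNet50(include_top=False, input_tensor=img_input, weights=None, OS=8)
print(x.shape)     # expected (None, 64, 64, 2048)   -> 512 / 8
print(skip.shape)  # skip feature at output stride 4 -> (None, 128, 128, 256)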
models/deeplab/deeplabv3p/models/deeplabv3p_xception.py
ADDED
@@ -0,0 +1,239 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Deeplabv3+ Xception model for Keras.
On Pascal VOC, original model gets to 84.56% mIOU

Reference Paper:
- [Encoder-Decoder with Atrous Separable Convolution
    for Semantic Image Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
- [Xception: Deep Learning with Depthwise Separable Convolutions]
    (https://arxiv.org/abs/1610.02357)
"""
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, ZeroPadding2D, Lambda, AveragePooling2D, Input, Concatenate, Add, Reshape, BatchNormalization, Dropout, ReLU, Softmax, add
from tensorflow.keras.utils import get_source_inputs, get_file
#from tensorflow.keras import backend as K

from deeplabv3p.models.layers import DeeplabConv2D, DeeplabDepthwiseConv2D, CustomBatchNormalization, SepConv_BN, ASPP_block, Decoder_block, normalize, img_resize

WEIGHTS_PATH_X = "https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.1/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5"


def _conv2d_same(x, filters, prefix, stride=1, kernel_size=3, rate=1):
    """Implements right 'same' padding for even kernel sizes
    Without this there is a 1 pixel drift when stride = 2
    Args:
        x: input tensor
        filters: num of filters in pointwise convolution
        prefix: prefix before name
        stride: stride at depthwise conv
        kernel_size: kernel size for depthwise convolution
        rate: atrous rate for depthwise convolution
    """
    if stride == 1:
        return DeeplabConv2D(filters,
                             (kernel_size, kernel_size),
                             strides=(stride, stride),
                             padding='same', use_bias=False,
                             dilation_rate=(rate, rate),
                             name=prefix)(x)
    else:
        kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
        pad_total = kernel_size_effective - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        x = ZeroPadding2D((pad_beg, pad_end))(x)
        return DeeplabConv2D(filters,
                             (kernel_size, kernel_size),
                             strides=(stride, stride),
                             padding='valid', use_bias=False,
                             dilation_rate=(rate, rate),
                             name=prefix)(x)


def _xception_block(inputs, depth_list, prefix, skip_connection_type, stride,
                    rate=1, depth_activation=False, return_skip=False):
    """ Basic building block of modified Xception network
    Args:
        inputs: input tensor
        depth_list: number of filters in each SepConv layer. len(depth_list) == 3
        prefix: prefix before name
        skip_connection_type: one of {'conv','sum','none'}
        stride: stride at last depthwise conv
        rate: atrous rate for depthwise convolution
        depth_activation: flag to use activation between depthwise & pointwise convs
        return_skip: flag to return additional tensor after 2 SepConvs for decoder
    """
    residual = inputs
    for i in range(3):
        residual = SepConv_BN(residual,
                              depth_list[i],
                              prefix + '_separable_conv{}'.format(i + 1),
                              stride=stride if i == 2 else 1,
                              rate=rate,
                              depth_activation=depth_activation)
        if i == 1:
            skip = residual
    if skip_connection_type == 'conv':
        shortcut = _conv2d_same(inputs, depth_list[-1], prefix + '_shortcut',
                                kernel_size=1,
                                stride=stride)
        shortcut = CustomBatchNormalization(name=prefix + '_shortcut_BN')(shortcut)
        outputs = add([residual, shortcut])
    elif skip_connection_type == 'sum':
        outputs = add([residual, inputs])
    elif skip_connection_type == 'none':
        outputs = residual
    if return_skip:
        return outputs, skip
    else:
        return outputs


def Xception_body(input_tensor, OS):
    """
    Modified Aligned Xception feature extractor body
    with specified output stride and skip level feature
    """
    if OS == 8:
        origin_os16_stride = 1
        origin_os16_block_rate = 2
        origin_os32_stride = 1
        origin_os32_block_rate = 4
    elif OS == 16:
        origin_os16_stride = 2
        origin_os16_block_rate = 1
        origin_os32_stride = 1
        origin_os32_block_rate = 2
    elif OS == 32:
        origin_os16_stride = 2
        origin_os16_block_rate = 1
        origin_os32_stride = 2
        origin_os32_block_rate = 1
    else:
        raise ValueError('invalid output stride', OS)

    x = DeeplabConv2D(32, (3, 3), strides=(2, 2),
                      name='entry_flow_conv1_1', use_bias=False, padding='same')(input_tensor)

    x = CustomBatchNormalization(name='entry_flow_conv1_1_BN')(x)
    x = ReLU()(x)

    x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
    x = CustomBatchNormalization(name='entry_flow_conv1_2_BN')(x)
    x = ReLU()(x)

    x = _xception_block(x, [128, 128, 128], 'entry_flow_block1',
                        skip_connection_type='conv', stride=2,
                        depth_activation=False)
    # skip level feature, with output stride = 4
    x, skip = _xception_block(x, [256, 256, 256], 'entry_flow_block2',
                              skip_connection_type='conv', stride=2,
                              depth_activation=False, return_skip=True)

    # original output stride changes to 16 from here, so we start to control block stride and dilation rate
    x = _xception_block(x, [728, 728, 728], 'entry_flow_block3',
                        skip_connection_type='conv', stride=origin_os16_stride,
                        depth_activation=False)
    for i in range(16):
        x = _xception_block(x, [728, 728, 728], 'middle_flow_unit_{}'.format(i + 1),
                            skip_connection_type='sum', stride=1, rate=origin_os16_block_rate,
                            depth_activation=False)

    # original output stride changes to 32 from here
    x = _xception_block(x, [728, 1024, 1024], 'exit_flow_block1',
                        skip_connection_type='conv', stride=origin_os32_stride, rate=origin_os16_block_rate,
                        depth_activation=False)
    x = _xception_block(x, [1536, 1536, 2048], 'exit_flow_block2',
                        skip_connection_type='none', stride=1, rate=origin_os32_block_rate,
                        depth_activation=True)
    # end of feature extractor

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    #else:
        #inputs = img_input

    backbone_len = len(Model(inputs, x).layers)
    return x, skip, backbone_len


def Deeplabv3pXception(input_shape=(512, 512, 3),
                       weights='pascalvoc',
                       input_tensor=None,
                       num_classes=21,
                       OS=16):
    """ Instantiates the Deeplabv3+ architecture
    Optionally loads weights pre-trained
    on PASCAL VOC. This model is available for TensorFlow only,
    and can only be used with inputs following the TensorFlow
    data format `(width, height, channels)`.
    # Arguments
        input_shape: shape of input image. format HxWxC
            PASCAL VOC model was trained on (512,512,3) images
        weights: pretrained weights type
            - pascalvoc : pre-trained on PASCAL VOC
            - None : random initialization
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        num_classes: number of desired classes.
        OS: determines input_shape/feature_extractor_output ratio. One of {8,16,32}
    # Returns
        A Keras model instance.
    # Raises
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
        ValueError: in case of invalid argument for `weights` or `backbone`
    """

    if not (weights in {'pascalvoc', None}):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascalvoc` '
                         '(pre-trained on PASCAL VOC)')

    if input_tensor is None:
        img_input = Input(shape=input_shape, name='image_input')
    else:
        img_input = input_tensor

    # normalize input image
    img_norm = Lambda(normalize, name='input_normalize')(img_input)

    # backbone body for feature extract
    x, skip_feature, backbone_len = Xception_body(img_norm, OS)

    # ASPP block
    x = ASPP_block(x, OS)

    # Deeplabv3+ decoder for feature projection
    x = Decoder_block(x, skip_feature)

    # Final prediction conv block
    x = DeeplabConv2D(num_classes, (1, 1), padding='same', name='logits_semantic')(x)
    x = Lambda(img_resize, arguments={'size': (input_shape[0], input_shape[1]), 'mode': 'bilinear'}, name='pred_resize')(x)
    x = Reshape((input_shape[0]*input_shape[1], num_classes))(x)
    x = Softmax(name='Predictions/Softmax')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    #if input_tensor is not None:
        #inputs = get_source_inputs(input_tensor)
    #else:
        #inputs = img_input

    model = Model(img_input, x, name='deeplabv3p_xception')

    # load weights
    if weights == 'pascalvoc':
        weights_path = get_file('deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
                                WEIGHTS_PATH_X,
                                cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
    return model, backbone_len
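Unlike the PeleeNet and ResNet50 variants above, Deeplabv3pXception can load PASCAL VOC pretrained weights directly (the bonlime release in WEIGHTS_PATH_X, matched by layer name). A hedged usage sketch, assuming the package import path:

from deeplabv3p.models.deeplabv3p_xception import Deeplabv3pXception

# weights='pascalvoc' downloads deeplabv3_xception_tf_dim_ordering_tf_kernels.h5 on first use
model, backbone_len = Deeplabv3pXception(input_shape=(512, 512, 3), weights='pascalvoc', num_classes=21, OS=16)
model.summary()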
models/deeplab/deeplabv3p/models/layers.py
ADDED
@@ -0,0 +1,311 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import division

from functools import wraps

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, SeparableConv2D, ZeroPadding2D, Lambda, AveragePooling2D, Concatenate, BatchNormalization, Dropout, ReLU
from tensorflow.keras.regularizers import l2
import tensorflow as tf

L2_FACTOR = 2e-5

@wraps(Conv2D)
def DeeplabConv2D(*args, **kwargs):
    """Wrapper to set Deeplab parameters for Conv2D."""
    deeplab_conv_kwargs = {'kernel_regularizer': l2(L2_FACTOR)}
    deeplab_conv_kwargs['bias_regularizer'] = l2(L2_FACTOR)
    #deeplab_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
    deeplab_conv_kwargs.update(kwargs)
    return Conv2D(*args, **deeplab_conv_kwargs)


@wraps(DepthwiseConv2D)
def DeeplabDepthwiseConv2D(*args, **kwargs):
    """Wrapper to set Deeplab parameters for DepthwiseConv2D."""
    deeplab_conv_kwargs = {'kernel_regularizer': l2(L2_FACTOR)}
    deeplab_conv_kwargs['bias_regularizer'] = l2(L2_FACTOR)
    #deeplab_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
    deeplab_conv_kwargs.update(kwargs)
    return DepthwiseConv2D(*args, **deeplab_conv_kwargs)


@wraps(SeparableConv2D)
def DeeplabSeparableConv2D(*args, **kwargs):
    """Wrapper to set Deeplab parameters for SeparableConv2D."""
    deeplab_conv_kwargs = {'kernel_regularizer': l2(L2_FACTOR)}
    deeplab_conv_kwargs['bias_regularizer'] = l2(L2_FACTOR)
    #deeplab_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
    deeplab_conv_kwargs.update(kwargs)
    return SeparableConv2D(*args, **deeplab_conv_kwargs)


def normalize(x):
    return x/127.5 - 1


def img_resize(x, size, mode='bilinear'):
    if mode == 'bilinear':
        return tf.image.resize(x, size=size, method='bilinear')
    elif mode == 'nearest':
        return tf.image.resize(x, size=size, method='nearest')
    else:
        raise ValueError('invalid resize mode', mode)


def CustomBatchNormalization(*args, **kwargs):
    if tf.__version__ >= '2.2':
        from tensorflow.keras.layers.experimental import SyncBatchNormalization
        BatchNorm = SyncBatchNormalization
    else:
        BatchNorm = BatchNormalization

    return BatchNorm(*args, **kwargs)


def SepConv_BN(x, filters, prefix, stride=1, kernel_size=3, rate=1, depth_activation=False, epsilon=1e-3):
    """ SepConv with BN between depthwise & pointwise. Optionally add activation after BN
    Implements right "same" padding for even kernel sizes
    Args:
        x: input tensor
        filters: num of filters in pointwise convolution
        prefix: prefix before name
        stride: stride at depthwise conv
        kernel_size: kernel size for depthwise convolution
        rate: atrous rate for depthwise convolution
        depth_activation: flag to use activation between depthwise & pointwise convs
        epsilon: epsilon to use in BN layer
    """

    if stride == 1:
        depth_padding = 'same'
    else:
        kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
        pad_total = kernel_size_effective - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        x = ZeroPadding2D((pad_beg, pad_end))(x)
        depth_padding = 'valid'

    if not depth_activation:
        x = ReLU()(x)
    x = DeeplabDepthwiseConv2D((kernel_size, kernel_size), strides=(stride, stride), dilation_rate=(rate, rate),
                               padding=depth_padding, use_bias=False, name=prefix + '_depthwise')(x)
    x = CustomBatchNormalization(name=prefix + '_depthwise_BN', epsilon=epsilon)(x)
    if depth_activation:
        x = ReLU()(x)
    x = DeeplabConv2D(filters, (1, 1), padding='same',
                      use_bias=False, name=prefix + '_pointwise')(x)
    x = CustomBatchNormalization(name=prefix + '_pointwise_BN', epsilon=epsilon)(x)
    if depth_activation:
        x = ReLU()(x)

    return x


def ASPP_block(x, OS):
    """
    branching for Atrous Spatial Pyramid Pooling
    """
    if OS == 8:
        atrous_rates = (12, 24, 36)
    elif OS == 16:
        atrous_rates = (6, 12, 18)
    elif OS == 32:
        # unofficial hyperparameters, just have a try
        atrous_rates = (3, 6, 9)
    else:
        raise ValueError('invalid output stride', OS)

    # feature map shape, (batch, height, width, channel)
    feature_shape = x.shape.as_list()

    # Image Feature branch
    b4 = AveragePooling2D(pool_size=(feature_shape[1], feature_shape[2]))(x)

    b4 = DeeplabConv2D(256, (1, 1), padding='same',
                       use_bias=False, name='image_pooling')(b4)
    b4 = CustomBatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = ReLU()(b4)
    b4 = Lambda(img_resize, arguments={'size': (feature_shape[1], feature_shape[2]), 'mode': 'bilinear'}, name='aspp_resize')(b4)

    # simple 1x1
    b0 = DeeplabConv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = CustomBatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = ReLU(name='aspp0_activation')(b0)

    # rate = 6 (12)
    b1 = SepConv_BN(x, 256, 'aspp1',
                    rate=atrous_rates[0], depth_activation=True, epsilon=1e-5)
    # rate = 12 (24)
    b2 = SepConv_BN(x, 256, 'aspp2',
                    rate=atrous_rates[1], depth_activation=True, epsilon=1e-5)
    # rate = 18 (36)
    b3 = SepConv_BN(x, 256, 'aspp3',
                    rate=atrous_rates[2], depth_activation=True, epsilon=1e-5)
    # concatenate ASPP branches & project
    x = Concatenate()([b4, b0, b1, b2, b3])

    x = DeeplabConv2D(256, (1, 1), padding='same',
                      use_bias=False, name='concat_projection')(x)
    x = CustomBatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = ReLU()(x)
    x = Dropout(0.5)(x)

    return x


def ASPP_Lite_block(x):
    """
    a simplified version of the Deeplab ASPP block, which
    only has the global pooling & simple 1x1 conv branches
    """
    # feature map shape, (batch, height, width, channel)
    feature_shape = x.shape.as_list()

    # Image Feature branch
    b4 = AveragePooling2D(pool_size=(feature_shape[1], feature_shape[2]))(x)

    b4 = DeeplabConv2D(256, (1, 1), padding='same',
                       use_bias=False, name='image_pooling')(b4)
    b4 = CustomBatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = ReLU()(b4)
    b4 = Lambda(img_resize, arguments={'size': (feature_shape[1], feature_shape[2]), 'mode': 'bilinear'}, name='aspp_resize')(b4)

    # simple 1x1 conv
    b0 = DeeplabConv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = CustomBatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = ReLU(name='aspp0_activation')(b0)

    # only 2 branches
    x = Concatenate()([b4, b0])
    x = DeeplabConv2D(256, (1, 1), padding='same',
                      use_bias=False, name='concat_projection')(x)
    x = CustomBatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = ReLU()(x)
    x = Dropout(0.5)(x)

    return x


def Decoder_block(x, skip_feature):
    """
    DeepLab v.3+ decoder
    Feature projection x4 (x2) block
    """
    # skip feature shape, (batch, height, width, channel)
    skip_shape = skip_feature.shape.as_list()

    x = Lambda(img_resize, arguments={'size': (skip_shape[1], skip_shape[2]), 'mode': 'bilinear'}, name='decoder_resize')(x)

    skip_feature = DeeplabConv2D(48, (1, 1), padding='same',
                                 use_bias=False, name='feature_projection0')(skip_feature)
    skip_feature = CustomBatchNormalization(
        name='feature_projection0_BN', epsilon=1e-5)(skip_feature)
    skip_feature = ReLU()(skip_feature)
    x = Concatenate()([x, skip_feature])
    x = SepConv_BN(x, 256, 'decoder_conv0',
                   depth_activation=True, epsilon=1e-5)
    x = SepConv_BN(x, 256, 'decoder_conv1',
                   depth_activation=True, epsilon=1e-5)
    return x


#def icnr_weights(init = tf.glorot_normal_initializer(), scale=2, shape=[3,3,32,4], dtype = tf.float32):
    #sess = tf.Session()
    #return sess.run(ICNR(init, scale=scale)(shape=shape, dtype=dtype))

class ICNR:
    """ICNR initializer for checkerboard artifact free sub pixel convolution
    Ref:
        [1] Andrew Aitken et al. Checkerboard artifact free sub-pixel convolution
        https://arxiv.org/pdf/1707.02937.pdf
    Args:
        initializer: initializer used for sub kernels (orthogonal, glorot uniform, etc.)
        scale: scale factor of sub pixel convolution
    """

    def __init__(self, initializer, scale=1):
        self.scale = scale
        self.initializer = initializer

    def __call__(self, shape, dtype, partition_info=None):
        shape = list(shape)
        if self.scale == 1:
            return self.initializer(shape)

        new_shape = shape[:3] + [shape[3] // (self.scale ** 2)]
        x = self.initializer(new_shape, dtype, partition_info)
        x = tf.transpose(x, perm=[2, 0, 1, 3])
        x = tf.image.resize_nearest_neighbor(x, size=(shape[0] * self.scale, shape[1] * self.scale))
        x = tf.space_to_depth(x, block_size=self.scale)
        x = tf.transpose(x, perm=[1, 2, 0, 3])

        return x

class Subpixel(Conv2D):
    def __init__(self,
                 filters,
                 kernel_size,
                 r,
                 padding='valid',
                 data_format=None,
                 strides=(1,1),
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        super(Subpixel, self).__init__(
            filters=r*r*filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs)
        self.r = r

    def _phase_shift(self, I):
        r = self.r
        bsize, a, b, c = I.get_shape().as_list()
        bsize = K.shape(I)[0]  # Handling Dimension(None) type for undefined batch dim
        X = K.reshape(I, [bsize, a, b, int(c/(r*r)), r, r])  # bsize, a, b, c/(r*r), r, r
        X = K.permute_dimensions(X, (0, 1, 2, 5, 4, 3))  # bsize, a, b, r, r, c/(r*r)
        # Keras backend does not support tf.split, so in future versions this could be nicer
        X = [X[:, i, :, :, :, :] for i in range(a)]  # a, [bsize, b, r, r, c/(r*r)]
        X = K.concatenate(X, 2)  # bsize, b, a*r, r, c/(r*r)
        X = [X[:, i, :, :, :] for i in range(b)]  # b, [bsize, a*r, r, c/(r*r)]
        X = K.concatenate(X, 2)  # bsize, a*r, b*r, c/(r*r)
        return X

    def call(self, inputs):
        return self._phase_shift(super(Subpixel, self).call(inputs))

    def compute_output_shape(self, input_shape):
        unshifted = super(Subpixel, self).compute_output_shape(input_shape)
        return (unshifted[0], self.r*unshifted[1], self.r*unshifted[2], int(unshifted[3]/(self.r*self.r)))

    def get_config(self):
        config = super(Conv2D, self).get_config()
        config.pop('rank')
        config.pop('dilation_rate')
        # undo the r*r expansion applied to `filters` in __init__
        config['filters'] = int(config['filters'] / (self.r * self.r))
        config['r'] = self.r
        return config
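The three @wraps conv wrappers at the top of this file exist only to inject the shared L2 weight decay (L2_FACTOR) while passing everything else through to the underlying tf.keras layer; any keyword argument supplied by the caller overrides the injected defaults via dict.update(). A small hedged sketch (the layer name is illustrative only):

from deeplabv3p.models.layers import DeeplabConv2D

conv = DeeplabConv2D(64, (3, 3), padding='same', use_bias=False, name='probe_conv')
print(conv.kernel_regularizer)  # l2 regularizer added by the wrapper
print(conv.padding)             # 'same' passed straight through to Conv2D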
models/deeplab/deeplabv3p/postprocess_np.py
ADDED
@@ -0,0 +1,30 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import numpy as np

import pydensecrf.densecrf as dcrf
from pydensecrf.utils import unary_from_labels


# Fully connected CRF post processing function
def crf_postprocess(im, mask, zero_unsure=True):
    colors, labels = np.unique(mask, return_inverse=True)
    image_size = mask.shape[:2]
    n_labels = len(set(labels.flat))
    d = dcrf.DenseCRF2D(image_size[1], image_size[0], n_labels)  # width, height, nlabels
    U = unary_from_labels(labels, n_labels, gt_prob=.7, zero_unsure=zero_unsure)
    d.setUnaryEnergy(U)
    # This adds the color-independent term, features are the locations only.
    d.addPairwiseGaussian(sxy=(3,3), compat=3)
    # This adds the color-dependent term, i.e. features are (x,y,r,g,b).
    # im is an image-array, e.g. im.dtype == np.uint8 and im.shape == (640,480,3)
    d.addPairwiseBilateral(sxy=80, srgb=13, rgbim=im.astype('uint8'), compat=10)
    Q = d.inference(5)  # 5 - num of iterations
    MAP = np.argmax(Q, axis=0).reshape(image_size)
    unique_map = np.unique(MAP)
    result = np.copy(MAP)
    for u in unique_map:  # get original labels back
        np.putmask(result, MAP == u, colors[u])
    return result
# MAP = crf_postprocess(frame, labels.astype('int32'), zero_unsure=False)
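The commented line at the end of the file hints at the intended usage. Here is a minimal sketch of calling `crf_postprocess` on a predicted mask; the `frame` and `pred_mask` arrays below are placeholders for illustration, not data from this repository:

```python
import numpy as np
from deeplabv3p.postprocess_np import crf_postprocess

# Placeholder inputs (hypothetical): an RGB frame and a predicted class-index
# mask of the same spatial size, containing two classes.
frame = np.zeros((512, 512, 3), dtype=np.uint8)
pred_mask = np.zeros((512, 512), dtype=np.int32)
pred_mask[128:384, 128:384] = 15  # pretend the model predicted class 15 here

# zero_unsure=False treats label 0 as a real class (background), not "unknown"
refined_mask = crf_postprocess(frame, pred_mask, zero_unsure=False)
print(refined_mask.shape, np.unique(refined_mask))
```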
models/deeplab/eval.py
ADDED
@@ -0,0 +1,565 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Calculate mIOU for Deeplabv3p model on validation dataset
"""
import os, argparse, time
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import copy
import itertools
from tqdm import tqdm
from collections import OrderedDict
import operator
from labelme.utils import lblsave as label_save

from tensorflow.keras.models import load_model
import tensorflow.keras.backend as K
import tensorflow as tf
import MNN
import onnxruntime

from common.utils import get_data_list, get_classes, get_custom_objects, optimize_tf_gpu, visualize_segmentation
from deeplabv3p.data import SegmentationGenerator
from deeplabv3p.metrics import mIOU
from deeplabv3p.postprocess_np import crf_postprocess

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

optimize_tf_gpu(tf, K)


def deeplab_predict_keras(model, image_data):
    prediction = model.predict(image_data)
    prediction = np.argmax(prediction, axis=-1)
    return prediction[0]


def deeplab_predict_onnx(model, image_data):
    input_tensors = []
    for i, input_tensor in enumerate(model.get_inputs()):
        input_tensors.append(input_tensor)
    # assume only 1 input tensor for image
    assert len(input_tensors) == 1, 'invalid input tensor number.'

    feed = {input_tensors[0].name: image_data}
    prediction = model.run(None, feed)

    prediction = np.argmax(prediction, axis=-1)
    return prediction[0]


def deeplab_predict_pb(model, image_data):
    # NOTE: a TF 1.x frozen pb graph needs explicit input/output tensor names,
    # so we hardcode them here to fetch the tensors from the model
    output_tensor_name = 'graph/pred_mask/Softmax:0'

    # assume only 1 input tensor for image
    input_tensor_name = 'graph/image_input:0'

    # get input/output tensors
    image_input = model.get_tensor_by_name(input_tensor_name)
    output_tensor = model.get_tensor_by_name(output_tensor_name)

    with tf.Session(graph=model) as sess:
        prediction = sess.run(output_tensor, feed_dict={
            image_input: image_data
        })
    prediction = np.argmax(prediction, axis=-1)
    return prediction[0]


def deeplab_predict_tflite(interpreter, image_data):
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    interpreter.set_tensor(input_details[0]['index'], image_data)
    interpreter.invoke()

    prediction = []
    for output_detail in output_details:
        output_data = interpreter.get_tensor(output_detail['index'])
        prediction.append(output_data)

    prediction = np.argmax(prediction[0], axis=-1)
    return prediction[0]


def deeplab_predict_mnn(interpreter, session, image_data):
    from functools import reduce
    from operator import mul

    # assume only 1 input tensor for image
    input_tensor = interpreter.getSessionInput(session)
    # get input shape
    input_shape = input_tensor.getShape()

    # use a temp tensor to copy data
    # TODO: currently the MNN python binding has a mem leak when creating MNN.Tensor
    # from a numpy array; only creation from tuple is safe, so we convert the input image to tuple
    input_elementsize = reduce(mul, input_shape)
    tmp_input = MNN.Tensor(input_shape, input_tensor.getDataType(),\
                    tuple(image_data.reshape(input_elementsize, -1)), input_tensor.getDimensionType())

    input_tensor.copyFrom(tmp_input)
    interpreter.runSession(session)

    prediction = []
    # we only handle single output model
    output_tensor = interpreter.getSessionOutput(session)
    output_shape = output_tensor.getShape()

    assert output_tensor.getDataType() == MNN.Halide_Type_Float

    # copy output tensor to host, for further postprocess
    output_elementsize = reduce(mul, output_shape)
    tmp_output = MNN.Tensor(output_shape, output_tensor.getDataType(),\
                    tuple(np.zeros(output_shape, dtype=float).reshape(output_elementsize, -1)), output_tensor.getDimensionType())

    output_tensor.copyToHostTensor(tmp_output)
    #tmp_output.printTensorData()

    output_data = np.array(tmp_output.getData(), dtype=float).reshape(output_shape)
    # our postprocess code is based on the TF channel-last format, so if the
    # output format doesn't match, we need to transpose
    if output_tensor.getDimensionType() == MNN.Tensor_DimensionType_Caffe:
        output_data = output_data.transpose((0,2,3,1))
    elif output_tensor.getDimensionType() == MNN.Tensor_DimensionType_Caffe_C4:
        raise ValueError('unsupported output tensor dimension type')

    prediction.append(output_data)
    prediction = np.argmax(prediction[0], axis=-1)
    return prediction[0]

+
|
136 |
+
def plot_confusion_matrix(cm, classes, mIOU, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
|
137 |
+
if normalize:
|
138 |
+
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
|
139 |
+
trained_classes = classes
|
140 |
+
plt.figure()
|
141 |
+
plt.imshow(cm, interpolation='nearest', cmap=cmap)
|
142 |
+
plt.title(title,fontsize=11)
|
143 |
+
tick_marks = np.arange(len(classes))
|
144 |
+
plt.xticks(np.arange(len(trained_classes)), classes, rotation=90,fontsize=9)
|
145 |
+
plt.yticks(tick_marks, classes,fontsize=9)
|
146 |
+
thresh = cm.max() / 2.
|
147 |
+
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
|
148 |
+
plt.text(j, i, np.round(cm[i, j],2), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black", fontsize=7)
|
149 |
+
plt.tight_layout()
|
150 |
+
plt.ylabel('True label',fontsize=9)
|
151 |
+
plt.xlabel('Predicted label',fontsize=9)
|
152 |
+
|
153 |
+
plt.title('Mean IOU: '+ str(np.round(mIOU*100, 2)))
|
154 |
+
output_path = os.path.join('result','confusion_matrix.png')
|
155 |
+
os.makedirs('result', exist_ok=True)
|
156 |
+
plt.savefig(output_path)
|
157 |
+
#plt.show()
|
158 |
+
return
|
159 |
+
|
160 |
+
|
161 |
+
def adjust_axes(r, t, fig, axes):
|
162 |
+
"""
|
163 |
+
Plot - adjust axes
|
164 |
+
"""
|
165 |
+
# get text width for re-scaling
|
166 |
+
bb = t.get_window_extent(renderer=r)
|
167 |
+
text_width_inches = bb.width / fig.dpi
|
168 |
+
# get axis width in inches
|
169 |
+
current_fig_width = fig.get_figwidth()
|
170 |
+
new_fig_width = current_fig_width + text_width_inches
|
171 |
+
propotion = new_fig_width / current_fig_width
|
172 |
+
# get axis limit
|
173 |
+
x_lim = axes.get_xlim()
|
174 |
+
axes.set_xlim([x_lim[0], x_lim[1]*propotion])
|
175 |
+
|
176 |
+
|
177 |
+
def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color, true_p_bar):
|
178 |
+
"""
|
179 |
+
Draw plot using Matplotlib
|
180 |
+
"""
|
181 |
+
# sort the dictionary by decreasing value, into a list of tuples
|
182 |
+
sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1))
|
183 |
+
# unpacking the list of tuples into two lists
|
184 |
+
sorted_keys, sorted_values = zip(*sorted_dic_by_value)
|
185 |
+
#
|
186 |
+
if true_p_bar != "":
|
187 |
+
"""
|
188 |
+
Special case to draw in (green=true predictions) & (red=false predictions)
|
189 |
+
"""
|
190 |
+
fp_sorted = []
|
191 |
+
tp_sorted = []
|
192 |
+
for key in sorted_keys:
|
193 |
+
fp_sorted.append(dictionary[key] - true_p_bar[key])
|
194 |
+
tp_sorted.append(true_p_bar[key])
|
195 |
+
plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Predictions')
|
196 |
+
plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Predictions', left=fp_sorted)
|
197 |
+
# add legend
|
198 |
+
plt.legend(loc='lower right')
|
199 |
+
"""
|
200 |
+
Write number on side of bar
|
201 |
+
"""
|
202 |
+
fig = plt.gcf() # gcf - get current figure
|
203 |
+
axes = plt.gca()
|
204 |
+
r = fig.canvas.get_renderer()
|
205 |
+
for i, val in enumerate(sorted_values):
|
206 |
+
fp_val = fp_sorted[i]
|
207 |
+
tp_val = tp_sorted[i]
|
208 |
+
fp_str_val = " " + str(fp_val)
|
209 |
+
tp_str_val = fp_str_val + " " + str(tp_val)
|
210 |
+
# trick to paint multicolor with offset:
|
211 |
+
# first paint everything and then repaint the first number
|
212 |
+
t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold')
|
213 |
+
plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold')
|
214 |
+
if i == (len(sorted_values)-1): # largest bar
|
215 |
+
adjust_axes(r, t, fig, axes)
|
216 |
+
else:
|
217 |
+
plt.barh(range(n_classes), sorted_values, color=plot_color)
|
218 |
+
"""
|
219 |
+
Write number on side of bar
|
220 |
+
"""
|
221 |
+
fig = plt.gcf() # gcf - get current figure
|
222 |
+
axes = plt.gca()
|
223 |
+
r = fig.canvas.get_renderer()
|
224 |
+
for i, val in enumerate(sorted_values):
|
225 |
+
str_val = " " + str(val) # add a space before
|
226 |
+
if val < 1.0:
|
227 |
+
str_val = " {0:.2f}".format(val)
|
228 |
+
t = plt.text(val, i, str_val, color=plot_color, va='center', fontweight='bold')
|
229 |
+
# re-set axes to show number inside the figure
|
230 |
+
if i == (len(sorted_values)-1): # largest bar
|
231 |
+
adjust_axes(r, t, fig, axes)
|
232 |
+
# set window title
|
233 |
+
fig.canvas.set_window_title(window_title)
|
234 |
+
# write classes in y axis
|
235 |
+
tick_font_size = 12
|
236 |
+
plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size)
|
237 |
+
"""
|
238 |
+
Re-scale height accordingly
|
239 |
+
"""
|
240 |
+
init_height = fig.get_figheight()
|
241 |
+
# comput the matrix height in points and inches
|
242 |
+
dpi = fig.dpi
|
243 |
+
height_pt = n_classes * (tick_font_size * 1.4) # 1.4 (some spacing)
|
244 |
+
height_in = height_pt / dpi
|
245 |
+
# compute the required figure height
|
246 |
+
top_margin = 0.15 # in percentage of the figure height
|
247 |
+
bottom_margin = 0.05 # in percentage of the figure height
|
248 |
+
figure_height = height_in / (1 - top_margin - bottom_margin)
|
249 |
+
# set new height
|
250 |
+
if figure_height > init_height:
|
251 |
+
fig.set_figheight(figure_height)
|
252 |
+
|
253 |
+
# set plot title
|
254 |
+
plt.title(plot_title, fontsize=14)
|
255 |
+
# set axis titles
|
256 |
+
# plt.xlabel('classes')
|
257 |
+
plt.xlabel(x_label, fontsize='large')
|
258 |
+
# adjust size of window
|
259 |
+
fig.tight_layout()
|
260 |
+
# save the plot
|
261 |
+
fig.savefig(output_path)
|
262 |
+
# show image
|
263 |
+
if to_show:
|
264 |
+
plt.show()
|
265 |
+
# close the plot
|
266 |
+
plt.close()
|
267 |
+
|
268 |
+
|

def plot_mIOU_result(IOUs, mIOU, num_classes):
    '''
    Draw mIOU plot (Show IOU's of all classes in decreasing order)
    '''
    window_title = "mIOU"
    plot_title = "mIOU: {0:.3f}%".format(mIOU*100)
    x_label = "Intersection Over Union"
    output_path = os.path.join('result', 'mIOU.png')
    os.makedirs('result', exist_ok=True)
    draw_plot_func(IOUs, num_classes, window_title, plot_title, x_label, output_path, to_show=False, plot_color='royalblue', true_p_bar='')


def save_seg_result(image, pred_mask, gt_mask, image_id, class_names):
    # save predict mask as PNG image
    mask_dir = os.path.join('result', 'predict_mask')
    os.makedirs(mask_dir, exist_ok=True)
    label_save(os.path.join(mask_dir, str(image_id)+'.png'), pred_mask)

    # visualize segmentation result
    title_str = 'Predict Segmentation\nmIOU: ' + str(mIOU(pred_mask, gt_mask))
    gt_title_str = 'GT Segmentation'
    image_array = visualize_segmentation(image, pred_mask, gt_mask, class_names=class_names, title=title_str, gt_title=gt_title_str, ignore_count_threshold=1)

    # save result as JPG
    result_dir = os.path.join('result', 'segmentation')
    os.makedirs(result_dir, exist_ok=True)
    result_file = os.path.join(result_dir, str(image_id)+'.jpg')
    Image.fromarray(image_array).save(result_file)


def generate_matrix(gt_mask, pre_mask, num_classes):
    valid = (gt_mask >= 0) & (gt_mask < num_classes)
    label = num_classes * gt_mask[valid].astype('int') + pre_mask[valid]
    count = np.bincount(label, minlength=num_classes**2)
    confusion_matrix = count.reshape(num_classes, num_classes)
    return confusion_matrix


def eval_mIOU(model, model_format, dataset_path, dataset, class_names, model_input_shape, do_crf=False, save_result=False, show_background=False):
    num_classes = len(class_names)

    # prepare eval dataset generator
    eval_generator = SegmentationGenerator(dataset_path, dataset,
                                           1,  # batch_size
                                           num_classes,
                                           target_size=model_input_shape[::-1],
                                           weighted_type=None,
                                           is_eval=True,
                                           augment=False)

    if model_format == 'MNN':
        # MNN inference engine needs to create a session
        session = model.createSession()

    # confusion matrix for all classes
    confusion_matrix = np.zeros((num_classes, num_classes), dtype=float)

    # get model prediction result
    pbar = tqdm(total=len(eval_generator), desc='Eval model')
    for n, (image_data, y_true) in enumerate(eval_generator):

        # support of tflite model
        if model_format == 'TFLITE':
            y_pred = deeplab_predict_tflite(model, image_data)
        # support of MNN model
        elif model_format == 'MNN':
            y_pred = deeplab_predict_mnn(model, session, image_data)
        # support of TF 1.x frozen pb model
        elif model_format == 'PB':
            y_pred = deeplab_predict_pb(model, image_data)
        # support of ONNX model
        elif model_format == 'ONNX':
            y_pred = deeplab_predict_onnx(model, image_data)
        # normal keras h5 model
        elif model_format == 'H5':
            y_pred = deeplab_predict_keras(model, image_data)
        else:
            raise ValueError('invalid model format')

        image = image_data[0].astype('uint8')
        pred_mask = y_pred.reshape(model_input_shape)
        gt_mask = y_true.reshape(model_input_shape).astype('int')

        # add CRF postprocess
        if do_crf:
            pred_mask = crf_postprocess(image, pred_mask, zero_unsure=False)

        # save segmentation result image
        if save_result:
            # get eval image name to save corresponding result
            image_list = eval_generator.get_batch_image_path(n)
            assert len(image_list) == 1, 'incorrect image batch'
            image_id = os.path.splitext(os.path.basename(image_list[0]))[0]

            save_seg_result(image, pred_mask, gt_mask, image_id, class_names)

        # update confusion matrix
        pred_mask = pred_mask.astype('int')
        gt_mask = gt_mask.astype('int')
        confusion_matrix += generate_matrix(gt_mask, pred_mask, num_classes)

        # compare prediction result with label
        # to update confusion matrix
        #flat_pred = np.ravel(pred_mask).astype('int')
        #flat_label = np.ravel(gt_mask).astype('int')
        #for p, l in zip(flat_pred, flat_label):
            #if l == num_classes or l == 255:
                #continue
            #if l < num_classes and p < num_classes:
                #confusion_matrix[l, p] += 1
            #else:
                #print('Invalid entry encountered, skipping! Label: ', l,
                      #' Prediction: ', p)

        pbar.update(1)
    pbar.close()

    # calculate Pixel accuracy
    PixelAcc = np.diag(confusion_matrix).sum() / confusion_matrix.sum()

    # calculate Class accuracy
    ClassAcc = np.diag(confusion_matrix) / confusion_matrix.sum(axis=1)
    mClassAcc = np.nanmean(ClassAcc)

    # calculate mIoU
    I = np.diag(confusion_matrix)
    U = np.sum(confusion_matrix, axis=0) + np.sum(confusion_matrix, axis=1) - I
    IoU = I/U
    #mIoU = np.nanmean(IoU)

    # calculate FW (Frequency Weighted) IoU
    Freq = np.sum(confusion_matrix, axis=1) / np.sum(confusion_matrix)
    FWIoU = (Freq[Freq > 0] * IoU[Freq > 0]).sum()

    # calculate Dice Coefficient
    DiceCoef = 2*I / (U+I)

    # collect IOU/ClassAcc/Dice/Freq for every class
    IOUs, CLASS_ACCs, DICEs, FREQs = {}, {}, {}, {}
    for i, (class_name, iou, class_acc, dice, freq) in enumerate(zip(class_names, IoU, ClassAcc, DiceCoef, Freq)):
        IOUs[class_name] = iou
        CLASS_ACCs[class_name] = class_acc
        DICEs[class_name] = dice
        FREQs[class_name] = freq

    if not show_background:
        # get rid of background class info
        display_class_names = copy.deepcopy(class_names)
        display_class_names.remove('background')
        display_confusion_matrix = copy.deepcopy(confusion_matrix[1:, 1:])
        IOUs.pop('background')
        num_classes = num_classes - 1
    else:
        display_class_names = class_names
        display_confusion_matrix = confusion_matrix

    # sort IoU result by value, in descending order
    IOUs = OrderedDict(sorted(IOUs.items(), key=operator.itemgetter(1), reverse=True))

    # calculate mIOU from final IOU dict
    mIoU = np.nanmean(list(IOUs.values()))

    # show result
    print('\nevaluation summary')
    for class_name, iou in IOUs.items():
        print('%s: IoU %.4f, Freq %.4f, ClassAcc %.4f, Dice %.4f' % (class_name, iou, FREQs[class_name], CLASS_ACCs[class_name], DICEs[class_name]))
    print('mIoU=%.3f' % (mIoU*100))
    print('FWIoU=%.3f' % (FWIoU*100))
    print('PixelAcc=%.3f' % (PixelAcc*100))
    print('mClassAcc=%.3f' % (mClassAcc*100))

    # Plot mIOU & confusion matrix
    plot_mIOU_result(IOUs, mIoU, num_classes)
    plot_confusion_matrix(display_confusion_matrix, display_class_names, mIoU, normalize=True)

    return mIoU


# load TF 1.x frozen pb graph
def load_graph(model_path):
    # We parse the graph_def file
    with tf.gfile.GFile(model_path, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # We load the graph_def in the default graph
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(
            graph_def,
            input_map=None,
            return_elements=None,
            name="graph",
            op_dict=None,
            producer_op_list=None
        )
    return graph


def load_eval_model(model_path):
    # support of tflite model
    if model_path.endswith('.tflite'):
        from tensorflow.lite.python import interpreter as interpreter_wrapper
        model = interpreter_wrapper.Interpreter(model_path=model_path)
        model.allocate_tensors()
        model_format = 'TFLITE'

    # support of MNN model
    elif model_path.endswith('.mnn'):
        model = MNN.Interpreter(model_path)
        model_format = 'MNN'

    # support of TF 1.x frozen pb model
    elif model_path.endswith('.pb'):
        model = load_graph(model_path)
        model_format = 'PB'

    # support of ONNX model
    elif model_path.endswith('.onnx'):
        model = onnxruntime.InferenceSession(model_path)
        model_format = 'ONNX'

    # normal keras h5 model
    elif model_path.endswith('.h5'):
        custom_object_dict = get_custom_objects()

        model = load_model(model_path, compile=False, custom_objects=custom_object_dict)
        model_format = 'H5'
        K.set_learning_phase(0)
    else:
        raise ValueError('invalid model file')

    return model, model_format


def main():
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description='evaluate Deeplab model (h5/pb/tflite/mnn) with test dataset')
    '''
    Command line options
    '''
    parser.add_argument(
        '--model_path', type=str, required=True,
        help='path to model file')

    parser.add_argument(
        '--dataset_path', type=str, required=True,
        help='dataset path containing images and label png file')

    parser.add_argument(
        '--dataset_file', type=str, required=True,
        help='eval samples txt file')

    parser.add_argument(
        '--classes_path', type=str, required=False, default='configs/voc_classes.txt',
        help='path to class definitions, default=%(default)s')

    parser.add_argument(
        '--model_input_shape', type=str,
        help='model image input size as <height>x<width>, default=%(default)s', default='512x512')

    parser.add_argument(
        '--do_crf', action="store_true",
        help='whether to add CRF postprocess for model output', default=False)

    parser.add_argument(
        '--show_background', default=False, action="store_true",
        help='Show background evaluation info')

    parser.add_argument(
        '--save_result', default=False, action="store_true",
        help='Save the segmentation result image in result/segmentation dir')

    args = parser.parse_args()

    # param parse
    height, width = args.model_input_shape.split('x')
    model_input_shape = (int(height), int(width))

    # add background class to match model & GT
    class_names = get_classes(args.classes_path)
    assert len(class_names) < 254, 'PNG image label only support less than 254 classes.'
    class_names = ['background'] + class_names

    model, model_format = load_eval_model(args.model_path)

    # get dataset list
    dataset = get_data_list(args.dataset_file)

    start = time.time()
    eval_mIOU(model, model_format, args.dataset_path, dataset, class_names, model_input_shape, args.do_crf, args.save_result, args.show_background)
    end = time.time()
    print("Evaluation time cost: {:.6f}s".format(end - start))


if __name__ == '__main__':
    main()
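eval.py accumulates a confusion matrix over the whole validation set and derives every metric from it. The following is a tiny standalone sketch of that arithmetic (made-up 3-class masks, not repository data), showing how the per-class IoU and mIoU printed by `eval_mIOU` fall out of the matrix:

```python
import numpy as np

# Made-up ground-truth / prediction masks for a 3-class problem.
num_classes = 3
gt_mask   = np.array([[0, 0, 1], [1, 2, 2]])
pred_mask = np.array([[0, 1, 1], [1, 2, 0]])

# Same accumulation as generate_matrix(): encode (gt, pred) pairs as a single
# index and histogram them into a num_classes x num_classes matrix.
valid = (gt_mask >= 0) & (gt_mask < num_classes)
label = num_classes * gt_mask[valid].astype('int') + pred_mask[valid]
confusion_matrix = np.bincount(label, minlength=num_classes**2).reshape(num_classes, num_classes)

I = np.diag(confusion_matrix)                                        # per-class intersection
U = confusion_matrix.sum(axis=0) + confusion_matrix.sum(axis=1) - I  # per-class union
print('IoU per class:', I / U)          # [0.333 0.667 0.5]
print('mIoU:', np.nanmean(I / U))       # 0.5
```

The same diagonal, row-sum, and column-sum terms also yield the pixel accuracy, class accuracy, frequency-weighted IoU, and Dice coefficient reported by `eval_mIOU`.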
models/deeplab/example/2007_000039.jpg
ADDED
models/deeplab/example/2007_000039.png
ADDED
models/deeplab/example/2007_000346.jpg
ADDED
models/deeplab/example/2007_000346.png
ADDED
models/deeplab/example/air.jpg
ADDED
models/deeplab/example/car.jpg
ADDED