giantmonkeyTC committed
Commit c2ca15f · 1 Parent(s): 2bbd8e5
This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .DS_Store +0 -0
  2. .circleci/config.yml +35 -0
  3. .circleci/docker/Dockerfile +13 -0
  4. .circleci/test.yml +199 -0
  5. .dev_scripts/benchmark_full_models.txt +26 -0
  6. .dev_scripts/benchmark_options.py +11 -0
  7. .dev_scripts/benchmark_train_models.txt +13 -0
  8. .dev_scripts/covignore.cfg +6 -0
  9. .dev_scripts/diff_coverage_test.sh +42 -0
  10. .dev_scripts/gather_models.py +229 -0
  11. .dev_scripts/gen_benchmark_script.py +193 -0
  12. .dev_scripts/linter.sh +3 -0
  13. .dev_scripts/test_benchmark.sh +128 -0
  14. .dev_scripts/train_benchmark.sh +128 -0
  15. .gitignore +137 -0
  16. .pre-commit-config-zh-cn.yaml +50 -0
  17. .pre-commit-config.yaml +50 -0
  18. .readthedocs.yml +14 -0
  19. CITATION.cff +8 -0
  20. LICENSE +203 -0
  21. MANIFEST.in +6 -0
  22. README_zh-CN.md +427 -0
  23. configs/.DS_Store +0 -0
  24. configs/3dssd/3dssd_4xb4_kitti-3d-car.py +119 -0
  25. configs/3dssd/README.md +45 -0
  26. configs/3dssd/metafile.yml +29 -0
  27. configs/_base_/datasets/kitti-3d-3class.py +167 -0
  28. configs/_base_/datasets/kitti-3d-car.py +165 -0
  29. configs/_base_/datasets/kitti-mono3d.py +100 -0
  30. configs/_base_/datasets/lyft-3d-range100.py +150 -0
  31. configs/_base_/datasets/lyft-3d.py +160 -0
  32. configs/_base_/datasets/nuim-instance.py +70 -0
  33. configs/_base_/datasets/nus-3d.py +169 -0
  34. configs/_base_/datasets/nus-mono3d.py +119 -0
  35. configs/_base_/datasets/s3dis-3d.py +134 -0
  36. configs/_base_/datasets/s3dis-seg.py +169 -0
  37. configs/_base_/datasets/scannet-3d.py +141 -0
  38. configs/_base_/datasets/scannet-seg.py +164 -0
  39. configs/_base_/datasets/semantickitti.py +224 -0
  40. configs/_base_/datasets/sunrgbd-3d.py +126 -0
  41. configs/_base_/datasets/waymoD3-fov-mono3d-3class.py +184 -0
  42. configs/_base_/datasets/waymoD3-mv-mono3d-3class.py +191 -0
  43. configs/_base_/datasets/waymoD5-3d-3class.py +178 -0
  44. configs/_base_/datasets/waymoD5-3d-car.py +173 -0
  45. configs/_base_/datasets/waymoD5-fov-mono3d-3class.py +163 -0
  46. configs/_base_/datasets/waymoD5-mv-mono3d-3class.py +163 -0
  47. configs/_base_/datasets/waymoD5-mv3d-3class.py +178 -0
  48. configs/_base_/default_runtime.py +23 -0
  49. configs/_base_/models/3dssd.py +76 -0
  50. configs/_base_/models/cascade-mask-rcnn_r50_fpn.py +199 -0
.DS_Store ADDED
Binary file (12.3 kB).
 
.circleci/config.yml ADDED
@@ -0,0 +1,35 @@
+ version: 2.1
+
+ # this allows you to use CircleCI's dynamic configuration feature
+ setup: true
+
+ # the path-filtering orb is required to continue a pipeline based on
+ # the path of an updated fileset
+ orbs:
+   path-filtering: circleci/path-filtering@0.1.2
+
+ workflows:
+   # the always-run workflow is always triggered, regardless of the pipeline parameters.
+   always-run:
+     jobs:
+       # the path-filtering/filter job determines which pipeline
+       # parameters to update.
+       - path-filtering/filter:
+           name: check-updated-files
+           # 3-column, whitespace-delimited mapping. One mapping per
+           # line:
+           # <regex path-to-test> <parameter-to-set> <value-of-pipeline-parameter>
+           mapping: |
+             mmdet3d/.* lint_only false
+             requirements/.* lint_only false
+             tests/.* lint_only false
+             tools/.* lint_only false
+             configs/.* lint_only false
+             .circleci/.* lint_only false
+             projects/.* lint_only false
+           base-revision: dev-1.x
+           # this is the path of the configuration we should trigger once
+           # path filtering and pipeline parameter value updates are
+           # complete. In this case, we are using the parent dynamic
+           # configuration itself.
+           config-path: .circleci/test.yml
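
For reference, each row of the three-column `mapping` above pairs a path regex with a pipeline parameter and a value: any changed file matching a regex flips `lint_only` to false, which test.yml then uses to decide which workflows run. A minimal Python sketch of that resolution logic (illustrative only; the real matching happens inside CircleCI's path-filtering orb):

```python
import re

# Copied from the mapping block above: <regex> <parameter> <value>
MAPPING = """\
mmdet3d/.* lint_only false
requirements/.* lint_only false
tests/.* lint_only false
tools/.* lint_only false
configs/.* lint_only false
.circleci/.* lint_only false
projects/.* lint_only false
"""

def resolve_parameters(changed_files):
    """Return the pipeline parameters implied by a list of changed paths."""
    params = {}
    for line in MAPPING.strip().splitlines():
        pattern, name, value = line.split()
        if any(re.fullmatch(pattern, path) for path in changed_files):
            # 'false' here means "run the full test pipeline, not lint only"
            params[name] = value == 'true'
    return params

# A docs-only change leaves lint_only at its default (true);
# touching mmdet3d/ flips it to False and triggers the test workflows.
print(resolve_parameters(['docs/en/index.rst']))      # {}
print(resolve_parameters(['mmdet3d/models/foo.py']))  # {'lint_only': False}
```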
.circleci/docker/Dockerfile ADDED
@@ -0,0 +1,13 @@
+ ARG PYTORCH="1.8.1"
+ ARG CUDA="10.2"
+ ARG CUDNN="7"
+
+ ARG DEBIAN_FRONTEND=noninteractive
+
+ FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
+
+ # To fix GPG key error when running apt-get update
+ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
+ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
+
+ RUN apt-get update && apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx
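
The three build args above are overridden via `--build-arg` in the `docker build` step of .circleci/test.yml; a one-liner showing how the defaults resolve into the base image tag (illustration only):

```python
# How the ARG defaults above compose the FROM tag.
PYTORCH, CUDA, CUDNN = '1.8.1', '10.2', '7'
print(f'pytorch/pytorch:{PYTORCH}-cuda{CUDA}-cudnn{CUDNN}-devel')
# pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel
```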
.circleci/test.yml ADDED
@@ -0,0 +1,199 @@
+ version: 2.1
+
+ # the default pipeline parameters, which will be updated according to
+ # the results of the path-filtering orb
+ parameters:
+   lint_only:
+     type: boolean
+     default: true
+
+ jobs:
+   lint:
+     docker:
+       - image: cimg/python:3.7.4
+     steps:
+       - checkout
+       - run:
+           name: Install pre-commit hook
+           command: |
+             pip install pre-commit
+             pre-commit install
+       - run:
+           name: Linting
+           command: pre-commit run --all-files
+       - run:
+           name: Check docstring coverage
+           command: |
+             pip install interrogate
+             interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-magic --ignore-regex "__repr__" --fail-under 90 mmdet3d
+
+   build_cpu:
+     parameters:
+       # The python version must match available image tags in
+       # https://circleci.com/developer/images/image/cimg/python
+       python:
+         type: string
+       torch:
+         type: string
+       torchvision:
+         type: string
+     docker:
+       - image: cimg/python:<< parameters.python >>
+     resource_class: large
+     steps:
+       - checkout
+       - run:
+           name: Install Libraries
+           command: |
+             sudo apt-get update
+             sudo apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx libjpeg-dev zlib1g-dev libtinfo-dev libncurses5
+       - run:
+           name: Configure Python & pip
+           command: |
+             pip install --upgrade pip
+             pip install wheel
+       - run:
+           name: Install PyTorch
+           command: pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
+       - when:
+           condition:
+             equal: ["3.9.0", << parameters.python >>]
+           steps:
+             - run: pip install "protobuf <= 3.20.1" && sudo apt-get update && sudo apt-get -y install libprotobuf-dev protobuf-compiler cmake
+       - run:
+           name: Install mmdet3d dependencies
+           command: |
+             pip install git+ssh://git@github.com/open-mmlab/mmengine.git@main
+             pip install -U openmim
+             mim install 'mmcv >= 2.0.0rc4'
+             pip install git+ssh://git@github.com/open-mmlab/mmdetection.git@dev-3.x
+             pip install -r requirements/tests.txt
+       - run:
+           name: Build and install
+           command: |
+             pip install -e .
+       - run:
+           name: Run unittests
+           command: |
+             coverage run --branch --source mmdet3d -m pytest tests/
+             coverage xml
+             coverage report -m
+
+   build_cuda:
+     parameters:
+       torch:
+         type: string
+       cuda:
+         type: enum
+         enum: ["10.2", "11.7"]
+       cudnn:
+         type: integer
+         default: 8
+     machine:
+       image: linux-cuda-11:default
+       # docker_layer_caching: true
+     resource_class: gpu.nvidia.small.multi
+     steps:
+       - checkout
+       - run:
+           name: Install nvidia-container-toolkit and Restart Docker
+           command: |
+             sudo apt-get update
+             sudo apt-get install -y nvidia-container-toolkit
+             sudo systemctl restart docker
+       - run:
+           # Cloning repos in VM since Docker doesn't have access to the private key
+           name: Clone Repos
+           command: |
+             git clone -b main --depth 1 ssh://git@github.com/open-mmlab/mmengine.git /home/circleci/mmengine
+             git clone -b dev-3.x --depth 1 ssh://git@github.com/open-mmlab/mmdetection.git /home/circleci/mmdetection
+       - run:
+           name: Build Docker image
+           command: |
+             docker build .circleci/docker -t mmdet3d:gpu --build-arg PYTORCH=<< parameters.torch >> --build-arg CUDA=<< parameters.cuda >> --build-arg CUDNN=<< parameters.cudnn >>
+             docker run --gpus all -t -d -v /home/circleci/project:/mmdetection3d -v /home/circleci/mmengine:/mmengine -v /home/circleci/mmdetection:/mmdetection -w /mmdetection3d --name mmdet3d mmdet3d:gpu
+             docker exec mmdet3d apt-get install -y git
+       - run:
+           name: Install mmdet3d dependencies
+           command: |
+             docker exec mmdet3d pip install -e /mmengine
+             docker exec mmdet3d pip install -U openmim
+             docker exec mmdet3d mim install 'mmcv >= 2.0.0rc4'
+             docker exec mmdet3d pip install -e /mmdetection
+             docker exec mmdet3d pip install -r requirements/tests.txt
+       - run:
+           name: Build and install
+           command: |
+             docker exec mmdet3d pip install -e .
+       - run:
+           name: Run unittests
+           command: |
+             docker exec mmdet3d pytest tests/
+
+ workflows:
+   pr_stage_lint:
+     when: << pipeline.parameters.lint_only >>
+     jobs:
+       - lint:
+           name: lint
+           filters:
+             branches:
+               ignore:
+                 - dev-1.x
+   pr_stage_test:
+     when:
+       not: << pipeline.parameters.lint_only >>
+     jobs:
+       - lint:
+           name: lint
+           filters:
+             branches:
+               ignore:
+                 - dev-1.x
+       - build_cpu:
+           name: minimum_version_cpu
+           torch: 1.8.1
+           torchvision: 0.9.1
+           python: 3.7.4 # The lowest python 3.7.x version available on CircleCI images
+           requires:
+             - lint
+       - build_cpu:
+           name: maximum_version_cpu
+           torch: 2.0.0
+           torchvision: 0.15.1
+           python: 3.9.0
+           requires:
+             - minimum_version_cpu
+       - hold:
+           type: approval
+           requires:
+             - maximum_version_cpu
+       - build_cuda:
+           name: mainstream_version_gpu
+           torch: 1.8.1
+           # Use double quotation mark to explicitly specify its type
+           # as string instead of number
+           cuda: "10.2"
+           cudnn: 7
+           requires:
+             - hold
+       - build_cuda:
+           name: maximum_version_gpu
+           torch: 2.0.0
+           cuda: "11.7"
+           cudnn: 8
+           requires:
+             - hold
+   merge_stage_test:
+     when:
+       not: << pipeline.parameters.lint_only >>
+     jobs:
+       - build_cuda:
+           name: minimum_version_gpu
+           torch: 1.8.1
+           cuda: "10.2"
+           cudnn: 7
+           filters:
+             branches:
+               only:
+                 - dev-1.x
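
Taken together, the `when:` clauses gate the three workflows on the `lint_only` parameter set by config.yml. A rough Python paraphrase of the selection (not CircleCI's actual evaluator):

```python
def workflows_to_run(lint_only: bool):
    """Mirror the `when:` clauses of the three workflows above."""
    if lint_only:
        return ['pr_stage_lint']
    # merge_stage_test is additionally restricted to the dev-1.x branch
    # by its branch filter.
    return ['pr_stage_test', 'merge_stage_test']

assert workflows_to_run(True) == ['pr_stage_lint']
assert workflows_to_run(False) == ['pr_stage_test', 'merge_stage_test']
```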
.dev_scripts/benchmark_full_models.txt ADDED
@@ -0,0 +1,26 @@
+ configs/3dssd/3dssd_4xb4_kitti-3d-car.py
+ configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn_8xb4-cyclic-20e_nus-3d.py
+ configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
+ configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py
+ configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py
+ configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py
+ configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py
+ configs/h3dnet/h3dnet_8xb3_scannet-seg.py
+ configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py
+ configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py
+ configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py
+ configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py
+ configs/paconv/paconv_ssg_8xb8-cosine-150e_s3dis-seg.py
+ configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py
+ configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py
+ configs/point_rcnn/point-rcnn_8xb2_kitti-3d-3class.py
+ configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py
+ configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py
+ configs/pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py
+ configs/pv_rcnn/pv_rcnn_8xb2-80e_kitti-3d-3class.py
+ configs/regnet/pointpillars_hv_regnet-1.6gf_fpn_sbn-all_8xb4-2x_nus-3d.py
+ configs/second/second_hv_secfpn_8xb6-80e_kitti-3d-3class.py
+ configs/second/second_hv_secfpn_8xb6-amp-80e_kitti-3d-3class.py
+ configs/smoke/smoke_dla34_dlaneck_gn-all_4xb8-6x_kitti-mono3d.py
+ configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_nus-3d.py
+ configs/votenet/votenet_8xb8_scannet-3d.py
.dev_scripts/benchmark_options.py ADDED
@@ -0,0 +1,11 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+
+ third_part_libs = [
+     'conda install openblas-devel -c anaconda',
+     "pip install -U git+https://github.com/NVIDIA/MinkowskiEngine -v --no-deps --install-option='--blas_include_dirs=/opt/conda/include' --install-option='--blas=openblas'"  # noqa
+ ]
+ default_floating_range = 0.5
+ model_floating_ranges = {
+     'configs/pointpillars/pointpillars_hv_secfpn_sbn-all_16xb2-2x_waymoD5-3d-3class.py':  # noqa
+     0.7
+ }
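
`default_floating_range` and `model_floating_ranges` presumably define how far a benchmarked metric may drift before a regression is flagged. A hedged sketch of how a checker might consume them (the `check_metric` helper is hypothetical, not part of this commit):

```python
# Values from benchmark_options.py above.
default_floating_range = 0.5
model_floating_ranges = {
    'configs/pointpillars/'
    'pointpillars_hv_secfpn_sbn-all_16xb2-2x_waymoD5-3d-3class.py': 0.7
}

def check_metric(config, measured, expected):
    """Hypothetical helper: pass if `measured` deviates from `expected`
    by no more than the per-model range, else the global default."""
    allowed = model_floating_ranges.get(config, default_floating_range)
    return abs(measured - expected) <= allowed

cfg = ('configs/pointpillars/'
       'pointpillars_hv_secfpn_sbn-all_16xb2-2x_waymoD5-3d-3class.py')
print(check_metric(cfg, measured=63.4, expected=64.0))  # True (0.6 <= 0.7)
print(check_metric('configs/votenet/votenet_8xb8_scannet-3d.py',
                   measured=63.4, expected=64.0))       # False (0.6 > 0.5)
```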
.dev_scripts/benchmark_train_models.txt ADDED
@@ -0,0 +1,13 @@
+ configs/3dssd/3dssd_4xb4_kitti-3d-car.py
+ configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn_8xb4-cyclic-20e_nus-3d.py
+ configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
+ configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py
+ configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py
+ configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py
+ configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py
+ configs/pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py
+ configs/pv_rcnn/pv_rcnn_8xb2-80e_kitti-3d-3class.py
+ configs/second/second_hv_secfpn_8xb6-80e_kitti-3d-3class.py
+ configs/second/second_hv_secfpn_8xb6-amp-80e_kitti-3d-3class.py
+ configs/smoke/smoke_dla34_dlaneck_gn-all_4xb8-6x_kitti-mono3d.py
+ configs/votenet/votenet_8xb8_scannet-3d.py
.dev_scripts/covignore.cfg ADDED
@@ -0,0 +1,6 @@
+ # Each line should be the relative path to the root directory
+ # of this repo. Regular expressions are supported as well.
+ # For example:
+ # .*/utils.py
+
+ .*/__init__.py
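
`diff_coverage_test.sh` below reads this file line by line, skipping blanks and `#` comments and treating each remaining entry as a regex matched against changed file paths. The same filter in Python, for illustration:

```python
import re

def load_ignore_patterns(path='.dev_scripts/covignore.cfg'):
    """Read covignore.cfg, dropping blank lines and '#' comments."""
    with open(path) as f:
        return [line.strip() for line in f
                if line.strip() and not line.startswith('#')]

def is_ignored(file_name, patterns):
    # The shell script uses bash's =~, i.e. an unanchored regex search.
    return any(re.search(p, file_name) for p in patterns)

patterns = ['.*/__init__.py']  # the single entry above
print(is_ignored('mmdet3d/models/__init__.py', patterns))  # True
print(is_ignored('mmdet3d/models/votenet.py', patterns))   # False
```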
.dev_scripts/diff_coverage_test.sh ADDED
@@ -0,0 +1,42 @@
+ #!/bin/bash
+
+ readarray -t IGNORED_FILES < $( dirname "$0" )/covignore.cfg
+
+ REUSE_COVERAGE_REPORT=${REUSE_COVERAGE_REPORT:-0}
+ REPO=${1:-"origin"}
+ BRANCH=${2:-"refactor_dev"}
+
+ git fetch $REPO $BRANCH
+
+ PY_FILES=""
+ for FILE_NAME in $(git diff --name-only ${REPO}/${BRANCH}); do
+     # Only test python files in mmdet3d/ existing in current branch, and not ignored in covignore.cfg
+     if [ ${FILE_NAME: -3} == ".py" ] && [ ${FILE_NAME:0:8} == "mmdet3d/" ] && [ -f "$FILE_NAME" ]; then
+         IGNORED=false
+         for IGNORED_FILE_NAME in "${IGNORED_FILES[@]}"; do
+             # Skip blank lines
+             if [ -z "$IGNORED_FILE_NAME" ]; then
+                 continue
+             fi
+             if [ "${IGNORED_FILE_NAME::1}" != "#" ] && [[ "$FILE_NAME" =~ $IGNORED_FILE_NAME ]]; then
+                 echo "Ignoring $FILE_NAME"
+                 IGNORED=true
+                 break
+             fi
+         done
+         if [ "$IGNORED" = false ]; then
+             PY_FILES="$PY_FILES $FILE_NAME"
+         fi
+     fi
+ done
+
+ # Only test the coverage when PY_FILES are not empty, otherwise they will test the entire project
+ if [ ! -z "${PY_FILES}" ]
+ then
+     if [ "$REUSE_COVERAGE_REPORT" == "0" ]; then
+         coverage run --branch --source mmdet3d -m pytest tests/
+     fi
+     coverage report --fail-under 80 -m $PY_FILES
+     interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-magic --ignore-regex "__repr__" --fail-under 95 $PY_FILES
+ fi
.dev_scripts/gather_models.py ADDED
@@ -0,0 +1,229 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ """Script to gather benchmarked models and prepare them for upload.
+
+ Usage:
+     python gather_models.py ${root_path} ${out_dir}
+
+ Example:
+     python gather_models.py \
+         work_dirs/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d \
+         work_dirs/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d
+
+ Note that before running the above command, rename the directory with the
+ config name if you did not use the default directory name, create
+ a corresponding directory 'pgd' under the above path and put the used config
+ into it.
+ """
+
+ import argparse
+ import glob
+ import json
+ import shutil
+ import subprocess
+ from os import path as osp
+
+ import mmengine
+ import torch
+
+ # build schedule look-up table to automatically find the final model
+ SCHEDULES_LUT = {
+     '_1x_': 12,
+     '_2x_': 24,
+     '_20e_': 20,
+     '_3x_': 36,
+     '_4x_': 48,
+     '_24e_': 24,
+     '_6x_': 73,
+     '_50e_': 50,
+     '_80e_': 80,
+     '_100e_': 100,
+     '_150e_': 150,
+     '_200e_': 200,
+     '_250e_': 250,
+     '_400e_': 400
+ }
+
+ # TODO: add support for lyft dataset
+ RESULTS_LUT = {
+     'coco': ['bbox_mAP', 'segm_mAP'],
+     'nus': ['pts_bbox_NuScenes/NDS', 'NDS'],
+     'kitti-3d-3class': ['KITTI/Overall_3D_moderate', 'Overall_3D_moderate'],
+     'kitti-3d-car': ['KITTI/Car_3D_moderate_strict', 'Car_3D_moderate_strict'],
+     'lyft': ['score'],
+     'scannet_seg': ['miou'],
+     's3dis_seg': ['miou'],
+     'scannet': ['mAP_0.50'],
+     'sunrgbd': ['mAP_0.50'],
+     'kitti-mono3d': [
+         'img_bbox/KITTI/Car_3D_AP40_moderate_strict',
+         'Car_3D_AP40_moderate_strict'
+     ],
+     'nus-mono3d': ['img_bbox_NuScenes/NDS', 'NDS']
+ }
+
+
+ def get_model_dataset(log_json_path):
+     for key in RESULTS_LUT:
+         if log_json_path.find(key) != -1:
+             return key
+
+
+ def process_checkpoint(in_file, out_file):
+     checkpoint = torch.load(in_file, map_location='cpu')
+     # remove optimizer for smaller file size
+     if 'optimizer' in checkpoint:
+         del checkpoint['optimizer']
+     # if it is necessary to remove some sensitive data in checkpoint['meta'],
+     # add the code here.
+     torch.save(checkpoint, out_file)
+     sha = subprocess.check_output(['sha256sum', out_file]).decode()
+     # strip a trailing '.pth' if present before appending the hash suffix
+     # (str.rstrip('.pth') would drop any trailing '.', 'p', 't', 'h' chars)
+     stem = out_file[:-len('.pth')] if out_file.endswith('.pth') else out_file
+     final_file = stem + '-{}.pth'.format(sha[:8])
+     subprocess.Popen(['mv', out_file, final_file])
+     return final_file
+
+
+ def get_final_epoch(config):
+     if config.find('grid_rcnn') != -1 and config.find('2x') != -1:
+         # grid_rcnn 2x trains 25 epochs
+         return 25
+
+     for schedule_name, epoch_num in SCHEDULES_LUT.items():
+         if config.find(schedule_name) != -1:
+             return epoch_num
+
+
+ def get_best_results(log_json_path):
+     dataset = get_model_dataset(log_json_path)
+     max_dict = dict()
+     max_memory = 0
+     with open(log_json_path, 'r') as f:
+         for line in f.readlines():
+             log_line = json.loads(line)
+             if 'mode' not in log_line.keys():
+                 continue
+
+             # record memory and find best results & epochs
+             if log_line['mode'] == 'train' \
+                     and max_memory <= log_line['memory']:
+                 max_memory = log_line['memory']
+
+             elif log_line['mode'] == 'val':
+                 result_dict = {
+                     key: log_line[key]
+                     for key in RESULTS_LUT[dataset] if key in log_line
+                 }
+                 if len(max_dict) == 0:
+                     max_dict = result_dict
+                     max_dict['epoch'] = log_line['epoch']
+                 elif all(
+                         [max_dict[key] <= result_dict[key]
+                          for key in result_dict]):
+                     max_dict.update(result_dict)
+                     max_dict['epoch'] = log_line['epoch']
+
+     max_dict['memory'] = max_memory
+     return max_dict
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(description='Gather benchmarked models')
+     parser.add_argument(
+         'root',
+         type=str,
+         help='root path of benchmarked models to be gathered')
+     parser.add_argument(
+         'out', type=str, help='output path of gathered models to be stored')
+
+     args = parser.parse_args()
+     return args
+
+
+ def main():
+     args = parse_args()
+     models_root = args.root
+     models_out = args.out
+     mmengine.mkdir_or_exist(models_out)
+
+     # find all models in the root directory to be gathered
+     raw_configs = list(mmengine.scandir('./configs', '.py', recursive=True))
+
+     # filter out configs that were not trained in the experiments dir
+     used_configs = []
+     for raw_config in raw_configs:
+         if osp.exists(osp.join(models_root, raw_config)):
+             used_configs.append(raw_config)
+     print(f'Found {len(used_configs)} models to be gathered')
+
+     # find the final checkpoint and log file for each trained config
+     # and parse the best performance
+     model_infos = []
+     for used_config in used_configs:
+         # get logs
+         log_json_path = glob.glob(osp.join(models_root, '*.log.json'))[0]
+         log_txt_path = glob.glob(osp.join(models_root, '*.log'))[0]
+         model_performance = get_best_results(log_json_path)
+         # check the parsed results before indexing into them
+         if model_performance is None:
+             print(f'Obtained no performance for model {used_config}')
+             continue
+
+         final_epoch = model_performance['epoch']
+         final_model = 'epoch_{}.pth'.format(final_epoch)
+         model_path = osp.join(models_root, final_model)
+
+         # skip if the model is still training
+         if not osp.exists(model_path):
+             print(f'Expected {model_path} does not exist!')
+             continue
+
+         model_time = osp.split(log_txt_path)[-1].split('.')[0]
+         model_infos.append(
+             dict(
+                 config=used_config,
+                 results=model_performance,
+                 epochs=final_epoch,
+                 model_time=model_time,
+                 log_json_path=osp.split(log_json_path)[-1]))
+
+     # publish model for each checkpoint
+     publish_model_infos = []
+     for model in model_infos:
+         # strip the '.py' suffix explicitly; str.rstrip('.py') would drop
+         # any trailing '.', 'p', 'y' characters instead
+         config_stem = model['config'][:-len('.py')]
+         model_publish_dir = osp.join(models_out, config_stem)
+         mmengine.mkdir_or_exist(model_publish_dir)
+
+         model_name = config_stem.split('/')[-1] + '_' + model['model_time']
+         publish_model_path = osp.join(model_publish_dir, model_name)
+         trained_model_path = osp.join(models_root,
+                                       'epoch_{}.pth'.format(model['epochs']))
+
+         # convert model
+         final_model_path = process_checkpoint(trained_model_path,
+                                               publish_model_path)
+
+         # copy log
+         shutil.copy(
+             osp.join(models_root, model['log_json_path']),
+             osp.join(model_publish_dir, f'{model_name}.log.json'))
+         shutil.copy(
+             osp.join(models_root,
+                      model['log_json_path'][:-len('.json')]),
+             osp.join(model_publish_dir, f'{model_name}.log'))
+
+         # copy config to guarantee reproducibility
+         config_path = model['config']
+         config_path = osp.join(
+             'configs',
+             config_path) if 'configs' not in config_path else config_path
+         target_config_path = osp.split(config_path)[-1]
+         shutil.copy(config_path,
+                     osp.join(model_publish_dir, target_config_path))
+
+         model['model_path'] = final_model_path
+         publish_model_infos.append(model)
+
+     models = dict(models=publish_model_infos)
+     print(f'Gathered {len(publish_model_infos)} models in total')
+     mmengine.dump(models, osp.join(models_out, 'model_info.json'))
+
+
+ if __name__ == '__main__':
+     main()
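
As a quick sanity check of the schedule lookup above: the `SCHEDULES_LUT` keys assume underscore-delimited schedule tokens such as `_80e_`, i.e. the old-style config names still used by the benchmark shell scripts below.

```python
# Minimal sketch of the SCHEDULES_LUT substring lookup in gather_models.py.
SCHEDULES_LUT = {'_1x_': 12, '_2x_': 24, '_80e_': 80, '_250e_': 250}  # excerpt

def get_final_epoch(config):
    for schedule_name, epoch_num in SCHEDULES_LUT.items():
        if schedule_name in config:
            return epoch_num

# '_80e_' selects epoch_80.pth as the checkpoint to publish.
print(get_final_epoch('hv_second_secfpn_6x8_80e_kitti-3d-3class.py'))  # 80
```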
.dev_scripts/gen_benchmark_script.py ADDED
@@ -0,0 +1,193 @@
+ import argparse
+ import re
+ from os import path as osp
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(
+         description='Generate benchmark training/testing scripts')
+     parser.add_argument(
+         '--input_file',
+         required=False,
+         type=str,
+         help='Input file containing the paths '
+         'of configs to be trained/tested.')
+     parser.add_argument(
+         '--output_file',
+         required=True,
+         type=str,
+         help='Output file containing the '
+         'commands to train/test selected models.')
+     parser.add_argument(
+         '--gpus_per_node',
+         type=int,
+         default=8,
+         help='GPUs per node config for slurm, '
+         'should be set according to your slurm environment')
+     parser.add_argument(
+         '--cpus_per_task',
+         type=int,
+         default=5,
+         help='CPUs per task config for slurm, '
+         'should be set according to your slurm environment')
+     parser.add_argument(
+         '--gpus',
+         type=int,
+         default=8,
+         help='Total number of GPUs used for slurm (in testing), '
+         'should be set according to your slurm environment')
+     parser.add_argument(
+         '--mode', type=str, default='train', help='Train or test')
+     parser.add_argument(
+         '--long_work_dir',
+         action='store_true',
+         help='Whether to use the full relative path of a config as work dir')
+     parser.add_argument(
+         '--max_keep_ckpts',
+         type=int,
+         default=1,
+         help='The max number of checkpoints saved in training')
+     parser.add_argument(
+         '--full_log',
+         action='store_true',
+         help='Whether to save the full log in a file')
+
+     args = parser.parse_args()
+     return args
+
+
+ args = parse_args()
+ assert args.mode in ['train', 'test'], 'Currently we only support ' \
+     'automatically generating training or testing scripts.'
+
+ config_paths = []
+
+ if args.input_file is not None:
+     with open(args.input_file, 'r') as fi:
+         config_paths = fi.read().strip().split('\n')
+ else:
+     while True:
+         print('Please type a config path and '
+               'press enter (press enter directly to exit):')
+         config_path = input()
+         if config_path != '':
+             config_paths.append(config_path)
+         else:
+             break
+
+ script = '''PARTITION=$1
+ CHECKPOINT_DIR=$2
+
+ '''
+
+ if args.mode == 'train':
+     for i, config_path in enumerate(config_paths):
+         root_dir = osp.dirname(osp.dirname(osp.abspath(__file__)))
+         if not osp.exists(osp.join(root_dir, config_path)):
+             print(f'Invalid config path (does not exist):\n{config_path}')
+             continue
+
+         config_name = config_path.split('/')[-1][:-3]
+         match_obj = re.match(r'^.*_[0-9]+x([0-9]+)_.*$', config_name)
+         if match_obj is None:
+             print(f'Invalid config path (no GPU num in '
+                   f'config name):\n{config_path}')
+             continue
+
+         gpu_num = int(match_obj.group(1))
+         work_dir_name = config_path if args.long_work_dir else config_name
+
+         script += f"echo '{config_path}' &\n"
+         if args.full_log:
+             script += f'mkdir -p $CHECKPOINT_DIR/{work_dir_name}\n'
+
+         # training commands
+         script += f'GPUS={gpu_num} GPUS_PER_NODE={args.gpus_per_node} ' \
+                   f'CPUS_PER_TASK={args.cpus_per_task} ' \
+                   f'./tools/slurm_train.sh $PARTITION {config_name} ' \
+                   f'{config_path} \\\n'
+         script += f'$CHECKPOINT_DIR/{work_dir_name} --cfg-options ' \
+                   f'checkpoint_config.max_keep_ckpts=' \
+                   f'{args.max_keep_ckpts} \\\n'
+
+         # if output full log, redirect stdout and stderr to
+         # another log file in work dir
+         if args.full_log:
+             script += f'2>&1|tee $CHECKPOINT_DIR/{work_dir_name}' \
+                       f'/FULL_LOG.txt &\n'
+         else:
+             script += '>/dev/null &\n'
+
+         if i != len(config_paths) - 1:
+             script += '\n'
+
+         print(f'Successfully generated script for {config_name}')
+
+     with open(args.output_file, 'w') as fo:
+         fo.write(script)
+
+ elif args.mode == 'test':
+     for i, config_path in enumerate(config_paths):
+         root_dir = osp.dirname(osp.dirname(osp.abspath(__file__)))
+         if not osp.exists(osp.join(root_dir, config_path)):
+             print(f'Invalid config path (does not exist):\n{config_path}')
+             continue
+
+         config_name = config_path.split('/')[-1][:-3]
+
+         tasks = {
+             'scannet_seg', 'scannet', 's3dis_seg', 'sunrgbd', 'kitti', 'nus',
+             'lyft', 'waymo'
+         }
+         eval_option = None
+         for task in tasks:
+             if task in config_name:
+                 eval_option = task
+                 break
+         if eval_option is None:
+             print(f'Invalid config path (invalid task):\n{config_path}')
+             continue
+
+         work_dir_name = config_path if args.long_work_dir else config_name
+
+         script += f"echo '{config_path}' &\n"
+         if args.full_log:
+             script += f'mkdir -p $CHECKPOINT_DIR/{work_dir_name}\n'
+
+         # testing commands
+         script += f'GPUS={args.gpus} GPUS_PER_NODE={args.gpus_per_node} ' \
+                   f'CPUS_PER_TASK={args.cpus_per_task} ' \
+                   f'./tools/slurm_test.sh $PARTITION {config_name} ' \
+                   f'{config_path} \\\n'
+         script += f'$CHECKPOINT_DIR/{work_dir_name}/latest.pth '
+
+         if eval_option in ['scannet_seg', 's3dis_seg']:
+             script += '--eval mIoU \\\n'
+         elif eval_option in ['scannet', 'sunrgbd', 'kitti', 'nus']:
+             script += '--eval map \\\n'
+         elif eval_option in ['lyft']:
+             script += f'--format-only --eval-options jsonfile_prefix=' \
+                       f'$CHECKPOINT_DIR/{work_dir_name}/results_challenge ' \
+                       f'csv_savepath=$CHECKPOINT_DIR/{work_dir_name}/' \
+                       f'results_challenge.csv \\\n'
+         elif eval_option in ['waymo']:
+             script += f'--eval waymo --eval-options pklfile_prefix=' \
+                       f'$CHECKPOINT_DIR/{work_dir_name}/kitti_results ' \
+                       f'submission_prefix=$CHECKPOINT_DIR/{work_dir_name}/' \
+                       f'kitti_results \\\n'
+
+         # if output full log, redirect stdout and stderr to
+         # another log file in work dir
+         if args.full_log:
+             script += f'2>&1|tee $CHECKPOINT_DIR/{work_dir_name}' \
+                       f'/FULL_LOG.txt &\n'
+         else:
+             script += '>/dev/null &\n'
+
+         if i != len(config_paths) - 1:
+             script += '\n'
+
+         print(f'Successfully generated script for {config_name}')
+
+     with open(args.output_file, 'w') as fo:
+         fo.write(script)
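
The training branch above infers the GPU count from the `_<samples>x<gpus>_` token in the config name. A quick check of that regex (note it matches the old-style names used by the benchmark shell scripts below, not the new `8xb6`-style names, which the script reports as invalid):

```python
import re

def gpu_count(config_name):
    """Mirror the GPU-count extraction used in the training branch above."""
    match_obj = re.match(r'^.*_[0-9]+x([0-9]+)_.*$', config_name)
    return int(match_obj.group(1)) if match_obj else None

print(gpu_count('hv_second_secfpn_6x8_80e_kitti-3d-3class'))   # 8
print(gpu_count('second_hv_secfpn_8xb6-80e_kitti-3d-3class'))  # None
```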
.dev_scripts/linter.sh ADDED
@@ -0,0 +1,3 @@
+ yapf -r -i mmdet3d/ configs/ tests/ tools/
+ isort mmdet3d/ configs/ tests/ tools/
+ flake8 .
.dev_scripts/test_benchmark.sh ADDED
@@ -0,0 +1,128 @@
+ PARTITION=$1
+ CHECKPOINT_DIR=$2
+
+ echo 'configs/3dssd/3dssd_4xb4_kitti-3d-car.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION 3dssd_4x4_kitti-3d-car configs/3dssd/3dssd_4xb4_kitti-3d-car.py \
+ $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py/FULL_LOG.txt &
+
+ echo 'configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION centerpoint_02pillar_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py \
+ $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py/FULL_LOG.txt &
+
+ echo 'configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py \
+ $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py/FULL_LOG.txt &
+
+ echo 'configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py \
+ $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py/FULL_LOG.txt &
+
+ echo 'configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION groupfree3d_8x4_scannet-3d-18class-L6-O256 configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py \
+ $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py/FULL_LOG.txt &
+
+ echo 'configs/h3dnet/h3dnet_8xb3_scannet-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION h3dnet_3x8_scannet-3d-18class configs/h3dnet/h3dnet_8xb3_scannet-seg.py \
+ $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py/FULL_LOG.txt &
+
+ echo 'configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py \
+ $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py/FULL_LOG.txt &
+
+ echo 'configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION imvotenet_stage2_16x8_sunrgbd-3d-10class configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py \
+ $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py/FULL_LOG.txt &
+
+ echo 'configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION imvoxelnet_4x8_kitti-3d-car configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py \
+ $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py/FULL_LOG.txt &
+
+ echo 'configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION pointnet2_msg_16x2_cosine_80e_s3dis_seg-3d-13class configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py \
+ $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py/latest.pth --eval mIoU \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py/FULL_LOG.txt &
+
+ echo 'configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION pointnet2_msg_16x2_cosine_250e_scannet_seg-3d-20class configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py \
+ $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py/FULL_LOG.txt &
+
+ echo 'configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_pointpillars_fpn_sbn-all_2x8_2x_lyft-3d configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py \
+ $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/latest.pth --format-only --eval-options jsonfile_prefix=$CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/results_challenge csv_savepath=$CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/results_challenge.csv \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/FULL_LOG.txt &
+
+ echo 'configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py \
+ $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/latest.pth --eval waymo --eval-options pklfile_prefix=$CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/kitti_results submission_prefix=$CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/kitti_results \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py \
+ $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py/FULL_LOG.txt &
+
+ echo 'configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_second_secfpn_6x8_80e_kitti-3d-3class configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py \
+ $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/latest.pth --format-only --eval-options jsonfile_prefix=$CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/results_challenge csv_savepath=$CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/results_challenge.csv \
+ 2>&1|tee $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/FULL_LOG.txt &
+
+ echo 'configs/votenet/votenet_8xb8_scannet-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION votenet_8x8_scannet-3d-18class configs/votenet/votenet_8xb8_scannet-3d.py \
+ $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py/FULL_LOG.txt &
.dev_scripts/train_benchmark.sh ADDED
@@ -0,0 +1,128 @@
+ PARTITION=$1
+ CHECKPOINT_DIR=$2
+
+ echo 'configs/3dssd/3dssd_4xb4_kitti-3d-car.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py
+ GPUS=4 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION 3dssd_4x4_kitti-3d-car configs/3dssd/3dssd_4xb4_kitti-3d-car.py \
+ $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py/FULL_LOG.txt &
+
+ echo 'configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION centerpoint_02pillar_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py \
+ $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py/FULL_LOG.txt &
+
+ echo 'configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py \
+ $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py/FULL_LOG.txt &
+
+ echo 'configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py \
+ $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py/FULL_LOG.txt &
+
+ echo 'configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py
+ GPUS=4 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION groupfree3d_8x4_scannet-3d-18class-L6-O256 configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py \
+ $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py/FULL_LOG.txt &
+
+ echo 'configs/h3dnet/h3dnet_8xb3_scannet-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION h3dnet_3x8_scannet-3d-18class configs/h3dnet/h3dnet_8xb3_scannet-seg.py \
+ $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py/FULL_LOG.txt &
+
+ echo 'configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py
+ GPUS=4 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py \
+ $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py/FULL_LOG.txt &
+
+ echo 'configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION imvotenet_stage2_16x8_sunrgbd-3d-10class configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py \
+ $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py/FULL_LOG.txt &
+
+ echo 'configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION imvoxelnet_4x8_kitti-3d-car configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py \
+ $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py/FULL_LOG.txt &
+
+ echo 'configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py
+ GPUS=2 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION pointnet2_msg_16x2_cosine_80e_s3dis_seg-3d-13class configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py \
+ $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py/FULL_LOG.txt &
+
+ echo 'configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py
+ GPUS=2 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION pointnet2_msg_16x2_cosine_250e_scannet_seg-3d-20class configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py \
+ $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py/FULL_LOG.txt &
+
+ echo 'configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_pointpillars_fpn_sbn-all_2x8_2x_lyft-3d configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py \
+ $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/FULL_LOG.txt &
+
+ echo 'configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py
+ GPUS=16 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py \
+ $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py \
+ $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py/FULL_LOG.txt &
+
+ echo 'configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_second_secfpn_6x8_80e_kitti-3d-3class configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py
+ GPUS=16 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py \
+ $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/FULL_LOG.txt &
+
+ echo 'configs/votenet/votenet_8xb8_scannet-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION votenet_8x8_scannet-3d-18class configs/votenet/votenet_8xb8_scannet-3d.py \
+ $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
+ 2>&1|tee $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py/FULL_LOG.txt &
.gitignore ADDED
@@ -0,0 +1,137 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.ipynb
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *.cover
48
+ .hypothesis/
49
+ .pytest_cache/
50
+
51
+ # Translations
52
+ *.mo
53
+ *.pot
54
+
55
+ # Django stuff:
56
+ *.log
57
+ local_settings.py
58
+ db.sqlite3
59
+
60
+ # Flask stuff:
61
+ instance/
62
+ .webassets-cache
63
+
64
+ # Scrapy stuff:
65
+ .scrapy
66
+
67
+ # Sphinx documentation
68
+ docs/en/_build/
69
+ docs/zh_cn/_build/
70
+
71
+ # PyBuilder
72
+ target/
73
+
74
+ # Jupyter Notebook
75
+ .ipynb_checkpoints
76
+
77
+ # pyenv
78
+ .python-version
79
+
80
+ # celery beat schedule file
81
+ celerybeat-schedule
82
+
83
+ # SageMath parsed files
84
+ *.sage.py
85
+
86
+ # Environments
87
+ .env
88
+ .venv
89
+ env/
90
+ venv/
91
+ ENV/
92
+ env.bak/
93
+ venv.bak/
94
+
95
+ # Spyder project settings
96
+ .spyderproject
97
+ .spyproject
98
+
99
+ # Rope project settings
100
+ .ropeproject
101
+
102
+ # mkdocs documentation
103
+ /site
104
+
105
+ # mypy
106
+ .mypy_cache/
107
+
108
+ # cython generated cpp
109
+ data
110
+ .vscode
111
+ .idea
112
+
113
+ # custom
114
+ *.pkl
115
+ *.pkl.json
116
+ *.log.json
117
+ work_dirs/
118
+ exps/
119
+ *~
120
+ mmdet3d/.mim
121
+
122
+ # Pytorch
123
+ *.pth
124
+
125
+ # demo
126
+ *.jpg
127
+ *.png
128
+ data/s3dis/Stanford3dDataset_v1.2_Aligned_Version/
129
+ data/scannet/scans/
130
+ data/sunrgbd/OFFICIAL_SUNRGBD/
131
+ *.obj
132
+ *.ply
133
+
134
+ # Waymo evaluation
135
+ mmdet3d/evaluation/functional/waymo_utils/compute_detection_metrics_main
136
+ mmdet3d/evaluation/functional/waymo_utils/compute_detection_let_metrics_main
137
+ mmdet3d/evaluation/functional/waymo_utils/compute_segmentation_metrics_main
.pre-commit-config-zh-cn.yaml ADDED
@@ -0,0 +1,50 @@
1
+ repos:
2
+ - repo: https://gitee.com/openmmlab/mirrors-flake8
3
+ rev: 5.0.4
4
+ hooks:
5
+ - id: flake8
6
+ - repo: https://gitee.com/openmmlab/mirrors-isort
7
+ rev: 5.11.5
8
+ hooks:
9
+ - id: isort
10
+ - repo: https://gitee.com/openmmlab/mirrors-yapf
11
+ rev: v0.32.0
12
+ hooks:
13
+ - id: yapf
14
+ - repo: https://gitee.com/openmmlab/mirrors-pre-commit-hooks
15
+ rev: v4.3.0
16
+ hooks:
17
+ - id: trailing-whitespace
18
+ - id: check-yaml
19
+ - id: end-of-file-fixer
20
+ - id: requirements-txt-fixer
21
+ - id: double-quote-string-fixer
22
+ - id: check-merge-conflict
23
+ - id: fix-encoding-pragma
24
+ args: ["--remove"]
25
+ - id: mixed-line-ending
26
+ args: ["--fix=lf"]
27
+ - repo: https://gitee.com/openmmlab/mirrors-codespell
28
+ rev: v2.2.1
29
+ hooks:
30
+ - id: codespell
31
+ - repo: https://gitee.com/openmmlab/mirrors-mdformat
32
+ rev: 0.7.9
33
+ hooks:
34
+ - id: mdformat
35
+ args: ["--number"]
36
+ additional_dependencies:
37
+ - mdformat-openmmlab
38
+ - mdformat_frontmatter
39
+ - linkify-it-py
40
+ - repo: https://gitee.com/openmmlab/mirrors-docformatter
41
+ rev: v1.3.1
42
+ hooks:
43
+ - id: docformatter
44
+ args: ["--in-place", "--wrap-descriptions", "79"]
45
+ - repo: https://gitee.com/openmmlab/pre-commit-hooks
46
+ rev: v0.2.0
47
+ hooks:
48
+ - id: check-algo-readme
49
+ - id: check-copyright
50
+ args: ["mmdet3d"]
.pre-commit-config.yaml ADDED
@@ -0,0 +1,50 @@
1
+ repos:
2
+ - repo: https://github.com/PyCQA/flake8
3
+ rev: 5.0.4
4
+ hooks:
5
+ - id: flake8
6
+ - repo: https://github.com/PyCQA/isort
7
+ rev: 5.11.5
8
+ hooks:
9
+ - id: isort
10
+ - repo: https://github.com/pre-commit/mirrors-yapf
11
+ rev: v0.32.0
12
+ hooks:
13
+ - id: yapf
14
+ - repo: https://github.com/pre-commit/pre-commit-hooks
15
+ rev: v4.3.0
16
+ hooks:
17
+ - id: trailing-whitespace
18
+ - id: check-yaml
19
+ - id: end-of-file-fixer
20
+ - id: requirements-txt-fixer
21
+ - id: double-quote-string-fixer
22
+ - id: check-merge-conflict
23
+ - id: fix-encoding-pragma
24
+ args: ["--remove"]
25
+ - id: mixed-line-ending
26
+ args: ["--fix=lf"]
27
+ - repo: https://github.com/codespell-project/codespell
28
+ rev: v2.2.1
29
+ hooks:
30
+ - id: codespell
31
+ - repo: https://github.com/executablebooks/mdformat
32
+ rev: 0.7.9
33
+ hooks:
34
+ - id: mdformat
35
+ args: [ "--number" ]
36
+ additional_dependencies:
37
+ - mdformat-openmmlab
38
+ - mdformat_frontmatter
39
+ - linkify-it-py
40
+ - repo: https://github.com/myint/docformatter
41
+ rev: v1.3.1
42
+ hooks:
43
+ - id: docformatter
44
+ args: ["--in-place", "--wrap-descriptions", "79"]
45
+ - repo: https://github.com/open-mmlab/pre-commit-hooks
46
+ rev: v0.2.0 # Use the ref you want to point at
47
+ hooks:
48
+ - id: check-algo-readme
49
+ - id: check-copyright
50
+ args: ["mmdet3d"] # replace the dir_to_check with your expected directory to check
.readthedocs.yml ADDED
@@ -0,0 +1,14 @@
1
+ version: 2
2
+
3
+ build:
4
+ os: ubuntu-22.04
5
+ tools:
6
+ python: "3.8"
7
+
8
+ formats:
9
+ - epub
10
+
11
+ python:
12
+ install:
13
+ - requirements: requirements/docs.txt
14
+ - requirements: requirements/readthedocs.txt
CITATION.cff ADDED
@@ -0,0 +1,8 @@
1
+ cff-version: 1.2.0
2
+ message: "If you use this software, please cite it as below."
3
+ authors:
4
+ - name: "MMDetection3D Contributors"
5
+ title: "OpenMMLab's Next-generation Platform for General 3D Object Detection"
6
+ date-released: 2020-07-23
7
+ url: "https://github.com/open-mmlab/mmdetection3d"
8
+ license: Apache-2.0
LICENSE ADDED
@@ -0,0 +1,203 @@
1
+ Copyright 2018-2019 Open-MMLab. All rights reserved.
2
+
3
+ Apache License
4
+ Version 2.0, January 2004
5
+ http://www.apache.org/licenses/
6
+
7
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8
+
9
+ 1. Definitions.
10
+
11
+ "License" shall mean the terms and conditions for use, reproduction,
12
+ and distribution as defined by Sections 1 through 9 of this document.
13
+
14
+ "Licensor" shall mean the copyright owner or entity authorized by
15
+ the copyright owner that is granting the License.
16
+
17
+ "Legal Entity" shall mean the union of the acting entity and all
18
+ other entities that control, are controlled by, or are under common
19
+ control with that entity. For the purposes of this definition,
20
+ "control" means (i) the power, direct or indirect, to cause the
21
+ direction or management of such entity, whether by contract or
22
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
23
+ outstanding shares, or (iii) beneficial ownership of such entity.
24
+
25
+ "You" (or "Your") shall mean an individual or Legal Entity
26
+ exercising permissions granted by this License.
27
+
28
+ "Source" form shall mean the preferred form for making modifications,
29
+ including but not limited to software source code, documentation
30
+ source, and configuration files.
31
+
32
+ "Object" form shall mean any form resulting from mechanical
33
+ transformation or translation of a Source form, including but
34
+ not limited to compiled object code, generated documentation,
35
+ and conversions to other media types.
36
+
37
+ "Work" shall mean the work of authorship, whether in Source or
38
+ Object form, made available under the License, as indicated by a
39
+ copyright notice that is included in or attached to the work
40
+ (an example is provided in the Appendix below).
41
+
42
+ "Derivative Works" shall mean any work, whether in Source or Object
43
+ form, that is based on (or derived from) the Work and for which the
44
+ editorial revisions, annotations, elaborations, or other modifications
45
+ represent, as a whole, an original work of authorship. For the purposes
46
+ of this License, Derivative Works shall not include works that remain
47
+ separable from, or merely link (or bind by name) to the interfaces of,
48
+ the Work and Derivative Works thereof.
49
+
50
+ "Contribution" shall mean any work of authorship, including
51
+ the original version of the Work and any modifications or additions
52
+ to that Work or Derivative Works thereof, that is intentionally
53
+ submitted to Licensor for inclusion in the Work by the copyright owner
54
+ or by an individual or Legal Entity authorized to submit on behalf of
55
+ the copyright owner. For the purposes of this definition, "submitted"
56
+ means any form of electronic, verbal, or written communication sent
57
+ to the Licensor or its representatives, including but not limited to
58
+ communication on electronic mailing lists, source code control systems,
59
+ and issue tracking systems that are managed by, or on behalf of, the
60
+ Licensor for the purpose of discussing and improving the Work, but
61
+ excluding communication that is conspicuously marked or otherwise
62
+ designated in writing by the copyright owner as "Not a Contribution."
63
+
64
+ "Contributor" shall mean Licensor and any individual or Legal Entity
65
+ on behalf of whom a Contribution has been received by Licensor and
66
+ subsequently incorporated within the Work.
67
+
68
+ 2. Grant of Copyright License. Subject to the terms and conditions of
69
+ this License, each Contributor hereby grants to You a perpetual,
70
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
71
+ copyright license to reproduce, prepare Derivative Works of,
72
+ publicly display, publicly perform, sublicense, and distribute the
73
+ Work and such Derivative Works in Source or Object form.
74
+
75
+ 3. Grant of Patent License. Subject to the terms and conditions of
76
+ this License, each Contributor hereby grants to You a perpetual,
77
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
78
+ (except as stated in this section) patent license to make, have made,
79
+ use, offer to sell, sell, import, and otherwise transfer the Work,
80
+ where such license applies only to those patent claims licensable
81
+ by such Contributor that are necessarily infringed by their
82
+ Contribution(s) alone or by combination of their Contribution(s)
83
+ with the Work to which such Contribution(s) was submitted. If You
84
+ institute patent litigation against any entity (including a
85
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
86
+ or a Contribution incorporated within the Work constitutes direct
87
+ or contributory patent infringement, then any patent licenses
88
+ granted to You under this License for that Work shall terminate
89
+ as of the date such litigation is filed.
90
+
91
+ 4. Redistribution. You may reproduce and distribute copies of the
92
+ Work or Derivative Works thereof in any medium, with or without
93
+ modifications, and in Source or Object form, provided that You
94
+ meet the following conditions:
95
+
96
+ (a) You must give any other recipients of the Work or
97
+ Derivative Works a copy of this License; and
98
+
99
+ (b) You must cause any modified files to carry prominent notices
100
+ stating that You changed the files; and
101
+
102
+ (c) You must retain, in the Source form of any Derivative Works
103
+ that You distribute, all copyright, patent, trademark, and
104
+ attribution notices from the Source form of the Work,
105
+ excluding those notices that do not pertain to any part of
106
+ the Derivative Works; and
107
+
108
+ (d) If the Work includes a "NOTICE" text file as part of its
109
+ distribution, then any Derivative Works that You distribute must
110
+ include a readable copy of the attribution notices contained
111
+ within such NOTICE file, excluding those notices that do not
112
+ pertain to any part of the Derivative Works, in at least one
113
+ of the following places: within a NOTICE text file distributed
114
+ as part of the Derivative Works; within the Source form or
115
+ documentation, if provided along with the Derivative Works; or,
116
+ within a display generated by the Derivative Works, if and
117
+ wherever such third-party notices normally appear. The contents
118
+ of the NOTICE file are for informational purposes only and
119
+ do not modify the License. You may add Your own attribution
120
+ notices within Derivative Works that You distribute, alongside
121
+ or as an addendum to the NOTICE text from the Work, provided
122
+ that such additional attribution notices cannot be construed
123
+ as modifying the License.
124
+
125
+ You may add Your own copyright statement to Your modifications and
126
+ may provide additional or different license terms and conditions
127
+ for use, reproduction, or distribution of Your modifications, or
128
+ for any such Derivative Works as a whole, provided Your use,
129
+ reproduction, and distribution of the Work otherwise complies with
130
+ the conditions stated in this License.
131
+
132
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
133
+ any Contribution intentionally submitted for inclusion in the Work
134
+ by You to the Licensor shall be under the terms and conditions of
135
+ this License, without any additional terms or conditions.
136
+ Notwithstanding the above, nothing herein shall supersede or modify
137
+ the terms of any separate license agreement you may have executed
138
+ with Licensor regarding such Contributions.
139
+
140
+ 6. Trademarks. This License does not grant permission to use the trade
141
+ names, trademarks, service marks, or product names of the Licensor,
142
+ except as required for reasonable and customary use in describing the
143
+ origin of the Work and reproducing the content of the NOTICE file.
144
+
145
+ 7. Disclaimer of Warranty. Unless required by applicable law or
146
+ agreed to in writing, Licensor provides the Work (and each
147
+ Contributor provides its Contributions) on an "AS IS" BASIS,
148
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149
+ implied, including, without limitation, any warranties or conditions
150
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151
+ PARTICULAR PURPOSE. You are solely responsible for determining the
152
+ appropriateness of using or redistributing the Work and assume any
153
+ risks associated with Your exercise of permissions under this License.
154
+
155
+ 8. Limitation of Liability. In no event and under no legal theory,
156
+ whether in tort (including negligence), contract, or otherwise,
157
+ unless required by applicable law (such as deliberate and grossly
158
+ negligent acts) or agreed to in writing, shall any Contributor be
159
+ liable to You for damages, including any direct, indirect, special,
160
+ incidental, or consequential damages of any character arising as a
161
+ result of this License or out of the use or inability to use the
162
+ Work (including but not limited to damages for loss of goodwill,
163
+ work stoppage, computer failure or malfunction, or any and all
164
+ other commercial damages or losses), even if such Contributor
165
+ has been advised of the possibility of such damages.
166
+
167
+ 9. Accepting Warranty or Additional Liability. While redistributing
168
+ the Work or Derivative Works thereof, You may choose to offer,
169
+ and charge a fee for, acceptance of support, warranty, indemnity,
170
+ or other liability obligations and/or rights consistent with this
171
+ License. However, in accepting such obligations, You may act only
172
+ on Your own behalf and on Your sole responsibility, not on behalf
173
+ of any other Contributor, and only if You agree to indemnify,
174
+ defend, and hold each Contributor harmless for any liability
175
+ incurred by, or claims asserted against, such Contributor by reason
176
+ of your accepting any such warranty or additional liability.
177
+
178
+ END OF TERMS AND CONDITIONS
179
+
180
+ APPENDIX: How to apply the Apache License to your work.
181
+
182
+ To apply the Apache License to your work, attach the following
183
+ boilerplate notice, with the fields enclosed by brackets "[]"
184
+ replaced with your own identifying information. (Don't include
185
+ the brackets!) The text should be enclosed in the appropriate
186
+ comment syntax for the file format. We also recommend that a
187
+ file or class name and description of purpose be included on the
188
+ same "printed page" as the copyright notice for easier
189
+ identification within third-party archives.
190
+
191
+ Copyright 2018-2019 Open-MMLab.
192
+
193
+ Licensed under the Apache License, Version 2.0 (the "License");
194
+ you may not use this file except in compliance with the License.
195
+ You may obtain a copy of the License at
196
+
197
+ http://www.apache.org/licenses/LICENSE-2.0
198
+
199
+ Unless required by applicable law or agreed to in writing, software
200
+ distributed under the License is distributed on an "AS IS" BASIS,
201
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202
+ See the License for the specific language governing permissions and
203
+ limitations under the License.
MANIFEST.in ADDED
@@ -0,0 +1,6 @@
1
+ include mmdet3d/.mim/model-index.yml
2
+ include mmdet3d/.mim/dataset-index.yml
3
+ include requirements/*.txt
4
+ recursive-include mmdet3d/.mim/ops *.cpp *.cu *.h *.cc
5
+ recursive-include mmdet3d/.mim/configs *.py *.yml
6
+ recursive-include mmdet3d/.mim/tools *.sh *.py
README_zh-CN.md ADDED
@@ -0,0 +1,427 @@
1
+ <div align="center">
2
+ <img src="resources/mmdet3d-logo.png" width="600"/>
3
+ <div>&nbsp;</div>
4
+ <div align="center">
5
+ <b><font size="5">OpenMMLab 官网</font></b>
6
+ <sup>
7
+ <a href="https://openmmlab.com">
8
+ <i><font size="4">HOT</font></i>
9
+ </a>
10
+ </sup>
11
+ &nbsp;&nbsp;&nbsp;&nbsp;
12
+ <b><font size="5">OpenMMLab 开放平台</font></b>
13
+ <sup>
14
+ <a href="https://platform.openmmlab.com">
15
+ <i><font size="4">TRY IT OUT</font></i>
16
+ </a>
17
+ </sup>
18
+ </div>
19
+ <div>&nbsp;</div>
20
+
21
+ [![PyPI](https://img.shields.io/pypi/v/mmdet3d)](https://pypi.org/project/mmdet3d)
22
+ [![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmdetection3d.readthedocs.io/zh_CN/latest/)
23
+ [![badge](https://github.com/open-mmlab/mmdetection3d/workflows/build/badge.svg)](https://github.com/open-mmlab/mmdetection3d/actions)
24
+ [![codecov](https://codecov.io/gh/open-mmlab/mmdetection3d/branch/main/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmdetection3d)
25
+ [![license](https://img.shields.io/github/license/open-mmlab/mmdetection3d.svg)](https://github.com/open-mmlab/mmdetection3d/blob/main/LICENSE)
26
+ [![open issues](https://isitmaintained.com/badge/open/open-mmlab/mmdetection3d.svg)](https://github.com/open-mmlab/mmdetection3d/issues)
27
+ [![issue resolution](https://isitmaintained.com/badge/resolution/open-mmlab/mmdetection3d.svg)](https://github.com/open-mmlab/mmdetection3d/issues)
28
+
29
+ [📘使用文档](https://mmdetection3d.readthedocs.io/zh_CN/latest/) |
30
+ [🛠️安装教程](https://mmdetection3d.readthedocs.io/zh_CN/latest/get_started.html) |
31
+ [👀模型库](https://mmdetection3d.readthedocs.io/zh_CN/latest/model_zoo.html) |
32
+ [🆕更新日志](https://mmdetection3d.readthedocs.io/en/latest/notes/changelog.html) |
33
+ [🚀进行中的项目](https://github.com/open-mmlab/mmdetection3d/projects) |
34
+ [🤔报告问题](https://github.com/open-mmlab/mmdetection3d/issues/new/choose)
35
+
36
+ </div>
37
+
38
+ <div align="center">
39
+
40
+ [English](README.md) | 简体中文
41
+
42
+ </div>
43
+
44
+ <div align="center">
45
+ <a href="https://openmmlab.medium.com/" style="text-decoration:none;">
46
+ <img src="https://user-images.githubusercontent.com/25839884/219255827-67c1a27f-f8c5-46a9-811d-5e57448c61d1.png" width="3%" alt="" /></a>
47
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
48
+ <a href="https://discord.com/channels/1037617289144569886/1046608014234370059" style="text-decoration:none;">
49
+ <img src="https://user-images.githubusercontent.com/25839884/218347213-c080267f-cbb6-443e-8532-8e1ed9a58ea9.png" width="3%" alt="" /></a>
50
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
51
+ <a href="https://twitter.com/OpenMMLab" style="text-decoration:none;">
52
+ <img src="https://user-images.githubusercontent.com/25839884/218346637-d30c8a0f-3eba-4699-8131-512fb06d46db.png" width="3%" alt="" /></a>
53
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
54
+ <a href="https://www.youtube.com/openmmlab" style="text-decoration:none;">
55
+ <img src="https://user-images.githubusercontent.com/25839884/218346691-ceb2116a-465a-40af-8424-9f30d2348ca9.png" width="3%" alt="" /></a>
56
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
57
+ <a href="https://space.bilibili.com/1293512903" style="text-decoration:none;">
58
+ <img src="https://user-images.githubusercontent.com/25839884/219026751-d7d14cce-a7c9-4e82-9942-8375fca65b99.png" width="3%" alt="" /></a>
59
+ <img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
60
+ <a href="https://www.zhihu.com/people/openmmlab" style="text-decoration:none;">
61
+ <img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
62
+ </div>
63
+
64
+ ## 简介
65
+
66
+ MMDetection3D 是一个基于 PyTorch 的目标检测开源工具箱,是下一代面向 3D 检测的平台,也是 [OpenMMLab](https://openmmlab.com/) 项目的一部分。
67
+
68
+ 主分支代码目前支持 PyTorch 1.8 以上的版本。
69
+
70
+ ![demo image](resources/mmdet3d_outdoor_demo.gif)
71
+
72
+ <details open>
73
+ <summary>主要特性</summary>
74
+
75
+ - **支持多模态/单模态的检测器**
76
+
77
+ 支持多模态/单模态检测器,包括 MVXNet,VoteNet,PointPillars 等。
78
+
79
+ - **支持户内/户外的数据集**
80
+
81
+ 支持室内/室外的 3D 检测数据集,包括 ScanNet,SUNRGB-D,Waymo,nuScenes,Lyft,KITTI。对于 nuScenes 数据集,我们也支持 [nuImages 数据集](https://github.com/open-mmlab/mmdetection3d/tree/main/configs/nuimages)。
82
+
83
+ - **与 2D 检测器的自然整合**
84
+
85
+ [MMDetection](https://github.com/open-mmlab/mmdetection/blob/3.x/docs/zh_cn/model_zoo.md) 支持的 **300+ 个模型,40+ 的论文算法**,和相关模块都可以在此代码库中训练或使用。
86
+
87
+ - **性能高**
88
+
89
+ 训练速度比其他代码库更快。下表可见主要的对比结果。更多的细节可见[基准测评文档](./docs/zh_cn/notes/benchmarks.md)。我们对比了每秒训练的样本数(值越高越好)。其他代码库不支持的模型被标记为 `✗`。
90
+
91
+ | Methods | MMDetection3D | [OpenPCDet](https://github.com/open-mmlab/OpenPCDet) | [votenet](https://github.com/facebookresearch/votenet) | [Det3D](https://github.com/poodarchu/Det3D) |
92
+ | :-----------------: | :-----------: | :--------------------------------------------------: | :----------------------------------------------------: | :-----------------------------------------: |
93
+ | VoteNet | 358 | ✗ | 77 | ✗ |
94
+ | PointPillars-car | 141 | ✗ | ✗ | 140 |
95
+ | PointPillars-3class | 107 | 44 | ✗ | ✗ |
96
+ | SECOND | 40 | 30 | ✗ | ✗ |
97
+ | Part-A2 | 17 | 14 | ✗ | ✗ |
98
+
99
+ </details>
100
+
101
+ 和 [MMDetection](https://github.com/open-mmlab/mmdetection),[MMCV](https://github.com/open-mmlab/mmcv) 一样,MMDetection3D 也可以作为一个库去支持各式各样的项目。
102
+
103
+ ## 最新进展
104
+
105
+ ### 亮点
106
+
107
+ 在 1.4 版本中,MMDetection3D 重构了 Waymo 数据集,加速了 Waymo 数据集的预处理、训练/测试启动和验证的速度,并在 Waymo 上拓展了对单目/BEV 等基于相机的三维目标检测模型的支持。[这里](https://mmdetection3d.readthedocs.io/en/latest/advanced_guides/datasets/waymo.html)提供了对 Waymo 数据信息的详细解读。
108
+
109
+ 此外,在 1.4 版本中,MMDetection3D 提供了 [Waymo-mini](https://download.openmmlab.com/mmdetection3d/data/waymo_mmdet3d_after_1x4/waymo_mini.tar.gz) 来帮助社区用户上手 Waymo 并用于快速迭代开发。
110
+
111
+ **v1.4.0** 版本已经在 2024.1.8 发布:
112
+
113
+ - 在 `projects` 中支持了 [DSVT](https://arxiv.org/abs/2301.06051) 的训练
114
+ - 在 `projects` 中支持了 [Nerf-Det](https://arxiv.org/abs/2307.14620)
115
+ - 重构了 Waymo 数据集
116
+
117
+ **v1.3.0** 版本已经在 2023.10.18 发布:
118
+
119
+ - 在 `projects` 中支持 [CENet](https://arxiv.org/abs/2207.12691)
120
+ - 使用新的 3D inferencers 增强演示代码效果
121
+
122
+ **v1.2.0** 版本已经在 2023.7.4 发布:
123
+
124
+ - 在 `mmdet3d/configs` 中支持[新 Config 样式](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta)
125
+ - 在 `projects` 中支持 [DSVT](https://arxiv.org/abs/2301.06051) 的推理
126
+ - 支持通过 `mim` 从 [OpenDataLab](https://opendatalab.com/) 下载数据集
127
+
128
+ **v1.1.1** 版本已经在 2023.5.30 发布:
129
+
130
+ - 在 `projects` 中支持 [TPVFormer](https://arxiv.org/pdf/2302.07817.pdf)
131
+ - 在 `projects` 中支持 BEVFusion 的训练
132
+ - 支持基于激光雷达的 3D 语义分割基准
133
+
134
+ ## 安装
135
+
136
+ 请参考[快速入门文档](https://mmdetection3d.readthedocs.io/zh_CN/latest/get_started.html)进行安装。
137
+
138
+ ## 教程
139
+
140
+ <details>
141
+ <summary>用户指南</summary>
142
+
143
+ - [训练 & 测试](https://mmdetection3d.readthedocs.io/zh_CN/latest/user_guides/index.html#train-test)
144
+ - [学习配置文件](https://mmdetection3d.readthedocs.io/zh_CN/latest/user_guides/config.html)
145
+ - [坐标系](https://mmdetection3d.readthedocs.io/zh_CN/latest/user_guides/coord_sys_tutorial.html)
146
+ - [数据预处理](https://mmdetection3d.readthedocs.io/zh_CN/latest/user_guides/dataset_prepare.html)
147
+ - [自定义数据预处理流程](https://mmdetection3d.readthedocs.io/zh_CN/latest/user_guides/data_pipeline.html)
148
+ - [在标注数据集上测试和训练](https://mmdetection3d.readthedocs.io/zh_CN/latest/user_guides/train_test.html)
149
+ - [推理](https://mmdetection3d.readthedocs.io/zh_CN/latest/user_guides/inference.html)
150
+ - [在自定义数据集上进行训练](https://mmdetection3d.readthedocs.io/zh_CN/latest/user_guides/new_data_model.html)
151
+ - [实用工具](https://mmdetection3d.readthedocs.io/zh_CN/latest/user_guides/index.html#useful-tools)
152
+
153
+ </details>
154
+
155
+ <details>
156
+ <summary>进阶教程</summary>
157
+
158
+ - [数据集](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/index.html#datasets)
159
+ - [KITTI 数据集](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/datasets/kitti.html)
160
+ - [NuScenes 数据集](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/datasets/nuscenes.html)
161
+ - [Lyft 数据集](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/datasets/lyft.html)
162
+ - [Waymo 数据集](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/datasets/waymo.html)
163
+ - [SUN RGB-D 数据集](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/datasets/sunrgbd.html)
164
+ - [ScanNet 数据集](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/datasets/scannet.html)
165
+ - [S3DIS 数据集](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/datasets/s3dis.html)
166
+ - [SemanticKITTI 数据集](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/datasets/semantickitti.html)
167
+ - [支持的任务](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/index.html#supported-tasks)
168
+ - [基于激光雷达的 3D 检测](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/supported_tasks/lidar_det3d.html)
169
+ - [基于视觉的 3D 检测](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/supported_tasks/vision_det3d.html)
170
+ - [基于激光雷达的 3D 语义分割](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/supported_tasks/lidar_sem_seg3d.html)
171
+ - [自定义项目](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/index.html#customization)
172
+ - [自定义数据集](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/customize_dataset.html)
173
+ - [自定义模型](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/customize_models.html)
174
+ - [自定义运行时配置](https://mmdetection3d.readthedocs.io/zh_CN/latest/advanced_guides/customize_runtime.html)
175
+
176
+ </details>
177
+
178
+ ## 基准测试和模型库
179
+
180
+ 测试结果和模型可以在[模型库](docs/zh_cn/model_zoo.md)中找到。
181
+
182
+ <div align="center">
183
+ <b>模块组件</b>
184
+ </div>
185
+ <table align="center">
186
+ <tbody>
187
+ <tr align="center" valign="bottom">
188
+ <td>
189
+ <b>主干网络</b>
190
+ </td>
191
+ <td>
192
+ <b>检测头</b>
193
+ </td>
194
+ <td>
195
+ <b>特性</b>
196
+ </td>
197
+ </tr>
198
+ <tr valign="top">
199
+ <td>
200
+ <ul>
201
+ <li><a href="configs/pointnet2">PointNet (CVPR'2017)</a></li>
202
+ <li><a href="configs/pointnet2">PointNet++ (NeurIPS'2017)</a></li>
203
+ <li><a href="configs/regnet">RegNet (CVPR'2020)</a></li>
204
+ <li><a href="configs/dgcnn">DGCNN (TOG'2019)</a></li>
205
+ <li>DLA (CVPR'2018)</li>
206
+ <li>MinkResNet (CVPR'2019)</li>
207
+ <li><a href="configs/minkunet">MinkUNet (CVPR'2019)</a></li>
208
+ <li><a href="configs/cylinder3d">Cylinder3D (CVPR'2021)</a></li>
209
+ </ul>
210
+ </td>
211
+ <td>
212
+ <ul>
213
+ <li><a href="configs/free_anchor">FreeAnchor (NeurIPS'2019)</a></li>
214
+ </ul>
215
+ </td>
216
+ <td>
217
+ <ul>
218
+ <li><a href="configs/dynamic_voxelization">Dynamic Voxelization (CoRL'2019)</a></li>
219
+ </ul>
220
+ </td>
221
+ </tr>
222
+ </td>
223
+ </tr>
224
+ </tbody>
225
+ </table>
226
+
227
+ <div align="center">
228
+ <b>算法模型</b>
229
+ </div>
230
+ <table align="center">
231
+ <tbody>
232
+ <tr align="center" valign="middle">
233
+ <td>
234
+ <b>激光雷达 3D 目标检测</b>
235
+ </td>
236
+ <td>
237
+ <b>相机 3D 目标检测</b>
238
+ </td>
239
+ <td>
240
+ <b>多模态 3D 目标检测</b>
241
+ </td>
242
+ <td>
243
+ <b>3D 语义分割</b>
244
+ </td>
245
+ </tr>
246
+ <tr valign="top">
247
+ <td>
248
+ <li><b>室外</b></li>
249
+ <ul>
250
+ <li><a href="configs/second">SECOND (Sensor'2018)</a></li>
251
+ <li><a href="configs/pointpillars">PointPillars (CVPR'2019)</a></li>
252
+ <li><a href="configs/ssn">SSN (ECCV'2020)</a></li>
253
+ <li><a href="configs/3dssd">3DSSD (CVPR'2020)</a></li>
254
+ <li><a href="configs/sassd">SA-SSD (CVPR'2020)</a></li>
255
+ <li><a href="configs/point_rcnn">PointRCNN (CVPR'2019)</a></li>
256
+ <li><a href="configs/parta2">Part-A2 (TPAMI'2020)</a></li>
257
+ <li><a href="configs/centerpoint">CenterPoint (CVPR'2021)</a></li>
258
+ <li><a href="configs/pv_rcnn">PV-RCNN (CVPR'2020)</a></li>
259
+ <li><a href="projects/CenterFormer">CenterFormer (ECCV'2022)</a></li>
260
+ </ul>
261
+ <li><b>室内</b></li>
262
+ <ul>
263
+ <li><a href="configs/votenet">VoteNet (ICCV'2019)</a></li>
264
+ <li><a href="configs/h3dnet">H3DNet (ECCV'2020)</a></li>
265
+ <li><a href="configs/groupfree3d">Group-Free-3D (ICCV'2021)</a></li>
266
+ <li><a href="configs/fcaf3d">FCAF3D (ECCV'2022)</a></li>
267
+ <li><a href="projects/TR3D">TR3D (ArXiv'2023)</a></li>
268
+ </ul>
269
+ </td>
270
+ <td>
271
+ <li><b>室外</b></li>
272
+ <ul>
273
+ <li><a href="configs/imvoxelnet">ImVoxelNet (WACV'2022)</a></li>
274
+ <li><a href="configs/smoke">SMOKE (CVPRW'2020)</a></li>
275
+ <li><a href="configs/fcos3d">FCOS3D (ICCVW'2021)</a></li>
276
+ <li><a href="configs/pgd">PGD (CoRL'2021)</a></li>
277
+ <li><a href="configs/monoflex">MonoFlex (CVPR'2021)</a></li>
278
+ <li><a href="projects/DETR3D">DETR3D (CoRL'2021)</a></li>
279
+ <li><a href="projects/PETR">PETR (ECCV'2022)</a></li>
280
+ </ul>
281
+ <li><b>室内</b></li>
282
+ <ul>
283
+ <li><a href="configs/imvoxelnet">ImVoxelNet (WACV'2022)</a></li>
284
+ </ul>
285
+ </td>
286
+ <td>
287
+ <li><b>室外</b></li>
288
+ <ul>
289
+ <li><a href="configs/mvxnet">MVXNet (ICRA'2019)</a></li>
290
+ <li><a href="projects/BEVFusion">BEVFusion (ICRA'2023)</a></li>
291
+ </ul>
292
+ <li><b>室内</b></li>
293
+ <ul>
294
+ <li><a href="configs/imvotenet">ImVoteNet (CVPR'2020)</a></li>
295
+ </ul>
296
+ </td>
297
+ <td>
298
+ <li><b>室外</b></li>
299
+ <ul>
300
+ <li><a href="configs/minkunet">MinkUNet (CVPR'2019)</a></li>
301
+ <li><a href="configs/spvcnn">SPVCNN (ECCV'2020)</a></li>
302
+ <li><a href="configs/cylinder3d">Cylinder3D (CVPR'2021)</a></li>
303
+ <li><a href="projects/TPVFormer">TPVFormer (CVPR'2023)</a></li>
304
+ </ul>
305
+ <li><b>室内</b></li>
306
+ <ul>
307
+ <li><a href="configs/pointnet2">PointNet++ (NeurIPS'2017)</a></li>
308
+ <li><a href="configs/paconv">PAConv (CVPR'2021)</a></li>
309
+ <li><a href="configs/dgcnn">DGCNN (TOG'2019)</a></li>
310
+ </ul>
311
+ </ul>
312
+ </td>
313
+ </tr>
314
+ </td>
315
+ </tr>
316
+ </tbody>
317
+ </table>
318
+
319
+ | | ResNet | VoVNet | Swin-T | PointNet++ | SECOND | DGCNN | RegNetX | DLA | MinkResNet | Cylinder3D | MinkUNet |
320
+ | :-----------: | :----: | :----: | :----: | :--------: | :----: | :---: | :-----: | :-: | :--------: | :--------: | :------: |
321
+ | SECOND | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
322
+ | PointPillars | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ |
323
+ | FreeAnchor | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ |
324
+ | VoteNet | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
325
+ | H3DNet | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
326
+ | 3DSSD | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
327
+ | Part-A2 | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
328
+ | MVXNet | ✓ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
329
+ | CenterPoint | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
330
+ | SSN | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ |
331
+ | ImVoteNet | ✓ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
332
+ | FCOS3D | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
333
+ | PointNet++ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
334
+ | Group-Free-3D | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
335
+ | ImVoxelNet | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
336
+ | PAConv | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
337
+ | DGCNN | ✗ | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ |
338
+ | SMOKE | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ |
339
+ | PGD | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
340
+ | MonoFlex | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ |
341
+ | SA-SSD | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
342
+ | FCAF3D | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ |
343
+ | PV-RCNN | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
344
+ | Cylinder3D | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ |
345
+ | MinkUNet | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✓ |
346
+ | SPVCNN | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✓ |
347
+ | BEVFusion | ✗ | ✗ | ✓ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
348
+ | CenterFormer | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
349
+ | TR3D | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ |
350
+ | DETR3D | ✓ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
351
+ | PETR | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
352
+ | TPVFormer | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ |
353
+
354
+ **注意:**[MMDetection](https://github.com/open-mmlab/mmdetection/blob/3.x/docs/zh_cn/model_zoo.md) 支持的基于 2D 检测的 **300+ 个模型,40+ 的论文算法**在 MMDetection3D 中都可以被训练或使用。
355
+
356
+ ## 常见问题
357
+
358
+ 请参考 [FAQ](docs/zh_cn/notes/faq.md) 了解其他用户的常见问题。
359
+
360
+ ## 贡献指南
361
+
362
+ 我们感谢所有的贡献者为改进和提升 MMDetection3D 所作出的努力。请参考[贡献指南](docs/en/notes/contribution_guides.md)来了解参与项目贡献的相关指引。
363
+
364
+ ## 致谢
365
+
366
+ MMDetection3D 是一款由来自不同高校和企业的研发人员共同参与贡献的开源项目。我们感谢所有为项目提供算法复现和新功能支持的贡献者,以及提供宝贵反馈的用户。我们希望这个工具箱和基准测试可以为社区提供灵活的代码工具,供用户复现已有算法并开发自己的新的 3D 检测模型。
367
+
368
+ ## 引用
369
+
370
+ 如果你觉得本项目对你的研究工作有所帮助,请参考如下 bibtex 引用 MMDetection3D:
371
+
372
+ ```latex
373
+ @misc{mmdet3d2020,
374
+ title={{MMDetection3D: OpenMMLab} next-generation platform for general {3D} object detection},
375
+ author={MMDetection3D Contributors},
376
+ howpublished = {\url{https://github.com/open-mmlab/mmdetection3d}},
377
+ year={2020}
378
+ }
379
+ ```
380
+
381
+ ## 开源许可证
382
+
383
+ 该项目采用 [Apache 2.0 开源许可证](LICENSE)。
384
+
385
+ ## OpenMMLab 的其他项目
386
+
387
+ - [MMEngine](https://github.com/open-mmlab/mmengine): OpenMMLab 深度学习模型训练基础库
388
+ - [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab 计算机视觉基础库
389
+ - [MMEval](https://github.com/open-mmlab/mmeval): 统一开放的跨框架算法评测库
390
+ - [MIM](https://github.com/open-mmlab/mim): MIM 是 OpenMMLab 项目、算法、模型的统一入口
391
+ - [MMPreTrain](https://github.com/open-mmlab/mmpretrain): OpenMMLab 深度学习预训练工具箱
392
+ - [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 目标检测工具箱
393
+ - [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用 3D 目标检测平台
394
+ - [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab 旋转框检测工具箱与测试基准
395
+ - [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO 系列工具箱与测试基准
396
+ - [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱
397
+ - [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab 全流程文字检测识别理解工具包
398
+ - [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab 姿态估计工具箱
399
+ - [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 人体参数化模型工具箱与测试基准
400
+ - [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab 自监督学习工具箱与测试基准
401
+ - [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab 模型压缩工具箱与测试基准
402
+ - [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab 少样本学习工具箱与测试基准
403
+ - [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频理解工具箱
404
+ - [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab 一体化视频目标感知平台
405
+ - [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab 光流估计工具箱与测试基准
406
+ - [MMagic](https://github.com/open-mmlab/mmagic): OpenMMLab 新一代人工智能内容生成(AIGC)工具箱
407
+ - [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab 图片视频生成模型工具箱
408
+ - [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab 模型部署框架
409
+
410
+ ## 欢迎加入 OpenMMLab 社区
411
+
412
+ 扫描下方的二维码可关注 OpenMMLab 团队的 [知乎官方账号](https://www.zhihu.com/people/openmmlab),扫描下方微信二维码添加喵喵好友,进入 MMDetection3D 微信交流社群。【加好友申请格式:研究方向+地区+学校/公司+姓名】
413
+
414
+ <div align="center">
415
+ <img src="https://user-images.githubusercontent.com/58739961/187154320-f3312cdf-31f2-4316-9dbb-8d7b0e1b7e08.jpg" height="400" /> <img src="https://github.com/open-mmlab/mmdetection3d/assets/62195058/dfb3f6a9-25c6-47a5-936b-3f1d7347a42b" height="400" />
416
+ </div>
417
+
418
+ 我们会在 OpenMMLab 社区为大家
419
+
420
+ - 📢 分享 AI 框架的前沿核心技术
421
+ - 💻 解读 PyTorch 常用模块源码
422
+ - 📰 发布 OpenMMLab 的相关新闻
423
+ - 🚀 介绍 OpenMMLab 开发的前沿算法
424
+ - 🏃 获取更高效的问题答疑和意见反馈
425
+ - 🔥 提供与各行各业开发者充分交流的平台
426
+
427
+ 干货满满 📘,等你来撩 💗,OpenMMLab 社区期待您的加入 👬
configs/.DS_Store ADDED
Binary file (12.3 kB). View file
 
configs/3dssd/3dssd_4xb4_kitti-3d-car.py ADDED
@@ -0,0 +1,119 @@
1
+ _base_ = [
2
+ '../_base_/models/3dssd.py', '../_base_/datasets/kitti-3d-car.py',
3
+ '../_base_/default_runtime.py'
4
+ ]
5
+
6
+ # dataset settings
7
+ dataset_type = 'KittiDataset'
8
+ data_root = 'data/kitti/'
9
+ class_names = ['Car']
10
+ point_cloud_range = [0, -40, -5, 70, 40, 3]
11
+ input_modality = dict(use_lidar=True, use_camera=False)
12
+ backend_args = None
13
+
14
+ db_sampler = dict(
15
+ data_root=data_root,
16
+ info_path=data_root + 'kitti_dbinfos_train.pkl',
17
+ rate=1.0,
18
+ prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
19
+ classes=class_names,
20
+ sample_groups=dict(Car=15),
21
+ points_loader=dict(
22
+ type='LoadPointsFromFile',
23
+ coord_type='LIDAR',
24
+ load_dim=4,
25
+ use_dim=4,
26
+ backend_args=backend_args),
27
+ backend_args=backend_args)
28
+
29
+ train_pipeline = [
30
+ dict(
31
+ type='LoadPointsFromFile',
32
+ coord_type='LIDAR',
33
+ load_dim=4,
34
+ use_dim=4,
35
+ backend_args=backend_args),
36
+ dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
37
+ dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
38
+ dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
39
+ dict(type='ObjectSample', db_sampler=db_sampler),
40
+ dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
41
+ dict(
42
+ type='ObjectNoise',
43
+ num_try=100,
44
+ translation_std=[1.0, 1.0, 0],
45
+ global_rot_range=[0.0, 0.0],
46
+ rot_range=[-1.0471975511965976, 1.0471975511965976]),
47
+ dict(
48
+ type='GlobalRotScaleTrans',
49
+ rot_range=[-0.78539816, 0.78539816],
50
+ scale_ratio_range=[0.9, 1.1]),
51
+ # 3DSSD can achieve higher performance without this transform
52
+ # dict(type='BackgroundPointsFilter', bbox_enlarge_range=(0.5, 2.0, 0.5)),
53
+ dict(type='PointSample', num_points=16384),
54
+ dict(
55
+ type='Pack3DDetInputs',
56
+ keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
57
+ ]
58
+
59
+ test_pipeline = [
60
+ dict(
61
+ type='LoadPointsFromFile',
62
+ coord_type='LIDAR',
63
+ load_dim=4,
64
+ use_dim=4,
65
+ backend_args=backend_args),
66
+ dict(
67
+ type='MultiScaleFlipAug3D',
68
+ img_scale=(1333, 800),
69
+ pts_scale_ratio=1,
70
+ flip=False,
71
+ transforms=[
72
+ dict(
73
+ type='GlobalRotScaleTrans',
74
+ rot_range=[0, 0],
75
+ scale_ratio_range=[1., 1.],
76
+ translation_std=[0, 0, 0]),
77
+ dict(type='RandomFlip3D'),
78
+ dict(
79
+ type='PointsRangeFilter', point_cloud_range=point_cloud_range),
80
+ dict(type='PointSample', num_points=16384),
81
+ ]),
82
+ dict(type='Pack3DDetInputs', keys=['points'])
83
+ ]
84
+
85
+ train_dataloader = dict(
86
+ batch_size=4, dataset=dict(dataset=dict(pipeline=train_pipeline, )))
87
+ test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
88
+ val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
89
+
90
+ # model settings
91
+ model = dict(
92
+ bbox_head=dict(
93
+ num_classes=1,
94
+ bbox_coder=dict(
95
+ type='AnchorFreeBBoxCoder', num_dir_bins=12, with_rot=True)))
96
+
97
+ # optimizer
98
+ lr = 0.002 # max learning rate
99
+ optim_wrapper = dict(
100
+ type='OptimWrapper',
101
+ optimizer=dict(type='AdamW', lr=lr, weight_decay=0.),
102
+ clip_grad=dict(max_norm=35, norm_type=2),
103
+ )
104
+
105
+ # training schedule for 1x
106
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=2)
107
+ val_cfg = dict(type='ValLoop')
108
+ test_cfg = dict(type='TestLoop')
109
+
110
+ # learning rate
111
+ param_scheduler = [
112
+ dict(
113
+ type='MultiStepLR',
114
+ begin=0,
115
+ end=80,
116
+ by_epoch=True,
117
+ milestones=[45, 60],
118
+ gamma=0.1)
119
+ ]
configs/3dssd/README.md ADDED
@@ -0,0 +1,45 @@
1
+ # 3DSSD: Point-based 3D Single Stage Object Detector
2
+
3
+ > [3DSSD: Point-based 3D Single Stage Object Detector](https://arxiv.org/abs/2002.10187)
4
+
5
+ <!-- [ALGORITHM] -->
6
+
7
+ ## Abstract
8
+
9
+ Currently, there are many kinds of voxel-based 3D single stage detectors, while point-based single stage methods are still underexplored. In this paper, we first present a lightweight and effective point-based 3D single stage object detector, named 3DSSD, achieving a good balance between accuracy and efficiency. In this paradigm, all upsampling layers and the refinement stage, which are indispensable in all existing point-based methods, are abandoned to reduce the large computation cost. We propose a novel fusion sampling strategy in the downsampling process to make detection on less representative points feasible. A delicate box prediction network, including a candidate generation layer and an anchor-free regression head with a 3D center-ness assignment strategy, is designed to meet our demands for accuracy and speed. Our paradigm is an elegant single stage anchor-free framework, showing great superiority to other existing methods. We evaluate 3DSSD on the widely used KITTI dataset and the more challenging nuScenes dataset. Our method outperforms all state-of-the-art voxel-based single stage methods by a large margin, has comparable performance to two stage point-based methods as well, and runs at more than 25 FPS, 2x faster than former state-of-the-art point-based methods.
10
+
11
+ <div align=center>
12
+ <img src="https://user-images.githubusercontent.com/30491025/143854187-54ed1257-a046-4764-81cd-d2c8404137d3.png" width="800"/>
13
+ </div>
14
+
15
+ ## Introduction
16
+
17
+ We implement 3DSSD and provide the results and checkpoints on KITTI datasets.
18
+
19
+ Some settings in our implementation differ from the [official implementation](https://github.com/Jia-Research-Lab/3DSSD), which brings marginal differences to the performance on the KITTI dataset in our experiments. To simplify and unify our models, we skip these settings. The differences are listed below:
20
+
21
+ 1. We keep the scenes without any object during training, while the official code skips them. As a result, the official implementation uses only 3229 and 3394 samples as training and validation sets, respectively, whereas we keep the standard 3712 and 3769 samples, the same splits used for all the other models in our implementation on the KITTI dataset.
22
+ 2. We do not modify the decay of `batch normalization` during training.
23
+ 3. While using [`DataBaseSampler`](https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/datasets/pipelines/dbsampler.py#L80) for data augmentation, the official code uses road planes as a reference to place the sampled objects, whereas we do not.
24
+ 4. We perform detection in LiDAR coordinates, while the official code uses camera coordinates (see the sketch below).
25
+
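+ For reference, the coordinate change in difference 4 can be sketched as follows. This is a minimal illustration assuming the idealized KITTI axis conventions (camera: x right, y down, z forward; LiDAR: x forward, y left, z up); it ignores the per-frame calibration matrices applied in the real pipeline and is not the exact helper used in this codebase:
+
+ ```python
+ import numpy as np
+
+
+ def camera_to_lidar_centers(centers_cam: np.ndarray) -> np.ndarray:
+     """Map (N, 3) camera-frame box centers to LiDAR-frame centers.
+
+     Assumes the idealized KITTI convention with no calibration offset:
+     x_lidar = z_cam, y_lidar = -x_cam, z_lidar = -y_cam.
+     """
+     x_cam, y_cam, z_cam = centers_cam[:, 0], centers_cam[:, 1], centers_cam[:, 2]
+     return np.stack([z_cam, -x_cam, -y_cam], axis=-1)
+
+
+ # Example: a box 10 m ahead of the camera maps to x_lidar = 10.
+ print(camera_to_lidar_centers(np.array([[0.5, 1.6, 10.0]])))
+ ```
+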
26
+ ## Results and models
27
+
28
+ ### KITTI
29
+
30
+ | Backbone | Class | Lr schd | Mem (GB) | Inf time (fps) | mAP | Download |
31
+ | :--------------------------------------------: | :---: | :-----: | :------: | :------------: | :----------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
32
+ | [PointNet2SAMSG](./3dssd_4xb4_kitti-3d-car.py) | Car | 72e | 4.7 | | 78.58(81.27)<sup>1</sup> | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/3dssd/3dssd_4x4_kitti-3d-car/3dssd_4x4_kitti-3d-car_20210818_203828-b89c8fc4.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/3dssd/3dssd_4x4_kitti-3d-car/3dssd_4x4_kitti-3d-car_20210818_203828.log.json) |
33
+
34
+ \[1\]: We report two different 3D object detection results here. 78.58 mAP is evaluated by our evaluation code, and 81.27 mAP is evaluated by the official development kit (the one used in the paper and in the official code of 3DSSD). We found that the commonly used Python implementation of [`rotate_iou`](https://github.com/traveller59/second.pytorch/blob/e42e4a0e17262ab7d180ee96a0a36427f2c20a44/second/core/non_max_suppression/nms_gpu.py#L605), which is used in our KITTI dataset evaluation, differs from the official implementation in the [KITTI benchmark](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d); a sketch of the BEV IoU being computed is given below.
35
+
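+ The sketch below shows what the compared quantity looks like: the BEV IoU of two rotated boxes, computed here with `shapely` polygons. This is an illustrative reimplementation under that assumption, not the actual `rotate_iou` kernel used by the evaluation code:
+
+ ```python
+ import numpy as np
+ from shapely.geometry import Polygon
+
+
+ def bev_polygon(cx, cy, w, l, yaw):
+     """Return the bird's-eye-view footprint of a rotated box."""
+     corners = np.array([[l / 2, w / 2], [l / 2, -w / 2],
+                         [-l / 2, -w / 2], [-l / 2, w / 2]])
+     rot = np.array([[np.cos(yaw), -np.sin(yaw)],
+                     [np.sin(yaw), np.cos(yaw)]])
+     return Polygon(corners @ rot.T + np.array([cx, cy]))
+
+
+ def rotated_bev_iou(box_a, box_b):
+     """IoU of two (cx, cy, w, l, yaw) boxes in bird's-eye view."""
+     pa, pb = bev_polygon(*box_a), bev_polygon(*box_b)
+     inter = pa.intersection(pb).area
+     return inter / (pa.area + pb.area - inter)
+
+
+ # Example: two overlapping, slightly rotated car-sized boxes.
+ print(rotated_bev_iou((0.0, 0.0, 1.8, 4.5, 0.0), (1.0, 0.2, 1.8, 4.5, 0.1)))
+ ```
+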
36
+ ## Citation
37
+
38
+ ```latex
39
+ @inproceedings{yang20203dssd,
40
+ author = {Zetong Yang and Yanan Sun and Shu Liu and Jiaya Jia},
41
+ title = {3DSSD: Point-based 3D Single Stage Object Detector},
42
+ booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
43
+ year = {2020}
44
+ }
45
+ ```
configs/3dssd/metafile.yml ADDED
@@ -0,0 +1,29 @@
1
+ Collections:
2
+ - Name: 3DSSD
3
+ Metadata:
4
+ Training Data: KITTI
5
+ Training Techniques:
6
+ - AdamW
7
+ Training Resources: 4x TITAN X
8
+ Architecture:
9
+ - PointNet++
10
+ Paper:
11
+ URL: https://arxiv.org/abs/2002.10187
12
+ Title: '3DSSD: Point-based 3D Single Stage Object Detector'
13
+ README: configs/3dssd/README.md
14
+ Code:
15
+ URL: https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/models/detectors/ssd3dnet.py#L7
16
+ Version: v0.6.0
17
+
18
+ Models:
19
+ - Name: 3dssd_4x4_kitti-3d-car
20
+ In Collection: 3DSSD
21
+ Config: configs/3dssd/3dssd_4xb4_kitti-3d-car.py
22
+ Metadata:
23
+ Training Memory (GB): 4.7
24
+ Results:
25
+ - Task: 3D Object Detection
26
+ Dataset: KITTI
27
+ Metrics:
28
+ mAP: 78.58
29
+ Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/3dssd/3dssd_4x4_kitti-3d-car/3dssd_4x4_kitti-3d-car_20210818_203828-b89c8fc4.pth
configs/_base_/datasets/kitti-3d-3class.py ADDED
1
+ # dataset settings
2
+ dataset_type = 'KittiDataset'
3
+ data_root = 'data/kitti/'
4
+ class_names = ['Pedestrian', 'Cyclist', 'Car']
5
+ point_cloud_range = [0, -40, -3, 70.4, 40, 1]
6
+ input_modality = dict(use_lidar=True, use_camera=False)
7
+ metainfo = dict(classes=class_names)
8
+
9
+ # Example to use different file client
10
+ # Method 1: simply set the data root and let the file I/O module
11
+ # automatically infer from the prefix (LMDB and Memcached are not supported yet)
12
+
13
+ # data_root = 's3://openmmlab/datasets/detection3d/kitti/'
14
+
15
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
16
+ # backend_args = dict(
17
+ # backend='petrel',
18
+ # path_mapping=dict({
19
+ # './data/': 's3://openmmlab/datasets/detection3d/',
20
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
21
+ # }))
22
+ backend_args = None
23
+
24
+ db_sampler = dict(
25
+ data_root=data_root,
26
+ info_path=data_root + 'kitti_dbinfos_train.pkl',
27
+ rate=1.0,
28
+ prepare=dict(
29
+ filter_by_difficulty=[-1],
30
+ filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
31
+ classes=class_names,
32
+ sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6),
33
+ points_loader=dict(
34
+ type='LoadPointsFromFile',
35
+ coord_type='LIDAR',
36
+ load_dim=4,
37
+ use_dim=4,
38
+ backend_args=backend_args),
39
+ backend_args=backend_args)
40
+
41
+ train_pipeline = [
42
+ dict(
43
+ type='LoadPointsFromFile',
44
+ coord_type='LIDAR',
45
+ load_dim=4, # x, y, z, intensity
46
+ use_dim=4,
47
+ backend_args=backend_args),
48
+ dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
49
+ dict(type='ObjectSample', db_sampler=db_sampler),
50
+ dict(
51
+ type='ObjectNoise',
52
+ num_try=100,
53
+ translation_std=[1.0, 1.0, 0.5],
54
+ global_rot_range=[0.0, 0.0],
55
+ rot_range=[-0.78539816, 0.78539816]),
56
+ dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
57
+ dict(
58
+ type='GlobalRotScaleTrans',
59
+ rot_range=[-0.78539816, 0.78539816],
60
+ scale_ratio_range=[0.95, 1.05]),
61
+ dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
62
+ dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
63
+ dict(type='PointShuffle'),
64
+ dict(
65
+ type='Pack3DDetInputs',
66
+ keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
67
+ ]
68
+ test_pipeline = [
69
+ dict(
70
+ type='LoadPointsFromFile',
71
+ coord_type='LIDAR',
72
+ load_dim=4,
73
+ use_dim=4,
74
+ backend_args=backend_args),
75
+ dict(
76
+ type='MultiScaleFlipAug3D',
77
+ img_scale=(1333, 800),
78
+ pts_scale_ratio=1,
79
+ flip=False,
80
+ transforms=[
81
+ dict(
82
+ type='GlobalRotScaleTrans',
83
+ rot_range=[0, 0],
84
+ scale_ratio_range=[1., 1.],
85
+ translation_std=[0, 0, 0]),
86
+ dict(type='RandomFlip3D'),
87
+ dict(
88
+ type='PointsRangeFilter', point_cloud_range=point_cloud_range)
89
+ ]),
90
+ dict(type='Pack3DDetInputs', keys=['points'])
91
+ ]
92
+ # construct a pipeline for data and gt loading in the show function
93
+ # please keep its loading function consistent with test_pipeline (e.g. client)
94
+ eval_pipeline = [
95
+ dict(
96
+ type='LoadPointsFromFile',
97
+ coord_type='LIDAR',
98
+ load_dim=4,
99
+ use_dim=4,
100
+ backend_args=backend_args),
101
+ dict(type='Pack3DDetInputs', keys=['points'])
102
+ ]
103
+ train_dataloader = dict(
104
+ batch_size=6,
105
+ num_workers=4,
106
+ persistent_workers=True,
107
+ sampler=dict(type='DefaultSampler', shuffle=True),
108
+ dataset=dict(
109
+ type='RepeatDataset',
110
+ times=2,
111
+ dataset=dict(
112
+ type=dataset_type,
113
+ data_root=data_root,
114
+ ann_file='kitti_infos_train.pkl',
115
+ data_prefix=dict(pts='training/velodyne_reduced'),
116
+ pipeline=train_pipeline,
117
+ modality=input_modality,
118
+ test_mode=False,
119
+ metainfo=metainfo,
120
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
121
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
122
+ box_type_3d='LiDAR',
123
+ backend_args=backend_args)))
124
+ val_dataloader = dict(
125
+ batch_size=1,
126
+ num_workers=1,
127
+ persistent_workers=True,
128
+ drop_last=False,
129
+ sampler=dict(type='DefaultSampler', shuffle=False),
130
+ dataset=dict(
131
+ type=dataset_type,
132
+ data_root=data_root,
133
+ data_prefix=dict(pts='training/velodyne_reduced'),
134
+ ann_file='kitti_infos_val.pkl',
135
+ pipeline=test_pipeline,
136
+ modality=input_modality,
137
+ test_mode=True,
138
+ metainfo=metainfo,
139
+ box_type_3d='LiDAR',
140
+ backend_args=backend_args))
141
+ test_dataloader = dict(
142
+ batch_size=1,
143
+ num_workers=1,
144
+ persistent_workers=True,
145
+ drop_last=False,
146
+ sampler=dict(type='DefaultSampler', shuffle=False),
147
+ dataset=dict(
148
+ type=dataset_type,
149
+ data_root=data_root,
150
+ data_prefix=dict(pts='training/velodyne_reduced'),
151
+ ann_file='kitti_infos_val.pkl',
152
+ pipeline=test_pipeline,
153
+ modality=input_modality,
154
+ test_mode=True,
155
+ metainfo=metainfo,
156
+ box_type_3d='LiDAR',
157
+ backend_args=backend_args))
158
+ val_evaluator = dict(
159
+ type='KittiMetric',
160
+ ann_file=data_root + 'kitti_infos_val.pkl',
161
+ metric='bbox',
162
+ backend_args=backend_args)
163
+ test_evaluator = val_evaluator
164
+
165
+ vis_backends = [dict(type='LocalVisBackend')]
166
+ visualizer = dict(
167
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
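These `_base_` dataset files are plain Python, so a quick way to sanity-check one is to parse it with mmengine and inspect the resulting dict. A minimal sketch, assuming an mmdetection3d v1.1+ checkout with mmengine installed and the repository root as the working directory:

from mmengine.config import Config

cfg = Config.fromfile('configs/_base_/datasets/kitti-3d-3class.py')
# RepeatDataset wraps the actual KittiDataset, hence .dataset.dataset
print(cfg.train_dataloader.dataset.dataset.ann_file)  # kitti_infos_train.pkl
print(cfg.val_evaluator.type)                         # KittiMetric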
configs/_base_/datasets/kitti-3d-car.py ADDED
@@ -0,0 +1,165 @@
+ # dataset settings
+ dataset_type = 'KittiDataset'
+ data_root = 'data/kitti/'
+ class_names = ['Car']
+ point_cloud_range = [0, -40, -3, 70.4, 40, 1]
+ input_modality = dict(use_lidar=True, use_camera=False)
+ metainfo = dict(classes=class_names)
+
+ # Example of using a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer it from the prefix (LMDB and Memcache are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/kitti/'
+
+ # Method 2: use backend_args (named file_client_args in versions before 1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ db_sampler = dict(
+     data_root=data_root,
+     info_path=data_root + 'kitti_dbinfos_train.pkl',
+     rate=1.0,
+     prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
+     classes=class_names,
+     sample_groups=dict(Car=15),
+     points_loader=dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=4,
+         use_dim=4,
+         backend_args=backend_args),
+     backend_args=backend_args)
+
+ train_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=4,  # x, y, z, intensity
+         use_dim=4,
+         backend_args=backend_args),
+     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+     dict(type='ObjectSample', db_sampler=db_sampler),
+     dict(
+         type='ObjectNoise',
+         num_try=100,
+         translation_std=[1.0, 1.0, 0.5],
+         global_rot_range=[0.0, 0.0],
+         rot_range=[-0.78539816, 0.78539816]),
+     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+     dict(
+         type='GlobalRotScaleTrans',
+         rot_range=[-0.78539816, 0.78539816],
+         scale_ratio_range=[0.95, 1.05]),
+     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='PointShuffle'),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=4,
+         use_dim=4,
+         backend_args=backend_args),
+     dict(
+         type='MultiScaleFlipAug3D',
+         img_scale=(1333, 800),
+         pts_scale_ratio=1,
+         flip=False,
+         transforms=[
+             dict(
+                 type='GlobalRotScaleTrans',
+                 rot_range=[0, 0],
+                 scale_ratio_range=[1., 1.],
+                 translation_std=[0, 0, 0]),
+             dict(type='RandomFlip3D'),
+             dict(
+                 type='PointsRangeFilter', point_cloud_range=point_cloud_range)
+         ]),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+ # construct a pipeline for data and GT loading in the show function;
+ # please keep its loading behaviour consistent with test_pipeline (e.g. the file client)
+ eval_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=4,
+         use_dim=4,
+         backend_args=backend_args),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+ train_dataloader = dict(
+     batch_size=6,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type='RepeatDataset',
+         times=2,
+         dataset=dict(
+             type=dataset_type,
+             data_root=data_root,
+             ann_file='kitti_infos_train.pkl',
+             data_prefix=dict(pts='training/velodyne_reduced'),
+             pipeline=train_pipeline,
+             modality=input_modality,
+             test_mode=False,
+             metainfo=metainfo,
+             # we use box_type_3d='LiDAR' for the KITTI and nuScenes datasets
+             # and box_type_3d='Depth' for the SUN RGB-D and ScanNet datasets.
+             box_type_3d='LiDAR',
+             backend_args=backend_args)))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(pts='training/velodyne_reduced'),
+         ann_file='kitti_infos_val.pkl',
+         pipeline=test_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         box_type_3d='LiDAR',
+         backend_args=backend_args))
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(pts='training/velodyne_reduced'),
+         ann_file='kitti_infos_val.pkl',
+         pipeline=test_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         box_type_3d='LiDAR',
+         backend_args=backend_args))
+ val_evaluator = dict(
+     type='KittiMetric',
+     ann_file=data_root + 'kitti_infos_val.pkl',
+     metric='bbox',
+     backend_args=backend_args)
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
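Model configs consume a file like this through `_base_` inheritance and override only what differs; mmengine merges dict fields recursively. A minimal sketch of a derived config (the file name and batch size are illustrative, not part of this commit):

# e.g. configs/my_exp/pointpillars_kitti-3d-car_bs4.py (hypothetical)
_base_ = ['../_base_/datasets/kitti-3d-car.py']

# only the overridden key changes; the rest of train_dataloader is inherited
train_dataloader = dict(batch_size=4)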
configs/_base_/datasets/kitti-mono3d.py ADDED
@@ -0,0 +1,100 @@
+ dataset_type = 'KittiDataset'
+ data_root = 'data/kitti/'
+ class_names = ['Pedestrian', 'Cyclist', 'Car']
+ input_modality = dict(use_lidar=False, use_camera=True)
+ metainfo = dict(classes=class_names)
+
+ # Example of using a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer it from the prefix (LMDB and Memcache are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/kitti/'
+
+ # Method 2: use backend_args (named file_client_args in versions before 1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox=True,
+         with_label=True,
+         with_attr_label=False,
+         with_bbox_3d=True,
+         with_label_3d=True,
+         with_bbox_depth=True),
+     dict(type='Resize', scale=(1242, 375), keep_ratio=True),
+     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+     dict(
+         type='Pack3DDetInputs',
+         keys=[
+             'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
+             'gt_labels_3d', 'centers_2d', 'depths'
+         ]),
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(type='Resize', scale=(1242, 375), keep_ratio=True),
+     dict(type='Pack3DDetInputs', keys=['img'])
+ ]
+ eval_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(type='Pack3DDetInputs', keys=['img'])
+ ]
+
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='kitti_infos_train.pkl',
+         data_prefix=dict(img='training/image_2'),
+         pipeline=train_pipeline,
+         modality=input_modality,
+         load_type='fov_image_based',
+         test_mode=False,
+         metainfo=metainfo,
+         # we use box_type_3d='Camera' in the monocular
+         # 3D detection task
+         box_type_3d='Camera',
+         backend_args=backend_args))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(img='training/image_2'),
+         ann_file='kitti_infos_val.pkl',
+         pipeline=test_pipeline,
+         modality=input_modality,
+         load_type='fov_image_based',
+         metainfo=metainfo,
+         test_mode=True,
+         box_type_3d='Camera',
+         backend_args=backend_args))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='KittiMetric',
+     ann_file=data_root + 'kitti_infos_val.pkl',
+     metric='bbox',
+     backend_args=backend_args)
+
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
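Each pipeline above is a list of transform configs that is built into a callable chain at runtime. A hedged sketch of composing one manually, assuming mmdet3d v1.1 where `register_all_modules` registers the transforms (the loading steps need the actual data files to succeed):

from mmengine.dataset import Compose
from mmdet3d.utils import register_all_modules

register_all_modules()             # register datasets/transforms/models
# test_pipeline: the list defined in the config above
pipeline = Compose(test_pipeline)  # builds each dict via the TRANSFORMS registry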
configs/_base_/datasets/lyft-3d-range100.py ADDED
@@ -0,0 +1,150 @@
+ # If the point cloud range is changed, the models should also change their
+ # point cloud range accordingly
+ point_cloud_range = [-100, -100, -5, 100, 100, 3]
+ # For Lyft we usually do 9-class detection
+ class_names = [
+     'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
+     'bicycle', 'pedestrian', 'animal'
+ ]
+ dataset_type = 'LyftDataset'
+ data_root = 'data/lyft/'
+ data_prefix = dict(pts='v1.01-train/lidar', img='', sweeps='v1.01-train/lidar')
+ # Input modality for the Lyft dataset; this is consistent with the submission
+ # format, which requires the information in input_modality.
+ input_modality = dict(
+     use_lidar=True,
+     use_camera=False,
+     use_radar=False,
+     use_map=False,
+     use_external=False)
+
+ # Example of using a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer it from the prefix (LMDB and Memcache are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/lyft/'
+
+ # Method 2: use backend_args (named file_client_args in versions before 1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=5,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(
+         type='LoadPointsFromMultiSweeps',
+         sweeps_num=10,
+         backend_args=backend_args),
+     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+     dict(
+         type='GlobalRotScaleTrans',
+         rot_range=[-0.3925, 0.3925],
+         scale_ratio_range=[0.95, 1.05],
+         translation_std=[0, 0, 0]),
+     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='PointShuffle'),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=5,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(
+         type='LoadPointsFromMultiSweeps',
+         sweeps_num=10,
+         backend_args=backend_args),
+     dict(
+         type='MultiScaleFlipAug3D',
+         img_scale=(1333, 800),
+         pts_scale_ratio=1,
+         flip=False,
+         transforms=[
+             dict(
+                 type='GlobalRotScaleTrans',
+                 rot_range=[0, 0],
+                 scale_ratio_range=[1., 1.],
+                 translation_std=[0, 0, 0]),
+             dict(type='RandomFlip3D'),
+             dict(
+                 type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+         ]),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+ # construct a pipeline for data and GT loading in the show function;
+ # please keep its loading behaviour consistent with test_pipeline (e.g. the file client)
+ eval_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=5,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(
+         type='LoadPointsFromMultiSweeps',
+         sweeps_num=10,
+         backend_args=backend_args),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='lyft_infos_train.pkl',
+         pipeline=train_pipeline,
+         metainfo=dict(classes=class_names),
+         modality=input_modality,
+         data_prefix=data_prefix,
+         test_mode=False,
+         box_type_3d='LiDAR',
+         backend_args=backend_args))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='lyft_infos_val.pkl',
+         pipeline=test_pipeline,
+         metainfo=dict(classes=class_names),
+         modality=input_modality,
+         test_mode=True,
+         data_prefix=data_prefix,
+         box_type_3d='LiDAR',
+         backend_args=backend_args))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='LyftMetric',
+     data_root=data_root,
+     ann_file='lyft_infos_val.pkl',
+     metric='bbox',
+     backend_args=backend_args)
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
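point_cloud_range directly sets the bird's-eye-view area a voxel-based detector must grid. A quick back-of-the-envelope check (the voxel size below is hypothetical; real values live in the model config):

voxel_size = [0.25, 0.25, 8]  # hypothetical BEV voxel size
nx = (point_cloud_range[3] - point_cloud_range[0]) / voxel_size[0]
ny = (point_cloud_range[4] - point_cloud_range[1]) / voxel_size[1]
print(int(nx), int(ny))  # 800 800 for this 100 m variant, vs. 640 640 at 80 m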
configs/_base_/datasets/lyft-3d.py ADDED
@@ -0,0 +1,160 @@
+ # If the point cloud range is changed, the models should also change their
+ # point cloud range accordingly
+ point_cloud_range = [-80, -80, -5, 80, 80, 3]
+ # For Lyft we usually do 9-class detection
+ class_names = [
+     'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
+     'bicycle', 'pedestrian', 'animal'
+ ]
+ dataset_type = 'LyftDataset'
+ data_root = 'data/lyft/'
+ # Input modality for the Lyft dataset; this is consistent with the submission
+ # format, which requires the information in input_modality.
+ input_modality = dict(use_lidar=True, use_camera=False)
+ data_prefix = dict(pts='v1.01-train/lidar', img='', sweeps='v1.01-train/lidar')
+
+ # Example of using a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer it from the prefix (LMDB and Memcache are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/lyft/'
+
+ # Method 2: use backend_args (named file_client_args in versions before 1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=5,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(
+         type='LoadPointsFromMultiSweeps',
+         sweeps_num=10,
+         backend_args=backend_args),
+     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+     dict(
+         type='GlobalRotScaleTrans',
+         rot_range=[-0.3925, 0.3925],
+         scale_ratio_range=[0.95, 1.05],
+         translation_std=[0, 0, 0]),
+     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='PointShuffle'),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=5,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(
+         type='LoadPointsFromMultiSweeps',
+         sweeps_num=10,
+         backend_args=backend_args),
+     dict(
+         type='MultiScaleFlipAug3D',
+         img_scale=(1333, 800),
+         pts_scale_ratio=1,
+         flip=False,
+         transforms=[
+             dict(
+                 type='GlobalRotScaleTrans',
+                 rot_range=[0, 0],
+                 scale_ratio_range=[1., 1.],
+                 translation_std=[0, 0, 0]),
+             dict(type='RandomFlip3D'),
+             dict(
+                 type='PointsRangeFilter', point_cloud_range=point_cloud_range)
+         ]),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+ # construct a pipeline for data and GT loading in the show function;
+ # please keep its loading behaviour consistent with test_pipeline (e.g. the file client)
+ eval_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=5,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(
+         type='LoadPointsFromMultiSweeps',
+         sweeps_num=10,
+         backend_args=backend_args),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='lyft_infos_train.pkl',
+         pipeline=train_pipeline,
+         metainfo=dict(classes=class_names),
+         modality=input_modality,
+         data_prefix=data_prefix,
+         test_mode=False,
+         box_type_3d='LiDAR',
+         backend_args=backend_args))
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='lyft_infos_val.pkl',
+         pipeline=test_pipeline,
+         metainfo=dict(classes=class_names),
+         modality=input_modality,
+         data_prefix=data_prefix,
+         test_mode=True,
+         box_type_3d='LiDAR',
+         backend_args=backend_args))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='lyft_infos_val.pkl',
+         pipeline=test_pipeline,
+         metainfo=dict(classes=class_names),
+         modality=input_modality,
+         test_mode=True,
+         data_prefix=data_prefix,
+         box_type_3d='LiDAR',
+         backend_args=backend_args))
+
+ val_evaluator = dict(
+     type='LyftMetric',
+     data_root=data_root,
+     ann_file='lyft_infos_val.pkl',
+     metric='bbox',
+     backend_args=backend_args)
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
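The two Lyft variants differ only in coverage; because point_cloud_range is interpolated into the pipeline dicts when this base file is parsed, the 100 m version ships as a separate file rather than a one-line override. The extra coverage is substantial:

default_range = [-80, -80, -5, 80, 80, 3]     # this file
long_range = [-100, -100, -5, 100, 100, 3]    # lyft-3d-range100.py
extra_bev_area = 200 * 200 - 160 * 160
print(extra_bev_area)  # 14400 m^2 of additional bird's-eye-view coverage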
configs/_base_/datasets/nuim-instance.py ADDED
@@ -0,0 +1,70 @@
+ dataset_type = 'CocoDataset'
+ data_root = 'data/nuimages/'
+ class_names = [
+     'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
+     'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
+ ]
+
+ # Example of using a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer it from the prefix (LMDB and Memcache are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/nuimages/'
+
+ # Method 2: use backend_args (named file_client_args in versions before 1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+     dict(
+         type='Resize',
+         img_scale=[(1280, 720), (1920, 1080)],
+         multiscale_mode='range',
+         keep_ratio=True),
+     dict(type='RandomFlip', flip_ratio=0.5),
+     dict(type='PackDetInputs'),
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=backend_args),
+     dict(
+         type='MultiScaleFlipAug',
+         img_scale=(1600, 900),
+         flip=False,
+         transforms=[
+             dict(type='Resize', keep_ratio=True),
+             dict(type='RandomFlip'),
+         ]),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor')),
+ ]
+ data = dict(
+     samples_per_gpu=2,
+     workers_per_gpu=2,
+     train=dict(
+         type=dataset_type,
+         ann_file=data_root + 'annotations/nuimages_v1.0-train.json',
+         img_prefix=data_root,
+         classes=class_names,
+         pipeline=train_pipeline),
+     val=dict(
+         type=dataset_type,
+         ann_file=data_root + 'annotations/nuimages_v1.0-val.json',
+         img_prefix=data_root,
+         classes=class_names,
+         pipeline=test_pipeline),
+     test=dict(
+         type=dataset_type,
+         ann_file=data_root + 'annotations/nuimages_v1.0-val.json',
+         img_prefix=data_root,
+         classes=class_names,
+         pipeline=test_pipeline))
+ evaluation = dict(metric=['bbox', 'segm'])
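Note that nuim-instance.py still uses the MMDetection 2.x schema (`data = dict(samples_per_gpu=...)`, `evaluation = ...`) rather than the 3.x dataloader layout used by every other file in this directory. For orientation only, a hedged, untested sketch of what the train entry would look like in the newer style seen above:

# hypothetical 3.x-style equivalent of the legacy data['train'] entry
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/nuimages_v1.0-train.json',
        data_prefix=dict(img=data_root),
        metainfo=dict(classes=class_names),
        pipeline=train_pipeline))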
configs/_base_/datasets/nus-3d.py ADDED
@@ -0,0 +1,169 @@
+ # If the point cloud range is changed, the models should also change their
+ # point cloud range accordingly
+ point_cloud_range = [-50, -50, -5, 50, 50, 3]
+ # Using calibration info to convert the LiDAR-coordinate point cloud range
+ # to the ego-coordinate range can bring a small improvement on nuScenes.
+ # point_cloud_range = [-50, -50.8, -5, 50, 49.2, 3]
+ # For nuScenes we usually do 10-class detection
+ class_names = [
+     'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
+     'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
+ ]
+ metainfo = dict(classes=class_names)
+ dataset_type = 'NuScenesDataset'
+ data_root = 'data/nuscenes/'
+ # Input modality for the nuScenes dataset; this is consistent with the
+ # submission format, which requires the information in input_modality.
+ input_modality = dict(use_lidar=True, use_camera=False)
+ data_prefix = dict(pts='samples/LIDAR_TOP', img='', sweeps='sweeps/LIDAR_TOP')
+
+ # Example of using a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer it from the prefix (LMDB and Memcache are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/nuscenes/'
+
+ # Method 2: use backend_args (named file_client_args in versions before 1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=5,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(
+         type='LoadPointsFromMultiSweeps',
+         sweeps_num=10,
+         backend_args=backend_args),
+     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+     dict(
+         type='GlobalRotScaleTrans',
+         rot_range=[-0.3925, 0.3925],
+         scale_ratio_range=[0.95, 1.05],
+         translation_std=[0, 0, 0]),
+     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='ObjectNameFilter', classes=class_names),
+     dict(type='PointShuffle'),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=5,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(
+         type='LoadPointsFromMultiSweeps',
+         sweeps_num=10,
+         test_mode=True,
+         backend_args=backend_args),
+     dict(
+         type='MultiScaleFlipAug3D',
+         img_scale=(1333, 800),
+         pts_scale_ratio=1,
+         flip=False,
+         transforms=[
+             dict(
+                 type='GlobalRotScaleTrans',
+                 rot_range=[0, 0],
+                 scale_ratio_range=[1., 1.],
+                 translation_std=[0, 0, 0]),
+             dict(type='RandomFlip3D'),
+             dict(
+                 type='PointsRangeFilter', point_cloud_range=point_cloud_range)
+         ]),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+ # construct a pipeline for data and GT loading in the show function;
+ # please keep its loading behaviour consistent with test_pipeline (e.g. the file client)
+ eval_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=5,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(
+         type='LoadPointsFromMultiSweeps',
+         sweeps_num=10,
+         test_mode=True,
+         backend_args=backend_args),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+ train_dataloader = dict(
+     batch_size=4,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='nuscenes_infos_train.pkl',
+         pipeline=train_pipeline,
+         metainfo=metainfo,
+         modality=input_modality,
+         test_mode=False,
+         data_prefix=data_prefix,
+         # we use box_type_3d='LiDAR' for the KITTI and nuScenes datasets
+         # and box_type_3d='Depth' for the SUN RGB-D and ScanNet datasets.
+         box_type_3d='LiDAR',
+         backend_args=backend_args))
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='nuscenes_infos_val.pkl',
+         pipeline=test_pipeline,
+         metainfo=metainfo,
+         modality=input_modality,
+         data_prefix=data_prefix,
+         test_mode=True,
+         box_type_3d='LiDAR',
+         backend_args=backend_args))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='nuscenes_infos_val.pkl',
+         pipeline=test_pipeline,
+         metainfo=metainfo,
+         modality=input_modality,
+         test_mode=True,
+         data_prefix=data_prefix,
+         box_type_3d='LiDAR',
+         backend_args=backend_args))
+
+ val_evaluator = dict(
+     type='NuScenesMetric',
+     data_root=data_root,
+     ann_file=data_root + 'nuscenes_infos_val.pkl',
+     metric='bbox',
+     backend_args=backend_args)
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
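Once parsed, the dataset section can be built directly from the registry, which is a convenient way to verify that the annotation files are in place. A minimal sketch, assuming mmdet3d v1.1 and a prepared data/nuscenes/ directory:

from mmengine.config import Config
from mmdet3d.registry import DATASETS
from mmdet3d.utils import register_all_modules

register_all_modules()
cfg = Config.fromfile('configs/_base_/datasets/nus-3d.py')
dataset = DATASETS.build(cfg.train_dataloader.dataset)  # needs the info .pkl
print(len(dataset))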
configs/_base_/datasets/nus-mono3d.py ADDED
@@ -0,0 +1,119 @@
+ dataset_type = 'NuScenesDataset'
+ data_root = 'data/nuscenes/'
+ class_names = [
+     'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
+     'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
+ ]
+ metainfo = dict(classes=class_names)
+ # Input modality for the nuScenes dataset; this is consistent with the
+ # submission format, which requires the information in input_modality.
+ input_modality = dict(use_lidar=False, use_camera=True)
+
+ # Example of using a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer it from the prefix (LMDB and Memcache are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/nuscenes/'
+
+ # Method 2: use backend_args (named file_client_args in versions before 1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox=True,
+         with_label=True,
+         with_attr_label=True,
+         with_bbox_3d=True,
+         with_label_3d=True,
+         with_bbox_depth=True),
+     dict(type='Resize', scale=(1600, 900), keep_ratio=True),
+     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+     dict(
+         type='Pack3DDetInputs',
+         keys=[
+             'img', 'gt_bboxes', 'gt_bboxes_labels', 'attr_labels',
+             'gt_bboxes_3d', 'gt_labels_3d', 'centers_2d', 'depths'
+         ]),
+ ]
+
+ test_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(type='mmdet.Resize', scale=(1600, 900), keep_ratio=True),
+     dict(type='Pack3DDetInputs', keys=['img'])
+ ]
+
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             pts='',
+             CAM_FRONT='samples/CAM_FRONT',
+             CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
+             CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
+             CAM_BACK='samples/CAM_BACK',
+             CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
+             CAM_BACK_LEFT='samples/CAM_BACK_LEFT'),
+         ann_file='nuscenes_infos_train.pkl',
+         load_type='mv_image_based',
+         pipeline=train_pipeline,
+         metainfo=metainfo,
+         modality=input_modality,
+         test_mode=False,
+         # we use box_type_3d='Camera' in the monocular
+         # 3D detection task
+         box_type_3d='Camera',
+         use_valid_flag=True,
+         backend_args=backend_args))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             pts='',
+             CAM_FRONT='samples/CAM_FRONT',
+             CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
+             CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
+             CAM_BACK='samples/CAM_BACK',
+             CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
+             CAM_BACK_LEFT='samples/CAM_BACK_LEFT'),
+         ann_file='nuscenes_infos_val.pkl',
+         load_type='mv_image_based',
+         pipeline=test_pipeline,
+         modality=input_modality,
+         metainfo=metainfo,
+         test_mode=True,
+         box_type_3d='Camera',
+         use_valid_flag=True,
+         backend_args=backend_args))
+ test_dataloader = val_dataloader
+
+ val_evaluator = dict(
+     type='NuScenesMetric',
+     data_root=data_root,
+     ann_file=data_root + 'nuscenes_infos_val.pkl',
+     metric='bbox',
+     backend_args=backend_args)
+
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
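With load_type='mv_image_based', every camera view becomes an independent Camera-box sample rather than one sample per keyframe, which multiplies the dataset size by the number of cameras:

num_train_keyframes = 28130  # nuScenes v1.0 trainval train split (approximate)
num_cameras = 6              # the six CAM_* entries in data_prefix
print(num_train_keyframes * num_cameras)  # ~168780 single-view samples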
configs/_base_/datasets/s3dis-3d.py ADDED
@@ -0,0 +1,134 @@
+ # dataset settings
+ dataset_type = 'S3DISDataset'
+ data_root = 'data/s3dis/'
+
+ # Example of using a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer it from the prefix (LMDB and Memcache are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/s3dis/'
+
+ # Method 2: use backend_args (named file_client_args in versions before 1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ metainfo = dict(classes=('table', 'chair', 'sofa', 'bookcase', 'board'))
+ train_area = [1, 2, 3, 4, 6]
+ test_area = 5
+
+ train_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='DEPTH',
+         shift_height=False,
+         use_color=True,
+         load_dim=6,
+         use_dim=[0, 1, 2, 3, 4, 5],
+         backend_args=backend_args),
+     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+     dict(type='PointSample', num_points=100000),
+     dict(
+         type='RandomFlip3D',
+         sync_2d=False,
+         flip_ratio_bev_horizontal=0.5,
+         flip_ratio_bev_vertical=0.5),
+     dict(
+         type='GlobalRotScaleTrans',
+         rot_range=[-0.087266, 0.087266],
+         scale_ratio_range=[0.9, 1.1],
+         translation_std=[.1, .1, .1],
+         shift_height=False),
+     dict(type='NormalizePointsColor', color_mean=None),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='DEPTH',
+         shift_height=False,
+         use_color=True,
+         load_dim=6,
+         use_dim=[0, 1, 2, 3, 4, 5],
+         backend_args=backend_args),
+     dict(
+         type='MultiScaleFlipAug3D',
+         img_scale=(1333, 800),
+         pts_scale_ratio=1,
+         flip=False,
+         transforms=[
+             dict(
+                 type='GlobalRotScaleTrans',
+                 rot_range=[0, 0],
+                 scale_ratio_range=[1., 1.],
+                 translation_std=[0, 0, 0]),
+             dict(
+                 type='RandomFlip3D',
+                 sync_2d=False,
+                 flip_ratio_bev_horizontal=0.5,
+                 flip_ratio_bev_vertical=0.5),
+             dict(type='PointSample', num_points=100000),
+             dict(type='NormalizePointsColor', color_mean=None),
+         ]),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+
+ train_dataloader = dict(
+     batch_size=8,
+     num_workers=4,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type='RepeatDataset',
+         times=13,
+         dataset=dict(
+             type='ConcatDataset',
+             datasets=[
+                 dict(
+                     type=dataset_type,
+                     data_root=data_root,
+                     ann_file=f's3dis_infos_Area_{i}.pkl',
+                     pipeline=train_pipeline,
+                     filter_empty_gt=True,
+                     metainfo=metainfo,
+                     box_type_3d='Depth',
+                     backend_args=backend_args) for i in train_area
+             ])))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file=f's3dis_infos_Area_{test_area}.pkl',
+         pipeline=test_pipeline,
+         metainfo=metainfo,
+         test_mode=True,
+         box_type_3d='Depth',
+         backend_args=backend_args))
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file=f's3dis_infos_Area_{test_area}.pkl',
+         pipeline=test_pipeline,
+         metainfo=metainfo,
+         test_mode=True,
+         box_type_3d='Depth',
+         backend_args=backend_args))
+ val_evaluator = dict(type='IndoorMetric')
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
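Holding out Area 5 is the standard S3DIS split. Because train_area and test_area are interpolated into the dataloader dicts when this file is parsed, switching folds means editing these two variables here (or redefining the dataloaders downstream), e.g. for a hypothetical fold that holds out Area 6 instead:

train_area = [1, 2, 3, 4, 5]  # hypothetical cross-validation fold
test_area = 6
# the list comprehension above then yields s3dis_infos_Area_{1..5}.pkl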
configs/_base_/datasets/s3dis-seg.py ADDED
@@ -0,0 +1,169 @@
+ # For S3DIS seg we usually do 13-class segmentation
+ class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',
+                'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter')
+ metainfo = dict(classes=class_names)
+ dataset_type = 'S3DISSegDataset'
+ data_root = 'data/s3dis/'
+ input_modality = dict(use_lidar=True, use_camera=False)
+ data_prefix = dict(
+     pts='points',
+     pts_instance_mask='instance_mask',
+     pts_semantic_mask='semantic_mask')
+
+ # Example of using a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer it from the prefix (LMDB and Memcache are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/s3dis/'
+
+ # Method 2: use backend_args (named file_client_args in versions before 1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ num_points = 4096
+ train_area = [1, 2, 3, 4, 6]
+ test_area = 5
+ train_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='DEPTH',
+         shift_height=False,
+         use_color=True,
+         load_dim=6,
+         use_dim=[0, 1, 2, 3, 4, 5],
+         backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox_3d=False,
+         with_label_3d=False,
+         with_mask_3d=False,
+         with_seg_3d=True,
+         backend_args=backend_args),
+     dict(type='PointSegClassMapping'),
+     dict(
+         type='IndoorPatchPointSample',
+         num_points=num_points,
+         block_size=1.0,
+         ignore_index=len(class_names),
+         use_normalized_coord=True,
+         enlarge_size=0.2,
+         min_unique_num=None),
+     dict(type='NormalizePointsColor', color_mean=None),
+     dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='DEPTH',
+         shift_height=False,
+         use_color=True,
+         load_dim=6,
+         use_dim=[0, 1, 2, 3, 4, 5],
+         backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox_3d=False,
+         with_label_3d=False,
+         with_mask_3d=False,
+         with_seg_3d=True,
+         backend_args=backend_args),
+     dict(type='NormalizePointsColor', color_mean=None),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+ # construct a pipeline for data and GT loading in the show function;
+ # please keep its loading behaviour consistent with test_pipeline (e.g. the file client)
+ # we need to load the GT seg mask!
+ eval_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='DEPTH',
+         shift_height=False,
+         use_color=True,
+         load_dim=6,
+         use_dim=[0, 1, 2, 3, 4, 5],
+         backend_args=backend_args),
+     dict(type='NormalizePointsColor', color_mean=None),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+ tta_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='DEPTH',
+         shift_height=False,
+         use_color=True,
+         load_dim=6,
+         use_dim=[0, 1, 2, 3, 4, 5],
+         backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox_3d=False,
+         with_label_3d=False,
+         with_mask_3d=False,
+         with_seg_3d=True,
+         backend_args=backend_args),
+     dict(type='NormalizePointsColor', color_mean=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[[
+             dict(
+                 type='RandomFlip3D',
+                 sync_2d=False,
+                 flip_ratio_bev_horizontal=0.,
+                 flip_ratio_bev_vertical=0.)
+         ], [dict(type='Pack3DDetInputs', keys=['points'])]])
+ ]
+
+ # train on Areas 1, 2, 3, 4 and 6
+ # test on Area 5
+ train_dataloader = dict(
+     batch_size=8,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_files=[f's3dis_infos_Area_{i}.pkl' for i in train_area],
+         metainfo=metainfo,
+         data_prefix=data_prefix,
+         pipeline=train_pipeline,
+         modality=input_modality,
+         ignore_index=len(class_names),
+         scene_idxs=[
+             f'seg_info/Area_{i}_resampled_scene_idxs.npy' for i in train_area
+         ],
+         test_mode=False,
+         backend_args=backend_args))
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_files=f's3dis_infos_Area_{test_area}.pkl',
+         metainfo=metainfo,
+         data_prefix=data_prefix,
+         pipeline=test_pipeline,
+         modality=input_modality,
+         ignore_index=len(class_names),
+         scene_idxs=f'seg_info/Area_{test_area}_resampled_scene_idxs.npy',
+         test_mode=True,
+         backend_args=backend_args))
+ val_dataloader = test_dataloader
+
+ val_evaluator = dict(type='SegMetric')
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
+
+ tta_model = dict(type='Seg3DTTAModel')
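The ignore_index=len(class_names) convention reserves the label one past the last real class for points that should not contribute to the loss or the metric:

ignore_index = len(class_names)
print(ignore_index)  # 13: the 13 S3DIS classes occupy labels 0..12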
configs/_base_/datasets/scannet-3d.py ADDED
@@ -0,0 +1,141 @@
+ # dataset settings
+ dataset_type = 'ScanNetDataset'
+ data_root = 'data/scannet/'
+
+ metainfo = dict(
+     classes=('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
+              'bookshelf', 'picture', 'counter', 'desk', 'curtain',
+              'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
+              'garbagebin'))
+
+ # Example of using a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer it from the prefix (LMDB and Memcache are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/scannet/'
+
+ # Method 2: use backend_args (named file_client_args in versions before 1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='DEPTH',
+         shift_height=True,
+         load_dim=6,
+         use_dim=[0, 1, 2],
+         backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox_3d=True,
+         with_label_3d=True,
+         with_mask_3d=True,
+         with_seg_3d=True,
+         backend_args=backend_args),
+     dict(type='GlobalAlignment', rotation_axis=2),
+     dict(type='PointSegClassMapping'),
+     dict(type='PointSample', num_points=40000),
+     dict(
+         type='RandomFlip3D',
+         sync_2d=False,
+         flip_ratio_bev_horizontal=0.5,
+         flip_ratio_bev_vertical=0.5),
+     dict(
+         type='GlobalRotScaleTrans',
+         rot_range=[-0.087266, 0.087266],
+         scale_ratio_range=[1.0, 1.0],
+         shift_height=True),
+     dict(
+         type='Pack3DDetInputs',
+         keys=[
+             'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
+             'pts_instance_mask'
+         ])
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='DEPTH',
+         shift_height=True,
+         load_dim=6,
+         use_dim=[0, 1, 2],
+         backend_args=backend_args),
+     dict(type='GlobalAlignment', rotation_axis=2),
+     dict(
+         type='MultiScaleFlipAug3D',
+         img_scale=(1333, 800),
+         pts_scale_ratio=1,
+         flip=False,
+         transforms=[
+             dict(
+                 type='GlobalRotScaleTrans',
+                 rot_range=[0, 0],
+                 scale_ratio_range=[1., 1.],
+                 translation_std=[0, 0, 0]),
+             dict(
+                 type='RandomFlip3D',
+                 sync_2d=False,
+                 flip_ratio_bev_horizontal=0.5,
+                 flip_ratio_bev_vertical=0.5),
+             dict(type='PointSample', num_points=40000),
+         ]),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+
+ train_dataloader = dict(
+     batch_size=8,
+     num_workers=4,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type='RepeatDataset',
+         times=5,
+         dataset=dict(
+             type=dataset_type,
+             data_root=data_root,
+             ann_file='scannet_infos_train.pkl',
+             pipeline=train_pipeline,
+             filter_empty_gt=False,
+             metainfo=metainfo,
+             # we use box_type_3d='LiDAR' for the KITTI and nuScenes datasets
+             # and box_type_3d='Depth' for the SUN RGB-D and ScanNet datasets.
+             box_type_3d='Depth',
+             backend_args=backend_args)))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='scannet_infos_val.pkl',
+         pipeline=test_pipeline,
+         metainfo=metainfo,
+         test_mode=True,
+         box_type_3d='Depth',
+         backend_args=backend_args))
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='scannet_infos_val.pkl',
+         pipeline=test_pipeline,
+         metainfo=metainfo,
+         test_mode=True,
+         box_type_3d='Depth',
+         backend_args=backend_args))
+ val_evaluator = dict(type='IndoorMetric')
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
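RepeatDataset(times=5) stretches one training 'epoch' to five passes over the underlying scans, which amortises dataloader start-up and keeps epoch-based schedules coarse. Rough numbers (the scan count is approximate):

num_train_scans = 1201           # ScanNet v2 train split (approximate)
samples_per_epoch = 5 * num_train_scans
print(samples_per_epoch)         # ~6005 samples per reported epoch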
configs/_base_/datasets/scannet-seg.py ADDED
@@ -0,0 +1,164 @@
+ # For ScanNet seg we usually do 20-class segmentation
+ class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table',
+                'door', 'window', 'bookshelf', 'picture', 'counter', 'desk',
+                'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink',
+                'bathtub', 'otherfurniture')
+ metainfo = dict(classes=class_names)
+ dataset_type = 'ScanNetSegDataset'
+ data_root = 'data/scannet/'
+ input_modality = dict(use_lidar=True, use_camera=False)
+ data_prefix = dict(
+     pts='points',
+     pts_instance_mask='instance_mask',
+     pts_semantic_mask='semantic_mask')
+
+ # Example of using a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer it from the prefix (LMDB and Memcache are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/scannet/'
+
+ # Method 2: use backend_args (named file_client_args in versions before 1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ num_points = 8192
+ train_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='DEPTH',
+         shift_height=False,
+         use_color=True,
+         load_dim=6,
+         use_dim=[0, 1, 2, 3, 4, 5],
+         backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox_3d=False,
+         with_label_3d=False,
+         with_mask_3d=False,
+         with_seg_3d=True,
+         backend_args=backend_args),
+     dict(type='PointSegClassMapping'),
+     dict(
+         type='IndoorPatchPointSample',
+         num_points=num_points,
+         block_size=1.5,
+         ignore_index=len(class_names),
+         use_normalized_coord=False,
+         enlarge_size=0.2,
+         min_unique_num=None),
+     dict(type='NormalizePointsColor', color_mean=None),
+     dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='DEPTH',
+         shift_height=False,
+         use_color=True,
+         load_dim=6,
+         use_dim=[0, 1, 2, 3, 4, 5],
+         backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox_3d=False,
+         with_label_3d=False,
+         with_mask_3d=False,
+         with_seg_3d=True,
+         backend_args=backend_args),
+     dict(type='NormalizePointsColor', color_mean=None),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+ # construct a pipeline for data and GT loading in the show function;
+ # please keep its loading behaviour consistent with test_pipeline (e.g. the file client)
+ # we need to load the GT seg mask!
+ eval_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='DEPTH',
+         shift_height=False,
+         use_color=True,
+         load_dim=6,
+         use_dim=[0, 1, 2, 3, 4, 5],
+         backend_args=backend_args),
+     dict(type='NormalizePointsColor', color_mean=None),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+ tta_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='DEPTH',
+         shift_height=False,
+         use_color=True,
+         load_dim=6,
+         use_dim=[0, 1, 2, 3, 4, 5],
+         backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox_3d=False,
+         with_label_3d=False,
+         with_mask_3d=False,
+         with_seg_3d=True,
+         backend_args=backend_args),
+     dict(type='NormalizePointsColor', color_mean=None),
+     dict(
+         type='TestTimeAug',
+         transforms=[[
+             dict(
+                 type='RandomFlip3D',
+                 sync_2d=False,
+                 flip_ratio_bev_horizontal=0.,
+                 flip_ratio_bev_vertical=0.)
+         ], [dict(type='Pack3DDetInputs', keys=['points'])]])
+ ]
+
+ train_dataloader = dict(
+     batch_size=8,
+     num_workers=4,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='scannet_infos_train.pkl',
+         metainfo=metainfo,
+         data_prefix=data_prefix,
+         pipeline=train_pipeline,
+         modality=input_modality,
+         ignore_index=len(class_names),
+         scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy',
+         test_mode=False,
+         backend_args=backend_args))
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='scannet_infos_val.pkl',
+         metainfo=metainfo,
+         data_prefix=data_prefix,
+         pipeline=test_pipeline,
+         modality=input_modality,
+         ignore_index=len(class_names),
+         test_mode=True,
+         backend_args=backend_args))
+ val_dataloader = test_dataloader
+
+ val_evaluator = dict(type='SegMetric')
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
+
+ tta_model = dict(type='Seg3DTTAModel')
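TestTimeAug enumerates the cartesian product of its transform groups. Here the flip group has a single all-zero entry, so exactly one (identity) view is produced per scan; the hooks are in place for denser TTA (compare semantickitti.py below):

flip_variants = 1   # one RandomFlip3D entry, both ratios 0.
pack_variants = 1   # one Pack3DDetInputs entry
print(flip_variants * pack_variants)  # 1 view per scan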
configs/_base_/datasets/semantickitti.py ADDED
@@ -0,0 +1,224 @@
+ # For SemanticKITTI we usually do 19-class segmentation.
+ # For labels_map we follow the uniform format of MMDetection & MMSegmentation,
+ # i.e. we consider the unlabeled class as the last one, which is different
+ # from the original implementation of some methods, e.g. Cylinder3D.
+ dataset_type = 'SemanticKittiDataset'
+ data_root = 'data/semantickitti/'
+ class_names = [
+     'car', 'bicycle', 'motorcycle', 'truck', 'bus', 'person', 'bicyclist',
+     'motorcyclist', 'road', 'parking', 'sidewalk', 'other-ground', 'building',
+     'fence', 'vegetation', 'trunk', 'terrain', 'pole', 'traffic-sign'
+ ]
+ labels_map = {
+     0: 19,  # "unlabeled"
+     1: 19,  # "outlier" mapped to "unlabeled" --------------mapped
+     10: 0,  # "car"
+     11: 1,  # "bicycle"
+     13: 4,  # "bus" mapped to "other-vehicle" --------------mapped
+     15: 2,  # "motorcycle"
+     16: 4,  # "on-rails" mapped to "other-vehicle" ---------mapped
+     18: 3,  # "truck"
+     20: 4,  # "other-vehicle"
+     30: 5,  # "person"
+     31: 6,  # "bicyclist"
+     32: 7,  # "motorcyclist"
+     40: 8,  # "road"
+     44: 9,  # "parking"
+     48: 10,  # "sidewalk"
+     49: 11,  # "other-ground"
+     50: 12,  # "building"
+     51: 13,  # "fence"
+     52: 19,  # "other-structure" mapped to "unlabeled" ------mapped
+     60: 8,  # "lane-marking" to "road" ---------------------mapped
+     70: 14,  # "vegetation"
+     71: 15,  # "trunk"
+     72: 16,  # "terrain"
+     80: 17,  # "pole"
+     81: 18,  # "traffic-sign"
+     99: 19,  # "other-object" to "unlabeled" ----------------mapped
+     252: 0,  # "moving-car" to "car" ------------------------mapped
+     253: 6,  # "moving-bicyclist" to "bicyclist" ------------mapped
+     254: 5,  # "moving-person" to "person" ------------------mapped
+     255: 7,  # "moving-motorcyclist" to "motorcyclist" ------mapped
+     256: 4,  # "moving-on-rails" mapped to "other-vehicle" --mapped
+     257: 4,  # "moving-bus" mapped to "other-vehicle" -------mapped
+     258: 3,  # "moving-truck" to "truck" --------------------mapped
+     259: 4  # "moving-other-vehicle" to "other-vehicle" -----mapped
+ }
+
+ metainfo = dict(
+     classes=class_names, seg_label_mapping=labels_map, max_label=259)
+
+ input_modality = dict(use_lidar=True, use_camera=False)
+
+ # Example of using a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer it from the prefix (LMDB and Memcache are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/semantickitti/'
+
+ # Method 2: use backend_args (named file_client_args in versions before 1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=4,
+         use_dim=4,
+         backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox_3d=False,
+         with_label_3d=False,
+         with_seg_3d=True,
+         seg_3d_dtype='np.int32',
+         seg_offset=2**16,
+         dataset_type='semantickitti',
+         backend_args=backend_args),
+     dict(type='PointSegClassMapping'),
+     dict(
+         type='RandomFlip3D',
+         sync_2d=False,
+         flip_ratio_bev_horizontal=0.5,
+         flip_ratio_bev_vertical=0.5),
+     dict(
+         type='GlobalRotScaleTrans',
+         rot_range=[-0.78539816, 0.78539816],
+         scale_ratio_range=[0.95, 1.05],
+         translation_std=[0.1, 0.1, 0.1],
+     ),
+     dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=4,
+         use_dim=4,
+         backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox_3d=False,
+         with_label_3d=False,
+         with_seg_3d=True,
+         seg_3d_dtype='np.int32',
+         seg_offset=2**16,
+         dataset_type='semantickitti',
+         backend_args=backend_args),
+     dict(type='PointSegClassMapping'),
+     dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
+ ]
+ # construct a pipeline for data and GT loading in the show function;
+ # please keep its loading behaviour consistent with test_pipeline (e.g. the file client)
+ eval_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=4,
+         use_dim=4,
+         backend_args=backend_args),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+ tta_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=4,
+         use_dim=4,
+         backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox_3d=False,
+         with_label_3d=False,
+         with_seg_3d=True,
+         seg_3d_dtype='np.int32',
+         seg_offset=2**16,
+         dataset_type='semantickitti',
+         backend_args=backend_args),
+     dict(type='PointSegClassMapping'),
+     dict(
+         type='TestTimeAug',
+         transforms=[[
+             dict(
+                 type='RandomFlip3D',
+                 sync_2d=False,
+                 flip_ratio_bev_horizontal=0.,
+                 flip_ratio_bev_vertical=0.),
+             dict(
+                 type='RandomFlip3D',
+                 sync_2d=False,
+                 flip_ratio_bev_horizontal=0.,
+                 flip_ratio_bev_vertical=1.),
+             dict(
+                 type='RandomFlip3D',
+                 sync_2d=False,
+                 flip_ratio_bev_horizontal=1.,
+                 flip_ratio_bev_vertical=0.),
+             dict(
+                 type='RandomFlip3D',
+                 sync_2d=False,
+                 flip_ratio_bev_horizontal=1.,
+                 flip_ratio_bev_vertical=1.)
+         ], [
+             dict(
+                 type='GlobalRotScaleTrans',
+                 rot_range=[pcd_rotate_range, pcd_rotate_range],
+                 scale_ratio_range=[pcd_scale_factor, pcd_scale_factor],
+                 translation_std=[0, 0, 0])
+             for pcd_rotate_range in [-0.78539816, 0.0, 0.78539816]
+             for pcd_scale_factor in [0.95, 1.0, 1.05]
+         ], [dict(type='Pack3DDetInputs', keys=['points'])]])
+ ]
+
+ train_dataloader = dict(
184
+ batch_size=2,
185
+ num_workers=4,
186
+ persistent_workers=True,
187
+ sampler=dict(type='DefaultSampler', shuffle=True),
188
+ dataset=dict(
189
+ type=dataset_type,
190
+ data_root=data_root,
191
+ ann_file='semantickitti_infos_train.pkl',
192
+ pipeline=train_pipeline,
193
+ metainfo=metainfo,
194
+ modality=input_modality,
195
+ ignore_index=19,
196
+ backend_args=backend_args))
197
+
198
+ test_dataloader = dict(
199
+ batch_size=1,
200
+ num_workers=1,
201
+ persistent_workers=True,
202
+ drop_last=False,
203
+ sampler=dict(type='DefaultSampler', shuffle=False),
204
+ dataset=dict(
205
+ type=dataset_type,
206
+ data_root=data_root,
207
+ ann_file='semantickitti_infos_val.pkl',
208
+ pipeline=test_pipeline,
209
+ metainfo=metainfo,
210
+ modality=input_modality,
211
+ ignore_index=19,
212
+ test_mode=True,
213
+ backend_args=backend_args))
214
+
215
+ val_dataloader = test_dataloader
216
+
217
+ val_evaluator = dict(type='SegMetric')
218
+ test_evaluator = val_evaluator
219
+
220
+ vis_backends = [dict(type='LocalVisBackend')]
221
+ visualizer = dict(
222
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
223
+
224
+ tta_model = dict(type='Seg3DTTAModel')
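
The map above folds SemanticKITTI's raw ids (including the moving-* classes) into 19 training classes plus an ignore index of 19. Below is a minimal numpy sketch of what `seg_offset=2**16` and `PointSegClassMapping` achieve together, using a synthetic label array and a subset of `labels_map`; it is an illustration, not mmdet3d's actual implementation.

```python
import numpy as np

labels_map = {0: 19, 10: 0, 40: 8, 252: 0}  # subset of the full map above
# raw SemanticKITTI labels pack an instance id in the upper 16 bits
raw = np.array([10, 40, (7 << 16) + 252, 0], dtype=np.uint32)

semantic = raw % (2**16)  # strip instance ids (this is what seg_offset marks)
lut = np.full(259 + 1, 19, dtype=np.int64)  # max_label=259, default: ignore
for src, dst in labels_map.items():
    lut[src] = dst
train_ids = lut[semantic]  # -> array([ 0,  8,  0, 19])
```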
configs/_base_/datasets/sunrgbd-3d.py ADDED
@@ -0,0 +1,126 @@
+ dataset_type = 'SUNRGBDDataset'
+ data_root = 'data/sunrgbd/'
+ class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
+                'night_stand', 'bookshelf', 'bathtub')
+
+ metainfo = dict(classes=class_names)
+
+ # Example to use a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer the backend from the prefix (LMDB and Memcached
+ # are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/sunrgbd/'
+
+ # Method 2: use backend_args (named file_client_args before v1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='DEPTH',
+         shift_height=True,
+         load_dim=6,
+         use_dim=[0, 1, 2],
+         backend_args=backend_args),
+     dict(type='LoadAnnotations3D'),
+     dict(
+         type='RandomFlip3D',
+         sync_2d=False,
+         flip_ratio_bev_horizontal=0.5,
+     ),
+     dict(
+         type='GlobalRotScaleTrans',
+         rot_range=[-0.523599, 0.523599],
+         scale_ratio_range=[0.85, 1.15],
+         shift_height=True),
+     dict(type='PointSample', num_points=20000),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='DEPTH',
+         shift_height=True,
+         load_dim=6,
+         use_dim=[0, 1, 2],
+         backend_args=backend_args),
+     dict(
+         type='MultiScaleFlipAug3D',
+         img_scale=(1333, 800),
+         pts_scale_ratio=1,
+         flip=False,
+         transforms=[
+             dict(
+                 type='GlobalRotScaleTrans',
+                 rot_range=[0, 0],
+                 scale_ratio_range=[1., 1.],
+                 translation_std=[0, 0, 0]),
+             dict(
+                 type='RandomFlip3D',
+                 sync_2d=False,
+                 flip_ratio_bev_horizontal=0.5,
+             ),
+             dict(type='PointSample', num_points=20000)
+         ]),
+     dict(type='Pack3DDetInputs', keys=['points'])
+ ]
+
+ train_dataloader = dict(
+     batch_size=16,
+     num_workers=4,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type='RepeatDataset',
+         times=5,
+         dataset=dict(
+             type=dataset_type,
+             data_root=data_root,
+             ann_file='sunrgbd_infos_train.pkl',
+             pipeline=train_pipeline,
+             filter_empty_gt=False,
+             metainfo=metainfo,
+             # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+             # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+             box_type_3d='Depth',
+             backend_args=backend_args)))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='sunrgbd_infos_val.pkl',
+         pipeline=test_pipeline,
+         metainfo=metainfo,
+         test_mode=True,
+         box_type_3d='Depth',
+         backend_args=backend_args))
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='sunrgbd_infos_val.pkl',
+         pipeline=test_pipeline,
+         metainfo=metainfo,
+         test_mode=True,
+         box_type_3d='Depth',
+         backend_args=backend_args))
+ val_evaluator = dict(type='IndoorMetric')
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
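
`shift_height=True` in the loaders above appends a height-above-floor channel to each point, which helps indoor detectors reason about gravity-aligned scenes. A conceptual sketch follows, assuming the floor is estimated as a low percentile of the z coordinates (the exact statistic mmdet3d uses may differ):

```python
import numpy as np

# synthetic depth-frame points, shape (N, 3): x, y, z
points = np.random.default_rng(0).uniform(0.0, 3.0, size=(100, 3))

floor_z = np.percentile(points[:, 2], 0.99)  # robust near-minimum of z
height = points[:, 2] - floor_z              # height above the floor
points = np.concatenate([points, height[:, None]], axis=1)  # (N, 4)
```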
configs/_base_/datasets/waymoD3-fov-mono3d-3class.py ADDED
@@ -0,0 +1,184 @@
+ # dataset settings
+ # D3 in the config name means the whole dataset is divided into 3 folds
+ # We only use one fold for efficient experiments
+ dataset_type = 'WaymoDataset'
+ data_root = 'data/waymo/kitti_format/'
+ class_names = ['Pedestrian', 'Cyclist', 'Car']
+ metainfo = dict(classes=class_names)
+ input_modality = dict(use_lidar=False, use_camera=True)
+
+ # Example to use a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer the backend from the prefix (LMDB and Memcached
+ # are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
+
+ # Method 2: use backend_args (named file_client_args before v1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox=True,
+         with_label=True,
+         with_attr_label=False,
+         with_bbox_3d=True,
+         with_label_3d=True,
+         with_bbox_depth=True),
+     # base shape (1248, 832), scale (0.95, 1.05)
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(0.95, 1.05),
+         # ratio_range=(1., 1.),
+         interpolation='nearest',
+         keep_ratio=True,
+     ),
+     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+     dict(
+         type='Pack3DDetInputs',
+         keys=[
+             'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
+             'gt_labels_3d', 'centers_2d', 'depths'
+         ]),
+ ]
+
+ test_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(1., 1.),
+         interpolation='nearest',
+         keep_ratio=True),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['img'],
+         meta_keys=[
+             'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
+             'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
+         ]),
+ ]
+ # construct a pipeline for data and gt loading in the show function
+ # please keep its loading function consistent with test_pipeline (e.g. client)
+ eval_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(1., 1.),
+         interpolation='nearest',
+         keep_ratio=True),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['img'],
+         meta_keys=[
+             'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
+             'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
+         ]),
+ ]
+
+ train_dataloader = dict(
+     batch_size=3,
+     num_workers=3,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='waymo_infos_train.pkl',
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         pipeline=train_pipeline,
+         modality=input_modality,
+         test_mode=False,
+         metainfo=metainfo,
+         cam_sync_instances=True,
+         # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+         # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+         box_type_3d='Camera',
+         load_type='fov_image_based',
+         # load one frame every three frames
+         load_interval=3,
+         backend_args=backend_args))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         ann_file='waymo_infos_val.pkl',
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         cam_sync_instances=True,
+         # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+         # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+         box_type_3d='Camera',
+         load_type='fov_image_based',
+         load_eval_anns=False,
+         backend_args=backend_args))
+
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         ann_file='waymo_infos_val.pkl',
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         cam_sync_instances=True,
+         # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+         # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+         box_type_3d='Camera',
+         load_type='fov_image_based',
+         backend_args=backend_args))
+
+ val_evaluator = dict(
+     type='WaymoMetric',
+     waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin',
+     metric='LET_mAP',
+     load_type='fov_image_based',
+     result_prefix='./pgd_fov_pred')
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
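
Files under `_base_` are meant to be inherited rather than run directly. Below is a hypothetical downstream config that reuses this dataset setup and overrides a couple of fields; mmengine merges nested dicts, so keys not re-declared keep their base values.

```python
# hypothetical child config, e.g. configs/pgd/my_experiment.py
_base_ = ['../_base_/datasets/waymoD3-fov-mono3d-3class.py']

# bump the batch size and switch off shuffling for a debugging run
train_dataloader = dict(batch_size=6, sampler=dict(shuffle=False))
```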
configs/_base_/datasets/waymoD3-mv-mono3d-3class.py ADDED
@@ -0,0 +1,191 @@
+ # dataset settings
+ # D3 in the config name means the whole dataset is divided into 3 folds
+ # We only use one fold for efficient experiments
+ dataset_type = 'WaymoDataset'
+ data_root = 'data/waymo/kitti_format/'
+ class_names = ['Pedestrian', 'Cyclist', 'Car']
+ metainfo = dict(classes=class_names)
+ input_modality = dict(use_lidar=False, use_camera=True)
+
+ # Example to use a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer the backend from the prefix (LMDB and Memcached
+ # are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
+
+ # Method 2: use backend_args (named file_client_args before v1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox=True,
+         with_label=True,
+         with_attr_label=False,
+         with_bbox_3d=True,
+         with_label_3d=True,
+         with_bbox_depth=True),
+     # base shape (1248, 832), scale (0.95, 1.05)
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         # ratio_range=(1., 1.),
+         ratio_range=(0.95, 1.05),
+         interpolation='nearest',
+         keep_ratio=True,
+     ),
+     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+     dict(
+         type='Pack3DDetInputs',
+         keys=[
+             'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
+             'gt_labels_3d', 'centers_2d', 'depths'
+         ]),
+ ]
+
+ test_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(
+         type='Resize3D',
+         scale_factor=0.65,
+         interpolation='nearest',
+         keep_ratio=True),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['img'],
+         meta_keys=[
+             'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
+             'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
+         ]),
+ ]
+ # construct a pipeline for data and gt loading in the show function
+ # please keep its loading function consistent with test_pipeline (e.g. client)
+ eval_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(
+         type='Resize3D',
+         scale_factor=0.65,
+         interpolation='nearest',
+         keep_ratio=True),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['img'],
+         meta_keys=[
+             'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
+             'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
+         ]),
+ ]
+
+ train_dataloader = dict(
+     batch_size=3,
+     num_workers=3,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='waymo_infos_train.pkl',
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         pipeline=train_pipeline,
+         modality=input_modality,
+         test_mode=False,
+         metainfo=metainfo,
+         cam_sync_instances=True,
+         # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+         # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+         box_type_3d='Camera',
+         load_type='mv_image_based',
+         # load one frame every three frames
+         load_interval=3,
+         backend_args=backend_args))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=0,
+     persistent_workers=False,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         ann_file='waymo_infos_val.pkl',
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         cam_sync_instances=True,
+         # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+         # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+         box_type_3d='Camera',
+         load_type='mv_image_based',
+         # load_eval_anns=False,
+         backend_args=backend_args))
+
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=0,
+     persistent_workers=False,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         ann_file='waymo_infos_val.pkl',
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         cam_sync_instances=True,
+         # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+         # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+         box_type_3d='Camera',
+         load_type='mv_image_based',
+         load_eval_anns=False,
+         backend_args=backend_args))
+
+ val_evaluator = dict(
+     type='WaymoMetric',
+     waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
+     metric='LET_mAP',
+     load_type='mv_image_based',
+     result_prefix='./pgd_mv_pred',
+     nms_cfg=dict(
+         use_rotate_nms=True,
+         nms_across_levels=False,
+         nms_pre=500,
+         nms_thr=0.05,
+         score_thr=0.001,
+         min_bbox_size=0,
+         max_per_frame=100))
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
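
`Resize3D` with `scale_factor=0.65` in the test pipeline shrinks each view, so the camera intrinsics must shrink with it for 2D-3D projection to stay consistent. A sketch of that pinhole-camera property, with illustrative (not real) Waymo-like values:

```python
import numpy as np

# illustrative 3x4 cam2img matrix: fx, fy, cx, cy in the first two rows
cam2img = np.array([[2055.6, 0.0, 939.7, 0.0],
                    [0.0, 2055.6, 641.1, 0.0],
                    [0.0, 0.0, 1.0, 0.0]])
s = 0.65
cam2img_resized = cam2img.copy()
cam2img_resized[:2] *= s  # pixel coordinates shrink by the same factor
```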
configs/_base_/datasets/waymoD5-3d-3class.py ADDED
@@ -0,0 +1,178 @@
+ # dataset settings
+ # D5 in the config name means the whole dataset is divided into 5 folds
+ # We only use one fold for efficient experiments
+ dataset_type = 'WaymoDataset'
+ data_root = 'data/waymo/kitti_format/'
+
+ # Example to use a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer the backend from the prefix (LMDB and Memcached
+ # are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
+
+ # Method 2: use backend_args (named file_client_args before v1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ class_names = ['Car', 'Pedestrian', 'Cyclist']
+ metainfo = dict(classes=class_names)
+
+ point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
+ input_modality = dict(use_lidar=True, use_camera=False)
+ db_sampler = dict(
+     data_root=data_root,
+     info_path=data_root + 'waymo_dbinfos_train.pkl',
+     rate=1.0,
+     prepare=dict(
+         filter_by_difficulty=[-1],
+         filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
+     classes=class_names,
+     sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),
+     points_loader=dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=6,
+         use_dim=[0, 1, 2, 3, 4],
+         backend_args=backend_args),
+     backend_args=backend_args)
+
+ train_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=6,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+     # dict(type='ObjectSample', db_sampler=db_sampler),
+     dict(
+         type='RandomFlip3D',
+         sync_2d=False,
+         flip_ratio_bev_horizontal=0.5,
+         flip_ratio_bev_vertical=0.5),
+     dict(
+         type='GlobalRotScaleTrans',
+         rot_range=[-0.78539816, 0.78539816],
+         scale_ratio_range=[0.95, 1.05]),
+     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='PointShuffle'),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=6,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(
+         type='MultiScaleFlipAug3D',
+         img_scale=(1333, 800),
+         pts_scale_ratio=1,
+         flip=False,
+         transforms=[
+             dict(
+                 type='GlobalRotScaleTrans',
+                 rot_range=[0, 0],
+                 scale_ratio_range=[1., 1.],
+                 translation_std=[0, 0, 0]),
+             dict(type='RandomFlip3D'),
+             dict(
+                 type='PointsRangeFilter', point_cloud_range=point_cloud_range)
+         ]),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['points'],
+         meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
+ ]
+ # construct a pipeline for data and gt loading in the show function
+ # please keep its loading function consistent with test_pipeline (e.g. client)
+ eval_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=6,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['points'],
+         meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
+ ]
+
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type='RepeatDataset',
+         times=2,
+         dataset=dict(
+             type=dataset_type,
+             data_root=data_root,
+             ann_file='waymo_infos_train.pkl',
+             data_prefix=dict(
+                 pts='training/velodyne', sweeps='training/velodyne'),
+             pipeline=train_pipeline,
+             modality=input_modality,
+             test_mode=False,
+             metainfo=metainfo,
+             # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+             # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+             box_type_3d='LiDAR',
+             # load one frame every five frames
+             load_interval=5,
+             backend_args=backend_args)))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
+         ann_file='waymo_infos_val.pkl',
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         box_type_3d='LiDAR',
+         backend_args=backend_args))
+
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
+         ann_file='waymo_infos_val.pkl',
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         box_type_3d='LiDAR',
+         backend_args=backend_args))
+
+ val_evaluator = dict(
+     type='WaymoMetric', waymo_bin_file='./data/waymo/waymo_format/gt.bin')
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
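
`PointsRangeFilter` and `ObjectRangeFilter` above crop the scene to `point_cloud_range`. A sketch of the point-side filtering on synthetic data:

```python
import numpy as np

# [x_min, y_min, z_min, x_max, y_max, z_max], as in the config above
pcr = np.array([-74.88, -74.88, -2.0, 74.88, 74.88, 4.0])

# synthetic points: x, y, z plus two feature dims
points = np.random.default_rng(0).uniform(-100, 100, size=(1000, 5))
inside = np.all((points[:, :3] >= pcr[:3]) & (points[:, :3] <= pcr[3:]),
                axis=1)
points = points[inside]  # only points within the axis-aligned box remain
```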
configs/_base_/datasets/waymoD5-3d-car.py ADDED
@@ -0,0 +1,173 @@
+ # dataset settings
+ # D5 in the config name means the whole dataset is divided into 5 folds
+ # We only use one fold for efficient experiments
+ dataset_type = 'WaymoDataset'
+ data_root = 'data/waymo/kitti_format/'
+
+ # Example to use a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer the backend from the prefix (LMDB and Memcached
+ # are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
+
+ # Method 2: use backend_args (named file_client_args before v1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ class_names = ['Car']
+ metainfo = dict(classes=class_names)
+
+ point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
+ input_modality = dict(use_lidar=True, use_camera=False)
+ db_sampler = dict(
+     data_root=data_root,
+     info_path=data_root + 'waymo_dbinfos_train.pkl',
+     rate=1.0,
+     prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
+     classes=class_names,
+     sample_groups=dict(Car=15),
+     points_loader=dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=6,
+         use_dim=[0, 1, 2, 3, 4],
+         backend_args=backend_args),
+     backend_args=backend_args)
+
+ train_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=6,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+     dict(type='ObjectSample', db_sampler=db_sampler),
+     dict(
+         type='RandomFlip3D',
+         sync_2d=False,
+         flip_ratio_bev_horizontal=0.5,
+         flip_ratio_bev_vertical=0.5),
+     dict(
+         type='GlobalRotScaleTrans',
+         rot_range=[-0.78539816, 0.78539816],
+         scale_ratio_range=[0.95, 1.05]),
+     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='PointShuffle'),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'],
+         meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=6,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(
+         type='MultiScaleFlipAug3D',
+         img_scale=(1333, 800),
+         pts_scale_ratio=1,
+         flip=False,
+         transforms=[
+             dict(
+                 type='GlobalRotScaleTrans',
+                 rot_range=[0, 0],
+                 scale_ratio_range=[1., 1.],
+                 translation_std=[0, 0, 0]),
+             dict(type='RandomFlip3D'),
+             dict(
+                 type='PointsRangeFilter', point_cloud_range=point_cloud_range)
+         ]),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['points'],
+         meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
+ ]
+ # construct a pipeline for data and gt loading in the show function
+ # please keep its loading function consistent with test_pipeline (e.g. client)
+ eval_pipeline = [
+     dict(
+         type='LoadPointsFromFile',
+         coord_type='LIDAR',
+         load_dim=6,
+         use_dim=5,
+         backend_args=backend_args),
+     dict(type='Pack3DDetInputs', keys=['points']),
+ ]
+
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type='RepeatDataset',
+         times=2,
+         dataset=dict(
+             type=dataset_type,
+             data_root=data_root,
+             ann_file='waymo_infos_train.pkl',
+             data_prefix=dict(
+                 pts='training/velodyne', sweeps='training/velodyne'),
+             pipeline=train_pipeline,
+             modality=input_modality,
+             test_mode=False,
+             metainfo=metainfo,
+             # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+             # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+             box_type_3d='LiDAR',
+             # load one frame every five frames
+             load_interval=5,
+             backend_args=backend_args)))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
+         ann_file='waymo_infos_val.pkl',
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         box_type_3d='LiDAR',
+         backend_args=backend_args))
+
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
+         ann_file='waymo_infos_val.pkl',
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         box_type_3d='LiDAR',
+         backend_args=backend_args))
+
+ val_evaluator = dict(
+     type='WaymoMetric', waymo_bin_file='./data/waymo/waymo_format/gt.bin')
+ test_evaluator = val_evaluator
+
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
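
`ObjectSample` with the `db_sampler` above pastes pre-cut ground-truth objects into training scenes to combat class imbalance. A conceptual sketch of the sampling step follows, on hypothetical database entries; the real sampler also rejects boxes that collide with existing ground truth and concatenates the objects' points into the scene.

```python
import random

random.seed(0)
# hypothetical database entries carrying per-object point counts
db_infos = [{'name': 'Car', 'num_points': n} for n in (3, 8, 12, 40, 7)]

min_points, group_size = 5, 15  # filter_by_min_points / sample_groups above
candidates = [d for d in db_infos if d['num_points'] >= min_points]
sampled = random.sample(candidates, k=min(group_size, len(candidates)))
```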
configs/_base_/datasets/waymoD5-fov-mono3d-3class.py ADDED
@@ -0,0 +1,163 @@
+ # dataset settings
+ # D5 in the config name means the whole dataset is divided into 5 folds
+ # We only use one fold for efficient experiments
+ dataset_type = 'WaymoDataset'
+ data_root = 'data/waymo/kitti_format/'
+ class_names = ['Car', 'Pedestrian', 'Cyclist']
+ input_modality = dict(use_lidar=False, use_camera=True)
+
+ # Example to use a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer the backend from the prefix (LMDB and Memcached
+ # are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
+
+ # Method 2: use backend_args (named file_client_args before v1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox=True,
+         with_label=True,
+         with_attr_label=False,
+         with_bbox_3d=True,
+         with_label_3d=True,
+         with_bbox_depth=True),
+     # base shape (1248, 832), scale (0.95, 1.05)
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(0.95, 1.05),
+         keep_ratio=True,
+     ),
+     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+     dict(
+         type='Pack3DDetInputs',
+         keys=[
+             'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
+             'gt_labels_3d', 'centers_2d', 'depths'
+         ]),
+ ]
+
+ test_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(1., 1.),
+         keep_ratio=True),
+     dict(type='Pack3DDetInputs', keys=['img']),
+ ]
+ # construct a pipeline for data and gt loading in the show function
+ # please keep its loading function consistent with test_pipeline (e.g. client)
+ eval_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(1., 1.),
+         keep_ratio=True),
+     dict(type='Pack3DDetInputs', keys=['img']),
+ ]
+
+ metainfo = dict(classes=class_names)
+
+ train_dataloader = dict(
+     batch_size=3,
+     num_workers=3,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='waymo_infos_train.pkl',
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         pipeline=train_pipeline,
+         modality=input_modality,
+         test_mode=False,
+         metainfo=metainfo,
+         # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+         # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+         box_type_3d='Camera',
+         load_type='fov_image_based',
+         # load one frame every five frames
+         load_interval=5,
+         backend_args=backend_args))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         ann_file='waymo_infos_val.pkl',
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+         # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+         box_type_3d='Camera',
+         load_type='fov_image_based',
+         backend_args=backend_args))
+
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         ann_file='waymo_infos_val.pkl',
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+         # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+         box_type_3d='Camera',
+         load_type='fov_image_based',
+         backend_args=backend_args))
+
+ val_evaluator = dict(
+     type='WaymoMetric',
+     ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
+     waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin',
+     data_root='./data/waymo/waymo_format',
+     metric='LET_mAP',
+     load_type='fov_image_based',
+     backend_args=backend_args)
+ test_evaluator = val_evaluator
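
`load_interval=5` is what makes this a D5 config: only every fifth annotated frame is kept. A sketch of the subsampling on a stand-in info list:

```python
data_list = list(range(100))            # stand-in for loaded info records
load_interval = 5
data_list = data_list[::load_interval]  # 100 frames -> 20 frames
```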
configs/_base_/datasets/waymoD5-mv-mono3d-3class.py ADDED
@@ -0,0 +1,163 @@
+ # dataset settings
+ # D5 in the config name means the whole dataset is divided into 5 folds
+ # We only use one fold for efficient experiments
+ dataset_type = 'WaymoDataset'
+ data_root = 'data/waymo/kitti_format/'
+ class_names = ['Car', 'Pedestrian', 'Cyclist']
+ input_modality = dict(use_lidar=False, use_camera=True)
+
+ # Example to use a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer the backend from the prefix (LMDB and Memcached
+ # are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
+
+ # Method 2: use backend_args (named file_client_args before v1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ train_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox=True,
+         with_label=True,
+         with_attr_label=False,
+         with_bbox_3d=True,
+         with_label_3d=True,
+         with_bbox_depth=True),
+     # base shape (1248, 832), scale (0.95, 1.05)
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(0.95, 1.05),
+         keep_ratio=True,
+     ),
+     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+     dict(
+         type='Pack3DDetInputs',
+         keys=[
+             'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
+             'gt_labels_3d', 'centers_2d', 'depths'
+         ]),
+ ]
+
+ test_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(1., 1.),
+         keep_ratio=True),
+     dict(type='Pack3DDetInputs', keys=['img']),
+ ]
+ # construct a pipeline for data and gt loading in the show function
+ # please keep its loading function consistent with test_pipeline (e.g. client)
+ eval_pipeline = [
+     dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(1., 1.),
+         keep_ratio=True),
+     dict(type='Pack3DDetInputs', keys=['img']),
+ ]
+
+ metainfo = dict(classes=class_names)
+
+ train_dataloader = dict(
+     batch_size=3,
+     num_workers=3,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='waymo_infos_train.pkl',
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         pipeline=train_pipeline,
+         modality=input_modality,
+         test_mode=False,
+         metainfo=metainfo,
+         # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+         # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+         box_type_3d='Camera',
+         load_type='mv_image_based',
+         # load one frame every five frames
+         load_interval=5,
+         backend_args=backend_args))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         ann_file='waymo_infos_val.pkl',
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+         # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+         box_type_3d='Camera',
+         load_type='mv_image_based',
+         backend_args=backend_args))
+
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         ann_file='waymo_infos_val.pkl',
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         # we use box_type_3d='LiDAR' in KITTI and nuScenes datasets
+         # and box_type_3d='Depth' in SUN RGB-D and ScanNet datasets.
+         box_type_3d='Camera',
+         load_type='mv_image_based',
+         backend_args=backend_args))
+
+ val_evaluator = dict(
+     type='WaymoMetric',
+     ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
+     waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
+     data_root='./data/waymo/waymo_format',
+     metric='LET_mAP',
+     load_type='mv_image_based',
+     backend_args=backend_args)
+ test_evaluator = val_evaluator
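
A quick way to sanity-check a base file like this one is to load it with mmengine; the path below is assumed relative to an mmdetection3d checkout.

```python
from mmengine.config import Config

cfg = Config.fromfile('configs/_base_/datasets/waymoD5-mv-mono3d-3class.py')
print(cfg.val_evaluator.metric)         # 'LET_mAP'
print(cfg.train_dataloader.batch_size)  # 3
```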
configs/_base_/datasets/waymoD5-mv3d-3class.py ADDED
@@ -0,0 +1,178 @@
+ # dataset settings
+ # D5 in the config name means the whole dataset is divided into 5 folds
+ # We only use one fold for efficient experiments
+ dataset_type = 'WaymoDataset'
+ data_root = 'data/waymo/kitti_format/'
+
+ # Example to use a different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer the backend from the prefix (LMDB and Memcached
+ # are not supported yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
+
+ # Method 2: use backend_args (named file_client_args before v1.1.0)
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ class_names = ['Pedestrian', 'Cyclist', 'Car']
+ input_modality = dict(use_lidar=False, use_camera=True)
+ point_cloud_range = [-35.0, -75.0, -2, 75.0, 75.0, 4]
+
+ train_transforms = [
+     dict(type='PhotoMetricDistortion3D'),
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(0.95, 1.05),
+         keep_ratio=True),
+     dict(type='RandomCrop3D', crop_size=(1080, 720)),
+     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5, flip_box3d=False),
+ ]
+
+ train_pipeline = [
+     dict(
+         type='LoadMultiViewImageFromFiles',
+         to_float32=True,
+         backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox=True,
+         with_label=True,
+         with_attr_label=False,
+         with_bbox_3d=True,
+         with_label_3d=True,
+         with_bbox_depth=True),
+     dict(type='MultiViewWrapper', transforms=train_transforms),
+     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='ObjectNameFilter', classes=class_names),
+     dict(
+         type='Pack3DDetInputs', keys=[
+             'img',
+             'gt_bboxes_3d',
+             'gt_labels_3d',
+         ]),
+ ]
+ test_transforms = [
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(1., 1.),
+         keep_ratio=True)
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadMultiViewImageFromFiles',
+         to_float32=True,
+         backend_args=backend_args),
+     dict(type='MultiViewWrapper', transforms=test_transforms),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['img'],
+         meta_keys=[
+             'box_type_3d', 'img_shape', 'ori_cam2img', 'scale_factor',
+             'sample_idx', 'context_name', 'timestamp', 'lidar2cam',
+             'num_ref_frames', 'num_views'
+         ])
+ ]
+ # construct a pipeline for data and gt loading in the show function
+ # please keep its loading function consistent with test_pipeline (e.g. client)
+ eval_pipeline = [
+     dict(
+         type='LoadMultiViewImageFromFiles',
+         to_float32=True,
+         backend_args=backend_args),
+     dict(type='MultiViewWrapper', transforms=test_transforms),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['img'],
+         meta_keys=[
+             'box_type_3d', 'img_shape', 'ori_cam2img', 'scale_factor',
+             'sample_idx', 'context_name', 'timestamp', 'lidar2cam',
+             'num_ref_frames', 'num_views'
+         ])
+ ]
+ metainfo = dict(classes=class_names)
+
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='waymo_infos_train.pkl',
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         pipeline=train_pipeline,
+         modality=input_modality,
+         test_mode=False,
+         cam_sync_instances=True,
+         metainfo=metainfo,
+         box_type_3d='Lidar',
+         load_interval=5,
+         backend_args=backend_args))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='waymo_infos_val.pkl',
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         box_type_3d='Lidar',
+         backend_args=backend_args))
+
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='waymo_infos_val.pkl',
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         pipeline=test_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         box_type_3d='Lidar',
+         backend_args=backend_args))
+ val_evaluator = dict(
+     type='WaymoMetric',
+     waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
+     metric='LET_mAP')
+
+ test_evaluator = val_evaluator
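
`MultiViewWrapper` above applies the wrapped 2D transforms to each camera view in turn. A conceptual sketch with toy callables; the real transform operates on mmdet3d result dicts, not raw values.

```python
def apply_per_view(views, transforms):
    """Run every transform over every view, preserving view order."""
    out = []
    for view in views:            # one entry per camera
        for t in transforms:
            view = t(view)
        out.append(view)
    return out

# e.g. five dummy "images" through a toy doubling transform
print(apply_per_view([1, 2, 3, 4, 5], [lambda x: x * 2]))
```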
configs/_base_/default_runtime.py ADDED
@@ -0,0 +1,23 @@
+ default_scope = 'mmdet3d'
+
+ default_hooks = dict(
+     timer=dict(type='IterTimerHook'),
+     logger=dict(type='LoggerHook', interval=50),
+     param_scheduler=dict(type='ParamSchedulerHook'),
+     checkpoint=dict(type='CheckpointHook', interval=-1),
+     sampler_seed=dict(type='DistSamplerSeedHook'),
+     visualization=dict(type='Det3DVisualizationHook'))
+
+ env_cfg = dict(
+     cudnn_benchmark=False,
+     mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+     dist_cfg=dict(backend='nccl'),
+ )
+
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
+
+ log_level = 'INFO'
+ load_from = None
+ resume = False
+
+ # TODO: support auto scaling lr
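
Downstream configs inherit this runtime and typically re-declare only the hooks they want to change; mmengine's dict merging keeps the rest. A hypothetical override enabling per-epoch checkpointing:

```python
# hypothetical child config
_base_ = ['./default_runtime.py']

default_hooks = dict(
    checkpoint=dict(type='CheckpointHook', interval=1))  # save every epoch
```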
configs/_base_/models/3dssd.py ADDED
@@ -0,0 +1,76 @@
+ model = dict(
+     type='SSD3DNet',
+     data_preprocessor=dict(type='Det3DDataPreprocessor'),
+     backbone=dict(
+         type='PointNet2SAMSG',
+         in_channels=4,
+         num_points=(4096, 512, (256, 256)),
+         radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),
+         num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)),
+         sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)),
+                      ((64, 64, 128), (64, 64, 128), (64, 96, 128)),
+                      ((128, 128, 256), (128, 192, 256), (128, 256, 256))),
+         aggregation_channels=(64, 128, 256),
+         fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),
+         fps_sample_range_lists=((-1), (-1), (512, -1)),
+         norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
+         sa_cfg=dict(
+             type='PointSAModuleMSG',
+             pool_mod='max',
+             use_xyz=True,
+             normalize_xyz=False)),
+     bbox_head=dict(
+         type='SSD3DHead',
+         vote_module_cfg=dict(
+             in_channels=256,
+             num_points=256,
+             gt_per_seed=1,
+             conv_channels=(128, ),
+             conv_cfg=dict(type='Conv1d'),
+             norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
+             with_res_feat=False,
+             vote_xyz_range=(3.0, 3.0, 2.0)),
+         vote_aggregation_cfg=dict(
+             type='PointSAModuleMSG',
+             num_point=256,
+             radii=(4.8, 6.4),
+             sample_nums=(16, 32),
+             mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)),
+             norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
+             use_xyz=True,
+             normalize_xyz=False,
+             bias=True),
+         pred_layer_cfg=dict(
+             in_channels=1536,
+             shared_conv_channels=(512, 128),
+             cls_conv_channels=(128, ),
+             reg_conv_channels=(128, ),
+             conv_cfg=dict(type='Conv1d'),
+             norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
+             bias=True),
+         objectness_loss=dict(
+             type='mmdet.CrossEntropyLoss',
+             use_sigmoid=True,
+             reduction='sum',
+             loss_weight=1.0),
+         center_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
+         dir_class_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+         dir_res_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
+         size_res_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
+         corner_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
+         vote_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0)),
+     # model training and testing settings
+     train_cfg=dict(
+         sample_mode='spec', pos_distance_thr=10.0, expand_dims_length=0.05),
+     test_cfg=dict(
+         nms_cfg=dict(type='nms', iou_thr=0.1),
+         sample_mode='spec',
+         score_thr=0.0,
+         per_class_proposal=True,
+         max_output_num=100))
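
The `fps_mods` above mix D-FPS (plain distance-based farthest point sampling) with feature-space variants; the real op is a CUDA kernel shipped with mmcv. A minimal numpy sketch of D-FPS for intuition:

```python
import numpy as np

def farthest_point_sample(xyz, num_samples):
    """Greedily pick points that maximise distance to the chosen set."""
    chosen = np.zeros(num_samples, dtype=np.int64)  # chosen[0] = point 0
    min_dist = np.full(len(xyz), np.inf)
    for i in range(1, num_samples):
        d = np.linalg.norm(xyz - xyz[chosen[i - 1]], axis=1)
        min_dist = np.minimum(min_dist, d)   # distance to nearest chosen pt
        chosen[i] = int(min_dist.argmax())   # farthest remaining point
    return chosen

pts = np.random.default_rng(0).normal(size=(4096, 3))
idx = farthest_point_sample(pts, 512)  # mirrors num_points=(4096, 512, ...)
```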
configs/_base_/models/cascade-mask-rcnn_r50_fpn.py ADDED
@@ -0,0 +1,199 @@
+ # model settings
+ model = dict(
+     type='CascadeRCNN',
+     pretrained='torchvision://resnet50',
+     _scope_='mmdet',
+     backbone=dict(
+         type='ResNet',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         frozen_stages=1,
+         norm_cfg=dict(type='BN', requires_grad=True),
+         norm_eval=True,
+         style='pytorch'),
+     neck=dict(
+         type='FPN',
+         in_channels=[256, 512, 1024, 2048],
+         out_channels=256,
+         num_outs=5),
+     rpn_head=dict(
+         type='RPNHead',
+         in_channels=256,
+         feat_channels=256,
+         anchor_generator=dict(
+             type='AnchorGenerator',
+             scales=[8],
+             ratios=[0.5, 1.0, 2.0],
+             strides=[4, 8, 16, 32, 64]),
+         bbox_coder=dict(
+             type='DeltaXYWHBBoxCoder',
+             target_means=[.0, .0, .0, .0],
+             target_stds=[1.0, 1.0, 1.0, 1.0]),
+         loss_cls=dict(
+             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+         loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
+     roi_head=dict(
+         type='CascadeRoIHead',
+         num_stages=3,
+         stage_loss_weights=[1, 0.5, 0.25],
+         bbox_roi_extractor=dict(
+             type='SingleRoIExtractor',
+             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+             out_channels=256,
+             featmap_strides=[4, 8, 16, 32]),
+         bbox_head=[
+             dict(
+                 type='Shared2FCBBoxHead',
+                 in_channels=256,
+                 fc_out_channels=1024,
+                 roi_feat_size=7,
+                 num_classes=80,
+                 bbox_coder=dict(
+                     type='DeltaXYWHBBoxCoder',
+                     target_means=[0., 0., 0., 0.],
+                     target_stds=[0.1, 0.1, 0.2, 0.2]),
+                 reg_class_agnostic=True,
+                 loss_cls=dict(
+                     type='CrossEntropyLoss',
+                     use_sigmoid=False,
+                     loss_weight=1.0),
+                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+                                loss_weight=1.0)),
+             dict(
+                 type='Shared2FCBBoxHead',
+                 in_channels=256,
+                 fc_out_channels=1024,
+                 roi_feat_size=7,
+                 num_classes=80,
+                 bbox_coder=dict(
+                     type='DeltaXYWHBBoxCoder',
+                     target_means=[0., 0., 0., 0.],
+                     target_stds=[0.05, 0.05, 0.1, 0.1]),
+                 reg_class_agnostic=True,
+                 loss_cls=dict(
+                     type='CrossEntropyLoss',
+                     use_sigmoid=False,
+                     loss_weight=1.0),
+                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+                                loss_weight=1.0)),
+             dict(
+                 type='Shared2FCBBoxHead',
+                 in_channels=256,
+                 fc_out_channels=1024,
+                 roi_feat_size=7,
+                 num_classes=80,
+                 bbox_coder=dict(
+                     type='DeltaXYWHBBoxCoder',
+                     target_means=[0., 0., 0., 0.],
+                     target_stds=[0.033, 0.033, 0.067, 0.067]),
+                 reg_class_agnostic=True,
+                 loss_cls=dict(
+                     type='CrossEntropyLoss',
+                     use_sigmoid=False,
+                     loss_weight=1.0),
+                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
+         ],
+         mask_roi_extractor=dict(
+             type='SingleRoIExtractor',
+             roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
+             out_channels=256,
+             featmap_strides=[4, 8, 16, 32]),
+         mask_head=dict(
+             type='FCNMaskHead',
+             num_convs=4,
+             in_channels=256,
+             conv_out_channels=256,
+             num_classes=80,
+             loss_mask=dict(
+                 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
+     # model training and testing settings
+     train_cfg=dict(
+         rpn=dict(
+             assigner=dict(
+                 type='MaxIoUAssigner',
+                 pos_iou_thr=0.7,
+                 neg_iou_thr=0.3,
+                 min_pos_iou=0.3,
+                 match_low_quality=True,
+                 ignore_iof_thr=-1),
+             sampler=dict(
+                 type='RandomSampler',
+                 num=256,
+                 pos_fraction=0.5,
+                 neg_pos_ub=-1,
+                 add_gt_as_proposals=False),
+             allowed_border=0,
+             pos_weight=-1,
+             debug=False),
+         rpn_proposal=dict(
+             nms_pre=2000,
+             nms_post=2000,
+             max_per_img=2000,
+             nms=dict(type='nms', iou_threshold=0.7),
+             min_bbox_size=0),
+         rcnn=[
+             dict(
+                 assigner=dict(
+                     type='MaxIoUAssigner',
+                     pos_iou_thr=0.5,
+                     neg_iou_thr=0.5,
+                     min_pos_iou=0.5,
+                     match_low_quality=False,
+                     ignore_iof_thr=-1),
+                 sampler=dict(
+                     type='RandomSampler',
+                     num=512,
+                     pos_fraction=0.25,
+                     neg_pos_ub=-1,
+                     add_gt_as_proposals=True),
+                 mask_size=28,
+                 pos_weight=-1,
+                 debug=False),
+             dict(
+                 assigner=dict(
+                     type='MaxIoUAssigner',
+                     pos_iou_thr=0.6,
+                     neg_iou_thr=0.6,
+                     min_pos_iou=0.6,
+                     match_low_quality=False,
+                     ignore_iof_thr=-1),
+                 sampler=dict(
+                     type='RandomSampler',
+                     num=512,
+                     pos_fraction=0.25,
+                     neg_pos_ub=-1,
+                     add_gt_as_proposals=True),
+                 mask_size=28,
+                 pos_weight=-1,
+                 debug=False),
+             dict(
+                 assigner=dict(
+                     type='MaxIoUAssigner',
+                     pos_iou_thr=0.7,
+                     neg_iou_thr=0.7,
+                     min_pos_iou=0.7,
+                     match_low_quality=False,
+                     ignore_iof_thr=-1),
+                 sampler=dict(
+                     type='RandomSampler',
+                     num=512,
+                     pos_fraction=0.25,
+                     neg_pos_ub=-1,
+                     add_gt_as_proposals=True),
+                 mask_size=28,
+                 pos_weight=-1,
+                 debug=False)
+         ]),
+     test_cfg=dict(
+         rpn=dict(
+             nms_pre=1000,
+             nms_post=1000,
+             max_per_img=1000,
+             nms=dict(type='nms', iou_threshold=0.7),
+             min_bbox_size=0),
+         rcnn=dict(
+             score_thr=0.05,
+             nms=dict(type='nms', iou_threshold=0.5),
+             max_per_img=100,
+             mask_thr_binary=0.5)))
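
The three `rcnn` stages above re-assign proposals with increasingly strict IoU thresholds (0.5, 0.6, 0.7), so each head trains on better-localised positives than the last. A toy illustration with synthetic proposal IoUs:

```python
import numpy as np

ious = np.array([0.45, 0.55, 0.62, 0.68, 0.74, 0.81])  # synthetic IoUs
for stage, thr in enumerate([0.5, 0.6, 0.7]):
    positives = (ious >= thr).sum()
    print(f'stage {stage}: pos_iou_thr={thr} -> {positives} positives')
```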