wzhouxiff commited on
Commit
38e3f9b
·
0 Parent(s):
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +4 -0
  2. ZoeDepth/.gitignore +148 -0
  3. ZoeDepth/LICENSE +21 -0
  4. ZoeDepth/README.md +248 -0
  5. ZoeDepth/environment.yml +26 -0
  6. ZoeDepth/evaluate.py +160 -0
  7. ZoeDepth/hubconf.py +154 -0
  8. ZoeDepth/sanity.py +98 -0
  9. ZoeDepth/sanity_hub.py +43 -0
  10. ZoeDepth/train_mix.py +179 -0
  11. ZoeDepth/train_mono.py +174 -0
  12. ZoeDepth/train_test_inputs/kitti_eigen_test_files_with_gt.txt +0 -0
  13. ZoeDepth/train_test_inputs/kitti_eigen_train_files_with_gt.txt +0 -0
  14. ZoeDepth/train_test_inputs/nyudepthv2_test_files_with_gt.txt +654 -0
  15. ZoeDepth/train_test_inputs/nyudepthv2_train_files_with_gt.txt +0 -0
  16. ZoeDepth/ui/app.py +66 -0
  17. ZoeDepth/ui/gradio_depth_pred.py +52 -0
  18. ZoeDepth/ui/gradio_im_to_3d.py +93 -0
  19. ZoeDepth/ui/gradio_pano_to_3d.py +120 -0
  20. ZoeDepth/ui/ui_requirements.txt +2 -0
  21. ZoeDepth/zoedepth/data/__init__.py +24 -0
  22. ZoeDepth/zoedepth/data/data_mono.py +573 -0
  23. ZoeDepth/zoedepth/data/ddad.py +117 -0
  24. ZoeDepth/zoedepth/data/diml_indoor_test.py +125 -0
  25. ZoeDepth/zoedepth/data/diml_outdoor_test.py +114 -0
  26. ZoeDepth/zoedepth/data/diode.py +125 -0
  27. ZoeDepth/zoedepth/data/hypersim.py +138 -0
  28. ZoeDepth/zoedepth/data/ibims.py +81 -0
  29. ZoeDepth/zoedepth/data/preprocess.py +154 -0
  30. ZoeDepth/zoedepth/data/sun_rgbd_loader.py +106 -0
  31. ZoeDepth/zoedepth/data/transforms.py +481 -0
  32. ZoeDepth/zoedepth/data/vkitti.py +151 -0
  33. ZoeDepth/zoedepth/data/vkitti2.py +187 -0
  34. ZoeDepth/zoedepth/models/__init__.py +24 -0
  35. ZoeDepth/zoedepth/models/base_models/__init__.py +24 -0
  36. ZoeDepth/zoedepth/models/base_models/midas.py +377 -0
  37. ZoeDepth/zoedepth/models/builder.py +51 -0
  38. ZoeDepth/zoedepth/models/depth_model.py +152 -0
  39. ZoeDepth/zoedepth/models/layers/attractor.py +208 -0
  40. ZoeDepth/zoedepth/models/layers/dist_layers.py +121 -0
  41. ZoeDepth/zoedepth/models/layers/localbins_layers.py +169 -0
  42. ZoeDepth/zoedepth/models/layers/patch_transformer.py +91 -0
  43. ZoeDepth/zoedepth/models/model_io.py +92 -0
  44. ZoeDepth/zoedepth/models/zoedepth/__init__.py +31 -0
  45. ZoeDepth/zoedepth/models/zoedepth/config_zoedepth.json +58 -0
  46. ZoeDepth/zoedepth/models/zoedepth/config_zoedepth_kitti.json +22 -0
  47. ZoeDepth/zoedepth/models/zoedepth/zoedepth_v1.py +250 -0
  48. ZoeDepth/zoedepth/models/zoedepth_nk/__init__.py +31 -0
  49. ZoeDepth/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json +67 -0
  50. ZoeDepth/zoedepth/models/zoedepth_nk/zoedepth_nk_v1.py +333 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ outputs/
2
+ ckpt/
3
+ checkpoints/*
4
+ __pycache__/
ZoeDepth/.gitignore ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.png
2
+ **.gif
3
+ .vscode/
4
+ *.rdb
5
+ **.xml
6
+ wandb/
7
+ slurm/
8
+ tmp/
9
+ .logs/
10
+ checkpoints/
11
+ external_jobs/
12
+ # Byte-compiled / optimized / DLL files
13
+ __pycache__/
14
+ *.py[cod]
15
+ *$py.class
16
+ ptlflow_logs/
17
+ output/
18
+ log/
19
+ .idea/
20
+ # C extensions
21
+ *.so
22
+ results/
23
+ **.DS_Store
24
+ **.pt
25
+ demo/
26
+ # Distribution / packaging
27
+ .Python
28
+ build/
29
+ develop-eggs/
30
+ dist/
31
+ downloads/
32
+ eggs/
33
+ .eggs/
34
+ lib/
35
+ lib64/
36
+ parts/
37
+ sdist/
38
+ var/
39
+ wheels/
40
+ pip-wheel-metadata/
41
+ share/python-wheels/
42
+ *.egg-info/
43
+ .installed.cfg
44
+ *.egg
45
+ MANIFEST
46
+ ~shortcuts/
47
+ **/wandb_logs/
48
+ **.db
49
+ # PyInstaller
50
+ # Usually these files are written by a python script from a template
51
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
52
+ *.manifest
53
+ *.spec
54
+
55
+ # Installer logs
56
+ pip-log.txt
57
+ pip-delete-this-directory.txt
58
+
59
+ # Unit test / coverage reports
60
+ htmlcov/
61
+ .tox/
62
+ .nox/
63
+ .coverage
64
+ .coverage.*
65
+ .cache
66
+ nosetests.xml
67
+ coverage.xml
68
+ *.cover
69
+ *.py,cover
70
+ .hypothesis/
71
+ .pytest_cache/
72
+
73
+ # Translations
74
+ *.mo
75
+ *.pot
76
+
77
+ # Django stuff:
78
+ *.log
79
+ local_settings.py
80
+ db.sqlite3
81
+ db.sqlite3-journal
82
+
83
+ # Flask stuff:
84
+ instance/
85
+ .webassets-cache
86
+
87
+ # Scrapy stuff:
88
+ .scrapy
89
+
90
+ # Sphinx documentation
91
+ docs/_build/
92
+
93
+ # PyBuilder
94
+ target/
95
+
96
+ # Jupyter Notebook
97
+ .ipynb_checkpoints
98
+
99
+ # IPython
100
+ profile_default/
101
+ ipython_config.py
102
+
103
+ # pyenv
104
+ .python-version
105
+
106
+ # pipenv
107
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
108
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
109
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
110
+ # install all needed dependencies.
111
+ #Pipfile.lock
112
+
113
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
114
+ __pypackages__/
115
+
116
+ # Celery stuff
117
+ celerybeat-schedule
118
+ celerybeat.pid
119
+
120
+ # SageMath parsed files
121
+ *.sage.py
122
+
123
+ # Environments
124
+ .env
125
+ .venv
126
+ env/
127
+ venv/
128
+ ENV/
129
+ env.bak/
130
+ venv.bak/
131
+
132
+ # Spyder project settings
133
+ .spyderproject
134
+ .spyproject
135
+
136
+ # Rope project settings
137
+ .ropeproject
138
+
139
+ # mkdocs documentation
140
+ /site
141
+
142
+ # mypy
143
+ .mypy_cache/
144
+ .dmypy.json
145
+ dmypy.json
146
+
147
+ # Pyre type checker
148
+ .pyre/
ZoeDepth/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
ZoeDepth/README.md ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # **ZoeDepth: Combining relative and metric depth** (Official implementation) <!-- omit in toc -->
2
+ [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/isl-org/ZoeDepth)
3
+ [![Open in Spaces](https://huggingface.co/datasets/huggingface/badges/raw/main/open-in-hf-spaces-sm.svg)](https://huggingface.co/spaces/shariqfarooq/ZoeDepth)
4
+
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT) ![PyTorch](https://img.shields.io/badge/PyTorch_v1.10.1-EE4C2C?&logo=pytorch&logoColor=white)
6
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/zoedepth-zero-shot-transfer-by-combining/monocular-depth-estimation-on-nyu-depth-v2)](https://paperswithcode.com/sota/monocular-depth-estimation-on-nyu-depth-v2?p=zoedepth-zero-shot-transfer-by-combining)
7
+
8
+ >#### [ZoeDepth: Zero-shot Transfer by Combining Relative and Metric Depth](https://arxiv.org/abs/2302.12288)
9
+ > ##### [Shariq Farooq Bhat](https://shariqfarooq123.github.io), [Reiner Birkl](https://www.researchgate.net/profile/Reiner-Birkl), [Diana Wofk](https://dwofk.github.io/), [Peter Wonka](http://peterwonka.net/), [Matthias Müller](https://matthias.pw/)
10
+
11
+ [[Paper]](https://arxiv.org/abs/2302.12288)
12
+
13
+ ![teaser](assets/zoedepth-teaser.png)
14
+
15
+ ## **Table of Contents** <!-- omit in toc -->
16
+ - [**Usage**](#usage)
17
+ - [Using torch hub](#using-torch-hub)
18
+ - [Using local copy](#using-local-copy)
19
+ - [Using local torch hub](#using-local-torch-hub)
20
+ - [or load the models manually](#or-load-the-models-manually)
21
+ - [Using ZoeD models to predict depth](#using-zoed-models-to-predict-depth)
22
+ - [**Environment setup**](#environment-setup)
23
+ - [**Sanity checks** (Recommended)](#sanity-checks-recommended)
24
+ - [Model files](#model-files)
25
+ - [**Evaluation**](#evaluation)
26
+ - [Evaluating offical models](#evaluating-offical-models)
27
+ - [Evaluating local checkpoint](#evaluating-local-checkpoint)
28
+ - [**Training**](#training)
29
+ - [**Gradio demo**](#gradio-demo)
30
+ - [**Citation**](#citation)
31
+
32
+
33
+ ## **Usage**
34
+ It is recommended to fetch the latest [MiDaS repo](https://github.com/isl-org/MiDaS) via torch hub before proceeding:
35
+ ```python
36
+ import torch
37
+
38
+ torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True) # Triggers fresh download of MiDaS repo
39
+ ```
40
+ ### **ZoeDepth models** <!-- omit in toc -->
41
+ ### Using torch hub
42
+ ```python
43
+ import torch
44
+
45
+ repo = "isl-org/ZoeDepth"
46
+ # Zoe_N
47
+ model_zoe_n = torch.hub.load(repo, "ZoeD_N", pretrained=True)
48
+
49
+ # Zoe_K
50
+ model_zoe_k = torch.hub.load(repo, "ZoeD_K", pretrained=True)
51
+
52
+ # Zoe_NK
53
+ model_zoe_nk = torch.hub.load(repo, "ZoeD_NK", pretrained=True)
54
+ ```
55
+ ### Using local copy
56
+ Clone this repo:
57
+ ```bash
58
+ git clone https://github.com/isl-org/ZoeDepth.git && cd ZoeDepth
59
+ ```
60
+ #### Using local torch hub
61
+ You can use local source for torch hub to load the ZoeDepth models, for example:
62
+ ```python
63
+ import torch
64
+
65
+ # Zoe_N
66
+ model_zoe_n = torch.hub.load(".", "ZoeD_N", source="local", pretrained=True)
67
+ ```
68
+
69
+ #### or load the models manually
70
+ ```python
71
+ from zoedepth.models.builder import build_model
72
+ from zoedepth.utils.config import get_config
73
+
74
+ # ZoeD_N
75
+ conf = get_config("zoedepth", "infer")
76
+ model_zoe_n = build_model(conf)
77
+
78
+ # ZoeD_K
79
+ conf = get_config("zoedepth", "infer", config_version="kitti")
80
+ model_zoe_k = build_model(conf)
81
+
82
+ # ZoeD_NK
83
+ conf = get_config("zoedepth_nk", "infer")
84
+ model_zoe_nk = build_model(conf)
85
+ ```
86
+
87
+ ### Using ZoeD models to predict depth
88
+ ```python
89
+ ##### sample prediction
90
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
91
+ zoe = model_zoe_n.to(DEVICE)
92
+
93
+
94
+ # Local file
95
+ from PIL import Image
96
+ image = Image.open("/path/to/image.jpg").convert("RGB") # load
97
+ depth_numpy = zoe.infer_pil(image) # as numpy
98
+
99
+ depth_pil = zoe.infer_pil(image, output_type="pil") # as 16-bit PIL Image
100
+
101
+ depth_tensor = zoe.infer_pil(image, output_type="tensor") # as torch tensor
102
+
103
+
104
+
105
+ # Tensor
106
+ from zoedepth.utils.misc import pil_to_batched_tensor
107
+ X = pil_to_batched_tensor(image).to(DEVICE)
108
+ depth_tensor = zoe.infer(X)
109
+
110
+
111
+
112
+ # From URL
113
+ from zoedepth.utils.misc import get_image_from_url
114
+
115
+ # Example URL
116
+ URL = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS4W8H_Nxk_rs3Vje_zj6mglPOH7bnPhQitBH8WkqjlqQVotdtDEG37BsnGofME3_u6lDk&usqp=CAU"
117
+
118
+
119
+ image = get_image_from_url(URL) # fetch
120
+ depth = zoe.infer_pil(image)
121
+
122
+ # Save raw
123
+ from zoedepth.utils.misc import save_raw_16bit
124
+ fpath = "/path/to/output.png"
125
+ save_raw_16bit(depth, fpath)
126
+
127
+ # Colorize output
128
+ from zoedepth.utils.misc import colorize
129
+
130
+ colored = colorize(depth)
131
+
132
+ # save colored output
133
+ fpath_colored = "/path/to/output_colored.png"
134
+ Image.fromarray(colored).save(fpath_colored)
135
+ ```
136
+
137
+ ## **Environment setup**
138
+ The project depends on :
139
+ - [pytorch](https://pytorch.org/) (Main framework)
140
+ - [timm](https://timm.fast.ai/) (Backbone helper for MiDaS)
141
+ - pillow, matplotlib, scipy, h5py, opencv (utilities)
142
+
143
+ Install environment using `environment.yml` :
144
+
145
+ Using [mamba](https://github.com/mamba-org/mamba) (fastest):
146
+ ```bash
147
+ mamba env create -n zoe --file environment.yml
148
+ mamba activate zoe
149
+ ```
150
+ Using conda :
151
+
152
+ ```bash
153
+ conda env create -n zoe --file environment.yml
154
+ conda activate zoe
155
+ ```
156
+
157
+ ## **Sanity checks** (Recommended)
158
+ Check if models can be loaded:
159
+ ```bash
160
+ python sanity_hub.py
161
+ ```
162
+ Try a demo prediction pipeline:
163
+ ```bash
164
+ python sanity.py
165
+ ```
166
+ This will save a file `pred.png` in the root folder, showing RGB and corresponding predicted depth side-by-side.
167
+ ## Model files
168
+ Models are defined under `models/` folder, with `models/<model_name>_<version>.py` containing model definitions and `models/config_<model_name>.json` containing configuration.
169
+
170
+ Single metric head models (Zoe_N and Zoe_K from the paper) share a common definition under `models/zoedepth`, whereas the multi-headed model (Zoe_NK) is defined under `models/zoedepth_nk`.
171
+ ## **Evaluation**
172
+ Download the required dataset and change the `DATASETS_CONFIG` dictionary in `utils/config.py` accordingly.
173
+ ### Evaluating offical models
174
+ On NYU-Depth-v2 for example:
175
+
176
+ For ZoeD_N:
177
+ ```bash
178
+ python evaluate.py -m zoedepth -d nyu
179
+ ```
180
+
181
+ For ZoeD_NK:
182
+ ```bash
183
+ python evaluate.py -m zoedepth_nk -d nyu
184
+ ```
185
+
186
+ ### Evaluating local checkpoint
187
+ ```bash
188
+ python evaluate.py -m zoedepth --pretrained_resource="local::/path/to/local/ckpt.pt" -d nyu
189
+ ```
190
+ Pretrained resources are prefixed with `url::` to indicate weights should be fetched from a url, or `local::` to indicate path is a local file. Refer to `models/model_io.py` for details.
191
+
192
+ The dataset name should match the corresponding key in `utils.config.DATASETS_CONFIG` .
193
+
194
+ ## **Training**
195
+ Download training datasets as per instructions given [here](https://github.com/cleinc/bts/tree/master/pytorch#nyu-depvh-v2). Then for training a single head model on NYU-Depth-v2 :
196
+ ```bash
197
+ python train_mono.py -m zoedepth --pretrained_resource=""
198
+ ```
199
+
200
+ For training the Zoe-NK model:
201
+ ```bash
202
+ python train_mix.py -m zoedepth_nk --pretrained_resource=""
203
+ ```
204
+ ## **Gradio demo**
205
+ We provide a UI demo built using [gradio](https://gradio.app/). To get started, install UI requirements:
206
+ ```bash
207
+ pip install -r ui/ui_requirements.txt
208
+ ```
209
+ Then launch the gradio UI:
210
+ ```bash
211
+ python -m ui.app
212
+ ```
213
+
214
+ The UI is also hosted on HuggingFace🤗 [here](https://huggingface.co/spaces/shariqfarooq/ZoeDepth)
215
+ ## **Citation**
216
+ ```
217
+ @misc{https://doi.org/10.48550/arxiv.2302.12288,
218
+ doi = {10.48550/ARXIV.2302.12288},
219
+
220
+ url = {https://arxiv.org/abs/2302.12288},
221
+
222
+ author = {Bhat, Shariq Farooq and Birkl, Reiner and Wofk, Diana and Wonka, Peter and Müller, Matthias},
223
+
224
+ keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
225
+
226
+ title = {ZoeDepth: Zero-shot Transfer by Combining Relative and Metric Depth},
227
+
228
+ publisher = {arXiv},
229
+
230
+ year = {2023},
231
+
232
+ copyright = {arXiv.org perpetual, non-exclusive license}
233
+ }
234
+
235
+ ```
236
+
237
+
238
+
239
+
240
+
241
+
242
+
243
+
244
+
245
+
246
+
247
+
248
+
ZoeDepth/environment.yml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: zoe
2
+ channels:
3
+ - pytorch
4
+ - nvidia
5
+ - conda-forge
6
+ dependencies:
7
+ - cuda=11.7.1
8
+ - h5py=3.7.0
9
+ - hdf5=1.12.2
10
+ - matplotlib=3.6.2
11
+ - matplotlib-base=3.6.2
12
+ - numpy=1.24.1
13
+ - opencv=4.6.0
14
+ - pip=22.3.1
15
+ - python=3.9.7
16
+ - pytorch=1.13.1
17
+ - pytorch-cuda=11.7
18
+ - pytorch-mutex=1.0
19
+ - scipy=1.10.0
20
+ - torchaudio=0.13.1
21
+ - torchvision=0.14.1
22
+ - pip:
23
+ - huggingface-hub==0.11.1
24
+ - timm==0.6.12
25
+ - tqdm==4.64.1
26
+ - wandb==0.13.9
ZoeDepth/evaluate.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import argparse
26
+ from pprint import pprint
27
+
28
+ import torch
29
+ from zoedepth.utils.easydict import EasyDict as edict
30
+ from tqdm import tqdm
31
+
32
+ from zoedepth.data.data_mono import DepthDataLoader
33
+ from zoedepth.models.builder import build_model
34
+ from zoedepth.utils.arg_utils import parse_unknown
35
+ from zoedepth.utils.config import change_dataset, get_config, ALL_EVAL_DATASETS, ALL_INDOOR, ALL_OUTDOOR
36
+ from zoedepth.utils.misc import (RunningAverageDict, colors, compute_metrics,
37
+ count_parameters)
38
+
39
+
40
@torch.no_grad()
def infer(model, images, **kwargs):
    """Predict depth with horizontal-flip test-time augmentation.

    The model is run once on ``images`` and once on the batch mirrored along
    axis 3; the second prediction is mirrored back and the two are averaged.

    Args:
        model: Callable invoked as ``model(images, **kwargs)``. Its output may
            be a tensor, a list/tuple (the last element is used) or a dict
            (``'metric_depth'`` is used when present, otherwise ``'out'``).
        images: Input batch; the original author notes the layout N, C, H, W.
        **kwargs: Forwarded unchanged to both model calls.

    Returns:
        Element-wise mean of the direct and the un-mirrored flipped prediction.

    Raises:
        NotImplementedError: If the model output has an unsupported type.
    """
    def _extract_depth(output):
        # Reduce every supported output container to a single prediction.
        if isinstance(output, torch.Tensor):
            return output
        if isinstance(output, (list, tuple)):
            return output[-1]
        if isinstance(output, dict):
            key = 'metric_depth' if 'metric_depth' in output else 'out'
            return output[key]
        raise NotImplementedError(f"Unknown output type {type(output)}")

    direct = _extract_depth(model(images, **kwargs))

    mirrored_input = torch.flip(images, [3])
    mirrored = _extract_depth(model(mirrored_input, **kwargs))
    # Undo the mirroring so both predictions are pixel-aligned before averaging.
    restored = torch.flip(mirrored, [3])

    return 0.5 * (direct + restored)
65
+
66
+
67
def _save_eval_visuals(out_dir, index, image, depth, pred):
    """Write the input image plus colorized ground truth and prediction to ``out_dir``.

    Files are named ``{index}_img.png``, ``{index}_depth.png`` and
    ``{index}_pred.png``. Both depth maps are colorized with the fixed
    range (0, 10) passed to ``colorize``.
    """
    # Imported lazily so these packages are only needed when saving is enabled.
    import os
    from PIL import Image
    import torchvision.transforms as transforms
    from zoedepth.utils.misc import colorize

    os.makedirs(out_dir, exist_ok=True)
    depth_vis = colorize(depth.squeeze().cpu().numpy(), 0, 10)
    pred_vis = colorize(pred.squeeze().cpu().numpy(), 0, 10)
    rgb = transforms.ToPILImage()(image.squeeze().cpu())
    rgb.save(os.path.join(out_dir, f"{index}_img.png"))
    Image.fromarray(depth_vis).save(os.path.join(out_dir, f"{index}_depth.png"))
    Image.fromarray(pred_vis).save(os.path.join(out_dir, f"{index}_pred.png"))


@torch.no_grad()
def evaluate(model, test_loader, config, round_vals=True, round_precision=3):
    """Evaluate ``model`` on ``test_loader`` and return averaged metrics.

    Args:
        model: Model to evaluate; it is switched to eval mode. Inputs are moved
            to CUDA, so the model is expected to live on the GPU already.
        test_loader: Iterable of samples with ``'image'``, ``'depth'`` and
            ``'dataset'`` entries; ``'has_valid_depth'`` and ``'focal'`` are
            optional. Samples whose ``'has_valid_depth'`` is falsy are skipped.
        config: Configuration passed to ``compute_metrics``. If it contains a
            truthy ``save_images`` entry, that value is used as the directory
            where per-sample visualizations are written.
        round_vals (bool): Round the final metric values when True.
        round_precision (int): Number of decimals used when rounding.

    Returns:
        dict: Metric name -> running-average value (rounded if requested).
    """
    model.eval()
    metrics = RunningAverageDict()
    # The save setting does not change during evaluation; resolve it once.
    save_dir = config.save_images if ("save_images" in config and config.save_images) else None

    for i, sample in tqdm(enumerate(test_loader), total=len(test_loader)):
        if 'has_valid_depth' in sample:
            if not sample['has_valid_depth']:
                continue
        image, depth = sample['image'], sample['depth']
        image, depth = image.cuda(), depth.cuda()
        depth = depth.squeeze().unsqueeze(0).unsqueeze(0)

        if 'focal' in sample:
            focal = sample['focal']
        else:
            # This magic number (focal) is only used for evaluating BTS model.
            # Built only when the sample lacks a focal entry, instead of
            # allocating a CUDA tensor on every iteration as an eager default.
            focal = torch.Tensor([715.0873]).cuda()
        pred = infer(model, image, dataset=sample['dataset'][0], focal=focal)

        # Save image, depth, pred for visualization
        if save_dir:
            _save_eval_visuals(save_dir, i, image, depth, pred)

        metrics.update(compute_metrics(depth, pred, config=config))

    averaged = metrics.get_value()
    if round_vals:
        return {k: round(v, round_precision) for k, v in averaged.items()}
    return {k: v for k, v in averaged.items()}
110
+
111
def main(config):
    """Build the model described by ``config``, evaluate it and return the metrics.

    The model is moved to CUDA, evaluated on the ``'online_eval'`` data loader,
    and the metrics are printed between the green/reset color codes. A
    ``'#params'`` entry (parameter count in millions, as a string) is added to
    the returned dict after printing.
    """
    model = build_model(config)
    eval_loader = DepthDataLoader(config, 'online_eval').data
    model = model.cuda()

    results = evaluate(model, eval_loader, config)

    print(f"{colors.fg.green}")
    print(results)
    print(f"{colors.reset}")

    millions = round(count_parameters(model, include_all=True)/1e6, 2)
    results['#params'] = f"{millions}M"
    return results
121
+
122
+
123
def eval_model(model_name, pretrained_resource, dataset='nyu', **kwargs):
    """Evaluate the named model on one dataset and return its metrics.

    Args:
        model_name: Model name passed to ``get_config``.
        pretrained_resource: Weights resource. When falsy it is not passed as
            an override, so the default defined in the config is used.
        dataset: Dataset name passed to ``get_config``.
        **kwargs: Extra config overrides forwarded to ``get_config``.

    Returns:
        The metrics returned by ``main`` for the resolved config.
    """
    # Load default pretrained resource defined in config if not set
    if pretrained_resource:
        overrides = dict(kwargs, pretrained_resource=pretrained_resource)
    else:
        overrides = kwargs

    config = get_config(model_name, "eval", dataset, **overrides)
    pprint(config)
    print(f"Evaluating {model_name} on {dataset}...")
    return main(config)
133
+
134
+
135
if __name__ == '__main__':
    # Command-line entry point: evaluate one model on one or more datasets.
    cli = argparse.ArgumentParser()
    cli.add_argument("-m", "--model", type=str, required=True,
                     help="Name of the model to evaluate")
    cli.add_argument("-p", "--pretrained_resource", type=str, required=False, default=None,
                     help="Pretrained resource to use for fetching weights. If not set, default resource from model config is used, Refer models.model_io.load_state_from_resource for more details.")
    cli.add_argument("-d", "--dataset", type=str, required=False, default='nyu',
                     help="Dataset to evaluate on")

    # Unrecognized arguments are turned into config overrides.
    args, unknown_args = cli.parse_known_args()
    overwrite_kwargs = parse_unknown(unknown_args)

    requested = args.dataset
    # Substring checks: the more specific group names must be tested before "ALL".
    if "ALL_INDOOR" in requested:
        datasets = ALL_INDOOR
    elif "ALL_OUTDOOR" in requested:
        datasets = ALL_OUTDOOR
    elif "ALL" in requested:
        datasets = ALL_EVAL_DATASETS
    elif "," in requested:
        datasets = requested.split(",")
    else:
        datasets = [requested]

    for dataset in datasets:
        eval_model(args.model, pretrained_resource=args.pretrained_resource,
                   dataset=dataset, **overwrite_kwargs)
ZoeDepth/hubconf.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ dependencies=['torch']
26
+ from zoedepth.utils.config import get_config
27
+ from zoedepth.models.builder import build_model
28
+ import numpy as np
29
+ import torch
30
+
31
+
32
+ # ZoeD_N
33
def ZoeD_N(pretrained=False, midas_model_type="DPT_BEiT_L_384", config_mode="infer", **kwargs):
    """Zoe_M12_N model. This is the version of ZoeDepth that has a single metric head.

    Args:
        pretrained (bool): If True, returns a model pre-trained on NYU-Depth-V2
            (the released ``ZoeD_M12_N.pt`` weights).
        midas_model_type (str): Midas model type. Should be one of the models as listed in
            torch.hub.list("intel-isl/MiDaS"). Default: DPT_BEiT_L_384
        config_mode (str): Config mode. Should be one of "infer", "train" or "eval". Default: "infer"

    Keyword Args:
        **kwargs: Additional arguments to pass to the model.
            The following arguments are supported:
                train_midas (bool): If True, returns a model with a trainable midas base. Default: False
                use_pretrained_midas (bool): If True, returns a model that uses pretrained midas base. Default: False
                n_bins (int): Number of bin centers. Defaults to 64.
                bin_centers_type (str): "normed" or "softplus". Activation type used for bin centers.
                    For "normed" bin centers, linear normalization trick is applied. This results in bounded bin centers.
                    For "softplus", softplus activation is used and thus are unbounded. Defaults to "softplus".
                bin_embedding_dim (int): bin embedding dimension. Defaults to 128.
                min_depth (float): Lower bound for normed bin centers. Defaults to 1e-3.
                max_depth (float): Upper bound for normed bin centers. Defaults to 10.
                n_attractors (List[int]): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1].
                attractor_alpha (int): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 1000.
                attractor_gamma (int): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
                attractor_kind (str): Attraction aggregation "sum" or "mean". Defaults to 'mean'.
                attractor_type (str): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'inv'.
                min_temp (float): Lower bound for temperature of output probability distribution. Defaults to 0.0212.
                max_temp (float): Upper bound for temperature of output probability distribution. Defaults to 50.
                force_keep_ar (bool): If True, the model will keep the aspect ratio of the input image. Defaults to True.

    Returns:
        The model built by ``build_model`` from the resolved config.

    Raises:
        ValueError: If ``pretrained`` is True and ``midas_model_type`` is not "DPT_BEiT_L_384".
    """
    if pretrained and midas_model_type != "DPT_BEiT_L_384":
        raise ValueError(f"Only DPT_BEiT_L_384 MiDaS model is supported for pretrained Zoe_N model, got: {midas_model_type}")

    if pretrained:
        pretrained_resource = "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt"
    else:
        pretrained_resource = None

    # Forward the requested backbone; previously it was validated but never
    # reached the config, so a non-default midas_model_type was silently ignored.
    config = get_config("zoedepth", config_mode, pretrained_resource=pretrained_resource,
                        midas_model_type=midas_model_type, **kwargs)
    return build_model(config)
71
+
72
+ # ZoeD_K
73
def ZoeD_K(pretrained=False, midas_model_type="DPT_BEiT_L_384", config_mode="infer", **kwargs):
    """Zoe_M12_K model. This is the version of ZoeDepth that has a single metric head.

    The config is resolved with ``config_version="kitti"``.

    Args:
        pretrained (bool): If True, loads the released ``ZoeD_M12_K.pt`` weights
            (presumably trained on KITTI, matching the "kitti" config version).
        midas_model_type (str): Midas model type. Should be one of the models as listed in
            torch.hub.list("intel-isl/MiDaS"). Default: DPT_BEiT_L_384
        config_mode (str): Config mode. Should be one of "infer", "train" or "eval". Default: "infer"

    Keyword Args:
        **kwargs: Additional arguments to pass to the model.
            The following arguments are supported. NOTE(review): the defaults listed are copied from
            the base zoedepth documentation; the "kitti" config version may override some of them
            (e.g. the depth range) - confirm against the config files.
                train_midas (bool): If True, returns a model with a trainable midas base. Default: False
                use_pretrained_midas (bool): If True, returns a model that uses pretrained midas base. Default: False
                n_bins (int): Number of bin centers. Defaults to 64.
                bin_centers_type (str): "normed" or "softplus". Activation type used for bin centers.
                    For "normed" bin centers, linear normalization trick is applied. This results in bounded bin centers.
                    For "softplus", softplus activation is used and thus are unbounded. Defaults to "softplus".
                bin_embedding_dim (int): bin embedding dimension. Defaults to 128.
                min_depth (float): Lower bound for normed bin centers. Defaults to 1e-3.
                max_depth (float): Upper bound for normed bin centers. Defaults to 10.
                n_attractors (List[int]): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1].
                attractor_alpha (int): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 1000.
                attractor_gamma (int): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
                attractor_kind (str): Attraction aggregation "sum" or "mean". Defaults to 'mean'.
                attractor_type (str): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'inv'.
                min_temp (float): Lower bound for temperature of output probability distribution. Defaults to 0.0212.
                max_temp (float): Upper bound for temperature of output probability distribution. Defaults to 50.
                force_keep_ar (bool): If True, the model will keep the aspect ratio of the input image. Defaults to True.

    Returns:
        The model built by ``build_model`` from the resolved config.

    Raises:
        ValueError: If ``pretrained`` is True and ``midas_model_type`` is not "DPT_BEiT_L_384".
    """
    if pretrained and midas_model_type != "DPT_BEiT_L_384":
        raise ValueError(f"Only DPT_BEiT_L_384 MiDaS model is supported for pretrained Zoe_K model, got: {midas_model_type}")

    if pretrained:
        pretrained_resource = "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt"
    else:
        pretrained_resource = None

    # Forward the requested backbone; previously it was validated but never
    # reached the config, so a non-default midas_model_type was silently ignored.
    config = get_config("zoedepth", config_mode, pretrained_resource=pretrained_resource,
                        config_version="kitti", midas_model_type=midas_model_type, **kwargs)
    return build_model(config)
112
+
113
+ # Zoe_NK
114
def ZoeD_NK(pretrained=False, midas_model_type="DPT_BEiT_L_384", config_mode="infer", **kwargs):
    """ZoeDepthNK model. This is the version of ZoeDepth that has two metric heads and uses a learned router to route to experts.

    Args:
        pretrained (bool): If True, loads the released ``ZoeD_M12_NK.pt`` weights.
        midas_model_type (str): Midas model type. Should be one of the models as listed in
            torch.hub.list("intel-isl/MiDaS"). Default: DPT_BEiT_L_384
        config_mode (str): Config mode passed to ``get_config``. Default: "infer"

    Keyword Args:
        **kwargs: Additional arguments to pass to the model.
            The following arguments are supported:
                train_midas (bool): If True, returns a model with a trainable midas base. Defaults to True
                use_pretrained_midas (bool): If True, returns a model that uses pretrained midas base. Defaults to True
                bin_conf (List[dict]): A list of dictionaries that contain the bin configuration for each metric head.
                    Each dictionary should contain the following keys:
                    "name" (str, typically same as the dataset name), "n_bins" (int), "min_depth" (float), "max_depth" (float)
                    The length of this list determines the number of metric heads.
                bin_centers_type (str): "normed" or "softplus". Activation type used for bin centers.
                    For "normed" bin centers, linear normalization trick is applied. This results in bounded bin centers.
                    For "softplus", softplus activation is used and thus are unbounded. Defaults to "softplus".
                bin_embedding_dim (int): bin embedding dimension. Defaults to 128.

                n_attractors (List[int]): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1].
                attractor_alpha (int): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 1000.
                attractor_gamma (int): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
                attractor_kind (str): Attraction aggregation "sum" or "mean". Defaults to 'mean'.
                attractor_type (str): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'inv'.

                min_temp (float): Lower bound for temperature of output probability distribution. Defaults to 0.0212.
                max_temp (float): Upper bound for temperature of output probability distribution. Defaults to 50.

                memory_efficient (bool): Whether to use memory efficient version of attractor layers. Memory efficient
                    version is slower but is recommended in case of multiple metric heads in order to save GPU memory.
                    Defaults to True.

    Returns:
        The model built by ``build_model`` from the resolved config.

    Raises:
        ValueError: If ``pretrained`` is True and ``midas_model_type`` is not "DPT_BEiT_L_384".
    """
    if pretrained and midas_model_type != "DPT_BEiT_L_384":
        raise ValueError(f"Only DPT_BEiT_L_384 MiDaS model is supported for pretrained Zoe_NK model, got: {midas_model_type}")

    if pretrained:
        pretrained_resource = "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt"
    else:
        pretrained_resource = None

    # Forward the requested backbone; previously it was validated but never
    # reached the config, so a non-default midas_model_type was silently ignored.
    config = get_config("zoedepth_nk", config_mode, pretrained_resource=pretrained_resource,
                        midas_model_type=midas_model_type, **kwargs)
    return build_model(config)
ZoeDepth/sanity.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import numpy as np
26
+ from torchvision.transforms import ToTensor
27
+ from PIL import Image
28
+ from zoedepth.utils.misc import get_image_from_url, colorize
29
+ import torch
30
+
31
+ from zoedepth.models.builder import build_model
32
+ from zoedepth.utils.config import get_config
33
+ from pprint import pprint
34
+
35
+
36
# Sanity check script: builds the "zoedepth" model from its "infer" config,
# runs it on a random tensor and on an image fetched from a URL, and writes
# the predictions to pred.png / pred_raw.png in the working directory.

# Force a fresh copy of the MiDaS hub repo before the model is built.
torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
if DEVICE == "cpu":
    print("WARNING: Running on CPU. This will be slow. Check your CUDA installation.")

print("*" * 20 + " Testing zoedepth " + "*" * 20)
conf = get_config("zoedepth", "infer")


print("Config:")
pprint(conf)

model = build_model(conf).to(DEVICE)
model.eval()
x = torch.rand(1, 3, 384, 512).to(DEVICE)

print("-"*20 + "Testing on a random input" + "-"*20)

with torch.no_grad():
    out = model(x)

if isinstance(out, dict):
    # print shapes of all outputs
    for k, v in out.items():
        if v is not None:
            print(k, v.shape)
else:
    print([o.shape for o in out if o is not None])

print("\n\n")
print("-"*20 + " Testing on an indoor scene from url " + "-"*20)

# Test img
url = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS4W8H_Nxk_rs3Vje_zj6mglPOH7bnPhQitBH8WkqjlqQVotdtDEG37BsnGofME3_u6lDk&usqp=CAU"
img = get_image_from_url(url)
orig_size = img.size
X = ToTensor()(img)
X = X.unsqueeze(0).to(DEVICE)

print("X.shape", X.shape)
print("predicting")

with torch.no_grad():
    out = model.infer(X).cpu()

# or just,
# out = model.infer_pil(img)


print("output.shape", out.shape)
pred = Image.fromarray(colorize(out))
# Stack img and pred side by side for comparison and save
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
# and is available in older Pillow releases as well.
pred = pred.resize(orig_size, Image.LANCZOS)
stacked = Image.new("RGB", (orig_size[0]*2, orig_size[1]))
stacked.paste(img, (0, 0))
stacked.paste(pred, (orig_size[0], 0))

stacked.save("pred.png")
print("saved pred.png")


# Also save the prediction returned directly as a PIL image.
model.infer_pil(img, output_type="pil").save("pred_raw.png")
ZoeDepth/sanity_hub.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import numpy as np
27
+ from torchvision.transforms import ToTensor
28
+ from PIL import Image
29
+ from zoedepth.utils.misc import get_image_from_url, colorize
30
+
31
+ from zoedepth.models.builder import build_model
32
+ from zoedepth.utils.config import get_config
33
+ from pprint import pprint
34
+
35
+
36
+
37
# Trigger reload of MiDaS
torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True)


# Load every pretrained entry point from the local hubconf, one after another.
# `model` is rebound on each iteration, so it ends up holding the ZoeD_N model.
for entry_point in ("ZoeD_K", "ZoeD_NK", "ZoeD_N"):
    model = torch.hub.load(".", entry_point, source="local", pretrained=True)
ZoeDepth/train_mix.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ from zoedepth.utils.misc import count_parameters, parallelize
26
+ from zoedepth.utils.config import get_config
27
+ from zoedepth.utils.arg_utils import parse_unknown
28
+ from zoedepth.trainers.builder import get_trainer
29
+ from zoedepth.models.builder import build_model
30
+ from zoedepth.data.data_mono import MixedNYUKITTI
31
+ import torch.utils.data.distributed
32
+ import torch.multiprocessing as mp
33
+ import torch
34
+ import numpy as np
35
+ from pprint import pprint
36
+ import argparse
37
+ import os
38
+
39
# Environment switches applied when this module is loaded, before any of the
# definitions below run.
# NOTE(review): presumably "egl" selects a headless PyOpenGL backend and
# "thread" picks the wandb start method - confirm against those libraries.
os.environ.update({
    "PYOPENGL_PLATFORM": "egl",
    "WANDB_START_METHOD": "thread",
})
41
+
42
+
43
def fix_random_seed(seed: int):
    """
    Seed every random number generator touched by training.

    Python's ``random``, NumPy and PyTorch (CPU plus the current and all CUDA
    devices) are seeded with the same value, then cuDNN is switched to
    deterministic mode with benchmarking turned off.

    Args:
        seed (int): value handed to each seeding function
    """
    import random

    import numpy
    import torch

    # Same seeding calls, in the same order, expressed as a table.
    seeders = (
        random.seed,
        numpy.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
63
+
64
+
65
def load_ckpt(config, model, checkpoint_dir="./checkpoints", ckpt_type="best"):
    """
    Load checkpoint weights into ``model`` when the config names a checkpoint.

    Resolution order:
      1. ``config.checkpoint`` is used as-is when the attribute exists.
      2. Otherwise, when ``config.ckpt_pattern`` exists, ``checkpoint_dir`` is
         globbed for ``*{pattern}*{ckpt_type}*`` and the first match in sorted
         order is used.
      3. Otherwise ``model`` is returned untouched.

    Args:
        config: configuration object, inspected with ``hasattr``.
        model: model to load the weights into.
        checkpoint_dir (str): directory searched in the pattern case.
        ckpt_type (str): extra substring a matching file name must contain.

    Returns:
        The result of ``load_wts(model, checkpoint)``, or ``model`` unchanged
        when the config names no checkpoint.

    Raises:
        ValueError: if ``ckpt_pattern`` is set but no file matches.
    """
    import glob
    import os

    if hasattr(config, "checkpoint"):
        checkpoint = config.checkpoint
    elif hasattr(config, "ckpt_pattern"):
        pattern = config.ckpt_pattern
        # glob returns matches in arbitrary order; sort them so the selected
        # checkpoint does not depend on directory listing order.
        matches = sorted(glob.glob(os.path.join(
            checkpoint_dir, f"*{pattern}*{ckpt_type}*")))
        if not matches:
            raise ValueError(f"No matches found for the pattern {pattern}")

        checkpoint = matches[0]
    else:
        return model

    # Imported only once a checkpoint is known to be needed.
    from zoedepth.models.model_io import load_wts

    model = load_wts(model, checkpoint)
    print("Loaded weights from {0}".format(checkpoint))
    return model
87
+
88
+
89
def main_worker(gpu, ngpus_per_node, config):
    """
    Run one training job on the given device.

    Seeds the RNGs with 43, builds the model, optionally restores a checkpoint
    via ``load_ckpt``, wraps the model with ``parallelize``, creates the
    ``MixedNYUKITTI`` "train" and "online_eval" loaders and runs the trainer
    class returned by ``get_trainer``.

    Args:
        gpu: device id; stored on ``config.gpu`` and passed to the trainer.
        ngpus_per_node: accepted as an argument but not read in this function.
        config: run configuration; ``gpu`` and ``total_params`` are written to it.
    """
    try:
        fix_random_seed(43)
        config.gpu = gpu

        net = build_model(config)
        net = load_ckpt(config, net)
        net = parallelize(config, net)

        # Parameter count in millions, kept on the config as a string.
        param_summary = f"{round(count_parameters(net)/1e6,2)}M"
        config.total_params = param_summary
        print(f"Total parameters : {param_summary}")

        train_data = MixedNYUKITTI(config, "train").data
        eval_data = MixedNYUKITTI(config, "online_eval").data

        trainer_cls = get_trainer(config)
        runner = trainer_cls(
            config, net, train_data, eval_data, device=config.gpu)
        runner.train()
    finally:
        # Close the wandb run whether training finished or raised.
        import wandb
        wandb.finish()
113
+
114
+
115
if __name__ == '__main__':
    # Entry point: parse the CLI, build the training config, work out the
    # SLURM / distributed layout and launch the worker(s).
    mp.set_start_method('forkserver')

    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", type=str, default="synunet")
    parser.add_argument("-d", "--dataset", type=str, default='mix')
    parser.add_argument("--trainer", type=str, default=None)
    args, unknown_args = parser.parse_known_args()

    # Arguments argparse does not know are parsed by parse_unknown and passed
    # to get_config as keyword arguments, together with the explicit flags.
    overwrite_kwargs = parse_unknown(unknown_args)
    overwrite_kwargs["model"] = args.model
    if args.trainer is not None:
        overwrite_kwargs["trainer"] = args.trainer

    config = get_config(args.model, "train", args.dataset, **overwrite_kwargs)
    # git_commit()
    shared_dict = mp.Manager().dict() if config.use_shared_dict else None
    config.shared_dict = shared_dict

    config.batch_size = config.bs
    config.mode = 'train'
    if not (config.root == "." or os.path.isdir(config.root)):
        os.makedirs(config.root)

    try:
        # Both variables must be present; a missing one raises KeyError and
        # the non-SLURM defaults below are used instead.
        slurm_nodelist = os.environ['SLURM_JOB_NODELIST']
        nodes = slurm_nodelist.replace('[', '').replace(']', '').split(',')

        config.world_size = len(nodes)
        config.rank = int(os.environ['SLURM_PROCID'])
        # config.save_dir = "/ibex/scratch/bhatsf/videodepth/checkpoints"
    except KeyError:
        # We are NOT using SLURM
        config.world_size = 1
        config.rank = 0
        nodes = ["127.0.0.1"]

    if config.distributed:
        print(config.rank)
        # Rendezvous address: first node plus a random port in [15000, 15025).
        port = np.random.randint(15000, 15025)
        config.dist_url = 'tcp://{}:{}'.format(nodes[0], port)
        print(config.dist_url)
        config.dist_backend = 'nccl'
        config.gpu = None

    ngpus_per_node = torch.cuda.device_count()
    config.num_workers = config.workers
    config.ngpus_per_node = ngpus_per_node
    print("Config:")
    pprint(config)

    if not config.distributed:
        # Single process: pin to GPU 0 only when exactly one GPU is visible.
        if ngpus_per_node == 1:
            config.gpu = 0
        main_worker(config.gpu, ngpus_per_node, config)
    else:
        # One process per local GPU; world size becomes GPUs x nodes.
        config.world_size = ngpus_per_node * config.world_size
        mp.spawn(main_worker, nprocs=ngpus_per_node,
                 args=(ngpus_per_node, config))
ZoeDepth/train_mono.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ from zoedepth.utils.misc import count_parameters, parallelize
26
+ from zoedepth.utils.config import get_config
27
+ from zoedepth.utils.arg_utils import parse_unknown
28
+ from zoedepth.trainers.builder import get_trainer
29
+ from zoedepth.models.builder import build_model
30
+ from zoedepth.data.data_mono import DepthDataLoader
31
+ import torch.utils.data.distributed
32
+ import torch.multiprocessing as mp
33
+ import torch
34
+ import numpy as np
35
+ from pprint import pprint
36
+ import argparse
37
+ import os
38
+
39
# Environment switches applied when this module is loaded, before any of the
# definitions below run.
# NOTE(review): presumably "egl" selects a headless PyOpenGL backend and
# "thread" picks the wandb start method - confirm against those libraries.
os.environ.update({
    "PYOPENGL_PLATFORM": "egl",
    "WANDB_START_METHOD": "thread",
})
41
+
42
+
43
def fix_random_seed(seed: int):
    """
    Fix random seed for reproducibility

    Seeds Python's ``random``, NumPy and PyTorch (CPU plus the current and all
    CUDA devices), then configures cuDNN for repeatable runs.

    Args:
        seed (int): random seed
    """
    import random

    import numpy
    import torch

    random.seed(seed)
    numpy.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN pick algorithms by timing them, so the choice
    # can differ between runs; it must be off for reproducible results.
    torch.backends.cudnn.benchmark = False
57
+
58
+
59
def load_ckpt(config, model, checkpoint_dir="./checkpoints", ckpt_type="best"):
    """
    Load checkpoint weights into ``model`` when the config names a checkpoint.

    Resolution order:
      1. ``config.checkpoint`` is used as-is when the attribute exists.
      2. Otherwise, when ``config.ckpt_pattern`` exists, ``checkpoint_dir`` is
         globbed for ``*{pattern}*{ckpt_type}*`` and the first match in sorted
         order is used.
      3. Otherwise ``model`` is returned untouched.

    Args:
        config: configuration object, inspected with ``hasattr``.
        model: model to load the weights into.
        checkpoint_dir (str): directory searched in the pattern case.
        ckpt_type (str): extra substring a matching file name must contain.

    Returns:
        The result of ``load_wts(model, checkpoint)``, or ``model`` unchanged
        when the config names no checkpoint.

    Raises:
        ValueError: if ``ckpt_pattern`` is set but no file matches.
    """
    import glob
    import os

    if hasattr(config, "checkpoint"):
        checkpoint = config.checkpoint
    elif hasattr(config, "ckpt_pattern"):
        pattern = config.ckpt_pattern
        # glob returns matches in arbitrary order; sort them so the selected
        # checkpoint does not depend on directory listing order.
        matches = sorted(glob.glob(os.path.join(
            checkpoint_dir, f"*{pattern}*{ckpt_type}*")))
        if not matches:
            raise ValueError(f"No matches found for the pattern {pattern}")

        checkpoint = matches[0]
    else:
        return model

    # Imported only once a checkpoint is known to be needed.
    from zoedepth.models.model_io import load_wts

    model = load_wts(model, checkpoint)
    print("Loaded weights from {0}".format(checkpoint))
    return model
81
+
82
+
83
def main_worker(gpu, ngpus_per_node, config):
    """
    Run one training job on the given device.

    Seeds the RNGs (``config.seed`` when present and truthy, otherwise 43),
    builds the model, optionally restores a checkpoint via ``load_ckpt``,
    wraps the model with ``parallelize``, creates the ``DepthDataLoader``
    "train" and "online_eval" loaders and runs the trainer class returned by
    ``get_trainer``.

    Args:
        gpu: device id; stored on ``config.gpu`` and passed to the trainer.
        ngpus_per_node: accepted as an argument but not read in this function.
        config: run configuration; ``gpu`` and ``total_params`` are written to it.
    """
    try:
        # Fall back to 43 when the config has no seed or a falsy one.
        seed = 43
        if 'seed' in config and config.seed:
            seed = config.seed
        fix_random_seed(seed)

        config.gpu = gpu

        net = build_model(config)
        net = load_ckpt(config, net)
        net = parallelize(config, net)

        # Parameter count in millions, kept on the config as a string.
        param_summary = f"{round(count_parameters(net)/1e6,2)}M"
        config.total_params = param_summary
        print(f"Total parameters : {param_summary}")

        train_data = DepthDataLoader(config, "train").data
        eval_data = DepthDataLoader(config, "online_eval").data

        trainer_cls = get_trainer(config)
        runner = trainer_cls(
            config, net, train_data, eval_data, device=config.gpu)
        runner.train()
    finally:
        # Close the wandb run whether training finished or raised.
        import wandb
        wandb.finish()
108
+
109
+
110
if __name__ == '__main__':
    # Entry point: parse the CLI, build the training config, work out the
    # SLURM / distributed layout and launch the worker(s).
    mp.set_start_method('forkserver')

    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", type=str, default="synunet")
    parser.add_argument("-d", "--dataset", type=str, default='nyu')
    parser.add_argument("--trainer", type=str, default=None)
    args, unknown_args = parser.parse_known_args()

    # Arguments argparse does not know are parsed by parse_unknown and passed
    # to get_config as keyword arguments, together with the explicit flags.
    overwrite_kwargs = parse_unknown(unknown_args)
    overwrite_kwargs["model"] = args.model
    if args.trainer is not None:
        overwrite_kwargs["trainer"] = args.trainer

    config = get_config(args.model, "train", args.dataset, **overwrite_kwargs)
    # git_commit()
    shared_dict = mp.Manager().dict() if config.use_shared_dict else None
    config.shared_dict = shared_dict

    config.batch_size = config.bs
    config.mode = 'train'
    if not (config.root == "." or os.path.isdir(config.root)):
        os.makedirs(config.root)

    try:
        # Both variables must be present; a missing one raises KeyError and
        # the non-SLURM defaults below are used instead.
        slurm_nodelist = os.environ['SLURM_JOB_NODELIST']
        nodes = slurm_nodelist.replace('[', '').replace(']', '').split(',')

        config.world_size = len(nodes)
        config.rank = int(os.environ['SLURM_PROCID'])
        # config.save_dir = "/ibex/scratch/bhatsf/videodepth/checkpoints"
    except KeyError:
        # We are NOT using SLURM
        config.world_size = 1
        config.rank = 0
        nodes = ["127.0.0.1"]

    if config.distributed:
        print(config.rank)
        # Rendezvous address: first node plus a random port in [15000, 15025).
        port = np.random.randint(15000, 15025)
        config.dist_url = 'tcp://{}:{}'.format(nodes[0], port)
        print(config.dist_url)
        config.dist_backend = 'nccl'
        config.gpu = None

    ngpus_per_node = torch.cuda.device_count()
    config.num_workers = config.workers
    config.ngpus_per_node = ngpus_per_node
    print("Config:")
    pprint(config)

    if not config.distributed:
        # Single process: pin to GPU 0 only when exactly one GPU is visible.
        if ngpus_per_node == 1:
            config.gpu = 0
        main_worker(config.gpu, ngpus_per_node, config)
    else:
        # One process per local GPU; world size becomes GPUs x nodes.
        config.world_size = ngpus_per_node * config.world_size
        mp.spawn(main_worker, nprocs=ngpus_per_node,
                 args=(ngpus_per_node, config))
ZoeDepth/train_test_inputs/kitti_eigen_test_files_with_gt.txt ADDED
The diff for this file is too large to render. See raw diff
 
ZoeDepth/train_test_inputs/kitti_eigen_train_files_with_gt.txt ADDED
The diff for this file is too large to render. See raw diff
 
ZoeDepth/train_test_inputs/nyudepthv2_test_files_with_gt.txt ADDED
@@ -0,0 +1,654 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bathroom/rgb_00045.jpg bathroom/sync_depth_00045.png 518.8579
2
+ bathroom/rgb_00046.jpg bathroom/sync_depth_00046.png 518.8579
3
+ bathroom/rgb_00507.jpg bathroom/sync_depth_00507.png 518.8579
4
+ bathroom/rgb_00508.jpg bathroom/sync_depth_00508.png 518.8579
5
+ bathroom/rgb_00509.jpg bathroom/sync_depth_00509.png 518.8579
6
+ bathroom/rgb_00510.jpg bathroom/sync_depth_00510.png 518.8579
7
+ bathroom/rgb_00511.jpg bathroom/sync_depth_00511.png 518.8579
8
+ bathroom/rgb_00512.jpg bathroom/sync_depth_00512.png 518.8579
9
+ bathroom/rgb_00649.jpg bathroom/sync_depth_00649.png 518.8579
10
+ bathroom/rgb_00650.jpg bathroom/sync_depth_00650.png 518.8579
11
+ bathroom/rgb_00655.jpg bathroom/sync_depth_00655.png 518.8579
12
+ bathroom/rgb_00656.jpg bathroom/sync_depth_00656.png 518.8579
13
+ bathroom/rgb_00657.jpg bathroom/sync_depth_00657.png 518.8579
14
+ bathroom/rgb_00662.jpg bathroom/sync_depth_00662.png 518.8579
15
+ bathroom/rgb_00663.jpg bathroom/sync_depth_00663.png 518.8579
16
+ bathroom/rgb_00667.jpg bathroom/sync_depth_00667.png 518.8579
17
+ bathroom/rgb_00668.jpg bathroom/sync_depth_00668.png 518.8579
18
+ bathroom/rgb_00670.jpg bathroom/sync_depth_00670.png 518.8579
19
+ bathroom/rgb_00671.jpg bathroom/sync_depth_00671.png 518.8579
20
+ bathroom/rgb_00672.jpg bathroom/sync_depth_00672.png 518.8579
21
+ bathroom/rgb_00675.jpg bathroom/sync_depth_00675.png 518.8579
22
+ bathroom/rgb_00676.jpg bathroom/sync_depth_00676.png 518.8579
23
+ bathroom/rgb_00677.jpg bathroom/sync_depth_00677.png 518.8579
24
+ bathroom/rgb_00678.jpg bathroom/sync_depth_00678.png 518.8579
25
+ bathroom/rgb_00679.jpg bathroom/sync_depth_00679.png 518.8579
26
+ bathroom/rgb_00680.jpg bathroom/sync_depth_00680.png 518.8579
27
+ bathroom/rgb_00685.jpg bathroom/sync_depth_00685.png 518.8579
28
+ bathroom/rgb_00686.jpg bathroom/sync_depth_00686.png 518.8579
29
+ bathroom/rgb_00687.jpg bathroom/sync_depth_00687.png 518.8579
30
+ bathroom/rgb_00688.jpg bathroom/sync_depth_00688.png 518.8579
31
+ bathroom/rgb_00689.jpg bathroom/sync_depth_00689.png 518.8579
32
+ bathroom/rgb_00692.jpg bathroom/sync_depth_00692.png 518.8579
33
+ bathroom/rgb_00693.jpg bathroom/sync_depth_00693.png 518.8579
34
+ bathroom/rgb_00696.jpg bathroom/sync_depth_00696.png 518.8579
35
+ bathroom/rgb_00669.jpg bathroom/sync_depth_00669.png 518.8579
36
+ bathroom/rgb_00697.jpg bathroom/sync_depth_00697.png 518.8579
37
+ bathroom/rgb_00698.jpg bathroom/sync_depth_00698.png 518.8579
38
+ bathroom/rgb_00705.jpg bathroom/sync_depth_00705.png 518.8579
39
+ bathroom/rgb_00706.jpg bathroom/sync_depth_00706.png 518.8579
40
+ bathroom/rgb_00707.jpg bathroom/sync_depth_00707.png 518.8579
41
+ bathroom/rgb_00708.jpg bathroom/sync_depth_00708.png 518.8579
42
+ bathroom/rgb_00709.jpg bathroom/sync_depth_00709.png 518.8579
43
+ bathroom/rgb_00710.jpg bathroom/sync_depth_00710.png 518.8579
44
+ bathroom/rgb_00711.jpg bathroom/sync_depth_00711.png 518.8579
45
+ bathroom/rgb_00712.jpg bathroom/sync_depth_00712.png 518.8579
46
+ bathroom/rgb_00716.jpg bathroom/sync_depth_00716.png 518.8579
47
+ bathroom/rgb_00717.jpg bathroom/sync_depth_00717.png 518.8579
48
+ bathroom/rgb_00723.jpg bathroom/sync_depth_00723.png 518.8579
49
+ bathroom/rgb_00724.jpg bathroom/sync_depth_00724.png 518.8579
50
+ bathroom/rgb_00725.jpg bathroom/sync_depth_00725.png 518.8579
51
+ bathroom/rgb_00726.jpg bathroom/sync_depth_00726.png 518.8579
52
+ bathroom/rgb_00727.jpg bathroom/sync_depth_00727.png 518.8579
53
+ bathroom/rgb_00730.jpg bathroom/sync_depth_00730.png 518.8579
54
+ bathroom/rgb_00731.jpg bathroom/sync_depth_00731.png 518.8579
55
+ bathroom/rgb_00732.jpg bathroom/sync_depth_00732.png 518.8579
56
+ bathroom/rgb_00733.jpg bathroom/sync_depth_00733.png 518.8579
57
+ bathroom/rgb_00742.jpg bathroom/sync_depth_00742.png 518.8579
58
+ bathroom/rgb_00743.jpg bathroom/sync_depth_00743.png 518.8579
59
+ bedroom/rgb_00055.jpg bedroom/sync_depth_00055.png 518.8579
60
+ bedroom/rgb_00056.jpg bedroom/sync_depth_00056.png 518.8579
61
+ bedroom/rgb_00058.jpg bedroom/sync_depth_00058.png 518.8579
62
+ bedroom/rgb_00059.jpg bedroom/sync_depth_00059.png 518.8579
63
+ bedroom/rgb_00060.jpg bedroom/sync_depth_00060.png 518.8579
64
+ bedroom/rgb_00061.jpg bedroom/sync_depth_00061.png 518.8579
65
+ bedroom/rgb_00062.jpg bedroom/sync_depth_00062.png 518.8579
66
+ bedroom/rgb_00075.jpg bedroom/sync_depth_00075.png 518.8579
67
+ bedroom/rgb_00076.jpg bedroom/sync_depth_00076.png 518.8579
68
+ bedroom/rgb_00077.jpg bedroom/sync_depth_00077.png 518.8579
69
+ bedroom/rgb_00078.jpg bedroom/sync_depth_00078.png 518.8579
70
+ bedroom/rgb_00170.jpg bedroom/sync_depth_00170.png 518.8579
71
+ bedroom/rgb_00171.jpg bedroom/sync_depth_00171.png 518.8579
72
+ bedroom/rgb_00172.jpg bedroom/sync_depth_00172.png 518.8579
73
+ bedroom/rgb_00173.jpg bedroom/sync_depth_00173.png 518.8579
74
+ bedroom/rgb_00174.jpg bedroom/sync_depth_00174.png 518.8579
75
+ bedroom/rgb_00175.jpg bedroom/sync_depth_00175.png 518.8579
76
+ bedroom/rgb_00180.jpg bedroom/sync_depth_00180.png 518.8579
77
+ bedroom/rgb_00181.jpg bedroom/sync_depth_00181.png 518.8579
78
+ bedroom/rgb_00182.jpg bedroom/sync_depth_00182.png 518.8579
79
+ bedroom/rgb_00183.jpg bedroom/sync_depth_00183.png 518.8579
80
+ bedroom/rgb_00184.jpg bedroom/sync_depth_00184.png 518.8579
81
+ bedroom/rgb_00185.jpg bedroom/sync_depth_00185.png 518.8579
82
+ bedroom/rgb_00186.jpg bedroom/sync_depth_00186.png 518.8579
83
+ bedroom/rgb_00187.jpg bedroom/sync_depth_00187.png 518.8579
84
+ bedroom/rgb_00188.jpg bedroom/sync_depth_00188.png 518.8579
85
+ bedroom/rgb_00189.jpg bedroom/sync_depth_00189.png 518.8579
86
+ bedroom/rgb_00190.jpg bedroom/sync_depth_00190.png 518.8579
87
+ bedroom/rgb_00191.jpg bedroom/sync_depth_00191.png 518.8579
88
+ bedroom/rgb_00192.jpg bedroom/sync_depth_00192.png 518.8579
89
+ bedroom/rgb_00219.jpg bedroom/sync_depth_00219.png 518.8579
90
+ bedroom/rgb_00220.jpg bedroom/sync_depth_00220.png 518.8579
91
+ bedroom/rgb_00221.jpg bedroom/sync_depth_00221.png 518.8579
92
+ bedroom/rgb_00279.jpg bedroom/sync_depth_00279.png 518.8579
93
+ bedroom/rgb_00179.jpg bedroom/sync_depth_00179.png 518.8579
94
+ bedroom/rgb_00280.jpg bedroom/sync_depth_00280.png 518.8579
95
+ bedroom/rgb_00536.jpg bedroom/sync_depth_00536.png 518.8579
96
+ bedroom/rgb_00960.jpg bedroom/sync_depth_00960.png 518.8579
97
+ bedroom/rgb_01000.jpg bedroom/sync_depth_01000.png 518.8579
98
+ bedroom/rgb_01052.jpg bedroom/sync_depth_01052.png 518.8579
99
+ bedroom/rgb_01092.jpg bedroom/sync_depth_01092.png 518.8579
100
+ bedroom/rgb_01122.jpg bedroom/sync_depth_01122.png 518.8579
101
+ bedroom/rgb_01150.jpg bedroom/sync_depth_01150.png 518.8579
102
+ bedroom/rgb_00281.jpg bedroom/sync_depth_00281.png 518.8579
103
+ bedroom/rgb_00282.jpg bedroom/sync_depth_00282.png 518.8579
104
+ bedroom/rgb_00514.jpg bedroom/sync_depth_00514.png 518.8579
105
+ bedroom/rgb_00515.jpg bedroom/sync_depth_00515.png 518.8579
106
+ bedroom/rgb_00516.jpg bedroom/sync_depth_00516.png 518.8579
107
+ bedroom/rgb_00517.jpg bedroom/sync_depth_00517.png 518.8579
108
+ bedroom/rgb_00518.jpg bedroom/sync_depth_00518.png 518.8579
109
+ bedroom/rgb_00519.jpg bedroom/sync_depth_00519.png 518.8579
110
+ bedroom/rgb_00520.jpg bedroom/sync_depth_00520.png 518.8579
111
+ bedroom/rgb_00521.jpg bedroom/sync_depth_00521.png 518.8579
112
+ bedroom/rgb_00522.jpg bedroom/sync_depth_00522.png 518.8579
113
+ bedroom/rgb_00523.jpg bedroom/sync_depth_00523.png 518.8579
114
+ bedroom/rgb_00524.jpg bedroom/sync_depth_00524.png 518.8579
115
+ bedroom/rgb_00525.jpg bedroom/sync_depth_00525.png 518.8579
116
+ bedroom/rgb_00530.jpg bedroom/sync_depth_00530.png 518.8579
117
+ bedroom/rgb_00531.jpg bedroom/sync_depth_00531.png 518.8579
118
+ bedroom/rgb_00532.jpg bedroom/sync_depth_00532.png 518.8579
119
+ bedroom/rgb_00537.jpg bedroom/sync_depth_00537.png 518.8579
120
+ bedroom/rgb_00538.jpg bedroom/sync_depth_00538.png 518.8579
121
+ bedroom/rgb_00916.jpg bedroom/sync_depth_00916.png 518.8579
122
+ bedroom/rgb_00917.jpg bedroom/sync_depth_00917.png 518.8579
123
+ bedroom/rgb_00918.jpg bedroom/sync_depth_00918.png 518.8579
124
+ bedroom/rgb_00925.jpg bedroom/sync_depth_00925.png 518.8579
125
+ bedroom/rgb_00926.jpg bedroom/sync_depth_00926.png 518.8579
126
+ bedroom/rgb_00927.jpg bedroom/sync_depth_00927.png 518.8579
127
+ bedroom/rgb_00931.jpg bedroom/sync_depth_00931.png 518.8579
128
+ bedroom/rgb_00932.jpg bedroom/sync_depth_00932.png 518.8579
129
+ bedroom/rgb_00933.jpg bedroom/sync_depth_00933.png 518.8579
130
+ bedroom/rgb_00934.jpg bedroom/sync_depth_00934.png 518.8579
131
+ bedroom/rgb_00944.jpg bedroom/sync_depth_00944.png 518.8579
132
+ bedroom/rgb_00945.jpg bedroom/sync_depth_00945.png 518.8579
133
+ bedroom/rgb_00946.jpg bedroom/sync_depth_00946.png 518.8579
134
+ bedroom/rgb_00958.jpg bedroom/sync_depth_00958.png 518.8579
135
+ bedroom/rgb_00959.jpg bedroom/sync_depth_00959.png 518.8579
136
+ bedroom/rgb_00961.jpg bedroom/sync_depth_00961.png 518.8579
137
+ bedroom/rgb_00964.jpg bedroom/sync_depth_00964.png 518.8579
138
+ bedroom/rgb_00965.jpg bedroom/sync_depth_00965.png 518.8579
139
+ bedroom/rgb_00966.jpg bedroom/sync_depth_00966.png 518.8579
140
+ bedroom/rgb_00969.jpg bedroom/sync_depth_00969.png 518.8579
141
+ bedroom/rgb_00970.jpg bedroom/sync_depth_00970.png 518.8579
142
+ bedroom/rgb_00971.jpg bedroom/sync_depth_00971.png 518.8579
143
+ bedroom/rgb_00972.jpg bedroom/sync_depth_00972.png 518.8579
144
+ bedroom/rgb_00973.jpg bedroom/sync_depth_00973.png 518.8579
145
+ bedroom/rgb_00974.jpg bedroom/sync_depth_00974.png 518.8579
146
+ bedroom/rgb_00975.jpg bedroom/sync_depth_00975.png 518.8579
147
+ bedroom/rgb_00976.jpg bedroom/sync_depth_00976.png 518.8579
148
+ bedroom/rgb_00990.jpg bedroom/sync_depth_00990.png 518.8579
149
+ bedroom/rgb_00991.jpg bedroom/sync_depth_00991.png 518.8579
150
+ bedroom/rgb_00992.jpg bedroom/sync_depth_00992.png 518.8579
151
+ bedroom/rgb_00993.jpg bedroom/sync_depth_00993.png 518.8579
152
+ bedroom/rgb_00994.jpg bedroom/sync_depth_00994.png 518.8579
153
+ bedroom/rgb_01001.jpg bedroom/sync_depth_01001.png 518.8579
154
+ bedroom/rgb_01002.jpg bedroom/sync_depth_01002.png 518.8579
155
+ bedroom/rgb_01003.jpg bedroom/sync_depth_01003.png 518.8579
156
+ bedroom/rgb_01009.jpg bedroom/sync_depth_01009.png 518.8579
157
+ bedroom/rgb_01010.jpg bedroom/sync_depth_01010.png 518.8579
158
+ bedroom/rgb_01011.jpg bedroom/sync_depth_01011.png 518.8579
159
+ bedroom/rgb_01020.jpg bedroom/sync_depth_01020.png 518.8579
160
+ bedroom/rgb_01021.jpg bedroom/sync_depth_01021.png 518.8579
161
+ bedroom/rgb_01022.jpg bedroom/sync_depth_01022.png 518.8579
162
+ bedroom/rgb_01031.jpg bedroom/sync_depth_01031.png 518.8579
163
+ bedroom/rgb_01032.jpg bedroom/sync_depth_01032.png 518.8579
164
+ bedroom/rgb_01033.jpg bedroom/sync_depth_01033.png 518.8579
165
+ bedroom/rgb_01037.jpg bedroom/sync_depth_01037.png 518.8579
166
+ bedroom/rgb_01038.jpg bedroom/sync_depth_01038.png 518.8579
167
+ bedroom/rgb_01047.jpg bedroom/sync_depth_01047.png 518.8579
168
+ bedroom/rgb_01048.jpg bedroom/sync_depth_01048.png 518.8579
169
+ bedroom/rgb_01051.jpg bedroom/sync_depth_01051.png 518.8579
170
+ bedroom/rgb_01056.jpg bedroom/sync_depth_01056.png 518.8579
171
+ bedroom/rgb_01057.jpg bedroom/sync_depth_01057.png 518.8579
172
+ bedroom/rgb_01074.jpg bedroom/sync_depth_01074.png 518.8579
173
+ bedroom/rgb_01075.jpg bedroom/sync_depth_01075.png 518.8579
174
+ bedroom/rgb_01076.jpg bedroom/sync_depth_01076.png 518.8579
175
+ bedroom/rgb_01077.jpg bedroom/sync_depth_01077.png 518.8579
176
+ bedroom/rgb_01078.jpg bedroom/sync_depth_01078.png 518.8579
177
+ bedroom/rgb_01079.jpg bedroom/sync_depth_01079.png 518.8579
178
+ bedroom/rgb_01080.jpg bedroom/sync_depth_01080.png 518.8579
179
+ bedroom/rgb_01081.jpg bedroom/sync_depth_01081.png 518.8579
180
+ bedroom/rgb_01082.jpg bedroom/sync_depth_01082.png 518.8579
181
+ bedroom/rgb_01083.jpg bedroom/sync_depth_01083.png 518.8579
182
+ bedroom/rgb_01087.jpg bedroom/sync_depth_01087.png 518.8579
183
+ bedroom/rgb_01088.jpg bedroom/sync_depth_01088.png 518.8579
184
+ bedroom/rgb_01089.jpg bedroom/sync_depth_01089.png 518.8579
185
+ bedroom/rgb_01090.jpg bedroom/sync_depth_01090.png 518.8579
186
+ bedroom/rgb_01091.jpg bedroom/sync_depth_01091.png 518.8579
187
+ bedroom/rgb_01093.jpg bedroom/sync_depth_01093.png 518.8579
188
+ bedroom/rgb_01094.jpg bedroom/sync_depth_01094.png 518.8579
189
+ bedroom/rgb_01095.jpg bedroom/sync_depth_01095.png 518.8579
190
+ bedroom/rgb_01097.jpg bedroom/sync_depth_01097.png 518.8579
191
+ bedroom/rgb_01098.jpg bedroom/sync_depth_01098.png 518.8579
192
+ bedroom/rgb_01099.jpg bedroom/sync_depth_01099.png 518.8579
193
+ bedroom/rgb_01100.jpg bedroom/sync_depth_01100.png 518.8579
194
+ bedroom/rgb_01101.jpg bedroom/sync_depth_01101.png 518.8579
195
+ bedroom/rgb_01102.jpg bedroom/sync_depth_01102.png 518.8579
196
+ bedroom/rgb_01103.jpg bedroom/sync_depth_01103.png 518.8579
197
+ bedroom/rgb_01105.jpg bedroom/sync_depth_01105.png 518.8579
198
+ bedroom/rgb_01106.jpg bedroom/sync_depth_01106.png 518.8579
199
+ bedroom/rgb_01107.jpg bedroom/sync_depth_01107.png 518.8579
200
+ bedroom/rgb_01108.jpg bedroom/sync_depth_01108.png 518.8579
201
+ bedroom/rgb_01116.jpg bedroom/sync_depth_01116.png 518.8579
202
+ bedroom/rgb_01117.jpg bedroom/sync_depth_01117.png 518.8579
203
+ bedroom/rgb_01118.jpg bedroom/sync_depth_01118.png 518.8579
204
+ bedroom/rgb_01123.jpg bedroom/sync_depth_01123.png 518.8579
205
+ bedroom/rgb_01124.jpg bedroom/sync_depth_01124.png 518.8579
206
+ bedroom/rgb_01125.jpg bedroom/sync_depth_01125.png 518.8579
207
+ bedroom/rgb_01126.jpg bedroom/sync_depth_01126.png 518.8579
208
+ bedroom/rgb_01127.jpg bedroom/sync_depth_01127.png 518.8579
209
+ bedroom/rgb_01128.jpg bedroom/sync_depth_01128.png 518.8579
210
+ bedroom/rgb_01129.jpg bedroom/sync_depth_01129.png 518.8579
211
+ bedroom/rgb_01130.jpg bedroom/sync_depth_01130.png 518.8579
212
+ bedroom/rgb_01134.jpg bedroom/sync_depth_01134.png 518.8579
213
+ bedroom/rgb_01135.jpg bedroom/sync_depth_01135.png 518.8579
214
+ bedroom/rgb_01143.jpg bedroom/sync_depth_01143.png 518.8579
215
+ bedroom/rgb_01144.jpg bedroom/sync_depth_01144.png 518.8579
216
+ bedroom/rgb_01145.jpg bedroom/sync_depth_01145.png 518.8579
217
+ bedroom/rgb_01146.jpg bedroom/sync_depth_01146.png 518.8579
218
+ bedroom/rgb_01147.jpg bedroom/sync_depth_01147.png 518.8579
219
+ bedroom/rgb_01148.jpg bedroom/sync_depth_01148.png 518.8579
220
+ bedroom/rgb_01149.jpg bedroom/sync_depth_01149.png 518.8579
221
+ bedroom/rgb_01151.jpg bedroom/sync_depth_01151.png 518.8579
222
+ bedroom/rgb_01152.jpg bedroom/sync_depth_01152.png 518.8579
223
+ bedroom/rgb_01153.jpg bedroom/sync_depth_01153.png 518.8579
224
+ bedroom/rgb_01154.jpg bedroom/sync_depth_01154.png 518.8579
225
+ bedroom/rgb_01155.jpg bedroom/sync_depth_01155.png 518.8579
226
+ bedroom/rgb_01156.jpg bedroom/sync_depth_01156.png 518.8579
227
+ bedroom/rgb_01157.jpg bedroom/sync_depth_01157.png 518.8579
228
+ bedroom/rgb_01161.jpg bedroom/sync_depth_01161.png 518.8579
229
+ bedroom/rgb_01162.jpg bedroom/sync_depth_01162.png 518.8579
230
+ bedroom/rgb_01163.jpg bedroom/sync_depth_01163.png 518.8579
231
+ bedroom/rgb_01164.jpg bedroom/sync_depth_01164.png 518.8579
232
+ bedroom/rgb_01165.jpg bedroom/sync_depth_01165.png 518.8579
233
+ bedroom/rgb_01166.jpg bedroom/sync_depth_01166.png 518.8579
234
+ bedroom/rgb_01169.jpg bedroom/sync_depth_01169.png 518.8579
235
+ bedroom/rgb_01170.jpg bedroom/sync_depth_01170.png 518.8579
236
+ bedroom/rgb_01173.jpg bedroom/sync_depth_01173.png 518.8579
237
+ bedroom/rgb_01174.jpg bedroom/sync_depth_01174.png 518.8579
238
+ bedroom/rgb_01175.jpg bedroom/sync_depth_01175.png 518.8579
239
+ bedroom/rgb_01178.jpg bedroom/sync_depth_01178.png 518.8579
240
+ bedroom/rgb_01179.jpg bedroom/sync_depth_01179.png 518.8579
241
+ bedroom/rgb_01180.jpg bedroom/sync_depth_01180.png 518.8579
242
+ bedroom/rgb_01181.jpg bedroom/sync_depth_01181.png 518.8579
243
+ bedroom/rgb_01182.jpg bedroom/sync_depth_01182.png 518.8579
244
+ bedroom/rgb_01183.jpg bedroom/sync_depth_01183.png 518.8579
245
+ bedroom/rgb_01191.jpg bedroom/sync_depth_01191.png 518.8579
246
+ bedroom/rgb_01192.jpg bedroom/sync_depth_01192.png 518.8579
247
+ bedroom/rgb_01193.jpg bedroom/sync_depth_01193.png 518.8579
248
+ bedroom/rgb_01194.jpg bedroom/sync_depth_01194.png 518.8579
249
+ bedroom/rgb_01195.jpg bedroom/sync_depth_01195.png 518.8579
250
+ bookstore/rgb_00083.jpg bookstore/sync_depth_00083.png 518.8579
251
+ bookstore/rgb_00084.jpg bookstore/sync_depth_00084.png 518.8579
252
+ bookstore/rgb_00085.jpg bookstore/sync_depth_00085.png 518.8579
253
+ bookstore/rgb_00086.jpg bookstore/sync_depth_00086.png 518.8579
254
+ bookstore/rgb_00087.jpg bookstore/sync_depth_00087.png 518.8579
255
+ bookstore/rgb_00088.jpg bookstore/sync_depth_00088.png 518.8579
256
+ bookstore/rgb_00089.jpg bookstore/sync_depth_00089.png 518.8579
257
+ bookstore/rgb_00090.jpg bookstore/sync_depth_00090.png 518.8579
258
+ bookstore/rgb_00116.jpg bookstore/sync_depth_00116.png 518.8579
259
+ bookstore/rgb_00117.jpg bookstore/sync_depth_00117.png 518.8579
260
+ bookstore/rgb_00118.jpg bookstore/sync_depth_00118.png 518.8579
261
+ classroom/rgb_00283.jpg classroom/sync_depth_00283.png 518.8579
262
+ classroom/rgb_00284.jpg classroom/sync_depth_00284.png 518.8579
263
+ classroom/rgb_00295.jpg classroom/sync_depth_00295.png 518.8579
264
+ classroom/rgb_00296.jpg classroom/sync_depth_00296.png 518.8579
265
+ classroom/rgb_00297.jpg classroom/sync_depth_00297.png 518.8579
266
+ classroom/rgb_00298.jpg classroom/sync_depth_00298.png 518.8579
267
+ classroom/rgb_00299.jpg classroom/sync_depth_00299.png 518.8579
268
+ classroom/rgb_00300.jpg classroom/sync_depth_00300.png 518.8579
269
+ classroom/rgb_00301.jpg classroom/sync_depth_00301.png 518.8579
270
+ classroom/rgb_00309.jpg classroom/sync_depth_00309.png 518.8579
271
+ classroom/rgb_00310.jpg classroom/sync_depth_00310.png 518.8579
272
+ classroom/rgb_00311.jpg classroom/sync_depth_00311.png 518.8579
273
+ classroom/rgb_00314.jpg classroom/sync_depth_00314.png 518.8579
274
+ classroom/rgb_00315.jpg classroom/sync_depth_00315.png 518.8579
275
+ classroom/rgb_00316.jpg classroom/sync_depth_00316.png 518.8579
276
+ classroom/rgb_00324.jpg classroom/sync_depth_00324.png 518.8579
277
+ classroom/rgb_00325.jpg classroom/sync_depth_00325.png 518.8579
278
+ classroom/rgb_00326.jpg classroom/sync_depth_00326.png 518.8579
279
+ classroom/rgb_00327.jpg classroom/sync_depth_00327.png 518.8579
280
+ classroom/rgb_00328.jpg classroom/sync_depth_00328.png 518.8579
281
+ classroom/rgb_00329.jpg classroom/sync_depth_00329.png 518.8579
282
+ classroom/rgb_00330.jpg classroom/sync_depth_00330.png 518.8579
283
+ classroom/rgb_00331.jpg classroom/sync_depth_00331.png 518.8579
284
+ computer_lab/rgb_00332.jpg computer_lab/sync_depth_00332.png 518.8579
285
+ computer_lab/rgb_00333.jpg computer_lab/sync_depth_00333.png 518.8579
286
+ computer_lab/rgb_00334.jpg computer_lab/sync_depth_00334.png 518.8579
287
+ dining_room/rgb_00548.jpg dining_room/sync_depth_00548.png 518.8579
288
+ dining_room/rgb_00549.jpg dining_room/sync_depth_00549.png 518.8579
289
+ dining_room/rgb_00550.jpg dining_room/sync_depth_00550.png 518.8579
290
+ dining_room/rgb_01346.jpg dining_room/sync_depth_01346.png 518.8579
291
+ dining_room/rgb_01347.jpg dining_room/sync_depth_01347.png 518.8579
292
+ dining_room/rgb_01348.jpg dining_room/sync_depth_01348.png 518.8579
293
+ dining_room/rgb_01352.jpg dining_room/sync_depth_01352.png 518.8579
294
+ dining_room/rgb_01353.jpg dining_room/sync_depth_01353.png 518.8579
295
+ dining_room/rgb_01354.jpg dining_room/sync_depth_01354.png 518.8579
296
+ dining_room/rgb_01355.jpg dining_room/sync_depth_01355.png 518.8579
297
+ dining_room/rgb_01363.jpg dining_room/sync_depth_01363.png 518.8579
298
+ dining_room/rgb_01364.jpg dining_room/sync_depth_01364.png 518.8579
299
+ dining_room/rgb_01367.jpg dining_room/sync_depth_01367.png 518.8579
300
+ dining_room/rgb_01368.jpg dining_room/sync_depth_01368.png 518.8579
301
+ dining_room/rgb_01383.jpg dining_room/sync_depth_01383.png 518.8579
302
+ dining_room/rgb_01384.jpg dining_room/sync_depth_01384.png 518.8579
303
+ dining_room/rgb_01385.jpg dining_room/sync_depth_01385.png 518.8579
304
+ dining_room/rgb_01387.jpg dining_room/sync_depth_01387.png 518.8579
305
+ dining_room/rgb_01388.jpg dining_room/sync_depth_01388.png 518.8579
306
+ dining_room/rgb_01389.jpg dining_room/sync_depth_01389.png 518.8579
307
+ dining_room/rgb_01390.jpg dining_room/sync_depth_01390.png 518.8579
308
+ dining_room/rgb_01393.jpg dining_room/sync_depth_01393.png 518.8579
309
+ dining_room/rgb_01394.jpg dining_room/sync_depth_01394.png 518.8579
310
+ dining_room/rgb_01395.jpg dining_room/sync_depth_01395.png 518.8579
311
+ dining_room/rgb_01396.jpg dining_room/sync_depth_01396.png 518.8579
312
+ dining_room/rgb_01397.jpg dining_room/sync_depth_01397.png 518.8579
313
+ dining_room/rgb_01398.jpg dining_room/sync_depth_01398.png 518.8579
314
+ dining_room/rgb_01399.jpg dining_room/sync_depth_01399.png 518.8579
315
+ dining_room/rgb_01400.jpg dining_room/sync_depth_01400.png 518.8579
316
+ dining_room/rgb_01406.jpg dining_room/sync_depth_01406.png 518.8579
317
+ dining_room/rgb_01407.jpg dining_room/sync_depth_01407.png 518.8579
318
+ dining_room/rgb_01408.jpg dining_room/sync_depth_01408.png 518.8579
319
+ dining_room/rgb_01409.jpg dining_room/sync_depth_01409.png 518.8579
320
+ dining_room/rgb_01410.jpg dining_room/sync_depth_01410.png 518.8579
321
+ dining_room/rgb_01386.jpg dining_room/sync_depth_01386.png 518.8579
322
+ dining_room/rgb_01411.jpg dining_room/sync_depth_01411.png 518.8579
323
+ dining_room/rgb_01412.jpg dining_room/sync_depth_01412.png 518.8579
324
+ dining_room/rgb_01413.jpg dining_room/sync_depth_01413.png 518.8579
325
+ dining_room/rgb_01420.jpg dining_room/sync_depth_01420.png 518.8579
326
+ dining_room/rgb_01421.jpg dining_room/sync_depth_01421.png 518.8579
327
+ dining_room/rgb_01422.jpg dining_room/sync_depth_01422.png 518.8579
328
+ dining_room/rgb_01423.jpg dining_room/sync_depth_01423.png 518.8579
329
+ dining_room/rgb_01429.jpg dining_room/sync_depth_01429.png 518.8579
330
+ dining_room/rgb_01430.jpg dining_room/sync_depth_01430.png 518.8579
331
+ dining_room/rgb_01431.jpg dining_room/sync_depth_01431.png 518.8579
332
+ dining_room/rgb_01432.jpg dining_room/sync_depth_01432.png 518.8579
333
+ dining_room/rgb_01440.jpg dining_room/sync_depth_01440.png 518.8579
334
+ dining_room/rgb_01441.jpg dining_room/sync_depth_01441.png 518.8579
335
+ dining_room/rgb_01442.jpg dining_room/sync_depth_01442.png 518.8579
336
+ dining_room/rgb_01443.jpg dining_room/sync_depth_01443.png 518.8579
337
+ dining_room/rgb_01444.jpg dining_room/sync_depth_01444.png 518.8579
338
+ dining_room/rgb_01445.jpg dining_room/sync_depth_01445.png 518.8579
339
+ dining_room/rgb_01446.jpg dining_room/sync_depth_01446.png 518.8579
340
+ dining_room/rgb_01447.jpg dining_room/sync_depth_01447.png 518.8579
341
+ dining_room/rgb_01448.jpg dining_room/sync_depth_01448.png 518.8579
342
+ foyer/rgb_00350.jpg foyer/sync_depth_00350.png 518.8579
343
+ foyer/rgb_00351.jpg foyer/sync_depth_00351.png 518.8579
344
+ home_office/rgb_00354.jpg home_office/sync_depth_00354.png 518.8579
345
+ home_office/rgb_00355.jpg home_office/sync_depth_00355.png 518.8579
346
+ home_office/rgb_00356.jpg home_office/sync_depth_00356.png 518.8579
347
+ home_office/rgb_00357.jpg home_office/sync_depth_00357.png 518.8579
348
+ home_office/rgb_00358.jpg home_office/sync_depth_00358.png 518.8579
349
+ home_office/rgb_00359.jpg home_office/sync_depth_00359.png 518.8579
350
+ home_office/rgb_00360.jpg home_office/sync_depth_00360.png 518.8579
351
+ home_office/rgb_00361.jpg home_office/sync_depth_00361.png 518.8579
352
+ home_office/rgb_00362.jpg home_office/sync_depth_00362.png 518.8579
353
+ home_office/rgb_00363.jpg home_office/sync_depth_00363.png 518.8579
354
+ home_office/rgb_00383.jpg home_office/sync_depth_00383.png 518.8579
355
+ home_office/rgb_00384.jpg home_office/sync_depth_00384.png 518.8579
356
+ home_office/rgb_00385.jpg home_office/sync_depth_00385.png 518.8579
357
+ home_office/rgb_00386.jpg home_office/sync_depth_00386.png 518.8579
358
+ home_office/rgb_00387.jpg home_office/sync_depth_00387.png 518.8579
359
+ home_office/rgb_00388.jpg home_office/sync_depth_00388.png 518.8579
360
+ home_office/rgb_00389.jpg home_office/sync_depth_00389.png 518.8579
361
+ home_office/rgb_00394.jpg home_office/sync_depth_00394.png 518.8579
362
+ home_office/rgb_00395.jpg home_office/sync_depth_00395.png 518.8579
363
+ home_office/rgb_00396.jpg home_office/sync_depth_00396.png 518.8579
364
+ home_office/rgb_00554.jpg home_office/sync_depth_00554.png 518.8579
365
+ home_office/rgb_00555.jpg home_office/sync_depth_00555.png 518.8579
366
+ home_office/rgb_00556.jpg home_office/sync_depth_00556.png 518.8579
367
+ home_office/rgb_00557.jpg home_office/sync_depth_00557.png 518.8579
368
+ kitchen/rgb_00000.jpg kitchen/sync_depth_00000.png 518.8579
369
+ kitchen/rgb_00001.jpg kitchen/sync_depth_00001.png 518.8579
370
+ kitchen/rgb_00124.jpg kitchen/sync_depth_00124.png 518.8579
371
+ kitchen/rgb_00125.jpg kitchen/sync_depth_00125.png 518.8579
372
+ kitchen/rgb_00126.jpg kitchen/sync_depth_00126.png 518.8579
373
+ kitchen/rgb_00127.jpg kitchen/sync_depth_00127.png 518.8579
374
+ kitchen/rgb_00128.jpg kitchen/sync_depth_00128.png 518.8579
375
+ kitchen/rgb_00130.jpg kitchen/sync_depth_00130.png 518.8579
376
+ kitchen/rgb_00131.jpg kitchen/sync_depth_00131.png 518.8579
377
+ kitchen/rgb_00132.jpg kitchen/sync_depth_00132.png 518.8579
378
+ kitchen/rgb_00133.jpg kitchen/sync_depth_00133.png 518.8579
379
+ kitchen/rgb_00136.jpg kitchen/sync_depth_00136.png 518.8579
380
+ kitchen/rgb_00193.jpg kitchen/sync_depth_00193.png 518.8579
381
+ kitchen/rgb_00194.jpg kitchen/sync_depth_00194.png 518.8579
382
+ kitchen/rgb_00195.jpg kitchen/sync_depth_00195.png 518.8579
383
+ kitchen/rgb_00196.jpg kitchen/sync_depth_00196.png 518.8579
384
+ kitchen/rgb_00197.jpg kitchen/sync_depth_00197.png 518.8579
385
+ kitchen/rgb_00199.jpg kitchen/sync_depth_00199.png 518.8579
386
+ kitchen/rgb_00200.jpg kitchen/sync_depth_00200.png 518.8579
387
+ kitchen/rgb_00201.jpg kitchen/sync_depth_00201.png 518.8579
388
+ kitchen/rgb_00249.jpg kitchen/sync_depth_00249.png 518.8579
389
+ kitchen/rgb_00558.jpg kitchen/sync_depth_00558.png 518.8579
390
+ kitchen/rgb_00559.jpg kitchen/sync_depth_00559.png 518.8579
391
+ kitchen/rgb_00560.jpg kitchen/sync_depth_00560.png 518.8579
392
+ kitchen/rgb_00561.jpg kitchen/sync_depth_00561.png 518.8579
393
+ kitchen/rgb_00562.jpg kitchen/sync_depth_00562.png 518.8579
394
+ kitchen/rgb_00563.jpg kitchen/sync_depth_00563.png 518.8579
395
+ kitchen/rgb_00564.jpg kitchen/sync_depth_00564.png 518.8579
396
+ kitchen/rgb_00565.jpg kitchen/sync_depth_00565.png 518.8579
397
+ kitchen/rgb_00566.jpg kitchen/sync_depth_00566.png 518.8579
398
+ kitchen/rgb_00567.jpg kitchen/sync_depth_00567.png 518.8579
399
+ kitchen/rgb_00568.jpg kitchen/sync_depth_00568.png 518.8579
400
+ kitchen/rgb_00569.jpg kitchen/sync_depth_00569.png 518.8579
401
+ kitchen/rgb_00570.jpg kitchen/sync_depth_00570.png 518.8579
402
+ kitchen/rgb_00198.jpg kitchen/sync_depth_00198.png 518.8579
403
+ kitchen/rgb_00758.jpg kitchen/sync_depth_00758.png 518.8579
404
+ kitchen/rgb_00776.jpg kitchen/sync_depth_00776.png 518.8579
405
+ kitchen/rgb_00811.jpg kitchen/sync_depth_00811.png 518.8579
406
+ kitchen/rgb_00844.jpg kitchen/sync_depth_00844.png 518.8579
407
+ kitchen/rgb_00759.jpg kitchen/sync_depth_00759.png 518.8579
408
+ kitchen/rgb_00760.jpg kitchen/sync_depth_00760.png 518.8579
409
+ kitchen/rgb_00761.jpg kitchen/sync_depth_00761.png 518.8579
410
+ kitchen/rgb_00762.jpg kitchen/sync_depth_00762.png 518.8579
411
+ kitchen/rgb_00763.jpg kitchen/sync_depth_00763.png 518.8579
412
+ kitchen/rgb_00764.jpg kitchen/sync_depth_00764.png 518.8579
413
+ kitchen/rgb_00765.jpg kitchen/sync_depth_00765.png 518.8579
414
+ kitchen/rgb_00766.jpg kitchen/sync_depth_00766.png 518.8579
415
+ kitchen/rgb_00767.jpg kitchen/sync_depth_00767.png 518.8579
416
+ kitchen/rgb_00768.jpg kitchen/sync_depth_00768.png 518.8579
417
+ kitchen/rgb_00769.jpg kitchen/sync_depth_00769.png 518.8579
418
+ kitchen/rgb_00770.jpg kitchen/sync_depth_00770.png 518.8579
419
+ kitchen/rgb_00771.jpg kitchen/sync_depth_00771.png 518.8579
420
+ kitchen/rgb_00772.jpg kitchen/sync_depth_00772.png 518.8579
421
+ kitchen/rgb_00773.jpg kitchen/sync_depth_00773.png 518.8579
422
+ kitchen/rgb_00774.jpg kitchen/sync_depth_00774.png 518.8579
423
+ kitchen/rgb_00775.jpg kitchen/sync_depth_00775.png 518.8579
424
+ kitchen/rgb_00777.jpg kitchen/sync_depth_00777.png 518.8579
425
+ kitchen/rgb_00778.jpg kitchen/sync_depth_00778.png 518.8579
426
+ kitchen/rgb_00779.jpg kitchen/sync_depth_00779.png 518.8579
427
+ kitchen/rgb_00780.jpg kitchen/sync_depth_00780.png 518.8579
428
+ kitchen/rgb_00781.jpg kitchen/sync_depth_00781.png 518.8579
429
+ kitchen/rgb_00782.jpg kitchen/sync_depth_00782.png 518.8579
430
+ kitchen/rgb_00783.jpg kitchen/sync_depth_00783.png 518.8579
431
+ kitchen/rgb_00784.jpg kitchen/sync_depth_00784.png 518.8579
432
+ kitchen/rgb_00785.jpg kitchen/sync_depth_00785.png 518.8579
433
+ kitchen/rgb_00786.jpg kitchen/sync_depth_00786.png 518.8579
434
+ kitchen/rgb_00799.jpg kitchen/sync_depth_00799.png 518.8579
435
+ kitchen/rgb_00800.jpg kitchen/sync_depth_00800.png 518.8579
436
+ kitchen/rgb_00801.jpg kitchen/sync_depth_00801.png 518.8579
437
+ kitchen/rgb_00802.jpg kitchen/sync_depth_00802.png 518.8579
438
+ kitchen/rgb_00803.jpg kitchen/sync_depth_00803.png 518.8579
439
+ kitchen/rgb_00809.jpg kitchen/sync_depth_00809.png 518.8579
440
+ kitchen/rgb_00810.jpg kitchen/sync_depth_00810.png 518.8579
441
+ kitchen/rgb_00812.jpg kitchen/sync_depth_00812.png 518.8579
442
+ kitchen/rgb_00813.jpg kitchen/sync_depth_00813.png 518.8579
443
+ kitchen/rgb_00820.jpg kitchen/sync_depth_00820.png 518.8579
444
+ kitchen/rgb_00821.jpg kitchen/sync_depth_00821.png 518.8579
445
+ kitchen/rgb_00822.jpg kitchen/sync_depth_00822.png 518.8579
446
+ kitchen/rgb_00832.jpg kitchen/sync_depth_00832.png 518.8579
447
+ kitchen/rgb_00833.jpg kitchen/sync_depth_00833.png 518.8579
448
+ kitchen/rgb_00834.jpg kitchen/sync_depth_00834.png 518.8579
449
+ kitchen/rgb_00835.jpg kitchen/sync_depth_00835.png 518.8579
450
+ kitchen/rgb_00836.jpg kitchen/sync_depth_00836.png 518.8579
451
+ kitchen/rgb_00837.jpg kitchen/sync_depth_00837.png 518.8579
452
+ kitchen/rgb_00838.jpg kitchen/sync_depth_00838.png 518.8579
453
+ kitchen/rgb_00839.jpg kitchen/sync_depth_00839.png 518.8579
454
+ kitchen/rgb_00840.jpg kitchen/sync_depth_00840.png 518.8579
455
+ kitchen/rgb_00841.jpg kitchen/sync_depth_00841.png 518.8579
456
+ kitchen/rgb_00842.jpg kitchen/sync_depth_00842.png 518.8579
457
+ kitchen/rgb_00843.jpg kitchen/sync_depth_00843.png 518.8579
458
+ kitchen/rgb_00845.jpg kitchen/sync_depth_00845.png 518.8579
459
+ kitchen/rgb_00849.jpg kitchen/sync_depth_00849.png 518.8579
460
+ kitchen/rgb_00850.jpg kitchen/sync_depth_00850.png 518.8579
461
+ kitchen/rgb_00851.jpg kitchen/sync_depth_00851.png 518.8579
462
+ kitchen/rgb_00856.jpg kitchen/sync_depth_00856.png 518.8579
463
+ kitchen/rgb_00857.jpg kitchen/sync_depth_00857.png 518.8579
464
+ kitchen/rgb_00858.jpg kitchen/sync_depth_00858.png 518.8579
465
+ kitchen/rgb_00859.jpg kitchen/sync_depth_00859.png 518.8579
466
+ kitchen/rgb_00860.jpg kitchen/sync_depth_00860.png 518.8579
467
+ kitchen/rgb_00861.jpg kitchen/sync_depth_00861.png 518.8579
468
+ kitchen/rgb_00868.jpg kitchen/sync_depth_00868.png 518.8579
469
+ kitchen/rgb_00869.jpg kitchen/sync_depth_00869.png 518.8579
470
+ kitchen/rgb_00870.jpg kitchen/sync_depth_00870.png 518.8579
471
+ kitchen/rgb_00905.jpg kitchen/sync_depth_00905.png 518.8579
472
+ kitchen/rgb_00906.jpg kitchen/sync_depth_00906.png 518.8579
473
+ kitchen/rgb_00907.jpg kitchen/sync_depth_00907.png 518.8579
474
+ living_room/rgb_00152.jpg living_room/sync_depth_00152.png 518.8579
475
+ living_room/rgb_00153.jpg living_room/sync_depth_00153.png 518.8579
476
+ living_room/rgb_00154.jpg living_room/sync_depth_00154.png 518.8579
477
+ living_room/rgb_00166.jpg living_room/sync_depth_00166.png 518.8579
478
+ living_room/rgb_00167.jpg living_room/sync_depth_00167.png 518.8579
479
+ living_room/rgb_00168.jpg living_room/sync_depth_00168.png 518.8579
480
+ living_room/rgb_00206.jpg living_room/sync_depth_00206.png 518.8579
481
+ living_room/rgb_00207.jpg living_room/sync_depth_00207.png 518.8579
482
+ living_room/rgb_00208.jpg living_room/sync_depth_00208.png 518.8579
483
+ living_room/rgb_00209.jpg living_room/sync_depth_00209.png 518.8579
484
+ living_room/rgb_00210.jpg living_room/sync_depth_00210.png 518.8579
485
+ living_room/rgb_00211.jpg living_room/sync_depth_00211.png 518.8579
486
+ living_room/rgb_00263.jpg living_room/sync_depth_00263.png 518.8579
487
+ living_room/rgb_00578.jpg living_room/sync_depth_00578.png 518.8579
488
+ living_room/rgb_00579.jpg living_room/sync_depth_00579.png 518.8579
489
+ living_room/rgb_00580.jpg living_room/sync_depth_00580.png 518.8579
490
+ living_room/rgb_00581.jpg living_room/sync_depth_00581.png 518.8579
491
+ living_room/rgb_00590.jpg living_room/sync_depth_00590.png 518.8579
492
+ living_room/rgb_00591.jpg living_room/sync_depth_00591.png 518.8579
493
+ living_room/rgb_00592.jpg living_room/sync_depth_00592.png 518.8579
494
+ living_room/rgb_00593.jpg living_room/sync_depth_00593.png 518.8579
495
+ living_room/rgb_00602.jpg living_room/sync_depth_00602.png 518.8579
496
+ living_room/rgb_00603.jpg living_room/sync_depth_00603.png 518.8579
497
+ living_room/rgb_00604.jpg living_room/sync_depth_00604.png 518.8579
498
+ living_room/rgb_00605.jpg living_room/sync_depth_00605.png 518.8579
499
+ living_room/rgb_00606.jpg living_room/sync_depth_00606.png 518.8579
500
+ living_room/rgb_01200.jpg living_room/sync_depth_01200.png 518.8579
501
+ living_room/rgb_01201.jpg living_room/sync_depth_01201.png 518.8579
502
+ living_room/rgb_01202.jpg living_room/sync_depth_01202.png 518.8579
503
+ living_room/rgb_01203.jpg living_room/sync_depth_01203.png 518.8579
504
+ living_room/rgb_01204.jpg living_room/sync_depth_01204.png 518.8579
505
+ living_room/rgb_01205.jpg living_room/sync_depth_01205.png 518.8579
506
+ living_room/rgb_01206.jpg living_room/sync_depth_01206.png 518.8579
507
+ living_room/rgb_01207.jpg living_room/sync_depth_01207.png 518.8579
508
+ living_room/rgb_00582.jpg living_room/sync_depth_00582.png 518.8579
509
+ living_room/rgb_01208.jpg living_room/sync_depth_01208.png 518.8579
510
+ living_room/rgb_01247.jpg living_room/sync_depth_01247.png 518.8579
511
+ living_room/rgb_01277.jpg living_room/sync_depth_01277.png 518.8579
512
+ living_room/rgb_01302.jpg living_room/sync_depth_01302.png 518.8579
513
+ living_room/rgb_01209.jpg living_room/sync_depth_01209.png 518.8579
514
+ living_room/rgb_01210.jpg living_room/sync_depth_01210.png 518.8579
515
+ living_room/rgb_01211.jpg living_room/sync_depth_01211.png 518.8579
516
+ living_room/rgb_01215.jpg living_room/sync_depth_01215.png 518.8579
517
+ living_room/rgb_01216.jpg living_room/sync_depth_01216.png 518.8579
518
+ living_room/rgb_01217.jpg living_room/sync_depth_01217.png 518.8579
519
+ living_room/rgb_01218.jpg living_room/sync_depth_01218.png 518.8579
520
+ living_room/rgb_01219.jpg living_room/sync_depth_01219.png 518.8579
521
+ living_room/rgb_01225.jpg living_room/sync_depth_01225.png 518.8579
522
+ living_room/rgb_01226.jpg living_room/sync_depth_01226.png 518.8579
523
+ living_room/rgb_01227.jpg living_room/sync_depth_01227.png 518.8579
524
+ living_room/rgb_01228.jpg living_room/sync_depth_01228.png 518.8579
525
+ living_room/rgb_01229.jpg living_room/sync_depth_01229.png 518.8579
526
+ living_room/rgb_01232.jpg living_room/sync_depth_01232.png 518.8579
527
+ living_room/rgb_01233.jpg living_room/sync_depth_01233.png 518.8579
528
+ living_room/rgb_01234.jpg living_room/sync_depth_01234.png 518.8579
529
+ living_room/rgb_01246.jpg living_room/sync_depth_01246.png 518.8579
530
+ living_room/rgb_01248.jpg living_room/sync_depth_01248.png 518.8579
531
+ living_room/rgb_01249.jpg living_room/sync_depth_01249.png 518.8579
532
+ living_room/rgb_01253.jpg living_room/sync_depth_01253.png 518.8579
533
+ living_room/rgb_01254.jpg living_room/sync_depth_01254.png 518.8579
534
+ living_room/rgb_01255.jpg living_room/sync_depth_01255.png 518.8579
535
+ living_room/rgb_01256.jpg living_room/sync_depth_01256.png 518.8579
536
+ living_room/rgb_01257.jpg living_room/sync_depth_01257.png 518.8579
537
+ living_room/rgb_01258.jpg living_room/sync_depth_01258.png 518.8579
538
+ living_room/rgb_01259.jpg living_room/sync_depth_01259.png 518.8579
539
+ living_room/rgb_01260.jpg living_room/sync_depth_01260.png 518.8579
540
+ living_room/rgb_01261.jpg living_room/sync_depth_01261.png 518.8579
541
+ living_room/rgb_01262.jpg living_room/sync_depth_01262.png 518.8579
542
+ living_room/rgb_01263.jpg living_room/sync_depth_01263.png 518.8579
543
+ living_room/rgb_01264.jpg living_room/sync_depth_01264.png 518.8579
544
+ living_room/rgb_01274.jpg living_room/sync_depth_01274.png 518.8579
545
+ living_room/rgb_01275.jpg living_room/sync_depth_01275.png 518.8579
546
+ living_room/rgb_01276.jpg living_room/sync_depth_01276.png 518.8579
547
+ living_room/rgb_01278.jpg living_room/sync_depth_01278.png 518.8579
548
+ living_room/rgb_01279.jpg living_room/sync_depth_01279.png 518.8579
549
+ living_room/rgb_01284.jpg living_room/sync_depth_01284.png 518.8579
550
+ living_room/rgb_01285.jpg living_room/sync_depth_01285.png 518.8579
551
+ living_room/rgb_01286.jpg living_room/sync_depth_01286.png 518.8579
552
+ living_room/rgb_01287.jpg living_room/sync_depth_01287.png 518.8579
553
+ living_room/rgb_01288.jpg living_room/sync_depth_01288.png 518.8579
554
+ living_room/rgb_01289.jpg living_room/sync_depth_01289.png 518.8579
555
+ living_room/rgb_01290.jpg living_room/sync_depth_01290.png 518.8579
556
+ living_room/rgb_01291.jpg living_room/sync_depth_01291.png 518.8579
557
+ living_room/rgb_01292.jpg living_room/sync_depth_01292.png 518.8579
558
+ living_room/rgb_01293.jpg living_room/sync_depth_01293.png 518.8579
559
+ living_room/rgb_01294.jpg living_room/sync_depth_01294.png 518.8579
560
+ living_room/rgb_01296.jpg living_room/sync_depth_01296.png 518.8579
561
+ living_room/rgb_01297.jpg living_room/sync_depth_01297.png 518.8579
562
+ living_room/rgb_01298.jpg living_room/sync_depth_01298.png 518.8579
563
+ living_room/rgb_01301.jpg living_room/sync_depth_01301.png 518.8579
564
+ living_room/rgb_01303.jpg living_room/sync_depth_01303.png 518.8579
565
+ living_room/rgb_01304.jpg living_room/sync_depth_01304.png 518.8579
566
+ living_room/rgb_01305.jpg living_room/sync_depth_01305.png 518.8579
567
+ living_room/rgb_01306.jpg living_room/sync_depth_01306.png 518.8579
568
+ living_room/rgb_01307.jpg living_room/sync_depth_01307.png 518.8579
569
+ living_room/rgb_01313.jpg living_room/sync_depth_01313.png 518.8579
570
+ living_room/rgb_01314.jpg living_room/sync_depth_01314.png 518.8579
571
+ living_room/rgb_01328.jpg living_room/sync_depth_01328.png 518.8579
572
+ living_room/rgb_01329.jpg living_room/sync_depth_01329.png 518.8579
573
+ living_room/rgb_01330.jpg living_room/sync_depth_01330.png 518.8579
574
+ living_room/rgb_01331.jpg living_room/sync_depth_01331.png 518.8579
575
+ living_room/rgb_01334.jpg living_room/sync_depth_01334.png 518.8579
576
+ living_room/rgb_01335.jpg living_room/sync_depth_01335.png 518.8579
577
+ living_room/rgb_01336.jpg living_room/sync_depth_01336.png 518.8579
578
+ living_room/rgb_01337.jpg living_room/sync_depth_01337.png 518.8579
579
+ living_room/rgb_01338.jpg living_room/sync_depth_01338.png 518.8579
580
+ living_room/rgb_01339.jpg living_room/sync_depth_01339.png 518.8579
581
+ office/rgb_00008.jpg office/sync_depth_00008.png 518.8579
582
+ office/rgb_00013.jpg office/sync_depth_00013.png 518.8579
583
+ office/rgb_00014.jpg office/sync_depth_00014.png 518.8579
584
+ office/rgb_00015.jpg office/sync_depth_00015.png 518.8579
585
+ office/rgb_00016.jpg office/sync_depth_00016.png 518.8579
586
+ office/rgb_00017.jpg office/sync_depth_00017.png 518.8579
587
+ office/rgb_00020.jpg office/sync_depth_00020.png 518.8579
588
+ office/rgb_00027.jpg office/sync_depth_00027.png 518.8579
589
+ office/rgb_00028.jpg office/sync_depth_00028.png 518.8579
590
+ office/rgb_00029.jpg office/sync_depth_00029.png 518.8579
591
+ office/rgb_00030.jpg office/sync_depth_00030.png 518.8579
592
+ office/rgb_00031.jpg office/sync_depth_00031.png 518.8579
593
+ office/rgb_00032.jpg office/sync_depth_00032.png 518.8579
594
+ office/rgb_00033.jpg office/sync_depth_00033.png 518.8579
595
+ office/rgb_00034.jpg office/sync_depth_00034.png 518.8579
596
+ office/rgb_00035.jpg office/sync_depth_00035.png 518.8579
597
+ office/rgb_00036.jpg office/sync_depth_00036.png 518.8579
598
+ office/rgb_00038.jpg office/sync_depth_00038.png 518.8579
599
+ office/rgb_00039.jpg office/sync_depth_00039.png 518.8579
600
+ office/rgb_00040.jpg office/sync_depth_00040.png 518.8579
601
+ office/rgb_00041.jpg office/sync_depth_00041.png 518.8579
602
+ office/rgb_00042.jpg office/sync_depth_00042.png 518.8579
603
+ office/rgb_00270.jpg office/sync_depth_00270.png 518.8579
604
+ office/rgb_00271.jpg office/sync_depth_00271.png 518.8579
605
+ office/rgb_00611.jpg office/sync_depth_00611.png 518.8579
606
+ office/rgb_00612.jpg office/sync_depth_00612.png 518.8579
607
+ office/rgb_00616.jpg office/sync_depth_00616.png 518.8579
608
+ office/rgb_00617.jpg office/sync_depth_00617.png 518.8579
609
+ office/rgb_00618.jpg office/sync_depth_00618.png 518.8579
610
+ office/rgb_00619.jpg office/sync_depth_00619.png 518.8579
611
+ office/rgb_00620.jpg office/sync_depth_00620.png 518.8579
612
+ office/rgb_00632.jpg office/sync_depth_00632.png 518.8579
613
+ office/rgb_00633.jpg office/sync_depth_00633.png 518.8579
614
+ office/rgb_00634.jpg office/sync_depth_00634.png 518.8579
615
+ office/rgb_00635.jpg office/sync_depth_00635.png 518.8579
616
+ office/rgb_00636.jpg office/sync_depth_00636.png 518.8579
617
+ office/rgb_00637.jpg office/sync_depth_00637.png 518.8579
618
+ office/rgb_00037.jpg office/sync_depth_00037.png 518.8579
619
+ office_kitchen/rgb_00410.jpg office_kitchen/sync_depth_00410.png 518.8579
620
+ office_kitchen/rgb_00411.jpg office_kitchen/sync_depth_00411.png 518.8579
621
+ office_kitchen/rgb_00412.jpg office_kitchen/sync_depth_00412.png 518.8579
622
+ office_kitchen/rgb_00413.jpg office_kitchen/sync_depth_00413.png 518.8579
623
+ playroom/rgb_00429.jpg playroom/sync_depth_00429.png 518.8579
624
+ playroom/rgb_00430.jpg playroom/sync_depth_00430.png 518.8579
625
+ playroom/rgb_00431.jpg playroom/sync_depth_00431.png 518.8579
626
+ playroom/rgb_00432.jpg playroom/sync_depth_00432.png 518.8579
627
+ playroom/rgb_00433.jpg playroom/sync_depth_00433.png 518.8579
628
+ playroom/rgb_00434.jpg playroom/sync_depth_00434.png 518.8579
629
+ playroom/rgb_00440.jpg playroom/sync_depth_00440.png 518.8579
630
+ playroom/rgb_00441.jpg playroom/sync_depth_00441.png 518.8579
631
+ playroom/rgb_00442.jpg playroom/sync_depth_00442.png 518.8579
632
+ playroom/rgb_00443.jpg playroom/sync_depth_00443.png 518.8579
633
+ playroom/rgb_00444.jpg playroom/sync_depth_00444.png 518.8579
634
+ playroom/rgb_00445.jpg playroom/sync_depth_00445.png 518.8579
635
+ playroom/rgb_00446.jpg playroom/sync_depth_00446.png 518.8579
636
+ playroom/rgb_00447.jpg playroom/sync_depth_00447.png 518.8579
637
+ reception_room/rgb_00461.jpg reception_room/sync_depth_00461.png 518.8579
638
+ reception_room/rgb_00462.jpg reception_room/sync_depth_00462.png 518.8579
639
+ reception_room/rgb_00463.jpg reception_room/sync_depth_00463.png 518.8579
640
+ reception_room/rgb_00464.jpg reception_room/sync_depth_00464.png 518.8579
641
+ reception_room/rgb_00465.jpg reception_room/sync_depth_00465.png 518.8579
642
+ study/rgb_00468.jpg study/sync_depth_00468.png 518.8579
643
+ study/rgb_00469.jpg study/sync_depth_00469.png 518.8579
644
+ study/rgb_00470.jpg study/sync_depth_00470.png 518.8579
645
+ study/rgb_00471.jpg study/sync_depth_00471.png 518.8579
646
+ study/rgb_00472.jpg study/sync_depth_00472.png 518.8579
647
+ study/rgb_00473.jpg study/sync_depth_00473.png 518.8579
648
+ study/rgb_00474.jpg study/sync_depth_00474.png 518.8579
649
+ study/rgb_00475.jpg study/sync_depth_00475.png 518.8579
650
+ study/rgb_00476.jpg study/sync_depth_00476.png 518.8579
651
+ study/rgb_00643.jpg study/sync_depth_00643.png 518.8579
652
+ study/rgb_00644.jpg study/sync_depth_00644.png 518.8579
653
+ study_room/rgb_00272.jpg study_room/sync_depth_00272.png 518.8579
654
+ study_room/rgb_00278.jpg study_room/sync_depth_00278.png 518.8579
ZoeDepth/train_test_inputs/nyudepthv2_train_files_with_gt.txt ADDED
The diff for this file is too large to render. See raw diff
 
ZoeDepth/ui/app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import gradio as gr
26
+ import torch
27
+
28
+ from .gradio_depth_pred import create_demo as create_depth_pred_demo
29
+ from .gradio_im_to_3d import create_demo as create_im_to_3d_demo
30
+ from .gradio_pano_to_3d import create_demo as create_pano_to_3d_demo
31
+
32
+
33
# Height limits for the image widgets, injected into the page through gr.Blocks(css=...).
css = """
#img-display-container {
max-height: 50vh;
}
#img-display-input {
max-height: 40vh;
}
#img-display-output {
max-height: 40vh;
}

"""

# Run on the GPU when one is available, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# The pretrained ZoeD_N model is fetched through torch.hub at import time and
# shared by every tab below.
model = torch.hub.load('isl-org/ZoeDepth', "ZoeD_N", pretrained=True)
model = model.to(DEVICE).eval()

title = "# ZoeDepth"
description = """Official demo for **ZoeDepth: Zero-shot Transfer by Combining Relative and Metric Depth**.

ZoeDepth is a deep learning model for metric depth estimation from a single image.

Please refer to our [paper](https://arxiv.org/abs/2302.12288) or [github](https://github.com/isl-org/ZoeDepth) for more details."""

# (tab label, function that builds the tab content), in display order.
_TABS = (
    ("Depth Prediction", create_depth_pred_demo),
    ("Image to 3D", create_im_to_3d_demo),
    ("360 Panorama to 3D", create_pano_to_3d_demo),
)

with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    for _tab_label, _build_tab in _TABS:
        with gr.Tab(_tab_label):
            _build_tab(model)

if __name__ == '__main__':
    demo.queue().launch()
ZoeDepth/ui/gradio_depth_pred.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import gradio as gr
26
+ from zoedepth.utils.misc import colorize
27
+ from PIL import Image
28
+ import tempfile
29
+
30
def predict_depth(model, image):
    """Run ``model`` on one image and hand back its prediction.

    Args:
        model: Object exposing an ``infer_pil(image)`` method.
        image: Image passed to ``model.infer_pil`` unchanged.

    Returns:
        Whatever ``model.infer_pil(image)`` returns.
    """
    return model.infer_pil(image)
33
+
34
def create_demo(model):
    """Build the "Depth Prediction" widgets inside the currently open gradio context.

    Creates an input image, a depth-map image, a downloadable raw-depth file
    and a submit button, and wires the button to run ``model`` on the input.

    Args:
        model: Depth model forwarded to ``predict_depth``.
    """
    gr.Markdown("### Depth Prediction demo")
    with gr.Row():
        input_image = gr.Image(label="Input Image", type='pil', elem_id='img-display-input').style(height="auto")
        depth_image = gr.Image(label="Depth Map", elem_id='img-display-output')
    raw_file = gr.File(label="16-bit raw depth, multiplier:256")
    submit = gr.Button("Submit")

    def on_submit(image):
        """Return ``[colorized depth, path of the 16-bit raw depth PNG]`` for ``image``."""
        depth = predict_depth(model, image)
        colored_depth = colorize(depth, cmap='gray_r')
        # Clip before the cast so out-of-range values saturate instead of
        # wrapping around in uint16 (65535 / 256 is the largest storable depth).
        raw_depth = Image.fromarray((depth * 256).clip(0, 65535).astype('uint16'))
        # Only reserve a unique path here and close the handle right away:
        # leaving it open leaks a descriptor per request and can block the
        # write below on platforms that lock open files.
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
            raw_path = tmp.name
        raw_depth.save(raw_path)
        return [colored_depth, raw_path]

    submit.click(on_submit, inputs=[input_image], outputs=[depth_image, raw_file])
    # examples = gr.Examples(examples=["examples/person_1.jpeg", "examples/person_2.jpeg", "examples/person-leaves.png", "examples/living-room.jpeg"],
    #                        inputs=[input_image])
ZoeDepth/ui/gradio_im_to_3d.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import gradio as gr
26
+ import numpy as np
27
+ import trimesh
28
+ from zoedepth.utils.geometry import depth_to_points, create_triangles
29
+ from functools import partial
30
+ import tempfile
31
+
32
+
33
def depth_edges_mask(depth, threshold=0.05):
    """Returns a mask of edges in the depth map.
    Args:
        depth: 2D numpy array of shape (H, W) with dtype float32.
        threshold: Gradient magnitude above which a pixel is marked as an
            edge. Defaults to 0.05, the value that used to be hard-coded.
    Returns:
        mask: 2D numpy array of shape (H, W) with dtype bool.
    """
    # np.gradient returns one array per axis, in axis order: the derivative
    # along rows (y) comes first, then the one along columns (x).
    depth_dy, depth_dx = np.gradient(depth)
    # Gradient magnitude at every pixel.
    depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2)
    # Pixels whose depth changes faster than the threshold are edges.
    mask = depth_grad > threshold
    return mask
47
+
48
+
49
def predict_depth(model, image):
    """Return the depth predicted by ``model`` for ``image``.

    Thin wrapper around ``model.infer_pil``: the image is passed through
    unchanged and the method's return value is handed back as is.
    """
    return model.infer_pil(image)
52
+
53
def get_mesh(model, image, keep_edges=False):
    """Turn a single image into a coloured triangle mesh saved as a ``.glb`` file.

    Args:
        model: Depth model forwarded to ``predict_depth``.
        image: PIL image. It is not modified; a converted copy is used.
        keep_edges: When False, the inverse of ``depth_edges_mask(depth)`` is
            passed as ``mask`` to ``create_triangles``; when True no mask is
            passed.

    Returns:
        Path of the temporary ``.glb`` file holding the exported mesh. The
        file is not deleted automatically.
    """
    # convert() always returns a new image, so the in-place thumbnail() below
    # no longer shrinks the caller's object. Forcing RGB also guarantees the
    # three channels that the colour reshape further down relies on.
    image = image.convert("RGB")
    image.thumbnail((1024, 1024))  # limit the size of the input image
    depth = predict_depth(model, image)

    # One vertex per pixel, flattened to an (N, 3) array.
    verts = depth_to_points(depth[None]).reshape(-1, 3)

    pixels = np.array(image)
    height, width = pixels.shape[0], pixels.shape[1]
    if keep_edges:
        triangles = create_triangles(height, width)
    else:
        triangles = create_triangles(height, width, mask=~depth_edges_mask(depth))
    # Vertex colours are the RGB values of the image, in the same pixel order.
    colors = pixels.reshape(-1, 3)
    mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors)

    # Save as glb. Only reserve a unique path and close the handle before
    # exporting: leaving it open leaks a file descriptor on every request.
    with tempfile.NamedTemporaryFile(suffix='.glb', delete=False) as glb_file:
        glb_path = glb_file.name
    mesh.export(glb_path)
    return glb_path
77
+
78
def create_demo(model):
    """Build the "Image to 3D mesh" widgets inside the currently open gradio context.

    Args:
        model: Depth model bound as the first argument of ``get_mesh``.
    """
    gr.Markdown("### Image to 3D mesh")
    gr.Markdown("Convert a single 2D image to a 3D mesh")

    # White background for the 3D viewer (RGBA).
    white = [1.0, 1.0, 1.0, 1.0]
    with gr.Row():
        image = gr.Image(label="Input Image", type='pil')
        result = gr.Model3D(label="3d mesh reconstruction", clear_color=white)

    checkbox = gr.Checkbox(label="Keep occlusion edges", value=False)
    submit = gr.Button("Submit")

    # The click handler receives (image, checkbox value); the model is pre-bound.
    build_mesh = partial(get_mesh, model)
    submit.click(build_mesh, inputs=[image, checkbox], outputs=[result])
    # examples = gr.Examples(examples=["examples/aerial_beach.jpeg", "examples/mountains.jpeg", "examples/person_1.jpeg", "examples/ancient-carved.jpeg"],
    #                        inputs=[image])
93
+
ZoeDepth/ui/gradio_pano_to_3d.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import gradio as gr
26
+ import numpy as np
27
+ import trimesh
28
+ from zoedepth.utils.geometry import create_triangles
29
+ from functools import partial
30
+ import tempfile
31
+
32
def depth_edges_mask(depth, threshold=0.05):
    """Returns a mask of edges in the depth map.
    Args:
        depth: 2D numpy array of shape (H, W) with dtype float32.
        threshold: Gradient magnitude above which a pixel is marked as an
            edge. Defaults to 0.05, the value that used to be hard-coded.
    Returns:
        mask: 2D numpy array of shape (H, W) with dtype bool.
    """
    # np.gradient returns one array per axis, in axis order: the derivative
    # along rows (y) comes first, then the one along columns (x).
    depth_dy, depth_dx = np.gradient(depth)
    # Gradient magnitude at every pixel.
    depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2)
    # Pixels whose depth changes faster than the threshold are edges.
    mask = depth_grad > threshold
    return mask
46
+
47
+
48
def pano_depth_to_world_points(depth):
    """
    Convert a 360 depth map to 3D points.

    The 2D depth is taken to be an equirectangular projection of a spherical
    image, and every depth value is used as the radius of its pixel.

    Columns are spread evenly over longitude -pi .. pi and rows over
    latitude -pi/2 .. pi/2 (both ends included).

    Returns:
        Array of shape (H * W, 3) holding x, y, z per pixel in row-major order.
    """
    n_rows, n_cols = depth.shape[0], depth.shape[1]

    # Angular coordinate of every pixel, flattened in the same order as depth.
    lon_axis = np.linspace(-np.pi, np.pi, n_cols)
    lat_axis = np.linspace(-np.pi/2, np.pi/2, n_rows)
    lon_grid, lat_grid = np.meshgrid(lon_axis, lat_axis)
    lon_flat = lon_grid.flatten()
    lat_flat = lat_grid.flatten()

    # Depth is treated as the distance from the sphere centre.
    radius = depth.flatten()

    # Spherical -> cartesian.
    x = radius * np.cos(lat_flat) * np.cos(lon_flat)
    y = radius * np.cos(lat_flat) * np.sin(lon_flat)
    z = radius * np.sin(lat_flat)

    return np.column_stack((x, y, z))
76
+
77
+
78
def predict_depth(model, image):
    """Delegate depth estimation for ``image`` to ``model.infer_pil``.

    Returns the value produced by ``model.infer_pil(image)`` untouched.
    """
    return model.infer_pil(image)
81
+
82
def get_mesh(model, image, keep_edges=False):
    """Turn a 360 panorama into a coloured triangle mesh saved as a ``.glb`` file.

    Args:
        model: Depth model forwarded to ``predict_depth``.
        image: PIL image. It is not modified; a converted copy is used.
        keep_edges: When False, the inverse of ``depth_edges_mask(depth)`` is
            passed as ``mask`` to ``create_triangles``; when True no mask is
            passed.

    Returns:
        Path of the temporary ``.glb`` file holding the exported mesh. The
        file is not deleted automatically.
    """
    # convert() always returns a new image, so the in-place thumbnail() below
    # no longer shrinks the caller's object. Forcing RGB also guarantees the
    # three channels that the colour reshape further down relies on.
    image = image.convert("RGB")
    image.thumbnail((1024, 1024))  # limit the size of the image
    depth = predict_depth(model, image)

    # One vertex per pixel, with the predicted depth used as the radius.
    verts = pano_depth_to_world_points(depth).reshape(-1, 3)

    pixels = np.array(image)
    height, width = pixels.shape[0], pixels.shape[1]
    if keep_edges:
        triangles = create_triangles(height, width)
    else:
        triangles = create_triangles(height, width, mask=~depth_edges_mask(depth))
    # Vertex colours are the RGB values of the image, in the same pixel order.
    colors = pixels.reshape(-1, 3)
    mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors)

    # Save as glb. Only reserve a unique path and close the handle before
    # exporting: leaving it open leaks a file descriptor on every request.
    with tempfile.NamedTemporaryFile(suffix='.glb', delete=False) as glb_file:
        glb_path = glb_file.name
    mesh.export(glb_path)
    return glb_path
105
+
106
def create_demo(model):
    """Build the "Panorama to 3D mesh" widgets inside the currently open gradio context.

    Args:
        model: Depth model bound as the first argument of ``get_mesh``.
    """
    gr.Markdown("### Panorama to 3D mesh")
    gr.Markdown("Convert a 360 spherical panorama to a 3D mesh")
    gr.Markdown("ZoeDepth was not trained on panoramic images. It doesn't know anything about panoramas or spherical projection. Here, we just treat the estimated depth as radius and some projection errors are expected. Nonetheless, ZoeDepth still works surprisingly well on 360 reconstruction.")

    # White background for the 3D viewer (RGBA).
    white = [1.0, 1.0, 1.0, 1.0]
    with gr.Row():
        input_image = gr.Image(label="Input Image", type='pil')
        result = gr.Model3D(label="3d mesh reconstruction", clear_color=white)

    # Occlusion edges are kept by default for panoramas.
    checkbox = gr.Checkbox(label="Keep occlusion edges", value=True)
    submit = gr.Button("Submit")

    # The click handler receives (image, checkbox value); the model is pre-bound.
    build_mesh = partial(get_mesh, model)
    submit.click(build_mesh, inputs=[input_image, checkbox], outputs=[result])
    # examples = gr.Examples(examples=["examples/pano_1.jpeg", "examples/pano_2.jpeg", "examples/pano_3.jpeg"],
    #                        inputs=[input_image])
ZoeDepth/ui/ui_requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ trimesh==3.9.42
ZoeDepth/zoedepth/data/__init__.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
ZoeDepth/zoedepth/data/data_mono.py ADDED
@@ -0,0 +1,573 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ # This file is partly inspired from BTS (https://github.com/cleinc/bts/blob/master/pytorch/bts_dataloader.py); author: Jin Han Lee
26
+
27
+ import itertools
28
+ import os
29
+ import random
30
+
31
+ import numpy as np
32
+ import cv2
33
+ import torch
34
+ import torch.nn as nn
35
+ import torch.utils.data.distributed
36
+ from zoedepth.utils.easydict import EasyDict as edict
37
+ from PIL import Image, ImageOps
38
+ from torch.utils.data import DataLoader, Dataset
39
+ from torchvision import transforms
40
+
41
+ from zoedepth.utils.config import change_dataset
42
+
43
+ from .ddad import get_ddad_loader
44
+ from .diml_indoor_test import get_diml_indoor_loader
45
+ from .diml_outdoor_test import get_diml_outdoor_loader
46
+ from .diode import get_diode_loader
47
+ from .hypersim import get_hypersim_loader
48
+ from .ibims import get_ibims_loader
49
+ from .sun_rgbd_loader import get_sunrgbd_loader
50
+ from .vkitti import get_vkitti_loader
51
+ from .vkitti2 import get_vkitti2_loader
52
+
53
+ from .preprocess import CropParams, get_white_border, get_black_border
54
+
55
+
56
def _is_pil_image(img):
    """Tell whether ``img`` is an instance of ``PIL.Image.Image``."""
    is_pil = isinstance(img, Image.Image)
    return is_pil
58
+
59
+
60
+ def _is_numpy_image(img):
61
+ return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
62
+
63
+
64
def preprocessing_transforms(mode, **kwargs):
    """Return the default preprocessing pipeline for ``mode``.

    The pipeline is a ``transforms.Compose`` holding a single ``ToTensor``
    step; ``mode`` and any extra keyword arguments are forwarded to it.
    """
    to_tensor = ToTensor(mode=mode, **kwargs)
    return transforms.Compose([to_tensor])
68
+
69
+
70
class DepthDataLoader(object):
    """Builds the data loader for the dataset named in ``config`` and exposes it as ``self.data``."""

    def __init__(self, config, mode, device='cpu', transform=None, **kwargs):
        """
        Data loader for depth datasets

        Args:
            config (dict): Config dictionary. Refer to utils/config.py
            mode (str): "train", "online_eval" or "test". Any other value only
                prints a message and leaves ``self.data`` unset.
            device (str, optional): Forwarded to the training dataset. Defaults to 'cpu'.
            transform (torchvision.transforms, optional): Transform to apply to the data.
                Defaults to None, in which case ``preprocessing_transforms`` is used.
            **kwargs: Only ``shuffle_test`` (default False) is read; it controls
                shuffling in "online_eval" mode.
        """

        self.config = config

        # Evaluation-only datasets come with their own loader factories and
        # ignore ``mode`` and ``transform`` entirely.
        if config.dataset == 'ibims':
            self.data = get_ibims_loader(config, batch_size=1, num_workers=1)
            return

        if config.dataset == 'sunrgbd':
            self.data = get_sunrgbd_loader(
                data_dir_root=config.sunrgbd_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'diml_indoor':
            self.data = get_diml_indoor_loader(
                data_dir_root=config.diml_indoor_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'diml_outdoor':
            self.data = get_diml_outdoor_loader(
                data_dir_root=config.diml_outdoor_root, batch_size=1, num_workers=1)
            return

        if "diode" in config.dataset:
            self.data = get_diode_loader(
                config[config.dataset+"_root"], batch_size=1, num_workers=1)
            return

        if config.dataset == 'hypersim_test':
            self.data = get_hypersim_loader(
                config.hypersim_test_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'vkitti':
            self.data = get_vkitti_loader(
                config.vkitti_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'vkitti2':
            self.data = get_vkitti2_loader(
                config.vkitti2_root, batch_size=1, num_workers=1)
            return

        if config.dataset == 'ddad':
            self.data = get_ddad_loader(config.ddad_root, resize_shape=(
                352, 1216), batch_size=1, num_workers=1)
            return

        # The configured image size is only honoured when input resizing is on.
        img_size = self.config.get("img_size", None)
        img_size = img_size if self.config.get(
            "do_input_resize", False) else None

        if transform is None:
            transform = preprocessing_transforms(mode, size=img_size)

        if mode == 'train':
            self.training_samples = DataLoadPreprocess(
                config, mode, transform=transform, device=device)

            if config.distributed:
                self.train_sampler = torch.utils.data.distributed.DistributedSampler(
                    self.training_samples)
            else:
                self.train_sampler = None

            self.data = DataLoader(self.training_samples,
                                   batch_size=config.batch_size,
                                   # shuffle and sampler are mutually exclusive
                                   shuffle=(self.train_sampler is None),
                                   num_workers=config.workers,
                                   pin_memory=True,
                                   # DataLoader rejects persistent workers when
                                   # loading happens in the main process
                                   # (num_workers == 0), so enable them only
                                   # when worker processes exist.
                                   persistent_workers=config.workers > 0,
                                   # prefetch_factor=2,
                                   sampler=self.train_sampler)

        elif mode == 'online_eval':
            self.testing_samples = DataLoadPreprocess(
                config, mode, transform=transform)
            # No sampler, distributed or not: the whole test set is given to
            # all processes (and evaluation is reported only on one).
            self.eval_sampler = None
            self.data = DataLoader(self.testing_samples, 1,
                                   shuffle=kwargs.get("shuffle_test", False),
                                   num_workers=1,
                                   pin_memory=False,
                                   sampler=self.eval_sampler)

        elif mode == 'test':
            self.testing_samples = DataLoadPreprocess(
                config, mode, transform=transform)
            self.data = DataLoader(self.testing_samples,
                                   1, shuffle=False, num_workers=1)

        else:
            print(
                'mode should be one of \'train, test, online_eval\'. Got {}'.format(mode))
179
+
180
+
181
def repetitive_roundrobin(*iterables):
    """
    Cycle through the iterables sample-wise: first sample of each iterable in
    turn, then the second sample of each, and so on.

    An iterable that runs out is restarted from its first element, so shorter
    iterables repeat. The loop only notices that an iterable has ended when it
    asks it for one more item, and it always finishes the round it is in, so
    the longest iterable is restarted once as well and one extra round is
    produced:

    repetitive_roundrobin('ABC', 'D', 'EF') --> A D E B D F C D E A D F
    """
    sources = [iter(it) for it in iterables]
    finished = [False for _ in iterables]
    while not all(finished):
        for idx in range(len(sources)):
            try:
                item = next(sources[idx])
            except StopIteration:
                # Ran dry: remember that, then restart it as an endless cycle
                # so it keeps contributing one sample per round.
                finished[idx] = True
                sources[idx] = itertools.cycle(iterables[idx])
                item = next(sources[idx])
            yield item
202
+
203
+
204
class RepetitiveRoundRobinDataLoader(object):
    """Iterates several data loaders in repetitive round-robin order."""

    def __init__(self, *dataloaders):
        # Kept as the tuple of loaders, in the order they were passed.
        self.dataloaders = dataloaders

    def __iter__(self):
        """Return a fresh round-robin iterator over all wrapped loaders."""
        return repetitive_roundrobin(*self.dataloaders)

    def __len__(self):
        """Number of rounds (longest loader plus one) times the number of loaders."""
        longest = max(len(loader) for loader in self.dataloaders)
        # The first samples get repeated in one extra round, hence the plus one.
        rounds = longest + 1
        return rounds * len(self.dataloaders)
214
+
215
+
216
class MixedNYUKITTI(object):
    """Data source that mixes the NYU and KITTI loaders for training.

    In "train" mode ``self.data`` alternates between both datasets through
    ``RepetitiveRoundRobinDataLoader``; in every other mode it is the plain
    NYU loader. ``self.config`` ends up holding the NYU variant of the config.
    """

    def __init__(self, config, mode, device='cpu', **kwargs):
        config = edict(config)
        # Two loaders are built from this config, so each one gets half of the workers.
        config.workers = config.workers // 2
        self.config = config
        nyu_conf = change_dataset(edict(config), 'nyu')
        kitti_conf = change_dataset(edict(config), 'kitti')

        # make nyu default for testing
        config = nyu_conf
        self.config = config

        # The configured image size is only honoured when input resizing is on.
        img_size = self.config.get("img_size", None)
        if not self.config.get("do_input_resize", False):
            img_size = None

        if mode != 'train':
            self.data = DepthDataLoader(nyu_conf, mode, device=device).data
            return

        nyu_loader, kitti_loader = [
            DepthDataLoader(
                conf, mode, device=device,
                transform=preprocessing_transforms(mode, size=img_size)).data
            for conf in (nyu_conf, kitti_conf)
        ]
        # It has been changed to repetitive roundrobin
        self.data = RepetitiveRoundRobinDataLoader(nyu_loader, kitti_loader)
239
+
240
+
241
def remove_leading_slash(s):
    """Drop one leading ``/`` or ``\\`` from ``s``, if there is one.

    Only the first character is removed, so ``'//a'`` becomes ``'/a'``.
    An empty string is returned unchanged instead of raising ``IndexError``.

    Args:
        s (str): Path-like string.

    Returns:
        str: ``s`` without its leading slash or backslash.
    """
    # startswith() is safe on the empty string, unlike indexing s[0].
    if s.startswith(('/', '\\')):
        return s[1:]
    return s
245
+
246
+
247
class CachedReader:
    """Image reader that remembers every opened image by file path."""

    def __init__(self, shared_dict=None):
        """
        Args:
            shared_dict (optional): Mapping used as the cache. When None, a
                private dict is created.
        """
        # Compare against None rather than testing truthiness: a shared cache
        # is normally still empty when it is handed over, and an empty mapping
        # is falsy, so a truthiness test would silently swap it for a private
        # dict and nothing would ever be shared.
        if shared_dict is not None:
            self._cache = shared_dict
        else:
            self._cache = {}

    def open(self, fpath):
        """Return the image cached for ``fpath``, opening and caching it on a miss."""
        im = self._cache.get(fpath, None)
        if im is None:
            im = self._cache[fpath] = Image.open(fpath)
        return im
259
+
260
+
261
class ImReader:
    """Image reader without caching: every call opens the file again."""

    def __init__(self):
        """Nothing to set up; the reader keeps no state."""

    # @cache
    def open(self, fpath):
        """Open ``fpath`` with ``Image.open`` and return the result."""
        opened = Image.open(fpath)
        return opened
268
+
269
+
270
+ class DataLoadPreprocess(Dataset):
271
+ def __init__(self, config, mode, transform=None, is_for_online_eval=False, **kwargs):
272
+ self.config = config
273
+ if mode == 'online_eval':
274
+ with open(config.filenames_file_eval, 'r') as f:
275
+ self.filenames = f.readlines()
276
+ else:
277
+ with open(config.filenames_file, 'r') as f:
278
+ self.filenames = f.readlines()
279
+
280
+ self.mode = mode
281
+ self.transform = transform
282
+ self.to_tensor = ToTensor(mode)
283
+ self.is_for_online_eval = is_for_online_eval
284
+ if config.use_shared_dict:
285
+ self.reader = CachedReader(config.shared_dict)
286
+ else:
287
+ self.reader = ImReader()
288
+
289
+ def postprocess(self, sample):
290
+ return sample
291
+
292
+ def __getitem__(self, idx):
293
+ sample_path = self.filenames[idx]
294
+ focal = float(sample_path.split()[2])
295
+ sample = {}
296
+
297
+ if self.mode == 'train':
298
+ if self.config.dataset == 'kitti' and self.config.use_right and random.random() > 0.5:
299
+ image_path = os.path.join(
300
+ self.config.data_path, remove_leading_slash(sample_path.split()[3]))
301
+ depth_path = os.path.join(
302
+ self.config.gt_path, remove_leading_slash(sample_path.split()[4]))
303
+ else:
304
+ image_path = os.path.join(
305
+ self.config.data_path, remove_leading_slash(sample_path.split()[0]))
306
+ depth_path = os.path.join(
307
+ self.config.gt_path, remove_leading_slash(sample_path.split()[1]))
308
+
309
+ image = self.reader.open(image_path)
310
+ depth_gt = self.reader.open(depth_path)
311
+ w, h = image.size
312
+
313
+ if self.config.do_kb_crop:
314
+ height = image.height
315
+ width = image.width
316
+ top_margin = int(height - 352)
317
+ left_margin = int((width - 1216) / 2)
318
+ depth_gt = depth_gt.crop(
319
+ (left_margin, top_margin, left_margin + 1216, top_margin + 352))
320
+ image = image.crop(
321
+ (left_margin, top_margin, left_margin + 1216, top_margin + 352))
322
+
323
+ # Avoid blank boundaries due to pixel registration?
324
+ # Train images have white border. Test images have black border.
325
+ if self.config.dataset == 'nyu' and self.config.avoid_boundary:
326
+ # print("Avoiding Blank Boundaries!")
327
+ # We just crop and pad again with reflect padding to original size
328
+ # original_size = image.size
329
+ crop_params = get_white_border(np.array(image, dtype=np.uint8))
330
+ image = image.crop((crop_params.left, crop_params.top, crop_params.right, crop_params.bottom))
331
+ depth_gt = depth_gt.crop((crop_params.left, crop_params.top, crop_params.right, crop_params.bottom))
332
+
333
+ # Use reflect padding to fill the blank
334
+ image = np.array(image)
335
+ image = np.pad(image, ((crop_params.top, h - crop_params.bottom), (crop_params.left, w - crop_params.right), (0, 0)), mode='reflect')
336
+ image = Image.fromarray(image)
337
+
338
+ depth_gt = np.array(depth_gt)
339
+ depth_gt = np.pad(depth_gt, ((crop_params.top, h - crop_params.bottom), (crop_params.left, w - crop_params.right)), 'constant', constant_values=0)
340
+ depth_gt = Image.fromarray(depth_gt)
341
+
342
+
343
+ if self.config.do_random_rotate and (self.config.aug):
344
+ random_angle = (random.random() - 0.5) * 2 * self.config.degree
345
+ image = self.rotate_image(image, random_angle)
346
+ depth_gt = self.rotate_image(
347
+ depth_gt, random_angle, flag=Image.NEAREST)
348
+
349
+ image = np.asarray(image, dtype=np.float32) / 255.0
350
+ depth_gt = np.asarray(depth_gt, dtype=np.float32)
351
+ depth_gt = np.expand_dims(depth_gt, axis=2)
352
+
353
+ if self.config.dataset == 'nyu':
354
+ depth_gt = depth_gt / 1000.0
355
+ else:
356
+ depth_gt = depth_gt / 256.0
357
+
358
+ if self.config.aug and (self.config.random_crop):
359
+ image, depth_gt = self.random_crop(
360
+ image, depth_gt, self.config.input_height, self.config.input_width)
361
+
362
+ if self.config.aug and self.config.random_translate:
363
+ # print("Random Translation!")
364
+ image, depth_gt = self.random_translate(image, depth_gt, self.config.max_translation)
365
+
366
+ image, depth_gt = self.train_preprocess(image, depth_gt)
367
+ mask = np.logical_and(depth_gt > self.config.min_depth,
368
+ depth_gt < self.config.max_depth).squeeze()[None, ...]
369
+ sample = {'image': image, 'depth': depth_gt, 'focal': focal,
370
+ 'mask': mask, **sample}
371
+
372
+ else:
373
+ if self.mode == 'online_eval':
374
+ data_path = self.config.data_path_eval
375
+ else:
376
+ data_path = self.config.data_path
377
+
378
+ image_path = os.path.join(
379
+ data_path, remove_leading_slash(sample_path.split()[0]))
380
+ image = np.asarray(self.reader.open(image_path),
381
+ dtype=np.float32) / 255.0
382
+
383
+ if self.mode == 'online_eval':
384
+ gt_path = self.config.gt_path_eval
385
+ depth_path = os.path.join(
386
+ gt_path, remove_leading_slash(sample_path.split()[1]))
387
+ has_valid_depth = False
388
+ try:
389
+ depth_gt = self.reader.open(depth_path)
390
+ has_valid_depth = True
391
+ except IOError:
392
+ depth_gt = False
393
+ # print('Missing gt for {}'.format(image_path))
394
+
395
+ if has_valid_depth:
396
+ depth_gt = np.asarray(depth_gt, dtype=np.float32)
397
+ depth_gt = np.expand_dims(depth_gt, axis=2)
398
+ if self.config.dataset == 'nyu':
399
+ depth_gt = depth_gt / 1000.0
400
+ else:
401
+ depth_gt = depth_gt / 256.0
402
+
403
+ mask = np.logical_and(
404
+ depth_gt >= self.config.min_depth, depth_gt <= self.config.max_depth).squeeze()[None, ...]
405
+ else:
406
+ mask = False
407
+
408
+ if self.config.do_kb_crop:
409
+ height = image.shape[0]
410
+ width = image.shape[1]
411
+ top_margin = int(height - 352)
412
+ left_margin = int((width - 1216) / 2)
413
+ image = image[top_margin:top_margin + 352,
414
+ left_margin:left_margin + 1216, :]
415
+ if self.mode == 'online_eval' and has_valid_depth:
416
+ depth_gt = depth_gt[top_margin:top_margin +
417
+ 352, left_margin:left_margin + 1216, :]
418
+
419
+ if self.mode == 'online_eval':
420
+ sample = {'image': image, 'depth': depth_gt, 'focal': focal, 'has_valid_depth': has_valid_depth,
421
+ 'image_path': sample_path.split()[0], 'depth_path': sample_path.split()[1],
422
+ 'mask': mask}
423
+ else:
424
+ sample = {'image': image, 'focal': focal}
425
+
426
+ if (self.mode == 'train') or ('has_valid_depth' in sample and sample['has_valid_depth']):
427
+ mask = np.logical_and(depth_gt > self.config.min_depth,
428
+ depth_gt < self.config.max_depth).squeeze()[None, ...]
429
+ sample['mask'] = mask
430
+
431
+ if self.transform:
432
+ sample = self.transform(sample)
433
+
434
+ sample = self.postprocess(sample)
435
+ sample['dataset'] = self.config.dataset
436
+ sample = {**sample, 'image_path': sample_path.split()[0], 'depth_path': sample_path.split()[1]}
437
+
438
+ return sample
439
+
440
def rotate_image(self, image, angle, flag=Image.BILINEAR):
    """Return ``image`` rotated by ``angle``.

    Delegates to ``image.rotate`` and forwards ``flag`` as the ``resample``
    argument (bilinear by default).
    """
    return image.rotate(angle, resample=flag)
443
+
444
def random_crop(self, img, depth, height, width):
    """Cut the same random ``height`` x ``width`` window out of ``img`` and ``depth``.

    Both arrays are indexed as (rows, cols, channels) and must agree in
    their first two dimensions; the image must be at least as large as the
    requested window.

    Returns:
        Tuple ``(img_crop, depth_crop)``.
    """
    img_h, img_w = img.shape[0], img.shape[1]
    assert img_h >= height
    assert img_w >= width
    assert img_h == depth.shape[0]
    assert img_w == depth.shape[1]

    # The horizontal offset is drawn first, then the vertical one.
    left = random.randint(0, img_w - width)
    top = random.randint(0, img_h - height)

    rows = slice(top, top + height)
    cols = slice(left, left + width)
    return img[rows, cols, :], depth[rows, cols, :]
455
+
456
def random_translate(self, img, depth, max_t=20):
    """Randomly shift ``img`` and ``depth`` by the same integer offset.

    A uniform draw above ``self.config.translate_prob`` leaves the inputs
    untouched. Otherwise both arrays are warped with one pure-translation
    affine matrix whose x/y offsets lie in ``[-max_t, max_t]``.

    Returns:
        Tuple ``(img, depth)``; ``depth`` keeps a trailing channel axis.
    """
    assert img.shape[0] == depth.shape[0]
    assert img.shape[1] == depth.shape[1]

    threshold = self.config.translate_prob
    if random.random() > threshold:
        return img, depth

    shift_x = random.randint(-max_t, max_t)
    shift_y = random.randint(-max_t, max_t)
    affine = np.float32([[1, 0, shift_x], [0, 1, shift_y]])

    # warpAffine takes the output size as (width, height).
    moved_img = cv2.warpAffine(img, affine, (img.shape[1], img.shape[0]))
    moved_depth = cv2.warpAffine(
        depth, affine, (depth.shape[1], depth.shape[0]))
    # The affine warp removes the channel dim of the depth map; add it back.
    moved_depth = moved_depth.squeeze()[..., None]
    return moved_img, moved_depth
472
+
473
def train_preprocess(self, image, depth_gt):
    """Apply the random training augmentations when ``self.config.aug`` is set.

    With augmentation enabled, image and depth are mirrored horizontally
    together when the first draw exceeds 0.5, and the image alone goes
    through ``self.augment_image`` when the second draw exceeds 0.5.

    Returns:
        Tuple ``(image, depth_gt)``.
    """
    if not self.config.aug:
        return image, depth_gt

    # Random horizontal flip; copy() turns the reversed views into real arrays.
    if random.random() > 0.5:
        image = image[:, ::-1, :].copy()
        depth_gt = depth_gt[:, ::-1, :].copy()

    # Random gamma / brightness / colour augmentation of the image only.
    if random.random() > 0.5:
        image = self.augment_image(image)

    return image, depth_gt
487
+
488
def augment_image(self, image):
    """Return a photometrically jittered copy of ``image``.

    Applies, in order: a gamma in [0.9, 1.1], a brightness factor
    ([0.75, 1.25] when ``self.config.dataset == 'nyu'``, otherwise
    [0.9, 1.1]), and an independent gain in [0.9, 1.1] for each of the
    three channels. The result is clipped to [0, 1].
    """
    # gamma augmentation
    gamma = random.uniform(0.9, 1.1)
    jittered = image ** gamma

    # brightness augmentation
    if self.config.dataset == 'nyu':
        low, high = 0.75, 1.25
    else:
        low, high = 0.9, 1.1
    jittered = jittered * random.uniform(low, high)

    # color augmentation: one gain per channel, expanded to an (H, W, 3) map
    gains = np.random.uniform(0.9, 1.1, size=3)
    gain_map = np.broadcast_to(gains, (image.shape[0], image.shape[1], 3))
    jittered *= gain_map

    return np.clip(jittered, 0, 1)
508
+
509
def __len__(self):
    """Return the number of entries in ``self.filenames``."""
    sample_count = len(self.filenames)
    return sample_count
511
+
512
+
513
class ToTensor(object):
    """Convert the image (and, in train mode, depth) of a sample dict to tensors.

    Args:
        mode: ``'train'``, ``'test'`` or anything else (treated as evaluation).
        do_normalize: apply ImageNet mean/std normalisation to the image.
        size: optional target size handed to ``transforms.Resize``.
    """

    def __init__(self, mode, do_normalize=False, size=None):
        self.mode = mode
        self.normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) if do_normalize else nn.Identity()
        self.size = size
        if size is not None:
            self.resize = transforms.Resize(size=size)
        else:
            self.resize = nn.Identity()

    def __call__(self, sample):
        """Return a new dict with the image converted, normalised and resized.

        * ``'test'``: only ``image`` and ``focal`` are returned.
        * ``'train'``: depth is converted to a tensor as well; all other
          keys of ``sample`` are passed through.
        * otherwise: depth is passed through unconverted and
          ``has_valid_depth``, ``image_path`` and ``depth_path`` must be
          present in ``sample``.
        """
        image, focal = sample['image'], sample['focal']
        image = self.to_tensor(image)
        image = self.normalize(image)
        image = self.resize(image)

        if self.mode == 'test':
            return {'image': image, 'focal': focal}

        depth = sample['depth']
        if self.mode == 'train':
            depth = self.to_tensor(depth)
            return {**sample, 'image': image, 'depth': depth, 'focal': focal}

        # Evaluation: the image was already resized above, so the second
        # resize the code used to perform here was redundant and is dropped.
        has_valid_depth = sample['has_valid_depth']
        return {**sample, 'image': image, 'depth': depth, 'focal': focal, 'has_valid_depth': has_valid_depth,
                'image_path': sample['image_path'], 'depth_path': sample['depth_path']}

    def to_tensor(self, pic):
        """Convert an HWC ndarray or a PIL image into a CHW tensor.

        Raises:
            TypeError: if ``pic`` is neither a PIL image nor a numpy image.
        """
        if not (_is_pil_image(pic) or _is_numpy_image(pic)):
            raise TypeError(
                'pic should be PIL Image or ndarray. Got {}'.format(type(pic)))

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # handle PIL Image
        # np.asarray copies only when needed; np.array(..., copy=False)
        # raises ValueError on NumPy >= 2.0 whenever a copy is required.
        if pic.mode == 'I':
            img = torch.from_numpy(np.asarray(pic, dtype=np.int32))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.asarray(pic, dtype=np.int16))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        # HWC -> CHW
        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img
ZoeDepth/zoedepth/data/ddad.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import os
26
+
27
+ import numpy as np
28
+ import torch
29
+ from PIL import Image
30
+ from torch.utils.data import DataLoader, Dataset
31
+ from torchvision import transforms
32
+
33
+
34
class ToTensor(object):
    """Turn a DDAD ``{'image', 'depth'}`` sample into channel-first tensors.

    The image is also resized to ``resize_shape``. Normalisation is an
    identity function (the ImageNet mean/std variant is disabled).
    """

    def __init__(self, resize_shape):
        self.normalize = lambda x : x
        self.resize = transforms.Resize(resize_shape)

    def __call__(self, sample):
        """Return ``{'image', 'depth', 'dataset'}``; only the image is resized."""
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "ddad"}

    def to_tensor(self, pic):
        """Convert an HWC ndarray or a PIL image into a CHW tensor."""
        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # handle PIL Image
        # np.asarray copies only when needed; np.array(..., copy=False)
        # raises ValueError on NumPy >= 2.0 whenever a copy is required.
        if pic.mode == 'I':
            img = torch.from_numpy(np.asarray(pic, dtype=np.int32))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.asarray(pic, dtype=np.int16))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        # HWC -> CHW
        img = img.transpose(0, 1).transpose(0, 2).contiguous()

        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img
80
+
81
+
82
class DDAD(Dataset):
    """DDAD samples stored as RGB ``.png`` files with matching ``.npy`` depth maps."""

    def __init__(self, data_dir_root, resize_shape):
        import glob

        # RGB frames are the *.png files directly under data_dir_root; each
        # depth path is obtained by replacing "_rgb.png" with "_depth.npy".
        rgb_pattern = os.path.join(data_dir_root, '*.png')
        self.image_files = glob.glob(rgb_pattern)
        self.depth_files = [rgb_path.replace("_rgb.png", "_depth.npy")
                            for rgb_path in self.image_files]
        self.transform = ToTensor(resize_shape)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return the transformed dict."""
        rgb_path = self.image_files[idx]
        npy_path = self.depth_files[idx]

        rgb = np.asarray(Image.open(rgb_path), dtype=np.float32) / 255.0
        # Stored depth is in meters per the original author's note;
        # a trailing channel axis is appended for the transform.
        depth_map = np.load(npy_path)[..., None]

        item = self.transform(dict(image=rgb, depth=depth_map))

        # One-off shape printout for the first sample.
        if idx == 0:
            print(item["image"].shape)

        return item

    def __len__(self):
        return len(self.image_files)
113
+
114
+
115
def get_ddad_loader(data_dir_root, resize_shape, batch_size=1, **kwargs):
    """Build a ``DataLoader`` over ``DDAD(data_dir_root, resize_shape)``.

    Extra keyword arguments are forwarded unchanged to ``DataLoader``.
    """
    return DataLoader(DDAD(data_dir_root, resize_shape), batch_size, **kwargs)
ZoeDepth/zoedepth/data/diml_indoor_test.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import os
26
+
27
+ import numpy as np
28
+ import torch
29
+ from PIL import Image
30
+ from torch.utils.data import DataLoader, Dataset
31
+ from torchvision import transforms
32
+
33
+
34
class ToTensor(object):
    """Turn a DIML-indoor ``{'image', 'depth'}`` sample into channel-first tensors.

    The image is also resized to 480x640. Normalisation is an identity
    function (the ImageNet mean/std variant is disabled).
    """

    def __init__(self):
        self.normalize = lambda x : x
        self.resize = transforms.Resize((480, 640))

    def __call__(self, sample):
        """Return ``{'image', 'depth', 'dataset'}``; only the image is resized."""
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "diml_indoor"}

    def to_tensor(self, pic):
        """Convert an HWC ndarray or a PIL image into a CHW tensor."""
        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # handle PIL Image
        # np.asarray copies only when needed; np.array(..., copy=False)
        # raises ValueError on NumPy >= 2.0 whenever a copy is required.
        if pic.mode == 'I':
            img = torch.from_numpy(np.asarray(pic, dtype=np.int32))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.asarray(pic, dtype=np.int16))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        # HWC -> CHW
        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img
79
+
80
+
81
class DIML_Indoor(Dataset):
    """DIML indoor samples: the "LR" colour frames with their filled depth maps."""

    def __init__(self, data_dir_root):
        import glob

        # Colour frames: <data_dir_root>/LR/<scene>/color/*.png
        color_pattern = os.path.join(data_dir_root, "LR", '*', 'color', '*.png')
        self.image_files = glob.glob(color_pattern)

        # Depth path: "color" -> "depth_filled", then "_c.png" -> "_depth_filled.png".
        self.depth_files = []
        for color_path in self.image_files:
            depth_path = color_path.replace("color", "depth_filled")
            depth_path = depth_path.replace("_c.png", "_depth_filled.png")
            self.depth_files.append(depth_path)

        self.transform = ToTensor()

    def __getitem__(self, idx):
        """Load sample ``idx`` and return the transformed dict."""
        color_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        rgb = np.asarray(Image.open(color_path), dtype=np.float32) / 255.0
        raw_depth = np.asarray(Image.open(depth_path), dtype='uint16')
        # mm to meters, then add a trailing channel axis for the transform
        depth_m = (raw_depth / 1000.0)[..., None]

        item = self.transform(dict(image=rgb, depth=depth_m))

        # One-off shape printout for the first sample.
        if idx == 0:
            print(item["image"].shape)

        return item

    def __len__(self):
        return len(self.image_files)
118
+
119
+
120
def get_diml_indoor_loader(data_dir_root, batch_size=1, **kwargs):
    """Build a ``DataLoader`` over ``DIML_Indoor(data_dir_root)``.

    Extra keyword arguments are forwarded unchanged to ``DataLoader``.
    """
    return DataLoader(DIML_Indoor(data_dir_root), batch_size, **kwargs)
123
+
124
+ # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/HR")
125
+ # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/LR")
ZoeDepth/zoedepth/data/diml_outdoor_test.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import os
26
+
27
+ import numpy as np
28
+ import torch
29
+ from PIL import Image
30
+ from torch.utils.data import DataLoader, Dataset
31
+ from torchvision import transforms
32
+
33
+
34
class ToTensor(object):
    """Turn a DIML-outdoor ``{'image', 'depth'}`` sample into channel-first tensors.

    No resizing is done. Normalisation is an identity function (the
    ImageNet mean/std variant is disabled).
    """

    def __init__(self):
        self.normalize = lambda x : x

    def __call__(self, sample):
        """Return ``{'image', 'depth', 'dataset'}`` built from ``sample``."""
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        return {'image': image, 'depth': depth, 'dataset': "diml_outdoor"}

    def to_tensor(self, pic):
        """Convert an HWC ndarray or a PIL image into a CHW tensor."""
        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # handle PIL Image
        # np.asarray copies only when needed; np.array(..., copy=False)
        # raises ValueError on NumPy >= 2.0 whenever a copy is required.
        if pic.mode == 'I':
            img = torch.from_numpy(np.asarray(pic, dtype=np.int32))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.asarray(pic, dtype=np.int16))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        # HWC -> CHW
        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img
76
+
77
+
78
class DIML_Outdoor(Dataset):
    """DIML outdoor samples: ``outleft`` colour frames with ``depthmap`` depth images."""

    def __init__(self, data_dir_root):
        import glob

        # Colour frames: <data_dir_root>/<any dir>/outleft/*.png
        left_pattern = os.path.join(data_dir_root, "*", 'outleft', '*.png')
        self.image_files = glob.glob(left_pattern)
        # Depth path: replace "outleft" with "depthmap" in the colour path.
        self.depth_files = [left_path.replace("outleft", "depthmap")
                            for left_path in self.image_files]
        self.transform = ToTensor()

    def __getitem__(self, idx):
        """Load sample ``idx`` and return the transformed dict."""
        left_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        rgb = np.asarray(Image.open(left_path), dtype=np.float32) / 255.0
        raw_depth = np.asarray(Image.open(depth_path), dtype='uint16')
        # mm to meters, then add a trailing channel axis for the transform
        depth_m = (raw_depth / 1000.0)[..., None]

        item = dict(image=rgb, depth=depth_m, dataset="diml_outdoor")
        return self.transform(item)

    def __len__(self):
        return len(self.image_files)
107
+
108
+
109
def get_diml_outdoor_loader(data_dir_root, batch_size=1, **kwargs):
    """Build a ``DataLoader`` over ``DIML_Outdoor(data_dir_root)``.

    Extra keyword arguments are forwarded unchanged to ``DataLoader``.
    """
    return DataLoader(DIML_Outdoor(data_dir_root), batch_size, **kwargs)
112
+
113
+ # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/HR")
114
+ # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/LR")
ZoeDepth/zoedepth/data/diode.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import os
26
+
27
+ import numpy as np
28
+ import torch
29
+ from PIL import Image
30
+ from torch.utils.data import DataLoader, Dataset
31
+ from torchvision import transforms
32
+
33
+
34
class ToTensor(object):
    """Turn a DIODE ``{'image', 'depth'}`` sample into channel-first tensors.

    The image is also passed through ``transforms.Resize(480)``.
    Normalisation is an identity function (the ImageNet mean/std variant is
    disabled). Any other keys in the sample (e.g. ``'valid'``) are not
    carried over to the output.
    """

    def __init__(self):
        self.normalize = lambda x : x
        self.resize = transforms.Resize(480)

    def __call__(self, sample):
        """Return ``{'image', 'depth', 'dataset'}``; only the image is resized."""
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "diode"}

    def to_tensor(self, pic):
        """Convert an HWC ndarray or a PIL image into a CHW tensor."""
        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # handle PIL Image
        # np.asarray copies only when needed; np.array(..., copy=False)
        # raises ValueError on NumPy >= 2.0 whenever a copy is required.
        if pic.mode == 'I':
            img = torch.from_numpy(np.asarray(pic, dtype=np.int32))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.asarray(pic, dtype=np.int16))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        # HWC -> CHW
        img = img.transpose(0, 1).transpose(0, 2).contiguous()

        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img
80
+
81
+
82
class DIODE(Dataset):
    """DIODE samples: ``.png`` images with ``_depth.npy`` and ``_depth_mask.npy`` files."""

    def __init__(self, data_dir_root):
        import glob

        # image paths are of the form <data_dir_root>/scene_#/scan_#/*.png
        png_pattern = os.path.join(data_dir_root, '*', '*', '*.png')
        self.image_files = glob.glob(png_pattern)
        # Depth and mask paths come from replacing ".png" in the image path.
        self.depth_files = []
        self.depth_mask_files = []
        for png_path in self.image_files:
            self.depth_files.append(png_path.replace(".png", "_depth.npy"))
            self.depth_mask_files.append(
                png_path.replace(".png", "_depth_mask.npy"))
        self.transform = ToTensor()

    def __getitem__(self, idx):
        """Load sample ``idx`` and return the transformed dict."""
        png_path = self.image_files[idx]
        depth_path = self.depth_files[idx]
        mask_path = self.depth_mask_files[idx]

        rgb = np.asarray(Image.open(png_path), dtype=np.float32) / 255.0
        depth_m = np.load(depth_path)  # in meters
        valid_mask = np.load(mask_path)  # binary

        # NOTE(review): no channel axis is appended to the depth here (the
        # line doing so is disabled), so the stored array presumably already
        # has one - confirm against the data.
        item = self.transform(dict(image=rgb, depth=depth_m, valid=valid_mask))

        # One-off shape printout for the first sample.
        if idx == 0:
            print(item["image"].shape)

        return item

    def __len__(self):
        return len(self.image_files)
119
+
120
+
121
def get_diode_loader(data_dir_root, batch_size=1, **kwargs):
    """Build a ``DataLoader`` over ``DIODE(data_dir_root)``.

    Extra keyword arguments are forwarded unchanged to ``DataLoader``.
    """
    return DataLoader(DIODE(data_dir_root), batch_size, **kwargs)
124
+
125
+ # get_diode_loader(data_dir_root="datasets/diode/val/outdoor")
ZoeDepth/zoedepth/data/hypersim.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import glob
26
+ import os
27
+
28
+ import h5py
29
+ import numpy as np
30
+ import torch
31
+ from PIL import Image
32
+ from torch.utils.data import DataLoader, Dataset
33
+ from torchvision import transforms
34
+
35
+
36
def hypersim_distance_to_depth(npyDistance):
    """Convert per-pixel ray distance into planar depth for a 1024x768 image.

    Every pixel gets the ray ``(x, y, focal)`` through its centre, with
    x/y measured from the image centre and ``focal = 886.81``. The distance
    is scaled by ``focal / ||ray||``.

    Args:
        npyDistance: array broadcastable against shape (768, 1024).

    Returns:
        Array of planar depths in the same units as the input.
    """
    width, height, focal = 1024, 768, 886.81

    # Pixel-centre coordinates relative to the image centre.
    xs = np.linspace((-0.5 * width) + 0.5, (0.5 * width) - 0.5,
                     width).astype(np.float32)
    ys = np.linspace((-0.5 * height) + 0.5, (0.5 * height) - 0.5,
                     height).astype(np.float32)
    grid_x, grid_y = np.meshgrid(xs, ys)  # each (height, width)
    grid_z = np.full((height, width), focal, np.float32)

    rays = np.stack([grid_x, grid_y, grid_z], axis=2)
    ray_length = np.linalg.norm(rays, 2, 2)

    return npyDistance / ray_length * focal
49
+
50
+
51
class ToTensor(object):
    """Turn a HyperSim ``{'image', 'depth'}`` sample into channel-first tensors.

    The image is also resized to 480x640. Normalisation is an identity
    function (the ImageNet mean/std variant is disabled).
    """

    def __init__(self):
        self.normalize = lambda x: x
        self.resize = transforms.Resize((480, 640))

    def __call__(self, sample):
        """Return ``{'image', 'depth', 'dataset'}``; only the image is resized."""
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "hypersim"}

    def to_tensor(self, pic):
        """Convert an HWC ndarray or a PIL image into a CHW tensor."""
        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # handle PIL Image
        # np.asarray copies only when needed; np.array(..., copy=False)
        # raises ValueError on NumPy >= 2.0 whenever a copy is required.
        if pic.mode == 'I':
            img = torch.from_numpy(np.asarray(pic, dtype=np.int32))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.asarray(pic, dtype=np.int16))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        # HWC -> CHW
        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img
96
+
97
+
98
class HyperSim(Dataset):
    """HyperSim samples: tonemapped ``.jpg`` previews with HDF5 distance maps."""

    def __init__(self, data_dir_root):
        # image paths are of the form <data_dir_root>/<scene>/images/scene_cam_#_final_preview/*.tonemap.jpg
        # depth paths are of the form <data_dir_root>/<scene>/images/scene_cam_#_geometry_hdf5/*.depth_meters.hdf5
        self.image_files = glob.glob(os.path.join(
            data_dir_root, '*', 'images', 'scene_cam_*_final_preview', '*.tonemap.jpg'))
        self.depth_files = [r.replace("_final_preview", "_geometry_hdf5").replace(
            ".tonemap.jpg", ".depth_meters.hdf5") for r in self.image_files]
        self.transform = ToTensor()

    def __getitem__(self, idx):
        """Load sample ``idx``, convert distance to planar depth, and transform it."""
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0

        # Read the distance map inside a context manager so the HDF5 handle
        # is closed right away; np.array copies the data into memory first.
        with h5py.File(depth_path, "r") as depth_fd:
            # in meters (Euclidean distance)
            distance_meters = np.array(depth_fd['dataset'])
        depth = hypersim_distance_to_depth(
            distance_meters)  # in meters (planar depth)

        # Add a trailing channel axis for the transform.
        depth = depth[..., None]

        sample = dict(image=image, depth=depth)
        sample = self.transform(sample)

        # One-off shape printout for the first sample.
        if idx == 0:
            print(sample["image"].shape)

        return sample

    def __len__(self):
        return len(self.image_files)
134
+
135
+
136
def get_hypersim_loader(data_dir_root, batch_size=1, **kwargs):
    """Build a ``DataLoader`` over ``HyperSim(data_dir_root)``.

    Extra keyword arguments are forwarded unchanged to ``DataLoader``.
    """
    return DataLoader(HyperSim(data_dir_root), batch_size, **kwargs)
ZoeDepth/zoedepth/data/ibims.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import os
26
+
27
+ import numpy as np
28
+ import torch
29
+ from PIL import Image
30
+ from torch.utils.data import DataLoader, Dataset
31
+ from torchvision import transforms as T
32
+
33
+
34
class iBims(Dataset):
    """iBims samples listed in ``<config.ibims_root>/imagelist.txt``.

    Each whitespace-separated basename in the list maps to four ``.png``
    files under ``rgb``, ``depth``, ``mask_invalid`` and ``mask_transp``.
    """

    def __init__(self, config):
        root = config.ibims_root
        with open(os.path.join(root, "imagelist.txt"), 'r') as f:
            basenames = f.read().split()

        def _paths(basename):
            # (rgb, depth, mask_invalid, mask_transp) paths for one basename
            filename = basename + ".png"
            return (os.path.join(root, 'rgb', filename),
                    os.path.join(root, 'depth', filename),
                    os.path.join(root, 'mask_invalid', filename),
                    os.path.join(root, 'mask_transp', filename))

        self.samples = [_paths(basename) for basename in basenames]
        # Normalisation is an identity function (ImageNet mean/std is disabled).
        self.normalize = lambda x : x

    def __getitem__(self, idx):
        """Load sample ``idx``; pixels where the mask product is zero get depth -1."""
        img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx]

        rgb = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0
        raw_depth = np.asarray(Image.open(depth_path), dtype=np.uint16)
        # Rescale the 16-bit range so that 65535 maps to 50.0.
        depth = raw_depth.astype('float')*50.0/65535

        mask_valid = np.asarray(Image.open(valid_mask_path))
        mask_transp = np.asarray(Image.open(transp_mask_path))
        depth = np.where(mask_valid * mask_transp, depth, -1)

        # HWC -> CHW for the image; the depth gets a leading channel axis.
        img = self.normalize(torch.from_numpy(rgb).permute(2, 0, 1))
        depth = torch.from_numpy(depth).unsqueeze(0)
        return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims')

    def __len__(self):
        return len(self.samples)
77
+
78
+
79
def get_ibims_loader(config, batch_size=1, **kwargs):
    """Build a ``DataLoader`` over ``iBims(config)``.

    Extra keyword arguments are forwarded unchanged to ``DataLoader``.
    """
    return DataLoader(iBims(config), batch_size=batch_size, **kwargs)
ZoeDepth/zoedepth/data/preprocess.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import numpy as np
26
+ from dataclasses import dataclass
27
+ from typing import Tuple, List
28
+
29
+ # dataclass to store the crop parameters
30
@dataclass
class CropParams:
    # Slice bounds; crop_image applies them as image[top:bottom, left:right].
    top: int
    bottom: int
    left: int
    right: int



def get_border_params(rgb_image, tolerance=0.1, cut_off=20, value=0, level_diff_threshold=5, channel_axis=-1, min_border=5) -> CropParams:
    """Find crop bounds that strip near-constant borders from an image.

    The image is averaged over ``channel_axis``. Starting ``min_border``
    pixels in from each side, the bound moves inwards while the probed
    row/column still consists of more than ``tolerance`` (as a fraction)
    pixels within ``level_diff_threshold`` of ``value``, and stops once it
    has moved past ``cut_off``.

    Args:
        rgb_image: Image array; averaging over ``channel_axis`` must leave a
            2-D array.
        tolerance: Fraction of border-coloured pixels above which a
            row/column still counts as border.
        cut_off: Search stops once a bound is more than this many pixels
            from its side.
        value: Border colour level to look for.
        level_diff_threshold: Maximum absolute difference from ``value`` for
            a pixel to count as border-coloured.
        channel_axis: Axis averaged to obtain the gray image.
        min_border: Number of pixels always cropped from every side.

    Returns:
        CropParams with the resulting top/bottom/left/right bounds.
    """
    gray_image = np.mean(rgb_image, axis=channel_axis)
    h, w = gray_image.shape


    def num_value_pixels(arr):
        return np.sum(np.abs(arr - value) < level_diff_threshold)

    def is_above_tolerance(arr, total_pixels):
        return (num_value_pixels(arr) / total_pixels) > tolerance

    # In every loop the bound check runs *before* the array is indexed, and
    # the bottom/right probes are clamped to the last valid index. The
    # original indexed first, so min_border=0 (bottom == h, right == w)
    # raised IndexError. Results are unchanged wherever the original worked.

    # Crop top border until number of value pixels become below tolerance
    top = min_border
    while top < h - 1 and is_above_tolerance(gray_image[top, :], w):
        top += 1
        if top > cut_off:
            break

    # Crop bottom border until number of value pixels become below tolerance
    bottom = h - min_border
    while bottom > 0 and is_above_tolerance(gray_image[min(bottom, h - 1), :], w):
        bottom -= 1
        if h - bottom > cut_off:
            break

    # Crop left border until number of value pixels become below tolerance
    left = min_border
    while left < w - 1 and is_above_tolerance(gray_image[:, left], h):
        left += 1
        if left > cut_off:
            break

    # Crop right border until number of value pixels become below tolerance
    right = w - min_border
    while right > 0 and is_above_tolerance(gray_image[:, min(right, w - 1)], h):
        right -= 1
        if w - right > cut_off:
            break


    return CropParams(top, bottom, left, right)
80
+
81
+
82
def get_white_border(rgb_image, value=255, **kwargs) -> CropParams:
    """Compute crop parameters for a white border of the RGB image.

    The value range of the image is asserted to match ``value`` (255 or 1)
    before the search is delegated to ``get_border_params``.

    Args:
        rgb_image: RGB image, shape (H, W, 3).
        value: Level treated as white.
        **kwargs: Forwarded to ``get_border_params``.
    Returns:
        Crop parameters.
    """
    if value == 255:
        # Image must live in [0, 255] and actually use values above 1.
        lowest, highest = np.min(rgb_image), np.max(rgb_image)
        assert highest <= 255 and lowest >= 0, "RGB image values are not in range [0, 255]."
        assert rgb_image.max() > 1, "RGB image values are not in range [0, 255]."
    elif value == 1:
        # Image must live in [0, 1].
        lowest, highest = np.min(rgb_image), np.max(rgb_image)
        assert highest <= 1 and lowest >= 0, "RGB image values are not in range [0, 1]."

    border = get_border_params(rgb_image, value=value, **kwargs)
    return border
99
+
100
def get_black_border(rgb_image, **kwargs) -> CropParams:
    """Compute crop parameters for a black border of the RGB image.

    Args:
        rgb_image: RGB image, shape (H, W, 3).
        **kwargs: Forwarded to ``get_border_params``.

    Returns:
        Crop parameters.
    """
    border = get_border_params(rgb_image, value=0, **kwargs)
    return border
111
+
112
def crop_image(image: np.ndarray, crop_params: CropParams) -> np.ndarray:
    """Slice the image down to the window described by the crop parameters.

    Args:
        image: RGB or depth image, shape (H, W, 3) or (H, W).
        crop_params: Crop parameters.

    Returns:
        Cropped image.
    """
    rows = slice(crop_params.top, crop_params.bottom)
    cols = slice(crop_params.left, crop_params.right)
    return image[rows, cols]
123
+
124
def crop_images(*images: np.ndarray, crop_params: CropParams) -> Tuple[np.ndarray]:
    """Apply the same crop to every given image.

    Args:
        images: RGB or depth images, shape (H, W, 3) or (H, W).
        crop_params: Crop parameters.

    Returns:
        Tuple of cropped images, in input order.
    """
    cropped = []
    for image in images:
        cropped.append(crop_image(image, crop_params))
    return tuple(cropped)
135
+
136
def crop_black_or_white_border(rgb_image, *other_images: np.ndarray, tolerance=0.1, cut_off=20, level_diff_threshold=5) -> Tuple[np.ndarray]:
    """Strip a black border and then a white border from a set of images.

    Args:
        rgb_image: RGB image, shape (H, W, 3). This image is used to determine the border.
        other_images: The other images to crop according to the border of the RGB image.
    Returns:
        Cropped RGB and other images.
    """
    search_options = dict(
        tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold)

    # Pass 1: black border, measured on the RGB image.
    black = get_black_border(rgb_image, **search_options)
    result = crop_images(rgb_image, *other_images, crop_params=black)

    # Pass 2: white border, measured on the already-cropped RGB image.
    white = get_white_border(result[0], **search_options)
    result = crop_images(*result, crop_params=white)

    return result
154
+
ZoeDepth/zoedepth/data/sun_rgbd_loader.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import os
26
+
27
+ import numpy as np
28
+ import torch
29
+ from PIL import Image
30
+ from torch.utils.data import DataLoader, Dataset
31
+ from torchvision import transforms
32
+
33
+
34
class ToTensor(object):
    """Convert a ``{'image', 'depth'}`` sample into tensors tagged 'sunrgbd'."""

    def __init__(self):
        # No state: normalisation is the identity (see ``normalize``).
        pass

    @staticmethod
    def normalize(x):
        """Identity placeholder for image normalisation.

        A method instead of a lambda attribute so instances (and datasets
        holding them) stay picklable for multi-worker ``DataLoader``.
        """
        # ImageNet normalisation was disabled in the original code:
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        return x

    def __call__(self, sample):
        """Return a new dict with tensor 'image' and 'depth' plus 'dataset'."""
        image, depth = sample['image'], sample['depth']
        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        return {'image': image, 'depth': depth, 'dataset': "sunrgbd"}

    def to_tensor(self, pic):
        """Convert an HWC ndarray or a PIL image into a CHW tensor.

        ndarrays are transposed and wrapped without a dtype change. PIL
        images are decoded per mode; byte-backed modes are returned as float.
        """
        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # handle PIL Image
        # np.array() is called with its default copy behaviour: the former
        # copy=False raises under NumPy >= 2 whenever a copy is unavoidable.
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        # HWC -> CHW
        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img
76
+
77
+
78
class SunRGBD(Dataset):
    """Dataset pairing ``<root>/rgb/rgb/*`` images with ``<root>/gt/gt`` depth."""

    def __init__(self, data_dir_root):
        """Index image files and derive the matching depth file paths.

        Args:
            data_dir_root: Folder containing ``rgb/rgb`` and ``gt/gt``.
        """
        import glob
        # Sorted so the index -> sample mapping is reproducible; glob order
        # is otherwise filesystem dependent.
        self.image_files = sorted(glob.glob(
            os.path.join(data_dir_root, 'rgb', 'rgb', '*')))
        # Build the depth path from the file name only. Rewriting the whole
        # path with str.replace corrupted roots containing "jpg"/"rgb/rgb"
        # and did nothing on platforms whose separator is not "/".
        depth_dir = os.path.join(data_dir_root, 'gt', 'gt')
        self.depth_files = [
            os.path.join(depth_dir, os.path.basename(r).replace("jpg", "png"))
            for r in self.image_files]
        self.transform = ToTensor()

    def __getitem__(self, idx):
        """Load sample ``idx`` and pass it through ``self.transform``."""
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
        # presumably millimetres -> metres; TODO confirm against the dataset
        depth = np.asarray(Image.open(depth_path), dtype='uint16') / 1000.0
        # Scaled values above 8 are replaced with the sentinel -1.
        depth[depth > 8] = -1
        depth = depth[..., None]  # add a trailing channel axis
        return self.transform(dict(image=image, depth=depth))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.image_files)
102
+
103
+
104
def get_sunrgbd_loader(data_dir_root, batch_size=1, **kwargs):
    """Return a ``DataLoader`` over ``SunRGBD(data_dir_root)``.

    Extra keyword arguments are forwarded to ``DataLoader`` unchanged.
    """
    return DataLoader(SunRGBD(data_dir_root), batch_size, **kwargs)
ZoeDepth/zoedepth/data/transforms.py ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import math
26
+ import random
27
+
28
+ import cv2
29
+ import numpy as np
30
+
31
+
32
class RandomFliplr(object):
    """Mirror every 2-D-or-higher entry of a sample left-right at random.
    """

    def __init__(self, probability=0.5):
        """Init.

        Args:
            probability (float, optional): Flip probability. Defaults to 0.5.
        """
        self.__probability = probability

    def __call__(self, sample):
        # One draw per call decides whether the whole sample is flipped.
        if random.random() >= self.__probability:
            return sample

        for key, value in list(sample.items()):
            if len(value.shape) < 2:
                continue
            sample[key] = np.fliplr(value).copy()

        return sample
53
+
54
+
55
def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
    """Resize the sample in place so it is at least ``size``. Keeps aspect ratio.

    The current size is read from ``sample["disparity"]``. When it is already
    large enough nothing is modified; otherwise "image", "disparity" and
    "mask" are all upscaled by the same factor.

    Args:
        sample (dict): sample with "image", "disparity" and "mask" arrays
        size (tuple): minimum (height, width)
        image_interpolation_method: cv2 interpolation flag used for "image";
            "disparity" and "mask" always use nearest-neighbour.

    Returns:
        tuple: the (possibly unchanged) size of ``sample["disparity"]``
    """
    shape = list(sample["disparity"].shape)

    if shape[0] >= size[0] and shape[1] >= size[1]:
        # Already big enough. Return the shape (not the sample) so both
        # branches honour the documented return type.
        return tuple(shape)

    # Single scale factor: the larger of the two per-axis requirements.
    scale = max(size[0] / shape[0], size[1] / shape[1])

    shape[0] = math.ceil(scale * shape[0])
    shape[1] = math.ceil(scale * shape[1])

    # cv2.resize expects (width, height)
    dsize = tuple(shape[::-1])

    # resize
    sample["image"] = cv2.resize(
        sample["image"], dsize, interpolation=image_interpolation_method
    )

    sample["disparity"] = cv2.resize(
        sample["disparity"], dsize, interpolation=cv2.INTER_NEAREST
    )
    sample["mask"] = cv2.resize(
        sample["mask"].astype(np.float32),
        dsize,
        interpolation=cv2.INTER_NEAREST,
    )
    sample["mask"] = sample["mask"].astype(bool)

    return tuple(shape)
95
+
96
+
97
class RandomCrop(object):
    """Cut a randomly placed window of the given size (width, height) out of a sample.
    """

    def __init__(
        self,
        width,
        height,
        resize_if_needed=False,
        image_interpolation_method=cv2.INTER_AREA,
    ):
        """Init.

        Args:
            width (int): output width
            height (int): output height
            resize_if_needed (bool, optional): If True, sample might be upsampled to ensure
                that a crop of size (width, height) is possible. Defaults to False.
            image_interpolation_method: cv2 interpolation flag used when the
                image has to be upsampled.
        """
        self.__size = (height, width)
        self.__resize_if_needed = resize_if_needed
        self.__image_interpolation_method = image_interpolation_method

    def __call__(self, sample):
        crop_h, crop_w = self.__size
        current = sample["disparity"].shape

        too_small = crop_h > current[0] or crop_w > current[1]
        if too_small and not self.__resize_if_needed:
            raise Exception(
                "Output size {} bigger than input size {}.".format(
                    self.__size, current
                )
            )
        if too_small:
            current = apply_min_size(
                sample, self.__size, self.__image_interpolation_method
            )

        # Row offset is drawn first, then the column offset.
        row0 = np.random.randint(current[0] - crop_h + 1)
        col0 = np.random.randint(current[1] - crop_w + 1)

        for key, value in sample.items():
            if key in ("code", "basis"):
                continue

            if len(sample[key].shape) >= 2:
                sample[key] = value[row0: row0 + crop_h, col0: col0 + crop_w]

        return sample
152
+
153
+
154
class Resize(object):
    """Resize sample to given size (width, height).
    """

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
        image_interpolation_method=cv2.INTER_AREA,
        letter_box=False,
    ):
        """Init.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
                "minimal": Scale as least as possible.  (Output size might be smaller than given size.)
                Defaults to "lower_bound".
            image_interpolation_method: cv2 interpolation flag used for the
                image; targets and mask always use nearest-neighbour.
            letter_box (bool, optional): If True, every resized array is
                additionally zero-padded by ``make_letter_box``.
                Defaults to False.
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method
        self.__letter_box = letter_box

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        """Round ``x`` to a multiple of ``ensure_multiple_of``.

        Rounds to nearest, falls back to rounding down when that exceeds
        ``max_val`` and to rounding up when the result is below ``min_val``.
        """
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of)
                 * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of)
                 * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        """Return the output ``(width, height)`` for an input of that size.

        Raises:
            ValueError: If ``resize_method`` is not one of the known names.
        """
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            # Collapse both factors to a single one chosen by resize_method.
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                scale = max(scale_width, scale_height)
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                scale = min(scale_width, scale_height)
            elif self.__resize_method == "minimal":
                # scale as little as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    scale = scale_width
                else:
                    scale = scale_height
            else:
                raise ValueError(
                    f"resize_method {self.__resize_method} not implemented"
                )
            scale_width = scale_height = scale

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, min_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, min_val=self.__width
            )
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, max_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, max_val=self.__width
            )
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(
                f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def make_letter_box(self, sample):
        """Zero-pad an array symmetrically towards (height, width).

        NOTE: the same padding is used on both sides, so when the size
        difference is odd the result is one pixel short of the target.
        """
        top = bottom = (self.__height - sample.shape[0]) // 2
        left = right = (self.__width - sample.shape[1]) // 2
        sample = cv2.copyMakeBorder(
            sample, top, bottom, left, right, cv2.BORDER_CONSTANT, None, 0)
        return sample

    def __call__(self, sample):
        """Resize ``sample["image"]`` and, if enabled, its targets in place."""
        width, height = self.get_size(
            sample["image"].shape[1], sample["image"].shape[0]
        )

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__letter_box:
            sample["image"] = self.make_letter_box(sample["image"])

        if self.__resize_target:
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

                if self.__letter_box:
                    sample["disparity"] = self.make_letter_box(
                        sample["disparity"])

            if "depth" in sample:
                sample["depth"] = cv2.resize(
                    sample["depth"], (width,
                                      height), interpolation=cv2.INTER_NEAREST
                )

                if self.__letter_box:
                    sample["depth"] = self.make_letter_box(sample["depth"])

            # The mask is optional, like the other targets; previously a
            # sample without one raised KeyError here.
            if "mask" in sample:
                sample["mask"] = cv2.resize(
                    sample["mask"].astype(np.float32),
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

                if self.__letter_box:
                    sample["mask"] = self.make_letter_box(sample["mask"])

                sample["mask"] = sample["mask"].astype(bool)

        return sample
328
+
329
+
330
class ResizeFixed(object):
    """Resize "image", "disparity" and "mask" of a sample to one fixed size."""

    def __init__(self, size):
        # Stored as given; it is reversed before being handed to cv2.resize.
        self.__size = size

    def __call__(self, sample):
        dsize = self.__size[::-1]

        # Image: linear interpolation.
        sample["image"] = cv2.resize(
            sample["image"], dsize, interpolation=cv2.INTER_LINEAR
        )

        # Disparity: nearest-neighbour.
        sample["disparity"] = cv2.resize(
            sample["disparity"], dsize, interpolation=cv2.INTER_NEAREST
        )

        # Mask: resized as float32 with nearest-neighbour, then cast to bool.
        resized_mask = cv2.resize(
            sample["mask"].astype(np.float32),
            dsize,
            interpolation=cv2.INTER_NEAREST,
        )
        sample["mask"] = resized_mask.astype(bool)

        return sample
352
+
353
+
354
class Rescale(object):
    """Rescale target values to the interval [0, max_val].
    If input is constant, values are set to max_val / 2.
    """

    def __init__(self, max_val=1.0, use_mask=True):
        """Init.

        Args:
            max_val (float, optional): Max output value. Defaults to 1.0.
            use_mask (bool, optional): Only operate on valid pixels (mask == True). Defaults to True.
        """
        self.__max_val = max_val
        self.__use_mask = use_mask

    def __call__(self, sample):
        """Rescale ``sample["disparity"]`` in place and return the sample.

        Only the selected pixels are read and written. A sample with no
        selected pixel is returned untouched.
        """
        disp = sample["disparity"]

        if self.__use_mask:
            mask = sample["mask"]
        else:
            # Builtin bool: the np.bool alias was removed in NumPy 1.24.
            mask = np.ones_like(disp, dtype=bool)

        if np.sum(mask) == 0:
            return sample

        min_val = np.min(disp[mask])
        max_val = np.max(disp[mask])

        if max_val > min_val:
            sample["disparity"][mask] = (
                (disp[mask] - min_val) / (max_val - min_val) * self.__max_val
            )
        else:
            # Constant input: everything maps to the middle of the range.
            sample["disparity"][mask] = np.ones_like(
                disp[mask]) * self.__max_val / 2.0

        return sample
392
+
393
+
394
+ # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
395
class NormalizeImage(object):
    """Standardise ``sample["image"]`` with a given mean and std.
    """

    def __init__(self, mean, std):
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        # (image - mean) / std, written back into the sample.
        centered = sample["image"] - self.__mean
        sample["image"] = centered / self.__std

        return sample
407
+
408
+
409
class DepthToDisparity(object):
    """Replace ``sample["depth"]`` by its reciprocal, ``sample["disparity"]``.
    """

    def __init__(self, eps=1e-4):
        self.__eps = eps

    def __call__(self, sample):
        assert "depth" in sample

        depth = sample["depth"]
        invertible = depth >= self.__eps

        # Pixels with depth below eps are marked invalid and keep disparity 0.
        sample["mask"][depth < self.__eps] = False

        disparity = np.zeros_like(depth)
        disparity[invertible] = 1.0 / depth[invertible]
        sample["disparity"] = disparity

        del sample["depth"]

        return sample
429
+
430
+
431
class DisparityToDepth(object):
    """Replace ``sample["disparity"]`` by the reciprocal of its magnitude, ``sample["depth"]``.
    """

    def __init__(self, eps=1e-4):
        self.__eps = eps

    def __call__(self, sample):
        assert "disparity" in sample

        magnitude = np.abs(sample["disparity"])
        invertible = magnitude >= self.__eps

        # Pixels with |disparity| below eps are marked invalid and keep depth 0.
        sample["mask"][magnitude < self.__eps] = False

        depth = np.zeros_like(magnitude)
        depth[invertible] = 1.0 / magnitude[invertible]
        sample["depth"] = depth

        del sample["disparity"]

        return sample
456
+
457
+
458
class PrepareForNet(object):
    """Turn a sample's arrays into contiguous float32 arrays for the network.
    """

    def __init__(self):
        pass

    def __call__(self, sample):
        # Image: HWC -> CHW, made contiguous, then cast to float32.
        chw = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(chw).astype(np.float32)

        # Optional entries: cast to float32, then made contiguous.
        for key in ("mask", "disparity", "depth"):
            if key in sample:
                sample[key] = np.ascontiguousarray(
                    sample[key].astype(np.float32))

        return sample
ZoeDepth/zoedepth/data/vkitti.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ from torch.utils.data import Dataset, DataLoader
27
+ from torchvision import transforms
28
+ import os
29
+
30
+ from PIL import Image
31
+ import numpy as np
32
+ import cv2
33
+
34
+
35
class ToTensor(object):
    """Convert a ``{'image', 'depth'}`` sample into tensors tagged 'vkitti'.

    The image is normalised with the mean/std configured in ``__init__``.
    """

    def __init__(self):
        self.normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        # self.resize = transforms.Resize((375, 1242))

    def __call__(self, sample):
        """Return a new dict with tensor 'image' and 'depth' plus 'dataset'."""
        image, depth = sample['image'], sample['depth']

        image = self.to_tensor(image)
        image = self.normalize(image)
        depth = self.to_tensor(depth)

        # image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "vkitti"}

    def to_tensor(self, pic):
        """Convert an HWC ndarray or a PIL image into a CHW tensor.

        ndarrays are transposed and wrapped without a dtype change. PIL
        images are decoded per mode; byte-backed modes are returned as float.
        """
        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # handle PIL Image
        # np.array() is called with its default copy behaviour: the former
        # copy=False raises under NumPy >= 2 whenever a copy is unavoidable.
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        # HWC -> CHW
        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img
80
+
81
+
82
class VKITTI(Dataset):
    """Dataset pairing ``<root>/test_color/*.png`` with ``<root>/test_depth``."""

    def __init__(self, data_dir_root, do_kb_crop=True):
        """Index image files and derive the matching depth file paths.

        Args:
            data_dir_root: Folder containing ``test_color`` and ``test_depth``.
            do_kb_crop: Stored on the instance. The crop itself was
                hard-disabled in the original code (``and False``) and is
                therefore not applied, so this flag currently has no effect.
        """
        import glob
        # image paths are of the form <data_dir_root>/test_color/*.png
        # Sorted so the index -> sample mapping is reproducible.
        self.image_files = sorted(glob.glob(os.path.join(
            data_dir_root, "test_color", '*.png')))
        # Build the depth path from the file name only, so a root that
        # itself contains "test_color" is not rewritten.
        depth_dir = os.path.join(data_dir_root, "test_depth")
        self.depth_files = [
            os.path.join(
                depth_dir,
                os.path.basename(r).replace("test_color", "test_depth"))
            for r in self.image_files]
        self.do_kb_crop = do_kb_crop
        self.transform = ToTensor()

    def __getitem__(self, idx):
        """Load sample ``idx`` and pass it through ``self.transform``.

        Raises:
            FileNotFoundError: If the depth file cannot be decoded.
        """
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = Image.open(image_path)
        # Depth is decoded with OpenCV at its native bit depth.
        # NOTE(review): the array keeps whatever dtype cv2 returns — confirm
        # torch.from_numpy accepts it for this data.
        depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR |
                           cv2.IMREAD_ANYDEPTH)
        if depth is None:
            # cv2.imread signals failure by returning None.
            raise FileNotFoundError(
                f"Could not read depth map: {depth_path}")

        image = np.asarray(image, dtype=np.float32) / 255.0
        depth = depth[..., None]  # add a trailing channel axis
        sample = dict(image=image, depth=depth)

        return self.transform(sample)

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.image_files)
134
+
135
+
136
def get_vkitti_loader(data_dir_root, batch_size=1, **kwargs):
    """Return a ``DataLoader`` over ``VKITTI(data_dir_root)``.

    Extra keyword arguments are forwarded to ``DataLoader`` unchanged.
    """
    return DataLoader(VKITTI(data_dir_root), batch_size, **kwargs)
139
+
140
+
141
if __name__ == "__main__":
    # Manual smoke test: report shapes and depth range of the first batches.
    loader = get_vkitti_loader(
        data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti_test")
    print("Total files", len(loader.dataset))
    for i, sample in enumerate(loader):
        depth_batch = sample["depth"]
        print(sample["image"].shape)
        print(depth_batch.shape)
        print(sample["dataset"])
        print(depth_batch.min(), depth_batch.max())
        # Stop after the batch with index 6 has been printed.
        if i >= 6:
            break
ZoeDepth/zoedepth/data/vkitti2.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import os
26
+
27
+ import cv2
28
+ import numpy as np
29
+ import torch
30
+ from PIL import Image
31
+ from torch.utils.data import DataLoader, Dataset
32
+ from torchvision import transforms
33
+
34
+
35
class ToTensor(object):
    """Convert an ``{'image', 'depth'}`` sample into channel-first tensors.

    Calling an instance returns a new dict with the converted ``image`` and
    ``depth`` plus a constant ``'dataset': "vkitti"`` entry.
    """

    def __init__(self):
        # Image normalization is intentionally a no-op here (ImageNet
        # mean/std normalization is not applied at this stage).
        # A named function is used rather than a lambda so that instances,
        # and datasets that hold them, remain picklable. DataLoader worker
        # processes started with the "spawn" method pickle the dataset.
        self.normalize = ToTensor._identity

    @staticmethod
    def _identity(x):
        """Return ``x`` unchanged (placeholder for a normalization step)."""
        return x

    def __call__(self, sample):
        """Convert ``sample['image']`` and ``sample['depth']`` to tensors.

        Args:
            sample (dict): Must contain ``'image'`` and ``'depth'``; each is
                either a ``numpy.ndarray`` or a PIL image (see ``to_tensor``).

        Returns:
            dict: ``{'image': tensor, 'depth': tensor, 'dataset': "vkitti"}``.
        """
        image, depth = sample['image'], sample['depth']

        image = self.normalize(self.to_tensor(image))
        depth = self.to_tensor(depth)

        return {'image': image, 'depth': depth, 'dataset': "vkitti"}

    def to_tensor(self, pic):
        """Convert an array or PIL image to a channel-first ``torch.Tensor``.

        Args:
            pic: A 3-D ``numpy.ndarray`` (axes are permuted with
                ``transpose((2, 0, 1))``) or a PIL image.

        Returns:
            torch.Tensor: For arrays, a tensor sharing the array's dtype. For
            PIL images, a channel-first tensor; byte images are converted to
            float (values are not rescaled).
        """
        if isinstance(pic, np.ndarray):
            return torch.from_numpy(pic.transpose((2, 0, 1)))

        # Handle PIL images.
        # np.array(..., copy=False) raises under NumPy >= 2 whenever a copy is
        # unavoidable, so an explicit (copying) conversion is used instead.
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, dtype=np.int32))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, dtype=np.int16))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))

        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        # HWC -> CHW
        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        return img
81
+
82
+
83
class VKITTI2(Dataset):
    """Virtual KITTI 2 RGB/depth dataset (frames of ``Camera_0``).

    Image paths are of the form
    ``<data_dir_root>/rgb/<scene>/<variant>/frames/rgb/Camera_0/rgb_{}.jpg``;
    the matching depth path is derived by textual substitution (see
    ``_depth_path_for``).

    On first use, if ``<data_dir_root>/train.txt`` does not exist, a random
    train/test split is written to ``train.txt`` / ``test.txt`` so that about
    8% of the frames of each scene are used for testing.

    Args:
        data_dir_root (str): Dataset root directory.
        do_kb_crop (bool, optional): Crop image and depth to 1216x352
            (bottom-aligned, horizontally centred). Defaults to True.
        split (str, optional): ``"train"`` or ``"test"`` selects the files
            listed in the corresponding split file; any other value keeps
            every frame found on disk. Defaults to ``"test"``.
    """

    def __init__(self, data_dir_root, do_kb_crop=True, split="test"):
        import glob

        rgb_root = os.path.join(data_dir_root, "rgb")
        self.image_files = glob.glob(os.path.join(
            rgb_root, "**", "frames", "rgb", "Camera_0", '*.jpg'), recursive=True)
        self.depth_files = [self._depth_path_for(p) for p in self.image_files]
        # Honour the caller's choice (this used to be hard-coded to True).
        self.do_kb_crop = do_kb_crop
        self.transform = ToTensor()

        train_list = os.path.join(data_dir_root, "train.txt")
        test_list = os.path.join(data_dir_root, "test.txt")

        # If the train/test split has not been created yet, create it.
        if not os.path.exists(train_list):
            self._create_split(self.image_files, rgb_root,
                               train_list, test_list)

        if split == "train":
            split_list = train_list
        elif split == "test":
            split_list = test_list
        else:
            split_list = None  # keep every globbed frame

        if split_list is not None:
            with open(split_list, "r") as f:
                self.image_files = f.read().splitlines()
            self.depth_files = [self._depth_path_for(p)
                                for p in self.image_files]

    @staticmethod
    def _depth_path_for(image_path):
        """Map an RGB frame path to the path of its depth map."""
        return image_path.replace("/rgb/", "/depth/").replace(
            "rgb_", "depth_").replace(".jpg", ".png")

    @staticmethod
    def _scene_of(image_path, rgb_root):
        """Return the scene name: the first path component below ``rgb_root``."""
        return os.path.relpath(image_path, rgb_root).split(os.sep)[0]

    @staticmethod
    def _create_split(image_files, rgb_root, train_list, test_list):
        """Write a per-scene random 92/8 train/test split to the two files.

        Frames are grouped by scene; within each scene the frames are
        shuffled and the first 92% go to the training list, the rest to the
        test list. (Grouping used to key on a directory that is always
        ``frames``, which made the split global rather than per scene.)
        """
        import random

        by_scene = {}
        for path in image_files:
            by_scene.setdefault(
                VKITTI2._scene_of(path, rgb_root), []).append(path)

        train_files = []
        test_files = []
        for scene in sorted(by_scene):
            scene_files = by_scene[scene]
            random.shuffle(scene_files)
            cut = int(len(scene_files) * 0.92)
            train_files.extend(scene_files[:cut])
            test_files.extend(scene_files[cut:])

        with open(train_list, "w") as f:
            f.write("\n".join(train_files))
        with open(test_list, "w") as f:
            f.write("\n".join(test_files))

    def __getitem__(self, idx):
        """Load, optionally KB-crop, and convert one RGB/depth pair.

        Args:
            idx (int): Index into ``self.image_files`` / ``self.depth_files``.

        Returns:
            The result of ``self.transform`` applied to a dict with ``image``
            (float32, pixel values divided by 255) and ``depth`` (float32, in
            metres, values above 80 replaced by -1, trailing channel axis).

        Raises:
            FileNotFoundError: If OpenCV cannot read the depth file.
        """
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = Image.open(image_path)
        raw_depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR |
                               cv2.IMREAD_ANYDEPTH)
        if raw_depth is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(
                f"Could not read depth map: {depth_path}")
        depth = Image.fromarray(raw_depth / 100.0)  # cm to m

        if self.do_kb_crop:
            if idx == 0:
                print("Using KB input crop")
            height = image.height
            width = image.width
            top_margin = int(height - 352)
            left_margin = int((width - 1216) / 2)
            crop_box = (left_margin, top_margin,
                        left_margin + 1216, top_margin + 352)
            depth = depth.crop(crop_box)
            image = image.crop(crop_box)

        image = np.asarray(image, dtype=np.float32) / 255.0
        # np.array copies, so the in-place masking below is always allowed.
        depth = np.array(depth, dtype=np.float32)
        depth[depth > 80] = -1  # mark depths beyond 80 m with -1

        depth = depth[..., None]
        sample = self.transform(dict(image=image, depth=depth))

        if idx == 0:
            print(sample["image"].shape)

        return sample

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.image_files)
170
+
171
+
172
def get_vkitti2_loader(data_dir_root, batch_size=1, **kwargs):
    """Create a ``DataLoader`` over the ``VKITTI2`` dataset.

    Args:
        data_dir_root: Dataset root directory, passed to ``VKITTI2``.
        batch_size (int, optional): Batch size for the loader. Defaults to 1.
        **kwargs: Forwarded unchanged to ``DataLoader``.

    Returns:
        DataLoader: Loader wrapping a freshly constructed ``VKITTI2`` dataset.
    """
    return DataLoader(VKITTI2(data_dir_root), batch_size, **kwargs)
175
+
176
+
177
if __name__ == "__main__":
    # Smoke test: iterate the first few batches and print shapes / depth range.
    vkitti2_loader = get_vkitti2_loader(
        data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti2")
    print("Total files", len(vkitti2_loader.dataset))
    for step, batch in enumerate(vkitti2_loader):
        print(batch["image"].shape)
        print(batch["depth"].shape)
        print(batch["dataset"])
        print(batch['depth'].min(), batch['depth'].max())
        # Stop after the seventh batch (steps 0..6).
        if step >= 6:
            break
ZoeDepth/zoedepth/models/__init__.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
ZoeDepth/zoedepth/models/base_models/__init__.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
ZoeDepth/zoedepth/models/base_models/midas.py ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+ import numpy as np
28
+ from torchvision.transforms import Normalize
29
+
30
+
31
def denormalize(x):
    """Undo ImageNet mean/std normalization.

    Args:
        x (torch.Tensor - shape(N,3,H,W)): normalized input tensor

    Returns:
        torch.Tensor - shape(N,3,H,W): ``x * std + mean`` with the ImageNet
        per-channel statistics broadcast over batch and spatial axes
    """
    per_channel = (1, 3, 1, 1)
    imagenet_mean = torch.Tensor([0.485, 0.456, 0.406]).view(*per_channel)
    imagenet_std = torch.Tensor([0.229, 0.224, 0.225]).view(*per_channel)
    # Move the statistics to wherever the input lives before combining.
    imagenet_mean = imagenet_mean.to(x.device)
    imagenet_std = imagenet_std.to(x.device)
    return x * imagenet_std + imagenet_mean
43
+
44
def get_activation(name, bank):
    """Build a forward hook that records a module's output in ``bank[name]``.

    Args:
        name: Key under which the output is stored.
        bank: Mutable mapping that receives the output on every forward call.

    Returns:
        A callable with the ``(module, inputs, output)`` forward-hook signature.
    """
    def _record(module, inputs, output):
        bank[name] = output

    return _record
48
+
49
+
50
class Resize(object):
    """Bilinearly resize an image batch towards a target (width, height)."""

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
    ):
        """Store the resize configuration and print it.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional): Only printed here; it is not
                stored or otherwise used by this class. Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input. The output might
                not have the given width and height; the outcome depends on
                'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height are constrained to be a multiple of
                this value. Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at most as large as the given size.
                "minimal": Scale as little as possible.
                Defaults to "lower_bound".
        """
        print("Params passed to Resize transform:")
        for label, value in (
            ("width", width),
            ("height", height),
            ("resize_target", resize_target),
            ("keep_aspect_ratio", keep_aspect_ratio),
            ("ensure_multiple_of", ensure_multiple_of),
            ("resize_method", resize_method),
        ):
            print(f"\t{label}: ", value)

        self.__width = width
        self.__height = height

        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        """Snap ``x`` to a multiple of ``ensure_multiple_of``.

        Rounds to the nearest multiple; falls back to rounding down when that
        exceeds ``max_val`` and then to rounding up when the result is below
        ``min_val``.
        """
        step = self.__multiple_of
        ratio = x / step

        snapped = (np.round(ratio) * step).astype(int)
        if max_val is not None and snapped > max_val:
            snapped = (np.floor(ratio) * step).astype(int)
        if snapped < min_val:
            snapped = (np.ceil(ratio) * step).astype(int)
        return snapped

    def get_size(self, width, height):
        """Compute the output ``(new_width, new_height)`` for an input size.

        Raises:
            ValueError: If the configured resize method is not one of
                "lower_bound", "upper_bound" or "minimal".
        """
        method = self.__resize_method
        scale_h = self.__height / height
        scale_w = self.__width / width

        if self.__keep_aspect_ratio:
            # Use one common scale for both axes.
            if method == "lower_bound":
                # output must be at least the requested size
                scale_h = scale_w = max(scale_w, scale_h)
            elif method == "upper_bound":
                # output must be at most the requested size
                scale_h = scale_w = min(scale_w, scale_h)
            elif method == "minimal":
                # scale as little as possible: pick the factor closest to 1
                if abs(1 - scale_w) < abs(1 - scale_h):
                    scale_h = scale_w
                else:
                    scale_w = scale_h
            else:
                raise ValueError(
                    f"resize_method {self.__resize_method} not implemented"
                )

        if method == "lower_bound":
            bounds_h = {"min_val": self.__height}
            bounds_w = {"min_val": self.__width}
        elif method == "upper_bound":
            bounds_h = {"max_val": self.__height}
            bounds_w = {"max_val": self.__width}
        elif method == "minimal":
            bounds_h = {}
            bounds_w = {}
        else:
            raise ValueError(
                f"resize_method {self.__resize_method} not implemented")

        new_height = self.constrain_to_multiple_of(scale_h * height, **bounds_h)
        new_width = self.constrain_to_multiple_of(scale_w * width, **bounds_w)
        return (new_width, new_height)

    def __call__(self, x):
        """Resize ``x`` (last two axes are height, width) with bilinear interpolation."""
        target_w, target_h = self.get_size(*reversed(x.shape[-2:]))
        return nn.functional.interpolate(x, (target_h, target_w), mode='bilinear', align_corners=True)
174
+
175
class PrepForMidas(object):
    """Input preparation for MiDaS: optional resize followed by normalization.

    Normalization uses mean 0.5 and std 0.5 on each of the three channels.
    When resizing is enabled, sizes are constrained to multiples of 32.
    """

    def __init__(self, resize_mode="minimal", keep_aspect_ratio=True, img_size=384, do_resize=True):
        # A single int means a square (H, W) target.
        if isinstance(img_size, int):
            img_size = (img_size, img_size)
        net_h, net_w = img_size

        self.normalization = Normalize(
            mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

        if do_resize:
            self.resizer = Resize(
                net_w,
                net_h,
                keep_aspect_ratio=keep_aspect_ratio,
                ensure_multiple_of=32,
                resize_method=resize_mode,
            )
        else:
            self.resizer = nn.Identity()

    def __call__(self, x):
        """Resize (if enabled) and then normalize ``x``."""
        resized = self.resizer(x)
        return self.normalization(resized)
187
+
188
+
189
class MidasCore(nn.Module):
    """Wrapper around a MiDaS model that exposes multi-scale features.

    Features are captured with forward hooks registered on selected MiDaS
    sub-modules and stored in ``self.core_out`` under the layer names.
    """

    def __init__(self, midas, trainable=False, fetch_features=True, layer_names=('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1'), freeze_bn=False, keep_aspect_ratio=True,
                 img_size=384, **kwargs):
        """Midas Base model used for multi-scale feature extraction.

        Args:
            midas (torch.nn.Module): Midas model.
            trainable (bool, optional): Train midas model. Defaults to False.
            fetch_features (bool, optional): Extract multi-scale features. Defaults to True.
            layer_names (tuple, optional): Layers used for feature extraction. Order = (head output features, last layer features, ...decoder features). Defaults to ('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1').
            freeze_bn (bool, optional): Freeze BatchNorm. Generally results in better finetuning performance. Defaults to False.
            keep_aspect_ratio (bool, optional): Keep the aspect ratio of input images while resizing. Defaults to True.
            img_size (int, tuple, optional): Input resolution. Defaults to 384.
            **kwargs: Only ``do_resize`` (default True) is read; it is
                forwarded to ``PrepForMidas``.
        """
        super().__init__()
        self.core = midas
        self.output_channels = None
        self.core_out = {}
        self.trainable = trainable
        self.fetch_features = fetch_features
        self.handles = []
        self.layer_names = layer_names

        self.set_trainable(trainable)
        self.set_fetch_features(fetch_features)

        self.prep = PrepForMidas(keep_aspect_ratio=keep_aspect_ratio,
                                 img_size=img_size, do_resize=kwargs.get('do_resize', True))

        if freeze_bn:
            self.freeze_bn()

    def set_trainable(self, trainable):
        """Freeze or unfreeze all parameters; returns ``self``."""
        self.trainable = trainable
        if trainable:
            self.unfreeze()
        else:
            self.freeze()
        return self

    def set_fetch_features(self, fetch_features):
        """Attach (if none are attached) or remove the feature hooks; returns ``self``."""
        self.fetch_features = fetch_features
        if fetch_features:
            if len(self.handles) == 0:
                self.attach_hooks(self.core)
        else:
            self.remove_hooks()
        return self

    def freeze(self):
        """Disable gradients for every parameter; returns ``self``."""
        for p in self.parameters():
            p.requires_grad = False
        self.trainable = False
        return self

    def unfreeze(self):
        """Enable gradients for every parameter; returns ``self``."""
        for p in self.parameters():
            p.requires_grad = True
        self.trainable = True
        return self

    def freeze_bn(self):
        """Put every ``BatchNorm2d`` sub-module into eval mode; returns ``self``.

        NOTE(review): this only switches the mode at call time; a later
        ``.train()`` on the model presumably switches them back. Confirm
        whether callers re-apply it.
        """
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
        return self

    def forward(self, x, denorm=False, return_rel_depth=False):
        """Run MiDaS and return the hooked features.

        Args:
            x (torch.Tensor): Input image batch.
            denorm (bool, optional): Undo ImageNet normalization on ``x``
                before the MiDaS-specific preparation. Defaults to False.
            return_rel_depth (bool, optional): Also return the MiDaS output.
                Defaults to False.

        Returns:
            The MiDaS output alone if ``fetch_features`` is off; otherwise the
            list of features in ``layer_names`` order, preceded by the MiDaS
            output when ``return_rel_depth`` is True.
        """
        # Input preparation never needs gradients.
        with torch.no_grad():
            if denorm:
                x = denormalize(x)
            x = self.prep(x)

        with torch.set_grad_enabled(self.trainable):
            rel_depth = self.core(x)
            if not self.fetch_features:
                return rel_depth
        out = [self.core_out[k] for k in self.layer_names]

        if return_rel_depth:
            return rel_depth, out
        return out

    def get_rel_pos_params(self):
        """Yield encoder parameters whose name contains "relative_position"."""
        for name, p in self.core.pretrained.named_parameters():
            if "relative_position" in name:
                yield p

    def get_enc_params_except_rel_pos(self):
        """Yield encoder parameters whose name does not contain "relative_position"."""
        for name, p in self.core.pretrained.named_parameters():
            if "relative_position" not in name:
                yield p

    def freeze_encoder(self, freeze_rel_pos=False):
        """Disable gradients for the encoder (``core.pretrained``); returns ``self``.

        With ``freeze_rel_pos=False`` the "relative_position" parameters are
        left untouched.
        """
        if freeze_rel_pos:
            for p in self.core.pretrained.parameters():
                p.requires_grad = False
        else:
            for p in self.get_enc_params_except_rel_pos():
                p.requires_grad = False
        return self

    def attach_hooks(self, midas):
        """Register forward hooks for every name in ``layer_names``; returns ``self``.

        Any hooks that are still registered are removed first.
        """
        if len(self.handles) > 0:
            self.remove_hooks()

        scratch = midas.scratch
        if "out_conv" in self.layer_names:
            # Hook the child at index 3 of the output head.
            self.handles.append(list(scratch.output_conv.children())[
                                3].register_forward_hook(get_activation("out_conv", self.core_out)))
        for layer_name, attr in (
            ("r4", "refinenet4"),
            ("r3", "refinenet3"),
            ("r2", "refinenet2"),
            ("r1", "refinenet1"),
            ("l4_rn", "layer4_rn"),
        ):
            if layer_name in self.layer_names:
                self.handles.append(getattr(scratch, attr).register_forward_hook(
                    get_activation(layer_name, self.core_out)))

        return self

    def remove_hooks(self):
        """Remove all registered hooks and forget their handles; returns ``self``."""
        for h in self.handles:
            h.remove()
        # Clearing the list matters: set_fetch_features() only re-attaches
        # hooks when no handles are recorded. Leaving removed handles here
        # meant a disable/enable cycle never re-registered the hooks.
        self.handles.clear()
        return self

    def __del__(self):
        # __init__ may have failed before `handles` was assigned; a finalizer
        # must not raise in that case.
        if "handles" in self.__dict__:
            self.remove_hooks()

    def set_output_channels(self, model_type):
        """Look up the per-layer channel counts for ``model_type``."""
        self.output_channels = MIDAS_SETTINGS[model_type]

    @staticmethod
    def build(midas_model_type="DPT_BEiT_L_384", train_midas=False, use_pretrained_midas=True, fetch_features=False, freeze_bn=True, force_keep_ar=False, force_reload=False, **kwargs):
        """Load a MiDaS model via ``torch.hub`` and wrap it in ``MidasCore``.

        Raises:
            ValueError: If ``midas_model_type`` is not a key of ``MIDAS_SETTINGS``.
        """
        if midas_model_type not in MIDAS_SETTINGS:
            raise ValueError(
                f"Invalid model type: {midas_model_type}. Must be one of {list(MIDAS_SETTINGS.keys())}")
        if "img_size" in kwargs:
            kwargs = MidasCore.parse_img_size(kwargs)
        img_size = kwargs.pop("img_size", [384, 384])
        print("img_size", img_size)
        midas = torch.hub.load("intel-isl/MiDaS", midas_model_type,
                               pretrained=use_pretrained_midas, force_reload=force_reload)
        kwargs.update({'keep_aspect_ratio': force_keep_ar})
        midas_core = MidasCore(midas, trainable=train_midas, fetch_features=fetch_features,
                               freeze_bn=freeze_bn, img_size=img_size, **kwargs)
        midas_core.set_output_channels(midas_model_type)
        return midas_core

    @staticmethod
    def build_from_config(config):
        """Call ``build`` with ``config`` expanded as keyword arguments."""
        return MidasCore.build(**config)

    @staticmethod
    def parse_img_size(config):
        """Normalize ``config['img_size']`` in place to a two-element list.

        Accepts a ``"H,W"`` string, a single int (square), or a list/tuple of
        two values. Returns the same ``config`` object.
        """
        assert 'img_size' in config
        img_size = config['img_size']
        if isinstance(img_size, str):
            assert "," in img_size, "img_size should be a string with comma separated img_size=H,W"
            config['img_size'] = list(map(int, img_size.split(",")))
            assert len(
                config['img_size']) == 2, "img_size should be a string with comma separated img_size=H,W"
        elif isinstance(img_size, int):
            config['img_size'] = [img_size, img_size]
        else:
            assert isinstance(img_size, (list, tuple)) and len(
                img_size) == 2, "img_size should be a list of H,W"
            if isinstance(img_size, tuple):
                config['img_size'] = list(img_size)
        return config
367
+
368
+
369
# Per-layer output channel counts -> MiDaS model types that produce them.
nchannels2models = {
    (256,) * 5: [
        "DPT_BEiT_L_384",
        "DPT_BEiT_L_512",
        "DPT_BEiT_B_384",
        "DPT_SwinV2_L_384",
        "DPT_SwinV2_B_384",
        "DPT_SwinV2_T_256",
        "DPT_Large",
        "DPT_Hybrid",
    ],
    (512, 256, 128, 64, 64): ["MiDaS_small"],
}

# Model name to number of output channels
MIDAS_SETTINGS = {
    model_name: channels
    for channels, model_names in nchannels2models.items()
    for model_name in model_names
}
ZoeDepth/zoedepth/models/builder.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ from importlib import import_module
26
+ from zoedepth.models.depth_model import DepthModel
27
+
28
def build_model(config) -> DepthModel:
    """Build the model named in ``config``.

    The module ``zoedepth.models.<config.model>`` is imported, its
    ``get_version`` function is called with ``config.version_name``, and the
    result's ``build_from_config(config)`` is returned. Use this to construct
    models for training and evaluation.

    Args:
        config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder.

    Returns:
        torch.nn.Module: Model corresponding to name and version as specified in config

    Raises:
        ValueError: If the model module cannot be imported or does not define
            ``get_version``.
    """
    try:
        model_module = import_module(f"zoedepth.models.{config.model}")
    except ModuleNotFoundError as err:
        # print the original error message
        print(err)
        raise ValueError(
            f"Model {config.model} not found. Refer above error for details.") from err

    try:
        version_lookup = getattr(model_module, "get_version")
    except AttributeError as err:
        raise ValueError(
            f"Model {config.model} has no get_version function.") from err

    model_cls = version_lookup(config.version_name)
    return model_cls.build_from_config(config)
ZoeDepth/zoedepth/models/depth_model.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import numpy as np
26
+ import torch
27
+ import torch.nn as nn
28
+ import torch.nn.functional as F
29
+ from torchvision import transforms
30
+ import PIL.Image
31
+ from PIL import Image
32
+ from typing import Union
33
+
34
+
35
class DepthModel(nn.Module):
    """Base class for depth models with padding / flip test-time augmentation.

    Subclasses implement ``forward`` and must return a mapping that contains
    the key ``'metric_depth'``.
    """

    def __init__(self):
        super().__init__()
        # Device that infer_pil() moves its input to; updated by to().
        self.device = 'cpu'

    def to(self, *args, **kwargs) -> nn.Module:
        """Move/cast the module like ``nn.Module.to`` and remember the device.

        Accepts every call form of ``nn.Module.to``. ``self.device`` is only
        updated when a device is actually given (as ``device=`` keyword, as
        first positional device/str/int, or implicitly through a tensor); a
        bare dtype leaves it unchanged.
        """
        target = kwargs.get("device")
        if target is None and args:
            first = args[0]
            if isinstance(first, torch.Tensor):
                target = first.device
            elif isinstance(first, (str, torch.device)) or (
                    isinstance(first, int) and not isinstance(first, bool)):
                target = first
        if target is not None:
            self.device = target
        return super().to(*args, **kwargs)

    def forward(self, x, *args, **kwargs):
        raise NotImplementedError

    def _infer(self, x: torch.Tensor):
        """
        Inference interface for the model
        Args:
            x (torch.Tensor): input tensor of shape (b, c, h, w)
        Returns:
            torch.Tensor: output tensor of shape (b, 1, h, w)
        """
        return self(x)['metric_depth']

    def _infer_with_pad_aug(self, x: torch.Tensor, pad_input: bool=True, fh: float=3, fw: float=3, upsampling_mode: str='bicubic', padding_mode="reflect", **kwargs) -> torch.Tensor:
        """
        Inference interface for the model with padding augmentation
        Padding augmentation fixes the boundary artifacts in the output depth map.
        Boundary artifacts are sometimes caused by the fact that the model is trained on NYU raw dataset which has a black or white border around the image.
        This augmentation pads the input image and crops the prediction back to the original size / view.

        Note: This augmentation is not required for the models trained with 'avoid_boundary'=True.
        Args:
            x (torch.Tensor): input tensor of shape (b, c, h, w)
            pad_input (bool, optional): whether to pad the input or not. Defaults to True.
            fh (float, optional): height padding factor. The padding is calculated as sqrt(h/2) * fh. Defaults to 3.
            fw (float, optional): width padding factor. The padding is calculated as sqrt(w/2) * fw. Defaults to 3.
            upsampling_mode (str, optional): upsampling mode. Defaults to 'bicubic'.
            padding_mode (str, optional): padding mode. Defaults to "reflect".
            **kwargs: forwarded to ``torch.nn.functional.pad``.
        Returns:
            torch.Tensor: output tensor of shape (b, 1, h, w)
        """
        # assert x is nchw and c = 3
        assert x.dim() == 4, "x must be 4 dimensional, got {}".format(x.dim())
        assert x.shape[1] == 3, "x must have 3 channels, got {}".format(x.shape[1])

        if pad_input:
            assert fh > 0 or fw > 0, "atlease one of fh and fw must be greater than 0"
            pad_h = int(np.sqrt(x.shape[2]/2) * fh)
            pad_w = int(np.sqrt(x.shape[3]/2) * fw)
            # F.pad takes the last dimension first: (left, right[, top, bottom]).
            padding = [pad_w, pad_w]
            if pad_h > 0:
                padding += [pad_h, pad_h]

            x = F.pad(x, padding, mode=padding_mode, **kwargs)
        out = self._infer(x)
        if out.shape[-2:] != x.shape[-2:]:
            out = F.interpolate(out, size=(x.shape[2], x.shape[3]), mode=upsampling_mode, align_corners=False)
        if pad_input:
            # crop to the original size, handling the case where pad_h or pad_w is 0
            # (a `0:-0` slice would be empty)
            if pad_h > 0:
                out = out[:, :, pad_h:-pad_h, :]
            if pad_w > 0:
                out = out[:, :, :, pad_w:-pad_w]
        return out

    def infer_with_flip_aug(self, x, pad_input: bool=True, **kwargs) -> torch.Tensor:
        """
        Inference interface for the model with horizontal flip augmentation
        Horizontal flip augmentation improves the accuracy of the model by averaging the output of the model with and without horizontal flip.
        Args:
            x (torch.Tensor): input tensor of shape (b, c, h, w)
            pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
        Returns:
            torch.Tensor: output tensor of shape (b, 1, h, w)
        """
        # infer with horizontal flip and average
        out = self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs)
        out_flip = self._infer_with_pad_aug(torch.flip(x, dims=[3]), pad_input=pad_input, **kwargs)
        out = (out + torch.flip(out_flip, dims=[3])) / 2
        return out

    def infer(self, x, pad_input: bool=True, with_flip_aug: bool=True, **kwargs) -> torch.Tensor:
        """
        Inference interface for the model
        Args:
            x (torch.Tensor): input tensor of shape (b, c, h, w)
            pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
            with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True.
        Returns:
            torch.Tensor: output tensor of shape (b, 1, h, w)
        """
        if with_flip_aug:
            return self.infer_with_flip_aug(x, pad_input=pad_input, **kwargs)
        else:
            return self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs)

    @torch.no_grad()
    def infer_pil(self, pil_img, pad_input: bool=True, with_flip_aug: bool=True, output_type: str="numpy", **kwargs) -> Union[np.ndarray, PIL.Image.Image, torch.Tensor]:
        """
        Inference interface for the model for PIL image
        Args:
            pil_img (PIL.Image.Image): input PIL image. Images that are not in RGB mode (e.g. RGBA, grayscale) are converted to RGB first.
            pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
            with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True.
            output_type (str, optional): output type. Supported values are 'numpy', 'pil' and 'tensor'. Defaults to "numpy".
        Raises:
            ValueError: if output_type is not one of the supported values.
        """
        # The model asserts a 3-channel input; only PIL images are converted,
        # anything else is passed to ToTensor unchanged as before.
        if isinstance(pil_img, Image.Image) and pil_img.mode != "RGB":
            pil_img = pil_img.convert("RGB")
        x = transforms.ToTensor()(pil_img).unsqueeze(0).to(self.device)
        out_tensor = self.infer(x, pad_input=pad_input, with_flip_aug=with_flip_aug, **kwargs)
        if output_type == "numpy":
            return out_tensor.squeeze().cpu().numpy()
        elif output_type == "pil":
            # uint16 is required for depth pil image
            out_16bit_numpy = (out_tensor.squeeze().cpu().numpy()*256).astype(np.uint16)
            return Image.fromarray(out_16bit_numpy)
        elif output_type == "tensor":
            return out_tensor.squeeze().cpu()
        else:
            raise ValueError(f"output_type {output_type} not supported. Supported values are 'numpy', 'pil' and 'tensor'")
+ raise ValueError(f"output_type {output_type} not supported. Supported values are 'numpy', 'pil' and 'tensor'")
152
+
ZoeDepth/zoedepth/models/layers/attractor.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+
28
+
29
@torch.jit.script
def exp_attractor(dx, alpha: float = 300, gamma: int = 2):
    """Exponential attractor: dc = exp(-alpha*|dx|^gamma) * dx, where dx = a - c, a = attractor point, c = bin center, dc = shift in bin center.

    Args:
        dx (torch.Tensor): The difference tensor dx = Ai - Cj, where Ai is the attractor point and Cj is the bin center.
        alpha (float, optional): Proportional Attractor strength. Determines the absolute strength. Lower alpha = greater attraction. Defaults to 300.
        gamma (int, optional): Exponential Attractor strength. Determines the "region of influence" and indirectly number of bin centers affected. Lower gamma = farther reach. Defaults to 2.

    Returns:
        torch.Tensor : Delta shifts - dc; New bin centers = Old bin centers + dc
    """
    # Exponentially decaying weight of each attractor/center pair.
    decay = torch.exp(-alpha * torch.abs(dx).pow(gamma))
    return decay * dx
42
+
43
+
44
@torch.jit.script
def inv_attractor(dx, alpha: float = 300, gamma: int = 2):
    """Inverse attractor: dc = dx / (1 + alpha*dx^gamma), where dx = a - c, a = attractor point, c = bin center, dc = shift in bin center.
    This is the default one according to the accompanying paper.

    Args:
        dx (torch.Tensor): The difference tensor dx = Ai - Cj, where Ai is the attractor point and Cj is the bin center.
        alpha (float, optional): Proportional Attractor strength. Determines the absolute strength. Lower alpha = greater attraction. Defaults to 300.
        gamma (int, optional): Exponential Attractor strength. Determines the "region of influence" and indirectly number of bin centers affected. Lower gamma = farther reach. Defaults to 2.

    Returns:
        torch.Tensor: Delta shifts - dc; New bin centers = Old bin centers + dc
    """
    # dx is raised to gamma without taking the absolute value first.
    denominator = alpha * dx.pow(gamma) + 1
    return dx / denominator
58
+
59
+
60
class AttractorLayer(nn.Module):
    def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10,
                 alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False):
        """
        Attractor layer for bin centers. Bin centers are bounded on the interval (min_depth, max_depth)

        Args:
            in_features (int): channels of the input feature block
            n_bins (int): number of bins (stored; not read by forward)
            n_attractors (int, optional): number of attractor points per pixel. Defaults to 16.
            mlp_dim (int, optional): hidden channels of the 1x1-conv MLP. Defaults to 128.
            min_depth (float, optional): lower bound used to scale/clip the bin centers. Defaults to 1e-3.
            max_depth (float, optional): upper bound used to scale/clip the bin centers. Defaults to 10.
            alpha (float, optional): stored attractor strength (see NOTE in forward). Defaults to 300.
            gamma (int, optional): stored attractor exponent (see NOTE in forward). Defaults to 2.
            kind (str, optional): 'sum' or 'mean' reduction over attractors. Defaults to 'sum'.
            attractor_type (str, optional): 'exp' selects exp_attractor, anything else inv_attractor. Defaults to 'exp'.
            memory_efficient (bool, optional): accumulate attractor by attractor instead of
                materialising the full (n, n_attractors, nbins, h, w) tensor. Defaults to False.
        """
        super().__init__()

        self.n_attractors = n_attractors
        self.n_bins = n_bins
        self.min_depth = min_depth
        self.max_depth = max_depth
        self.alpha = alpha
        self.gamma = gamma
        self.kind = kind
        self.attractor_type = attractor_type
        self.memory_efficient = memory_efficient

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_attractors*2, 1, 1, 0),  # x2 for linear norm
            nn.ReLU(inplace=True)
        )

    def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
        """
        Args:
            x (torch.Tensor) : feature block; shape - n, c, h, w
            b_prev (torch.Tensor) : previous bin centers normed; shape - n, prev_nbins, h, w
            prev_b_embedding (torch.Tensor, optional): embedding added to x before the MLP.
            interpolate (bool, optional): bilinearly resize prev_b_embedding to x's spatial size first.
            is_for_query (bool, optional): unused; accepted for API compatibility.

        Returns:
            tuple(torch.Tensor,torch.Tensor) : new bin centers normed and scaled; shape - n, nbins, h, w.
            The first element is returned as computed (unsorted, unclipped); the second is
            scaled to depth units, sorted along dim 1 and clipped to [min_depth, max_depth].
        """
        if prev_b_embedding is not None:
            if interpolate:
                prev_b_embedding = nn.functional.interpolate(
                    prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
            x = x + prev_b_embedding

        A = self._net(x)
        eps = 1e-3
        A = A + eps
        n, c, h, w = A.shape
        A = A.view(n, self.n_attractors, 2, h, w)
        # The pairwise ratio A / A.sum(dim=2) used to be computed here and was
        # immediately overwritten, so it never influenced the output. The dead
        # computation is dropped; the attractor points remain the first channel
        # of each pair, exactly as before.
        A_normed = A[:, :, 0, ...]  # n, na, h, w

        b_prev = nn.functional.interpolate(
            b_prev, (h, w), mode='bilinear', align_corners=True)
        b_centers = b_prev

        if self.attractor_type == 'exp':
            dist = exp_attractor
        else:
            dist = inv_attractor

        # NOTE(review): self.alpha / self.gamma are not passed to `dist`, so the
        # attractor functions run with their own defaults. Left unchanged on
        # purpose: passing them would alter numerical results - confirm intent.
        if not self.memory_efficient:
            func = {'mean': torch.mean, 'sum': torch.sum}[self.kind]
            # .shape N, nbins, h, w
            delta_c = func(dist(A_normed.unsqueeze(
                2) - b_centers.unsqueeze(1)), dim=1)
        else:
            delta_c = torch.zeros_like(b_centers)
            for i in range(self.n_attractors):
                # .shape N, nbins, h, w
                delta_c += dist(A_normed[:, i, ...].unsqueeze(1) - b_centers)

            if self.kind == 'mean':
                delta_c = delta_c / self.n_attractors

        b_new_centers = b_centers + delta_c
        B_centers = (self.max_depth - self.min_depth) * \
            b_new_centers + self.min_depth
        B_centers, _ = torch.sort(B_centers, dim=1)
        B_centers = torch.clip(B_centers, self.min_depth, self.max_depth)
        return b_new_centers, B_centers
137
+
138
+
139
class AttractorLayerUnnormed(nn.Module):
    def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10,
                 alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False):
        """
        Attractor layer for bin centers. Bin centers are unbounded

        The constructor arguments mirror the bounded attractor layer; the
        attractor points come from a 1x1-conv MLP ending in Softplus.
        """
        super().__init__()

        self.n_attractors = n_attractors
        self.n_bins = n_bins
        self.min_depth = min_depth
        self.max_depth = max_depth
        self.alpha = alpha
        self.gamma = gamma
        self.kind = kind
        self.attractor_type = attractor_type
        self.memory_efficient = memory_efficient

        layers = [
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_attractors, 1, 1, 0),
            nn.Softplus(),
        ]
        self._net = nn.Sequential(*layers)

    def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
        """
        Args:
            x (torch.Tensor) : feature block; shape - n, c, h, w
            b_prev (torch.Tensor) : previous bin centers normed; shape - n, prev_nbins, h, w
            prev_b_embedding (torch.Tensor, optional): embedding added to x before the MLP.
            interpolate (bool, optional): bilinearly resize prev_b_embedding to x's spatial size first.
            is_for_query (bool, optional): unused; accepted for API compatibility.

        Returns:
            tuple(torch.Tensor,torch.Tensor) : new bin centers unbounded; shape - n, nbins, h, w. Two outputs just to keep the API consistent with the normed version
        """
        if prev_b_embedding is not None:
            if interpolate:
                prev_b_embedding = nn.functional.interpolate(
                    prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
            x = x + prev_b_embedding

        attractors = self._net(x)
        n, c, h, w = attractors.shape

        # Bring the previous centers to the resolution of the attractor map.
        centers = nn.functional.interpolate(
            b_prev, (h, w), mode='bilinear', align_corners=True)

        # NOTE(review): self.alpha / self.gamma are stored but not passed to the
        # attractor function, so its own defaults apply - confirm intent.
        dist = exp_attractor if self.attractor_type == 'exp' else inv_attractor

        if self.memory_efficient:
            shift = torch.zeros_like(centers, device=centers.device)
            for idx in range(self.n_attractors):
                # .shape N, nbins, h, w
                shift += dist(attractors[:, idx, ...].unsqueeze(1) - centers)

            if self.kind == 'mean':
                shift = shift / self.n_attractors
        else:
            reduce_fn = {'mean': torch.mean, 'sum': torch.sum}[self.kind]
            # .shape N, nbins, h, w
            pairwise = attractors.unsqueeze(2) - centers.unsqueeze(1)
            shift = reduce_fn(dist(pairwise), dim=1)

        new_centers = centers + shift
        return new_centers, new_centers
ZoeDepth/zoedepth/models/layers/dist_layers.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+
28
+
29
def log_binom(n, k, eps=1e-7):
    """ log(nCk) using stirling approximation

    ``eps`` is added to ``n``, to ``k`` and inside the last logarithm so that
    no logarithm is taken at exactly zero.
    """
    n_shifted = n + eps
    k_shifted = k + eps
    remainder = n_shifted - k_shifted
    return (n_shifted * torch.log(n_shifted)
            - k_shifted * torch.log(k_shifted)
            - remainder * torch.log(remainder + eps))
34
+
35
+
36
class LogBinomial(nn.Module):
    def __init__(self, n_classes=256, act=torch.softmax):
        """Compute log binomial distribution for n_classes

        Args:
            n_classes (int, optional): number of output classes. Defaults to 256.
            act (callable, optional): applied to the log-binomial scores along dim 1. Defaults to torch.softmax.
        """
        super().__init__()
        self.K = n_classes
        self.act = act
        # Class indices 0..K-1 and the constant K-1, both shaped to broadcast over NCHW.
        class_indices = torch.arange(0, n_classes).view(1, -1, 1, 1)
        last_index = torch.Tensor([self.K-1]).view(1, -1, 1, 1)
        self.register_buffer('k_idx', class_indices)
        self.register_buffer('K_minus_1', last_index)

    def forward(self, x, t=1., eps=1e-4):
        """Compute log binomial distribution for x

        Args:
            x (torch.Tensor - NCHW): probabilities
            t (float, torch.Tensor - NCHW, optional): Temperature of distribution. Defaults to 1..
            eps (float, optional): Small number for numerical stability. Defaults to 1e-4.

        Returns:
            torch.Tensor -NCHW: log binomial distribution logbinomial(p;t)
        """
        if x.ndim == 3:
            x = x.unsqueeze(1)  # make it nchw

        # Clamp both p and 1-p away from zero before taking logarithms.
        q = torch.clamp(1 - x, eps, 1)
        p = torch.clamp(x, eps, 1)

        coeff_term = log_binom(self.K_minus_1, self.k_idx)
        success_term = self.k_idx * torch.log(p)
        failure_term = (self.K - 1 - self.k_idx) * torch.log(q)
        scores = coeff_term + success_term + failure_term
        return self.act(scores/t, dim=1)
70
+
71
+
72
class ConditionalLogBinomial(nn.Module):
    def __init__(self, in_features, condition_dim, n_classes=256, bottleneck_factor=2, p_eps=1e-4, max_temp=50, min_temp=1e-7, act=torch.softmax):
        """Conditional Log Binomial distribution

        Args:
            in_features (int): number of input channels in main feature
            condition_dim (int): number of input channels in condition feature
            n_classes (int, optional): Number of classes. Defaults to 256.
            bottleneck_factor (int, optional): Hidden dim factor. Defaults to 2.
            p_eps (float, optional): small eps value. Defaults to 1e-4.
            max_temp (float, optional): Maximum temperature of output distribution. Defaults to 50.
            min_temp (float, optional): Minimum temperature of output distribution. Defaults to 1e-7.
            act (callable, optional): activation handed to the LogBinomial transform. Defaults to torch.softmax.
        """
        super().__init__()
        self.p_eps = p_eps
        self.max_temp = max_temp
        self.min_temp = min_temp
        self.log_binomial_transform = LogBinomial(n_classes, act=act)

        joint_channels = in_features + condition_dim
        bottleneck = joint_channels // bottleneck_factor
        self.mlp = nn.Sequential(
            nn.Conv2d(joint_channels, bottleneck,
                      kernel_size=1, stride=1, padding=0),
            nn.GELU(),
            # 2 for p linear norm, 2 for t linear norm
            nn.Conv2d(bottleneck, 2+2, kernel_size=1, stride=1, padding=0),
            nn.Softplus()
        )

    def forward(self, x, cond):
        """Forward pass

        Args:
            x (torch.Tensor - NCHW): Main feature
            cond (torch.Tensor - NCHW): condition feature

        Returns:
            torch.Tensor: Output log binomial distribution
        """
        joint = torch.cat((x, cond), dim=1)
        pt = self.mlp(joint)

        # First two channels parameterise p, the remaining two the temperature.
        p_pair = pt[:, :2, ...] + self.p_eps
        t_pair = pt[:, 2:, ...] + self.p_eps

        # Linear normalisation: first channel divided by the sum of the pair.
        p = p_pair[:, 0, ...] / (p_pair[:, 0, ...] + p_pair[:, 1, ...])

        t = t_pair[:, 0, ...] / (t_pair[:, 0, ...] + t_pair[:, 1, ...])
        t = t.unsqueeze(1)
        # Map the normalised value onto [min_temp, max_temp].
        t = (self.max_temp - self.min_temp) * t + self.min_temp

        return self.log_binomial_transform(p, t)
ZoeDepth/zoedepth/models/layers/localbins_layers.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+
28
+
29
class SeedBinRegressor(nn.Module):
    def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10):
        """Bin center regressor network. Bin centers are bounded on (min_depth, max_depth) interval.

        Args:
            in_features (int): input channels
            n_bins (int, optional): Number of bin centers. Defaults to 16.
            mlp_dim (int, optional): Hidden dimension. Defaults to 256.
            min_depth (float, optional): Min depth value. Defaults to 1e-3.
            max_depth (float, optional): Max depth value. Defaults to 10.
        """
        super().__init__()
        self.version = "1_1"
        self.min_depth = min_depth
        self.max_depth = max_depth

        layers = [
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_bins, 1, 1, 0),
            nn.ReLU(inplace=True),
        ]
        self._net = nn.Sequential(*layers)

    def forward(self, x):
        """
        Returns the normalised bin widths and the bin centers, one vector for every pixel.

        The widths are normalised to sum to one over the channel dimension,
        scaled to the depth range, and accumulated starting from min_depth to
        obtain bin edges; centers are the midpoints of consecutive edges.
        """
        eps = 1e-3
        raw = self._net(x) + eps  # strictly positive widths
        widths_normed = raw / raw.sum(dim=1, keepdim=True)

        depth_range = self.max_depth - self.min_depth
        widths = depth_range * widths_normed  # .shape NCHW
        # pad has the form (left, right, top, bottom, front, back);
        # prepend min_depth along the channel dimension
        widths = nn.functional.pad(
            widths, (0, 0, 0, 0, 1, 0), mode='constant', value=self.min_depth)
        edges = torch.cumsum(widths, dim=1)  # .shape NCHW

        lower, upper = edges[:, :-1, ...], edges[:, 1:, ...]
        centers = 0.5 * (lower + upper)
        return widths_normed, centers
69
+
70
+
71
class SeedBinRegressorUnnormed(nn.Module):
    def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10):
        """Bin center regressor network. Bin centers are unbounded

        Args:
            in_features (int): input channels
            n_bins (int, optional): Number of bin centers. Defaults to 16.
            mlp_dim (int, optional): Hidden dimension. Defaults to 256.
            min_depth (float, optional): Not used. (for compatibility with SeedBinRegressor)
            max_depth (float, optional): Not used. (for compatibility with SeedBinRegressor)
        """
        super().__init__()
        self.version = "1_1"
        layers = [
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_bins, 1, 1, 0),
            nn.Softplus(),
        ]
        self._net = nn.Sequential(*layers)

    def forward(self, x):
        """
        Returns the per-pixel bin centers produced by the network, twice.

        The same tensor is returned in both positions so the return signature
        matches the bounded regressor.
        """
        centers = self._net(x)
        return centers, centers
97
+
98
+
99
class Projector(nn.Module):
    def __init__(self, in_features, out_features, mlp_dim=128):
        """Projector MLP

        Two 1x1 convolutions with a ReLU in between.

        Args:
            in_features (int): input channels
            out_features (int): output channels
            mlp_dim (int, optional): hidden dimension. Defaults to 128.
        """
        super().__init__()

        expand = nn.Conv2d(in_features, mlp_dim, 1, 1, 0)
        project = nn.Conv2d(mlp_dim, out_features, 1, 1, 0)
        self._net = nn.Sequential(expand, nn.ReLU(inplace=True), project)

    def forward(self, x):
        """Apply the MLP to every spatial location of x (NCHW)."""
        projected = self._net(x)
        return projected
118
+
119
+
120
+
121
class LinearSplitter(nn.Module):
    def __init__(self, in_features, prev_nbins, split_factor=2, mlp_dim=128, min_depth=1e-3, max_depth=10):
        """Split every previous bin into ``split_factor`` sub-bins with predicted fractions.

        Args:
            in_features (int): input channels
            prev_nbins (int): number of bins coming in
            split_factor (int, optional): number of sub-bins per incoming bin. Defaults to 2.
            mlp_dim (int, optional): hidden dimension. Defaults to 128.
            min_depth (float, optional): Min depth value. Defaults to 1e-3.
            max_depth (float, optional): Max depth value. Defaults to 10.
        """
        super().__init__()

        self.prev_nbins = prev_nbins
        self.split_factor = split_factor
        self.min_depth = min_depth
        self.max_depth = max_depth

        layers = [
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.GELU(),
            nn.Conv2d(mlp_dim, prev_nbins * split_factor, 1, 1, 0),
            nn.ReLU(),
        ]
        self._net = nn.Sequential(*layers)

    def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
        """
        x : feature block; shape - n, c, h, w
        b_prev : previous bin widths normed; shape - n, prev_nbins, h, w

        Returns a tuple (b, B_centers): the new normed bin widths of shape
        n, prev_nbins * split_factor, h, w and the matching bin centers.
        ``is_for_query`` is accepted but not used.
        """
        if prev_b_embedding is not None:
            if interpolate:
                prev_b_embedding = nn.functional.interpolate(
                    prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
            x = x + prev_b_embedding

        eps = 1e-3
        splits = self._net(x) + eps
        n, c, h, w = splits.shape
        splits = splits.view(n, self.prev_nbins, self.split_factor, h, w)
        split_fractions = splits / splits.sum(dim=2, keepdim=True)  # fractional splits

        prev_widths = nn.functional.interpolate(b_prev, (h, w), mode='bilinear', align_corners=True)
        # renormalize so the widths still sum to one after interpolation
        prev_widths = prev_widths / prev_widths.sum(dim=1, keepdim=True)

        widths_normed = prev_widths.unsqueeze(2) * split_fractions
        widths_normed = widths_normed.flatten(1, 2)  # .shape n, prev_nbins * split_factor, h, w

        # calculate bin centers for loss calculation
        widths = (self.max_depth - self.min_depth) * widths_normed  # .shape N, nprev * splitfactor, H, W
        # pad has the form (left, right, top, bottom, front, back)
        widths = nn.functional.pad(widths, (0, 0, 0, 0, 1, 0), mode='constant', value=self.min_depth)
        edges = torch.cumsum(widths, dim=1)  # .shape NCHW

        lower, upper = edges[:, :-1, ...], edges[:, 1:, ...]
        centers = 0.5 * (lower + upper)
        return widths_normed, centers
ZoeDepth/zoedepth/models/layers/patch_transformer.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+
28
+
29
class PatchTransformerEncoder(nn.Module):
    def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False):
        """ViT-like transformer block

        Args:
            in_channels (int): Input channels
            patch_size (int, optional): patch size. Defaults to 10.
            embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128.
            num_heads (int, optional): number of attention heads. Defaults to 4.
            use_class_token (bool, optional): Whether to use extra token at the start for global accumulation (called as "class token"). Defaults to False.
        """
        super(PatchTransformerEncoder, self).__init__()
        self.use_class_token = use_class_token
        encoder_layers = nn.TransformerEncoderLayer(
            embedding_dim, num_heads, dim_feedforward=1024)
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layers, num_layers=4)  # takes shape S,N,E

        # Non-overlapping PxP convolution turns each patch into one embedding.
        self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim,
                                           kernel_size=patch_size, stride=patch_size, padding=0)

    def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'):
        """Generate sinusoidal positional encodings

        The first half of the embedding channels holds the sine terms and the
        second half the cosine terms (concatenated, not interleaved).

        Args:
            sequence_length (int): Sequence length
            batch_size (int): Batch size the encoding is repeated for
            embedding_dim (int): Embedding dimension
            device (str or torch.device, optional): device of the result. Defaults to 'cpu'.

        Returns:
            torch.Tensor SBE: Positional encodings
        """
        position = torch.arange(
            0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1)
        index = torch.arange(
            0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0)
        div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim))
        pos_encoding = position * div_term
        pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1)
        # For an odd embedding_dim the concatenation has embedding_dim + 1
        # channels; trim so the result always matches the embeddings it is
        # added to. This is a no-op for even dimensions.
        pos_encoding = pos_encoding[:, :embedding_dim]
        pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1)
        return pos_encoding

    def forward(self, x):
        """Forward pass

        Args:
            x (torch.Tensor - NCHW): Input feature tensor

        Returns:
            torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim
        """
        embeddings = self.embedding_convPxP(x).flatten(
            2)  # .shape = n,c,s = n, embedding_dim, s
        if self.use_class_token:
            # extra all-zero token prepended along the sequence dimension
            embeddings = nn.functional.pad(embeddings, (1, 0))

        # change to S,N,E format required by transformer
        embeddings = embeddings.permute(2, 0, 1)
        S, N, E = embeddings.shape
        embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device)
        x = self.transformer_encoder(embeddings)  # .shape = S, N, E
        return x
ZoeDepth/zoedepth/models/model_io.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+
27
def load_state_dict(model, state_dict):
    """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict.

    DataParallel prefixes state_dict keys with 'module.' when saving.
    If the model is not a DataParallel model but the state_dict is, then prefixes are removed.
    If the model is a DataParallel model but the state_dict is not, then prefixes are added.

    Returns:
        The same ``model`` object after its weights have been loaded.
    """
    # A checkpoint may wrap the weights under a "model" key.
    weights = state_dict.get('model', state_dict)

    prefix = 'module.'
    parallel_types = (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)
    model_is_wrapped = isinstance(model, parallel_types)

    remapped = {}
    for name, tensor in weights.items():
        has_prefix = name.startswith(prefix)
        if model_is_wrapped and not has_prefix:
            name = prefix + name
        elif has_prefix and not model_is_wrapped:
            name = name[len(prefix):]
        remapped[name] = tensor

    model.load_state_dict(remapped)
    print("Loaded successfully")
    return model
52
+
53
+
54
def load_wts(model, checkpoint_path):
    """Read a checkpoint file onto the CPU and load its weights into ``model``.

    Returns whatever ``load_state_dict`` returns for the model and checkpoint.
    """
    # NOTE(review): torch.load unpickles the file; only use trusted checkpoints.
    return load_state_dict(model, torch.load(checkpoint_path, map_location='cpu'))
57
+
58
+
59
def load_state_dict_from_url(model, url, **kwargs):
    """Fetch a checkpoint via ``torch.hub`` (mapped to CPU) and load it into ``model``.

    Extra keyword arguments are forwarded to ``torch.hub.load_state_dict_from_url``.
    """
    fetched = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs)
    loaded_model = load_state_dict(model, fetched)
    return loaded_model
62
+
63
+
64
def load_state_from_resource(model, resource: str):
    """Loads weights to the model from a given resource. A resource can be of following types:
        1. URL. Prefixed with "url::"
            e.g. url::http(s)://url.resource.com/ckpt.pt

        2. Local path. Prefixed with "local::"
            e.g. local::/path/to/ckpt.pt


    Args:
        model (torch.nn.Module): Model
        resource (str): resource string

    Returns:
        torch.nn.Module: Model with loaded weights

    Raises:
        ValueError: if the resource has neither the "url::" nor the "local::" prefix.
    """
    print(f"Using pretrained resource {resource}")

    url_prefix = 'url::'
    local_prefix = 'local::'

    # Strip only the leading prefix. Splitting on the marker would truncate any
    # URL or path that happens to contain the marker text again further on.
    if resource.startswith(url_prefix):
        url = resource[len(url_prefix):]
        return load_state_dict_from_url(model, url, progress=True)

    if resource.startswith(local_prefix):
        path = resource[len(local_prefix):]
        return load_wts(model, path)

    raise ValueError("Invalid resource type, only url:: and local:: are supported")
92
+
ZoeDepth/zoedepth/models/zoedepth/__init__.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ from .zoedepth_v1 import ZoeDepth
26
+
27
# Registry of available ZoeDepth model classes, keyed by version name.
all_versions = {
    "v1": ZoeDepth,
}


def get_version(v):
    """Return the model class registered under version key ``v``.

    Raises:
        KeyError: if ``v`` is not a key of ``all_versions``.
    """
    return all_versions[v]
ZoeDepth/zoedepth/models/zoedepth/config_zoedepth.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "ZoeDepth",
4
+ "version_name": "v1",
5
+ "n_bins": 64,
6
+ "bin_embedding_dim": 128,
7
+ "bin_centers_type": "softplus",
8
+ "n_attractors":[16, 8, 4, 1],
9
+ "attractor_alpha": 1000,
10
+ "attractor_gamma": 2,
11
+ "attractor_kind" : "mean",
12
+ "attractor_type" : "inv",
13
+ "midas_model_type" : "DPT_BEiT_L_384",
14
+ "min_temp": 0.0212,
15
+ "max_temp": 50.0,
16
+ "output_distribution": "logbinomial",
17
+ "memory_efficient": true,
18
+ "inverse_midas": false,
19
+ "img_size": [384, 512]
20
+ },
21
+
22
+ "train": {
23
+ "train_midas": true,
24
+ "use_pretrained_midas": true,
25
+ "trainer": "zoedepth",
26
+ "epochs": 5,
27
+ "bs": 16,
28
+ "optim_kwargs": {"lr": 0.000161, "wd": 0.01},
29
+ "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true},
30
+ "same_lr": false,
31
+ "w_si": 1,
32
+ "w_domain": 0.2,
33
+ "w_reg": 0,
34
+ "w_grad": 0,
35
+ "avoid_boundary": false,
36
+ "random_crop": false,
37
+ "input_width": 640,
38
+ "input_height": 480,
39
+ "midas_lr_factor": 1,
40
+ "encoder_lr_factor":10,
41
+ "pos_enc_lr_factor":10,
42
+ "freeze_midas_bn": true
43
+
44
+ },
45
+
46
+ "infer":{
47
+ "train_midas": false,
48
+ "use_pretrained_midas": false,
49
+ "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt",
50
+ "force_keep_ar": true
51
+ },
52
+
53
+ "eval":{
54
+ "train_midas": false,
55
+ "use_pretrained_midas": false,
56
+ "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt"
57
+ }
58
+ }
ZoeDepth/zoedepth/models/zoedepth/config_zoedepth_kitti.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "bin_centers_type": "normed",
4
+ "img_size": [384, 768]
5
+ },
6
+
7
+ "train": {
8
+ },
9
+
10
+ "infer":{
11
+ "train_midas": false,
12
+ "use_pretrained_midas": false,
13
+ "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt",
14
+ "force_keep_ar": true
15
+ },
16
+
17
+ "eval":{
18
+ "train_midas": false,
19
+ "use_pretrained_midas": false,
20
+ "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt"
21
+ }
22
+ }
ZoeDepth/zoedepth/models/zoedepth/zoedepth_v1.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import itertools
26
+
27
+ import torch
28
+ import torch.nn as nn
29
+ from zoedepth.models.depth_model import DepthModel
30
+ from zoedepth.models.base_models.midas import MidasCore
31
+ from zoedepth.models.layers.attractor import AttractorLayer, AttractorLayerUnnormed
32
+ from zoedepth.models.layers.dist_layers import ConditionalLogBinomial
33
+ from zoedepth.models.layers.localbins_layers import (Projector, SeedBinRegressor,
34
+ SeedBinRegressorUnnormed)
35
+ from zoedepth.models.model_io import load_state_from_resource
36
+
37
+
38
class ZoeDepth(DepthModel):
    def __init__(self, core, n_bins=64, bin_centers_type="softplus", bin_embedding_dim=128, min_depth=1e-3, max_depth=10,
                 n_attractors=[16, 8, 4, 1], attractor_alpha=300, attractor_gamma=2, attractor_kind='sum', attractor_type='exp', min_temp=5, max_temp=50, train_midas=True,
                 midas_lr_factor=10, encoder_lr_factor=10, pos_enc_lr_factor=10, inverse_midas=False, **kwargs):
        """ZoeDepth model. This is the version of ZoeDepth that has a single metric head

        Args:
            core (models.base_models.midas.MidasCore): The base midas model that is used for extraction of "relative" features
            n_bins (int, optional): Number of bin centers. Defaults to 64.
            bin_centers_type (str, optional): One of "normed", "softplus", "hybrid1" or "hybrid2". Selects the seed bin regressor / attractor layer pair.
                                              For "normed" bin centers, linear normalization trick is applied. This results in bounded bin centers.
                                              For "softplus", softplus activation is used and thus are unbounded. Defaults to "softplus".
            bin_embedding_dim (int, optional): bin embedding dimension. Defaults to 128.
            min_depth (float, optional): Lower bound for normed bin centers. Defaults to 1e-3.
            max_depth (float, optional): Upper bound for normed bin centers. Defaults to 10.
            n_attractors (List[int], optional): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1]. The list is only read, never mutated.
            attractor_alpha (int, optional): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 300.
            attractor_gamma (int, optional): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
            attractor_kind (str, optional): Attraction aggregation "sum" or "mean". Defaults to 'sum'.
            attractor_type (str, optional): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'exp'.
            min_temp (int, optional): Lower bound for temperature of output probability distribution. Defaults to 5.
            max_temp (int, optional): Upper bound for temperature of output probability distribution. Defaults to 50.
            train_midas (bool, optional): Whether to train "core", the base midas model. Defaults to True.
            midas_lr_factor (int, optional): Learning rate reduction factor for base midas model except its encoder and positional encodings. Defaults to 10.
            encoder_lr_factor (int, optional): Learning rate reduction factor for the encoder in midas model. A value <= 0 freezes the encoder. Defaults to 10.
            pos_enc_lr_factor (int, optional): Learning rate reduction factor for positional encodings in the base midas model. Defaults to 10.
            inverse_midas (bool, optional): If True, ``forward`` inverts and min-max normalizes the relative depth before using it as conditioning. Defaults to False.
            **kwargs: Accepted so that a full config can be splatted in; not used by this constructor.

        Raises:
            ValueError: if ``bin_centers_type`` is not one of the supported names.
        """
        super().__init__()

        self.core = core
        self.max_depth = max_depth
        self.min_depth = min_depth
        self.min_temp = min_temp
        self.bin_centers_type = bin_centers_type

        self.midas_lr_factor = midas_lr_factor
        self.encoder_lr_factor = encoder_lr_factor
        self.pos_enc_lr_factor = pos_enc_lr_factor
        self.train_midas = train_midas
        self.inverse_midas = inverse_midas

        # A non-positive factor means "do not train": freeze instead of
        # building a parameter group for it (see get_lr_params).
        if self.encoder_lr_factor <= 0:
            self.core.freeze_encoder(
                freeze_rel_pos=self.pos_enc_lr_factor <= 0)

        N_MIDAS_OUT = 32
        btlnck_features = self.core.output_channels[0]
        num_out_features = self.core.output_channels[1:]

        self.conv2 = nn.Conv2d(btlnck_features, btlnck_features,
                               kernel_size=1, stride=1, padding=0)  # btlnck conv

        if bin_centers_type == "normed":
            SeedBinRegressorLayer = SeedBinRegressor
            Attractor = AttractorLayer
        elif bin_centers_type == "softplus":
            SeedBinRegressorLayer = SeedBinRegressorUnnormed
            Attractor = AttractorLayerUnnormed
        elif bin_centers_type == "hybrid1":
            SeedBinRegressorLayer = SeedBinRegressor
            Attractor = AttractorLayerUnnormed
        elif bin_centers_type == "hybrid2":
            SeedBinRegressorLayer = SeedBinRegressorUnnormed
            Attractor = AttractorLayer
        else:
            raise ValueError(
                "bin_centers_type should be one of 'normed', 'softplus', 'hybrid1', 'hybrid2'")

        # NOTE: sub-modules are created in a fixed order; keep it stable so
        # parameter ordering and RNG-dependent initialisation do not change.
        self.seed_bin_regressor = SeedBinRegressorLayer(
            btlnck_features, n_bins=n_bins, min_depth=min_depth, max_depth=max_depth)
        self.seed_projector = Projector(btlnck_features, bin_embedding_dim)
        self.projectors = nn.ModuleList([
            Projector(num_out, bin_embedding_dim)
            for num_out in num_out_features
        ])
        self.attractors = nn.ModuleList([
            Attractor(bin_embedding_dim, n_bins, n_attractors=n_attractors[i], min_depth=min_depth, max_depth=max_depth,
                      alpha=attractor_alpha, gamma=attractor_gamma, kind=attractor_kind, attractor_type=attractor_type)
            for i in range(len(num_out_features))
        ])

        last_in = N_MIDAS_OUT + 1  # +1 for relative depth

        # use log binomial instead of softmax
        self.conditional_log_binomial = ConditionalLogBinomial(
            last_in, bin_embedding_dim, n_classes=n_bins, min_temp=min_temp, max_temp=max_temp)

    def forward(self, x, return_final_centers=False, denorm=False, return_probs=False, **kwargs):
        """
        Args:
            x (torch.Tensor): Input image tensor of shape (B, C, H, W)
            return_final_centers (bool, optional): Whether to return the final bin centers. Defaults to False.
            denorm (bool, optional): Whether to denormalize the input image. This reverses ImageNet normalization as midas normalization is different. Defaults to False.
            return_probs (bool, optional): Whether to return the output probability distribution. Defaults to False.

        Returns:
            dict: Dictionary containing the following keys:
                - metric_depth (torch.Tensor): Sum over the bin dimension of probs * bin_centers, with that dimension kept (size 1 on dim 1).
                - bin_centers (torch.Tensor): Final bin centers, bilinearly interpolated to the spatial size of the probability map. Present only if return_final_centers or return_probs is True
                - probs (torch.Tensor): Output of the conditional log-binomial head. Present only if return_probs is True

            The relative depth computed by the core is used internally as
            conditioning but is not part of the returned dictionary.
        """
        _, _, h, w = x.shape
        # print("input shape ", x.shape)
        self.orig_input_width = w
        self.orig_input_height = h
        rel_depth, out = self.core(x, denorm=denorm, return_rel_depth=True)
        # print("output shapes", rel_depth.shape, out.shape)

        outconv_activation = out[0]
        btlnck = out[1]
        x_blocks = out[2:]

        x_d0 = self.conv2(btlnck)
        x = x_d0
        _, seed_b_centers = self.seed_bin_regressor(x)

        if self.bin_centers_type == 'normed' or self.bin_centers_type == 'hybrid2':
            # Attractors of these types work on centers rescaled by the depth range.
            b_prev = (seed_b_centers - self.min_depth) / \
                (self.max_depth - self.min_depth)
        else:
            b_prev = seed_b_centers

        prev_b_embedding = self.seed_projector(x)

        # unroll this loop for better performance
        for projector, attractor, x in zip(self.projectors, self.attractors, x_blocks):
            b_embedding = projector(x)
            b, b_centers = attractor(
                b_embedding, b_prev, prev_b_embedding, interpolate=True)
            b_prev = b.clone()
            prev_b_embedding = b_embedding.clone()

        last = outconv_activation

        if self.inverse_midas:
            # invert depth followed by normalization
            rel_depth = 1.0 / (rel_depth + 1e-6)
            rel_min = rel_depth.min()
            # Guard the min-max division: a constant map has zero range and
            # would otherwise produce 0/0 = NaN. Clamping to the smallest
            # positive normal float leaves every non-degenerate input unchanged.
            rel_range = (rel_depth.max() - rel_min).clamp(
                min=torch.finfo(rel_depth.dtype).tiny)
            rel_depth = (rel_depth - rel_min) / rel_range
        # concat rel depth with last. First interpolate rel depth to last size
        rel_cond = rel_depth.unsqueeze(1)
        rel_cond = nn.functional.interpolate(
            rel_cond, size=last.shape[2:], mode='bilinear', align_corners=True)
        last = torch.cat([last, rel_cond], dim=1)

        b_embedding = nn.functional.interpolate(
            b_embedding, last.shape[-2:], mode='bilinear', align_corners=True)
        x = self.conditional_log_binomial(last, b_embedding)

        # Now depth value is Sum px * cx , where cx are bin_centers from the last bin tensor
        # print(x.shape, b_centers.shape)
        b_centers = nn.functional.interpolate(
            b_centers, x.shape[-2:], mode='bilinear', align_corners=True)
        out = torch.sum(x * b_centers, dim=1, keepdim=True)

        # Structure output dict
        output = dict(metric_depth=out)
        if return_final_centers or return_probs:
            output['bin_centers'] = b_centers

        if return_probs:
            output['probs'] = x

        return output

    def get_lr_params(self, lr):
        """
        Learning rate configuration for different layers of the model
        Args:
            lr (float) : Base learning rate
        Returns:
            list : list of parameters to optimize and their learning rates, in the format required by torch optimizers.
        """
        param_conf = []
        if self.train_midas:
            # Groups with a non-positive factor are skipped entirely (the
            # encoder is frozen in __init__ in that case).
            if self.encoder_lr_factor > 0:
                param_conf.append({'params': self.core.get_enc_params_except_rel_pos(
                ), 'lr': lr / self.encoder_lr_factor})

            if self.pos_enc_lr_factor > 0:
                param_conf.append(
                    {'params': self.core.get_rel_pos_params(), 'lr': lr / self.pos_enc_lr_factor})

            midas_params = self.core.core.scratch.parameters()
            midas_lr_factor = self.midas_lr_factor
            param_conf.append(
                {'params': midas_params, 'lr': lr / midas_lr_factor})

        # Everything that is not part of the midas core trains at the base lr.
        remaining_modules = [
            child for name, child in self.named_children() if name != 'core']
        remaining_params = itertools.chain(
            *[child.parameters() for child in remaining_modules])

        param_conf.append({'params': remaining_params, 'lr': lr})

        return param_conf

    @staticmethod
    def build(midas_model_type="DPT_BEiT_L_384", pretrained_resource=None, use_pretrained_midas=False, train_midas=False, freeze_midas_bn=True, **kwargs):
        """Build the midas core and a ZoeDepth model on top of it.

        ``kwargs`` are forwarded to both ``MidasCore.build`` and the ZoeDepth
        constructor. If ``pretrained_resource`` is given it must be a string
        and the model state is loaded via ``load_state_from_resource``.
        """
        core = MidasCore.build(midas_model_type=midas_model_type, use_pretrained_midas=use_pretrained_midas,
                               train_midas=train_midas, fetch_features=True, freeze_bn=freeze_midas_bn, **kwargs)
        model = ZoeDepth(core, **kwargs)
        if pretrained_resource:
            assert isinstance(pretrained_resource, str), "pretrained_resource must be a string"
            model = load_state_from_resource(model, pretrained_resource)
        return model

    @staticmethod
    def build_from_config(config):
        """Build a ZoeDepth model from a mapping of ``build`` keyword arguments."""
        return ZoeDepth.build(**config)
ZoeDepth/zoedepth/models/zoedepth_nk/__init__.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ from .zoedepth_nk_v1 import ZoeDepthNK
26
+
27
# Registry mapping a config "version_name" to the model class implementing it.
all_versions = {
    "v1": ZoeDepthNK,
}


def get_version(v):
    """Return the model class registered under version name ``v``.

    Raises:
        KeyError: if ``v`` is not a key of ``all_versions``.
    """
    return all_versions[v]
ZoeDepth/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "name": "ZoeDepthNK",
4
+ "version_name": "v1",
5
+ "bin_conf" : [
6
+ {
7
+ "name": "nyu",
8
+ "n_bins": 64,
9
+ "min_depth": 1e-3,
10
+ "max_depth": 10.0
11
+ },
12
+ {
13
+ "name": "kitti",
14
+ "n_bins": 64,
15
+ "min_depth": 1e-3,
16
+ "max_depth": 80.0
17
+ }
18
+ ],
19
+ "bin_embedding_dim": 128,
20
+ "bin_centers_type": "softplus",
21
+ "n_attractors":[16, 8, 4, 1],
22
+ "attractor_alpha": 1000,
23
+ "attractor_gamma": 2,
24
+ "attractor_kind" : "mean",
25
+ "attractor_type" : "inv",
26
+ "min_temp": 0.0212,
27
+ "max_temp": 50.0,
28
+ "memory_efficient": true,
29
+ "midas_model_type" : "DPT_BEiT_L_384",
30
+ "img_size": [384, 512]
31
+ },
32
+
33
+ "train": {
34
+ "train_midas": true,
35
+ "use_pretrained_midas": true,
36
+ "trainer": "zoedepth_nk",
37
+ "epochs": 5,
38
+ "bs": 16,
39
+ "optim_kwargs": {"lr": 0.0002512, "wd": 0.01},
40
+ "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true},
41
+ "same_lr": false,
42
+ "w_si": 1,
43
+ "w_domain": 100,
44
+ "avoid_boundary": false,
45
+ "random_crop": false,
46
+ "input_width": 640,
47
+ "input_height": 480,
48
+ "w_grad": 0,
49
+ "w_reg": 0,
50
+ "midas_lr_factor": 10,
51
+ "encoder_lr_factor":10,
52
+ "pos_enc_lr_factor":10
53
+ },
54
+
55
+ "infer": {
56
+ "train_midas": false,
57
+ "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt",
58
+ "use_pretrained_midas": false,
59
+ "force_keep_ar": true
60
+ },
61
+
62
+ "eval": {
63
+ "train_midas": false,
64
+ "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt",
65
+ "use_pretrained_midas": false
66
+ }
67
+ }
ZoeDepth/zoedepth/models/zoedepth_nk/zoedepth_nk_v1.py ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import itertools
26
+
27
+ import torch
28
+ import torch.nn as nn
29
+
30
+ from zoedepth.models.depth_model import DepthModel
31
+ from zoedepth.models.base_models.midas import MidasCore
32
+ from zoedepth.models.layers.attractor import AttractorLayer, AttractorLayerUnnormed
33
+ from zoedepth.models.layers.dist_layers import ConditionalLogBinomial
34
+ from zoedepth.models.layers.localbins_layers import (Projector, SeedBinRegressor,
35
+ SeedBinRegressorUnnormed)
36
+ from zoedepth.models.layers.patch_transformer import PatchTransformerEncoder
37
+ from zoedepth.models.model_io import load_state_from_resource
38
+
39
+
40
class ZoeDepthNK(DepthModel):
    def __init__(self, core, bin_conf, bin_centers_type="softplus", bin_embedding_dim=128,
                 n_attractors=[16, 8, 4, 1], attractor_alpha=300, attractor_gamma=2, attractor_kind='sum', attractor_type='exp',
                 min_temp=5, max_temp=50,
                 memory_efficient=False, train_midas=True,
                 is_midas_pretrained=True, midas_lr_factor=1, encoder_lr_factor=10, pos_enc_lr_factor=10, inverse_midas=False, **kwargs):
        """ZoeDepthNK model. This is the version of ZoeDepth that has two metric heads and uses a learned router to route to experts.

        Args:
            core (models.base_models.midas.MidasCore): The base midas model that is used for extraction of "relative" features

            bin_conf (List[dict]): A list of dictionaries that contain the bin configuration for each metric head. Each dictionary should contain the following keys:
                                    "name" (str, typically same as the dataset name), "n_bins" (int), "min_depth" (float), "max_depth" (float)

                                   The length of this list determines the number of metric heads.
            bin_centers_type (str, optional): One of "normed", "softplus", "hybrid1" or "hybrid2". Selects the seed bin regressor / attractor layer pair.
                                              For "normed" bin centers, linear normalization trick is applied. This results in bounded bin centers.
                                              For "softplus", softplus activation is used and thus are unbounded. Defaults to "softplus".
            bin_embedding_dim (int, optional): bin embedding dimension. Defaults to 128.

            n_attractors (List[int], optional): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1]. The list is only read, never mutated.
            attractor_alpha (int, optional): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 300.
            attractor_gamma (int, optional): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
            attractor_kind (str, optional): Attraction aggregation "sum" or "mean". Defaults to 'sum'.
            attractor_type (str, optional): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'exp'.

            min_temp (int, optional): Lower bound for temperature of output probability distribution. Defaults to 5.
            max_temp (int, optional): Upper bound for temperature of output probability distribution. Defaults to 50.

            memory_efficient (bool, optional): Whether to use memory efficient version of attractor layers. Memory efficient version is slower but is recommended incase of multiple metric heads in order save GPU memory. Defaults to False.

            train_midas (bool, optional): Whether to train "core", the base midas model. Defaults to True.
            is_midas_pretrained (bool, optional): Is "core" pretrained? If False, ``midas_lr_factor`` is ignored and 1.0 is used. Defaults to True.
            midas_lr_factor (int, optional): Learning rate reduction factor for base midas model except its encoder and positional encodings. Defaults to 1.
            encoder_lr_factor (int, optional): Learning rate reduction factor for the encoder in midas model. Defaults to 10.
            pos_enc_lr_factor (int, optional): Learning rate reduction factor for positional encodings in the base midas model. Defaults to 10.
            inverse_midas (bool, optional): Stored on the instance; not read anywhere in this class. Defaults to False.
            **kwargs: Accepted so that a full config can be splatted in; not used by this constructor.

        Raises:
            ValueError: if ``bin_centers_type`` is not one of the supported names.
        """

        super().__init__()

        self.core = core
        self.bin_conf = bin_conf
        self.min_temp = min_temp
        self.max_temp = max_temp
        self.memory_efficient = memory_efficient
        self.train_midas = train_midas
        self.is_midas_pretrained = is_midas_pretrained
        self.midas_lr_factor = midas_lr_factor
        self.encoder_lr_factor = encoder_lr_factor
        self.pos_enc_lr_factor = pos_enc_lr_factor
        self.inverse_midas = inverse_midas

        N_MIDAS_OUT = 32
        btlnck_features = self.core.output_channels[0]
        num_out_features = self.core.output_channels[1:]
        # self.scales = [16, 8, 4, 2]  # spatial scale factors

        self.conv2 = nn.Conv2d(
            btlnck_features, btlnck_features, kernel_size=1, stride=1, padding=0)

        # Transformer classifier on the bottleneck
        self.patch_transformer = PatchTransformerEncoder(
            btlnck_features, 1, 128, use_class_token=True)
        # Two output logits: forward() maps index 0 -> "nyu", index 1 -> "kitti".
        self.mlp_classifier = nn.Sequential(
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 2)
        )

        if bin_centers_type == "normed":
            SeedBinRegressorLayer = SeedBinRegressor
            Attractor = AttractorLayer
        elif bin_centers_type == "softplus":
            SeedBinRegressorLayer = SeedBinRegressorUnnormed
            Attractor = AttractorLayerUnnormed
        elif bin_centers_type == "hybrid1":
            SeedBinRegressorLayer = SeedBinRegressor
            Attractor = AttractorLayerUnnormed
        elif bin_centers_type == "hybrid2":
            SeedBinRegressorLayer = SeedBinRegressorUnnormed
            Attractor = AttractorLayer
        else:
            raise ValueError(
                "bin_centers_type should be one of 'normed', 'softplus', 'hybrid1', 'hybrid2'")
        self.bin_centers_type = bin_centers_type
        # We have bins for each bin conf.
        # Create a map (ModuleDict) of 'name' -> seed_bin_regressor
        self.seed_bin_regressors = nn.ModuleDict(
            {conf['name']: SeedBinRegressorLayer(btlnck_features, conf["n_bins"], mlp_dim=bin_embedding_dim//2, min_depth=conf["min_depth"], max_depth=conf["max_depth"])
             for conf in bin_conf}
        )

        self.seed_projector = Projector(
            btlnck_features, bin_embedding_dim, mlp_dim=bin_embedding_dim//2)
        self.projectors = nn.ModuleList([
            Projector(num_out, bin_embedding_dim, mlp_dim=bin_embedding_dim//2)
            for num_out in num_out_features
        ])

        # Create a map (ModuleDict) of 'name' -> attractors (ModuleList)
        # NOTE(review): n_attractors[i] is passed as the second positional
        # argument here, whereas ZoeDepth passes n_bins positionally and
        # n_attractors by keyword to the same layer classes. Left unchanged
        # because released checkpoints may depend on the resulting layer
        # shapes; confirm against the attractor layer's signature.
        self.attractors = nn.ModuleDict(
            {conf['name']: nn.ModuleList([
                Attractor(bin_embedding_dim, n_attractors[i],
                          mlp_dim=bin_embedding_dim, alpha=attractor_alpha,
                          gamma=attractor_gamma, kind=attractor_kind,
                          attractor_type=attractor_type, memory_efficient=memory_efficient,
                          min_depth=conf["min_depth"], max_depth=conf["max_depth"])
                for i in range(len(n_attractors))
            ])
                for conf in bin_conf}
        )

        last_in = N_MIDAS_OUT
        # conditional log binomial for each bin conf
        self.conditional_log_binomial = nn.ModuleDict(
            {conf['name']: ConditionalLogBinomial(last_in, bin_embedding_dim, conf['n_bins'], bottleneck_factor=4, min_temp=self.min_temp, max_temp=self.max_temp)
             for conf in bin_conf}
        )

    def forward(self, x, return_final_centers=False, denorm=False, return_probs=False, **kwargs):
        """
        Args:
            x (torch.Tensor): Input image tensor of shape (B, C, H, W). Assumes all images are from the same domain.
            return_final_centers (bool, optional): Whether to return the final centers of the attractors. Defaults to False.
            denorm (bool, optional): Whether to denormalize the input image. Defaults to False.
            return_probs (bool, optional): Whether to return the probabilities of the bins. Defaults to False.

        Returns:
            dict: Dictionary of outputs with keys:
                - "metric_depth": Sum over the bin dimension of probs * bin_centers, with that dimension kept (size 1 on dim 1).
                - "domain_logits": Output of the domain classifier (two logits per sample).
                - "bin_centers": Final bin centers, bilinearly interpolated to the spatial size of the last core activation. Present only if return_final_centers or return_probs is True
                - "probs": Output of the selected conditional log-binomial head. Present only if return_probs is True

            The relative depth computed by the core is not part of the
            returned dictionary.

        Raises:
            ValueError: if the routed domain name has no entry in ``self.bin_conf``.
        """
        _, _, h, w = x.shape
        self.orig_input_width = w
        self.orig_input_height = h
        rel_depth, out = self.core(x, denorm=denorm, return_rel_depth=True)

        outconv_activation = out[0]
        btlnck = out[1]
        x_blocks = out[2:]

        x_d0 = self.conv2(btlnck)
        x = x_d0

        # Predict which path to take
        embedding = self.patch_transformer(x)[0]  # N, E
        domain_logits = self.mlp_classifier(embedding)  # N, 2
        # Logits are summed over the batch, so one head is chosen for all images.
        domain_vote = torch.softmax(domain_logits.sum(
            dim=0, keepdim=True), dim=-1)  # 1, 2

        # Get the path
        bin_conf_name = ["nyu", "kitti"][torch.argmax(
            domain_vote, dim=-1).squeeze().item()]

        # Index by key rather than attribute so that plain dicts (as documented
        # for bin_conf and as used in __init__) work as well as attribute-dicts.
        conf = next(
            (cfg for cfg in self.bin_conf if cfg["name"] == bin_conf_name), None)
        if conf is None:
            raise ValueError(
                f"bin_conf_name {bin_conf_name} not found in bin_confs")

        min_depth = conf['min_depth']
        max_depth = conf['max_depth']

        seed_bin_regressor = self.seed_bin_regressors[bin_conf_name]
        _, seed_b_centers = seed_bin_regressor(x)
        if self.bin_centers_type == 'normed' or self.bin_centers_type == 'hybrid2':
            # Attractors of these types work on centers rescaled by the depth range.
            b_prev = (seed_b_centers - min_depth)/(max_depth - min_depth)
        else:
            b_prev = seed_b_centers
        prev_b_embedding = self.seed_projector(x)

        attractors = self.attractors[bin_conf_name]
        for projector, attractor, x in zip(self.projectors, attractors, x_blocks):
            b_embedding = projector(x)
            b, b_centers = attractor(
                b_embedding, b_prev, prev_b_embedding, interpolate=True)
            b_prev = b
            prev_b_embedding = b_embedding

        last = outconv_activation

        b_centers = nn.functional.interpolate(
            b_centers, last.shape[-2:], mode='bilinear', align_corners=True)
        b_embedding = nn.functional.interpolate(
            b_embedding, last.shape[-2:], mode='bilinear', align_corners=True)

        clb = self.conditional_log_binomial[bin_conf_name]
        x = clb(last, b_embedding)

        # Now depth value is Sum px * cx , where cx are bin_centers from the last bin tensor
        # print(x.shape, b_centers.shape)
        # b_centers = nn.functional.interpolate(b_centers, x.shape[-2:], mode='bilinear', align_corners=True)
        out = torch.sum(x * b_centers, dim=1, keepdim=True)

        output = dict(domain_logits=domain_logits, metric_depth=out)
        if return_final_centers or return_probs:
            output['bin_centers'] = b_centers

        if return_probs:
            output['probs'] = x
        return output

    def get_lr_params(self, lr):
        """
        Learning rate configuration for different layers of the model

        Args:
            lr (float) : Base learning rate
        Returns:
            list : list of parameters to optimize and their learning rates, in the format required by torch optimizers.
        """
        param_conf = []
        if self.train_midas:
            def get_rel_pos_params():
                for name, p in self.core.core.pretrained.named_parameters():
                    if "relative_position" in name:
                        yield p

            def get_enc_params_except_rel_pos():
                for name, p in self.core.core.pretrained.named_parameters():
                    if "relative_position" not in name:
                        yield p

            encoder_params = get_enc_params_except_rel_pos()
            rel_pos_params = get_rel_pos_params()
            midas_params = self.core.core.scratch.parameters()
            # A core that is not pretrained trains its scratch layers at the full base lr.
            midas_lr_factor = self.midas_lr_factor if self.is_midas_pretrained else 1.0
            param_conf.extend([
                {'params': encoder_params, 'lr': lr / self.encoder_lr_factor},
                {'params': rel_pos_params, 'lr': lr / self.pos_enc_lr_factor},
                {'params': midas_params, 'lr': lr / midas_lr_factor}
            ])

        # Everything that is not part of the midas core trains at the base lr.
        remaining_modules = [
            child for name, child in self.named_children() if name != 'core']
        remaining_params = itertools.chain(
            *[child.parameters() for child in remaining_modules])
        param_conf.append({'params': remaining_params, 'lr': lr})
        return param_conf

    def get_conf_parameters(self, conf_name):
        """
        Returns parameters of all the ModuleDicts children that are exclusively used for the given bin configuration
        """
        params = []
        for name, child in self.named_children():
            if isinstance(child, nn.ModuleDict):
                for bin_conf_name, module in child.items():
                    if bin_conf_name == conf_name:
                        params += list(module.parameters())
        return params

    def freeze_conf(self, conf_name):
        """
        Freezes all the parameters of all the ModuleDicts children that are exclusively used for the given bin configuration
        """
        for p in self.get_conf_parameters(conf_name):
            p.requires_grad = False

    def unfreeze_conf(self, conf_name):
        """
        Unfreezes all the parameters of all the ModuleDicts children that are exclusively used for the given bin configuration
        """
        for p in self.get_conf_parameters(conf_name):
            p.requires_grad = True

    def freeze_all_confs(self):
        """
        Freezes all the parameters of all the ModuleDicts children
        """
        for name, child in self.named_children():
            if isinstance(child, nn.ModuleDict):
                for bin_conf_name, module in child.items():
                    for p in module.parameters():
                        p.requires_grad = False

    @staticmethod
    def build(midas_model_type="DPT_BEiT_L_384", pretrained_resource=None, use_pretrained_midas=False, train_midas=False, freeze_midas_bn=True, **kwargs):
        """Build the midas core and a ZoeDepthNK model on top of it.

        ``kwargs`` are forwarded to both ``MidasCore.build`` and the ZoeDepthNK
        constructor. If ``pretrained_resource`` is given it must be a string
        and the model state is loaded via ``load_state_from_resource``.
        """
        core = MidasCore.build(midas_model_type=midas_model_type, use_pretrained_midas=use_pretrained_midas,
                               train_midas=train_midas, fetch_features=True, freeze_bn=freeze_midas_bn, **kwargs)
        model = ZoeDepthNK(core, **kwargs)
        if pretrained_resource:
            assert isinstance(pretrained_resource, str), "pretrained_resource must be a string"
            model = load_state_from_resource(model, pretrained_resource)
        return model

    @staticmethod
    def build_from_config(config):
        """Build a ZoeDepthNK model from a mapping of ``build`` keyword arguments."""
        return ZoeDepthNK.build(**config)