Spaces:
Sleeping
Sleeping
Realcat
commited on
Commit
·
63f3cf2
1
Parent(s):
4487d43
fix: eloftr
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- hloc/extractors/sfd2.py +5 -7
- hloc/matchers/eloftr.py +10 -6
- hloc/matchers/imp.py +5 -6
- third_party/pram/.gitignore +13 -0
- third_party/pram/LICENSE +2 -0
- third_party/pram/README.md +207 -0
- third_party/pram/assets/map_sparsification.gif +3 -0
- third_party/pram/assets/multi_recognition.png +3 -0
- third_party/pram/assets/overview.png +3 -0
- third_party/pram/assets/pipeline1.png +3 -0
- third_party/pram/assets/pram_demo.gif +3 -0
- third_party/pram/assets/sam_openvoc.png +3 -0
- third_party/pram/colmap_utils/camera_intrinsics.py +30 -0
- third_party/pram/colmap_utils/database.py +352 -0
- third_party/pram/colmap_utils/geometry.py +17 -0
- third_party/pram/colmap_utils/io.py +78 -0
- third_party/pram/colmap_utils/parsers.py +73 -0
- third_party/pram/colmap_utils/read_write_model.py +627 -0
- third_party/pram/colmap_utils/utils.py +1 -0
- third_party/pram/configs/config_train_12scenes_sfd2.yaml +102 -0
- third_party/pram/configs/config_train_7scenes_sfd2.yaml +104 -0
- third_party/pram/configs/config_train_aachen_sfd2.yaml +104 -0
- third_party/pram/configs/config_train_cambridge_sfd2.yaml +103 -0
- third_party/pram/configs/config_train_multiset_sfd2.yaml +100 -0
- third_party/pram/configs/datasets/12Scenes.yaml +166 -0
- third_party/pram/configs/datasets/7Scenes.yaml +96 -0
- third_party/pram/configs/datasets/Aachen.yaml +15 -0
- third_party/pram/configs/datasets/CambridgeLandmarks.yaml +67 -0
- third_party/pram/dataset/aachen.py +119 -0
- third_party/pram/dataset/basicdataset.py +477 -0
- third_party/pram/dataset/cambridge_landmarks.py +101 -0
- third_party/pram/dataset/customdataset.py +93 -0
- third_party/pram/dataset/get_dataset.py +89 -0
- third_party/pram/dataset/recdataset.py +95 -0
- third_party/pram/dataset/seven_scenes.py +115 -0
- third_party/pram/dataset/twelve_scenes.py +121 -0
- third_party/pram/dataset/utils.py +31 -0
- third_party/pram/environment.yml +173 -0
- third_party/pram/inference.py +62 -0
- third_party/pram/localization/base_model.py +45 -0
- third_party/pram/localization/camera.py +11 -0
- third_party/pram/localization/extract_features.py +256 -0
- third_party/pram/localization/frame.py +195 -0
- third_party/pram/localization/loc_by_rec_eval.py +299 -0
- third_party/pram/localization/loc_by_rec_online.py +225 -0
- third_party/pram/localization/localizer.py +217 -0
- third_party/pram/localization/match_features.py +156 -0
- third_party/pram/localization/match_features_batch.py +242 -0
- third_party/pram/localization/matchers/__init__.py +3 -0
- third_party/pram/localization/matchers/adagml.py +41 -0
hloc/extractors/sfd2.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
# -*- coding: UTF-8 -*-
|
2 |
import sys
|
3 |
from pathlib import Path
|
4 |
|
@@ -7,10 +6,9 @@ import torchvision.transforms as tvf
|
|
7 |
from .. import logger
|
8 |
from ..utils.base_model import BaseModel
|
9 |
|
10 |
-
|
11 |
-
sys.path.append(str(
|
12 |
-
|
13 |
-
from nets.sfd2 import load_sfd2
|
14 |
|
15 |
|
16 |
class SFD2(BaseModel):
|
@@ -26,8 +24,8 @@ class SFD2(BaseModel):
|
|
26 |
self.norm_rgb = tvf.Normalize(
|
27 |
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
|
28 |
)
|
29 |
-
|
30 |
-
self.net = load_sfd2(weight_path=
|
31 |
|
32 |
logger.info("Load SFD2 model done.")
|
33 |
|
|
|
|
|
1 |
import sys
|
2 |
from pathlib import Path
|
3 |
|
|
|
6 |
from .. import logger
|
7 |
from ..utils.base_model import BaseModel
|
8 |
|
9 |
+
tp_path = Path(__file__).parent / "../../third_party"
|
10 |
+
sys.path.append(str(tp_path))
|
11 |
+
from pram.nets.sfd2 import load_sfd2
|
|
|
12 |
|
13 |
|
14 |
class SFD2(BaseModel):
|
|
|
24 |
self.norm_rgb = tvf.Normalize(
|
25 |
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
|
26 |
)
|
27 |
+
model_path = tp_path / "pram" / "weights" / self.conf["model_name"]
|
28 |
+
self.net = load_sfd2(weight_path=model_path).eval()
|
29 |
|
30 |
logger.info("Load SFD2 model done.")
|
31 |
|
hloc/matchers/eloftr.py
CHANGED
@@ -5,18 +5,22 @@ from pathlib import Path
|
|
5 |
|
6 |
import torch
|
7 |
|
8 |
-
|
9 |
-
sys.path.append(str(
|
10 |
|
11 |
-
from src.loftr import LoFTR as ELoFTR_
|
12 |
-
from src.loftr import
|
|
|
|
|
|
|
|
|
13 |
|
14 |
from hloc import logger
|
15 |
|
16 |
from ..utils.base_model import BaseModel
|
17 |
|
18 |
|
19 |
-
class
|
20 |
default_conf = {
|
21 |
"weights": "weights/eloftr_outdoor.ckpt",
|
22 |
"match_threshold": 0.2,
|
@@ -40,7 +44,7 @@ class LoFTR(BaseModel):
|
|
40 |
_default_cfg["mp"] = True
|
41 |
elif self.conf["precision"] == "fp16":
|
42 |
_default_cfg["half"] = True
|
43 |
-
model_path =
|
44 |
cfg = _default_cfg
|
45 |
cfg["match_coarse"]["thr"] = conf["match_threshold"]
|
46 |
# cfg["match_coarse"]["skh_iters"] = conf["sinkhorn_iterations"]
|
|
|
5 |
|
6 |
import torch
|
7 |
|
8 |
+
tp_path = Path(__file__).parent / "../../third_party"
|
9 |
+
sys.path.append(str(tp_path))
|
10 |
|
11 |
+
from EfficientLoFTR.src.loftr import LoFTR as ELoFTR_
|
12 |
+
from EfficientLoFTR.src.loftr import (
|
13 |
+
full_default_cfg,
|
14 |
+
opt_default_cfg,
|
15 |
+
reparameter,
|
16 |
+
)
|
17 |
|
18 |
from hloc import logger
|
19 |
|
20 |
from ..utils.base_model import BaseModel
|
21 |
|
22 |
|
23 |
+
class ELoFTR(BaseModel):
|
24 |
default_conf = {
|
25 |
"weights": "weights/eloftr_outdoor.ckpt",
|
26 |
"match_threshold": 0.2,
|
|
|
44 |
_default_cfg["mp"] = True
|
45 |
elif self.conf["precision"] == "fp16":
|
46 |
_default_cfg["half"] = True
|
47 |
+
model_path = tp_path / "EfficientLoFTR" / self.conf["weights"]
|
48 |
cfg = _default_cfg
|
49 |
cfg["match_coarse"]["thr"] = conf["match_threshold"]
|
50 |
# cfg["match_coarse"]["skh_iters"] = conf["sinkhorn_iterations"]
|
hloc/matchers/imp.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
# -*- coding: UTF-8 -*-
|
2 |
import sys
|
3 |
from pathlib import Path
|
4 |
|
@@ -7,10 +6,9 @@ import torch
|
|
7 |
from .. import DEVICE, logger
|
8 |
from ..utils.base_model import BaseModel
|
9 |
|
10 |
-
|
11 |
-
sys.path.append(str(
|
12 |
-
|
13 |
-
from nets.gml import GML
|
14 |
|
15 |
|
16 |
class IMP(BaseModel):
|
@@ -33,7 +31,8 @@ class IMP(BaseModel):
|
|
33 |
|
34 |
def _init(self, conf):
|
35 |
self.conf = {**self.default_conf, **conf}
|
36 |
-
weight_path =
|
|
|
37 |
self.net = GML(self.conf).eval().to(DEVICE)
|
38 |
self.net.load_state_dict(
|
39 |
torch.load(weight_path, map_location="cpu")["model"], strict=True
|
|
|
|
|
1 |
import sys
|
2 |
from pathlib import Path
|
3 |
|
|
|
6 |
from .. import DEVICE, logger
|
7 |
from ..utils.base_model import BaseModel
|
8 |
|
9 |
+
tp_path = Path(__file__).parent / "../../third_party"
|
10 |
+
sys.path.append(str(tp_path))
|
11 |
+
from pram.nets.gml import GML
|
|
|
12 |
|
13 |
|
14 |
class IMP(BaseModel):
|
|
|
31 |
|
32 |
def _init(self, conf):
|
33 |
self.conf = {**self.default_conf, **conf}
|
34 |
+
weight_path = tp_path / "pram" / "weights" / self.conf["model_name"]
|
35 |
+
# self.net = nets.gml(self.conf).eval().to(DEVICE)
|
36 |
self.net = GML(self.conf).eval().to(DEVICE)
|
37 |
self.net.load_state_dict(
|
38 |
torch.load(weight_path, map_location="cpu")["model"], strict=True
|
third_party/pram/.gitignore
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.idea
|
2 |
+
__pycache__
|
3 |
+
weights/12scenes*
|
4 |
+
weights/7scenes*
|
5 |
+
weights/aachen*
|
6 |
+
weights/cambridgelandmarks*
|
7 |
+
weights/imp_adagml.80.pth
|
8 |
+
landmarks
|
9 |
+
3D-models
|
10 |
+
log_*
|
11 |
+
*.log
|
12 |
+
.nfs*
|
13 |
+
Pangolin
|
third_party/pram/LICENSE
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
This work is licensed under the Creative Commons Attribution-NonCommercial 4.0 International License.
|
2 |
+
To view a copy of this license, visit http://creativecommons.org/licenses/by-nc/4.0/.
|
third_party/pram/README.md
ADDED
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## PRAM: Place Recognition Anywhere Model for Efficient Visual Localization
|
2 |
+
|
3 |
+
<p align="center">
|
4 |
+
<img src="assets/overview.png" width="960">
|
5 |
+
</p>
|
6 |
+
|
7 |
+
Humans localize themselves efficiently in known environments by first recognizing landmarks defined on certain objects
|
8 |
+
and their spatial relationships, and then verifying the location by aligning detailed structures of recognized objects
|
9 |
+
with those in the memory. Inspired by this, we propose the place recognition anywhere model (PRAM) to perform visual
|
10 |
+
localization as efficiently as humans do. PRAM consists of two main components - recognition and registration. In
|
11 |
+
detail, first of all, a self-supervised map-centric landmark definition strategy is adopted, making places in either
|
12 |
+
indoor or outdoor scenes act as unique landmarks. Then, sparse keypoints extracted from images, are utilized as the
|
13 |
+
input to a transformer-based deep neural network for landmark recognition; these keypoints enable PRAM to recognize
|
14 |
+
hundreds of landmarks with high time and memory efficiency. Keypoints along with recognized landmark labels are further
|
15 |
+
used for registration between query images and the 3D landmark map. Different from previous hierarchical methods, PRAM
|
16 |
+
discards global and local descriptors, and reduces over 90% storage. Since PRAM utilizes recognition and landmark-wise
|
17 |
+
verification to replace global reference search and exhaustive matching respectively, it runs 2.4 times faster than
|
18 |
+
prior state-of-the-art approaches. Moreover, PRAM opens new directions for visual localization including multi-modality
|
19 |
+
localization, map-centric feature learning, and hierarchical scene coordinate regression.
|
20 |
+
|
21 |
+
* Full paper
|
22 |
+
PDF: [Place Recognition Anywhere Model for Efficient Visual Localization](https://arxiv.org/pdf/2404.07785.pdf).
|
23 |
+
|
24 |
+
* Authors: *Fei Xue, Ignas Budvytis, Roberto Cipolla*
|
25 |
+
|
26 |
+
* Website: [PRAM](https://feixue94.github.io/pram-project) for videos, slides, recent updates, and datasets.
|
27 |
+
|
28 |
+
## Key Features
|
29 |
+
|
30 |
+
### 1. Self-supervised landmark definition on 3D space
|
31 |
+
|
32 |
+
- No need of segmentations on images
|
33 |
+
- No inconsistent semantic results from multi-view images
|
34 |
+
- No limitation to labels of only known objects
|
35 |
+
- Work in any places with known or unknown objects
|
36 |
+
- Landmark-wise 3D map sparsification
|
37 |
+
|
38 |
+
<p align="center">
|
39 |
+
<img src="assets/map_sparsification.gif" width="640">
|
40 |
+
</p>
|
41 |
+
|
42 |
+
### 2. Efficient landmark-wise coarse and fine localization
|
43 |
+
|
44 |
+
- Recognize landmarks as opposed to do global retrieval
|
45 |
+
- Local landmark-wise matching as opposed to exhaustive matching
|
46 |
+
- No global descriptors (e.g. NetVLAD)
|
47 |
+
- No reference images and their heavy repetative 2D keypoints and descriptors
|
48 |
+
- Automatic inlier/outlier idetification
|
49 |
+
|
50 |
+
<p align="center">
|
51 |
+
<img src="assets/pipeline1.png" width="640">
|
52 |
+
</p>
|
53 |
+
|
54 |
+
### 4. Sparse recognition
|
55 |
+
|
56 |
+
- Sparse SFD2 keypoints as tokens
|
57 |
+
- No uncertainties of points at boundaries
|
58 |
+
- Flexible to accept multi-modality inputs
|
59 |
+
|
60 |
+
### 5. Relocalization and temporal localization
|
61 |
+
|
62 |
+
- Per frame reclocalization from scratch
|
63 |
+
- Tracking previous frames for higher efficiency
|
64 |
+
|
65 |
+
### 6. One model one dataset
|
66 |
+
|
67 |
+
- All 7 subscenes in 7Scenes dataset share a model
|
68 |
+
- All 12 subscenes in 12Scenes dataset share a model
|
69 |
+
- All 5 subscenes in CambridgeLandmarks share a model
|
70 |
+
|
71 |
+
### 7. Robust to long-term changes
|
72 |
+
|
73 |
+
<p align="center">
|
74 |
+
<img src="assets/pram_demo.gif" width="640">
|
75 |
+
</p>
|
76 |
+
|
77 |
+
## Open problems
|
78 |
+
|
79 |
+
- Adaptive number landmarks determination
|
80 |
+
- Using SAM + open vocabulary to generate semantic map
|
81 |
+
- Multi-modality localization with other tokenized signals (e.g. text, language, GPS, Magonemeter)
|
82 |
+
- More effective solutions to 3D sparsification
|
83 |
+
|
84 |
+
## Preparation
|
85 |
+
|
86 |
+
1. Download the 7Scenes, 12Scenes, CambridgeLandmarks, and Aachen datasets (remove redundant depth images otherwise they
|
87 |
+
will be found in the sfm process)
|
88 |
+
2. Environments
|
89 |
+
|
90 |
+
2.1 Create a virtual environment
|
91 |
+
|
92 |
+
```
|
93 |
+
conda env create -f environment.yml
|
94 |
+
(do not activate pram before pangolin is installed)
|
95 |
+
```
|
96 |
+
|
97 |
+
2.2 Compile Pangolin for the installed python
|
98 |
+
|
99 |
+
```
|
100 |
+
git clone --recursive https://github.com/stevenlovegrove/Pangolin.git
|
101 |
+
cd Pangolin
|
102 |
+
git checkout v0.8
|
103 |
+
|
104 |
+
# Install dependencies
|
105 |
+
./scripts/install_prerequisites.sh recommended
|
106 |
+
|
107 |
+
# Compile with your python
|
108 |
+
cmake -DPython_EXECUTABLE=/your path to/anaconda3/envs/pram/bin/python3 -B build
|
109 |
+
cmake --build build -t pypangolin_pip_install
|
110 |
+
|
111 |
+
conda activate pram
|
112 |
+
```
|
113 |
+
|
114 |
+
## Run the localization with online visualization
|
115 |
+
|
116 |
+
1. Download the [3D-models](https://drive.google.com/drive/folders/1DUB073KxAjsc8lxhMpFuxPRf0ZBQS6NS?usp=drive_link),
|
117 |
+
pretrained [models](https://drive.google.com/drive/folders/1E2QvujCevqnyg_CM9FGAa0AxKkt4KbLD?usp=drive_link) ,
|
118 |
+
and [landmarks](https://drive.google.com/drive/folders/1r9src9bz7k3WYGfaPmKJ9gqxuvdfxZU0?usp=sharing)
|
119 |
+
2. Put pretrained models in ```weights``` directory
|
120 |
+
3. Run the demo (e.g. 7Scenes)
|
121 |
+
|
122 |
+
```
|
123 |
+
python3 inference.py --config configs/config_train_7scenes_sfd2.yaml --rec_weight_path weights/7scenes_nc113_birch_segnetvit.199.pth --landmark_path /your path to/landmarks --online
|
124 |
+
```
|
125 |
+
|
126 |
+
## Train the recognition model (e.g. for 7Scenes)
|
127 |
+
|
128 |
+
### 1. Do SfM with SFD2 including feature extraction (modify the dataset_dir, ref_sfm_dir, output_dir)
|
129 |
+
|
130 |
+
```
|
131 |
+
./sfm_scripts/reconstruct_7scenes.sh
|
132 |
+
```
|
133 |
+
|
134 |
+
This step will produce the SfM results together with the extracted keypoints
|
135 |
+
|
136 |
+
### 2. Generate 3D landmarks
|
137 |
+
|
138 |
+
```
|
139 |
+
python3 -m recognition.recmap --dataset 7Scenes --dataset_dir /your path to/7Scenes --sfm_dir /sfm_path/7Scenes --save_dir /save_path/landmakrs
|
140 |
+
```
|
141 |
+
|
142 |
+
This step will generate 3D landmarks, create virtual reference frame, and sparsify the 3D points for each landmark for
|
143 |
+
all scenes in 7Scenes
|
144 |
+
|
145 |
+
### 3. Train the sparse recognition model (one model one dataset)
|
146 |
+
|
147 |
+
```
|
148 |
+
python3 train.py --config configs/config_train_7scenes_sfd2.yaml
|
149 |
+
```
|
150 |
+
|
151 |
+
Remember to modify the paths in 'config_train_7scenes_sfd2.yaml'
|
152 |
+
|
153 |
+
## Your own dataset
|
154 |
+
|
155 |
+
1. Run colmap or hloc to obtain the SfM results
|
156 |
+
2. Do reconstruction with SFD2 keypoints with the sfm from step as refernece sfm
|
157 |
+
3. Do 3D landmark generation, VRF, map sparsification etc (Add DatasetName.yaml to configs/datasets)
|
158 |
+
4. Train the recognition model
|
159 |
+
5. Do evaluation
|
160 |
+
|
161 |
+
## Previous works can be found here
|
162 |
+
|
163 |
+
1. [Efficient large-scale localization by landmark recognition, CVPR 2022](https://github.com/feixue94/lbr)
|
164 |
+
2. [IMP: Iterative Matching and Pose Estimation with Adaptive Pooling, CVPR 2023](https://github.com/feixue94/imp-release)
|
165 |
+
3. [SFD2: Semantic-guided Feature Detection and Description, CVPR 2023](https://github.com/feixue94/sfd2)
|
166 |
+
4. [VRS-NeRF: Visual Relocalization with Sparse Neural Radiance Field, under review](https://github.com/feixue94/vrs-nerf)
|
167 |
+
|
168 |
+
## BibTeX Citation
|
169 |
+
|
170 |
+
If you use any ideas from the paper or code in this repo, please consider citing:
|
171 |
+
|
172 |
+
```
|
173 |
+
@article{xue2024pram,
|
174 |
+
author = {Fei Xue and Ignas Budvytis and Roberto Cipolla},
|
175 |
+
title = {PRAM: Place Recognition Anywhere Model for Efficient Visual Localization},
|
176 |
+
journal = {arXiv preprint arXiv:2404.07785},
|
177 |
+
year = {2024}
|
178 |
+
}
|
179 |
+
|
180 |
+
@inproceedings{xue2023sfd2,
|
181 |
+
author = {Fei Xue and Ignas Budvytis and Roberto Cipolla},
|
182 |
+
title = {SFD2: Semantic-guided Feature Detection and Description},
|
183 |
+
booktitle = {CVPR},
|
184 |
+
year = {2023}
|
185 |
+
}
|
186 |
+
|
187 |
+
@inproceedings{xue2022imp,
|
188 |
+
author = {Fei Xue and Ignas Budvytis and Roberto Cipolla},
|
189 |
+
title = {IMP: Iterative Matching and Pose Estimation with Adaptive Pooling},
|
190 |
+
booktitle = {CVPR},
|
191 |
+
year = {2023}
|
192 |
+
}
|
193 |
+
|
194 |
+
@inproceedings{xue2022efficient,
|
195 |
+
author = {Fei Xue and Ignas Budvytis and Daniel Olmeda Reino and Roberto Cipolla},
|
196 |
+
title = {Efficient Large-scale Localization by Global Instance Recognition},
|
197 |
+
booktitle = {CVPR},
|
198 |
+
year = {2022}
|
199 |
+
}
|
200 |
+
```
|
201 |
+
|
202 |
+
## Acknowledgements
|
203 |
+
|
204 |
+
Part of the code is from previous excellent works
|
205 |
+
including , [SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork)
|
206 |
+
and [hloc](https://github.com/cvg/Hierarchical-Localization). You can find more details from their released
|
207 |
+
repositories if you are interested in their works.
|
third_party/pram/assets/map_sparsification.gif
ADDED
Git LFS Details
|
third_party/pram/assets/multi_recognition.png
ADDED
Git LFS Details
|
third_party/pram/assets/overview.png
ADDED
Git LFS Details
|
third_party/pram/assets/pipeline1.png
ADDED
Git LFS Details
|
third_party/pram/assets/pram_demo.gif
ADDED
Git LFS Details
|
third_party/pram/assets/sam_openvoc.png
ADDED
Git LFS Details
|
third_party/pram/colmap_utils/camera_intrinsics.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File localizer -> camera_intrinsics
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 15/08/2023 12:33
|
7 |
+
=================================================='''
|
8 |
+
import numpy as np
|
9 |
+
|
10 |
+
|
11 |
+
def intrinsics_from_camera(camera_model, params):
|
12 |
+
if camera_model in ("SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL"):
|
13 |
+
fx = fy = params[0]
|
14 |
+
cx = params[1]
|
15 |
+
cy = params[2]
|
16 |
+
elif camera_model in ("PINHOLE", "OPENCV", "OPENCV_FISHEYE", "FULL_OPENCV"):
|
17 |
+
fx = params[0]
|
18 |
+
fy = params[1]
|
19 |
+
cx = params[2]
|
20 |
+
cy = params[3]
|
21 |
+
else:
|
22 |
+
raise Exception("Camera model not supported")
|
23 |
+
|
24 |
+
# intrinsics
|
25 |
+
K = np.identity(3)
|
26 |
+
K[0, 0] = fx
|
27 |
+
K[1, 1] = fy
|
28 |
+
K[0, 2] = cx
|
29 |
+
K[1, 2] = cy
|
30 |
+
return K
|
third_party/pram/colmap_utils/database.py
ADDED
@@ -0,0 +1,352 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2018, ETH Zurich and UNC Chapel Hill.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# Redistribution and use in source and binary forms, with or without
|
5 |
+
# modification, are permitted provided that the following conditions are met:
|
6 |
+
#
|
7 |
+
# * Redistributions of source code must retain the above copyright
|
8 |
+
# notice, this list of conditions and the following disclaimer.
|
9 |
+
#
|
10 |
+
# * Redistributions in binary form must reproduce the above copyright
|
11 |
+
# notice, this list of conditions and the following disclaimer in the
|
12 |
+
# documentation and/or other materials provided with the distribution.
|
13 |
+
#
|
14 |
+
# * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
|
15 |
+
# its contributors may be used to endorse or promote products derived
|
16 |
+
# from this software without specific prior written permission.
|
17 |
+
#
|
18 |
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19 |
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20 |
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
21 |
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
22 |
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
23 |
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
24 |
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
25 |
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
26 |
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
27 |
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
28 |
+
# POSSIBILITY OF SUCH DAMAGE.
|
29 |
+
#
|
30 |
+
# Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)
|
31 |
+
|
32 |
+
# This script is based on an original implementation by True Price.
|
33 |
+
|
34 |
+
import sys
|
35 |
+
import sqlite3
|
36 |
+
import numpy as np
|
37 |
+
|
38 |
+
|
39 |
+
IS_PYTHON3 = sys.version_info[0] >= 3
|
40 |
+
|
41 |
+
MAX_IMAGE_ID = 2**31 - 1
|
42 |
+
|
43 |
+
CREATE_CAMERAS_TABLE = """CREATE TABLE IF NOT EXISTS cameras (
|
44 |
+
camera_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
45 |
+
model INTEGER NOT NULL,
|
46 |
+
width INTEGER NOT NULL,
|
47 |
+
height INTEGER NOT NULL,
|
48 |
+
params BLOB,
|
49 |
+
prior_focal_length INTEGER NOT NULL)"""
|
50 |
+
|
51 |
+
CREATE_DESCRIPTORS_TABLE = """CREATE TABLE IF NOT EXISTS descriptors (
|
52 |
+
image_id INTEGER PRIMARY KEY NOT NULL,
|
53 |
+
rows INTEGER NOT NULL,
|
54 |
+
cols INTEGER NOT NULL,
|
55 |
+
data BLOB,
|
56 |
+
FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)"""
|
57 |
+
|
58 |
+
CREATE_IMAGES_TABLE = """CREATE TABLE IF NOT EXISTS images (
|
59 |
+
image_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
60 |
+
name TEXT NOT NULL UNIQUE,
|
61 |
+
camera_id INTEGER NOT NULL,
|
62 |
+
prior_qw REAL,
|
63 |
+
prior_qx REAL,
|
64 |
+
prior_qy REAL,
|
65 |
+
prior_qz REAL,
|
66 |
+
prior_tx REAL,
|
67 |
+
prior_ty REAL,
|
68 |
+
prior_tz REAL,
|
69 |
+
CONSTRAINT image_id_check CHECK(image_id >= 0 and image_id < {}),
|
70 |
+
FOREIGN KEY(camera_id) REFERENCES cameras(camera_id))
|
71 |
+
""".format(MAX_IMAGE_ID)
|
72 |
+
|
73 |
+
CREATE_TWO_VIEW_GEOMETRIES_TABLE = """
|
74 |
+
CREATE TABLE IF NOT EXISTS two_view_geometries (
|
75 |
+
pair_id INTEGER PRIMARY KEY NOT NULL,
|
76 |
+
rows INTEGER NOT NULL,
|
77 |
+
cols INTEGER NOT NULL,
|
78 |
+
data BLOB,
|
79 |
+
config INTEGER NOT NULL,
|
80 |
+
F BLOB,
|
81 |
+
E BLOB,
|
82 |
+
H BLOB)
|
83 |
+
"""
|
84 |
+
|
85 |
+
CREATE_KEYPOINTS_TABLE = """CREATE TABLE IF NOT EXISTS keypoints (
|
86 |
+
image_id INTEGER PRIMARY KEY NOT NULL,
|
87 |
+
rows INTEGER NOT NULL,
|
88 |
+
cols INTEGER NOT NULL,
|
89 |
+
data BLOB,
|
90 |
+
FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)
|
91 |
+
"""
|
92 |
+
|
93 |
+
CREATE_MATCHES_TABLE = """CREATE TABLE IF NOT EXISTS matches (
|
94 |
+
pair_id INTEGER PRIMARY KEY NOT NULL,
|
95 |
+
rows INTEGER NOT NULL,
|
96 |
+
cols INTEGER NOT NULL,
|
97 |
+
data BLOB)"""
|
98 |
+
|
99 |
+
CREATE_NAME_INDEX = \
|
100 |
+
"CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)"
|
101 |
+
|
102 |
+
CREATE_ALL = "; ".join([
|
103 |
+
CREATE_CAMERAS_TABLE,
|
104 |
+
CREATE_IMAGES_TABLE,
|
105 |
+
CREATE_KEYPOINTS_TABLE,
|
106 |
+
CREATE_DESCRIPTORS_TABLE,
|
107 |
+
CREATE_MATCHES_TABLE,
|
108 |
+
CREATE_TWO_VIEW_GEOMETRIES_TABLE,
|
109 |
+
CREATE_NAME_INDEX
|
110 |
+
])
|
111 |
+
|
112 |
+
|
113 |
+
def image_ids_to_pair_id(image_id1, image_id2):
|
114 |
+
if image_id1 > image_id2:
|
115 |
+
image_id1, image_id2 = image_id2, image_id1
|
116 |
+
return image_id1 * MAX_IMAGE_ID + image_id2
|
117 |
+
|
118 |
+
|
119 |
+
def pair_id_to_image_ids(pair_id):
|
120 |
+
image_id2 = pair_id % MAX_IMAGE_ID
|
121 |
+
image_id1 = (pair_id - image_id2) / MAX_IMAGE_ID
|
122 |
+
return image_id1, image_id2
|
123 |
+
|
124 |
+
|
125 |
+
def array_to_blob(array):
|
126 |
+
if IS_PYTHON3:
|
127 |
+
return array.tostring()
|
128 |
+
else:
|
129 |
+
return np.getbuffer(array)
|
130 |
+
|
131 |
+
|
132 |
+
def blob_to_array(blob, dtype, shape=(-1,)):
|
133 |
+
if IS_PYTHON3:
|
134 |
+
return np.fromstring(blob, dtype=dtype).reshape(*shape)
|
135 |
+
else:
|
136 |
+
return np.frombuffer(blob, dtype=dtype).reshape(*shape)
|
137 |
+
|
138 |
+
|
139 |
+
class COLMAPDatabase(sqlite3.Connection):
|
140 |
+
|
141 |
+
@staticmethod
|
142 |
+
def connect(database_path):
|
143 |
+
return sqlite3.connect(str(database_path), factory=COLMAPDatabase)
|
144 |
+
|
145 |
+
|
146 |
+
def __init__(self, *args, **kwargs):
|
147 |
+
super(COLMAPDatabase, self).__init__(*args, **kwargs)
|
148 |
+
|
149 |
+
self.create_tables = lambda: self.executescript(CREATE_ALL)
|
150 |
+
self.create_cameras_table = \
|
151 |
+
lambda: self.executescript(CREATE_CAMERAS_TABLE)
|
152 |
+
self.create_descriptors_table = \
|
153 |
+
lambda: self.executescript(CREATE_DESCRIPTORS_TABLE)
|
154 |
+
self.create_images_table = \
|
155 |
+
lambda: self.executescript(CREATE_IMAGES_TABLE)
|
156 |
+
self.create_two_view_geometries_table = \
|
157 |
+
lambda: self.executescript(CREATE_TWO_VIEW_GEOMETRIES_TABLE)
|
158 |
+
self.create_keypoints_table = \
|
159 |
+
lambda: self.executescript(CREATE_KEYPOINTS_TABLE)
|
160 |
+
self.create_matches_table = \
|
161 |
+
lambda: self.executescript(CREATE_MATCHES_TABLE)
|
162 |
+
self.create_name_index = lambda: self.executescript(CREATE_NAME_INDEX)
|
163 |
+
|
164 |
+
def add_camera(self, model, width, height, params,
|
165 |
+
prior_focal_length=False, camera_id=None):
|
166 |
+
params = np.asarray(params, np.float64)
|
167 |
+
cursor = self.execute(
|
168 |
+
"INSERT INTO cameras VALUES (?, ?, ?, ?, ?, ?)",
|
169 |
+
(camera_id, model, width, height, array_to_blob(params),
|
170 |
+
prior_focal_length))
|
171 |
+
return cursor.lastrowid
|
172 |
+
|
173 |
+
def add_image(self, name, camera_id,
|
174 |
+
prior_q=np.zeros(4), prior_t=np.zeros(3), image_id=None):
|
175 |
+
cursor = self.execute(
|
176 |
+
"INSERT INTO images VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
177 |
+
(image_id, name, camera_id, prior_q[0], prior_q[1], prior_q[2],
|
178 |
+
prior_q[3], prior_t[0], prior_t[1], prior_t[2]))
|
179 |
+
return cursor.lastrowid
|
180 |
+
|
181 |
+
def add_keypoints(self, image_id, keypoints):
|
182 |
+
assert(len(keypoints.shape) == 2)
|
183 |
+
assert(keypoints.shape[1] in [2, 4, 6])
|
184 |
+
|
185 |
+
keypoints = np.asarray(keypoints, np.float32)
|
186 |
+
self.execute(
|
187 |
+
"INSERT INTO keypoints VALUES (?, ?, ?, ?)",
|
188 |
+
(image_id,) + keypoints.shape + (array_to_blob(keypoints),))
|
189 |
+
|
190 |
+
def add_descriptors(self, image_id, descriptors):
|
191 |
+
descriptors = np.ascontiguousarray(descriptors, np.uint8)
|
192 |
+
self.execute(
|
193 |
+
"INSERT INTO descriptors VALUES (?, ?, ?, ?)",
|
194 |
+
(image_id,) + descriptors.shape + (array_to_blob(descriptors),))
|
195 |
+
|
196 |
+
def add_matches(self, image_id1, image_id2, matches):
|
197 |
+
assert(len(matches.shape) == 2)
|
198 |
+
assert(matches.shape[1] == 2)
|
199 |
+
|
200 |
+
if image_id1 > image_id2:
|
201 |
+
matches = matches[:,::-1]
|
202 |
+
|
203 |
+
pair_id = image_ids_to_pair_id(image_id1, image_id2)
|
204 |
+
matches = np.asarray(matches, np.uint32)
|
205 |
+
self.execute(
|
206 |
+
"INSERT INTO matches VALUES (?, ?, ?, ?)",
|
207 |
+
(pair_id,) + matches.shape + (array_to_blob(matches),))
|
208 |
+
|
209 |
+
def add_two_view_geometry(self, image_id1, image_id2, matches,
|
210 |
+
F=np.eye(3), E=np.eye(3), H=np.eye(3), config=2):
|
211 |
+
assert(len(matches.shape) == 2)
|
212 |
+
assert(matches.shape[1] == 2)
|
213 |
+
|
214 |
+
if image_id1 > image_id2:
|
215 |
+
matches = matches[:,::-1]
|
216 |
+
|
217 |
+
pair_id = image_ids_to_pair_id(image_id1, image_id2)
|
218 |
+
matches = np.asarray(matches, np.uint32)
|
219 |
+
F = np.asarray(F, dtype=np.float64)
|
220 |
+
E = np.asarray(E, dtype=np.float64)
|
221 |
+
H = np.asarray(H, dtype=np.float64)
|
222 |
+
self.execute(
|
223 |
+
"INSERT INTO two_view_geometries VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
|
224 |
+
(pair_id,) + matches.shape + (array_to_blob(matches), config,
|
225 |
+
array_to_blob(F), array_to_blob(E), array_to_blob(H)))
|
226 |
+
|
227 |
+
|
228 |
+
def example_usage():
|
229 |
+
import os
|
230 |
+
import argparse
|
231 |
+
|
232 |
+
parser = argparse.ArgumentParser()
|
233 |
+
parser.add_argument("--database_path", default="database.db")
|
234 |
+
args = parser.parse_args()
|
235 |
+
|
236 |
+
if os.path.exists(args.database_path):
|
237 |
+
print("ERROR: database path already exists -- will not modify it.")
|
238 |
+
return
|
239 |
+
|
240 |
+
# Open the database.
|
241 |
+
|
242 |
+
db = COLMAPDatabase.connect(args.database_path)
|
243 |
+
|
244 |
+
# For convenience, try creating all the tables upfront.
|
245 |
+
|
246 |
+
db.create_tables()
|
247 |
+
|
248 |
+
# Create dummy cameras.
|
249 |
+
|
250 |
+
model1, width1, height1, params1 = \
|
251 |
+
0, 1024, 768, np.array((1024., 512., 384.))
|
252 |
+
model2, width2, height2, params2 = \
|
253 |
+
2, 1024, 768, np.array((1024., 512., 384., 0.1))
|
254 |
+
|
255 |
+
camera_id1 = db.add_camera(model1, width1, height1, params1)
|
256 |
+
camera_id2 = db.add_camera(model2, width2, height2, params2)
|
257 |
+
|
258 |
+
# Create dummy images.
|
259 |
+
|
260 |
+
image_id1 = db.add_image("image1.png", camera_id1)
|
261 |
+
image_id2 = db.add_image("image2.png", camera_id1)
|
262 |
+
image_id3 = db.add_image("image3.png", camera_id2)
|
263 |
+
image_id4 = db.add_image("image4.png", camera_id2)
|
264 |
+
|
265 |
+
# Create dummy keypoints.
|
266 |
+
#
|
267 |
+
# Note that COLMAP supports:
|
268 |
+
# - 2D keypoints: (x, y)
|
269 |
+
# - 4D keypoints: (x, y, theta, scale)
|
270 |
+
# - 6D affine keypoints: (x, y, a_11, a_12, a_21, a_22)
|
271 |
+
|
272 |
+
num_keypoints = 1000
|
273 |
+
keypoints1 = np.random.rand(num_keypoints, 2) * (width1, height1)
|
274 |
+
keypoints2 = np.random.rand(num_keypoints, 2) * (width1, height1)
|
275 |
+
keypoints3 = np.random.rand(num_keypoints, 2) * (width2, height2)
|
276 |
+
keypoints4 = np.random.rand(num_keypoints, 2) * (width2, height2)
|
277 |
+
|
278 |
+
db.add_keypoints(image_id1, keypoints1)
|
279 |
+
db.add_keypoints(image_id2, keypoints2)
|
280 |
+
db.add_keypoints(image_id3, keypoints3)
|
281 |
+
db.add_keypoints(image_id4, keypoints4)
|
282 |
+
|
283 |
+
# Create dummy matches.
|
284 |
+
|
285 |
+
M = 50
|
286 |
+
matches12 = np.random.randint(num_keypoints, size=(M, 2))
|
287 |
+
matches23 = np.random.randint(num_keypoints, size=(M, 2))
|
288 |
+
matches34 = np.random.randint(num_keypoints, size=(M, 2))
|
289 |
+
|
290 |
+
db.add_matches(image_id1, image_id2, matches12)
|
291 |
+
db.add_matches(image_id2, image_id3, matches23)
|
292 |
+
db.add_matches(image_id3, image_id4, matches34)
|
293 |
+
|
294 |
+
# Commit the data to the file.
|
295 |
+
|
296 |
+
db.commit()
|
297 |
+
|
298 |
+
# Read and check cameras.
|
299 |
+
|
300 |
+
rows = db.execute("SELECT * FROM cameras")
|
301 |
+
|
302 |
+
camera_id, model, width, height, params, prior = next(rows)
|
303 |
+
params = blob_to_array(params, np.float64)
|
304 |
+
assert camera_id == camera_id1
|
305 |
+
assert model == model1 and width == width1 and height == height1
|
306 |
+
assert np.allclose(params, params1)
|
307 |
+
|
308 |
+
camera_id, model, width, height, params, prior = next(rows)
|
309 |
+
params = blob_to_array(params, np.float64)
|
310 |
+
assert camera_id == camera_id2
|
311 |
+
assert model == model2 and width == width2 and height == height2
|
312 |
+
assert np.allclose(params, params2)
|
313 |
+
|
314 |
+
# Read and check keypoints.
|
315 |
+
|
316 |
+
keypoints = dict(
|
317 |
+
(image_id, blob_to_array(data, np.float32, (-1, 2)))
|
318 |
+
for image_id, data in db.execute(
|
319 |
+
"SELECT image_id, data FROM keypoints"))
|
320 |
+
|
321 |
+
assert np.allclose(keypoints[image_id1], keypoints1)
|
322 |
+
assert np.allclose(keypoints[image_id2], keypoints2)
|
323 |
+
assert np.allclose(keypoints[image_id3], keypoints3)
|
324 |
+
assert np.allclose(keypoints[image_id4], keypoints4)
|
325 |
+
|
326 |
+
# Read and check matches.
|
327 |
+
|
328 |
+
pair_ids = [image_ids_to_pair_id(*pair) for pair in
|
329 |
+
((image_id1, image_id2),
|
330 |
+
(image_id2, image_id3),
|
331 |
+
(image_id3, image_id4))]
|
332 |
+
|
333 |
+
matches = dict(
|
334 |
+
(pair_id_to_image_ids(pair_id),
|
335 |
+
blob_to_array(data, np.uint32, (-1, 2)))
|
336 |
+
for pair_id, data in db.execute("SELECT pair_id, data FROM matches")
|
337 |
+
)
|
338 |
+
|
339 |
+
assert np.all(matches[(image_id1, image_id2)] == matches12)
|
340 |
+
assert np.all(matches[(image_id2, image_id3)] == matches23)
|
341 |
+
assert np.all(matches[(image_id3, image_id4)] == matches34)
|
342 |
+
|
343 |
+
# Clean up.
|
344 |
+
|
345 |
+
db.close()
|
346 |
+
|
347 |
+
if os.path.exists(args.database_path):
|
348 |
+
os.remove(args.database_path)
|
349 |
+
|
350 |
+
|
351 |
+
if __name__ == "__main__":
|
352 |
+
example_usage()
|
third_party/pram/colmap_utils/geometry.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
import numpy as np
|
3 |
+
import pycolmap
|
4 |
+
|
5 |
+
|
6 |
+
def to_homogeneous(p):
|
7 |
+
return np.pad(p, ((0, 0),) * (p.ndim - 1) + ((0, 1),), constant_values=1)
|
8 |
+
|
9 |
+
|
10 |
+
def compute_epipolar_errors(j_from_i: pycolmap.Rigid3d, p2d_i, p2d_j):
|
11 |
+
j_E_i = j_from_i.essential_matrix()
|
12 |
+
l2d_j = to_homogeneous(p2d_i) @ j_E_i.T
|
13 |
+
l2d_i = to_homogeneous(p2d_j) @ j_E_i
|
14 |
+
dist = np.abs(np.sum(to_homogeneous(p2d_i) * l2d_i, axis=1))
|
15 |
+
errors_i = dist / np.linalg.norm(l2d_i[:, :2], axis=1)
|
16 |
+
errors_j = dist / np.linalg.norm(l2d_j[:, :2], axis=1)
|
17 |
+
return errors_i, errors_j
|
third_party/pram/colmap_utils/io.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
from pathlib import Path
|
3 |
+
from typing import Tuple
|
4 |
+
|
5 |
+
import cv2
|
6 |
+
import h5py
|
7 |
+
import numpy as np
|
8 |
+
|
9 |
+
from .parsers import names_to_pair, names_to_pair_old
|
10 |
+
|
11 |
+
|
12 |
+
def read_image(path, grayscale=False):
|
13 |
+
if grayscale:
|
14 |
+
mode = cv2.IMREAD_GRAYSCALE
|
15 |
+
else:
|
16 |
+
mode = cv2.IMREAD_COLOR
|
17 |
+
image = cv2.imread(str(path), mode)
|
18 |
+
if image is None:
|
19 |
+
raise ValueError(f"Cannot read image {path}.")
|
20 |
+
if not grayscale and len(image.shape) == 3:
|
21 |
+
image = image[:, :, ::-1] # BGR to RGB
|
22 |
+
return image
|
23 |
+
|
24 |
+
|
25 |
+
def list_h5_names(path):
|
26 |
+
names = []
|
27 |
+
with h5py.File(str(path), "r", libver="latest") as fd:
|
28 |
+
def visit_fn(_, obj):
|
29 |
+
if isinstance(obj, h5py.Dataset):
|
30 |
+
names.append(obj.parent.name.strip("/"))
|
31 |
+
|
32 |
+
fd.visititems(visit_fn)
|
33 |
+
return list(set(names))
|
34 |
+
|
35 |
+
|
36 |
+
def get_keypoints(
|
37 |
+
path: Path, name: str, return_uncertainty: bool = False
|
38 |
+
) -> np.ndarray:
|
39 |
+
with h5py.File(str(path), "r", libver="latest") as hfile:
|
40 |
+
dset = hfile[name]["keypoints"]
|
41 |
+
p = dset.__array__()
|
42 |
+
uncertainty = dset.attrs.get("uncertainty")
|
43 |
+
if return_uncertainty:
|
44 |
+
return p, uncertainty
|
45 |
+
return p
|
46 |
+
|
47 |
+
|
48 |
+
def find_pair(hfile: h5py.File, name0: str, name1: str):
|
49 |
+
pair = names_to_pair(name0, name1)
|
50 |
+
if pair in hfile:
|
51 |
+
return pair, False
|
52 |
+
pair = names_to_pair(name1, name0)
|
53 |
+
if pair in hfile:
|
54 |
+
return pair, True
|
55 |
+
# older, less efficient format
|
56 |
+
pair = names_to_pair_old(name0, name1)
|
57 |
+
if pair in hfile:
|
58 |
+
return pair, False
|
59 |
+
pair = names_to_pair_old(name1, name0)
|
60 |
+
if pair in hfile:
|
61 |
+
return pair, True
|
62 |
+
raise ValueError(
|
63 |
+
f"Could not find pair {(name0, name1)}... "
|
64 |
+
"Maybe you matched with a different list of pairs? "
|
65 |
+
)
|
66 |
+
|
67 |
+
|
68 |
+
def get_matches(path: Path, name0: str, name1: str) -> Tuple[np.ndarray]:
|
69 |
+
with h5py.File(str(path), "r", libver="latest") as hfile:
|
70 |
+
pair, reverse = find_pair(hfile, name0, name1)
|
71 |
+
matches = hfile[pair]["matches0"].__array__()
|
72 |
+
scores = hfile[pair]["matching_scores0"].__array__()
|
73 |
+
idx = np.where(matches != -1)[0]
|
74 |
+
matches = np.stack([idx, matches[idx]], -1)
|
75 |
+
if reverse:
|
76 |
+
matches = np.flip(matches, -1)
|
77 |
+
scores = scores[idx]
|
78 |
+
return matches, scores
|
third_party/pram/colmap_utils/parsers.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
|
3 |
+
from pathlib import Path
|
4 |
+
import logging
|
5 |
+
import numpy as np
|
6 |
+
from collections import defaultdict
|
7 |
+
|
8 |
+
|
9 |
+
def parse_image_lists_with_intrinsics(paths):
|
10 |
+
results = []
|
11 |
+
files = list(Path(paths.parent).glob(paths.name))
|
12 |
+
assert len(files) > 0
|
13 |
+
|
14 |
+
for lfile in files:
|
15 |
+
with open(lfile, 'r') as f:
|
16 |
+
raw_data = f.readlines()
|
17 |
+
|
18 |
+
logging.info(f'Importing {len(raw_data)} queries in {lfile.name}')
|
19 |
+
for data in raw_data:
|
20 |
+
data = data.strip('\n').split(' ')
|
21 |
+
name, camera_model, width, height = data[:4]
|
22 |
+
params = np.array(data[4:], float)
|
23 |
+
info = (camera_model, int(width), int(height), params)
|
24 |
+
results.append((name, info))
|
25 |
+
|
26 |
+
assert len(results) > 0
|
27 |
+
return results
|
28 |
+
|
29 |
+
|
30 |
+
def parse_img_lists_for_extended_cmu_seaons(paths):
|
31 |
+
Ks = {
|
32 |
+
"c0": "OPENCV 1024 768 868.993378 866.063001 525.942323 420.042529 -0.399431 0.188924 0.000153 0.000571",
|
33 |
+
"c1": "OPENCV 1024 768 868.993378 866.063001 525.942323 420.042529 -0.399431 0.188924 0.000153 0.000571"
|
34 |
+
}
|
35 |
+
|
36 |
+
results = []
|
37 |
+
files = list(Path(paths.parent).glob(paths.name))
|
38 |
+
assert len(files) > 0
|
39 |
+
|
40 |
+
for lfile in files:
|
41 |
+
with open(lfile, 'r') as f:
|
42 |
+
raw_data = f.readlines()
|
43 |
+
|
44 |
+
logging.info(f'Importing {len(raw_data)} queries in {lfile.name}')
|
45 |
+
for name in raw_data:
|
46 |
+
name = name.strip('\n')
|
47 |
+
camera = name.split('_')[2]
|
48 |
+
K = Ks[camera].split(' ')
|
49 |
+
camera_model, width, height = K[:3]
|
50 |
+
params = np.array(K[3:], float)
|
51 |
+
# print("camera: ", camera_model, width, height, params)
|
52 |
+
info = (camera_model, int(width), int(height), params)
|
53 |
+
results.append((name, info))
|
54 |
+
|
55 |
+
assert len(results) > 0
|
56 |
+
return results
|
57 |
+
|
58 |
+
|
59 |
+
def parse_retrieval(path):
|
60 |
+
retrieval = defaultdict(list)
|
61 |
+
with open(path, 'r') as f:
|
62 |
+
for p in f.read().rstrip('\n').split('\n'):
|
63 |
+
q, r = p.split(' ')
|
64 |
+
retrieval[q].append(r)
|
65 |
+
return dict(retrieval)
|
66 |
+
|
67 |
+
|
68 |
+
def names_to_pair_old(name0, name1):
|
69 |
+
return '_'.join((name0.replace('/', '-'), name1.replace('/', '-')))
|
70 |
+
|
71 |
+
|
72 |
+
def names_to_pair(name0, name1, separator="/"):
|
73 |
+
return separator.join((name0.replace("/", "-"), name1.replace("/", "-")))
|
third_party/pram/colmap_utils/read_write_model.py
ADDED
@@ -0,0 +1,627 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2018, ETH Zurich and UNC Chapel Hill.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# Redistribution and use in source and binary forms, with or without
|
5 |
+
# modification, are permitted provided that the following conditions are met:
|
6 |
+
#
|
7 |
+
# * Redistributions of source code must retain the above copyright
|
8 |
+
# notice, this list of conditions and the following disclaimer.
|
9 |
+
#
|
10 |
+
# * Redistributions in binary form must reproduce the above copyright
|
11 |
+
# notice, this list of conditions and the following disclaimer in the
|
12 |
+
# documentation and/or other materials provided with the distribution.
|
13 |
+
#
|
14 |
+
# * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
|
15 |
+
# its contributors may be used to endorse or promote products derived
|
16 |
+
# from this software without specific prior written permission.
|
17 |
+
#
|
18 |
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19 |
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20 |
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
21 |
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
22 |
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
23 |
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
24 |
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
25 |
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
26 |
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
27 |
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
28 |
+
# POSSIBILITY OF SUCH DAMAGE.
|
29 |
+
#
|
30 |
+
# Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)
|
31 |
+
|
32 |
+
import os
|
33 |
+
import sys
|
34 |
+
import collections
|
35 |
+
import numpy as np
|
36 |
+
import struct
|
37 |
+
import argparse
|
38 |
+
|
39 |
+
CameraModel = collections.namedtuple(
|
40 |
+
"CameraModel", ["model_id", "model_name", "num_params"])
|
41 |
+
Camera = collections.namedtuple(
|
42 |
+
"Camera", ["id", "model", "width", "height", "params"])
|
43 |
+
BaseImage = collections.namedtuple(
|
44 |
+
"Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"])
|
45 |
+
Point3D = collections.namedtuple(
|
46 |
+
"Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"])
|
47 |
+
|
48 |
+
|
49 |
+
class Image(BaseImage):
|
50 |
+
def qvec2rotmat(self):
|
51 |
+
return qvec2rotmat(self.qvec)
|
52 |
+
|
53 |
+
|
54 |
+
CAMERA_MODELS = {
|
55 |
+
CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
|
56 |
+
CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
|
57 |
+
CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
|
58 |
+
CameraModel(model_id=3, model_name="RADIAL", num_params=5),
|
59 |
+
CameraModel(model_id=4, model_name="OPENCV", num_params=8),
|
60 |
+
CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
|
61 |
+
CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
|
62 |
+
CameraModel(model_id=7, model_name="FOV", num_params=5),
|
63 |
+
CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
|
64 |
+
CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
|
65 |
+
CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12)
|
66 |
+
}
|
67 |
+
CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model)
|
68 |
+
for camera_model in CAMERA_MODELS])
|
69 |
+
CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model)
|
70 |
+
for camera_model in CAMERA_MODELS])
|
71 |
+
|
72 |
+
|
73 |
+
def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
|
74 |
+
"""Read and unpack the next bytes from a binary file.
|
75 |
+
:param fid:
|
76 |
+
:param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
|
77 |
+
:param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
|
78 |
+
:param endian_character: Any of {@, =, <, >, !}
|
79 |
+
:return: Tuple of read and unpacked values.
|
80 |
+
"""
|
81 |
+
data = fid.read(num_bytes)
|
82 |
+
return struct.unpack(endian_character + format_char_sequence, data)
|
83 |
+
|
84 |
+
|
85 |
+
def write_next_bytes(fid, data, format_char_sequence, endian_character="<"):
|
86 |
+
"""pack and write to a binary file.
|
87 |
+
:param fid:
|
88 |
+
:param data: data to send, if multiple elements are sent at the same time,
|
89 |
+
they should be encapsuled either in a list or a tuple
|
90 |
+
:param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
|
91 |
+
should be the same length as the data list or tuple
|
92 |
+
:param endian_character: Any of {@, =, <, >, !}
|
93 |
+
"""
|
94 |
+
if isinstance(data, (list, tuple)):
|
95 |
+
bytes = struct.pack(endian_character + format_char_sequence, *data)
|
96 |
+
else:
|
97 |
+
bytes = struct.pack(endian_character + format_char_sequence, data)
|
98 |
+
fid.write(bytes)
|
99 |
+
|
100 |
+
|
101 |
+
def read_cameras_text(path):
|
102 |
+
"""
|
103 |
+
see: src/base/reconstruction.cc
|
104 |
+
void Reconstruction::WriteCamerasText(const std::string& path)
|
105 |
+
void Reconstruction::ReadCamerasText(const std::string& path)
|
106 |
+
"""
|
107 |
+
cameras = {}
|
108 |
+
with open(path, "r") as fid:
|
109 |
+
while True:
|
110 |
+
line = fid.readline()
|
111 |
+
if not line:
|
112 |
+
break
|
113 |
+
line = line.strip()
|
114 |
+
if len(line) > 0 and line[0] != "#":
|
115 |
+
elems = line.split()
|
116 |
+
camera_id = int(elems[0])
|
117 |
+
model = elems[1]
|
118 |
+
width = int(elems[2])
|
119 |
+
height = int(elems[3])
|
120 |
+
params = np.array(tuple(map(float, elems[4:])))
|
121 |
+
cameras[camera_id] = Camera(id=camera_id, model=model,
|
122 |
+
width=width, height=height,
|
123 |
+
params=params)
|
124 |
+
return cameras
|
125 |
+
|
126 |
+
|
127 |
+
def read_cameras_binary(path_to_model_file):
|
128 |
+
"""
|
129 |
+
see: src/base/reconstruction.cc
|
130 |
+
void Reconstruction::WriteCamerasBinary(const std::string& path)
|
131 |
+
void Reconstruction::ReadCamerasBinary(const std::string& path)
|
132 |
+
"""
|
133 |
+
cameras = {}
|
134 |
+
with open(path_to_model_file, "rb") as fid:
|
135 |
+
num_cameras = read_next_bytes(fid, 8, "Q")[0]
|
136 |
+
for camera_line_index in range(num_cameras):
|
137 |
+
camera_properties = read_next_bytes(
|
138 |
+
fid, num_bytes=24, format_char_sequence="iiQQ")
|
139 |
+
camera_id = camera_properties[0]
|
140 |
+
model_id = camera_properties[1]
|
141 |
+
model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name
|
142 |
+
width = camera_properties[2]
|
143 |
+
height = camera_properties[3]
|
144 |
+
num_params = CAMERA_MODEL_IDS[model_id].num_params
|
145 |
+
params = read_next_bytes(fid, num_bytes=8 * num_params,
|
146 |
+
format_char_sequence="d" * num_params)
|
147 |
+
cameras[camera_id] = Camera(id=camera_id,
|
148 |
+
model=model_name,
|
149 |
+
width=width,
|
150 |
+
height=height,
|
151 |
+
params=np.array(params))
|
152 |
+
assert len(cameras) == num_cameras
|
153 |
+
return cameras
|
154 |
+
|
155 |
+
|
156 |
+
def write_cameras_text(cameras, path):
|
157 |
+
"""
|
158 |
+
see: src/base/reconstruction.cc
|
159 |
+
void Reconstruction::WriteCamerasText(const std::string& path)
|
160 |
+
void Reconstruction::ReadCamerasText(const std::string& path)
|
161 |
+
"""
|
162 |
+
HEADER = '# Camera list with one line of data per camera:\n'
|
163 |
+
'# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n'
|
164 |
+
'# Number of cameras: {}\n'.format(len(cameras))
|
165 |
+
with open(path, "w") as fid:
|
166 |
+
fid.write(HEADER)
|
167 |
+
for _, cam in cameras.items():
|
168 |
+
to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params]
|
169 |
+
line = " ".join([str(elem) for elem in to_write])
|
170 |
+
fid.write(line + "\n")
|
171 |
+
|
172 |
+
|
173 |
+
def write_cameras_binary(cameras, path_to_model_file):
|
174 |
+
"""
|
175 |
+
see: src/base/reconstruction.cc
|
176 |
+
void Reconstruction::WriteCamerasBinary(const std::string& path)
|
177 |
+
void Reconstruction::ReadCamerasBinary(const std::string& path)
|
178 |
+
"""
|
179 |
+
with open(path_to_model_file, "wb") as fid:
|
180 |
+
write_next_bytes(fid, len(cameras), "Q")
|
181 |
+
for _, cam in cameras.items():
|
182 |
+
model_id = CAMERA_MODEL_NAMES[cam.model].model_id
|
183 |
+
camera_properties = [cam.id,
|
184 |
+
model_id,
|
185 |
+
cam.width,
|
186 |
+
cam.height]
|
187 |
+
write_next_bytes(fid, camera_properties, "iiQQ")
|
188 |
+
for p in cam.params:
|
189 |
+
write_next_bytes(fid, float(p), "d")
|
190 |
+
return cameras
|
191 |
+
|
192 |
+
|
193 |
+
def read_images_text(path):
|
194 |
+
"""
|
195 |
+
see: src/base/reconstruction.cc
|
196 |
+
void Reconstruction::ReadImagesText(const std::string& path)
|
197 |
+
void Reconstruction::WriteImagesText(const std::string& path)
|
198 |
+
"""
|
199 |
+
images = {}
|
200 |
+
with open(path, "r") as fid:
|
201 |
+
while True:
|
202 |
+
line = fid.readline()
|
203 |
+
if not line:
|
204 |
+
break
|
205 |
+
line = line.strip()
|
206 |
+
if len(line) > 0 and line[0] != "#":
|
207 |
+
elems = line.split()
|
208 |
+
image_id = int(elems[0])
|
209 |
+
qvec = np.array(tuple(map(float, elems[1:5])))
|
210 |
+
tvec = np.array(tuple(map(float, elems[5:8])))
|
211 |
+
camera_id = int(elems[8])
|
212 |
+
image_name = elems[9]
|
213 |
+
elems = fid.readline().split()
|
214 |
+
xys = np.column_stack([tuple(map(float, elems[0::3])),
|
215 |
+
tuple(map(float, elems[1::3]))])
|
216 |
+
point3D_ids = np.array(tuple(map(int, elems[2::3])))
|
217 |
+
images[image_id] = Image(
|
218 |
+
id=image_id, qvec=qvec, tvec=tvec,
|
219 |
+
camera_id=camera_id, name=image_name,
|
220 |
+
xys=xys, point3D_ids=point3D_ids)
|
221 |
+
return images
|
222 |
+
|
223 |
+
|
224 |
+
def read_images_binary(path_to_model_file):
|
225 |
+
"""
|
226 |
+
see: src/base/reconstruction.cc
|
227 |
+
void Reconstruction::ReadImagesBinary(const std::string& path)
|
228 |
+
void Reconstruction::WriteImagesBinary(const std::string& path)
|
229 |
+
"""
|
230 |
+
images = {}
|
231 |
+
with open(path_to_model_file, "rb") as fid:
|
232 |
+
num_reg_images = read_next_bytes(fid, 8, "Q")[0]
|
233 |
+
for image_index in range(num_reg_images):
|
234 |
+
binary_image_properties = read_next_bytes(
|
235 |
+
fid, num_bytes=64, format_char_sequence="idddddddi")
|
236 |
+
image_id = binary_image_properties[0]
|
237 |
+
qvec = np.array(binary_image_properties[1:5])
|
238 |
+
tvec = np.array(binary_image_properties[5:8])
|
239 |
+
camera_id = binary_image_properties[8]
|
240 |
+
image_name = ""
|
241 |
+
current_char = read_next_bytes(fid, 1, "c")[0]
|
242 |
+
while current_char != b"\x00": # look for the ASCII 0 entry
|
243 |
+
image_name += current_char.decode("utf-8")
|
244 |
+
current_char = read_next_bytes(fid, 1, "c")[0]
|
245 |
+
num_points2D = read_next_bytes(fid, num_bytes=8,
|
246 |
+
format_char_sequence="Q")[0]
|
247 |
+
x_y_id_s = read_next_bytes(fid, num_bytes=24 * num_points2D,
|
248 |
+
format_char_sequence="ddq" * num_points2D)
|
249 |
+
xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])),
|
250 |
+
tuple(map(float, x_y_id_s[1::3]))])
|
251 |
+
point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
|
252 |
+
images[image_id] = Image(
|
253 |
+
id=image_id, qvec=qvec, tvec=tvec,
|
254 |
+
camera_id=camera_id, name=image_name,
|
255 |
+
xys=xys, point3D_ids=point3D_ids)
|
256 |
+
return images
|
257 |
+
|
258 |
+
|
259 |
+
def write_images_text(images, path):
|
260 |
+
"""
|
261 |
+
see: src/base/reconstruction.cc
|
262 |
+
void Reconstruction::ReadImagesText(const std::string& path)
|
263 |
+
void Reconstruction::WriteImagesText(const std::string& path)
|
264 |
+
"""
|
265 |
+
if len(images) == 0:
|
266 |
+
mean_observations = 0
|
267 |
+
else:
|
268 |
+
mean_observations = sum((len(img.point3D_ids) for _, img in images.items())) / len(images)
|
269 |
+
HEADER = '# Image list with two lines of data per image:\n'
|
270 |
+
'# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n'
|
271 |
+
'# POINTS2D[] as (X, Y, POINT3D_ID)\n'
|
272 |
+
'# Number of images: {}, mean observations per image: {}\n'.format(len(images), mean_observations)
|
273 |
+
|
274 |
+
with open(path, "w") as fid:
|
275 |
+
fid.write(HEADER)
|
276 |
+
for _, img in images.items():
|
277 |
+
image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name]
|
278 |
+
first_line = " ".join(map(str, image_header))
|
279 |
+
fid.write(first_line + "\n")
|
280 |
+
|
281 |
+
points_strings = []
|
282 |
+
for xy, point3D_id in zip(img.xys, img.point3D_ids):
|
283 |
+
points_strings.append(" ".join(map(str, [*xy, point3D_id])))
|
284 |
+
fid.write(" ".join(points_strings) + "\n")
|
285 |
+
|
286 |
+
|
287 |
+
def write_images_binary(images, path_to_model_file):
|
288 |
+
"""
|
289 |
+
see: src/base/reconstruction.cc
|
290 |
+
void Reconstruction::ReadImagesBinary(const std::string& path)
|
291 |
+
void Reconstruction::WriteImagesBinary(const std::string& path)
|
292 |
+
"""
|
293 |
+
with open(path_to_model_file, "wb") as fid:
|
294 |
+
write_next_bytes(fid, len(images), "Q")
|
295 |
+
for _, img in images.items():
|
296 |
+
write_next_bytes(fid, img.id, "i")
|
297 |
+
write_next_bytes(fid, img.qvec.tolist(), "dddd")
|
298 |
+
write_next_bytes(fid, img.tvec.tolist(), "ddd")
|
299 |
+
write_next_bytes(fid, img.camera_id, "i")
|
300 |
+
for char in img.name:
|
301 |
+
write_next_bytes(fid, char.encode("utf-8"), "c")
|
302 |
+
write_next_bytes(fid, b"\x00", "c")
|
303 |
+
write_next_bytes(fid, len(img.point3D_ids), "Q")
|
304 |
+
for xy, p3d_id in zip(img.xys, img.point3D_ids):
|
305 |
+
write_next_bytes(fid, [*xy, p3d_id], "ddq")
|
306 |
+
|
307 |
+
|
308 |
+
def read_points3D_text(path):
|
309 |
+
"""
|
310 |
+
see: src/base/reconstruction.cc
|
311 |
+
void Reconstruction::ReadPoints3DText(const std::string& path)
|
312 |
+
void Reconstruction::WritePoints3DText(const std::string& path)
|
313 |
+
"""
|
314 |
+
points3D = {}
|
315 |
+
with open(path, "r") as fid:
|
316 |
+
while True:
|
317 |
+
line = fid.readline()
|
318 |
+
if not line:
|
319 |
+
break
|
320 |
+
line = line.strip()
|
321 |
+
if len(line) > 0 and line[0] != "#":
|
322 |
+
elems = line.split()
|
323 |
+
point3D_id = int(elems[0])
|
324 |
+
xyz = np.array(tuple(map(float, elems[1:4])))
|
325 |
+
rgb = np.array(tuple(map(int, elems[4:7])))
|
326 |
+
error = float(elems[7])
|
327 |
+
image_ids = np.array(tuple(map(int, elems[8::2])))
|
328 |
+
point2D_idxs = np.array(tuple(map(int, elems[9::2])))
|
329 |
+
points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb,
|
330 |
+
error=error, image_ids=image_ids,
|
331 |
+
point2D_idxs=point2D_idxs)
|
332 |
+
return points3D
|
333 |
+
|
334 |
+
|
335 |
+
def read_points3d_binary(path_to_model_file):
|
336 |
+
"""
|
337 |
+
see: src/base/reconstruction.cc
|
338 |
+
void Reconstruction::ReadPoints3DBinary(const std::string& path)
|
339 |
+
void Reconstruction::WritePoints3DBinary(const std::string& path)
|
340 |
+
"""
|
341 |
+
points3D = {}
|
342 |
+
with open(path_to_model_file, "rb") as fid:
|
343 |
+
num_points = read_next_bytes(fid, 8, "Q")[0]
|
344 |
+
for point_line_index in range(num_points):
|
345 |
+
binary_point_line_properties = read_next_bytes(
|
346 |
+
fid, num_bytes=43, format_char_sequence="QdddBBBd")
|
347 |
+
point3D_id = binary_point_line_properties[0]
|
348 |
+
xyz = np.array(binary_point_line_properties[1:4])
|
349 |
+
rgb = np.array(binary_point_line_properties[4:7])
|
350 |
+
error = np.array(binary_point_line_properties[7])
|
351 |
+
track_length = read_next_bytes(
|
352 |
+
fid, num_bytes=8, format_char_sequence="Q")[0]
|
353 |
+
track_elems = read_next_bytes(
|
354 |
+
fid, num_bytes=8 * track_length,
|
355 |
+
format_char_sequence="ii" * track_length)
|
356 |
+
image_ids = np.array(tuple(map(int, track_elems[0::2])))
|
357 |
+
point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
|
358 |
+
points3D[point3D_id] = Point3D(
|
359 |
+
id=point3D_id, xyz=xyz, rgb=rgb,
|
360 |
+
error=error, image_ids=image_ids,
|
361 |
+
point2D_idxs=point2D_idxs)
|
362 |
+
return points3D
|
363 |
+
|
364 |
+
|
365 |
+
def write_points3D_text(points3D, path):
|
366 |
+
"""
|
367 |
+
see: src/base/reconstruction.cc
|
368 |
+
void Reconstruction::ReadPoints3DText(const std::string& path)
|
369 |
+
void Reconstruction::WritePoints3DText(const std::string& path)
|
370 |
+
"""
|
371 |
+
if len(points3D) == 0:
|
372 |
+
mean_track_length = 0
|
373 |
+
else:
|
374 |
+
mean_track_length = sum((len(pt.image_ids) for _, pt in points3D.items())) / len(points3D)
|
375 |
+
HEADER = '# 3D point list with one line of data per point:\n'
|
376 |
+
'# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n'
|
377 |
+
'# Number of points: {}, mean track length: {}\n'.format(len(points3D), mean_track_length)
|
378 |
+
|
379 |
+
with open(path, "w") as fid:
|
380 |
+
fid.write(HEADER)
|
381 |
+
for _, pt in points3D.items():
|
382 |
+
point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error]
|
383 |
+
fid.write(" ".join(map(str, point_header)) + " ")
|
384 |
+
track_strings = []
|
385 |
+
for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs):
|
386 |
+
track_strings.append(" ".join(map(str, [image_id, point2D])))
|
387 |
+
fid.write(" ".join(track_strings) + "\n")
|
388 |
+
|
389 |
+
|
390 |
+
def write_points3d_binary(points3D, path_to_model_file):
|
391 |
+
"""
|
392 |
+
see: src/base/reconstruction.cc
|
393 |
+
void Reconstruction::ReadPoints3DBinary(const std::string& path)
|
394 |
+
void Reconstruction::WritePoints3DBinary(const std::string& path)
|
395 |
+
"""
|
396 |
+
with open(path_to_model_file, "wb") as fid:
|
397 |
+
write_next_bytes(fid, len(points3D), "Q")
|
398 |
+
for _, pt in points3D.items():
|
399 |
+
write_next_bytes(fid, pt.id, "Q")
|
400 |
+
write_next_bytes(fid, pt.xyz.tolist(), "ddd")
|
401 |
+
write_next_bytes(fid, pt.rgb.tolist(), "BBB")
|
402 |
+
write_next_bytes(fid, pt.error, "d")
|
403 |
+
track_length = pt.image_ids.shape[0]
|
404 |
+
write_next_bytes(fid, track_length, "Q")
|
405 |
+
for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs):
|
406 |
+
write_next_bytes(fid, [image_id, point2D_id], "ii")
|
407 |
+
|
408 |
+
|
409 |
+
def read_model(path, ext):
|
410 |
+
if ext == ".txt":
|
411 |
+
cameras = read_cameras_text(os.path.join(path, "cameras" + ext))
|
412 |
+
images = read_images_text(os.path.join(path, "images" + ext))
|
413 |
+
points3D = read_points3D_text(os.path.join(path, "points3D") + ext)
|
414 |
+
else:
|
415 |
+
cameras = read_cameras_binary(os.path.join(path, "cameras" + ext))
|
416 |
+
images = read_images_binary(os.path.join(path, "images" + ext))
|
417 |
+
points3D = read_points3d_binary(os.path.join(path, "points3D") + ext)
|
418 |
+
return cameras, images, points3D
|
419 |
+
|
420 |
+
|
421 |
+
def write_model(cameras, images, points3D, path, ext):
|
422 |
+
if ext == ".txt":
|
423 |
+
write_cameras_text(cameras, os.path.join(path, "cameras" + ext))
|
424 |
+
write_images_text(images, os.path.join(path, "images" + ext))
|
425 |
+
write_points3D_text(points3D, os.path.join(path, "points3D") + ext)
|
426 |
+
else:
|
427 |
+
write_cameras_binary(cameras, os.path.join(path, "cameras" + ext))
|
428 |
+
write_images_binary(images, os.path.join(path, "images" + ext))
|
429 |
+
write_points3d_binary(points3D, os.path.join(path, "points3D") + ext)
|
430 |
+
return cameras, images, points3D
|
431 |
+
|
432 |
+
|
433 |
+
def read_compressed_images_binary(path_to_model_file):
|
434 |
+
"""
|
435 |
+
see: src/base/reconstruction.cc
|
436 |
+
void Reconstruction::ReadImagesBinary(const std::string& path)
|
437 |
+
void Reconstruction::WriteImagesBinary(const std::string& path)
|
438 |
+
"""
|
439 |
+
images = {}
|
440 |
+
with open(path_to_model_file, "rb") as fid:
|
441 |
+
num_reg_images = read_next_bytes(fid, 8, "Q")[0]
|
442 |
+
for image_index in range(num_reg_images):
|
443 |
+
binary_image_properties = read_next_bytes(
|
444 |
+
fid, num_bytes=64, format_char_sequence="idddddddi")
|
445 |
+
image_id = binary_image_properties[0]
|
446 |
+
qvec = np.array(binary_image_properties[1:5])
|
447 |
+
tvec = np.array(binary_image_properties[5:8])
|
448 |
+
camera_id = binary_image_properties[8]
|
449 |
+
image_name = ""
|
450 |
+
current_char = read_next_bytes(fid, 1, "c")[0]
|
451 |
+
while current_char != b"\x00": # look for the ASCII 0 entry
|
452 |
+
image_name += current_char.decode("utf-8")
|
453 |
+
current_char = read_next_bytes(fid, 1, "c")[0]
|
454 |
+
num_points2D = read_next_bytes(fid, num_bytes=8,
|
455 |
+
format_char_sequence="Q")[0]
|
456 |
+
# x_y_id_s = read_next_bytes(fid, num_bytes=24 * num_points2D,
|
457 |
+
# format_char_sequence="ddq" * num_points2D)
|
458 |
+
# xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])),
|
459 |
+
# tuple(map(float, x_y_id_s[1::3]))])
|
460 |
+
x_y_id_s = read_next_bytes(fid, num_bytes=8 * num_points2D,
|
461 |
+
format_char_sequence="q" * num_points2D)
|
462 |
+
point3D_ids = np.array(x_y_id_s)
|
463 |
+
images[image_id] = Image(
|
464 |
+
id=image_id, qvec=qvec, tvec=tvec,
|
465 |
+
camera_id=camera_id, name=image_name,
|
466 |
+
xys=np.array([]), point3D_ids=point3D_ids)
|
467 |
+
return images
|
468 |
+
|
469 |
+
|
470 |
+
def write_compressed_images_binary(images, path_to_model_file):
|
471 |
+
"""
|
472 |
+
see: src/base/reconstruction.cc
|
473 |
+
void Reconstruction::ReadImagesBinary(const std::string& path)
|
474 |
+
void Reconstruction::WriteImagesBinary(const std::string& path)
|
475 |
+
"""
|
476 |
+
with open(path_to_model_file, "wb") as fid:
|
477 |
+
write_next_bytes(fid, len(images), "Q")
|
478 |
+
for _, img in images.items():
|
479 |
+
write_next_bytes(fid, img.id, "i")
|
480 |
+
write_next_bytes(fid, img.qvec.tolist(), "dddd")
|
481 |
+
write_next_bytes(fid, img.tvec.tolist(), "ddd")
|
482 |
+
write_next_bytes(fid, img.camera_id, "i")
|
483 |
+
for char in img.name:
|
484 |
+
write_next_bytes(fid, char.encode("utf-8"), "c")
|
485 |
+
write_next_bytes(fid, b"\x00", "c")
|
486 |
+
write_next_bytes(fid, len(img.point3D_ids), "Q")
|
487 |
+
for p3d_id in img.point3D_ids:
|
488 |
+
write_next_bytes(fid, p3d_id, "q")
|
489 |
+
# for xy, p3d_id in zip(img.xys, img.point3D_ids):
|
490 |
+
# write_next_bytes(fid, [*xy, p3d_id], "ddq")
|
491 |
+
|
492 |
+
|
493 |
+
def read_compressed_points3d_binary(path_to_model_file):
|
494 |
+
"""
|
495 |
+
see: src/base/reconstruction.cc
|
496 |
+
void Reconstruction::ReadPoints3DBinary(const std::string& path)
|
497 |
+
void Reconstruction::WritePoints3DBinary(const std::string& path)
|
498 |
+
"""
|
499 |
+
points3D = {}
|
500 |
+
with open(path_to_model_file, "rb") as fid:
|
501 |
+
num_points = read_next_bytes(fid, 8, "Q")[0]
|
502 |
+
for point_line_index in range(num_points):
|
503 |
+
binary_point_line_properties = read_next_bytes(
|
504 |
+
fid, num_bytes=43, format_char_sequence="QdddBBBd")
|
505 |
+
point3D_id = binary_point_line_properties[0]
|
506 |
+
xyz = np.array(binary_point_line_properties[1:4])
|
507 |
+
rgb = np.array(binary_point_line_properties[4:7])
|
508 |
+
error = np.array(binary_point_line_properties[7])
|
509 |
+
track_length = read_next_bytes(
|
510 |
+
fid, num_bytes=8, format_char_sequence="Q")[0]
|
511 |
+
track_elems = read_next_bytes(
|
512 |
+
fid, num_bytes=4 * track_length,
|
513 |
+
format_char_sequence="i" * track_length)
|
514 |
+
image_ids = np.array(track_elems)
|
515 |
+
# point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
|
516 |
+
points3D[point3D_id] = Point3D(
|
517 |
+
id=point3D_id, xyz=xyz, rgb=rgb,
|
518 |
+
error=error, image_ids=image_ids,
|
519 |
+
point2D_idxs=np.array([]))
|
520 |
+
return points3D
|
521 |
+
|
522 |
+
|
523 |
+
def write_compressed_points3d_binary(points3D, path_to_model_file):
|
524 |
+
"""
|
525 |
+
see: src/base/reconstruction.cc
|
526 |
+
void Reconstruction::ReadPoints3DBinary(const std::string& path)
|
527 |
+
void Reconstruction::WritePoints3DBinary(const std::string& path)
|
528 |
+
"""
|
529 |
+
with open(path_to_model_file, "wb") as fid:
|
530 |
+
write_next_bytes(fid, len(points3D), "Q")
|
531 |
+
for _, pt in points3D.items():
|
532 |
+
write_next_bytes(fid, pt.id, "Q")
|
533 |
+
write_next_bytes(fid, pt.xyz.tolist(), "ddd")
|
534 |
+
write_next_bytes(fid, pt.rgb.tolist(), "BBB")
|
535 |
+
write_next_bytes(fid, pt.error, "d")
|
536 |
+
track_length = pt.image_ids.shape[0]
|
537 |
+
write_next_bytes(fid, track_length, "Q")
|
538 |
+
# for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs):
|
539 |
+
# write_next_bytes(fid, [image_id, point2D_id], "ii")
|
540 |
+
for image_id in pt.image_ids:
|
541 |
+
write_next_bytes(fid, image_id, "i")
|
542 |
+
|
543 |
+
|
544 |
+
def read_compressed_model(path, ext):
|
545 |
+
if ext == ".txt":
|
546 |
+
cameras = read_cameras_text(os.path.join(path, "cameras" + ext))
|
547 |
+
images = read_images_text(os.path.join(path, "images" + ext))
|
548 |
+
points3D = read_points3D_text(os.path.join(path, "points3D") + ext)
|
549 |
+
else:
|
550 |
+
cameras = read_cameras_binary(os.path.join(path, "cameras" + ext))
|
551 |
+
images = read_compressed_images_binary(os.path.join(path, "images" + ext))
|
552 |
+
points3D = read_compressed_points3d_binary(os.path.join(path, "points3D") + ext)
|
553 |
+
return cameras, images, points3D
|
554 |
+
|
555 |
+
|
556 |
+
def qvec2rotmat(qvec):
|
557 |
+
return np.array([
|
558 |
+
[1 - 2 * qvec[2] ** 2 - 2 * qvec[3] ** 2,
|
559 |
+
2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
|
560 |
+
2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]],
|
561 |
+
[2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
|
562 |
+
1 - 2 * qvec[1] ** 2 - 2 * qvec[3] ** 2,
|
563 |
+
2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]],
|
564 |
+
[2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
|
565 |
+
2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
|
566 |
+
1 - 2 * qvec[1] ** 2 - 2 * qvec[2] ** 2]])
|
567 |
+
|
568 |
+
|
569 |
+
def rotmat2qvec(R):
|
570 |
+
Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
|
571 |
+
K = np.array([
|
572 |
+
[Rxx - Ryy - Rzz, 0, 0, 0],
|
573 |
+
[Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
|
574 |
+
[Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
|
575 |
+
[Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0
|
576 |
+
eigvals, eigvecs = np.linalg.eigh(K)
|
577 |
+
qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
|
578 |
+
if qvec[0] < 0:
|
579 |
+
qvec *= -1
|
580 |
+
return qvec
|
581 |
+
|
582 |
+
|
583 |
+
def intrinsics_from_camera(camera_model, params):
|
584 |
+
if camera_model in ("SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL"):
|
585 |
+
fx = fy = params[0]
|
586 |
+
cx = params[1]
|
587 |
+
cy = params[2]
|
588 |
+
elif camera_model in ("PINHOLE", "OPENCV", "OPENCV_FISHEYE", "FULL_OPENCV"):
|
589 |
+
fx = params[0]
|
590 |
+
fy = params[1]
|
591 |
+
cx = params[2]
|
592 |
+
cy = params[3]
|
593 |
+
else:
|
594 |
+
raise Exception("Camera model not supported")
|
595 |
+
|
596 |
+
# intrinsics
|
597 |
+
K = np.identity(3)
|
598 |
+
K[0, 0] = fx
|
599 |
+
K[1, 1] = fy
|
600 |
+
K[0, 2] = cx
|
601 |
+
K[1, 2] = cy
|
602 |
+
return K
|
603 |
+
|
604 |
+
|
605 |
+
def main():
|
606 |
+
parser = argparse.ArgumentParser(description='Read and write COLMAP binary and text models')
|
607 |
+
parser.add_argument('input_model', help='path to input model folder')
|
608 |
+
parser.add_argument('input_format', choices=['.bin', '.txt'],
|
609 |
+
help='input model format')
|
610 |
+
parser.add_argument('--output_model', metavar='PATH',
|
611 |
+
help='path to output model folder')
|
612 |
+
parser.add_argument('--output_format', choices=['.bin', '.txt'],
|
613 |
+
help='outut model format', default='.txt')
|
614 |
+
args = parser.parse_args()
|
615 |
+
|
616 |
+
cameras, images, points3D = read_model(path=args.input_model, ext=args.input_format)
|
617 |
+
|
618 |
+
print("num_cameras:", len(cameras))
|
619 |
+
print("num_images:", len(images))
|
620 |
+
print("num_points3D:", len(points3D))
|
621 |
+
|
622 |
+
if args.output_model is not None:
|
623 |
+
write_model(cameras, images, points3D, path=args.output_model, ext=args.output_format)
|
624 |
+
|
625 |
+
|
626 |
+
if __name__ == "__main__":
|
627 |
+
main()
|
third_party/pram/colmap_utils/utils.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
third_party/pram/configs/config_train_12scenes_sfd2.yaml
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset: [ '12Scenes' ]
|
2 |
+
|
3 |
+
network_1: "segnet"
|
4 |
+
network: "segnetvit"
|
5 |
+
|
6 |
+
local_rank: 0
|
7 |
+
gpu: [ 0 ]
|
8 |
+
|
9 |
+
feature: "sfd2"
|
10 |
+
save_path: '/scratches/flyer_2/fx221/exp/pram'
|
11 |
+
landmark_path: "/scratches/flyer_3/fx221/exp/pram/landmarks/sfd2-gml"
|
12 |
+
dataset_path: "/scratches/flyer_3/fx221/dataset"
|
13 |
+
config_path: 'configs/datasets'
|
14 |
+
|
15 |
+
image_dim: 3
|
16 |
+
feat_dim: 128
|
17 |
+
min_inliers: 32
|
18 |
+
max_inliers: 512
|
19 |
+
random_inliers: true
|
20 |
+
max_keypoints: 512
|
21 |
+
ignore_index: -1
|
22 |
+
output_dim: 1024
|
23 |
+
output_dim_: 2048
|
24 |
+
jitter_params:
|
25 |
+
brightness: 0.5
|
26 |
+
contrast: 0.5
|
27 |
+
saturation: 0.25
|
28 |
+
hue: 0.15
|
29 |
+
blur: 0
|
30 |
+
|
31 |
+
scale_params: [ 0.5, 1.0 ]
|
32 |
+
pre_load: false
|
33 |
+
train: true
|
34 |
+
inlier_th: 0.5
|
35 |
+
lr: 0.0001
|
36 |
+
min_lr: 0.00001
|
37 |
+
optimizer: "adamw"
|
38 |
+
seg_loss: "cew"
|
39 |
+
seg_loss_nx: "cei"
|
40 |
+
cls_loss: "ce"
|
41 |
+
cls_loss_: "bce"
|
42 |
+
ac_fn: "relu"
|
43 |
+
norm_fn: "bn"
|
44 |
+
workers: 8
|
45 |
+
layers: 15
|
46 |
+
log_intervals: 50
|
47 |
+
eval_n_epoch: 10
|
48 |
+
do_eval: false
|
49 |
+
|
50 |
+
use_mid_feature: true
|
51 |
+
norm_desc: false
|
52 |
+
with_score: false
|
53 |
+
with_aug: true
|
54 |
+
with_dist: true
|
55 |
+
|
56 |
+
batch_size: 32
|
57 |
+
its_per_epoch: 1000
|
58 |
+
decay_rate: 0.999992
|
59 |
+
decay_iter: 60000
|
60 |
+
epochs: 500
|
61 |
+
|
62 |
+
cluster_method: 'birch'
|
63 |
+
|
64 |
+
weight_path: null
|
65 |
+
weight_path_1: '20230719_220620_segnet_L15_T_resnet4x_B32_K1024_relu_bn_od1024_nc193_adamw_cew_md_A_birch/segnet.499.pth'
|
66 |
+
weight_path_2: '20240202_145337_segnetvit_L15_T_resnet4x_B32_K512_relu_bn_od1024_nc193_adam_cew_md_A_birch/segnetvit.499.pth'
|
67 |
+
|
68 |
+
resume_path: null
|
69 |
+
|
70 |
+
n_class: 193
|
71 |
+
|
72 |
+
eval_max_keypoints: 1024
|
73 |
+
|
74 |
+
localization:
|
75 |
+
loc_scene_name: [ 'apt1/kitchen' ]
|
76 |
+
save_path: '/scratches/flyer_2/fx221/exp/localizer/loc_results'
|
77 |
+
seg_k: 20
|
78 |
+
threshold: 8
|
79 |
+
min_kpts: 128
|
80 |
+
min_matches: 4
|
81 |
+
min_inliers: 64
|
82 |
+
matching_method_: "mnn"
|
83 |
+
matching_method_1: "spg"
|
84 |
+
matching_method_2: "gm"
|
85 |
+
matching_method: "gml"
|
86 |
+
matching_method_5: "adagml"
|
87 |
+
save: false
|
88 |
+
show: true
|
89 |
+
show_time: 1
|
90 |
+
max_vrf: 1
|
91 |
+
with_original: true
|
92 |
+
with_extra: false
|
93 |
+
with_compress: true
|
94 |
+
semantic_matching: true
|
95 |
+
do_refinement: true
|
96 |
+
refinement_method_: 'matching'
|
97 |
+
refinement_method: 'projection'
|
98 |
+
pre_filtering_th: 0.95
|
99 |
+
covisibility_frame: 20
|
100 |
+
refinement_radius: 20
|
101 |
+
refinement_nn_ratio: 0.9
|
102 |
+
refinement_max_matches: 0
|
third_party/pram/configs/config_train_7scenes_sfd2.yaml
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset: [ '7Scenes' ]
|
2 |
+
|
3 |
+
network: "segnetvit"
|
4 |
+
|
5 |
+
local_rank: 0
|
6 |
+
gpu: [ 0 ]
|
7 |
+
# when using ddp, set gpu: [0,1,2,3]
|
8 |
+
with_dist: true
|
9 |
+
|
10 |
+
feature: "sfd2"
|
11 |
+
save_path_: '/scratches/flyer_2/fx221/exp/pram'
|
12 |
+
save_path: '/scratches/flyer_2/fx221/publications/test_pram/exp'
|
13 |
+
landmark_path_: "/scratches/flyer_3/fx221/exp/pram/landmarks/sfd2-gml"
|
14 |
+
landmark_path: "/scratches/flyer_2/fx221/publications/test_pram/landmakrs/sfd2-gml"
|
15 |
+
dataset_path: "/scratches/flyer_3/fx221/dataset"
|
16 |
+
config_path: 'configs/datasets'
|
17 |
+
|
18 |
+
image_dim: 3
|
19 |
+
feat_dim: 128
|
20 |
+
|
21 |
+
min_inliers: 32
|
22 |
+
max_inliers: 256
|
23 |
+
random_inliers: 1
|
24 |
+
max_keypoints: 512
|
25 |
+
ignore_index: -1
|
26 |
+
output_dim: 1024
|
27 |
+
output_dim_: 2048
|
28 |
+
jitter_params:
|
29 |
+
brightness: 0.5
|
30 |
+
contrast: 0.5
|
31 |
+
saturation: 0.25
|
32 |
+
hue: 0.15
|
33 |
+
blur: 0
|
34 |
+
|
35 |
+
scale_params: [ 0.5, 1.0 ]
|
36 |
+
pre_load: false
|
37 |
+
train: true
|
38 |
+
inlier_th: 0.5
|
39 |
+
lr: 0.0001
|
40 |
+
min_lr: 0.00001
|
41 |
+
cls_loss: "ce"
|
42 |
+
ac_fn: "relu"
|
43 |
+
norm_fn: "bn"
|
44 |
+
workers: 8
|
45 |
+
layers: 15
|
46 |
+
log_intervals: 50
|
47 |
+
eval_n_epoch: 10
|
48 |
+
do_eval: false
|
49 |
+
|
50 |
+
use_mid_feature: true
|
51 |
+
norm_desc: false
|
52 |
+
with_cls: false
|
53 |
+
with_score: false
|
54 |
+
with_aug: true
|
55 |
+
|
56 |
+
batch_size: 32
|
57 |
+
its_per_epoch: 1000
|
58 |
+
decay_rate: 0.999992
|
59 |
+
decay_iter: 80000
|
60 |
+
epochs: 200
|
61 |
+
|
62 |
+
cluster_method: 'birch'
|
63 |
+
|
64 |
+
weight_path: null
|
65 |
+
weight_path_1: '20230724_203230_segnet_L15_S_resnet4x_B32_K1024_relu_bn_od1024_nc113_adam_cew_md_A_birch/segnet.180.pth'
|
66 |
+
weight_path_2: '20240202_152519_segnetvit_L15_S_resnet4x_B32_K512_relu_bn_od1024_nc113_adamw_cew_md_A_birch/segnetvit.199.pth'
|
67 |
+
|
68 |
+
# used for resuming training
|
69 |
+
resume_path: null
|
70 |
+
|
71 |
+
# used for localization
|
72 |
+
n_class: 113
|
73 |
+
|
74 |
+
eval_max_keypoints: 1024
|
75 |
+
|
76 |
+
localization:
|
77 |
+
loc_scene_name: [ 'chess' ]
|
78 |
+
save_path: '/scratches/flyer_2/fx221/exp/localizer/loc_results'
|
79 |
+
|
80 |
+
seg_k: 20
|
81 |
+
threshold: 8
|
82 |
+
min_kpts: 128
|
83 |
+
min_matches: 16
|
84 |
+
min_inliers: 32
|
85 |
+
matching_method_: "mnn"
|
86 |
+
matching_method_1: "spg"
|
87 |
+
matching_method_2: "gm"
|
88 |
+
matching_method: "gml"
|
89 |
+
matching_method_4: "adagml"
|
90 |
+
save: false
|
91 |
+
show: true
|
92 |
+
show_time: 1
|
93 |
+
with_original: true
|
94 |
+
max_vrf: 1
|
95 |
+
with_compress: true
|
96 |
+
semantic_matching: true
|
97 |
+
do_refinement: true
|
98 |
+
pre_filtering_th: 0.95
|
99 |
+
refinement_method_: 'matching'
|
100 |
+
refinement_method: 'projection'
|
101 |
+
covisibility_frame: 20
|
102 |
+
refinement_radius: 20
|
103 |
+
refinement_nn_ratio: 0.9
|
104 |
+
refinement_max_matches: 0
|
third_party/pram/configs/config_train_aachen_sfd2.yaml
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset: [ 'Aachen' ]
|
2 |
+
|
3 |
+
network_: "segnet"
|
4 |
+
network: "segnetvit"
|
5 |
+
local_rank: 0
|
6 |
+
gpu: [ 0 ]
|
7 |
+
|
8 |
+
feature: "sfd2"
|
9 |
+
save_path: '/scratches/flyer_2/fx221/exp/pram'
|
10 |
+
landmark_path: "/scratches/flyer_3/fx221/exp/pram/landmarks/sfd2-gml"
|
11 |
+
dataset_path: "/scratches/flyer_3/fx221/dataset"
|
12 |
+
|
13 |
+
config_path: 'configs/datasets'
|
14 |
+
|
15 |
+
image_dim: 3
|
16 |
+
feat_dim: 128
|
17 |
+
|
18 |
+
min_inliers: 32
|
19 |
+
max_inliers: 512
|
20 |
+
random_inliers: true
|
21 |
+
max_keypoints: 1024
|
22 |
+
ignore_index: -1
|
23 |
+
output_dim: 1024
|
24 |
+
output_dim_: 2048
|
25 |
+
jitter_params:
|
26 |
+
brightness: 0.5
|
27 |
+
contrast: 0.5
|
28 |
+
saturation: 0.25
|
29 |
+
hue: 0.15
|
30 |
+
blur: 0
|
31 |
+
|
32 |
+
scale_params: [ 0.5, 1.0 ]
|
33 |
+
pre_load: false
|
34 |
+
do_eval: true
|
35 |
+
train: true
|
36 |
+
inlier_th: 0.5
|
37 |
+
lr: 0.0001
|
38 |
+
min_lr: 0.00001
|
39 |
+
optimizer: "adam"
|
40 |
+
seg_loss: "cew"
|
41 |
+
seg_loss_nx: "cei"
|
42 |
+
cls_loss: "ce"
|
43 |
+
cls_loss_: "bce"
|
44 |
+
ac_fn: "relu"
|
45 |
+
norm_fn: "bn"
|
46 |
+
workers: 8
|
47 |
+
layers: 15
|
48 |
+
log_intervals: 50
|
49 |
+
eval_n_epoch: 10
|
50 |
+
|
51 |
+
use_mid_feature: true
|
52 |
+
norm_desc: false
|
53 |
+
with_sc: false
|
54 |
+
with_cls: true
|
55 |
+
with_score: false
|
56 |
+
with_aug: true
|
57 |
+
with_dist: true
|
58 |
+
|
59 |
+
batch_size: 32
|
60 |
+
its_per_epoch: 1000
|
61 |
+
decay_rate: 0.999992
|
62 |
+
decay_iter: 80000
|
63 |
+
epochs: 800
|
64 |
+
|
65 |
+
cluster_method: 'birch'
|
66 |
+
|
67 |
+
weight_path: null
|
68 |
+
weight_path_1: '20230719_221442_segnet_L15_A_resnet4x_B32_K1024_relu_bn_od1024_nc513_adamw_cew_md_A_birch/segnet.899.pth'
|
69 |
+
weight_path_2: '20240211_142623_segnetvit_L15_A_resnet4x_B32_K1024_relu_bn_od1024_nc513_adam_cew_md_A_birch/segnetvit.799.pth'
|
70 |
+
resume_path: null
|
71 |
+
|
72 |
+
n_class: 513
|
73 |
+
|
74 |
+
eval_max_keypoints: 4096
|
75 |
+
|
76 |
+
localization:
|
77 |
+
loc_scene_name: [ ]
|
78 |
+
save_path: '/scratches/flyer_2/fx221/exp/localizer/loc_results'
|
79 |
+
seg_k: 10
|
80 |
+
threshold: 12
|
81 |
+
min_kpts: 256
|
82 |
+
min_matches: 8
|
83 |
+
min_inliers: 128
|
84 |
+
matching_method_: "mnn"
|
85 |
+
matching_method_1: "spg"
|
86 |
+
matching_method_2: "gm"
|
87 |
+
matching_method: "gml"
|
88 |
+
matching_method_4: "adagml"
|
89 |
+
save: false
|
90 |
+
show: true
|
91 |
+
show_time: 1
|
92 |
+
with_original: true
|
93 |
+
with_extra: false
|
94 |
+
max_vrf: 1
|
95 |
+
with_compress: true
|
96 |
+
semantic_matching: true
|
97 |
+
refinement_method_: 'matching'
|
98 |
+
refinement_method: 'projection'
|
99 |
+
pre_filtering_th: 0.95
|
100 |
+
do_refinement: true
|
101 |
+
covisibility_frame: 50
|
102 |
+
refinement_radius: 30
|
103 |
+
refinement_nn_ratio: 0.9
|
104 |
+
refinement_max_matches: 0
|
third_party/pram/configs/config_train_cambridge_sfd2.yaml
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset: [ 'CambridgeLandmarks' ]
|
2 |
+
|
3 |
+
network_: "segnet"
|
4 |
+
network: "segnetvit"
|
5 |
+
|
6 |
+
local_rank: 0
|
7 |
+
gpu: [ 0 ]
|
8 |
+
|
9 |
+
feature: "sfd2"
|
10 |
+
save_path: '/scratches/flyer_2/fx221/exp/pram'
|
11 |
+
landmark_path: "/scratches/flyer_3/fx221/exp/pram/landmarks/sfd2-gml"
|
12 |
+
dataset_path: "/scratches/flyer_3/fx221/dataset"
|
13 |
+
config_path: 'configs/datasets'
|
14 |
+
|
15 |
+
image_dim: 3
|
16 |
+
feat_dim: 128
|
17 |
+
|
18 |
+
min_inliers: 32
|
19 |
+
max_inliers: 512
|
20 |
+
random_inliers: 1
|
21 |
+
max_keypoints: 1024
|
22 |
+
ignore_index: -1
|
23 |
+
output_dim: 1024
|
24 |
+
output_dim_: 2048
|
25 |
+
jitter_params:
|
26 |
+
brightness: 0.5
|
27 |
+
contrast: 0.5
|
28 |
+
saturation: 0.25
|
29 |
+
hue: 0.15
|
30 |
+
blur: 0
|
31 |
+
|
32 |
+
scale_params: [ 0.5, 1.0 ]
|
33 |
+
pre_load: false
|
34 |
+
do_eval: false
|
35 |
+
train: true
|
36 |
+
inlier_th: 0.5
|
37 |
+
lr: 0.0001
|
38 |
+
min_lr: 0.00001
|
39 |
+
epochs: 300
|
40 |
+
seg_loss: "cew"
|
41 |
+
ac_fn: "relu"
|
42 |
+
norm_fn: "bn"
|
43 |
+
workers: 8
|
44 |
+
layers: 15
|
45 |
+
log_intervals: 50
|
46 |
+
eval_n_epoch: 10
|
47 |
+
|
48 |
+
use_mid_feature: true
|
49 |
+
norm_desc: false
|
50 |
+
with_score: false
|
51 |
+
with_aug: true
|
52 |
+
with_dist: true
|
53 |
+
|
54 |
+
batch_size: 32
|
55 |
+
its_per_epoch: 1000
|
56 |
+
decay_rate: 0.999992
|
57 |
+
decay_iter: 60000
|
58 |
+
|
59 |
+
cluster_method: 'birch'
|
60 |
+
|
61 |
+
weight_path: null
|
62 |
+
weight_path_1: '20230725_144044_segnet_L15_C_resnet4x_B32_K1024_relu_bn_od1024_nc161_adam_cew_md_A_birch/segnet.260.pth'
|
63 |
+
weight_path_2: '20240204_130323_segnetvit_L15_C_resnet4x_B32_K1024_relu_bn_od1024_nc161_adamw_cew_md_A_birch/segnetvit.399.pth'
|
64 |
+
|
65 |
+
resume_path: null
|
66 |
+
|
67 |
+
n_class: 161
|
68 |
+
|
69 |
+
eval_max_keypoints: 2048
|
70 |
+
|
71 |
+
localization:
|
72 |
+
loc_scene_name_1: [ 'GreatCourt' ]
|
73 |
+
loc_scene_name_2: [ 'KingsCollege' ]
|
74 |
+
loc_scene_name: [ 'StMarysChurch' ]
|
75 |
+
loc_scene_name_4: [ 'OldHospital' ]
|
76 |
+
save_path: '/scratches/flyer_2/fx221/exp/localizer/loc_results'
|
77 |
+
seg_k: 30
|
78 |
+
threshold: 12
|
79 |
+
min_kpts: 256
|
80 |
+
min_matches: 16
|
81 |
+
min_inliers_gm: 128
|
82 |
+
min_inliers: 128
|
83 |
+
matching_method_: "mnn"
|
84 |
+
matching_method_1: "spg"
|
85 |
+
matching_method_2: "gm"
|
86 |
+
matching_method: "gml"
|
87 |
+
matching_method_4: "adagml"
|
88 |
+
show: true
|
89 |
+
show_time: 1
|
90 |
+
save: false
|
91 |
+
with_original: true
|
92 |
+
max_vrf: 1
|
93 |
+
with_extra: false
|
94 |
+
with_compress: true
|
95 |
+
semantic_matching: true
|
96 |
+
do_refinement: true
|
97 |
+
pre_filtering_th: 0.95
|
98 |
+
refinement_method_: 'matching'
|
99 |
+
refinement_method: 'projection'
|
100 |
+
covisibility_frame: 20
|
101 |
+
refinement_radius: 20
|
102 |
+
refinement_nn_ratio: 0.9
|
103 |
+
refinement_max_matches: 0
|
third_party/pram/configs/config_train_multiset_sfd2.yaml
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset: [ 'S', 'T', 'C', 'A' ]
|
2 |
+
|
3 |
+
network: "segnet"
|
4 |
+
network_: "gsegnet3"
|
5 |
+
|
6 |
+
local_rank: 0
|
7 |
+
gpu: [ 4 ]
|
8 |
+
|
9 |
+
feature: "resnet4x"
|
10 |
+
save_path: '/scratches/flyer_2/fx221/exp/localizer'
|
11 |
+
landmark_path: "/scratches/flyer_3/fx221/exp/localizer/resnet4x-20230511-210205-pho-0005-gm"
|
12 |
+
dataset_path: "/scratches/flyer_3/fx221/dataset"
|
13 |
+
config_path: 'configs/datasets'
|
14 |
+
|
15 |
+
image_dim: 3
|
16 |
+
min_inliers: 32
|
17 |
+
max_inliers: 512
|
18 |
+
random_inliers: 1
|
19 |
+
max_keypoints: 1024
|
20 |
+
ignore_index: -1
|
21 |
+
output_dim: 1024
|
22 |
+
output_dim_: 2048
|
23 |
+
jitter_params:
|
24 |
+
brightness: 0.5
|
25 |
+
contrast: 0.5
|
26 |
+
saturation: 0.25
|
27 |
+
hue: 0.15
|
28 |
+
blur: 0
|
29 |
+
|
30 |
+
scale_params: [ 0.5, 1.0 ]
|
31 |
+
pre_load: false
|
32 |
+
do_eval: true
|
33 |
+
train: true
|
34 |
+
inlier_th: 0.5
|
35 |
+
lr: 0.0001
|
36 |
+
min_lr: 0.00001
|
37 |
+
optimizer: "adam"
|
38 |
+
seg_loss: "cew"
|
39 |
+
seg_loss_nx: "cei"
|
40 |
+
cls_loss: "ce"
|
41 |
+
cls_loss_: "bce"
|
42 |
+
sc_loss: 'l1g'
|
43 |
+
ac_fn: "relu"
|
44 |
+
norm_fn: "bn"
|
45 |
+
workers: 8
|
46 |
+
layers: 15
|
47 |
+
log_intervals: 50
|
48 |
+
eval_n_epoch: 10
|
49 |
+
|
50 |
+
use_mid_feature: true
|
51 |
+
norm_desc: false
|
52 |
+
with_sc: false
|
53 |
+
with_cls: true
|
54 |
+
with_score: false
|
55 |
+
with_aug: true
|
56 |
+
with_dist: true
|
57 |
+
|
58 |
+
batch_size: 32
|
59 |
+
its_per_epoch: 1000
|
60 |
+
decay_rate: 0.999992
|
61 |
+
decay_iter: 150000
|
62 |
+
epochs: 1500
|
63 |
+
|
64 |
+
cluster_method_: 'kmeans'
|
65 |
+
cluster_method: 'birch'
|
66 |
+
|
67 |
+
weight_path_: null
|
68 |
+
weight_path: '20230805_132653_segnet_L15_STCA_resnet4x_B32_K1024_relu_bn_od1024_nc977_adam_cew_md_A_birch/segnet.485.pth'
|
69 |
+
resume_path: null
|
70 |
+
|
71 |
+
eval: false
|
72 |
+
#loc: false
|
73 |
+
loc: true
|
74 |
+
#n_class: 977
|
75 |
+
online: false
|
76 |
+
|
77 |
+
eval_max_keypoints: 4096
|
78 |
+
|
79 |
+
localization:
|
80 |
+
loc_scene_name: [ ]
|
81 |
+
save_path: '/scratches/flyer_2/fx221/exp/localizer/loc_results'
|
82 |
+
dataset: [ 'T' ]
|
83 |
+
seg_k: 50
|
84 |
+
threshold: 8 # 8 for indoor, 12 for outdoor
|
85 |
+
min_kpts: 256
|
86 |
+
min_matches: 4
|
87 |
+
min_inliers: 64
|
88 |
+
matching_method_: "mnn"
|
89 |
+
matching_method_1: "spg"
|
90 |
+
matching_method: "gm"
|
91 |
+
save: false
|
92 |
+
show: true
|
93 |
+
show_time: 1
|
94 |
+
do_refinement: true
|
95 |
+
with_original: true
|
96 |
+
with_extra: false
|
97 |
+
max_vrf: 1
|
98 |
+
with_compress: false
|
99 |
+
covisibility_frame: 20
|
100 |
+
observation_threshold: 3
|
third_party/pram/configs/datasets/12Scenes.yaml
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset: '12Scenes'
|
2 |
+
scenes: [ 'apt1/kitchen',
|
3 |
+
'apt1/living',
|
4 |
+
'apt2/bed',
|
5 |
+
'apt2/kitchen',
|
6 |
+
'apt2/living',
|
7 |
+
'apt2/luke',
|
8 |
+
'office1/gates362',
|
9 |
+
'office1/gates381',
|
10 |
+
'office1/lounge',
|
11 |
+
'office1/manolis',
|
12 |
+
'office2/5a',
|
13 |
+
'office2/5b'
|
14 |
+
]
|
15 |
+
|
16 |
+
apt1/kitchen:
|
17 |
+
n_cluster: 16
|
18 |
+
cluster_mode: 'xy'
|
19 |
+
cluster_method: 'birch'
|
20 |
+
|
21 |
+
training_sample_ratio: 1
|
22 |
+
eval_sample_ratio: 5
|
23 |
+
query_path: 'queries_with_intrinsics.txt'
|
24 |
+
gt_pose_path: 'queries_poses.txt'
|
25 |
+
image_path_prefix: ''
|
26 |
+
|
27 |
+
|
28 |
+
apt1/living:
|
29 |
+
n_cluster: 16
|
30 |
+
cluster_mode: 'xy'
|
31 |
+
cluster_method: 'birch'
|
32 |
+
|
33 |
+
training_sample_ratio: 1
|
34 |
+
eval_sample_ratio: 5
|
35 |
+
image_path_prefix: ''
|
36 |
+
query_path: 'queries_with_intrinsics.txt'
|
37 |
+
gt_pose_path: 'queries_poses.txt'
|
38 |
+
|
39 |
+
apt2/bed:
|
40 |
+
n_cluster: 16
|
41 |
+
cluster_mode: 'xy'
|
42 |
+
cluster_method: 'birch'
|
43 |
+
|
44 |
+
training_sample_ratio: 1
|
45 |
+
eval_sample_ratio: 5
|
46 |
+
image_path_prefix: ''
|
47 |
+
|
48 |
+
query_path: 'queries_with_intrinsics.txt'
|
49 |
+
gt_pose_path: 'queries_poses.txt'
|
50 |
+
|
51 |
+
|
52 |
+
apt2/kitchen:
|
53 |
+
n_cluster: 16
|
54 |
+
cluster_mode: 'xy'
|
55 |
+
cluster_method: 'birch'
|
56 |
+
|
57 |
+
training_sample_ratio: 1
|
58 |
+
eval_sample_ratio: 5
|
59 |
+
image_path_prefix: ''
|
60 |
+
|
61 |
+
query_path: 'queries_with_intrinsics.txt'
|
62 |
+
gt_pose_path: 'queries_poses.txt'
|
63 |
+
|
64 |
+
|
65 |
+
apt2/living:
|
66 |
+
n_cluster: 16
|
67 |
+
cluster_mode: 'xy'
|
68 |
+
cluster_method: 'birch'
|
69 |
+
|
70 |
+
training_sample_ratio: 1
|
71 |
+
eval_sample_ratio: 5
|
72 |
+
image_path_prefix: ''
|
73 |
+
|
74 |
+
query_path: 'queries_with_intrinsics.txt'
|
75 |
+
gt_pose_path: 'queries_poses.txt'
|
76 |
+
|
77 |
+
|
78 |
+
apt2/luke:
|
79 |
+
n_cluster: 16
|
80 |
+
cluster_mode: 'xy'
|
81 |
+
cluster_method: 'birch'
|
82 |
+
|
83 |
+
training_sample_ratio: 1
|
84 |
+
eval_sample_ratio: 5
|
85 |
+
image_path_prefix: ''
|
86 |
+
|
87 |
+
query_path: 'queries_with_intrinsics.txt'
|
88 |
+
gt_pose_path: 'queries_poses.txt'
|
89 |
+
|
90 |
+
|
91 |
+
office1/gates362:
|
92 |
+
n_cluster: 16
|
93 |
+
cluster_mode: 'xy'
|
94 |
+
cluster_method: 'birch'
|
95 |
+
|
96 |
+
training_sample_ratio: 3
|
97 |
+
eval_sample_ratio: 5
|
98 |
+
image_path_prefix: ''
|
99 |
+
|
100 |
+
query_path: 'queries_with_intrinsics.txt'
|
101 |
+
gt_pose_path: 'queries_poses.txt'
|
102 |
+
|
103 |
+
|
104 |
+
office1/gates381:
|
105 |
+
n_cluster: 16
|
106 |
+
cluster_mode: 'xy'
|
107 |
+
cluster_method: 'birch'
|
108 |
+
|
109 |
+
training_sample_ratio: 3
|
110 |
+
eval_sample_ratio: 5
|
111 |
+
image_path_prefix: ''
|
112 |
+
|
113 |
+
query_path: 'queries_with_intrinsics.txt'
|
114 |
+
gt_pose_path: 'queries_poses.txt'
|
115 |
+
|
116 |
+
|
117 |
+
office1/lounge:
|
118 |
+
n_cluster: 16
|
119 |
+
cluster_mode: 'xy'
|
120 |
+
cluster_method: 'birch'
|
121 |
+
|
122 |
+
training_sample_ratio: 1
|
123 |
+
eval_sample_ratio: 5
|
124 |
+
image_path_prefix: ''
|
125 |
+
|
126 |
+
query_path: 'queries_with_intrinsics.txt'
|
127 |
+
gt_pose_path: 'queries_poses.txt'
|
128 |
+
|
129 |
+
|
130 |
+
office1/manolis:
|
131 |
+
n_cluster: 16
|
132 |
+
cluster_mode: 'xy'
|
133 |
+
cluster_method: 'birch'
|
134 |
+
|
135 |
+
training_sample_ratio: 1
|
136 |
+
eval_sample_ratio: 5
|
137 |
+
image_path_prefix: ''
|
138 |
+
|
139 |
+
query_path: 'queries_with_intrinsics.txt'
|
140 |
+
gt_pose_path: 'queries_poses.txt'
|
141 |
+
|
142 |
+
|
143 |
+
office2/5a:
|
144 |
+
n_cluster: 16
|
145 |
+
cluster_mode: 'xy'
|
146 |
+
cluster_method: 'birch'
|
147 |
+
|
148 |
+
training_sample_ratio: 1
|
149 |
+
eval_sample_ratio: 5
|
150 |
+
image_path_prefix: ''
|
151 |
+
|
152 |
+
query_path: 'queries_with_intrinsics.txt'
|
153 |
+
gt_pose_path: 'queries_poses.txt'
|
154 |
+
|
155 |
+
|
156 |
+
office2/5b:
|
157 |
+
n_cluster: 16
|
158 |
+
cluster_mode: 'xy'
|
159 |
+
cluster_method: 'birch'
|
160 |
+
|
161 |
+
training_sample_ratio: 1
|
162 |
+
eval_sample_ratio: 5
|
163 |
+
image_path_prefix: ''
|
164 |
+
|
165 |
+
query_path: 'queries_with_intrinsics.txt'
|
166 |
+
gt_pose_path: 'queries_poses.txt'
|
third_party/pram/configs/datasets/7Scenes.yaml
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset: '7Scenes'
|
2 |
+
scenes: [ 'chess', 'heads', 'office', 'fire', 'stairs', 'redkitchen', 'pumpkin' ]
|
3 |
+
|
4 |
+
|
5 |
+
chess:
|
6 |
+
n_cluster: 16
|
7 |
+
cluster_mode: 'xz'
|
8 |
+
cluster_method_: 'kmeans'
|
9 |
+
cluster_method: 'birch'
|
10 |
+
|
11 |
+
training_sample_ratio: 2
|
12 |
+
eval_sample_ratio: 10
|
13 |
+
gt_pose_path: 'queries_poses.txt'
|
14 |
+
query_path: 'queries_with_intrinsics.txt'
|
15 |
+
image_path_prefix: ''
|
16 |
+
|
17 |
+
|
18 |
+
|
19 |
+
heads:
|
20 |
+
n_cluster: 16
|
21 |
+
cluster_mode: 'xz'
|
22 |
+
cluster_method_: 'kmeans'
|
23 |
+
cluster_method: 'birch'
|
24 |
+
|
25 |
+
training_sample_ratio: 1
|
26 |
+
eval_sample_ratio: 2
|
27 |
+
gt_pose_path: 'queries_poses.txt'
|
28 |
+
query_path: 'queries_with_intrinsics.txt'
|
29 |
+
image_path_prefix: ''
|
30 |
+
|
31 |
+
|
32 |
+
office:
|
33 |
+
n_cluster: 16
|
34 |
+
cluster_mode: 'xz'
|
35 |
+
cluster_method_: 'kmeans'
|
36 |
+
cluster_method: 'birch'
|
37 |
+
|
38 |
+
training_sample_ratio: 3
|
39 |
+
eval_sample_ratio: 10
|
40 |
+
gt_pose_path: 'queries_poses.txt'
|
41 |
+
query_path: 'queries_with_intrinsics.txt'
|
42 |
+
image_path_prefix: ''
|
43 |
+
|
44 |
+
fire:
|
45 |
+
n_cluster: 16
|
46 |
+
cluster_mode: 'xz'
|
47 |
+
cluster_method_: 'kmeans'
|
48 |
+
cluster_method: 'birch'
|
49 |
+
|
50 |
+
training_sample_ratio: 2
|
51 |
+
eval_sample_ratio: 5
|
52 |
+
gt_pose_path: 'queries_poses.txt'
|
53 |
+
query_path: 'queries_with_intrinsics.txt'
|
54 |
+
image_path_prefix: ''
|
55 |
+
|
56 |
+
|
57 |
+
stairs:
|
58 |
+
n_cluster: 16
|
59 |
+
cluster_mode: 'xz'
|
60 |
+
cluster_method_: 'kmeans'
|
61 |
+
cluster_method: 'birch'
|
62 |
+
|
63 |
+
training_sample_ratio: 1
|
64 |
+
eval_sample_ratio: 10
|
65 |
+
gt_pose_path: 'queries_poses.txt'
|
66 |
+
query_path: 'queries_with_intrinsics.txt'
|
67 |
+
image_path_prefix: ''
|
68 |
+
|
69 |
+
|
70 |
+
redkitchen:
|
71 |
+
n_cluster: 16
|
72 |
+
cluster_mode: 'xz'
|
73 |
+
cluster_method_: 'kmeans'
|
74 |
+
cluster_method: 'birch'
|
75 |
+
|
76 |
+
training_sample_ratio: 3
|
77 |
+
eval_sample_ratio: 10
|
78 |
+
gt_pose_path: 'queries_poses.txt'
|
79 |
+
query_path: 'queries_with_intrinsics.txt'
|
80 |
+
image_path_prefix: ''
|
81 |
+
|
82 |
+
|
83 |
+
|
84 |
+
|
85 |
+
pumpkin:
|
86 |
+
n_cluster: 16
|
87 |
+
cluster_mode: 'xz'
|
88 |
+
cluster_method_: 'kmeans'
|
89 |
+
cluster_method: 'birch'
|
90 |
+
|
91 |
+
training_sample_ratio: 2
|
92 |
+
eval_sample_ratio: 10
|
93 |
+
gt_pose_path: 'queries_poses.txt'
|
94 |
+
query_path: 'queries_with_intrinsics.txt'
|
95 |
+
image_path_prefix: ''
|
96 |
+
|
third_party/pram/configs/datasets/Aachen.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset: 'Aachen'
|
2 |
+
|
3 |
+
scenes: [ 'Aachenv11' ]
|
4 |
+
|
5 |
+
Aachenv11:
|
6 |
+
n_cluster: 512
|
7 |
+
cluster_mode: 'xz'
|
8 |
+
cluster_method_: 'kmeans'
|
9 |
+
cluster_method: 'birch'
|
10 |
+
training_sample_ratio: 1
|
11 |
+
eval_sample_ratio: 1
|
12 |
+
image_path_prefix: 'images/images_upright'
|
13 |
+
query_path_: 'queries_with_intrinsics.txt'
|
14 |
+
query_path: 'queries_with_intrinsics_demo.txt'
|
15 |
+
gt_pose_path: 'queries_pose_spp_spg.txt'
|
third_party/pram/configs/datasets/CambridgeLandmarks.yaml
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dataset: 'CambridgeLandmarks'
|
2 |
+
scenes: [ 'GreatCourt', 'KingsCollege', 'OldHospital', 'ShopFacade', 'StMarysChurch' ]
|
3 |
+
|
4 |
+
GreatCourt:
|
5 |
+
n_cluster: 32
|
6 |
+
cluster_mode: 'xy'
|
7 |
+
cluster_method: 'birch'
|
8 |
+
|
9 |
+
training_sample_ratio: 1
|
10 |
+
eval_sample_ratio: 1
|
11 |
+
image_path_prefix: ''
|
12 |
+
query_path: 'queries_with_intrinsics.txt'
|
13 |
+
gt_pose_path: 'queries_poses.txt'
|
14 |
+
|
15 |
+
|
16 |
+
KingsCollege:
|
17 |
+
n_cluster: 32
|
18 |
+
cluster_mode: 'xy'
|
19 |
+
cluster_method: 'birch'
|
20 |
+
|
21 |
+
training_sample_ratio: 1
|
22 |
+
eval_sample_ratio: 1
|
23 |
+
image_path_prefix: ''
|
24 |
+
|
25 |
+
query_path: 'queries_with_intrinsics.txt'
|
26 |
+
gt_pose_path: 'queries_poses.txt'
|
27 |
+
|
28 |
+
|
29 |
+
OldHospital:
|
30 |
+
n_cluster: 32
|
31 |
+
cluster_mode: 'xz'
|
32 |
+
cluster_method: 'birch'
|
33 |
+
|
34 |
+
training_sample_ratio: 1
|
35 |
+
eval_sample_ratio: 1
|
36 |
+
image_path_prefix: ''
|
37 |
+
query_path: 'queries_with_intrinsics.txt'
|
38 |
+
gt_pose_path: 'queries_poses.txt'
|
39 |
+
|
40 |
+
|
41 |
+
ShopFacade:
|
42 |
+
n_cluster: 32
|
43 |
+
cluster_mode: 'xy'
|
44 |
+
cluster_method: 'birch'
|
45 |
+
|
46 |
+
training_sample_ratio: 1
|
47 |
+
eval_sample_ratio: 1
|
48 |
+
image_path_prefix: ''
|
49 |
+
|
50 |
+
query_path: 'queries_with_intrinsics.txt'
|
51 |
+
gt_pose_path: 'queries_poses.txt'
|
52 |
+
|
53 |
+
|
54 |
+
StMarysChurch:
|
55 |
+
n_cluster: 32
|
56 |
+
cluster_mode: 'xz'
|
57 |
+
cluster_method: 'birch'
|
58 |
+
|
59 |
+
training_sample_ratio: 1
|
60 |
+
eval_sample_ratio: 1
|
61 |
+
image_path_prefix: ''
|
62 |
+
|
63 |
+
query_path: 'queries_with_intrinsics.txt'
|
64 |
+
gt_pose_path: 'queries_poses.txt'
|
65 |
+
|
66 |
+
|
67 |
+
|
third_party/pram/dataset/aachen.py
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> aachen
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 29/01/2024 14:33
|
7 |
+
=================================================='''
|
8 |
+
import os.path as osp
|
9 |
+
import numpy as np
|
10 |
+
import cv2
|
11 |
+
from colmap_utils.read_write_model import read_model
|
12 |
+
import torchvision.transforms as tvt
|
13 |
+
from dataset.basicdataset import BasicDataset
|
14 |
+
|
15 |
+
|
16 |
+
class Aachen(BasicDataset):
|
17 |
+
def __init__(self, landmark_path, scene, dataset_path, n_class, seg_mode, seg_method, dataset='Aachen',
|
18 |
+
nfeatures=1024,
|
19 |
+
query_p3d_fn=None,
|
20 |
+
train=True,
|
21 |
+
with_aug=False,
|
22 |
+
min_inliers=0,
|
23 |
+
max_inliers=4096,
|
24 |
+
random_inliers=False,
|
25 |
+
jitter_params=None,
|
26 |
+
scale_params=None,
|
27 |
+
image_dim=3,
|
28 |
+
query_info_path=None,
|
29 |
+
sample_ratio=1, ):
|
30 |
+
self.landmark_path = osp.join(landmark_path, scene)
|
31 |
+
self.dataset_path = osp.join(dataset_path, scene)
|
32 |
+
self.n_class = n_class
|
33 |
+
self.dataset = dataset + '/' + scene
|
34 |
+
self.nfeatures = nfeatures
|
35 |
+
self.with_aug = with_aug
|
36 |
+
self.jitter_params = jitter_params
|
37 |
+
self.scale_params = scale_params
|
38 |
+
self.image_dim = image_dim
|
39 |
+
self.train = train
|
40 |
+
self.min_inliers = min_inliers
|
41 |
+
self.max_inliers = max_inliers if max_inliers < nfeatures else nfeatures
|
42 |
+
self.random_inliers = random_inliers
|
43 |
+
self.image_prefix = 'images/images_upright'
|
44 |
+
|
45 |
+
train_transforms = []
|
46 |
+
if self.with_aug:
|
47 |
+
train_transforms.append(tvt.ColorJitter(
|
48 |
+
brightness=jitter_params['brightness'],
|
49 |
+
contrast=jitter_params['contrast'],
|
50 |
+
saturation=jitter_params['saturation'],
|
51 |
+
hue=jitter_params['hue']))
|
52 |
+
if jitter_params['blur'] > 0:
|
53 |
+
train_transforms.append(tvt.GaussianBlur(kernel_size=int(jitter_params['blur'])))
|
54 |
+
self.train_transforms = tvt.Compose(train_transforms)
|
55 |
+
|
56 |
+
if train:
|
57 |
+
self.cameras, self.images, point3Ds = read_model(path=osp.join(self.landmark_path, '3D-models'), ext='.bin')
|
58 |
+
self.name_to_id = {image.name: i for i, image in self.images.items() if len(self.images[i].point3D_ids) > 0}
|
59 |
+
|
60 |
+
# only for testing of query images
|
61 |
+
if not self.train:
|
62 |
+
data = np.load(query_p3d_fn, allow_pickle=True)[()]
|
63 |
+
self.img_p3d = data
|
64 |
+
else:
|
65 |
+
self.img_p3d = {}
|
66 |
+
|
67 |
+
self.img_fns = []
|
68 |
+
if train:
|
69 |
+
with open(osp.join(self.dataset_path, 'aachen_db_imglist.txt'), 'r') as f:
|
70 |
+
lines = f.readlines()
|
71 |
+
for l in lines:
|
72 |
+
l = l.strip()
|
73 |
+
if l not in self.name_to_id.keys():
|
74 |
+
continue
|
75 |
+
self.img_fns.append(l)
|
76 |
+
else:
|
77 |
+
with open(osp.join(self.dataset_path, 'queries', 'day_time_queries_with_intrinsics.txt'), 'r') as f:
|
78 |
+
lines = f.readlines()
|
79 |
+
for l in lines:
|
80 |
+
l = l.strip().split()[0]
|
81 |
+
if l not in self.img_p3d.keys():
|
82 |
+
continue
|
83 |
+
self.img_fns.append(l)
|
84 |
+
with open(osp.join(self.dataset_path, 'queries', 'night_time_queries_with_intrinsics.txt'), 'r') as f:
|
85 |
+
lines = f.readlines()
|
86 |
+
for l in lines:
|
87 |
+
l = l.strip().split()[0]
|
88 |
+
if l not in self.img_p3d.keys():
|
89 |
+
continue
|
90 |
+
self.img_fns.append(l)
|
91 |
+
|
92 |
+
print(
|
93 |
+
'Load {} images from {} for {}...'.format(len(self.img_fns), self.dataset, 'training' if train else 'eval'))
|
94 |
+
|
95 |
+
data = np.load(osp.join(self.landmark_path,
|
96 |
+
'point3D_cluster_n{:d}_{:s}_{:s}.npy'.format(n_class - 1, seg_mode, seg_method)),
|
97 |
+
allow_pickle=True)[()]
|
98 |
+
p3d_id = data['id']
|
99 |
+
seg_id = data['label']
|
100 |
+
self.p3d_seg = {p3d_id[i]: seg_id[i] for i in range(p3d_id.shape[0])}
|
101 |
+
xyzs = data['xyz']
|
102 |
+
self.p3d_xyzs = {p3d_id[i]: xyzs[i] for i in range(p3d_id.shape[0])}
|
103 |
+
|
104 |
+
with open(osp.join(self.landmark_path, 'sc_mean_scale.txt'), 'r') as f:
|
105 |
+
lines = f.readlines()
|
106 |
+
for l in lines:
|
107 |
+
l = l.strip().split()
|
108 |
+
self.mean_xyz = np.array([float(v) for v in l[:3]])
|
109 |
+
self.scale_xyz = np.array([float(v) for v in l[3:]])
|
110 |
+
|
111 |
+
if not train:
|
112 |
+
self.query_info = self.read_query_info(path=query_info_path)
|
113 |
+
|
114 |
+
self.nfeatures = nfeatures
|
115 |
+
self.feature_dir = osp.join(self.landmark_path, 'feats')
|
116 |
+
self.feats = {}
|
117 |
+
|
118 |
+
def read_image(self, image_name):
|
119 |
+
return cv2.imread(osp.join(self.dataset_path, 'images/images_upright/', image_name))
|
third_party/pram/dataset/basicdataset.py
ADDED
@@ -0,0 +1,477 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> basicdataset
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 29/01/2024 14:27
|
7 |
+
=================================================='''
|
8 |
+
import torchvision.transforms.functional as tvf
|
9 |
+
import torchvision.transforms as tvt
|
10 |
+
import os.path as osp
|
11 |
+
import numpy as np
|
12 |
+
import cv2
|
13 |
+
from colmap_utils.read_write_model import qvec2rotmat, read_model
|
14 |
+
from dataset.utils import normalize_size
|
15 |
+
|
16 |
+
|
17 |
+
class BasicDataset:
|
18 |
+
def __init__(self,
|
19 |
+
img_list_fn,
|
20 |
+
feature_dir,
|
21 |
+
sfm_path,
|
22 |
+
seg_fn,
|
23 |
+
dataset_path,
|
24 |
+
n_class,
|
25 |
+
dataset,
|
26 |
+
nfeatures=1024,
|
27 |
+
query_p3d_fn=None,
|
28 |
+
train=True,
|
29 |
+
with_aug=False,
|
30 |
+
min_inliers=0,
|
31 |
+
max_inliers=4096,
|
32 |
+
random_inliers=False,
|
33 |
+
jitter_params=None,
|
34 |
+
scale_params=None,
|
35 |
+
image_dim=1,
|
36 |
+
pre_load=False,
|
37 |
+
query_info_path=None,
|
38 |
+
sc_mean_scale_fn=None,
|
39 |
+
):
|
40 |
+
self.n_class = n_class
|
41 |
+
self.train = train
|
42 |
+
self.min_inliers = min_inliers
|
43 |
+
self.max_inliers = max_inliers if max_inliers < nfeatures else nfeatures
|
44 |
+
self.random_inliers = random_inliers
|
45 |
+
self.dataset_path = dataset_path
|
46 |
+
self.with_aug = with_aug
|
47 |
+
self.dataset = dataset
|
48 |
+
self.jitter_params = jitter_params
|
49 |
+
self.scale_params = scale_params
|
50 |
+
self.image_dim = image_dim
|
51 |
+
self.image_prefix = ''
|
52 |
+
|
53 |
+
train_transforms = []
|
54 |
+
if self.with_aug:
|
55 |
+
train_transforms.append(tvt.ColorJitter(
|
56 |
+
brightness=jitter_params['brightness'],
|
57 |
+
contrast=jitter_params['contrast'],
|
58 |
+
saturation=jitter_params['saturation'],
|
59 |
+
hue=jitter_params['hue']))
|
60 |
+
if jitter_params['blur'] > 0:
|
61 |
+
train_transforms.append(tvt.GaussianBlur(kernel_size=int(jitter_params['blur'])))
|
62 |
+
self.train_transforms = tvt.Compose(train_transforms)
|
63 |
+
|
64 |
+
# only for testing of query images
|
65 |
+
if not self.train:
|
66 |
+
data = np.load(query_p3d_fn, allow_pickle=True)[()]
|
67 |
+
self.img_p3d = data
|
68 |
+
else:
|
69 |
+
self.img_p3d = {}
|
70 |
+
|
71 |
+
self.img_fns = []
|
72 |
+
with open(img_list_fn, 'r') as f:
|
73 |
+
lines = f.readlines()
|
74 |
+
for l in lines:
|
75 |
+
l = l.strip()
|
76 |
+
self.img_fns.append(l)
|
77 |
+
print('Load {} images from {} for {}...'.format(len(self.img_fns), dataset, 'training' if train else 'eval'))
|
78 |
+
self.feats = {}
|
79 |
+
if train:
|
80 |
+
self.cameras, self.images, point3Ds = read_model(path=sfm_path, ext='.bin')
|
81 |
+
self.name_to_id = {image.name: i for i, image in self.images.items()}
|
82 |
+
|
83 |
+
data = np.load(seg_fn, allow_pickle=True)[()]
|
84 |
+
p3d_id = data['id']
|
85 |
+
seg_id = data['label']
|
86 |
+
self.p3d_seg = {p3d_id[i]: seg_id[i] for i in range(p3d_id.shape[0])}
|
87 |
+
self.p3d_xyzs = {}
|
88 |
+
|
89 |
+
for pid in self.p3d_seg.keys():
|
90 |
+
p3d = point3Ds[pid]
|
91 |
+
self.p3d_xyzs[pid] = p3d.xyz
|
92 |
+
|
93 |
+
with open(sc_mean_scale_fn, 'r') as f:
|
94 |
+
lines = f.readlines()
|
95 |
+
for l in lines:
|
96 |
+
l = l.strip().split()
|
97 |
+
self.mean_xyz = np.array([float(v) for v in l[:3]])
|
98 |
+
self.scale_xyz = np.array([float(v) for v in l[3:]])
|
99 |
+
|
100 |
+
if not train:
|
101 |
+
self.query_info = self.read_query_info(path=query_info_path)
|
102 |
+
|
103 |
+
self.nfeatures = nfeatures
|
104 |
+
self.feature_dir = feature_dir
|
105 |
+
print('Pre loaded {} feats, mean xyz {}, scale xyz {}'.format(len(self.feats.keys()), self.mean_xyz,
|
106 |
+
self.scale_xyz))
|
107 |
+
|
108 |
+
def normalize_p3ds(self, p3ds):
|
109 |
+
mean_p3ds = np.ceil(np.mean(p3ds, axis=0))
|
110 |
+
p3ds_ = p3ds - mean_p3ds
|
111 |
+
dx = np.max(abs(p3ds_[:, 0]))
|
112 |
+
dy = np.max(abs(p3ds_[:, 1]))
|
113 |
+
dz = np.max(abs(p3ds_[:, 2]))
|
114 |
+
scale_p3ds = np.ceil(np.array([dx, dy, dz], dtype=float).reshape(3, ))
|
115 |
+
scale_p3ds[scale_p3ds < 1] = 1
|
116 |
+
scale_p3ds[scale_p3ds == 0] = 1
|
117 |
+
return mean_p3ds, scale_p3ds
|
118 |
+
|
119 |
+
def read_query_info(self, path):
|
120 |
+
query_info = {}
|
121 |
+
with open(path, 'r') as f:
|
122 |
+
lines = f.readlines()
|
123 |
+
for l in lines:
|
124 |
+
l = l.strip().split()
|
125 |
+
image_name = l[0]
|
126 |
+
cam_model = l[1]
|
127 |
+
h, w = int(l[2]), int(l[3])
|
128 |
+
params = np.array([float(v) for v in l[4:]])
|
129 |
+
query_info[image_name] = {
|
130 |
+
'width': w,
|
131 |
+
'height': h,
|
132 |
+
'model': cam_model,
|
133 |
+
'params': params,
|
134 |
+
}
|
135 |
+
return query_info
|
136 |
+
|
137 |
+
def extract_intrinsic_extrinsic_params(self, image_id):
|
138 |
+
cam = self.cameras[self.images[image_id].camera_id]
|
139 |
+
params = cam.params
|
140 |
+
model = cam.model
|
141 |
+
if model in ("SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL"):
|
142 |
+
fx = fy = params[0]
|
143 |
+
cx = params[1]
|
144 |
+
cy = params[2]
|
145 |
+
elif model in ("PINHOLE", "OPENCV", "OPENCV_FISHEYE", "FULL_OPENCV"):
|
146 |
+
fx = params[0]
|
147 |
+
fy = params[1]
|
148 |
+
cx = params[2]
|
149 |
+
cy = params[3]
|
150 |
+
else:
|
151 |
+
raise Exception("Camera model not supported")
|
152 |
+
K = np.eye(3, dtype=float)
|
153 |
+
K[0, 0] = fx
|
154 |
+
K[1, 1] = fy
|
155 |
+
K[0, 2] = cx
|
156 |
+
K[1, 2] = cy
|
157 |
+
|
158 |
+
qvec = self.images[image_id].qvec
|
159 |
+
tvec = self.images[image_id].tvec
|
160 |
+
R = qvec2rotmat(qvec=qvec)
|
161 |
+
P = np.eye(4, dtype=float)
|
162 |
+
P[:3, :3] = R
|
163 |
+
P[:3, 3] = tvec.reshape(3, )
|
164 |
+
|
165 |
+
return {'K': K, 'P': P}
|
166 |
+
|
167 |
+
def get_item_train(self, idx):
|
168 |
+
img_name = self.img_fns[idx]
|
169 |
+
if img_name in self.feats.keys():
|
170 |
+
feat_data = self.feats[img_name]
|
171 |
+
else:
|
172 |
+
feat_data = np.load(osp.join(self.feature_dir, img_name.replace('/', '+') + '.npy'), allow_pickle=True)[()]
|
173 |
+
# descs = feat_data['descriptors'] # [N, D]
|
174 |
+
scores = feat_data['scores'] # [N, 1]
|
175 |
+
kpts = feat_data['keypoints'] # [N, 2]
|
176 |
+
image_size = feat_data['image_size']
|
177 |
+
|
178 |
+
nfeat = kpts.shape[0]
|
179 |
+
|
180 |
+
# print(img_name, self.name_to_id[img_name])
|
181 |
+
p3d_ids = self.images[self.name_to_id[img_name]].point3D_ids
|
182 |
+
p3d_xyzs = np.zeros(shape=(nfeat, 3), dtype=float)
|
183 |
+
|
184 |
+
seg_ids = np.zeros(shape=(nfeat,), dtype=int) # + self.n_class - 1
|
185 |
+
for i in range(nfeat):
|
186 |
+
p3d = p3d_ids[i]
|
187 |
+
if p3d in self.p3d_seg.keys():
|
188 |
+
seg_ids[i] = self.p3d_seg[p3d] + 1 # 0 for invalid
|
189 |
+
if seg_ids[i] == -1:
|
190 |
+
seg_ids[i] = 0
|
191 |
+
|
192 |
+
if p3d in self.p3d_xyzs.keys():
|
193 |
+
p3d_xyzs[i] = self.p3d_xyzs[p3d]
|
194 |
+
|
195 |
+
seg_ids = np.array(seg_ids).reshape(-1, )
|
196 |
+
|
197 |
+
n_inliers = np.sum(seg_ids > 0)
|
198 |
+
n_outliers = np.sum(seg_ids == 0)
|
199 |
+
inlier_ids = np.where(seg_ids > 0)[0]
|
200 |
+
outlier_ids = np.where(seg_ids == 0)[0]
|
201 |
+
|
202 |
+
if n_inliers <= self.min_inliers:
|
203 |
+
sel_inliers = n_inliers
|
204 |
+
sel_outliers = self.nfeatures - sel_inliers
|
205 |
+
|
206 |
+
out_ids = np.arange(n_outliers)
|
207 |
+
np.random.shuffle(out_ids)
|
208 |
+
sel_ids = np.hstack([inlier_ids, outlier_ids[out_ids[:self.nfeatures - n_inliers]]])
|
209 |
+
else:
|
210 |
+
sel_inliers = np.random.randint(self.min_inliers, self.max_inliers)
|
211 |
+
if sel_inliers > n_inliers:
|
212 |
+
sel_inliers = n_inliers
|
213 |
+
|
214 |
+
if sel_inliers + n_outliers < self.nfeatures:
|
215 |
+
sel_inliers = self.nfeatures - n_outliers
|
216 |
+
|
217 |
+
sel_outliers = self.nfeatures - sel_inliers
|
218 |
+
|
219 |
+
in_ids = np.arange(n_inliers)
|
220 |
+
np.random.shuffle(in_ids)
|
221 |
+
sel_inlier_ids = inlier_ids[in_ids[:sel_inliers]]
|
222 |
+
|
223 |
+
out_ids = np.arange(n_outliers)
|
224 |
+
np.random.shuffle(out_ids)
|
225 |
+
sel_outlier_ids = outlier_ids[out_ids[:sel_outliers]]
|
226 |
+
|
227 |
+
sel_ids = np.hstack([sel_inlier_ids, sel_outlier_ids])
|
228 |
+
|
229 |
+
# sel_descs = descs[sel_ids]
|
230 |
+
sel_scores = scores[sel_ids]
|
231 |
+
sel_kpts = kpts[sel_ids]
|
232 |
+
sel_seg_ids = seg_ids[sel_ids]
|
233 |
+
sel_xyzs = p3d_xyzs[sel_ids]
|
234 |
+
|
235 |
+
shuffle_ids = np.arange(sel_ids.shape[0])
|
236 |
+
np.random.shuffle(shuffle_ids)
|
237 |
+
# sel_descs = sel_descs[shuffle_ids]
|
238 |
+
sel_scores = sel_scores[shuffle_ids]
|
239 |
+
sel_kpts = sel_kpts[shuffle_ids]
|
240 |
+
sel_seg_ids = sel_seg_ids[shuffle_ids]
|
241 |
+
sel_xyzs = sel_xyzs[shuffle_ids]
|
242 |
+
|
243 |
+
if sel_kpts.shape[0] < self.nfeatures:
|
244 |
+
# print(sel_descs.shape, sel_kpts.shape, sel_scores.shape, sel_seg_ids.shape, sel_xyzs.shape)
|
245 |
+
valid_sel_ids = np.array([v for v in range(sel_kpts.shape[0]) if sel_seg_ids[v] > 0], dtype=int)
|
246 |
+
# ref_sel_id = np.random.choice(valid_sel_ids, size=1)[0]
|
247 |
+
if valid_sel_ids.shape[0] == 0:
|
248 |
+
valid_sel_ids = np.array([v for v in range(sel_kpts.shape[0])], dtype=int)
|
249 |
+
random_n = self.nfeatures - sel_kpts.shape[0]
|
250 |
+
random_scores = np.random.random((random_n,))
|
251 |
+
random_kpts, random_seg_ids, random_xyzs = self.random_points_from_reference(
|
252 |
+
n=random_n,
|
253 |
+
ref_kpts=sel_kpts[valid_sel_ids],
|
254 |
+
ref_segs=sel_seg_ids[valid_sel_ids],
|
255 |
+
ref_xyzs=sel_xyzs[valid_sel_ids],
|
256 |
+
radius=5,
|
257 |
+
)
|
258 |
+
# sel_descs = np.vstack([sel_descs, random_descs])
|
259 |
+
sel_scores = np.hstack([sel_scores, random_scores])
|
260 |
+
sel_kpts = np.vstack([sel_kpts, random_kpts])
|
261 |
+
sel_seg_ids = np.hstack([sel_seg_ids, random_seg_ids])
|
262 |
+
sel_xyzs = np.vstack([sel_xyzs, random_xyzs])
|
263 |
+
|
264 |
+
gt_n_seg = np.zeros(shape=(self.n_class,), dtype=int)
|
265 |
+
gt_cls = np.zeros(shape=(self.n_class,), dtype=int)
|
266 |
+
gt_cls_dist = np.zeros(shape=(self.n_class,), dtype=float)
|
267 |
+
uids = np.unique(sel_seg_ids).tolist()
|
268 |
+
for uid in uids:
|
269 |
+
if uid == 0:
|
270 |
+
continue
|
271 |
+
gt_cls[uid] = 1
|
272 |
+
gt_n_seg[uid] = np.sum(sel_seg_ids == uid)
|
273 |
+
gt_cls_dist[uid] = np.sum(seg_ids == uid) / np.sum(seg_ids > 0) # [valid_id / total_valid_id]
|
274 |
+
|
275 |
+
param_out = self.extract_intrinsic_extrinsic_params(image_id=self.name_to_id[img_name])
|
276 |
+
|
277 |
+
img = self.read_image(image_name=img_name)
|
278 |
+
image_size = img.shape[:2]
|
279 |
+
if self.image_dim == 1:
|
280 |
+
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
281 |
+
else:
|
282 |
+
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
283 |
+
if self.with_aug:
|
284 |
+
nh = img.shape[0]
|
285 |
+
nw = img.shape[1]
|
286 |
+
if self.scale_params is not None:
|
287 |
+
do_scale = np.random.random()
|
288 |
+
if do_scale <= 0.25:
|
289 |
+
p = np.random.randint(0, 11)
|
290 |
+
s = self.scale_params[0] + (self.scale_params[1] - self.scale_params[0]) / 10 * p
|
291 |
+
nh = int(img.shape[0] * s)
|
292 |
+
nw = int(img.shape[1] * s)
|
293 |
+
sh = nh / img.shape[0]
|
294 |
+
sw = nw / img.shape[1]
|
295 |
+
sel_kpts[:, 0] = sel_kpts[:, 0] * sw
|
296 |
+
sel_kpts[:, 1] = sel_kpts[:, 1] * sh
|
297 |
+
img = cv2.resize(img, dsize=(nw, nh))
|
298 |
+
|
299 |
+
brightness = np.random.uniform(-self.jitter_params['brightness'], self.jitter_params['brightness']) * 255
|
300 |
+
contrast = 1 + np.random.uniform(-self.jitter_params['contrast'], self.jitter_params['contrast'])
|
301 |
+
img = cv2.addWeighted(img, contrast, img, 0, brightness)
|
302 |
+
img = np.clip(img, a_min=0, a_max=255)
|
303 |
+
if self.image_dim == 1:
|
304 |
+
img = img[..., None]
|
305 |
+
img = img.astype(float) / 255.
|
306 |
+
image_size = np.array([nh, nw], dtype=int)
|
307 |
+
else:
|
308 |
+
if self.image_dim == 1:
|
309 |
+
img = img[..., None].astype(float) / 255.
|
310 |
+
|
311 |
+
output = {
|
312 |
+
# 'descriptors': sel_descs, # may not be used
|
313 |
+
'scores': sel_scores,
|
314 |
+
'keypoints': sel_kpts,
|
315 |
+
'norm_keypoints': normalize_size(x=sel_kpts, size=image_size),
|
316 |
+
'image': [img],
|
317 |
+
'gt_seg': sel_seg_ids,
|
318 |
+
'gt_cls': gt_cls,
|
319 |
+
'gt_cls_dist': gt_cls_dist,
|
320 |
+
'gt_n_seg': gt_n_seg,
|
321 |
+
'file_name': img_name,
|
322 |
+
'prefix_name': self.image_prefix,
|
323 |
+
# 'mean_xyz': self.mean_xyz,
|
324 |
+
# 'scale_xyz': self.scale_xyz,
|
325 |
+
# 'gt_sc': sel_xyzs,
|
326 |
+
# 'gt_norm_sc': (sel_xyzs - self.mean_xyz) / self.scale_xyz,
|
327 |
+
'K': param_out['K'],
|
328 |
+
'gt_P': param_out['P']
|
329 |
+
}
|
330 |
+
return output
|
331 |
+
|
332 |
+
def get_item_test(self, idx):
|
333 |
+
|
334 |
+
# evaluation of recognition only
|
335 |
+
img_name = self.img_fns[idx]
|
336 |
+
feat_data = np.load(osp.join(self.feature_dir, img_name.replace('/', '+') + '.npy'), allow_pickle=True)[()]
|
337 |
+
descs = feat_data['descriptors'] # [N, D]
|
338 |
+
scores = feat_data['scores'] # [N, 1]
|
339 |
+
kpts = feat_data['keypoints'] # [N, 2]
|
340 |
+
image_size = feat_data['image_size']
|
341 |
+
|
342 |
+
nfeat = descs.shape[0]
|
343 |
+
|
344 |
+
if img_name in self.img_p3d.keys():
|
345 |
+
p3d_ids = self.img_p3d[img_name]
|
346 |
+
p3d_xyzs = np.zeros(shape=(nfeat, 3), dtype=float)
|
347 |
+
seg_ids = np.zeros(shape=(nfeat,), dtype=int) # attention! by default invalid!!!
|
348 |
+
for i in range(nfeat):
|
349 |
+
p3d = p3d_ids[i]
|
350 |
+
if p3d in self.p3d_seg.keys():
|
351 |
+
seg_ids[i] = self.p3d_seg[p3d] + 1
|
352 |
+
if seg_ids[i] == -1:
|
353 |
+
seg_ids[i] = 0 # 0 for in valid
|
354 |
+
|
355 |
+
if p3d in self.p3d_xyzs.keys():
|
356 |
+
p3d_xyzs[i] = self.p3d_xyzs[p3d]
|
357 |
+
|
358 |
+
seg_ids = np.array(seg_ids).reshape(-1, )
|
359 |
+
|
360 |
+
if self.nfeatures > 0:
|
361 |
+
sorted_ids = np.argsort(scores)[::-1][:self.nfeatures] # large to small
|
362 |
+
descs = descs[sorted_ids]
|
363 |
+
scores = scores[sorted_ids]
|
364 |
+
kpts = kpts[sorted_ids]
|
365 |
+
p3d_xyzs = p3d_xyzs[sorted_ids]
|
366 |
+
|
367 |
+
seg_ids = seg_ids[sorted_ids]
|
368 |
+
|
369 |
+
gt_n_seg = np.zeros(shape=(self.n_class,), dtype=int)
|
370 |
+
gt_cls = np.zeros(shape=(self.n_class,), dtype=int)
|
371 |
+
gt_cls_dist = np.zeros(shape=(self.n_class,), dtype=float)
|
372 |
+
uids = np.unique(seg_ids).tolist()
|
373 |
+
for uid in uids:
|
374 |
+
if uid == 0:
|
375 |
+
continue
|
376 |
+
gt_cls[uid] = 1
|
377 |
+
gt_n_seg[uid] = np.sum(seg_ids == uid)
|
378 |
+
gt_cls_dist[uid] = np.sum(seg_ids == uid) / np.sum(
|
379 |
+
seg_ids < self.n_class - 1) # [valid_id / total_valid_id]
|
380 |
+
|
381 |
+
gt_cls[0] = 0
|
382 |
+
|
383 |
+
img = self.read_image(image_name=img_name)
|
384 |
+
if self.image_dim == 1:
|
385 |
+
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
386 |
+
img = img[..., None].astype(float) / 255.
|
387 |
+
else:
|
388 |
+
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(float) / 255.
|
389 |
+
return {
|
390 |
+
'descriptors': descs,
|
391 |
+
'scores': scores,
|
392 |
+
'keypoints': kpts,
|
393 |
+
'image_size': image_size,
|
394 |
+
'norm_keypoints': normalize_size(x=kpts, size=image_size),
|
395 |
+
'gt_seg': seg_ids,
|
396 |
+
'gt_cls': gt_cls,
|
397 |
+
'gt_cls_dist': gt_cls_dist,
|
398 |
+
'gt_n_seg': gt_n_seg,
|
399 |
+
'file_name': img_name,
|
400 |
+
'prefix_name': self.image_prefix,
|
401 |
+
'image': [img],
|
402 |
+
|
403 |
+
'mean_xyz': self.mean_xyz,
|
404 |
+
'scale_xyz': self.scale_xyz,
|
405 |
+
'gt_sc': p3d_xyzs,
|
406 |
+
'gt_norm_sc': (p3d_xyzs - self.mean_xyz) / self.scale_xyz
|
407 |
+
}
|
408 |
+
|
409 |
+
def __getitem__(self, idx):
|
410 |
+
if self.train:
|
411 |
+
return self.get_item_train(idx=idx)
|
412 |
+
else:
|
413 |
+
return self.get_item_test(idx=idx)
|
414 |
+
|
415 |
+
def __len__(self):
|
416 |
+
return len(self.img_fns)
|
417 |
+
|
418 |
+
def read_image(self, image_name):
|
419 |
+
return cv2.imread(osp.join(self.dataset_path, image_name))
|
420 |
+
|
421 |
+
def jitter_augmentation(self, img, params):
|
422 |
+
brightness, contrast, saturation, hue = params
|
423 |
+
p = np.random.randint(0, 20) / 20
|
424 |
+
b = brightness[0] + (brightness[1] - brightness[0]) / 20 * p
|
425 |
+
img = tvf.adjust_brightness(img=img, brightness_factor=b)
|
426 |
+
|
427 |
+
p = np.random.randint(0, 20) / 20
|
428 |
+
c = contrast[0] + (contrast[1] - contrast[0]) / 20 * p
|
429 |
+
img = tvf.adjust_contrast(img=img, contrast_factor=c)
|
430 |
+
|
431 |
+
p = np.random.randint(0, 20) / 20
|
432 |
+
s = saturation[0] + (saturation[1] - saturation[0]) / 20 * p
|
433 |
+
img = tvf.adjust_saturation(img=img, saturation_factor=s)
|
434 |
+
|
435 |
+
p = np.random.randint(0, 20) / 20
|
436 |
+
h = hue[0] + (hue[1] - hue[0]) / 20 * p
|
437 |
+
img = tvf.adjust_hue(img=img, hue_factor=h)
|
438 |
+
|
439 |
+
return img
|
440 |
+
|
441 |
+
def random_points(self, n, d, h, w):
|
442 |
+
desc = np.random.random((n, d))
|
443 |
+
desc = desc / np.linalg.norm(desc, ord=2, axis=1)[..., None]
|
444 |
+
xs = np.random.randint(0, w - 1, size=(n, 1))
|
445 |
+
ys = np.random.randint(0, h - 1, size=(n, 1))
|
446 |
+
kpts = np.hstack([xs, ys])
|
447 |
+
return desc, kpts
|
448 |
+
|
449 |
+
def random_points_from_reference(self, n, ref_kpts, ref_segs, ref_xyzs, radius=5):
|
450 |
+
n_ref = ref_kpts.shape[0]
|
451 |
+
if n_ref < n:
|
452 |
+
ref_ids = np.random.choice([i for i in range(n_ref)], size=n).tolist()
|
453 |
+
else:
|
454 |
+
ref_ids = [i for i in range(n)]
|
455 |
+
|
456 |
+
new_xs = []
|
457 |
+
new_ys = []
|
458 |
+
# new_descs = []
|
459 |
+
new_segs = []
|
460 |
+
new_xyzs = []
|
461 |
+
for i in ref_ids:
|
462 |
+
nx = np.random.randint(-radius, radius) + ref_kpts[i, 0]
|
463 |
+
ny = np.random.randint(-radius, radius) + ref_kpts[i, 1]
|
464 |
+
|
465 |
+
new_xs.append(nx)
|
466 |
+
new_ys.append(ny)
|
467 |
+
# new_descs.append(ref_descs[i])
|
468 |
+
new_segs.append(ref_segs[i])
|
469 |
+
new_xyzs.append(ref_xyzs[i])
|
470 |
+
|
471 |
+
new_xs = np.array(new_xs).reshape(n, 1)
|
472 |
+
new_ys = np.array(new_ys).reshape(n, 1)
|
473 |
+
new_segs = np.array(new_segs).reshape(n, )
|
474 |
+
new_kpts = np.hstack([new_xs, new_ys])
|
475 |
+
# new_descs = np.array(new_descs).reshape(n, -1)
|
476 |
+
new_xyzs = np.array(new_xyzs)
|
477 |
+
return new_kpts, new_segs, new_xyzs
|
third_party/pram/dataset/cambridge_landmarks.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> cambridge_landmarks
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 29/01/2024 14:41
|
7 |
+
=================================================='''
|
8 |
+
import os.path as osp
|
9 |
+
import numpy as np
|
10 |
+
from colmap_utils.read_write_model import read_model
|
11 |
+
import torchvision.transforms as tvt
|
12 |
+
from dataset.basicdataset import BasicDataset
|
13 |
+
|
14 |
+
|
15 |
+
class CambridgeLandmarks(BasicDataset):
|
16 |
+
def __init__(self, landmark_path, scene, dataset_path, n_class, seg_mode, seg_method, dataset='CambridgeLandmarks',
|
17 |
+
nfeatures=1024,
|
18 |
+
query_p3d_fn=None,
|
19 |
+
train=True,
|
20 |
+
with_aug=False,
|
21 |
+
min_inliers=0,
|
22 |
+
max_inliers=4096,
|
23 |
+
random_inliers=False,
|
24 |
+
jitter_params=None,
|
25 |
+
scale_params=None,
|
26 |
+
image_dim=3,
|
27 |
+
query_info_path=None,
|
28 |
+
sample_ratio=1,
|
29 |
+
):
|
30 |
+
self.landmark_path = osp.join(landmark_path, scene)
|
31 |
+
self.dataset_path = osp.join(dataset_path, scene)
|
32 |
+
self.n_class = n_class
|
33 |
+
self.dataset = dataset + '/' + scene
|
34 |
+
self.nfeatures = nfeatures
|
35 |
+
self.with_aug = with_aug
|
36 |
+
self.jitter_params = jitter_params
|
37 |
+
self.scale_params = scale_params
|
38 |
+
self.image_dim = image_dim
|
39 |
+
self.train = train
|
40 |
+
self.min_inliers = min_inliers
|
41 |
+
self.max_inliers = max_inliers if max_inliers < nfeatures else nfeatures
|
42 |
+
self.random_inliers = random_inliers
|
43 |
+
self.image_prefix = ''
|
44 |
+
train_transforms = []
|
45 |
+
if self.with_aug:
|
46 |
+
train_transforms.append(tvt.ColorJitter(
|
47 |
+
brightness=jitter_params['brightness'],
|
48 |
+
contrast=jitter_params['contrast'],
|
49 |
+
saturation=jitter_params['saturation'],
|
50 |
+
hue=jitter_params['hue']))
|
51 |
+
if jitter_params['blur'] > 0:
|
52 |
+
train_transforms.append(tvt.GaussianBlur(kernel_size=int(jitter_params['blur'])))
|
53 |
+
self.train_transforms = tvt.Compose(train_transforms)
|
54 |
+
|
55 |
+
if train:
|
56 |
+
self.cameras, self.images, point3Ds = read_model(path=osp.join(self.landmark_path, '3D-models'), ext='.bin')
|
57 |
+
self.name_to_id = {image.name: i for i, image in self.images.items() if len(self.images[i].point3D_ids) > 0}
|
58 |
+
|
59 |
+
# only for testing of query images
|
60 |
+
if not self.train:
|
61 |
+
data = np.load(query_p3d_fn, allow_pickle=True)[()]
|
62 |
+
self.img_p3d = data
|
63 |
+
else:
|
64 |
+
self.img_p3d = {}
|
65 |
+
|
66 |
+
self.img_fns = []
|
67 |
+
with open(osp.join(self.dataset_path, 'dataset_train.txt' if train else 'dataset_test.txt'), 'r') as f:
|
68 |
+
lines = f.readlines()[3:] # ignore the first 3 lines
|
69 |
+
for l in lines:
|
70 |
+
l = l.strip().split()[0]
|
71 |
+
if train and l not in self.name_to_id.keys():
|
72 |
+
continue
|
73 |
+
if not train and l not in self.img_p3d.keys():
|
74 |
+
continue
|
75 |
+
self.img_fns.append(l)
|
76 |
+
|
77 |
+
print('Load {} images from {} for {}...'.format(len(self.img_fns),
|
78 |
+
self.dataset, 'training' if train else 'eval'))
|
79 |
+
|
80 |
+
data = np.load(osp.join(self.landmark_path,
|
81 |
+
'point3D_cluster_n{:d}_{:s}_{:s}.npy'.format(n_class - 1, seg_mode, seg_method)),
|
82 |
+
allow_pickle=True)[()]
|
83 |
+
p3d_id = data['id']
|
84 |
+
seg_id = data['label']
|
85 |
+
self.p3d_seg = {p3d_id[i]: seg_id[i] for i in range(p3d_id.shape[0])}
|
86 |
+
xyzs = data['xyz']
|
87 |
+
self.p3d_xyzs = {p3d_id[i]: xyzs[i] for i in range(p3d_id.shape[0])}
|
88 |
+
|
89 |
+
# with open(osp.join(self.landmark_path, 'sc_mean_scale.txt'), 'r') as f:
|
90 |
+
# lines = f.readlines()
|
91 |
+
# for l in lines:
|
92 |
+
# l = l.strip().split()
|
93 |
+
# self.mean_xyz = np.array([float(v) for v in l[:3]])
|
94 |
+
# self.scale_xyz = np.array([float(v) for v in l[3:]])
|
95 |
+
|
96 |
+
if not train:
|
97 |
+
self.query_info = self.read_query_info(path=query_info_path)
|
98 |
+
|
99 |
+
self.nfeatures = nfeatures
|
100 |
+
self.feature_dir = osp.join(self.landmark_path, 'feats')
|
101 |
+
self.feats = {}
|
third_party/pram/dataset/customdataset.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> customdataset.py
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 29/01/2024 14:38
|
7 |
+
=================================================='''
|
8 |
+
import os.path as osp
|
9 |
+
import numpy as np
|
10 |
+
from colmap_utils.read_write_model import read_model
|
11 |
+
import torchvision.transforms as tvt
|
12 |
+
from dataset.basicdataset import BasicDataset
|
13 |
+
|
14 |
+
|
15 |
+
class CustomDataset(BasicDataset):
|
16 |
+
def __init__(self, landmark_path, scene, dataset_path, n_class, seg_mode, seg_method, dataset,
|
17 |
+
nfeatures=1024,
|
18 |
+
query_p3d_fn=None,
|
19 |
+
train=True,
|
20 |
+
with_aug=False,
|
21 |
+
min_inliers=0,
|
22 |
+
max_inliers=4096,
|
23 |
+
random_inliers=False,
|
24 |
+
jitter_params=None,
|
25 |
+
scale_params=None,
|
26 |
+
image_dim=3,
|
27 |
+
query_info_path=None,
|
28 |
+
sample_ratio=1,
|
29 |
+
):
|
30 |
+
self.landmark_path = osp.join(landmark_path, scene)
|
31 |
+
self.dataset_path = osp.join(dataset_path, scene)
|
32 |
+
self.n_class = n_class
|
33 |
+
self.dataset = dataset + '/' + scene
|
34 |
+
self.nfeatures = nfeatures
|
35 |
+
self.with_aug = with_aug
|
36 |
+
self.jitter_params = jitter_params
|
37 |
+
self.scale_params = scale_params
|
38 |
+
self.image_dim = image_dim
|
39 |
+
self.train = train
|
40 |
+
self.min_inliers = min_inliers
|
41 |
+
self.max_inliers = max_inliers if max_inliers < nfeatures else nfeatures
|
42 |
+
self.random_inliers = random_inliers
|
43 |
+
self.image_prefix = ''
|
44 |
+
|
45 |
+
train_transforms = []
|
46 |
+
if self.with_aug:
|
47 |
+
train_transforms.append(tvt.ColorJitter(
|
48 |
+
brightness=jitter_params['brightness'],
|
49 |
+
contrast=jitter_params['contrast'],
|
50 |
+
saturation=jitter_params['saturation'],
|
51 |
+
hue=jitter_params['hue']))
|
52 |
+
if jitter_params['blur'] > 0:
|
53 |
+
train_transforms.append(tvt.GaussianBlur(kernel_size=int(jitter_params['blur'])))
|
54 |
+
self.train_transforms = tvt.Compose(train_transforms)
|
55 |
+
|
56 |
+
if train:
|
57 |
+
self.cameras, self.images, point3Ds = read_model(path=osp.join(self.landmark_path, '3D-models'), ext='.bin')
|
58 |
+
self.name_to_id = {image.name: i for i, image in self.images.items() if len(self.images[i].point3D_ids) > 0}
|
59 |
+
|
60 |
+
# only for testing of query images
|
61 |
+
if not self.train:
|
62 |
+
data = np.load(query_p3d_fn, allow_pickle=True)[()]
|
63 |
+
self.img_p3d = data
|
64 |
+
else:
|
65 |
+
self.img_p3d = {}
|
66 |
+
|
67 |
+
if train:
|
68 |
+
self.img_fns = [self.images[v].name for v in self.images.keys() if
|
69 |
+
self.images[v].name in self.name_to_id.keys()]
|
70 |
+
else:
|
71 |
+
self.img_fns = []
|
72 |
+
with open(osp.join(self.dataset_path, 'queries_with_intrinsics.txt'), 'r') as f:
|
73 |
+
lines = f.readlines()
|
74 |
+
for l in lines:
|
75 |
+
self.img_fns.append(l.strip().split()[0])
|
76 |
+
print('Load {} images from {} for {}...'.format(len(self.img_fns),
|
77 |
+
self.dataset, 'training' if train else 'eval'))
|
78 |
+
|
79 |
+
data = np.load(osp.join(self.landmark_path,
|
80 |
+
'point3D_cluster_n{:d}_{:s}_{:s}.npy'.format(n_class - 1, seg_mode, seg_method)),
|
81 |
+
allow_pickle=True)[()]
|
82 |
+
p3d_id = data['id']
|
83 |
+
seg_id = data['label']
|
84 |
+
self.p3d_seg = {p3d_id[i]: seg_id[i] for i in range(p3d_id.shape[0])}
|
85 |
+
xyzs = data['xyz']
|
86 |
+
self.p3d_xyzs = {p3d_id[i]: xyzs[i] for i in range(p3d_id.shape[0])}
|
87 |
+
|
88 |
+
if not train:
|
89 |
+
self.query_info = self.read_query_info(path=query_info_path)
|
90 |
+
|
91 |
+
self.nfeatures = nfeatures
|
92 |
+
self.feature_dir = osp.join(self.landmark_path, 'feats')
|
93 |
+
self.feats = {}
|
third_party/pram/dataset/get_dataset.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> get_dataset
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 29/01/2024 14:40
|
7 |
+
=================================================='''
|
8 |
+
import os.path as osp
|
9 |
+
import yaml
|
10 |
+
from dataset.aachen import Aachen
|
11 |
+
from dataset.twelve_scenes import TwelveScenes
|
12 |
+
from dataset.seven_scenes import SevenScenes
|
13 |
+
from dataset.cambridge_landmarks import CambridgeLandmarks
|
14 |
+
from dataset.customdataset import CustomDataset
|
15 |
+
from dataset.recdataset import RecDataset
|
16 |
+
|
17 |
+
|
18 |
+
def get_dataset(dataset):
|
19 |
+
if dataset in ['7Scenes', 'S']:
|
20 |
+
return SevenScenes
|
21 |
+
elif dataset in ['12Scenes', 'T']:
|
22 |
+
return TwelveScenes
|
23 |
+
elif dataset in ['Aachen', 'A']:
|
24 |
+
return Aachen
|
25 |
+
elif dataset in ['CambridgeLandmarks', 'C']:
|
26 |
+
return CambridgeLandmarks
|
27 |
+
else:
|
28 |
+
return CustomDataset
|
29 |
+
|
30 |
+
|
31 |
+
def compose_datasets(datasets, config, train=True, sample_ratio=None):
|
32 |
+
sub_sets = []
|
33 |
+
for name in datasets:
|
34 |
+
if name == 'S':
|
35 |
+
ds_name = '7Scenes'
|
36 |
+
elif name == 'T':
|
37 |
+
ds_name = '12Scenes'
|
38 |
+
elif name == 'A':
|
39 |
+
ds_name = 'Aachen'
|
40 |
+
elif name == 'R':
|
41 |
+
ds_name = 'RobotCar-Seasons'
|
42 |
+
elif name == 'C':
|
43 |
+
ds_name = 'CambridgeLandmarks'
|
44 |
+
else:
|
45 |
+
ds_name = name
|
46 |
+
# raise '{} dataset does not exist'.format(name)
|
47 |
+
landmark_path = osp.join(config['landmark_path'], ds_name)
|
48 |
+
dataset_path = osp.join(config['dataset_path'], ds_name)
|
49 |
+
scene_config_path = 'configs/datasets/{:s}.yaml'.format(ds_name)
|
50 |
+
|
51 |
+
with open(scene_config_path, 'r') as f:
|
52 |
+
scene_config = yaml.load(f, Loader=yaml.Loader)
|
53 |
+
DSet = get_dataset(dataset=ds_name)
|
54 |
+
|
55 |
+
for scene in scene_config['scenes']:
|
56 |
+
if sample_ratio is None:
|
57 |
+
scene_sample_ratio = scene_config[scene]['training_sample_ratio'] if train else scene_config[scene][
|
58 |
+
'eval_sample_ratio']
|
59 |
+
else:
|
60 |
+
scene_sample_ratio = sample_ratio
|
61 |
+
scene_set = DSet(landmark_path=landmark_path,
|
62 |
+
dataset_path=dataset_path,
|
63 |
+
scene=scene,
|
64 |
+
seg_mode=scene_config[scene]['cluster_mode'],
|
65 |
+
seg_method=scene_config[scene]['cluster_method'],
|
66 |
+
n_class=scene_config[scene]['n_cluster'] + 1, # including invalid - 0
|
67 |
+
dataset=ds_name,
|
68 |
+
train=train,
|
69 |
+
nfeatures=config['max_keypoints'] if train else config['eval_max_keypoints'],
|
70 |
+
min_inliers=config['min_inliers'],
|
71 |
+
max_inliers=config['max_inliers'],
|
72 |
+
random_inliers=config['random_inliers'],
|
73 |
+
with_aug=config['with_aug'],
|
74 |
+
jitter_params=config['jitter_params'],
|
75 |
+
scale_params=config['scale_params'],
|
76 |
+
image_dim=config['image_dim'],
|
77 |
+
query_p3d_fn=osp.join(config['landmark_path'], ds_name, scene,
|
78 |
+
'point3D_query_n{:d}_{:s}_{:s}.npy'.format(
|
79 |
+
scene_config[scene]['n_cluster'],
|
80 |
+
scene_config[scene]['cluster_mode'],
|
81 |
+
scene_config[scene]['cluster_method'])),
|
82 |
+
query_info_path=osp.join(config['dataset_path'], ds_name, scene,
|
83 |
+
'queries_with_intrinsics.txt'),
|
84 |
+
sample_ratio=scene_sample_ratio,
|
85 |
+
)
|
86 |
+
|
87 |
+
sub_sets.append(scene_set)
|
88 |
+
|
89 |
+
return RecDataset(sub_sets=sub_sets)
|
third_party/pram/dataset/recdataset.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> recdataset
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 29/01/2024 14:42
|
7 |
+
=================================================='''
|
8 |
+
import numpy as np
|
9 |
+
from torch.utils.data import Dataset
|
10 |
+
|
11 |
+
|
12 |
+
class RecDataset(Dataset):
|
13 |
+
def __init__(self, sub_sets=[]):
|
14 |
+
assert len(sub_sets) >= 1
|
15 |
+
|
16 |
+
self.sub_sets = sub_sets
|
17 |
+
self.names = []
|
18 |
+
|
19 |
+
self.sub_set_index = []
|
20 |
+
self.seg_offsets = []
|
21 |
+
self.sub_set_item_index = []
|
22 |
+
self.dataset_names = []
|
23 |
+
self.scene_names = []
|
24 |
+
start_index_valid_seg = 1 # start from 1, 0 is for invalid
|
25 |
+
|
26 |
+
total_subset = 0
|
27 |
+
for scene_set in sub_sets: # [0, n_class]
|
28 |
+
name = scene_set.dataset
|
29 |
+
self.names.append(name)
|
30 |
+
n_samples = len(scene_set)
|
31 |
+
|
32 |
+
n_class = scene_set.n_class
|
33 |
+
self.seg_offsets = self.seg_offsets + [start_index_valid_seg for v in range(len(scene_set))]
|
34 |
+
start_index_valid_seg = start_index_valid_seg + n_class - 1
|
35 |
+
|
36 |
+
self.sub_set_index = self.sub_set_index + [total_subset for k in range(n_samples)]
|
37 |
+
self.sub_set_item_index = self.sub_set_item_index + [k for k in range(n_samples)]
|
38 |
+
|
39 |
+
# self.dataset_names = self.dataset_names + [name for k in range(n_samples)]
|
40 |
+
self.scene_names = self.scene_names + [name for k in range(n_samples)]
|
41 |
+
total_subset += 1
|
42 |
+
|
43 |
+
self.n_class = start_index_valid_seg
|
44 |
+
|
45 |
+
print('Load {} images {} segs from {} subsets from {}'.format(len(self.sub_set_item_index), self.n_class,
|
46 |
+
len(sub_sets), self.names))
|
47 |
+
|
48 |
+
def __len__(self):
|
49 |
+
return len(self.sub_set_item_index)
|
50 |
+
|
51 |
+
def __getitem__(self, idx):
|
52 |
+
subset_idx = self.sub_set_index[idx]
|
53 |
+
item_idx = self.sub_set_item_index[idx]
|
54 |
+
scene_name = self.scene_names[idx]
|
55 |
+
|
56 |
+
out = self.sub_sets[subset_idx][item_idx]
|
57 |
+
|
58 |
+
org_gt_seg = out['gt_seg']
|
59 |
+
org_gt_cls = out['gt_cls']
|
60 |
+
org_gt_cls_dist = out['gt_cls_dist']
|
61 |
+
org_gt_n_seg = out['gt_n_seg']
|
62 |
+
offset = self.seg_offsets[idx]
|
63 |
+
org_n_class = self.sub_sets[subset_idx].n_class
|
64 |
+
|
65 |
+
gt_seg = np.zeros(shape=(org_gt_seg.shape[0],), dtype=int) # [0, ..., n_features]
|
66 |
+
gt_n_seg = np.zeros(shape=(self.n_class,), dtype=int)
|
67 |
+
gt_cls = np.zeros(shape=(self.n_class,), dtype=int)
|
68 |
+
gt_cls_dist = np.zeros(shape=(self.n_class,), dtype=float)
|
69 |
+
|
70 |
+
# copy invalid segments
|
71 |
+
gt_n_seg[0] = org_gt_n_seg[0]
|
72 |
+
gt_cls[0] = org_gt_cls[0]
|
73 |
+
gt_cls_dist[0] = org_gt_cls_dist[0]
|
74 |
+
# print('org: ', org_n_class, org_gt_seg.shape, org_gt_n_seg.shape, org_gt_seg)
|
75 |
+
|
76 |
+
# copy valid segments
|
77 |
+
gt_seg[org_gt_seg > 0] = org_gt_seg[org_gt_seg > 0] + offset - 1 # [0, ..., 1023]
|
78 |
+
gt_n_seg[offset:offset + org_n_class - 1] = org_gt_n_seg[1:] # [0...,n_seg]
|
79 |
+
gt_cls[offset:offset + org_n_class - 1] = org_gt_cls[1:] # [0, ..., n_seg]
|
80 |
+
gt_cls_dist[offset:offset + org_n_class - 1] = org_gt_cls_dist[1:] # [0, ..., n_seg]
|
81 |
+
|
82 |
+
out['gt_seg'] = gt_seg
|
83 |
+
out['gt_cls'] = gt_cls
|
84 |
+
out['gt_cls_dist'] = gt_cls_dist
|
85 |
+
out['gt_n_seg'] = gt_n_seg
|
86 |
+
|
87 |
+
# print('gt: ', org_n_class, gt_seg.shape, gt_n_seg.shape, gt_seg)
|
88 |
+
out['scene_name'] = scene_name
|
89 |
+
|
90 |
+
# out['org_gt_seg'] = org_gt_seg
|
91 |
+
# out['org_gt_n_seg'] = org_gt_n_seg
|
92 |
+
# out['org_gt_cls'] = org_gt_cls
|
93 |
+
# out['org_gt_cls_dist'] = org_gt_cls_dist
|
94 |
+
|
95 |
+
return out
|
third_party/pram/dataset/seven_scenes.py
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> seven_scenes
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 29/01/2024 14:36
|
7 |
+
=================================================='''
|
8 |
+
import os
|
9 |
+
import os.path as osp
|
10 |
+
import numpy as np
|
11 |
+
from colmap_utils.read_write_model import read_model
|
12 |
+
import torchvision.transforms as tvt
|
13 |
+
from dataset.basicdataset import BasicDataset
|
14 |
+
|
15 |
+
|
16 |
+
class SevenScenes(BasicDataset):
|
17 |
+
def __init__(self, landmark_path, scene, dataset_path, n_class, seg_mode, seg_method, dataset='7Scenes',
|
18 |
+
nfeatures=1024,
|
19 |
+
query_p3d_fn=None,
|
20 |
+
train=True,
|
21 |
+
with_aug=False,
|
22 |
+
min_inliers=0,
|
23 |
+
max_inliers=4096,
|
24 |
+
random_inliers=False,
|
25 |
+
jitter_params=None,
|
26 |
+
scale_params=None,
|
27 |
+
image_dim=3,
|
28 |
+
query_info_path=None,
|
29 |
+
sample_ratio=1,
|
30 |
+
):
|
31 |
+
self.landmark_path = osp.join(landmark_path, scene)
|
32 |
+
self.dataset_path = osp.join(dataset_path, scene)
|
33 |
+
self.n_class = n_class
|
34 |
+
self.dataset = dataset + '/' + scene
|
35 |
+
self.nfeatures = nfeatures
|
36 |
+
self.with_aug = with_aug
|
37 |
+
self.jitter_params = jitter_params
|
38 |
+
self.scale_params = scale_params
|
39 |
+
self.image_dim = image_dim
|
40 |
+
self.train = train
|
41 |
+
self.min_inliers = min_inliers
|
42 |
+
self.max_inliers = max_inliers if max_inliers < nfeatures else nfeatures
|
43 |
+
self.random_inliers = random_inliers
|
44 |
+
self.image_prefix = ''
|
45 |
+
|
46 |
+
train_transforms = []
|
47 |
+
if self.with_aug:
|
48 |
+
train_transforms.append(tvt.ColorJitter(
|
49 |
+
brightness=jitter_params['brightness'],
|
50 |
+
contrast=jitter_params['contrast'],
|
51 |
+
saturation=jitter_params['saturation'],
|
52 |
+
hue=jitter_params['hue']))
|
53 |
+
if jitter_params['blur'] > 0:
|
54 |
+
train_transforms.append(tvt.GaussianBlur(kernel_size=int(jitter_params['blur'])))
|
55 |
+
self.train_transforms = tvt.Compose(train_transforms)
|
56 |
+
|
57 |
+
if train:
|
58 |
+
self.cameras, self.images, point3Ds = read_model(path=osp.join(self.landmark_path, '3D-models'), ext='.bin')
|
59 |
+
self.name_to_id = {image.name: i for i, image in self.images.items() if len(self.images[i].point3D_ids) > 0}
|
60 |
+
|
61 |
+
# only for testing of query images
|
62 |
+
if not self.train:
|
63 |
+
data = np.load(query_p3d_fn, allow_pickle=True)[()]
|
64 |
+
self.img_p3d = data
|
65 |
+
else:
|
66 |
+
self.img_p3d = {}
|
67 |
+
|
68 |
+
if self.train:
|
69 |
+
split_fn = osp.join(self.dataset_path, 'TrainSplit.txt')
|
70 |
+
else:
|
71 |
+
split_fn = osp.join(self.dataset_path, 'TestSplit.txt')
|
72 |
+
|
73 |
+
self.img_fns = []
|
74 |
+
with open(split_fn, 'r') as f:
|
75 |
+
lines = f.readlines()
|
76 |
+
for l in lines:
|
77 |
+
seq = int(l.strip()[8:])
|
78 |
+
fns = os.listdir(osp.join(self.dataset_path, osp.join('seq-{:02d}'.format(seq))))
|
79 |
+
fns = sorted(fns)
|
80 |
+
nf = 0
|
81 |
+
for fn in fns:
|
82 |
+
if fn.find('png') >= 0:
|
83 |
+
if train and 'seq-{:02d}'.format(seq) + '/' + fn not in self.name_to_id.keys():
|
84 |
+
continue
|
85 |
+
if not train and 'seq-{:02d}'.format(seq) + '/' + fn not in self.img_p3d.keys():
|
86 |
+
continue
|
87 |
+
if nf % sample_ratio == 0:
|
88 |
+
self.img_fns.append('seq-{:02d}'.format(seq) + '/' + fn)
|
89 |
+
nf += 1
|
90 |
+
|
91 |
+
print('Load {} images from {} for {}...'.format(len(self.img_fns),
|
92 |
+
self.dataset, 'training' if train else 'eval'))
|
93 |
+
|
94 |
+
data = np.load(osp.join(self.landmark_path,
|
95 |
+
'point3D_cluster_n{:d}_{:s}_{:s}.npy'.format(n_class - 1, seg_mode, seg_method)),
|
96 |
+
allow_pickle=True)[()]
|
97 |
+
p3d_id = data['id']
|
98 |
+
seg_id = data['label']
|
99 |
+
self.p3d_seg = {p3d_id[i]: seg_id[i] for i in range(p3d_id.shape[0])}
|
100 |
+
xyzs = data['xyz']
|
101 |
+
self.p3d_xyzs = {p3d_id[i]: xyzs[i] for i in range(p3d_id.shape[0])}
|
102 |
+
|
103 |
+
# with open(osp.join(self.landmark_path, 'sc_mean_scale.txt'), 'r') as f:
|
104 |
+
# lines = f.readlines()
|
105 |
+
# for l in lines:
|
106 |
+
# l = l.strip().split()
|
107 |
+
# self.mean_xyz = np.array([float(v) for v in l[:3]])
|
108 |
+
# self.scale_xyz = np.array([float(v) for v in l[3:]])
|
109 |
+
|
110 |
+
if not train:
|
111 |
+
self.query_info = self.read_query_info(path=query_info_path)
|
112 |
+
|
113 |
+
self.nfeatures = nfeatures
|
114 |
+
self.feature_dir = osp.join(self.landmark_path, 'feats')
|
115 |
+
self.feats = {}
|
third_party/pram/dataset/twelve_scenes.py
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> twelve_scenes
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 29/01/2024 14:37
|
7 |
+
=================================================='''
|
8 |
+
import os
|
9 |
+
import os.path as osp
|
10 |
+
import numpy as np
|
11 |
+
from colmap_utils.read_write_model import read_model
|
12 |
+
import torchvision.transforms as tvt
|
13 |
+
from dataset.basicdataset import BasicDataset
|
14 |
+
|
15 |
+
|
16 |
+
class TwelveScenes(BasicDataset):
|
17 |
+
def __init__(self, landmark_path, scene, dataset_path, n_class, seg_mode, seg_method, dataset='12Scenes',
|
18 |
+
nfeatures=1024,
|
19 |
+
query_p3d_fn=None,
|
20 |
+
train=True,
|
21 |
+
with_aug=False,
|
22 |
+
min_inliers=0,
|
23 |
+
max_inliers=4096,
|
24 |
+
random_inliers=False,
|
25 |
+
jitter_params=None,
|
26 |
+
scale_params=None,
|
27 |
+
image_dim=3,
|
28 |
+
query_info_path=None,
|
29 |
+
sample_ratio=1,
|
30 |
+
):
|
31 |
+
self.landmark_path = osp.join(landmark_path, scene)
|
32 |
+
self.dataset_path = osp.join(dataset_path, scene)
|
33 |
+
self.n_class = n_class
|
34 |
+
self.dataset = dataset + '/' + scene
|
35 |
+
self.nfeatures = nfeatures
|
36 |
+
self.with_aug = with_aug
|
37 |
+
self.jitter_params = jitter_params
|
38 |
+
self.scale_params = scale_params
|
39 |
+
self.image_dim = image_dim
|
40 |
+
self.train = train
|
41 |
+
self.min_inliers = min_inliers
|
42 |
+
self.max_inliers = max_inliers if max_inliers < nfeatures else nfeatures
|
43 |
+
self.random_inliers = random_inliers
|
44 |
+
self.image_prefix = ''
|
45 |
+
|
46 |
+
train_transforms = []
|
47 |
+
if self.with_aug:
|
48 |
+
train_transforms.append(tvt.ColorJitter(
|
49 |
+
brightness=jitter_params['brightness'],
|
50 |
+
contrast=jitter_params['contrast'],
|
51 |
+
saturation=jitter_params['saturation'],
|
52 |
+
hue=jitter_params['hue']))
|
53 |
+
if jitter_params['blur'] > 0:
|
54 |
+
train_transforms.append(tvt.GaussianBlur(kernel_size=int(jitter_params['blur'])))
|
55 |
+
self.train_transforms = tvt.Compose(train_transforms)
|
56 |
+
|
57 |
+
if train:
|
58 |
+
self.cameras, self.images, point3Ds = read_model(path=osp.join(self.landmark_path, '3D-models'), ext='.bin')
|
59 |
+
self.name_to_id = {image.name: i for i, image in self.images.items() if len(self.images[i].point3D_ids) > 0}
|
60 |
+
|
61 |
+
# only for testing of query images
|
62 |
+
if not self.train:
|
63 |
+
data = np.load(query_p3d_fn, allow_pickle=True)[()]
|
64 |
+
self.img_p3d = data
|
65 |
+
else:
|
66 |
+
self.img_p3d = {}
|
67 |
+
|
68 |
+
with open(osp.join(self.dataset_path, 'split.txt'), 'r') as f:
|
69 |
+
l = f.readline()
|
70 |
+
l = l.strip().split(' ') # sequence0 [frames=357] [start=0 ; end=356], first sequence for testing
|
71 |
+
start_img_id = l[-3].split('=')[-1]
|
72 |
+
end_img_id = l[-1].split('=')[-1][:-1]
|
73 |
+
test_start_img_id = int(start_img_id)
|
74 |
+
test_end_img_id = int(end_img_id)
|
75 |
+
|
76 |
+
self.img_fns = []
|
77 |
+
fns = os.listdir(osp.join(self.dataset_path, 'data'))
|
78 |
+
fns = sorted(fns)
|
79 |
+
nf = 0
|
80 |
+
for fn in fns:
|
81 |
+
if fn.find('jpg') >= 0: # frame-001098.color.jpg
|
82 |
+
frame_id = int(fn.split('.')[0].split('-')[-1])
|
83 |
+
if not train and frame_id > test_end_img_id:
|
84 |
+
continue
|
85 |
+
if train and frame_id <= test_end_img_id:
|
86 |
+
continue
|
87 |
+
|
88 |
+
if train and 'data' + '/' + fn not in self.name_to_id.keys():
|
89 |
+
continue
|
90 |
+
|
91 |
+
if not train and 'data' + '/' + fn not in self.img_p3d.keys():
|
92 |
+
continue
|
93 |
+
if nf % sample_ratio == 0:
|
94 |
+
self.img_fns.append('data' + '/' + fn)
|
95 |
+
nf += 1
|
96 |
+
|
97 |
+
print('Load {} images from {} for {}...'.format(len(self.img_fns),
|
98 |
+
self.dataset, 'training' if train else 'eval'))
|
99 |
+
|
100 |
+
data = np.load(osp.join(self.landmark_path,
|
101 |
+
'point3D_cluster_n{:d}_{:s}_{:s}.npy'.format(n_class - 1, seg_mode, seg_method)),
|
102 |
+
allow_pickle=True)[()]
|
103 |
+
p3d_id = data['id']
|
104 |
+
seg_id = data['label']
|
105 |
+
self.p3d_seg = {p3d_id[i]: seg_id[i] for i in range(p3d_id.shape[0])}
|
106 |
+
xyzs = data['xyz']
|
107 |
+
self.p3d_xyzs = {p3d_id[i]: xyzs[i] for i in range(p3d_id.shape[0])}
|
108 |
+
|
109 |
+
# with open(osp.join(self.landmark_path, 'sc_mean_scale.txt'), 'r') as f:
|
110 |
+
# lines = f.readlines()
|
111 |
+
# for l in lines:
|
112 |
+
# l = l.strip().split()
|
113 |
+
# self.mean_xyz = np.array([float(v) for v in l[:3]])
|
114 |
+
# self.scale_xyz = np.array([float(v) for v in l[3:]])
|
115 |
+
|
116 |
+
if not train:
|
117 |
+
self.query_info = self.read_query_info(path=query_info_path)
|
118 |
+
|
119 |
+
self.nfeatures = nfeatures
|
120 |
+
self.feature_dir = osp.join(self.landmark_path, 'feats')
|
121 |
+
self.feats = {}
|
third_party/pram/dataset/utils.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> utils
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 29/01/2024 14:31
|
7 |
+
=================================================='''
|
8 |
+
import torch
|
9 |
+
|
10 |
+
|
11 |
+
def normalize_size(x, size, scale=0.7):
|
12 |
+
size = size.reshape([1, 2])
|
13 |
+
norm_fac = size.max() + 0.5
|
14 |
+
return (x - size / 2) / (norm_fac * scale)
|
15 |
+
|
16 |
+
|
17 |
+
def collect_batch(batch):
|
18 |
+
out = {}
|
19 |
+
# if len(batch) == 0:
|
20 |
+
# return batch
|
21 |
+
# else:
|
22 |
+
for k in batch[0].keys():
|
23 |
+
tmp = []
|
24 |
+
for v in batch:
|
25 |
+
tmp.append(v[k])
|
26 |
+
if isinstance(batch[0][k], str) or isinstance(batch[0][k], list):
|
27 |
+
out[k] = tmp
|
28 |
+
else:
|
29 |
+
out[k] = torch.cat([torch.from_numpy(i)[None] for i in tmp], dim=0)
|
30 |
+
|
31 |
+
return out
|
third_party/pram/environment.yml
ADDED
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: pram
|
2 |
+
channels:
|
3 |
+
- conda-forge
|
4 |
+
- defaults
|
5 |
+
dependencies:
|
6 |
+
- _libgcc_mutex=0.1=conda_forge
|
7 |
+
- _openmp_mutex=4.5=2_gnu
|
8 |
+
- binutils_impl_linux-64=2.38=h2a08ee3_1
|
9 |
+
- bzip2=1.0.8=h5eee18b_5
|
10 |
+
- ca-certificates=2024.3.11=h06a4308_0
|
11 |
+
- gcc=12.1.0=h9ea6d83_10
|
12 |
+
- gcc_impl_linux-64=12.1.0=hea43390_17
|
13 |
+
- kernel-headers_linux-64=2.6.32=he073ed8_17
|
14 |
+
- ld_impl_linux-64=2.38=h1181459_1
|
15 |
+
- libffi=3.4.4=h6a678d5_0
|
16 |
+
- libgcc-devel_linux-64=12.1.0=h1ec3361_17
|
17 |
+
- libgcc-ng=13.2.0=h807b86a_5
|
18 |
+
- libgomp=13.2.0=h807b86a_5
|
19 |
+
- libsanitizer=12.1.0=ha89aaad_17
|
20 |
+
- libstdcxx-ng=13.2.0=h7e041cc_5
|
21 |
+
- libuuid=1.41.5=h5eee18b_0
|
22 |
+
- ncurses=6.4=h6a678d5_0
|
23 |
+
- openssl=3.2.1=hd590300_1
|
24 |
+
- pip=23.3.1=py310h06a4308_0
|
25 |
+
- python=3.10.14=h955ad1f_0
|
26 |
+
- readline=8.2=h5eee18b_0
|
27 |
+
- setuptools=68.2.2=py310h06a4308_0
|
28 |
+
- sqlite=3.41.2=h5eee18b_0
|
29 |
+
- sysroot_linux-64=2.12=he073ed8_17
|
30 |
+
- tk=8.6.12=h1ccaba5_0
|
31 |
+
- wheel=0.41.2=py310h06a4308_0
|
32 |
+
- xz=5.4.6=h5eee18b_0
|
33 |
+
- zlib=1.2.13=h5eee18b_0
|
34 |
+
- pip:
|
35 |
+
- addict==2.4.0
|
36 |
+
- aiofiles==23.2.1
|
37 |
+
- aiohttp==3.9.3
|
38 |
+
- aioopenssl==0.6.0
|
39 |
+
- aiosasl==0.5.0
|
40 |
+
- aiosignal==1.3.1
|
41 |
+
- aioxmpp==0.13.3
|
42 |
+
- asttokens==2.4.1
|
43 |
+
- async-timeout==4.0.3
|
44 |
+
- attrs==23.2.0
|
45 |
+
- babel==2.14.0
|
46 |
+
- benbotasync==3.0.2
|
47 |
+
- blinker==1.7.0
|
48 |
+
- certifi==2024.2.2
|
49 |
+
- cffi==1.16.0
|
50 |
+
- charset-normalizer==3.3.2
|
51 |
+
- click==8.1.7
|
52 |
+
- colorama==0.4.6
|
53 |
+
- comm==0.2.2
|
54 |
+
- configargparse==1.7
|
55 |
+
- contourpy==1.2.1
|
56 |
+
- crayons==0.4.0
|
57 |
+
- cryptography==42.0.5
|
58 |
+
- cycler==0.12.1
|
59 |
+
- dash==2.16.1
|
60 |
+
- dash-core-components==2.0.0
|
61 |
+
- dash-html-components==2.0.0
|
62 |
+
- dash-table==5.0.0
|
63 |
+
- decorator==5.1.1
|
64 |
+
- dnspython==2.6.1
|
65 |
+
- einops==0.7.0
|
66 |
+
- exceptiongroup==1.2.0
|
67 |
+
- executing==2.0.1
|
68 |
+
- fastjsonschema==2.19.1
|
69 |
+
- filelock==3.13.3
|
70 |
+
- flask==3.0.2
|
71 |
+
- fonttools==4.50.0
|
72 |
+
- fortniteapiasync==0.1.7
|
73 |
+
- fortnitepy==3.6.9
|
74 |
+
- frozenlist==1.4.1
|
75 |
+
- fsspec==2024.3.1
|
76 |
+
- h5py==3.10.0
|
77 |
+
- html5tagger==1.3.0
|
78 |
+
- httptools==0.6.1
|
79 |
+
- idna==3.6
|
80 |
+
- importlib-metadata==7.1.0
|
81 |
+
- ipython==8.23.0
|
82 |
+
- ipywidgets==8.1.2
|
83 |
+
- itsdangerous==2.1.2
|
84 |
+
- jedi==0.19.1
|
85 |
+
- jinja2==3.1.3
|
86 |
+
- joblib==1.3.2
|
87 |
+
- jsonschema==4.21.1
|
88 |
+
- jsonschema-specifications==2023.12.1
|
89 |
+
- jupyter-core==5.7.2
|
90 |
+
- jupyterlab-widgets==3.0.10
|
91 |
+
- kiwisolver==1.4.5
|
92 |
+
- lxml==4.9.4
|
93 |
+
- markupsafe==2.1.5
|
94 |
+
- matplotlib==3.8.4
|
95 |
+
- matplotlib-inline==0.1.6
|
96 |
+
- mpmath==1.3.0
|
97 |
+
- multidict==6.0.5
|
98 |
+
- nbformat==5.10.4
|
99 |
+
- nest-asyncio==1.6.0
|
100 |
+
- networkx==3.2.1
|
101 |
+
- numpy==1.26.4
|
102 |
+
- nvidia-cublas-cu12==12.1.3.1
|
103 |
+
- nvidia-cuda-cupti-cu12==12.1.105
|
104 |
+
- nvidia-cuda-nvrtc-cu12==12.1.105
|
105 |
+
- nvidia-cuda-runtime-cu12==12.1.105
|
106 |
+
- nvidia-cudnn-cu12==8.9.2.26
|
107 |
+
- nvidia-cufft-cu12==11.0.2.54
|
108 |
+
- nvidia-curand-cu12==10.3.2.106
|
109 |
+
- nvidia-cusolver-cu12==11.4.5.107
|
110 |
+
- nvidia-cusparse-cu12==12.1.0.106
|
111 |
+
- nvidia-nccl-cu12==2.19.3
|
112 |
+
- nvidia-nvjitlink-cu12==12.4.127
|
113 |
+
- nvidia-nvtx-cu12==12.1.105
|
114 |
+
- open3d==0.18.0
|
115 |
+
- opencv-contrib-python==4.5.5.64
|
116 |
+
- packaging==24.0
|
117 |
+
- pandas==2.2.1
|
118 |
+
- parso==0.8.3
|
119 |
+
- pexpect==4.9.0
|
120 |
+
- pillow==10.3.0
|
121 |
+
- platformdirs==4.2.0
|
122 |
+
- plotly==5.20.0
|
123 |
+
- prompt-toolkit==3.0.43
|
124 |
+
- ptyprocess==0.7.0
|
125 |
+
- pure-eval==0.2.2
|
126 |
+
- pyasn1==0.6.0
|
127 |
+
- pyasn1-modules==0.4.0
|
128 |
+
- pybind11==2.12.0
|
129 |
+
- pycolmap==0.6.1
|
130 |
+
- pycparser==2.22
|
131 |
+
- pygments==2.17.2
|
132 |
+
- pyopengl==3.1.7
|
133 |
+
- pyopengl-accelerate==3.1.7
|
134 |
+
- pyopenssl==24.1.0
|
135 |
+
- pyparsing==3.1.2
|
136 |
+
- pyquaternion==0.9.9
|
137 |
+
- python-dateutil==2.9.0.post0
|
138 |
+
- pytz==2024.1
|
139 |
+
- pyyaml==6.0.1
|
140 |
+
- referencing==0.34.0
|
141 |
+
- requests==2.31.0
|
142 |
+
- retrying==1.3.4
|
143 |
+
- rpds-py==0.18.0
|
144 |
+
- sanic==23.12.1
|
145 |
+
- sanic-routing==23.12.0
|
146 |
+
- scikit-learn==1.4.1.post1
|
147 |
+
- scipy==1.13.0
|
148 |
+
- six==1.16.0
|
149 |
+
- sortedcollections==2.1.0
|
150 |
+
- sortedcontainers==2.4.0
|
151 |
+
- stack-data==0.6.3
|
152 |
+
- sympy==1.12
|
153 |
+
- tenacity==8.2.3
|
154 |
+
- threadpoolctl==3.4.0
|
155 |
+
- torch==2.2.2
|
156 |
+
- torchvision==0.17.2
|
157 |
+
- tqdm==4.66.2
|
158 |
+
- tracerite==1.1.1
|
159 |
+
- traitlets==5.14.2
|
160 |
+
- triton==2.2.0
|
161 |
+
- typing-extensions==4.10.0
|
162 |
+
- tzdata==2024.1
|
163 |
+
- tzlocal==5.2
|
164 |
+
- ujson==5.9.0
|
165 |
+
- urllib3==2.2.1
|
166 |
+
- uvloop==0.15.2
|
167 |
+
- wcwidth==0.2.13
|
168 |
+
- websockets==12.0
|
169 |
+
- werkzeug==3.0.2
|
170 |
+
- widgetsnbextension==4.0.10
|
171 |
+
- yaml2==0.0.1
|
172 |
+
- yarl==1.9.4
|
173 |
+
- zipp==3.18.1
|
third_party/pram/inference.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> inference
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 03/04/2024 16:06
|
7 |
+
=================================================='''
|
8 |
+
import argparse
|
9 |
+
import torch
|
10 |
+
import torchvision.transforms.transforms as tvt
|
11 |
+
import yaml
|
12 |
+
from nets.load_segnet import load_segnet
|
13 |
+
from nets.sfd2 import load_sfd2
|
14 |
+
from dataset.get_dataset import compose_datasets
|
15 |
+
|
16 |
+
parser = argparse.ArgumentParser(description='PRAM', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
17 |
+
parser.add_argument('--config', type=str, required=True, help='config of specifications')
|
18 |
+
parser.add_argument('--landmark_path', type=str, required=True, help='path of landmarks')
|
19 |
+
parser.add_argument('--feat_weight_path', type=str, default='weights/sfd2_20230511_210205_resnet4x.79.pth')
|
20 |
+
parser.add_argument('--rec_weight_path', type=str, required=True, help='recognition weight')
|
21 |
+
parser.add_argument('--online', action='store_true', help='online visualization with pangolin')
|
22 |
+
|
23 |
+
if __name__ == '__main__':
|
24 |
+
args = parser.parse_args()
|
25 |
+
with open(args.config, 'rt') as f:
|
26 |
+
config = yaml.load(f, Loader=yaml.Loader)
|
27 |
+
config['landmark_path'] = args.landmark_path
|
28 |
+
|
29 |
+
feat_model = load_sfd2(weight_path=args.feat_weight_path).cuda().eval()
|
30 |
+
print('Load SFD2 weight from {:s}'.format(args.feat_weight_path))
|
31 |
+
|
32 |
+
# rec_model = get_model(config=config)
|
33 |
+
rec_model = load_segnet(network=config['network'],
|
34 |
+
n_class=config['n_class'],
|
35 |
+
desc_dim=256 if config['use_mid_feature'] else 128,
|
36 |
+
n_layers=config['layers'],
|
37 |
+
output_dim=config['output_dim'])
|
38 |
+
state_dict = torch.load(args.rec_weight_path, map_location='cpu')['model']
|
39 |
+
rec_model.load_state_dict(state_dict, strict=True)
|
40 |
+
print('Load recognition weight from {:s}'.format(args.rec_weight_path))
|
41 |
+
|
42 |
+
img_transforms = []
|
43 |
+
img_transforms.append(tvt.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))
|
44 |
+
img_transforms = tvt.Compose(img_transforms)
|
45 |
+
|
46 |
+
dataset = config['dataset']
|
47 |
+
if not args.online:
|
48 |
+
from localization.loc_by_rec_eval import loc_by_rec_eval
|
49 |
+
|
50 |
+
test_set = compose_datasets(datasets=dataset, config=config, train=False, sample_ratio=1)
|
51 |
+
config['n_class'] = test_set.n_class
|
52 |
+
|
53 |
+
loc_by_rec_eval(rec_model=rec_model.cuda().eval(),
|
54 |
+
loader=test_set,
|
55 |
+
local_feat=feat_model.cuda().eval(),
|
56 |
+
config=config, img_transforms=img_transforms)
|
57 |
+
else:
|
58 |
+
from localization.loc_by_rec_online import loc_by_rec_online
|
59 |
+
|
60 |
+
loc_by_rec_online(rec_model=rec_model.cuda().eval(),
|
61 |
+
local_feat=feat_model.cuda().eval(),
|
62 |
+
config=config, img_transforms=img_transforms)
|
third_party/pram/localization/base_model.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from abc import ABCMeta, abstractmethod
|
2 |
+
from torch import nn
|
3 |
+
from copy import copy
|
4 |
+
import inspect
|
5 |
+
|
6 |
+
|
7 |
+
class BaseModel(nn.Module, metaclass=ABCMeta):
|
8 |
+
default_conf = {}
|
9 |
+
required_data_keys = []
|
10 |
+
|
11 |
+
def __init__(self, conf):
|
12 |
+
"""Perform some logic and call the _init method of the child model."""
|
13 |
+
super().__init__()
|
14 |
+
self.conf = conf = {**self.default_conf, **conf}
|
15 |
+
self.required_data_keys = copy(self.required_data_keys)
|
16 |
+
self._init(conf)
|
17 |
+
|
18 |
+
def forward(self, data):
|
19 |
+
"""Check the data and call the _forward method of the child model."""
|
20 |
+
for key in self.required_data_keys:
|
21 |
+
assert key in data, 'Missing key {} in data'.format(key)
|
22 |
+
return self._forward(data)
|
23 |
+
|
24 |
+
@abstractmethod
|
25 |
+
def _init(self, conf):
|
26 |
+
"""To be implemented by the child class."""
|
27 |
+
raise NotImplementedError
|
28 |
+
|
29 |
+
@abstractmethod
|
30 |
+
def _forward(self, data):
|
31 |
+
"""To be implemented by the child class."""
|
32 |
+
raise NotImplementedError
|
33 |
+
|
34 |
+
|
35 |
+
def dynamic_load(root, model):
|
36 |
+
module_path = f'{root.__name__}.{model}'
|
37 |
+
module = __import__(module_path, fromlist=[''])
|
38 |
+
classes = inspect.getmembers(module, inspect.isclass)
|
39 |
+
# Filter classes defined in the module
|
40 |
+
classes = [c for c in classes if c[1].__module__ == module_path]
|
41 |
+
# Filter classes inherited from BaseModel
|
42 |
+
classes = [c for c in classes if issubclass(c[1], BaseModel)]
|
43 |
+
assert len(classes) == 1, classes
|
44 |
+
return classes[0][1]
|
45 |
+
# return getattr(module, 'Model')
|
third_party/pram/localization/camera.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> camera
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 04/03/2024 11:27
|
7 |
+
=================================================='''
|
8 |
+
import collections
|
9 |
+
|
10 |
+
Camera = collections.namedtuple(
|
11 |
+
"Camera", ["id", "model", "width", "height", "params"])
|
third_party/pram/localization/extract_features.py
ADDED
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> extract_features.py
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 07/02/2024 14:49
|
7 |
+
=================================================='''
|
8 |
+
import os
|
9 |
+
import os.path as osp
|
10 |
+
import h5py
|
11 |
+
import numpy as np
|
12 |
+
import progressbar
|
13 |
+
import yaml
|
14 |
+
import torch
|
15 |
+
import cv2
|
16 |
+
import torch.utils.data as Data
|
17 |
+
from tqdm import tqdm
|
18 |
+
from types import SimpleNamespace
|
19 |
+
import logging
|
20 |
+
import pprint
|
21 |
+
from pathlib import Path
|
22 |
+
import argparse
|
23 |
+
from nets.sfd2 import ResNet4x, extract_sfd2_return
|
24 |
+
from nets.superpoint import SuperPoint, extract_sp_return
|
25 |
+
|
26 |
+
confs = {
|
27 |
+
'superpoint-n4096': {
|
28 |
+
'output': 'feats-superpoint-n4096',
|
29 |
+
'model': {
|
30 |
+
'name': 'superpoint',
|
31 |
+
'outdim': 256,
|
32 |
+
'use_stability': False,
|
33 |
+
'nms_radius': 3,
|
34 |
+
'max_keypoints': 4096,
|
35 |
+
'conf_th': 0.005,
|
36 |
+
'multiscale': False,
|
37 |
+
'scales': [1.0],
|
38 |
+
'model_fn': osp.join(os.getcwd(),
|
39 |
+
"weights/superpoint_v1.pth"),
|
40 |
+
},
|
41 |
+
'preprocessing': {
|
42 |
+
'grayscale': True,
|
43 |
+
'resize_max': False,
|
44 |
+
},
|
45 |
+
},
|
46 |
+
|
47 |
+
'resnet4x-20230511-210205-pho-0005': {
|
48 |
+
'output': 'feats-resnet4x-20230511-210205-pho-0005',
|
49 |
+
'model': {
|
50 |
+
'outdim': 128,
|
51 |
+
'name': 'resnet4x',
|
52 |
+
'use_stability': False,
|
53 |
+
'max_keypoints': 4096,
|
54 |
+
'conf_th': 0.005,
|
55 |
+
'multiscale': False,
|
56 |
+
'scales': [1.0],
|
57 |
+
'model_fn': osp.join(os.getcwd(),
|
58 |
+
"weights/sfd2_20230511_210205_resnet4x.79.pth"),
|
59 |
+
},
|
60 |
+
'preprocessing': {
|
61 |
+
'grayscale': False,
|
62 |
+
'resize_max': False,
|
63 |
+
},
|
64 |
+
'mask': False,
|
65 |
+
},
|
66 |
+
|
67 |
+
'sfd2': {
|
68 |
+
'output': 'feats-sfd2',
|
69 |
+
'model': {
|
70 |
+
'outdim': 128,
|
71 |
+
'name': 'resnet4x',
|
72 |
+
'use_stability': False,
|
73 |
+
'max_keypoints': 4096,
|
74 |
+
'conf_th': 0.005,
|
75 |
+
'multiscale': False,
|
76 |
+
'scales': [1.0],
|
77 |
+
'model_fn': osp.join(os.getcwd(),
|
78 |
+
"weights/sfd2_20230511_210205_resnet4x.79.pth"),
|
79 |
+
},
|
80 |
+
'preprocessing': {
|
81 |
+
'grayscale': False,
|
82 |
+
'resize_max': False,
|
83 |
+
},
|
84 |
+
'mask': False,
|
85 |
+
},
|
86 |
+
}
|
87 |
+
|
88 |
+
|
89 |
+
class ImageDataset(Data.Dataset):
|
90 |
+
default_conf = {
|
91 |
+
'globs': ['*.jpg', '*.png', '*.jpeg', '*.JPG', '*.PNG'],
|
92 |
+
'grayscale': False,
|
93 |
+
'resize_max': None,
|
94 |
+
'resize_force': False,
|
95 |
+
}
|
96 |
+
|
97 |
+
def __init__(self, root, conf, image_list=None,
|
98 |
+
mask_root=None):
|
99 |
+
self.conf = conf = SimpleNamespace(**{**self.default_conf, **conf})
|
100 |
+
self.root = root
|
101 |
+
|
102 |
+
self.paths = []
|
103 |
+
if image_list is None:
|
104 |
+
for g in conf.globs:
|
105 |
+
self.paths += list(Path(root).glob('**/' + g))
|
106 |
+
if len(self.paths) == 0:
|
107 |
+
raise ValueError(f'Could not find any image in root: {root}.')
|
108 |
+
self.paths = [i.relative_to(root) for i in self.paths]
|
109 |
+
else:
|
110 |
+
with open(image_list, "r") as f:
|
111 |
+
lines = f.readlines()
|
112 |
+
for l in lines:
|
113 |
+
l = l.strip()
|
114 |
+
self.paths.append(Path(l))
|
115 |
+
|
116 |
+
logging.info(f'Found {len(self.paths)} images in root {root}.')
|
117 |
+
|
118 |
+
if mask_root is not None:
|
119 |
+
self.mask_root = mask_root
|
120 |
+
else:
|
121 |
+
self.mask_root = None
|
122 |
+
|
123 |
+
def __getitem__(self, idx):
|
124 |
+
path = self.paths[idx]
|
125 |
+
if self.conf.grayscale:
|
126 |
+
mode = cv2.IMREAD_GRAYSCALE
|
127 |
+
else:
|
128 |
+
mode = cv2.IMREAD_COLOR
|
129 |
+
image = cv2.imread(str(self.root / path), mode)
|
130 |
+
if not self.conf.grayscale:
|
131 |
+
image = image[:, :, ::-1] # BGR to RGB
|
132 |
+
if image is None:
|
133 |
+
raise ValueError(f'Cannot read image {str(path)}.')
|
134 |
+
image = image.astype(np.float32)
|
135 |
+
size = image.shape[:2][::-1]
|
136 |
+
w, h = size
|
137 |
+
|
138 |
+
if self.conf.resize_max and (self.conf.resize_force
|
139 |
+
or max(w, h) > self.conf.resize_max):
|
140 |
+
scale = self.conf.resize_max / max(h, w)
|
141 |
+
h_new, w_new = int(round(h * scale)), int(round(w * scale))
|
142 |
+
image = cv2.resize(
|
143 |
+
image, (w_new, h_new), interpolation=cv2.INTER_CUBIC)
|
144 |
+
|
145 |
+
if self.conf.grayscale:
|
146 |
+
image = image[None]
|
147 |
+
else:
|
148 |
+
image = image.transpose((2, 0, 1)) # HxWxC to CxHxW
|
149 |
+
image = image / 255.
|
150 |
+
|
151 |
+
data = {
|
152 |
+
'name': str(path),
|
153 |
+
'image': image,
|
154 |
+
'original_size': np.array(size),
|
155 |
+
}
|
156 |
+
|
157 |
+
if self.mask_root is not None:
|
158 |
+
mask_path = Path(str(path).replace("jpg", "png"))
|
159 |
+
if osp.exists(mask_path):
|
160 |
+
mask = cv2.imread(str(self.mask_root / mask_path))
|
161 |
+
mask = cv2.resize(mask, dsize=(image.shape[2], image.shape[1]), interpolation=cv2.INTER_NEAREST)
|
162 |
+
else:
|
163 |
+
mask = np.zeros(shape=(image.shape[1], image.shape[2], 3), dtype=np.uint8)
|
164 |
+
|
165 |
+
data['mask'] = mask
|
166 |
+
|
167 |
+
return data
|
168 |
+
|
169 |
+
def __len__(self):
|
170 |
+
return len(self.paths)
|
171 |
+
|
172 |
+
|
173 |
+
def get_model(model_name, weight_path, outdim=128, **kwargs):
|
174 |
+
if model_name == 'superpoint':
|
175 |
+
model = SuperPoint(config={
|
176 |
+
'descriptor_dim': 256,
|
177 |
+
'nms_radius': 4,
|
178 |
+
'keypoint_threshold': 0.005,
|
179 |
+
'max_keypoints': -1,
|
180 |
+
'remove_borders': 4,
|
181 |
+
'weight_path': weight_path,
|
182 |
+
}).eval()
|
183 |
+
|
184 |
+
extractor = extract_sp_return
|
185 |
+
|
186 |
+
if model_name == 'resnet4x':
|
187 |
+
model = ResNet4x(outdim=outdim).eval()
|
188 |
+
model.load_state_dict(torch.load(weight_path)['state_dict'], strict=True)
|
189 |
+
extractor = extract_sfd2_return
|
190 |
+
|
191 |
+
return model, extractor
|
192 |
+
|
193 |
+
|
194 |
+
@torch.no_grad()
|
195 |
+
def main(conf, image_dir, export_dir):
|
196 |
+
logging.info('Extracting local features with configuration:'
|
197 |
+
f'\n{pprint.pformat(conf)}')
|
198 |
+
model, extractor = get_model(model_name=conf['model']['name'], weight_path=conf["model"]["model_fn"],
|
199 |
+
use_stability=conf['model']['use_stability'], outdim=conf['model']['outdim'])
|
200 |
+
model = model.cuda()
|
201 |
+
loader = ImageDataset(image_dir,
|
202 |
+
conf['preprocessing'],
|
203 |
+
image_list=args.image_list,
|
204 |
+
mask_root=None)
|
205 |
+
loader = torch.utils.data.DataLoader(loader, num_workers=4)
|
206 |
+
|
207 |
+
os.makedirs(export_dir, exist_ok=True)
|
208 |
+
feature_path = Path(export_dir, conf['output'] + '.h5')
|
209 |
+
feature_path.parent.mkdir(exist_ok=True, parents=True)
|
210 |
+
feature_file = h5py.File(str(feature_path), 'a')
|
211 |
+
|
212 |
+
with tqdm(total=len(loader)) as t:
|
213 |
+
for idx, data in enumerate(loader):
|
214 |
+
t.update()
|
215 |
+
pred = extractor(model, img=data["image"],
|
216 |
+
topK=conf["model"]["max_keypoints"],
|
217 |
+
mask=None,
|
218 |
+
conf_th=conf["model"]["conf_th"],
|
219 |
+
scales=conf["model"]["scales"],
|
220 |
+
)
|
221 |
+
|
222 |
+
# pred = {k: v[0].cpu().numpy() for k, v in pred.items()}
|
223 |
+
pred['descriptors'] = pred['descriptors'].transpose()
|
224 |
+
|
225 |
+
t.set_postfix(npoints=pred['keypoints'].shape[0])
|
226 |
+
# print(pred['keypoints'].shape)
|
227 |
+
|
228 |
+
pred['image_size'] = original_size = data['original_size'][0].numpy()
|
229 |
+
# pred['descriptors'] = pred['descriptors'].T
|
230 |
+
if 'keypoints' in pred.keys():
|
231 |
+
size = np.array(data['image'].shape[-2:][::-1])
|
232 |
+
scales = (original_size / size).astype(np.float32)
|
233 |
+
pred['keypoints'] = (pred['keypoints'] + .5) * scales[None] - .5
|
234 |
+
|
235 |
+
grp = feature_file.create_group(data['name'][0])
|
236 |
+
for k, v in pred.items():
|
237 |
+
# print(k, v.shape)
|
238 |
+
grp.create_dataset(k, data=v)
|
239 |
+
|
240 |
+
del pred
|
241 |
+
|
242 |
+
feature_file.close()
|
243 |
+
logging.info('Finished exporting features.')
|
244 |
+
|
245 |
+
return feature_path
|
246 |
+
|
247 |
+
|
248 |
+
if __name__ == '__main__':
|
249 |
+
parser = argparse.ArgumentParser()
|
250 |
+
parser.add_argument('--image_dir', type=Path, required=True)
|
251 |
+
parser.add_argument('--image_list', type=str, default=None)
|
252 |
+
parser.add_argument('--mask_dir', type=Path, default=None)
|
253 |
+
parser.add_argument('--export_dir', type=Path, required=True)
|
254 |
+
parser.add_argument('--conf', type=str, required=True, choices=list(confs.keys()))
|
255 |
+
args = parser.parse_args()
|
256 |
+
main(confs[args.conf], args.image_dir, args.export_dir)
|
third_party/pram/localization/frame.py
ADDED
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> frame
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 01/03/2024 10:08
|
7 |
+
=================================================='''
|
8 |
+
from collections import defaultdict
|
9 |
+
|
10 |
+
import numpy as np
|
11 |
+
import torch
|
12 |
+
import pycolmap
|
13 |
+
|
14 |
+
from localization.camera import Camera
|
15 |
+
from localization.utils import compute_pose_error
|
16 |
+
|
17 |
+
|
18 |
+
class Frame:
|
19 |
+
def __init__(self, image: np.ndarray, camera: pycolmap.Camera, id: int, name: str = None, qvec=None, tvec=None,
|
20 |
+
scene_name=None,
|
21 |
+
reference_frame_id=None):
|
22 |
+
self.image = image
|
23 |
+
self.camera = camera
|
24 |
+
self.id = id
|
25 |
+
self.name = name
|
26 |
+
self.image_size = np.array([camera.height, camera.width])
|
27 |
+
self.qvec = qvec
|
28 |
+
self.tvec = tvec
|
29 |
+
self.scene_name = scene_name
|
30 |
+
self.reference_frame_id = reference_frame_id
|
31 |
+
|
32 |
+
self.keypoints = None # [N, 3]
|
33 |
+
self.descriptors = None # [N, D]
|
34 |
+
self.segmentations = None # [N C]
|
35 |
+
self.seg_scores = None # [N C]
|
36 |
+
self.seg_ids = None # [N, 1]
|
37 |
+
self.point3D_ids = None # [N, 1]
|
38 |
+
self.xyzs = None
|
39 |
+
|
40 |
+
self.gt_qvec = None
|
41 |
+
self.gt_tvec = None
|
42 |
+
|
43 |
+
self.matched_scene_name = None
|
44 |
+
self.matched_keypoints = None
|
45 |
+
self.matched_keypoint_ids = None
|
46 |
+
self.matched_xyzs = None
|
47 |
+
self.matched_point3D_ids = None
|
48 |
+
self.matched_inliers = None
|
49 |
+
self.matched_sids = None
|
50 |
+
self.matched_order = None
|
51 |
+
|
52 |
+
self.refinement_reference_frame_ids = None
|
53 |
+
self.image_rec = None
|
54 |
+
self.image_matching = None
|
55 |
+
self.image_inlier = None
|
56 |
+
self.reference_frame_name = None
|
57 |
+
self.image_matching_tmp = None
|
58 |
+
self.image_inlier_tmp = None
|
59 |
+
self.reference_frame_name_tmp = None
|
60 |
+
|
61 |
+
self.tracking_status = None
|
62 |
+
|
63 |
+
self.time_feat = 0
|
64 |
+
self.time_rec = 0
|
65 |
+
self.time_loc = 0
|
66 |
+
self.time_ref = 0
|
67 |
+
|
68 |
+
def update_point3ds_old(self):
|
69 |
+
pt = torch.from_numpy(self.keypoints[:, :2]).unsqueeze(-1) # [M 2 1]
|
70 |
+
mpt = torch.from_numpy(self.matched_keypoints[:, :2].transpose()).unsqueeze(0) # [1 2 N]
|
71 |
+
dist = torch.sqrt(torch.sum((pt - mpt) ** 2, dim=1))
|
72 |
+
values, ids = torch.topk(dist, dim=1, k=1, largest=False)
|
73 |
+
values = values[:, 0].numpy()
|
74 |
+
ids = ids[:, 0].numpy()
|
75 |
+
mask = (values < 1) # 1 pixel error
|
76 |
+
self.point3D_ids = np.zeros(shape=(self.keypoints.shape[0],), dtype=int) - 1
|
77 |
+
self.point3D_ids[mask] = self.matched_point3D_ids[ids[mask]]
|
78 |
+
|
79 |
+
# self.xyzs = np.zeros(shape=(self.keypoints.shape[0], 3), dtype=float)
|
80 |
+
inlier_mask = self.matched_inliers
|
81 |
+
self.xyzs[mask] = self.matched_xyzs[ids[mask]]
|
82 |
+
self.seg_ids[mask] = self.matched_sids[ids[mask]]
|
83 |
+
|
84 |
+
def update_point3ds(self):
|
85 |
+
# print('Frame: update_point3ds: ', self.matched_keypoint_ids.shape, self.matched_xyzs.shape,
|
86 |
+
# self.matched_sids.shape, self.matched_point3D_ids.shape)
|
87 |
+
self.xyzs[self.matched_keypoint_ids] = self.matched_xyzs
|
88 |
+
self.seg_ids[self.matched_keypoint_ids] = self.matched_sids
|
89 |
+
self.point3D_ids[self.matched_keypoint_ids] = self.matched_point3D_ids
|
90 |
+
|
91 |
+
def add_keypoints(self, keypoints: np.ndarray, descriptors: np.ndarray):
|
92 |
+
self.keypoints = keypoints
|
93 |
+
self.descriptors = descriptors
|
94 |
+
self.initialize_localization_variables()
|
95 |
+
|
96 |
+
def add_segmentations(self, segmentations: torch.Tensor, filtering_threshold: float):
|
97 |
+
'''
|
98 |
+
:param segmentations: [number_points number_labels]
|
99 |
+
:return:
|
100 |
+
'''
|
101 |
+
seg_scores = torch.softmax(segmentations, dim=-1)
|
102 |
+
if filtering_threshold > 0:
|
103 |
+
scores_background = seg_scores[:, 0]
|
104 |
+
non_bg_mask = (scores_background < filtering_threshold)
|
105 |
+
print('pre filtering before: ', self.keypoints.shape)
|
106 |
+
if torch.sum(non_bg_mask) >= 0.4 * seg_scores.shape[0]:
|
107 |
+
self.keypoints = self.keypoints[non_bg_mask.cpu().numpy()]
|
108 |
+
self.descriptors = self.descriptors[non_bg_mask.cpu().numpy()]
|
109 |
+
# print('pre filtering after: ', self.keypoints.shape)
|
110 |
+
|
111 |
+
# update localization variables
|
112 |
+
self.initialize_localization_variables()
|
113 |
+
|
114 |
+
segmentations = segmentations[non_bg_mask]
|
115 |
+
seg_scores = seg_scores[non_bg_mask]
|
116 |
+
print('pre filtering after: ', self.keypoints.shape)
|
117 |
+
|
118 |
+
# extract initial segmentation info
|
119 |
+
self.segmentations = segmentations.cpu().numpy()
|
120 |
+
self.seg_scores = seg_scores.cpu().numpy()
|
121 |
+
self.seg_ids = segmentations.max(dim=-1)[1].cpu().numpy() - 1 # should start from 0
|
122 |
+
|
123 |
+
def filter_keypoints(self, seg_scores: np.ndarray, filtering_threshold: float):
|
124 |
+
scores_background = seg_scores[:, 0]
|
125 |
+
non_bg_mask = (scores_background < filtering_threshold)
|
126 |
+
print('pre filtering before: ', self.keypoints.shape)
|
127 |
+
if np.sum(non_bg_mask) >= 0.4 * seg_scores.shape[0]:
|
128 |
+
self.keypoints = self.keypoints[non_bg_mask]
|
129 |
+
self.descriptors = self.descriptors[non_bg_mask]
|
130 |
+
print('pre filtering after: ', self.keypoints.shape)
|
131 |
+
|
132 |
+
# update localization variables
|
133 |
+
self.initialize_localization_variables()
|
134 |
+
return non_bg_mask
|
135 |
+
else:
|
136 |
+
print('pre filtering after: ', self.keypoints.shape)
|
137 |
+
return None
|
138 |
+
|
139 |
+
def compute_pose_error(self, pred_qvec=None, pred_tvec=None):
|
140 |
+
if pred_qvec is not None and pred_tvec is not None:
|
141 |
+
if self.gt_qvec is not None and self.gt_tvec is not None:
|
142 |
+
return compute_pose_error(pred_qcw=pred_qvec, pred_tcw=pred_tvec,
|
143 |
+
gt_qcw=self.gt_qvec, gt_tcw=self.gt_tvec)
|
144 |
+
else:
|
145 |
+
return 100, 100
|
146 |
+
|
147 |
+
if self.qvec is None or self.tvec is None or self.gt_qvec is None or self.gt_tvec is None:
|
148 |
+
return 100, 100
|
149 |
+
else:
|
150 |
+
err_q, err_t = compute_pose_error(pred_qcw=self.qvec, pred_tcw=self.tvec,
|
151 |
+
gt_qcw=self.gt_qvec, gt_tcw=self.gt_tvec)
|
152 |
+
return err_q, err_t
|
153 |
+
|
154 |
+
def get_intrinsics(self) -> np.ndarray:
|
155 |
+
camera_model = self.camera.model.name
|
156 |
+
params = self.camera.params
|
157 |
+
if camera_model in ("SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL"):
|
158 |
+
fx = fy = params[0]
|
159 |
+
cx = params[1]
|
160 |
+
cy = params[2]
|
161 |
+
elif camera_model in ("PINHOLE", "OPENCV", "OPENCV_FISHEYE", "FULL_OPENCV"):
|
162 |
+
fx = params[0]
|
163 |
+
fy = params[1]
|
164 |
+
cx = params[2]
|
165 |
+
cy = params[3]
|
166 |
+
else:
|
167 |
+
raise Exception("Camera model not supported")
|
168 |
+
|
169 |
+
# intrinsics
|
170 |
+
K = np.identity(3)
|
171 |
+
K[0, 0] = fx
|
172 |
+
K[1, 1] = fy
|
173 |
+
K[0, 2] = cx
|
174 |
+
K[1, 2] = cy
|
175 |
+
return K
|
176 |
+
|
177 |
+
def get_dominate_seg_id(self):
|
178 |
+
counts = np.bincount(self.seg_ids[self.seg_ids > 0])
|
179 |
+
return np.argmax(counts)
|
180 |
+
|
181 |
+
def clear_localization_track(self):
|
182 |
+
self.matched_scene_name = None
|
183 |
+
self.matched_keypoints = None
|
184 |
+
self.matched_xyzs = None
|
185 |
+
self.matched_point3D_ids = None
|
186 |
+
self.matched_inliers = None
|
187 |
+
self.matched_sids = None
|
188 |
+
|
189 |
+
self.refinement_reference_frame_ids = None
|
190 |
+
|
191 |
+
def initialize_localization_variables(self):
|
192 |
+
nkpt = self.keypoints.shape[0]
|
193 |
+
self.seg_ids = np.zeros(shape=(nkpt,), dtype=int) - 1
|
194 |
+
self.point3D_ids = np.zeros(shape=(nkpt,), dtype=int) - 1
|
195 |
+
self.xyzs = np.zeros(shape=(nkpt, 3), dtype=float)
|
third_party/pram/localization/loc_by_rec_eval.py
ADDED
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> loc_by_rec
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 08/02/2024 15:26
|
7 |
+
=================================================='''
|
8 |
+
import torch
|
9 |
+
from torch.autograd import Variable
|
10 |
+
from localization.multimap3d import MultiMap3D
|
11 |
+
from localization.frame import Frame
|
12 |
+
import yaml, cv2, time
|
13 |
+
import numpy as np
|
14 |
+
import os.path as osp
|
15 |
+
import threading
|
16 |
+
import os
|
17 |
+
from tqdm import tqdm
|
18 |
+
from recognition.vis_seg import vis_seg_point, generate_color_dic
|
19 |
+
from tools.metrics import compute_iou, compute_precision
|
20 |
+
from localization.tracker import Tracker
|
21 |
+
from localization.utils import read_query_info
|
22 |
+
from localization.camera import Camera
|
23 |
+
|
24 |
+
|
25 |
+
def loc_by_rec_eval(rec_model, loader, config, local_feat, img_transforms=None):
|
26 |
+
n_epoch = int(config['weight_path'].split('.')[1])
|
27 |
+
save_fn = osp.join(config['localization']['save_path'],
|
28 |
+
config['weight_path'].split('/')[0] + '_{:d}'.format(n_epoch) + '_{:d}'.format(
|
29 |
+
config['feat_dim']))
|
30 |
+
tag = 'k{:d}_th{:d}_mm{:d}_mi{:d}'.format(config['localization']['seg_k'], config['localization']['threshold'],
|
31 |
+
config['localization']['min_matches'],
|
32 |
+
config['localization']['min_inliers'])
|
33 |
+
if config['localization']['do_refinement']:
|
34 |
+
tag += '_op{:d}'.format(config['localization']['covisibility_frame'])
|
35 |
+
if config['localization']['with_compress']:
|
36 |
+
tag += '_comp'
|
37 |
+
|
38 |
+
save_fn = save_fn + '_' + tag
|
39 |
+
|
40 |
+
save = config['localization']['save']
|
41 |
+
save = config['localization']['save']
|
42 |
+
if save:
|
43 |
+
save_dir = save_fn
|
44 |
+
os.makedirs(save_dir, exist_ok=True)
|
45 |
+
else:
|
46 |
+
save_dir = None
|
47 |
+
|
48 |
+
seg_color = generate_color_dic(n_seg=2000)
|
49 |
+
dataset_path = config['dataset_path']
|
50 |
+
show = config['localization']['show']
|
51 |
+
if show:
|
52 |
+
cv2.namedWindow('img', cv2.WINDOW_NORMAL)
|
53 |
+
|
54 |
+
locMap = MultiMap3D(config=config, save_dir=None)
|
55 |
+
# start tracker
|
56 |
+
mTracker = Tracker(locMap=locMap, matcher=locMap.matcher, config=config)
|
57 |
+
|
58 |
+
dataset_name = config['dataset'][0]
|
59 |
+
all_scene_query_info = {}
|
60 |
+
with open(osp.join(config['config_path'], '{:s}.yaml'.format(dataset_name)), 'r') as f:
|
61 |
+
scene_config = yaml.load(f, Loader=yaml.Loader)
|
62 |
+
scenes = scene_config['scenes']
|
63 |
+
for scene in scenes:
|
64 |
+
query_path = osp.join(config['dataset_path'], dataset_name, scene, scene_config[scene]['query_path'])
|
65 |
+
query_info = read_query_info(query_fn=query_path)
|
66 |
+
all_scene_query_info[dataset_name + '/' + scene] = query_info
|
67 |
+
# print(scene, query_info.keys())
|
68 |
+
|
69 |
+
tracking = False
|
70 |
+
|
71 |
+
full_log = ''
|
72 |
+
failed_cases = []
|
73 |
+
success_cases = []
|
74 |
+
poses = {}
|
75 |
+
err_ths_cnt = [0, 0, 0, 0]
|
76 |
+
|
77 |
+
seg_results = {}
|
78 |
+
time_results = {
|
79 |
+
'feat': [],
|
80 |
+
'rec': [],
|
81 |
+
'loc': [],
|
82 |
+
'ref': [],
|
83 |
+
'total': [],
|
84 |
+
}
|
85 |
+
n_total = 0
|
86 |
+
|
87 |
+
loc_scene_names = config['localization']['loc_scene_name']
|
88 |
+
# loader = loader[8990:]
|
89 |
+
for bid, pred in tqdm(enumerate(loader), total=len(loader)):
|
90 |
+
pred = loader[bid]
|
91 |
+
image_name = pred['file_name'] # [0]
|
92 |
+
scene_name = pred['scene_name'] # [0] # dataset_scene
|
93 |
+
if len(loc_scene_names) > 0:
|
94 |
+
skip = True
|
95 |
+
for loc_scene in loc_scene_names:
|
96 |
+
if scene_name.find(loc_scene) > 0:
|
97 |
+
skip = False
|
98 |
+
break
|
99 |
+
if skip:
|
100 |
+
continue
|
101 |
+
with torch.no_grad():
|
102 |
+
for k in pred:
|
103 |
+
if k.find('name') >= 0:
|
104 |
+
continue
|
105 |
+
if k != 'image0' and k != 'image1' and k != 'depth0' and k != 'depth1':
|
106 |
+
if type(pred[k]) == np.ndarray:
|
107 |
+
pred[k] = Variable(torch.from_numpy(pred[k]).float().cuda())[None]
|
108 |
+
elif type(pred[k]) == torch.Tensor:
|
109 |
+
pred[k] = Variable(pred[k].float().cuda())
|
110 |
+
elif type(pred[k]) == list:
|
111 |
+
continue
|
112 |
+
else:
|
113 |
+
pred[k] = Variable(torch.stack(pred[k]).float().cuda())
|
114 |
+
print('scene: ', scene_name, image_name)
|
115 |
+
|
116 |
+
n_total += 1
|
117 |
+
with torch.no_grad():
|
118 |
+
img = pred['image']
|
119 |
+
while isinstance(img, list):
|
120 |
+
img = img[0]
|
121 |
+
|
122 |
+
new_im = torch.from_numpy(img).permute(2, 0, 1).cuda().float()
|
123 |
+
if img_transforms is not None:
|
124 |
+
new_im = img_transforms(new_im)[None]
|
125 |
+
else:
|
126 |
+
new_im = new_im[None]
|
127 |
+
img = (img * 255).astype(np.uint8)
|
128 |
+
|
129 |
+
fn = image_name
|
130 |
+
camera_model, width, height, params = all_scene_query_info[scene_name][fn]
|
131 |
+
camera = Camera(id=-1, model=camera_model, width=width, height=height, params=params)
|
132 |
+
curr_frame = Frame(image=img, camera=camera, id=0, name=fn, scene_name=scene_name)
|
133 |
+
gt_sub_map = locMap.sub_maps[curr_frame.scene_name]
|
134 |
+
if gt_sub_map.gt_poses is not None and curr_frame.name in gt_sub_map.gt_poses.keys():
|
135 |
+
curr_frame.gt_qvec = gt_sub_map.gt_poses[curr_frame.name]['qvec']
|
136 |
+
curr_frame.gt_tvec = gt_sub_map.gt_poses[curr_frame.name]['tvec']
|
137 |
+
|
138 |
+
t_start = time.time()
|
139 |
+
encoder_out = local_feat.extract_local_global(data={'image': new_im},
|
140 |
+
config=
|
141 |
+
{
|
142 |
+
# 'min_keypoints': 128,
|
143 |
+
'max_keypoints': config['eval_max_keypoints'],
|
144 |
+
}
|
145 |
+
)
|
146 |
+
t_feat = time.time() - t_start
|
147 |
+
# global_descriptors_cuda = encoder_out['global_descriptors']
|
148 |
+
# scores_cuda = encoder_out['scores'][0][None]
|
149 |
+
# kpts_cuda = encoder_out['keypoints'][0][None]
|
150 |
+
# descriptors_cuda = encoder_out['descriptors'][0][None].permute(0, 2, 1)
|
151 |
+
|
152 |
+
sparse_scores = pred['scores']
|
153 |
+
sparse_descs = pred['descriptors']
|
154 |
+
sparse_kpts = pred['keypoints']
|
155 |
+
gt_seg = pred['gt_seg']
|
156 |
+
|
157 |
+
curr_frame.add_keypoints(keypoints=np.hstack([sparse_kpts[0].cpu().numpy(),
|
158 |
+
sparse_scores[0].cpu().numpy().reshape(-1, 1)]),
|
159 |
+
descriptors=sparse_descs[0].cpu().numpy())
|
160 |
+
curr_frame.time_feat = t_feat
|
161 |
+
|
162 |
+
t_start = time.time()
|
163 |
+
_, seg_descriptors = local_feat.sample(score_map=encoder_out['score_map'],
|
164 |
+
semi_descs=encoder_out['mid_features'],
|
165 |
+
# kpts=kpts_cuda[0],
|
166 |
+
kpts=sparse_kpts[0],
|
167 |
+
norm_desc=config['norm_desc'])
|
168 |
+
rec_out = rec_model({'scores': sparse_scores,
|
169 |
+
'seg_descriptors': seg_descriptors[None].permute(0, 2, 1),
|
170 |
+
'keypoints': sparse_kpts,
|
171 |
+
'image': new_im})
|
172 |
+
t_rec = time.time() - t_start
|
173 |
+
curr_frame.time_rec = t_rec
|
174 |
+
|
175 |
+
pred = {
|
176 |
+
# 'scores': scores_cuda,
|
177 |
+
# 'keypoints': kpts_cuda,
|
178 |
+
# 'descriptors': descriptors_cuda,
|
179 |
+
# 'global_descriptors': global_descriptors_cuda,
|
180 |
+
'image_size': np.array([img.shape[1], img.shape[0]])[None],
|
181 |
+
}
|
182 |
+
|
183 |
+
pred = {**pred, **rec_out}
|
184 |
+
pred_seg = torch.max(pred['prediction'], dim=2)[1] # [B, N, C]
|
185 |
+
|
186 |
+
pred_seg = pred_seg[0].cpu().numpy()
|
187 |
+
kpts = sparse_kpts[0].cpu().numpy()
|
188 |
+
img_pred_seg = vis_seg_point(img=img, kpts=kpts, segs=pred_seg, seg_color=seg_color, radius=9)
|
189 |
+
show_text = 'kpts: {:d}'.format(kpts.shape[0])
|
190 |
+
img_pred_seg = cv2.putText(img=img_pred_seg, text=show_text,
|
191 |
+
org=(50, 30),
|
192 |
+
fontFace=cv2.FONT_HERSHEY_SIMPLEX,
|
193 |
+
fontScale=1, color=(0, 0, 255),
|
194 |
+
thickness=2, lineType=cv2.LINE_AA)
|
195 |
+
curr_frame.image_rec = img_pred_seg
|
196 |
+
|
197 |
+
if show:
|
198 |
+
cv2.imshow('img', img)
|
199 |
+
key = cv2.waitKey(1)
|
200 |
+
if key == ord('q'):
|
201 |
+
exit(0)
|
202 |
+
elif key == ord('s'):
|
203 |
+
show_time = -1
|
204 |
+
elif key == ord('c'):
|
205 |
+
show_time = 1
|
206 |
+
|
207 |
+
segmentations = pred['prediction'][0] # .cpu().numpy() # [N, C]
|
208 |
+
curr_frame.add_segmentations(segmentations=segmentations,
|
209 |
+
filtering_threshold=config['localization']['pre_filtering_th'])
|
210 |
+
|
211 |
+
# Step1: do tracker first
|
212 |
+
success = not mTracker.lost and tracking
|
213 |
+
if success:
|
214 |
+
success = mTracker.run(frame=curr_frame)
|
215 |
+
if not success:
|
216 |
+
success = locMap.run(q_frame=curr_frame)
|
217 |
+
if success:
|
218 |
+
curr_frame.update_point3ds()
|
219 |
+
if tracking:
|
220 |
+
mTracker.lost = False
|
221 |
+
mTracker.last_frame = curr_frame
|
222 |
+
# '''
|
223 |
+
pred_seg = torch.max(pred['prediction'], dim=-1)[1] # [B, N, C]
|
224 |
+
pred_seg = pred_seg[0].cpu().numpy()
|
225 |
+
gt_seg = gt_seg[0].cpu().numpy()
|
226 |
+
iou = compute_iou(pred=pred_seg, target=gt_seg, n_class=pred_seg.shape[0],
|
227 |
+
ignored_ids=[0]) # 0 - background
|
228 |
+
prec = compute_precision(pred=pred_seg, target=gt_seg, ignored_ids=[0])
|
229 |
+
|
230 |
+
kpts = sparse_kpts[0].cpu().numpy()
|
231 |
+
if scene not in seg_results.keys():
|
232 |
+
seg_results[scene] = {
|
233 |
+
'day': {
|
234 |
+
'prec': [],
|
235 |
+
'iou': [],
|
236 |
+
'kpts': [],
|
237 |
+
},
|
238 |
+
'night': {
|
239 |
+
'prec': [],
|
240 |
+
'iou': [],
|
241 |
+
'kpts': [],
|
242 |
+
|
243 |
+
}
|
244 |
+
}
|
245 |
+
if fn.find('night') >= 0:
|
246 |
+
seg_results[scene]['night']['prec'].append(prec)
|
247 |
+
seg_results[scene]['night']['iou'].append(iou)
|
248 |
+
seg_results[scene]['night']['kpts'].append(kpts.shape[0])
|
249 |
+
else:
|
250 |
+
seg_results[scene]['day']['prec'].append(prec)
|
251 |
+
seg_results[scene]['day']['iou'].append(iou)
|
252 |
+
seg_results[scene]['day']['kpts'].append(kpts.shape[0])
|
253 |
+
|
254 |
+
print_text = 'name: {:s}, kpts: {:d}, iou: {:.3f}, prec: {:.3f}'.format(fn, kpts.shape[0], iou,
|
255 |
+
prec)
|
256 |
+
print(print_text)
|
257 |
+
# '''
|
258 |
+
|
259 |
+
t_feat = curr_frame.time_feat
|
260 |
+
t_rec = curr_frame.time_rec
|
261 |
+
t_loc = curr_frame.time_loc
|
262 |
+
t_ref = curr_frame.time_ref
|
263 |
+
t_total = t_feat + t_rec + t_loc + t_ref
|
264 |
+
time_results['feat'].append(t_feat)
|
265 |
+
time_results['rec'].append(t_rec)
|
266 |
+
time_results['loc'].append(t_loc)
|
267 |
+
time_results['ref'].append(t_ref)
|
268 |
+
time_results['total'].append(t_total)
|
269 |
+
|
270 |
+
poses[scene + '/' + fn] = (curr_frame.qvec, curr_frame.tvec)
|
271 |
+
q_err, t_err = curr_frame.compute_pose_error()
|
272 |
+
if q_err <= 5 and t_err <= 0.05:
|
273 |
+
err_ths_cnt[0] = err_ths_cnt[0] + 1
|
274 |
+
if q_err <= 2 and t_err <= 0.25:
|
275 |
+
err_ths_cnt[1] = err_ths_cnt[1] + 1
|
276 |
+
if q_err <= 5 and t_err <= 0.5:
|
277 |
+
err_ths_cnt[2] = err_ths_cnt[2] + 1
|
278 |
+
if q_err <= 10 and t_err <= 5:
|
279 |
+
err_ths_cnt[3] = err_ths_cnt[3] + 1
|
280 |
+
|
281 |
+
if success:
|
282 |
+
success_cases.append(scene + '/' + fn)
|
283 |
+
print_text = 'qname: {:s} localization success {:d}/{:d}, q_err: {:.2f}, t_err: {:.2f}, {:d}/{:d}/{:d}/{:d}/{:d}, time: {:.2f}/{:.2f}/{:.2f}/{:.2f}/{:.2f}'.format(
|
284 |
+
scene + '/' + fn, len(success_cases), n_total, q_err, t_err, err_ths_cnt[0],
|
285 |
+
err_ths_cnt[1],
|
286 |
+
err_ths_cnt[2],
|
287 |
+
err_ths_cnt[3],
|
288 |
+
n_total,
|
289 |
+
t_feat, t_rec, t_loc, t_ref, t_total
|
290 |
+
)
|
291 |
+
else:
|
292 |
+
failed_cases.append(scene + '/' + fn)
|
293 |
+
print_text = 'qname: {:s} localization fail {:d}/{:d}, q_err: {:.2f}, t_err: {:.2f}, {:d}/{:d}/{:d}/{:d}/{:d}, time: {:.2f}/{:.2f}/{:.2f}/{:.2f}/{:.2f}'.format(
|
294 |
+
scene + '/' + fn, len(failed_cases), n_total, q_err, t_err, err_ths_cnt[0],
|
295 |
+
err_ths_cnt[1],
|
296 |
+
err_ths_cnt[2],
|
297 |
+
err_ths_cnt[3],
|
298 |
+
n_total, t_feat, t_rec, t_loc, t_ref, t_total)
|
299 |
+
print(print_text)
|
third_party/pram/localization/loc_by_rec_online.py
ADDED
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> loc_by_rec
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 08/02/2024 15:26
|
7 |
+
=================================================='''
|
8 |
+
import torch
|
9 |
+
import pycolmap
|
10 |
+
from localization.multimap3d import MultiMap3D
|
11 |
+
from localization.frame import Frame
|
12 |
+
import yaml, cv2, time
|
13 |
+
import numpy as np
|
14 |
+
import os.path as osp
|
15 |
+
import threading
|
16 |
+
from recognition.vis_seg import vis_seg_point, generate_color_dic
|
17 |
+
from tools.common import resize_img
|
18 |
+
from localization.viewer import Viewer
|
19 |
+
from localization.tracker import Tracker
|
20 |
+
from localization.utils import read_query_info
|
21 |
+
from tools.common import puttext_with_background
|
22 |
+
|
23 |
+
|
24 |
+
def loc_by_rec_online(rec_model, config, local_feat, img_transforms=None):
|
25 |
+
seg_color = generate_color_dic(n_seg=2000)
|
26 |
+
dataset_path = config['dataset_path']
|
27 |
+
show = config['localization']['show']
|
28 |
+
if show:
|
29 |
+
cv2.namedWindow('img', cv2.WINDOW_NORMAL)
|
30 |
+
|
31 |
+
locMap = MultiMap3D(config=config, save_dir=None)
|
32 |
+
if config['dataset'][0] in ['Aachen']:
|
33 |
+
viewer_config = {'scene': 'outdoor',
|
34 |
+
'image_size_indoor': 4,
|
35 |
+
'image_line_width_indoor': 8, }
|
36 |
+
elif config['dataset'][0] in ['C']:
|
37 |
+
viewer_config = {'scene': 'outdoor'}
|
38 |
+
elif config['dataset'][0] in ['12Scenes', '7Scenes']:
|
39 |
+
viewer_config = {'scene': 'indoor', }
|
40 |
+
else:
|
41 |
+
viewer_config = {'scene': 'outdoor',
|
42 |
+
'image_size_indoor': 0.4,
|
43 |
+
'image_line_width_indoor': 2, }
|
44 |
+
# start viewer
|
45 |
+
mViewer = Viewer(locMap=locMap, seg_color=seg_color, config=viewer_config)
|
46 |
+
mViewer.refinement = locMap.do_refinement
|
47 |
+
# locMap.viewer = mViewer
|
48 |
+
viewer_thread = threading.Thread(target=mViewer.run)
|
49 |
+
viewer_thread.start()
|
50 |
+
|
51 |
+
# start tracker
|
52 |
+
mTracker = Tracker(locMap=locMap, matcher=locMap.matcher, config=config)
|
53 |
+
|
54 |
+
dataset_name = config['dataset'][0]
|
55 |
+
all_scene_query_info = {}
|
56 |
+
with open(osp.join(config['config_path'], '{:s}.yaml'.format(dataset_name)), 'r') as f:
|
57 |
+
scene_config = yaml.load(f, Loader=yaml.Loader)
|
58 |
+
|
59 |
+
# multiple scenes in a single dataset
|
60 |
+
err_ths_cnt = [0, 0, 0, 0]
|
61 |
+
|
62 |
+
show_time = -1
|
63 |
+
scenes = scene_config['scenes']
|
64 |
+
n_total = 0
|
65 |
+
for scene in scenes:
|
66 |
+
if len(config['localization']['loc_scene_name']) > 0:
|
67 |
+
if scene not in config['localization']['loc_scene_name']:
|
68 |
+
continue
|
69 |
+
|
70 |
+
query_path = osp.join(config['dataset_path'], dataset_name, scene, scene_config[scene]['query_path'])
|
71 |
+
query_info = read_query_info(query_fn=query_path)
|
72 |
+
all_scene_query_info[dataset_name + '/' + scene] = query_info
|
73 |
+
image_path = osp.join(dataset_path, dataset_name, scene)
|
74 |
+
for fn in sorted(query_info.keys()):
|
75 |
+
# for fn in sorted(query_info.keys())[880:][::5]: # darwinRGB-loc-outdoor-aligned
|
76 |
+
# for fn in sorted(query_info.keys())[3161:][::5]: # darwinRGB-loc-indoor-aligned
|
77 |
+
# for fn in sorted(query_info.keys())[2840:][::5]: # darwinRGB-loc-indoor-aligned
|
78 |
+
|
79 |
+
# for fn in sorted(query_info.keys())[2100:][::5]: # darwinRGB-loc-outdoor
|
80 |
+
# for fn in sorted(query_info.keys())[4360:][::5]: # darwinRGB-loc-indoor
|
81 |
+
# for fn in sorted(query_info.keys())[1380:]: # Cam-Church
|
82 |
+
# for fn in sorted(query_info.keys())[::5]: #ACUED-test2
|
83 |
+
# for fn in sorted(query_info.keys())[1260:]: # jesus aligned
|
84 |
+
# for fn in sorted(query_info.keys())[1260:]: # jesus aligned
|
85 |
+
# for fn in sorted(query_info.keys())[4850:]:
|
86 |
+
img = cv2.imread(osp.join(image_path, fn)) # BGR
|
87 |
+
|
88 |
+
camera_model, width, height, params = all_scene_query_info[dataset_name + '/' + scene][fn]
|
89 |
+
# camera = Camera(id=-1, model=camera_model, width=width, height=height, params=params)
|
90 |
+
camera = pycolmap.Camera(model=camera_model, width=int(width), height=int(height), params=params)
|
91 |
+
curr_frame = Frame(image=img, camera=camera, id=0, name=fn, scene_name=dataset_name + '/' + scene)
|
92 |
+
gt_sub_map = locMap.sub_maps[curr_frame.scene_name]
|
93 |
+
if gt_sub_map.gt_poses is not None and curr_frame.name in gt_sub_map.gt_poses.keys():
|
94 |
+
curr_frame.gt_qvec = gt_sub_map.gt_poses[curr_frame.name]['qvec']
|
95 |
+
curr_frame.gt_tvec = gt_sub_map.gt_poses[curr_frame.name]['tvec']
|
96 |
+
|
97 |
+
with torch.no_grad():
|
98 |
+
if config['image_dim'] == 1:
|
99 |
+
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
100 |
+
img_cuda = torch.from_numpy(img_gray / 255)[None].cuda().float()
|
101 |
+
else:
|
102 |
+
img_cuda = torch.from_numpy(img / 255).permute(2, 0, 1).cuda().float()
|
103 |
+
if img_transforms is not None:
|
104 |
+
img_cuda = img_transforms(img_cuda)[None]
|
105 |
+
else:
|
106 |
+
img_cuda = img_cuda[None]
|
107 |
+
|
108 |
+
t_start = time.time()
|
109 |
+
encoder_out = local_feat.extract_local_global(data={'image': img_cuda},
|
110 |
+
config={'min_keypoints': 128,
|
111 |
+
'max_keypoints': config['eval_max_keypoints'],
|
112 |
+
}
|
113 |
+
)
|
114 |
+
t_feat = time.time() - t_start
|
115 |
+
# global_descriptors_cuda = encoder_out['global_descriptors']
|
116 |
+
scores_cuda = encoder_out['scores'][0][None]
|
117 |
+
kpts_cuda = encoder_out['keypoints'][0][None]
|
118 |
+
descriptors_cuda = encoder_out['descriptors'][0][None].permute(0, 2, 1)
|
119 |
+
|
120 |
+
curr_frame.add_keypoints(keypoints=np.hstack([kpts_cuda[0].cpu().numpy(),
|
121 |
+
scores_cuda[0].cpu().numpy().reshape(-1, 1)]),
|
122 |
+
descriptors=descriptors_cuda[0].cpu().numpy())
|
123 |
+
curr_frame.time_feat = t_feat
|
124 |
+
|
125 |
+
t_start = time.time()
|
126 |
+
_, seg_descriptors = local_feat.sample(score_map=encoder_out['score_map'],
|
127 |
+
semi_descs=encoder_out['mid_features'],
|
128 |
+
kpts=kpts_cuda[0],
|
129 |
+
norm_desc=config['norm_desc'])
|
130 |
+
rec_out = rec_model({'scores': scores_cuda,
|
131 |
+
'seg_descriptors': seg_descriptors[None].permute(0, 2, 1),
|
132 |
+
'keypoints': kpts_cuda,
|
133 |
+
'image': img_cuda})
|
134 |
+
t_rec = time.time() - t_start
|
135 |
+
curr_frame.time_rec = t_rec
|
136 |
+
|
137 |
+
pred = {
|
138 |
+
'scores': scores_cuda,
|
139 |
+
'keypoints': kpts_cuda,
|
140 |
+
'descriptors': descriptors_cuda,
|
141 |
+
# 'global_descriptors': global_descriptors_cuda,
|
142 |
+
'image_size': np.array([img.shape[1], img.shape[0]])[None],
|
143 |
+
}
|
144 |
+
|
145 |
+
pred = {**pred, **rec_out}
|
146 |
+
pred_seg = torch.max(pred['prediction'], dim=2)[1] # [B, N, C]
|
147 |
+
|
148 |
+
pred_seg = pred_seg[0].cpu().numpy()
|
149 |
+
kpts = kpts_cuda[0].cpu().numpy()
|
150 |
+
segmentations = pred['prediction'][0] # .cpu().numpy() # [N, C]
|
151 |
+
curr_frame.add_segmentations(segmentations=segmentations,
|
152 |
+
filtering_threshold=config['localization']['pre_filtering_th'])
|
153 |
+
|
154 |
+
img_pred_seg = vis_seg_point(img=img, kpts=curr_frame.keypoints,
|
155 |
+
segs=curr_frame.seg_ids + 1, seg_color=seg_color, radius=9)
|
156 |
+
show_text = 'kpts: {:d}'.format(kpts.shape[0])
|
157 |
+
img_pred_seg = cv2.putText(img=img_pred_seg,
|
158 |
+
text=show_text,
|
159 |
+
org=(50, 30),
|
160 |
+
fontFace=cv2.FONT_HERSHEY_SIMPLEX,
|
161 |
+
fontScale=1, color=(0, 0, 255),
|
162 |
+
thickness=2, lineType=cv2.LINE_AA)
|
163 |
+
curr_frame.image_rec = img_pred_seg
|
164 |
+
|
165 |
+
if show:
|
166 |
+
img_text = puttext_with_background(image=img, text='Press C - continue | S - pause | Q - exit',
|
167 |
+
org=(30, 50),
|
168 |
+
bg_color=(255, 255, 255),
|
169 |
+
text_color=(0, 0, 255),
|
170 |
+
fontScale=1, thickness=2)
|
171 |
+
cv2.imshow('img', img_text)
|
172 |
+
key = cv2.waitKey(show_time)
|
173 |
+
if key == ord('q'):
|
174 |
+
exit(0)
|
175 |
+
elif key == ord('s'):
|
176 |
+
show_time = -1
|
177 |
+
elif key == ord('c'):
|
178 |
+
show_time = 1
|
179 |
+
|
180 |
+
# Step1: do tracker first
|
181 |
+
success = not mTracker.lost and mViewer.tracking
|
182 |
+
if success:
|
183 |
+
success = mTracker.run(frame=curr_frame)
|
184 |
+
if success:
|
185 |
+
mViewer.update(curr_frame=curr_frame)
|
186 |
+
|
187 |
+
if not success:
|
188 |
+
# success = locMap.run(q_frame=curr_frame, q_segs=segmentations)
|
189 |
+
success = locMap.run(q_frame=curr_frame)
|
190 |
+
if success:
|
191 |
+
mViewer.update(curr_frame=curr_frame)
|
192 |
+
|
193 |
+
if success:
|
194 |
+
curr_frame.update_point3ds()
|
195 |
+
if mViewer.tracking:
|
196 |
+
mTracker.lost = False
|
197 |
+
mTracker.last_frame = curr_frame
|
198 |
+
|
199 |
+
time.sleep(50 / 1000)
|
200 |
+
locMap.do_refinement = mViewer.refinement
|
201 |
+
|
202 |
+
n_total = n_total + 1
|
203 |
+
q_err, t_err = curr_frame.compute_pose_error()
|
204 |
+
if q_err <= 5 and t_err <= 0.05:
|
205 |
+
err_ths_cnt[0] = err_ths_cnt[0] + 1
|
206 |
+
if q_err <= 2 and t_err <= 0.25:
|
207 |
+
err_ths_cnt[1] = err_ths_cnt[1] + 1
|
208 |
+
if q_err <= 5 and t_err <= 0.5:
|
209 |
+
err_ths_cnt[2] = err_ths_cnt[2] + 1
|
210 |
+
if q_err <= 10 and t_err <= 5:
|
211 |
+
err_ths_cnt[3] = err_ths_cnt[3] + 1
|
212 |
+
time_total = curr_frame.time_feat + curr_frame.time_rec + curr_frame.time_loc + curr_frame.time_ref
|
213 |
+
print_text = 'qname: {:s} localization {:b}, q_err: {:.2f}, t_err: {:.2f}, {:d}/{:d}/{:d}/{:d}/{:d}, time: {:.2f}/{:.2f}/{:.2f}/{:.2f}/{:.2f}'.format(
|
214 |
+
scene + '/' + fn, success, q_err, t_err,
|
215 |
+
err_ths_cnt[0],
|
216 |
+
err_ths_cnt[1],
|
217 |
+
err_ths_cnt[2],
|
218 |
+
err_ths_cnt[3],
|
219 |
+
n_total,
|
220 |
+
curr_frame.time_feat, curr_frame.time_rec, curr_frame.time_loc, curr_frame.time_ref, time_total
|
221 |
+
)
|
222 |
+
print(print_text)
|
223 |
+
|
224 |
+
mViewer.terminate()
|
225 |
+
viewer_thread.join()
|
third_party/pram/localization/localizer.py
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> hloc
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 07/02/2024 16:45
|
7 |
+
=================================================='''
|
8 |
+
|
9 |
+
import os
|
10 |
+
import os.path as osp
|
11 |
+
from tqdm import tqdm
|
12 |
+
import argparse
|
13 |
+
import time
|
14 |
+
import logging
|
15 |
+
import h5py
|
16 |
+
import numpy as np
|
17 |
+
from pathlib import Path
|
18 |
+
from colmap_utils.read_write_model import read_model
|
19 |
+
from colmap_utils.parsers import parse_image_lists_with_intrinsics
|
20 |
+
# localization
|
21 |
+
from localization.match_features_batch import confs
|
22 |
+
from localization.base_model import dynamic_load
|
23 |
+
from localization import matchers
|
24 |
+
from localization.utils import compute_pose_error, read_gt_pose, read_retrieval_results
|
25 |
+
from localization.pose_estimator import pose_estimator_hloc, pose_estimator_iterative
|
26 |
+
|
27 |
+
|
28 |
+
def run(args):
|
29 |
+
if args.gt_pose_fn is not None:
|
30 |
+
gt_poses = read_gt_pose(path=args.gt_pose_fn)
|
31 |
+
else:
|
32 |
+
gt_poses = {}
|
33 |
+
retrievals = read_retrieval_results(args.retrieval)
|
34 |
+
|
35 |
+
save_root = args.save_root # path to save
|
36 |
+
os.makedirs(save_root, exist_ok=True)
|
37 |
+
matcher_name = args.matcher_method # matching method
|
38 |
+
print('matcher: ', confs[args.matcher_method]['model']['name'])
|
39 |
+
Model = dynamic_load(matchers, confs[args.matcher_method]['model']['name'])
|
40 |
+
matcher = Model(confs[args.matcher_method]['model']).eval().cuda()
|
41 |
+
|
42 |
+
local_feat_name = args.features.as_posix().split("/")[-1].split(".")[0] # name of local features
|
43 |
+
save_fn = '{:s}_{:s}'.format(local_feat_name, matcher_name)
|
44 |
+
if args.use_hloc:
|
45 |
+
save_fn = 'hloc_' + save_fn
|
46 |
+
save_fn = osp.join(save_root, save_fn)
|
47 |
+
|
48 |
+
queries = parse_image_lists_with_intrinsics(args.queries)
|
49 |
+
_, db_images, points3D = read_model(str(args.reference_sfm), '.bin')
|
50 |
+
db_name_to_id = {image.name: i for i, image in db_images.items()}
|
51 |
+
feature_file = h5py.File(args.features, 'r')
|
52 |
+
|
53 |
+
tag = ''
|
54 |
+
if args.do_covisible_opt:
|
55 |
+
tag = tag + "_o" + str(int(args.obs_thresh)) + 'op' + str(int(args.covisibility_frame))
|
56 |
+
tag = tag + "th" + str(int(args.opt_thresh))
|
57 |
+
if args.iters > 0:
|
58 |
+
tag = tag + "i" + str(int(args.iters))
|
59 |
+
|
60 |
+
log_fn = save_fn + tag
|
61 |
+
vis_dir = save_fn + tag
|
62 |
+
results = save_fn + tag
|
63 |
+
|
64 |
+
full_log_fn = log_fn + '_full.log'
|
65 |
+
loc_log_fn = log_fn + '_loc.npy'
|
66 |
+
results = Path(results + '.txt')
|
67 |
+
vis_dir = Path(vis_dir)
|
68 |
+
if vis_dir is not None:
|
69 |
+
Path(vis_dir).mkdir(exist_ok=True)
|
70 |
+
print("save_fn: ", log_fn)
|
71 |
+
|
72 |
+
logging.info('Starting localization...')
|
73 |
+
poses = {}
|
74 |
+
failed_cases = []
|
75 |
+
n_total = 0
|
76 |
+
n_failed = 0
|
77 |
+
full_log_info = ''
|
78 |
+
loc_results = {}
|
79 |
+
|
80 |
+
error_ths = ((0.25, 2), (0.5, 5), (5, 10))
|
81 |
+
success = [0, 0, 0]
|
82 |
+
total_loc_time = []
|
83 |
+
|
84 |
+
for qname, qinfo in tqdm(queries):
|
85 |
+
kpq = feature_file[qname]['keypoints'].__array__()
|
86 |
+
n_total += 1
|
87 |
+
time_start = time.time()
|
88 |
+
|
89 |
+
if qname in retrievals.keys():
|
90 |
+
cans = retrievals[qname]
|
91 |
+
db_ids = [db_name_to_id[v] for v in cans]
|
92 |
+
else:
|
93 |
+
cans = []
|
94 |
+
db_ids = []
|
95 |
+
time_coarse = time.time()
|
96 |
+
|
97 |
+
if args.use_hloc:
|
98 |
+
output = pose_estimator_hloc(qname=qname, qinfo=qinfo, db_ids=db_ids, db_images=db_images,
|
99 |
+
points3D=points3D,
|
100 |
+
feature_file=feature_file,
|
101 |
+
thresh=args.ransac_thresh,
|
102 |
+
image_dir=args.image_dir,
|
103 |
+
matcher=matcher,
|
104 |
+
log_info='',
|
105 |
+
query_img_prefix='',
|
106 |
+
db_img_prefix='')
|
107 |
+
else: # should be faster and more accurate than hloc
|
108 |
+
t_start = time.time()
|
109 |
+
output = pose_estimator_iterative(qname=qname,
|
110 |
+
qinfo=qinfo,
|
111 |
+
matcher=matcher,
|
112 |
+
db_ids=db_ids,
|
113 |
+
db_images=db_images,
|
114 |
+
points3D=points3D,
|
115 |
+
feature_file=feature_file,
|
116 |
+
thresh=args.ransac_thresh,
|
117 |
+
image_dir=args.image_dir,
|
118 |
+
do_covisibility_opt=args.do_covisible_opt,
|
119 |
+
covisibility_frame=args.covisibility_frame,
|
120 |
+
log_info='',
|
121 |
+
inlier_th=args.inlier_thresh,
|
122 |
+
obs_th=args.obs_thresh,
|
123 |
+
opt_th=args.opt_thresh,
|
124 |
+
gt_qvec=gt_poses[qname]['qvec'] if qname in gt_poses.keys() else None,
|
125 |
+
gt_tvec=gt_poses[qname]['tvec'] if qname in gt_poses.keys() else None,
|
126 |
+
query_img_prefix='',
|
127 |
+
db_img_prefix='database',
|
128 |
+
)
|
129 |
+
time_full = time.time()
|
130 |
+
|
131 |
+
qvec = output['qvec']
|
132 |
+
tvec = output['tvec']
|
133 |
+
loc_time = time_full - time_start
|
134 |
+
total_loc_time.append(loc_time)
|
135 |
+
|
136 |
+
poses[qname] = (qvec, tvec)
|
137 |
+
print_text = "All {:d}/{:d} failed cases, time[cs/fn]: {:.2f}/{:.2f}".format(
|
138 |
+
n_failed, n_total,
|
139 |
+
time_coarse - time_start,
|
140 |
+
time_full - time_coarse,
|
141 |
+
)
|
142 |
+
|
143 |
+
if qname in gt_poses.keys():
|
144 |
+
gt_qvec = gt_poses[qname]['qvec']
|
145 |
+
gt_tvec = gt_poses[qname]['tvec']
|
146 |
+
|
147 |
+
q_error, t_error = compute_pose_error(pred_qcw=qvec, pred_tcw=tvec, gt_qcw=gt_qvec, gt_tcw=gt_tvec)
|
148 |
+
|
149 |
+
for error_idx, th in enumerate(error_ths):
|
150 |
+
if t_error <= th[0] and q_error <= th[1]:
|
151 |
+
success[error_idx] += 1
|
152 |
+
print_text += (
|
153 |
+
', q_error:{:.2f} t_error:{:.2f} {:d}/{:d}/{:d}/{:d}, time: {:.2f}, {:d}pts'.format(q_error, t_error,
|
154 |
+
success[0],
|
155 |
+
success[1],
|
156 |
+
success[2], n_total,
|
157 |
+
loc_time,
|
158 |
+
kpq.shape[0]))
|
159 |
+
if output['num_inliers'] == 0:
|
160 |
+
failed_cases.append(qname)
|
161 |
+
|
162 |
+
loc_results[qname] = {
|
163 |
+
'keypoints_query': output['keypoints_query'],
|
164 |
+
'points3D_ids': output['points3D_ids'],
|
165 |
+
}
|
166 |
+
full_log_info = full_log_info + output['log_info']
|
167 |
+
full_log_info += (print_text + "\n")
|
168 |
+
print(print_text)
|
169 |
+
|
170 |
+
logs_path = f'{results}.failed'
|
171 |
+
with open(logs_path, 'w') as f:
|
172 |
+
for v in failed_cases:
|
173 |
+
print(v)
|
174 |
+
f.write(v + "\n")
|
175 |
+
|
176 |
+
logging.info(f'Localized {len(poses)} / {len(queries)} images.')
|
177 |
+
logging.info(f'Writing poses to {results}...')
|
178 |
+
# logging.info(f'Mean loc time: {np.mean(total_loc_time)}...')
|
179 |
+
print('Mean loc time: {:.2f}...'.format(np.mean(total_loc_time)))
|
180 |
+
with open(results, 'w') as f:
|
181 |
+
for q in poses:
|
182 |
+
qvec, tvec = poses[q]
|
183 |
+
qvec = ' '.join(map(str, qvec))
|
184 |
+
tvec = ' '.join(map(str, tvec))
|
185 |
+
name = q
|
186 |
+
f.write(f'{name} {qvec} {tvec}\n')
|
187 |
+
|
188 |
+
with open(full_log_fn, 'w') as f:
|
189 |
+
f.write(full_log_info)
|
190 |
+
|
191 |
+
np.save(loc_log_fn, loc_results)
|
192 |
+
print('Save logs to ', loc_log_fn)
|
193 |
+
logging.info('Done!')
|
194 |
+
|
195 |
+
|
196 |
+
if __name__ == '__main__':
|
197 |
+
parser = argparse.ArgumentParser()
|
198 |
+
parser.add_argument('--image_dir', type=str, required=True)
|
199 |
+
parser.add_argument('--dataset', type=str, required=True)
|
200 |
+
parser.add_argument('--reference_sfm', type=Path, required=True)
|
201 |
+
parser.add_argument('--queries', type=Path, required=True)
|
202 |
+
parser.add_argument('--features', type=Path, required=True)
|
203 |
+
parser.add_argument('--ransac_thresh', type=float, default=12)
|
204 |
+
parser.add_argument('--covisibility_frame', type=int, default=50)
|
205 |
+
parser.add_argument('--do_covisible_opt', action='store_true')
|
206 |
+
parser.add_argument('--use_hloc', action='store_true')
|
207 |
+
parser.add_argument('--matcher_method', type=str, default="NNM")
|
208 |
+
parser.add_argument('--inlier_thresh', type=int, default=50)
|
209 |
+
parser.add_argument('--obs_thresh', type=float, default=3)
|
210 |
+
parser.add_argument('--opt_thresh', type=float, default=12)
|
211 |
+
parser.add_argument('--save_root', type=str, required=True)
|
212 |
+
parser.add_argument('--retrieval', type=Path, default=None)
|
213 |
+
parser.add_argument('--gt_pose_fn', type=str, default=None)
|
214 |
+
|
215 |
+
args = parser.parse_args()
|
216 |
+
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
|
217 |
+
run(args=args)
|
third_party/pram/localization/match_features.py
ADDED
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import torch
|
3 |
+
from pathlib import Path
|
4 |
+
import h5py
|
5 |
+
import logging
|
6 |
+
from tqdm import tqdm
|
7 |
+
import pprint
|
8 |
+
|
9 |
+
import localization.matchers as matchers
|
10 |
+
from localization.base_model import dynamic_load
|
11 |
+
from colmap_utils.parsers import names_to_pair
|
12 |
+
|
13 |
+
confs = {
|
14 |
+
'gm': {
|
15 |
+
'output': 'gm',
|
16 |
+
'model': {
|
17 |
+
'name': 'gm',
|
18 |
+
'weight_path': 'weights/imp_gm.900.pth',
|
19 |
+
'sinkhorn_iterations': 20,
|
20 |
+
},
|
21 |
+
},
|
22 |
+
'gml': {
|
23 |
+
'output': 'gml',
|
24 |
+
'model': {
|
25 |
+
'name': 'gml',
|
26 |
+
'weight_path': 'weights/imp_gml.920.pth',
|
27 |
+
'sinkhorn_iterations': 20,
|
28 |
+
},
|
29 |
+
},
|
30 |
+
|
31 |
+
'adagml': {
|
32 |
+
'output': 'adagml',
|
33 |
+
'model': {
|
34 |
+
'name': 'adagml',
|
35 |
+
'weight_path': 'weights/imp_adagml.80.pth',
|
36 |
+
'sinkhorn_iterations': 20,
|
37 |
+
},
|
38 |
+
},
|
39 |
+
|
40 |
+
'superglue': {
|
41 |
+
'output': 'superglue',
|
42 |
+
'model': {
|
43 |
+
'name': 'superglue',
|
44 |
+
'weights': 'outdoor',
|
45 |
+
'sinkhorn_iterations': 20,
|
46 |
+
'weight_path': 'weights/superglue_outdoor.pth',
|
47 |
+
},
|
48 |
+
},
|
49 |
+
'NNM': {
|
50 |
+
'output': 'NNM',
|
51 |
+
'model': {
|
52 |
+
'name': 'nearest_neighbor',
|
53 |
+
'do_mutual_check': True,
|
54 |
+
'distance_threshold': None,
|
55 |
+
},
|
56 |
+
},
|
57 |
+
}
|
58 |
+
|
59 |
+
|
60 |
+
@torch.no_grad()
|
61 |
+
def main(conf, pairs, features, export_dir, exhaustive=False):
|
62 |
+
logging.info('Matching local features with configuration:'
|
63 |
+
f'\n{pprint.pformat(conf)}')
|
64 |
+
|
65 |
+
feature_path = Path(export_dir, features + '.h5')
|
66 |
+
assert feature_path.exists(), feature_path
|
67 |
+
feature_file = h5py.File(str(feature_path), 'r')
|
68 |
+
pairs_name = pairs.stem
|
69 |
+
if not exhaustive:
|
70 |
+
assert pairs.exists(), pairs
|
71 |
+
with open(pairs, 'r') as f:
|
72 |
+
pair_list = f.read().rstrip('\n').split('\n')
|
73 |
+
elif exhaustive:
|
74 |
+
logging.info(f'Writing exhaustive match pairs to {pairs}.')
|
75 |
+
assert not pairs.exists(), pairs
|
76 |
+
|
77 |
+
# get the list of images from the feature file
|
78 |
+
images = []
|
79 |
+
feature_file.visititems(
|
80 |
+
lambda name, obj: images.append(obj.parent.name.strip('/'))
|
81 |
+
if isinstance(obj, h5py.Dataset) else None)
|
82 |
+
images = list(set(images))
|
83 |
+
|
84 |
+
pair_list = [' '.join((images[i], images[j]))
|
85 |
+
for i in range(len(images)) for j in range(i)]
|
86 |
+
with open(str(pairs), 'w') as f:
|
87 |
+
f.write('\n'.join(pair_list))
|
88 |
+
|
89 |
+
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
90 |
+
Model = dynamic_load(matchers, conf['model']['name'])
|
91 |
+
model = Model(conf['model']).eval().to(device)
|
92 |
+
|
93 |
+
match_name = f'{features}-{conf["output"]}-{pairs_name}'
|
94 |
+
match_path = Path(export_dir, match_name + '.h5')
|
95 |
+
|
96 |
+
match_file = h5py.File(str(match_path), 'a')
|
97 |
+
|
98 |
+
matched = set()
|
99 |
+
for pair in tqdm(pair_list, smoothing=.1):
|
100 |
+
name0, name1 = pair.split(' ')
|
101 |
+
pair = names_to_pair(name0, name1)
|
102 |
+
|
103 |
+
# Avoid to recompute duplicates to save time
|
104 |
+
if len({(name0, name1), (name1, name0)} & matched) \
|
105 |
+
or pair in match_file:
|
106 |
+
continue
|
107 |
+
|
108 |
+
data = {}
|
109 |
+
feats0, feats1 = feature_file[name0], feature_file[name1]
|
110 |
+
for k in feats1.keys():
|
111 |
+
# data[k + '0'] = feats0[k].__array__()
|
112 |
+
if k == 'descriptors':
|
113 |
+
data[k + '0'] = feats0[k][()].transpose() # [N D]
|
114 |
+
else:
|
115 |
+
data[k + '0'] = feats0[k][()]
|
116 |
+
for k in feats1.keys():
|
117 |
+
# data[k + '1'] = feats1[k].__array__()
|
118 |
+
# data[k + '1'] = feats1[k][()].transpose() # [N D]
|
119 |
+
if k == 'descriptors':
|
120 |
+
data[k + '1'] = feats1[k][()].transpose() # [N D]
|
121 |
+
else:
|
122 |
+
data[k + '1'] = feats1[k][()]
|
123 |
+
data = {k: torch.from_numpy(v)[None].float().to(device)
|
124 |
+
for k, v in data.items()}
|
125 |
+
|
126 |
+
# some matchers might expect an image but only use its size
|
127 |
+
data['image0'] = torch.empty((1, 1,) + tuple(feats0['image_size'])[::-1])
|
128 |
+
data['image1'] = torch.empty((1, 1,) + tuple(feats1['image_size'])[::-1])
|
129 |
+
|
130 |
+
pred = model(data)
|
131 |
+
grp = match_file.create_group(pair)
|
132 |
+
matches = pred['matches0'][0].cpu().short().numpy()
|
133 |
+
grp.create_dataset('matches0', data=matches)
|
134 |
+
|
135 |
+
if 'matching_scores0' in pred:
|
136 |
+
scores = pred['matching_scores0'][0].cpu().half().numpy()
|
137 |
+
grp.create_dataset('matching_scores0', data=scores)
|
138 |
+
|
139 |
+
matched |= {(name0, name1), (name1, name0)}
|
140 |
+
|
141 |
+
match_file.close()
|
142 |
+
logging.info('Finished exporting matches.')
|
143 |
+
|
144 |
+
return match_path
|
145 |
+
|
146 |
+
|
147 |
+
if __name__ == '__main__':
|
148 |
+
parser = argparse.ArgumentParser()
|
149 |
+
parser.add_argument('--export_dir', type=Path, required=True)
|
150 |
+
parser.add_argument('--features', type=str, required=True)
|
151 |
+
parser.add_argument('--pairs', type=Path, required=True)
|
152 |
+
parser.add_argument('--conf', type=str, required=True, choices=list(confs.keys()))
|
153 |
+
parser.add_argument('--exhaustive', action='store_true')
|
154 |
+
args = parser.parse_args()
|
155 |
+
main(confs[args.conf], args.pairs, args.features, args.export_dir,
|
156 |
+
exhaustive=args.exhaustive)
|
third_party/pram/localization/match_features_batch.py
ADDED
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import torch
|
3 |
+
from pathlib import Path
|
4 |
+
import h5py
|
5 |
+
import logging
|
6 |
+
from tqdm import tqdm
|
7 |
+
import pprint
|
8 |
+
from queue import Queue
|
9 |
+
from threading import Thread
|
10 |
+
from functools import partial
|
11 |
+
from typing import Dict, List, Optional, Tuple, Union
|
12 |
+
|
13 |
+
import localization.matchers as matchers
|
14 |
+
from localization.base_model import dynamic_load
|
15 |
+
from colmap_utils.parsers import names_to_pair, names_to_pair_old, parse_retrieval
|
16 |
+
|
17 |
+
confs = {
|
18 |
+
'gm': {
|
19 |
+
'output': 'gm',
|
20 |
+
'model': {
|
21 |
+
'name': 'gm',
|
22 |
+
'weight_path': 'weights/imp_gm.900.pth',
|
23 |
+
'sinkhorn_iterations': 20,
|
24 |
+
},
|
25 |
+
},
|
26 |
+
'gml': {
|
27 |
+
'output': 'gml',
|
28 |
+
'model': {
|
29 |
+
'name': 'gml',
|
30 |
+
'weight_path': 'weights/imp_gml.920.pth',
|
31 |
+
'sinkhorn_iterations': 20,
|
32 |
+
},
|
33 |
+
},
|
34 |
+
|
35 |
+
'adagml': {
|
36 |
+
'output': 'adagml',
|
37 |
+
'model': {
|
38 |
+
'name': 'adagml',
|
39 |
+
'weight_path': 'weights/imp_adagml.80.pth',
|
40 |
+
'sinkhorn_iterations': 20,
|
41 |
+
},
|
42 |
+
},
|
43 |
+
|
44 |
+
'superglue': {
|
45 |
+
'output': 'superglue',
|
46 |
+
'model': {
|
47 |
+
'name': 'superglue',
|
48 |
+
'weights': 'outdoor',
|
49 |
+
'sinkhorn_iterations': 20,
|
50 |
+
'weight_path': 'weights/superglue_outdoor.pth',
|
51 |
+
},
|
52 |
+
},
|
53 |
+
'NNM': {
|
54 |
+
'output': 'NNM',
|
55 |
+
'model': {
|
56 |
+
'name': 'nearest_neighbor',
|
57 |
+
'do_mutual_check': True,
|
58 |
+
'distance_threshold': None,
|
59 |
+
},
|
60 |
+
},
|
61 |
+
}
|
62 |
+
|
63 |
+
|
64 |
+
class WorkQueue:
|
65 |
+
def __init__(self, work_fn, num_threads=1):
|
66 |
+
self.queue = Queue(num_threads)
|
67 |
+
self.threads = [
|
68 |
+
Thread(target=self.thread_fn, args=(work_fn,)) for _ in range(num_threads)
|
69 |
+
]
|
70 |
+
for thread in self.threads:
|
71 |
+
thread.start()
|
72 |
+
|
73 |
+
def join(self):
|
74 |
+
for thread in self.threads:
|
75 |
+
self.queue.put(None)
|
76 |
+
for thread in self.threads:
|
77 |
+
thread.join()
|
78 |
+
|
79 |
+
def thread_fn(self, work_fn):
|
80 |
+
item = self.queue.get()
|
81 |
+
while item is not None:
|
82 |
+
work_fn(item)
|
83 |
+
item = self.queue.get()
|
84 |
+
|
85 |
+
def put(self, data):
|
86 |
+
self.queue.put(data)
|
87 |
+
|
88 |
+
|
89 |
+
class FeaturePairsDataset(torch.utils.data.Dataset):
|
90 |
+
def __init__(self, pairs, feature_path_q, feature_path_r):
|
91 |
+
self.pairs = pairs
|
92 |
+
self.feature_path_q = feature_path_q
|
93 |
+
self.feature_path_r = feature_path_r
|
94 |
+
|
95 |
+
def __getitem__(self, idx):
|
96 |
+
name0, name1 = self.pairs[idx]
|
97 |
+
data = {}
|
98 |
+
with h5py.File(self.feature_path_q, "r") as fd:
|
99 |
+
grp = fd[name0]
|
100 |
+
for k, v in grp.items():
|
101 |
+
data[k + "0"] = torch.from_numpy(v.__array__()).float()
|
102 |
+
if k == 'descriptors':
|
103 |
+
data[k + '0'] = data[k + '0'].t()
|
104 |
+
# some matchers might expect an image but only use its size
|
105 |
+
data["image0"] = torch.empty((1,) + tuple(grp["image_size"])[::-1])
|
106 |
+
with h5py.File(self.feature_path_r, "r") as fd:
|
107 |
+
grp = fd[name1]
|
108 |
+
for k, v in grp.items():
|
109 |
+
data[k + "1"] = torch.from_numpy(v.__array__()).float()
|
110 |
+
if k == 'descriptors':
|
111 |
+
data[k + '1'] = data[k + '1'].t()
|
112 |
+
data["image1"] = torch.empty((1,) + tuple(grp["image_size"])[::-1])
|
113 |
+
return data
|
114 |
+
|
115 |
+
def __len__(self):
|
116 |
+
return len(self.pairs)
|
117 |
+
|
118 |
+
|
119 |
+
def writer_fn(inp, match_path):
|
120 |
+
pair, pred = inp
|
121 |
+
with h5py.File(str(match_path), "a", libver="latest") as fd:
|
122 |
+
if pair in fd:
|
123 |
+
del fd[pair]
|
124 |
+
grp = fd.create_group(pair)
|
125 |
+
matches = pred["matches0"][0].cpu().short().numpy()
|
126 |
+
grp.create_dataset("matches0", data=matches)
|
127 |
+
if "matching_scores0" in pred:
|
128 |
+
scores = pred["matching_scores0"][0].cpu().half().numpy()
|
129 |
+
grp.create_dataset("matching_scores0", data=scores)
|
130 |
+
|
131 |
+
|
132 |
+
def main(
|
133 |
+
conf: Dict,
|
134 |
+
pairs: Path,
|
135 |
+
features: Union[Path, str],
|
136 |
+
export_dir: Optional[Path] = None,
|
137 |
+
matches: Optional[Path] = None,
|
138 |
+
features_ref: Optional[Path] = None,
|
139 |
+
overwrite: bool = False,
|
140 |
+
) -> Path:
|
141 |
+
if isinstance(features, Path) or Path(features).exists():
|
142 |
+
features_q = features
|
143 |
+
if matches is None:
|
144 |
+
raise ValueError(
|
145 |
+
"Either provide both features and matches as Path" " or both as names."
|
146 |
+
)
|
147 |
+
else:
|
148 |
+
if export_dir is None:
|
149 |
+
raise ValueError(
|
150 |
+
"Provide an export_dir if features is not" f" a file path: {features}."
|
151 |
+
)
|
152 |
+
features_q = Path(export_dir, features + ".h5")
|
153 |
+
if matches is None:
|
154 |
+
matches = Path(export_dir, f'{features}-{conf["output"]}-{pairs.stem}.h5')
|
155 |
+
|
156 |
+
if features_ref is None:
|
157 |
+
features_ref = features_q
|
158 |
+
match_from_paths(conf, pairs, matches, features_q, features_ref, overwrite)
|
159 |
+
|
160 |
+
return matches
|
161 |
+
|
162 |
+
|
163 |
+
def find_unique_new_pairs(pairs_all: List[Tuple[str]], match_path: Path = None):
|
164 |
+
"""Avoid to recompute duplicates to save time."""
|
165 |
+
pairs = set()
|
166 |
+
for i, j in pairs_all:
|
167 |
+
if (j, i) not in pairs:
|
168 |
+
pairs.add((i, j))
|
169 |
+
pairs = list(pairs)
|
170 |
+
if match_path is not None and match_path.exists():
|
171 |
+
with h5py.File(str(match_path), "r", libver="latest") as fd:
|
172 |
+
pairs_filtered = []
|
173 |
+
for i, j in pairs:
|
174 |
+
if (
|
175 |
+
names_to_pair(i, j) in fd
|
176 |
+
or names_to_pair(j, i) in fd
|
177 |
+
or names_to_pair_old(i, j) in fd
|
178 |
+
or names_to_pair_old(j, i) in fd
|
179 |
+
):
|
180 |
+
continue
|
181 |
+
pairs_filtered.append((i, j))
|
182 |
+
return pairs_filtered
|
183 |
+
return pairs
|
184 |
+
|
185 |
+
|
186 |
+
@torch.no_grad()
|
187 |
+
def match_from_paths(
|
188 |
+
conf: Dict,
|
189 |
+
pairs_path: Path,
|
190 |
+
match_path: Path,
|
191 |
+
feature_path_q: Path,
|
192 |
+
feature_path_ref: Path,
|
193 |
+
overwrite: bool = False,
|
194 |
+
) -> Path:
|
195 |
+
logging.info(
|
196 |
+
"Matching local features with configuration:" f"\n{pprint.pformat(conf)}"
|
197 |
+
)
|
198 |
+
|
199 |
+
if not feature_path_q.exists():
|
200 |
+
raise FileNotFoundError(f"Query feature file {feature_path_q}.")
|
201 |
+
if not feature_path_ref.exists():
|
202 |
+
raise FileNotFoundError(f"Reference feature file {feature_path_ref}.")
|
203 |
+
match_path.parent.mkdir(exist_ok=True, parents=True)
|
204 |
+
|
205 |
+
assert pairs_path.exists(), pairs_path
|
206 |
+
pairs = parse_retrieval(pairs_path)
|
207 |
+
pairs = [(q, r) for q, rs in pairs.items() for r in rs]
|
208 |
+
pairs = find_unique_new_pairs(pairs, None if overwrite else match_path)
|
209 |
+
if len(pairs) == 0:
|
210 |
+
logging.info("Skipping the matching.")
|
211 |
+
return
|
212 |
+
|
213 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
214 |
+
Model = dynamic_load(matchers, conf["model"]["name"])
|
215 |
+
model = Model(conf["model"]).eval().to(device)
|
216 |
+
|
217 |
+
dataset = FeaturePairsDataset(pairs, feature_path_q, feature_path_ref)
|
218 |
+
loader = torch.utils.data.DataLoader(
|
219 |
+
dataset, num_workers=4, batch_size=1, shuffle=False, pin_memory=True
|
220 |
+
)
|
221 |
+
writer_queue = WorkQueue(partial(writer_fn, match_path=match_path), 5)
|
222 |
+
|
223 |
+
for idx, data in enumerate(tqdm(loader, smoothing=0.1)):
|
224 |
+
data = {
|
225 |
+
k: v if k.startswith("image") else v.to(device, non_blocking=True)
|
226 |
+
for k, v in data.items()
|
227 |
+
}
|
228 |
+
pred = model(data)
|
229 |
+
pair = names_to_pair(*pairs[idx])
|
230 |
+
writer_queue.put((pair, pred))
|
231 |
+
writer_queue.join()
|
232 |
+
logging.info("Finished exporting matches.")
|
233 |
+
|
234 |
+
|
235 |
+
if __name__ == '__main__':
|
236 |
+
parser = argparse.ArgumentParser()
|
237 |
+
parser.add_argument('--export_dir', type=Path, required=True)
|
238 |
+
parser.add_argument('--features', type=str, required=True)
|
239 |
+
parser.add_argument('--pairs', type=Path, required=True)
|
240 |
+
parser.add_argument('--conf', type=str, required=True, choices=list(confs.keys()))
|
241 |
+
args = parser.parse_args()
|
242 |
+
main(confs[args.conf], args.pairs, args.features, args.export_dir)
|
third_party/pram/localization/matchers/__init__.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
def get_matcher(matcher):
|
2 |
+
mod = __import__(f'{__name__}.{matcher}', fromlist=[''])
|
3 |
+
return getattr(mod, 'Model')
|
third_party/pram/localization/matchers/adagml.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
'''=================================================
|
3 |
+
@Project -> File pram -> adagml
|
4 |
+
@IDE PyCharm
|
5 |
+
@Author [email protected]
|
6 |
+
@Date 11/02/2024 14:34
|
7 |
+
=================================================='''
|
8 |
+
import torch
|
9 |
+
from localization.base_model import BaseModel
|
10 |
+
from nets.adagml import AdaGML as GMatcher
|
11 |
+
|
12 |
+
|
13 |
+
class AdaGML(BaseModel):
|
14 |
+
default_config = {
|
15 |
+
'descriptor_dim': 128,
|
16 |
+
'hidden_dim': 256,
|
17 |
+
'weights': 'indoor',
|
18 |
+
'keypoint_encoder': [32, 64, 128, 256],
|
19 |
+
'GNN_layers': ['self', 'cross'] * 9, # [self, cross, self, cross, ...] 9 in total
|
20 |
+
'sinkhorn_iterations': 20,
|
21 |
+
'match_threshold': 0.2,
|
22 |
+
'with_pose': False,
|
23 |
+
'n_layers': 9,
|
24 |
+
'n_min_tokens': 256,
|
25 |
+
'with_sinkhorn': True,
|
26 |
+
'weight_path': None,
|
27 |
+
}
|
28 |
+
|
29 |
+
required_inputs = [
|
30 |
+
'image0', 'keypoints0', 'scores0', 'descriptors0',
|
31 |
+
'image1', 'keypoints1', 'scores1', 'descriptors1',
|
32 |
+
]
|
33 |
+
|
34 |
+
def _init(self, conf):
|
35 |
+
self.net = GMatcher(config=conf).eval()
|
36 |
+
state_dict = torch.load(conf['weight_path'], map_location='cpu')['model']
|
37 |
+
self.net.load_state_dict(state_dict, strict=True)
|
38 |
+
|
39 |
+
def _forward(self, data):
|
40 |
+
with torch.no_grad():
|
41 |
+
return self.net(data)
|