Commit 12d0a16
Parent(s): 303dddc
all lib

This view is limited to 50 files because it contains too many changes. See raw diff.
- .DS_Store +0 -0
- handler.py +5 -1
- insightface/.gitignore +103 -0
- insightface/CODE_OF_CONDUCT.md +128 -0
- insightface/README.md +258 -0
- insightface/alignment/README.md +42 -0
- insightface/alignment/_datasets_/README.md +57 -0
- insightface/alignment/coordinate_reg/README.md +58 -0
- insightface/alignment/coordinate_reg/image_infer.py +23 -0
- insightface/alignment/heatmap/README.md +10 -0
- insightface/alignment/heatmap/data.py +354 -0
- insightface/alignment/heatmap/img_helper.py +86 -0
- insightface/alignment/heatmap/metric.py +107 -0
- insightface/alignment/heatmap/optimizer.py +65 -0
- insightface/alignment/heatmap/sample_config.py +98 -0
- insightface/alignment/heatmap/symbol/sym_heatmap.py +1085 -0
- insightface/alignment/heatmap/test.py +100 -0
- insightface/alignment/heatmap/test_rec_nme.py +71 -0
- insightface/alignment/heatmap/train.py +236 -0
- insightface/alignment/synthetics/README.md +63 -0
- insightface/alignment/synthetics/datasets/augs.py +40 -0
- insightface/alignment/synthetics/datasets/dataset_synthetics.py +163 -0
- insightface/alignment/synthetics/test_synthetics.py +104 -0
- insightface/alignment/synthetics/tools/prepare_synthetics.py +70 -0
- insightface/alignment/synthetics/trainer_synthetics.py +140 -0
- insightface/attribute/README.md +33 -0
- insightface/attribute/_datasets_/README.md +15 -0
- insightface/attribute/gender_age/test.py +24 -0
- insightface/benchmarks/train/nvidia_a10.md +48 -0
- insightface/benchmarks/train/nvidia_a100.md +53 -0
- insightface/benchmarks/train/nvidia_a30.md +52 -0
- insightface/benchmarks/train/nvidia_rtx3080.md +58 -0
- insightface/benchmarks/train/nvidia_rtx3090.md +57 -0
- insightface/benchmarks/train/nvidia_v100.md +54 -0
- insightface/body/human_pose/ambiguity_aware/README.md +94 -0
- insightface/body/human_pose/ambiguity_aware/cfg/h36m_gt_adv.yaml +36 -0
- insightface/body/human_pose/ambiguity_aware/cfg/h36m_gt_scale.yaml +40 -0
- insightface/body/human_pose/ambiguity_aware/cfg/pre_adv.yaml +36 -0
- insightface/body/human_pose/ambiguity_aware/cfg/pre_tmc_klbone.yaml +39 -0
- insightface/body/human_pose/ambiguity_aware/requirements.txt +12 -0
- insightface/body/human_pose/ambiguity_aware/scripts/_init_paths.py +13 -0
- insightface/body/human_pose/ambiguity_aware/scripts/demo.sh +5 -0
- insightface/body/human_pose/ambiguity_aware/scripts/demo_input/0.jpg +0 -0
- insightface/body/human_pose/ambiguity_aware/scripts/demo_input/0.pkl +3 -0
- insightface/body/human_pose/ambiguity_aware/scripts/demo_input/1.jpg +0 -0
- insightface/body/human_pose/ambiguity_aware/scripts/demo_input/1.pkl +3 -0
- insightface/body/human_pose/ambiguity_aware/scripts/demo_input/10.jpg +0 -0
- insightface/body/human_pose/ambiguity_aware/scripts/demo_input/10.pkl +3 -0
- insightface/body/human_pose/ambiguity_aware/scripts/demo_input/11.jpg +0 -0
- insightface/body/human_pose/ambiguity_aware/scripts/demo_input/11.pkl +3 -0
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
handler.py
CHANGED
@@ -14,6 +14,10 @@ from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
 
 from huggingface_hub import hf_hub_download
 
+import sys
+root_local = './'
+sys.path.insert(0, root_local)
+
 from insightface.app import FaceAnalysis
 
 from style_template import styles
@@ -48,7 +52,7 @@ class EndpointHandler():
         #     providers=["CPUExecutionProvider"],
         # )
         self.app = FaceAnalysis(
-            name="
+            name="buffalo_l",
             root="./",
             providers=["CPUExecutionProvider"],
         )
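The net effect of the two handler.py hunks is sketched below as a small, runnable illustration (not the full handler.py). Only the `sys.path` lines and the `FaceAnalysis(...)` arguments come from the diff; the `prepare` call and detector size are assumptions added for illustration. The path insertion apparently makes Python find the `insightface/` tree vendored by this commit before any pip-installed copy.

```python
# Sketch of the patched initialization path; only the FaceAnalysis arguments
# and the sys.path setup are taken from the diff above, the rest is illustrative.
import sys

root_local = './'
sys.path.insert(0, root_local)   # prefer the insightface/ copy vendored in this commit

from insightface.app import FaceAnalysis

app = FaceAnalysis(
    name="buffalo_l",                      # model pack selected by this commit
    root="./",                             # insightface looks for models under ./models
    providers=["CPUExecutionProvider"],    # ONNX Runtime CPU execution
)
app.prepare(ctx_id=0, det_size=(640, 640))  # assumed setup call, not part of the diff
```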
insightface/.gitignore
ADDED
@@ -0,0 +1,103 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

.DS_Store
insightface/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,128 @@
# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
  overall community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or
  advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
  address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at

All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series
of actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within
the community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.

Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.
insightface/README.md
ADDED
@@ -0,0 +1,258 @@
# InsightFace: 2D and 3D Face Analysis Project

<div align="left">
  <img src="https://insightface.ai/assets/img/custom/logo3.jpg" width="240"/>
</div>

InsightFace project is mainly maintained By [Jia Guo](mailto:[email protected]?subject=[GitHub]%20InsightFace%20Project) and [Jiankang Deng](https://jiankangdeng.github.io/).

For all main contributors, please check [contributing](#contributing).

## License

The code of InsightFace is released under the MIT License. There is no limitation for both academic and commercial usage.

The training data containing the annotation (and the models trained with these data) are available for non-commercial research purposes only.

Both manual-downloading models from our github repo and auto-downloading models with our [python-library](python-package) follow the above license policy(which is for non-commercial research purposes only).

## Top News

**`2023-08-08`**: We released the implementation of [Generalizing Gaze Estimation with Weak-Supervision from Synthetic Views](https://arxiv.org/abs/2212.02997) at [reconstruction/gaze](reconstruction/gaze).

**`2023-05-03`**: We have launched the ongoing version of wild face anti-spoofing challenge. See details [here](https://github.com/deepinsight/insightface/tree/master/challenges/cvpr23-fas-wild#updates).

**`2023-04-01`**: We move the swapping demo to Discord bot, which support editing on Midjourney generated images, see detail at [web-demos/swapping_discord](web-demos/swapping_discord).

**`2023-02-13`**: We launch a large scale in the wild face anti-spoofing challenge on CVPR23 Workshop, see details at [challenges/cvpr23-fas-wild](challenges/cvpr23-fas-wild).

**`2022-11-28`**: Single line code for facial identity swapping in our python packge ver 0.7, please check the example [here](examples/in_swapper).

**`2022-10-28`**: [MFR-Ongoing](http://iccv21-mfr.com) website is refactored, please create issues if there's any bug.

**`2022-09-22`**: Now we have [web-demos](web-demos): [face-localization](http://demo.insightface.ai:7007/), [face-recognition](http://demo.insightface.ai:7008/), and [face-swapping](http://demo.insightface.ai:7009/).

**`2022-08-12`**: We achieved Rank-1st of
[Perspective Projection Based Monocular 3D Face Reconstruction Challenge](https://tianchi.aliyun.com/competition/entrance/531961/introduction)
of [ECCV-2022 WCPA Workshop](https://sites.google.com/view/wcpa2022), [paper](https://arxiv.org/abs/2208.07142) and [code](reconstruction/jmlr).

**`2022-03-30`**: [Partial FC](https://arxiv.org/abs/2203.15565) accepted by CVPR-2022.

**`2022-02-23`**: [SCRFD](detection/scrfd) accepted by [ICLR-2022](https://iclr.cc/Conferences/2022).

**`2021-11-30`**: [MFR-Ongoing](challenges/mfr) challenge launched(same with IFRT), which is an extended version of [iccv21-mfr](challenges/iccv21-mfr).

**`2021-10-29`**: We achieved 1st place on the [VISA track](https://pages.nist.gov/frvt/plots/11/visa.html) of [NIST-FRVT 1:1](https://pages.nist.gov/frvt/html/frvt11.html) by using Partial FC (Xiang An, Jiankang Deng, Jia Guo).

**`2021-10-11`**: [Leaderboard](https://insightface.ai/mfr21) of [ICCV21 - Masked Face Recognition Challenge](challenges/iccv21-mfr) released. Video: [Youtube](https://www.youtube.com/watch?v=lL-7l5t6x2w), [Bilibili](https://www.bilibili.com/video/BV15b4y1h79N/).

**`2021-06-05`**: We launch a [Masked Face Recognition Challenge & Workshop](challenges/iccv21-mfr) on ICCV 2021.


## Introduction

[InsightFace](https://insightface.ai) is an open source 2D&3D deep face analysis toolbox, mainly based on PyTorch and MXNet.

Please check our [website](https://insightface.ai) for detail.

The master branch works with **PyTorch 1.6+** and/or **MXNet=1.6-1.8**, with **Python 3.x**.

InsightFace efficiently implements a rich variety of state of the art algorithms of face recognition, face detection and face alignment, which optimized for both training and deployment.

## Quick Start

Please start with our [python-package](python-package/), for testing detection, recognition and alignment models on input images.


### ArcFace Video Demo

[<img src=https://insightface.ai/assets/img/github/facerecognitionfromvideo.PNG width="760" />](https://www.youtube.com/watch?v=y-D1tReryGA&t=81s)

Please click the image to watch the Youtube video. For Bilibili users, click [here](https://www.bilibili.com/video/av38041494?from=search&seid=11501833604850032313).


## Projects

The [page](https://insightface.ai/projects) on InsightFace website also describes all supported projects in InsightFace.

You may also interested in some [challenges](https://insightface.ai/challenges) hold by InsightFace.


## Face Recognition

### Introduction

In this module, we provide training data, network settings and loss designs for deep face recognition.

The supported methods are as follows:

- [x] [ArcFace_mxnet (CVPR'2019)](recognition/arcface_mxnet)
- [x] [ArcFace_torch (CVPR'2019)](recognition/arcface_torch)
- [x] [SubCenter ArcFace (ECCV'2020)](recognition/subcenter_arcface)
- [x] [PartialFC_mxnet (CVPR'2022)](recognition/partial_fc)
- [x] [PartialFC_torch (CVPR'2022)](recognition/arcface_torch)
- [x] [VPL (CVPR'2021)](recognition/vpl)
- [x] [Arcface_oneflow](recognition/arcface_oneflow)
- [x] [ArcFace_Paddle (CVPR'2019)](recognition/arcface_paddle)

Commonly used network backbones are included in most of the methods, such as IResNet, MobilefaceNet, MobileNet, InceptionResNet_v2, DenseNet, etc..


### Datasets

The training data includes, but not limited to the cleaned MS1M, VGG2 and CASIA-Webface datasets, which were already packed in MXNet binary format. Please [dataset](recognition/_datasets_) page for detail.

### Evaluation

We provide standard IJB and Megaface evaluation pipelines in [evaluation](recognition/_evaluation_)


### Pretrained Models

**Please check [Model-Zoo](https://github.com/deepinsight/insightface/wiki/Model-Zoo) for more pretrained models.**

### Third-party Re-implementation of ArcFace

- TensorFlow: [InsightFace_TF](https://github.com/auroua/InsightFace_TF)
- TensorFlow: [tf-insightface](https://github.com/AIInAi/tf-insightface)
- TensorFlow: [insightface](https://github.com/Fei-Wang/insightface)
- PyTorch: [InsightFace_Pytorch](https://github.com/TreB1eN/InsightFace_Pytorch)
- PyTorch: [arcface-pytorch](https://github.com/ronghuaiyang/arcface-pytorch)
- Caffe: [arcface-caffe](https://github.com/xialuxi/arcface-caffe)
- Caffe: [CombinedMargin-caffe](https://github.com/gehaocool/CombinedMargin-caffe)
- Tensorflow: [InsightFace-tensorflow](https://github.com/luckycallor/InsightFace-tensorflow)
- TensorRT: [wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx)
- TensorRT: [InsightFace-REST](https://github.com/SthPhoenix/InsightFace-REST)
- ONNXRuntime C++: [ArcFace-ONNXRuntime](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/ort/cv/glint_arcface.cpp)
- ONNXRuntime Go: [arcface-go](https://github.com/jack139/arcface-go)
- MNN: [ArcFace-MNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/mnn/cv/mnn_glint_arcface.cpp)
- TNN: [ArcFace-TNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/tnn/cv/tnn_glint_arcface.cpp)
- NCNN: [ArcFace-NCNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/ncnn/cv/ncnn_glint_arcface.cpp)

## Face Detection

### Introduction

<div align="left">
  <img src="https://insightface.ai/assets/img/github/11513D05.jpg" width="640"/>
</div>

In this module, we provide training data with annotation, network settings and loss designs for face detection training, evaluation and inference.

The supported methods are as follows:

- [x] [RetinaFace (CVPR'2020)](detection/retinaface)
- [x] [SCRFD (Arxiv'2021)](detection/scrfd)
- [x] [blazeface_paddle](detection/blazeface_paddle)

[RetinaFace](detection/retinaface) is a practical single-stage face detector which is accepted by [CVPR 2020](https://openaccess.thecvf.com/content_CVPR_2020/html/Deng_RetinaFace_Single-Shot_Multi-Level_Face_Localisation_in_the_Wild_CVPR_2020_paper.html). We provide training code, training dataset, pretrained models and evaluation scripts.

[SCRFD](detection/scrfd) is an efficient high accuracy face detection approach which is initialy described in [Arxiv](https://arxiv.org/abs/2105.04714). We provide an easy-to-use pipeline to train high efficiency face detectors with NAS supporting.


## Face Alignment

### Introduction

<div align="left">
  <img src="https://insightface.ai/assets/img/custom/thumb_sdunet.png" width="600"/>
</div>

In this module, we provide datasets and training/inference pipelines for face alignment.

Supported methods:

- [x] [SDUNets (BMVC'2018)](alignment/heatmap)
- [x] [SimpleRegression](alignment/coordinate_reg)

[SDUNets](alignment/heatmap) is a heatmap based method which accepted on [BMVC](http://bmvc2018.org/contents/papers/0051.pdf).

[SimpleRegression](alignment/coordinate_reg) provides very lightweight facial landmark models with fast coordinate regression. The input of these models is loose cropped face image while the output is the direct landmark coordinates.


## Citation

If you find *InsightFace* useful in your research, please consider to cite the following related papers:

```
@inproceedings{ren2023pbidr,
  title={Facial Geometric Detail Recovery via Implicit Representation},
  author={Ren, Xingyu and Lattas, Alexandros and Gecer, Baris and Deng, Jiankang and Ma, Chao and Yang, Xiaokang},
  booktitle={2023 IEEE 17th International Conference on Automatic Face and Gesture Recognition (FG)},
  year={2023}
}

@article{guo2021sample,
  title={Sample and Computation Redistribution for Efficient Face Detection},
  author={Guo, Jia and Deng, Jiankang and Lattas, Alexandros and Zafeiriou, Stefanos},
  journal={arXiv preprint arXiv:2105.04714},
  year={2021}
}

@inproceedings{gecer2021ostec,
  title={OSTeC: One-Shot Texture Completion},
  author={Gecer, Baris and Deng, Jiankang and Zafeiriou, Stefanos},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year={2021}
}

@inproceedings{an2020partical_fc,
  title={Partial FC: Training 10 Million Identities on a Single Machine},
  author={An, Xiang and Zhu, Xuhan and Xiao, Yang and Wu, Lan and Zhang, Ming and Gao, Yuan and Qin, Bin and
          Zhang, Debing and Fu Ying},
  booktitle={Arxiv 2010.05222},
  year={2020}
}

@inproceedings{deng2020subcenter,
  title={Sub-center ArcFace: Boosting Face Recognition by Large-scale Noisy Web Faces},
  author={Deng, Jiankang and Guo, Jia and Liu, Tongliang and Gong, Mingming and Zafeiriou, Stefanos},
  booktitle={Proceedings of the IEEE Conference on European Conference on Computer Vision},
  year={2020}
}

@inproceedings{Deng2020CVPR,
  title = {RetinaFace: Single-Shot Multi-Level Face Localisation in the Wild},
  author = {Deng, Jiankang and Guo, Jia and Ververas, Evangelos and Kotsia, Irene and Zafeiriou, Stefanos},
  booktitle = {CVPR},
  year = {2020}
}

@inproceedings{guo2018stacked,
  title={Stacked Dense U-Nets with Dual Transformers for Robust Face Alignment},
  author={Guo, Jia and Deng, Jiankang and Xue, Niannan and Zafeiriou, Stefanos},
  booktitle={BMVC},
  year={2018}
}

@article{deng2018menpo,
  title={The Menpo benchmark for multi-pose 2D and 3D facial landmark localisation and tracking},
  author={Deng, Jiankang and Roussos, Anastasios and Chrysos, Grigorios and Ververas, Evangelos and Kotsia, Irene and Shen, Jie and Zafeiriou, Stefanos},
  journal={IJCV},
  year={2018}
}

@inproceedings{deng2018arcface,
  title={ArcFace: Additive Angular Margin Loss for Deep Face Recognition},
  author={Deng, Jiankang and Guo, Jia and Niannan, Xue and Zafeiriou, Stefanos},
  booktitle={CVPR},
  year={2019}
}
```

## Contributing

Main contributors:

- [Jia Guo](https://github.com/nttstar), ``guojia[at]gmail.com``
- [Jiankang Deng](https://github.com/jiankangdeng) ``jiankangdeng[at]gmail.com``
- [Xiang An](https://github.com/anxiangsir) ``anxiangsir[at]gmail.com``
- [Jack Yu](https://github.com/szad670401) ``jackyu961127[at]gmail.com``
- [Baris Gecer](https://barisgecer.github.io/) ``barisgecer[at]msn.com``
insightface/alignment/README.md
ADDED
@@ -0,0 +1,42 @@
## Face Alignment

<div align="left">
  <img src="https://insightface.ai/assets/img/custom/logo3.jpg" width="240"/>
</div>

## Introduction

These are the face alignment methods of [InsightFace](https://insightface.ai)

<div align="left">
  <img src="https://insightface.ai/assets/img/custom/thumb_sdunet.png" width="600"/>
</div>

### Datasets

Please refer to [datasets](_datasets_) page for the details of face alignment datasets used for training and evaluation.

### Evaluation

Please refer to [evaluation](_evaluation_) page for the details of face alignment evaluation.

## Methods

Supported methods:

- [x] [SDUNets (BMVC'2018)](heatmap)
- [x] [SimpleRegression](coordinate_reg)
- [x] [Alignment By Face Synthetics](synthetics)

## Contributing

We appreciate all contributions to improve the face alignment model zoo of InsightFace.
insightface/alignment/_datasets_/README.md
ADDED
@@ -0,0 +1,57 @@
# Face Alignment Datasets

(Updating)

## Training Datasets

### Menpo2D-Train

https://ibug.doc.ic.ac.uk/resources/2nd-facial-landmark-tracking-competition-menpo-ben/

### 300W-Train

https://ibug.doc.ic.ac.uk/resources/300-W/

### LFPW

https://neerajkumar.org/databases/lfpw/

### Helen

http://www.ifp.illinois.edu/~vuongle2/helen/

### AFW

### AFLW

https://www.tugraz.at/institute/icg/research/team-bischof/lrs/downloads/aflw/

### FDDB

### Face Synthetics

https://github.com/microsoft/FaceSynthetics

### 300W-LP (3D annotation)

http://www.cbsr.ia.ac.cn/users/xiangyuzhu/projects/3DDFA/main.htm

## Test Datasets

### 300W-Test

https://ibug.doc.ic.ac.uk/resources/300-W/

### COFW

http://www.vision.caltech.edu/xpburgos/ICCV13/#dataset

### Menpo2D-Test

https://ibug.doc.ic.ac.uk/resources/2nd-facial-landmark-tracking-competition-menpo-ben/

### AFLW2000-3D (3D annotation)

http://www.cbsr.ia.ac.cn/users/xiangyuzhu/projects/3DDFA/main.htm
insightface/alignment/coordinate_reg/README.md
ADDED
@@ -0,0 +1,58 @@
### Introduction

Here we provide some lightweight facial landmark models with fast coordinate regression.
The input of these models is loose cropped face image while the output is the direct landmark coordinates.

### Pretrained Models

- **Model ``2d106det``**

**2021.07: We now support model inference by our `insightface` python package, please check [image_infer.py](image_infer.py) for detail.**

Given face detection bounding box, predict 2d-106 landmarks. Mainly used for static image inference.

Backbone: MobileNet-0.5, size 5MB.

Input: size 192x192, loose cropped detection bounding-box.

Download link:

[baidu cloud](https://pan.baidu.com/s/10m5GmtNV5snynDrq3KqIdg) (code: ``lqvv``)

[google drive](https://drive.google.com/file/d/13Pz8mH-a1s7RXpq_jFUXxaqCpDUE0oSr/view?usp=sharing)

- **Model ``2d106track``**

Given landmarks bounding box, predict 2d-106 landmarks. Used for video landmarks tracking.

Download link: coming soon

### Visualization

<p align="center">Points mark-up(ordered by point names):</p>

<div align="center">
  <img src="https://github.com/nttstar/insightface-resources/blob/master/alignment/images/2d106markup.jpg" alt="markup" width="320">
</div>

<p align="center">Image result:</p>

<div align="center">
  <img src="https://github.com/nttstar/insightface-resources/blob/master/alignment/images/t1_out.jpg" alt="imagevis" width="800">
</div>

<p align="center">Video result:</p>

<div align="center">
  <img src="https://github.com/nttstar/insightface-resources/blob/master/alignment/images/C_jiaguo.gif" alt="videovis" width="240">
</div>

### FAQ
insightface/alignment/coordinate_reg/image_infer.py
ADDED
@@ -0,0 +1,23 @@
import cv2
import numpy as np
import os
import insightface
from insightface.app import FaceAnalysis
from insightface.data import get_image as ins_get_image

if __name__ == '__main__':
    app = FaceAnalysis(allowed_modules=['detection', 'landmark_2d_106'])
    app.prepare(ctx_id=0, det_size=(640, 640))
    img = ins_get_image('t1')
    faces = app.get(img)
    #assert len(faces)==6
    tim = img.copy()
    color = (200, 160, 75)
    for face in faces:
        lmk = face.landmark_2d_106
        lmk = np.round(lmk).astype(np.int)
        for i in range(lmk.shape[0]):
            p = tuple(lmk[i])
            cv2.circle(tim, p, 1, color, 1, cv2.LINE_AA)
    cv2.imwrite('./test_out.jpg', tim)
insightface/alignment/heatmap/README.md
ADDED
@@ -0,0 +1,10 @@
We provide our implementation of ``Stacked Dense U-Nets with Dual Transformers for Robust Face Alignment`` here at [BMVC](http://bmvc2018.org/contents/papers/0051.pdf) or link at [Arxiv](https://arxiv.org/abs/1812.01936).

We also provide some popular heatmap based approaches like stacked hourglass, etc.. You can define different loss-type/network structure/dataset in ``config.py``(from ``sample_config.py``).

For example, by default, you can train our approach by ``train.py --network sdu`` or train hourglass network by ``train.py --network hourglass``.

2D training/validation dataset is now available at [baiducloud](https://pan.baidu.com/s/1kdquiIGTlK7l26SPWO_cmw) or [dropbox](https://www.dropbox.com/s/por6mbguegmywo6/bmvc_sdu_data2d.zip?dl=0)

3D training/validation dataset is now available at [baiducloud](https://pan.baidu.com/s/1VjFWm6eEtIqGKk92GE2rgw) or [dropbox](https://www.dropbox.com/s/tjze176lh76nciw/bmvc_sdu_data3d.zip?dl=0)
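The README above assumes a local `config.py` copied from `sample_config.py`. As a rough, hypothetical sketch of what such a config exposes, the field names below are inferred from how `data.py` and `metric.py` later in this diff read them; the `easydict` container and all values are assumptions, not the repository defaults.

```python
# Hypothetical sketch of a config.py derived from sample_config.py.
# Field names come from their use in data.py / metric.py below; values are
# illustrative only.
from easydict import EasyDict as edict

config = edict()
config.landmark_type = '2d'      # '2d' or '3d' dataset variant
config.num_classes = 68          # number of landmark points
config.input_img_size = 128      # network input resolution
config.output_label_size = 64    # heatmap resolution when augmentation is on
config.losstype = 'heatmap'      # 'heatmap' -> per-point heatmaps, else coordinates
config.gaussian = 1              # sigma of the gaussian drawn on each heatmap
config.label_xfirst = False      # whether record labels are stored x-first
config.base_scale = 256          # default crop scale used by FaceSegIter
config.net_coherent = False      # if True, batches also carry flipped copies
```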
insightface/alignment/heatmap/data.py
ADDED
@@ -0,0 +1,354 @@
# pylint: skip-file
import mxnet as mx
import numpy as np
import sys, os
import random
import math
import scipy.misc
import cv2
import logging
import sklearn
import datetime
import img_helper
from mxnet.io import DataIter
from mxnet import ndarray as nd
from mxnet import io
from mxnet import recordio
from PIL import Image
from config import config
from skimage import transform as tf


class FaceSegIter(DataIter):
    def __init__(self,
                 batch_size,
                 per_batch_size=0,
                 path_imgrec=None,
                 aug_level=0,
                 force_mirror=False,
                 exf=1,
                 use_coherent=0,
                 args=None,
                 data_name="data",
                 label_name="softmax_label"):
        self.aug_level = aug_level
        self.force_mirror = force_mirror
        self.use_coherent = use_coherent
        self.exf = exf
        self.batch_size = batch_size
        self.per_batch_size = per_batch_size
        self.data_name = data_name
        self.label_name = label_name
        assert path_imgrec
        logging.info('loading recordio %s...', path_imgrec)
        path_imgidx = path_imgrec[0:-4] + ".idx"
        self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec,
                                                    'r')  # pylint: disable=redefined-variable-type
        self.oseq = list(self.imgrec.keys)
        print('train size', len(self.oseq))
        self.cur = 0
        self.reset()
        self.data_shape = (3, config.input_img_size, config.input_img_size)
        self.num_classes = config.num_classes
        self.input_img_size = config.input_img_size
        #self.label_classes = self.num_classes
        if config.losstype == 'heatmap':
            if aug_level > 0:
                self.output_label_size = config.output_label_size
                self.label_shape = (self.num_classes, self.output_label_size,
                                    self.output_label_size)
            else:
                self.output_label_size = self.input_img_size
                #self.label_shape = (self.num_classes, 2)
                self.label_shape = (self.num_classes, self.output_label_size,
                                    self.output_label_size)
        else:
            if aug_level > 0:
                self.output_label_size = config.output_label_size
                self.label_shape = (self.num_classes, 2)
            else:
                self.output_label_size = self.input_img_size
                #self.label_shape = (self.num_classes, 2)
                self.label_shape = (self.num_classes, 2)
        self.provide_data = [(data_name, (batch_size, ) + self.data_shape)]
        self.provide_label = [(label_name, (batch_size, ) + self.label_shape)]
        self.img_num = 0
        self.invalid_num = 0
        self.mode = 1
        self.vis = 0
        self.stats = [0, 0]
        self.flip_order = [
            16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 26, 25,
            24, 23, 22, 21, 20, 19, 18, 17, 27, 28, 29, 30, 35, 34, 33, 32, 31,
            45, 44, 43, 42, 47, 46, 39, 38, 37, 36, 41, 40, 54, 53, 52, 51, 50,
            49, 48, 59, 58, 57, 56, 55, 64, 63, 62, 61, 60, 67, 66, 65
        ]
        #self.mirror_set = [
        #  (22,23),
        #  (21,24),
        #  (20,25),
        #  (19,26),
        #  (18,27),
        #  (40,43),
        #  (39,44),
        #  (38,45),
        #  (37,46),
        #  (42,47),
        #  (41,48),
        #  (33,35),
        #  (32,36),
        #  (51,53),
        #  (50,54),
        #  (62,64),
        #  (61,65),
        #  (49,55),
        #  (49,55),
        #  (68,66),
        #  (60,56),
        #  (59,57),
        #  (1,17),
        #  (2,16),
        #  (3,15),
        #  (4,14),
        #  (5,13),
        #  (6,12),
        #  (7,11),
        #  (8,10),
        #  ]

    def get_data_shape(self):
        return self.data_shape

    #def get_label_shape(self):
    #  return self.label_shape

    def get_shape_dict(self):
        D = {}
        for (k, v) in self.provide_data:
            D[k] = v
        for (k, v) in self.provide_label:
            D[k] = v
        return D

    def get_label_names(self):
        D = []
        for (k, v) in self.provide_label:
            D.append(k)
        return D

    def reset(self):
        #print('reset')
        if self.aug_level == 0:
            self.seq = self.oseq
        else:
            self.seq = []
            for _ in range(self.exf):
                _seq = self.oseq[:]
                random.shuffle(_seq)
                self.seq += _seq
        print('train size after reset', len(self.seq))
        self.cur = 0

    def next_sample(self):
        """Helper function for reading in next sample."""
        if self.cur >= len(self.seq):
            raise StopIteration
        idx = self.seq[self.cur]
        self.cur += 1
        s = self.imgrec.read_idx(idx)
        header, img = recordio.unpack(s)
        img = mx.image.imdecode(img).asnumpy()
        hlabel = np.array(header.label).reshape((self.num_classes, 2))
        if not config.label_xfirst:
            hlabel = hlabel[:, ::-1]  #convert to X/W first
        annot = {'scale': config.base_scale}

        #ul = np.array( (50000,50000), dtype=np.int32)
        #br = np.array( (0,0), dtype=np.int32)
        #for i in range(hlabel.shape[0]):
        #  h = int(hlabel[i][0])
        #  w = int(hlabel[i][1])
        #  key = np.array((h,w))
        #  ul = np.minimum(key, ul)
        #  br = np.maximum(key, br)

        return img, hlabel, annot

    def get_flip(self, data, label):
        data_flip = np.zeros_like(data)
        label_flip = np.zeros_like(label)
        for k in range(data_flip.shape[2]):
            data_flip[:, :, k] = np.fliplr(data[:, :, k])
        for k in range(label_flip.shape[0]):
            label_flip[k, :] = np.fliplr(label[k, :])
        #print(label[0,:].shape)
        label_flip = label_flip[self.flip_order, :]
        return data_flip, label_flip

    def get_data(self, data, label, annot):
        if self.vis:
            self.img_num += 1
        #if self.img_num<=self.vis:
        #  filename = './vis/raw_%d.jpg' % (self.img_num)
        #  print('save', filename)
        #  draw = data.copy()
        #  for i in range(label.shape[0]):
        #    cv2.circle(draw, (label[i][1], label[i][0]), 1, (0, 0, 255), 2)
        #  scipy.misc.imsave(filename, draw)

        rotate = 0
        #scale = 1.0
        if 'scale' in annot:
            scale = annot['scale']
        else:
            scale = max(data.shape[0], data.shape[1])
        if 'center' in annot:
            center = annot['center']
        else:
            center = np.array((data.shape[1] / 2, data.shape[0] / 2))
        max_retry = 3
        if self.aug_level == 0:  #validation mode
            max_retry = 6
        retry = 0
        found = False
        base_scale = scale
        while retry < max_retry:
            retry += 1
            succ = True
            _scale = base_scale
            if self.aug_level > 0:
                rotate = np.random.randint(-40, 40)
                scale_config = 0.2
                #rotate = 0
                #scale_config = 0.0
                scale_ratio = min(
                    1 + scale_config,
                    max(1 - scale_config,
                        (np.random.randn() * scale_config) + 1))
                _scale = int(base_scale * scale_ratio)
                #translate = np.random.randint(-5, 5, size=(2,))
                #center += translate
            data_out, trans = img_helper.transform(data, center,
                                                   self.input_img_size, _scale,
                                                   rotate)
            #data_out = img_helper.crop2(data, center, _scale, (self.input_img_size, self.input_img_size), rot=rotate)
            label_out = np.zeros(self.label_shape, dtype=np.float32)
            #print('out shapes', data_out.shape, label_out.shape)
            for i in range(label.shape[0]):
                pt = label[i].copy()
                #pt = pt[::-1]
                npt = img_helper.transform_pt(pt, trans)
                if npt[0] >= data_out.shape[1] or npt[1] >= data_out.shape[
                        0] or npt[0] < 0 or npt[1] < 0:
                    succ = False
                    #print('err npt', npt)
                    break
                if config.losstype == 'heatmap':
                    pt_scale = float(
                        self.output_label_size) / self.input_img_size
                    npt *= pt_scale
                    npt = npt.astype(np.int32)
                    img_helper.gaussian(label_out[i], npt, config.gaussian)
                else:
                    label_out[i] = (npt / self.input_img_size)
                #print('before gaussian', label_out[i].shape, pt.shape)
                #trans = img_helper.transform(pt, center, _scale, (self.output_label_size, self.output_label_size), rot=rotate)
                #print(trans.shape)
                #if not img_helper.gaussian(label_out[i], trans, _g):
                #  succ = False
                #  break
            if not succ:
                if self.aug_level == 0:
                    base_scale += 20
                continue

            flip_data_out = None
            flip_label_out = None
            if config.net_coherent:
                flip_data_out, flip_label_out = self.get_flip(
                    data_out, label_out)
            elif ((self.aug_level > 0 and np.random.rand() < 0.5)
                  or self.force_mirror):  #flip aug
                flip_data_out, flip_label_out = self.get_flip(
                    data_out, label_out)
                data_out, label_out = flip_data_out, flip_label_out

            found = True
            break

        #self.stats[0]+=1
        if not found:
            #self.stats[1]+=1
            #print('find aug error', retry)
            #print(self.stats)
            #print('!!!ERR')
            return None
        #print('found with scale', _scale, rotate)

        if self.vis > 0 and self.img_num <= self.vis:
            print('crop', data.shape, center, _scale, rotate, data_out.shape)
            filename = './vis/cropped_%d.jpg' % (self.img_num)
            print('save', filename)
            draw = data_out.copy()
            alabel = label_out.copy()
            for i in range(label.shape[0]):
                a = cv2.resize(alabel[i],
                               (self.input_img_size, self.input_img_size))
                ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
                cv2.circle(draw, (ind[1], ind[0]), 1, (0, 0, 255), 2)
            scipy.misc.imsave(filename, draw)
            filename = './vis/raw_%d.jpg' % (self.img_num)
            scipy.misc.imsave(filename, data)

        return data_out, label_out, flip_data_out, flip_label_out

    def next(self):
        """Returns the next batch of data."""
        #print('next')
        batch_size = self.batch_size
        batch_data = nd.empty((batch_size, ) + self.data_shape)
        batch_label = nd.empty((batch_size, ) + self.label_shape)
        i = 0
        #self.cutoff = random.randint(800,1280)
        try:
            while i < batch_size:
                #print('N', i)
                data, label, annot = self.next_sample()
                R = self.get_data(data, label, annot)
                if R is None:
                    continue
                data_out, label_out, flip_data_out, flip_label_out = R
                if not self.use_coherent:
                    data = nd.array(data_out)
                    data = nd.transpose(data, axes=(2, 0, 1))
                    label = nd.array(label_out)
                    #print(data.shape, label.shape)
                    batch_data[i][:] = data
                    batch_label[i][:] = label
                    i += 1
                else:
                    data = nd.array(data_out)
                    data = nd.transpose(data, axes=(2, 0, 1))
                    label = nd.array(label_out)
                    data2 = nd.array(flip_data_out)
                    data2 = nd.transpose(data2, axes=(2, 0, 1))
                    label2 = nd.array(flip_label_out)
                    #M = nd.array(M)
                    #print(data.shape, label.shape)
                    batch_data[i][:] = data
                    batch_label[i][:] = label
                    #i+=1
                    j = i + self.per_batch_size // 2
                    batch_data[j][:] = data2
                    batch_label[j][:] = label2
                    i += 1
                    if j % self.per_batch_size == self.per_batch_size - 1:
                        i = j + 1
        except StopIteration:
            if i < batch_size:
                raise StopIteration

        #return {self.data_name : batch_data,
        #        self.label_name : batch_label}
        #print(batch_data.shape, batch_label.shape)
        return mx.io.DataBatch([batch_data], [batch_label], batch_size - i)
insightface/alignment/heatmap/img_helper.py
ADDED
@@ -0,0 +1,86 @@
import numpy as np
import math
import cv2
from skimage import transform as stf


def transform(data, center, output_size, scale, rotation):
    scale_ratio = float(output_size) / scale
    rot = float(rotation) * np.pi / 180.0
    #translation = (output_size/2-center[0]*scale_ratio, output_size/2-center[1]*scale_ratio)
    t1 = stf.SimilarityTransform(scale=scale_ratio)
    cx = center[0] * scale_ratio
    cy = center[1] * scale_ratio
    t2 = stf.SimilarityTransform(translation=(-1 * cx, -1 * cy))
    t3 = stf.SimilarityTransform(rotation=rot)
    t4 = stf.SimilarityTransform(translation=(output_size / 2,
                                              output_size / 2))
    t = t1 + t2 + t3 + t4
    trans = t.params[0:2]
    #print('M', scale, rotation, trans)
    cropped = cv2.warpAffine(data,
                             trans, (output_size, output_size),
                             borderValue=0.0)
    return cropped, trans


def transform_pt(pt, trans):
    new_pt = np.array([pt[0], pt[1], 1.]).T
    new_pt = np.dot(trans, new_pt)
    #print('new_pt', new_pt.shape, new_pt)
    return new_pt[:2]


def gaussian(img, pt, sigma):
    # Draw a 2D gaussian
    assert (sigma >= 0)
    if sigma == 0:
        img[pt[1], pt[0]] = 1.0
        return True
    #assert pt[0]<=img.shape[1]
    #assert pt[1]<=img.shape[0]

    # Check that any part of the gaussian is in-bounds
    ul = [int(pt[0] - 3 * sigma), int(pt[1] - 3 * sigma)]
    br = [int(pt[0] + 3 * sigma + 1), int(pt[1] + 3 * sigma + 1)]
    if (ul[0] > img.shape[1] or ul[1] >= img.shape[0] or br[0] < 0
            or br[1] < 0):
        # If not, just return the image as is
        #print('gaussian error')
        return False
        #return img

    # Generate gaussian
    size = 6 * sigma + 1
    x = np.arange(0, size, 1, float)
    y = x[:, np.newaxis]
    x0 = y0 = size // 2
    # The gaussian is not normalized, we want the center value to equal 1
    g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))

    # Usable gaussian range
    g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
    g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
    # Image range
    img_x = max(0, ul[0]), min(br[0], img.shape[1])
    img_y = max(0, ul[1]), min(br[1], img.shape[0])

    img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
    return True
    #return img


def estimate_trans_bbox(face, input_size, s=2.0):
    w = face[2] - face[0]
    h = face[3] - face[1]
    wc = int((face[2] + face[0]) / 2)
    hc = int((face[3] + face[1]) / 2)
    im_size = max(w, h)
    #size = int(im_size*1.2)
    scale = input_size / (max(w, h) * s)
    M = [
        [scale, 0, input_size / 2 - wc * scale],
        [0, scale, input_size / 2 - hc * scale],
    ]
    M = np.array(M)
    return M
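For orientation, the snippet below (not part of the committed file) shows how `transform` and `transform_pt` from img_helper.py are meant to be used together: the same 2x3 matrix that warps the image also maps landmark coordinates into the cropped frame. The image, center, scale and rotation values are invented for illustration.

```python
# Illustrative use of img_helper.transform / transform_pt (not in the commit).
# Crops a 384-pixel region around a chosen center into a 128x128 patch rotated
# by 15 degrees, then maps a source-space landmark into that patch.
import numpy as np
import img_helper  # the module added in this commit

img = np.zeros((480, 640, 3), dtype=np.uint8)    # stand-in for a decoded image
center = np.array((320.0, 240.0))                # crop center in source pixels
crop, trans = img_helper.transform(img, center, output_size=128,
                                   scale=384, rotation=15)

pt_src = np.array((300.0, 250.0))                # a landmark in source coordinates
pt_crop = img_helper.transform_pt(pt_src, trans) # the same landmark in crop coordinates
print(crop.shape, pt_crop)
```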
insightface/alignment/heatmap/metric.py
ADDED
@@ -0,0 +1,107 @@
import mxnet as mx
import numpy as np
import math
import cv2
from config import config


class LossValueMetric(mx.metric.EvalMetric):
    def __init__(self):
        self.axis = 1
        super(LossValueMetric, self).__init__('lossvalue',
                                              axis=self.axis,
                                              output_names=None,
                                              label_names=None)
        self.losses = []

    def update(self, labels, preds):
        loss = preds[0].asnumpy()[0]
        self.sum_metric += loss
        self.num_inst += 1.0


class NMEMetric(mx.metric.EvalMetric):
    def __init__(self):
        self.axis = 1
        super(NMEMetric, self).__init__('NME',
                                        axis=self.axis,
                                        output_names=None,
                                        label_names=None)
        #self.losses = []
        self.count = 0

    def cal_nme(self, label, pred_label):
        nme = []
        for b in range(pred_label.shape[0]):
            record = [None] * 6
            item = []
            if label.ndim == 4:
                _heatmap = label[b][36]
                if np.count_nonzero(_heatmap) == 0:
                    continue
            else:  #ndim==3
                #print(label[b])
                if np.count_nonzero(label[b]) == 0:
                    continue
            for p in range(pred_label.shape[1]):
                if label.ndim == 4:
                    heatmap_gt = label[b][p]
                    ind_gt = np.unravel_index(np.argmax(heatmap_gt, axis=None),
                                              heatmap_gt.shape)
                    ind_gt = np.array(ind_gt)
                else:
                    ind_gt = label[b][p]
                    #ind_gt = ind_gt.astype(np.int)
                #print(ind_gt)
                heatmap_pred = pred_label[b][p]
                heatmap_pred = cv2.resize(
                    heatmap_pred,
                    (config.input_img_size, config.input_img_size))
                ind_pred = np.unravel_index(np.argmax(heatmap_pred, axis=None),
                                            heatmap_pred.shape)
                ind_pred = np.array(ind_pred)
                #print(ind_gt.shape)
                #print(ind_pred)
                if p == 36:
                    #print('b', b, p, ind_gt, np.count_nonzero(heatmap_gt))
                    record[0] = ind_gt
                elif p == 39:
                    record[1] = ind_gt
                elif p == 42:
                    record[2] = ind_gt
                elif p == 45:
                    record[3] = ind_gt
                if record[4] is None or record[5] is None:
                    record[4] = ind_gt
                    record[5] = ind_gt
                else:
                    record[4] = np.minimum(record[4], ind_gt)
                    record[5] = np.maximum(record[5], ind_gt)
                #print(ind_gt.shape, ind_pred.shape)
                value = np.sqrt(np.sum(np.square(ind_gt - ind_pred)))
                item.append(value)
            _nme = np.mean(item)
            if config.landmark_type == '2d':
                left_eye = (record[0] + record[1]) / 2
                right_eye = (record[2] + record[3]) / 2
                _dist = np.sqrt(np.sum(np.square(left_eye - right_eye)))
                #print('eye dist', _dist, left_eye, right_eye)
                _nme /= _dist
            else:
                #_dist = np.sqrt(float(label.shape[2]*label.shape[3]))
                _dist = np.sqrt(np.sum(np.square(record[5] - record[4])))
                #print(_dist)
                _nme /= _dist
            nme.append(_nme)
        return np.mean(nme)

    def update(self, labels, preds):
        self.count += 1
        label = labels[0].asnumpy()
        pred_label = preds[-1].asnumpy()
        nme = self.cal_nme(label, pred_label)

        #print('nme', nme)
        #nme = np.mean(nme)
        self.sum_metric += np.mean(nme)
        self.num_inst += 1.0
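NMEMetric.cal_nme accepts either 4-D heatmap labels or 3-D coordinate labels; for '2d' landmarks it normalizes the mean point-to-point error by the inter-ocular distance recovered from indices 36/39/42/45 of the 68-point layout. A rough self-check sketch of the plumbing, assuming the sample config values shown later and using invented synthetic data (so the "eye distance" is only numerically meaningful, not anatomically):

import numpy as np
from config import config
from metric import NMEMetric

# cal_nme reads these two fields; the values mirror the sample config.
config.landmark_type = '2d'
config.input_img_size = 128

num_pts, hm_size, img_size = 68, 64, 128

# Ground-truth landmarks as (row, col) in input-image coordinates (3-D label).
rng = np.random.RandomState(0)
gt = rng.uniform(10, 118, size=(1, num_pts, 2)).astype(np.float32)

# Fake predicted heatmaps: one hot pixel at the downscaled ground-truth spot.
pred = np.zeros((1, num_pts, hm_size, hm_size), dtype=np.float32)
for p in range(num_pts):
    r, c = (gt[0, p] * hm_size / img_size).astype(int)
    pred[0, p, r, c] = 1.0

metric = NMEMetric()
print('NME:', metric.cal_nme(gt, pred))  # small, nonzero due to quantization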
insightface/alignment/heatmap/optimizer.py
ADDED
@@ -0,0 +1,65 @@
import mxnet as mx
import mxnet.optimizer as optimizer
from mxnet.ndarray import (NDArray, zeros, clip, sqrt, cast, maximum, abs as
                           NDabs)
#from mxnet.ndarray import (sgd_update, sgd_mom_update, adam_update, rmsprop_update, rmspropalex_update,
#                           mp_sgd_update, mp_sgd_mom_update, square, ftrl_update)


class ONadam(optimizer.Optimizer):
    def __init__(self,
                 learning_rate=0.001,
                 beta1=0.9,
                 beta2=0.999,
                 epsilon=1e-8,
                 schedule_decay=0.004,
                 **kwargs):
        super(ONadam, self).__init__(learning_rate=learning_rate, **kwargs)
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.schedule_decay = schedule_decay
        self.m_schedule = 1.

    def create_state(self, index, weight):
        return (
            zeros(weight.shape, weight.context, dtype=weight.dtype),  # mean
            zeros(weight.shape, weight.context,
                  dtype=weight.dtype))  # variance

    def update(self, index, weight, grad, state):
        assert (isinstance(weight, NDArray))
        assert (isinstance(grad, NDArray))
        self._update_count(index)
        lr = self._get_lr(index)
        wd = self._get_wd(index)

        t = self._index_update_count[index]

        # preprocess grad
        #grad = grad * self.rescale_grad + wd * weight
        grad *= self.rescale_grad + wd * weight
        if self.clip_gradient is not None:
            grad = clip(grad, -self.clip_gradient, self.clip_gradient)

        # warming momentum schedule
        momentum_t = self.beta1 * (1. - 0.5 *
                                   (pow(0.96, t * self.schedule_decay)))
        momentum_t_1 = self.beta1 * (1. - 0.5 *
                                     (pow(0.96,
                                          (t + 1) * self.schedule_decay)))
        self.m_schedule = self.m_schedule * momentum_t
        m_schedule_next = self.m_schedule * momentum_t_1

        # update m_t and v_t
        m_t, v_t = state
        m_t[:] = self.beta1 * m_t + (1. - self.beta1) * grad
        v_t[:] = self.beta2 * v_t + (1. - self.beta2) * grad * grad

        grad_prime = grad / (1. - self.m_schedule)
        m_t_prime = m_t / (1. - m_schedule_next)
        v_t_prime = v_t / (1. - pow(self.beta2, t))
        m_t_bar = (1. - momentum_t) * grad_prime + momentum_t_1 * m_t_prime

        # update weight
        weight[:] -= lr * m_t_bar / (sqrt(v_t_prime) + self.epsilon)
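ONadam.update implements Nesterov-accelerated Adam with a warming momentum schedule. The NumPy restatement below shows the same step for a single weight array, with weight decay and gradient clipping left out, purely to make the math easier to follow; it is an illustration, not a drop-in replacement for the MXNet optimizer:

import numpy as np

def nadam_step(w, grad, m, v, m_schedule, t,
               lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8,
               schedule_decay=0.004):
    # Warming momentum schedule for step t and t+1.
    momentum_t = beta1 * (1. - 0.5 * 0.96 ** (t * schedule_decay))
    momentum_t_1 = beta1 * (1. - 0.5 * 0.96 ** ((t + 1) * schedule_decay))
    m_schedule = m_schedule * momentum_t
    m_schedule_next = m_schedule * momentum_t_1

    # First and second moment estimates.
    m = beta1 * m + (1. - beta1) * grad
    v = beta2 * v + (1. - beta2) * grad * grad

    # Bias-corrected terms and the Nesterov-style lookahead combination.
    grad_prime = grad / (1. - m_schedule)
    m_prime = m / (1. - m_schedule_next)
    v_prime = v / (1. - beta2 ** t)
    m_bar = (1. - momentum_t) * grad_prime + momentum_t_1 * m_prime

    w = w - lr * m_bar / (np.sqrt(v_prime) + eps)
    return w, m, v, m_schedule

w = np.ones(4); m = np.zeros(4); v = np.zeros(4); m_schedule = 1.
for t in range(1, 6):
    w, m, v, m_schedule = nadam_step(w, np.full(4, 0.1), m, v, m_schedule, t)
print(w)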
insightface/alignment/heatmap/sample_config.py
ADDED
@@ -0,0 +1,98 @@
import numpy as np
from easydict import EasyDict as edict

config = edict()

#default training/dataset config
config.num_classes = 68
config.record_img_size = 384
config.base_scale = 256
config.input_img_size = 128
config.output_label_size = 64
config.label_xfirst = False
config.losstype = 'heatmap'
config.net_coherent = False
config.multiplier = 1.0

config.gaussian = 0

# network settings
network = edict()

network.hourglass = edict()
network.hourglass.net_coherent = False
network.hourglass.net_sta = 0
network.hourglass.net_n = 3
network.hourglass.net_dcn = 0
network.hourglass.net_stacks = 2
network.hourglass.net_block = 'resnet'
network.hourglass.net_binarize = False
network.hourglass.losstype = 'heatmap'

network.sdu = edict()
network.sdu.net_coherent = False
network.sdu.net_sta = 1
network.sdu.net_n = 3
network.sdu.net_dcn = 3
network.sdu.net_stacks = 2
network.sdu.net_block = 'cab'
network.sdu.net_binarize = False
network.sdu.losstype = 'heatmap'

# dataset settings
dataset = edict()

dataset.i2d = edict()
dataset.i2d.dataset = '2D'
dataset.i2d.landmark_type = '2d'
dataset.i2d.dataset_path = './data_2d'
dataset.i2d.num_classes = 68
dataset.i2d.record_img_size = 384
dataset.i2d.base_scale = 256
dataset.i2d.input_img_size = 128
dataset.i2d.output_label_size = 64
dataset.i2d.label_xfirst = False
dataset.i2d.val_targets = ['ibug', 'cofw_testset', '300W']

dataset.i3d = edict()
dataset.i3d.dataset = '3D'
dataset.i3d.landmark_type = '3d'
dataset.i3d.dataset_path = './data_3d'
dataset.i3d.num_classes = 68
dataset.i3d.record_img_size = 384
dataset.i3d.base_scale = 256
dataset.i3d.input_img_size = 128
dataset.i3d.output_label_size = 64
dataset.i3d.label_xfirst = False
dataset.i3d.val_targets = ['AFLW2000-3D']

# default settings
default = edict()

# default network
default.network = 'hourglass'
default.pretrained = ''
default.pretrained_epoch = 0
# default dataset
default.dataset = 'i2d'
default.frequent = 20
default.verbose = 200
default.kvstore = 'device'

default.prefix = 'model/A'
default.end_epoch = 10000
default.lr = 0.00025
default.wd = 0.0
default.per_batch_size = 20
default.lr_step = '16000,24000,30000'


def generate_config(_network, _dataset):
    for k, v in network[_network].items():
        config[k] = v
        default[k] = v
    for k, v in dataset[_dataset].items():
        config[k] = v
        default[k] = v
    config.network = _network
    config.dataset = _dataset
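generate_config copies one network preset and one dataset preset into the flat config/default dicts. Since train.py imports the module as config, this sample file is presumably copied or renamed to config.py before use; a minimal sketch under that assumption:

# Assumes sample_config.py has been placed on the import path as config.py.
from config import config, default, generate_config

generate_config('hourglass', 'i2d')
print(config.network, config.dataset)           # hourglass i2d
print(config.net_stacks, config.net_block)      # 2 resnet
print(config.dataset_path, config.val_targets)  # ./data_2d ['ibug', 'cofw_testset', '300W']
print(default.per_batch_size, default.lr)       # 20 0.00025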
insightface/alignment/heatmap/symbol/sym_heatmap.py
ADDED
@@ -0,0 +1,1085 @@
1 |
+
from __future__ import absolute_import
|
2 |
+
from __future__ import division
|
3 |
+
from __future__ import print_function
|
4 |
+
import mxnet as mx
|
5 |
+
import numpy as np
|
6 |
+
from config import config
|
7 |
+
|
8 |
+
ACT_BIT = 1
|
9 |
+
bn_mom = 0.9
|
10 |
+
workspace = 256
|
11 |
+
memonger = False
|
12 |
+
|
13 |
+
|
14 |
+
def Conv(**kwargs):
|
15 |
+
body = mx.sym.Convolution(**kwargs)
|
16 |
+
return body
|
17 |
+
|
18 |
+
|
19 |
+
def Act(data, act_type, name):
|
20 |
+
if act_type == 'prelu':
|
21 |
+
body = mx.sym.LeakyReLU(data=data, act_type='prelu', name=name)
|
22 |
+
else:
|
23 |
+
body = mx.symbol.Activation(data=data, act_type=act_type, name=name)
|
24 |
+
return body
|
25 |
+
|
26 |
+
|
27 |
+
#def lin(data, num_filter, workspace, name, binarize, dcn):
|
28 |
+
# bit = 1
|
29 |
+
# if not binarize:
|
30 |
+
# if not dcn:
|
31 |
+
# conv1 = Conv(data=data, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0),
|
32 |
+
# no_bias=True, workspace=workspace, name=name + '_conv')
|
33 |
+
# bn1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn')
|
34 |
+
# act1 = Act(data=bn1, act_type='relu', name=name + '_relu')
|
35 |
+
# return act1
|
36 |
+
# else:
|
37 |
+
# bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn')
|
38 |
+
# act1 = Act(data=bn1, act_type='relu', name=name + '_relu')
|
39 |
+
# conv1_offset = mx.symbol.Convolution(name=name+'_conv_offset', data = act1,
|
40 |
+
# num_filter=18, pad=(1, 1), kernel=(3, 3), stride=(1, 1))
|
41 |
+
# conv1 = mx.contrib.symbol.DeformableConvolution(name=name+"_conv", data=act1, offset=conv1_offset,
|
42 |
+
# num_filter=num_filter, pad=(1,1), kernel=(3, 3), num_deformable_group=1, stride=(1, 1), dilate=(1, 1), no_bias=False)
|
43 |
+
# #conv1 = Conv(data=act1, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1,1),
|
44 |
+
# # no_bias=False, workspace=workspace, name=name + '_conv')
|
45 |
+
# return conv1
|
46 |
+
# else:
|
47 |
+
# bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn')
|
48 |
+
# act1 = Act(data=bn1, act_type='relu', name=name + '_relu')
|
49 |
+
# conv1 = mx.sym.QConvolution_v1(data=act1, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0),
|
50 |
+
# no_bias=True, workspace=workspace, name=name + '_conv', act_bit=ACT_BIT, weight_bit=bit)
|
51 |
+
# conv1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2')
|
52 |
+
# return conv1
|
53 |
+
|
54 |
+
|
55 |
+
def lin3(data, num_filter, workspace, name, k, g=1, d=1):
|
56 |
+
if k != 3:
|
57 |
+
conv1 = Conv(data=data,
|
58 |
+
num_filter=num_filter,
|
59 |
+
kernel=(k, k),
|
60 |
+
stride=(1, 1),
|
61 |
+
pad=((k - 1) // 2, (k - 1) // 2),
|
62 |
+
num_group=g,
|
63 |
+
no_bias=True,
|
64 |
+
workspace=workspace,
|
65 |
+
name=name + '_conv')
|
66 |
+
else:
|
67 |
+
conv1 = Conv(data=data,
|
68 |
+
num_filter=num_filter,
|
69 |
+
kernel=(k, k),
|
70 |
+
stride=(1, 1),
|
71 |
+
pad=(d, d),
|
72 |
+
num_group=g,
|
73 |
+
dilate=(d, d),
|
74 |
+
no_bias=True,
|
75 |
+
workspace=workspace,
|
76 |
+
name=name + '_conv')
|
77 |
+
bn1 = mx.sym.BatchNorm(data=conv1,
|
78 |
+
fix_gamma=False,
|
79 |
+
momentum=bn_mom,
|
80 |
+
eps=2e-5,
|
81 |
+
name=name + '_bn')
|
82 |
+
act1 = Act(data=bn1, act_type='relu', name=name + '_relu')
|
83 |
+
ret = act1
|
84 |
+
return ret
|
85 |
+
|
86 |
+
|
87 |
+
def ConvFactory(data,
|
88 |
+
num_filter,
|
89 |
+
kernel,
|
90 |
+
stride=(1, 1),
|
91 |
+
pad=(0, 0),
|
92 |
+
act_type="relu",
|
93 |
+
mirror_attr={},
|
94 |
+
with_act=True,
|
95 |
+
dcn=False,
|
96 |
+
name=''):
|
97 |
+
if not dcn:
|
98 |
+
conv = mx.symbol.Convolution(data=data,
|
99 |
+
num_filter=num_filter,
|
100 |
+
kernel=kernel,
|
101 |
+
stride=stride,
|
102 |
+
pad=pad,
|
103 |
+
no_bias=True,
|
104 |
+
workspace=workspace,
|
105 |
+
name=name + '_conv')
|
106 |
+
else:
|
107 |
+
conv_offset = mx.symbol.Convolution(name=name + '_conv_offset',
|
108 |
+
data=data,
|
109 |
+
num_filter=18,
|
110 |
+
pad=(1, 1),
|
111 |
+
kernel=(3, 3),
|
112 |
+
stride=(1, 1))
|
113 |
+
conv = mx.contrib.symbol.DeformableConvolution(name=name + "_conv",
|
114 |
+
data=data,
|
115 |
+
offset=conv_offset,
|
116 |
+
num_filter=num_filter,
|
117 |
+
pad=(1, 1),
|
118 |
+
kernel=(3, 3),
|
119 |
+
num_deformable_group=1,
|
120 |
+
stride=stride,
|
121 |
+
dilate=(1, 1),
|
122 |
+
no_bias=False)
|
123 |
+
bn = mx.symbol.BatchNorm(data=conv,
|
124 |
+
fix_gamma=False,
|
125 |
+
momentum=bn_mom,
|
126 |
+
eps=2e-5,
|
127 |
+
name=name + '_bn')
|
128 |
+
if with_act:
|
129 |
+
act = Act(bn, act_type, name=name + '_relu')
|
130 |
+
#act = mx.symbol.Activation(
|
131 |
+
# data=bn, act_type=act_type, attr=mirror_attr, name=name+'_relu')
|
132 |
+
return act
|
133 |
+
else:
|
134 |
+
return bn
|
135 |
+
|
136 |
+
|
137 |
+
class CAB:
|
138 |
+
def __init__(self, data, nFilters, nModules, n, workspace, name, dilate,
|
139 |
+
group):
|
140 |
+
self.data = data
|
141 |
+
self.nFilters = nFilters
|
142 |
+
self.nModules = nModules
|
143 |
+
self.n = n
|
144 |
+
self.workspace = workspace
|
145 |
+
self.name = name
|
146 |
+
self.dilate = dilate
|
147 |
+
self.group = group
|
148 |
+
self.sym_map = {}
|
149 |
+
|
150 |
+
def get_output(self, w, h):
|
151 |
+
key = (w, h)
|
152 |
+
if key in self.sym_map:
|
153 |
+
return self.sym_map[key]
|
154 |
+
ret = None
|
155 |
+
if h == self.n:
|
156 |
+
if w == self.n:
|
157 |
+
ret = (self.data, self.nFilters)
|
158 |
+
else:
|
159 |
+
x = self.get_output(w + 1, h)
|
160 |
+
f = int(x[1] * 0.5)
|
161 |
+
if w != self.n - 1:
|
162 |
+
body = lin3(x[0], f, self.workspace,
|
163 |
+
"%s_w%d_h%d_1" % (self.name, w, h), 3,
|
164 |
+
self.group, 1)
|
165 |
+
else:
|
166 |
+
body = lin3(x[0], f, self.workspace,
|
167 |
+
"%s_w%d_h%d_1" % (self.name, w, h), 3,
|
168 |
+
self.group, self.dilate)
|
169 |
+
ret = (body, f)
|
170 |
+
else:
|
171 |
+
x = self.get_output(w + 1, h + 1)
|
172 |
+
y = self.get_output(w, h + 1)
|
173 |
+
if h % 2 == 1 and h != w:
|
174 |
+
xbody = lin3(x[0], x[1], self.workspace,
|
175 |
+
"%s_w%d_h%d_2" % (self.name, w, h), 3, x[1])
|
176 |
+
#xbody = xbody+x[0]
|
177 |
+
else:
|
178 |
+
xbody = x[0]
|
179 |
+
#xbody = x[0]
|
180 |
+
#xbody = lin3(x[0], x[1], self.workspace, "%s_w%d_h%d_2"%(self.name, w, h), 3, x[1])
|
181 |
+
if w == 0:
|
182 |
+
ybody = lin3(y[0], y[1], self.workspace,
|
183 |
+
"%s_w%d_h%d_3" % (self.name, w, h), 3, self.group)
|
184 |
+
else:
|
185 |
+
ybody = y[0]
|
186 |
+
ybody = mx.sym.concat(y[0], ybody, dim=1)
|
187 |
+
body = mx.sym.add_n(xbody,
|
188 |
+
ybody,
|
189 |
+
name="%s_w%d_h%d_add" % (self.name, w, h))
|
190 |
+
body = body / 2
|
191 |
+
ret = (body, x[1])
|
192 |
+
self.sym_map[key] = ret
|
193 |
+
return ret
|
194 |
+
|
195 |
+
def get(self):
|
196 |
+
return self.get_output(1, 1)[0]
|
197 |
+
|
198 |
+
|
199 |
+
def conv_resnet(data, num_filter, stride, dim_match, name, binarize, dcn,
|
200 |
+
dilate, **kwargs):
|
201 |
+
bit = 1
|
202 |
+
#print('in unit2')
|
203 |
+
# the same as https://github.com/facebook/fb.resnet.torch#notes, a bit difference with origin paper
|
204 |
+
bn1 = mx.sym.BatchNorm(data=data,
|
205 |
+
fix_gamma=False,
|
206 |
+
eps=2e-5,
|
207 |
+
momentum=bn_mom,
|
208 |
+
name=name + '_bn1')
|
209 |
+
if not binarize:
|
210 |
+
act1 = Act(data=bn1, act_type='relu', name=name + '_relu1')
|
211 |
+
conv1 = Conv(data=act1,
|
212 |
+
num_filter=int(num_filter * 0.5),
|
213 |
+
kernel=(1, 1),
|
214 |
+
stride=(1, 1),
|
215 |
+
pad=(0, 0),
|
216 |
+
no_bias=True,
|
217 |
+
workspace=workspace,
|
218 |
+
name=name + '_conv1')
|
219 |
+
else:
|
220 |
+
act1 = mx.sym.QActivation(data=bn1,
|
221 |
+
act_bit=ACT_BIT,
|
222 |
+
name=name + '_relu1',
|
223 |
+
backward_only=True)
|
224 |
+
conv1 = mx.sym.QConvolution(data=act1,
|
225 |
+
num_filter=int(num_filter * 0.5),
|
226 |
+
kernel=(1, 1),
|
227 |
+
stride=(1, 1),
|
228 |
+
pad=(0, 0),
|
229 |
+
no_bias=True,
|
230 |
+
workspace=workspace,
|
231 |
+
name=name + '_conv1',
|
232 |
+
act_bit=ACT_BIT,
|
233 |
+
weight_bit=bit)
|
234 |
+
bn2 = mx.sym.BatchNorm(data=conv1,
|
235 |
+
fix_gamma=False,
|
236 |
+
eps=2e-5,
|
237 |
+
momentum=bn_mom,
|
238 |
+
name=name + '_bn2')
|
239 |
+
if not binarize:
|
240 |
+
act2 = Act(data=bn2, act_type='relu', name=name + '_relu2')
|
241 |
+
conv2 = Conv(data=act2,
|
242 |
+
num_filter=int(num_filter * 0.5),
|
243 |
+
kernel=(3, 3),
|
244 |
+
stride=(1, 1),
|
245 |
+
pad=(1, 1),
|
246 |
+
no_bias=True,
|
247 |
+
workspace=workspace,
|
248 |
+
name=name + '_conv2')
|
249 |
+
else:
|
250 |
+
act2 = mx.sym.QActivation(data=bn2,
|
251 |
+
act_bit=ACT_BIT,
|
252 |
+
name=name + '_relu2',
|
253 |
+
backward_only=True)
|
254 |
+
conv2 = mx.sym.QConvolution(data=act2,
|
255 |
+
num_filter=int(num_filter * 0.5),
|
256 |
+
kernel=(3, 3),
|
257 |
+
stride=(1, 1),
|
258 |
+
pad=(1, 1),
|
259 |
+
no_bias=True,
|
260 |
+
workspace=workspace,
|
261 |
+
name=name + '_conv2',
|
262 |
+
act_bit=ACT_BIT,
|
263 |
+
weight_bit=bit)
|
264 |
+
bn3 = mx.sym.BatchNorm(data=conv2,
|
265 |
+
fix_gamma=False,
|
266 |
+
eps=2e-5,
|
267 |
+
momentum=bn_mom,
|
268 |
+
name=name + '_bn3')
|
269 |
+
if not binarize:
|
270 |
+
act3 = Act(data=bn3, act_type='relu', name=name + '_relu3')
|
271 |
+
conv3 = Conv(data=act3,
|
272 |
+
num_filter=num_filter,
|
273 |
+
kernel=(1, 1),
|
274 |
+
stride=(1, 1),
|
275 |
+
pad=(0, 0),
|
276 |
+
no_bias=True,
|
277 |
+
workspace=workspace,
|
278 |
+
name=name + '_conv3')
|
279 |
+
else:
|
280 |
+
act3 = mx.sym.QActivation(data=bn3,
|
281 |
+
act_bit=ACT_BIT,
|
282 |
+
name=name + '_relu3',
|
283 |
+
backward_only=True)
|
284 |
+
conv3 = mx.sym.QConvolution(data=act3,
|
285 |
+
num_filter=num_filter,
|
286 |
+
kernel=(1, 1),
|
287 |
+
stride=(1, 1),
|
288 |
+
pad=(0, 0),
|
289 |
+
no_bias=True,
|
290 |
+
workspace=workspace,
|
291 |
+
name=name + '_conv3',
|
292 |
+
act_bit=ACT_BIT,
|
293 |
+
weight_bit=bit)
|
294 |
+
#if binarize:
|
295 |
+
# conv3 = mx.sym.BatchNorm(data=conv3, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn4')
|
296 |
+
if dim_match:
|
297 |
+
shortcut = data
|
298 |
+
else:
|
299 |
+
if not binarize:
|
300 |
+
shortcut = Conv(data=act1,
|
301 |
+
num_filter=num_filter,
|
302 |
+
kernel=(1, 1),
|
303 |
+
stride=stride,
|
304 |
+
no_bias=True,
|
305 |
+
workspace=workspace,
|
306 |
+
name=name + '_sc')
|
307 |
+
else:
|
308 |
+
shortcut = mx.sym.QConvolution(data=act1,
|
309 |
+
num_filter=num_filter,
|
310 |
+
kernel=(1, 1),
|
311 |
+
stride=stride,
|
312 |
+
pad=(0, 0),
|
313 |
+
no_bias=True,
|
314 |
+
workspace=workspace,
|
315 |
+
name=name + '_sc',
|
316 |
+
act_bit=ACT_BIT,
|
317 |
+
weight_bit=bit)
|
318 |
+
if memonger:
|
319 |
+
shortcut._set_attr(mirror_stage='True')
|
320 |
+
return conv3 + shortcut
|
321 |
+
|
322 |
+
|
323 |
+
def conv_hpm(data, num_filter, stride, dim_match, name, binarize, dcn,
|
324 |
+
dilation, **kwargs):
|
325 |
+
bit = 1
|
326 |
+
#print('in unit2')
|
327 |
+
# the same as https://github.com/facebook/fb.resnet.torch#notes, a bit difference with origin paper
|
328 |
+
bn1 = mx.sym.BatchNorm(data=data,
|
329 |
+
fix_gamma=False,
|
330 |
+
eps=2e-5,
|
331 |
+
momentum=bn_mom,
|
332 |
+
name=name + '_bn1')
|
333 |
+
if not binarize:
|
334 |
+
act1 = Act(data=bn1, act_type='relu', name=name + '_relu1')
|
335 |
+
if not dcn:
|
336 |
+
conv1 = Conv(data=act1,
|
337 |
+
num_filter=int(num_filter * 0.5),
|
338 |
+
kernel=(3, 3),
|
339 |
+
stride=(1, 1),
|
340 |
+
pad=(dilation, dilation),
|
341 |
+
dilate=(dilation, dilation),
|
342 |
+
no_bias=True,
|
343 |
+
workspace=workspace,
|
344 |
+
name=name + '_conv1')
|
345 |
+
else:
|
346 |
+
conv1_offset = mx.symbol.Convolution(name=name + '_conv1_offset',
|
347 |
+
data=act1,
|
348 |
+
num_filter=18,
|
349 |
+
pad=(1, 1),
|
350 |
+
kernel=(3, 3),
|
351 |
+
stride=(1, 1))
|
352 |
+
conv1 = mx.contrib.symbol.DeformableConvolution(
|
353 |
+
name=name + '_conv1',
|
354 |
+
data=act1,
|
355 |
+
offset=conv1_offset,
|
356 |
+
num_filter=int(num_filter * 0.5),
|
357 |
+
pad=(1, 1),
|
358 |
+
kernel=(3, 3),
|
359 |
+
num_deformable_group=1,
|
360 |
+
stride=(1, 1),
|
361 |
+
dilate=(1, 1),
|
362 |
+
no_bias=True)
|
363 |
+
else:
|
364 |
+
act1 = mx.sym.QActivation(data=bn1,
|
365 |
+
act_bit=ACT_BIT,
|
366 |
+
name=name + '_relu1',
|
367 |
+
backward_only=True)
|
368 |
+
conv1 = mx.sym.QConvolution_v1(data=act1,
|
369 |
+
num_filter=int(num_filter * 0.5),
|
370 |
+
kernel=(3, 3),
|
371 |
+
stride=(1, 1),
|
372 |
+
pad=(1, 1),
|
373 |
+
no_bias=True,
|
374 |
+
workspace=workspace,
|
375 |
+
name=name + '_conv1',
|
376 |
+
act_bit=ACT_BIT,
|
377 |
+
weight_bit=bit)
|
378 |
+
bn2 = mx.sym.BatchNorm(data=conv1,
|
379 |
+
fix_gamma=False,
|
380 |
+
eps=2e-5,
|
381 |
+
momentum=bn_mom,
|
382 |
+
name=name + '_bn2')
|
383 |
+
if not binarize:
|
384 |
+
act2 = Act(data=bn2, act_type='relu', name=name + '_relu2')
|
385 |
+
if not dcn:
|
386 |
+
conv2 = Conv(data=act2,
|
387 |
+
num_filter=int(num_filter * 0.25),
|
388 |
+
kernel=(3, 3),
|
389 |
+
stride=(1, 1),
|
390 |
+
pad=(dilation, dilation),
|
391 |
+
dilate=(dilation, dilation),
|
392 |
+
no_bias=True,
|
393 |
+
workspace=workspace,
|
394 |
+
name=name + '_conv2')
|
395 |
+
else:
|
396 |
+
conv2_offset = mx.symbol.Convolution(name=name + '_conv2_offset',
|
397 |
+
data=act2,
|
398 |
+
num_filter=18,
|
399 |
+
pad=(1, 1),
|
400 |
+
kernel=(3, 3),
|
401 |
+
stride=(1, 1))
|
402 |
+
conv2 = mx.contrib.symbol.DeformableConvolution(
|
403 |
+
name=name + '_conv2',
|
404 |
+
data=act2,
|
405 |
+
offset=conv2_offset,
|
406 |
+
num_filter=int(num_filter * 0.25),
|
407 |
+
pad=(1, 1),
|
408 |
+
kernel=(3, 3),
|
409 |
+
num_deformable_group=1,
|
410 |
+
stride=(1, 1),
|
411 |
+
dilate=(1, 1),
|
412 |
+
no_bias=True)
|
413 |
+
else:
|
414 |
+
act2 = mx.sym.QActivation(data=bn2,
|
415 |
+
act_bit=ACT_BIT,
|
416 |
+
name=name + '_relu2',
|
417 |
+
backward_only=True)
|
418 |
+
conv2 = mx.sym.QConvolution_v1(data=act2,
|
419 |
+
num_filter=int(num_filter * 0.25),
|
420 |
+
kernel=(3, 3),
|
421 |
+
stride=(1, 1),
|
422 |
+
pad=(1, 1),
|
423 |
+
no_bias=True,
|
424 |
+
workspace=workspace,
|
425 |
+
name=name + '_conv2',
|
426 |
+
act_bit=ACT_BIT,
|
427 |
+
weight_bit=bit)
|
428 |
+
bn3 = mx.sym.BatchNorm(data=conv2,
|
429 |
+
fix_gamma=False,
|
430 |
+
eps=2e-5,
|
431 |
+
momentum=bn_mom,
|
432 |
+
name=name + '_bn3')
|
433 |
+
if not binarize:
|
434 |
+
act3 = Act(data=bn3, act_type='relu', name=name + '_relu3')
|
435 |
+
if not dcn:
|
436 |
+
conv3 = Conv(data=act3,
|
437 |
+
num_filter=int(num_filter * 0.25),
|
438 |
+
kernel=(3, 3),
|
439 |
+
stride=(1, 1),
|
440 |
+
pad=(dilation, dilation),
|
441 |
+
dilate=(dilation, dilation),
|
442 |
+
no_bias=True,
|
443 |
+
workspace=workspace,
|
444 |
+
name=name + '_conv3')
|
445 |
+
else:
|
446 |
+
conv3_offset = mx.symbol.Convolution(name=name + '_conv3_offset',
|
447 |
+
data=act3,
|
448 |
+
num_filter=18,
|
449 |
+
pad=(1, 1),
|
450 |
+
kernel=(3, 3),
|
451 |
+
stride=(1, 1))
|
452 |
+
conv3 = mx.contrib.symbol.DeformableConvolution(
|
453 |
+
name=name + '_conv3',
|
454 |
+
data=act3,
|
455 |
+
offset=conv3_offset,
|
456 |
+
num_filter=int(num_filter * 0.25),
|
457 |
+
pad=(1, 1),
|
458 |
+
kernel=(3, 3),
|
459 |
+
num_deformable_group=1,
|
460 |
+
stride=(1, 1),
|
461 |
+
dilate=(1, 1),
|
462 |
+
no_bias=True)
|
463 |
+
else:
|
464 |
+
act3 = mx.sym.QActivation(data=bn3,
|
465 |
+
act_bit=ACT_BIT,
|
466 |
+
name=name + '_relu3',
|
467 |
+
backward_only=True)
|
468 |
+
conv3 = mx.sym.QConvolution_v1(data=act3,
|
469 |
+
num_filter=int(num_filter * 0.25),
|
470 |
+
kernel=(3, 3),
|
471 |
+
stride=(1, 1),
|
472 |
+
pad=(1, 1),
|
473 |
+
no_bias=True,
|
474 |
+
workspace=workspace,
|
475 |
+
name=name + '_conv3',
|
476 |
+
act_bit=ACT_BIT,
|
477 |
+
weight_bit=bit)
|
478 |
+
conv4 = mx.symbol.Concat(*[conv1, conv2, conv3])
|
479 |
+
if binarize:
|
480 |
+
conv4 = mx.sym.BatchNorm(data=conv4,
|
481 |
+
fix_gamma=False,
|
482 |
+
eps=2e-5,
|
483 |
+
momentum=bn_mom,
|
484 |
+
name=name + '_bn4')
|
485 |
+
if dim_match:
|
486 |
+
shortcut = data
|
487 |
+
else:
|
488 |
+
if not binarize:
|
489 |
+
shortcut = Conv(data=act1,
|
490 |
+
num_filter=num_filter,
|
491 |
+
kernel=(1, 1),
|
492 |
+
stride=stride,
|
493 |
+
no_bias=True,
|
494 |
+
workspace=workspace,
|
495 |
+
name=name + '_sc')
|
496 |
+
else:
|
497 |
+
#assert(False)
|
498 |
+
shortcut = mx.sym.QConvolution_v1(data=act1,
|
499 |
+
num_filter=num_filter,
|
500 |
+
kernel=(1, 1),
|
501 |
+
stride=stride,
|
502 |
+
pad=(0, 0),
|
503 |
+
no_bias=True,
|
504 |
+
workspace=workspace,
|
505 |
+
name=name + '_sc',
|
506 |
+
act_bit=ACT_BIT,
|
507 |
+
weight_bit=bit)
|
508 |
+
shortcut = mx.sym.BatchNorm(data=shortcut,
|
509 |
+
fix_gamma=False,
|
510 |
+
eps=2e-5,
|
511 |
+
momentum=bn_mom,
|
512 |
+
name=name + '_sc_bn')
|
513 |
+
if memonger:
|
514 |
+
shortcut._set_attr(mirror_stage='True')
|
515 |
+
return conv4 + shortcut
|
516 |
+
#return bn4 + shortcut
|
517 |
+
#return act4 + shortcut
|
518 |
+
|
519 |
+
|
520 |
+
def block17(net,
|
521 |
+
input_num_channels,
|
522 |
+
scale=1.0,
|
523 |
+
with_act=True,
|
524 |
+
act_type='relu',
|
525 |
+
mirror_attr={},
|
526 |
+
name=''):
|
527 |
+
tower_conv = ConvFactory(net, 192, (1, 1), name=name + '_conv')
|
528 |
+
tower_conv1_0 = ConvFactory(net, 129, (1, 1), name=name + '_conv1_0')
|
529 |
+
tower_conv1_1 = ConvFactory(tower_conv1_0,
|
530 |
+
160, (1, 7),
|
531 |
+
pad=(1, 2),
|
532 |
+
name=name + '_conv1_1')
|
533 |
+
tower_conv1_2 = ConvFactory(tower_conv1_1,
|
534 |
+
192, (7, 1),
|
535 |
+
pad=(2, 1),
|
536 |
+
name=name + '_conv1_2')
|
537 |
+
tower_mixed = mx.symbol.Concat(*[tower_conv, tower_conv1_2])
|
538 |
+
tower_out = ConvFactory(tower_mixed,
|
539 |
+
input_num_channels, (1, 1),
|
540 |
+
with_act=False,
|
541 |
+
name=name + '_conv_out')
|
542 |
+
net = net + scale * tower_out
|
543 |
+
if with_act:
|
544 |
+
act = mx.symbol.Activation(data=net,
|
545 |
+
act_type=act_type,
|
546 |
+
attr=mirror_attr)
|
547 |
+
return act
|
548 |
+
else:
|
549 |
+
return net
|
550 |
+
|
551 |
+
|
552 |
+
def block35(net,
|
553 |
+
input_num_channels,
|
554 |
+
scale=1.0,
|
555 |
+
with_act=True,
|
556 |
+
act_type='relu',
|
557 |
+
mirror_attr={},
|
558 |
+
name=''):
|
559 |
+
M = 1.0
|
560 |
+
tower_conv = ConvFactory(net,
|
561 |
+
int(input_num_channels * 0.25 * M), (1, 1),
|
562 |
+
name=name + '_conv')
|
563 |
+
tower_conv1_0 = ConvFactory(net,
|
564 |
+
int(input_num_channels * 0.25 * M), (1, 1),
|
565 |
+
name=name + '_conv1_0')
|
566 |
+
tower_conv1_1 = ConvFactory(tower_conv1_0,
|
567 |
+
int(input_num_channels * 0.25 * M), (3, 3),
|
568 |
+
pad=(1, 1),
|
569 |
+
name=name + '_conv1_1')
|
570 |
+
tower_conv2_0 = ConvFactory(net,
|
571 |
+
int(input_num_channels * 0.25 * M), (1, 1),
|
572 |
+
name=name + '_conv2_0')
|
573 |
+
tower_conv2_1 = ConvFactory(tower_conv2_0,
|
574 |
+
int(input_num_channels * 0.375 * M), (3, 3),
|
575 |
+
pad=(1, 1),
|
576 |
+
name=name + '_conv2_1')
|
577 |
+
tower_conv2_2 = ConvFactory(tower_conv2_1,
|
578 |
+
int(input_num_channels * 0.5 * M), (3, 3),
|
579 |
+
pad=(1, 1),
|
580 |
+
name=name + '_conv2_2')
|
581 |
+
tower_mixed = mx.symbol.Concat(*[tower_conv, tower_conv1_1, tower_conv2_2])
|
582 |
+
tower_out = ConvFactory(tower_mixed,
|
583 |
+
input_num_channels, (1, 1),
|
584 |
+
with_act=False,
|
585 |
+
name=name + '_conv_out')
|
586 |
+
|
587 |
+
net = net + scale * tower_out
|
588 |
+
if with_act:
|
589 |
+
act = mx.symbol.Activation(data=net,
|
590 |
+
act_type=act_type,
|
591 |
+
attr=mirror_attr)
|
592 |
+
return act
|
593 |
+
else:
|
594 |
+
return net
|
595 |
+
|
596 |
+
|
597 |
+
def conv_inception(data, num_filter, stride, dim_match, name, binarize, dcn,
|
598 |
+
dilate, **kwargs):
|
599 |
+
assert not binarize
|
600 |
+
if stride[0] > 1 or not dim_match:
|
601 |
+
return conv_resnet(data, num_filter, stride, dim_match, name, binarize,
|
602 |
+
dcn, dilate, **kwargs)
|
603 |
+
conv4 = block35(data, num_filter, name=name + '_block35')
|
604 |
+
return conv4
|
605 |
+
|
606 |
+
|
607 |
+
def conv_cab(data, num_filter, stride, dim_match, name, binarize, dcn, dilate,
|
608 |
+
**kwargs):
|
609 |
+
if stride[0] > 1 or not dim_match:
|
610 |
+
return conv_hpm(data, num_filter, stride, dim_match, name, binarize,
|
611 |
+
dcn, dilate, **kwargs)
|
612 |
+
cab = CAB(data, num_filter, 1, 4, workspace, name, dilate, 1)
|
613 |
+
return cab.get()
|
614 |
+
|
615 |
+
|
616 |
+
def conv_block(data, num_filter, stride, dim_match, name, binarize, dcn,
|
617 |
+
dilate):
|
618 |
+
if config.net_block == 'resnet':
|
619 |
+
return conv_resnet(data, num_filter, stride, dim_match, name, binarize,
|
620 |
+
dcn, dilate)
|
621 |
+
elif config.net_block == 'inception':
|
622 |
+
return conv_inception(data, num_filter, stride, dim_match, name,
|
623 |
+
binarize, dcn, dilate)
|
624 |
+
elif config.net_block == 'hpm':
|
625 |
+
return conv_hpm(data, num_filter, stride, dim_match, name, binarize,
|
626 |
+
dcn, dilate)
|
627 |
+
elif config.net_block == 'cab':
|
628 |
+
return conv_cab(data, num_filter, stride, dim_match, name, binarize,
|
629 |
+
dcn, dilate)
|
630 |
+
|
631 |
+
|
632 |
+
def hourglass(data, nFilters, nModules, n, workspace, name, binarize, dcn):
|
633 |
+
s = 2
|
634 |
+
_dcn = False
|
635 |
+
up1 = data
|
636 |
+
for i in range(nModules):
|
637 |
+
up1 = conv_block(up1, nFilters, (1, 1), True, "%s_up1_%d" % (name, i),
|
638 |
+
binarize, _dcn, 1)
|
639 |
+
low1 = mx.sym.Pooling(data=data,
|
640 |
+
kernel=(s, s),
|
641 |
+
stride=(s, s),
|
642 |
+
pad=(0, 0),
|
643 |
+
pool_type='max')
|
644 |
+
for i in range(nModules):
|
645 |
+
low1 = conv_block(low1, nFilters, (1, 1), True,
|
646 |
+
"%s_low1_%d" % (name, i), binarize, _dcn, 1)
|
647 |
+
if n > 1:
|
648 |
+
low2 = hourglass(low1, nFilters, nModules, n - 1, workspace,
|
649 |
+
"%s_%d" % (name, n - 1), binarize, dcn)
|
650 |
+
else:
|
651 |
+
low2 = low1
|
652 |
+
for i in range(nModules):
|
653 |
+
low2 = conv_block(low2, nFilters, (1, 1), True,
|
654 |
+
"%s_low2_%d" % (name, i), binarize, _dcn,
|
655 |
+
1) #TODO
|
656 |
+
low3 = low2
|
657 |
+
for i in range(nModules):
|
658 |
+
low3 = conv_block(low3, nFilters, (1, 1), True,
|
659 |
+
"%s_low3_%d" % (name, i), binarize, _dcn, 1)
|
660 |
+
up2 = mx.symbol.UpSampling(low3,
|
661 |
+
scale=s,
|
662 |
+
sample_type='nearest',
|
663 |
+
workspace=512,
|
664 |
+
name='%s_upsampling_%s' % (name, n),
|
665 |
+
num_args=1)
|
666 |
+
return mx.symbol.add_n(up1, up2)
|
667 |
+
|
668 |
+
|
669 |
+
class STA:
|
670 |
+
def __init__(self, data, nFilters, nModules, n, workspace, name):
|
671 |
+
self.data = data
|
672 |
+
self.nFilters = nFilters
|
673 |
+
self.nModules = nModules
|
674 |
+
self.n = n
|
675 |
+
self.workspace = workspace
|
676 |
+
self.name = name
|
677 |
+
self.sym_map = {}
|
678 |
+
|
679 |
+
def get_conv(self, data, name, dilate=1, group=1):
|
680 |
+
cab = CAB(data, self.nFilters, self.nModules, 4, self.workspace, name,
|
681 |
+
dilate, group)
|
682 |
+
return cab.get()
|
683 |
+
|
684 |
+
def get_output(self, w, h):
|
685 |
+
#print(w,h)
|
686 |
+
assert w >= 1 and w <= config.net_n + 1
|
687 |
+
assert h >= 1 and h <= config.net_n + 1
|
688 |
+
s = 2
|
689 |
+
bn_mom = 0.9
|
690 |
+
key = (w, h)
|
691 |
+
if key in self.sym_map:
|
692 |
+
return self.sym_map[key]
|
693 |
+
ret = None
|
694 |
+
if h == self.n:
|
695 |
+
if w == self.n:
|
696 |
+
ret = self.data, 64
|
697 |
+
else:
|
698 |
+
x = self.get_output(w + 1, h)
|
699 |
+
body = self.get_conv(x[0], "%s_w%d_h%d_1" % (self.name, w, h))
|
700 |
+
body = mx.sym.Pooling(data=body,
|
701 |
+
kernel=(s, s),
|
702 |
+
stride=(s, s),
|
703 |
+
pad=(0, 0),
|
704 |
+
pool_type='max')
|
705 |
+
body = self.get_conv(body, "%s_w%d_h%d_2" % (self.name, w, h))
|
706 |
+
ret = body, x[1] // 2
|
707 |
+
else:
|
708 |
+
x = self.get_output(w + 1, h + 1)
|
709 |
+
y = self.get_output(w, h + 1)
|
710 |
+
|
711 |
+
HC = False
|
712 |
+
|
713 |
+
if h % 2 == 1 and h != w:
|
714 |
+
xbody = lin3(x[0], self.nFilters, self.workspace,
|
715 |
+
"%s_w%d_h%d_x" % (self.name, w, h), 3,
|
716 |
+
self.nFilters, 1)
|
717 |
+
HC = True
|
718 |
+
#xbody = x[0]
|
719 |
+
else:
|
720 |
+
xbody = x[0]
|
721 |
+
if x[1] // y[1] == 2:
|
722 |
+
if w > 1:
|
723 |
+
ybody = mx.symbol.Deconvolution(
|
724 |
+
data=y[0],
|
725 |
+
num_filter=self.nFilters,
|
726 |
+
kernel=(s, s),
|
727 |
+
stride=(s, s),
|
728 |
+
name='%s_upsampling_w%d_h%d' % (self.name, w, h),
|
729 |
+
attr={'lr_mult': '1.0'},
|
730 |
+
workspace=self.workspace)
|
731 |
+
ybody = mx.sym.BatchNorm(data=ybody,
|
732 |
+
fix_gamma=False,
|
733 |
+
momentum=bn_mom,
|
734 |
+
eps=2e-5,
|
735 |
+
name="%s_w%d_h%d_y_bn" %
|
736 |
+
(self.name, w, h))
|
737 |
+
ybody = Act(data=ybody,
|
738 |
+
act_type='relu',
|
739 |
+
name="%s_w%d_h%d_y_act" % (self.name, w, h))
|
740 |
+
else:
|
741 |
+
if h >= 1:
|
742 |
+
ybody = mx.symbol.UpSampling(
|
743 |
+
y[0],
|
744 |
+
scale=s,
|
745 |
+
sample_type='nearest',
|
746 |
+
workspace=512,
|
747 |
+
name='%s_upsampling_w%d_h%d' % (self.name, w, h),
|
748 |
+
num_args=1)
|
749 |
+
ybody = self.get_conv(
|
750 |
+
ybody, "%s_w%d_h%d_4" % (self.name, w, h))
|
751 |
+
else:
|
752 |
+
ybody = mx.symbol.Deconvolution(
|
753 |
+
data=y[0],
|
754 |
+
num_filter=self.nFilters,
|
755 |
+
kernel=(s, s),
|
756 |
+
stride=(s, s),
|
757 |
+
name='%s_upsampling_w%d_h%d' % (self.name, w, h),
|
758 |
+
attr={'lr_mult': '1.0'},
|
759 |
+
workspace=self.workspace)
|
760 |
+
ybody = mx.sym.BatchNorm(data=ybody,
|
761 |
+
fix_gamma=False,
|
762 |
+
momentum=bn_mom,
|
763 |
+
eps=2e-5,
|
764 |
+
name="%s_w%d_h%d_y_bn" %
|
765 |
+
(self.name, w, h))
|
766 |
+
ybody = Act(data=ybody,
|
767 |
+
act_type='relu',
|
768 |
+
name="%s_w%d_h%d_y_act" %
|
769 |
+
(self.name, w, h))
|
770 |
+
ybody = Conv(data=ybody,
|
771 |
+
num_filter=self.nFilters,
|
772 |
+
kernel=(3, 3),
|
773 |
+
stride=(1, 1),
|
774 |
+
pad=(1, 1),
|
775 |
+
no_bias=True,
|
776 |
+
name="%s_w%d_h%d_y_conv2" %
|
777 |
+
(self.name, w, h),
|
778 |
+
workspace=self.workspace)
|
779 |
+
ybody = mx.sym.BatchNorm(data=ybody,
|
780 |
+
fix_gamma=False,
|
781 |
+
momentum=bn_mom,
|
782 |
+
eps=2e-5,
|
783 |
+
name="%s_w%d_h%d_y_bn2" %
|
784 |
+
(self.name, w, h))
|
785 |
+
ybody = Act(data=ybody,
|
786 |
+
act_type='relu',
|
787 |
+
name="%s_w%d_h%d_y_act2" %
|
788 |
+
(self.name, w, h))
|
789 |
+
else:
|
790 |
+
ybody = self.get_conv(y[0], "%s_w%d_h%d_5" % (self.name, w, h))
|
791 |
+
#if not HC:
|
792 |
+
if config.net_sta == 2 and h == 3 and w == 2:
|
793 |
+
z = self.get_output(w + 1, h)
|
794 |
+
zbody = z[0]
|
795 |
+
zbody = mx.sym.Pooling(data=zbody,
|
796 |
+
kernel=(z[1], z[1]),
|
797 |
+
stride=(z[1], z[1]),
|
798 |
+
pad=(0, 0),
|
799 |
+
pool_type='avg')
|
800 |
+
body = xbody + ybody
|
801 |
+
body = body / 2
|
802 |
+
body = mx.sym.broadcast_mul(body, zbody)
|
803 |
+
else: #sta==1
|
804 |
+
body = xbody + ybody
|
805 |
+
body = body / 2
|
806 |
+
ret = body, x[1]
|
807 |
+
|
808 |
+
assert ret is not None
|
809 |
+
self.sym_map[key] = ret
|
810 |
+
return ret
|
811 |
+
|
812 |
+
def get(self):
|
813 |
+
return self.get_output(1, 1)[0]
|
814 |
+
|
815 |
+
|
816 |
+
class SymCoherent:
|
817 |
+
def __init__(self, per_batch_size):
|
818 |
+
self.per_batch_size = per_batch_size
|
819 |
+
self.flip_order = [
|
820 |
+
16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 26, 25,
|
821 |
+
24, 23, 22, 21, 20, 19, 18, 17, 27, 28, 29, 30, 35, 34, 33, 32, 31,
|
822 |
+
45, 44, 43, 42, 47, 46, 39, 38, 37, 36, 41, 40, 54, 53, 52, 51, 50,
|
823 |
+
49, 48, 59, 58, 57, 56, 55, 64, 63, 62, 61, 60, 67, 66, 65
|
824 |
+
]
|
825 |
+
|
826 |
+
def get(self, data):
|
827 |
+
#data.shape[0]==per_batch_size
|
828 |
+
b = self.per_batch_size // 2
|
829 |
+
ux = mx.sym.slice_axis(data, axis=0, begin=0, end=b)
|
830 |
+
dx = mx.sym.slice_axis(data, axis=0, begin=b, end=b * 2)
|
831 |
+
ux = mx.sym.flip(ux, axis=3)
|
832 |
+
#ux = mx.sym.take(ux, indices = self.flip_order, axis=0)
|
833 |
+
ux_list = []
|
834 |
+
for o in self.flip_order:
|
835 |
+
_ux = mx.sym.slice_axis(ux, axis=1, begin=o, end=o + 1)
|
836 |
+
ux_list.append(_ux)
|
837 |
+
ux = mx.sym.concat(*ux_list, dim=1)
|
838 |
+
return ux, dx
|
839 |
+
|
840 |
+
|
841 |
+
def l2_loss(x, y):
|
842 |
+
loss = x - y
|
843 |
+
loss = mx.symbol.smooth_l1(loss, scalar=1.0)
|
844 |
+
#loss = loss*loss
|
845 |
+
loss = mx.symbol.mean(loss)
|
846 |
+
return loss
|
847 |
+
|
848 |
+
|
849 |
+
def ce_loss(x, y):
|
850 |
+
#loss = mx.sym.SoftmaxOutput(data = x, label = y, normalization='valid', multi_output=True)
|
851 |
+
x_max = mx.sym.max(x, axis=[2, 3], keepdims=True)
|
852 |
+
x = mx.sym.broadcast_minus(x, x_max)
|
853 |
+
body = mx.sym.exp(x)
|
854 |
+
sums = mx.sym.sum(body, axis=[2, 3], keepdims=True)
|
855 |
+
body = mx.sym.broadcast_div(body, sums)
|
856 |
+
loss = mx.sym.log(body)
|
857 |
+
loss = loss * y * -1.0
|
858 |
+
loss = mx.symbol.mean(loss, axis=[1, 2, 3])
|
859 |
+
#loss = mx.symbol.mean(loss)
|
860 |
+
return loss
|
861 |
+
|
862 |
+
|
863 |
+
def get_symbol(num_classes):
|
864 |
+
m = config.multiplier
|
865 |
+
sFilters = max(int(64 * m), 32)
|
866 |
+
mFilters = max(int(128 * m), 32)
|
867 |
+
nFilters = int(256 * m)
|
868 |
+
|
869 |
+
nModules = 1
|
870 |
+
nStacks = config.net_stacks
|
871 |
+
binarize = config.net_binarize
|
872 |
+
input_size = config.input_img_size
|
873 |
+
label_size = config.output_label_size
|
874 |
+
use_coherent = config.net_coherent
|
875 |
+
use_STA = config.net_sta
|
876 |
+
N = config.net_n
|
877 |
+
DCN = config.net_dcn
|
878 |
+
per_batch_size = config.per_batch_size
|
879 |
+
print('binarize', binarize)
|
880 |
+
print('use_coherent', use_coherent)
|
881 |
+
print('use_STA', use_STA)
|
882 |
+
print('use_N', N)
|
883 |
+
print('use_DCN', DCN)
|
884 |
+
print('per_batch_size', per_batch_size)
|
885 |
+
#assert(label_size==64 or label_size==32)
|
886 |
+
#assert(input_size==128 or input_size==256)
|
887 |
+
coherentor = SymCoherent(per_batch_size)
|
888 |
+
D = input_size // label_size
|
889 |
+
print(input_size, label_size, D)
|
890 |
+
data = mx.sym.Variable(name='data')
|
891 |
+
data = data - 127.5
|
892 |
+
data = data * 0.0078125
|
893 |
+
gt_label = mx.symbol.Variable(name='softmax_label')
|
894 |
+
losses = []
|
895 |
+
closses = []
|
896 |
+
ref_label = gt_label
|
897 |
+
if D == 4:
|
898 |
+
body = Conv(data=data,
|
899 |
+
num_filter=sFilters,
|
900 |
+
kernel=(7, 7),
|
901 |
+
stride=(2, 2),
|
902 |
+
pad=(3, 3),
|
903 |
+
no_bias=True,
|
904 |
+
name="conv0",
|
905 |
+
workspace=workspace)
|
906 |
+
else:
|
907 |
+
body = Conv(data=data,
|
908 |
+
num_filter=sFilters,
|
909 |
+
kernel=(3, 3),
|
910 |
+
stride=(1, 1),
|
911 |
+
pad=(1, 1),
|
912 |
+
no_bias=True,
|
913 |
+
name="conv0",
|
914 |
+
workspace=workspace)
|
915 |
+
body = mx.sym.BatchNorm(data=body,
|
916 |
+
fix_gamma=False,
|
917 |
+
eps=2e-5,
|
918 |
+
momentum=bn_mom,
|
919 |
+
name='bn0')
|
920 |
+
body = Act(data=body, act_type='relu', name='relu0')
|
921 |
+
|
922 |
+
dcn = False
|
923 |
+
body = conv_block(body, mFilters, (1, 1), sFilters == mFilters, 'res0',
|
924 |
+
False, dcn, 1)
|
925 |
+
|
926 |
+
body = mx.sym.Pooling(data=body,
|
927 |
+
kernel=(2, 2),
|
928 |
+
stride=(2, 2),
|
929 |
+
pad=(0, 0),
|
930 |
+
pool_type='max')
|
931 |
+
|
932 |
+
body = conv_block(body, mFilters, (1, 1), True, 'res1', False, dcn,
|
933 |
+
1) #TODO
|
934 |
+
body = conv_block(body, nFilters, (1, 1), mFilters == nFilters, 'res2',
|
935 |
+
binarize, dcn, 1) #binarize=True?
|
936 |
+
|
937 |
+
heatmap = None
|
938 |
+
|
939 |
+
for i in range(nStacks):
|
940 |
+
shortcut = body
|
941 |
+
if config.net_sta > 0:
|
942 |
+
sta = STA(body, nFilters, nModules, config.net_n + 1, workspace,
|
943 |
+
'sta%d' % (i))
|
944 |
+
body = sta.get()
|
945 |
+
else:
|
946 |
+
body = hourglass(body, nFilters, nModules, config.net_n, workspace,
|
947 |
+
'stack%d_hg' % (i), binarize, dcn)
|
948 |
+
for j in range(nModules):
|
949 |
+
body = conv_block(body, nFilters, (1, 1), True,
|
950 |
+
'stack%d_unit%d' % (i, j), binarize, dcn, 1)
|
951 |
+
_dcn = True if config.net_dcn >= 2 else False
|
952 |
+
ll = ConvFactory(body,
|
953 |
+
nFilters, (1, 1),
|
954 |
+
dcn=_dcn,
|
955 |
+
name='stack%d_ll' % (i))
|
956 |
+
_name = "heatmap%d" % (i) if i < nStacks - 1 else "heatmap"
|
957 |
+
_dcn = True if config.net_dcn >= 2 else False
|
958 |
+
if not _dcn:
|
959 |
+
out = Conv(data=ll,
|
960 |
+
num_filter=num_classes,
|
961 |
+
kernel=(1, 1),
|
962 |
+
stride=(1, 1),
|
963 |
+
pad=(0, 0),
|
964 |
+
name=_name,
|
965 |
+
workspace=workspace)
|
966 |
+
else:
|
967 |
+
out_offset = mx.symbol.Convolution(name=_name + '_offset',
|
968 |
+
data=ll,
|
969 |
+
num_filter=18,
|
970 |
+
pad=(1, 1),
|
971 |
+
kernel=(3, 3),
|
972 |
+
stride=(1, 1))
|
973 |
+
out = mx.contrib.symbol.DeformableConvolution(
|
974 |
+
name=_name,
|
975 |
+
data=ll,
|
976 |
+
offset=out_offset,
|
977 |
+
num_filter=num_classes,
|
978 |
+
pad=(1, 1),
|
979 |
+
kernel=(3, 3),
|
980 |
+
num_deformable_group=1,
|
981 |
+
stride=(1, 1),
|
982 |
+
dilate=(1, 1),
|
983 |
+
no_bias=False)
|
984 |
+
#out = Conv(data=ll, num_filter=num_classes, kernel=(3,3), stride=(1,1), pad=(1,1),
|
985 |
+
# name=_name, workspace=workspace)
|
986 |
+
if i == nStacks - 1:
|
987 |
+
heatmap = out
|
988 |
+
loss = ce_loss(out, ref_label)
|
989 |
+
#loss = loss/nStacks
|
990 |
+
#loss = l2_loss(out, ref_label)
|
991 |
+
losses.append(loss)
|
992 |
+
if config.net_coherent > 0:
|
993 |
+
ux, dx = coherentor.get(out)
|
994 |
+
closs = l2_loss(ux, dx)
|
995 |
+
closs = closs / nStacks
|
996 |
+
closses.append(closs)
|
997 |
+
|
998 |
+
if i < nStacks - 1:
|
999 |
+
ll2 = Conv(data=ll,
|
1000 |
+
num_filter=nFilters,
|
1001 |
+
kernel=(1, 1),
|
1002 |
+
stride=(1, 1),
|
1003 |
+
pad=(0, 0),
|
1004 |
+
name="stack%d_ll2" % (i),
|
1005 |
+
workspace=workspace)
|
1006 |
+
out2 = Conv(data=out,
|
1007 |
+
num_filter=nFilters,
|
1008 |
+
kernel=(1, 1),
|
1009 |
+
stride=(1, 1),
|
1010 |
+
pad=(0, 0),
|
1011 |
+
name="stack%d_out2" % (i),
|
1012 |
+
workspace=workspace)
|
1013 |
+
body = mx.symbol.add_n(shortcut, ll2, out2)
|
1014 |
+
_dcn = True if (config.net_dcn == 1
|
1015 |
+
or config.net_dcn == 3) else False
|
1016 |
+
if _dcn:
|
1017 |
+
_name = "stack%d_out3" % (i)
|
1018 |
+
out3_offset = mx.symbol.Convolution(name=_name + '_offset',
|
1019 |
+
data=body,
|
1020 |
+
num_filter=18,
|
1021 |
+
pad=(1, 1),
|
1022 |
+
kernel=(3, 3),
|
1023 |
+
stride=(1, 1))
|
1024 |
+
out3 = mx.contrib.symbol.DeformableConvolution(
|
1025 |
+
name=_name,
|
1026 |
+
data=body,
|
1027 |
+
offset=out3_offset,
|
1028 |
+
num_filter=nFilters,
|
1029 |
+
pad=(1, 1),
|
1030 |
+
kernel=(3, 3),
|
1031 |
+
num_deformable_group=1,
|
1032 |
+
stride=(1, 1),
|
1033 |
+
dilate=(1, 1),
|
1034 |
+
no_bias=False)
|
1035 |
+
body = out3
|
1036 |
+
|
1037 |
+
pred = mx.symbol.BlockGrad(heatmap)
|
1038 |
+
#loss = mx.symbol.add_n(*losses)
|
1039 |
+
#loss = mx.symbol.MakeLoss(loss)
|
1040 |
+
#syms = [loss]
|
1041 |
+
syms = []
|
1042 |
+
for loss in losses:
|
1043 |
+
loss = mx.symbol.MakeLoss(loss)
|
1044 |
+
syms.append(loss)
|
1045 |
+
if len(closses) > 0:
|
1046 |
+
coherent_weight = 0.0001
|
1047 |
+
closs = mx.symbol.add_n(*closses)
|
1048 |
+
closs = mx.symbol.MakeLoss(closs, grad_scale=coherent_weight)
|
1049 |
+
syms.append(closs)
|
1050 |
+
syms.append(pred)
|
1051 |
+
sym = mx.symbol.Group(syms)
|
1052 |
+
return sym
|
1053 |
+
|
1054 |
+
|
1055 |
+
def init_weights(sym, data_shape_dict):
|
1056 |
+
#print('in hg')
|
1057 |
+
arg_name = sym.list_arguments()
|
1058 |
+
aux_name = sym.list_auxiliary_states()
|
1059 |
+
arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
|
1060 |
+
arg_shape_dict = dict(zip(arg_name, arg_shape))
|
1061 |
+
aux_shape_dict = dict(zip(aux_name, aux_shape))
|
1062 |
+
#print(aux_shape)
|
1063 |
+
#print(aux_params)
|
1064 |
+
#print(arg_shape_dict)
|
1065 |
+
arg_params = {}
|
1066 |
+
aux_params = {}
|
1067 |
+
for k in arg_shape_dict:
|
1068 |
+
v = arg_shape_dict[k]
|
1069 |
+
#print(k,v)
|
1070 |
+
if k.endswith('offset_weight') or k.endswith('offset_bias'):
|
1071 |
+
print('initializing', k)
|
1072 |
+
arg_params[k] = mx.nd.zeros(shape=v)
|
1073 |
+
elif k.startswith('fc6_'):
|
1074 |
+
if k.endswith('_weight'):
|
1075 |
+
print('initializing', k)
|
1076 |
+
arg_params[k] = mx.random.normal(0, 0.01, shape=v)
|
1077 |
+
elif k.endswith('_bias'):
|
1078 |
+
print('initializing', k)
|
1079 |
+
arg_params[k] = mx.nd.zeros(shape=v)
|
1080 |
+
elif k.find('upsampling') >= 0:
|
1081 |
+
print('initializing upsampling_weight', k)
|
1082 |
+
arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k])
|
1083 |
+
init = mx.init.Initializer()
|
1084 |
+
init._init_bilinear(k, arg_params[k])
|
1085 |
+
return arg_params, aux_params
|
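A rough sketch of building the stacked-hourglass symbol defined above and checking its output shapes. It assumes sample_config.py is available as config.py and sets per_batch_size manually, since get_symbol reads it; shape inference is only expected to work once the config is fully populated:

import mxnet as mx
from config import config, generate_config
import sym_heatmap

generate_config('hourglass', 'i2d')
config.per_batch_size = 2  # normally set by train.py

sym = sym_heatmap.get_symbol(num_classes=config.num_classes)
_, out_shapes, _ = sym.infer_shape(
    data=(2, 3, config.input_img_size, config.input_img_size),
    softmax_label=(2, config.num_classes,
                   config.output_label_size, config.output_label_size))
print(sym.list_outputs())
print(out_shapes)  # per-stack losses plus the blocked-gradient heatmap output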
insightface/alignment/heatmap/test.py
ADDED
@@ -0,0 +1,100 @@
import argparse
import cv2
import sys
import numpy as np
import os
import mxnet as mx
import datetime
import img_helper
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'deploy'))
from mtcnn_detector import MtcnnDetector


class Handler:
    def __init__(self, prefix, epoch, ctx_id=0):
        print('loading', prefix, epoch)
        if ctx_id >= 0:
            ctx = mx.gpu(ctx_id)
        else:
            ctx = mx.cpu()
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        all_layers = sym.get_internals()
        sym = all_layers['heatmap_output']
        image_size = (128, 128)
        self.image_size = image_size
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        #model = mx.mod.Module(symbol=sym, context=ctx)
        model.bind(for_training=False,
                   data_shapes=[('data', (1, 3, image_size[0], image_size[1]))
                                ])
        model.set_params(arg_params, aux_params)
        self.model = model
        mtcnn_path = os.path.join(os.path.dirname(__file__), '..', 'deploy',
                                  'mtcnn-model')
        self.det_threshold = [0.6, 0.7, 0.8]
        self.detector = MtcnnDetector(model_folder=mtcnn_path,
                                      ctx=ctx,
                                      num_worker=1,
                                      accurate_landmark=True,
                                      threshold=self.det_threshold)

    def get(self, img):
        ret = self.detector.detect_face(img, det_type=0)
        if ret is None:
            return None
        bbox, points = ret
        if bbox.shape[0] == 0:
            return None
        bbox = bbox[0, 0:4]
        points = points[0, :].reshape((2, 5)).T
        M = img_helper.estimate_trans_bbox(bbox, self.image_size[0], s=2.0)
        rimg = cv2.warpAffine(img, M, self.image_size, borderValue=0.0)
        img = cv2.cvtColor(rimg, cv2.COLOR_BGR2RGB)
        img = np.transpose(img, (2, 0, 1))  #3*112*112, RGB
        input_blob = np.zeros((1, 3, self.image_size[1], self.image_size[0]),
                              dtype=np.uint8)
        input_blob[0] = img
        ta = datetime.datetime.now()
        data = mx.nd.array(input_blob)
        db = mx.io.DataBatch(data=(data, ))
        self.model.forward(db, is_train=False)
        alabel = self.model.get_outputs()[-1].asnumpy()[0]
        tb = datetime.datetime.now()
        print('module time cost', (tb - ta).total_seconds())
        ret = np.zeros((alabel.shape[0], 2), dtype=np.float32)
        for i in range(alabel.shape[0]):
            a = cv2.resize(alabel[i], (self.image_size[1], self.image_size[0]))
            ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
            #ret[i] = (ind[0], ind[1]) #h, w
            ret[i] = (ind[1], ind[0])  #w, h
        return ret, M


ctx_id = 4
img_path = '../deploy/Tom_Hanks_54745.png'
img = cv2.imread(img_path)
#img = np.zeros( (128,128,3), dtype=np.uint8 )

handler = Handler('./model/HG', 1, ctx_id)
for _ in range(10):
    ta = datetime.datetime.now()
    landmark, M = handler.get(img)
    tb = datetime.datetime.now()
    print('get time cost', (tb - ta).total_seconds())
#visualize landmark
IM = cv2.invertAffineTransform(M)
for i in range(landmark.shape[0]):
    p = landmark[i]
    point = np.ones((3, ), dtype=np.float32)
    point[0:2] = p
    point = np.dot(IM, point)
    landmark[i] = point[0:2]

for i in range(landmark.shape[0]):
    p = landmark[i]
    point = (int(p[0]), int(p[1]))
    cv2.circle(img, point, 1, (0, 255, 0), 2)

filename = './landmark_test.png'
print('writing', filename)
cv2.imwrite(filename, img)
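Handler.get decodes each heatmap by resizing it to the input resolution and taking a per-channel argmax. The sketch below is a vectorized approximation that skips the resize and simply rescales the argmax location; it is an illustration of the decoding idea, not the script's exact procedure:

import numpy as np

def decode_heatmaps(heatmaps, input_size=128):
    # heatmaps: (num_points, H, W) array, e.g. the 68x64x64 network output.
    # Returns (num_points, 2) landmarks as (x, y) in input-image coordinates.
    n, h, w = heatmaps.shape
    flat_idx = heatmaps.reshape(n, -1).argmax(axis=1)
    rows, cols = np.unravel_index(flat_idx, (h, w))
    pts = np.stack([cols, rows], axis=1).astype(np.float32)  # (x, y)
    pts[:, 0] *= input_size / float(w)
    pts[:, 1] *= input_size / float(h)
    return pts

dummy = np.random.rand(68, 64, 64).astype(np.float32)
print(decode_heatmaps(dummy).shape)  # (68, 2)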
insightface/alignment/heatmap/test_rec_nme.py
ADDED
@@ -0,0 +1,71 @@
import argparse
import cv2
import sys
import numpy as np
import os
import mxnet as mx
import datetime
import img_helper
from config import config
from data import FaceSegIter
from metric import LossValueMetric, NMEMetric

parser = argparse.ArgumentParser(description='test nme on rec data')
# general
parser.add_argument('--rec',
                    default='./data_2d/ibug.rec',
                    help='rec data path')
parser.add_argument('--prefix', default='', help='model prefix')
parser.add_argument('--epoch', type=int, default=1, help='model epoch')
parser.add_argument('--gpu', type=int, default=0, help='')
parser.add_argument('--landmark-type', default='2d', help='')
parser.add_argument('--image-size', type=int, default=128, help='')
args = parser.parse_args()

rec_path = args.rec
ctx_id = args.gpu
prefix = args.prefix
epoch = args.epoch
image_size = (args.image_size, args.image_size)
config.landmark_type = args.landmark_type
config.input_img_size = image_size[0]

if ctx_id >= 0:
    ctx = mx.gpu(ctx_id)
else:
    ctx = mx.cpu()
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
all_layers = sym.get_internals()
sym = all_layers['heatmap_output']
#model = mx.mod.Module(symbol=sym, context=ctx, data_names=['data'], label_names=['softmax_label'])
model = mx.mod.Module(symbol=sym,
                      context=ctx,
                      data_names=['data'],
                      label_names=None)
#model = mx.mod.Module(symbol=sym, context=ctx)
model.bind(for_training=False,
           data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
model.set_params(arg_params, aux_params)

val_iter = FaceSegIter(
    path_imgrec=rec_path,
    batch_size=1,
    aug_level=0,
)
_metric = NMEMetric()
#val_metric = mx.metric.create(_metric)
#val_metric.reset()
#val_iter.reset()
nme = []
for i, eval_batch in enumerate(val_iter):
    if i % 10 == 0:
        print('processing', i)
    #print(eval_batch.data[0].shape, eval_batch.label[0].shape)
    batch_data = mx.io.DataBatch(eval_batch.data)
    model.forward(batch_data, is_train=False)
    #model.update_metric(val_metric, eval_batch.label, True)
    pred_label = model.get_outputs()[-1].asnumpy()
    label = eval_batch.label[0].asnumpy()
    _nme = _metric.cal_nme(label, pred_label)
    nme.append(_nme)
print(np.mean(nme))
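The loop above collects per-image NME values in a Python list; NMEMetric can also accumulate them itself through its update/get interface. A small sketch, assuming model and val_iter are set up exactly as in this script:

import mxnet as mx
from metric import NMEMetric

def evaluate_nme(model, val_iter):
    # Run the bound module over the iterator and return ('NME', averaged value).
    metric = NMEMetric()
    metric.reset()
    for eval_batch in val_iter:
        model.forward(mx.io.DataBatch(eval_batch.data), is_train=False)
        metric.update(eval_batch.label, model.get_outputs())
    return metric.get()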
insightface/alignment/heatmap/train.py
ADDED
@@ -0,0 +1,236 @@
1 |
+
from __future__ import absolute_import
|
2 |
+
from __future__ import division
|
3 |
+
from __future__ import print_function
|
4 |
+
|
5 |
+
import logging
|
6 |
+
import argparse
|
7 |
+
from data import FaceSegIter
|
8 |
+
import mxnet as mx
|
9 |
+
import mxnet.optimizer as optimizer
|
10 |
+
import numpy as np
|
11 |
+
import os
|
12 |
+
import sys
|
13 |
+
import math
|
14 |
+
import random
|
15 |
+
import cv2
|
16 |
+
from config import config, default, generate_config
|
17 |
+
from optimizer import ONadam
|
18 |
+
from metric import LossValueMetric, NMEMetric
|
19 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'symbol'))
|
20 |
+
import sym_heatmap
|
21 |
+
#import sym_fc
|
22 |
+
#from symbol import fc
|
23 |
+
|
24 |
+
args = None
|
25 |
+
logger = logging.getLogger()
|
26 |
+
logger.setLevel(logging.INFO)
|
27 |
+
|
28 |
+
|
29 |
+
def main(args):
|
30 |
+
_seed = 727
|
31 |
+
random.seed(_seed)
|
32 |
+
np.random.seed(_seed)
|
33 |
+
mx.random.seed(_seed)
|
34 |
+
ctx = []
|
35 |
+
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
|
36 |
+
if len(cvd) > 0:
|
37 |
+
for i in range(len(cvd.split(','))):
|
38 |
+
ctx.append(mx.gpu(i))
|
39 |
+
if len(ctx) == 0:
|
40 |
+
ctx = [mx.cpu()]
|
41 |
+
print('use cpu')
|
42 |
+
else:
|
43 |
+
print('gpu num:', len(ctx))
|
44 |
+
#ctx = [mx.gpu(0)]
|
45 |
+
args.ctx_num = len(ctx)
|
46 |
+
|
47 |
+
args.batch_size = args.per_batch_size * args.ctx_num
|
48 |
+
config.per_batch_size = args.per_batch_size
|
49 |
+
|
50 |
+
print('Call with', args, config)
|
51 |
+
train_iter = FaceSegIter(
|
52 |
+
path_imgrec=os.path.join(config.dataset_path, 'train.rec'),
|
53 |
+
batch_size=args.batch_size,
|
54 |
+
per_batch_size=args.per_batch_size,
|
55 |
+
aug_level=1,
|
56 |
+
exf=args.exf,
|
57 |
+
args=args,
|
58 |
+
)
|
59 |
+
|
60 |
+
data_shape = train_iter.get_data_shape()
|
61 |
+
#label_shape = train_iter.get_label_shape()
|
62 |
+
sym = sym_heatmap.get_symbol(num_classes=config.num_classes)
|
63 |
+
if len(args.pretrained) == 0:
|
64 |
+
#data_shape_dict = {'data' : (args.per_batch_size,)+data_shape, 'softmax_label' : (args.per_batch_size,)+label_shape}
|
65 |
+
data_shape_dict = train_iter.get_shape_dict()
|
66 |
+
arg_params, aux_params = sym_heatmap.init_weights(sym, data_shape_dict)
|
67 |
+
else:
|
68 |
+
vec = args.pretrained.split(',')
|
69 |
+
print('loading', vec)
|
70 |
+
_, arg_params, aux_params = mx.model.load_checkpoint(
|
71 |
+
vec[0], int(vec[1]))
|
72 |
+
#sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)
|
73 |
+
|
74 |
+
model = mx.mod.Module(
|
75 |
+
context=ctx,
|
76 |
+
symbol=sym,
|
77 |
+
label_names=train_iter.get_label_names(),
|
78 |
+
)
|
79 |
+
#lr = 1.0e-3
|
80 |
+
#lr = 2.5e-4
|
81 |
+
_rescale_grad = 1.0 / args.ctx_num
|
82 |
+
#_rescale_grad = 1.0/args.batch_size
|
83 |
+
#lr = args.lr
|
84 |
+
#opt = optimizer.Nadam(learning_rate=args.lr, wd=args.wd, rescale_grad=_rescale_grad, clip_gradient=5.0)
|
85 |
+
if args.optimizer == 'onadam':
|
86 |
+
opt = ONadam(learning_rate=args.lr,
|
87 |
+
wd=args.wd,
|
88 |
+
rescale_grad=_rescale_grad,
|
89 |
+
clip_gradient=5.0)
|
90 |
+
elif args.optimizer == 'nadam':
|
91 |
+
opt = optimizer.Nadam(learning_rate=args.lr,
|
92 |
+
rescale_grad=_rescale_grad)
|
93 |
+
elif args.optimizer == 'rmsprop':
|
94 |
+
opt = optimizer.RMSProp(learning_rate=args.lr,
|
95 |
+
rescale_grad=_rescale_grad)
|
96 |
+
elif args.optimizer == 'adam':
|
97 |
+
opt = optimizer.Adam(learning_rate=args.lr, rescale_grad=_rescale_grad)
|
98 |
+
else:
|
99 |
+
opt = optimizer.SGD(learning_rate=args.lr,
|
100 |
+
momentum=0.9,
|
101 |
+
wd=args.wd,
|
102 |
+
rescale_grad=_rescale_grad)
|
103 |
+
initializer = mx.init.Xavier(rnd_type='gaussian',
|
104 |
+
factor_type="in",
|
105 |
+
magnitude=2)
|
106 |
+
_cb = mx.callback.Speedometer(args.batch_size, args.frequent)
|
107 |
+
_metric = LossValueMetric()
|
108 |
+
#_metric = NMEMetric()
|
109 |
+
#_metric2 = AccMetric()
|
110 |
+
#eval_metrics = [_metric, _metric2]
|
111 |
+
eval_metrics = [_metric]
|
112 |
+
lr_steps = [int(x) for x in args.lr_step.split(',')]
|
113 |
+
print('lr-steps', lr_steps)
|
114 |
+
global_step = [0]
|
115 |
+
|
116 |
+
def val_test():
|
117 |
+
all_layers = sym.get_internals()
|
118 |
+
vsym = all_layers['heatmap_output']
|
119 |
+
vmodel = mx.mod.Module(symbol=vsym, context=ctx, label_names=None)
|
120 |
+
#model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
|
121 |
+
vmodel.bind(data_shapes=[('data', (args.batch_size, ) + data_shape)])
|
122 |
+
arg_params, aux_params = model.get_params()
|
123 |
+
vmodel.set_params(arg_params, aux_params)
|
124 |
+
for target in config.val_targets:
|
125 |
+
_file = os.path.join(config.dataset_path, '%s.rec' % target)
|
126 |
+
if not os.path.exists(_file):
|
127 |
+
continue
|
128 |
+
val_iter = FaceSegIter(
|
129 |
+
path_imgrec=_file,
|
130 |
+
batch_size=args.batch_size,
|
131 |
+
#batch_size = 4,
|
132 |
+
aug_level=0,
|
133 |
+
args=args,
|
134 |
+
)
|
135 |
+
_metric = NMEMetric()
|
136 |
+
val_metric = mx.metric.create(_metric)
|
137 |
+
val_metric.reset()
|
138 |
+
val_iter.reset()
|
139 |
+
for i, eval_batch in enumerate(val_iter):
|
140 |
+
#print(eval_batch.data[0].shape, eval_batch.label[0].shape)
|
141 |
+
batch_data = mx.io.DataBatch(eval_batch.data)
|
142 |
+
model.forward(batch_data, is_train=False)
|
143 |
+
model.update_metric(val_metric, eval_batch.label)
|
144 |
+
nme_value = val_metric.get_name_value()[0][1]
|
145 |
+
print('[%d][%s]NME: %f' % (global_step[0], target, nme_value))
|
146 |
+
|
147 |
+
def _batch_callback(param):
|
148 |
+
_cb(param)
|
149 |
+
global_step[0] += 1
|
150 |
+
mbatch = global_step[0]
|
151 |
+
for _lr in lr_steps:
|
152 |
+
if mbatch == _lr:
|
153 |
+
opt.lr *= 0.2
|
154 |
+
print('lr change to', opt.lr)
|
155 |
+
break
|
156 |
+
if mbatch % 1000 == 0:
|
157 |
+
print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch)
|
158 |
+
if mbatch > 0 and mbatch % args.verbose == 0:
|
159 |
+
val_test()
|
160 |
+
if args.ckpt == 1:
|
161 |
+
msave = mbatch // args.verbose
|
162 |
+
print('saving', msave)
|
163 |
+
arg, aux = model.get_params()
|
164 |
+
mx.model.save_checkpoint(args.prefix, msave, model.symbol, arg,
|
165 |
+
aux)
|
166 |
+
if mbatch == lr_steps[-1]:
|
167 |
+
if args.ckpt == 2:
|
168 |
+
#msave = mbatch//args.verbose
|
169 |
+
msave = 1
|
170 |
+
print('saving', msave)
|
171 |
+
arg, aux = model.get_params()
|
172 |
+
mx.model.save_checkpoint(args.prefix, msave, model.symbol, arg,
|
173 |
+
aux)
|
174 |
+
sys.exit(0)
|
175 |
+
|
176 |
+
train_iter = mx.io.PrefetchingIter(train_iter)
|
177 |
+
|
178 |
+
model.fit(
|
179 |
+
train_iter,
|
180 |
+
begin_epoch=0,
|
181 |
+
num_epoch=9999,
|
182 |
+
#eval_data = val_iter,
|
183 |
+
eval_data=None,
|
184 |
+
eval_metric=eval_metrics,
|
185 |
+
kvstore='device',
|
186 |
+
optimizer=opt,
|
187 |
+
initializer=initializer,
|
188 |
+
arg_params=arg_params,
|
189 |
+
aux_params=aux_params,
|
190 |
+
allow_missing=True,
|
191 |
+
batch_end_callback=_batch_callback,
|
192 |
+
epoch_end_callback=None,
|
193 |
+
)
|
194 |
+
|
195 |
+
|
196 |
+
if __name__ == '__main__':
|
197 |
+
parser = argparse.ArgumentParser(description='Train face alignment')
|
198 |
+
# general
|
199 |
+
parser.add_argument('--network',
|
200 |
+
help='network name',
|
201 |
+
default=default.network,
|
202 |
+
type=str)
|
203 |
+
parser.add_argument('--dataset',
|
204 |
+
help='dataset name',
|
205 |
+
default=default.dataset,
|
206 |
+
type=str)
|
207 |
+
args, rest = parser.parse_known_args()
|
208 |
+
generate_config(args.network, args.dataset)
|
209 |
+
parser.add_argument('--prefix',
|
210 |
+
default=default.prefix,
|
211 |
+
help='directory to save model.')
|
212 |
+
parser.add_argument('--pretrained', default=default.pretrained, help='')
|
213 |
+
parser.add_argument('--optimizer', default='nadam', help='')
|
214 |
+
parser.add_argument('--lr', type=float, default=default.lr, help='')
|
215 |
+
parser.add_argument('--wd', type=float, default=default.wd, help='')
|
216 |
+
parser.add_argument('--per-batch-size',
|
217 |
+
type=int,
|
218 |
+
default=default.per_batch_size,
|
219 |
+
help='')
|
220 |
+
parser.add_argument('--lr-step',
|
221 |
+
help='learning rate steps (in epoch)',
|
222 |
+
default=default.lr_step,
|
223 |
+
type=str)
|
224 |
+
parser.add_argument('--ckpt', type=int, default=1, help='')
|
225 |
+
parser.add_argument('--norm', type=int, default=0, help='')
|
226 |
+
parser.add_argument('--exf', type=int, default=1, help='')
|
227 |
+
parser.add_argument('--frequent',
|
228 |
+
type=int,
|
229 |
+
default=default.frequent,
|
230 |
+
help='')
|
231 |
+
parser.add_argument('--verbose',
|
232 |
+
type=int,
|
233 |
+
default=default.verbose,
|
234 |
+
help='')
|
235 |
+
args = parser.parse_args()
|
236 |
+
main(args)
|
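Note on the schedule used in `_batch_callback` above: the learning rate is multiplied by 0.2 each time the global batch counter reaches one of the `--lr-step` milestones. A minimal sketch of that schedule in isolation (a hypothetical helper, not part of the repository):

```python
def stepwise_lr(base_lr, mbatch, lr_steps, gamma=0.2):
    """Return the learning rate after applying a gamma decay at every
    milestone in lr_steps that has already been passed."""
    lr = base_lr
    for step in lr_steps:
        if mbatch >= step:
            lr *= gamma
    return lr

# e.g. with lr_steps = [20000, 30000], the LR is base_lr * 0.2 after batch
# 20000 and base_lr * 0.04 after batch 30000.
```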
insightface/alignment/synthetics/README.md
ADDED
@@ -0,0 +1,63 @@
# Introduction

We provide training and testing tools on synthetic data.


## Dataset

### Training dataset

Download the `Face Synthetics dataset` from [https://github.com/microsoft/FaceSynthetics](https://github.com/microsoft/FaceSynthetics) and put it somewhere.

<div align="left">
<img src="https://github.com/microsoft/FaceSynthetics/raw/main/docs/img/dataset_samples_2.jpg" width="640"/>
</div>
<br/>

Then use [tools/prepare_synthetics.py](tools/prepare_synthetics.py) for training data preparation.


### Testing dataset

[300-W](https://ibug.doc.ic.ac.uk/resources/300-W/)


## Pretrained Model

[ResNet50d](https://drive.google.com/file/d/1kNP7qEl3AYNbaHFUg_ZiyRB1CtfDWXR4/view?usp=sharing)


## Train and Test

### Prerequisites

- pytorch_lightning
- timm
- albumentations

### Training

`` python -u trainer_synthetics.py ``

which uses `resnet50d` as the backbone by default; please check the [code](trainer_synthetics.py) for details.

### Testing

Please check [test_synthetics.py](test_synthetics.py) for details.


## Result Visualization (3D 68 Keypoints)

<div align="left">
<img src="https://github.com/nttstar/insightface-resources/blob/master/alignment/images/image_008_1.jpg?raw=true" width="320"/>
</div>

<div align="left">
<img src="https://github.com/nttstar/insightface-resources/blob/master/alignment/images/image_017_1.jpg?raw=true" width="320"/>
</div>

<div align="left">
<img src="https://github.com/nttstar/insightface-resources/blob/master/alignment/images/image_039.jpg?raw=true" width="320"/>
</div>
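As a quick orientation for the trainer referenced above: it builds the landmark regressor by asking `timm` for a backbone whose head outputs 68 × 2 values, one (x, y) pair per keypoint. A minimal sketch, assuming `timm` and `torch` are installed:

```python
import timm
import torch

# resnet50d backbone with a 136-dim head: 68 keypoints x (x, y)
model = timm.create_model('resnet50d', num_classes=68 * 2)
dummy = torch.randn(1, 3, 256, 256)   # the dataset resizes crops to 256x256
print(model(dummy).shape)             # -> torch.Size([1, 136])
```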
insightface/alignment/synthetics/datasets/augs.py
ADDED
@@ -0,0 +1,40 @@
import numpy as np
import albumentations as A
from albumentations.core.transforms_interface import ImageOnlyTransform

class RectangleBorderAugmentation(ImageOnlyTransform):

    def __init__(
            self,
            fill_value=0,
            limit=0.3,
            always_apply=False,
            p=1.0,
    ):
        super(RectangleBorderAugmentation, self).__init__(always_apply, p)
        assert 0.0 < limit < 1.0
        self.fill_value = fill_value
        self.limit = limit

    def apply(self, image, border_size_limit, **params):
        # border_size_limit holds four relative border widths (left, top, right, bottom)
        assert len(border_size_limit) == 4
        border_size = border_size_limit.copy()
        border_size[0] *= image.shape[1]
        border_size[2] *= image.shape[1]
        border_size[1] *= image.shape[0]
        border_size[3] *= image.shape[0]
        border_size = border_size.astype(int)
        image[:, :border_size[0], :] = self.fill_value
        image[:border_size[1], :, :] = self.fill_value
        image[:, -border_size[2]:, :] = self.fill_value
        image[-border_size[3]:, :, :] = self.fill_value
        return image

    def get_params(self):
        border_size_limit = np.random.uniform(0.0, self.limit, size=4)
        return {'border_size_limit': border_size_limit}

    def get_transform_init_args_names(self):
        return ('fill_value', 'limit')
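`RectangleBorderAugmentation` is consumed by `dataset_synthetics.py` below inside an `A.ReplayCompose` pipeline. A minimal standalone usage sketch (the random image is just a stand-in for a real crop, and the import path may need adjusting):

```python
import numpy as np
import albumentations as A
from augs import RectangleBorderAugmentation  # adjust import path as needed

transform = A.Compose([
    # zero out up to 33% of each border on 20% of the samples
    RectangleBorderAugmentation(limit=0.33, fill_value=0, p=0.2),
])
image = (np.random.rand(256, 256, 3) * 255).astype(np.uint8)
augmented = transform(image=image)['image']
```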
insightface/alignment/synthetics/datasets/dataset_synthetics.py
ADDED
@@ -0,0 +1,163 @@
import os
import os.path as osp
import queue as Queue
import pickle
import threading
import logging
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2
from .augs import RectangleBorderAugmentation

class BackgroundGenerator(threading.Thread):
    def __init__(self, generator, local_rank, max_prefetch=6):
        super(BackgroundGenerator, self).__init__()
        self.queue = Queue.Queue(max_prefetch)
        self.generator = generator
        self.local_rank = local_rank
        self.daemon = True
        self.start()

    def run(self):
        torch.cuda.set_device(self.local_rank)
        for item in self.generator:
            self.queue.put(item)
        self.queue.put(None)

    def next(self):
        next_item = self.queue.get()
        if next_item is None:
            raise StopIteration
        return next_item

    def __next__(self):
        return self.next()

    def __iter__(self):
        return self


class DataLoaderX(DataLoader):
    def __init__(self, local_rank, **kwargs):
        super(DataLoaderX, self).__init__(**kwargs)
        self.stream = torch.cuda.Stream(local_rank)
        self.local_rank = local_rank

    def __iter__(self):
        self.iter = super(DataLoaderX, self).__iter__()
        self.iter = BackgroundGenerator(self.iter, self.local_rank)
        self.preload()
        return self

    def preload(self):
        self.batch = next(self.iter, None)
        if self.batch is None:
            return None
        with torch.cuda.stream(self.stream):
            for k in range(len(self.batch)):
                self.batch[k] = self.batch[k].to(device=self.local_rank,
                                                 non_blocking=True)

    def __next__(self):
        torch.cuda.current_stream().wait_stream(self.stream)
        batch = self.batch
        if batch is None:
            raise StopIteration
        self.preload()
        return batch


class FaceDataset(Dataset):
    def __init__(self, root_dir, is_train):
        super(FaceDataset, self).__init__()

        #self.local_rank = local_rank
        self.is_train = is_train
        self.input_size = 256
        self.num_kps = 68
        transform_list = []
        if is_train:
            transform_list += \
                [
                    A.ColorJitter(brightness=0.8, contrast=0.5, p=0.5),
                    A.ToGray(p=0.1),
                    A.ISONoise(p=0.1),
                    A.MedianBlur(blur_limit=(1, 7), p=0.1),
                    A.GaussianBlur(blur_limit=(1, 7), p=0.1),
                    A.MotionBlur(blur_limit=(5, 12), p=0.1),
                    A.ImageCompression(quality_lower=50, quality_upper=90, p=0.05),
                    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=40, interpolation=cv2.INTER_LINEAR,
                                       border_mode=cv2.BORDER_CONSTANT, value=0, mask_value=0, p=0.8),
                    A.HorizontalFlip(p=0.5),
                    RectangleBorderAugmentation(limit=0.33, fill_value=0, p=0.2),
                ]
        transform_list += \
            [
                A.geometric.resize.Resize(self.input_size, self.input_size, interpolation=cv2.INTER_LINEAR, always_apply=True),
                A.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
                ToTensorV2(),
            ]
        self.transform = A.ReplayCompose(
            transform_list,
            keypoint_params=A.KeypointParams(format='xy', remove_invisible=False)
        )
        self.root_dir = root_dir
        with open(osp.join(root_dir, 'annot.pkl'), 'rb') as f:
            annot = pickle.load(f)
        self.X, self.Y = annot
        train_size = int(len(self.X) * 0.99)

        if is_train:
            self.X = self.X[:train_size]
            self.Y = self.Y[:train_size]
        else:
            self.X = self.X[train_size:]
            self.Y = self.Y[train_size:]
        #if local_rank==0:
        #    logging.info('data_transform_list:%s'%transform_list)
        flip_parts = ([1, 17], [2, 16], [3, 15], [4, 14], [5, 13], [6, 12], [7, 11], [8, 10],
                      [18, 27], [19, 26], [20, 25], [21, 24], [22, 23],
                      [32, 36], [33, 35],
                      [37, 46], [38, 45], [39, 44], [40, 43], [41, 48], [42, 47],
                      [49, 55], [50, 54], [51, 53], [62, 64], [61, 65], [68, 66], [59, 57], [60, 56])
        self.flip_order = np.arange(self.num_kps)
        for pair in flip_parts:
            self.flip_order[pair[1] - 1] = pair[0] - 1
            self.flip_order[pair[0] - 1] = pair[1] - 1
        logging.info('len:%d' % len(self.X))
        print('!!!len:%d' % len(self.X))

    def __getitem__(self, index):
        x = self.X[index]
        y = self.Y[index]
        image_path = os.path.join(self.root_dir, x)
        img = cv2.imread(image_path)[:, :, ::-1]
        label = y
        if self.transform is not None:
            t = self.transform(image=img, keypoints=label)
            flipped = False
            for trans in t["replay"]["transforms"]:
                if trans["__class_fullname__"].endswith('HorizontalFlip'):
                    if trans["applied"]:
                        flipped = True
            img = t['image']
            label = t['keypoints']
            label = np.array(label, dtype=np.float32)
            #print(img.shape)
            if flipped:
                #label[:, 0] = self.input_size - 1 - label[:, 0]  # already applied in horizontal flip aug
                label = label[self.flip_order, :]
            label /= (self.input_size / 2)
            label -= 1.0
            label = label.flatten()
            label = torch.tensor(label, dtype=torch.float32)
        return img, label

    def __len__(self):
        return len(self.X)
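A minimal sketch of how the dataset above would be consumed, assuming `data/synthetics` has already been produced by `tools/prepare_synthetics.py`:

```python
from torch.utils.data import DataLoader
from datasets.dataset_synthetics import FaceDataset

train_set = FaceDataset(root_dir='data/synthetics', is_train=True)
loader = DataLoader(train_set, batch_size=64, shuffle=True, num_workers=3)

images, labels = next(iter(loader))
print(images.shape)   # (64, 3, 256, 256) after Resize/Normalize/ToTensorV2
print(labels.shape)   # (64, 136): 68 keypoints flattened and scaled to about [-1, 1]
```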
insightface/alignment/synthetics/test_synthetics.py
ADDED
@@ -0,0 +1,104 @@
from trainer_synthetics import FaceSynthetics
import sys
import glob
import torch
import os
import numpy as np
import cv2
import os.path as osp
import insightface
from insightface.app import FaceAnalysis
from insightface.utils import face_align

flip_parts = ([1, 17], [2, 16], [3, 15], [4, 14], [5, 13], [6, 12], [7, 11], [8, 10],
              [18, 27], [19, 26], [20, 25], [21, 24], [22, 23],
              [32, 36], [33, 35],
              [37, 46], [38, 45], [39, 44], [40, 43], [41, 48], [42, 47],
              [49, 55], [50, 54], [51, 53], [62, 64], [61, 65], [68, 66], [59, 57], [60, 56])

app = FaceAnalysis()
app.prepare(ctx_id=0, det_size=(224, 224))
input_size = 256
USE_FLIP = False

root = 'data/300W/Validation'
output_dir = 'outputs/'

if not osp.exists(output_dir):
    os.makedirs(output_dir)

outf = open(osp.join(output_dir, 'pred.txt'), 'w')

model = FaceSynthetics.load_from_checkpoint(sys.argv[1]).cuda()
model.eval()
for line in open(osp.join(root, '300W_validation.txt'), 'r'):
    line = line.strip().split()
    img_path = osp.join(root, line[0])
    gt = line[1:]
    #print(len(gt))
    name = img_path.split('/')[-1]
    img = cv2.imread(img_path)
    dimg = img.copy()
    faces = app.get(img, max_num=1)
    if len(faces) != 1:
        continue
    bbox = faces[0].bbox
    w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
    center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
    rotate = 0
    _scale = input_size / (max(w, h) * 1.5)
    aimg, M = face_align.transform(img, center, input_size, _scale, rotate)
    #cv2.imwrite("outputs/a_%s"%name, aimg)
    aimg = cv2.cvtColor(aimg, cv2.COLOR_BGR2RGB)
    kps = None
    flips = [0, 1] if USE_FLIP else [0]
    for flip in flips:
        input = aimg.copy()
        if flip:
            input = input[:, ::-1, :].copy()
        input = np.transpose(input, (2, 0, 1))
        input = np.expand_dims(input, 0)
        imgs = torch.Tensor(input).cuda()
        imgs.div_(255).sub_(0.5).div_(0.5)
        pred = model(imgs).detach().cpu().numpy().flatten().reshape((-1, 2))
        pred[:, 0:2] += 1
        pred[:, 0:2] *= (input_size // 2)
        if flip:
            pred_flip = pred.copy()
            pred_flip[:, 0] = input_size - 1 - pred_flip[:, 0]
            for pair in flip_parts:
                tmp = pred_flip[pair[0] - 1, :].copy()
                pred_flip[pair[0] - 1, :] = pred_flip[pair[1] - 1, :]
                pred_flip[pair[1] - 1, :] = tmp
            pred = pred_flip
        if kps is None:
            kps = pred
        else:
            kps += pred
            kps /= 2.0
    #print(pred.shape)

    IM = cv2.invertAffineTransform(M)
    kps = face_align.trans_points(kps, IM)
    outf.write(line[0])
    outf.write(' ')
    outf.write(' '.join(["%.5f" % x for x in kps.flatten()]))
    outf.write("\n")
    box = bbox.astype(int)
    color = (0, 0, 255)
    cv2.rectangle(dimg, (box[0], box[1]), (box[2], box[3]), color, 2)
    kps = kps.astype(int)
    #print(landmark.shape)
    for l in range(kps.shape[0]):
        color = (0, 0, 255)
        cv2.circle(dimg, (kps[l][0], kps[l][1]), 1, color, 2)

    cv2.imwrite("outputs/%s" % name, dimg)

    #ret = np.argmax(feat)
    #print(feat)
    #outf.write("%s %.4f %.4f %.4f\n"%(line[0], feat[0], feat[1], feat[2]))

outf.close()
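Note how the test script above undoes the label normalization from `FaceDataset`: training targets are mapped from crop pixel coordinates to roughly [-1, 1] via `label / (input_size / 2) - 1`, so predictions are mapped back with `(pred + 1) * (input_size // 2)` before the inverse affine transform. A small sanity-check sketch:

```python
import numpy as np

input_size = 256
kps_px = np.array([[128.0, 64.0]])                  # a keypoint in crop pixel coordinates

normalized = kps_px / (input_size / 2) - 1.0        # what FaceDataset stores: [[0.0, -0.5]]
recovered = (normalized + 1.0) * (input_size // 2)  # what test_synthetics.py computes
assert np.allclose(recovered, kps_px)
```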
insightface/alignment/synthetics/tools/prepare_synthetics.py
ADDED
@@ -0,0 +1,70 @@
import sys
import glob
import torch
import pickle
import os
import numpy as np
import cv2
import os.path as osp
import insightface
from insightface.app import FaceAnalysis
from insightface.utils import face_align

app = FaceAnalysis()
app.prepare(ctx_id=0, det_size=(224, 224))
output_size = 384

input_dir = '/root/codebase/FaceSynthetics'
output_dir = 'data/synthetics'

if not osp.exists(output_dir):
    os.makedirs(output_dir)

X = []
Y = []

for i in range(0, 100000):
    if i % 1000 == 0:
        print('loading', i)
    x = "%06d.png" % i
    img_path = osp.join(input_dir, x)
    img = cv2.imread(img_path)
    dimg = img.copy()
    ylines = open(osp.join(input_dir, "%06d_ldmks.txt" % i)).readlines()
    ylines = ylines[:68]
    y = []
    for yline in ylines:
        lmk = [float(x) for x in yline.strip().split()]
        y.append(tuple(lmk))
    pred = np.array(y)
    faces = app.get(img, max_num=1)
    if len(faces) != 1:
        continue
    bbox = faces[0].bbox
    w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
    center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
    rotate = 0
    _scale = output_size / (max(w, h) * 1.5)
    aimg, M = face_align.transform(dimg, center, output_size, _scale, rotate)
    pred = face_align.trans_points(pred, M)
    #box = bbox.astype(np.int)
    #color = (0, 0, 255)
    #cv2.rectangle(dimg, (box[0], box[1]), (box[2], box[3]), color, 2)

    #kps = pred.astype(np.int)
    #for l in range(kps.shape[0]):
    #    color = (0, 0, 255)
    #    cv2.circle(aimg, (kps[l][0], kps[l][1]), 1, color, 2)
    x = x.replace('png', 'jpg')
    X.append(x)
    y = []
    for k in range(pred.shape[0]):
        y.append((pred[k][0], pred[k][1]))
    Y.append(y)
    cv2.imwrite("%s/%s" % (output_dir, x), aimg)


with open(osp.join(output_dir, 'annot.pkl'), 'wb') as pfile:
    pickle.dump((X, Y), pfile, protocol=pickle.HIGHEST_PROTOCOL)
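The pickle written above stores a pair `(X, Y)`: `X` is a list of jpg file names inside `data/synthetics`, and `Y` holds, for each image, 68 `(x, y)` tuples in the coordinates of the aligned 384×384 crop. A short read-back sketch:

```python
import pickle
import os.path as osp

with open(osp.join('data/synthetics', 'annot.pkl'), 'rb') as f:
    X, Y = pickle.load(f)

print(len(X), 'images')
print(X[0], len(Y[0]), 'keypoints')   # e.g. 000000.jpg 68 keypoints
```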
insightface/alignment/synthetics/trainer_synthetics.py
ADDED
@@ -0,0 +1,140 @@
from argparse import ArgumentParser

import os
import os.path as osp
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import timm
from datasets.dataset_synthetics import FaceDataset, DataLoaderX


class FaceSynthetics(pl.LightningModule):
    def __init__(self, backbone):
        super().__init__()
        self.save_hyperparameters()
        backbone = timm.create_model(backbone, num_classes=68 * 2)
        self.backbone = backbone
        self.loss = nn.L1Loss(reduction='mean')
        self.hard_mining = False

    def forward(self, x):
        # use forward for inference/predictions
        y = self.backbone(x)
        return y

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.backbone(x)
        if self.hard_mining:
            loss = torch.abs(y_hat - y)  # (B, K)
            loss = torch.mean(loss, dim=1)  # (B,)
            B = len(loss)
            S = int(B * 0.5)
            loss, _ = torch.sort(loss, descending=True)
            loss = loss[:S]
            loss = torch.mean(loss) * 5.0
        else:
            loss = self.loss(y_hat, y) * 5.0
        self.log('train_loss', loss, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.backbone(x)
        loss = self.loss(y_hat, y)
        self.log('val_loss', loss, on_step=True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.backbone(x)
        loss = self.loss(y_hat, y)
        self.log('test_loss', loss)

    def configure_optimizers(self):
        #return torch.optim.Adam(self.parameters(), lr=0.0002)
        opt = torch.optim.SGD(self.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005)

        def lr_step_func(epoch):
            return 0.1 ** len([m for m in [15, 25, 28] if m <= epoch])

        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer=opt, lr_lambda=lr_step_func)
        lr_scheduler = {
            'scheduler': scheduler,
            'name': 'learning_rate',
            'interval': 'epoch',
            'frequency': 1}
        return [opt], [lr_scheduler]


def cli_main():
    pl.seed_everything(727)

    # ------------
    # args
    # ------------
    parser = ArgumentParser()
    parser.add_argument('--backbone', default='resnet50d', type=str)
    parser.add_argument('--batch_size', default=64, type=int)
    parser.add_argument('--root', default='data/synthetics', type=str)
    parser.add_argument('--num-gpus', default=2, type=int)
    parser.add_argument('--tf32', action='store_true')
    parser = pl.Trainer.add_argparse_args(parser)
    args = parser.parse_args()

    if not args.tf32:
        torch.backends.cuda.matmul.allow_tf32 = False
        torch.backends.cudnn.allow_tf32 = False
    else:
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
    torch.backends.cudnn.benchmark = True

    # ------------
    # data
    # ------------
    train_set = FaceDataset(root_dir=args.root, is_train=True)
    val_set = FaceDataset(root_dir=args.root, is_train=False)

    train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, num_workers=3, pin_memory=True)
    val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False)

    # ------------
    # model
    # ------------
    model = FaceSynthetics(backbone=args.backbone)
    ckpt_path = 'work_dirs/synthetics'
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)

    # ------------
    # training
    # ------------
    checkpoint_callback = ModelCheckpoint(
        monitor='val_loss',
        dirpath=ckpt_path,
        filename='{epoch:02d}-{val_loss:.6f}',
        save_top_k=10,
        mode='min',
    )
    lr_monitor = LearningRateMonitor(logging_interval='step')
    trainer = pl.Trainer(
        gpus=args.num_gpus,
        accelerator="ddp",
        benchmark=True,
        logger=TensorBoardLogger(osp.join(ckpt_path, 'logs')),
        callbacks=[checkpoint_callback, lr_monitor],
        check_val_every_n_epoch=1,
        progress_bar_refresh_rate=1,
        max_epochs=30,
    )
    trainer.fit(model, train_loader, val_loader)


if __name__ == '__main__':
    cli_main()
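The `lr_step_func` in `configure_optimizers` above multiplies the base LR of 0.1 by `0.1 ** (number of milestones in [15, 25, 28] already reached)`, i.e. a 10x drop at epochs 15, 25 and 28. A tiny worked check:

```python
def lr_step_func(epoch):
    return 0.1 ** len([m for m in [15, 25, 28] if m <= epoch])

base_lr = 0.1
for epoch in (0, 15, 25, 28):
    print(epoch, base_lr * lr_step_func(epoch))
# -> 0.1 before epoch 15, then 0.01, 0.001 and 0.0001
```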
insightface/attribute/README.md
ADDED
@@ -0,0 +1,33 @@
## Face Attribute


<div align="left">
<img src="https://insightface.ai/assets/img/custom/logo3.jpg" width="320"/>
</div>


## Introduction

These are the face attribute methods of [InsightFace](https://insightface.ai).


<div align="left">
<img src="https://insightface.ai/assets/img/github/t1_genderage.jpg" width="600"/>
</div>



## Methods


Supported methods:

- [x] [Gender_Age](gender_age)



## Contributing

We appreciate all contributions to improve the face attribute module of InsightFace.
insightface/attribute/_datasets_/README.md
ADDED
@@ -0,0 +1,15 @@
# Face Attribute Datasets

(Updating)

## Training Datasets

### CelebA

https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html



## Test Datasets
insightface/attribute/gender_age/test.py
ADDED
@@ -0,0 +1,24 @@
import argparse
import cv2
import sys
import numpy as np
import insightface
from insightface.app import FaceAnalysis
from insightface.data import get_image as ins_get_image


parser = argparse.ArgumentParser(description='insightface gender-age test')
# general
parser.add_argument('--ctx', default=0, type=int, help='ctx id, <0 means using cpu')
args = parser.parse_args()

app = FaceAnalysis(allowed_modules=['detection', 'genderage'])
app.prepare(ctx_id=args.ctx, det_size=(640, 640))

img = ins_get_image('t1')
faces = app.get(img)
assert len(faces) == 6
for face in faces:
    print(face.bbox)
    print(face.sex, face.age)
insightface/benchmarks/train/nvidia_a10.md
ADDED
@@ -0,0 +1,48 @@
# Training performance report on NVIDIA A10

[NVIDIA A10 Tensor Core GPU](https://www.nvidia.com/en-us/data-center/products/a10-gpu/)

We can use the A10 to train deep learning models thanks to its FP16 and TF32 support.



## Test Server Spec

| Key | Value |
| ------------ | ------------------------------------------------ |
| System | ServMax G408-X2 Rackmountable Server |
| CPU | 2 x Intel(R) Xeon(R) Gold 5220R CPU @ 2.20GHz |
| Memory | 384GB, 12 x Samsung 32GB DDR4-2933 |
| GPU | 8 x NVIDIA A10 22GB |
| Cooling | 2x Customized GPU Kit for GPU support FAN-1909L2 |
| Hard Drive | Intel SSD S4500 1.9TB/SATA/TLC/2.5" |
| OS | Ubuntu 16.04.7 LTS |
| Installation | CUDA 11.1, cuDNN 8.0.5 |
| Installation | Python 3.7.10 |
| Installation | PyTorch 1.9.0 (conda) |

This server is donated by [AMAX](https://www.amaxchina.com/), many thanks!



## Experiments on arcface_torch

We report training speed in the following table; please also note that:

1. The training dataset is in mxnet record format and located on an SSD hard drive.

2. The embedding size is set to 512.

3. We use a large dataset which contains about 618K identities to simulate real cases.

| Dataset | Classes | Backbone | Batch-size | FP16 | TF32 | Samples/sec |
| ----------- | ------- | ----------- | ---------- | ---- | ---- | ----------- |
| WebFace600K | 618K | IResNet-50 | 1024 | × | × | ~2040 |
| WebFace600K | 618K | IResNet-50 | 1024 | × | √ | ~2255 |
| WebFace600K | 618K | IResNet-50 | 1024 | √ | × | ~3300 |
| WebFace600K | 618K | IResNet-50 | 1024 | √ | √ | ~3360 |
| WebFace600K | 618K | IResNet-50 | 2048 | √ | √ | ~3940 |
| WebFace600K | 618K | IResNet-100 | 1024 | √ | √ | ~2210 |
| WebFace600K | 618K | IResNet-180 | 1024 | √ | √ | ~1410 |
insightface/benchmarks/train/nvidia_a100.md
ADDED
@@ -0,0 +1,53 @@
# Training performance report on NVIDIA A100

[NVIDIA A100 Tensor Core GPU](https://www.nvidia.com/en-us/data-center/a100/)



## Test Server Spec

| Key | Value |
| ------------ | ------------------------------------------------ |
| System | ServMax G408-X2 Rackmountable Server |
| CPU | 2 x Intel(R) Xeon(R) Gold 5220R CPU @ 2.20GHz |
| Memory | 384GB, 12 x Samsung 32GB DDR4-2933 |
| GPU | 8 x NVIDIA A100 80GB |
| Cooling | 2x Customized GPU Kit for GPU support FAN-1909L2 |
| Hard Drive | Intel SSD S4500 1.9TB/SATA/TLC/2.5" |
| OS | Ubuntu 16.04.7 LTS |
| Installation | CUDA 11.1, cuDNN 8.0.5 |
| Installation | Python 3.7.10 |
| Installation | PyTorch 1.9.0 (conda) |

This server is donated by [AMAX](https://www.amaxchina.com/), many thanks!



## Experiments on arcface_torch

We report training speed in the following table; please also note that:

1. The training dataset is in mxnet record format and located on an SSD hard drive.
2. The embedding size is set to 512.
3. We use large datasets with about 618K/2M identities to simulate real cases.
4. We test the 10K batch size on a real dataset to take full advantage of the 80GB memory.
5. We also test on huge synthetic datasets which include 50M~80M classes.

| Dataset | Classes | Backbone | Batch-size | PFC | FP16 | TF32 | Samples/sec | GPU Mem(GB) |
| ----------- | ------- | ----------- | ---------- | ---- | ---- | ---- | ----------- | ----------- |
| WebFace600K | 618K | IResNet-50 | 1024 | × | × | × | ~3670 | ~18.2 |
| WebFace600K | 618K | IResNet-50 | 1024 | × | × | √ | ~4760 | ~15.0 |
| WebFace600K | 618K | IResNet-50 | 1024 | × | √ | × | ~5170 | ~10.1 |
| WebFace600K | 618K | IResNet-50 | 1024 | × | √ | √ | ~5400 | ~10.1 |
| WebFace600K | 618K | IResNet-50 | 2048 | × | √ | √ | ~7780 | ~16.4 |
| WebFace600K | 618K | IResNet-50 | 10240 | × | √ | √ | ~9400 | ~66.7 |
| WebFace600K | 618K | IResNet-100 | 1024 | × | √ | √ | ~3700 | ~13.1 |
| WebFace600K | 618K | IResNet-180 | 1024 | × | √ | √ | ~2380 | ~17.5 |
| WebFace2M | 2M | IResNet-100 | 1024 | × | √ | √ | ~3480 | ~20.5 |
| WebFace2M | 2M | IResNet-180 | 1024 | × | √ | √ | ~2350 | ~25.0 |
| WebFace2M | 2M | IResNet-300 | 1024 | × | √ | √ | ~1541 | ~32.6 |
| Virtual | 50M | IResNet-50 | 1024 | 0.1 | √ | √ | ~2700 | ~54.1 |
| Virtual | 70M | IResNet-50 | 1024 | 0.1 | √ | √ | ~2170 | ~73.7 |
| Virtual | 80M | IResNet-50 | 1024 | 0.1 | √ | √ | ~1080 | ~79.6 |
insightface/benchmarks/train/nvidia_a30.md
ADDED
@@ -0,0 +1,52 @@
# Training performance report on NVIDIA A30

[NVIDIA A30 Tensor Core GPU](https://www.nvidia.com/en-us/data-center/products/a30-gpu/) is the most versatile mainstream compute GPU for AI inference and mainstream enterprise workloads.

Besides, we can also use the A30 to train deep learning models thanks to its FP16 and TF32 support.



## Test Server Spec

| Key | Value |
| ------------ | ------------------------------------------------ |
| System | ServMax G408-X2 Rackmountable Server |
| CPU | 2 x Intel(R) Xeon(R) Gold 5220R CPU @ 2.20GHz |
| Memory | 384GB, 12 x Samsung 32GB DDR4-2933 |
| GPU | 8 x NVIDIA A30 24GB |
| Cooling | 2x Customized GPU Kit for GPU support FAN-1909L2 |
| Hard Drive | Intel SSD S4500 1.9TB/SATA/TLC/2.5" |
| OS | Ubuntu 16.04.7 LTS |
| Installation | CUDA 11.1, cuDNN 8.0.5 |
| Installation | Python 3.7.10 |
| Installation | PyTorch 1.9 (conda) |

This server is donated by [AMAX](https://www.amaxchina.com/), many thanks!



## Experiments on arcface_torch

We report training speed in the following table; please also note that:

1. The training dataset is in mxnet record format and located on an SSD hard drive.

2. The embedding size is set to 512.

3. We use a large dataset which contains about 618K identities to simulate real cases.

| Dataset | Classes | Backbone | Batch-size | FP16 | TF32 | Samples/sec |
| ----------- | ------- | ----------- | ---------- | ---- | ---- | ----------- |
| WebFace600K | 618K | IResNet-50 | 1024 | × | × | ~2230 |
| WebFace600K | 618K | IResNet-50 | 1024 | × | √ | ~3200 |
| WebFace600K | 618K | IResNet-50 | 1024 | √ | × | ~3940 |
| WebFace600K | 618K | IResNet-50 | 1024 | √ | √ | ~4350 |
| WebFace600K | 618K | IResNet-50 | 2048 | √ | √ | ~5100 |
| WebFace600K | 618K | IResNet-100 | 1024 | √ | √ | ~2810 |
| WebFace600K | 618K | IResNet-180 | 1024 | √ | √ | ~1800 |
insightface/benchmarks/train/nvidia_rtx3080.md
ADDED
@@ -0,0 +1,58 @@
# Training performance report on NVIDIA RTX3080

[GeForce RTX 3080](https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/rtx-3080-3080ti/)
The GeForce RTX™ 3080 Ti and RTX 3080 graphics cards deliver the ultra performance that gamers crave, powered by Ampere, NVIDIA's 2nd gen RTX architecture. They are built with enhanced RT Cores and Tensor Cores, new streaming multiprocessors, and superfast G6X memory for an amazing gaming experience.

Besides, we can also use the GeForce RTX™ 3080 to train deep learning models thanks to its FP16 and TF32 support.



## Test Server Spec

| Key | Value |
|--------------|---------------------------------------------------|
| CPU | 2 x Intel(R) Xeon(R) Platinum 8255C CPU @ 2.50GHz |
| Memory | 384GB |
| GPU | 8 x GeForce RTX™ 3080 |
| OS | Ubuntu 18.04.4 LTS |
| Installation | CUDA 11.1 |
| Installation | Python 3.7.3 |
| Installation | PyTorch 1.9.0 (pip) |


## Experiments on arcface_torch

We report training speed in the following table; please also note that:

1. The training dataset is SyntheticDataset.

2. The embedding size is set to 512.


### 1. 2 Million Identities

We use a large dataset which contains about 2 million identities to simulate real cases.


| Dataset | Classes | Backbone | Batch-size | FP16 | Partial FC | Samples/sec |
|------------|------------|------------|------------|------|------------|-------------|
| WebFace40M | 2 Millions | IResNet-50 | 512 | × | × | Fail |
| WebFace40M | 2 Millions | IResNet-50 | 512 | × | √ | ~2190 |
| WebFace40M | 2 Millions | IResNet-50 | 512 | √ | × | Fail |
| WebFace40M | 2 Millions | IResNet-50 | 512 | √ | √ | ~2620 |
| WebFace40M | 2 Millions | IResNet-50 | 1024 | × | × | Fail |
| WebFace40M | 2 Millions | IResNet-50 | 1024 | × | √ | Fail |
| WebFace40M | 2 Millions | IResNet-50 | 1024 | √ | × | Fail |
| WebFace40M | 2 Millions | IResNet-50 | 1024 | √ | √ | ~3800 |

### 2. 600K Identities

We use a large dataset which contains about 600K identities to simulate real cases.

| Dataset | Classes | Backbone | Batch-size | Partial FC | FP16 | Samples/sec |
|-------------|---------|------------|------------|------------|------|-------------|
| WebFace600K | 618K | IResNet-50 | 512 | × | × | ~2023 |
| WebFace600K | 618K | IResNet-50 | 512 | × | √ | ~2392 |
| WebFace600K | 618K | IResNet-50 | 1024 | × | × | Fail |
| WebFace600K | 618K | IResNet-50 | 1024 | × | √ | Fail |
| WebFace600K | 618K | IResNet-50 | 1024 | √ | √ | ~4010 |
insightface/benchmarks/train/nvidia_rtx3090.md
ADDED
@@ -0,0 +1,57 @@
# Training performance report on NVIDIA RTX3090

[GEFORCE RTX 3090](https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/rtx-3090/)
The GeForce RTX™ 3090 is a big ferocious GPU (BFGPU) with TITAN class performance.

Besides, we can also use the GeForce RTX™ 3090 to train deep learning models thanks to its FP16 and TF32 support.



## Test Server Spec

| Key | Value |
|--------------|---------------------------------------------------|
| CPU | 2 x Intel(R) Xeon(R) Platinum 8255C CPU @ 2.50GHz |
| Memory | 384GB |
| GPU | 8 x GeForce RTX™ 3090 |
| OS | Ubuntu 18.04.4 LTS |
| Installation | CUDA 11.1 |
| Installation | Python 3.7.3 |
| Installation | PyTorch 1.9.0 (pip) |


## Experiments on arcface_torch

We report training speed in the following table; please also note that:

1. The training dataset is SyntheticDataset.

2. The embedding size is set to 512.


### 1. 2 Million Identities

We use a large dataset which contains about 2 million identities to simulate real cases.

| Dataset | Classes | Backbone | Batch-size | FP16 | TF32 | Partial FC | Samples/sec |
|------------|------------|------------|------------|------|------|------------|-------------|
| WebFace40M | 2 Millions | IResNet-50 | 512 | × | × | × | ~1750 |
| WebFace40M | 2 Millions | IResNet-50 | 512 | × | √ | × | ~1810 |
| WebFace40M | 2 Millions | IResNet-50 | 512 | √ | √ | × | ~2056 |
| WebFace40M | 2 Millions | IResNet-50 | 512 | √ | √ | √ | ~2850 |
| WebFace40M | 2 Millions | IResNet-50 | 1024 | √ | √ | × | ~2810 |
| WebFace40M | 2 Millions | IResNet-50 | 1024 | √ | √ | √ | ~4220 |
| WebFace40M | 2 Millions | IResNet-50 | 2048 | √ | √ | √ | ~5330 |


### 2. 600K Identities

We use a large dataset which contains about 600K identities to simulate real cases.

| Dataset | Classes | Backbone | Batch-size | FP16 | Samples/sec |
|-------------|---------|------------|------------|------|-------------|
| WebFace600K | 618K | IResNet-50 | 512 | × | ~2220 |
| WebFace600K | 618K | IResNet-50 | 512 | √ | ~2610 |
| WebFace600K | 618K | IResNet-50 | 1024 | × | ~2940 |
| WebFace600K | 618K | IResNet-50 | 1024 | √ | ~3790 |
| WebFace600K | 618K | IResNet-50 | 2048 | √ | ~4680 |
insightface/benchmarks/train/nvidia_v100.md
ADDED
@@ -0,0 +1,54 @@
# Training performance report on NVIDIA® V100

[NVIDIA® V100](https://www.nvidia.com/en-us/data-center/v100/)
NVIDIA® V100 Tensor Core is the most advanced data center GPU ever built to accelerate AI, high performance computing (HPC), data science and graphics. It's powered by the NVIDIA Volta architecture, comes in 16 and 32GB configurations, and offers the performance of up to 32 CPUs in a single GPU.

Besides, we can also use the NVIDIA® V100 to train deep learning models thanks to its FP16 and FP32 support.

## Test Server Spec

| Key | Value |
|--------------|----------------------------------------------|
| CPU | 2 x Intel(R) Xeon(R) Gold 6133 CPU @ 2.50GHz |
| Memory | 384GB |
| GPU | 8 x Tesla V100-SXM2-32GB |
| OS | Ubuntu 16.04 LTS |
| Installation | CUDA 10.2 |
| Installation | Python 3.7.3 |
| Installation | PyTorch 1.9.0 (pip) |

## Experiments on arcface_torch

We report training speed in the following table; please also note that:

1. The training dataset is SyntheticDataset.

2. The embedding size is set to 512.

### 1. 2 Million Identities

We use a large dataset which contains about 2 million identities to simulate real cases.

| Dataset | Classes | Backbone | Batch-size | FP16 | Partial FC | Samples/sec |
|------------|------------|------------|------------|------|------------|-------------|
| WebFace40M | 2 Millions | IResNet-50 | 512 | × | × | ~1868 |
| WebFace40M | 2 Millions | IResNet-50 | 512 | × | √ | ~2712 |
| WebFace40M | 2 Millions | IResNet-50 | 512 | √ | × | ~2576 |
| WebFace40M | 2 Millions | IResNet-50 | 512 | √ | √ | ~4501 |
| WebFace40M | 2 Millions | IResNet-50 | 1024 | × | × | ~1960 |
| WebFace40M | 2 Millions | IResNet-50 | 1024 | × | √ | ~2922 |
| WebFace40M | 2 Millions | IResNet-50 | 1024 | √ | × | ~2810 |
| WebFace40M | 2 Millions | IResNet-50 | 1024 | √ | √ | ~5430 |
| WebFace40M | 2 Millions | IResNet-50 | 2048 | √ | √ | ~6095 |

### 2. 600K Identities

We use a large dataset which contains about 600K identities to simulate real cases.

| Dataset | Classes | Backbone | Batch-size | FP16 | Samples/sec |
|-------------|---------|------------|------------|------|-------------|
| WebFace600K | 618K | IResNet-50 | 512 | × | ~2430 |
| WebFace600K | 618K | IResNet-50 | 512 | √ | ~3889 |
| WebFace600K | 618K | IResNet-50 | 1024 | × | ~2607 |
| WebFace600K | 618K | IResNet-50 | 1024 | √ | ~4322 |
| WebFace600K | 618K | IResNet-50 | 2048 | √ | ~4921 |
insightface/body/human_pose/ambiguity_aware/README.md
ADDED
@@ -0,0 +1,94 @@
# Towards Alleviating the Modeling Ambiguity of Unsupervised Monocular 3D Human Pose Estimation

## Introduction

**Ambiguity-Aware** studies the ambiguity problem in the task of unsupervised 3D human pose estimation from its 2D counterpart; please refer to the [ICCV 2021 paper](https://openaccess.thecvf.com/content/ICCV2021/papers/Yu_Towards_Alleviating_the_Modeling_Ambiguity_of_Unsupervised_Monocular_3D_Human_ICCV_2021_paper.pdf) for more details.


<div align="center">
<img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/all.gif" alt="videovis" width="800">
</div>


## Installation
```
conda create -n uvhpe python=3.6
conda activate uvhpe
pip install -r requirements.txt
# for output, tensorboard, visualization
mkdir log output vis models data
```

## Dataset And Pretrained Models
Download our preprocessed dataset into `data` and pretrained models into `models` from the [webpage](https://sites.google.com/view/ambiguity-aware-hpe).

This part will be updated soon.
## Inference
We put some samples with preprocessed 2D keypoints at `scripts/demo_input`. Run inference with the command `sh demo.sh`; output can be found at `scripts/demo_output`.

## Evaluation
### Evaluation on Human3.6M
##### 2D ground-truth as inputs
* baseline `python main.py --cfg ../cfg/h36m_gt_adv.yaml --pretrain ../models/adv.pth.tar --gpu 0 --eval `
* scale `python main.py --cfg ../cfg/h36m_gt_scale.yaml --pretrain ../models/tmc_klbone.pth.tar --eval --gpu 0`

##### 2D predictions as inputs
* baseline `python main.py --cfg ../cfg/pre_adv.yaml --pretrain ../models/pre_adv.pth.tar --gpu 0 --eval `
* scale `python main.py --cfg ../cfg/pre_tmc_klbone.yaml --pretrain ../models/pre_tmc_klbone.pth.tar --gpu 0 --eval `

**Note:** the baseline is our reproduced version of "Unsupervised 3D Pose Estimation with Geometric Self-Supervision".

### Evaluation on LSP
Use the pretrained model from Human3.6M:

`python eval_lsp.py --cfg ../cfg/h36m_gt_scale.yaml --pretrain ../models/tmc_klbone.pth.tar`

### Results

The expected **MPJPE** and **P-MPJPE** results on the **Human36M** dataset are shown here:

| Input | Model | MPJPE (mm) | PMPJPE (mm) |
| :--------- | :------------ | :------------: | :------------: |
| GT | baseline | 105.0 | 46.0 |
| GT | best | 87.85 | 42.0 |
| Pre | baseline | 113.3 | 54.9 |
| Pre | best | 93.1 | 52.3 |


**Note:** the MPJPE from this evaluation is slightly different from the performance reported in the paper, because the MPJPE in the paper is the best MPJPE observed during training.



## Training
### Human3.6M
* Using ground-truth 2D as inputs:

baseline `python main.py --cfg ../cfg/h36m_gt_adv.yaml --gpu 0 `

best `python main.py --cfg ../cfg/h36m_gt_scale.yaml --gpu 0`

* Using predicted 2D as inputs:

baseline `python main.py --cfg ../cfg/pre_adv.yaml --gpu 0 `

best `python main.py --cfg ../cfg/pre_tmc_klbone.yaml --gpu 0`

## Visualization

### Human3.6M
<div align="center">
<img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/S9_Discussion 1.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/S9_Phoning 1.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/S9_Photo.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/S9_WalkTogether 1.gif" width="200"/>
</div>

### Surreal
<div align="center">
<img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/surreal1.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/surreal2.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/surreal3.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/surreal4.gif" width="200"/>
</div>

### MPI-3DHP
<div align="center">
<img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/TS1.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/TS2.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/TS3.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/TS6.gif" width="200"/>
</div>


### The code of our other ICCV 2021 paper, Skeleton2Mesh, is coming soon!
insightface/body/human_pose/ambiguity_aware/cfg/h36m_gt_adv.yaml
ADDED
@@ -0,0 +1,36 @@
BATCH_SIZE: 512
DATA:
  NUM_FRAMES: 1
  SCALE_MID_MEAN: 0.720643
  SCALE_MID_STD: 0.058
  USE_RANDOM_DIFF: true
NETWORK:
  DIS_RES_BLOCKS: 2
  DIS_TEMP_RES_BLOCKS: 2
  DIS_USE_SPECTRAL_NORM: false
  SCALER_INPUT_SIZE: 34
TRAIN:
  BOUND_AZIM: 2.44346
  BOUND_ELEV: 0.34906585
  DIS_LR: 0.0002
  LOSS_TYPE: ss_adv
  LOSS_WEIGHTS:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  MAINNET_CRITICS: 4
  NUM_CRITICS: 3
  NUM_CRITICS_TEMP: 3
  POSE_LR: 0.0002
  PRETRAIN_LIFTER: false
  SCALE_LOSS_WEIGHTS:
  - 0.001
  - 1.0
  SUBNET_CRITICS: 1
  TEMP_LR: 0.0002
  USE_CYCLE: false
  USE_NEW_ROT: false
  USE_NEW_TEMP: false
  USE_SCALER: false
  USE_GT: true
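These configs are plain YAML. A minimal sketch of loading one into an attribute-style dict with `pyyaml` and `easydict` (both listed in `requirements.txt`); the repository's actual config loader may differ:

```python
# Sketch only: read a config like the one above with pyyaml + easydict.
# The repo's own loader may add defaults or validation on top of this.
import yaml
from easydict import EasyDict as edict

with open("cfg/h36m_gt_adv.yaml") as f:
    cfg = edict(yaml.safe_load(f))

print(cfg.BATCH_SIZE)          # 512
print(cfg.TRAIN.LOSS_TYPE)     # ss_adv
print(cfg.TRAIN.LOSS_WEIGHTS)  # [1.0, 1.0, 1.0, 1.0]
```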
insightface/body/human_pose/ambiguity_aware/cfg/h36m_gt_scale.yaml
ADDED
@@ -0,0 +1,40 @@
BATCH_SIZE: 512
DATA:
  NUM_FRAMES: 1
  SCALE_MID_MEAN: 0.720643
  SCALE_MID_STD: 0.058
  USE_RANDOM_DIFF: true
NETWORK:
  DIS_RES_BLOCKS: 2
  DIS_TEMP_RES_BLOCKS: 2
  DIS_USE_SPECTRAL_NORM: false
  SCALER_INPUT_SIZE: 34
TRAIN:
  BOUND_AZIM: 2.44346
  BOUND_ELEV: 0.34906585
  DIS_LR: 0.0001
  LOSS_TYPE: ss_adv
  LOSS_WEIGHTS:
  - 0.5
  - 5.0
  - 1.0
  - 1.0
  MAINNET_CRITICS: 4
  NUM_CRITICS: 3
  NUM_CRITICS_TEMP: 3
  POSE_LR: 0.00015
  PRETRAIN_LIFTER: false
  SCALE_LOSS_WEIGHTS:
  - 0.001
  - 1.0
  SUBNET_CRITICS: 1
  TEMP_LR: 0.0001
  SCHEDULER_STEP_SIZE: 5
  USE_CYCLE: true
  USE_NEW_ROT: false
  USE_NEW_TEMP: true
  USE_SCALER: true
  USE_GT: true
FIX:
  FIX_TRAJ: true
  FIX_TRAJ_BY_ROT: false
insightface/body/human_pose/ambiguity_aware/cfg/pre_adv.yaml
ADDED
@@ -0,0 +1,36 @@
BATCH_SIZE: 512
DATA:
  NUM_FRAMES: 1
  SCALE_MID_MEAN: 0.720643
  SCALE_MID_STD: 0.058
  USE_RANDOM_DIFF: true
NETWORK:
  DIS_RES_BLOCKS: 2
  DIS_TEMP_RES_BLOCKS: 2
  DIS_USE_SPECTRAL_NORM: false
  SCALER_INPUT_SIZE: 34
TRAIN:
  BOUND_AZIM: 2.44346
  BOUND_ELEV: 0.34906585
  DIS_LR: 0.0001
  LOSS_TYPE: ss_adv
  LOSS_WEIGHTS:
  - 0.5
  - 5.0
  - 1.0
  - 1.0
  MAINNET_CRITICS: 4
  NUM_CRITICS: 3
  NUM_CRITICS_TEMP: 3
  POSE_LR: 0.0001
  PRETRAIN_LIFTER: false
  SCALE_LOSS_WEIGHTS:
  - 0.001
  - 1.0
  SUBNET_CRITICS: 1
  TEMP_LR: 0.0002
  USE_CYCLE: false
  USE_NEW_ROT: false
  USE_NEW_TEMP: false
  USE_SCALER: false
  USE_GT: false
insightface/body/human_pose/ambiguity_aware/cfg/pre_tmc_klbone.yaml
ADDED
@@ -0,0 +1,39 @@
BATCH_SIZE: 512
DATA:
  EXP_TMC: true
  EXP_TMC_DETERMINISTIC: true
  EXP_TMC_INTERVAL: 3
  NUM_FRAMES: 1
  SCALE_MID_MEAN: 0.720643
  SCALE_MID_STD: 0.058
  USE_RANDOM_DIFF: true
NETWORK:
  DIS_RES_BLOCKS: 2
  DIS_TEMP_RES_BLOCKS: 2
  DIS_USE_SPECTRAL_NORM: false
  SCALER_INPUT_SIZE: 34
TRAIN:
  BOUND_AZIM: 2.44346
  BOUND_ELEV: 0.34906585
  DIS_LR: 0.0001
  LOSS_TYPE: ss_adv
  LOSS_WEIGHTS:
  - 0.5
  - 5.0
  - 1.0
  - 1.0
  MAINNET_CRITICS: 4
  NUM_CRITICS: 3
  NUM_CRITICS_TEMP: 3
  POSE_LR: 0.0001
  PRETRAIN_LIFTER: false
  SCALE_LOSS_WEIGHTS:
  - 0.01
  - 1.0
  SUBNET_CRITICS: 1
  TEMP_LR: 0.0002
  USE_CYCLE: true
  USE_NEW_ROT: false
  USE_NEW_TEMP: false
  USE_SCALER: true
  USE_GT: false
insightface/body/human_pose/ambiguity_aware/requirements.txt
ADDED
@@ -0,0 +1,12 @@
torch==1.13.1
torchvision==0.5.0
tqdm
opencv-python
scikit-learn
matplotlib
h5py
pyyaml
seaborn
imageio
easydict
tensorboardX
insightface/body/human_pose/ambiguity_aware/scripts/_init_paths.py
ADDED
@@ -0,0 +1,13 @@
import os.path as osp
import sys


def add_path(path):
    if path not in sys.path:
        sys.path.insert(0, path)


this_dir = osp.dirname(__file__)

lib_path = osp.join(this_dir, '..')
add_path(lib_path)
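Usage note (a sketch, assuming the scripts follow the usual pattern for this helper): a script in `scripts/` imports this module first so that the directory one level up lands on `sys.path`.

```python
# Hypothetical usage from a script inside scripts/ (e.g. main.py):
import _init_paths  # noqa: F401 -- side effect only: prepends '..' to sys.path
# After this, packages that live one level above scripts/ can be imported;
# the exact package names are not shown in this diff.
```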
insightface/body/human_pose/ambiguity_aware/scripts/demo.sh
ADDED
@@ -0,0 +1,5 @@
#!/bin/bash

rm -rf demo_output

python inference.py --indir demo_input --outdir demo_output --cfg ../cfg/h36m_gt_scale.yaml --pretrain ../models/tmc_klbone.pth.tar
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/0.jpg
ADDED
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/0.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:15e90d8305cb0946a6f3c08c625dad617032102e520eb9c5d4c17d0af1609482
size 513
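Note: the demo `.pkl` files are stored as Git LFS pointers (as the `version https://git-lfs.github.com/spec/v1` header shows), so after cloning you need `git lfs install` and `git lfs pull` to fetch the actual keypoint data.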
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/1.jpg
ADDED
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/1.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6aa3178631c3dd2613b3def95a2c17e284bb262ee62cefdf647fe768a8c6efc6
size 513
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/10.jpg
ADDED
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/10.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:01f9a4cc1b06719e65b4af28a3ac62e27afaadacc1028c9ae8e8e96d5724e23e
size 513
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/11.jpg
ADDED
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/11.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:88c1b12e23512a66e8cb7802c06b447924ad5efae06098caf9de42ec996cd1ef
size 513